! ========================================================================
! Copyright (C) by GemTalk Systems 2011-2020.  All Rights Reserved
! file image/icu.gs
! ========================================================================

!-----------------------------------------------------------

set class IcuLocale
removeallmethods
removeallclassmethods

category: 'Documentation'
classmethod: IcuLocale
comment
"Answers the class documentation String for IcuLocale.  The text below is
 returned verbatim, so editing it changes what callers of this method see."
^ '  An IcuLocale encapsulates an instance of C++ Locale from
  the libicu libraries, documented at icu-project.org .
  C data is a Locale from locid.h, not freed on GC,
  auto-initialized when the instance is faulted into memory.
  
  Classes Unicode7, Unicode16 and Unicode32 are added to support
  Locale sensitive comparison of Strings, and fix bug 41964 .
  Comparison and conversion between instances of the different
  classes is provided by the libicu libraries, from icu-project.org .  
  These libraries are shipped in $GEMSTONE/lib 
  and automatically loaded during gem or  topaz -l  process initialization.  
  Any libicu libraries installed as part of the operating system 
  are ignored.
 
  Also added is class Utf8 representing Utf8 encoded
  strings, and IcuCollator representing a C++ Collator from libicu.

  These new classes appear in the hierarchy as follows.
  Object
    IcuLocale
    IcuCollator
    Collection
      SequenceableCollection
        ByteArray
          Utf8
        CharacterCollection
          String
            Unicode7
          MultiByteString
            DoubleByteString
              Unicode16
            QuadByteString
              Unicode32

  Class Utf8 supports comparison methods and conversion to
   one of Unicode7, Unicode16, Unicode32  .

  The classes Unicode7, Unicode16, Unicode32 support
  the full CharacterCollection API.

  If the source string of a method compilation is a Unicode32, Unicode16,
  or Unicode7, literal strings within the compiled method will be 
  instances of Unicode32, Unicode16, or Unicode7 using the minimum
  bytes per codePoint required to represent each literal.

Constraints:
	name: String
'

%

category: 'Private'
classmethod: IcuLocale
_locales: opcode value: aValue
  "Private dispatcher onto primitive 914, which fronts several libicu
   locale functions selected by opcode:

     opcode   libicu function
        0     Locale::getISOCountries
        1     Locale::getISOLanguages
        2     Locale::getDefault
        3     Locale::setDefault
        5     Locale::getAvailableLocales
        6     Collator::getAvailableLocales

   The Smalltalk after the primitive validates aValue and then reports the
   primitive failure: aValue must be an IcuLocale for opcode 3 and nil
   for every other opcode."
<primitive: 914>
| requiredClass |
requiredClass := opcode == 3
  ifTrue:[ IcuLocale ]
  ifFalse:[ UndefinedObject ].
aValue _validateClass: requiredClass .
self _primitiveFailed: #_locales:value: args: { opcode . aValue }
%

category: 'Instance Creation'
classmethod: IcuLocale
language: langString country: countryString variant: variantString
  "Answer a new IcuLocale for the given language, country and variant.
   Primitive 912 calls
       Locale::Locale(const char* language,
                      const char* country,
                      const char* variant);
   See source/icu/common/unicode/locid.h
   or  icu-project.org/apiref/icu4c/classLocale.html  for more details.

   Each argument must be nil or a String of size 127 or less, and
   langString must not be nil.  No checking is done in Smalltalk; a
   primitive failure is reported with all three arguments."
<primitive: 912>
| failedArgs |
failedArgs := { langString . countryString . variantString } .
self _primitiveFailed: #language:country:variant: args: failedArgs
%

classmethod: IcuLocale
getUS
  "Answer an IcuLocale built from language 'EN' and country 'US',
   with no variant."
  ^ self language: 'EN'
         country: 'US'
         variant: nil
%

! see icuprim.c for comments re: fix 43104 and en_US_POSIX 
classmethod: IcuLocale
default
  "Answer an IcuLocale equivalent to the default Locale
   (opcode 2 of _locales:value:, Locale::getDefault).

   Until  IcuLocale(C)>>default:  has been executed, the result depends
   on the locale of the operating system.  Where the operating system
   locale would imply en_US_POSIX, en_US is returned instead, because
   en_US_POSIX is intended for binary collation and does not support
   case-insensitive searches."
  | getDefaultOpcode |
  getDefaultOpcode := 2 .
  ^ self _locales: getDefaultOpcode value: nil
%
classmethod: IcuLocale
default: anIcuLocale
  "Make anIcuLocale the default Locale of the ICU library (opcode 3 of
   _locales:value:) and install an IcuCollator built for it as the
   default IcuCollator.  The collator is created before either default
   is changed.  Returns the receiver."
  | newDefaultCollator |
  newDefaultCollator := IcuCollator forLocale: anIcuLocale .
  self _locales: 3 value: anIcuLocale .
  IcuCollator default: newDefaultCollator .
  ^ self
%
classmethod: IcuLocale
new
  "Disallowed; reports shouldNotImplement: for #new.
   Use language:country:variant: to create instances."
  self shouldNotImplement: #new .
  ^ self
%

category: 'Accessing'
classmethod: IcuLocale
isoCountries
  "Answer an Array of Strings obtained from Locale::getISOCountries()
   in locid.h (opcode 0 of _locales:value:)."
  | isoCountriesOpcode |
  isoCountriesOpcode := 0 .
  ^ self _locales: isoCountriesOpcode value: nil
%
classmethod: IcuLocale
isoLanguages
  "Answer an Array of Strings obtained from Locale::getISOLanguages()
   in locid.h (opcode 1 of _locales:value:)."
  | isoLanguagesOpcode |
  isoLanguagesOpcode := 1 .
  ^ self _locales: isoLanguagesOpcode value: nil
%
classmethod: IcuLocale
availableLocales
  "Answer an Array of IcuLocales obtained from
   Locale::getAvailableLocales() in locid.h (opcode 5 of _locales:value:)."
  | availableLocalesOpcode |
  availableLocalesOpcode := 5 .
  ^ self _locales: availableLocalesOpcode value: nil
%

category: 'Private'
method: IcuLocale
_getAttribute: opcode
  "Private.  Fetch one attribute of the receiver through primitive 913.
   opcode selects the attribute:
     0 getLanguage, 1 getScript, 2 getCountry, 3 getVariant,
     4 getName,     5 getDisplayName
   A primitive failure is reported with the opcode as its argument."
<primitive: 913>
| failedArgs |
failedArgs := { opcode } .
self _primitiveFailed: #_getAttribute: args: failedArgs
%

category: 'Formatting'
method: IcuLocale
asString
  "Answer the String 'IcuLocale ' followed by the receiver's name."
  | prefix |
  prefix := 'IcuLocale ' .
  ^ prefix , name asString
%
method: IcuLocale
printOn: aStream
  "Append the receiver's asString form to aStream."
  | text |
  text := self asString .
  aStream nextPutAll: text
%

category: 'Accessing'
method: IcuLocale
language
  "Answer a String, the locale's ISO-639 language code
   (attribute 0; see Locale::getLanguage() in locid.h)."
  | languageAttr |
  languageAttr := 0 .
  ^ self _getAttribute: languageAttr
%
method: IcuLocale
script
  "Answer a String, the locale's ISO-15924 abbreviation script code
   (attribute 1; see Locale::getScript() in locid.h)."
  | scriptAttr |
  scriptAttr := 1 .
  ^ self _getAttribute: scriptAttr
%
method: IcuLocale
country
  "Answer a String, the locale's ISO-3166 country code
   (attribute 2; see Locale::getCountry() in locid.h)."
  | countryAttr |
  countryAttr := 2 .
  ^ self _getAttribute: countryAttr
%
method: IcuLocale
variant
  "Answer a String, the locale's variant code
   (attribute 3; see Locale::getVariant() in locid.h)."
  | variantAttr |
  variantAttr := 3 .
  ^ self _getAttribute: variantAttr
%
method: IcuLocale
name
  "Answer the receiver's name instance variable: a String naming the
   entire locale, with the language, country and variant separated by
   underbars.  Equivalent to Locale::getName() in locid.h ; this method
   itself does not call the primitive (compare _name)."
  ^ name
%
method: IcuLocale
_name
  "Answer a String, the name of the entire locale, with the language,
   country and variant separated by underbars.  Unlike name, this goes
   through _getAttribute: (attribute 4, Locale::getName() in locid.h)."
  | nameAttr |
  nameAttr := 4 .
  ^ self _getAttribute: nameAttr
%
method: IcuLocale
displayName
  "Answer the displayable name of the receiver using the en_US locale
   (attribute 5; equivalent to Locale::getDisplayName() in locid.h)."
  | displayNameAttr |
  displayNameAttr := 5 .
  ^ self _getAttribute: displayNameAttr
%
category: 'Comparing'
method: IcuLocale
= anIcuLocale
  "Answer true if anIcuLocale is the receiver itself, or is a kind of
   IcuLocale whose name equals the receiver's name.  Answer false
   otherwise, including for arguments that are not IcuLocales."
  self == anIcuLocale ifTrue:[ ^ true ].
  ^ (anIcuLocale isKindOf: IcuLocale)
      and:[ name = anIcuLocale name ]
%
method: IcuLocale
hash
  "Answer the hash of the receiver's name, the same value that = compares."
  ^ name hash
%

!------------------------------------------
set class IcuCollator
removeallmethods
removeallclassmethods

category: 'Documentation'
classmethod: IcuCollator
comment
"Answers the class documentation String for IcuCollator.  The text below is
 returned verbatim, so editing it changes what callers of this method see."
^'An IcuCollator encapsulates an instance of the C++ class Collator 
from the libicu libraries, documented at icu-project.org .
The C++ instance is automatically freed upon in-memory GC of the
instance and is automatically recreated when the instance is faulted in.

Beginning with Gs64 v3.3.1,
the class variable LibraryVersion contains the result of
IcuCollator libraryVersion from creation of the virgin repository,
or from the last upgrade of indexes depending on ICU sort keys .

Constraints:
	locale: IcuLocale
	strength: SmallInteger'
%

category: 'Accessing'
classmethod: IcuCollator
availableLocales
  "Answer an Array of IcuLocales for which there are predefined
   Collators in the libicudata.so shared library.  Obtained from
   Collator::getAvailableLocales() in coll.h, reached through
   opcode 6 of IcuLocale class>>_locales:value: ."
  | collatorLocalesOpcode |
  collatorLocalesOpcode := 6 .
  ^ IcuLocale _locales: collatorLocalesOpcode value: nil
%

category: 'Accessing'
classmethod: IcuCollator
libraryVersion
  "Answer a String such as '58.2', the version of the libicu library
   that is loaded into this session's virtual machine (primitive 568).
   A primitive failure is reported against #libraryVersion."
<primitive: 568>
self _primitiveFailed: #libraryVersion .
^ self
%


category: 'Private'
classmethod: IcuCollator
_forLocale: aIcuLocale
  "Private.  Answer a new IcuCollator for aIcuLocale through primitive 915,
   which calls Collator::createInstance(const Locale &loc, UErrorCode &err).
   This method does not check that a predefined Collator exists for the
   locale; forLocale: and forLocaleNamed: do that before calling here.

   If the primitive fails, aIcuLocale is validated to be an IcuLocale and
   the failure is then reported against this method's own selector,
   #_forLocale: , so the error identifies the method that actually failed."
  <primitive: 915>
  aIcuLocale _validateClass: IcuLocale .
  self _primitiveFailed: #_forLocale: args: { aIcuLocale }
%

classmethod: IcuCollator
_availableCollators
  "Answer a StringKeyValueDictionary mapping locale name to IcuLocale for
   every locale answered by IcuCollator availableLocales, i.e. those with
   predefined Collators in the libicudata.so library.  The dictionary is
   built on first use and kept in SessionTemps under
   #IcuCollator_AVAILABLE_COLLATORS ."
  | cacheKey |
  cacheKey := #IcuCollator_AVAILABLE_COLLATORS .
  ^ (SessionTemps current at: cacheKey otherwise: nil) ifNil:[ | byName |
      byName := StringKeyValueDictionary new .
      IcuCollator availableLocales do:[:each | byName at: each name put: each ].
      SessionTemps current at: cacheKey put: byName .
      byName
    ]
%

category: 'Instance Creation'
classmethod: IcuCollator
default
  "Answer the session's default collator, held in session state slot 20.
   When the slot is empty, a collator for  IcuLocale default  is created,
   installed with default: and answered.

   Note that  IcuLocale default:  will change the default collators,
   and that  IcuLocale(C)>>default  is dependent on the operating system
   locale if  IcuLocale(C)>>default:  has not executed during this
   session."
  | cached fresh |
  cached := System __sessionStateAt: 20 .
  cached == nil ifFalse:[ ^ cached ].
  fresh := self _forLocale: IcuLocale default .
  self default: fresh .
  ^ fresh
%
classmethod: IcuCollator
defaultCaseInsensitive
  "Answer the session's default case-insensitive collator, held in
   session state slot 21.  When the slot is empty, a collator for
   IcuLocale default  is created and installed with default: (which fills
   slot 21), and the slot's new contents are answered.

   Note that  IcuLocale default:  will change the default collators,
   and that  IcuLocale(C)>>default  is dependent on the operating system
   locale if  IcuLocale(C)>>default:  has not executed during this
   session."
  | cached |
  cached := System __sessionStateAt: 21 .
  cached == nil ifFalse:[ ^ cached ].
  self default: (self forLocale: IcuLocale default) .
  ^ System __sessionStateAt: 21
%

classmethod: IcuCollator
default: aIcuCollator
  "Install aIcuCollator as the session's default collator (session state
   slot 20) and the result of  aIcuCollator _copyCaseInsensitive  as the
   default case-insensitive collator (slot 21).
   aIcuCollator must be an IcuCollator."
  | caseInsensitive |
  aIcuCollator _validateClass: IcuCollator .
  System __sessionStateAt: 20 put: aIcuCollator .
  caseInsensitive := aIcuCollator _copyCaseInsensitive .
  System __sessionStateAt: 21 put: caseInsensitive
%

classmethod: IcuCollator
new
  "Disallowed; reports shouldNotImplement: for #new.
   Use forLocale: or forLocaleNamed: to create instances."
  self shouldNotImplement: #new .
  ^ self
%

classmethod: IcuCollator
forLocale: aIcuLocale
  "Answer an instance of IcuCollator built from a predefined Collator
   for aIcuLocale.  The locale's name is looked up in _availableCollators;
   if there is no predefined Collator in the libicudata.so library for
   it, an ArgumentError is signalled."
  | known |
  known := self _availableCollators at: aIcuLocale name otherwise: nil .
  known == nil ifTrue:[
    ArgumentError signal:
      'No predefined IcuCollator for locale ''' , aIcuLocale displayName , ''' '
  ].
  ^ self _forLocale: aIcuLocale
%

! edited for 43082
classmethod: IcuCollator
forLocaleNamed: localeName
  "Answer an instance of IcuCollator built from a predefined Collator,
   using the IcuLocale registered under localeName in _availableCollators.
   If there is no predefined Collator in the libicudata.so library for
   that name, an ArgumentError is signalled."
  | found |
  found := self _availableCollators at: localeName otherwise: nil .
  found == nil ifFalse:[ ^ self _forLocale: found ].
  ArgumentError signal:'No predefined IcuCollator for locale ''' , localeName, ''' '
%

category: 'Private'
classmethod: IcuCollator
_initializeConstants
  "Private.  Define three invariant class variables:
     StrengthArgs  - Array of PRIMARY, SECONDARY, TERTIARY, QUATERNARY
                     and IDENTICAL
     StrengthNames - Array of the corresponding Symbols, in the same order
     StrengthVals  - Array of size IDENTICAL + 1 in which the slot at
                     (strength + 1) holds that strength's Symbol; slots
                     for values that are not a strength stay nil."
  | strengthCodes strengthSymbols symbolByCode |
  strengthCodes := { PRIMARY . SECONDARY . TERTIARY . QUATERNARY . IDENTICAL }.
  strengthSymbols := #( #PRIMARY #SECONDARY #TERTIARY #QUATERNARY #IDENTICAL ) .
  self _addInvariantClassVar: #StrengthArgs value: strengthCodes immediateInvariant .
  self _addInvariantClassVar: #StrengthNames value: strengthSymbols .
  symbolByCode := Array new: IDENTICAL + 1 .
  1 to: strengthCodes size do:[:k | | slot |
    slot := (strengthCodes at: k) + 1 .
    symbolByCode at: slot put: (strengthSymbols at: k)
  ].
  self _addInvariantClassVar: #StrengthVals value: symbolByCode immediateInvariant .
%
run
"Executed at file-in time: defines the StrengthArgs, StrengthNames and
 StrengthVals class variables used by the strength methods that follow."
IcuCollator _initializeConstants .
true
%

category: 'Accessing'
classmethod: IcuCollator
strengths
  "Returns StrengthArgs, the array of strength constants
   (PRIMARY, SECONDARY, TERTIARY, QUATERNARY, IDENTICAL)
   that are allowed as args to IcuCollator>>strength:"

^ StrengthArgs
%
classmethod: IcuCollator
strengthNames
  "Returns StrengthNames, the array of Symbols
   #PRIMARY, #SECONDARY, #TERTIARY, #QUATERNARY, #IDENTICAL
   that IcuCollator>>strength: accepts in place of the strength constants.
   The Symbols are in the same order as the constants answered by strengths."

^ StrengthNames
%
method: IcuCollator
strength
  "Answer the strength of the receiver as a Symbol, one of
   #PRIMARY, #SECONDARY, #TERTIARY, #QUATERNARY, #IDENTICAL.
   The value of attribute 0 is mapped to its Symbol via StrengthVals,
   which is indexed by (value + 1)."
  | code |
  code := self _getAttribute: 0 .
  ^ StrengthVals at: code + 1
%

category: 'Private'
method: IcuCollator
_setAttribute: opcode value: val
  "Private.  Set one attribute of the receiver through primitive 916.
   The Smalltalk below validates val according to opcode and then
   reports the primitive failure:
     opcode 0 (strength)  - val must be a SmallInteger that is one of
                            IcuCollator strengths, else OutOfRange
     opcode 1 (caseFirst) - val must be a String equal to 'off',
                            'upperFirst' or 'lowerFirst', else ArgumentError
     any other opcode     - val must be a Boolean
   The Boolean check applies only to the remaining opcodes, so a failing
   strength or caseFirst call is not misreported as a Boolean
   validation error."

  <primitive: 916>
  opcode == 0 ifTrue:[
    val _validateClass: SmallInteger .
    (self class strengths includesIdentical: val) ifFalse:[
      OutOfRange new name:'strength' min: PRIMARY max: IDENTICAL actual: val; signal.
    ]
  ] ifFalse:[
    opcode == 1 ifTrue:[
      val _validateClass: String .
      (val = 'off' or:[ val = 'upperFirst' or:[ val = 'lowerFirst']]) ifFalse:[
         ArgumentError signal:'invalid value for caseFirst:'
      ].
    ] ifFalse:[
      val _validateClass: Boolean .
    ]
  ].
  self _primitiveFailed:#_setAttribute:value: args: { opcode . val }
%

category: 'Private'
method: IcuCollator
_setAttribute: opcode value: val
  "Private.  Set one attribute of the receiver through primitive 916.
   The Smalltalk below validates val according to opcode and then
   reports the primitive failure:
     opcode 0 (strength)  - a SmallInteger that is one of
                            IcuCollator strengths, else OutOfRange
     opcode 1 (caseFirst) - a String equal to 'off', 'upperFirst' or
                            'lowerFirst', else ArgumentError
     any other opcode     - a Boolean"

  <primitive: 916>
  | settingStrength settingCaseFirst |
  settingStrength := opcode == 0 .
  settingCaseFirst := opcode == 1 .
  settingStrength ifTrue:[
    val _validateClass: SmallInteger .
    (self class strengths includesIdentical: val) ifFalse:[
      OutOfRange new name:'strength' min: PRIMARY max: IDENTICAL actual: val; signal.
    ]
  ].
  settingCaseFirst ifTrue:[
    val _validateClass: String .
    (val = 'off' or:[ val = 'upperFirst' or:[ val = 'lowerFirst']]) ifFalse:[
       ArgumentError signal:'invalid value for caseFirst:'
    ].
  ].
  (settingStrength or:[ settingCaseFirst ]) ifFalse:[
    val _validateClass: Boolean .
  ].
  self _primitiveFailed:#_setAttribute:value: args: { opcode . val }
%

method: IcuCollator
_getAttribute: opcode
  "Private.  Fetch the collator attribute selected by opcode through
   primitive 907.  The accessors in this class use opcode 0 for strength,
   1 caseFirst, 2 caseLevel, 3 numericCollation, 4 alternateHandling,
   5 normalization, 6 frenchCollation and 7 hiraganaQuarternary.
   A primitive failure is reported with the opcode as its argument."
  <primitive: 907>
  | failedArgs |
  failedArgs := { opcode } .
  self _primitiveFailed:#_getAttribute: args: failedArgs
%

category: 'Accessing'
method: IcuCollator
locale
  "Returns the value of the receiver's locale instance variable
   (constrained to IcuLocale per the class comment)."
  ^ locale
%

category: 'Comparing'
method: IcuCollator
= anIcuCollator
  "Answer true if the argument is equal to the receiver, false otherwise.
   The comparison is done by primitive 959.  If the primitive fails,
   anIcuCollator is validated to be an IcuCollator and the failure is
   then reported."
<primitive: 959>
| failedArgs |
anIcuCollator _validateClass: IcuCollator .
failedArgs := { anIcuCollator } .
self _primitiveFailed: #= args: failedArgs .
%

method: IcuCollator
hash
  "Answer the hash of the receiver's locale."
  ^ locale hash
%

method: IcuCollator
frenchCollation
  "Answer true or false (attribute 6).
   See UCOL_FRENCH_COLLATION under  enum UColAttribute  at icu-project.org"
  | frenchCollationAttr |
  frenchCollationAttr := 6 .
  ^ self _getAttribute: frenchCollationAttr
%
method: IcuCollator
alternateHandling
  "Answer true or false (attribute 4).
   See UCOL_ALTERNATE_HANDLING under  enum UColAttribute  at icu-project.org"
  | alternateHandlingAttr |
  alternateHandlingAttr := 4 .
  ^ self _getAttribute: alternateHandlingAttr
%
method: IcuCollator
caseFirst
  "Answer a String, one of  'off', 'upperFirst' , or 'lowerFirst'
   (attribute 1).
   See UCOL_CASE_FIRST under  enum UColAttribute  at icu-project.org"
  | caseFirstAttr |
  caseFirstAttr := 1 .
  ^ self _getAttribute: caseFirstAttr
%
method: IcuCollator
caseLevel
  "Answer true (UCOL_ON) or false (UCOL_OFF) (attribute 2).
   See UCOL_CASE_LEVEL under  enum UColAttribute  at icu-project.org.

   From i18n/unicode/ucol.h :
      Controls whether an extra case level (positioned before the third
      level) is generated or not. Acceptable values are UCOL_OFF (default),
      when case level is not generated, and UCOL_ON which causes the case
      level to be generated. Contents of the case level are affected by
      the value of UCOL_CASE_FIRST attribute. A simple way to ignore
      accent differences in a string is to set the strength to UCOL_PRIMARY
      and enable case level.

   This attribute does not control case-sensitivity."
  | caseLevelAttr |
  caseLevelAttr := 2 .
  ^ self _getAttribute: caseLevelAttr
%
method: IcuCollator
normalization
  "Answer true or false (attribute 5).
   See UCOL_NORMALIZATION_MODE  under  enum UColAttribute  at icu-project.org"
  | normalizationAttr |
  normalizationAttr := 5 .
  ^ self _getAttribute: normalizationAttr
%
method: IcuCollator
hiraganaQuarternary
  "Answer true or false (attribute 7).
   See UCOL_HIRAGANA_QUATERNARY_MODE under  enum UColAttribute  at icu-project.org"
  | hiraganaAttr |
  hiraganaAttr := 7 .
  ^ self _getAttribute: hiraganaAttr
%
method: IcuCollator
numericCollation
  "Answer true or false (attribute 3).
   See UCOL_NUMERIC_COLLATION under  enum UColAttribute  at icu-project.org"
  | numericCollationAttr |
  numericCollationAttr := 3 .
  ^ self _getAttribute: numericCollationAttr
%

category: 'Updating'
method: IcuCollator
strength: aValue
  "Set the receiver's strength (attribute 0).
   aValue should be one of PRIMARY, SECONDARY, TERTIARY, QUATERNARY,
   IDENTICAL per the class variables of IcuCollator, or one of the Symbols
   #PRIMARY, #SECONDARY, #TERTIARY, #QUATERNARY, #IDENTICAL .
   A Symbol is translated to the constant at the same position in
   StrengthArgs; a Symbol not found in StrengthNames signals ArgumentError."
  | code |
  aValue _isSymbol ifTrue:[ | position |
    position := StrengthNames indexOfIdentical: aValue .
    position == 0 ifTrue:[
      ArgumentError signal:
        'arg must be one of #PRIMARY #SECONDARY #TERTIARY #QUATERNARY #IDENTICAL'
    ].
    code := StrengthArgs at: position .
  ] ifFalse:[
    code := aValue .
  ].
  self _setAttribute: 0 value: code .  "see also usage in _copyCaseInsensitive"
%

method: IcuCollator
_copyCaseInsensitive
  "Answer a copy of the receiver which will do case-insensitive compares.
   The copy's strength attribute plus one is compared with 3, the position
   of #TERTIARY in #( #PRIMARY #SECONDARY #TERTIARY #QUATERNARY #IDENTICAL );
   when it is 3 or more the copy's strength is set to #SECONDARY.
   The receiver itself is not changed."
  | duplicate strengthIndex |
  duplicate := self copy .
  strengthIndex := (duplicate _getAttribute: 0) + 1 .
  strengthIndex < 3 ifFalse:[ duplicate strength: #SECONDARY ].
  ^ duplicate
%

method: IcuCollator
frenchCollation: aBoolean
  "Set UCOL_FRENCH_COLLATION (attribute 6);
   aBoolean == true means set to UCOL_ON."
  | frenchCollationAttr |
  frenchCollationAttr := 6 .
  self _setAttribute: frenchCollationAttr value: aBoolean
%
method: IcuCollator
alternateHandling: aBoolean
  "Set UCOL_ALTERNATE_HANDLING (attribute 4);
   aBoolean == true means set to UCOL_SHIFTED."
  | alternateHandlingAttr |
  alternateHandlingAttr := 4 .
  self _setAttribute: alternateHandlingAttr value: aBoolean
%

method: IcuCollator
caseFirst: aValue
  "Set UCOL_CASE_FIRST (attribute 1).
   aValue must be a String, one of  'off', 'upperFirst' , or 'lowerFirst' ."
  | caseFirstAttr |
  caseFirstAttr := 1 .
  self _setAttribute: caseFirstAttr value: aValue
%
method: IcuCollator
caseLevel: aBoolean
  "Set UCOL_CASE_LEVEL (attribute 2);
   aBoolean == true means set to UCOL_ON."
  | caseLevelAttr |
  caseLevelAttr := 2 .
  self _setAttribute: caseLevelAttr value: aBoolean
%
method: IcuCollator
normalization: aBoolean
  "Set UCOL_NORMALIZATION_MODE (attribute 5);
   aBoolean == true means set to UCOL_ON."
  | normalizationAttr |
  normalizationAttr := 5 .
  self _setAttribute: normalizationAttr value: aBoolean
%

! deleted hiraganaQuarternary: , cannot be set since ICU 50

method: IcuCollator
numericCollation: aBoolean
  "Set UCOL_NUMERIC_COLLATION (attribute 3);
   aBoolean == true means set to UCOL_ON."
  | numericCollationAttr |
  numericCollationAttr := 3 .
  self _setAttribute: numericCollationAttr value: aBoolean
%

! end class IcuCollator

! ------------------------------------
set class Unicode7
removeallmethods
removeallclassmethods

category: 'Documentation'
classmethod: Unicode7
comment
"Answers the class documentation String for Unicode7, returned verbatim."
 ^ 'An instance of Unicode7 is a String in which all codePoints 
are 7 bit Ascii.  For every codePoint cp in a Unicode7 
the following evaluates to true:
   cp >= 0 and:[ cp <= 16r7F ] '
%

category: 'Instance Creation'
classmethod: Unicode7
withAll: aString

"Returns an instance of Unicode7, Unicode16 or Unicode32 using the
 minimum bytes per character required to represent the argument.

 Primitive 941 does the work.  The Smalltalk below handles the cases
 the primitive does not:
   - an argument whose stringCharSize is 2 or more (presumably a
     DoubleByteString or QuadByteString - confirm against stringCharSize)
     is handed to  Unicode16 withAll:
   - a Utf8 argument is decoded with decodeToUnicode
   - anything else must be a String or Utf8, otherwise a validation
     error is signalled; a String that passes validation is reported as a
     primitive failure.
 The threshold is 2, not 1, so that a single-byte String is not diverted
 to Unicode16 and still reaches the validation and primitive-failure
 report below."

<primitive: 941>
aString stringCharSize >= 2 ifTrue:[
  ^ Unicode16 withAll: aString
].
(aString isKindOfClass: Utf8) ifTrue:[ ^ aString decodeToUnicode ].
aString _validateClasses: { String . Utf8 } .
^ self _primitiveFailed: #withAll: args: { aString }
%

category: 'Converting'
method: Unicode7
decodeFromUTF8
  "Returns the receiver unchanged: the receiver is already 7 bit ascii,
   so there is nothing to decode."
  ^ self
%

! fix 46537
category: 'Adding'
method: Unicode7
at: anIndex put: aChar

"Stores aChar at the specified location.

 Primitive 919 does the store.  The Smalltalk below handles what the
 primitive does not:
   - a Character in the range 16rD800..16rDFFF signals OutOfRange
   - a Character above 16r10FFFF signals OutOfRange
   - a Character above 16rFFFF is stored via  _convertToQuadByte
   - any other Character above 16r7F is stored via  _convertToDoubleByte
   - an argument that is not a Character must be an AbstractCharacter
     and is retried as  aChar asCharacter
   - for a Character of 16r7F or below, anIndex is checked: it must be a
     SmallInteger between 1 and  self size + 1 , otherwise an index error
     is reported; failing that, a primitive failure is reported."

<primitive: 919>
(aChar class == Character) ifTrue:[  | av |
  (av := aChar codePoint) >= 16rD800 ifTrue:[
    av <= 16rDFFF ifTrue:[ 
      OutOfRange signal:'codePoint 16r', av asHexString ,' is illegal for Unicode'.
    ].
    (av > 16rFFFF) ifTrue:[
      av > 16r10FFFF ifTrue:[
        OutOfRange new name:'aChar codePoint' min: 0 max: 16r10FFFF 
                        actual: av ; signal
      ].
      ^ self _convertToQuadByte at: anIndex put: aChar
    ].
    ^ self _convertToDoubleByte at: anIndex put: aChar
  ].
  av > 16r7F ifTrue:[
    ^ self _convertToDoubleByte at: anIndex put: aChar
  ].
] ifFalse:[
 aChar _validateClass: AbstractCharacter .
 ^ self at: anIndex put: aChar asCharacter
].
(anIndex _isSmallInteger) ifTrue: [
  ((anIndex > (self size + 1)) or: [anIndex <= 0]) ifTrue: [
    ^ self _errorIndexOutOfRange: anIndex
  ]
] ifFalse: [
  ^ self _errorNonIntegerIndex: anIndex
] .
self _primitiveFailed: #at:put: args: { anIndex . aChar }
%
method: Unicode7
codePointAt: anIndex put: anInt

"Stores codePoint anInt at the specified location. Returns anInt.
 Class of receiver is changed to Unicode16 or Unicode32 if needed.

 Primitive 935 does the store.  The Smalltalk below handles what the
 primitive does not, for a SmallInteger anInt:
   - 16rD800..16rDFFF signals OutOfRange
   - above 16r10FFFF signals OutOfRange
   - above 16rFFFF is stored via  _convertToQuadByte
   - any other value above 16r7F is stored via  _convertToDoubleByte
 Otherwise anIndex is checked: it must be a SmallInteger between 1 and
 self size + 1 , else an index error is reported.  Anything still
 unexplained is reported as a primitive failure of this method's own
 selector, #codePointAt:put: ."

<primitive: 935>
(anInt class == SmallInteger) ifTrue:[
  anInt >= 16rD800 ifTrue:[
    anInt <= 16rDFFF ifTrue:[
      OutOfRange signal:'codePoint 16r', anInt asHexString ,' is illegal for Unicode'.
    ].
    anInt > 16rFFFF ifTrue:[
      anInt > 16r10FFFF ifTrue:[
        OutOfRange new name:'a codePoint' min: 0 max: 16r10FFFF
                        actual: anInt ; signal
      ].
      ^ self _convertToQuadByte codePointAt: anIndex put: anInt
    ].
    ^ self _convertToDoubleByte codePointAt: anIndex put: anInt
  ].
  anInt > 16r7F ifTrue:[
    ^ self _convertToDoubleByte codePointAt: anIndex put: anInt
  ].
].
(anIndex _isSmallInteger) ifTrue: [
  ((anIndex > (self size + 1)) or: [anIndex <= 0]) ifTrue: [
    ^ self _errorIndexOutOfRange: anIndex
  ]
] ifFalse: [
  ^ self _errorNonIntegerIndex: anIndex
] .
self _primitiveFailed: #codePointAt:put: args: { anIndex . anInt }
%

method:
_basicAt: index put: char
"Disallowed for this class; always reports shouldNotImplement: .
 Use codePointAt:put: instead."
^ self shouldNotImplement: #_basicAt:put:
%

method:
squeakBasicAt: anIndex put: aValue
"Disallowed for this class; always reports shouldNotImplement: .
 Use codePointAt:put: instead."
^ self shouldNotImplement: #squeakBasicAt:put:
%

method: Unicode7
add: aCharOrCharColl
"Appends aCharOrCharColl to the receiver using primitive 918.
 If the primitive fails, the add is retried in Smalltalk by _retryAdd: ,
 whose result is returned."

<primitive: 918>
^ self _retryAdd: aCharOrCharColl
%

method: Unicode7
_retryAdd: aCharOrCharColl
"Smalltalk fallback used by add:, addLast: and addAll: after primitive 918.
 The argument is handled by the first case that applies:
   - a Character: 16rD800..16rDFFF signals OutOfRange; above 16rFFFF is
     added via _convertToQuadByte; any other value above 16r7F is added
     via _convertToDoubleByte; a Character of 16r7F or below signals
     OutOfRange
   - a Unicode string (bit 16r8 of _stringCharSize set): added as the
     result of _asUnicode7 when that is not nil, otherwise added via
     _convertToDoubleByte (character size 2) or _convertToQuadByte
   - another string (non-zero character size): added as  asUnicodeString
   - a kind of AbstractCharacter: added as  asCharacter
   - a kind of CharacterCollection: added as  asString
   - anything else: each element is added in turn using do: ."
| info cSize |
aCharOrCharColl class == Character ifTrue:[ | av |
  (av := aCharOrCharColl codePoint) >= 16rD800 ifTrue:[
    av <= 16rDFFF ifTrue:[
      OutOfRange signal:'codePoint 16r', av asHexString ,' is illegal for Unicode'.
    ].
    av > 16rFFFF ifTrue:[
      ^ self _convertToQuadByte add: aCharOrCharColl
    ].
    ^ self _convertToDoubleByte add: aCharOrCharColl
  ].
  av > 16r7F ifTrue:[
    ^ self _convertToDoubleByte add: aCharOrCharColl
  ].
  ^ OutOfRange new name:'aChar codePoint' min: 0 max: 16r10FFFF 
	             actual: av ; signal
].
info := aCharOrCharColl _stringCharSize .
cSize := info bitAnd: 16r7 .
(info bitAnd: 16r8) ~~ 0 ifTrue:[  "arg is a Unicode string"
  aCharOrCharColl _asUnicode7 ifNotNil:[:src7 |  self add: src7 . ^ aCharOrCharColl].
  cSize == 2 ifTrue:[ 
    ^ self _convertToDoubleByte add: aCharOrCharColl 
  ].
  ^ self _convertToQuadByte add: aCharOrCharColl .
].
cSize ~~ 0 ifTrue:[
  self add: aCharOrCharColl asUnicodeString  .
  ^ aCharOrCharColl
].
(aCharOrCharColl isKindOf: AbstractCharacter) ifTrue:[
  self add: aCharOrCharColl asCharacter . 
  ^ aCharOrCharColl
].
(aCharOrCharColl isKindOf: CharacterCollection) ifTrue:[ 
  ^ self add: aCharOrCharColl asString.
].
aCharOrCharColl do: [:each | self add: each].
^ aCharOrCharColl
%

method: Unicode7
addLast: aCharOrCharColl
"Appends aCharOrCharColl to the receiver using primitive 918, the same
 primitive as add: .  If the primitive fails, the add is retried in
 Smalltalk by _retryAdd: ."

<primitive: 918>
^ self _retryAdd: aCharOrCharColl
%
method: Unicode7
addAll: aCharOrCharColl
"Appends aCharOrCharColl to the receiver using primitive 918, the same
 primitive as add: .  If the primitive fails, the add is retried in
 Smalltalk by _retryAdd: ."

<primitive: 918>
^ self _retryAdd: aCharOrCharColl
%
method: Unicode7
addCodePoint: aSmallInteger
"Appends the code point aSmallInteger to the receiver using
 primitive 1049.  If the primitive fails, a Character is built with
 Character codePoint:  and added with add: ."

<primitive: 1049>
^ self add: (Character codePoint: aSmallInteger).
%

method: Unicode7
, aCharOrCharColl

"Returns a new instance of the receiver's class that contains the elements of
 the receiver followed by the elements of aCharOrCharColl.  The argument
 must be a CharacterCollection or an AbstractCharacter.

 Primitive 920 does the concatenation.  The Smalltalk below handles what
 the primitive does not, and in these cases the result may be a wider class:
   - a Unicode string argument (bit 16r8 of _stringCharSize set): the
     receiver is copied into a Unicode16 (character size 2) or a Unicode32
     and the argument is appended with addAll:
   - another string (non-zero character size): retried with
     Unicode7 withAll: aCharOrCharColl
   - a Character: 16rD800..16rDFFF signals OutOfRange; above 16rFFFF the
     receiver is copied into a Unicode32; any other value above 16r7F uses
     a Unicode16; a Character of 16r7F or below signals OutOfRange
   - anything else must be a Unicode7, Unicode16 or Unicode32, else a
     validation error; then a primitive failure is reported."

<primitive: 920>
| info cSize result |
info := aCharOrCharColl _stringCharSize .
cSize := info bitAnd: 16r7 .
(info bitAnd: 16r8) ~~ 0 ifTrue:[  "arg is a Unicode string"
  (cSize == 2) ifTrue:[
    result := Unicode16 withAll: self.
    result addAll: aCharOrCharColl.
    ^ result .
  ] ifFalse:[
    result:= Unicode32 withAll: self.
    result addAll: aCharOrCharColl.
    ^ result .
  ].
].
cSize ~~ 0 ifTrue:[ "arg is a String or MultibyteString not handled by primitive"
  ^ self , (Unicode7 withAll: aCharOrCharColl)
].
(aCharOrCharColl class == Character) ifTrue:[ | av |
  (av := aCharOrCharColl codePoint) >= 16rD800 ifTrue:[
    av <= 16rDFFF ifTrue:[
      OutOfRange signal:'codePoint 16r', av asHexString ,' is illegal for Unicode'.
    ].
    av > 16rFFFF ifTrue:[ result:= Unicode32 withAll: self]
    		ifFalse:[ result:= Unicode16 withAll: self ]
  ] ifFalse:[
    av > 16r7F ifTrue:[ 
      result := Unicode16 withAll: self
    ] ifFalse:[
     ^ OutOfRange new name:'aChar codePoint' min: 0 max: 16r10FFFF 
			actual: av ; signal
    ].
  ].
  result add: aCharOrCharColl .
  ^ result
].   
aCharOrCharColl _validateClasses: { Unicode7 . Unicode16 . Unicode32 } .
self _primitiveFailed: #, args: { aCharOrCharColl } 
%

method: Unicode7
insertAll: aCharOrCharColl at: offset
"Inserts aCharOrCharColl into the receiver at offset using primitive 921.
 The Smalltalk below handles what the primitive does not:
   - offset must be a SmallInteger between 1 and  self size + 1 ,
     otherwise a validation or index error is reported
   - a Unicode string argument (bit 16r8 of _stringCharSize set) is
     inserted as the result of _asUnicode7 when that is not nil, otherwise
     via _convertToDoubleByte (character size 2) or _convertToQuadByte
   - another string (non-zero character size) is retried as
     asUnicodeString
   - a Character is wrapped in a new Unicode7 and retried
   - anything else must be a Unicode7, Unicode16 or Unicode32, else a
     validation error; then a primitive failure is reported."

<primitive: 921>
| aString cSize info |
offset _isSmallInteger ifFalse:[ offset _validateClass: SmallInteger ].
((offset <= 0) or: [offset > (self size + 1)])
  ifTrue: [ ^ self _errorIndexOutOfRange: offset].

info := aCharOrCharColl _stringCharSize .
cSize := info bitAnd: 16r7 .
(info bitAnd: 16r8) ~~ 0 ifTrue:[  "arg is a Unicode string"
  aCharOrCharColl _asUnicode7 ifNotNil:[ :src7 | ^ self insertAll: src7 at: offset].
  cSize == 2 ifTrue:[
     ^ self _convertToDoubleByte insertAll: aCharOrCharColl at: offset 
  ].
  ^ self _convertToQuadByte insertAll: aCharOrCharColl at: offset
].
cSize ~~ 0 ifTrue:[
  ^ self insertAll: aCharOrCharColl asUnicodeString at: offset
].
aCharOrCharColl class == Character ifTrue:[ 
  (aString := Unicode7 new) add: aCharOrCharColl .
  ^ self insertAll: aString at: offset .
].
aCharOrCharColl _validateClasses: { Unicode7 . Unicode16 . Unicode32 } .
self _primitiveFailed: #insertAll:at: args: { aCharOrCharColl . offset }
%

! reimplement = for fix 43751
category: 'Comparing'
method: Unicode7
_unicodeEqual: argString
  "Answer whether the receiver equals argString.  The comparison is
   delegated to _equals:collator:useMinSize: with a nil collator, which
   per the original comment means  IcuCollator default  is used."
  | noCollator |
  noCollator := nil .
  ^ self _equals: argString collator: noCollator useMinSize: false
%
method: Unicode7
_unicodeLessThan: argString
  "Answer whether the receiver sorts before argString, i.e. whether
   compareTo:collator:useMinSize: with a nil collator (IcuCollator default,
   per the original comment) answers a negative number."
  | order |
  order := self compareTo: argString collator: nil useMinSize: false .
  ^ order < 0
%
method: Unicode7
_unicodeGreaterThan: argString
  "Answer whether the receiver sorts after argString, i.e. whether
   compareTo:collator:useMinSize: with a nil collator (IcuCollator default,
   per the original comment) answers a positive number."
  | order |
  order := self compareTo: argString collator: nil useMinSize: false .
  ^ order > 0
%
! legacy mode compare methods
method: Unicode7
= argString
  "Legacy-mode equality.  Compares receiver to argument with a nil
   collator (IcuCollator default, per the original comment).
   When argString is not a Unicode string (bit 16r8 of _stringCharSize
   clear): a Symbol, or an object whose _stringCharSize is 0, answers
   false; any other string signals ArgumentError."
  | info |
  info := argString _stringCharSize .
  (info bitAnd: 16r8) == 0 ifTrue:[ "not Unicode"
    (argString _isSymbol or:[ info == 0 ]) ifTrue:[ ^ false ].
    ArgumentError signal:'String argument disallowed in Unicode comparison'.
  ].
  ^ self _equals: argString collator: nil useMinSize: false
%
method: Unicode7
< argString
  "Legacy-mode less-than.  Compares receiver to argument with a nil
   collator (IcuCollator default, per the original comment).
   A non-Unicode string argument (bit 16r8 of _stringCharSize clear but
   _stringCharSize not 0) signals ArgumentError."
  | info |
  info := argString _stringCharSize .
  ((info bitAnd: 16r8) == 0 and:[ info ~~ 0 ]) ifTrue:[ "not Unicode"
    ArgumentError signal:'String argument disallowed in Unicode comparison'.
  ].
  ^ (self compareTo: argString collator: nil useMinSize: false) < 0
%
method: Unicode7
> argString
  "Legacy-mode greater-than.  Compares receiver to argument with a nil
   collator (IcuCollator default, per the original comment).
   A non-Unicode string argument (bit 16r8 of _stringCharSize clear but
   _stringCharSize not 0) signals ArgumentError."
  | info |
  info := argString _stringCharSize .
  ((info bitAnd: 16r8) == 0 and:[ info ~~ 0 ]) ifTrue:[ "not Unicode"
    ArgumentError signal:'String argument disallowed in Unicode comparison'.
  ].
  ^ (self compareTo: argString collator: nil useMinSize: false) > 0
%

! methods for String in Unicode compare mode
method: String
_unicodeEqual: argString
  "Unicode compare mode equality for String.  Delegates to
   _equals:collator:useMinSize: with a nil collator, which per the
   original comment means  IcuCollator default  is used."
  | noCollator |
  noCollator := nil .
  ^ self _equals: argString collator: noCollator useMinSize: false
%
method: String
_unicodeLessThan: argString
  "Unicode compare mode less-than for String: true when
   compareTo:collator:useMinSize: with a nil collator (IcuCollator default,
   per the original comment) answers a negative number."
  | order |
  order := self compareTo: argString collator: nil useMinSize: false .
  ^ order < 0
%
method: String
_unicodeGreaterThan: argString
  "Unicode compare mode greater-than for String: true when
   compareTo:collator:useMinSize: with a nil collator (IcuCollator default,
   per the original comment) answers a positive number."
  | order |
  order := self compareTo: argString collator: nil useMinSize: false .
  ^ order > 0
%


method: Unicode7
equalsNoCase: aString
  "Answer true if the receiver and aString are equal using full case
   folding and code point order (icu:UnicodeString::caseCompare in
   libicu), false otherwise.  Implemented as  compareCase:  answering 0.
   aString must be a Unicode7, Unicode16 , or Unicode32"
  | order |
  order := self compareCase: aString .
  ^ order == 0
%

method: Unicode7
isEquivalent: aString
  "Answer true if the receiver and aString are equal using full case
   folding and code point order (icu:UnicodeString::caseCompare in
   libicu), false otherwise.  An argument whose _stringCharSize is 0,
   i.e. not a string, answers false without being compared."
  ^ aString _stringCharSize ~~ 0
      and:[ (self compareCase: aString) == 0 ]
%

method: Unicode7
at: offset equalsNoCase: aString

"Returns true if aString is contained in the receiver, starting at
 offset.  Returns false otherwise.
 Comparison is done using full case folding and code point order,
 using  icu:UnicodeString::caseCompare in libicu.
 aString must be a Unicode7, Unicode16 , or Unicode32

 Primitive 930 does the comparison.  In the Smalltalk below, an argument
 that is not a Unicode string (bit 16r8 of _stringCharSize clear) is
 retried as  aString asUnicodeString .  Otherwise aString is validated,
 offset must be a SmallInteger between 1 and  self size + 1 , and a
 primitive failure is reported."

<primitive: 930>
| info |
info := aString _stringCharSize .
(info bitAnd: 16r8) == 0 ifTrue:[  "arg is not a Unicode string"
   ^ self at: offset equalsNoCase: aString asUnicodeString
].
aString _validateClasses: { Unicode7 . Unicode16 . Unicode32 } .
offset _isSmallInteger ifFalse:[ offset _validateClass: SmallInteger ].
((offset <= 0) or: [offset > (self size + 1)])
  ifTrue: [ ^ self _errorIndexOutOfRange: offset].
^self _primitiveFailed: #at:equalsNoCase: args: { offset . aString }
%

method: Unicode7
compareCase: aString

"Returns -1, 0 or 1,  when receiver is less than,
 equal to, or greater than aString .
 Comparison is done using full case folding and code point order,
 using  icu:UnicodeString::caseCompare in libicu.
 aString must be a Unicode7, Unicode16 , or Unicode32

 Primitive 969 does the comparison.  In the Smalltalk below, an argument
 that is not a Unicode string (bit 16r8 of _stringCharSize clear) is
 retried as  aString asUnicodeString ; otherwise aString is validated and
 a primitive failure is reported."

<primitive: 969>
| info |
info := aString _stringCharSize .
(info bitAnd: 16r8) == 0 ifTrue:[  "arg is not a Unicode string"
   ^ self compareCase: aString asUnicodeString
].
aString _validateClasses: { Unicode7 . Unicode16 . Unicode32 } .
^self _primitiveFailed: #compareCase: args: { aString }
%

method: Unicode7
equals: aString collatingTable: aTable
  "Disallowed for this class; reports shouldNotImplement: ."
  self shouldNotImplement: #equals:collatingTable: .
  ^ self
%
method: Unicode7
greaterThan: aString collatingTable: aTable
  "Disallowed for this class; reports shouldNotImplement: ."
  self shouldNotImplement: #greaterThan:collatingTable: .
  ^ self
%

method: Unicode7
lessThan: aString collatingTable: aTable
  "Disallowed for this class; reports shouldNotImplement: ."
  self shouldNotImplement: #lessThan:collatingTable: .
  ^ self
%

category: 'Private'
method: Unicode7
_findString: subString startingAt: startIndex ignoreCase: aBoolean
  "Private.  When aBoolean is true, search with
   _findStringNocase:startingAt:collator: using  IcuCollator default ;
   when it is false, use the inherited implementation."
  ^ aBoolean
      ifTrue:[
        self _findStringNocase: subString startingAt: startIndex
             collator: IcuCollator default ]
      ifFalse:[
        super _findString: subString startingAt: startIndex ignoreCase: aBoolean ]
%
method: String
_findStringNocase: subString startingAt: startIndex collator: anIcuCollator
  "Search for subString in the receiver beginning at startIndex, using
   primitive 944.  Per the original comment, the search uses a copy of
   anIcuCollator set to TERTIARY matches, and an ICU StringSearch.

   In the Smalltalk below, subString must be a String or Utf8 and
   startIndex a SmallInteger between 1 and  self size ; otherwise the
   corresponding error is reported, and failing that a primitive failure."
<primitive: 944>
| failedArgs |
subString _validateClasses: { String .  Utf8 } .
startIndex _validateClass: SmallInteger .
(startIndex < 1 or:[ startIndex > self size ])
  ifTrue: [ ^ self _error: #objErrBadOffsetIncomplete args: { startIndex } ].
failedArgs := { subString . startIndex . anIcuCollator } .
^ self _primitiveFailed: #_findStringNocase:startingAt:collator:
       args: failedArgs
%

category: 'New Indexing Comparison - prims'
! TODO more work on actual prims
method: Unicode7
_idxPrimCompareEqualTo: aCharCollection
  "Equality comparison used by the indexing subsystem to determine an
   ordering for insertion into indexing objects.

   Answers false for a nil argument and for a Symbol that is not a
   Unicode string.  Any other argument that is not a Unicode string
   signals ArgumentError.  A Unicode string is compared with
   _idxUnicodeCompareEqualTo: ."

  aCharCollection == nil ifTrue:[ ^ false ].
  aCharCollection isUnicodeString ifTrue:[
    ^ self _idxUnicodeCompareEqualTo: aCharCollection
  ].
  "not Unicode"
  aCharCollection _isSymbol ifTrue:[ ^ false ].
  ArgumentError signal:'String argument disallowed in Unicode comparison' .
  ^ self _idxUnicodeCompareEqualTo: aCharCollection
%

category: 'New Indexing Comparison - prims'
method: Unicode7
_idxPrimCompareLessThan: aCharCollection
  "Less-than comparison used by the indexing subsystem to determine an
   ordering for insertion into indexing objects and for doing indexing
   subsystem comparisons.

   Collates the same as the < method, except that it answers false when
   the argument is nil.  An argument that is not a Unicode string signals
   ArgumentError."

  aCharCollection == nil ifTrue:[ ^ false ].
  aCharCollection isUnicodeString ifTrue:[
    ^ self _idxUnicodeCompareLessThan: aCharCollection
  ].
  "not Unicode"
  ArgumentError signal:'String argument disallowed in Unicode comparison' .
  ^ self _idxUnicodeCompareLessThan: aCharCollection
%
method: Unicode7
_idxPrimCompareGreaterThan: aCharCollection

"This comparison operation is used for the indexing subsystem to
 determine an ordering for insertion into indexing objects and for
 doing indexing subsystem comparisons.

 This method collates the same as the > method, except that
 it returns true if the argument is nil.
 A non-Unicode argument signals an ArgumentError; Unicode arguments
 are compared by _idxUnicodeCompareGreaterThan: ."

  "The nil case previously answered false, contradicting the contract
   documented above."
  aCharCollection ifNil:[ ^ true ].
  aCharCollection isUnicodeString ifFalse: [ "not Unicode"
    ArgumentError signal:'String argument disallowed in Unicode comparison' ].
  ^self _idxUnicodeCompareGreaterThan: aCharCollection
%

category: 'New Indexing Comparison'
method: Unicode7
_idxUnicodeCompareEqualTo: aCharCollection
  "Forwards unchanged to the inherited implementation.
   NOTE(review): presumably defined in this class so that
   Unicode16 class>>_unicodeCompareTmdForClass:selectors: can fetch it
   with persistentMethodAt: -- confirm before removing."

  ^ super _idxUnicodeCompareEqualTo: aCharCollection
%

category: 'New Indexing Comparison'
method: Unicode7
_idxUnicodeCompareGreaterThan: aCharCollection
  "Forwards unchanged to the inherited implementation.
   NOTE(review): presumably defined in this class so that
   Unicode16 class>>_unicodeCompareTmdForClass:selectors: can fetch it
   with persistentMethodAt: -- confirm before removing."

  ^ super _idxUnicodeCompareGreaterThan: aCharCollection
%

category: 'New Indexing Comparison'
method: Unicode7
_idxUnicodeCompareLessThan: aCharCollection
  "Forwards unchanged to the inherited implementation.
   NOTE(review): presumably defined in this class so that
   Unicode16 class>>_unicodeCompareTmdForClass:selectors: can fetch it
   with persistentMethodAt: -- confirm before removing."

  ^ super _idxUnicodeCompareLessThan: aCharCollection
%

! end class Unicode7
!--------------------------

set class Unicode16
removeallmethods
removeallclassmethods

category: 'Documentation'
classmethod: Unicode16
comment
^ 'An instance of Unicode16 is a DoubleByteString in which all 
Utf16 codePoints are representable using 16 bits.  
For every codePoint cp in a Unicode16 the following evaluates to true:
   (cp >= 0 and:[ cp <= 16rD7FF ]) 
     or:[ cp >= 16rE000 and:[ cp <= 16rFFFF ]] 

String comparison is controlled by (Globals at:#StringConfiguration)

  Some definitions
    Legacy strings are kinds of String or MultiByteString, but not
    kinds of Unicode7, Unicode16, nor Unicode32 .
    Unicode strings are kinds of Unicode7, Unicode16, or Unicode32

  The default is
     (Globals at: #StringConfiguration) == String
  In this mode String and MultiByteString have legacy comparison behavior.
  Comparison between legacy strings and Unicode strings will signal errors.   
  Unicode strings will compare to Unicode strings using specified 
  or default instances of IcuCollator.

  If at login the repository has
    (Globals at: #StringConfiguration) == Unicode16
  then legacy and Unicode strings will all compare using Unicode
  semantics using specified or default instances of IcuCollator.
  This is accomplished by installing different comparison methods
  in the transient method dictionaries for the string classes at
  session login.  See classmethods in Unicode16 for details
  of how the transient method dictionaries are initialized.
  You can observe what comparison methods are installed
  by doing a method lookup.  For example using topaz:
   login
   obj StringConfiguration
   set class String
   look method =
   !observe String>>= implementation using primitive 27

   ! override the default StringConfiguration in this session only
   send Unicode16 _useUnicodeComparePrimitives: true

   look meth =
   !observe String>>_unicodeEqual: implementation 
   logout
'
%

category: 'Session Control - Private'
classmethod: Unicode16
_unicodeCompareEnabled
  "Answers true when (Globals at: #StringConfiguration) is Unicode16,
   false otherwise.  A value that is neither Unicode16 nor String is
   treated as invalid: a warning is written with GsFile gciLogServer:
   unless the current user has the #NoGsFileOnServer privilege."

  | config |
  config := Globals at: #StringConfiguration otherwise: nil .
  config == Unicode16 ifTrue:[ ^ true ].
  config == String ifTrue:[ ^ false ].
  (System myUserProfile _hasPrivilegeName: #NoGsFileOnServer) ifFalse:[
    GsFile gciLogServer:'WARNING invalid StringConfiguration ' , config asString
  ].
  ^ false
%

category: 'Session Control - Private'
classmethod: Unicode16
_lookupsUsingUnicodeCompares
  "Answers true if the environment 0 transient method dictionary of
   Unicode7 maps #= to the method whose selector is #_unicodeEqual: ,
   i.e. Unicode comparison methods are installed; answers false if
   there is no such dictionary or no such entry."

  | tmd meth |
  tmd := Unicode7 transientMethodDictForEnv: 0 .
  tmd ifNil:[ ^ false ].
  meth := tmd at: #= otherwise: nil .
  meth ifNil:[ ^ false ].
  ^ meth selector == #_unicodeEqual:
%

category: 'Session Control'
classmethod: Unicode16
usingUnicodeCompares
  "Returns true if Unicode behavior for #< #> #= and various methods in the
  indexing system are being used, false if legacy behavior is being used.
  The answer is read from session state slot 23, which is written by
  _cacheUsingUnicodeCompares ."

  ^ System __sessionStateAt: 23
%

category: 'Session Control - Private'
classmethod: Unicode16
_cacheUsingUnicodeCompares
  "Stores the current result of _lookupsUsingUnicodeCompares in session
   state slot 23, where usingUnicodeCompares reads it."

  System __sessionStateAt: 23 put: self _lookupsUsingUnicodeCompares
%

category: 'Session Control - Private'
classmethod: Unicode16
_useUnicodeComparePrimitives: aBoolean

"Switches this session between Unicode (aBoolean == true) and legacy
 (aBoolean == false) comparison methods, and returns the previous
 setting as reported by _lookupsUsingUnicodeCompares .
 When the setting changes, each class listed in _unicodeCompareMapping
 gets either a freshly built transient method dictionary for
 environment 0 or nil, and the lookup caches are cleared.
 The flag read by usingUnicodeCompares is refreshed on every call."

<primitive: 2001>   "enter protected mode"
| prot prev |
prot := System _protectedMode .
[ prev := self _lookupsUsingUnicodeCompares .
  prev == aBoolean ifFalse:[ | list tmd |
    "list alternates between a class and its Array of selector pairs"
    list := self _unicodeCompareMapping .
    1 to: list size by: 2 do:[:j | | aClass |
      aClass := list at: j .
      tmd := aBoolean ifTrue:[ self _unicodeCompareTmdForClass: aClass 
				  selectors:  (list at: j + 1) ]
                     ifFalse:[ nil ] .
      aClass transientMethodDictForEnv: 0 put: tmd . 
    ].
    self _clearLookupCaches: 0 .
  ].
  self _cacheUsingUnicodeCompares .
] ensure:[
  "always leave protected mode, even if installation signalled an error"
  prot _leaveProtectedMode
].
^ prev
%

category: 'Session Control'
classmethod: Unicode16
installUnicodeComparePrimitives
  "Installs the appropriate entries in the transient method dictionaries.
   Unicode versions of the methods are installed if
     (Globals at: #StringConfiguration) == Unicode16
   otherwise the legacy methods are used.
   This affects method lookup for #< #> #= and various methods in the
   indexing system when the receiver is a kind of String or
   MultiByteString.

   See Unicode16(C) >> _unicodeCompareMapping for the detailed list of
   methods; no GsPackagePolicy should override any of those methods.

   A StringConfiguration value of Unicode16 is intended for new
   applications.  Legacy applications should use a value of String,
   otherwise they may need to rebuild hashed collections and indexes.
  "

  | enable |
  enable := self _unicodeCompareEnabled .
  self _useUnicodeComparePrimitives: enable
%

category: 'Session Control - Private'
classmethod: Unicode16
_unicodeCompareMapping
  "Returns an Array that alternates between a class and an Array of
   selectors for that class.  Each Array of selectors alternates between
   a lookup selector and the selector of the method that implements it
   in Unicode compare mode.  All five classes share one selector Array."

  | selectorPairs |
  selectorPairs := #(
      #'<'  #'_unicodeLessThan:'
      #'>'  #'_unicodeGreaterThan:'
      #'='  #'_unicodeEqual:'
      #'_idxPrimCompareGreaterThan:'  #'_idxUnicodeCompareGreaterThan:'
      #'_idxPrimCompareLessThan:'  #'_idxUnicodeCompareLessThan:'
      #'_idxPrimCompareEqualTo:'  #'_idxUnicodeCompareEqualTo:' ) .
  ^ { String . selectorPairs .
      MultiByteString . selectorPairs .
      Unicode7 . selectorPairs .
      Unicode16 . selectorPairs .
      Unicode32 . selectorPairs }
%  

category: 'Session Control - Private'
classmethod: Unicode16
_unicodeCompareTmdForClass: aClass selectors: syms
  "Builds and returns a GsMethodDictionary suitable for installation as a
   transient method dictionary.  syms alternates between a lookup
   selector and an implementation selector; each lookup selector is
   mapped to the persistent method that aClass answers for the
   implementation selector."

  | dict |
  dict := GsMethodDictionary new .
  1 to: syms size by: 2 do:[:k | | lookupSel implSel |
    lookupSel := syms at: k .
    implSel := syms at: k + 1 .
    dict at: lookupSel put: (aClass persistentMethodAt: implSel)
  ].
  ^ dict
%


category: 'Instance Creation'
classmethod: Unicode16
withAll: aString

"Returns an instance of Unicode16 or Unicode32 using the
 minimum bytes per character required to represent the argument.

 The code after the primitive runs only if primitive 942 fails."

<primitive: 942>
"an argument with stringCharSize >= 2 is handed to Unicode32"
aString stringCharSize >= 2 ifTrue:[
  ^ Unicode32 withAll: aString
].
"a Utf8 argument is decoded first and the result is retried"
(aString isKindOfClass: Utf8) ifTrue:[ ^ self withAll: aString decodeToUnicode ].
aString _validateClasses: { String . Utf8 } .
^ self _primitiveFailed: #withAll: args: { aString }
%

! at: inherited

category: 'Adding'
method: Unicode16
add: aCharOrCharColl

"Adds aCharOrCharColl to the receiver using primitive 932.
 If the primitive fails, the work is delegated to _retryAdd: ."

<primitive: 932>
^ self _retryAdd: aCharOrCharColl
%
method: Unicode16
_retryAdd: aCharOrCharColl
"Fallback used by add:, addLast: and addAll: after primitive 932 fails.
 Converts the argument to a form the primitive accepts and retries,
 converts the receiver with _convertToQuadByte when the argument needs
 more than 16 bits per codePoint, or signals OutOfRange for a Character
 that cannot be stored."
| info aString cSize |
aCharOrCharColl class == Character ifTrue:[ | av |
  (av := aCharOrCharColl codePoint) >= 16rD800 ifTrue:[
    "16rD800..16rDFFF is rejected as illegal for Unicode"
    av <= 16rDFFF ifTrue:[
      OutOfRange signal:'codePoint 16r', av asHexString ,' is illegal for Unicode'.
    ].
    "does not fit in 16 bits: retry on the result of _convertToQuadByte"
    av > 16rFFFF ifTrue:[
      ^ self _convertToQuadByte add: aCharOrCharColl
    ].
  ].
  ^ OutOfRange new name:'aChar codePoint' min: 0 max: 16r10FFFF 
		actual: av ; signal
].
info := aCharOrCharColl _stringCharSize .
(info bitAnd: 16r8) ~~ 0 ifTrue:[  "arg is a Unicode string, assume cSize 4"
  aString := aCharOrCharColl _asUnicode16 .
  "a nil result from _asUnicode16 is handled by converting the receiver"
  aString ifNotNil:[  self add: aString . ^ aCharOrCharColl ]
          ifNil:[ ^ self _convertToQuadByte add: aCharOrCharColl ].
].
cSize := info bitAnd: 16r7 .
"non-zero char size: retry with the asUnicodeString form of the argument"
cSize ~~ 0 ifTrue:[
  self add: aCharOrCharColl asUnicodeString  .
  ^ aCharOrCharColl
].
(aCharOrCharColl isKindOf: AbstractCharacter) ifTrue:[
  self add: aCharOrCharColl asCharacter .
  ^ aCharOrCharColl
].
(aCharOrCharColl isKindOf: CharacterCollection) ifTrue:[ 
  ^ self add: aCharOrCharColl asString.
].
"any other argument is enumerated and its elements added one by one"
aCharOrCharColl do: [:each | self add: each].
^ aCharOrCharColl
%

method: Unicode16
addLast: aCharOrCharColl

"Adds aCharOrCharColl to the receiver using primitive 932, the same
 primitive as add: .  If the primitive fails, the work is delegated
 to _retryAdd: ."

<primitive: 932>
^ self _retryAdd: aCharOrCharColl
%
method: Unicode16
addAll: aCharOrCharColl

"Adds aCharOrCharColl to the receiver using primitive 932, the same
 primitive as add: .  If the primitive fails, the work is delegated
 to _retryAdd: ."

<primitive: 932>
^ self _retryAdd: aCharOrCharColl
%

method: Unicode16
addCodePoint: aSmallInteger

"Adds the codePoint aSmallInteger to the receiver using primitive 1050.
 If the primitive fails, a Character is built from the codePoint and
 added with add: ."

<primitive: 1050>
| aChar |
aChar := Character codePoint: aSmallInteger .
^ self add: aChar
%

method: Unicode16
, aCharOrCharColl

"Returns a new instance of the receiver's class that contains the elements of
 the receiver followed by the elements of aCharOrCharColl.  The argument
 must be a CharacterCollection or an AbstractCharacter.

 The code after the primitive runs only if primitive 938 fails.  In that
 path the result may be a Unicode32 when the argument does not fit in
 16 bits per codePoint."

<primitive: 938>
| info cSize result |
info := aCharOrCharColl _stringCharSize .
cSize := info bitAnd: 16r7 .
(info bitAnd: 16r8) ~~ 0 ifTrue:[  "arg is a Unicode string"
  (cSize == 4) ifTrue:[
    "build the result as a Unicode32 copy of the receiver plus the arg"
    result:= Unicode32 withAll: self.
    result addAll: aCharOrCharColl.
    ^ result .
  ].
].
cSize ~~ 0 ifTrue:[ "arg is a MultiByteString not handled by primitive" 
  ^ self , (Unicode16 withAll: aCharOrCharColl)
].
(aCharOrCharColl class == Character) ifTrue:[ | av |
  (av := aCharOrCharColl codePoint) >= 16rD800 ifTrue:[
    "16rD800..16rDFFF is rejected as illegal for Unicode"
    av <= 16rDFFF ifTrue:[
      OutOfRange signal:'codePoint 16r', av asHexString ,' is illegal for Unicode'.
    ].
    av > 16rFFFF ifTrue:[ result:= Unicode32 withAll: self]
    		ifFalse:[ result:= Unicode16 withAll: self ]
  ] ifFalse:[
   "NOTE(review): a codePoint below 16rD800 is reported as OutOfRange
    here; presumably the primitive is expected to have handled it -- confirm"
   ^ OutOfRange new name:'aChar codePoint' min: 0 max: 16r10FFFF 
		actual: av ; signal
  ].
  result add: aCharOrCharColl .
  ^ result
].   
aCharOrCharColl _validateClasses: { Unicode7 . Unicode16 . Unicode32 } .
self _primitiveFailed: #, args: { aCharOrCharColl } 
%

method: Unicode16
at: anIndex put: aChar

"Stores aChar at the specified location.

 The code after the primitive runs only if primitive 933 fails.  It
 rejects illegal codePoints, stores through the result of
 _convertToQuadByte when the codePoint is above 16rFFFF, retries with
 aChar asCharacter for other kinds of AbstractCharacter, and otherwise
 reports a bad index or a primitive failure."

<primitive: 933>
(aChar class == Character) ifTrue:[  | av |
  (av := aChar codePoint) >= 16rD800 ifTrue:[
    "16rD800..16rDFFF is rejected as illegal for Unicode"
    av <= 16rDFFF ifTrue:[
      OutOfRange signal:'codePoint 16r', av asHexString ,' is illegal for Unicode'.
    ].
    av > 16rFFFF ifTrue:[
      av > 16r10FFFF ifTrue:[
        OutOfRange new name:'aChar codePoint' min: 0 max: 16r10FFFF 
                        actual: av ; signal
      ].
      ^ self _convertToQuadByte at: anIndex put: aChar
    ].
  ].
] ifFalse:[
 aChar _validateClass: AbstractCharacter .
 ^ self at: anIndex put: aChar asCharacter
].
"aChar is acceptable here, so look for a bad index; size + 1 is accepted"
(anIndex _isSmallInteger) ifTrue: [
  ((anIndex > (self size + 1)) or: [anIndex <= 0]) ifTrue: [
    ^ self _errorIndexOutOfRange: anIndex
  ]
] ifFalse: [
  ^ self _errorNonIntegerIndex: anIndex
] .
self _primitiveFailed: #at:put: args: { anIndex . aChar }
%
method: Unicode16
codePointAt: anIndex put: anInt

"Stores codePoint anInt at the specified location. Returns anInt.
 Class of receiver is changed to Unicode32 if needed.

 The code after the primitive runs only if primitive 936 fails.
 It signals OutOfRange for a codePoint in 16rD800..16rDFFF, above
 16r10FFFF or below 0, stores through the result of _convertToQuadByte
 for a codePoint above 16rFFFF, and signals a class error when anInt is
 not a SmallInteger.  For an acceptable codePoint it then reports a bad
 index, or else a primitive failure under the selector #codePointAt:put: ."

<primitive: 936>
(anInt class == SmallInteger) ifTrue:[
  anInt >= 16rD800 ifTrue:[
    anInt <= 16rDFFF ifTrue:[
      OutOfRange signal:'codePoint 16r', anInt asHexString ,' is illegal for Unicode'.
    ].
    anInt > 16rFFFF ifTrue:[
      anInt > 16r10FFFF ifTrue:[
        OutOfRange new name:'a codePoint' min: 0 max: 16r10FFFF
                        actual: anInt ; signal
      ].
      ^ self _convertToQuadByte codePointAt: anIndex put: anInt
    ].
  ].
  "Only a negative value is out of range at this point.  Previously every
   SmallInteger was reported as OutOfRange here, which made the index
   checks below unreachable and misreported a bad index."
  anInt < 0 ifTrue:[
    ^ OutOfRange new name:'anInt' min: 0 max: 16r10FFFF
		actual: anInt ; signal
  ].
] ifFalse:[
  anInt _validateClass: SmallInteger
].
(anIndex _isSmallInteger) ifTrue: [
  ((anIndex > (self size + 1)) or: [anIndex <= 0]) ifTrue: [
    ^ self _errorIndexOutOfRange: anIndex
  ]
] ifFalse: [
  ^ self _errorNonIntegerIndex: anIndex
] .
"report the failure under this method's own selector, not #at:put:"
self _primitiveFailed: #codePointAt:put: args: { anIndex . anInt }
%

method:
_basicAt: index put: char
"Disallowed , use codePointAt:put:
 Always reports shouldNotImplement: for this selector."
^ self shouldNotImplement: #_basicAt:put:
%

method:
squeakBasicAt: anIndex put: aValue
"Disallowed , use codePointAt:put:
 Always reports shouldNotImplement: for this selector."
^ self shouldNotImplement: #squeakBasicAt:put:
%

method: Unicode16
insertAll: aCharOrCharColl at: anIndex

"Inserts aCharOrCharColl into the receiver at anIndex using primitive 922.
 The code after the primitive runs only if the primitive fails: it
 validates anIndex (size + 1 is accepted), then retries with a form of
 the argument that the primitive accepts, or with the result of
 _convertToQuadByte when the argument cannot be expressed as Unicode16."

<primitive: 922>
| info aString |
anIndex _isSmallInteger ifFalse:[ anIndex _validateClass: SmallInteger ].
((anIndex <= 0) or: [anIndex > (self size + 1)])
  ifTrue: [ ^ self _errorIndexOutOfRange: anIndex].

info := aCharOrCharColl _stringCharSize .
info ~~ 0 ifTrue:[  "arg is a DoubleByteString or Unicode32 or QuadByteString"
  aString := aCharOrCharColl _asUnicode16 .
  "a nil result from _asUnicode16 is handled by converting the receiver"
  aString ifNotNil:[ ^  self insertAll: aString at: anIndex ]
          ifNil:[ ^ self _convertToQuadByte insertAll: aCharOrCharColl at: anIndex]
].
aCharOrCharColl class == Character ifTrue:[ 
  "wrap the Character in a new Unicode16 and retry"
  (aString := Unicode16 new) add: aCharOrCharColl .
  ^ self insertAll: aString at: anIndex .
].
aCharOrCharColl _validateClasses: { Unicode7 . Unicode16 . Unicode32 } .
self _primitiveFailed: #insertAll:at: args: { aCharOrCharColl . anIndex }
%

category: 'Comparing'
method: Unicode16
at: offset equalsNoCase: aString

"Answers true if aString is contained in the receiver starting at
 offset, false otherwise.
 Comparison is done using full case folding and code point order,
 using  icu:UnicodeString::caseCompare in libicu .
 aString must be a Unicode7, Unicode16 , or Unicode32 .

 The code after primitive 930 runs only if the primitive fails.  A
 non-Unicode argument is converted with asUnicodeString and retried;
 otherwise the arguments are validated and the failure is reported."

<primitive: 930>
| argIsUnicode |
argIsUnicode := (aString _stringCharSize bitAnd: 16r8) ~~ 0 .
argIsUnicode ifFalse:[
  ^ self at: offset equalsNoCase: aString asUnicodeString
].
aString _validateClasses: { Unicode7 . Unicode16 . Unicode32 } .
offset _isSmallInteger ifFalse:[ offset _validateClass: SmallInteger ].
(offset <= 0 or:[ offset > (self size + 1) ]) ifTrue:[
  ^ self _errorIndexOutOfRange: offset
].
self _primitiveFailed: #at:equalsNoCase: args: { offset . aString }
%

method: Unicode16
compareCase: aString

"Answers -1, 0 or 1 when the receiver is less than, equal to, or
 greater than aString .
 Comparison is done with full case folding and code point order,
 using  icu:UnicodeString::caseCompare in libicu.
 aString must be a Unicode7, Unicode16 , or Unicode32 .

 The code after primitive 969 runs only if the primitive fails.  A
 non-Unicode argument is converted with asUnicodeString and retried."

<primitive: 969>
| argIsUnicode |
argIsUnicode := (aString _stringCharSize bitAnd: 16r8) ~~ 0 .
argIsUnicode ifFalse:[
  ^ self compareCase: aString asUnicodeString
].
aString _validateClasses: { Unicode7 . Unicode16 . Unicode32 } .
^ self _primitiveFailed: #compareCase: args: { aString }
%

! legacy compare methods
method: Unicode16
= argString
  "Answers whether the receiver equals argString under the default
   IcuCollator.  For an argument that is not a Unicode string: answers
   false if it is a Symbol or if its _stringCharSize is 0, otherwise
   signals an ArgumentError."

  | charSizeInfo argIsUnicode |
  charSizeInfo := argString _stringCharSize .
  argIsUnicode := (charSizeInfo bitAnd: 16r8) ~~ 0 .
  argIsUnicode ifFalse:[
    argString _isSymbol ifTrue:[ ^ false ].
    charSizeInfo == 0 ifTrue:[ ^ false ].
    ArgumentError signal:'String argument disallowed in Unicode comparison'
  ].
  ^ self _equals: argString collator: nil useMinSize: false
%
method: Unicode16
< argString
  "Answers whether the receiver sorts before argString under the
   default IcuCollator.  Signals an ArgumentError when argString is not
   a Unicode string and its _stringCharSize is non-zero."

  | charSizeInfo |
  charSizeInfo := argString _stringCharSize .
  ((charSizeInfo bitAnd: 16r8) == 0 and:[ charSizeInfo ~~ 0 ]) ifTrue:[
    ArgumentError signal:'String argument disallowed in Unicode comparison'
  ].
  ^ (self compareTo: argString collator: nil useMinSize: false) < 0
%
method: Unicode16
> argString
  "Answers whether the receiver sorts after argString under the
   default IcuCollator.  Signals an ArgumentError when argString is not
   a Unicode string and its _stringCharSize is non-zero."

  | charSizeInfo |
  charSizeInfo := argString _stringCharSize .
  ((charSizeInfo bitAnd: 16r8) == 0 and:[ charSizeInfo ~~ 0 ]) ifTrue:[
    ArgumentError signal:'String argument disallowed in Unicode comparison'
  ].
  ^ (self compareTo: argString collator: nil useMinSize: false) > 0
%

method: Unicode16
_unicodeEqual: argString
  "Compares receiver to argument using  IcuCollator default.
   Unlike = , performs no check of the argument class.
   Listed in Unicode16 class>>_unicodeCompareMapping as the
   implementation for #= ."

  ^ self _equals: argString collator: nil useMinSize: false 
%
method: Unicode16
_unicodeLessThan: argString
  "Compares receiver to argument using  IcuCollator default.
   Unlike < , performs no check of the argument class.
   Listed in Unicode16 class>>_unicodeCompareMapping as the
   implementation for #< ."
  ^ (self compareTo: argString collator: nil useMinSize: false) < 0
%
method: Unicode16
_unicodeGreaterThan: argString
  "Compares receiver to argument using  IcuCollator default.
   Unlike > , performs no check of the argument class.
   Listed in Unicode16 class>>_unicodeCompareMapping as the
   implementation for #> ."
  ^ (self compareTo: argString collator: nil useMinSize: false) > 0
%

! methods for MultiByteString in Unicode compare mode
method: MultiByteString
_unicodeEqual: argString
  "Compares receiver to argument using  IcuCollator default.
   Listed in Unicode16 class>>_unicodeCompareMapping as the
   implementation for #= on MultiByteString."

^ self _equals: argString collator: nil useMinSize: false
% 
method: MultiByteString
_unicodeLessThan: argString
  "Compares receiver to argument using  IcuCollator default.
   Listed in Unicode16 class>>_unicodeCompareMapping as the
   implementation for #< on MultiByteString."
  ^ (self compareTo: argString collator: nil useMinSize: false) < 0
%
method: MultiByteString
_unicodeGreaterThan: argString
  "Compares receiver to argument using  IcuCollator default.
   Listed in Unicode16 class>>_unicodeCompareMapping as the
   implementation for #> on MultiByteString."
  ^ (self compareTo: argString collator: nil useMinSize: false) > 0
%


method: Unicode16
equals: aString collatingTable: aTable
  "disallowed for Unicode16; always reports shouldNotImplement: ."
  self shouldNotImplement: #equals:collatingTable:
%
method: Unicode16
greaterThan: aString collatingTable: aTable
  "disallowed for Unicode16; always reports shouldNotImplement: ."
  self shouldNotImplement: #greaterThan:collatingTable:
%

method: Unicode16
lessThan: aString collatingTable: aTable
  "disallowed for Unicode16; always reports shouldNotImplement: ."
  self shouldNotImplement: #lessThan:collatingTable:
%


category: 'Private'
method: Unicode16
_findString: subString startingAt: startIndex ignoreCase: aBoolean

"Searches for subString beginning at startIndex.  A case-insensitive
 search (aBoolean true) is done by _findStringNocase:startingAt:collator:
 with the default IcuCollator; a case-sensitive search is left to the
 inherited implementation."

aBoolean ifTrue:[
  ^ self
      _findStringNocase: subString
      startingAt: startIndex
      collator: IcuCollator default
].
^ super _findString: subString startingAt: startIndex ignoreCase: aBoolean
%
method: MultiByteString
_findStringNocase: subString startingAt: startIndex collator: anIcuCollator

"Case-insensitive substring search performed by primitive 944, which
 searches using a copy of anIcuCollator set to TERTIARY matches and
 an ICU StringSearch.
 The Smalltalk code below runs only when the primitive fails: it
 validates the arguments, reports a bad start offset, and otherwise
 reports the primitive failure."
<primitive: 944>
| badOffset |
subString _validateClasses: { String . Utf8 } .
startIndex _validateClass: SmallInteger .
badOffset := startIndex < 1 or:[ startIndex > self size ] .
badOffset ifTrue:[
  ^ self _error: #objErrBadOffsetIncomplete args: { startIndex }
].
^ self
    _primitiveFailed: #_findStringNocase:startingAt:collator:
    args: { subString . startIndex . anIcuCollator }
%

category: 'New Indexing Comparison - prims'
! TODO more work on actual prims
method: Unicode16
_idxPrimCompareEqualTo: aCharCollection
  "Equality test used by the indexing subsystem.
   Answers false for a nil argument and for a Symbol that is not a
   Unicode string.  Any other non-Unicode argument signals an
   ArgumentError.  Unicode arguments are compared by
   _idxUnicodeCompareEqualTo: ."

  aCharCollection == nil ifTrue:[ ^ false ].
  aCharCollection isUnicodeString ifTrue:[
    ^ self _idxUnicodeCompareEqualTo: aCharCollection
  ].
  aCharCollection _isSymbol ifTrue:[ ^ false ].
  ArgumentError signal:'String argument disallowed in Unicode comparison'.
  "reached only if the error above is resumed"
  ^ self _idxUnicodeCompareEqualTo: aCharCollection
%

category: 'New Indexing Comparison - prims'
method: Unicode16
_idxPrimCompareLessThan: aCharCollection

"Less-than test used by the indexing subsystem for ordering and
 for index comparisons.
 Answers false for a nil argument.  A non-Unicode argument signals an
 ArgumentError.  Unicode arguments are compared by
 _idxUnicodeCompareLessThan: ."

  aCharCollection == nil ifTrue:[ ^ false ].
  aCharCollection isUnicodeString ifTrue:[
    ^ self _idxUnicodeCompareLessThan: aCharCollection
  ].
  ArgumentError signal:'String argument disallowed in Unicode comparison'.
  "reached only if the error above is resumed"
  ^ self _idxUnicodeCompareLessThan: aCharCollection
%
method: Unicode16
_idxPrimCompareGreaterThan: aCharCollection

"This comparison operation is used for the indexing subsystem to
 determine an ordering for insertion into indexing objects and for
 doing indexing subsystem comparisons.

 This method collates the same as the > method, except that
 it returns true if the argument is nil.
 A non-Unicode argument signals an ArgumentError; Unicode arguments
 are compared by _idxUnicodeCompareGreaterThan: ."

  "The nil case previously answered false, contradicting the contract
   documented above."
  aCharCollection ifNil:[ ^ true ].
  aCharCollection isUnicodeString ifFalse: [ "not Unicode"
    ArgumentError signal:'String argument disallowed in Unicode comparison' ].
  ^self _idxUnicodeCompareGreaterThan: aCharCollection
%

category: 'New Indexing Comparison'
method: Unicode16
_idxUnicodeCompareEqualTo: aCharCollection
  "Forwards unchanged to the inherited implementation.
   NOTE(review): presumably defined in this class so that
   Unicode16 class>>_unicodeCompareTmdForClass:selectors: can fetch it
   with persistentMethodAt: -- confirm before removing."

  ^ super _idxUnicodeCompareEqualTo: aCharCollection
%

category: 'New Indexing Comparison'
method: Unicode16
_idxUnicodeCompareGreaterThan: aCharCollection
  "Forwards unchanged to the inherited implementation.
   NOTE(review): presumably defined in this class so that
   Unicode16 class>>_unicodeCompareTmdForClass:selectors: can fetch it
   with persistentMethodAt: -- confirm before removing."

  ^ super _idxUnicodeCompareGreaterThan: aCharCollection
%

category: 'New Indexing Comparison'
method: Unicode16
_idxUnicodeCompareLessThan: aCharCollection
  "Forwards unchanged to the inherited implementation.
   NOTE(review): presumably defined in this class so that
   Unicode16 class>>_unicodeCompareTmdForClass:selectors: can fetch it
   with persistentMethodAt: -- confirm before removing."

  ^ super _idxUnicodeCompareLessThan: aCharCollection
%

category: 'Case-Insensitive Comparisons'
method:  Unicode16
equalsNoCase: aString
  "Returns true if receiver and aString are equal using
  full case folding and code point order,
  using  icu:UnicodeString::caseCompare in libicu.
  Returns false otherwise. 
  aString must be a Unicode7, Unicode16 , or Unicode32 .
  Implemented as a test of whether compareCase: answers 0."

  ^ (self compareCase: aString) == 0
%
method: 
isEquivalent: aString
  "Answers true if the receiver and aString are equal using
  full case folding and code point order,
  using  icu:UnicodeString::caseCompare in libicu.
  Answers false otherwise, including when aString is not a string
  (its _stringCharSize is 0)."

  | isString |
  isString := aString _stringCharSize ~~ 0 .
  ^ isString and:[ (self compareCase: aString) == 0 ]
%

! end class Unicode16
!--------------------------

set class Unicode32
removeallmethods
removeallclassmethods

category: 'Documentation'
classmethod: Unicode32
comment
 "Answers the class documentation string for Unicode32."
 ^ 'An instance of Unicode32 is a QuadByteString in which
all codePoints are legal Utf16 characters.
For every codePoint cp in a Unicode32 the following evaluates to true:
      cp >= 0 and:[ cp <= 16r10FFFF] '
%

category: 'Instance Creation'
classmethod: Unicode32
withAll: aString

"Answers an instance of Unicode32 built from aString by primitive 943.
 If the primitive fails: a Utf8 argument is decoded with decodeToUnicode
 and retried; any other argument is validated as a String or Utf8 and an
 ArgumentError is then signalled."

<primitive: 943>
| isUtf8 |
isUtf8 := aString isKindOfClass: Utf8 .
isUtf8 ifTrue:[
  ^ self withAll: aString decodeToUnicode
].
aString _validateClasses: { String . Utf8 } .
^ ArgumentError signal:'argument contains codePoints not valid for Unicode'
%

! at:  inherited

category: 'Accessing'
method: Unicode32
at: anIndex put: aChar

"Stores aChar at the specified location using primitive 934.

 The code after the primitive runs only if the primitive fails.  An
 argument that is not exactly a Character must be an AbstractCharacter
 and is retried as aChar asCharacter.  A Character whose codePoint is
 outside 0..16r10FFFF signals OutOfRange.  Otherwise a bad index or a
 primitive failure is reported."

<primitive: 934>
| cp |
(aChar class == Character) ifFalse:[
  aChar _validateClass: AbstractCharacter .
  ^ self at: anIndex put: aChar asCharacter
].
cp := aChar codePoint .
(cp < 0 or:[ cp > 16r10FFFF ]) ifTrue:[
  ^ OutOfRange new name:'aChar codePoint' min: 0 max: 16r10FFFF
		actual: cp ; signal
].
anIndex _isSmallInteger ifFalse:[
  ^ self _errorNonIntegerIndex: anIndex
].
(anIndex > (self size + 1) or:[ anIndex <= 0 ]) ifTrue:[
  ^ self _errorIndexOutOfRange: anIndex
].
self _primitiveFailed: #at:put: args: { anIndex . aChar }
%

method: Unicode32
codePointAt: anIndex put: anInt

"Stores anInt at the specified location. returns anInt

 The code after primitive 937 runs only if the primitive fails.  anInt
 must be a SmallInteger in 0..16r10FFFF, else a class error or
 OutOfRange is signalled.  Otherwise a bad index is reported, or else a
 primitive failure under the selector #codePointAt:put: ."

<primitive: 937>
anInt _validateClass: SmallInteger .
(anInt < 0 or:[ anInt > 16r10FFFF]) ifTrue:[
  ^ OutOfRange new name:'anInt' min: 0 max: 16r10FFFF actual: anInt ; signal
].
(anIndex _isSmallInteger) ifTrue: [
  ((anIndex > (self size + 1)) or: [anIndex <= 0]) ifTrue: [
    ^ self _errorIndexOutOfRange: anIndex
  ]
] ifFalse: [
  ^ self _errorNonIntegerIndex: anIndex
] .
"report the failure under this method's own selector, not #at:put:"
self _primitiveFailed: #codePointAt:put: args: { anIndex . anInt }
%

method:
_basicAt: index put: char
"Disallowed , use codePointAt:put:
 Always reports shouldNotImplement: for this selector."
^ self shouldNotImplement: #_basicAt:put:
%

method:
squeakBasicAt: anIndex put: aValue
"Disallowed , use codePointAt:put:
 Always reports shouldNotImplement: for this selector."
^ self shouldNotImplement: #squeakBasicAt:put:
%

category: 'Adding'
method: Unicode32
add: aCharOrCharColl

"Adds aCharOrCharColl to the receiver using primitive 940.
 If the primitive fails, the work is delegated to _retryAdd: ."

<primitive: 940>
^ self _retryAdd: aCharOrCharColl
%
method: Unicode32
_retryAdd: aCharOrCharColl
"Fallback used by add:, addLast: and addAll: after primitive 940 fails.
 A Character argument signals OutOfRange, reporting its codePoint.
 A Unicode string argument is reported as a primitive failure.
 Any other string is retried as its asUnicodeString form, an
 AbstractCharacter as asCharacter, a CharacterCollection as asString,
 and anything else is enumerated and its elements added one by one."
| info cSize |
aCharOrCharColl class == Character ifTrue:[
  "report the integer codePoint, as the other OutOfRange sites in this
   file do, rather than the Character object"
  ^ OutOfRange new name:'aChar codePoint' min: 0 max: 16r10FFFF
		actual: aCharOrCharColl codePoint ; signal
].
info := aCharOrCharColl _stringCharSize .
(info bitAnd: 16r8) ~~ 0 ifTrue:[  "arg is a Unicode string, assume cSize 4"
  "return here: falling through would retry add: with the same kind
   of argument and fail again"
  ^ self _primitiveFailed: #add: args: { aCharOrCharColl } .
].
cSize := info bitAnd: 16r7 .
cSize ~~ 0 ifTrue:[
  self add: aCharOrCharColl asUnicodeString .
  ^ aCharOrCharColl
].
(aCharOrCharColl isKindOf: AbstractCharacter) ifTrue:[
  self add: aCharOrCharColl asCharacter .
  ^ aCharOrCharColl
].
(aCharOrCharColl isKindOf: CharacterCollection) ifTrue:[
  ^ self add: aCharOrCharColl asString.
].
aCharOrCharColl do: [:each | self add: each].
^ aCharOrCharColl
%

method: Unicode32
addLast: aCharOrCharColl

"Adds aCharOrCharColl to the receiver using primitive 940, the same
 primitive as add: .  If the primitive fails, the work is delegated
 to _retryAdd: ."

<primitive: 940>
^ self _retryAdd: aCharOrCharColl
%
method: Unicode32
addAll: aCharOrCharColl

"Adds aCharOrCharColl to the receiver using primitive 940, the same
 primitive as add: .  If the primitive fails, the work is delegated
 to _retryAdd: ."

<primitive: 940>
^ self _retryAdd: aCharOrCharColl
%

method: Unicode32
, aCharOrCharColl

"Returns a new instance of the receiver's class that contains the elements of
 the receiver followed by the elements of aCharOrCharColl.  The argument
 must be a CharacterCollection or an AbstractCharacter.

 The code after the primitive runs only if primitive 939 fails."

<primitive: 939>
| info cSize result |
info := aCharOrCharColl _stringCharSize .
cSize := info bitAnd: 16r7 .
(cSize == 4) ifTrue:[ "arg is malformed QuadByteString not handled by primitive"
  "copy the receiver and let addAll: deal with the argument"
  result:= Unicode32 withAll: self.
  result addAll: aCharOrCharColl.
  ^ result .
].
(aCharOrCharColl class == Character) ifTrue:[ | av |
  (av := aCharOrCharColl codePoint) >= 16rD800 ifTrue:[
    "16rD800..16rDFFF is rejected as illegal for Unicode"
    av <= 16rDFFF ifTrue:[
      OutOfRange signal:'codePoint 16r', av asHexString ,' is illegal for Unicode'.
    ].
    result:= Unicode32 withAll: self .
  ] ifFalse:[
   "NOTE(review): a codePoint below 16rD800 is reported as OutOfRange
    here; presumably the primitive is expected to have handled it -- confirm"
   ^ OutOfRange new name:'aChar codePoint' min: 0 max: 16r10FFFF actual: av ; signal
  ].
  result add: aCharOrCharColl .
  ^ result
].   
aCharOrCharColl _validateClasses: { Unicode7 . Unicode16 . Unicode32 } .
self _primitiveFailed: #, args: { aCharOrCharColl } 
%


category: 'Adding'
method: Unicode32
insertAll: aCharOrCharColl at: anIndex
"Inserts aCharOrCharColl into the receiver at anIndex using primitive 923.
 The code after the primitive runs only if the primitive fails: it
 validates anIndex (size + 1 is accepted), then retries with the
 asUnicodeString form of the argument, or with a Character wrapped in a
 new Unicode32, and otherwise reports the failure."
<primitive: 923>
| aString info |
anIndex _isSmallInteger ifFalse:[ anIndex _validateClass: SmallInteger ].
((anIndex <= 0) or: [anIndex > (self size + 1)])
  ifTrue: [ ^ self _errorIndexOutOfRange: anIndex].

info := aCharOrCharColl _stringCharSize .
info == 4 ifTrue:[  "must be a QuadByteString since prim handles Unicode32 arg"
   ^ self insertAll:  aCharOrCharColl asUnicodeString  at: anIndex
].
aCharOrCharColl class == Character ifTrue:[ 
  "wrap the Character in a new Unicode32 and retry"
  (aString := Unicode32 new) add: aCharOrCharColl .
  ^ self insertAll: aString at: anIndex .
].
aCharOrCharColl _validateClasses: { Unicode7 . Unicode16 . Unicode32 } .
self _primitiveFailed: #insertAll:at: args: { aCharOrCharColl . anIndex }
%

category: 'Comparing'
method: QuadByteString
compareTo: argString collator: anIcuCollator useMinSize: aMinSize

"Returns -1, 0 or 1,  when receiver is less than,
 equal to, or greater than argString .
 argString must be a String, MultiByteString, or a Utf8.
 anIcuCollator == nil is interpreted as   IcuCollator default .

 If aMinSize == false, normal comparision is done.

 If aMinSize==true, compare stops at (self size min: argString size),
 which is Squeak semantics for comparison.

 If aMinSize is a SmallInteger >= 1, compare stops at
   aMinSize min: (self size min: argString size) .

 The code after primitive 929 runs only if the primitive fails."

<primitive: 929>
"NOTE(review): session state slot 20 presumably caches the default
 collator; when it is nil the call is retried with IcuCollator default
 passed explicitly -- confirm"
anIcuCollator ifNil:[  
  (System __sessionStateAt: 20) ifNil:[
    ^ self compareTo: argString collator: IcuCollator default useMinSize: aMinSize ]
] ifNotNil:[ anIcuCollator _validateClass: IcuCollator ].
aMinSize _validateClasses: { Boolean . SmallInteger }.
argString _validateClasses: { String . Utf8 }.
^ self _primitiveFailed: #compareTo:collator:useMinSize: 
        args: { argString . anIcuCollator . aMinSize }
%

method: QuadByteString
_equals: argString collator: anIcuCollator useMinSize: aFalse

"Returns true if argString compares equals to the receiver using anIcuCollator
 and if argString is not a Symbol, otherwise returns false .

 The code after primitive 987 runs only if the primitive fails.  In that
 path a CharacterCollection argument is compared by sending it = with the
 receiver as argument, and any other argument answers false."
<primitive: 987>
"NOTE(review): session state slot 20 presumably caches the default
 collator; when it is nil the call is retried with IcuCollator default
 passed explicitly -- confirm"
anIcuCollator ifNil:[ (System __sessionStateAt: 20) ifNil:[
  ^ self _equals: argString collator: IcuCollator default useMinSize: aFalse ]
] ifNotNil:[  anIcuCollator _validateClass: IcuCollator ].
(argString isKindOf: CharacterCollection) ifTrue:[ ^ argString = self ].
^ false
%

method: Unicode32
_at: offset equalsNoCase: aString

"Returns true if aString is contained in the receiver, starting at
 offset.  Returns false otherwise.
 The comparison is done with full case folding and code point order,
 using  icu:UnicodeString::caseCompare in libicu .
 aString must be a Unicode7, Unicode16 , or Unicode32

 The code after primitive 930 runs only if the primitive fails.  A
 non-Unicode argument is converted with asUnicodeString and retried;
 otherwise the arguments are validated and the failure is reported."

<primitive: 930>
| info |
info := aString _stringCharSize .
(info bitAnd: 16r8) == 0 ifTrue:[  "arg is not a Unicode string"
   ^ self _at: offset equalsNoCase: aString asUnicodeString
].
"Validate against the Unicode classes named in the contract above, as
 Unicode16>>at:equalsNoCase: does.  The former { String } check would
 reject a Unicode16 or Unicode32 argument and hide a bad offset."
aString _validateClasses: { Unicode7 . Unicode16 . Unicode32 } .
offset _isSmallInteger ifFalse:[ offset _validateClass: SmallInteger ].
((offset <= 0) or: [offset > (self size + 1)])
  ifTrue: [ ^ self _errorIndexOutOfRange: offset].
self _primitiveFailed: #_at:equalsNoCase: args: { offset . aString }
%

method: Unicode32
compareCase: aString

"Returns -1, 0 or 1,  when receiver is less than,
 equal to, or greater than aString .
 Comparison is done full case folding and code point order,
 using  icu:UnicodeString::caseCompare in libicu.
 aString must be a Unicode7, Unicode16 , or Unicode32

 The code after primitive 969 runs only if the primitive fails.  A
 non-Unicode argument is converted with asUnicodeString and retried."

<primitive: 969>
| info |
info := aString _stringCharSize .
(info bitAnd: 16r8) == 0 ifTrue:[  "arg is not a Unicode string"
   ^ self compareCase: aString asUnicodeString
].
"Validate against the Unicode classes named in the contract above, as
 Unicode16>>compareCase: does, instead of the former { String } ."
aString _validateClasses: { Unicode7 . Unicode16 . Unicode32 } .
^self _primitiveFailed: #compareCase: args: { aString }
%

method: Unicode32 
at: offset equalsNoCase: aString

"Answers true if aString is contained in the receiver starting at
 offset, false otherwise.
 The comparison is done with full case folding and code point order,
 using  icu:UnicodeString::caseCompare in libicu .
 aString must be a Unicode7, Unicode16 , or Unicode32 .

 prim 930 can't handle random positions in a full Utf32, so for any
 offset other than 1 the comparison is made against a copy of the
 receiver that starts at offset."

| tail |
offset == 1 ifTrue:[
  ^ self _at: offset equalsNoCase: aString
].
tail := self copyFrom: offset to: self size .
^ tail _at: 1 equalsNoCase: aString
%

! legacy compare methods
method: Unicode32
= argString
  "Answers whether the receiver equals argString under the default
   IcuCollator.  For an argument that is not a Unicode string: answers
   false if it is a Symbol or if its _stringCharSize is 0, otherwise
   signals an ArgumentError."

  | charSizeInfo argIsUnicode |
  charSizeInfo := argString _stringCharSize .
  argIsUnicode := (charSizeInfo bitAnd: 16r8) ~~ 0 .
  argIsUnicode ifFalse:[
    argString _isSymbol ifTrue:[ ^ false ].
    charSizeInfo == 0 ifTrue:[ ^ false ].
    ArgumentError signal:'String argument disallowed in Unicode comparison'
  ].
  ^ self _equals: argString collator: nil useMinSize: false
%
method: Unicode32
< argString
  "Answers whether the receiver sorts before argString under the
   default IcuCollator.  Signals an ArgumentError when argString is not
   a Unicode string and its _stringCharSize is non-zero."

  | charSizeInfo |
  charSizeInfo := argString _stringCharSize .
  ((charSizeInfo bitAnd: 16r8) == 0 and:[ charSizeInfo ~~ 0 ]) ifTrue:[
    ArgumentError signal:'String argument disallowed in Unicode comparison'
  ].
  ^ (self compareTo: argString collator: nil useMinSize: false) < 0
%
method: Unicode32
> argString
  "Answers whether the receiver sorts after argString under the
   default IcuCollator.  Signals an ArgumentError when argString is not
   a Unicode string and its _stringCharSize is non-zero."

  | charSizeInfo |
  charSizeInfo := argString _stringCharSize .
  ((charSizeInfo bitAnd: 16r8) == 0 and:[ charSizeInfo ~~ 0 ]) ifTrue:[
    ArgumentError signal:'String argument disallowed in Unicode comparison'
  ].
  ^ (self compareTo: argString collator: nil useMinSize: false) > 0
%

method: Unicode32
_unicodeEqual: argString
  "Compares receiver to argument using  IcuCollator default.
   Unlike = , performs no check of the argument class.
   Listed in Unicode16 class>>_unicodeCompareMapping as the
   implementation for #= ."
  ^ self _equals: argString collator: nil useMinSize: false
%
method: Unicode32
_unicodeLessThan: argString
  "Compares receiver to argument using  IcuCollator default.
   Unlike < , performs no check of the argument class.
   Listed in Unicode16 class>>_unicodeCompareMapping as the
   implementation for #< ."
  ^ (self compareTo: argString collator: nil useMinSize: false) < 0
%
method: Unicode32
_unicodeGreaterThan: argString
  "Compares receiver to argument using  IcuCollator default.
   Unlike > , performs no check of the argument class.
   Listed in Unicode16 class>>_unicodeCompareMapping as the
   implementation for #> ."
  ^ (self compareTo: argString collator: nil useMinSize: false) > 0
%


! _reverseFrom: inherited from MultiByteString

category: 'Comparing'
method: Unicode32
equals: aString collatingTable: aTable
  "disallowed for Unicode32; always reports shouldNotImplement: ."
  self shouldNotImplement: #equals:collatingTable:
%
method: Unicode32
greaterThan: aString collatingTable: aTable
  "disallowed for Unicode32; always reports shouldNotImplement: ."
  self shouldNotImplement: #greaterThan:collatingTable:
%

method: Unicode32
lessThan: aString collatingTable: aTable
  "Disallowed for Unicode32.  Both arguments are ignored and
   #lessThan:collatingTable: is reported through shouldNotImplement: ."
  self shouldNotImplement: #lessThan:collatingTable:
%

!    Need to have an Index  capture the Collator used to create it.
!   Btree entries need to contain SortKey and String  pairs,
!   with caching of N bytes of SortKey .

category: 'Private'
method: Unicode32
_findString: subString startingAt: startIndex ignoreCase: aBoolean

"Case-insensitive searches (aBoolean == true) go through
 _findStringNocase:startingAt:collator: using IcuCollator default.
 Case-sensitive searches use the inherited implementation unchanged."

aBoolean ifTrue:[
  ^ self _findStringNocase: subString startingAt: startIndex
         collator: IcuCollator default
].
^ super _findString: subString startingAt: startIndex ignoreCase: aBoolean
%

category: 'New Indexing Comparison - prims'
! TODO more work on actual prims
method: Unicode32
_idxPrimCompareEqualTo: aCharCollection
  "Equality comparison used by the indexing subsystem to determine an
   ordering for insertion into indexing objects.

   Answers false for a nil argument, and for a Symbol that is not a Unicode
   string.  Any other argument that is not a Unicode string signals an
   ArgumentError.  Unicode arguments are handed to
   _idxUnicodeCompareEqualTo: ."

  aCharCollection == nil ifTrue:[ ^ false ].
  aCharCollection isUnicodeString ifFalse:[
    aCharCollection _isSymbol ifTrue:[ ^ false ].
    ArgumentError signal:'String argument disallowed in Unicode comparison'
  ].
  ^ self _idxUnicodeCompareEqualTo: aCharCollection
%

category: 'New Indexing Comparison - prims'
method: Unicode32
_idxPrimCompareLessThan: aCharCollection

"Less-than comparison used by the indexing subsystem, both to order
 insertions into indexing objects and for index comparisons.

 Answers false for a nil argument.  An argument that is not a Unicode string
 signals an ArgumentError.  Otherwise the answer comes from
 _idxUnicodeCompareLessThan: ."

  aCharCollection == nil ifTrue:[ ^ false ].
  aCharCollection isUnicodeString ifFalse:[
    ArgumentError signal:'String argument disallowed in Unicode comparison'
  ].
  ^ self _idxUnicodeCompareLessThan: aCharCollection
%
method: Unicode32
_idxPrimCompareGreaterThan: aCharCollection

"This comparison operation is used for the indexing subsystem to
 determine an ordering for insertion into indexing objects and for
 doing indexing subsystem comparisons.

 This method collates the same as the > method, except that
 it returns true if the argument is nil.

 An argument that is not a Unicode string signals an ArgumentError.
 Unicode arguments are handed to _idxUnicodeCompareGreaterThan: ."

  "Every receiver compares greater than nil, as documented above."
  aCharCollection ifNil:[ ^ true ].
  aCharCollection isUnicodeString ifFalse: [ "not Unicode"
    ArgumentError signal:'String argument disallowed in Unicode comparison' ].
  ^self _idxUnicodeCompareGreaterThan: aCharCollection
%

category: 'New Indexing Comparison'
method: Unicode32
_idxUnicodeCompareEqualTo: aCharCollection
  "Explicit override that only forwards to the inherited
   _idxUnicodeCompareEqualTo: implementation."

  ^ super _idxUnicodeCompareEqualTo: aCharCollection
%

category: 'New Indexing Comparison'
method: Unicode32
_idxUnicodeCompareGreaterThan: aCharCollection
  "Explicit override that only forwards to the inherited
   _idxUnicodeCompareGreaterThan: implementation."

  ^ super _idxUnicodeCompareGreaterThan: aCharCollection
%

category: 'New Indexing Comparison'
method: Unicode32
_idxUnicodeCompareLessThan: aCharCollection
  "Explicit override that only forwards to the inherited
   _idxUnicodeCompareLessThan: implementation."

  ^ super _idxUnicodeCompareLessThan: aCharCollection
%

category: 'Case-Insensitive Comparisons'
method: 
equalsNoCase: aString
  "Returns true if receiver and aString are equal using
  full case folding and code point order,
  using  icu:UnicodeString::caseCompare in libicu.
  Returns false otherwise.
  aString must be a Unicode7, Unicode16 , or Unicode32.
  The answer is true exactly when compareCase: answers 0; the argument is
  not checked in this method."

  ^ (self compareCase: aString) == 0
%
method: 
isEquivalent: aString
  "Answer whether the receiver and aString are equal under full case folding
  and code point order (icu:UnicodeString::caseCompare in libicu).
  An argument whose _stringCharSize is 0 is not a string and answers false
  without being compared."

  ^ aString _stringCharSize ~~ 0 and:[ (self compareCase: aString) == 0 ]
%

! end class Unicode32


!-------------------------------------
set class Utf8
removeallmethods
removeallclassmethods

category: 'Documentation'
classmethod: 
comment
 "Answers the class documentation text for Utf8 as a literal string."
 ^ 'An instance of Utf8 is a UTF-8 encoded string.
For every codePoint cp in a Utf8 the following
evaluates to true:
      cp >= 0 and:[ cp <= 16r10FFFF]
Codepoints in a Utf8 use a variable number of bytes
per codePoint, and thus only certain comparison
methods, directly supported by the libicu libraries,
are implemented.  All other string manipulation must
be done on the result of sending asUnicodeString to the
instance of Utf8, and then operating on the
equivalent Unicode7, Unicode16 or Unicode32 string. 

Methods inherited from ByteArray operate on the raw bytes
of the UTF-8 encoded string, and have no support for accessing codePoints. ' 
%

category: 'Private'
classmethod:
withBytes: aCollection

"Returns an instance of the receiver with the byte values of the argument.
 Private, for use in testing.

 Primitive 1021 does the work; the Smalltalk code below is the path taken
 when the primitive fails.  If the argument is neither a kind of String nor
 a kind of ByteArray, a new instance is filled one element at a time with
 unsigned8At:put: .  For a String or ByteArray argument the primitive
 failure itself is reported."

<primitive: 1021>
((aCollection isKindOf: String) or:[ aCollection isKindOf: ByteArray]) ifFalse:[ 
  | res idx |
  res := self new .
  idx := 1 . 
  "do: is used with a separate counter, so the argument only needs to
   support do: "
  aCollection do:[:elem |
     res unsigned8At: idx put: elem .
     idx := idx + 1 .
  ].
  ^ res
].
^ self _primitiveFailed: #withBytes: args: { aCollection }
%

category: 'Instance Creation'
classmethod:
withAll: aStringOrUtf
  "Answers the result of sending encodeAsUTF8 to the argument."
  ^ aStringOrUtf encodeAsUTF8
%

category: 'Private'
method: 
unsigned8At: index put: aValue
  "for use in testing.
   Stores aValue as one unsigned byte at index.  The send goes to super
   because Utf8 itself disallows at:put:signed:width: ."
  ^ super at: index put: aValue signed: false width: 1
%


category: 'Converting'
classmethod:
fromString: aString
"Answers the result of sending encodeAsUTF8 to aString."

^ aString encodeAsUTF8
%

method: 
asUnicodeString

"This will eventually be Deprecated. 
 New code should use decodeToUnicode, which makes the identical
 _decodeFromUtf8:maxSize: call.
 Decode receiver from UTF8 format.
 Returns a Unicode7 , Unicode16 or Unicode32 ,
 using the minimum character size needed to represent decoded result."

^ self _decodeFromUtf8: true maxSize: nil
%

method:
_coerceToUnicode
"Answers the receiver decoded via _decodeFromUtf8: true maxSize: nil,
 the same call that decodeToUnicode makes."

^ self _decodeFromUtf8: true maxSize: nil
%

method:
decodeToString
  "Decode the receiver returning an instance of String , DoubleByteString
   or QuadByteString.
   Passes false as the first argument of _decodeFromUtf8:maxSize: , where
   decodeToUnicode passes true; maxSize is nil."

  ^ self _decodeFromUtf8: false maxSize: nil
%
method:
decodeToUnicode
  "Decode the receiver returning an instance of Unicode7 , Unicode16 or Unicode32.
   Passes true as the first argument of _decodeFromUtf8:maxSize: ; maxSize is nil."

  ^ self _decodeFromUtf8: true maxSize: nil
%

method:
bytesIntoUnicode
 "Disallowed for Utf8, use decodeToUnicode instead.
  Reports #bytesIntoUnicode via shouldNotImplement: ."
 ^ self shouldNotImplement: #bytesIntoUnicode
%

! bytesIntoString inherited from ByteArray

! _decode*Utf8 methods inherited from ByteArray

category: 'Converting'
method:
encodeAsUTF8
  "The receiver is already a Utf8, so the receiver itself is answered
   (not a copy)."
  ^ self
%

method:
encodeAsUTF16
"Converts by way of an intermediate decoded string: the receiver is
 decoded with decodeToUnicode and the result is sent encodeAsUTF16 ."

^ self decodeToUnicode encodeAsUTF16
%

category: 'Comparing'
method:
= argString
  "Compares receiver to argument using  IcuCollator default.
   Forwards to _equals:collator:useMinSize: with a nil collator and
   useMinSize false."

  ^ self _equals: argString collator: nil useMinSize: false
%
category: 'Comparing'
method:
< argString
  "Answer true when the receiver collates before argString using the
   default IcuCollator (nil is passed as the collator).
   Any negative result of compareTo:collator:useMinSize: counts as
   less-than, the same test that Unicode32 >> < applies, rather than
   requiring the result to be exactly -1."
  ^ (self compareTo: argString collator: nil useMinSize: false) < 0
%
category: 'Comparing'
method:
> argString
  "Answer true when the receiver collates after argString using the
   default IcuCollator (nil is passed as the collator).
   Any positive result of compareTo:collator:useMinSize: counts as
   greater-than, the same test that Unicode32 >> > applies, rather than
   requiring the result to be exactly 1."
  ^ (self compareTo: argString collator: nil useMinSize: false) > 0
%
method:
compareTo: argString collator: anIcuCollator useMinSize: aMinSize

"Returns -1, 0 or 1,  when receiver is less than,
 equal to, or greater than argString .
 argString must be a String, MultiByteString, or a Utf8. 
 The implementation uses libicu comparison code which can operate
 on a UTF-8 encoded string directly.
 anIcuCollator == nil is interpreted as   IcuCollator default .

 If aMinSize == false, normal comparison is done.

 If aMinSize==true, compare stops at (self size min: argString size),
 which is Squeak semantics for comparison.

 If aMinSize is a SmallInteger >= 1, compare stops at
   aMinSize min: (self size min: argString size) .

 The Smalltalk code after the primitive is the primitive-failure path."
<primitive: 931>
"With a nil collator and nothing in session state slot 20, retry once with
 an explicit IcuCollator default.
 NOTE(review): slot 20 presumably caches the default collator - confirm."
anIcuCollator ifNil:[  
  (System __sessionStateAt: 20) ifNil:[
    ^ self compareTo: argString collator: IcuCollator default useMinSize: aMinSize ]
] ifNotNil:[ anIcuCollator _validateClass: IcuCollator ].
"Validate the remaining arguments, then report the primitive failure."
aMinSize _validateClasses: { Boolean . SmallInteger }.
argString _validateClasses: { String . Utf8 }.
^ self _primitiveFailed: #compareTo:collator:useMinSize:
        args: { argString . anIcuCollator . aMinSize }
%
method:
_equals: argString collator: anIcuCollator useMinSize: aFalse

"Returns true if argString compares equals to the receiver using anIcuCollator
 and if argString is not a Symbol, otherwise returns false.

 The Smalltalk code after the primitive is the primitive-failure path."
<primitive: 988>
"With a nil collator and nothing in session state slot 20, retry once with
 an explicit IcuCollator default.
 NOTE(review): slot 20 presumably caches the default collator - confirm."
anIcuCollator ifNil:[ (System __sessionStateAt: 20) ifNil:[
  ^ self _equals: argString collator: IcuCollator default useMinSize: aFalse ]
] ifNotNil:[  anIcuCollator _validateClass: IcuCollator ].
"Any other CharacterCollection gets to decide equality itself, with the
 operands swapped; every remaining kind of argument is unequal."
(argString isKindOf: CharacterCollection) ifTrue:[ ^ argString = self ].
^ false
%

method:
compareTo: aString collator: anIcuCollator

"Returns -1, 0 or 1,  when receiver is less than,
 equal to, or greater than aString .
 aString must be a String, MultiByteString, or a Utf8 .
 anIcuCollator == nil is interpreted as   IcuCollator default .
 Same as compareTo:collator:useMinSize: with useMinSize false."

^ self compareTo: aString collator: anIcuCollator useMinSize: false
%

method:
<= argString
  "Answers the negation of (self > argString)."
  ^ (self > argString) not
%
method:
>= argString
  "Answers the negation of (self < argString)."
  ^ (self < argString ) not
%
method:
hash
"inefficient, not intended for frequent use.
 The receiver is decoded with decodeToUnicode on every call and the hash
 of the decoded string is answered."
^ self decodeToUnicode hash
%

method:
charAt: index

"Disallowed for Utf8, convert with decodeToUnicode first.
 Reports #charAt: via shouldNotImplement: ."
^ self shouldNotImplement: #charAt:
%

method:
doubleByteCharAt: index
"Disallowed for Utf8, convert with decodeToUnicode first.
 Reports #doubleByteCharAt: via shouldNotImplement: ."
^ self shouldNotImplement: #doubleByteCharAt:
%

method:
quadByteCharAt: index
"Disallowed for Utf8, convert with decodeToUnicode first.
 Reports #quadByteCharAt: via shouldNotImplement: ."
^ self shouldNotImplement: #quadByteCharAt:
%

method:
at: index putChar: char
"Disallowed for Utf8: build a complete Unicode7, Unicode16 or Unicode32
 and then convert with encodeAsUTF8.
 Reports #at:putChar: via shouldNotImplement: ."
^ self shouldNotImplement: #at:putChar:
%

method:
at: index put: char
"Disallowed for Utf8: build a complete Unicode7, Unicode16 or Unicode32
 and then convert with encodeAsUTF8.
 Reports #at:put: via shouldNotImplement: ."
^ self shouldNotImplement: #at:put:
%

method:
_basicAt: index put: char
"Disallowed for Utf8: build a complete Unicode7, Unicode16 or Unicode32
 and then convert with encodeAsUTF8.
 Reports this method's own selector, #_basicAt:put: , via
 shouldNotImplement: so the error names the method actually called."
^ self shouldNotImplement: #_basicAt:put:
%

method:
squeakBasicAt: anIndex put: aValue
"Disallowed for Utf8: build a complete Unicode7, Unicode16 or Unicode32
 and then convert with encodeAsUTF8.
 Reports #squeakBasicAt:put: via shouldNotImplement: ."
^ self shouldNotImplement: #squeakBasicAt:put:
%


method:
at: index put: aString fromOffset: stringOffset sizeBytes: numBytes
"Disallowed for Utf8: build a complete Unicode7, Unicode16 or Unicode32
 and then convert with encodeAsUTF8.
 Reports #at:put:fromOffset:sizeBytes: via shouldNotImplement: ."
^ self shouldNotImplement: #at:put:fromOffset:sizeBytes:
%
method:
at: index put: aNumber signed: aBool width: aWidthInBytes
"Disallowed for Utf8; reports #at:put:signed:width: via shouldNotImplement: .
 unsigned8At:put: reaches the inherited implementation through super."
^ self shouldNotImplement: #at:put:signed:width:
%
method:
at: index signed: aBool width: aWidthInBytes
"Disallowed for Utf8; reports #at:signed:width: via shouldNotImplement: ."
^ self shouldNotImplement: #at:signed:width:
%
method:
at: index sizeBytes: anInt stringSize: anIntOrNil
"Disallowed for Utf8; reports #at:sizeBytes:stringSize: via shouldNotImplement: ."
^ self shouldNotImplement: #at:sizeBytes:stringSize:
%
method:
_deleteNoShrinkFrom: startIndex to: endIndex anchorTailSize: aSize
"Disallowed for Utf8; reports #_deleteNoShrinkFrom:to:anchorTailSize:
 via shouldNotImplement: ."
^ self shouldNotImplement: #_deleteNoShrinkFrom:to:anchorTailSize:
%

method:
_reverseDeleteNoShrinkFrom: startIndex to: endIndex anchorHeadSize: aSize
"Disallowed for Utf8; reports #_reverseDeleteNoShrinkFrom:to:anchorHeadSize:
 via shouldNotImplement: ."
^ self shouldNotImplement: #_reverseDeleteNoShrinkFrom:to:anchorHeadSize:
%

method:
deleteIndexKeyAt: anIndex
"Disallowed for Utf8; reports #deleteIndexKeyAt: via shouldNotImplement: ."
^ self shouldNotImplement: #deleteIndexKeyAt:
%

method:
at: aSmallInt putOopValueOfObject: anObject
"Disallowed for Utf8; reports #at:putOopValueOfObject: via shouldNotImplement: ."
^ self shouldNotImplement: #at:putOopValueOfObject:
%

method:
at: aSmallInt putOldOopValueOfObject: anObject
"Disallowed for Utf8; reports #at:putOldOopValueOfObject: via shouldNotImplement: ."
^ self shouldNotImplement: #at:putOldOopValueOfObject:
%

method:
getObjectWithOopValueAt: anOffset
"Disallowed for Utf8; reports #getObjectWithOopValueAt: via shouldNotImplement: ."
^ self shouldNotImplement: #getObjectWithOopValueAt: 
%

method:
getObjectWithOldOopValueAt: anOffset
"Disallowed for Utf8; reports #getObjectWithOldOopValueAt: via shouldNotImplement: ."
^ self shouldNotImplement: #getObjectWithOldOopValueAt: 
%

method:
dateTimeAt: startIndex width: anInt
"Disallowed for Utf8; reports #dateTimeAt:width: via shouldNotImplement: ."
^ self shouldNotImplement: #dateTimeAt:width:
%

method:
dateTimeAt: startIndex put: aDateTime width: anInt
"Disallowed for Utf8; reports #dateTimeAt:put:width: via shouldNotImplement: ."
^ self shouldNotImplement: #dateTimeAt:put:width:
%

method:
compareStringAt: startIndex to: aString startingAt: stringIndex sizeBytes: numSizeBytes useCase: aBool
"Disallowed for Utf8; reports
 #compareStringAt:to:startingAt:sizeBytes:useCase: via shouldNotImplement: ."
^ self shouldNotImplement: #compareStringAt:to:startingAt:sizeBytes:useCase: 
%

method:
shortStringAt: anIndex compareWith: aByteObject startingAt: stringOffset opCode: anOpCode
"Disallowed for Utf8.  Reports the complete selector of this method,
 including the trailing colon of its last keyword, via shouldNotImplement: ."
^ self shouldNotImplement: #shortStringAt:compareWith:startingAt:opCode:
%


method:
replaceFrom: startIndex to: stopIndex with: aSeqCollection startingAt: repIndex
"Disallowed for Utf8; reports #replaceFrom:to:with:startingAt:
 via shouldNotImplement: ."
^ self shouldNotImplement: #replaceFrom:to:with:startingAt:
%

method:
insertAll: aByteArray at: anIndex

"Only appending is supported: when anIndex is exactly one past the current
 size the inherited insertAll:at: is used.  Any other index would insert in
 the middle of the encoded data and signals an ArgumentError instead."

anIndex == (self size + 1) ifFalse:[
  ArgumentError signal:'a Utf8 may only be created to by encoding, or appended to'.
  ^ self
].
^ super insertAll: aByteArray at: anIndex
%

method:
_asUnicode7
"Decodes the receiver with decodeToUnicode and forwards _asUnicode7 to
 the decoded string."
^ self decodeToUnicode _asUnicode7
%

method:
_asUnicode16
"Decodes the receiver with decodeToUnicode and forwards _asUnicode16 to
 the decoded string."
^ self decodeToUnicode _asUnicode16
%

method:
removeFrom: startIndex to: stopIndex

"Disallowed for Utf8; reports #removeFrom:to: via shouldNotImplement: ."
^ self shouldNotImplement: #removeFrom:to:
%
method:
copyFrom: startIndex to: stopIndex

"Disallowed for Utf8; reports #copyFrom:to: via shouldNotImplement: ."
^ self shouldNotImplement: #copyFrom:to:
%

method:
_primAddRandomBytes: anIntHowMany startingAt: anIntOffset

"Disallowed for Utf8; reports #_primAddRandomBytes:startingAt:
 via shouldNotImplement: ."
^ self shouldNotImplement: #_primAddRandomBytes:startingAt:
%

method: 
_int32LittleEndianAt: startIndex put: anInteger

"Disallowed for Utf8; reports #_int32LittleEndianAt:put: via shouldNotImplement: ."
^ self shouldNotImplement: #_int32LittleEndianAt:put:
%

method:
int32LittleEndianAt: startIndex

"Disallowed for Utf8.  Reports the complete selector of this method,
 #int32LittleEndianAt: including its colon, via shouldNotImplement: ."
^ self shouldNotImplement: #int32LittleEndianAt:
%

category: 'Encoding'
method: 
addAllUtf8: aCharacterOrString
  "Appends the UTF8 encoding of the argument to the receiver, using
   primitive 1045.
   The Smalltalk code below is the primitive-failure path: the argument is
   validated as a kind of Character or String, and the failure is then
   reported under this method's own selector, #addAllUtf8: ."
  <primitive: 1045>
  aCharacterOrString _validateKindOfClasses: { Character . String } .
  ^ self _primitiveFailed: #addAllUtf8: args: { aCharacterOrString }
%

! deleted printOn: to fix 45512

! end class Utf8

!-------------------------------------
set class Utf16
removeallmethods
removeallclassmethods

category: 'Documentation'
classmethod: 
comment
 "Answers the class documentation text for Utf16 as a literal string."
 ^ 'An instance of Utf16 is a UTF-16 encoded string.
For every codePoint cp in a Utf16 the following
evaluates to true:
      cp >= 0 and:[ cp <= 16r10FFFF]
Codepoints in a Utf16 use a variable number of bytes
per codePoint, and thus only certain comparison
methods, directly supported by the libicu libraries,
are implemented.  All other string manipulation must
be done on the result of sending asUnicodeString to the
instance of Utf16, and then operating on the
equivalent Unicode7, Unicode16 or Unicode32 string. 

Methods inherited from ByteArray operate on the raw bytes
of the UTF-16 encoded string, and have no support for accessing codePoints. ' 
%

category: 'Private'
method: 
unsigned16At: index put: aValue
  "Private-for use in testing.
   Stores aValue as an unsigned 2 byte value at index.  The send goes to
   super because Utf16 itself disallows at:put:signed:width: ."
  ^ super at: index put: aValue signed: false width: 2
%

category: 'Instance Creation'
classmethod:
withAll: aStringOrUtf
  "Answers the result of sending encodeAsUTF16 to the argument."
  ^ aStringOrUtf encodeAsUTF16
%

category: 'Converting'
classmethod:
fromString: aString
"Answers the result of sending encodeAsUTF16 to aString."

^ aString encodeAsUTF16
%
method: 
_coerceToUnicode
"Answers the receiver decoded via _decodeFromUtf16: true, the same call
 that decodeToUnicode makes."

^ self _decodeFromUtf16: true
%

method:
decodeToUnicode
  "Decode the receiver returning an instance of Unicode7 , Unicode16 or Unicode32.
   Passes true to _decodeFromUtf16: to request a Unicode result."

  ^ self _decodeFromUtf16: true 
%
method:
asUnicodeString
  "Decode the receiver returning an instance of Unicode7 , Unicode16 or Unicode32.
   Makes the identical _decodeFromUtf16: call as decodeToUnicode."

  ^ self _decodeFromUtf16: true 
%

method:
decodeToString
  "Decode the receiver returning an instance of String, DoubleByteString, or QuadByteString.
   Passes false to _decodeFromUtf16: to request a non-Unicode result."

  ^ self _decodeFromUtf16: false 
%

method:
bytesIntoUnicode
 "Disallowed for Utf16, use decodeToUnicode instead.
  Reports #bytesIntoUnicode via shouldNotImplement: ."
 ^ self shouldNotImplement: #bytesIntoUnicode
%

method:
asByteArray
 "Disallowed for Utf16; reports #asByteArray via shouldNotImplement: ."
 ^ self shouldNotImplement: #asByteArray
%

method:
bytesIntoString 
  "Disallowed for Utf16; reports #bytesIntoString via shouldNotImplement: ."
  ^ self shouldNotImplement: #bytesIntoString
%

category: 'Private'
method: 
_decodeFromUtf16: unicodeResultBool 

"Decode UTF16 contents of the receiver, using primitive 1085.
 If unicodeResultBool == true, result is a Unicode7 , Unicode16 or Unicode32.
 If unicodeResultBool == false, result is a String, DoubleByteString or 
  QuadByteString.

 The Smalltalk code after the primitive is the primitive-failure path: the
 argument is validated as a Boolean and the failure is then reported."

<primitive: 1085>
unicodeResultBool _validateClass: Boolean .
^ self _primitiveFailed: #_decodeFromUtf16: args: { unicodeResultBool }
%

method: 
_decodeFromUtf8: unicodeResultBool maxSize: aSize
"Disallowed for Utf16, whose contents are decoded with _decodeFromUtf16: .
 Reports #_decodeFromUtf8:maxSize: via shouldNotImplement: ."

^ self shouldNotImplement: #_decodeFromUtf8:maxSize:
%

category: 'Converting'
method:
encodeAsUTF16
  "The receiver is already a Utf16, so the receiver itself is answered
   (not a copy)."
  ^ self
%

method:
encodeAsUTF8
"Converts by way of an intermediate decoded string: the receiver is
 decoded with decodeToUnicode and the result is sent encodeAsUTF8 ."

^ self decodeToUnicode encodeAsUTF8
%

! compares are not yet optimized to use libicu
category: 'Comparing'
method:
= argString
  "Answers false for a Symbol argument.  Otherwise the receiver is decoded
   with decodeToUnicode and compared with argString _coerceToUnicode .
   Any MessageNotUnderstood raised inside the comparison block (for example
   by an argument that does not respond to _coerceToUnicode) yields false;
   the _isSymbol test itself is outside that handler."
  argString _isSymbol ifTrue:[ ^ false ].
  ^ [  self decodeToUnicode = argString _coerceToUnicode 
    ] on: MessageNotUnderstood do:[ :ex | ex return: false ]
%
category: 'Comparing'
method:
< argString
  "Answers false for a Symbol argument.  Otherwise the receiver is decoded
   with decodeToUnicode and compared using < against
   argString _coerceToUnicode ."
  ^ argString _isSymbol not
      and:[ self decodeToUnicode < argString _coerceToUnicode ]
%
category: 'Comparing'
method:
> argString
  "Answers false for a Symbol argument.  Otherwise the receiver is decoded
   with decodeToUnicode and compared using > against
   argString _coerceToUnicode ."
  ^ argString _isSymbol not
      and:[ self decodeToUnicode > argString _coerceToUnicode ]
%

!  compareTo: argString collator: anIcuCollator useMinSize: aMinSize  ! not implemented
! _equals: argString collator: anIcuCollator useMinSize: aFalse  ! not implemented
! compareTo: aString collator: anIcuCollator ! not implemented yet

method:
<= argString
  "Answers the negation of (self > argString)."
  ^ (self > argString) not
%
method:
>= argString
  "Answers the negation of (self < argString)."
  ^ (self < argString ) not
%
method:
hash
  "inefficient, not intended for frequent use.
   The receiver is decoded with decodeToUnicode on every call and the hash
   of the decoded string is answered."
  ^ self decodeToUnicode hash
%

category: 'Accessing'
method:
charAt: index

"Disallowed for Utf16, convert with decodeToUnicode first.
 Reports #charAt: via shouldNotImplement: ."
^ self shouldNotImplement: #charAt:
%

method:
at: anIndex

"Answers the 16 bit word at word index anIndex as an Integer.
 The two bytes starting at byte offset (anIndex * 2) - 1 are read unsigned
 through the inherited at:signed:width: (Utf16 itself disallows that
 selector, hence super).  On a gem that is not big endian the two bytes of
 the value read are exchanged before it is answered."
| raw |
raw := super at: (anIndex * 2) - 1 signed: false width: 2 .
System gemIsBigEndian ifTrue:[ ^ raw ].
^ ((raw bitAnd: 16rFF) bitShift: 8) bitOr: (raw bitShift: -8)
%

method:
size
"Return the size in units of 16 bit words: the inherited size shifted
 right by one bit, i.e. halved."
^ super size bitShift: -1
%

method:
doubleByteCharAt: index
"Disallowed for Utf16, convert with decodeToUnicode first.
 Reports #doubleByteCharAt: via shouldNotImplement: ."
^ self shouldNotImplement: #doubleByteCharAt:
%

method:
quadByteCharAt: index
"Disallowed for Utf16, convert with decodeToUnicode first.
 Reports #quadByteCharAt: via shouldNotImplement: ."
^ self shouldNotImplement: #quadByteCharAt:
%

method:
at: index putChar: char
"Disallowed for Utf16: build a complete Unicode7, Unicode16 or Unicode32
 and then convert with encodeAsUTF16.
 Reports #at:putChar: via shouldNotImplement: ."
^ self shouldNotImplement: #at:putChar:
%

method:
at: index put: char
"Disallowed for Utf16: build a complete Unicode7, Unicode16 or Unicode32
 and then convert with encodeAsUTF16.
 Reports #at:put: via shouldNotImplement: ."
^ self shouldNotImplement: #at:put:
%

method:
_basicAt: index put: char
"Disallowed for Utf16: build a complete Unicode7, Unicode16 or Unicode32
 and then convert with encodeAsUTF16.
 Reports this method's own selector, #_basicAt:put: , via
 shouldNotImplement: so the error names the method actually called."
^ self shouldNotImplement: #_basicAt:put:
%

method:
at: index put: aString fromOffset: stringOffset sizeBytes: numBytes
"Disallowed for Utf16: build a complete Unicode7, Unicode16 or Unicode32
 and then convert with encodeAsUTF16.
 Reports #at:put:fromOffset:sizeBytes: via shouldNotImplement: ."
^ self shouldNotImplement: #at:put:fromOffset:sizeBytes:
%
method:
at: index put: aNumber signed: aBool width: aWidthInBytes
"Disallowed for Utf16; reports #at:put:signed:width: via shouldNotImplement: .
 unsigned16At:put: reaches the inherited implementation through super."
^ self shouldNotImplement: #at:put:signed:width:
%
method:
at: index signed: aBool width: aWidthInBytes
"Disallowed for Utf16; reports #at:signed:width: via shouldNotImplement: .
 at: reaches the inherited implementation through super."
^ self shouldNotImplement: #at:signed:width:
%
method:
at: index sizeBytes: anInt stringSize: anIntOrNil
"Disallowed for Utf16; reports #at:sizeBytes:stringSize: via shouldNotImplement: ."
^ self shouldNotImplement: #at:sizeBytes:stringSize:
%
method:
_deleteNoShrinkFrom: startIndex to: endIndex anchorTailSize: aSize
"Disallowed for Utf16; reports #_deleteNoShrinkFrom:to:anchorTailSize:
 via shouldNotImplement: ."
^ self shouldNotImplement: #_deleteNoShrinkFrom:to:anchorTailSize:
%

method:
_reverseDeleteNoShrinkFrom: startIndex to: endIndex anchorHeadSize: aSize
"Disallowed for Utf16; reports #_reverseDeleteNoShrinkFrom:to:anchorHeadSize:
 via shouldNotImplement: ."
^ self shouldNotImplement: #_reverseDeleteNoShrinkFrom:to:anchorHeadSize:
%

method:
deleteIndexKeyAt: anIndex
"Disallowed for Utf16; reports #deleteIndexKeyAt: via shouldNotImplement: ."
^ self shouldNotImplement: #deleteIndexKeyAt:
%

method:
at: aSmallInt putOopValueOfObject: anObject
"Disallowed for Utf16; reports #at:putOopValueOfObject: via shouldNotImplement: ."
^ self shouldNotImplement: #at:putOopValueOfObject:
%

method:
at: aSmallInt putOldOopValueOfObject: anObject
"Disallowed for Utf16; reports #at:putOldOopValueOfObject: via shouldNotImplement: ."
^ self shouldNotImplement: #at:putOldOopValueOfObject:
%

method:
getObjectWithOopValueAt: anOffset
"Disallowed for Utf16; reports #getObjectWithOopValueAt: via shouldNotImplement: ."
^ self shouldNotImplement: #getObjectWithOopValueAt: 
%

method:
getObjectWithOldOopValueAt: anOffset
"Disallowed for Utf16; reports #getObjectWithOldOopValueAt: via shouldNotImplement: ."
^ self shouldNotImplement: #getObjectWithOldOopValueAt: 
%

method:
dateTimeAt: startIndex width: anInt
"Disallowed for Utf16; reports #dateTimeAt:width: via shouldNotImplement: ."
^ self shouldNotImplement: #dateTimeAt:width:
%

method:
dateTimeAt: startIndex put: aDateTime width: anInt
"Disallowed for Utf16; reports #dateTimeAt:put:width: via shouldNotImplement: ."
^ self shouldNotImplement: #dateTimeAt:put:width:
%

method:
compareStringAt: startIndex to: aString startingAt: stringIndex sizeBytes: numSizeBytes useCase: aBool
"Disallowed for Utf16; reports
 #compareStringAt:to:startingAt:sizeBytes:useCase: via shouldNotImplement: ."
^ self shouldNotImplement: #compareStringAt:to:startingAt:sizeBytes:useCase: 
%

method:
shortStringAt: anIndex compareWith: aByteObject startingAt: stringOffset opCode: anOpCode
"Disallowed for Utf16.  Reports the complete selector of this method,
 including the trailing colon of its last keyword, via shouldNotImplement: ."
^ self shouldNotImplement: #shortStringAt:compareWith:startingAt:opCode:
%


method:
replaceFrom: startIndex to: stopIndex with: aSeqCollection startingAt: repIndex
"Disallowed for Utf16; reports #replaceFrom:to:with:startingAt:
 via shouldNotImplement: ."
^ self shouldNotImplement: #replaceFrom:to:with:startingAt:
%

method:
insertAll: aByteArray at: anIndex

"Only appending is supported: when anIndex is exactly one past self size
 the inherited insertAll:at: is used.  Any other index would insert in the
 middle of the encoded data and signals an ArgumentError instead.
 NOTE(review): self size is in 16 bit words for Utf16 while the inherited
 insertAll:at: presumably takes a byte index - confirm that appending to a
 non-empty receiver lands at the end of the data."

anIndex == (self size + 1) ifFalse:[
  ArgumentError signal:'a Utf16 may only be created to by encoding, or appended to'.
  ^ self
].
^ super insertAll: aByteArray at: anIndex
%

method:
_asUnicode7
"Decodes the receiver with decodeToUnicode and forwards _asUnicode7 to
 the decoded string."
^ self decodeToUnicode _asUnicode7
%

method:
_asUnicode16
"Decodes the receiver with decodeToUnicode and forwards _asUnicode16 to
 the decoded string."
^ self decodeToUnicode _asUnicode16
%

method:
removeFrom: startIndex to: stopIndex

"Disallowed for Utf16; reports #removeFrom:to: via shouldNotImplement: ."
^ self shouldNotImplement: #removeFrom:to:
%
method:
copyFrom: startIndex to: stopIndex

"Disallowed for Utf16; reports #copyFrom:to: via shouldNotImplement: ."
^ self shouldNotImplement: #copyFrom:to:
%

method:
_primAddRandomBytes: anIntHowMany startingAt: anIntOffset

"Disallowed for Utf16; reports #_primAddRandomBytes:startingAt:
 via shouldNotImplement: ."
^ self shouldNotImplement: #_primAddRandomBytes:startingAt:
%

method: 
_int32LittleEndianAt: startIndex put: anInteger

"Disallowed for Utf16; reports #_int32LittleEndianAt:put: via shouldNotImplement: ."
^ self shouldNotImplement: #_int32LittleEndianAt:put:
%

method:
int32LittleEndianAt: startIndex

"Disallowed for Utf16.  Reports the complete selector of this method,
 #int32LittleEndianAt: including its colon, via shouldNotImplement: ."
^ self shouldNotImplement: #int32LittleEndianAt:
%

method: 
_decodeUtf8At: anOffset bytesConsumed: anArray 

"Disallowed for Utf16; reports #_decodeUtf8At:bytesConsumed:
 via shouldNotImplement: ."
^ self shouldNotImplement: #_decodeUtf8At:bytesConsumed:
%

method:
_decodeUtf8StartingAt: anOffset unicodeResult: unicodeResultBool 
                 maxSize: aSize bytesConsumed: anArray

"Disallowed for Utf16; reports
 #_decodeUtf8StartingAt:unicodeResult:maxSize:bytesConsumed:
 via shouldNotImplement: ."
^ self shouldNotImplement: #_decodeUtf8StartingAt:unicodeResult:maxSize:bytesConsumed:
%

! end class Utf16

!-----------------------------------------------------------

expectvalue %String
run
  "Defines IcuSortedCollection in Globals as a kernel subclass of
   SortedCollection with the single instance variable collator ."
  SortedCollection _newKernelSubclass: 'IcuSortedCollection'
  instVarNames: #( collator ) 
  classVars:#() classInstVars:#() poolDictionaries:#()
  inDictionary: Globals
  options:#() reservedOop: nil
%

expectvalue %String
run
 " save the default sort block from a v3.2.x or v3.3.0  repository for later use in upgrade.
   If the existing _initialize: method has a first block literal that is an
   ExecBlock taking two arguments, the method of that block is stored in
   Globals under #IcuSortedCollection_oldDefaultMethod .  In every other case
   any previously stored entry under that key is removed."
( IcuSortedCollection compiledMethodAt: #_initialize: otherwise: nil) ifNotNil:[:mth|
  (mth blockLiterals atOrNil: 1) ifNotNil:[:blk |
    ((blk isKindOfClass: ExecBlock) and:[ blk argumentCount == 2 ]) ifTrue:[
       Globals at: #IcuSortedCollection_oldDefaultMethod put: blk method . 
       ^ 'saved block ' , blk asOop asString.
    ].
  ].  
].
Globals removeKey: #IcuSortedCollection_oldDefaultMethod ifAbsent:[] .
^ 'no action'
%

set class IcuSortedCollection
removeallmethods
removeallclassmethods

category: 'Documentation'
classmethod:
comment
"Answers the class documentation text for IcuSortedCollection as a
 literal string."
^ 'An IcuSortedCollection is a SortedCollection that
encapsulates an IcuCollator to be used when comparing elements.  
Elements must respond to compareTo:collator:  and thus should be 
instances of Utf8, Unicode7, Unicode16, or Unicode32. '
%

category: 'Instance Creation'
classmethod: 
new
"Answers a new instance of the receiver, initialized with the current
 IcuCollator default ."

^ self _basicNew _initialize: IcuCollator default ; yourself
%
classmethod: 
newUsingCollator: aCollator
"Answers a new instance of the receiver, initialized with aCollator
 (the instance keeps its own copy, see _initialize: )."

^ self _basicNew _initialize: aCollator ; yourself
%

category: 'Initialize'
method:
_initialize: aCollator
  "Use a copy of aCollator to disallow changes to strength, etc,
   that would require a resort of the receiver.
   The copy is made invariant with immediateInvariant before it is stored,
   and sortBlock is set to the three argument block from _defaultBlock ."
  collator := aCollator copy immediateInvariant .
  sortBlock := self _defaultBlock .
%

category: 'Initialize'
method:
_defaultBlock
 "Answers the default sort block.  It takes two elements plus a collator and
  answers true when the first element compares less than or equal to the
  second via compareTo:collator:useMinSize: (useMinSize false)."
 
 ^ [ :a :b :aCollator | (a compareTo: b collator: aCollator useMinSize: false) <= 0 ]
%

category: 'Updating'
method:
collator: aCollator
  "Replace the collator of the receiver and put the elements back in order.
   An invariant copy of aCollator is stored, so that later changes to the
   original (strength, etc) cannot silently invalidate the ordering.
   A resort is only needed when there is more than one element."

  collator := aCollator copy immediateInvariant .
  self size <= 1 ifTrue:[ ^ self ].
  self resort
%

category: 'Accessing'
method:
collator
  "Answers the collator stored in the receiver (the invariant copy made by
   _initialize: or collator: )."
  ^ collator
%

category: 'Private'
method: 
_findIndex: anObject

"Finds and returns the index for placing the given object into the receiver.
 A simple binary probe is used: the element at the probe position is passed
 to sortBlock together with anObject and the collator, and the search window
 from lower to upper is narrowed according to the Boolean answer.
 Answers 1 for an empty receiver."

| obj lower upper half probe |

self size == 0 ifTrue: [ ^1 ].

lower := 1.
upper := self size.
"Binary selectors evaluate strictly left to right, so half is
 ((upper - lower) + 1) // 2 .  The condition block recomputes half and probe
 before every test, including the final failing one, so the probe answered
 below reflects the last narrowing of the window."
[ half := upper - lower + 1 // 2.
  probe := lower + half.
  -1 < half and: [ lower <= upper ] ] whileTrue: [
  obj := self at: probe.
  (sortBlock value: obj value: anObject value: collator) ifTrue: [
    "after this index - go to the next higher cell"
    lower := probe + 1.
  ]
  ifFalse: [
    "before this index - go to the next lower cell"
    upper := probe - 1
  ]
].
^probe
%

category: 'Searching'
method: 
indexOf: anObject

"Returns the index of the first occurrence of an object equal to anObject 
 in the receiver.  If the receiver does not contain such an object, this 
 returns zero.

 The search starts from the insertion index answered by _findIndex: and
 scans only the neighbouring run of elements, using = for the match and
 sortBlock (with the collator) to decide when the run has ended."

| idx |
" _findIndex: returns where anObject would be inserted, so look at
surrounding slots to see if the object is present "
idx := self _findIndex: anObject.

"The sort block is asked how it orders an object against itself.  A true
 answer (as from a <= style block) selects the scan below idx, a false
 answer the scan from idx upwards."
(sortBlock value: anObject value: anObject value: collator) ifTrue: [
  "Need to look at the slots less than idx"
  | foundIdx | "We need to find the first one that is equal"
  foundIdx := 0.
  (idx-1) _downTo: 1 do: [ :i | 
    "Keep overwriting foundIdx while scanning downwards, so the lowest
     matching index is the one answered."
    (anObject = (self at: i)) ifTrue: [foundIdx := i].
    (i > 1) ifTrue: [
      "Check to see if the next element might be equal to the current element
       according to the sort block."
      (sortBlock value: (self at: i) value: (self at: i-1) value: collator) ifFalse: [
        "We don't need to look at any more because i-1 can't be equal
         to i."
        ^foundIdx
      ].
    ].
  ].
  ^foundIdx
] ifFalse: [
  "Need to look at the slots greater than idx"
  | lastIdx |
  lastIdx := self size.
  idx to: lastIdx do: [ :i |
    "Scanning upwards, the first match is the answer."
    (anObject = (self at: i)) ifTrue: [^i].
    (i < lastIdx) ifTrue: [
      "Check to see if the next element might be equal to the current element
       according to the sort block."
      (sortBlock value: (self at: i) value: (self at: i+1) value: collator) ifTrue: [
        "We don't need to look at any more because i+1 can't be equal
         to i."
        ^0
      ].
    ].
  ].
  ^0.
].
%

category: 'Searching'
method: 
indexOfIdentical: anObject

"Private.  Returns the index of the first element in the receiver that is
 identical to the argument.  If the receiver does not have any elements that are
 identical to the argument, returns zero.

 Receivers of up to 2000 elements use the inherited implementation.  Larger
 receivers start from the insertion index answered by _findIndex: and scan
 only the neighbouring run of elements, using == for the match and sortBlock
 (with the collator) to decide when the run has ended."

| idx |

(self size <= 2000) ifTrue: [
  "OrderedCollection's indexOfIdentical: uses a primitive and is
   faster for smaller SortedCollections"
  ^super indexOfIdentical: anObject.
].

" _findIndex: returns where anObject would be inserted, so look at
surrounding slots to see if the object is present "
idx := self _findIndex: anObject.

"The sort block is asked how it orders an object against itself.  A true
 answer (as from a <= style block) selects the scan below idx, a false
 answer the scan from idx upwards."
(sortBlock value: anObject value: anObject value: collator) ifTrue: [
  "Need to look at the slots less than idx"
  | foundIdx | "We need to find the first one that is equal"
  foundIdx := 0.
  (idx-1) _downTo: 1 do: [ :i | 
    "Keep overwriting foundIdx while scanning downwards, so the lowest
     matching index is the one answered."
    (anObject == (self at: i)) ifTrue: [foundIdx := i].
    (i > 1) ifTrue: [
      "Check to see if the next element might be equal to the current element
       according to the sort block."
      (sortBlock value: (self at: i) value: (self at: i-1) value: collator) ifFalse: [
        "We don't need to look at any more because i-1 can't be equal
         to i."
        ^foundIdx
      ].
    ].
  ].
  ^foundIdx
] ifFalse: [
  "Need to look at the slots greater than or equal to idx"
  | lastIdx |
  lastIdx := self size.
  idx to: lastIdx do: [ :i |
    "Scanning upwards, the first match is the answer."
    (anObject == (self at: i)) ifTrue: [
      ^i
    ].
    (i < lastIdx) ifTrue: [
      "Check to see if the next element might be equal to the current element
       according to the sort block."
      (sortBlock value: (self at: i) value: (self at: i+1) value: collator) ifTrue: [
        "We don't need to look at any more because i+1 can't be equal
         to i."
        ^0
      ].
    ].
  ].
  ^0.
].
%

category: 'Copying'
method:
_mergeSortAddAll: aCollection

"Bulk load for an empty receiver.  aCollection is sorted with
 sortWithBlock:collator: (the merge sort provided by BlockSorter), using the
 sort block and collator of the receiver, and the sorted result is inserted
 at index 1 through the inherited _insertAll:at: .
 Reports an error if the receiver is not empty.  Returns the receiver."

self size == 0 ifFalse:[ self error:'not empty' ].
super
  _insertAll: (aCollection sortWithBlock: sortBlock collator: collator)
  at: 1 .
^ self
%



! end class IcuSortedCollection
!-----------------------------------------------------------
category: 'Converting'
set compile_env: 0
method: Unicode16
asString

"Returns a String representation of the receiver, built with
 String withAll: self ."

^ String withAll: self
%
category: 'Converting'
set compile_env: 0
method: Unicode32
asString

"Returns a String representation of the receiver, built with
 String withAll: self ."

^ String withAll: self
%
category: 'Converting'
set compile_env: 0
method: Unicode7
asString

"Returns a String representation of the receiver, built with
 String withAll: self ."

^ String withAll: self
%
category: 'Indexing Support'
set compile_env: 0
classmethod: Unicode16
_idxBasicCanCompareWithCharacterCollectionInstance: aCharacterCollection
  "Returns true if <aCharacterCollection> may be inserted into a basic BtreeNode 
   whose #lastElementClass is the receiver (see RangeEqualityIndex 
   class>>isBasicClass:)."

  "If using Unicode compares, then *String and Unicode* instances may be compared.
   If not using Unicode compares then *String and Unicode* instances may not be compared."

  "The argument itself is not examined; the answer is whatever
   Unicode16 usingUnicodeCompares answers."

  ^ Unicode16 usingUnicodeCompares
%
category: 'Indexing Support'
set compile_env: 0
method: Unicode16
_idxBasicCanCompareWithClass: aClass
  "Returns true if the receiver may be inserted into a basic BtreeNode whose 
   #lastElementClass is <aClass> (see RangeEqualityIndex class>>isBasicClass:)."

  "If using Unicode compares, then *String and Unicode* instances may be compared.
   If not using Unicode compares then *String and Unicode* instances may not be compared."

  "Double dispatch: the decision is left to aClass, which is told that the
   candidate is a Unicode instance."

  ^ aClass _idxBasicCanCompareWithUnicodeInstance: self
%
category: 'Indexing Support'
set compile_env: 0
classmethod: Unicode16
_idxBasicCanCompareWithUnicodeInstance: aUnicodeString
  "Returns true if <aUnicodeString> may be inserted into a basic BtreeNode whose 
   #lastElementClass is the receiver (see RangeEqualityIndex class>>isBasicClass:)."

  "If using Unicode compares, then *String and Unicode* instances may be compared.
   If not using Unicode compares then *String and Unicode* instances may not be compared."

  "Always answers true; the argument is not examined."

  ^ true
%
category: 'Indexing Support'
set compile_env: 0
classmethod: Unicode32
_idxBasicCanCompareWithCharacterCollectionInstance: aCharacterCollection
  "Returns true if <aCharacterCollection> may be inserted into a basic BtreeNode 
   whose #lastElementClass is the receiver (see RangeEqualityIndex 
   class>>isBasicClass:)."

  "If using Unicode compares, then *String and Unicode* instances may be compared.
   If not using Unicode compares then *String and Unicode* instances may not be compared."

  "The argument itself is not examined; the answer is whatever
   Unicode16 usingUnicodeCompares answers (the setting is read from
   Unicode16 here as well)."

  ^ Unicode16 usingUnicodeCompares
%
category: 'Indexing Support'
set compile_env: 0
method: Unicode32
_idxBasicCanCompareWithClass: aClass
  "Returns true if the receiver may be inserted into a basic BtreeNode whose 
   #lastElementClass is <aClass> (see RangeEqualityIndex class>>isBasicClass:)."

  "If using Unicode compares, then *String and Unicode* instances may be compared.
   If not using Unicode compares then *String and Unicode* instances may not be compared."

  "Double dispatch: the decision is left to aClass, which is told that the
   candidate is a Unicode instance."

  ^ aClass _idxBasicCanCompareWithUnicodeInstance: self
%
category: 'Indexing Support'
set compile_env: 0
classmethod: Unicode32
_idxBasicCanCompareWithUnicodeInstance: aUnicodeString
  "Returns true if <aUnicodeString> may be inserted into a basic BtreeNode whose 
   #lastElementClass is the receiver (see RangeEqualityIndex class>>isBasicClass:).

   Answers true unconditionally; neither the argument nor the
   Unicode-compares setting is consulted here."

  "If using Unicode compares, then *String and Unicode* instances may be compared.
   If not using Unicode compares then *String and Unicode* instances may not be compared."

  ^ true
%
category: 'Indexing Support'
set compile_env: 0
classmethod: Unicode7
_idxBasicCanCompareWithCharacterCollectionInstance: aCharacterCollection
  "Returns true if <aCharacterCollection> may be inserted into a basic BtreeNode 
   whose #lastElementClass is the receiver (see RangeEqualityIndex 
   class>>isBasicClass:).

   The argument itself is not inspected; the answer is the result of
   Unicode16 usingUnicodeCompares ."

  "If using Unicode compares, then *String and Unicode* instances may be compared.
   If not using Unicode compares then *String and Unicode* instances may not be compared."

  ^ Unicode16 usingUnicodeCompares
%
category: 'Indexing Support'
set compile_env: 0
method: Unicode7
_idxBasicCanCompareWithClass: aClass
  "Returns true if the receiver may be inserted into a basic BtreeNode whose 
   #lastElementClass is <aClass> (see RangeEqualityIndex class>>isBasicClass:).

   Double dispatch: the answer is whatever <aClass> returns when sent
   #_idxBasicCanCompareWithUnicodeInstance: with the receiver as argument."

  "If using Unicode compares, then *String and Unicode* instances may be compared.
   If not using Unicode compares then *String and Unicode* instances may not be compared."

  ^ aClass _idxBasicCanCompareWithUnicodeInstance: self
%
category: 'Indexing Support'
set compile_env: 0
classmethod: Unicode7
_idxBasicCanCompareWithUnicodeInstance: aUnicodeString
  "Returns true if <aUnicodeString> may be inserted into a basic BtreeNode whose 
   #lastElementClass is the receiver (see RangeEqualityIndex class>>isBasicClass:).

   Answers true unconditionally; neither the argument nor the
   Unicode-compares setting is consulted here."

  "If using Unicode compares, then *String and Unicode* instances may be compared.
   If not using Unicode compares then *String and Unicode* instances may not be compared."

  ^ true
%

!--------------------------------------------------------------------------
!  compare and sort methods using IcuCollator that are supported on String and MultiByteString

method: String
sortKeyForCollator: anIcuCollator

"Returns a ByteArray which is the sort key for the
 receiver produced by the specified IcuCollator.

 Calls Collator::getSortKey in coll.h of libicu.

 The work is done by primitive 975.  The statements after the primitive
 run only if it fails: anIcuCollator is first checked to be an
 IcuCollator, then the primitive failure is reported."
 
<primitive: 975>
anIcuCollator _validateClass: IcuCollator .
^ self _primitiveFailed: #sortKeyForCollator: args: { anIcuCollator }
%

method: MultiByteString
sortKeyForCollator: anIcuCollator

"Returns a ByteArray which is the sort key for the
 receiver produced by the specified IcuCollator.

 Calls Collator::getSortKey in coll.h of libicu.

 The work is done by primitive 975.  The statements after the primitive
 run only if it fails: anIcuCollator is first checked to be an
 IcuCollator, then the primitive failure is reported."
 
<primitive: 975>
anIcuCollator _validateClass: IcuCollator .
^ self _primitiveFailed: #sortKeyForCollator: args: { anIcuCollator }
%

method: String
compareTo: argString collator: anIcuCollator useMinSize: aMinSize

"Returns -1, 0 or 1,  when receiver is less than,
 equal to, or greater than argString .
 argString must be a String, MultiByteString, or a Utf8.
 anIcuCollator == nil is interpreted as   IcuCollator default .

 If aMinSize == false, normal comparison is done.

 If aMinSize==true, compare stops at (self size min: argString size),
 which is Squeak semantics for comparison.

 If aMinSize is a SmallInteger >= 1, compare stops at
   aMinSize min: (self size min: argString size) .

 The comparison is done by primitive 977.  The statements after the
 primitive run only if it fails."
<primitive: 977>
"With a nil collator and nothing stored in session state slot 20, retry
 with an explicit IcuCollator default (presumably slot 20 caches the
 default collator -- TODO confirm).  A non-nil collator is class-checked."
anIcuCollator ifNil:[  
  (System __sessionStateAt: 20) ifNil:[
    ^ self compareTo: argString collator: IcuCollator default useMinSize: aMinSize ]
] ifNotNil:[ anIcuCollator _validateClass: IcuCollator ].
"Check the remaining arguments, then report the primitive failure."
aMinSize _validateClasses: { Boolean . SmallInteger }.
argString _validateClasses: { String . Utf8 }.
^ self _primitiveFailed: #compareTo:collator:useMinSize:
        args: { argString . anIcuCollator . aMinSize }
%

method: Unicode7
compareTo: argString collator: anIcuCollator useMinSize: aMinSize

"Returns -1, 0 or 1,  when receiver is less than,
 equal to, or greater than argString .
 argString must be a String, MultiByteString, or a Utf8 .
 anIcuCollator == nil is interpreted as   IcuCollator default .

 If aMinSize == false, normal comparison is done.

 If aMinSize==true, compare stops at (self size min: argString size),
 which is Squeak semantics for comparison.

 If aMinSize is a SmallInteger >= 1, compare stops at
   aMinSize min: (self size min: argString size) .

 The comparison is done by primitive 917.  The statements after the
 primitive run only if it fails."
<primitive: 917>
"With a nil collator and nothing stored in session state slot 20, retry
 with an explicit IcuCollator default (presumably slot 20 caches the
 default collator -- TODO confirm).  A non-nil collator is class-checked."
anIcuCollator ifNil:[  
  (System __sessionStateAt: 20) ifNil:[
    ^ self compareTo: argString collator: IcuCollator default useMinSize: aMinSize ]
] ifNotNil:[ anIcuCollator _validateClass: IcuCollator ].
"Check the remaining arguments, then report the primitive failure."
aMinSize _validateClasses: { Boolean . SmallInteger }.
argString _validateClasses: { String . Utf8 }.
^ self _primitiveFailed: #compareTo:collator:useMinSize:
        args: { argString . anIcuCollator . aMinSize }
%
method: Unicode7
_equals: argString collator: anIcuCollator useMinSize: aFalse

"Returns true if argString compares equal to the receiver using anIcuCollator
 and if argString is not a Symbol, otherwise returns false.

 The comparison is done by primitive 989.  The statements after the
 primitive run only if it fails.  aFalse is passed through unchanged;
 judging by its name callers are expected to pass false -- TODO confirm."
<primitive: 989>
"With a nil collator and nothing stored in session state slot 20, retry
 with an explicit IcuCollator default; a non-nil collator is class-checked."
anIcuCollator ifNil:[ (System __sessionStateAt: 20) ifNil:[
  ^ self _equals: argString collator: IcuCollator default useMinSize: aFalse ]
] ifNotNil:[  anIcuCollator _validateClass: IcuCollator ].
"Last resort: let a CharacterCollection argument decide via its own = ;
 any other kind of argument is never equal."
(argString isKindOf: CharacterCollection) ifTrue:[ ^ argString = self ].
^ false
%
method: String
_equals: argString collator: anIcuCollator useMinSize: aFalse

"Returns true if argString compares equal to the receiver using anIcuCollator
 and if argString is not a Symbol, otherwise returns false.

 The comparison is done by primitive 991.  The statements after the
 primitive run only if it fails.  aFalse is passed through unchanged;
 judging by its name callers are expected to pass false -- TODO confirm."
<primitive: 991>
"With a nil collator and nothing stored in session state slot 20, retry
 with an explicit IcuCollator default; a non-nil collator is class-checked."
anIcuCollator ifNil:[ (System __sessionStateAt: 20) ifNil:[
  ^ self _equals: argString collator: IcuCollator default useMinSize: aFalse ]
] ifNotNil:[  anIcuCollator _validateClass: IcuCollator ].
"Last resort: let a CharacterCollection argument decide via its own = ;
 any other kind of argument is never equal."
(argString isKindOf: CharacterCollection) ifTrue:[ ^ argString = self ].
^ false
%

category: 'Comparing'
method: Unicode16
compareTo: argString collator: anIcuCollator useMinSize: aMinSize

"Returns -1, 0 or 1,  when receiver is less than,
 equal to, or greater than argString .
 argString must be a String, MultiByteString, or a Utf8 .
 anIcuCollator == nil is interpreted as   IcuCollator default .

 If aMinSize == false, normal comparison is done.

 If aMinSize==true, compare stops at (self size min: argString size),
 which is Squeak semantics for comparison.

 If aMinSize is a SmallInteger >= 1, compare stops at
   aMinSize min: (self size min: argString size) .

 The comparison is done by primitive 928.  The statements after the
 primitive run only if it fails."
<primitive: 928>   "prim handles String, DoubleByteString args"
"With a nil collator and nothing stored in session state slot 20, retry
 with an explicit IcuCollator default (presumably slot 20 caches the
 default collator -- TODO confirm).  A non-nil collator is class-checked."
anIcuCollator ifNil:[  
  (System __sessionStateAt: 20) ifNil:[
    ^ self compareTo: argString collator: IcuCollator default useMinSize: aMinSize ]
] ifNotNil:[ anIcuCollator _validateClass: IcuCollator ].
"Check the remaining arguments, then report the primitive failure."
aMinSize _validateClasses: { Boolean . SmallInteger }.
argString _validateClasses: { String . Utf8 }.
^ self _primitiveFailed: #compareTo:collator:useMinSize:
        args: { argString . anIcuCollator . aMinSize }
%
method: Unicode16
_equals: argString collator: anIcuCollator useMinSize: aFalse

"Returns true if argString compares equal to the receiver using anIcuCollator
 and if argString is not a Symbol, otherwise returns false.

 The comparison is done by primitive 990.  The statements after the
 primitive run only if it fails.  aFalse is passed through unchanged;
 judging by its name callers are expected to pass false -- TODO confirm."
<primitive: 990>
"With a nil collator and nothing stored in session state slot 20, retry
 with an explicit IcuCollator default; a non-nil collator is class-checked."
anIcuCollator ifNil:[ (System __sessionStateAt: 20) ifNil:[
  ^ self _equals: argString collator: IcuCollator default useMinSize: aFalse ]
] ifNotNil:[  anIcuCollator _validateClass: IcuCollator ].
"Last resort: let a CharacterCollection argument decide via its own = ;
 any other kind of argument is never equal."
(argString isKindOf: CharacterCollection) ifTrue:[ ^ argString = self ].
^ false
%

method: DoubleByteString
_equals: argString collator: anIcuCollator useMinSize: aFalse

"Returns true if argString compares equal to the receiver using anIcuCollator
 and if argString is not a Symbol, otherwise returns false.

 The comparison is done by primitive 992.  The statements after the
 primitive run only if it fails.  aFalse is passed through unchanged;
 judging by its name callers are expected to pass false -- TODO confirm."
<primitive: 992>
"With a nil collator and nothing stored in session state slot 20, retry
 with an explicit IcuCollator default; a non-nil collator is class-checked."
anIcuCollator ifNil:[ (System __sessionStateAt: 20) ifNil:[
  ^ self _equals: argString collator: IcuCollator default useMinSize: aFalse ]
] ifNotNil:[  anIcuCollator _validateClass: IcuCollator ].
"Last resort: let a CharacterCollection argument decide via its own = ;
 any other kind of argument is never equal."
(argString isKindOf: CharacterCollection) ifTrue:[ ^ argString = self ].
^ false
%

category: 'Comparing'
method: DoubleByteString
compareTo: argString collator: anIcuCollator useMinSize: aMinSize

"Returns -1, 0 or 1,  when receiver is less than,
 equal to, or greater than argString .
 argString must be a String, MultiByteString, or a Utf8.
 anIcuCollator == nil is interpreted as   IcuCollator default .

 If aMinSize == false, normal comparison is done.

 If aMinSize==true, compare stops at (self size min: argString size),
 which is Squeak semantics for comparison.

 If aMinSize is a SmallInteger >= 1, compare stops at
   aMinSize min: (self size min: argString size) .

 The comparison is done by primitive 978.  The statements after the
 primitive run only if it fails."
<primitive: 978>   "prim handles String, DoubleByteString args"
"With a nil collator and nothing stored in session state slot 20, retry
 with an explicit IcuCollator default (presumably slot 20 caches the
 default collator -- TODO confirm).  A non-nil collator is class-checked."
anIcuCollator ifNil:[  
  (System __sessionStateAt: 20) ifNil:[
    ^ self compareTo: argString collator: IcuCollator default useMinSize: aMinSize ]
] ifNotNil:[ anIcuCollator _validateClass: IcuCollator ].
"Check the remaining arguments, then report the primitive failure."
aMinSize _validateClasses: { Boolean . SmallInteger }.
argString _validateClasses: { String . Utf8 }.
^ self _primitiveFailed: #compareTo:collator:useMinSize:
        args: { argString . anIcuCollator . aMinSize }
%

! ------------
! _idxPrim compare methods for String and MultiByteString, installed in transient method dict
!   when using unicode compares

category: 'New Indexing Comparison'
method: String
_idxUnicodeCompareEqualTo: aCharCollection

"Equality test used by the indexing subsystem when ordering entries for
 insertion into indexing objects and when doing index comparisons.

 Answers false if the argument is nil, and false if the argument is a
 Symbol that is not a Unicode string.  Otherwise answers true exactly
 when compareTo:collator:useMinSize: , sent with a nil collator (the
 default collator) and a minimum size of 900, yields 0."

  | order |
  aCharCollection == nil ifTrue: [ ^ false ].
  (aCharCollection isUnicodeString not and: [ aCharCollection _isSymbol ])
    ifTrue: [ ^ false ].
  order := self compareTo: aCharCollection collator: nil useMinSize: 900.
  ^ order == 0
%

category: 'New Indexing Comparison'
method: String
_idxUnicodeCompareGreaterThan: aCharCollection

"Greater-than test used by the indexing subsystem when ordering entries
 for insertion into indexing objects and when doing index comparisons.

 Collates like the > method, except that a nil argument answers true.
 For a non-nil argument, answers true exactly when
 compareTo:collator:useMinSize: , sent with a nil collator (the default
 collator) and a minimum size of 900, yields 1."

| order |
aCharCollection == nil ifTrue: [ ^ true ].
order := self compareTo: aCharCollection collator: nil useMinSize: 900.
^ order == 1
%

method: String
_idxUnicodeCompareLessThan: aCharCollection

"Less-than test used by the indexing subsystem when ordering entries
 for insertion into indexing objects and when doing index comparisons.

 Collates like the < method, except that a nil argument answers false.
 For a non-nil argument, answers true exactly when
 compareTo:collator:useMinSize: , sent with a nil collator (the default
 collator) and a minimum size of 900, yields -1."

 | order |
 aCharCollection == nil ifTrue: [ ^ false ].
 order := self compareTo: aCharCollection collator: nil useMinSize: 900.
 ^ order == -1
%

category: 'New Indexing Comparison'
method: MultiByteString
_idxUnicodeCompareEqualTo: aCharCollection

"Equality test used by the indexing subsystem when ordering entries for
 insertion into indexing objects and when doing index comparisons.

 Answers false if the argument is nil, and false if the argument is a
 Symbol that is not a Unicode string.  Otherwise answers true exactly
 when compareTo:collator:useMinSize: , sent with a nil collator (the
 default collator) and a minimum size of 900, yields 0."

  | order |
  aCharCollection == nil ifTrue: [ ^ false ].
  (aCharCollection isUnicodeString not and: [ aCharCollection _isSymbol ])
    ifTrue: [ ^ false ].
  order := self compareTo: aCharCollection collator: nil useMinSize: 900.
  ^ order == 0
%

method: MultiByteString
_idxUnicodeCompareGreaterThan: aCharCollection

"Greater-than test used by the indexing subsystem when ordering entries
 for insertion into indexing objects and when doing index comparisons.

 Collates like the > method, except that a nil argument answers true.
 For a non-nil argument, answers true exactly when
 compareTo:collator:useMinSize: , sent with a nil collator (the default
 collator) and a minimum size of 900, yields 1."

| order |
aCharCollection == nil ifTrue: [ ^ true ].
order := self compareTo: aCharCollection collator: nil useMinSize: 900.
^ order == 1
%

method: MultiByteString
_idxUnicodeCompareLessThan: aCharCollection

"Less-than test used by the indexing subsystem when ordering entries
 for insertion into indexing objects and when doing index comparisons.

 Collates like the < method, except that a nil argument answers false.
 For a non-nil argument, answers true exactly when
 compareTo:collator:useMinSize: , sent with a nil collator (the default
 collator) and a minimum size of 900, yields -1."

 | order |
 aCharCollection == nil ifTrue: [ ^ false ].
 order := self compareTo: aCharCollection collator: nil useMinSize: 900.
 ^ order == -1
%

!------------
category: 'New Indexing Comparison'
method: Symbol
_idxUnicodeCompareEqualTo: aCharCollection
  "No Unicode Symbols, so answer false.
   The argument is ignored; a Symbol receiver never compares equal here."

  ^false
%
category: 'New Indexing Comparison'
method: DoubleByteSymbol
_idxUnicodeCompareEqualTo: aCharCollection
  "No Unicode Symbols, so answer false.
   The argument is ignored; a DoubleByteSymbol receiver never compares equal here."

  ^false
%
category: 'New Indexing Comparison'
method: QuadByteSymbol
_idxUnicodeCompareEqualTo: aCharCollection
  "No Unicode Symbols, so answer false.
   The argument is ignored; a QuadByteSymbol receiver never compares equal here."

  ^false
%
!------------

category: 'Converting'
method: String
asUppercaseForLocale: anIcuLocale

"Returns the result of  UnicodeString::toUpper(anIcuLocale).
 See also String >> asUppercase

 The conversion is done by primitive 970.  The statement after the
 primitive runs only if it fails.  Its result is returned explicitly,
 as in asLowercaseForLocale: and asTitlecaseForLocale: , so that a
 resumed primitive failure does not fall off the end of the method and
 answer the unconverted receiver."

<primitive: 970>
^ self _primitiveFailed: #asUppercaseForLocale: args: { anIcuLocale }
%

category: 'Converting'
method: String
asLowercaseForLocale: anIcuLocale

"Returns the result of  UnicodeString::toLower(anIcuLocale).
 See also String >> asLowercase

 The conversion is done by primitive 971; the statement after the
 primitive runs only if it fails and reports the primitive failure."

<primitive: 971>
^ self _primitiveFailed: #asLowercaseForLocale: args: { anIcuLocale }
%

category: 'Converting'
method: String
asTitlecaseForLocale: anIcuLocale

"Returns the result of 
 UnicodeString::toTitle(BreakIterator::createTitleInstance(anIcuLocale)).

 The conversion is done by primitive 972; the statement after the
 primitive runs only if it fails and reports the primitive failure."

<primitive: 972>
^ self _primitiveFailed: #asTitlecaseForLocale: args: { anIcuLocale }
%
category: 'Converting'
method: String
asFoldcase

" Returns the result of UnicodeString::foldCase( U_FOLD_CASE_DEFAULT).

 The conversion is done by primitive 973; the statement after the
 primitive runs only if it fails and reports the primitive failure."

<primitive: 973>
^ self _primitiveFailed: #asFoldcase
%

! fix 43763
method: String
asTitlecase

"Answers the receiver converted to title case for the default locale,
 that is, the result of asTitlecaseForLocale: sent with
 IcuLocale default as the argument."

  | defaultLocale |
  defaultLocale := IcuLocale default.
  ^ self asTitlecaseForLocale: defaultLocale
%

!------------

category: 'Converting'
method: MultiByteString
asUppercaseForLocale: anIcuLocale

"Returns the result of  UnicodeString::toUpper(anIcuLocale).
 The result may be a Unicode16 or Unicode32.
 See also String >> asUppercase

 The conversion is done by primitive 970.  The statement after the
 primitive runs only if it fails.  Its result is returned explicitly,
 as in asLowercaseForLocale: and asTitlecaseForLocale: , so that a
 resumed primitive failure does not fall off the end of the method and
 answer the unconverted receiver."

<primitive: 970>
^ self _primitiveFailed: #asUppercaseForLocale: args: { anIcuLocale }
%

category: 'Converting'
method: MultiByteString
asLowercaseForLocale: anIcuLocale

"Returns the result of  UnicodeString::toLower(anIcuLocale).
 The result will be a Unicode16 or Unicode32.
 See also String >> asLowercase

 The conversion is done by primitive 971; the statement after the
 primitive runs only if it fails and reports the primitive failure."

<primitive: 971>
^ self _primitiveFailed: #asLowercaseForLocale: args: { anIcuLocale }
%

category: 'Converting'
method: MultiByteString
asTitlecaseForLocale: anIcuLocale

"Returns the result of 
 UnicodeString::toTitle(BreakIterator::createTitleInstance(anIcuLocale)).

 The conversion is done by primitive 972; the statement after the
 primitive runs only if it fails and reports the primitive failure."

<primitive: 972>
^ self _primitiveFailed: #asTitlecaseForLocale: args: { anIcuLocale }
%

! fixed 43763
category: 'Converting'
method: MultiByteString
asTitlecase

"Answers the receiver converted to title case for the default locale,
 that is, the result of asTitlecaseForLocale: sent with
 IcuLocale default as the argument."

  | defaultLocale |
  defaultLocale := IcuLocale default.
  ^ self asTitlecaseForLocale: defaultLocale
%

method: MultiByteString
asFoldcase

" Returns the result of UnicodeString::foldCase( U_FOLD_CASE_DEFAULT).

 The conversion is done by primitive 973; the statement after the
 primitive runs only if it fails and reports the primitive failure."

<primitive: 973>
^ self _primitiveFailed: #asFoldcase
%

!---------------------------
category: 'Class Membership'
method: Unicode7
speciesForPrint

"Answers the class Unicode7 as the species to use when printing the receiver."

^ Unicode7
%

category: 'Class Membership'
method: Unicode16
speciesForPrint

"Answers the class Unicode16 as the species to use when printing the receiver."

^ Unicode16
%

category: 'Class Membership'
method: Unicode32
speciesForPrint

"Answers the class to use when printing the receiver.  This is
 deliberately Unicode16 rather than Unicode32; the print stream is
 relied upon to promote to a wider class when needed."

^ Unicode16  "let stream promote if needed"
%

category: 'Indexing Support'
classmethod: Utf8
_canCreateQueryOnInstances
  "GsQuery may be created on most subclasses of Collection. Answer false if a GsQuery is not appropriate
   for the receiver.

   Utf8 answers false, i.e. a GsQuery is not appropriate on Utf8 instances."

  ^ false
%
