Extension { #name : 'Unicode16' }

{ #category : 'Session Control - Private' }
Unicode16 class >> _cacheUsingUnicodeCompares [
  "Stores the current answer of _lookupsUsingUnicodeCompares in slot 23 of
   the session state.  Slot 23 is the slot read by usingUnicodeCompares."

  | inUse |
  inUse := self _lookupsUsingUnicodeCompares .
  System __sessionStateAt: 23 put: inUse

]

{ #category : 'Indexing Support' }
Unicode16 class >> _idxBasicCanCompareWithCharacterCollectionInstance: aCharacterCollection [
  "Answers whether <aCharacterCollection> may be inserted into a basic
   BtreeNode whose #lastElementClass is the receiver (see
   RangeEqualityIndex class>>isBasicClass:).

   The argument itself is not examined; the answer is whatever
   Unicode16 class>>usingUnicodeCompares answers.  *String and Unicode*
   instances may be compared with each other only while Unicode compares
   are in use."

  | unicodeComparesInUse |
  unicodeComparesInUse := Unicode16 usingUnicodeCompares .
  ^ unicodeComparesInUse

]

{ #category : 'Indexing Support' }
Unicode16 class >> _idxBasicCanCompareWithUnicodeInstance: aUnicodeString [
  "Answers whether <aUnicodeString> may be inserted into a basic BtreeNode
   whose #lastElementClass is the receiver (see RangeEqualityIndex
   class>>isBasicClass:).

   Always answers true.  Neither the argument nor the current compare mode
   is consulted."

  ^ true

]

{ #category : 'Session Control - Private' }
Unicode16 class >> _lookupsUsingUnicodeCompares [
  "Answers true if Unicode behavior is installed for #< #> #= and the related
   indexing methods, false if legacy behavior is in effect.

   The check inspects the environment 0 transient method dictionary of
   Unicode7: Unicode behavior is considered active when that dictionary has
   an entry for #= whose method selector is #_unicodeEqual: ."

  | transientDict equalsMethod |
  transientDict := Unicode7 transientMethodDictForEnv: 0 .
  transientDict ifNil:[ ^ false ].
  equalsMethod := transientDict at: #= otherwise: nil .
  equalsMethod ifNil:[ ^ false ].
  ^ equalsMethod selector == #_unicodeEqual:

]

{ #category : 'Session Control - Private' }
Unicode16 class >> _unicodeCompareEnabled [
  "Answers true if (Globals at: #StringConfiguration) is Unicode16,
   otherwise false.

   Any value other than Unicode16 or String (including a missing entry,
   which reads as nil) is treated as invalid: a warning is written with
   GsFile gciLogServer: unless the current user profile has the
   #NoGsFileOnServer privilege name, and false is answered."

  | config |
  config := Globals at: #StringConfiguration otherwise: nil .
  config == Unicode16 ifTrue:[ ^ true ].
  config == String ifTrue:[ ^ false ].
  (System myUserProfile _hasPrivilegeName: #NoGsFileOnServer) ifFalse:[
    GsFile gciLogServer: 'WARNING invalid StringConfiguration ' , config asString
  ].
  ^ false

]

{ #category : 'Session Control - Private' }
Unicode16 class >> _unicodeCompareMapping [
  "Answers an Array of alternating entries: a class, then an Array of
   selectors for that class.
   Each selector Array is a flat list of pairs:
     lookup selector, implementation selector.
   The very same selector Array is used for every class listed."

  | selectorPairs |
  selectorPairs := #(
      #'<'  #'_unicodeLessThan:'
      #'>'  #'_unicodeGreaterThan:'
      #'='  #'_unicodeEqual:'
      #'_idxPrimCompareGreaterThan:'  #'_idxUnicodeCompareGreaterThan:'
      #'_idxPrimCompareLessThan:'  #'_idxUnicodeCompareLessThan:'
      #'_idxPrimCompareEqualTo:'  #'_idxUnicodeCompareEqualTo:' ).
  ^ { String . selectorPairs .
      MultiByteString . selectorPairs .
      Unicode7 . selectorPairs .
      Unicode16 . selectorPairs .
      Unicode32 . selectorPairs }

]

{ #category : 'Session Control - Private' }
Unicode16 class >> _unicodeCompareTmdForClass: aClass selectors: syms [
  "Answers a new GsMethodDictionary to be installed as a transient method
   dictionary.

   <syms> is a flat list of pairs (lookup selector, implementation selector).
   Each lookup selector is mapped to whatever <aClass> answers to
   persistentMethodAt: for the paired implementation selector."

  | dict |
  dict := GsMethodDictionary new .
  1 to: syms size by: 2 do:[:idx | | lookupSel implMethod |
    lookupSel := syms at: idx .
    implMethod := aClass persistentMethodAt: (syms at: idx + 1) .
    dict at: lookupSel put: implMethod
  ].
  ^ dict

]

{ #category : 'Session Control - Private' }
Unicode16 class >> _useUnicodeComparePrimitives: aBoolean [

"Switches the session between Unicode compare behavior (aBoolean == true)
 and legacy compare behavior (aBoolean == false) and returns the mode that
 was in effect before the call, as reported by _lookupsUsingUnicodeCompares.

 When the requested mode differs from the current one, every class listed
 by _unicodeCompareMapping gets its environment 0 transient method
 dictionary replaced: with a dictionary built by
 _unicodeCompareTmdForClass:selectors: for Unicode mode, or with nil for
 legacy mode.  Lookup caches are then cleared.
 The cached flag is refreshed via _cacheUsingUnicodeCompares on every call,
 whether or not anything was changed."

<primitive: 2001>   "enter protected mode"
| prot prev |
prot := System _protectedMode .
"All work happens inside the protected block; the ensure: block below
 leaves protected mode even if an error unwinds through here."
[ prev := self _lookupsUsingUnicodeCompares .
  prev == aBoolean ifFalse:[ | list tmd |
    "list alternates class, selector Array; hence the step of 2"
    list := self _unicodeCompareMapping .
    1 to: list size by: 2 do:[:j | | aClass |
      aClass := list at: j .
      tmd := aBoolean ifTrue:[ self _unicodeCompareTmdForClass: aClass
				  selectors:  (list at: j + 1) ]
                     ifFalse:[ nil ] .
      aClass transientMethodDictForEnv: 0 put: tmd .
    ].
    self _clearLookupCaches: 0 .
  ].
  self _cacheUsingUnicodeCompares .
] ensure:[
  prot _leaveProtectedMode
].
^ prev

]

{ #category : 'Session Control' }
Unicode16 class >> installUnicodeComparePrimitives [
  "Installs the appropriate entries in transient method dictionaries.
   The Unicode versions of the methods are installed when
     (Globals at: #StringConfiguration) == Unicode16
   and the legacy methods are installed otherwise.

   This changes method lookup for #< #> #= and various indexing-system
   methods when the receiver is a kind of String or MultiByteString.
   Unicode16(C) >> _unicodeCompareMapping has the detailed list of methods;
   no GsPackagePolicy should override any of them.

   A StringConfiguration of Unicode16 is intended for new applications.
   Legacy applications should use String, otherwise they may need to
   rebuild hashed collections and indexes."

  | wantUnicode |
  wantUnicode := self _unicodeCompareEnabled .
  self _useUnicodeComparePrimitives: wantUnicode

]

{ #category : 'Session Control' }
Unicode16 class >> usingUnicodeCompares [
  "Answers true if Unicode behavior is in use for #< #> #= and various
   methods in the indexing system, false if legacy behavior is in use.

   The answer is the value held in session state slot 23, which is written
   by _cacheUsingUnicodeCompares."

  | cachedFlag |
  cachedFlag := System __sessionStateAt: 23 .
  ^ cachedFlag

]

{ #category : 'Instance Creation' }
Unicode16 class >> withAll: aString [

"Answers an instance of Unicode16 or Unicode32, using the minimum number of
 bytes per character needed to represent the argument.

 The Smalltalk code runs only when primitive 942 fails:
   - an argument whose stringCharSize is 2 or more is handed to Unicode32;
   - a Utf8 argument is decoded to Unicode and the request is retried;
   - otherwise the argument class is validated and a primitive failure
     is reported."

<primitive: 942>
aString stringCharSize < 2 ifFalse:[
  ^ Unicode32 withAll: aString
].
(aString isKindOfClass: Utf8) ifTrue:[ | decoded |
  decoded := aString decodeToUnicode .
  ^ self withAll: decoded
].
aString _validateClasses: { String . Utf8 } .
^ self _primitiveFailed: #withAll: args: { aString }

]

{ #category : 'Adding' }
Unicode16 >> _basicAt: index put: char [
"Not supported for Unicode16; callers should use codePointAt:put: instead.
 Reports shouldNotImplement: for this selector."

^ self shouldNotImplement: #_basicAt:put:

]

{ #category : 'Comparing' }
Unicode16 >> _equals: argString collator: anIcuCollator useMinSize: aFalse [

"Returns true if argString compares equal to the receiver using anIcuCollator
 and if argString is not a Symbol, otherwise returns false.

 The Smalltalk code below runs only when primitive 990 fails:
   - with a nil collator and nothing in session state slot 20, the compare
     is retried with IcuCollator default (presumably slot 20 holds the
     session default collator -- TODO confirm);
   - a non-nil collator is validated to be an IcuCollator;
   - any other CharacterCollection argument is compared by sending it #= with
     the receiver as argument;
   - everything else is not equal."
<primitive: 990>
anIcuCollator ifNil:[ (System __sessionStateAt: 20) ifNil:[
  ^ self _equals: argString collator: IcuCollator default useMinSize: aFalse ]
] ifNotNil:[  anIcuCollator _validateClass: IcuCollator ].
(argString isKindOf: CharacterCollection) ifTrue:[ ^ argString = self ].
^ false

]

{ #category : 'Private' }
Unicode16 >> _findString: subString startingAt: startIndex ignoreCase: aBoolean [

"Searches for subString in the receiver beginning at startIndex.
 A case-insensitive search (aBoolean == true) is delegated to
 _findStringNocase:startingAt:collator: with IcuCollator default;
 a case-sensitive search uses the inherited implementation."

aBoolean ifTrue:[
  ^ self _findStringNocase: subString
         startingAt: startIndex
         collator: IcuCollator default
].
^ super _findString: subString startingAt: startIndex ignoreCase: aBoolean

]

{ #category : 'Indexing Support' }
Unicode16 >> _idxBasicCanCompareWithClass: aClass [
  "Answers whether the receiver may be inserted into a basic BtreeNode whose
   #lastElementClass is <aClass> (see RangeEqualityIndex class>>isBasicClass:).

   The decision is delegated to <aClass> by sending it
   _idxBasicCanCompareWithUnicodeInstance: with the receiver as argument."

  | allowed |
  allowed := aClass _idxBasicCanCompareWithUnicodeInstance: self .
  ^ allowed

]

{ #category : 'New Indexing Comparison - prims' }
Unicode16 >> _idxPrimCompareEqualTo: aCharCollection [
  "Equality comparison used by the indexing subsystem when ordering elements
 for insertion into indexing objects.

 Answers false for a nil argument and for a non-Unicode Symbol.
 Any other non-Unicode argument signals an ArgumentError.
 A Unicode argument is compared with _idxUnicodeCompareEqualTo: ."

  aCharCollection ifNil:[ ^ false ].
  aCharCollection isUnicodeString ifTrue:[
    ^ self _idxUnicodeCompareEqualTo: aCharCollection
  ].
  aCharCollection _isSymbol ifTrue:[ ^ false ].
  ArgumentError signal:'non-Unicode argument disallowed in Unicode comparison'.
  ^ self _idxUnicodeCompareEqualTo: aCharCollection

]

{ #category : 'New Indexing Comparison - prims' }
Unicode16 >> _idxPrimCompareGreaterThan: aCharCollection [

"This comparison operation is used for the indexing subsystem to
 determine an ordering for insertion into indexing objects and for
 doing indexing subsystem comparisons.

 This method collates the same as the > method, except that
 it returns true if the argument is nil.  The companion method
 _idxPrimCompareLessThan: returns false for nil, so nil orders before
 every string.

 Signals an ArgumentError if the argument is non-nil and is not a
 Unicode string."

  "nil sorts before any string, so the receiver is greater than nil"
  aCharCollection ifNil:[ ^ true ].
  aCharCollection isUnicodeString ifFalse: [ "not Unicode"
    ArgumentError signal:'non-Unicode argument disallowed in Unicode comparison' ].
  ^self _idxUnicodeCompareGreaterThan: aCharCollection

]

{ #category : 'New Indexing Comparison - prims' }
Unicode16 >> _idxPrimCompareLessThan: aCharCollection [

"Less-than comparison used by the indexing subsystem, both to order
 elements for insertion into indexing objects and for indexing
 comparisons.

 Collates like the < method, except that a nil argument answers false.
 A non-nil argument that is not a Unicode string signals an ArgumentError."

  aCharCollection ifNil:[ ^ false ].
  aCharCollection isUnicodeString ifTrue:[
    ^ self _idxUnicodeCompareLessThan: aCharCollection
  ].
  ArgumentError signal:'non-Unicode argument disallowed in Unicode comparison'.
  ^ self _idxUnicodeCompareLessThan: aCharCollection

]

{ #category : 'New Indexing Comparison' }
Unicode16 >> _idxUnicodeCompareEqualTo: aCharCollection [
  "Indexing equality comparison under Unicode compare rules.
   Simply forwards to the inherited implementation."

  | isEqual |
  isEqual := super _idxUnicodeCompareEqualTo: aCharCollection .
  ^ isEqual

]

{ #category : 'New Indexing Comparison' }
Unicode16 >> _idxUnicodeCompareGreaterThan: aCharCollection [
  "Indexing greater-than comparison under Unicode compare rules.
   Simply forwards to the inherited implementation."

  | isGreater |
  isGreater := super _idxUnicodeCompareGreaterThan: aCharCollection .
  ^ isGreater

]

{ #category : 'New Indexing Comparison' }
Unicode16 >> _idxUnicodeCompareLessThan: aCharCollection [
  "Indexing less-than comparison under Unicode compare rules.
   Simply forwards to the inherited implementation."

  | isLess |
  isLess := super _idxUnicodeCompareLessThan: aCharCollection .
  ^ isLess

]

{ #category : 'Adding' }
Unicode16 >> _retryAdd: aCharOrCharColl [
"Fallback used by add: , addAll: and addLast: after primitive 932 fails.
 Works out why the argument could not be appended directly and retries with
 a converted argument, converts the receiver via _convertToQuadByte, or
 signals an error.

 Character argument:
   - a code point in 16rD800..16rDFFF signals OutOfRange (illegal for Unicode);
   - a code point above 16rFFFF is added after _convertToQuadByte;
   - every remaining path ends by signalling OutOfRange for the code point.
 Unicode string argument (bit 16r8 set in _stringCharSize):
   - added as Unicode16 when _asUnicode16 answers non-nil, otherwise added
     after _convertToQuadByte.
 Other string argument (non-zero char size in the low 3 bits):
   - added after conversion with asUnicodeString.
 Other CharacterCollection: added after conversion with asString.
 Anything else: each element is added individually using do: ."
| info aString cSize |
aCharOrCharColl class == Character ifTrue:[ | av |
  (av := aCharOrCharColl codePoint) >= 16rD800 ifTrue:[
    av <= 16rDFFF ifTrue:[
      OutOfRange signal:'codePoint 16r', av asHexString ,' is illegal for Unicode'.
    ].
    av > 16rFFFF ifTrue:[
      ^ self _convertToQuadByte add: aCharOrCharColl
    ].
  ].
  "NOTE(review): reached for any Character not handled above, including code
   points that are within the stated 0..16r10FFFF range -- confirm that the
   primitive can only fail for a Character when the code point is bad."
  ^ OutOfRange new name:'aChar codePoint' min: 0 max: 16r10FFFF
		actual: av ; signal
].
info := aCharOrCharColl _stringCharSize .
(info bitAnd: 16r8) ~~ 0 ifTrue:[  "arg is a Unicode string, assume cSize 4"
  aString := aCharOrCharColl _asUnicode16 .
  aString ifNotNil:[  self add: aString . ^ aCharOrCharColl ]
          ifNil:[ ^ self _convertToQuadByte add: aCharOrCharColl ].
].
"low 3 bits hold the character size; zero means the argument is not a string"
cSize := info bitAnd: 16r7 .
cSize ~~ 0 ifTrue:[
  self add: aCharOrCharColl asUnicodeString  .
  ^ aCharOrCharColl
].
(aCharOrCharColl isKindOf: CharacterCollection) ifTrue:[
  ^ self add: aCharOrCharColl asString.
].
aCharOrCharColl do: [:each | self add: each].
^ aCharOrCharColl

]

{ #category : 'Comparing' }
Unicode16 >> _unicodeEqual: argString [
  "Equality test of the receiver against argString.  A nil collator is
   passed along, which selects IcuCollator default."

  | isEqual |
  isEqual := self _equals: argString collator: nil useMinSize: false .
  ^ isEqual

]

{ #category : 'Comparing' }
Unicode16 >> _unicodeGreaterThan: argString [
  "Answers true if the receiver collates after argString.  A nil collator is
   passed along, which is interpreted as IcuCollator default."

  | order |
  order := self compareTo: argString collator: nil useMinSize: false .
  ^ order > 0

]

{ #category : 'Comparing' }
Unicode16 >> _unicodeLessThan: argString [
  "Answers true if the receiver collates before argString.  A nil collator is
   passed along, which is interpreted as IcuCollator default."

  | order |
  order := self compareTo: argString collator: nil useMinSize: false .
  ^ order < 0

]

{ #category : 'Adding' }
Unicode16 >> , aCharOrCharColl [

"Concatenation.  Answers a new instance of the receiver's class holding the
 elements of the receiver followed by the elements of aCharOrCharColl.
 The argument must be a CharacterCollection or a Character.

 If primitive 938 fails, a copy of the receiver is extended with addAll: ."

<primitive: 938>
| result |
result := self copy .
result addAll: aCharOrCharColl .
^ result

]

{ #category : 'Comparing' }
Unicode16 >> < argString [
  "Answers true if the receiver collates before argString, using
   IcuCollator default.
   A string argument that is not a Unicode string signals an ArgumentError.
   A non-string argument is passed on to compareTo:collator:useMinSize: ."

  | info |
  info := argString _stringCharSize .
  (info ~~ 0 and:[ (info bitAnd: 16r8) == 0 ]) ifTrue:[ "a string, but not Unicode"
    ArgumentError signal:'non-Unicode argument disallowed in Unicode comparison'.
  ].
  ^ (self compareTo: argString collator: nil useMinSize: false) < 0

]

{ #category : 'Comparing' }
Unicode16 >> = argString [
  "Answers true if the receiver equals argString, using IcuCollator default.
   For a non-Unicode argument: a Symbol or a non-string answers false, and
   any other string signals an ArgumentError."

  | info |
  info := argString _stringCharSize .
  (info bitAnd: 16r8) == 0 ifTrue:[ "not Unicode"
     (argString _isSymbol or:[ info == 0 ]) ifTrue:[ ^ false ].
     ArgumentError signal:'non-Unicode argument disallowed in Unicode comparison'.
  ].
  ^ self _equals: argString collator: nil useMinSize: false

]

{ #category : 'Comparing' }
Unicode16 >> > argString [
  "Answers true if the receiver collates after argString, using
   IcuCollator default.
   A string argument that is not a Unicode string signals an ArgumentError.
   A non-string argument is passed on to compareTo:collator:useMinSize: ."

  | info |
  info := argString _stringCharSize .
  (info ~~ 0 and:[ (info bitAnd: 16r8) == 0 ]) ifTrue:[ "a string, but not Unicode"
    ArgumentError signal:'non-Unicode argument disallowed in Unicode comparison'.
  ].
  ^ (self compareTo: argString collator: nil useMinSize: false) > 0

]

{ #category : 'Adding' }
Unicode16 >> add: aCharOrCharColl [

"Appends aCharOrCharColl to the receiver using primitive 932.
 If the primitive fails, the request is handed to _retryAdd: ."

<primitive: 932>
| outcome |
outcome := self _retryAdd: aCharOrCharColl .
^ outcome

]

{ #category : 'Adding' }
Unicode16 >> addAll: aCharOrCharColl [

"Appends aCharOrCharColl to the receiver using primitive 932.
 If the primitive fails, the request is handed to _retryAdd: ."

<primitive: 932>
| outcome |
outcome := self _retryAdd: aCharOrCharColl .
^ outcome

]

{ #category : 'Adding' }
Unicode16 >> addCodePoint: aSmallInteger [

"Appends the code point aSmallInteger to the receiver using primitive 1050.
 If the primitive fails, a Character is built from the code point and
 appended with add: ."

<primitive: 1050>
| ch |
ch := Character codePoint: aSmallInteger .
^ self add: ch

]

{ #category : 'Adding' }
Unicode16 >> addLast: aCharOrCharColl [

"Appends aCharOrCharColl to the receiver using primitive 932.
 If the primitive fails, the request is handed to _retryAdd: ."

<primitive: 932>
| outcome |
outcome := self _retryAdd: aCharOrCharColl .
^ outcome

]

{ #category : 'Converting' }
Unicode16 >> asString [

"Answers the result of String withAll: applied to the receiver, that is,
 a String representation of the receiver."

| converted |
converted := String withAll: self .
^ converted

]

{ #category : 'Comparing' }
Unicode16 >> at: offset equalsNoCase: aString [

"Answers true if aString is contained in the receiver starting at offset,
 false otherwise.
 The comparison uses full case folding and code point order, via
 icu:UnicodeString::caseCompare in libicu.
 aString should be a Unicode7, Unicode16, or Unicode32.

 When primitive 930 fails: an argument that is not a Unicode string is
 converted with asUnicodeString and the request is retried; otherwise the
 argument class and the offset are validated and a primitive failure is
 reported."

<primitive: 930>
(aString _stringCharSize bitAnd: 16r8) == 0 ifTrue:[  "arg is not a Unicode string"
   ^ self at: offset equalsNoCase: aString asUnicodeString
].
aString _validateClasses: { Unicode7 . Unicode16 . Unicode32 } .
offset _isSmallInteger ifFalse:[ offset _validateClass: SmallInteger ].
(offset <= 0 or:[ offset > self size ]) ifTrue:[
  ^ self _errorIndexOutOfRange: offset
].
self _primitiveFailed: #at:equalsNoCase: args: { offset . aString }

]

{ #category : 'Adding' }
Unicode16 >> at: anIndex put: aChar [

"Stores aChar at the specified location.

 The Smalltalk code below runs only when primitive 933 fails, and works out
 why:
   - aChar must be a Character, otherwise _validateClass: reports it;
   - a code point in 16rD800..16rDFFF signals OutOfRange (illegal for Unicode);
   - a code point above 16r10FFFF signals OutOfRange;
   - a code point above 16rFFFF is stored after _convertToQuadByte;
   - anIndex must be a SmallInteger in the range 1..(self size + 1);
   - if none of the above explains the failure, a primitive failure is
     reported."

<primitive: 933>
(aChar class == Character) ifTrue:[  | av |
  (av := aChar codePoint) >= 16rD800 ifTrue:[
    av <= 16rDFFF ifTrue:[
      OutOfRange signal:'codePoint 16r', av asHexString ,' is illegal for Unicode'.
    ].
    av > 16rFFFF ifTrue:[
      av > 16r10FFFF ifTrue:[
        OutOfRange new name:'aChar codePoint' min: 0 max: 16r10FFFF
                        actual: av ; signal
      ].
      "does not fit in 16 bits: retry after _convertToQuadByte"
      ^ self _convertToQuadByte at: anIndex put: aChar
    ].
  ].
] ifFalse:[
 aChar _validateClass: Character .
].
"the code point is acceptable at this point, so check the index next"
(anIndex _isSmallInteger) ifTrue: [
  ((anIndex > (self size + 1)) or: [anIndex <= 0]) ifTrue: [
    ^ self _errorIndexOutOfRange: anIndex
  ]
] ifFalse: [
  ^ self _errorNonIntegerIndex: anIndex
] .
self _primitiveFailed: #at:put: args: { anIndex . aChar }

]

{ #category : 'Adding' }
Unicode16 >> codePointAt: anIndex put: anInt [

"Stores codePoint anInt at the specified location. Returns anInt.
 Class of receiver is changed to Unicode32 if needed.

 The Smalltalk code below runs only when primitive 936 fails, and works out
 why:
   - a code point in 16rD800..16rDFFF signals OutOfRange (illegal for Unicode);
   - a code point above 16r10FFFF, or a negative one, signals OutOfRange;
   - a code point above 16rFFFF is stored after _convertToQuadByte;
   - anIndex must be a SmallInteger in the range 1..(self size + 1);
   - if none of the above explains the failure, a primitive failure is
     reported for #codePointAt:put: ."

<primitive: 936>
(anInt class == SmallInteger) ifTrue:[
  anInt >= 16rD800 ifTrue:[
    anInt <= 16rDFFF ifTrue:[
      OutOfRange signal:'codePoint 16r', anInt asHexString ,' is illegal for Unicode'.
    ].
    anInt > 16rFFFF ifTrue:[
      anInt > 16r10FFFF ifTrue:[
        OutOfRange new name:'a codePoint' min: 0 max: 16r10FFFF
                        actual: anInt ; signal
      ].
      ^ self _convertToQuadByte codePointAt: anIndex put: anInt
    ].
  ].
  "Only a negative value is out of range here.  A valid 16 bit code point
   must fall through to the index checks, so that a bad index is not
   reported as a bad code point."
  anInt < 0 ifTrue:[
    ^ OutOfRange new name:'anInt' min: 0 max: 16r10FFFF
                actual: anInt ; signal
  ].
].
(anIndex _isSmallInteger) ifTrue: [
  ((anIndex > (self size + 1)) or: [anIndex <= 0]) ifTrue: [
    ^ self _errorIndexOutOfRange: anIndex
  ]
] ifFalse: [
  ^ self _errorNonIntegerIndex: anIndex
] .
^ self _primitiveFailed: #codePointAt:put: args: { anIndex . anInt }

]

{ #category : 'Comparing' }
Unicode16 >> compareCase: aString [

"Answers -1, 0 or 1 when the receiver is less than, equal to, or greater
 than aString.
 The comparison uses full case folding and code point order, via
 icu:UnicodeString::caseCompare in libicu.
 aString should be a Unicode7, Unicode16, or Unicode32.

 When primitive 969 fails: an argument that is not a Unicode string is
 converted with asUnicodeString and the compare is retried; otherwise the
 argument class is validated and a primitive failure is reported."

<primitive: 969>
(aString _stringCharSize bitAnd: 16r8) == 0 ifTrue:[  "arg is not a Unicode string"
   ^ self compareCase: aString asUnicodeString
].
aString _validateClasses: { Unicode7 . Unicode16 . Unicode32 } .
^ self _primitiveFailed: #compareCase: args: { aString }

]

{ #category : 'Comparing' }
Unicode16 >> compareTo: argString collator: anIcuCollator useMinSize: aMinSize [

"Returns -1, 0 or 1,  when receiver is less than,
 equal to, or greater than argString .
 argString must be a String, MultiByteString, or a Utf8 .
 anIcuCollator == nil is interpreted as   IcuCollator default .

 If aMinSize == false, normal comparison is done.

 If aMinSize==true, compare stops at (self size min: argString size),
 which is Squeak semantics for comparison.

 If aMinSize is a SmallInteger >= 1, compare stops at
   aMinSize min: (self size min: argString size) .

 The Smalltalk code below runs only when primitive 928 fails.  With a nil
 collator and nothing in session state slot 20, the compare is retried with
 IcuCollator default (presumably slot 20 holds the session default
 collator -- TODO confirm).  Otherwise the arguments are validated and a
 primitive failure is reported."
<primitive: 928>   "prim handles String, DoubleByteString args"
anIcuCollator ifNil:[
  (System __sessionStateAt: 20) ifNil:[
    ^ self compareTo: argString collator: IcuCollator default useMinSize: aMinSize ]
] ifNotNil:[ anIcuCollator _validateClass: IcuCollator ].
aMinSize _validateClasses: { Boolean . SmallInteger }.
argString _validateClasses: { String . Utf8 }.
^ self _primitiveFailed: #compareTo:collator:useMinSize:
        args: { argString . anIcuCollator . aMinSize }

]

{ #category : 'Comparing' }
Unicode16 >> equals: aString collatingTable: aTable [
  "Collating-table comparison is not supported for Unicode16.
   Reports shouldNotImplement: for this selector."

  self shouldNotImplement: #equals:collatingTable:

]

{ #category : 'Case-Insensitive Comparisons' }
Unicode16 >> equalsNoCase: aString [
  "Answers true if the receiver and aString are equal under full case
  folding and code point order (icu:UnicodeString::caseCompare in libicu),
  false otherwise.
  aString should be a Unicode7, Unicode16, or Unicode32."

  | order |
  order := self compareCase: aString .
  ^ order == 0

]

{ #category : 'Comparing' }
Unicode16 >> greaterThan: aString collatingTable: aTable [
  "Collating-table comparison is not supported for Unicode16.
   Reports shouldNotImplement: for this selector."

  self shouldNotImplement: #greaterThan:collatingTable:

]

{ #category : 'Adding' }
Unicode16 >> insertAll: aCharOrCharColl at: anIndex [

"Inserts aCharOrCharColl into the receiver at anIndex.

 The Smalltalk code below runs only when primitive 922 fails:
   - anIndex must be a SmallInteger in the range 1..(self size + 1);
   - a string argument (non-zero _stringCharSize) is inserted as Unicode16
     when _asUnicode16 answers non-nil, otherwise it is inserted after
     _convertToQuadByte;
   - a Character argument is wrapped in a new Unicode16 and inserted;
   - anything else is validated against the Unicode classes and a primitive
     failure is reported."

<primitive: 922>
| info aString |
anIndex _isSmallInteger ifFalse:[ anIndex _validateClass: SmallInteger ].
((anIndex <= 0) or: [anIndex > (self size + 1)])
  ifTrue: [ ^ self _errorIndexOutOfRange: anIndex].

info := aCharOrCharColl _stringCharSize .
info ~~ 0 ifTrue:[  "arg is a DoubleByteString or Unicode32 or QuadByteString"
  aString := aCharOrCharColl _asUnicode16 .
  aString ifNotNil:[ ^  self insertAll: aString at: anIndex ]
          ifNil:[ ^ self _convertToQuadByte insertAll: aCharOrCharColl at: anIndex]
].
aCharOrCharColl class == Character ifTrue:[
  "wrap the single Character in a string so that the retry can handle it"
  (aString := Unicode16 new) add: aCharOrCharColl .
  ^ self insertAll: aString at: anIndex .
].
aCharOrCharColl _validateClasses: { Unicode7 . Unicode16 . Unicode32 } .
self _primitiveFailed: #insertAll:at: args: { aCharOrCharColl . anIndex }

]

{ #category : 'Case-Insensitive Comparisons' }
Unicode16 >> isEquivalent: aString [
  "Answers true if the receiver and aString are equal under full case
  folding and code point order (icu:UnicodeString::caseCompare in libicu),
  false otherwise.
  An argument that is not a string (its _stringCharSize is zero) answers
  false without being compared."

  ^ aString _stringCharSize ~~ 0
      and:[ (self compareCase: aString) == 0 ]

]

{ #category : 'Comparing' }
Unicode16 >> lessThan: aString collatingTable: aTable [
  "Collating-table comparison is not supported for Unicode16.
   Reports shouldNotImplement: for this selector."

  self shouldNotImplement: #lessThan:collatingTable:

]

{ #category : 'Class Membership' }
Unicode16 >> speciesForPrint [

"Answers the class Unicode16, regardless of the receiver's own class."

^ Unicode16

]

{ #category : 'Adding' }
Unicode16 >> squeakBasicAt: anIndex put: aValue [
"Not supported for Unicode16; callers should use codePointAt:put: instead.
 Reports shouldNotImplement: for this selector."

^ self shouldNotImplement: #squeakBasicAt:put:

]
