Extension { #name : 'Unicode32' }

{ #category : 'Indexing Support' }
Unicode32 class >> _idxBasicCanCompareWithCharacterCollectionInstance: aCharacterCollection [
  "Returns true if <aCharacterCollection> may be inserted into a basic BtreeNode
   whose #lastElementClass is the receiver (see RangeEqualityIndex
   class>>isBasicClass:)."

  "If using Unicode compares, then *String and Unicode* instances may be compared.
   If not using Unicode compares then *String and Unicode* instances may not be compared."

  "The argument itself is not inspected; the answer is whatever
   Unicode16 class>>usingUnicodeCompares reports."

  ^ Unicode16 usingUnicodeCompares

]

{ #category : 'Indexing Support' }
Unicode32 class >> _idxBasicCanCompareWithUnicodeInstance: aUnicodeString [
  "Returns true if <aUnicodeString> may be inserted into a basic BtreeNode whose
   #lastElementClass is the receiver (see RangeEqualityIndex class>>isBasicClass:)."

  "Always answers true: the argument is not inspected and the answer does not
   depend on whether Unicode compares are in use."

  ^ true

]

{ #category : 'Instance Creation' }
Unicode32 class >> withAll: aString [

"Returns an instance of Unicode32 built from aString.
 The work is done by primitive 943; the Smalltalk code below is the
 fallback path that runs when the primitive fails."

<primitive: 943>
"A Utf8 argument is decoded first and the result is passed back through this method."
(aString isKindOfClass: Utf8) ifTrue:[ ^ self withAll: aString decodeToUnicode ].
"Reject arguments that are neither a String nor a Utf8."
aString _validateClasses: { String . Utf8 } .
"The class was acceptable, so the failure is reported as invalid code points."
^ ArgumentError signal:'argument contains codePoints not valid for Unicode'

]

{ #category : 'Comparing' }
Unicode32 >> _at: offset equalsNoCase: aString [

"Returns true if aString is contained in the receiver, starting at
 offset.  Returns false otherwise.
 The comparison is done with full case folding and code point order,
 using  icu:UnicodeString::caseCompare in libicu .
 aString is expected to be a Unicode7, Unicode16 , or Unicode32; when
 primitive 930 fails on an argument whose _stringCharSize lacks the
 16r8 (Unicode) bit, the call is retried with aString asUnicodeString.
 Signals an index error if offset is not within 1..self size."

<primitive: 930>
| info |
"Everything below runs only after primitive 930 has failed."
info := aString _stringCharSize .
(info bitAnd: 16r8) == 0 ifTrue:[  "arg is not a Unicode string"
   ^ self _at: offset equalsNoCase: aString asUnicodeString
].
aString _validateClasses: { String } .
offset _isSmallInteger ifFalse:[ offset _validateClass: SmallInteger ].
((offset <= 0) or: [offset > self size ])
  ifTrue: [ ^ self _errorIndexOutOfRange: offset].
"No specific cause identified; report a generic primitive failure."
self _primitiveFailed: #_at:equalsNoCase: args: { offset . aString }

]

{ #category : 'Accessing' }
Unicode32 >> _basicAt: index put: char [
"Disallowed , use codePointAt:put:
 Both arguments are ignored; the method always reports shouldNotImplement:."
^ self shouldNotImplement: #_basicAt:put:

]

{ #category : 'Private' }
Unicode32 >> _findString: subString startingAt: startIndex ignoreCase: aBoolean [

"Searches the receiver for subString beginning at startIndex.
 A case-sensitive search (aBoolean false) is delegated to the superclass
 implementation; a case-insensitive search (aBoolean true) goes through
 _findStringNocase:startingAt:collator: with IcuCollator default."

^ aBoolean
    ifTrue:[
      self _findStringNocase: subString startingAt: startIndex
	collator: IcuCollator default ]
    ifFalse:[
      super _findString: subString startingAt: startIndex ignoreCase: aBoolean ]

]

{ #category : 'Indexing Support' }
Unicode32 >> _idxBasicCanCompareWithClass: aClass [
  "Returns true if the receiver may be inserted into a basic BtreeNode whose
   #lastElementClass is <aClass> (see RangeEqualityIndex class>>isBasicClass:)."

  "If using Unicode compares, then *String and Unicode* instances may be compared.
   If not using Unicode compares then *String and Unicode* instances may not be compared."

  "The decision is delegated to aClass, which is asked whether it accepts a
   Unicode instance; the receiver is passed as that instance."

  ^ aClass _idxBasicCanCompareWithUnicodeInstance: self

]

{ #category : 'New Indexing Comparison - prims' }
Unicode32 >> _idxPrimCompareEqualTo: aCharCollection [
  "Equality test used by the indexing subsystem when placing the receiver
 into indexing objects.

 Returns false for a nil argument and for a Symbol that is not a Unicode
 string.  Any other non-Unicode argument signals an ArgumentError.
 A Unicode argument is compared via _idxUnicodeCompareEqualTo: ."

  aCharCollection ifNil:[ ^ false ].
  aCharCollection isUnicodeString ifTrue:[
    ^ self _idxUnicodeCompareEqualTo: aCharCollection ].
  "not Unicode from here on"
  aCharCollection _isSymbol ifTrue:[ ^ false ].
  ArgumentError signal:'non-Unicode argument disallowed in Unicode comparison'.
  "reached only if the signal above returns"
  ^ self _idxUnicodeCompareEqualTo: aCharCollection

]

{ #category : 'New Indexing Comparison - prims' }
Unicode32 >> _idxPrimCompareGreaterThan: aCharCollection [

"This comparison operation is used for the indexing subsystem to
 determine an ordering for insertion into indexing objects and for
 doing indexing subsystem comparisons.

 This method collates the same as the > method, except that
 it returns true if the argument is nil.  This mirrors
 _idxPrimCompareLessThan: , which returns false for a nil argument.

 Signals an ArgumentError if the argument is non-nil and is not a
 Unicode string."

  "nil argument: the receiver is greater, as documented above"
  aCharCollection ifNil:[ ^ true ].
  aCharCollection isUnicodeString ifFalse: [ "not Unicode"
    ArgumentError signal:'non-Unicode argument disallowed in Unicode comparison' ].
  ^self _idxUnicodeCompareGreaterThan: aCharCollection

]

{ #category : 'New Indexing Comparison - prims' }
Unicode32 >> _idxPrimCompareLessThan: aCharCollection [

"Less-than test used by the indexing subsystem, both for ordering
 insertions into indexing objects and for indexing comparisons.

 Behaves like the < method, except that a nil argument yields false.
 A non-nil argument that is not a Unicode string signals an ArgumentError."

  aCharCollection ifNil:[ ^ false ].
  aCharCollection isUnicodeString ifTrue:[
    ^ self _idxUnicodeCompareLessThan: aCharCollection ].
  "not Unicode"
  ArgumentError signal:'non-Unicode argument disallowed in Unicode comparison'.
  "reached only if the signal above returns"
  ^ self _idxUnicodeCompareLessThan: aCharCollection

]

{ #category : 'New Indexing Comparison' }
Unicode32 >> _idxUnicodeCompareEqualTo: aCharCollection [

  "Indexing equality comparison against aCharCollection.
   Adds no behavior of its own; simply forwards to the superclass implementation."

  ^ super _idxUnicodeCompareEqualTo: aCharCollection

]

{ #category : 'New Indexing Comparison' }
Unicode32 >> _idxUnicodeCompareGreaterThan: aCharCollection [

  "Indexing greater-than comparison against aCharCollection.
   Adds no behavior of its own; simply forwards to the superclass implementation."

  ^ super _idxUnicodeCompareGreaterThan: aCharCollection

]

{ #category : 'New Indexing Comparison' }
Unicode32 >> _idxUnicodeCompareLessThan: aCharCollection [

  "Indexing less-than comparison against aCharCollection.
   Adds no behavior of its own; simply forwards to the superclass implementation."

  ^ super _idxUnicodeCompareLessThan: aCharCollection

]

{ #category : 'Adding' }
Unicode32 >> _retryAdd: aCharOrCharColl [
"Fallback used by add: , addAll: and addLast: after primitive 940 has failed.

 - A Character argument signals OutOfRange, reporting the code point of
   the Character as the actual value.
 - A Unicode string argument is reported as a primitive failure.
 - Any other argument with a non-zero character size is converted with
   asUnicodeString and added again; the original argument is returned.
 - Any other CharacterCollection is added via asString.
 - Anything else is enumerated with do: and added element by element.

 Returns aCharOrCharColl, except on the CharacterCollection path, which
 returns the result of the nested add: ."
| info cSize |
aCharOrCharColl class == Character ifTrue:[
  "report the numeric code point, matching the 'aChar codePoint' name
   and the integer min/max bounds"
  ^ OutOfRange new name:'aChar codePoint' min: 0 max: 16r10FFFF
       actual: aCharOrCharColl codePoint ; signal
].
info := aCharOrCharColl _stringCharSize .
(info bitAnd: 16r8) ~~ 0 ifTrue:[  "arg is a Unicode string, assume cSize 4"
  self _primitiveFailed: #add: args: { aCharOrCharColl } .
].
cSize := info bitAnd: 16r7 .
cSize ~~ 0 ifTrue:[
  self add: aCharOrCharColl asUnicodeString .
  ^ aCharOrCharColl
].
"Characters were fully handled by the first test, so no second Character
 check is needed here."
(aCharOrCharColl isKindOf: CharacterCollection) ifTrue:[
  ^ self add: aCharOrCharColl asString.
].
aCharOrCharColl do: [:each | self add: each].
^ aCharOrCharColl

]

{ #category : 'Comparing' }
Unicode32 >> _unicodeEqual: argString [
  "Compares receiver to argument using  IcuCollator default.
   A nil collator is passed to _equals:collator:useMinSize: ; presumably nil
   selects the default collator -- TODO confirm.
   Unlike = , no check is made here that argString is a Unicode string."
  ^ self _equals: argString collator: nil useMinSize: false

]

{ #category : 'Comparing' }
Unicode32 >> _unicodeGreaterThan: argString [
  "Compares receiver to argument using  IcuCollator default.
   Returns true when compareTo:collator:useMinSize: answers a positive value.
   A nil collator is passed; presumably nil selects the default collator -- TODO confirm.
   Unlike > , no check is made here that argString is a Unicode string."
  ^ (self compareTo: argString collator: nil useMinSize: false) > 0

]

{ #category : 'Comparing' }
Unicode32 >> _unicodeLessThan: argString [
  "Compares receiver to argument using  IcuCollator default.
   Returns true when compareTo:collator:useMinSize: answers a negative value.
   A nil collator is passed; presumably nil selects the default collator -- TODO confirm.
   Unlike < , no check is made here that argString is a Unicode string."
  ^ (self compareTo: argString collator: nil useMinSize: false) < 0

]

{ #category : 'Adding' }
Unicode32 >> , aCharOrCharColl [

"Returns a new instance of the receiver's class that contains the elements of
 the receiver followed by the elements of aCharOrCharColl.  The argument
 must be a CharacterCollection or a Character.

 Primitive 939 does the concatenation; if it fails, the fallback below
 copies the receiver and appends the argument with addAll: ."

<primitive: 939>
"yourself makes the cascade answer the copy rather than the result of addAll:"
^ self copy addAll: aCharOrCharColl; yourself

]

{ #category : 'Comparing' }
Unicode32 >> < argString [
  "Returns true if the receiver collates before argString, comparing with
   compareTo:collator:useMinSize: and a nil collator.
   An argument that is a string (non-zero _stringCharSize) but not a
   Unicode string signals an ArgumentError."
  | charInfo |
  charInfo := argString _stringCharSize .
  ((charInfo bitAnd: 16r8) == 0 and:[ charInfo ~~ 0 ]) ifTrue:[ "a string, but not Unicode"
    ArgumentError signal:'non-Unicode argument disallowed in Unicode comparison'.
  ].
  ^ (self compareTo: argString collator: nil useMinSize: false) < 0

]

{ #category : 'Comparing' }
Unicode32 >> = argString [
  "Returns true if the receiver equals argString, comparing with
   _equals:collator:useMinSize: and a nil collator.
   For a non-Unicode argument: returns false if it is a Symbol or is not
   a string at all (zero _stringCharSize); otherwise signals an ArgumentError."
  | charInfo |
  charInfo := argString _stringCharSize .
  (charInfo bitAnd: 16r8) ~~ 0 ifTrue:[ "Unicode argument"
    ^ self _equals: argString collator: nil useMinSize: false
  ].
  argString _isSymbol ifTrue:[ ^ false ].
  charInfo == 0 ifTrue:[ ^ false ].
  ArgumentError signal:'non-Unicode argument disallowed in Unicode comparison'.
  "reached only if the signal above returns"
  ^ self _equals: argString collator: nil useMinSize: false

]

{ #category : 'Comparing' }
Unicode32 >> > argString [
  "Returns true if the receiver collates after argString, comparing with
   compareTo:collator:useMinSize: and a nil collator.
   An argument that is a string (non-zero _stringCharSize) but not a
   Unicode string signals an ArgumentError."
  | charInfo |
  charInfo := argString _stringCharSize .
  ((charInfo bitAnd: 16r8) == 0 and:[ charInfo ~~ 0 ]) ifTrue:[ "a string, but not Unicode"
    ArgumentError signal:'non-Unicode argument disallowed in Unicode comparison'.
  ].
  ^ (self compareTo: argString collator: nil useMinSize: false) > 0

]

{ #category : 'Adding' }
Unicode32 >> add: aCharOrCharColl [

"Appends aCharOrCharColl to the receiver using primitive 940.
 If the primitive fails, the argument is handed to _retryAdd: , which
 either converts and re-adds it or signals an error."

<primitive: 940>
^ self _retryAdd: aCharOrCharColl

]

{ #category : 'Adding' }
Unicode32 >> addAll: aCharOrCharColl [

"Appends aCharOrCharColl to the receiver using primitive 940
 (the same primitive as add: ).
 If the primitive fails, the argument is handed to _retryAdd: ."

<primitive: 940>
^ self _retryAdd: aCharOrCharColl

]

{ #category : 'Adding' }
Unicode32 >> addLast: aCharOrCharColl [

"Appends aCharOrCharColl to the receiver using primitive 940
 (the same primitive as add: ).
 If the primitive fails, the argument is handed to _retryAdd: ."

<primitive: 940>
^ self _retryAdd: aCharOrCharColl

]

{ #category : 'Converting' }
Unicode32 >> asString [

"Returns a String representation of the receiver,
 obtained by sending withAll: to String with the receiver as argument."

^ String withAll: self

]

{ #category : 'Comparing' }
Unicode32 >> at: offset equalsNoCase: aString [

"Returns true if aString is contained in the receiver, starting at
 offset.  Returns false otherwise.
 The comparison is done with full case folding and code point order,
 using  icu:UnicodeString::caseCompare in libicu .
 aString must be a Unicode7, Unicode16 , or Unicode32"

| tail |
offset == 1 ifTrue:[ ^ self _at: 1 equalsNoCase: aString ].

"prim 930 can't handle random positions in a full Utf32, so compare
 against a copy of the receiver that begins at offset"
tail := self copyFrom: offset to: self size .
^ tail _at: 1 equalsNoCase: aString

]

{ #category : 'Accessing' }
Unicode32 >> at: anIndex put: aChar [

"Stores aChar at the specified location.
 Primitive 934 does the store; the code below runs only when it fails
 and works out which error to report:
  - aChar is a Character with code point outside 0..16r10FFFF -> OutOfRange
  - aChar is not a Character -> class validation error
  - anIndex is a SmallInteger outside 1..(self size + 1) -> index out of range
  - anIndex is not a SmallInteger -> non-integer index error
  - otherwise -> generic primitive failure."

<primitive: 934>
(aChar class == Character) ifTrue:[  | anInt |
  anInt := aChar codePoint .
  (anInt < 0 or:[ anInt > 16r10FFFF]) ifTrue:[
    ^ OutOfRange new name:'aChar codePoint' min: 0 max: 16r10FFFF
			actual: anInt ; signal
  ].
] ifFalse:[
 aChar _validateClass: Character .
].
(anIndex _isSmallInteger) ifTrue: [
  "self size + 1 is accepted as an index, so it is not reported as out of range"
  ((anIndex > (self size + 1)) or: [anIndex <= 0]) ifTrue: [
    ^ self _errorIndexOutOfRange: anIndex
  ]
] ifFalse: [
  ^ self _errorNonIntegerIndex: anIndex
] .
self _primitiveFailed: #at:put: args: { anIndex . aChar }

]

{ #category : 'Accessing' }
Unicode32 >> codePointAt: anIndex put: anInt [

"Stores anInt at the specified location. returns anInt
 Primitive 937 does the store; the code below runs only when it fails
 and works out which error to report:
  - anInt is not a SmallInteger -> class validation error
  - anInt is outside 0..16r10FFFF -> OutOfRange
  - anIndex is a SmallInteger outside 1..(self size + 1) -> index out of range
  - anIndex is not a SmallInteger -> non-integer index error
  - otherwise -> generic primitive failure."

<primitive: 937>
anInt _validateClass: SmallInteger .
(anInt < 0 or:[ anInt > 16r10FFFF]) ifTrue:[
  ^ OutOfRange new name:'anInt' min: 0 max: 16r10FFFF actual: anInt ; signal
].
(anIndex _isSmallInteger) ifTrue: [
  ((anIndex > (self size + 1)) or: [anIndex <= 0]) ifTrue: [
    ^ self _errorIndexOutOfRange: anIndex
  ]
] ifFalse: [
  ^ self _errorNonIntegerIndex: anIndex
] .
"report the failure under this method's own selector"
self _primitiveFailed: #codePointAt:put: args: { anIndex . anInt }

]

{ #category : 'Comparing' }
Unicode32 >> compareCase: aString [

"Returns -1, 0 or 1,  when receiver is less than,
 equal to, or greater than aString .
 Comparison is done with full case folding and code point order,
 using  icu:UnicodeString::caseCompare in libicu.
 aString is expected to be a Unicode7, Unicode16 , or Unicode32; when
 primitive 969 fails on an argument whose _stringCharSize lacks the
 16r8 (Unicode) bit, the call is retried with aString asUnicodeString."

<primitive: 969>
| info |
"Everything below runs only after primitive 969 has failed."
info := aString _stringCharSize .
(info bitAnd: 16r8) == 0 ifTrue:[  "arg is not a Unicode string"
   ^ self compareCase: aString asUnicodeString
].
aString _validateClasses: { String } .
^self _primitiveFailed: #compareCase: args: { aString }

]

{ #category : 'Comparing' }
Unicode32 >> equals: aString collatingTable: aTable [
  "disallowed
   Both arguments are ignored; the method always reports shouldNotImplement: ."
  self shouldNotImplement: #equals:collatingTable:

]

{ #category : 'Case-Insensitive Comparisons' }
Unicode32 >> equalsNoCase: aString [
  "Returns true if receiver and a String are equal using
  full case folding and code point order,
  using  icu:UnicodeString::caseCompare in libicu.
  Returns false otherwise.
  aString must be a Unicode7, Unicode16 , or Unicode32
  Implemented as a test that compareCase: answers 0; unlike isEquivalent: ,
  no check is made first that aString is a string."

  ^ (self compareCase: aString) == 0

]

{ #category : 'Comparing' }
Unicode32 >> greaterThan: aString collatingTable: aTable [
  "disallowed
   Both arguments are ignored; the method always reports shouldNotImplement: ."
  self shouldNotImplement: #greaterThan:collatingTable:

]

{ #category : 'Adding' }
Unicode32 >> insertAll: aCharOrCharColl at: anIndex [
"Inserts aCharOrCharColl into the receiver at anIndex using primitive 923.
 The code below runs only when the primitive fails:
  - anIndex must be a SmallInteger within 1..(self size + 1), otherwise
    an error is signalled
  - an argument with a non-zero _stringCharSize is converted with
    asUnicodeString and the insert is retried
  - a Character argument is wrapped in a new Unicode32 and the insert is retried
  - any other argument must be a Unicode7, Unicode16 or Unicode32;
    what remains is reported as a generic primitive failure."
<primitive: 923>
| aString info |
anIndex _isSmallInteger ifFalse:[ anIndex _validateClass: SmallInteger ].
((anIndex <= 0) or: [anIndex > (self size + 1)])
  ifTrue: [ ^ self _errorIndexOutOfRange: anIndex].

info := aCharOrCharColl _stringCharSize .
"NOTE(review): this tests info ~~ 0 rather than the 16r8 Unicode bit used by
 the other fallbacks in this class; confirm a Unicode argument cannot reach
 this retry with a valid index."
info ~~ 0 ifTrue:[  "argument is a String, DoubleByteString, QuadByteString, or ByteArray"
   ^ self insertAll:  aCharOrCharColl asUnicodeString  at: anIndex
].
aCharOrCharColl class == Character ifTrue:[
  (aString := Unicode32 new) add: aCharOrCharColl .
  ^ self insertAll: aString at: anIndex .
].
aCharOrCharColl _validateClasses: { Unicode7 . Unicode16 . Unicode32 } .
self _primitiveFailed: #insertAll:at: args: { aCharOrCharColl . anIndex }

]

{ #category : 'Case-Insensitive Comparisons' }
Unicode32 >> isEquivalent: aString [
  "Case-insensitive equality: returns true when aString is a string
  (non-zero _stringCharSize) and compareCase: answers 0 for it, i.e. the
  two are equal under full case folding and code point order
  (icu:UnicodeString::caseCompare in libicu).
  Returns false otherwise, including when aString is not a string."

  ^ (aString _stringCharSize ~~ 0)
      and:[ (self compareCase: aString) == 0 ]

]

{ #category : 'Comparing' }
Unicode32 >> lessThan: aString collatingTable: aTable [
  "disallowed
   Both arguments are ignored; the method always reports shouldNotImplement: ."
  self shouldNotImplement: #lessThan:collatingTable:

]

{ #category : 'Class Membership' }
Unicode32 >> speciesForPrint [

"Returns the class to use when printing the receiver.
 Unicode16 is answered rather than Unicode32."

^ Unicode16  "let stream promote if needed"

]

{ #category : 'Accessing' }
Unicode32 >> squeakBasicAt: anIndex put: aValue [
"Disallowed , use codePointAt:put:
 Both arguments are ignored; the method always reports shouldNotImplement: ."
^ self shouldNotImplement: #squeakBasicAt:put:

]
