"
TreeSet is a set that has good performance in GemStone at a wide variety of sizes, and grows gracefully without long pauses.

It is structured as a tree that is sorted based on a permutation of the #hash of the elements. Nodes of the tree are sized to match the GemStone page size. Leaf nodes are page-sized hash tables, and internal nodes are a B+tree variant that allows duplicate keys.


Instance variables:

tally				How many elements I contain
rootNode		Either a HtSetLeafNode (if my size is small) or a HtSetInternalNode (if my contents will not fit in one leaf node)
walker			A cached instance of HtSetTreeWalker that helps with operations on the tree
scratchLeaf	A cached instance of HtSetScratchLeafNode that is used for temporary storage during the splitting of a leaf node when it becomes full
heap				A cached instance of HtHeap, used to heap-sort a leaf node being split. This is necessary only in uncommon 
						situations where the normal heuristic for splitting a leaf node results in a very uneven split.

The cached instances are all dbTransient, so modifications to them will not need to be written to tranlogs or extents upon commit.
They are cached, rather than created at each operation, to avoid creating unnecessary garbage during normal operations on the set.
"
Class {
	#name : 'TreeSet',
	#superclass : 'UnorderedCollection',
	#instVars : [
		'rootNode',
		'tally',
		'walker',
		'heap',
		'scratchLeaf'
	],
	#category : 'HashTree-Core'
}

{ #category : 'instance creation' }
TreeSet class >> new [
	"Answer a new, empty instance. The instance is created with #basicNew
	and then sent #initialize; whatever #initialize answers is answered here."

	| fresh |
	fresh := self basicNew.
	^ fresh initialize
]

{ #category : 'instance creation' }
TreeSet class >> new: someSize [
	"Always signal an error: instances cannot be pre-sized, so someSize is
	never used. Send #new to create an instance."

	self error: 'TreeSets are not pre-sizeable. Send #new instead.'
]

{ #category : 'private' }
TreeSet >> _asCollectionForSorting [
	"Private. Answer a new Array holding each of my elements, in the order
	in which #do: enumerates them."

	| elements |
	elements := Array new.
	self do: [ :each | elements add: each ].
	^ elements
]

{ #category : 'private' }
TreeSet >> _asIdentityBag [
	"Private. Answer a new IdentitySet to which every one of my elements
	has been added.

	Used by index creation."

	| identitySet |
	identitySet := IdentitySet new.
	self do: [ :element | identitySet add: element ].
	^ identitySet
]

{ #category : 'private' }
TreeSet >> _deferredGciUpdateWith: valueArray [
	"Private. The semantics of a GCI update are not defined for me, so
	always report the #errNoStructuralUpdate error. valueArray is not used."

	self _error: #'errNoStructuralUpdate'
]

{ #category : 'private' }
TreeSet >> _detect: aBlock [
	"Private. Answer the first element, in #do: enumeration order, for which
	aBlock answers true. If no element qualifies, report the
	#assocErrNoElementsDetected error with aBlock as its argument."

	self do: [ :candidate |
		(aBlock value: candidate)
			ifTrue: [ ^ candidate ] ].
	^ self _error: #assocErrNoElementsDetected args: { aBlock }
]

{ #category : 'private' }
TreeSet >> _nodesObjectSecurityPolicy: anObjectSecurityPolicy [
	"Private. Send #objectSecurityPolicy: with anObjectSecurityPolicy to each
	of my component objects, in this order: heap, rootNode, scratchLeaf, walker."

	{ heap. rootNode. scratchLeaf. walker }
		do: [ :component | component objectSecurityPolicy: anObjectSecurityPolicy ]
]

{ #category : 'private' }
TreeSet >> _reject: aBlock [
	"Private. Answer a new instance of my class containing exactly those of
	my elements for which aBlock answers false."

	| kept |
	kept := self class new.
	self do: [ :each |
		(aBlock value: each)
			ifFalse: [ kept add: each ] ].
	^ kept
]

{ #category : 'private' }
TreeSet >> _select: aBlock [
	"Private. Answer a new instance of my class containing exactly those of
	my elements for which aBlock answers true."

	| chosen |
	chosen := self class new.
	self do: [ :each |
		(aBlock value: each)
			ifTrue: [ chosen add: each ] ].
	^ chosen
]

{ #category : 'adding' }
TreeSet >> add: newObject [
	"Make newObject one of my elements unless I already hold an element #= to it.
	Answer newObject either way. nil cannot be an element; adding nil reports
	the #rtErrNilKey error."

	| permuted cachedWalker |
	newObject ifNil: [ ^ self _error: #'rtErrNilKey' ].
	"Self-assignment puts me in the write set even when the element is already present."
	tally := tally.
	permuted := self permutedHashOf: newObject.
	"Hold the walker in a temp so it stays in memory until this method returns."
	cachedWalker := walker.
	"The walker might have dropped from memory since last use, so re-attach it."
	cachedWalker collection: self.
	"A non-nil answer is a new node that must sit beside the current root under a new root."
	(cachedWalker searchTree: rootNode toAdd: newObject withHash: permuted)
		ifNotNil: [ :siblingNode | self splitRootWith: siblingNode ].
	^ newObject
]

{ #category : 'auditing' }
TreeSet >> audit [
	"Check my cached helpers and my tree for consistency.
	Answer true if nothing beyond the report header was written;
	otherwise answer the report String describing the problems."

	| report cleanPosition counted mismatch |
	report := WriteStream on: String new.
	report nextPutAll: 'Audit report for ' , self class name , ' '.
	self asOop printOn: report.
	report lf.
	"Anything written past this position means a problem was reported."
	cleanPosition := report position.
	heap auditEmptyOnto: report.
	scratchLeaf auditEmptyOnto: report for: self.
	walker auditOnto: report for: self.
	counted := rootNode
		auditOnto: report
		for: self
		lowestHash: 0
		highestHash: SmallInteger maximumValue.
	counted = tally
		ifFalse: [
			mismatch := 'Root tally mis-match: tally is ' , tally printString
				, ' but counting nodes gives ' , counted printString.
			report
				nextPutAll: mismatch;
				lf ].
	report position = cleanPosition
		ifTrue: [ ^ true ].
	^ report contents
]

{ #category : 'node access' }
TreeSet >> decrementTally [
	"Record the removal of one element by decreasing tally by one.
	Intended to be sent only by my tree's leaf nodes.
	If tally is already zero, signal an error and leave tally unchanged,
	so that a failed decrement can never leave me with a negative tally."

	tally > 0
		ifFalse: [
			self error: 'decrementTally when already empty'.
			^ self ].
	tally := tally - 1
]

{ #category : 'enumerating' }
TreeSet >> do: unaryBlock [

	"Evaluate unaryBlock once for each element I contain,
	in no particular order, with the element as the argument
	to the block. The enumeration is delegated to rootNode."

	rootNode do: unaryBlock
]

{ #category : 'private' }
TreeSet >> errorNotFound: anObject [
	"Private. Report the #objErrNotInColl error, with anObject as its argument,
	to indicate that an expected object was not found in me.
	Answer whatever the error report answers."

	^ self _error: #'objErrNotInColl' args: {anObject}
]

{ #category : 'node access' }
TreeSet >> heap [
	"Answer my cached heap, which is created in #initializeDbTransients."

	^ heap
]

{ #category : 'private' }
TreeSet >> heapSize [
	"Private. Answer the size passed to HtHeap new: when my heap is created
	in #initializeDbTransients, which notes that it must be greater than the
	fillLine of a leaf."

	^ 1600
]

{ #category : 'private' }
TreeSet >> highestHash [
	"Private. Answer 2 ** 60 - 1, the value #splitRootWith: uses as the
	highest hash of a new root node."

	^ 16rFFFFFFFFFFFFFFF
]

{ #category : 'testing' }
TreeSet >> includes: key [
	"Answer true if one of my elements is #= to key, false otherwise.
	nil cannot be an element; asking about nil reports the #rtErrNilKey error."

	| permuted cachedWalker answer |
	key ifNil: [ ^ self _error: #'rtErrNilKey' ].
	permuted := self permutedHashOf: key.
	"Hold the walker in a temp so it stays in memory until this method returns."
	cachedWalker := walker.
	"The walker might have dropped from memory since last use, so re-attach it."
	cachedWalker collection: self.
	answer := cachedWalker
		searchTree: rootNode
		forValueAt: key
		withHash: permuted.
	cachedWalker reset.
	^ answer
]

{ #category : 'testing' }
TreeSet >> includesIdentical: key [
	"Answer true if one of my elements is #== to key, false otherwise.
	nil cannot be an element; asking about nil reports the #rtErrNilKey error."

	| permuted cachedWalker isSameObject |
	key ifNil: [ ^ self _error: #'rtErrNilKey' ].
	permuted := self permutedHashOf: key.
	"Hold the walker in a temp so it stays in memory until this method returns."
	cachedWalker := walker.
	"The walker might have dropped from memory since last use, so re-attach it."
	cachedWalker collection: self.
	"Only consult the walker's value when the equality search succeeded."
	isSameObject := (cachedWalker searchTree: rootNode forValueAt: key withHash: permuted)
		and: [ cachedWalker value == key ].
	cachedWalker reset.
	^ isSameObject
]

{ #category : 'testing' }
TreeSet >> includesValue: key [
  "Answer the result of #includes: for key; this selector is a synonym."

  ^ self includes: key
]

{ #category : 'node access' }
TreeSet >> incrementTally [
	"Record the addition of one element by increasing tally by one.
	Intended to be sent only by leaf nodes. The original note names
	TdLeafNode; presumably this means the class answered by #leafNodeClass
	-- TODO confirm."

	tally := tally + 1
]

{ #category : 'initialization' }
TreeSet >> initialize [
	"Set me up as an empty collection. While I am sufficiently small,
	my root node is a single leaf."

	| leafClass |
	super initialize.
	self initializeDbTransients.
	tally := 0.
	leafClass := self leafNodeClass.
	rootNode := leafClass
		forCollection: self
		lowestHash: 0
		highestHash: SmallInteger maximumValue
]

{ #category : 'private' }
TreeSet >> initializeDbTransients [
	"Private. Create my cached helper objects: walker, heap and scratchLeaf.
	Sent from #initialize and #postCopy."

	| scratchClass |
	"Cache a walker to avoid creating garbage walkers."
	walker := self walkerClass forCollection: self.
	"Size must be greater than fillLine of a leaf."
	heap := HtHeap new: self heapSize.
	"In leaf splitting, used as a temporary leaf, to avoid creating garbage."
	scratchClass := self scratchLeafNodeClass.
	scratchLeaf := scratchClass
		forCollection: self
		lowestHash: 0
		highestHash: SmallInteger maximumValue
]

{ #category : 'private' }
TreeSet >> internalNodeClass [
	"Private. Answer the class that #splitRootWith: instantiates when it
	creates a new root node."

	^ HtSetInternalNode
]

{ #category : 'private' }
TreeSet >> leafNodeClass [
	"Private. Answer the class that #initialize instantiates for my initial
	root node."

	^ HtSetLeafNode
]

{ #category : 'accessing' }
TreeSet >> permutedHashOf: anObject [
	"Answer the permuted hash under which anObject is filed in my tree.

	Why permute: searching internal nodes and splitting leaf nodes both rely on
	'good guesses' about where in a node a given hash value lives. Those guesses
	are only close most of the time when hash values are spread roughly evenly
	over the whole non-negative SmallInteger range, [0..2^60). Raw answers to
	#hash are generally not spread that way: String hashes are only 24 bits,
	and the hashes of numbers are limited to about 30 bits and may be negative.

	What is done: the absolute value of anObject's #hash is run through a
	permutation of [0..2^60). It is a full permutation with no loss of
	uniqueness: each of the 2^60 possible inputs produces a unique output in
	the same range, just scrambled so that the permuted hashes I use internally
	are *much* more likely to be roughly evenly distributed across the range.

	The permutation is

		f(x) = dx^2 + ax + c (mod 2^60)

	with
		a = 10699279521569479
		c = 7836386368351
		d = 7952157022

	which, because Smalltalk evaluates binary operators strictly left to right,
	is equivalent to

		(x * d + a * x + c) bitAnd: 16rFFFFFFFFFFFFFFF"

	^ anObject hash abs
		permutedHashA: 10699279521569479
		c: 7836386368351
		d: 7952157022
]

{ #category : 'copying' }
TreeSet >> postCopy [
	"Finish making a copy. First create fresh cached helpers for the copy,
	then replace rootNode with the answer to #copyForCollection: sent to it
	with the copy as argument. Presumably this is so that the copy does not
	share tree nodes with the original -- TODO confirm against the node classes."

	self initializeDbTransients.
	rootNode := rootNode copyForCollection: self
]

{ #category : 'removing' }
TreeSet >> remove: key [
	"Remove my element that is #= to key and answer key.
	If I have no such element (which is always the case for nil),
	report the not-found error via #errorNotFound:.
	Deliberately does not delegate to #remove:ifAbsent:, to avoid creating
	a complex block."

	| permuted cachedWalker wasPresent |
	key ifNil: [ ^ self errorNotFound: key ].
	permuted := self permutedHashOf: key.
	"Hold the walker in a temp so it stays in memory until this method returns."
	cachedWalker := walker.
	"The walker might have dropped from memory since last use, so re-attach it."
	cachedWalker collection: self.
	"A non-nil answer from the removal search is the node that replaces my root."
	(cachedWalker searchTree: rootNode removeKey: key withHash: permuted)
		ifNotNil: [ :replacementRoot | rootNode := replacementRoot ].
	"Read the outcome before resetting the walker."
	wasPresent := cachedWalker found.
	cachedWalker reset.
	wasPresent
		ifTrue: [ ^ key ].
	^ self errorNotFound: key
]

{ #category : 'removing' }
TreeSet >> remove: key ifAbsent: aNilaryBlock [
	"If I contain an element #= to key, remove it, and answer key.
	If not, answer the result of evaluating aNilaryBlock.
	nil can never be one of my elements, so a nil key is simply absent:
	aNilaryBlock is evaluated rather than an error being reported."

	| hash newRoot theWalker wasPresent |
	key ifNil: [ ^ aNilaryBlock value ].
	hash := self permutedHashOf: key.
	theWalker := walker.	"Ensure walker stays in memory until this method returns."
	theWalker collection: self.	"Might have dropped from memory since last use."
	"A non-nil answer from the removal search is the node that replaces my root."
	newRoot := theWalker searchTree: rootNode removeKey: key withHash: hash.
	newRoot ifNotNil: [ rootNode := newRoot ].
	"Read the outcome before resetting the walker, and reset before running
	caller code so the walker is clean if aNilaryBlock uses me again."
	wasPresent := theWalker found.
	theWalker reset.
	^ wasPresent
		ifTrue: [ key ]
		ifFalse: [ aNilaryBlock value ]
]

{ #category : 'removing' }
TreeSet >> removeIdentical: key ifAbsent: anExceptionBlock [
	"If I contain an element #== to key, remove it and answer key.
	Otherwise answer the result of evaluating anExceptionBlock.
	nil cannot be an element; a nil key reports the #rtErrNilKey error.

	The work is done in two searches: the first locates the element #= to key
	and checks that it is the identical object; the second removes it.
	The cached walker is reset after each search, on every exit path, so that
	it is never left holding state from this operation."

	| hash theWalker isPresent newRoot wasRemoved |
	key ifNil: [ ^ self _error: #'rtErrNilKey' ].	"I cannot contain nil."
	hash := self permutedHashOf: key.
	theWalker := walker.	"Ensure walker stays in memory until this method returns."
	theWalker collection: self.	"Might have dropped from memory since last use."
	"Only consult the walker's value when the equality search succeeded."
	isPresent := (theWalker searchTree: rootNode forValueAt: key withHash: hash)
		and: [ theWalker value == key ].
	theWalker reset.
	isPresent ifFalse: [ ^ anExceptionBlock value ].
	"A non-nil answer from the removal search is the node that replaces my root."
	newRoot := theWalker searchTree: rootNode removeKey: key withHash: hash.
	newRoot ifNotNil: [ rootNode := newRoot ].
	"Read the outcome before resetting the walker."
	wasRemoved := theWalker found.
	theWalker reset.
	wasRemoved ifFalse: [ ^ Error signal: 'inconsistent search for remove' ].
	^ key
]

{ #category : 'test access' }
TreeSet >> rootNode [
	"Answer the root node of my tree. Categorized as test access."

	^ rootNode
]

{ #category : 'node access' }
TreeSet >> scratchLeaf [
	"Answer my cached scratch leaf, which is created in #initializeDbTransients."

	^ scratchLeaf
]

{ #category : 'private' }
TreeSet >> scratchLeafNodeClass [
	"Private. Answer the class that #initializeDbTransients instantiates for
	my scratch leaf."

	^ HtSetScratchLeafNode
]

{ #category : 'accessing' }
TreeSet >> size [
	"Answer the number of elements I contain, as kept in tally."

	^ tally
]

{ #category : 'private' }
TreeSet >> splitRootWith: newNode [
	"Private. Make the tree one level deeper: create a new internal node whose
	children are the current root followed by newNode, and install it as my root."

	| topHash deeperRoot |
	topHash := self highestHash.
	deeperRoot := self internalNodeClass forCollection: self.
	deeperRoot appendSortedChild: rootNode.
	deeperRoot appendSortedChild: newNode.
	deeperRoot appendHash: topHash.
	deeperRoot highestHash: topHash.
	deeperRoot computeConstants.
	rootNode := deeperRoot
]

{ #category : 'private' }
TreeSet >> walkerClass [
	"Private. Answer the class that #initializeDbTransients instantiates for
	my cached walker."

	^ HtSetTreeWalker
]
