
GemStone Smalltalk Formal Syntax
--------------------------------

This is a complete BNF description of GemStone Smalltalk.

Here are a few notes about interpreting the grammar:

A = expr	This defines the syntactic production `A' in terms of the
		expression on the right side of the equals sign.

B = C | D	The vertical bar `|' defines alternatives. In this case, the
		production "B" is one of either "C" or "D".

C = '<'		A symbol enclosed in quotes is a literal symbol.

D = F G		A sequence of two or more productions means the
		productions in the order of their appearance.

E = [ A ]	Brackets indicate optional productions.

F = { B }	Braces indicate zero or more occurrences of the productions
		contained within.

G = A | (B|C)	Parentheses can be used to remove ambiguity.

GemStone Smalltalk BNF
----------------------

ArrayBuilder = '#[' [ AExpression { ',' AExpression } ] ']'
  [NOTE The ArrayBuilder production exists only if
  ( System configurationAt:#GemConvertArrayBuilder )==true,
  and incoming ArrayBuilder's in the source code will be converted
  to CurlyArrayBuilder syntax by automatic execution of
  preprocessor methods in GsNMethod(C), and the sourceString
  for the resulting GsNMethod's will contain the converted syntax.]

Assignment = VariableName ':=' Statement |  VariableName ' _ ' Statement
  [NOTE white space is allowed with ':=' and required with '_']

AExpression = Primary [ AMessage { ';' ACascadeMessage } ]

ABinaryMessage = [ EnvSpecifier ] ABinarySelector Primary [ UnaryMessages ]

ABinaryMessages = ABinaryMessage { ABinaryMessage }

ACascadeMessage = UnaryMessage | ABinaryMessage | AKeyWordMessage

AKeyWordMessage = [ EnvSpecifier ] AKeyWordPart { AKeyWordPart }

AKeyWordPart = KeyWord Primary UnaryMessages { ABinaryMessage }

AMessage = [ UnaryMessages ] [ ABinaryMessages ] [ AKeyWordMessage ]

Array = '(' { ArrayItem } ')'

ArrayLiteral = '#' Array

ArrayItem = Number | SymbolArrayItem | SymbolLiteral | StringLiteral |
            CharacterLiteral | Array | ArrayLiteral | ByteArrayLiteral

BinaryMessage = [ EnvSpecifier ] BinarySelector Primary [ UnaryMessages ]

BinaryMessages = BinaryMessage { BinaryMessage }

BinaryPattern = BinarySelector VariableName

Block = '[' [ BlockParameters ] [ Temporaries ] Statements ']'

BlockParameters = { Parameter } '|'

ByteArrayLiteral = '#' '[' [ Number { Number } ] ']'
  [NOTE The ByteArrayLiteral production exists only if
  ( System configurationAt:#GemConvertArrayBuilder )==false.]

CascadeMessage = UnaryMessage | BinaryMessage | KeyWordMessage

CurlyArrayBuilder = '{' [ AExpression { '.' AExpression } ] '}'

Expression = Primary [ Message { ';' CascadeMessage } ]

KeyWordMessage = [ EnvSpecifier ] KeyWordPart { KeyWordPart }

KeyWordPart = KeyWord Primary UnaryMessages { BinaryMessage }

KeyWordPattern = KeyWord VariableName { KeyWord VariableName }

Literal = Number | StringLiteral | CharacterLiteral |
          SymbolLiteral | ArrayLiteral | SpecialLiteral | ByteArrayLiteral

Message = [ UnaryMessages ] [ BinaryMessages ] [ KeyWordMessage ]

MessagePattern = UnaryPattern | BinaryPattern | KeyWordPattern

Method = MessagePattern [ Primitive ] MethodBody

MethodBody = [ Pragmas ] [ Temporaries ] [ Statements ]

Operand = Path | Literal | Identifier

Operator = '=' | '==' | '<' | '>' | '<=' | '>=' | '~=' | '~~'

ParenStatement = '(' Statement ')'

Predicate = ( AnyTerm | ParenTerm ) { '&' Term }

Primary = ArrayBuilder | CurlyArrayBuilder | Literal | Path | Block | SelectionBlock |
          ParenStatement | VariableName

Primitive = '<' [ 'protected' | 'unprotected' ] [ 'primitive:' Digits ] '>'

Pragmas =   Pragma [ Pragma ]

Pragma = '<' PragmaBody '>'

PragmaBody =  UnaryPragma | KeyWordPragma

UnaryPragma  = SpecialLiteral | UnaryPragmaIdentifier

KeyWordPragma = PragmaPair [ PragmaPair ]

PragmaPair =  ( KeyWordNotPrimitive | BinarySelector ) PragmaLiteral

UnaryPragmaIdentifier is any Identifier  except 'protected' , 'unprotected' , 'requiresVc'

PragmaLiteral = Number | StringLiteral | CharacterLiteral |
		SymbolLiteral | SpecialLiteral

SelectionBlock = '{' Parameter '|' Predicate '}'

Statement = Assignment | Expression

Statements = { [ Pragmas] { Statement '.' } } [ Pragmas ] [ ['^'] Statement ['.' [ Pragmas ] ]]

Temporaries = '|' { VariableName } '|'

ParenTerm = '(' AnyTerm ')'

Term = ParenTerm | Operand

AnyTerm = Operand [ Operator Operand ]

UnaryMessage = [ EnvSpecifier ] Identifier

UnaryMessages = { UnaryMessage }

UnaryPattern = Identifier


GemStone Smalltalk Lexical Tokens
---------------------------------
The following are lexical tokens.
No white space is allowed within lexical tokens.

ABinarySelector = any BinarySelector except comma

BinaryExponent = ( 'e' | 'E' | 'd' | 'D' | 'q' ) ['-'] Digits

BinarySelector =  SelectorCharacter { SelectorCharacter }

Character = Any Ascii character with ordinal value 0..255

CharacterLiteral = '$' Character

Comment = '"' { Character } '"'

DecimalExponent = ( 'f' | 'F' ) [ '-' ] Digits

Digit = '0' | '1' | '2' | ... | '9'

Digits = Digit {Digit}

EndOfSource = the end of the method source string

Exponent = BinaryExponent | DecimalExponent | ScaledDecimalExponent | FixedPointExponent

FractionalPart = '.' Digits [ Exponent ]

FixedPointExponent =  'p' [ [ '-' ] Digits ]
  [NOTE: if FixedPointExponent has no Digits, then a peek on the
   source stream by the lexer must yield a ScdExponTerminator ].

Identifier =  SingleLetterIdentifier | MultiLetterIdentifier

KeyWord = Identifier ':'

KeyWordNotPrimitive is any KeyWord other than 'primitive:'

AlphaLetter = 'A' | 'B' | ... | 'Z' | 'a' | 'b' | ... | 'z'

Letter = 'A' | 'B' | ... | 'Z' | 'a' | 'b' | ... | 'z' | '_'

MultiLetterIdentifier = Letter ( Letter | Digit ) { Letter | Digit }

Number = [ '-' ] RadixedLiteral | [ '-' ] NumericLiteral | RadixedLiteralNegBody

Numeric = Digit | 'A' | 'B' | ... | 'Z'

NumericLiteral = Digits ( [ FractionalPart ] | [ Exponent ] )

Numerics = Numeric { Numeric }

Parameter = ':' VariableName      [NOTE: white space allowed between : and variableName ]

Path = Identifier '.' PathIdentifier { '.' PathIdentifier }

PathIdentifier  =  Identifier |  '*'

EnvSpecifier = '@env' Digits ':'    [NOTE: no white space before or after Digits]

RadixedLiteral = Digits ( '#' | 'r' ) Numerics

RadixedLiteralNegBody =  Digits ( '#' | 'r' ) '-' Numerics

ScaledDecimalExponent =  's' [ ['-' ] Digits ]
  [NOTE: if ScaledDecimalExponent has no Digits, then a peek on the
   source stream by the lexer must yield a ScdExponTerminator ].

ScdExponTerminator = '"' | WhiteSpace | ',' | ')' | ']' | '}' | '.' | ';' | EndOfSource

SelectorCharacter = '+' | '-' | '\' | '*' | '~' | '<' | '>' | '='
            | '|' | '/' | '&' | '@' | '%' | ',' | '?' | '!'

SingleLetterIdentifier =  AlphaLetter

SpecialLiteral = 'true' | 'false' | 'nil' | '_remoteNil'

StringLiteral = "'" { Character | "''" } "'"

Symbol = Identifier | BinarySelector | ( KeyWord { KeyWord } )

SymbolArrayItem = Identifier | ( KeyWord { KeyWord } )

SymbolLiteral = '#' ( Symbol | StringLiteral )

VariableName = Identifier
