
GemStone Smalltalk Formal Syntax
--------------------------------

This is a complete BNF description of GemStone Smalltalk
$Id$


Here are a few notes about interpreting the grammar:

A = expr	This defines the syntactic production `A' in terms of the 
		expression on the right side of the equals sign.

B = C | D	The vertical bar `|' defines alternatives. In this case, the 
		production "B" is one of either "C" or "D".

C = '<'		A symbol in single quotes is a literal symbol.

D = F G		A sequence of two or more productions means the 
		productions in the order of their appearance.

E = [ A ]	Brackets indicate optional productions.

F = { B }	Braces indicate zero or more occurrences of the productions 
		contained within.

G = A | (B|C)	Parentheses can be used to remove ambiguity.

GemStone Smalltalk BNF
----------------------

ArrayBuilder = '#[' [ AExpression { ',' AExpression } ] ']'
  [NOTE The ArrayBuilder production exists only if 
  ( System configurationAt:#GemConvertArrayBuilder )==true,
  and incoming ArrayBuilders in the source code will be converted
  to CurlyArrayBuilder syntax by automatic execution of
  preprocessor methods in GsNMethod(C), and the sourceString
  for the resulting GsNMethods will contain the converted syntax.]

Assignment = VariableName ':=' Statement |  VariableName ' _ ' Statement
  [NOTE white space is allowed with ':=' and required with '_']

AExpression = Primary [ AMessage { ';' ACascadeMessage } ]

ABinaryMessage = [ EnvSpecifier | RubyEnvSpecifier ] ABinarySelector Primary [ UnaryMessages ]

ABinaryMessages = ABinaryMessage { ABinaryMessage }

ACascadeMessage = UnaryMessage | ABinaryMessage | AKeyWordMessage

AKeyWordMessage = [ EnvSpecifier | RubyEnvSpecifier ] AKeyWordPart { AKeyWordPart }

AKeyWordPart = KeyWord Primary UnaryMessages { ABinaryMessage }

AMessage = [ UnaryMessages ] [ ABinaryMessages ] [ AKeyWordMessage ]

Array = '(' { ArrayItem } ')'

ArrayLiteral = '#' Array

ArrayItem = Number | SymbolArrayItem | SymbolLiteral | StringLiteral |
            CharacterLiteral | Array | ArrayLiteral

BinaryMessage = [ EnvSpecifier | RubyEnvSpecifier ] BinarySelector Primary [ UnaryMessages ]

BinaryMessages = BinaryMessage { BinaryMessage }

BinaryPattern = BinarySelector VariableName

Block = '[' [ BlockParameters ] [ Temporaries ] Statements ']'

BlockParameters = { Parameter } '|'

ByteArrayLiteral = '#' '[' [ Number { Number } ] ']'
  [NOTE The ByteArrayLiteral production exists only if 
  ( System configurationAt:#GemConvertArrayBuilder )==false.]

CascadeMessage = UnaryMessage | BinaryMessage | KeyWordMessage

CurlyArrayBuilder = '{' [ AExpression { '.' AExpression } ] '}'

Expression = Primary [ Message { ';' CascadeMessage } ]

KeyWordMessage = [ EnvSpecifier | RubyEnvSpecifier ] KeyWordPart { KeyWordPart }

KeyWordPart = KeyWord Primary UnaryMessages { BinaryMessage }

KeyWordPattern = KeyWord VariableName { KeyWord VariableName }

Literal = Number | NegNumber | StringLiteral | CharacterLiteral |
          SymbolLiteral | ArrayLiteral | SpecialLiteral | ByteArrayLiteral

Message = [ UnaryMessages ] [ BinaryMessages ] [ KeyWordMessage ]

MessagePattern = UnaryPattern | BinaryPattern | KeyWordPattern

Method = MessagePattern [ Primitive ] MethodBody

MethodBody = [ Pragmas ] [ Temporaries ] [ Statements ]

NegNumber = '-' Number

Operand = Path | Literal | Identifier

Operator = '=' | '==' | '<' | '>' | '<=' | '>=' | '~=' | '~~'

ParenStatement = '(' Statement ')'

Predicate = ( AnyTerm | ParenTerm ) { '&' Term }

Primary = ArrayBuilder | CurlyArrayBuilder | Literal | Path | Block | SelectionBlock |
          ParenStatement | VariableName 

Primitive = '<' [ 'protected' | 'unprotected' ] [ 'primitive:' Digits ] '>'

Pragmas =   Pragma [ Pragma ]

Pragma = '<' PragmaBody '>'

PragmaBody =  UnaryPragma | KeyWordPragma

UnaryPragma  = SpecialLiteral | UnaryPragmaIdentifier

KeyWordPragma = PragmaPair [ PragmaPair ]

PragmaPair =  ( KeyWordNotPrimitive | BinarySelector ) PragmaLiteral

UnaryPragmaIdentifier is any Identifier  except 'protected' , 'unprotected' , 'requiresVc'

PragmaLiteral = Number | NegNumber | StringLiteral | CharacterLiteral |
		SymbolLiteral | SpecialLiteral

SelectionBlock = '{' Parameter '|' Predicate '}'

Statement = Assignment | Expression

Statements = { [ Pragmas] { Statement '.' } } [ Pragmas ] [ ['^'] Statement ['.' [ Pragmas ] ]] 

Temporaries = '|' { VariableName } '|'

ParenTerm = '(' AnyTerm ')'

Term = ParenTerm | Operand

AnyTerm = Operand [ Operator Operand ]

UnaryMessage = [ EnvSpecifier | RubyEnvSpecifier ] Identifier

UnaryMessages = { UnaryMessage }

UnaryPattern = Identifier


GemStone Smalltalk Lexical Tokens
---------------------------------
The following are lexical tokens. 
No white space is allowed within lexical tokens.

ABinarySelector = any BinarySelector except comma

BinaryExponent = ( 'e' | 'E' | 'd' | 'D' | 'q' ) ['-'] Digits

BinarySelector =  SelectorCharacter { SelectorCharacter }

Character = Any Ascii character with ordinal value 0..255

CharacterLiteral = '$' Character

Comment = '"' { Character } '"'

DecimalExponent = ( 'f' | 'F' ) [ '-' ] Digits

Digit = '0' | '1' | '2' | ... | '9'

Digits = Digit {Digit}

EndOfSource = the end of the method source string

Exponent = BinaryExponent | DecimalExponent | ScaledDecimalExponent | FixedPointExponent

FractionalPart = '.' Digits [ Exponent ]

FixedPointExponent =  'p' [ [ '-' ] Digits ] 
  [NOTE: if FixedPointExponent has no Digits, then a peek on the
   source stream by the lexer must yield a ScdExponTerminator ].

Identifier =  SingleLetterIdentifier | MultiLetterIdentifier 

KeyWord = Identifier ':'

KeyWordNotPrimitive is any KeyWord other than 'primitive:'

Letter = 'A' | 'B' | ... | 'Z' | 'a' | 'b' | ... | 'z' | '_'

MultiLetterIdentifier = Letter ( Letter | Digit ) { Letter | Digit }

Number = RadixedLiteral | NumericLiteral

Numeric = Digit | 'A' | 'B' | ... | 'Z'

NumericLiteral = Digits ( [ FractionalPart ] | [ Exponent ] )

Numerics = Numeric { Numeric }

Parameter = ':' VariableName      [NOTE: white space allowed between ':' and VariableName ]

Path = Identifier '.' PathIdentifier { '.' PathIdentifier }

PathIdentifier  =  Identifier |  '*'

EnvSpecifier = '@env' Digits ':'    [NOTE: no white space before or after Digits]

RubyEnvSpecifier = '@ruby' Digits ':'    [NOTE: no white space before or after Digits]

  [NOTE , after a RubyEnvSpecifier ,   
       '<=>:' and '==:'  are allowed as keyword tokens .
       '?' and '!'   are allowed within keyword tokens .
       The keyword '__each__:' is translated to 'each&' in the generated code.  
       Second and subsequent keywords '_:' , '__BLOCK:', '__STAR:' 
       are used to generate the ruby selector suffix.  
       __STAR: may occur only once, and must be after any _:  .  
       __BLOCK: may occur only once and must be after any _: and after any  __STAR: .
       Ruby selectors #'[]' and #'[]=' are not supported after @ruby .
  ]

RadixedLiteral = Digits ( '#' | 'r' ) Numerics

ScaledDecimalExponent =  's' [ ['-' ] Digits ]
  [NOTE: if ScaledDecimalExponent has no Digits, then a peek on the
   source stream by the lexer must yield a ScdExponTerminator ].

ScdExponTerminator = '"' | WhiteSpace | ',' | ')' | ']' | '}' | '.' | ';' | EndOfSource

SelectorCharacter = '+' | '-' | '\' | '*' | '~' | '<' | '>' | '=' 
            | '|' | '/' | '&' | '@' | '%' | ',' | '?' | '!' 

SingleLetter = 'A' | 'B' | ... | 'Z' | 'a' | 'b' | ... | 'z' 

SingleLetterIdentifier =  SingleLetter

SpecialLiteral = 'true' | 'false' | 'nil' | '_remoteNil' 

StringLiteral = "'" { Character | "''" } "'"

Symbol = Identifier | BinarySelector | ( KeyWord { KeyWord } )

SymbolArrayItem = Identifier | ( KeyWord { KeyWord } )

SymbolLiteral = '#' ( Symbol | StringLiteral )

VariableName = Identifier
