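The lexer scans the source code and splits it into tokens. Tokens are matched using regular expressions: when more than one expression matches, the longest match is preferred, and among matches of equal length the expression that comes first in order wins. Lexer states determine which expressions may be matched at a given point in the input.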
/* Example program: defines readline, declares a Person class whose init
   method asks for a name, then creates a Person and greets it. */
let
  /* readline: returns a string built from successive getchar() results.
     Leading " " characters are skipped; reading stops at the first "\n",
     which is not included in the returned string.
     NOTE(review): no end-of-input check is visible here; confirm what
     getchar() returns at end of input and whether the second loop can
     terminate in that case. */
  function readline():string =
    /* strips leading spaces */
    let
      /* ret accumulates the characters of the line. */
      var ret:string := ""
      /* buffer holds the most recently read character. */
      var buffer := getchar()
    in
      (
        /* Discard every leading " ". */
        while buffer = " " do buffer := getchar();
        /* Append each character to ret until "\n" is read. */
        while buffer <> "\n" do (ret := concat(ret, buffer); buffer := getchar());
        /* The value of the sequence, and so of the function, is ret. */
        ret
      )
    end

  /* Person: a class extending Object with a single string field. */
  class Person extends Object {
    /* name starts as "anonymous" until init() replaces it. */
    var name:string := "anonymous"
    /* init: prints the prompt, then stores the result of readline() in name. */
    method init() = (print("What is your name: "); name := readline())
  }

  /* The instance used by the main body below. */
  var you := new Person
in
  /* Ask for the name, then print "hello, ", the name and a newline. */
  you.init(); print("hello, "); print(you.name); print("\n")
end
Input
State
initial
LET FUNCTION ID "readline" LPAREN RPAREN COLON ID "string" EQ LET VAR ID "ret" COLON ID "string" ASSIGN STRING "" VAR ID "buffer" ASSIGN ID "getchar" LPAREN RPAREN IN LPAREN WHILE ID "buffer" EQ STRING "" DO ID "buffer" ASSIGN ID "getchar" LPAREN RPAREN SEMICOLON WHILE ID "buffer" NEQ STRING "" DO LPAREN ID "ret" ASSIGN ID "concat" LPAREN ID "ret" COMMA ID "buffer" RPAREN SEMICOLON ID "buffer" ASSIGN ID "getchar" LPAREN RPAREN RPAREN SEMICOLON ID "ret" RPAREN END CLASS ID "Person" EXTENDS ID "Object" LBRACE VAR ID "name" COLON ID "string" ASSIGN STRING "" METHOD ID "init" LPAREN RPAREN EQ LPAREN ID "print" LPAREN STRING "" RPAREN SEMICOLON ID "name" ASSIGN ID "readline" LPAREN RPAREN RPAREN RBRACE VAR ID "you" ASSIGN NEW ID "Person" IN ID "you" DOT ID "init" LPAREN RPAREN SEMICOLON ID "print" LPAREN STRING "" RPAREN SEMICOLON ID "print" LPAREN ID "you" DOT ID "name" RPAREN SEMICOLON ID "print" LPAREN STRING "" RPAREN END