Forráskód Böngészése

[executable semantics] Use type-safe variant in C++ parser (#364)

We're now one step away from eliminating bare pointers in semantic actions.

There are plenty of other cleanups and modernizations that can be made in the
parser, but the elimination of bare pointers is the one that has the highest
impact for the codebase.
Dave Abrahams 5 éve
szülő
commit
120e0145ce

+ 121 - 61
executable_semantics/syntax/lexer.lpp

@@ -6,6 +6,7 @@ SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
 %{
 #include <cstdlib>
+#include <iostream>
 #include "executable_semantics/syntax/parse_and_lex_context.h"
 %}
 
@@ -17,79 +18,138 @@ SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 */
 %option yylineno
 
-AND      "and"
-ARROW    "->"
-AUTO     "auto"
-BOOL     "Bool"
-BREAK    "break"
-CASE     "case"
-CHOICE   "choice"
-COMMENT  \/\/[^\n]*\n
-CONTINUE "continue"
-DBLARROW "=>"
-DEFAULT  "default"
-ELSE     "else"
-EQUAL    "=="
-FALSE    "false"
-FN       "fn"
-FNTY     "fnty"
-IF       "if"
-INT      "Int"
-MATCH    "match"
-NOT      "not"
-OR       "or"
-RETURN   "return"
-STRUCT   "struct"
-TRUE     "true"
-TYPE     "Type"
-VAR      "var"
-WHILE    "while"
+AND               "and"
+ARROW             "->"
+AUTO              "auto"
+BOOL              "Bool"
+BREAK             "break"
+CASE              "case"
+CHOICE            "choice"
+ONE_LINE_COMMENT  \/\/[^\n]*\n
+CONTINUE          "continue"
+DBLARROW          "=>"
+DEFAULT           "default"
+ELSE              "else"
+EQUAL_EQUAL       "=="
+FALSE             "false"
+FN                "fn"
+FNTY              "fnty"
+IF                "if"
+INT               "Int"
+MATCH             "match"
+NOT               "not"
+OR                "or"
+RETURN            "return"
+STRUCT            "struct"
+TRUE              "true"
+TYPE              "Type"
+VAR               "var"
+WHILE             "while"
 
 identifier    [A-Za-z_][A-Za-z0-9_]*
 integer_literal   [0-9]+
+horizontal_whitespace [ \t\r]
+
+%{
+  // This macro is expanded to run each time a token is recognized.
+  //
+  // Advances the current token position by yyleng columns without changing
+  // the line number.
+  # define YY_USER_ACTION context.currentTokenPosition.columns(yyleng);
+%}
 
 %%
 
-{AND}      { return yy::parser::token::AND; }
-{ARROW}    { return yy::parser::token::ARROW; }
-{AUTO}     { return yy::parser::token::AUTO; }
-{BOOL}     { return yy::parser::token::BOOL; }
-{BREAK}    { return yy::parser::token::BREAK; }
-{CASE}     { return yy::parser::token::CASE; }
-{CHOICE}   { return yy::parser::token::CHOICE; }
-{COMMENT}  ;
-{CONTINUE} { return yy::parser::token::CONTINUE; }
-{DBLARROW} { return yy::parser::token::DBLARROW; }
-{DEFAULT}  { return yy::parser::token::DEFAULT; }
-{ELSE}     { return yy::parser::token::ELSE; }
-{EQUAL}    { return yy::parser::token::EQUAL; }
-{FALSE}    { return yy::parser::token::FALSE; }
-{FN}       { return yy::parser::token::FN; }
-{FNTY}     { return yy::parser::token::FNTY; }
-{IF}       { return yy::parser::token::IF; }
-{INT}      { return yy::parser::token::INT; }
-{MATCH}    { return yy::parser::token::MATCH; }
-{NOT}      { return yy::parser::token::NOT; }
-{OR}       { return yy::parser::token::OR; }
-{RETURN}   { return yy::parser::token::RETURN; }
-{STRUCT}   { return yy::parser::token::STRUCT; }
-{TRUE}     { return yy::parser::token::TRUE; }
-{TYPE}     { return yy::parser::token::TYPE; }
-{VAR}      { return yy::parser::token::VAR; }
-{WHILE}    { return yy::parser::token::WHILE; }
+%{
+  // Code run each time yylex is called.
+
+  // Begin with an empty token span starting where its previous end was.
+  context.currentTokenPosition.step();
+%}
+
+{AND}      { return yy::parser::make_AND(context.currentTokenPosition); }
+{ARROW}    { return yy::parser::make_ARROW(context.currentTokenPosition); }
+{AUTO}     { return yy::parser::make_AUTO(context.currentTokenPosition); }
+{BOOL}     { return yy::parser::make_BOOL(context.currentTokenPosition); }
+{BREAK}    { return yy::parser::make_BREAK(context.currentTokenPosition); }
+{CASE}     { return yy::parser::make_CASE(context.currentTokenPosition); }
+{CHOICE}   { return yy::parser::make_CHOICE(context.currentTokenPosition); }
+{CONTINUE} { return yy::parser::make_CONTINUE(context.currentTokenPosition); }
+{DBLARROW} { return yy::parser::make_DBLARROW(context.currentTokenPosition); }
+{DEFAULT}  { return yy::parser::make_DEFAULT(context.currentTokenPosition); }
+{ELSE}     { return yy::parser::make_ELSE(context.currentTokenPosition); }
+"=="       { return yy::parser::make_EQUAL_EQUAL(context.currentTokenPosition); }
+{FALSE}    { return yy::parser::make_FALSE(context.currentTokenPosition); }
+{FN}       { return yy::parser::make_FN(context.currentTokenPosition); }
+{FNTY}     { return yy::parser::make_FNTY(context.currentTokenPosition); }
+{IF}       { return yy::parser::make_IF(context.currentTokenPosition); }
+{INT}      { return yy::parser::make_INT(context.currentTokenPosition); }
+{MATCH}    { return yy::parser::make_MATCH(context.currentTokenPosition); }
+{NOT}      { return yy::parser::make_NOT(context.currentTokenPosition); }
+{OR}       { return yy::parser::make_OR(context.currentTokenPosition); }
+{RETURN}   { return yy::parser::make_RETURN(context.currentTokenPosition); }
+{STRUCT}   { return yy::parser::make_STRUCT(context.currentTokenPosition); }
+{TRUE}     { return yy::parser::make_TRUE(context.currentTokenPosition); }
+{TYPE}     { return yy::parser::make_TYPE(context.currentTokenPosition); }
+{VAR}      { return yy::parser::make_VAR(context.currentTokenPosition); }
+{WHILE}    { return yy::parser::make_WHILE(context.currentTokenPosition); }
+
+"=" return yy::parser::make_EQUAL(context.currentTokenPosition);
+"-" return yy::parser::make_MINUS(context.currentTokenPosition);
+"+" return yy::parser::make_PLUS(context.currentTokenPosition);
+"*" return yy::parser::make_STAR(context.currentTokenPosition);
+"/" return yy::parser::make_SLASH(context.currentTokenPosition);
+"(" return yy::parser::make_LEFT_PARENTHESIS(context.currentTokenPosition);
+")" return yy::parser::make_RIGHT_PARENTHESIS(context.currentTokenPosition);
+"{" return yy::parser::make_LEFT_CURLY_BRACE(context.currentTokenPosition);
+"}" return yy::parser::make_RIGHT_CURLY_BRACE(context.currentTokenPosition);
+"[" return yy::parser::make_LEFT_SQUARE_BRACKET(context.currentTokenPosition);
+"]" return yy::parser::make_RIGHT_SQUARE_BRACKET(context.currentTokenPosition);
+"." return yy::parser::make_PERIOD(context.currentTokenPosition);
+"," return yy::parser::make_COMMA(context.currentTokenPosition);
+";" return yy::parser::make_SEMICOLON(context.currentTokenPosition);
+":" return yy::parser::make_COLON(context.currentTokenPosition);
 
 {identifier} {
   int n = strlen(yytext);
-  yylval->identifier = reinterpret_cast<char*>(malloc((n + 1) * sizeof(char)));
-  strncpy(yylval->identifier, yytext, n + 1);
-  return yy::parser::token::identifier;
+  // Copy the lexeme into a freshly allocated buffer that becomes the token's
+  // semantic value. malloc returns void*, so static_cast is the right
+  // conversion here.
+  auto r = static_cast<char*>(malloc((n + 1) * sizeof(char)));
+  if (r == nullptr) {
+    // Fail loudly instead of letting strncpy write through a null pointer.
+    std::cerr << context.currentTokenPosition
+              << ": out of memory while lexing identifier." << std::endl;
+    std::exit(1);
+  }
+  // n + 1 bytes so that the terminating NUL is copied as well.
+  strncpy(r, yytext, n + 1);
+  return yy::parser::make_identifier(r, context.currentTokenPosition);
 }
+
 {integer_literal} {
-  yylval->integer_literal = atof(yytext);
-  return yy::parser::token::integer_literal;
+  // The integer_literal token carries an int, so parse with an integer
+  // routine rather than atof, which detours through double and is then
+  // narrowed implicitly.
+  const long parsed = std::strtol(yytext, nullptr, 10);
+  // Reject literals that do not survive a round trip through int.
+  if (static_cast<long>(static_cast<int>(parsed)) != parsed) {
+    std::cerr << context.currentTokenPosition << ": integer literal '"
+              << yytext << "' is too large." << std::endl;
+    std::exit(1);
+  }
+  auto r = static_cast<int>(parsed);
+  return yy::parser::make_integer_literal(r, context.currentTokenPosition);
 }
 
-[ \t\n]+ ;
-. { return yytext[0]; }
+{ONE_LINE_COMMENT} {
+  // Advance end by 1 line, resetting the column to zero.
+  context.currentTokenPosition.lines(1);
+  // Make the span empty by setting start to end.
+  context.currentTokenPosition.step();
+}
+
+{horizontal_whitespace}+ {
+  // Make the span empty by setting start to end.
+  context.currentTokenPosition.step();
+}
+
+\n+ {
+  // Advance end by yyleng lines, resetting the column to zero.
+  context.currentTokenPosition.lines(yyleng);
+  // Make the span empty by setting start to end.
+  context.currentTokenPosition.step();
+}
+
+. {
+  std::cerr << context.currentTokenPosition << ": invalid character '"
+            << yytext[0] << "' in source file." << std::endl;
+  std::exit(1);
+}
+
+<<EOF>>    {
+  // Bison 3.6 and later predefine the end-of-input token as YYEOF and would
+  // give us make_YYEOF; here the grammar declares END_OF_FILE itself.
+  return yy::parser::make_END_OF_FILE(context.currentTokenPosition);
+}
 
 %%

+ 5 - 4
executable_semantics/syntax/parse_and_lex_context.h

@@ -23,6 +23,9 @@ class ParseAndLexContext {
   // the given line, to standard error.
   auto PrintDiagnostic(const std::string& message, int lineNumber) -> void;
 
+  // The source range of the token currently being lexed, or most recently lexed.
+  yy::location currentTokenPosition;
+
  private:
   // A path to the file processed, relative to the current working directory
   // when *this is called.
@@ -32,10 +35,8 @@ class ParseAndLexContext {
 }  // namespace Carbon
 
 // Gives flex the yylex prototype we want.
-#define YY_DECL                                        \
-  int yylex(yy::parser::semantic_type* yylval,         \
-            yy::parser::location_type* sourceLocation, \
-            Carbon::ParseAndLexContext& context)
+#define YY_DECL \
+  yy::parser::symbol_type yylex(Carbon::ParseAndLexContext& context)
 
 // Declares yylex for the parser's sake.
 YY_DECL;

+ 62 - 38
executable_semantics/syntax/parser.ypp

@@ -9,9 +9,16 @@
 %require "3.2"
 %language "c++"
 
-// We don't need a separate header for Bison locations yet.
+// We don't need a separate header for Bison locations.
 %define api.location.file none
 
+// Use a type-safe C++ variant for semantic values
+%define api.value.type variant
+
+// Have Bison generate a ‘make_<TOKEN>’ constructor function for each token
+// (e.g. ‘make_AND’, ‘make_identifier’), including ‘make_END_OF_FILE’ for the
+// end of input.
+%define api.token.constructor
+
 //
 // Parameters to the parser and lexer
 //
@@ -64,8 +71,6 @@ void yy::parser::error(
 
 }
 
-%define api.value.type union
-
 %token <int> integer_literal
 %token <char*> identifier
 %type <char*> designator
@@ -89,6 +94,7 @@ void yy::parser::error(
 %type <std::list<std::pair<std::string, Carbon::Expression*>>*> alternative_list
 %type <std::pair<Carbon::Expression*, Carbon::Statement*>*> clause
 %type <std::list<std::pair<Carbon::Expression*, Carbon::Statement*>>*> clause_list
+%token END_OF_FILE 0
 %token AND
 %token OR
 %token NOT
@@ -99,7 +105,7 @@ void yy::parser::error(
 %token FNTY
 %token ARROW
 %token VAR
-%token EQUAL
+%token EQUAL_EQUAL
 %token IF
 %token ELSE
 %token WHILE
@@ -115,13 +121,31 @@ void yy::parser::error(
 %token DBLARROW
 %token DEFAULT
 %token AUTO
-%nonassoc '{' '}'
-%nonassoc ':' ',' DBLARROW
+%token
+  EQUAL  "="
+  MINUS  "-"
+  PLUS   "+"
+  STAR   "*"
+  SLASH  "/"
+  LEFT_PARENTHESIS "("
+  RIGHT_PARENTHESIS ")"
+  LEFT_CURLY_BRACE "{"
+  RIGHT_CURLY_BRACE "}"
+  LEFT_SQUARE_BRACKET "["
+  RIGHT_SQUARE_BRACKET "]"
+  PERIOD "."
+  COMMA ","
+  SEMICOLON ";"
+  COLON ":"
+;
+
+%nonassoc "{" "}"
+%nonassoc ":" "," DBLARROW
 %left OR AND
-%nonassoc EQUAL NOT
-%left '+' '-'
-%left '.' ARROW
-%nonassoc '(' ')' '[' ']'
+%nonassoc EQUAL_EQUAL NOT
+%left "+" "-"
+%left "." ARROW
+%nonassoc "(" ")" "[" "]"
 %start input
 %locations
 %%
@@ -137,9 +161,9 @@ expression:
     { $$ = Carbon::MakeVar(yylineno, $1); }
 | expression designator
     { $$ = Carbon::MakeGetField(yylineno, $1, $2); }
-| expression '[' expression ']'
+| expression "[" expression "]"
     { $$ = Carbon::MakeIndex(yylineno, $1, $3); }
-| expression ':' identifier
+| expression ":" identifier
     { $$ = Carbon::MakeVarPat(yylineno, $3, $1); }
 | integer_literal
     { $$ = Carbon::MakeInt(yylineno, $1); }
@@ -156,11 +180,11 @@ expression:
 | AUTO
     { $$ = Carbon::MakeAutoType(yylineno); }
 | paren_expression { $$ = $1; }
-| expression EQUAL expression
+| expression EQUAL_EQUAL expression
     { $$ = Carbon::MakeBinOp(yylineno, Carbon::Operator::Eq, $1, $3); }
-| expression '+' expression
+| expression "+" expression
     { $$ = Carbon::MakeBinOp(yylineno, Carbon::Operator::Add, $1, $3); }
-| expression '-' expression
+| expression "-" expression
     { $$ = Carbon::MakeBinOp(yylineno, Carbon::Operator::Sub, $1, $3); }
 | expression AND expression
     { $$ = Carbon::MakeBinOp(yylineno, Carbon::Operator::And, $1, $3); }
@@ -168,16 +192,16 @@ expression:
     { $$ = Carbon::MakeBinOp(yylineno, Carbon::Operator::Or, $1, $3); }
 | NOT expression
     { $$ = Carbon::MakeUnOp(yylineno, Carbon::Operator::Not, $2); }
-| '-' expression
+| "-" expression
     { $$ = Carbon::MakeUnOp(yylineno, Carbon::Operator::Neg, $2); }
 | expression tuple
     { $$ = Carbon::MakeCall(yylineno, $1, $2); }
 | FNTY tuple return_type
     { $$ = Carbon::MakeFunType(yylineno, $2, $3); }
 ;
-designator: '.' identifier { $$ = $2; }
+designator: "." identifier { $$ = $2; }
 ;
-paren_expression: '(' field_list ')'
+paren_expression: "(" field_list ")"
     {
      if ($2->fields->size() == 1 &&
          $2->fields->front().first == "" &&
@@ -190,7 +214,7 @@ paren_expression: '(' field_list ')'
       }
     }
 ;
-tuple: '(' field_list ')'
+tuple: "(" field_list ")"
     {
      auto vec = new std::vector<std::pair<std::string,Carbon::Expression*>>(
          $2->fields->begin(), $2->fields->end());
@@ -204,7 +228,7 @@ field:
       fields->push_back(std::make_pair("", $1));
       $$ = Carbon::MakeFieldList(fields);
     }
-| designator '=' pattern
+| designator "=" pattern
     {
       auto fields =
           new std::list<std::pair<std::string, Carbon::Expression*>>();
@@ -220,7 +244,7 @@ field_list:
     }
 | field
     { $$ = $1; }
-| field ',' field_list
+| field "," field_list
     { $$ = Carbon::MakeConsField($1, $3); }
 ;
 clause:
@@ -242,25 +266,25 @@ clause_list:
     { $$ = $2; $$->push_front(*$1); }
 ;
 statement:
-  expression '=' expression ';'
+  expression "=" expression ";"
     { $$ = Carbon::MakeAssign(yylineno, $1, $3); }
-| VAR pattern '=' expression ';'
+| VAR pattern "=" expression ";"
     { $$ = Carbon::MakeVarDef(yylineno, $2, $4); }
-| expression ';'
+| expression ";"
     { $$ = Carbon::MakeExpStmt(yylineno, $1); }
-| IF '(' expression ')' statement optional_else
+| IF "(" expression ")" statement optional_else
     { $$ = Carbon::MakeIf(yylineno, $3, $5, $6); }
-| WHILE '(' expression ')' statement
+| WHILE "(" expression ")" statement
     { $$ = Carbon::MakeWhile(yylineno, $3, $5); }
-| BREAK ';'
+| BREAK ";"
     { $$ = Carbon::MakeBreak(yylineno); }
-| CONTINUE ';'
+| CONTINUE ";"
     { $$ = Carbon::MakeContinue(yylineno); }
-| RETURN expression ';'
+| RETURN expression ";"
     { $$ = Carbon::MakeReturn(yylineno, $2); }
-| '{' statement_list '}'
+| "{" statement_list "}"
     { $$ = Carbon::MakeBlock(yylineno, $2); }
-| MATCH '(' expression ')' '{' clause_list '}'
+| MATCH "(" expression ")" "{" clause_list "}"
     { $$ = Carbon::MakeMatch(yylineno, $3, $6); }
 ;
 optional_else:
@@ -285,20 +309,20 @@ return_type:
     { $$ = $2; }
 ;
 function_definition:
-  FN identifier tuple return_type '{' statement_list '}'
+  FN identifier tuple return_type "{" statement_list "}"
     { $$ = MakeFunDef(yylineno, $2, $4, $3, $6); }
-| FN identifier tuple DBLARROW expression ';'
+| FN identifier tuple DBLARROW expression ";"
     {
       $$ = Carbon::MakeFunDef(yylineno, $2, Carbon::MakeAutoType(yylineno), $3,
                               Carbon::MakeReturn(yylineno, $5));
     }
 ;
 function_declaration:
-  FN identifier tuple return_type ';'
+  FN identifier tuple return_type ";"
     { $$ = MakeFunDef(yylineno, $2, $4, $3, 0); }
 ;
 member:
-  VAR expression ':' identifier ';'
+  VAR expression ":" identifier ";"
     { $$ = MakeField(yylineno, $4, $2); }
 ;
 member_list:
@@ -326,7 +350,7 @@ alternative_list:
       $$ = new std::list<std::pair<std::string, Carbon::Expression*>>();
       $$->push_front(*$1);
     }
-| alternative ',' alternative_list
+| alternative "," alternative_list
     { $$ = $3; $$->push_front(*$1); }
 ;
 declaration:
@@ -334,12 +358,12 @@ declaration:
     { $$ = new Carbon::Declaration(Carbon::FunctionDeclaration{$1}); }
 | function_declaration
     { $$ = new Carbon::Declaration(Carbon::FunctionDeclaration{$1}); }
-| STRUCT identifier '{' member_list '}'
+| STRUCT identifier "{" member_list "}"
     {
       $$ = new Carbon::Declaration(
         Carbon::StructDeclaration{yylineno, $2, $4});
     }
-| CHOICE identifier '{' alternative_list '}'
+| CHOICE identifier "{" alternative_list "}"
     {
       $$ = new Carbon::Declaration(
         Carbon::ChoiceDeclaration{yylineno, $2, std::list(*$4)});