Bläddra i källkod

Express prelude using Carbon syntax (#955)

This should be clearer, more maintainable, and more scalable than building the prelude AST by hand.
Geoff Romer 4 år sedan
förälder
incheckning
3e188c8562

+ 2 - 0
executable_semantics/BUILD

@@ -13,6 +13,8 @@ cc_binary(
     name = "executable_semantics",
     srcs = ["main.cpp"],
     deps = [
+        "//executable_semantics/common:arena",
+        "//executable_semantics/common:nonnull",
         "//executable_semantics/interpreter:exec_program",
         "//executable_semantics/syntax",
         "@llvm-project//llvm:Support",

+ 14 - 0
executable_semantics/ast/expression.cpp

@@ -4,6 +4,7 @@
 
 #include "executable_semantics/ast/expression.h"
 
+#include <map>
 #include <optional>
 
 #include "executable_semantics/common/arena.h"
@@ -17,6 +18,19 @@ namespace Carbon {
 using llvm::cast;
 using llvm::isa;
 
+// Maps the lexed name of an intrinsic (e.g. `__intrinsic_print`) to its
+// enumerator. Raises a fatal compilation error at `source_loc` if `name` lacks
+// the `__intrinsic_` prefix or does not name a known intrinsic.
+auto IntrinsicExpression::FindIntrinsic(std::string_view name,
+                                        SourceLocation source_loc)
+    -> Intrinsic {
+  static constexpr std::string_view Prefix = "__intrinsic_";
+  // Allocated once on first use and never freed.
+  static const auto& intrinsic_map =
+      *new std::map<std::string_view, Intrinsic>({{"print", Intrinsic::Print}});
+  // `remove_prefix` has undefined behavior when asked to drop more characters
+  // than `name` holds, so verify the prefix before stripping it.
+  if (name.substr(0, Prefix.size()) != Prefix) {
+    FATAL_COMPILATION_ERROR(source_loc) << "Unknown intrinsic '" << name << "'";
+  }
+  name.remove_prefix(Prefix.size());
+  auto it = intrinsic_map.find(name);
+  if (it == intrinsic_map.end()) {
+    FATAL_COMPILATION_ERROR(source_loc) << "Unknown intrinsic '" << name << "'";
+  }
+  return it->second;
+}
+
 auto ExpressionFromParenContents(
     Nonnull<Arena*> arena, SourceLocation source_loc,
     const ParenContents<Expression>& paren_contents) -> Nonnull<Expression*> {

+ 14 - 4
executable_semantics/ast/expression.h

@@ -404,19 +404,29 @@ class IntrinsicExpression : public Expression {
     Print,
   };
 
-  explicit IntrinsicExpression(Intrinsic intrinsic)
-      : AstNode(AstNodeKind::IntrinsicExpression,
-                SourceLocation("<intrinsic>", 0)),
-        intrinsic_(intrinsic) {}
+  // Constructs a call to the intrinsic named `intrinsic_name`. The name is
+  // expected to carry the `__intrinsic_` prefix, which FindIntrinsic strips
+  // before lookup; an unrecognized name is a fatal compilation error reported
+  // at `source_loc`. `args` holds the call's arguments.
+  explicit IntrinsicExpression(std::string_view intrinsic_name,
+                               Nonnull<TupleLiteral*> args,
+                               SourceLocation source_loc)
+      : AstNode(AstNodeKind::IntrinsicExpression, source_loc),
+        intrinsic_(FindIntrinsic(intrinsic_name, source_loc)),
+        args_(args) {}
 
   static auto classof(const AstNode* node) -> bool {
     return InheritsFromIntrinsicExpression(node->kind());
   }
 
   auto intrinsic() const -> Intrinsic { return intrinsic_; }
+  auto args() const -> const TupleLiteral& { return *args_; }
+  auto args() -> TupleLiteral& { return *args_; }
 
  private:
+  // Returns the enumerator corresponding to the intrinsic named `name`,
+  // or raises a fatal compile error if there is no such enumerator.
+  static auto FindIntrinsic(std::string_view name, SourceLocation source_loc)
+      -> Intrinsic;
+
   Intrinsic intrinsic_;
+  Nonnull<TupleLiteral*> args_;
 };
 
 // Converts paren_contents to an Expression, interpreting the parentheses as

+ 2 - 23
executable_semantics/interpreter/exec_program.cpp

@@ -4,6 +4,8 @@
 
 #include "executable_semantics/interpreter/exec_program.h"
 
+#include <variant>
+
 #include "common/check.h"
 #include "common/ostream.h"
 #include "executable_semantics/common/arena.h"
@@ -14,30 +16,7 @@
 
 namespace Carbon {
 
-// Adds builtins, currently only Print(). Note Print() is experimental, not
-// standardized, but is made available for printing state in tests.
-static void AddIntrinsics(Nonnull<Arena*> arena,
-                          std::vector<Nonnull<Declaration*>>* declarations) {
-  SourceLocation source_loc("<intrinsic>", 0);
-  std::vector<Nonnull<Pattern*>> print_params = {arena->New<BindingPattern>(
-      source_loc, "format_str",
-      arena->New<ExpressionPattern>(
-          arena->New<StringTypeLiteral>(source_loc)))};
-  auto print_return = arena->New<Block>(
-      source_loc, std::vector<Nonnull<Statement*>>({arena->New<Return>(
-                      source_loc,
-                      arena->New<IntrinsicExpression>(
-                          IntrinsicExpression::Intrinsic::Print),
-                      false)}));
-  auto print = arena->New<FunctionDeclaration>(
-      source_loc, "Print", std::vector<Nonnull<GenericBinding*>>(),
-      arena->New<TuplePattern>(source_loc, print_params),
-      ReturnTerm::Explicit(arena->New<TupleLiteral>(source_loc)), print_return);
-  declarations->insert(declarations->begin(), print);
-}
-
 void ExecProgram(Nonnull<Arena*> arena, AST ast, bool trace) {
-  AddIntrinsics(arena, &ast.declarations);
   if (trace) {
     llvm::outs() << "********** source program **********\n";
     for (const auto decl : ast.declarations) {

+ 11 - 8
executable_semantics/interpreter/interpreter.cpp

@@ -594,19 +594,22 @@ void Interpreter::StepExp() {
       } else {
         FATAL() << "in handle_value with Call pos " << act.pos();
       }
-    case ExpressionKind::IntrinsicExpression:
-      CHECK(act.pos() == 0);
+    case ExpressionKind::IntrinsicExpression: {
+      const auto& intrinsic = cast<IntrinsicExpression>(exp);
+      if (act.pos() == 0) {
+        return todo_.Spawn(
+            std::make_unique<ExpressionAction>(&intrinsic.args()));
+      }
       // { {n :: C, E, F} :: S, H} -> { {n' :: C, E, F} :: S, H}
       switch (cast<IntrinsicExpression>(exp).intrinsic()) {
-        case IntrinsicExpression::Intrinsic::Print:
-          Address pointer = GetFromEnv(exp.source_loc(), "format_str");
-          Nonnull<const Value*> pointee = heap_.Read(pointer, exp.source_loc());
-          CHECK(pointee->kind() == Value::Kind::StringValue);
+        case IntrinsicExpression::Intrinsic::Print: {
+          const auto& args = cast<TupleValue>(*act.results()[0]);
           // TODO: This could eventually use something like llvm::formatv.
-          llvm::outs() << cast<StringValue>(*pointee).value();
+          llvm::outs() << cast<StringValue>(*args.elements()[0]).value();
           return todo_.FinishAction(TupleValue::Empty());
+        }
       }
-
+    }
     case ExpressionKind::IntTypeLiteral: {
       CHECK(act.pos() == 0);
       return todo_.FinishAction(arena_->New<IntType>());

+ 12 - 2
executable_semantics/interpreter/type_checker.cpp

@@ -699,12 +699,22 @@ auto TypeChecker::TypeCheckExp(Nonnull<Expression*> e, TypeEnv types,
     case ExpressionKind::StringLiteral:
       SetStaticType(e, arena_->New<StringType>());
       return TCResult(types);
-    case ExpressionKind::IntrinsicExpression:
+    case ExpressionKind::IntrinsicExpression: {
+      auto& intrinsic_exp = cast<IntrinsicExpression>(*e);
+      TCResult arg_res = TypeCheckExp(&intrinsic_exp.args(), types, values);
       switch (cast<IntrinsicExpression>(*e).intrinsic()) {
         case IntrinsicExpression::Intrinsic::Print:
+          if (intrinsic_exp.args().fields().size() != 1) {
+            FATAL_COMPILATION_ERROR(e->source_loc())
+                << "__intrinsic_print takes 1 argument";
+          }
+          ExpectType(e->source_loc(), "__intrinsic_print argument",
+                     arena_->New<StringType>(),
+                     &intrinsic_exp.args().fields()[0]->static_type());
           SetStaticType(e, TupleValue::Empty());
-          return TCResult(types);
+          return TCResult(arg_res.types);
       }
+    }
     case ExpressionKind::IntTypeLiteral:
     case ExpressionKind::BoolTypeLiteral:
     case ExpressionKind::StringTypeLiteral:

+ 38 - 0
executable_semantics/main.cpp

@@ -5,12 +5,47 @@
 #include <cstdio>
 #include <cstring>
 #include <iostream>
+#include <string>
+#include <vector>
 
+#include "executable_semantics/common/arena.h"
+#include "executable_semantics/common/nonnull.h"
 #include "executable_semantics/interpreter/exec_program.h"
 #include "executable_semantics/syntax/parse.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/InitLLVM.h"
 
+// The Carbon prelude: Carbon source text that AddPrelude() parses and inserts
+// ahead of the user program's declarations.
+//
+// TODO: Make this a separate source file that's embedded in the interpreter
+// at build time. See https://github.com/bazelbuild/rules_cc/issues/41 for a
+// possible mechanism.
+static constexpr std::string_view Prelude = R"(
+package Carbon api;
+
+// Note that Print is experimental, and not part of an accepted proposal, but
+// is included here for printing state in tests.
+fn Print(format_str: String) {
+  __intrinsic_print(format_str);
+}
+)";
+
+// Prepends the Carbon prelude's declarations to `declarations`.
+static void AddPrelude(
+    Carbon::Nonnull<Carbon::Arena*> arena,
+    std::vector<Carbon::Nonnull<Carbon::Declaration*>>* declarations) {
+  std::variant<Carbon::AST, Carbon::SyntaxErrorCode> parsed =
+      ParseFromString(arena, "<prelude>", Prelude, false);
+  if (const auto* prelude = std::get_if<Carbon::AST>(&parsed)) {
+    declarations->insert(declarations->begin(), prelude->declarations.begin(),
+                         prelude->declarations.end());
+    return;
+  }
+  // Parsing failed: run it again with tracing to help diagnose the problem.
+  ParseFromString(arena, "<prelude>", Prelude, true);
+  FATAL() << "Failed to parse prelude.";
+}
+
 auto main(int argc, char* argv[]) -> int {
   llvm::setBugReportMsg(
       "Please report issues to "
@@ -38,6 +73,9 @@ auto main(int argc, char* argv[]) -> int {
     // Diagnostic already reported to std::cerr; this is just a return code.
     return *error;
   }
+  auto& ast = std::get<Carbon::AST>(ast_or_error);
+
+  AddPrelude(&arena, &ast.declarations);
 
   // Typecheck and run the parsed program.
   Carbon::ExecProgram(&arena, std::get<Carbon::AST>(ast_or_error),

+ 10 - 1
executable_semantics/syntax/lexer.lpp

@@ -83,6 +83,7 @@ WHILE                "while"
 
 /* This should be kept table-like, but isn't automatic due to spaces. */
 identifier            [A-Za-z_][A-Za-z0-9_]*
+intrinsic_identifier  __intrinsic_[A-Za-z0-9_]*
 sized_type_literal    [iuf][1-9][0-9]*
 integer_literal       [0-9]+
 horizontal_whitespace [ \t\r]
@@ -226,7 +227,15 @@ string_literal        \"([^\\\"\n\v\f\r]|\\.)*\"
  /* `*` operator case 5: */
 <INITIAL,AFTER_OPERAND>"*" { return SIMPLE_TOKEN(UNARY_STAR); }
 
-{sized_type_literal} { return ARG_TOKEN(sized_type_literal, yytext); }
+{sized_type_literal} {
+  BEGIN(AFTER_OPERAND);
+  return ARG_TOKEN(sized_type_literal, yytext);
+}
+
+{intrinsic_identifier} {
+  BEGIN(AFTER_OPERAND);
+  return ARG_TOKEN(intrinsic_identifier, yytext);
+}
 
 {identifier} {
   BEGIN(AFTER_OPERAND);

+ 3 - 0
executable_semantics/syntax/parser.ypp

@@ -89,6 +89,7 @@
 
 %token <int> integer_literal
 %token <std::string> identifier
+%token <std::string> intrinsic_identifier
 %token <std::string> sized_type_literal
 %token <std::string> string_literal
 %type <std::string> designator
@@ -302,6 +303,8 @@ expression:
 | paren_expression { $$ = $1; }
 | struct_literal { $$ = $1; }
 | struct_type_literal { $$ = $1; }
+| intrinsic_identifier tuple
+    { $$ = arena->New<IntrinsicExpression>($1, $2, context.source_loc()); }
 | expression EQUAL_EQUAL expression
     {
       $$ = arena->New<PrimitiveOperatorExpression>(