Forráskód Böngészése

Add string parsing and a print builtin (#721)

I had in mind to add String in order to support libraries in `package`.  `print` is added so that a String can be sent to stdout. I've tried to implement `print` in a way that won't make it too hard to add other printable types, but it's probably also somewhat optional here -- that is, if desired, I could remove it. But it was a lot easier to double-check `\n` behavior with it, and I suspect it'll be helpful in other tests if it supports more value types.

On the side, this also fixes dereferencing in Pattern/Expression Print() calls, which I noticed were printing pointers instead of values. This may be another argument for moving away from passing pointers, since this seems to be a difficult-to-catch error.

Co-authored-by: Geoff Romer <gromer@google.com>
Jon Meow 4 éve
szülő
commit
250ce4ab00
36 módosított fájl, 549 hozzáadás és 33 törlés
  1. 20 0
      common/BUILD
  2. 88 0
      common/string_helpers.cpp
  3. 25 0
      common/string_helpers.h
  4. 54 0
      common/string_helpers_test.cpp
  5. 16 1
      executable_semantics/ast/expression.cpp
  6. 47 0
      executable_semantics/ast/expression.h
  7. 4 4
      executable_semantics/ast/pattern.cpp
  8. 48 16
      executable_semantics/interpreter/interpreter.cpp
  9. 26 6
      executable_semantics/interpreter/typecheck.cpp
  10. 16 0
      executable_semantics/interpreter/value.cpp
  11. 30 2
      executable_semantics/interpreter/value.h
  12. 1 0
      executable_semantics/syntax/BUILD
  13. 21 0
      executable_semantics/syntax/lexer.lpp
  14. 6 2
      executable_semantics/syntax/parse_and_lex_context.cpp
  15. 6 0
      executable_semantics/syntax/parser.ypp
  16. 24 1
      executable_semantics/syntax/syntax_helpers.cpp
  17. 1 1
      executable_semantics/syntax/syntax_helpers.h
  18. 9 0
      executable_semantics/test_list.bzl
  19. 9 0
      executable_semantics/testdata/print1.carbon
  20. 2 0
      executable_semantics/testdata/print1.golden
  21. 14 0
      executable_semantics/testdata/string1.carbon
  22. 1 0
      executable_semantics/testdata/string1.golden
  23. 14 0
      executable_semantics/testdata/string2.carbon
  24. 1 0
      executable_semantics/testdata/string2.golden
  25. 14 0
      executable_semantics/testdata/string3.carbon
  26. 1 0
      executable_semantics/testdata/string3.golden
  27. 8 0
      executable_semantics/testdata/string_fail1.carbon
  28. 2 0
      executable_semantics/testdata/string_fail1.golden
  29. 8 0
      executable_semantics/testdata/string_fail2.carbon
  30. 2 0
      executable_semantics/testdata/string_fail2.golden
  31. 8 0
      executable_semantics/testdata/string_fail3.carbon
  32. 2 0
      executable_semantics/testdata/string_fail3.golden
  33. 8 0
      executable_semantics/testdata/string_fail4.carbon
  34. 2 0
      executable_semantics/testdata/string_fail4.golden
  35. 9 0
      executable_semantics/testdata/string_fail5.carbon
  36. 2 0
      executable_semantics/testdata/string_fail5.golden

+ 20 - 0
common/BUILD

@@ -44,3 +44,23 @@ cc_library(
         "@llvm-project//llvm:Support",
     ],
 )
+
+cc_library(
+    name = "string_helpers",
+    srcs = ["string_helpers.cpp"],
+    hdrs = ["string_helpers.h"],
+    deps = [
+        ":check",
+        "@llvm-project//llvm:Support",
+    ],
+)
+
+cc_test(
+    name = "string_helpers_test",
+    srcs = ["string_helpers_test.cpp"],
+    deps = [
+        ":string_helpers",
+        "@llvm-project//llvm:gtest",
+        "@llvm-project//llvm:gtest_main",
+    ],
+)

+ 88 - 0
common/string_helpers.cpp

@@ -0,0 +1,88 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "common/string_helpers.h"
+
+#include "common/check.h"
+#include "llvm/ADT/StringExtras.h"
+
+namespace Carbon {
+
+// Hex digit value (0-15) of `c`. Carbon only takes uppercase hex input.
+static auto FromHex(char c) -> std::optional<char> {
+  if (c >= '0' && c <= '9') {  // Decimal digits map to 0-9.
+    return c - '0';
+  }
+  if (c >= 'A' && c <= 'F') {  // Uppercase letters map to 10-15.
+    return 10 + c - 'A';
+  }
+  return std::nullopt;  // Anything else, including lowercase a-f, is rejected.
+}
+
+auto UnescapeStringLiteral(llvm::StringRef source)
+    -> std::optional<std::string> {  // std::nullopt signals an invalid escape.
+  std::string ret;  // Accumulates the unescaped text.
+  ret.reserve(source.size());  // Output is never longer than the input.
+  size_t i = 0;
+  while (i < source.size()) {
+    char c = source[i];
+    if (c == '\\') {
+      ++i;  // Move to the character that selects the escape.
+      if (i == source.size()) {
+        return std::nullopt;  // A trailing backslash escapes nothing.
+      }
+      switch (source[i]) {
+        case 'n':
+          ret.push_back('\n');
+          break;
+        case 'r':
+          ret.push_back('\r');
+          break;
+        case 't':
+          ret.push_back('\t');
+          break;
+        case '0':
+          if (i + 1 < source.size() && llvm::isDigit(source[i + 1])) {
+            // \0[0-9] is reserved.
+            return std::nullopt;
+          }
+          ret.push_back('\0');
+          break;
+        case '"':
+          ret.push_back('"');
+          break;
+        case '\'':
+          ret.push_back('\'');
+          break;
+        case '\\':
+          ret.push_back('\\');
+          break;
+        case 'x': {
+          i += 2;  // Now indexes the second of the two required hex digits.
+          if (i >= source.size()) {
+            return std::nullopt;  // Fewer than two characters follow \x.
+          }
+          std::optional<char> c1 = FromHex(source[i - 1]);
+          std::optional<char> c2 = FromHex(source[i]);
+          if (c1 == std::nullopt || c2 == std::nullopt) {
+            return std::nullopt;
+          }
+          ret.push_back(16 * *c1 + *c2);  // High nibble, then low nibble.
+          break;
+        }
+        case 'u':  // NOTE(review): presumably FATAL() never returns here.
+          FATAL() << "\\u is not yet supported in string literals";
+        default:
+          // Unsupported escape character.
+          return std::nullopt;
+      }
+    } else {
+      ret.push_back(c);
+    }
+    ++i;  // Step past the character just handled.
+  }
+  return ret;
+}
+
+}  // namespace Carbon

+ 25 - 0
common/string_helpers.h

@@ -0,0 +1,25 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef COMMON_STRING_HELPERS_H_
+#define COMMON_STRING_HELPERS_H_
+
+#include <optional>
+#include <string>
+
+#include "llvm/ADT/StringRef.h"
+
+namespace Carbon {
+
+// Note llvm StringExtras has significant functionality which is intended to be
+// complementary to this.
+
+// Unescapes Carbon escape sequences (such as \n, \t, \0 and \xHH) in `source`.
+// Returns std::nullopt on bad input, e.g. an unknown escape or lowercase hex.
+auto UnescapeStringLiteral(llvm::StringRef source)
+    -> std::optional<std::string>;
+
+}  // namespace Carbon
+
+#endif  // COMMON_STRING_HELPERS_H_

+ 54 - 0
common/string_helpers_test.cpp

@@ -0,0 +1,54 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "common/string_helpers.h"
+
+#include <string>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+using ::testing::Eq;
+using ::testing::Optional;
+
+namespace Carbon {
+namespace {
+
+TEST(UnescapeStringLiteral, Valid) {  // Inputs that unescape successfully.
+  EXPECT_THAT(UnescapeStringLiteral("test"), Optional(Eq("test")));  // No escapes.
+  EXPECT_THAT(UnescapeStringLiteral("test\n"), Optional(Eq("test\n")));  // A literal newline byte is kept.
+  EXPECT_THAT(UnescapeStringLiteral("test\\n"), Optional(Eq("test\n")));  // Backslash-n becomes a newline.
+  EXPECT_THAT(UnescapeStringLiteral("abc\\ndef"), Optional(Eq("abc\ndef")));  // Escape in mid-string.
+  EXPECT_THAT(UnescapeStringLiteral("test\\\\n"), Optional(Eq("test\\n")));  // Escaped backslash, then a plain n.
+  EXPECT_THAT(UnescapeStringLiteral("\\xAA"), Optional(Eq("\xAA")));  // Two uppercase hex digits form one byte.
+  EXPECT_THAT(UnescapeStringLiteral("\\x12"), Optional(Eq("\x12")));  // Hex digits that are all decimal.
+}
+
+TEST(UnescapeStringLiteral, Invalid) {  // Inputs that must yield std::nullopt.
+  // Missing char after `\`.
+  EXPECT_THAT(UnescapeStringLiteral("a\\"), Eq(std::nullopt));
+  // Not a supported escape.
+  EXPECT_THAT(UnescapeStringLiteral("\\e"), Eq(std::nullopt));
+  // Needs 2 hex chars; none given.
+  EXPECT_THAT(UnescapeStringLiteral("\\x"), Eq(std::nullopt));
+  // Needs 2 hex chars; only 1 given.
+  EXPECT_THAT(UnescapeStringLiteral("\\xA"), Eq(std::nullopt));
+  // Needs uppercase hex.
+  EXPECT_THAT(UnescapeStringLiteral("\\xaa"), Eq(std::nullopt));
+  // \0 followed by a digit is reserved.
+  EXPECT_THAT(UnescapeStringLiteral("\\00"), Eq(std::nullopt));
+}
+
+TEST(UnescapeStringLiteral, Nul) {  // \0 yields an embedded NUL byte.
+  std::optional<std::string> str = UnescapeStringLiteral("a\\0b");
+  ASSERT_NE(str, std::nullopt);  // Stop here rather than dereference an empty optional.
+  EXPECT_THAT(str->size(), Eq(3));  // The NUL counts toward size().
+  EXPECT_THAT(strlen(str->c_str()), Eq(1));  // C view ends at the NUL. NOTE(review): no direct <cstring> include is visible.
+  EXPECT_THAT((*str)[0], Eq('a'));
+  EXPECT_THAT((*str)[1], Eq('\0'));
+  EXPECT_THAT((*str)[2], Eq('b'));
+}
+
+}  // namespace
+}  // namespace Carbon

+ 16 - 1
executable_semantics/ast/expression.cpp

@@ -67,7 +67,7 @@ static void PrintFields(llvm::raw_ostream& out,
                         const std::vector<FieldInitializer>& fields) {
   llvm::ListSeparator sep;
   for (const auto& field : fields) {
-    out << sep << field.name << " = " << field.expression;
+    out << sep << field.name << " = " << *field.expression;
   }
 }
 
@@ -129,6 +129,14 @@ void Expression::Print(llvm::raw_ostream& out) const {
     case Expression::Kind::IntTypeLiteral:
       out << "i32";
       break;
+    case Expression::Kind::StringLiteral:
+      out << "\"";
+      out.write_escaped(cast<StringLiteral>(*this).Val());
+      out << "\"";
+      break;
+    case Expression::Kind::StringTypeLiteral:
+      out << "String";
+      break;
     case Expression::Kind::TypeTypeLiteral:
       out << "Type";
       break;
@@ -140,6 +148,13 @@ void Expression::Print(llvm::raw_ostream& out) const {
       out << "fn " << *fn.Parameter() << " -> " << *fn.ReturnType();
       break;
     }
+    case Expression::Kind::IntrinsicExpression:
+      out << "intrinsic_expression(";
+      switch (cast<IntrinsicExpression>(*this).Intrinsic()) {
+        case IntrinsicExpression::IntrinsicKind::Print:
+          out << "print";
+      }
+      out << ")";
   }
 }
 

+ 47 - 0
executable_semantics/ast/expression.h

@@ -29,9 +29,12 @@ class Expression {
     ContinuationTypeLiteral,  // The type of a continuation value.
     IntLiteral,
     PrimitiveOperatorExpression,
+    StringLiteral,
+    StringTypeLiteral,
     TupleLiteral,
     TypeTypeLiteral,
     IdentifierExpression,
+    IntrinsicExpression,
   };
 
   // Returns the enumerator corresponding to the most-derived type of this
@@ -178,6 +181,31 @@ class BoolLiteral : public Expression {
   bool val;
 };
 
+class StringLiteral : public Expression {  // A string literal expression.
+ public:
+  explicit StringLiteral(int line_num, std::string val)
+      : Expression(Kind::StringLiteral, line_num), val(std::move(val)) {}
+
+  static auto classof(const Expression* exp) -> bool {  // Kind-tag check; presumably what cast<> relies on.
+    return exp->Tag() == Kind::StringLiteral;
+  }
+
+  auto Val() const -> const std::string& { return val; }  // The stored text.
+
+ private:
+  std::string val;  // As built by the parser, this is the unescaped text.
+};
+
+class StringTypeLiteral : public Expression {  // The `String` type expression.
+ public:
+  explicit StringTypeLiteral(int line_num)
+      : Expression(Kind::StringTypeLiteral, line_num) {}
+
+  static auto classof(const Expression* exp) -> bool {  // Kind-tag check; presumably what cast<> relies on.
+    return exp->Tag() == Kind::StringTypeLiteral;
+  }
+};
+
 class TupleLiteral : public Expression {
  public:
   explicit TupleLiteral(int line_num) : TupleLiteral(line_num, {}) {}
@@ -301,6 +329,25 @@ class TypeTypeLiteral : public Expression {
   }
 };
 
+class IntrinsicExpression : public Expression {  // An expression naming a builtin operation.
+ public:
+  enum class IntrinsicKind {  // Selects which builtin this node stands for.
+    Print,
+  };
+
+  explicit IntrinsicExpression(IntrinsicKind intrinsic)
+      : Expression(Kind::IntrinsicExpression, -1), intrinsic(intrinsic) {}  // -1 is passed where other nodes pass line_num.
+
+  static auto classof(const Expression* exp) -> bool {  // Kind-tag check; presumably what cast<> relies on.
+    return exp->Tag() == Kind::IntrinsicExpression;
+  }
+
+  auto Intrinsic() const -> IntrinsicKind { return intrinsic; }
+
+ private:
+  IntrinsicKind intrinsic;
+};
+
 }  // namespace Carbon
 
 #endif  // EXECUTABLE_SEMANTICS_AST_EXPRESSION_H_

+ 4 - 4
executable_semantics/ast/pattern.cpp

@@ -37,19 +37,19 @@ void Pattern::Print(llvm::raw_ostream& out) const {
       out << "(";
       llvm::ListSeparator sep;
       for (const TuplePattern::Field& field : tuple.Fields()) {
-        out << sep << field.name << " = " << field.pattern;
+        out << sep << field.name << " = " << *field.pattern;
       }
       out << ")";
       break;
     }
     case Kind::AlternativePattern: {
       const auto& alternative = cast<AlternativePattern>(*this);
-      out << alternative.ChoiceType() << "." << alternative.AlternativeName()
-          << alternative.Arguments();
+      out << *alternative.ChoiceType() << "." << alternative.AlternativeName()
+          << *alternative.Arguments();
       break;
     }
     case Kind::ExpressionPattern:
-      out << cast<ExpressionPattern>(*this).Expression();
+      out << *cast<ExpressionPattern>(*this).Expression();
       break;
   }
 }

+ 48 - 16
executable_semantics/interpreter/interpreter.cpp

@@ -60,6 +60,15 @@ auto CurrentEnv(State* state) -> Env {
   return frame->scopes.Top()->values;
 }
 
+// Returns the Address of `name` from the current env; runtime error if not found.
+static auto GetFromEnv(int line_num, const std::string& name) -> Address {
+  std::optional<Address> pointer = CurrentEnv(state).Get(name);
+  if (!pointer) {  // Empty optional: the lookup produced no address.
+    FATAL_RUNTIME_ERROR(line_num) << "could not find `" << name << "`";
+  }
+  return *pointer;  // NOTE(review): assumes FATAL_RUNTIME_ERROR never returns.
+}
+
 void PrintState(llvm::raw_ostream& out) {
   out << "{\nstack: ";
   PrintStack(state->stack, out);
@@ -411,14 +420,9 @@ void StepLvalue() {
     case Expression::Kind::IdentifierExpression: {
       //    { {x :: C, E, F} :: S, H}
       // -> { {E(x) :: C, E, F} :: S, H}
-      std::optional<Address> pointer =
-          CurrentEnv(state).Get(cast<IdentifierExpression>(*exp).Name());
-      if (!pointer) {
-        FATAL_RUNTIME_ERROR(exp->LineNumber())
-            << "could not find `" << cast<IdentifierExpression>(*exp).Name()
-            << "`";
-      }
-      const Value* v = global_arena->New<PointerValue>(*pointer);
+      Address pointer = GetFromEnv(exp->LineNumber(),
+                                   cast<IdentifierExpression>(*exp).Name());
+      const Value* v = global_arena->New<PointerValue>(pointer);
       frame->todo.Pop();
       frame->todo.Push(global_arena->New<ValAction>(v));
       break;
@@ -496,10 +500,12 @@ void StepLvalue() {
     case Expression::Kind::BoolTypeLiteral:
     case Expression::Kind::TypeTypeLiteral:
     case Expression::Kind::FunctionTypeLiteral:
-    case Expression::Kind::ContinuationTypeLiteral: {
+    case Expression::Kind::ContinuationTypeLiteral:
+    case Expression::Kind::StringLiteral:
+    case Expression::Kind::StringTypeLiteral:
+    case Expression::Kind::IntrinsicExpression:
       FATAL_RUNTIME_ERROR_NO_LINE()
           << "Can't treat expression as lvalue: " << *exp;
-    }
   }
 }
 
@@ -597,12 +603,8 @@ void StepExp() {
       CHECK(act->Pos() == 0);
       const auto& ident = cast<IdentifierExpression>(*exp);
       // { {x :: C, E, F} :: S, H} -> { {H(E(x)) :: C, E, F} :: S, H}
-      std::optional<Address> pointer = CurrentEnv(state).Get(ident.Name());
-      if (!pointer) {
-        FATAL_RUNTIME_ERROR(exp->LineNumber())
-            << "could not find `" << ident.Name() << "`";
-      }
-      const Value* pointee = state->heap.Read(*pointer, exp->LineNumber());
+      Address pointer = GetFromEnv(exp->LineNumber(), ident.Name());
+      const Value* pointee = state->heap.Read(pointer, exp->LineNumber());
       frame->todo.Pop(1);
       frame->todo.Push(global_arena->New<ValAction>(pointee));
       break;
@@ -660,6 +662,22 @@ void StepExp() {
         FATAL() << "in handle_value with Call pos " << act->Pos();
       }
       break;
+    case Expression::Kind::IntrinsicExpression:
+      CHECK(act->Pos() == 0);
+      // { {n :: C, E, F} :: S, H} -> { {n' :: C, E, F} :: S, H}
+      frame->todo.Pop(1);
+      switch (cast<IntrinsicExpression>(*exp).Intrinsic()) {
+        case IntrinsicExpression::IntrinsicKind::Print:
+          Address pointer = GetFromEnv(exp->LineNumber(), "format_str");
+          const Value* pointee = state->heap.Read(pointer, exp->LineNumber());
+          CHECK(pointee->Tag() == Value::Kind::StringValue);
+          // TODO: This could eventually use something like llvm::formatv.
+          llvm::outs() << cast<StringValue>(*pointee).Val();
+          frame->todo.Push(global_arena->New<ValAction>(&TupleValue::Empty()));
+          break;
+      }
+      break;
+
     case Expression::Kind::IntTypeLiteral: {
       CHECK(act->Pos() == 0);
       const Value* v = global_arena->New<IntType>();
@@ -710,6 +728,20 @@ void StepExp() {
       frame->todo.Push(global_arena->New<ValAction>(v));
       break;
     }
+    case Expression::Kind::StringLiteral:
+      CHECK(act->Pos() == 0);
+      // { {n :: C, E, F} :: S, H} -> { {n' :: C, E, F} :: S, H}
+      frame->todo.Pop(1);
+      frame->todo.Push(global_arena->New<ValAction>(
+          global_arena->New<StringValue>(cast<StringLiteral>(*exp).Val())));
+      break;
+    case Expression::Kind::StringTypeLiteral: {
+      CHECK(act->Pos() == 0);
+      const Value* v = global_arena->New<StringType>();
+      frame->todo.Pop(1);
+      frame->todo.Push(global_arena->New<ValAction>(v));
+      break;
+    }
   }  // switch (exp->Tag)
 }
 

+ 26 - 6
executable_semantics/interpreter/typecheck.cpp

@@ -82,7 +82,19 @@ static auto ReifyType(const Value* t, int line_num) -> const Expression* {
     case Value::Kind::VariableType:
       return global_arena->New<IdentifierExpression>(
           0, cast<VariableType>(*t).Name());
-    default:
+    case Value::Kind::StringType:
+      return global_arena->New<StringTypeLiteral>(0);
+    case Value::Kind::AlternativeConstructorValue:
+    case Value::Kind::AlternativeValue:
+    case Value::Kind::AutoType:
+    case Value::Kind::BindingPlaceholderValue:
+    case Value::Kind::BoolValue:
+    case Value::Kind::ContinuationValue:
+    case Value::Kind::FunctionValue:
+    case Value::Kind::IntValue:
+    case Value::Kind::PointerValue:
+    case Value::Kind::StringValue:
+    case Value::Kind::StructValue:
       FATAL() << "expected a type, not " << *t;
   }
 }
@@ -158,10 +170,10 @@ static auto ArgumentDeduction(int line_num, TypeEnv deduced, const Value* param,
     case Value::Kind::ChoiceType:
     case Value::Kind::IntType:
     case Value::Kind::BoolType:
-    case Value::Kind::TypeType: {
+    case Value::Kind::TypeType:
+    case Value::Kind::StringType:
       ExpectType(line_num, "argument deduction", param, arg);
       return deduced;
-    }
     // The rest of these cases should never happen.
     case Value::Kind::IntValue:
     case Value::Kind::BoolValue:
@@ -172,6 +184,7 @@ static auto ArgumentDeduction(int line_num, TypeEnv deduced, const Value* param,
     case Value::Kind::BindingPlaceholderValue:
     case Value::Kind::AlternativeConstructorValue:
     case Value::Kind::ContinuationValue:
+    case Value::Kind::StringValue:
       FATAL() << "In ArgumentDeduction: expected type, not value " << *param;
   }
 }
@@ -213,6 +226,7 @@ static auto Substitute(TypeEnv dict, const Value* type) -> const Value* {
     case Value::Kind::StructType:
     case Value::Kind::ChoiceType:
     case Value::Kind::ContinuationType:
+    case Value::Kind::StringType:
       return type;
     // The rest of these cases should never happen.
     case Value::Kind::IntValue:
@@ -224,6 +238,7 @@ static auto Substitute(TypeEnv dict, const Value* type) -> const Value* {
     case Value::Kind::BindingPlaceholderValue:
     case Value::Kind::AlternativeConstructorValue:
     case Value::Kind::ContinuationValue:
+    case Value::Kind::StringValue:
       FATAL() << "In Substitute: expected type, not value " << *type;
   }
 }
@@ -475,12 +490,17 @@ auto TypeCheckExp(const Expression* e, TypeEnv types, Env values)
           /*is_omitted_return_type=*/false);
       return TCExpression(new_e, global_arena->New<TypeType>(), types);
     }
+    case Expression::Kind::StringLiteral:
+      return TCExpression(e, global_arena->New<StringType>(), types);
+    case Expression::Kind::IntrinsicExpression:
+      switch (cast<IntrinsicExpression>(*e).Intrinsic()) {
+        case IntrinsicExpression::IntrinsicKind::Print:
+          return TCExpression(e, &TupleValue::Empty(), types);
+      }
     case Expression::Kind::IntTypeLiteral:
-      return TCExpression(e, global_arena->New<TypeType>(), types);
     case Expression::Kind::BoolTypeLiteral:
-      return TCExpression(e, global_arena->New<TypeType>(), types);
+    case Expression::Kind::StringTypeLiteral:
     case Expression::Kind::TypeTypeLiteral:
-      return TCExpression(e, global_arena->New<TypeType>(), types);
     case Expression::Kind::ContinuationTypeLiteral:
       return TCExpression(e, global_arena->New<TypeType>(), types);
   }

+ 16 - 0
executable_semantics/interpreter/value.cpp

@@ -236,6 +236,14 @@ void Value::Print(llvm::raw_ostream& out) const {
       // TODO: Find a way to print useful information about the continuation
       // without creating a dependency cycle.
       break;
+    case Value::Kind::StringType:
+      out << "String";
+      break;
+    case Value::Kind::StringValue:
+      out << "\"";
+      out.write_escaped(cast<StringValue>(*this).Val());
+      out << "\"";
+      break;
   }
 }
 
@@ -293,6 +301,10 @@ auto CopyVal(const Value* val, int line_num) -> const Value* {
       return global_arena->New<AutoType>();
     case Value::Kind::ContinuationType:
       return global_arena->New<ContinuationType>();
+    case Value::Kind::StringType:
+      return global_arena->New<StringType>();
+    case Value::Kind::StringValue:
+      return global_arena->New<StringValue>(cast<StringValue>(*val).Val());
     case Value::Kind::VariableType:
     case Value::Kind::StructType:
     case Value::Kind::ChoiceType:
@@ -339,6 +351,7 @@ auto TypeEqual(const Value* t1, const Value* t2) -> bool {
     case Value::Kind::BoolType:
     case Value::Kind::ContinuationType:
     case Value::Kind::TypeType:
+    case Value::Kind::StringType:
       return true;
     case Value::Kind::VariableType:
       return cast<VariableType>(*t1).Name() == cast<VariableType>(*t2).Name();
@@ -390,6 +403,8 @@ auto ValueEqual(const Value* v1, const Value* v2, int line_num) -> bool {
     case Value::Kind::TupleValue:
       return FieldsValueEqual(cast<TupleValue>(*v1).Elements(),
                               cast<TupleValue>(*v2).Elements(), line_num);
+    case Value::Kind::StringValue:
+      return cast<StringValue>(*v1).Val() == cast<StringValue>(*v2).Val();
     case Value::Kind::IntType:
     case Value::Kind::BoolType:
     case Value::Kind::TypeType:
@@ -400,6 +415,7 @@ auto ValueEqual(const Value* v1, const Value* v2, int line_num) -> bool {
     case Value::Kind::ChoiceType:
     case Value::Kind::ContinuationType:
     case Value::Kind::VariableType:
+    case Value::Kind::StringType:
       return TypeEqual(v1, v2);
     case Value::Kind::StructValue:
     case Value::Kind::AlternativeValue:

+ 30 - 2
executable_semantics/interpreter/value.h

@@ -48,10 +48,12 @@ class Value {
     StructType,
     ChoiceType,
     ContinuationType,  // The type of a continuation.
-    VariableType,      // e.g. generic type parameters
+    VariableType,      // e.g., generic type parameters.
     BindingPlaceholderValue,
     AlternativeConstructorValue,
-    ContinuationValue  // A first-class continuation value.
+    ContinuationValue,  // A first-class continuation value.
+    StringType,
+    StringValue,
   };
 
   Value(const Value&) = delete;
@@ -442,6 +444,32 @@ class ContinuationValue : public Value {
   std::vector<Frame*> stack;
 };
 
+// The String type. Carries no state beyond its kind tag.
+class StringType : public Value {
+ public:
+  StringType() : Value(Kind::StringType) {}
+
+  static auto classof(const Value* value) -> bool {  // Kind-tag check; presumably what cast<> relies on.
+    return value->Tag() == Kind::StringType;
+  }
+};
+
+// A string value, holding its contents in an owned std::string.
+class StringValue : public Value {
+ public:
+  explicit StringValue(std::string val)  // Taken by value, then moved into the member.
+      : Value(Kind::StringValue), val(std::move(val)) {}
+
+  static auto classof(const Value* value) -> bool {  // Kind-tag check; presumably what cast<> relies on.
+    return value->Tag() == Kind::StringValue;
+  }
+
+  auto Val() const -> const std::string& { return val; }  // Read-only access to the contents.
+
+ private:
+  std::string val;
+};
+
 auto CopyVal(const Value* val, int line_num) -> const Value*;
 
 auto TypeEqual(const Value* t1, const Value* t2) -> bool;

+ 1 - 0
executable_semantics/syntax/BUILD

@@ -31,6 +31,7 @@ cc_library(
         ":paren_contents",
         "//common:check",
         "//common:ostream",
+        "//common:string_helpers",
         "//executable_semantics/ast:declaration",
         "//executable_semantics/ast:expression",
         "//executable_semantics/common:arena",

+ 21 - 0
executable_semantics/syntax/lexer.lpp

@@ -8,6 +8,7 @@ SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 #include <cstdlib>
 
 #include "common/check.h"
+#include "common/string_helpers.h"
 #include "executable_semantics/common/tracing_flag.h"
 #include "executable_semantics/syntax/parse_and_lex_context.h"
 #include "llvm/ADT/StringExtras.h"
@@ -49,6 +50,7 @@ MATCH             "match"
 NOT               "not"
 OR                "or"
 RETURN            "return"
+STRING            "String"
 STRUCT            "struct"
 TRUE              "true"
 TYPE              "Type"
@@ -63,6 +65,7 @@ UNDERSCORE        "_"
 identifier    [A-Za-z_][A-Za-z0-9_]*
 sized_type_literal [iuf][1-9][0-9]*
 integer_literal   [0-9]+
+string_literal    \"([^\\\"\n]|\\.)*\"
 horizontal_whitespace [ \t\r]
 whitespace [ \t\r\n]
 operand_start [(A-Za-z0-9_"]
@@ -120,6 +123,7 @@ operand_start [(A-Za-z0-9_"]
 {RUN}      { return Carbon::Parser::make_RUN(context.current_token_position); }
 {AWAIT}    { return Carbon::Parser::make_AWAIT(context.current_token_position); }
 {UNDERSCORE}      { return Carbon::Parser::make_UNDERSCORE(context.current_token_position); }
+{STRING}   { return Carbon::Parser::make_STRING(context.current_token_position); }
 
 {sized_type_literal} { return Carbon::Parser::make_sized_type_literal(yytext, context.current_token_position); }
 
@@ -189,6 +193,23 @@ operator and its operand, leading to three more cases:
   return Carbon::Parser::make_integer_literal(val, context.current_token_position);
 }
 
+{string_literal} {
+  llvm::StringRef str(yytext);
+  CHECK(str.consume_front("\"") && str.consume_back("\""));
+  std::optional<std::string> unescaped = Carbon::UnescapeStringLiteral(str);
+  if (unescaped == std::nullopt) {
+    if (Carbon::tracing_output) {
+      // Print a newline because tracing prints an incomplete line
+      // "Reading a token: ".
+      llvm::errs() << "\n";
+    }
+    FATAL_COMPILATION_ERROR(yylineno)
+        << "Invalid escaping in string: " << yytext;
+  }
+  return Carbon::Parser::make_string_literal(
+      *unescaped, context.current_token_position);
+}
+
 {ONE_LINE_COMMENT} {
   // Advance end by 1 line, resetting the column to zero.
   context.current_token_position.lines(1);

+ 6 - 2
executable_semantics/syntax/parse_and_lex_context.cpp

@@ -4,12 +4,16 @@
 
 #include "executable_semantics/syntax/parse_and_lex_context.h"
 
+namespace Carbon {
+
 // Writes a syntax error diagnostic, containing message, for the input file at
 // the given line, to standard error.
-auto Carbon::ParseAndLexContext::PrintDiagnostic(const std::string& message,
-                                                 int line_num) -> void {
+auto ParseAndLexContext::PrintDiagnostic(const std::string& message,
+                                         int line_num) -> void {
   // TODO: Do we really want this to be fatal?  It makes the comment and the
   // name a lie, and renders some of the other yyparse() result propagation code
   // moot.
   FATAL_COMPILATION_ERROR(line_num) << message;
 }
+
+}  // namespace Carbon

+ 6 - 0
executable_semantics/syntax/parser.ypp

@@ -90,6 +90,7 @@ void Carbon::Parser::error(const location_type&, const std::string& message) {
 %token <int> integer_literal
 %token <std::string> identifier
 %token <std::string> sized_type_literal
+%token <std::string> string_literal
 %type <std::string> designator
 %type <const Declaration*> declaration
 %type <FunctionDefinition> function_declaration
@@ -131,6 +132,7 @@ void Carbon::Parser::error(const location_type&, const std::string& message) {
 %token AND
 %token OR
 %token NOT
+%token STRING
 %token BOOL
 %token TYPE
 %token FN
@@ -218,6 +220,8 @@ expression:
     { $$ = global_arena->New<IndexExpression>(yylineno, $1, $3); }
 | integer_literal
     { $$ = global_arena->New<IntLiteral>(yylineno, $1); }
+| string_literal
+    { $$ = global_arena->New<StringLiteral>(yylineno, $1); }
 | TRUE
     { $$ = global_arena->New<BoolLiteral>(yylineno, true); }
 | FALSE
@@ -229,6 +233,8 @@ expression:
       CHECK($1[0] == 'i' && val == 32)  << "Only i32 is supported for now: " << $1;
       $$ = global_arena->New<IntTypeLiteral>(yylineno);
     }
+| STRING
+    { $$ = global_arena->New<StringTypeLiteral>(yylineno); }
 | BOOL
     { $$ = global_arena->New<BoolTypeLiteral>(yylineno); }
 | TYPE

+ 24 - 1
executable_semantics/syntax/syntax_helpers.cpp

@@ -4,6 +4,7 @@
 
 #include "executable_semantics/syntax/syntax_helpers.h"
 
+#include "common/check.h"
 #include "common/ostream.h"
 #include "executable_semantics/common/arena.h"
 #include "executable_semantics/common/tracing_flag.h"
@@ -12,7 +13,29 @@
 
 namespace Carbon {
 
-void ExecProgram(const std::list<const Declaration*>& fs) {
+// Prepends builtins to `fs`; currently only Print(), which is experimental and
+// not standardized, but is made available for printing state in tests.
+static void AddIntrinsics(std::list<const Declaration*>* fs) {
+  std::vector<TuplePattern::Field> print_fields = {TuplePattern::Field(
+      "0", global_arena->New<BindingPattern>(
+               -1, "format_str",  // Name the interpreter looks up for Print.
+               global_arena->New<ExpressionPattern>(
+                   global_arena->New<StringTypeLiteral>(-1))))};  // Parameter type is String.
+  auto* print_return =  // Body: a return of the Print intrinsic expression.
+      global_arena->New<Return>(-1,
+                                global_arena->New<IntrinsicExpression>(
+                                    IntrinsicExpression::IntrinsicKind::Print),
+                                false);
+  auto* print = global_arena->New<FunctionDeclaration>(FunctionDefinition(
+      -1, "Print", std::vector<GenericBinding>(),
+      global_arena->New<TuplePattern>(-1, print_fields),
+      global_arena->New<ExpressionPattern>(global_arena->New<TupleLiteral>(-1)),  // Return type: empty tuple.
+      /*is_omitted_return_type=*/false, print_return));
+  fs->insert(fs->begin(), print);  // Inserted at the front of the list.
+}
+
+void ExecProgram(std::list<const Declaration*> fs) {
+  AddIntrinsics(&fs);
   if (tracing_output) {
     llvm::outs() << "********** source program **********\n";
     for (const auto* decl : fs) {

+ 1 - 1
executable_semantics/syntax/syntax_helpers.h

@@ -16,7 +16,7 @@
 namespace Carbon {
 
 // Runs the top-level declaration list.
-void ExecProgram(const std::list<const Declaration*>& fs);
+void ExecProgram(std::list<const Declaration*> fs);
 
 }  // namespace Carbon
 

+ 9 - 0
executable_semantics/test_list.bzl

@@ -65,6 +65,7 @@ TEST_LIST = [
     "pattern_init",
     "pattern_variable_fail",
     "placeholder_variable",
+    "print1",
     "record1",
     "return_auto",
     "return_empty_explicit",
@@ -74,6 +75,14 @@ TEST_LIST = [
     "return_empty_implicit_fail1",
     "return_empty_implicit_fail2",
     "star",
+    "string1",
+    "string2",
+    "string3",
+    "string_fail1",
+    "string_fail2",
+    "string_fail3",
+    "string_fail4",
+    "string_fail5",
     "struct1",
     "struct2",
     "struct3",

+ 9 - 0
executable_semantics/testdata/print1.carbon

@@ -0,0 +1,9 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+fn main() -> i32 {
+  var s: auto = "Hello world!\n";
+  Print(s);
+  return 0;
+}

+ 2 - 0
executable_semantics/testdata/print1.golden

@@ -0,0 +1,2 @@
+Hello world!
+result: 0

+ 14 - 0
executable_semantics/testdata/string1.carbon

@@ -0,0 +1,14 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+fn CompareStr(s: String) -> i32 {
+  if (s == "str") {
+    return 0;
+  }
+  return 1;
+}
+
+fn main() -> i32 {
+  return CompareStr("str");
+}

+ 1 - 0
executable_semantics/testdata/string1.golden

@@ -0,0 +1 @@
+result: 0

+ 14 - 0
executable_semantics/testdata/string2.carbon

@@ -0,0 +1,14 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+fn CompareStr(s: String) -> i32 {
+  if (s == "str\n") {
+    return 0;
+  }
+  return 1;
+}
+
+fn main() -> i32 {
+  return CompareStr("str\n");
+}

+ 1 - 0
executable_semantics/testdata/string2.golden

@@ -0,0 +1 @@
+result: 0

+ 14 - 0
executable_semantics/testdata/string3.carbon

@@ -0,0 +1,14 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+fn CompareStr(s: String) -> i32 {
+  if (s == "str;") {
+    return 0;
+  }
+  return 1;
+}
+
+fn main() -> i32 {
+  return CompareStr("\x73\x74\x72\x3B");
+}

+ 1 - 0
executable_semantics/testdata/string3.golden

@@ -0,0 +1 @@
+result: 0

+ 8 - 0
executable_semantics/testdata/string_fail1.carbon

@@ -0,0 +1,8 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+fn main() -> i32 {
+  Print("str\e");
+  return 0;
+}

+ 2 - 0
executable_semantics/testdata/string_fail1.golden

@@ -0,0 +1,2 @@
+COMPILATION ERROR: 6: Invalid escaping in string: "str\e"
+EXIT CODE: 255

+ 8 - 0
executable_semantics/testdata/string_fail2.carbon

@@ -0,0 +1,8 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+fn main() -> i32 {
+  Print("str\x");
+  return 0;
+}

+ 2 - 0
executable_semantics/testdata/string_fail2.golden

@@ -0,0 +1,2 @@
+COMPILATION ERROR: 6: Invalid escaping in string: "str\x"
+EXIT CODE: 255

+ 8 - 0
executable_semantics/testdata/string_fail3.carbon

@@ -0,0 +1,8 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+fn main() -> i32 {
+  Print("str\xaa");
+  return 0;
+}

+ 2 - 0
executable_semantics/testdata/string_fail3.golden

@@ -0,0 +1,2 @@
+COMPILATION ERROR: 6: Invalid escaping in string: "str\xaa"
+EXIT CODE: 255

+ 8 - 0
executable_semantics/testdata/string_fail4.carbon

@@ -0,0 +1,8 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+fn main() -> i32 {
+  Print("str\01");
+  return 0;
+}

+ 2 - 0
executable_semantics/testdata/string_fail4.golden

@@ -0,0 +1,2 @@
+COMPILATION ERROR: 6: Invalid escaping in string: "str\01"
+EXIT CODE: 255

+ 9 - 0
executable_semantics/testdata/string_fail5.carbon

@@ -0,0 +1,9 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+fn main() -> i32 {
+  Print("new
+line");
+  return 0;
+}

+ 2 - 0
executable_semantics/testdata/string_fail5.golden

@@ -0,0 +1,2 @@
+COMPILATION ERROR: 6: invalid character '\x22' in source file.
+EXIT CODE: 255