LLVM 官方教程编译器实现笔记

主要研究使用 LLVM 进行开发的流程,参考官方教程第 1~7 章。第 8~9 章主要介绍如何生成 object 文件并添加调试信息,并且需要链接到一个手写的 main.cpp 文件。

项目结构

.
├── CMakeLists.txt
├── include
│   └── KaleidoscopeJIT.h
├── src
│   └── main.cpp
└── tests
    └── fib.ks

代码清单

tests/fib.ks

1def fib(x)
2  if (x < 3) then
3    1
4  else
5    fib(x-1)+fib(x-2);
6
7fib(10);

CMakeLists.txt

 1cmake_minimum_required(VERSION 3.15)
 2
 3set(CMAKE_CXX_STANDARD 20)
 4set(CMAKE_CXX_STANDARD_REQUIRED ON)
 5set(CMAKE_CXX_EXTENSION OFF)
 6
 7project(kaldeidoscope VERSION 1.0.0 LANGUAGES CXX C)
 8
 9# llvm
10
11find_package(LLVM REQUIRED CONFIG)
12add_definitions(${LLVM_DEFINITIONS})
13
14message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}")
15message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}")
16
17llvm_map_components_to_libnames(llvm_libs
18    Analysis
19    Core
20    ExecutionEngine
21    InstCombine
22    Object
23    OrcJIT
24    RuntimeDyld
25    ScalarOpts
26    Support
27    native
28)
29
30add_executable("main")
31aux_source_directory("src" sources)
32aux_source_directory("include" sources)
33target_sources("main" PUBLIC ${sources})
34
35target_link_libraries(main PRIVATE ${llvm_libs})

KaleidoscopeJIT.h

  1//===- KaleidoscopeJIT.h - A simple JIT for Kaleidoscope --------*- C++ -*-===//
  2//
  3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4// See https://llvm.org/LICENSE.txt for license information.
  5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6//
  7//===----------------------------------------------------------------------===//
  8//
  9// Contains a simple JIT definition for use in the kaleidoscope tutorials.
 10//
 11//===----------------------------------------------------------------------===//
 12
 13#ifndef LLVM_EXECUTIONENGINE_ORC_KALEIDOSCOPEJIT_H
 14#define LLVM_EXECUTIONENGINE_ORC_KALEIDOSCOPEJIT_H
 15
 16#include "llvm/ADT/StringRef.h"
 17#include "llvm/ExecutionEngine/JITSymbol.h"
 18#include "llvm/ExecutionEngine/Orc/CompileUtils.h"
 19#include "llvm/ExecutionEngine/Orc/Core.h"
 20#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h"
 21#include "llvm/ExecutionEngine/Orc/ExecutorProcessControl.h"
 22#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
 23#include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h"
 24#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
 25#include "llvm/ExecutionEngine/SectionMemoryManager.h"
 26#include "llvm/IR/DataLayout.h"
 27#include "llvm/IR/LLVMContext.h"
 28#include <memory>
 29
 30namespace llvm {
 31namespace orc {
 32
 33class KaleidoscopeJIT {
 34private:
 35  std::unique_ptr<ExecutionSession> ES;
 36
 37  DataLayout DL;
 38  MangleAndInterner Mangle;
 39
 40  RTDyldObjectLinkingLayer ObjectLayer;
 41  IRCompileLayer CompileLayer;
 42
 43  JITDylib &MainJD;
 44
 45public:
 46  KaleidoscopeJIT(std::unique_ptr<ExecutionSession> ES,
 47                  JITTargetMachineBuilder JTMB, DataLayout DL)
 48      : ES(std::move(ES)), DL(std::move(DL)), Mangle(*this->ES, this->DL),
 49        ObjectLayer(*this->ES,
 50                    []() { return std::make_unique<SectionMemoryManager>(); }),
 51        CompileLayer(*this->ES, ObjectLayer,
 52                     std::make_unique<ConcurrentIRCompiler>(std::move(JTMB))),
 53        MainJD(this->ES->createBareJITDylib("<main>")) {
 54    MainJD.addGenerator(
 55        cantFail(DynamicLibrarySearchGenerator::GetForCurrentProcess(
 56            DL.getGlobalPrefix())));
 57    if (JTMB.getTargetTriple().isOSBinFormatCOFF()) {
 58      ObjectLayer.setOverrideObjectFlagsWithResponsibilityFlags(true);
 59      ObjectLayer.setAutoClaimResponsibilityForObjectSymbols(true);
 60    }
 61  }
 62
 63  ~KaleidoscopeJIT() {
 64    if (auto Err = ES->endSession())
 65      ES->reportError(std::move(Err));
 66  }
 67
 68  static Expected<std::unique_ptr<KaleidoscopeJIT>> Create() {
 69    auto EPC = SelfExecutorProcessControl::Create();
 70    if (!EPC)
 71      return EPC.takeError();
 72
 73    auto ES = std::make_unique<ExecutionSession>(std::move(*EPC));
 74
 75    JITTargetMachineBuilder JTMB(
 76        ES->getExecutorProcessControl().getTargetTriple());
 77
 78    auto DL = JTMB.getDefaultDataLayoutForTarget();
 79    if (!DL)
 80      return DL.takeError();
 81
 82    return std::make_unique<KaleidoscopeJIT>(std::move(ES), std::move(JTMB),
 83                                             std::move(*DL));
 84  }
 85
 86  const DataLayout &getDataLayout() const { return DL; }
 87
 88  JITDylib &getMainJITDylib() { return MainJD; }
 89
 90  Error addModule(ThreadSafeModule TSM, ResourceTrackerSP RT = nullptr) {
 91    if (!RT)
 92      RT = MainJD.getDefaultResourceTracker();
 93    return CompileLayer.add(RT, std::move(TSM));
 94  }
 95
 96  Expected<JITEvaluatedSymbol> lookup(StringRef Name) {
 97    return ES->lookup({&MainJD}, Mangle(Name.str()));
 98  }
 99};
100
101} // end namespace orc
102} // end namespace llvm
103
104#endif // LLVM_EXECUTIONENGINE_ORC_KALEIDOSCOPEJIT_H

main.cpp

   1#include "llvm/ADT/STLExtras.h"
   2#include "llvm/Analysis/BasicAliasAnalysis.h"
   3#include "llvm/Analysis/Passes.h"
   4#include "llvm/IR/DIBuilder.h"
   5#include "llvm/IR/IRBuilder.h"
   6#include "llvm/IR/LLVMContext.h"
   7#include "llvm/IR/LegacyPassManager.h"
   8#include "llvm/IR/Module.h"
   9#include "llvm/IR/Verifier.h"
  10#include "llvm/Support/Host.h"
  11#include "llvm/Support/TargetSelect.h"
  12#include "llvm/Transforms/Scalar.h"
  13#include <cctype>
  14#include <cstdio>
  15#include <map>
  16#include <string>
  17#include <vector>
  18#include "../include/KaleidoscopeJIT.h"
  19
  20using namespace llvm;
  21using namespace llvm::orc;
  22
  23//===----------------------------------------------------------------------===//
  24// Lexer
  25//===----------------------------------------------------------------------===//
  26
  27// The lexer returns tokens [0-255] if it is an unknown character, otherwise one
  28// of these for known things.
  29enum Token {
  30  tok_eof = -1,
  31
  32  // commands
  33  tok_def = -2,
  34  tok_extern = -3,
  35
  36  // primary
  37  tok_identifier = -4,
  38  tok_number = -5,
  39
  40  // control
  41  tok_if = -6,
  42  tok_then = -7,
  43  tok_else = -8,
  44  tok_for = -9,
  45  tok_in = -10,
  46
  47  // operators
  48  tok_binary = -11,
  49  tok_unary = -12,
  50
  51  // var definition
  52  tok_var = -13
  53};
  54
  55std::string getTokName(int Tok) {
  56  switch (Tok) {
  57  case tok_eof:
  58    return "eof";
  59  case tok_def:
  60    return "def";
  61  case tok_extern:
  62    return "extern";
  63  case tok_identifier:
  64    return "identifier";
  65  case tok_number:
  66    return "number";
  67  case tok_if:
  68    return "if";
  69  case tok_then:
  70    return "then";
  71  case tok_else:
  72    return "else";
  73  case tok_for:
  74    return "for";
  75  case tok_in:
  76    return "in";
  77  case tok_binary:
  78    return "binary";
  79  case tok_unary:
  80    return "unary";
  81  case tok_var:
  82    return "var";
  83  }
  84  return std::string(1, (char)Tok);
  85}
  86
  87namespace {
  88class PrototypeAST;
  89class ExprAST;
  90}
  91
  92struct DebugInfo {
  93  DICompileUnit *TheCU;
  94  DIType *DblTy;
  95  std::vector<DIScope *> LexicalBlocks;
  96
  97  void emitLocation(ExprAST *AST);
  98  DIType *getDoubleTy();
  99} KSDbgInfo;
 100
 101struct SourceLocation {
 102  int Line;
 103  int Col;
 104};
 105static SourceLocation CurLoc;
 106static SourceLocation LexLoc = {1, 0};
 107
 108static int advance() {
 109  int LastChar = getchar();
 110
 111  if (LastChar == '\n' || LastChar == '\r') {
 112    LexLoc.Line++;
 113    LexLoc.Col = 0;
 114  } else
 115    LexLoc.Col++;
 116  return LastChar;
 117}
 118
 119static std::string IdentifierStr; // Filled in if tok_identifier
 120static double NumVal;             // Filled in if tok_number
 121
 122/// gettok - Return the next token from standard input.
 123static int gettok() {
 124  static int LastChar = ' ';
 125
 126  // Skip any whitespace.
 127  while (isspace(LastChar))
 128    LastChar = advance();
 129
 130  CurLoc = LexLoc;
 131
 132  if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
 133    IdentifierStr = LastChar;
 134    while (isalnum((LastChar = advance())))
 135      IdentifierStr += LastChar;
 136
 137    if (IdentifierStr == "def")
 138      return tok_def;
 139    if (IdentifierStr == "extern")
 140      return tok_extern;
 141    if (IdentifierStr == "if")
 142      return tok_if;
 143    if (IdentifierStr == "then")
 144      return tok_then;
 145    if (IdentifierStr == "else")
 146      return tok_else;
 147    if (IdentifierStr == "for")
 148      return tok_for;
 149    if (IdentifierStr == "in")
 150      return tok_in;
 151    if (IdentifierStr == "binary")
 152      return tok_binary;
 153    if (IdentifierStr == "unary")
 154      return tok_unary;
 155    if (IdentifierStr == "var")
 156      return tok_var;
 157    return tok_identifier;
 158  }
 159
 160  if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+
 161    std::string NumStr;
 162    do {
 163      NumStr += LastChar;
 164      LastChar = advance();
 165    } while (isdigit(LastChar) || LastChar == '.');
 166
 167    NumVal = strtod(NumStr.c_str(), nullptr);
 168    return tok_number;
 169  }
 170
 171  if (LastChar == '#') {
 172    // Comment until end of line.
 173    do
 174      LastChar = advance();
 175    while (LastChar != EOF && LastChar != '\n' && LastChar != '\r');
 176
 177    if (LastChar != EOF)
 178      return gettok();
 179  }
 180
 181  // Check for end of file.  Don't eat the EOF.
 182  if (LastChar == EOF)
 183    return tok_eof;
 184
 185  // Otherwise, just return the character as its ascii value.
 186  int ThisChar = LastChar;
 187  LastChar = advance();
 188  return ThisChar;
 189}
 190
 191//===----------------------------------------------------------------------===//
 192// Abstract Syntax Tree (aka Parse Tree)
 193//===----------------------------------------------------------------------===//
 194namespace {
 195
 196raw_ostream &indent(raw_ostream &O, int size) {
 197  return O << std::string(size, ' ');
 198}
 199
 200/// ExprAST - Base class for all expression nodes.
 201class ExprAST {
 202  SourceLocation Loc;
 203
 204public:
 205  ExprAST(SourceLocation Loc = CurLoc) : Loc(Loc) {}
 206  virtual ~ExprAST() {}
 207  virtual Value *codegen() = 0;
 208  int getLine() const { return Loc.Line; }
 209  int getCol() const { return Loc.Col; }
 210  virtual raw_ostream &dump(raw_ostream &out, int ind) {
 211    return out << ':' << getLine() << ':' << getCol() << '\n';
 212  }
 213};
 214
 215/// NumberExprAST - Expression class for numeric literals like "1.0".
 216class NumberExprAST : public ExprAST {
 217  double Val;
 218
 219public:
 220  NumberExprAST(double Val) : Val(Val) {}
 221  raw_ostream &dump(raw_ostream &out, int ind) override {
 222    return ExprAST::dump(out << Val, ind);
 223  }
 224  Value *codegen() override;
 225};
 226
 227/// VariableExprAST - Expression class for referencing a variable, like "a".
 228class VariableExprAST : public ExprAST {
 229  std::string Name;
 230
 231public:
 232  VariableExprAST(SourceLocation Loc, const std::string &Name)
 233      : ExprAST(Loc), Name(Name) {}
 234  const std::string &getName() const { return Name; }
 235  Value *codegen() override;
 236  raw_ostream &dump(raw_ostream &out, int ind) override {
 237    return ExprAST::dump(out << Name, ind);
 238  }
 239};
 240
 241/// UnaryExprAST - Expression class for a unary operator.
 242class UnaryExprAST : public ExprAST {
 243  char Opcode;
 244  std::unique_ptr<ExprAST> Operand;
 245
 246public:
 247  UnaryExprAST(char Opcode, std::unique_ptr<ExprAST> Operand)
 248      : Opcode(Opcode), Operand(std::move(Operand)) {}
 249  Value *codegen() override;
 250  raw_ostream &dump(raw_ostream &out, int ind) override {
 251    ExprAST::dump(out << "unary" << Opcode, ind);
 252    Operand->dump(out, ind + 1);
 253    return out;
 254  }
 255};
 256
 257/// BinaryExprAST - Expression class for a binary operator.
 258class BinaryExprAST : public ExprAST {
 259  char Op;
 260  std::unique_ptr<ExprAST> LHS, RHS;
 261
 262public:
 263  BinaryExprAST(SourceLocation Loc, char Op, std::unique_ptr<ExprAST> LHS,
 264                std::unique_ptr<ExprAST> RHS)
 265      : ExprAST(Loc), Op(Op), LHS(std::move(LHS)), RHS(std::move(RHS)) {}
 266  Value *codegen() override;
 267  raw_ostream &dump(raw_ostream &out, int ind) override {
 268    ExprAST::dump(out << "binary" << Op, ind);
 269    LHS->dump(indent(out, ind) << "LHS:", ind + 1);
 270    RHS->dump(indent(out, ind) << "RHS:", ind + 1);
 271    return out;
 272  }
 273};
 274
 275/// CallExprAST - Expression class for function calls.
 276class CallExprAST : public ExprAST {
 277  std::string Callee;
 278  std::vector<std::unique_ptr<ExprAST>> Args;
 279
 280public:
 281  CallExprAST(SourceLocation Loc, const std::string &Callee,
 282              std::vector<std::unique_ptr<ExprAST>> Args)
 283      : ExprAST(Loc), Callee(Callee), Args(std::move(Args)) {}
 284  Value *codegen() override;
 285  raw_ostream &dump(raw_ostream &out, int ind) override {
 286    ExprAST::dump(out << "call " << Callee, ind);
 287    for (const auto &Arg : Args)
 288      Arg->dump(indent(out, ind + 1), ind + 1);
 289    return out;
 290  }
 291};
 292
 293/// IfExprAST - Expression class for if/then/else.
 294class IfExprAST : public ExprAST {
 295  std::unique_ptr<ExprAST> Cond, Then, Else;
 296
 297public:
 298  IfExprAST(SourceLocation Loc, std::unique_ptr<ExprAST> Cond,
 299            std::unique_ptr<ExprAST> Then, std::unique_ptr<ExprAST> Else)
 300      : ExprAST(Loc), Cond(std::move(Cond)), Then(std::move(Then)),
 301        Else(std::move(Else)) {}
 302  Value *codegen() override;
 303  raw_ostream &dump(raw_ostream &out, int ind) override {
 304    ExprAST::dump(out << "if", ind);
 305    Cond->dump(indent(out, ind) << "Cond:", ind + 1);
 306    Then->dump(indent(out, ind) << "Then:", ind + 1);
 307    Else->dump(indent(out, ind) << "Else:", ind + 1);
 308    return out;
 309  }
 310};
 311
 312/// ForExprAST - Expression class for for/in.
 313class ForExprAST : public ExprAST {
 314  std::string VarName;
 315  std::unique_ptr<ExprAST> Start, End, Step, Body;
 316
 317public:
 318  ForExprAST(const std::string &VarName, std::unique_ptr<ExprAST> Start,
 319             std::unique_ptr<ExprAST> End, std::unique_ptr<ExprAST> Step,
 320             std::unique_ptr<ExprAST> Body)
 321      : VarName(VarName), Start(std::move(Start)), End(std::move(End)),
 322        Step(std::move(Step)), Body(std::move(Body)) {}
 323  Value *codegen() override;
 324  raw_ostream &dump(raw_ostream &out, int ind) override {
 325    ExprAST::dump(out << "for", ind);
 326    Start->dump(indent(out, ind) << "Cond:", ind + 1);
 327    End->dump(indent(out, ind) << "End:", ind + 1);
 328    Step->dump(indent(out, ind) << "Step:", ind + 1);
 329    Body->dump(indent(out, ind) << "Body:", ind + 1);
 330    return out;
 331  }
 332};
 333
 334/// VarExprAST - Expression class for var/in
 335class VarExprAST : public ExprAST {
 336  std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames;
 337  std::unique_ptr<ExprAST> Body;
 338
 339public:
 340  VarExprAST(
 341      std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames,
 342      std::unique_ptr<ExprAST> Body)
 343      : VarNames(std::move(VarNames)), Body(std::move(Body)) {}
 344  Value *codegen() override;
 345  raw_ostream &dump(raw_ostream &out, int ind) override {
 346    ExprAST::dump(out << "var", ind);
 347    for (const auto &NamedVar : VarNames)
 348      NamedVar.second->dump(indent(out, ind) << NamedVar.first << ':', ind + 1);
 349    Body->dump(indent(out, ind) << "Body:", ind + 1);
 350    return out;
 351  }
 352};
 353
 354/// PrototypeAST - This class represents the "prototype" for a function,
 355/// which captures its name, and its argument names (thus implicitly the number
 356/// of arguments the function takes), as well as if it is an operator.
 357class PrototypeAST {
 358  std::string Name;
 359  std::vector<std::string> Args;
 360  bool IsOperator;
 361  unsigned Precedence; // Precedence if a binary op.
 362  int Line;
 363
 364public:
 365  PrototypeAST(SourceLocation Loc, const std::string &Name,
 366               std::vector<std::string> Args, bool IsOperator = false,
 367               unsigned Prec = 0)
 368      : Name(Name), Args(std::move(Args)), IsOperator(IsOperator),
 369        Precedence(Prec), Line(Loc.Line) {}
 370  Function *codegen();
 371  const std::string &getName() const { return Name; }
 372
 373  bool isUnaryOp() const { return IsOperator && Args.size() == 1; }
 374  bool isBinaryOp() const { return IsOperator && Args.size() == 2; }
 375
 376  char getOperatorName() const {
 377    assert(isUnaryOp() || isBinaryOp());
 378    return Name[Name.size() - 1];
 379  }
 380
 381  unsigned getBinaryPrecedence() const { return Precedence; }
 382  int getLine() const { return Line; }
 383};
 384
 385/// FunctionAST - This class represents a function definition itself.
 386class FunctionAST {
 387  std::unique_ptr<PrototypeAST> Proto;
 388  std::unique_ptr<ExprAST> Body;
 389
 390public:
 391  FunctionAST(std::unique_ptr<PrototypeAST> Proto,
 392              std::unique_ptr<ExprAST> Body)
 393      : Proto(std::move(Proto)), Body(std::move(Body)) {}
 394  Function *codegen();
 395  raw_ostream &dump(raw_ostream &out, int ind) {
 396    indent(out, ind) << "FunctionAST\n";
 397    ++ind;
 398    indent(out, ind) << "Body:";
 399    return Body ? Body->dump(out, ind) : out << "null\n";
 400  }
 401};
 402} // end anonymous namespace
 403
 404//===----------------------------------------------------------------------===//
 405// Parser
 406//===----------------------------------------------------------------------===//
 407
 408/// CurTok/getNextToken - Provide a simple token buffer.  CurTok is the current
 409/// token the parser is looking at.  getNextToken reads another token from the
 410/// lexer and updates CurTok with its results.
 411static int CurTok;
 412static int getNextToken() { return CurTok = gettok(); }
 413
 414/// BinopPrecedence - This holds the precedence for each binary operator that is
 415/// defined.
 416static std::map<char, int> BinopPrecedence;
 417
 418/// GetTokPrecedence - Get the precedence of the pending binary operator token.
 419static int GetTokPrecedence() {
 420  if (!isascii(CurTok))
 421    return -1;
 422
 423  // Make sure it's a declared binop.
 424  int TokPrec = BinopPrecedence[CurTok];
 425  if (TokPrec <= 0)
 426    return -1;
 427  return TokPrec;
 428}
 429
 430/// LogError* - These are little helper functions for error handling.
 431std::unique_ptr<ExprAST> LogError(const char *Str) {
 432  fprintf(stderr, "Error: %s\n", Str);
 433  return nullptr;
 434}
 435
 436std::unique_ptr<PrototypeAST> LogErrorP(const char *Str) {
 437  LogError(Str);
 438  return nullptr;
 439}
 440
 441static std::unique_ptr<ExprAST> ParseExpression();
 442
 443/// numberexpr ::= number
 444static std::unique_ptr<ExprAST> ParseNumberExpr() {
 445  auto Result = std::make_unique<NumberExprAST>(NumVal);
 446  getNextToken(); // consume the number
 447  return std::move(Result);
 448}
 449
 450/// parenexpr ::= '(' expression ')'
 451static std::unique_ptr<ExprAST> ParseParenExpr() {
 452  getNextToken(); // eat (.
 453  auto V = ParseExpression();
 454  if (!V)
 455    return nullptr;
 456
 457  if (CurTok != ')')
 458    return LogError("expected ')'");
 459  getNextToken(); // eat ).
 460  return V;
 461}
 462
 463/// identifierexpr
 464///   ::= identifier
 465///   ::= identifier '(' expression* ')'
 466static std::unique_ptr<ExprAST> ParseIdentifierExpr() {
 467  std::string IdName = IdentifierStr;
 468
 469  SourceLocation LitLoc = CurLoc;
 470
 471  getNextToken(); // eat identifier.
 472
 473  if (CurTok != '(') // Simple variable ref.
 474    return std::make_unique<VariableExprAST>(LitLoc, IdName);
 475
 476  // Call.
 477  getNextToken(); // eat (
 478  std::vector<std::unique_ptr<ExprAST>> Args;
 479  if (CurTok != ')') {
 480    while (true) {
 481      if (auto Arg = ParseExpression())
 482        Args.push_back(std::move(Arg));
 483      else
 484        return nullptr;
 485
 486      if (CurTok == ')')
 487        break;
 488
 489      if (CurTok != ',')
 490        return LogError("Expected ')' or ',' in argument list");
 491      getNextToken();
 492    }
 493  }
 494
 495  // Eat the ')'.
 496  getNextToken();
 497
 498  return std::make_unique<CallExprAST>(LitLoc, IdName, std::move(Args));
 499}
 500
 501/// ifexpr ::= 'if' expression 'then' expression 'else' expression
 502static std::unique_ptr<ExprAST> ParseIfExpr() {
 503  SourceLocation IfLoc = CurLoc;
 504
 505  getNextToken(); // eat the if.
 506
 507  // condition.
 508  auto Cond = ParseExpression();
 509  if (!Cond)
 510    return nullptr;
 511
 512  if (CurTok != tok_then)
 513    return LogError("expected then");
 514  getNextToken(); // eat the then
 515
 516  auto Then = ParseExpression();
 517  if (!Then)
 518    return nullptr;
 519
 520  if (CurTok != tok_else)
 521    return LogError("expected else");
 522
 523  getNextToken();
 524
 525  auto Else = ParseExpression();
 526  if (!Else)
 527    return nullptr;
 528
 529  return std::make_unique<IfExprAST>(IfLoc, std::move(Cond), std::move(Then),
 530                                      std::move(Else));
 531}
 532
 533/// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression
 534static std::unique_ptr<ExprAST> ParseForExpr() {
 535  getNextToken(); // eat the for.
 536
 537  if (CurTok != tok_identifier)
 538    return LogError("expected identifier after for");
 539
 540  std::string IdName = IdentifierStr;
 541  getNextToken(); // eat identifier.
 542
 543  if (CurTok != '=')
 544    return LogError("expected '=' after for");
 545  getNextToken(); // eat '='.
 546
 547  auto Start = ParseExpression();
 548  if (!Start)
 549    return nullptr;
 550  if (CurTok != ',')
 551    return LogError("expected ',' after for start value");
 552  getNextToken();
 553
 554  auto End = ParseExpression();
 555  if (!End)
 556    return nullptr;
 557
 558  // The step value is optional.
 559  std::unique_ptr<ExprAST> Step;
 560  if (CurTok == ',') {
 561    getNextToken();
 562    Step = ParseExpression();
 563    if (!Step)
 564      return nullptr;
 565  }
 566
 567  if (CurTok != tok_in)
 568    return LogError("expected 'in' after for");
 569  getNextToken(); // eat 'in'.
 570
 571  auto Body = ParseExpression();
 572  if (!Body)
 573    return nullptr;
 574
 575  return std::make_unique<ForExprAST>(IdName, std::move(Start), std::move(End),
 576                                       std::move(Step), std::move(Body));
 577}
 578
 579/// varexpr ::= 'var' identifier ('=' expression)?
 580//                    (',' identifier ('=' expression)?)* 'in' expression
 581static std::unique_ptr<ExprAST> ParseVarExpr() {
 582  getNextToken(); // eat the var.
 583
 584  std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames;
 585
 586  // At least one variable name is required.
 587  if (CurTok != tok_identifier)
 588    return LogError("expected identifier after var");
 589
 590  while (true) {
 591    std::string Name = IdentifierStr;
 592    getNextToken(); // eat identifier.
 593
 594    // Read the optional initializer.
 595    std::unique_ptr<ExprAST> Init = nullptr;
 596    if (CurTok == '=') {
 597      getNextToken(); // eat the '='.
 598
 599      Init = ParseExpression();
 600      if (!Init)
 601        return nullptr;
 602    }
 603
 604    VarNames.push_back(std::make_pair(Name, std::move(Init)));
 605
 606    // End of var list, exit loop.
 607    if (CurTok != ',')
 608      break;
 609    getNextToken(); // eat the ','.
 610
 611    if (CurTok != tok_identifier)
 612      return LogError("expected identifier list after var");
 613  }
 614
 615  // At this point, we have to have 'in'.
 616  if (CurTok != tok_in)
 617    return LogError("expected 'in' keyword after 'var'");
 618  getNextToken(); // eat 'in'.
 619
 620  auto Body = ParseExpression();
 621  if (!Body)
 622    return nullptr;
 623
 624  return std::make_unique<VarExprAST>(std::move(VarNames), std::move(Body));
 625}
 626
 627/// primary
 628///   ::= identifierexpr
 629///   ::= numberexpr
 630///   ::= parenexpr
 631///   ::= ifexpr
 632///   ::= forexpr
 633///   ::= varexpr
 634static std::unique_ptr<ExprAST> ParsePrimary() {
 635  switch (CurTok) {
 636  default:
 637    return LogError("unknown token when expecting an expression");
 638  case tok_identifier:
 639    return ParseIdentifierExpr();
 640  case tok_number:
 641    return ParseNumberExpr();
 642  case '(':
 643    return ParseParenExpr();
 644  case tok_if:
 645    return ParseIfExpr();
 646  case tok_for:
 647    return ParseForExpr();
 648  case tok_var:
 649    return ParseVarExpr();
 650  }
 651}
 652
 653/// unary
 654///   ::= primary
 655///   ::= '!' unary
 656static std::unique_ptr<ExprAST> ParseUnary() {
 657  // If the current token is not an operator, it must be a primary expr.
 658  if (!isascii(CurTok) || CurTok == '(' || CurTok == ',')
 659    return ParsePrimary();
 660
 661  // If this is a unary operator, read it.
 662  int Opc = CurTok;
 663  getNextToken();
 664  if (auto Operand = ParseUnary())
 665    return std::make_unique<UnaryExprAST>(Opc, std::move(Operand));
 666  return nullptr;
 667}
 668
 669/// binoprhs
 670///   ::= ('+' unary)*
 671static std::unique_ptr<ExprAST> ParseBinOpRHS(int ExprPrec,
 672                                              std::unique_ptr<ExprAST> LHS) {
 673  // If this is a binop, find its precedence.
 674  while (true) {
 675    int TokPrec = GetTokPrecedence();
 676
 677    // If this is a binop that binds at least as tightly as the current binop,
 678    // consume it, otherwise we are done.
 679    if (TokPrec < ExprPrec)
 680      return LHS;
 681
 682    // Okay, we know this is a binop.
 683    int BinOp = CurTok;
 684    SourceLocation BinLoc = CurLoc;
 685    getNextToken(); // eat binop
 686
 687    // Parse the unary expression after the binary operator.
 688    auto RHS = ParseUnary();
 689    if (!RHS)
 690      return nullptr;
 691
 692    // If BinOp binds less tightly with RHS than the operator after RHS, let
 693    // the pending operator take RHS as its LHS.
 694    int NextPrec = GetTokPrecedence();
 695    if (TokPrec < NextPrec) {
 696      RHS = ParseBinOpRHS(TokPrec + 1, std::move(RHS));
 697      if (!RHS)
 698        return nullptr;
 699    }
 700
 701    // Merge LHS/RHS.
 702    LHS = std::make_unique<BinaryExprAST>(BinLoc, BinOp, std::move(LHS),
 703                                           std::move(RHS));
 704  }
 705}
 706
 707/// expression
 708///   ::= unary binoprhs
 709///
 710static std::unique_ptr<ExprAST> ParseExpression() {
 711  auto LHS = ParseUnary();
 712  if (!LHS)
 713    return nullptr;
 714
 715  return ParseBinOpRHS(0, std::move(LHS));
 716}
 717
 718/// prototype
 719///   ::= id '(' id* ')'
 720///   ::= binary LETTER number? (id, id)
 721///   ::= unary LETTER (id)
 722static std::unique_ptr<PrototypeAST> ParsePrototype() {
 723  std::string FnName;
 724
 725  SourceLocation FnLoc = CurLoc;
 726
 727  unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary.
 728  unsigned BinaryPrecedence = 30;
 729
 730  switch (CurTok) {
 731  default:
 732    return LogErrorP("Expected function name in prototype");
 733  case tok_identifier:
 734    FnName = IdentifierStr;
 735    Kind = 0;
 736    getNextToken();
 737    break;
 738  case tok_unary:
 739    getNextToken();
 740    if (!isascii(CurTok))
 741      return LogErrorP("Expected unary operator");
 742    FnName = "unary";
 743    FnName += (char)CurTok;
 744    Kind = 1;
 745    getNextToken();
 746    break;
 747  case tok_binary:
 748    getNextToken();
 749    if (!isascii(CurTok))
 750      return LogErrorP("Expected binary operator");
 751    FnName = "binary";
 752    FnName += (char)CurTok;
 753    Kind = 2;
 754    getNextToken();
 755
 756    // Read the precedence if present.
 757    if (CurTok == tok_number) {
 758      if (NumVal < 1 || NumVal > 100)
 759        return LogErrorP("Invalid precedence: must be 1..100");
 760      BinaryPrecedence = (unsigned)NumVal;
 761      getNextToken();
 762    }
 763    break;
 764  }
 765
 766  if (CurTok != '(')
 767    return LogErrorP("Expected '(' in prototype");
 768
 769  std::vector<std::string> ArgNames;
 770  while (getNextToken() == tok_identifier)
 771    ArgNames.push_back(IdentifierStr);
 772  if (CurTok != ')')
 773    return LogErrorP("Expected ')' in prototype");
 774
 775  // success.
 776  getNextToken(); // eat ')'.
 777
 778  // Verify right number of names for operator.
 779  if (Kind && ArgNames.size() != Kind)
 780    return LogErrorP("Invalid number of operands for operator");
 781
 782  return std::make_unique<PrototypeAST>(FnLoc, FnName, ArgNames, Kind != 0,
 783                                         BinaryPrecedence);
 784}
 785
 786/// definition ::= 'def' prototype expression
 787static std::unique_ptr<FunctionAST> ParseDefinition() {
 788  getNextToken(); // eat def.
 789  auto Proto = ParsePrototype();
 790  if (!Proto)
 791    return nullptr;
 792
 793  if (auto E = ParseExpression())
 794    return std::make_unique<FunctionAST>(std::move(Proto), std::move(E));
 795  return nullptr;
 796}
 797
 798/// toplevelexpr ::= expression
 799static std::unique_ptr<FunctionAST> ParseTopLevelExpr() {
 800  SourceLocation FnLoc = CurLoc;
 801  if (auto E = ParseExpression()) {
 802    // Make an anonymous proto.
 803    auto Proto = std::make_unique<PrototypeAST>(FnLoc, "__anon_expr",
 804                                                 std::vector<std::string>());
 805    return std::make_unique<FunctionAST>(std::move(Proto), std::move(E));
 806  }
 807  return nullptr;
 808}
 809
 810/// external ::= 'extern' prototype
 811static std::unique_ptr<PrototypeAST> ParseExtern() {
 812  getNextToken(); // eat extern.
 813  return ParsePrototype();
 814}
 815
 816//===----------------------------------------------------------------------===//
 817// Code Generation Globals
 818//===----------------------------------------------------------------------===//
 819
 820static std::unique_ptr<LLVMContext> TheContext;
 821static std::unique_ptr<Module> TheModule;
 822static std::unique_ptr<IRBuilder<>> Builder;
 823static ExitOnError ExitOnErr;
 824
 825static std::map<std::string, AllocaInst *> NamedValues;
 826static std::unique_ptr<KaleidoscopeJIT> TheJIT;
 827static std::map<std::string, std::unique_ptr<PrototypeAST>> FunctionProtos;
 828
 829//===----------------------------------------------------------------------===//
 830// Debug Info Support
 831//===----------------------------------------------------------------------===//
 832
 833static std::unique_ptr<DIBuilder> DBuilder;
 834
 835DIType *DebugInfo::getDoubleTy() {
 836  if (DblTy)
 837    return DblTy;
 838
 839  DblTy = DBuilder->createBasicType("double", 64, dwarf::DW_ATE_float);
 840  return DblTy;
 841}
 842
 843void DebugInfo::emitLocation(ExprAST *AST) {
 844  if (!AST)
 845    return Builder->SetCurrentDebugLocation(DebugLoc());
 846  DIScope *Scope;
 847  if (LexicalBlocks.empty())
 848    Scope = TheCU;
 849  else
 850    Scope = LexicalBlocks.back();
 851  Builder->SetCurrentDebugLocation(DILocation::get(
 852      Scope->getContext(), AST->getLine(), AST->getCol(), Scope));
 853}
 854
 855static DISubroutineType *CreateFunctionType(unsigned NumArgs, DIFile *Unit) {
 856  SmallVector<Metadata *, 8> EltTys;
 857  DIType *DblTy = KSDbgInfo.getDoubleTy();
 858
 859  // Add the result type.
 860  EltTys.push_back(DblTy);
 861
 862  for (unsigned i = 0, e = NumArgs; i != e; ++i)
 863    EltTys.push_back(DblTy);
 864
 865  return DBuilder->createSubroutineType(DBuilder->getOrCreateTypeArray(EltTys));
 866}
 867
 868//===----------------------------------------------------------------------===//
 869// Code Generation
 870//===----------------------------------------------------------------------===//
 871
 872Value *LogErrorV(const char *Str) {
 873  LogError(Str);
 874  return nullptr;
 875}
 876
 877Function *getFunction(std::string Name) {
 878  // First, see if the function has already been added to the current module.
 879  if (auto *F = TheModule->getFunction(Name))
 880    return F;
 881
 882  // If not, check whether we can codegen the declaration from some existing
 883  // prototype.
 884  auto FI = FunctionProtos.find(Name);
 885  if (FI != FunctionProtos.end())
 886    return FI->second->codegen();
 887
 888  // If no existing prototype exists, return null.
 889  return nullptr;
 890}
 891
 892/// CreateEntryBlockAlloca - Create an alloca instruction in the entry block of
 893/// the function.  This is used for mutable variables etc.
 894static AllocaInst *CreateEntryBlockAlloca(Function *TheFunction,
 895                                          StringRef VarName) {
 896  IRBuilder<> TmpB(&TheFunction->getEntryBlock(),
 897                   TheFunction->getEntryBlock().begin());
 898  return TmpB.CreateAlloca(Type::getDoubleTy(*TheContext), nullptr, VarName);
 899}
 900
 901Value *NumberExprAST::codegen() {
 902  KSDbgInfo.emitLocation(this);
 903  return ConstantFP::get(*TheContext, APFloat(Val));
 904}
 905
 906Value *VariableExprAST::codegen() {
 907  // Look this variable up in the function.
 908  Value *V = NamedValues[Name];
 909  if (!V)
 910    return LogErrorV("Unknown variable name");
 911
 912  KSDbgInfo.emitLocation(this);
 913  // Load the value.
 914  return Builder->CreateLoad(Type::getDoubleTy(*TheContext), V, Name.c_str());
 915}
 916
 917Value *UnaryExprAST::codegen() {
 918  Value *OperandV = Operand->codegen();
 919  if (!OperandV)
 920    return nullptr;
 921
 922  Function *F = getFunction(std::string("unary") + Opcode);
 923  if (!F)
 924    return LogErrorV("Unknown unary operator");
 925
 926  KSDbgInfo.emitLocation(this);
 927  return Builder->CreateCall(F, OperandV, "unop");
 928}
 929
 930Value *BinaryExprAST::codegen() {
 931  KSDbgInfo.emitLocation(this);
 932
 933  // Special case '=' because we don't want to emit the LHS as an expression.
 934  if (Op == '=') {
 935    // Assignment requires the LHS to be an identifier.
 936    // This assume we're building without RTTI because LLVM builds that way by
 937    // default.  If you build LLVM with RTTI this can be changed to a
 938    // dynamic_cast for automatic error checking.
 939    VariableExprAST *LHSE = static_cast<VariableExprAST *>(LHS.get());
 940    if (!LHSE)
 941      return LogErrorV("destination of '=' must be a variable");
 942    // Codegen the RHS.
 943    Value *Val = RHS->codegen();
 944    if (!Val)
 945      return nullptr;
 946
 947    // Look up the name.
 948    Value *Variable = NamedValues[LHSE->getName()];
 949    if (!Variable)
 950      return LogErrorV("Unknown variable name");
 951
 952    Builder->CreateStore(Val, Variable);
 953    return Val;
 954  }
 955
 956  Value *L = LHS->codegen();
 957  Value *R = RHS->codegen();
 958  if (!L || !R)
 959    return nullptr;
 960
 961  switch (Op) {
 962  case '+':
 963    return Builder->CreateFAdd(L, R, "addtmp");
 964  case '-':
 965    return Builder->CreateFSub(L, R, "subtmp");
 966  case '*':
 967    return Builder->CreateFMul(L, R, "multmp");
 968  case '<':
 969    L = Builder->CreateFCmpULT(L, R, "cmptmp");
 970    // Convert bool 0/1 to double 0.0 or 1.0
 971    return Builder->CreateUIToFP(L, Type::getDoubleTy(*TheContext), "booltmp");
 972  default:
 973    break;
 974  }
 975
 976  // If it wasn't a builtin binary operator, it must be a user defined one. Emit
 977  // a call to it.
 978  Function *F = getFunction(std::string("binary") + Op);
 979  assert(F && "binary operator not found!");
 980
 981  Value *Ops[] = {L, R};
 982  return Builder->CreateCall(F, Ops, "binop");
 983}
 984
 985Value *CallExprAST::codegen() {
 986  KSDbgInfo.emitLocation(this);
 987
 988  // Look up the name in the global module table.
 989  Function *CalleeF = getFunction(Callee);
 990  if (!CalleeF)
 991    return LogErrorV("Unknown function referenced");
 992
 993  // If argument mismatch error.
 994  if (CalleeF->arg_size() != Args.size())
 995    return LogErrorV("Incorrect # arguments passed");
 996
 997  std::vector<Value *> ArgsV;
 998  for (unsigned i = 0, e = Args.size(); i != e; ++i) {
 999    ArgsV.push_back(Args[i]->codegen());
1000    if (!ArgsV.back())
1001      return nullptr;
1002  }
1003
1004  return Builder->CreateCall(CalleeF, ArgsV, "calltmp");
1005}
1006
1007Value *IfExprAST::codegen() {
1008  KSDbgInfo.emitLocation(this);
1009
1010  Value *CondV = Cond->codegen();
1011  if (!CondV)
1012    return nullptr;
1013
1014  // Convert condition to a bool by comparing non-equal to 0.0.
1015  CondV = Builder->CreateFCmpONE(
1016      CondV, ConstantFP::get(*TheContext, APFloat(0.0)), "ifcond");
1017
1018  Function *TheFunction = Builder->GetInsertBlock()->getParent();
1019
1020  // Create blocks for the then and else cases.  Insert the 'then' block at the
1021  // end of the function.
1022  BasicBlock *ThenBB = BasicBlock::Create(*TheContext, "then", TheFunction);
1023  BasicBlock *ElseBB = BasicBlock::Create(*TheContext, "else");
1024  BasicBlock *MergeBB = BasicBlock::Create(*TheContext, "ifcont");
1025
1026  Builder->CreateCondBr(CondV, ThenBB, ElseBB);
1027
1028  // Emit then value.
1029  Builder->SetInsertPoint(ThenBB);
1030
1031  Value *ThenV = Then->codegen();
1032  if (!ThenV)
1033    return nullptr;
1034
1035  Builder->CreateBr(MergeBB);
1036  // Codegen of 'Then' can change the current block, update ThenBB for the PHI.
1037  ThenBB = Builder->GetInsertBlock();
1038
1039  // Emit else block.
1040  TheFunction->getBasicBlockList().push_back(ElseBB);
1041  Builder->SetInsertPoint(ElseBB);
1042
1043  Value *ElseV = Else->codegen();
1044  if (!ElseV)
1045    return nullptr;
1046
1047  Builder->CreateBr(MergeBB);
1048  // Codegen of 'Else' can change the current block, update ElseBB for the PHI.
1049  ElseBB = Builder->GetInsertBlock();
1050
1051  // Emit merge block.
1052  TheFunction->getBasicBlockList().push_back(MergeBB);
1053  Builder->SetInsertPoint(MergeBB);
1054  PHINode *PN = Builder->CreatePHI(Type::getDoubleTy(*TheContext), 2, "iftmp");
1055
1056  PN->addIncoming(ThenV, ThenBB);
1057  PN->addIncoming(ElseV, ElseBB);
1058  return PN;
1059}
1060
1061// Output for-loop as:
1062//   var = alloca double
1063//   ...
1064//   start = startexpr
1065//   store start -> var
1066//   goto loop
1067// loop:
1068//   ...
1069//   bodyexpr
1070//   ...
1071// loopend:
1072//   step = stepexpr
1073//   endcond = endexpr
1074//
1075//   curvar = load var
1076//   nextvar = curvar + step
1077//   store nextvar -> var
1078//   br endcond, loop, endloop
1079// outloop:
1080Value *ForExprAST::codegen() {
1081  Function *TheFunction = Builder->GetInsertBlock()->getParent();
1082
1083  // Create an alloca for the variable in the entry block.
1084  AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);
1085
1086  KSDbgInfo.emitLocation(this);
1087
1088  // Emit the start code first, without 'variable' in scope.
1089  Value *StartVal = Start->codegen();
1090  if (!StartVal)
1091    return nullptr;
1092
1093  // Store the value into the alloca.
1094  Builder->CreateStore(StartVal, Alloca);
1095
1096  // Make the new basic block for the loop header, inserting after current
1097  // block.
1098  BasicBlock *LoopBB = BasicBlock::Create(*TheContext, "loop", TheFunction);
1099
1100  // Insert an explicit fall through from the current block to the LoopBB.
1101  Builder->CreateBr(LoopBB);
1102
1103  // Start insertion in LoopBB.
1104  Builder->SetInsertPoint(LoopBB);
1105
1106  // Within the loop, the variable is defined equal to the PHI node.  If it
1107  // shadows an existing variable, we have to restore it, so save it now.
1108  AllocaInst *OldVal = NamedValues[VarName];
1109  NamedValues[VarName] = Alloca;
1110
1111  // Emit the body of the loop.  This, like any other expr, can change the
1112  // current BB.  Note that we ignore the value computed by the body, but don't
1113  // allow an error.
1114  if (!Body->codegen())
1115    return nullptr;
1116
1117  // Emit the step value.
1118  Value *StepVal = nullptr;
1119  if (Step) {
1120    StepVal = Step->codegen();
1121    if (!StepVal)
1122      return nullptr;
1123  } else {
1124    // If not specified, use 1.0.
1125    StepVal = ConstantFP::get(*TheContext, APFloat(1.0));
1126  }
1127
1128  // Compute the end condition.
1129  Value *EndCond = End->codegen();
1130  if (!EndCond)
1131    return nullptr;
1132
1133  // Reload, increment, and restore the alloca.  This handles the case where
1134  // the body of the loop mutates the variable.
1135  Value *CurVar = Builder->CreateLoad(Type::getDoubleTy(*TheContext), Alloca,
1136                                      VarName.c_str());
1137  Value *NextVar = Builder->CreateFAdd(CurVar, StepVal, "nextvar");
1138  Builder->CreateStore(NextVar, Alloca);
1139
1140  // Convert condition to a bool by comparing non-equal to 0.0.
1141  EndCond = Builder->CreateFCmpONE(
1142      EndCond, ConstantFP::get(*TheContext, APFloat(0.0)), "loopcond");
1143
1144  // Create the "after loop" block and insert it.
1145  BasicBlock *AfterBB =
1146      BasicBlock::Create(*TheContext, "afterloop", TheFunction);
1147
1148  // Insert the conditional branch into the end of LoopEndBB.
1149  Builder->CreateCondBr(EndCond, LoopBB, AfterBB);
1150
1151  // Any new code will be inserted in AfterBB.
1152  Builder->SetInsertPoint(AfterBB);
1153
1154  // Restore the unshadowed variable.
1155  if (OldVal)
1156    NamedValues[VarName] = OldVal;
1157  else
1158    NamedValues.erase(VarName);
1159
1160  // for expr always returns 0.0.
1161  return Constant::getNullValue(Type::getDoubleTy(*TheContext));
1162}
1163
1164Value *VarExprAST::codegen() {
1165  std::vector<AllocaInst *> OldBindings;
1166
1167  Function *TheFunction = Builder->GetInsertBlock()->getParent();
1168
1169  // Register all variables and emit their initializer.
1170  for (unsigned i = 0, e = VarNames.size(); i != e; ++i) {
1171    const std::string &VarName = VarNames[i].first;
1172    ExprAST *Init = VarNames[i].second.get();
1173
1174    // Emit the initializer before adding the variable to scope, this prevents
1175    // the initializer from referencing the variable itself, and permits stuff
1176    // like this:
1177    //  var a = 1 in
1178    //    var a = a in ...   # refers to outer 'a'.
1179    Value *InitVal;
1180    if (Init) {
1181      InitVal = Init->codegen();
1182      if (!InitVal)
1183        return nullptr;
1184    } else { // If not specified, use 0.0.
1185      InitVal = ConstantFP::get(*TheContext, APFloat(0.0));
1186    }
1187
1188    AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);
1189    Builder->CreateStore(InitVal, Alloca);
1190
1191    // Remember the old variable binding so that we can restore the binding when
1192    // we unrecurse.
1193    OldBindings.push_back(NamedValues[VarName]);
1194
1195    // Remember this binding.
1196    NamedValues[VarName] = Alloca;
1197  }
1198
1199  KSDbgInfo.emitLocation(this);
1200
1201  // Codegen the body, now that all vars are in scope.
1202  Value *BodyVal = Body->codegen();
1203  if (!BodyVal)
1204    return nullptr;
1205
1206  // Pop all our variables from scope.
1207  for (unsigned i = 0, e = VarNames.size(); i != e; ++i)
1208    NamedValues[VarNames[i].first] = OldBindings[i];
1209
1210  // Return the body computation.
1211  return BodyVal;
1212}
1213
1214Function *PrototypeAST::codegen() {
1215  // Make the function type:  double(double,double) etc.
1216  std::vector<Type *> Doubles(Args.size(), Type::getDoubleTy(*TheContext));
1217  FunctionType *FT =
1218      FunctionType::get(Type::getDoubleTy(*TheContext), Doubles, false);
1219
1220  Function *F =
1221      Function::Create(FT, Function::ExternalLinkage, Name, TheModule.get());
1222
1223  // Set names for all arguments.
1224  unsigned Idx = 0;
1225  for (auto &Arg : F->args())
1226    Arg.setName(Args[Idx++]);
1227
1228  return F;
1229}
1230
1231Function *FunctionAST::codegen() {
1232  // Transfer ownership of the prototype to the FunctionProtos map, but keep a
1233  // reference to it for use below.
1234  auto &P = *Proto;
1235  FunctionProtos[Proto->getName()] = std::move(Proto);
1236  Function *TheFunction = getFunction(P.getName());
1237  if (!TheFunction)
1238    return nullptr;
1239
1240  // If this is an operator, install it.
1241  if (P.isBinaryOp())
1242    BinopPrecedence[P.getOperatorName()] = P.getBinaryPrecedence();
1243
1244  // Create a new basic block to start insertion into.
1245  BasicBlock *BB = BasicBlock::Create(*TheContext, "entry", TheFunction);
1246  Builder->SetInsertPoint(BB);
1247
1248  // Create a subprogram DIE for this function.
1249  DIFile *Unit = DBuilder->createFile(KSDbgInfo.TheCU->getFilename(),
1250                                      KSDbgInfo.TheCU->getDirectory());
1251  DIScope *FContext = Unit;
1252  unsigned LineNo = P.getLine();
1253  unsigned ScopeLine = LineNo;
1254  DISubprogram *SP = DBuilder->createFunction(
1255      FContext, P.getName(), StringRef(), Unit, LineNo,
1256      CreateFunctionType(TheFunction->arg_size(), Unit), ScopeLine,
1257      DINode::FlagPrototyped, DISubprogram::SPFlagDefinition);
1258  TheFunction->setSubprogram(SP);
1259
1260  // Push the current scope.
1261  KSDbgInfo.LexicalBlocks.push_back(SP);
1262
1263  // Unset the location for the prologue emission (leading instructions with no
1264  // location in a function are considered part of the prologue and the debugger
1265  // will run past them when breaking on a function)
1266  KSDbgInfo.emitLocation(nullptr);
1267
1268  // Record the function arguments in the NamedValues map.
1269  NamedValues.clear();
1270  unsigned ArgIdx = 0;
1271  for (auto &Arg : TheFunction->args()) {
1272    // Create an alloca for this variable.
1273    AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, Arg.getName());
1274
1275    // Create a debug descriptor for the variable.
1276    DILocalVariable *D = DBuilder->createParameterVariable(
1277        SP, Arg.getName(), ++ArgIdx, Unit, LineNo, KSDbgInfo.getDoubleTy(),
1278        true);
1279
1280    DBuilder->insertDeclare(Alloca, D, DBuilder->createExpression(),
1281                            DILocation::get(SP->getContext(), LineNo, 0, SP),
1282                            Builder->GetInsertBlock());
1283
1284    // Store the initial value into the alloca.
1285    Builder->CreateStore(&Arg, Alloca);
1286
1287    // Add arguments to variable symbol table.
1288    NamedValues[std::string(Arg.getName())] = Alloca;
1289  }
1290
1291  KSDbgInfo.emitLocation(Body.get());
1292
1293  if (Value *RetVal = Body->codegen()) {
1294    // Finish off the function.
1295    Builder->CreateRet(RetVal);
1296
1297    // Pop off the lexical block for the function.
1298    KSDbgInfo.LexicalBlocks.pop_back();
1299
1300    // Validate the generated code, checking for consistency.
1301    verifyFunction(*TheFunction);
1302
1303    return TheFunction;
1304  }
1305
1306  // Error reading body, remove function.
1307  TheFunction->eraseFromParent();
1308
1309  if (P.isBinaryOp())
1310    BinopPrecedence.erase(Proto->getOperatorName());
1311
1312  // Pop off the lexical block for the function since we added it
1313  // unconditionally.
1314  KSDbgInfo.LexicalBlocks.pop_back();
1315
1316  return nullptr;
1317}
1318
1319//===----------------------------------------------------------------------===//
1320// Top-Level parsing and JIT Driver
1321//===----------------------------------------------------------------------===//
1322
1323static void InitializeModule() {
1324  // Open a new module.
1325  TheContext = std::make_unique<LLVMContext>();
1326  TheModule = std::make_unique<Module>("my cool jit", *TheContext);
1327  TheModule->setDataLayout(TheJIT->getDataLayout());
1328
1329  Builder = std::make_unique<IRBuilder<>>(*TheContext);
1330}
1331
1332static void HandleDefinition() {
1333  if (auto FnAST = ParseDefinition()) {
1334    if (!FnAST->codegen())
1335      fprintf(stderr, "Error reading function definition:");
1336  } else {
1337    // Skip token for error recovery.
1338    getNextToken();
1339  }
1340}
1341
1342static void HandleExtern() {
1343  if (auto ProtoAST = ParseExtern()) {
1344    if (!ProtoAST->codegen())
1345      fprintf(stderr, "Error reading extern");
1346    else
1347      FunctionProtos[ProtoAST->getName()] = std::move(ProtoAST);
1348  } else {
1349    // Skip token for error recovery.
1350    getNextToken();
1351  }
1352}
1353
1354static void HandleTopLevelExpression() {
1355  // Evaluate a top-level expression into an anonymous function.
1356  if (auto FnAST = ParseTopLevelExpr()) {
1357    if (!FnAST->codegen()) {
1358      fprintf(stderr, "Error generating code for top level expr");
1359    }
1360  } else {
1361    // Skip token for error recovery.
1362    getNextToken();
1363  }
1364}
1365
1366/// top ::= definition | external | expression | ';'
1367static void MainLoop() {
1368  while (true) {
1369    switch (CurTok) {
1370    case tok_eof:
1371      return;
1372    case ';': // ignore top-level semicolons.
1373      getNextToken();
1374      break;
1375    case tok_def:
1376      HandleDefinition();
1377      break;
1378    case tok_extern:
1379      HandleExtern();
1380      break;
1381    default:
1382      HandleTopLevelExpression();
1383      break;
1384    }
1385  }
1386}
1387
1388//===----------------------------------------------------------------------===//
1389// "Library" functions that can be "extern'd" from user code.
1390//===----------------------------------------------------------------------===//
1391
// On Windows the helpers below must be exported explicitly; elsewhere the
// macro expands to nothing.
#if defined(_WIN32)
#define DLLEXPORT __declspec(dllexport)
#else
#define DLLEXPORT
#endif

/// putchard - Writes X, converted to a char, to stderr. Always returns 0.
extern "C" DLLEXPORT double putchard(double X) {
  const char Ch = (char)X;
  fputc(Ch, stderr);
  return 0.0;
}

/// printd - Writes X to stderr formatted as "%f\n". Always returns 0.
extern "C" DLLEXPORT double printd(double X) {
  const double Value = X;
  fprintf(stderr, "%f\n", Value);
  return 0.0;
}
1409
1410//===----------------------------------------------------------------------===//
1411// Main driver code.
1412//===----------------------------------------------------------------------===//
1413
1414int main() {
1415  InitializeNativeTarget();
1416  InitializeNativeTargetAsmPrinter();
1417  InitializeNativeTargetAsmParser();
1418
1419  // Install standard binary operators.
1420  // 1 is lowest precedence.
1421  BinopPrecedence['='] = 2;
1422  BinopPrecedence['<'] = 10;
1423  BinopPrecedence['+'] = 20;
1424  BinopPrecedence['-'] = 20;
1425  BinopPrecedence['*'] = 40; // highest.
1426
1427  // Prime the first token.
1428  getNextToken();
1429
1430  TheJIT = ExitOnErr(KaleidoscopeJIT::Create());
1431
1432  InitializeModule();
1433
1434  // Add the current debug info version into the module.
1435  TheModule->addModuleFlag(Module::Warning, "Debug Info Version",
1436                           DEBUG_METADATA_VERSION);
1437
1438  // Darwin only supports dwarf2.
1439  if (Triple(sys::getProcessTriple()).isOSDarwin())
1440    TheModule->addModuleFlag(llvm::Module::Warning, "Dwarf Version", 2);
1441
1442  // Construct the DIBuilder, we do this here because we need the module.
1443  DBuilder = std::make_unique<DIBuilder>(*TheModule);
1444
1445  // Create the compile unit for the module.
1446  // Currently down as "fib.ks" as a filename since we're redirecting stdin
1447  // but we'd like actual source locations.
1448  KSDbgInfo.TheCU = DBuilder->createCompileUnit(
1449      dwarf::DW_LANG_C, DBuilder->createFile("fib.ks", "."),
1450      "Kaleidoscope Compiler", false, "", 0);
1451
1452  // Run the main "interpreter loop" now.
1453  MainLoop();
1454
1455  // Finalize the debug info.
1456  DBuilder->finalize();
1457
1458  // Print out all of the generated code.
1459  TheModule->print(errs(), nullptr);
1460
1461  return 0;
1462}

整体流程

初始化

  1. native target 相关初始化

  2. 运算符优先级初始化

  3. token 初始化(预先读入一个 token)

之所以预先读入一个 token, 是为了后面 MainLoop 能够开始 switch 到一个正确的 token.

然后创建 KaleidoscopeJIT 对象,并初始化 Module 和 PassManager。

主循环

会进行词法分析,并在遇到特定 token 时转入语法分析,然后生成 LLVM IR,进行 JIT 操作并通过函数指针的形式调用执行代码。循环进行这一过程。

Kaleidoscope 的 JIT 基于 llvm::orc 执行引擎实现。

基本操作

LLVM 如何生成 IR 的?

LLVM 提供 IRBuilder 类,用于构建 LLVM IR。通过调用 Builder->CreateFAdd 之类的方法,可以生成一条对应的指令,并以 Value 指针的形式返回。Value 是 LLVM 中所有 IR 值(指令、常量、函数参数等)的公共基类。

在 Function *FunctionAST::codegen() 中可以看到为整个函数生成 IR 的具体步骤:

  • BasicBlock::Create 创建函数 entry 块

  • Builder->SetInsertPoint(BB); 设置将 Builder 构建的 Value 插入到哪个基本块。

  • Builder->CreateStore 为参数创建 Store 指令

  • Body->codegen() 创建函数体代码

  • Builder->CreateRet(RetVal) 以函数体计算出的值 RetVal 生成 ret 指令,结束该函数

LLVM 的 Pass 是如何应用到代码的?

首先我们会创建 PassManager,向其中添加各种 Pass。当生成 IR 后,通过 TheFPM->run(*TheFunction); 将 Pass 应用到函数上。

LLVM 的 JIT 的工作流程(Orc)

ResourceTracker 用于管理 JIT 的内存分配回收。

auto RT = TheJIT->getMainJITDylib().createResourceTracker();
// ...
ExitOnErr(RT->remove());

CompileLayer 是 JIT 的核心。我们通过 addModule 将当前 Module 添加到 Layer 里,然后通过 ExecutionSession->lookup 查找编译出来的 __anon_expr 符号并执行其地址表示的函数。

一些有用的资料

https://vod.video.cornell.edu/media/CS+6120A+Lesson+6A+Writing+an+LLVM+Pass/1_4nrtmvc9/179754792

https://www.cs.cornell.edu/~asampson/blog/llvm.html

介绍如何自定义一个 Pass