+1 (315) 557-6473 

Convert Simple Programming Language into AST Assignment Solution


Instructions

Objective

Write a program to convert a simple programming language into AST.

Requirements and Specifications

Implement to complete an OCAML assignment interpreter for an extended version of the calculator language, again with if and do/check statements. You will write your interpreter in OCaml. We are providing you with a parser generator and driver that builds an explicit parse tree. The provided code also includes the skeleton of a possible solution that converts the parse tree to a syntax tree and then recursively “walks” that tree to affect the interpretation. You are of course welcome to adopt a different skeleton if you prefer. Since this onehas been excised from a complete working solution, however, you may find it a good place to start. The provided code has two main entry points

get_parse_table : grammar -> parse_table = … parse : parse_table -> string -> parse_tree = … The first of these routines returns a parse table, in the format expected as the first argument of the second routine. The second returns a parse tree. (You’ll want to print some of these trees to see what they look like.) If the program has syntax errors (according to the grammar), parse will print an error message (as a side effect) and return a PT_error value (it does not do error recovery). If the input grammar you provide is malformed, you may get unhelpful run-time errors from the parser generator—it isn’t very robust. The grammar takes the form of a list of production sets, each ofwhich is a pair containing the LHS symbol and k right-hand sides, each of which is itself a list of symbols. When get_parse_table builds the parse table, the grammar is augmented with a start production that mentions an explicit end of file $$. Later, parse will remove this production from the resulting parse tree.

Screenshots of output

program to convert simple prograaming language into AST

Source Code

(*******************************************************************

    LL(1) parser generator, syntax tree builder for an extended

    calculator language, and (skeleton of an) interpreter for the

    generated syntax trees.

    If compiled and run, will execute "main()".

    Alternatively, can be "#use"-ed (or compiled and then "#load"-ed)

    into the top-level interpreter.

 *******************************************************************)

open List;;

(* The List library includes a large collection of useful functions.

   In the provided code, I've used assoc, exists, filter, find,

   fold_left, hd, length, map, and rev.

*)

open Str;; (* for split *)

(* The Str library provides a few extra string-processing routines.

   In particular, it provides "split", which I use to break program

   input into whitespace-separated words. Note, however, that this

   library is not automatically available. If you are using the

   top-level interpreter, you have to say

        #load "str.cma";;

   before you say

        #use "interpreter.ml";;

   If you are generating an executable from the shell, you have to

   include the library name on the command line:

        ocamlc -o interpreter str.cma interpreter.ml

*)

(* Surprisingly, compose isn't built in. It's included in various

   widely used commercial packages, but not in the core libraries. *)

let compose f g x = f (g x);;

type symbol_productions = (string * string list list);;

type grammar = symbol_productions list;;

type parse_table = (string * (string list * string list) list) list;;

(* nt predict_set rhs *)

let calc_gram : grammar =

  [ ("P", [["SL"; "$$"]])

  ; ("SL", [["S"; "SL"]; []])

  ; ("S", [ ["id"; ":="; "E"]; ["read"; "id"]; ["write"; "E"]])

  ; ("E", [["T"; "TT"]])

  ; ("T", [["F"; "FT"]])

  ; ("TT", [["ao"; "T"; "TT"]; []])

  ; ("FT", [["mo"; "F"; "FT"]; []])

  ; ("ao", [["+"]; ["-"]])

  ; ("mo", [["*"]; ["/"]])

  ; ("F", [["id"]; ["num"]; ["("; "E"; ")"]])

  ];;

let ecg : grammar =

  [ ("P", [["SL"; "$$"]])

  ; ("SL", [["S"; "SL"]; []])

  ; ("S", [ ["id"; ":="; "R"]; ["read"; "id"]; ["write"; "R"]

           ; ["if"; "R"; "SL"; "fi"]; ["do"; "SL"; "od"]; ["check"; "R"]

           ])

  ; ("R", [["E"; "ET"]])

  ; ("E", [["T"; "TT"]])

  ; ("T", [["F"; "FT"]])

  ; ("F", [["id"]; ["num"]; ["("; "R"; ")"]])

  ; ("ET", [["ro"; "E"]; []])

  ; ("TT", [["ao"; "T"; "TT"]; []])

  ; ("FT", [["mo"; "F"; "FT"]; []])

  ; ("ro", [["=="]; ["<>"]; ["<"]; [">"]; ["<="]; [">="]])

  ; ("ao", [["+"]; ["-"]])

  ; ("mo", [["*"]; ["/"]])

  ];;

(* is e a member of list l? *)

let member e l = exists ((=) e) l;;

(* OCaml has a built-in quicksort; this was just an exercise. *)

let rec sort l =

  let rec partition pivot l left right =

    match l with

    | [] -> (left, right)

    | c :: rest -> if (compare c pivot) < 0

                   then partition pivot rest (c :: left) right

                   else partition pivot rest left (c :: right) in

  match l with

  | [] -> l

  | h :: [] -> l

  | h :: rest -> let (left, right) = partition h rest [] [] in

                 (sort left) @ [h] @ (sort right);;

(* leave only one of any consecutive identical elements in list *)

let rec unique l =

  match l with

  | [] -> l

  | h :: [] -> l

  | a :: b :: rest -> if a = b (* structural eq *)

                      then unique (b :: rest)

                      else a :: unique (b :: rest);;

let unique_sort l = unique (sort l);;

(* Return all individual productions in grammar. *)

let productions gram : (string * string list) list =

  let prods (lhs, rhss) = map (fun rhs -> (lhs, rhs)) rhss in

  fold_left (@) [] (map prods gram);;

(* Return all symbols in grammar. *)

let gsymbols gram : string list =

  unique_sort

    (fold_left (@) [] (map (compose (fold_left (@) []) snd) gram));;

(* Return all elements of l that are not in to_exclude.

   Assume that both lists are sorted *)

let list_minus l to_exclude =

  let rec helper rest te rtn =

    match rest with

    | [] -> rtn

    | h :: t -> match te with

                | [] -> (rev rest) @ rtn

                | h2 :: t2 -> match compare h h2 with

                              | -1 -> helper t te (h :: rtn)

                              | 0 -> helper t t2 rtn

                              | _ -> helper rest t2 rtn in

  rev (helper l to_exclude []);;

(* Return just the nonterminals. *)

let nonterminals gram : string list = map fst gram;;

(* Return just the terminals. *)

let terminals gram : string list =

    list_minus (gsymbols gram) (unique_sort (nonterminals gram));;

(* Return the start symbol. Throw exception if grammar is empty. *)

let start_symbol gram : string = fst (hd gram);;

let is_nonterminal e gram = member e (nonterminals gram);;

let is_terminal e gram = member e (terminals gram);;

let union s1 s2 = unique_sort (s1 @ s2);;

(* return suffix of lst that begins with first occurrence of sym

   (or [] if there is no such occurrence) *)

let rec suffix sym lst =

  match lst with

  | [] -> []

  | h :: t -> if h = sym (* structural eq *)

              then lst else suffix sym t;;

(* Return a list of pairs.

   Each pair consists of a symbol A and a list of symbols beta

   such that for some alpha, A -> alpha B beta. *)

type right_context = (string * string list) list;;

let get_right_context (b:string) gram : right_context =

  fold_left (@) []

            (map (fun prod ->

                    let a = fst prod in

                    let rec helper accum rhs =

                      let b_beta = suffix b rhs in

                      match b_beta with

                      | [] -> accum

                      | x :: beta -> (* assert x = b *)

                                      helper ((a, beta) :: accum) beta in

                    helper [] (snd prod))

                 (productions gram));;

(* parser generator starts here *)

type symbol_knowledge = string * bool * string list * string list;;

type knowledge = symbol_knowledge list;;

let symbol_field (s, e, fi, fo) = s;;

let eps_field (s, e, fi, fo) = e;;

let first_field (s, e, fi, fo) = fi;;

let follow_field (s, e, fi, fo) = fo;;

let initial_knowledge gram : knowledge =

  map (fun a -> (a, false, [], [])) (nonterminals gram);;

let get_symbol_knowledge (a:string) (kdg:knowledge) : symbol_knowledge =

  find (fun (s, e, fi, fo) -> s = a) kdg;;

(* Can word list w generate epsilon based on current estimates?

   if w is an empty list, yes

   if w is a single terminal, no

   if w is a single nonterminal, look it up

   if w is a non-empty list, "iterate" over elements *)

let rec generates_epsilon (w:string list) (kdg:knowledge) gram : bool =

  match w with

  | [] -> true

  | h :: t -> if is_terminal h gram then false

              else eps_field (get_symbol_knowledge h kdg)

                   && generates_epsilon t kdg gram;;

(* Return FIRST(w), based on current estimates.

   if w is an empty list, return [] [empty set]

   if w is a single terminal, return [w]

   if w is a single nonterminal, look it up

   if w is a non-empty list, "iterate" over elements *)

let rec first (w:string list) (kdg:knowledge) gram : (string list) =

  match w with

  | [] -> []

  | x :: _ when is_terminal x gram -> [x]

  | x :: rest -> let s = first_field (get_symbol_knowledge x kdg) in

                 if generates_epsilon [x] kdg gram

                 then union s (first rest kdg gram)

                 else s;;

let follow (a:string) (kdg:knowledge) : string list =

  follow_field (get_symbol_knowledge a kdg);;

let rec map3 f l1 l2 l3 =

  match (l1, l2, l3) with

  | ([], [], []) -> []

  | (h1 :: t1, h2 :: t2, h3 :: t3) -> (f h1 h2 h3) :: map3 f t1 t2 t3

  | _ -> raise (Failure "mismatched_lists");;

(* Return knowledge structure for grammar.

   Start with (initial_knowledge grammar) and "iterate",

   until the structure doesn't change.

   Uses (get_right_context B gram), for all nonterminals B,

   to help compute follow sets. *)

let get_knowledge gram : knowledge =

  let nts : string list = nonterminals gram in

  let right_contexts : right_context list

     = map (fun s -> get_right_context s gram) nts in

  let rec helper kdg =

    let update : symbol_knowledge -> symbol_productions

                   -> right_context -> symbol_knowledge

          = fun old_sym_kdg sym_prods sym_right_context ->

      let my_first s = first s kdg gram in

      let my_eps s = generates_epsilon s kdg gram in

      let filtered_follow p = if my_eps (snd p)

                              then (follow (fst p) kdg)

                              else [] in

      (

        symbol_field old_sym_kdg, (* nonterminal itself *)

        (eps_field old_sym_kdg) (* previous estimate *)

            || (fold_left (||) false (map my_eps (snd sym_prods))),

        union (first_field old_sym_kdg) (* previous estimate *)

            (fold_left union [] (map my_first (snd sym_prods))),

        union (union

                (follow_field old_sym_kdg) (* previous estimate *)

                (fold_left union [] (map my_first

                                      (map (fun p ->

                                              match snd p with

                                              | [] -> []

                                              | h :: t -> [h])

                                           sym_right_context))))

              (fold_left union [] (map filtered_follow sym_right_context))

      ) in (* end of update *)

    let new_kdg = map3 update kdg gram right_contexts in

    (* body of helper: *)

    if new_kdg = kdg then kdg else helper new_kdg in

  (* body of get_knowledge: *)

  helper (initial_knowledge gram);;

let get_parse_table (gram:grammar) : parse_table =

  let kdg = get_knowledge gram in

  map (fun (lhs, rhss) ->

        (lhs, (map (fun rhs ->

                      (union (first rhs kdg gram)

                             (if (generates_epsilon rhs kdg gram)

                              then (follow lhs kdg) else []),

                      rhs))

                   rhss)))

      gram;;

(* convert string to list of char *)

let explode (s:string) : char list =

  let rec exp i l = if i < 0 then l else exp (i-1) (s.[i] :: l) in

  exp (String.length s - 1) [];;

(* convert list of char to string

   (This uses imperative features. It used to be a built-in.) *)

let implode (l:char list) : string =

  let res = Bytes.create (length l) in

  let rec imp i l =

    match l with

    | [] -> Bytes.to_string res

    | c :: l -> Bytes.set res i c; imp (i + 1) l in

  imp 0 l;;

(*******************************************************************

   Scanner. Note that for the sake of generality (so you can

   experiment with other languages), this accepts certain tokens

   that are not needed in the calculator language.

 *******************************************************************)

type token = string * string;;

(* category * name *)

let tokenize (program:string) : token list =

  let get_id prog =

    let rec gi tok p =

        match p with

        | c :: rest when (('a' <= c && c <= 'z')

                       || ('A' <= c && c <= 'Z')

                       || ('0' <= c && c <= '9') || (c = '_'))

            -> gi (c :: tok) rest

        | _ -> (implode (rev tok), p) in

    gi [] prog in

  let get_int prog =

    let rec gi tok p =

        match p with

        | c :: rest when ('0' <= c && c <= '9')

            -> gi (c :: tok) rest

        | _ -> (implode (rev tok), p) in

    gi [] prog in

  let get_num prog =

    let (tok, rest) = get_int prog in

    match rest with

    | '.' :: c :: r when ('0' <= c && c <= '9')

        -> let (tok2, rest2) = get_int (c :: r) in

           ((tok ^ "." ^ tok2), rest2)

    | _ -> (tok, rest) in

  let rec get_error tok prog =

    match prog with

    | [] -> (implode (rev tok), prog)

    | c :: rest ->

        match c with

        | ':' | '+' | '-' | '*' | '/' | '(' | ')' | '[' | ']' | '_'

        | '<' | '>' | '=' | '!' | ',' | 'a'..'z' | 'A'..'Z' | '0'..'9'

            -> (implode (rev tok), prog)

        | _ -> get_error (c :: tok) rest in

  let rec skip_space prog =

    match prog with

    | [] -> []

    | c :: rest -> if (c = ' ' || c = '\n' || c = '\r' || c = '\t')

                      then skip_space rest else prog in

  let rec tkize toks prog =

    match prog with

    | [] -> (("$$" :: toks), [])

    | '\n' :: rest

    | '\r' :: rest

    | '\t' :: rest

    | ' ' :: rest -> tkize toks (skip_space prog)

    | ':' :: '=' :: rest -> tkize (":=" :: toks) rest

    | ':' :: rest -> tkize (":" :: toks) rest

    | '+' :: rest -> tkize ("+" :: toks) rest

    | '-' :: rest -> tkize ("-" :: toks) rest

    | '*' :: rest -> tkize ("*" :: toks) rest

    | '/' :: rest -> tkize ("/" :: toks) rest

    | '(' :: rest -> tkize ("(" :: toks) rest

    | ')' :: rest -> tkize (")" :: toks) rest

    | '[' :: rest -> tkize ("[" :: toks) rest

    | ']' :: rest -> tkize ("]" :: toks) rest

    | '<' :: '>' :: rest -> tkize ("<>" :: toks) rest

    | '<' :: '=' :: rest -> tkize ("<=" :: toks) rest

    | '<' :: rest -> tkize ("<" :: toks) rest

    | '>' :: '=' :: rest -> tkize (">=" :: toks) rest

    | '>' :: rest -> tkize (">" :: toks) rest

    | '=' :: '=' :: rest -> tkize ("==" :: toks) rest

    | ',' :: rest -> tkize ("," :: toks) rest

    | h :: t -> match h with

           | '0'..'9' -> let (t, rest) = get_num prog in

                         tkize (t :: toks) rest

           | 'a'..'z'

           | 'A'..'Z'

           | '_' -> let (t, rest) = get_id prog in

                         tkize (t :: toks) rest

           | c -> let (t, rest) = get_error [c] t in

                         tkize (t :: toks) rest in

  let (toks, _) = (tkize [] (explode program)) in

  let categorize tok =

    match tok with

    | "array" | "begin" | "check" | "do" | "else" | "end" | "fi"

    | "float" | "for" | "if" | "int" | "od" | "proc" | "read"

    | "real" | "repeat" | "trunc" | "while" | "write"

    | ":=" | ":" | "+" | "-" | "*" | "/" | "(" | ")"

    | "[" | "]" | "<" | "<=" | ">" | ">=" | "==" | "<>" | "," | "$$"

        -> (tok, tok)

    | _ -> match tok.[0] with

           | '0'..'9' -> ("num", tok)

           | 'a'..'z'

           | 'A'..'Z' | '_' -> ("id", tok)

           | _ -> ("error", tok) in

  map categorize (rev toks);;

(*******************************************************************

   The main parse routine below returns a parse tree (or PT_error if

   the input program is syntactically invalid). To build that tree it

   employs a simplified version of the "attribute stack" described in

   Section 4.5.2 (pages 50-55) on the PLP companion site.

   When it predicts A -> B C D, the parser pops A from the parse stack

   and then, before pushing D, C, and B (in that order), it pushes a

   number (in this case 3) indicating the length of the right hand side.

   It also pushes A into the attribute stack.

   When it matches a token, the parser pushes this into the attribute

   stack as well.

   Finally, when it encounters a number (say k) in the stack (as opposed

   to a character string), the parser pops k+1 symbols from the

   attribute stack, joins them together into a list, and pushes the list

   back into the attribute stack.

   These rules suffice to accumulate a complete parse tree into the

   attribute stack at the end of the parse.

   Note that everything is done functionally. We don't really modify

   the stacks; we pass new versions to a tail recursive routine.

 *******************************************************************)

(* Extract grammar from parse-tab, so we can invoke the various routines

   that expect a grammar as argument. *)

let grammar_of (parse_tab:parse_table) : grammar =

    map (fun p -> (fst p, (fold_left (@) [] (map (fun (a, b) -> [b])

                                                 (snd p))))) parse_tab;;

type parse_tree = (* among other things, parse_trees are *)

| PT_error (* the elements of the attribute stack *)

| PT_id of string

| PT_num of string

| PT_term of string

| PT_nt of (string * parse_tree list);;

(* Pop rhs-len + 1 symbols off the attribute stack,

   assemble into a production, and push back onto the stack. *)

let reduce_1_prod (astack:parse_tree list) (rhs_len:int) : parse_tree list =

  let rec helper atk k prod =

    match (k, atk) with

    | (0, PT_nt(nt, []) :: t) -> PT_nt(nt, prod) :: t

    | (n, h :: t) when n <> 0 -> helper t (k - 1) (h :: prod)

    | _ -> raise (Failure "expected nonterminal at top of astack") in

   helper astack rhs_len [];;

let sum_ave_prog = "read a read b sum := a + b write sum write sum / 2";;

let primes_prog = "

     read n

     cp := 2

     do check n > 0

         found := 0

         cf1 := 2

         cf1s := cf1 * cf1

         do check cf1s <= cp

             cf2 := 2

             pr := cf1 * cf2

             do check pr <= cp

                 if pr == cp

                     found := 1

                 fi

                 cf2 := cf2 + 1

                 pr := cf1 * cf2

             od

             cf1 := cf1 + 1

             cf1s := cf1 * cf1

         od

         if found == 0

             write cp

             n := n - 1

         fi

         cp := cp + 1

     od";;

type parse_action = PA_error | PA_prediction of string list;;

(* Double-index to find prediction (list of RHS symbols) for

   nonterminal nt and terminal t.

   Return PA_error if not found. *)

let get_parse_action (nt:string) (t:string) (parse_tab:parse_table) : parse_action =

    let rec helper l =

        match l with

        | [] -> PA_error

        | (fs, rhs) :: rest -> if member t fs then PA_prediction(rhs)

                               else helper rest in

    helper (assoc nt parse_tab);;

type ps_item =

| PS_end of int

| PS_sym of string;;

(* Parse program according to grammar.

   [Commented-out code would

       print predictions and matches (imperatively) along the way.]

   Return parse tree if the program is in the language; PT_error if it's not. *)

let parse (parse_tab:parse_table) (program:string) : parse_tree =

  let die msg = begin

                  print_string ("syntax error: " ^ msg);

                  print_newline ();

                  PT_error

                end in

  let gram = grammar_of parse_tab in

  let rec helper pstack tokens astack =

    match pstack with

    | [] ->

        if tokens = [] then

          (* assert: astack is nonempty *)

          hd astack

        else die "extra input beyond end of program"

    | PS_end(n) :: ps_tail ->

        helper ps_tail tokens (reduce_1_prod astack n)

    | PS_sym(tos) :: ps_tail ->

        match tokens with

        | [] -> die "unexpected end of program"

        | (term, tok) :: more_tokens ->

           (* if tok is an individual identifier or number,

              term will be a generic "id" or "num" *)

          if is_terminal tos gram then

            if tos = term then

              begin

              (*

                print_string (" match " ^ tos);

                print_string

                    (if tos <> term (* value comparison *)

                         then (" (" ^ tok ^ ")") else "");

                print_newline ();

              *)

                helper ps_tail more_tokens

                       (( match term with

                          | "id" -> PT_id tok

                          | "num" -> PT_num tok

                          | _ -> PT_term tok ) :: astack)

              end

                       (* note push of tok into astack *)

            else die ("expected " ^ tos ^ " ; saw " ^ tok)

          else (* nonterminal *)

            match get_parse_action tos term parse_tab with

            | PA_error -> die ("no prediction for " ^ tos

                               ^ " when seeing " ^ tok)

            | PA_prediction(rhs) ->

                begin

                (*

                  print_string (" predict " ^ tos ^ " ->");

                  print_string (fold_left (fun a b -> a ^ " " ^ b) "" rhs);

                  print_newline ();

                *)

                  helper ((fold_left (@) []

                                    (map (fun s -> [PS_sym(s)]) rhs))

                              @ [PS_end(length rhs)] @ ps_tail)

                         tokens (PT_nt(tos, []) :: astack)

                end in

  helper [PS_sym(start_symbol gram)] (tokenize program) [];;

let cg_parse_table = get_parse_table calc_gram;;

let ecg_parse_table = get_parse_table ecg;;

(*******************************************************************

  Everything above this point in the file is complete and (I think)

  usable as-is. The rest of the file, from here down, is a skeleton

  for the extra code you need to write. It has been excised from my

  working solution to the assignment. You are welcome, of course, to

  use a different organization if you prefer. This is provided in the

  hope you may find it useful.

 *******************************************************************)

type ast_sl = ast_s list

and ast_s =

| AST_error

| AST_assign of (string * ast_e)

| AST_read of string

| AST_write of ast_e

| AST_if of (ast_e * ast_sl)

| AST_do of ast_sl

| AST_check of ast_e

and ast_e =

| AST_binop of (string * ast_e * ast_e)

| AST_id of string

| AST_num of string;;

let rec ast_ize_prog (p:parse_tree) : ast_sl =

  match p with

  | PT_nt ("P", [pt_stl; PT_term "$$"]) -> (* match program and end *)

      ast_ize_stmt_list pt_stl (* go to statement list *)

  | _ -> raise (Failure "malformed parse tree in ast_ize_prog")

and ast_ize_stmt_list (sl:parse_tree) : ast_sl =

  match sl with

  | PT_nt ("SL", []) -> [] (* match empty list *)

  | PT_nt ("SL", [pt_st; pt_stl]) -> (* match statement and rest *)

      (* process statement and recurse with rest of list *)

      (ast_ize_stmt pt_st) :: (ast_ize_stmt_list pt_stl)

  | _ -> raise (Failure "malformed parse tree in ast_ize_stmt_list")

and ast_ize_stmt (s:parse_tree) : ast_s =

  match s with

  | PT_nt ("S", [PT_id lhs; PT_term ":="; expr]) ->

      (* if assignment, get id and expression and return assign node *)

      AST_assign (lhs, (ast_ize_expr expr))

  | PT_nt ("S", [PT_term "read"; PT_id id]) ->

      (* if read, get id and return read node *)

      AST_read id

  | PT_nt ("S", [PT_term "write"; expr]) ->

      (* if write, get expression and return write node *)

      AST_write (ast_ize_expr expr)

  | PT_nt ("S", [PT_term "if"; expr; pt_stl; PT_term "fi"]) ->

      (* when if, get expression, process statement list and return if node *)

      AST_if (ast_ize_expr expr, ast_ize_stmt_list pt_stl)

  | PT_nt ("S", [PT_term "do"; pt_stl; PT_term "od"]) ->

      (* if do, process statement list and return do node *)

      AST_do (ast_ize_stmt_list pt_stl)

  | PT_nt ("S", [PT_term "check"; expr]) ->

      (* if check, get expression and return check node *)

      AST_check (ast_ize_expr expr)

  | _ -> raise (Failure "malformed parse tree in ast_ize_stmt")

and ast_ize_expr (e:parse_tree) : ast_e =

  (* e is an R, E, T, or F parse tree node *)

  match e with

  | PT_nt ("R", [expr; pt_et]) -> (* match R node *)

      (* if R node, recurse expression and return reln tail *)

      ast_ize_reln_tail (ast_ize_expr expr) pt_et

  | PT_nt ("E", [expr; pt_et]) -> (* match E node *)

      (* if E node, recurse expression and return expr tail *)

      ast_ize_expr_tail (ast_ize_expr expr) pt_et

  | PT_nt ("T", [expr; pt_et]) -> (* match T node *)

      (* if T node, recurse expression and return expr tail *)

      ast_ize_expr_tail (ast_ize_expr expr) pt_et

  | PT_nt ("F", [PT_id id]) -> (* match F id node *)

      (* get id and return id node *)

      AST_id id

  | PT_nt ("F", [PT_num num]) -> (* match F num node *)

      (* get num and return num node *)

      AST_num num

  (* match F parenthesis node *)

  | PT_nt ("F", [PT_term "("; expr; PT_term ")"]) ->

      (* recurse expression, return result *)

      ast_ize_expr expr

  | _ -> raise (Failure "malformed parse tree in ast_ize_expr")

and ast_ize_reln_tail (lhs:ast_e) (tail:parse_tree) : ast_e =

  (* lhs in an inherited attribute.

     tail is an ET parse tree node *)

  match tail with

  | PT_nt ("ET", []) -> (* match empty ET node *)

      lhs (* simply return expression *)

  | PT_nt ("ET", [PT_nt ("ro", [PT_term op]); pt_et]) -> (* match ET op node *)

      (* build binomial operation with lhs expression and rest of expression*)

      AST_binop (op, lhs, ast_ize_expr pt_et)

  | _ -> raise (Failure "malformed parse tree in ast_ize_reln_tail")

and ast_ize_expr_tail (lhs:ast_e) (tail:parse_tree) : ast_e =

  (* lhs in an inherited attribute.

     tail is a TT or FT parse tree node *)

  match tail with

  | PT_nt ("TT", []) -> (* match empty TT node *)

      lhs (* simply return expression *)

  (* match TT op node *)

  | PT_nt ("TT", [PT_nt ("ao", [PT_term op]); pt_et; pt_tt]) ->

      (* build binomial operation with expression and rest of expression

         and recurse using built binop *)

      ast_ize_expr_tail (AST_binop (op, lhs, ast_ize_expr pt_et)) pt_tt

  | PT_nt ("FT", []) -> (* match empty FT node *)

      lhs (* simply return expression *)

  (* match FT op node *)

  | PT_nt ("FT", [PT_nt ("mo", [PT_term op]); pt_et; pt_tt]) ->

      (* build binomial operation with lhs expression and rest of expression

         and recurse using built binop *)

      ast_ize_expr_tail (AST_binop (op, lhs, ast_ize_expr pt_et)) pt_tt

  | _ -> raise (Failure "malformed parse tree in ast_ize_expr_tail")

;;

(*******************************************************************

    Interpreter

 *******************************************************************)

type memory = (string * int) list;;

(* name * val *)

(* If you do the extra credit, you might want an extra Boolean

   field in the tuple to indicate whether the value has been used. *)

type value = (* an integer or an error message *)

| Value of int

| Error of string;;

(* concatenate strings, with a separator in between if both were nonempty *)

let str_cat sep a b =

  match (a, b) with

  | (a, "") -> a

  | ("", b) -> b

  | (_, _) -> a ^ sep ^ b;;

type status =

| Good

| Bad (* run-time error *)

| Done;; (* failed check *)

(* Input to a calculator program is just a sequence of numbers. We use

   the standard Str library to split the single input string into

   whitespace-separated words, each of which is subsequently checked

   for valid integer format.

*)

let rec interpret (ast:ast_sl) (full_input:string) : string =

  let inp = split (regexp "[ \t\n\r]+") full_input in

  let (_, _, _, outp) = interpret_sl ast [] inp [] in

    (fold_left (str_cat " ") "" outp) ^ "\n"

and interpret_sl (sl:ast_sl) (mem:memory)

                 (inp:string list) (outp:string list)

    : status * memory * string list * string list =

    (* ok? new_mem new_input new_output *)

  match sl with

  | [] -> (Good, mem, inp, outp) (* if end of statements, return *)

  | st::sts ->

      match (interpret_s st mem inp outp) with

      (* recurse to next statement *)

      | (Good, m, i, o) -> interpret_sl sts m i o

      (* if done or bad, return value unchanged *)

      | r -> r

(* NB: the following routine is complete. You can call it on any

   statement node and it figures out what more specific case to invoke.

*)

and interpret_s (s:ast_s) (mem:memory)

                (inp:string list) (outp:string list)

    : status * memory * string list * string list =

  match s with

  | AST_assign(id, expr) -> interpret_assign id expr mem inp outp

  | AST_read(id) -> interpret_read id mem inp outp

  | AST_write(expr) -> interpret_write expr mem inp outp

  | AST_if(cond, sl) -> interpret_if cond sl mem inp outp

  | AST_do(sl) -> interpret_do sl mem inp outp

  | AST_check(cond) -> interpret_check cond mem inp outp

  | AST_error -> raise (Failure "cannot interpret erroneous tree")

and interpret_assign (lhs:string) (rhs:ast_e) (mem:memory)

                     (inp:string list) (outp:string list)

    : status * memory * string list * string list =

  (* evaluate expression and match result *)

  match (interpret_expr rhs mem) with

  | (Value v, new_mem)

      (* remove old variable and use new value *)

      -> (Good, (lhs, v) :: remove_assoc lhs new_mem, inp, outp)

  | (Error es, new_mem)

      -> (Bad, new_mem, inp, outp @ [es]) (* return error string as output *)

and interpret_read (id:string) (mem:memory)

                   (inp:string list) (outp:string list)

    : status * memory * string list * string list =

  match inp with

  (* if no more input values it's an error *)

  | [] -> (Bad, mem, inp, outp @ ["unexpected end of input"])

  | v::inps ->

      if string_match (regexp "[0-9]+$") v 0 then (* if a valid number *)

        (* remove old variable, convert input to integer and add new var *)

        (Good, (id, int_of_string v) :: remove_assoc id mem, inps, outp)

      else

        (Bad, mem, inp, outp @ ["non-numeric input"])

and interpret_write (expr:ast_e) (mem:memory)

                    (inp:string list) (outp:string list)

    : status * memory * string list * string list =

  (* evaluate expression and match result *)

  match (interpret_expr expr mem) with

  (* add expression value to output *)

  | (Value v, new_mem) -> (Good, new_mem, inp, outp @ [string_of_int v])

  (* return error string as output *)

  | (Error es, new_mem) -> (Bad, new_mem, inp, outp @ [es])

and interpret_if (cond:ast_e) (sl:ast_sl) (mem:memory)

                 (inp:string list) (outp:string list)

    : status * memory * string list * string list =

  (* evaluate expression and match result *)

  match (interpret_expr cond mem) with

  | (Value 0, new_mem) -> (Good, new_mem, inp, outp) (* if false, return *)

  (* if true, interpret the statements inside the if *)

  | (Value _, new_mem) -> interpret_sl sl new_mem inp outp

  (* return error string as output *)

  | (Error es, new_mem) -> (Bad, new_mem, inp, outp @ [es])

and interpret_do (sl:ast_sl) (mem:memory)

                 (inp:string list) (outp:string list)

    : status * memory * string list * string list =

  (* interpret statements in do *)

  match interpret_sl sl mem inp outp with

  (* if loop is done, return last state *)

  | (Done, m, i, o) -> (Good, m, i, o)

  (* if good, recurse using last state *)

  | (Good, m, i, o) -> interpret_do sl m i o

  (* if bad, simply return it *)

  | b -> b

and interpret_check (cond:ast_e) (mem:memory)

                    (inp:string list) (outp:string list)

    : status * memory * string list * string list =

  (* evaluate condition and match result *)

  match (interpret_expr cond mem) with

  | (Value 0, new_mem) -> (Done, new_mem, inp, outp) (* if false, we are done*)

  | (Value _, new_mem) -> (Good, new_mem, inp, outp) (* if true, continue *)

  (* return error string as output *)

  | (Error es, new_mem) -> (Bad, new_mem, inp, outp @ [es])

and interpret_expr (expr:ast_e) (mem:memory) : value * memory =

  match expr with

  (* for an id, return its value *)

  | AST_id id ->

      (* if variable exists return its value and var list *)

      if mem_assoc id mem then (Value (assoc id mem), mem)

      (* else return error *)

      else (Error (id ^ ": symbol not found"), mem)

  (* for a number, return its value *)

  | AST_num num -> (Value (int_of_string num), mem)

  | AST_binop (op, expr1, expr2) ->

      match interpret_expr expr1 mem with

      | (Error e1, m1) -> (Error e1, m1)

      | (Value v1, m1) ->

          match interpret_expr expr2 m1 with

          | (Error e2, m2) -> (Error e2, m2)

          | (Value v2, m2) ->

              (* make operation and return result, relational operators return

              values 0 or 1 to indicate false or true, respectively *)

              match op with

              | "==" -> if v1 == v2 then (Value 1, m2) else (Value 0, m2)

              | "<>" -> if v1 <> v2 then (Value 1, m2) else (Value 0, m2)

              | "<" -> if v1 < v2 then (Value 1, m2) else (Value 0, m2)

              | ">" -> if v1 > v2 then (Value 1, m2) else (Value 0, m2)

              | "<=" -> if v1 <= v2 then (Value 1, m2) else (Value 0, m2)

              | ">=" -> if v1 >= v2 then (Value 1, m2) else (Value 0, m2)

              | "+" -> (Value (v1 + v2), m2)

              | "-" -> (Value (v1 - v2), m2)

              | "*" -> (Value (v1 * v2), m2)

              | "/" ->

                  if v2 <> 0 then (Value (v1 / v2), m2)

                  else (Error "divide by zero", m2)

              (* last one shouldn't happen, added to avoid warnings *)

              | x -> (Error ("unknown operator " ^ x), m2)

(*******************************************************************

    Testing

 *******************************************************************)

let sum_ave_parse_tree = parse ecg_parse_table sum_ave_prog;;

let sum_ave_syntax_tree = ast_ize_prog sum_ave_parse_tree;;

let primes_parse_tree = parse ecg_parse_table primes_prog;;

let primes_syntax_tree = ast_ize_prog primes_parse_tree;;

let ecg_run prog inp =

  interpret (ast_ize_prog (parse ecg_parse_table prog)) inp;;

let main () =

  print_string (interpret sum_ave_syntax_tree "4 6");

    (* should print "10 5" *)

  print_newline ();

  print_string (interpret primes_syntax_tree "10");

    (* should print "2 3 5 7 11 13 17 19 23 29" *)

  print_newline ();

  print_string (interpret sum_ave_syntax_tree "4 foo");

    (* should print "non-numeric input" *)

  print_newline ();

  print_string (ecg_run "write 3 write 2 / 0" "");

    (* should print "3 divide by zero" *)

  print_newline ();

  print_string (ecg_run "write foo" "");

    (* should print "foo: symbol not found" *)

  print_newline ();

  print_string (ecg_run "read a read b" "3");

    (* should print "unexpected end of input" *)

  print_newline ();;

(* Execute function "main" iff run as a stand-alone program. *)

if !Sys.interactive then () else main ();;