/*
   parse.cc -- illustrate how to build a recursive-descent parser

Author:  Larry Morell

Algorithm:

   The program parses according to the following grammar

PROGRAM        ::= <scanner-list> SCANNER-LIST </scanner-list>
SCANNER-LIST ::= SCANNER | SCANNER SCANNER-LIST
SCANNER  ::= <scanner> NAME ALPHABET STATES START FINAL-STATES TRANSITION-TABLE </scanner>
NAME         ::= <name>.1.</name>
ALPHABET     ::= <alphabet> .2. </alphabet>
STATES       ::= <states> STATE-LIST </states>
STATE-LIST   ::= STATE | STATE STATE-LIST
FINAL-STATES ::= <final-states> STATE-LIST </final-states>
STATE            ::= <state> .4. </state>
START        ::= <start> .4. </final>
TRANSITION-TABLE ::= <transition-table> TRANSITION-LIST </transition-table>
TRANSITION-LIST  ::= TRANSITION | TRANSITION TRANSITION-LIST
TRANSITION       ::= <transition> INPUT STATE NEXTSTATE </transition>
INPUT            ::= <input> .3. </input>
NEXTSTATE        ::= <nextstate> .4. </nextstate>

Parsing is the act of recognizing whether the input tokens are in the correct
order, as specified by a grammar.  Each non-terminal is translated into a single
procedure.  The translated procedure calls one of three routines to do its parsing:
1. check(tokenType, string) -- a function that returns true iff the current token, `t'
   (i.e. the token most recently obtained by calling t.get(), or t.getStuff())
   has the appropriate tokentype and value.  For example:
      t.get();
      if (check(otagSym,"state"))  ...

   checks to see if the current value of `t' is  <state>.

2. verify(tokenType, string) -- a procedure that confirms that the current token,`t',
   matches the supplied tokenType and value.  If it does not, it issues and error and
   halts.  For example:

      verify(otagSym,"state"); 

   halts the program if the current token is not <state>

   `t.get()'  or `t.getStuff()' should normally be called after calling `verify(...)'

3. Another parsing routine, `parseXXX()', to parse for input that satisfies `XXX'.
   For example:

      parseInput(); 

   This will attempt to recognize next in the input stream:

     <input>a</input>

   No call to t.get() or t.getStuff() is necessary after calling `parseXXX()' because
   the parsing routine will ensure that one of these has been called immediately before
   exiting.   

As an example consider:

TRANSITION      ::= <transition> INPUT STATE NEXTSTATE </transition> 

void parseTransition () {
   entering("parseTransition");
   verify (otagSym, "transition");  t.get();
   BinNode *root, *c1, *c2, *c3;
   parseInput();
   parseState();
   parseNextState();
   verify (ctagSym, "transition");  t.get();
   leaving("parseTransition");
}

Note that the first item to be recognized is the `<transition>' tag, Since this
is the only option, `verify()' is called.   Note the required call to t.get();

After that, this parsing routine must
rely on three other parsing routines to do their work: 
   parseInput();
   parseState();
   parseNextState();

At this point the next item in the input stream better be a closing `</transition>' 
tag, hence the final call to `verify()' along with the required call to `t.get()'

Please note that if the last item in a grammar rule is a non-terminal XXX, then
the last activity of the the parsing routine will be `parseXXX()'.  There is
no final call to get another token  since that the call to `parseXXX()' guarantees
that token has been read already.


Revision history
Ancient times -- various versions built in various languages
3/14/06 -- C++ version built for parsing an XML-formated definition for a scanner-generator
*/

using namespace std;
#include <iostream>
#include "Envs.h"
#include "Tokens.h"
#include "Basics.h"

typedef IntSet TokenTypeSet; 

Token  t;   
IntSet  ES; // empty set

// check -- true iff the current token (t) is contained in the TokenTypeSet s
int check(TokenTypeSet s, string v)
{
   entering("check");
   //cout << "s=";s.displayln();
   //cout << "v="<<v << endl;
   //cout << s.contains(t.type()) << endl;
   //cout << t.val() << endl;
   leaving("check");
  return s.contains(t.type()) && t.val() == v;
}

// verify -- if current token (t) is in in the set TokenTypeSet s, 
//           then get the next token
//           else error message and halt the program
void verify(TokenTypeSet s, string v)
{
   TokenType c,d;
   entering("verify");
   //cout << "Verifying:"; s.displayln();
   if (check(s,v)){
   }
   else {
      cout << "Error: verify: looking for:";
      c = ctagSym; //  first in TokenType
      while (! s.contains(c))
            c =  succ(c);
      cout << tokenTypeStr(c) << endl;
      if (c != stuffSym) {  // loop for commas 
         c = succ(c);
         for ( d = c ; d != stuffSym; d=succ(d)) {
             if (s.contains(d))
               cout << "," << tokenTypeStr(c) 
                           << endl;
         }
         if (s.contains(stuffSym))
            cout << "," << tokenTypeStr(c) 
                        << endl;
      }
      cout << "found " ;
      t.displayln();
      exit(1);
   }
   leaving("verify");
}

/*
PROGRAM      ::= <scanner-list> SCANNER-LIST </scanner-list>
SCANNER-LIST ::= SCANNER | SCANNER SCANNER-LIST
SCANNER      ::= <scanner> NAME ALPHABET STATES START FINAL-STATES TRANSITION-TABLE </scanner>
STATES       ::= <states> STATE-LIST </states> 
FINAL-STATES ::= <final-states> STATE-LIST </final-states>
TRANSITION-TABLE ::= <transition-table> TRANSITION-LIST </transition-table>
TRANSITION-LIST  ::= TRANSITION | TRANSITION TRANSITION-LIST
*/

/* START        ::= <start> .4. </final> */
void parseStart() {
    entering("parseStart");
    verify (otagSym, "start");
    t.getStuff();
    t.get();
    verify (ctagSym, "start");
    t.get();
    leaving("parseStart");
}

/* NAME         ::= <name>.1.</name> */
void parseName() {
    entering("parseName");
    verify (otagSym, "name");
    t.getStuff();
    t.get();
    verify (ctagSym, "name");
    t.get();
    leaving("parseName");
}

/* ALPHABET     ::= <alphabet> .2. </alphabet> */
void parseAlphabet() {
    entering("parseAlphabet");
    verify (otagSym, "alphabet");
    t.getStuff();
    t.get();
    verify (ctagSym, "alphabet");
    t.get();
    leaving("parseAlphabet");
}

/* INPUT        ::= <scanner-list> SCANNER-LIST </scanner-list> */
void parseInput() {
    entering("parseInput");
    verify (otagSym, "input");
    t.getStuff();
    t.get();
    verify (ctagSym, "input");
    t.get();
    leaving("parseInput");
}

/* NEXTSTATE        ::= <nextstate> .4. </nextstate> */
void parseNextState() {
    entering("parseNextState");
    verify (otagSym, "nextstate");
    t.getStuff();
    t.get();
    verify (ctagSym, "nextstate");
    t.get();
    leaving("parseNextState");
}

/* STATE            ::= <state> .4. </state> */
void parseState() {
    entering("parseState");
    verify (otagSym, "state");
    t.getStuff();
    t.get();
    verify (ctagSym, "state");
    t.get();
    leaving("parseState");
}


/* STATE-LIST   ::= STATE | STATE STATE-LIST */
void parseStateList() {
    entering("parseStateList");
    parseState(); 
    if (check(otagSym,"state") )
       parseStateList();
    leaving("parseStateLists");
}


/*
  PROGRAM      ::= <scanner-list> SCANNER-LIST </scanner-list>
  SCANNER-LIST ::= SCANNER | SCANNER SCANNER-LIST
  SCANNER      ::= <scanner> NAME ALPHABET STATES START FINAL-STATES TRANSITION-TABLE </scanner>
 * STATES       ::= <states> STATE-LIST </states>
 * FINAL-STATES ::= <final-states> STATE-LIST </final-states>
 * TRANSITION-TABLE ::= <transition-table> TRANSITION-LIST </transition-table>
 * TRANSITION-LIST  ::= TRANSITION | TRANSITION TRANSITION-LIST
  */

//prototype
void parseTransition();


/* TRANSITION-LIST  ::= TRANSITION | TRANSITION TRANSITION-LIST  */
void parseTransitionList()
{
    entering("parseTransitionList");
    parseTransition();
    if (check(otagSym,"transition") )
       parseTransitionList();
    leaving("parseTransitionList");

}

/*  TRANSITION-TABLE ::= <transition-table> TRANSITION-LIST </transition-table>  */
void parseTransitionTable()
{
    entering("parseTransitionTable");
    verify (otagSym, "transition-table");
    t.get();
    parseTransitionList();
    verify(ctagSym, "transition-table");
    t.get();
    leaving("parseTransitionTable");
}

/* STATES       ::= <states> STATE-LIST </states>  */
void parseStates()
{
  entering("parseStates");
  verify (otagSym, "states");
  t.get();
  parseStateList();
  verify (ctagSym, "states");
  t.get();
  leaving("parseStates");
}

/* TRANSITION       ::= <transition> INPUT STATE NEXTSTATE </transition> */
void parseTransition () {
  entering("parseTransition");
  verify (otagSym, "transition");
  t.get();
  parseInput();
  parseState();
  parseNextState();
  verify (ctagSym, "transition");
  t.get();
  leaving("parseTransition");
}

/*  FINAL-STATES ::= <final-states> STATE-LIST </final-states>  */
void parseFinalStates()
{
  entering("parseFinalStates");
  verify(otagSym, "final-states");
  t.get();
  parseStateList();
  verify(ctagSym, "final-states");
  t.get();
  leaving("parseFinalStates");
}

/*  SCANNER  ::= <scanner> NAME ALPHABET STATES START FINAL-STATES TRANSITION-TABLE </scanner> */
void parseScanner()
{
  entering("parseScanner");
  verify(otagSym, "scanner");
  t.get();
  parseName();
  parseAlphabet();
  parseStates();
  parseStart();
  parseFinalStates();
  parseTransitionTable();
  verify(ctagSym, "scanner");
  t.get();
  leaving("parseScanner");
}

/*  SCANNER-LIST ::= SCANNER | SCANNER SCANNER-LIST  */
void parseScannerList()
{
  entering("parseScannerList");
  parseScanner();
  if (check(otagSym,"scanner") )
    parseScannerList();
  leaving("parseScannerList");
}

/* PROGRAM        ::= <scanner-list> SCANNER-LIST </scanner-list> */
void parseProgram()
{
    entering("parseProgram");
    verify(otagSym, "scanner-list");
    t.get();
    parseScannerList();
    verify(ctagSym, "scanner-list");
    leaving("parseProgram");  
}



int main(int argc, char *argv[])
{
   if (argc == 2) 
      setTokenSource(argv[1]); // set input to kbd
   else
      setTokenSource(""); // set input to kbd

   DEBUG=0;
   t.init(initSym);
   t.get();
//   cout << "first token:"; 
//   t.val();
   //parseNextState(); 
//   parseStateList();
    parseProgram();
   return 0;
}

