
# parser for the IttyBitty language
#   - produces an array-based representation of the program structure,
#     annotated with component identification
#
# A summary of what the parser produces for each component type
#   is provided below.
#
# Notes:
#    StatementList returns a non-empty array of statements, which is
#       in Program and Block, and referred to below as a StatementsArray
#    Integer values are stored as their text representation
#
# Program --> ['Program',StatementsArray]
#
# Statement --> ['Assign', varname, opname, Expression ]
#           --> ['Print', Value ]
#           --> ['Print', ['Text', ArrayOfWords] ]
#           --> ['If', ['Cond', Value, opname, Value ],
#                      ['Block', StatementsArray]]
#           --> ['While', ['Cond', Value, opname, Value ],
#                         ['Block', StatementsArray]]
#
# Value --> ['Variable', varname]
#       --> ['Integer', value]
#
# Expression --> Value
#            --> [ 'Expression', Value opname Expression ]


# parsing debug flag (true if parser debugging turned on, false otherwise)
PRSDEBUG = false

# $ymtable: parsing symbol table (global)
# ========
#    hash table of variable names and current initialization state,
#       true if initialized, valse otherwise
#    the hash table is cleared whenever the parser is called on a
#       Program component
$ymtable = {}

# Track the current active scope id, $SID
$SID = -1

# (parse tokenlist) builds a parse tree from the token list for a program
# =================
# (parse tokenlist component position scopes) - builds a parse (sub)tree for the
#    specified component, assuming it begins at the specified positon in the token list
#    scopes is a list of the current active scopes, newest scope at the end
# scope id's are unique, based on the position of their first token in the tokenlist,
#    which is 0 for global scope, the position of the { token for blocks
#
# a parser for the IttyBitty language
#    checks if a source code is syntactically correct for the language,
#    and builds a parse tree
# expects three parameters:
#      the overall token list for the source code
#      the type of language component we are checking for next in the token list
#          (defaults to an entire valid program)
#      the token list position where parsing for the component should begin
#          (defaults to the start of the token list)
# returns two values:
#     if parsing succeeds it returns the parse tree
#        and the position of the token immediately after the component
#     otherwise
#        it returns false and the position originally passed to it
def parse(tokenlist, component="Program", position=0, scopes=[])

if PRSDEBUG then
   puts "\n---parsing #{component} from position #{position} (#{tokenlist[position]}) ---"
end

    # make sure we're not off the end of the token list
    if tokenlist.length <= position then
       return false,tokenlist.length
    end

    # default parse tree is empty array
    parsetree = Array.new

    # based on the component type, try to parse the tokenlist appropriately
    case component
        when "Program"
            # initialize
            $ymtable.clear # make sure we start with a clean table
            $SID = 0       # set the current scope id to 0 (global)
            scopes << $SID # add the global scope to the list of active scopes

            # Program --> StatementList
            parsetree,pos = parse(tokenlist, "StatementList", position, scopes)
            if parsetree == false then
               return false, position
            elsif tokenlist.length > pos then
               puts "---parsing successful(?) but unused tokens from position #{pos} ---"
               return false, position
            end
            return ['Program',parsetree], pos

        when "StatementList"
            parsetree,pos = parse(tokenlist, "Statement", position, scopes)
            # StatementList --> Statement
            if parsetree == false then
               return false,position
            # StatementList --> Statement StatementList
            else
               backuppos = pos
               backuptree = parsetree
               parsetree,pos = parse(tokenlist, "StatementList", pos, scopes)
               if parsetree == false then
                  return [backuptree],backuppos
               else
                  return [backuptree] + parsetree,pos
               end
            end

        when "Statement"
           # Statement --> Print
           parsetree,pos = parse(tokenlist, "Print",position, scopes)
           if parsetree != false then
              return parsetree,pos
           end
           # Statement --> VarAssign
           parsetree,pos = parse(tokenlist, "VarAssign",position, scopes)
           if parsetree != false then
              return parsetree,pos
           end
           # Statement --> Loop
           parsetree,pos = parse(tokenlist, "Loop",position, scopes)
           if parsetree != false then
              return parsetree,pos
           end
           # Statement --> Conditional
           parsetree,pos = parse(tokenlist, "Conditional",position, scopes)
           if parsetree != false then
              return parsetree,pos
           end
           return false,position

        when "VarAssign"
           # VarAssign --> variable = Expression
           if tokenlist.length < position+3 then
              return false,position
           end
           var = tokenlist[position]
           op = tokenlist[position+1]
           if var[0] == 'variable' and op[0] == 'assignop' then
              parsetree,pos = parse(tokenlist, "Expression",position+2, scopes)
              if parsetree != false then
                 # if the variable is not in the symbol table,
                 #    add it with value [ $SID ] (the active scope id)
                 if !$ymtable.has_key?(var[1]) then
                    $ymtable[var[1]] = [ $SID ]
                    varscopes = $ymtable[var[1]]
                 else
                    # otherwise, get the variable's entry from the symbol table
                    #    (its list of scopes in which it has been initialized)
                    #    if $SID is not in that list, add it and update
                    #       variable's entry in the scopes list
                    varscopes = $ymtable[var[1]]
                    if !varscopes.include?($SID) then
                       varscopes << $SID
                       $ymtable[var[1]] = varscopes
                    end
                 end
                 return ['Assign', var[1], op[1], parsetree],pos
              end
           end
           return false,position

        when "Print"
           if tokenlist.length < position+2 then
              return false,position
           end
           prt = tokenlist[position]
           if prt[0] != 'keyword' or prt[1] != 'print' then
              return false,position
           end
           parsetree,pos = parse(tokenlist, "Value", position+1, scopes)
           if parsetree != false then
              return ['Print', parsetree],pos
           end
           # Print --> print text
           parsetree,pos = parse(tokenlist, "Text", position+1, scopes)
           if parsetree != false then
              return ['Print', parsetree],pos
           end
           return false,position

        when "Value"
           if tokenlist.length < position+1 then
              return false,position
           end
           val = tokenlist[position]
           # Value --> variable
           if val[0] == 'variable' then
              # if the variable is not yet in the symbol table add it now as uninitialized
              if !$ymtable.has_key?(val[1]) then
                 $ymtable[val[1]] = []
              end
              # get the variable's list of scopes from the symbol table,
              #     and compare it to the list of active scopes
              # if there is no overlap between the two then the
              #     variable has not been initialized,
              #     so generate a warning message
              varscopes = $ymtable[val[1]]
              overlap = varscopes & scopes
              if overlap.length < 1 then
                 puts "WARNING: use of uninitialized variable #{val[1]}"
              end
              return ['Variable', val[1]],position+1
           # Value --> integer
           elsif val[0] == 'integer' then
              return ['Integer',val[1]],position+1
           end
           return false,position

        when "Conditional"
           if tokenlist.length <= position then
              return false,position
           end
           iftok = tokenlist[position]
           # Conditional --> if Condition Block
           if iftok[0] == 'keyword' and iftok[1] == 'if' then
              condtree,newpos = parse(tokenlist, "Condition",position+1, scopes)
              if condtree != false then
                 blocktree,lastpos = parse(tokenlist, "Block",newpos, scopes)
                 if blocktree != false then
                    return ['If',condtree,blocktree],lastpos
                 end
              end
           end
           return false,position

        when "Loop"
           if tokenlist.length <= position then
              return false,position
           end
           whiletok = tokenlist[position]
           # Loop --> while Condition Block
           if whiletok[0] == 'keyword' and whiletok[1] == 'while' then
              condtree,newpos = parse(tokenlist, "Condition",position+1, scopes)
              if condtree != false then
                 blocktree,lastpos = parse(tokenlist, "Block",newpos, scopes)
                 if blocktree != false then
                    return ['While',condtree,blocktree],lastpos
                 end
              end
           end
           return false,position

        when "Condition"
           # Condition --> ( Value comparisonop Value )
           if tokenlist.length < position+5 then
              return false,position
           end
           br1 = tokenlist[position]
           if br1[0] == 'bracket' and br1[1] == "(" then
              val1tree,pos = parse(tokenlist, "Value",position+1, scopes)
              if val1tree != false then
                 op = tokenlist[position+2]
                 if op[0] == 'compop' then
                    val2tree,lastpos = parse(tokenlist, "Value",position+3, scopes)
                    if val2tree != false then
                       br2 = tokenlist[position+4]
                       if br2[0] == 'bracket' and br2[1] == ')' then
                          return ['Cond',val1tree,op[1],val2tree],position+5
                       end
                    end
                 end
              end
           end
           return false,position

        when "Block"
           # Block --> { StatementList }
           if tokenlist.length < position+1 then
              return false,position
           end
           br1 = tokenlist[position]
           if br1[0] == 'delimiter' and br1[1] == '{' then
              oldSID = $SID     # remember the current scope id
              $SID = position   # capture the new scope id
              scopes << $SID    # add the id for the current scope to the active list
              parsetree,pos = parse(tokenlist, "StatementList",position+1, scopes)
              scopes.pop        # remove the scope id, back to the previous scope list
              $SID = oldSID     # restore the correct active scope id
              if parsetree != false then
                 if tokenlist.length < pos+1 then
                    return false,position
                 end
                 br2 = tokenlist[pos]
                 if br2[0] == 'delimiter' and br2[1] == '}' then
                    return ['Block',parsetree],pos+1
                 end
              end
           end
           return false,position

        when "Expression"
           # Expression --> Value
           parsetree,pos = parse(tokenlist, "Value",position, scopes)
           if parsetree != false then
              if tokenlist.length <= pos
                 return parsetree,pos
              end
              backuppos = pos
              # Expression --> Value mathop Expression
              op = tokenlist[pos]
              if op[0] == 'mathop' then
                 exprtree,pos = parse(tokenlist, "Expression",pos+1, scopes)
                 if exprtree != false then
                    return ['Expression', parsetree, op[1], exprtree],pos
                 end
              end
              return parsetree,backuppos
           end
           return false,position

        when "Text"
           # Text --> " WordList "
           if tokenlist.length <= position then
              return false,position
           end
           q1 = tokenlist[position]
           if q1[0] == 'quote' then
              parsetree,pos = parse(tokenlist, "WordList",position+1, scopes)
              if parsetree != false then
                 if tokenlist.length <= pos then
                    return false,position
                 end
                 q2 = tokenlist[pos]
                 if q2[0] == 'quote' then
                    return ['Text', parsetree],pos+1
                 end
              end
           end
           return false,position

        when "WordList"
           # WordList --> text
           if tokenlist.length <= position then
              return false,position
           end
           word = tokenlist[position]
           if word[0] == 'text' then
              # WordList --> text WordList
              wordstree,pos = parse(tokenlist, "WordList",position+1, scopes)
              if wordstree != false then
                 return [word[1]]+wordstree,pos
              end
              return [word[1]],position+1
           end
           return false,position

        else # this else is for the overall component case
           puts "Error, invalid component type specified: #{component}"
           return false,position
    end

    # return the results of parsing as the suggested component type
    #    should not be reachable?
    puts "---???Reached default end of parse???---"
    return parsetree,position
end

