% a simple dcg that checks the validity of simple programs of the form
%    begin
%       statements
%    end
%
% the query format is: checkProgram(SourceText).
% e.g.    checkProgram("begin abc = 123. end").
% and the query response will succeed if the source code is valid
%     or will be false if the source code is invalid
% (note that string_codes computes L as the list of ascii codes matching your source)
%
% statements can be either variable declarations or assignment statements
%    var x = 13.
%    var Pi = 3.14.
%    var z = x.
%    z = z * 3 + x / 12 - Pi.
%
% binary math operators + - * / are supported with the usual
%    precedence rules, but right-to-left associativity
%
% variable names must begin with an alphabetic character,
%    then can be alphanumeric or underscores
%
% most code segments are required to be whitespace delimited

% -----------------------------------------------------------------------
% checkProgram(Source)
% --------------------
% checks the source code against the grammar, e.g.
%    checkProgram("begin abc = 123. end").
checkProgram(Source) :- string_codes(Source, L), phrase(program, L, []).

% -----------------------------------------------------------------------

% a program has the form
%   begin
%     one or more statements 
%   end
program --> "begin", wspace, statements, wspace, "end".

% a set of statements can be a single statement
%   or a statement followed by more statements
statements --> statement.
statements --> statement, wspace, statements.

% a statement can be either a declaration or an assignment
statement --> declaration.
statement --> assignment.

% a declaration has the form
%    var:variableName=initialValue.
declaration --> "var", wspace, variable, wspace, "=", wspace, value,  terminator.  % [ 46 ].

% an assignment statement has the form 
%    variable=expression.
% e.g.  foo=10*x.
assignment --> variable, wspace, "=", wspace, expr,  terminator.

% an expression is either simple subexpression
%    or an addition/subtraction of other terms
expr --> addexpr.

addexpr --> mulexpr.
addexpr --> mulexpr, wspace, "+", wspace, addexpr.
addexpr --> mulexpr, wspace, "-", wspace, addexpr.

% a subexpression is either a value or a product/division
%   of other terms
mulexpr --> value.
mulexpr --> value, wspace, "*", wspace, mulexpr.
mulexpr --> value, wspace, "/", wspace, mulexpr.

% a value is a variable, integer, or float
value --> integer.
value --> float.
value --> variable.

% an integer is one or more digits
integer --> digits.

% a float is one or more digits, followed by a decimal point, followed by one or more digits
float --> digits, ".", digits.

% recognize digit sequences
digits --> digit.
digits --> digit, digits.
digit --> [ C ], { integer(C), C >= 48, C =< 57 }.  % ascii for 0..9

% a variable is an alphabetic character followed by
%    zero or more alphanumeric characters and underscores
variable --> alpha.
variable --> alpha, varletters.
varletters --> varchar.
varletters --> varchar, varletters.
varchar --> digit.
varchar --> alpha.
varchar --> "_".

% classify alphabetic characters as upper or lowercase
alpha --> uppercase ; lowercase.
uppercase --> [ A ], { integer(A), A >= 65, A =< 90 }.  % ascii for A-Z
lowercase --> [ A ], { integer(A), A >= 97, A =< 122 }. % ascii for a-z

% a terminator is either a period or whitespace then a period
terminator --> ".".
terminator --> wspace, ".".

% classify whitespace as one or more whitespace characters
wspace --> space.
wspace --> space, wspace.
space --> " ".
space --> [ 13 ].
space --> [ 10 ].
space --> [ 9 ].

