# /=====================================================================\ #
# |  LaTeXML::MathGrammar                                         | #
# | LaTeXML's Math Grammar for postprocessing                           | #
# |=====================================================================| #
# | Part of LaTeXML:                                                    | #
# |  Public domain software, produced as part of work done by the       | #
# |  United States Government & not subject to copyright in the US.     | #
# |---------------------------------------------------------------------| #
# | Bruce Miller <bruce.miller@nist.gov>                        #_#     | #
# | http://dlmf.nist.gov/LaTeXML/                              (o o)    | #
# \=========================================================ooo==U==ooo=/ #
# ================================================================================
# LaTeXML's MathGrammar.
# To compile :
#      perl -MParse::RecDescent - MathGrammar LaTeXML::MathGrammar
# ================================================================================
# Startup actions: this block is copied into the generated parser. It imports
# the names exported by LaTeXML::MathParser under the :constructors tag
# (presumably the helpers such as Apply, Fence and NewList that are called by
# the rule actions below). Uncomment the RD_TRACE line to trace the parse.
{ BEGIN{ use LaTeXML::MathParser qw(:constructors); 
#### $::RD_TRACE=1;
}}
  
# Rules section
# ========================================
# Naming Conventions:
#   UPPERCASE   : is for terminals, ie. classes of TeX tokens.
#   Initial Cap : for non-terminal rules that can possibly be invoked externally.
#   Initial lowercase : internal rules.
# ========================================
# For internal rules
#   moreFoos[$foo] : Looks for more Foo's w/appropriate punctuation or operators, 
#     whatever is appropriate, and combines it with whatever was passed in
#     as pattern arg. Typically, the last clause would be simply
#       | { $arg[0]; }
#     to return $foo without having found any more foo's.
#     In such a case, it appears to be advantageous to have the first clause be
#       : /^\Z/ { $arg[0]; }
#     which will return immediately if there is no additional input.
#   addFoo[$bar]  : Check for a following Foo and add it, as appropriate to
#   the $bar.
# ========================================
# Note that Parse::RecDescent does NOT backtrack within a rule:
#  If a given production succeeds, the rule succeeds, but even if the ultimate
# parse fails, the parser will NOT go back and try another production within
# that same rule!!!  Of course, if a production fails, it goes on to the next,
# and if that rule fails, etc...
#
# For example ||a|-|b|| won't work (in spite of various attempts to control it)
# After seeing the initial || and attempting to parse an Expression, it gets
#   a * abs( - abs(b))
# without anything to match the initial ||; and it will NOT backtrack to try
# a shorter Expression!
#
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Top Level expressions; Just about anything?
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Note in particular, that many inline formula contain `half' a formula,
# with the lead-in text effectively being the LHS. eg. function $=foo$;
# similarly you can end up with a missing RHS, $x=$ even.

# Start: an Anything that must be followed by the end of the input;
# the value of the rule is the value of Anything.
Start	: Anything /^\Z/			{ $item[1]; }

#======================================================================
# Anything: localizes $MaxAbsDepth, initialized from
# $LaTeXML::MathParser::MAX_ABS_DEPTH, for the duration of the rule
# (absExpression decrements and tests it), then requires AnythingAny to be
# followed by the end of the input. The value is that of AnythingAny.
Anything : <rulevar: local $MaxAbsDepth = $LaTeXML::MathParser::MAX_ABS_DEPTH>
Anything : AnythingAny /^\Z/			{ $item[1]; }

#======================================================================
# AnythingAny: the alternatives accepted at top level. They are tried in
# order and the first production that succeeds wins.
AnythingAny :
	# A sequence of formulae, either bare or wrapped in OPEN ... CLOSE.
 	  Formulae 
	| OPEN Formulae CLOSE		  { Fence($item[1],$item[2],$item[3]); }
	# A formula that lacks its left-hand side, either bare or fenced.
	| modifierFormula
	| OPEN modifierFormula CLOSE	  { Fence($item[1],$item[2],$item[3]); }
	# A lone modifier, or a modifier/meta-relation operator applied with
	# Absent() standing in for the missing first operand.
        | MODIFIER
 	| MODIFIEROP Expression           { Apply($item[1],Absent(),$item[2]);}
	| METARELOP Formula		  { Apply($item[1],Absent(),$item[2]); }
	# Two or more operators, optionally separated by punctuation; where the
	# PUNCT is omitted, InvisibleComma() is used as the separator.
	| anyop (PUNCT(?) anyop {[$item[1]->[0]||InvisibleComma(), $item[2]]})(s)
	    			          { NewList($item[1],map(@$_,@{$item[2]})); }
	# Floating scripts with no base: they are attached to Absent().
	# When both are present, the first one read is attached innermost.
        | FLOATSUPERSCRIPT FLOATSUBSCRIPT  { NewScript(NewScript(Absent(),$item[1]),$item[2]); }
        | FLOATSUBSCRIPT FLOATSUPERSCRIPT  { NewScript(NewScript(Absent(),$item[1]),$item[2]); }
        | FLOATSUPERSCRIPT  { NewScript(Absent(),$item[1]); }
        | FLOATSUBSCRIPT    { NewScript(Absent(),$item[1]); }
					  
# Subscript: top-level rule for the content of a subscript; deliberately
# permissive so that it accepts all sorts of junk.
# Matches one aSubscript followed by zero or more further aSubscript items,
# each optionally preceded by PUNCT (InvisibleComma() is supplied where the
# punctuation is omitted). Items and separators are collected, in order,
# into a NewList.
# $MaxAbsDepth is reset from MAX_ABS_DEPTH for the duration of the rule.
Subscript : <rulevar: local $MaxAbsDepth = $LaTeXML::MathParser::MAX_ABS_DEPTH>
Subscript :
    aSubscript
    ( PUNCT(?) aSubscript
        { [ ($item[1][0] || InvisibleComma()), $item[2] ]; }
    )(s?)
        { NewList($item[1], map { @$_ } @{ $item[2] }); }

# Superscript: top-level rule for the content of a superscript.
# Matches one aSuperscript followed by zero or more further aSuperscript
# items, each optionally preceded by PUNCT (InvisibleComma() is supplied
# where the punctuation is omitted). Items and separators are collected,
# in order, into a NewList.
# $MaxAbsDepth is reset from MAX_ABS_DEPTH for the duration of the rule.
Superscript : <rulevar: local $MaxAbsDepth = $LaTeXML::MathParser::MAX_ABS_DEPTH>
Superscript :
    aSuperscript
    ( PUNCT(?) aSuperscript
        { [ ($item[1][0] || InvisibleComma()), $item[2] ]; }
    )(s?)
        { NewList($item[1], map { @$_ } @{ $item[2] }); }

# aSubscript: one item of a subscript, tried in order: a full Formulae;
# an operator applied to an Expression (Absent() fills the missing first
# operand); or a lone operator.
aSubscript :
          Formulae
 	| anyop Expression               { Apply($item[1],Absent(),$item[2]);}
	| anyop

# aSuperscript: one item of a superscript. Like aSubscript, except that a
# run of SUPOP tokens (supops) is tried before anything else.
aSuperscript :
          supops
	| Formulae 
 	| anyop Expression               { Apply($item[1],Absent(),$item[2]);}
	| anyop

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Formulae  (relations or grouping of expressions or relations)
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# This maze attempts to recognize the various meaningful(?) alternations of
# Expression(s) separated by punctuation, relational operators or metarelational
# operators [Think of     $a=b=c$ vs $a=b, c=d$  vs. $a=b,c,d$  .. ]
# and group them into Formulae (collections of relations), including relations
# which have punctuated collections of Expression(s) on either the LHS or RHS, 
# as well as `multirelation' like a = b = c, or simply punctuated collections of
# Expression(s)

# Formulae: a Formula, passed on to moreFormulae which attaches whatever
# follows. There is no action, so the value is that of moreFormulae.
Formulae : Formula moreFormulae[$item[1]]

# moreFormulae[$formula]: a Formula has been read; see what can follow it.
#   - nothing left in the input: return $formula as is (short circuit);
#   - one or more (endPunct Formula) pairs: combine $formula with all the
#     separators and formulae into a NewFormulae;
#   - METARELOP Formula: apply the meta-relation to $formula and the
#     Formula that follows it;
#   - otherwise: return $formula unchanged, consuming nothing.
moreFormulae :
      /^\Z/
        { $arg[0]; }
    | ( endPunct Formula { [ $item[1], $item[2] ]; } )(s)
        { NewFormulae($arg[0], map { @$_ } @{ $item[1] }); }
    | METARELOP Formula
        { Apply($item[1], $arg[0], $item[2]); }
    | { $arg[0]; }

# endPunct: punctuation that can end one formula and separate it from the
# next; either a PUNCT or a PERIOD token.
endPunct : PUNCT | PERIOD

# Formula: an Expression, passed on to extendFormula which decides whether it
# is part of a relation or a punctuated collection. There is no action, so
# the value is that of extendFormula.
Formula : Expression extendFormula[$item[1]]

# extendFormula[$expression]: decide what the Expression just read is part of.
# The alternatives are tried in order:
#   - end of input: return $expression as is;
#   - one or more (PUNCT Expression) pairs: a punctuated collection, which is
#     flattened and handed to maybeRHS to see whether a relation follows;
#   - relop Expression: the start of a relation, continued by moreRHS;
#   - relop at the end of input: a relation whose right-hand side is Absent();
#   - otherwise: return $expression unchanged, consuming nothing.
extendFormula :
          /^\Z/ { $arg[0];}   # short circuit!
        | punctExpr(s) maybeRHS[$arg[0],map(@$_,@{$item[1]})]
	| relop Expression moreRHS[$arg[0],$item[1],$item[2]]
	| relop /^\Z/    { NewFormula($arg[0],$item[1], Absent()); }
	| { $arg[0]; }

# maybeRHS[$expr,($punct,$expr)*]: a punctuated collection of expressions
# has been read. If one or more (relop Expression) pairs follow, the whole
# collection becomes the left-hand side of a NewFormula; otherwise (end of
# input, or nothing suitable follows) the collection is returned as a NewList.
# Open questions carried over from the original notes:
#   - either result could still be followed by a modifier such as (>0);
#   - for the plain list, does a,b,c (<0) mean c<0, or that all of them are <0?
maybeRHS :
      /^\Z/
        { NewList(@arg); }
    | relopExpr(s)
        { NewFormula(NewList(@arg), map { @$_ } @{ $item[1] }); }
    | { NewList(@arg); }

# moreRHS[$expr,$relop,$expr]: a relation "lhs relop rhs" has been read.
# The alternatives are tried in order:
#   - end of input: build the NewFormula from the three arguments;
#   - PUNCT Expression: either the right-hand side is a punctuated collection
#     or a new formula is starting; maybeColRHS decides, receiving the
#     arguments so far plus the new punctuation and expression;
#   - zero or more further (relop Expression) pairs: a multirelation such as
#     a = b = c, folded into a single NewFormula. Since zero pairs are
#     allowed, this alternative also covers "nothing more follows".
moreRHS :
	  /^\Z/   { NewFormula($arg[0],$arg[1],$arg[2]); } # short circuit!
	| PUNCT Expression maybeColRHS[@arg,$item[1],$item[2]]
	| relopExpr(s?) { NewFormula($arg[0],$arg[1],$arg[2],
				     map(@$_,@{$item[1]})); }
# --- 1st line could be preceded by (>0) IF it ends up end of formula
# --- 2nd line could be followed by (>0)

# maybeColRHS[$expr,$relop,$expr,($punct,$expr)*]: a relation has been read
# whose right-hand side is, so far, a punctuated sequence of expressions.
# The alternatives are tried in order:
#   - end of input: everything from index 2 onwards is the right-hand side,
#     collected into a NewList;
#   - relop Expression: the LAST expression read was really the left-hand side
#     of a new relation. moreRHS parses that new relation (starting from the
#     last argument); the first formula keeps the arguments up to, but not
#     including, the last (punct, expr) pair; and the two are joined into a
#     NewFormulae using that last punct as the separator;
#   - PUNCT Expression: yet another member of the collection; recurse with
#     the new pair appended to the arguments;
#   - otherwise: same result as at end of input, consuming nothing.
maybeColRHS :
	  /^\Z/ { NewFormula($arg[0],$arg[1],NewList(@arg[2..$#arg])); }
        | relop Expression moreRHS[$arg[$#arg],$item[1],$item[2]]
	    { NewFormulae(NewFormula($arg[0],$arg[1],
		  	  NewList(@arg[2..$#arg-2])),$arg[$#arg-1],$item[3]); }
	| PUNCT Expression maybeColRHS[@arg,$item[1],$item[2]]
	| { NewFormula($arg[0],$arg[1],NewList(@arg[2..$#arg])); }
# --- 1st line handles it through more RHS ???
# --- 2nd line could be preceded by (>0) if it ends formula
# --- 3rd line could be followed by (>0)


# punctExpr: a PUNCT followed by an Expression. Yields the two-element
# array reference [punct, expression], which callers flatten into lists.
punctExpr :
    PUNCT Expression
        { [ @item[1, 2] ]; }
# relopExpr: a relop followed by an Expression, or a relop at the very end of
# the input (in which case Absent() stands in for the missing expression).
# Yields the two-element array reference [relop, expression].
relopExpr :
      relop Expression
        { [ @item[1, 2] ]; }
    | relop /^\Z/
        { [ $item[1], Absent() ]; }

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# `Modifier' formula, things like $<0$, that might follow another formula or text.
# Absent() is a placeholder for the missing thing... (?)

# Earlier formulation, kept for reference:
#modifierFormula : relopExpr(s) { NewFormula(Absent(),map(@{$_},@{$item[1]}));}
#
# modifierFormula: a formula with no left-hand side: a relop, an Expression
# and zero or more further (PUNCT Expression) pairs. Absent() takes the place
# of the left-hand side; the expression(s) and their separators are collected
# into a NewList that forms the right-hand side.
modifierFormula :
    relop Expression punctExpr(s?)
        { NewFormula(Absent(),
                     $item[1],
                     NewList($item[2], map { @$_ } @{ $item[3] })); }

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Expressions; sums of terms
# Abstractly, things combined by operators binding tighter than relations
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

# Expressions: an Expression followed by zero or more (PUNCT Expression)
# pairs; everything is collected, in order, into a NewList.
Expressions :
    Expression punctExpr(s?)
        { NewList($item[1], map { @$_ } @{ $item[2] }); }

# Expression: a (possibly signed) term; moreTerms then collects any further
# addop/term pairs (starting from an empty accumulator), and
# addExpressionModifier attaches a trailing modifier, if any, to the result.
# There is no action, so the value is that of addExpressionModifier.
Expression  : SignedTerm moreTerms[[],$item[1]] addExpressionModifier[$item[2]]

# moreTerms[ [($term,$addop)*], $term ]: check for more addops and terms.
# $arg[0] is an array reference accumulating the (term, addop) pairs read so
# far and $arg[1] is the most recent term.
#   - end of input, or no ADDOP follows: pass the accumulated sequence plus
#     the last term to LeftRec;
#   - ADDOP: let moreTerms2 decide whether another term follows it.
moreTerms :
          /^\Z/ { LeftRec(@{$arg[0]},$arg[1]); }   # short circuit!
        | ADDOP moreTerms2[$arg[0],$arg[1],$item[1]]
	| { LeftRec(@{$arg[0]},$arg[1]); }

# moreTerms2[ [($term,$addop)*], $term, $addop ]: an ADDOP has just been read.
#   - If a Term follows, append ($term,$addop) to the accumulator and go back
#     to moreTerms with the new term.
#   - If not, the dangling addop presumably represents a limiting form like
#     "a+" (ie. a from above): the last term becomes
#     Apply(New('limit-from'),$term,$addop) and the sequence is closed off
#     with LeftRec.
moreTerms2   : Term moreTerms[ [@{$arg[0]},$arg[1],$arg[2]],$item[1] ]
	    | { LeftRec(@{$arg[0]},Apply(New('limit-from'),$arg[1],$arg[2])); }


# addExpressionModifier[$expression]: attach a trailing modifier, if there is
# one, to the expression just read. The alternatives are tried in order:
addExpressionModifier :
          /^\Z/ { $arg[0];}   # short circuit!
	# A fenced relation lacking its left-hand side, optionally preceded by
	# punctuation: the expression is 'annotated' with the fenced relation,
	# in which Absent() stands for the missing operand.
        | PUNCT(?) OPEN relop Expression balancedClose[$item[2]]
	    { Apply(New('annotated'),$arg[0],
 		    Fence($item[2], Apply($item[3],Absent(),$item[4]),$item[5])); }
	# A fenced complete relation (continued by moreRHS): also 'annotated'.
        | PUNCT(?) OPEN Expression relop Expression
	  	    moreRHS[$item[3],$item[4],$item[5]] balancedClose[$item[2]]
            { Apply(New('annotated'),$arg[0],Fence($item[2],$item[6],$item[7])); }
	# A fenced MODIFIEROP Expression: the operator is applied directly to the
	# expression and the modifier; the fence itself is not kept.
	| PUNCT(?) OPEN MODIFIEROP Expression balancedClose[$item[2]]
            { Apply($item[3],$arg[0],$item[4]); }
	# An unfenced modifier, or modifier operator with its operand.
        | MODIFIER
            { Apply(New('annotated'),$arg[0],$item[1]); }
	| MODIFIEROP Expression
            { Apply($item[1],$arg[0],$item[2]); }
	# No modifier: return the expression unchanged.
	| { $arg[0]; }


#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Terms: products of factors
# Abstractly, things combined by operators binding tighter than addition
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

# SignedTerm: a Term optionally preceded by an ADDOP acting as a prefix
# operator; in that case the operator is applied to the single term.
SignedTerm : ADDOP Term				{ Apply($item[1],$item[2]); }
        | Term

### Earlier formulation of Term, kept for reference:
### Term	: Factor (MULOP(?) Factor { [$item[1]->[0] || InvisibleTimes(), $item[2]]; })(s?)
###					{ LeftRec($item[1],map(@$_,@{$item[2]})); }

# Term: a Factor, passed on to moreFactors which collects any following
# factors. There is no action, so the value is that of moreFactors.
Term    : Factor moreFactors[$item[1]]
# moreFactors[$product]: extend the product read so far with whatever
# follows. The alternatives are tried in order:
#   - end of input: return $product;
#   - MULOP Factor: combine using the explicit operator (ApplyNary), recurse;
#   - COMPOSEOP: hand over to makeComposition;
#   - an "evaluated at" operator, unless $forbidEvalAt is set (absExpression
#     sets it): maybeEvalAt must then find the script(s) that go with it;
#   - a directly following Factor: implied multiplication, using
#     InvisibleTimes() as the operator, recurse;
#   - otherwise: return $product unchanged, consuming nothing.
moreFactors :
          /^\Z/ { $arg[0];}   # short circuit!
        | MULOP Factor moreFactors[ApplyNary($item[1],$arg[0],$item[2])]
	# Given an explicit COMPOSEOP, we'll assume the preceding is
	# an implicit lambda of some sort(?)
	| COMPOSEOP makeComposition[$arg[0],$item[1]]
#        | { ($forbidEvalAt ? undef : 1); }
#	  evalAtOp POSTSUBSCRIPT
#	  	   moreFactors[Apply(New('evaluated-at'),$arg[0],Arg($item[3],0))]
	# The leading action yields undef, and so fails this production,
	# whenever $forbidEvalAt is true.
        | { ($forbidEvalAt ? undef : 1); }
	  evalAtOp maybeEvalAt[$arg[0],$item[2]]
 	| Factor moreFactors[ApplyNary(InvisibleTimes(),$arg[0],$item[1])]
	| { $arg[0]; }


#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Factors: function applications, postfix on atoms, etc.
# Abstractly, things combined by operators binding tighter than multiplication
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

# Factor: the basic operand of a product. The alternatives are tried in
# order and the first one that succeeds wins, so the order matters.
# Several productions start with an action used purely as a guard: the
# production is abandoned when the action yields undef.
Factor	: 
	# These 2nd two are Iffy; hopefully the 1st rule will protect from backtrack?
	# A fenced array; then an array with only an OPEN or only a CLOSE,
	# both treated as 'cases' annotated with the delimiter that was seen.
	  OPEN ARRAY CLOSE  addScripts[Fence($item[1],$item[2],$item[3])]
	  # perhaps only when OPEN or CLOSED is { or } ??
	| OPEN ARRAY  { Annotate(Apply(New('cases'),$item[2]),open=>$item[1]); }
	| ARRAY CLOSE { Annotate(Apply(New('cases'),$item[1]),close=>$item[2]); }
	# The various classes of (possibly pre/post-scripted) symbols, each
	# followed by the argument handling appropriate to its class.
	| preScripted['FUNCTION'] addArgs[$item[1]]
	| preScripted['OPFUNCTION'] addOpFunArgs[$item[1]]
	| preScripted['TRIGFUNCTION'] addTrigFunArgs[$item[1]]
	| preScripted['ATOM_OR_ID'] maybeArgs[$item[1]]
	| preScripted['UNKNOWN'] doubtArgs[$item[1]]
	| NUMBER   addScripts[$item[1]]
	# Things starting with an open delimiter; SCRIPTOPEN (an open brace) is
	# tried first so that conditional sets can be recognized.
	| SCRIPTOPEN scriptFactorOpen[$item[1]]
	| OPEN factorOpen[$item[1]]
	| preScripted['bigop'] addOpArgs[$item[1]]
	# Vertical bars as norm or absolute value; skipped while $forbidVertBar
	# is set (ketExpression sets it).
	| { ($forbidVertBar ? undef : 1); }
	  VERTBAR VERTBAR absExpression VERTBAR VERTBAR # || exp || ==> norm
	      addScripts[Fence('||',$item[4],'||')]
	| { ($forbidVertBar ? undef : 1); }
	  VERTBAR absExpression VERTBAR			    # | exp | => absolute-value
	      addScripts[Fence($item[2],$item[3],$item[4])]
	# Bra/ket forms; guarded by IsNotationAllowed('QM'), and SawNotation('QM')
	# is called once the form has been recognized.
	# NOTE(review): these guards only work if IsNotationAllowed returns undef
	# (not merely a false value) when the notation is not allowed -- confirm.
	| { ($forbidVertBar ? undef : IsNotationAllowed('QM')); }
	  VERTBAR ketExpression RANGLE { SawNotation('QM'); } # | exp > ==> ket
 	      addScripts[Fence('|',$item[3],'>')] # ket
	| { IsNotationAllowed('QM'); }
	  LANGLE ketExpression VERTBAR maybeBra[$item[3]]
	| { IsNotationAllowed('QM'); }
	  LANGLE absExpression RANGLE
	       addScripts[Fence($item[2],$item[3],$item[4])]
	# An operator with its scripts, possibly nested with further operators,
	# then given arguments in the same way as an OPFUNCTION.
	| OPERATOR addScripts[$item[1]] nestOperators[$item[2]]
	  	    addOpFunArgs[$item[3]]

# ATOM_OR_ID: pseudo-terminal grouping the tokens that are handled alike by
# Factor and the barearg rules: an ATOM, an ID or an ARRAY.
ATOM_OR_ID : ATOM | ID | ARRAY


# barearg: a restricted sort of Factor for the unparenthesized argument to a
# function: one aBarearg followed by whatever moreBareargs can multiply on.
# Note f g h => f*g*h, but f g h x => f(g(h(x)))  Seems like what people mean...
# Should there be a special case for trigs?
barearg : aBarearg moreBareargs[$item[1]]
# aBarearg: a single factor within a bare argument. This is the subset of
# Factor's alternatives for scripted symbols and numbers, plus a simple
# | Expression | form.
# NOTE(review): unlike in Factor, the VERTBAR form here is not guarded by
# $forbidVertBar and uses Expression rather than absExpression -- confirm
# that this is intended.
aBarearg : 
	  preScripted['FUNCTION'] addArgs[$item[1]]
	| preScripted['OPFUNCTION'] addOpFunArgs[$item[1]]
	| preScripted['TRIGFUNCTION'] addTrigFunArgs[$item[1]]
	| preScripted['ATOM_OR_ID'] maybeArgs[$item[1]]
	| preScripted['UNKNOWN'] doubtArgs[$item[1]]
	| NUMBER   addScripts[$item[1]]
	| VERTBAR Expression VERTBAR addScripts[Fence($item[1],$item[2],$item[3])]

# moreBareargs[$product]: extend a bare argument with further factors, joined
# by an explicit MULOP or, when two factors are adjacent, by InvisibleTimes().
# Returns $product unchanged when nothing suitable follows.
moreBareargs :
	  /^\Z/ { $arg[0];}   # short circuit!
	| MULOP aBarearg moreBareargs[ApplyNary($item[1],$arg[0],$item[2])]
	| aBarearg moreBareargs[ApplyNary(InvisibleTimes(),$arg[0],$item[1])]
	| { $arg[0]; }

# trigBarearg: a variation of barearg that does not allow a bare trig
# function; used for the bare argument of a TRIGFUNCTION.
trigBarearg : aTrigBarearg moreTrigBareargs[$item[1]]
# aTrigBarearg: same alternatives as aBarearg, minus the TRIGFUNCTION one.
aTrigBarearg : 
	  preScripted['FUNCTION'] addArgs[$item[1]]
	| preScripted['OPFUNCTION'] addOpFunArgs[$item[1]]
	| preScripted['ATOM_OR_ID'] maybeArgs[$item[1]]
	| preScripted['UNKNOWN'] doubtArgs[$item[1]]
	| NUMBER   addScripts[$item[1]]
	| VERTBAR Expression VERTBAR addScripts[Fence($item[1],$item[2],$item[3])]
# moreTrigBareargs[$product]: same as moreBareargs, but built from
# aTrigBarearg so that no bare trig function is absorbed into the argument.
moreTrigBareargs :
	  /^\Z/ { $arg[0];}   # short circuit!
        | MULOP aTrigBarearg
	   	 moreTrigBareargs[ApplyNary($item[1],$arg[0],$item[2])]
 	| aTrigBarearg 
	  moreTrigBareargs[ApplyNary(InvisibleTimes(),$arg[0],$item[1])]
	| { $arg[0]; }

# maybeEvalAt[$thing,$at_op]: an "evaluated at" operator has been read after
# $thing; it only counts if scripts follow, otherwise this rule fails.
#   - a subscript first: moreEvalAt looks for an optional superscript;
#   - a superscript then a subscript: build 'evaluated-at' from $thing, the
#     subscript's argument and the superscript's argument (in that order),
#     and carry on collecting factors.
maybeEvalAt :
	  POSTSUBSCRIPT moreEvalAt[$arg[0],$arg[1],Arg($item[1],0)]
	| POSTSUPERSCRIPT POSTSUBSCRIPT
	  	   moreFactors[Apply(New('evaluated-at'),$arg[0],Arg($item[2],0),Arg($item[1],0))]

# moreEvalAt[$thing,$atop,$sub]: the subscript of an "evaluated at" has been
# read. If a superscript follows, include its argument as well; either way
# build the 'evaluated-at' application and carry on collecting factors.
# ($atop is passed in but is not used by either production.)
moreEvalAt :
	  POSTSUPERSCRIPT
	  	   moreFactors[Apply(New('evaluated-at'),$arg[0],$arg[2],Arg($item[1],0))]
         | moreFactors[Apply(New('evaluated-at'),$arg[0],$arg[2])]

#======================================================================
# After < a | we might be done, or get <a|b> or <a|H|b>

# <$expr |   maybeBra[$expr]
# Having read "< expr |": if another ketExpression follows, maybeBraket tries
# to complete an inner product; otherwise it is a bra on its own, so the QM
# notation is recorded via SawNotation and the fenced bra gets its scripts.
maybeBra :
	  ketExpression maybeBraket[$arg[0],$item[1]]
	| { SawNotation('QM'); } addScripts[Fence('<',$arg[0],'|')]
# <$expr1|$expr2   maybeBraket[$expr1,$expr2]
# Complete an inner product, in one of two forms:
#   - a closing RANGLE: <expr1|expr2>, the 'inner-product' of the bra
#     <expr1| and the ket |expr2>;
#   - VERTBAR ketExpression RANGLE: <expr1|expr2|expr3>, where expr2 is
#     applied to the ket |expr3> before taking the inner product.
# In both cases SawNotation('QM') is called once the form is recognized.
# The rule fails if neither form follows.
maybeBraket :
          RANGLE { SawNotation('QM'); }
	      addScripts[Apply(New('inner-product'),
		               Fence('<',$arg[0],'|'),
	                       Fence('|',$arg[1],'>'))]
        | VERTBAR ketExpression RANGLE { SawNotation('QM'); }
	      addScripts[Apply(New('inner-product'), # Is this a good representation?
		               Fence('<',$arg[0],'|'),
			       Apply($arg[1],Fence('|',$item[2],'>')))]

# bra's and ket's (ie <foo| & |foo>) can contain a rather wide variety of things
# from simple symbols to full (but typically short) formula, and so we
# want to use the Formulae production.  However, for that to work,
# we need to keep |, < and > (which delimit the bra & ket) from being
# interpreted as usual, otherwise the parse will walk off the end, or
# fail at a level that precludes backtracking.
# Hence the two rule variables: $forbidVertBar disables the vertical-bar
# productions of Factor, and $forbidLRAngle makes relop reject < and >,
# for as long as this rule is active.
# Besides Formulae, a single operator token is also accepted as content.
ketExpression : <rulevar: local $forbidVertBar = 1>
ketExpression : <rulevar: local $forbidLRAngle = 1>
ketExpression : Formulae
	      | METARELOP | ARROW | ADDOP | MULOP | MODIFIEROP

#======================================================================
# absExpression; need to be careful about misinterpreting the next |
# since we can't backtrack across productions.
# Disable evalAt notation ( |_{x=0} ) and explicitly control abs nesting:
#   - $forbidEvalAt is set so that moreFactors skips its evalAtOp production;
#   - $MaxAbsDepth is decremented for each level of nesting; once it drops
#     below zero the guard calls SawNotation('AbsFail') and yields undef,
#     which makes the rule fail.
# Otherwise the value is that of the Expression.
absExpression : <rulevar: local $forbidEvalAt = 1>
absExpression : <rulevar: local $MaxAbsDepth = $MaxAbsDepth-1>
absExpression : { ($MaxAbsDepth >= 0 ? 1 : (SawNotation('AbsFail')&& undef)); } Expression

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Adding pre|post sub|super scripts to various things.
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

# addScripts[$base] ; adds any following sub/super scripts to $base.
# Each POSTSUPERSCRIPT or POSTSUBSCRIPT is wrapped around the base with
# NewScript, and each POSTFIX operator is applied to it; the rule recurses
# until none are left, then returns what has been built.
addScripts :
          /^\Z/ { $arg[0];}   # short circuit!
        | POSTSUPERSCRIPT  addScripts[NewScript($arg[0],$item[1])]
        | POSTSUBSCRIPT    addScripts[NewScript($arg[0],$item[1])]
	| POSTFIX          addScripts[Apply($item[1],$arg[0])]
	| { $arg[0]; }

# ================================================================================
# preScripted['RULE']; match a RULE possibly preceded by sub/super prescripts,
#  possibly followed by sub/superscripts
# The name of the rule to match is passed as a string and invoked through
# the matchrule directive. Floating scripts that precede the item are
# attached, via NewScript, to the scripted item that follows them.

preScripted :
	  FLOATSUPERSCRIPT preScripted[$arg[0]] { NewScript($item[2],$item[1]);}
	| FLOATSUBSCRIPT   preScripted[$arg[0]] { NewScript($item[2],$item[1]);}
        | <matchrule:$arg[0]> addScripts[$item[1]]

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Parenthetical: Things wrapped in OPEN .. CLOSE
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

# ================================================================================
# Factors that begin with OPEN; grouped expressions and objects like sets,
# intervals, etc.
# factorOpen[$open] : Dealing with various things that start with an open.
# The OPEN itself has already been consumed by the caller and is passed in.
factorOpen :
	  ADDOP balancedClose[$arg[0]] addScripts[Fence($arg[0],$item[1],$item[2])] # For (-)
        # Parenthesized Operator possibly w/scripts
        # The fenced operator (with any scripts) is applied to the Factor
        # that follows the close.
        | preScripted['bigop'] balancedClose[$arg[0]] 
	  	 addScripts[Fence($arg[0],$item[1],$item[2])] Factor
            { Apply($item[3],$item[4]); }
	# Parenthesized Operator including a pre-factor
	# The pre-factor and operator are joined with InvisibleTimes() inside
	# the fence, and the result is applied to the Factor that follows.
	| Factor preScripted['bigop'] balancedClose[$arg[0]] 
	     addScripts[Fence($arg[0],
                        Apply(InvisibleTimes(),$item[1],$item[2]),$item[3])] Factor
          { Apply($item[4],$item[5]); }
	# read expression too? match subcases.
        | Expression factorOpenExpr[$arg[0],$item[1]]

# factorOpenExpr[$open,$expr];  Try to recognize various things that start
#   this way. Need some extra productions for sets (w/possible middle '|' )
#   and vectors; all n-ary.
# Note that the multi-expression form accepts ANY CLOSE, not necessarily one
# matching $open, whereas the single-expression form requires a balanced close.
factorOpenExpr :
	# 2nd expression; some kind of pair, interval, set, whatever
         (PUNCT Expression { [$item[1],$item[2]]; })(s)  CLOSE
		  addScripts[Fence($arg[0],$arg[1],map(@$_,@{$item[1]}),$item[2])]
	# parenthesized expression.
	| balancedClose[$arg[0]] addScripts[Fence($arg[0],$arg[1],$item[1])]

# ================================================================================
# Sets special cases
# A conditionalized set
# scriptFactorOpen[$open]: invoked by Factor after a SCRIPTOPEN. Looks for
# "Formula suchThatOp Formulae" followed by the matching close, and builds
# a 'conditional-set' (given the role FENCED) from the delimiters, the two
# parts and the such-that operator between them.
scriptFactorOpen :
          Formula suchThatOp Formulae balancedClose[$arg[0]]
	  addScripts[ApplyDelimited(New('conditional-set',undef,role=>'FENCED'),
	                            $arg[0], $item[1],$item[2], $item[3],$item[4])]
        # Else fall through to normal factorOpen
        | factorOpen[$arg[0]]

# The "such that" that can appear in a sets like {a "such that" predicate(a)}
# accept vertical bars, and colon
# Only the METARELOP token named 'colon' is accepted as the colon; it is
# matched directly by its token string and then resolved with Lookup.
suchThatOp : MIDDLE | VERTBAR 
	 | /METARELOP:colon:\d+/	{ Lookup($item[1]); }
# ================================================================================
# Function args, etc.

# maybeArgs[$function] ; Add arguments to an identifier, if made explicit.
# Only an explicit APPLYOP triggers argument parsing (via requireArgs);
# otherwise the identifier is returned unchanged.
maybeArgs : 
	  /^\Z/ { $arg[0];}   # short circuit!
        | APPLYOP requireArgs[$arg[0]]
	| { $arg[0]; }

# doubtArgs[$unknown]; Check for apparent arguments following an
#   Unknown (unclassified) item. If an explicit APPLYOP follows,
#   it seemingly asserts that the preceding _is_ a function,
#   otherwise Warn if there seems to be an arglist.
# The OPEN production exists only for its side effect: forbidArgs never
# succeeds, so that production always fails and the final alternative then
# returns the unknown unchanged, leaving the OPEN for an enclosing rule.
doubtArgs :
	  /^\Z/ { $arg[0];}   # short circuit!
        | APPLYOP requireArgs[$arg[0]]
	| OPEN forbidArgs[$arg[0],$item[1]]
	| { $arg[0]; }

# forbidArgs[$unknown,$open]; Got a suspicious pattern: an unknown and open. 
#    If the following seems to be an argument list, warn.
# An "argument list" here is either two or more expressions separated by
# argPunct, or a single Term, followed by the close matching $open.
# When one is seen, MaybeFunction is called on the unknown (presumably to
# record or report the suspicion). Both actions then yield undef, so this
# rule never succeeds and never contributes to the parse.
forbidArgs :
	  Expression (argPunct Expression)(s) balancedClose[$arg[1]]
	     		       		        { MaybeFunction($arg[0]); undef; }
	| Term balancedClose[$arg[1]]		{ MaybeFunction($arg[0]); undef; }

# requireArgs[$function]: add the arguments that follow a function, failing
#   if there are none. Typically this follows an explicit applyop.
#   - A delimited list: OPEN, one or more expressions separated by argPunct,
#     and the matching close; the delimiters, arguments and separators are
#     all handed to ApplyDelimited.
#   - Otherwise a bare (unparenthesized) argument.
#     Hmm, should only be applicable to _some_ functions ???
requireArgs :
      OPEN Expression
      ( argPunct Expression { [ @item[1, 2] ]; } )(s?)
      balancedClose[$item[1]]
        { ApplyDelimited($arg[0], $item[1], $item[2],
                         (map { @$_ } @{ $item[3] }),
                         $item[4]); }
    | barearg
        { Apply($arg[0], $item[1]); }

# addArgs[$function]; We've got a function; Add following arguments to a
#   function, if present.  Also recognizes composition type ops (something
#   combining two functions into a function)
# Compositions and parenthesized arguments are handled by addEasyArgs.
addArgs :
	  /^\Z/ { $arg[0];}   # short circuit!
        | addEasyArgs[$arg[0]]
	# Accept bare arg (w/o parens) ONLY if an explicit APPLYOP
	| APPLYOP barearg				{ Apply($arg[0],$item[2]);}
	| { $arg[0]; }   # Just return the function itself,then.

# addOpFunArgs[$function]; Same as above but for functions classified as
#   OPFUNCTION. Ie operator-like functions such as \sin, that don't
#   absolutely require parens around args.
# The only difference from addArgs is that the APPLYOP before a bare
# argument is optional.
addOpFunArgs :
	  /^\Z/ { $arg[0];}   # short circuit!
        | addEasyArgs[$arg[0]]
        # Accept bare arg (w/o parens) for this class of functions.
	| APPLYOP(?) barearg				{ Apply($arg[0],$item[2]);}
	| { $arg[0]; }   # Just return the function itself,then.

# addTrigFunArgs[$function]; Yet another variation;
#   It differs in the barearg is restricted to non-trig
#   (ie. it uses trigBarearg where addOpFunArgs uses barearg).
addTrigFunArgs :
	  /^\Z/ { $arg[0];}   # short circuit!
        | addEasyArgs[$arg[0]]
        # Accept bare arg (w/o parens) for this class of functions.
	| APPLYOP(?) trigBarearg			{ Apply($arg[0],$item[2]);}
	| { $arg[0]; }   # Just return the function itself,then.

# addEasyArgs[$function]; gets unambiguous compositions or parenthesized arguments
#  These are the "easy" cases for addArgs and addOpFunArgs.
#   - COMPOSEOP: hand the function and the operator to makeComposition;
#   - an optional APPLYOP, then OPEN, one or more expressions separated by
#     argPunct, and the matching close: ApplyDelimited receives the function,
#     the delimiters, the arguments and the separators.
#  The rule fails if neither is found. The lines commented out with ## are
#  an earlier formulation, now covered by makeComposition.
addEasyArgs :
##	  COMPOSEOP preScripted['FUNCTION']
##	  	    addArgs[Apply($item[1],$arg[0],$item[2])]
##		   { $item[3]; }
##        | COMPOSEOP preScripted['OPFUNCTION']
##	  	    addOpFunArgs[Apply($item[1],$arg[0],$item[2])]
##		   { $item[3]; }
##        | COMPOSEOP preScripted['TRIGFUNCTION']
##	  	    addTrigFunArgs[Apply($item[1],$arg[0],$item[2])]
##		   { $item[3]; }
	  COMPOSEOP makeComposition[$arg[0],$item[1]]
        |  APPLYOP(?) OPEN Expression
	   	      (argPunct Expression {[$item[1],$item[2]];})(s?)
                   balancedClose[$item[2]]
 	  	  { ApplyDelimited($arg[0],$item[2],$item[3],
				   map(@$_,@{$item[4]}),$item[5]); }

# makeComposition[$thing,$comp]; Given something that presumably is a function,
#  and a composition operator, read another function and possibly args
#  The composition Apply($comp,$thing,$next) is built first and is then
#  given arguments by the argument rule appropriate to the class of $next.
makeComposition :
	  preScripted['FUNCTION'] addArgs[Apply($arg[1],$arg[0],$item[1])]
		   { $item[2]; }
        | preScripted['OPFUNCTION'] addOpFunArgs[Apply($arg[1],$arg[0],$item[1])]
		   { $item[2]; }
        | preScripted['TRIGFUNCTION']
	  addTrigFunArgs[Apply($arg[1],$arg[0],$item[1])]	{ $item[2]; }
	# Given an explicit composition operator, the next thing may safely(?)
	# be assumed to be a function, so treat it as such.
	| Factor addArgs[Apply($arg[1],$arg[0],$item[1])]		{ $item[2]; }

# addOpArgs[$bigop]; Add following Term to a bigop, if present.
# The operand is a Factor extended by moreOpArgFactors, optionally preceded
# by an APPLYOP; with no operand the operator is returned by itself.
addOpArgs :
	  /^\Z/ { $arg[0];}   # short circuit!
        | APPLYOP(?) Factor moreOpArgFactors[$item[2]] { Apply($arg[0],$item[3]);}
	| { $arg[0]; }

# moreOpArgFactors[$factor1] : Similar to moreFactors, 
#   but w/o evalAtOp since that most likely belongs to the operator, not
#   the factors.
#   (It also has no COMPOSEOP production.)
moreOpArgFactors :
	  /^\Z/ { $arg[0];}   # short circuit!
        | MULOP Factor moreOpArgFactors[ApplyNary($item[1],$arg[0],$item[2])]
	| Factor moreOpArgFactors[ApplyNary(InvisibleTimes(),$arg[0],$item[1])]
	| { $arg[0]; }


# Punctuation separating function arguments; things marked MIDDLE could
# also separate arguments
# With great trepidation, I'm adding VERTBAR here
argPunct : PUNCT | MIDDLE | VERTBAR

# ================================================================================
# Operator args, etc.

# nestOperators[$operator*]: nest a possible sequence of operators. The
# arguments are the (scripted) operators collected so far.
#   - end of input, or nothing suitable follows: recApply the operators;
#   - another OPERATOR (with scripts): append it and keep collecting;
#   - a FUNCTION, OPFUNCTION or TRIGFUNCTION (with scripts): it ends the
#     sequence and is passed to recApply as the last element;
#   - a parenthesized Expression: it is taken as the delimited argument of
#     the LAST operator, and that application ends the sequence.
nestOperators :
      /^\Z/
        { recApply(@arg); }
    | OPERATOR addScripts[$item[1]] nestOperators[@arg, $item[2]]
    | FUNCTION addScripts[$item[1]]
        { recApply(@arg, $item[2]); }
    | OPFUNCTION addScripts[$item[1]]
        { recApply(@arg, $item[2]); }
    | TRIGFUNCTION addScripts[$item[1]]
        { recApply(@arg, $item[2]); }
    | OPEN Expression balancedClose[$item[1]]
        { recApply(@arg[0 .. $#arg - 1],
                   ApplyDelimited($arg[-1], $item[1], $item[2], $item[3])); }
    | { recApply(@arg); }

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Pseudo-Terminals. 
#  Useful combinations or subsets of terminals.
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

# anyop: any kind of operator token: relational, meta-relational, arrow,
# additive, multiplicative or modifier operators; a (possibly scripted)
# big operator; or an OPERATOR with its scripts.
anyop   : relop | METARELOP | ARROW | ADDOP | MULOP | MODIFIEROP
	| preScripted['bigop']
	| OPERATOR addScripts[$item[1]]

#relop	: RELOP | ARROW

# A generalized relational operator or arrow
# Note we disallow < or > if we're parsing the contents of a bra or ket!
# The first production is purely a filter: when $forbidLRAngle is set and the
# next token is a less-than or greater-than RELOP, it commits and then
# rejects, so that the rule fails without trying the second production.
# In all other cases the first production fails and RELOP | ARROW is tried.
relop	: 
	  { ($forbidLRAngle ? 1 : undef); } /RELOP:(less|greater)-than:\d+/ <commit> <reject>
	| RELOP | ARROW

# bigop: pseudo-terminal for the "big" operators; used through
# preScripted['bigop'].
# Check out whether diffop should be treated as bigop or operator
# It depends on the binding 
bigop   : BIGOP | SUMOP | INTOP | LIMITOP | DIFFOP
# operator: a plain alias for the OPERATOR terminal (no rule in this file
# refers to it).
operator: OPERATOR

# supops: one or more SUPOP tokens (SUPOP is really only \prime(s) (?)).
# They are merged into a single new token whose content is the concatenated
# text content of the individual tokens, and whose name is 'prime' followed
# by the number of tokens that were merged.
supops :
    SUPOP(s)
        { my @primes = @{ $item[1] };
          New(undef,
              join('', map { $_->textContent } @primes),
              name => 'prime' . scalar(@primes)); }

# ================================================================================
# And some special cases...

# balancedClose[$open]: match a CLOSE that `corresponds' to the OPEN passed
# in, and return that CLOSE; fail otherwise so that the caller can try
# another production.
# Parse::RecDescent treats an action as failed only when its value is
# undefined, so a mismatch yields undef explicitly rather than whatever
# false value isMatchingClose returns (a defined 0 or '' would otherwise
# count as a successful match).
balancedClose : CLOSE { isMatchingClose($arg[0],$item[1]) ? $item[1] : undef; }

# The "evaluated at" operator, typically a vertical bar followed by a subscript
# equation. But it is often used in \left. \right| pairs!
# Hence a CLOSE token whose name is | is accepted as well; it is matched
# directly by its token string and then resolved with Lookup.
evalAtOp : VERTBAR
	 | /CLOSE:\|:\d+/	{ Lookup($item[1]); }

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Terminals / Lexer
#   These correspond to the TeX tokens.
# The Lexer strings are of the form TYPE:NAME:NUMBER where
#    TYPE is the grammatical role, or part of speech,
#    NAME is the specific name (semantic or presentation) of the token 
#    NUMBER is the position of the specific token in the current token sequence.
#
# NOTE: RecDescent doesn't clearly distinguish lexing from parsing
# and so it allows us to interpret the same item as several distinct
# terminals; Presumably other parsers would not allow this.
# In a couple of cases, we have symbols that can be used in a few
# different ways:
#   | as vertical bar, open or close, also as a close used for eval-at!
#   : as meta-relation, as such-that
#   <, >  can be relop or part of brackets (eg. qm, etc)
# Perhaps these symbols should get a special role reflecting their specialness
# and then have pseudo-terminals that combine (eg. relop == RELOP | langle)
# This nibbles at the edge of the Ambiguity issue; if it turns out that
# a multi-meaning symbol gets used in a particular way, we'd want to assure
# that its role, meaning, etc, gets changed to reflect the specific usage!
#
# Upon reflection, this implies that OPEN|CLOSE are rather awkward as roles.
# \left< can be an OPEN _or_ RELOP
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

# Each terminal matches one lexer string of the form TYPE:NAME:NUMBER for its
# TYPE, and passes the matched string to Lookup to obtain the rule's value.
# A few terminals match specific names only, and so overlap with the general
# terminal for the same TYPE:
#   LANGLE     : the RELOP less-than, or the OPEN langle;
#   RANGLE     : the RELOP greater-than, or the CLOSE rangle;
#   SCRIPTOPEN : the OPEN whose name is an open brace.
ATOM     	: /ATOM:\S*:\d+/		{ Lookup($item[1]); }
UNKNOWN		: /UNKNOWN:\S*:\d+/		{ Lookup($item[1]); }
ID		: /ID:\S*:\d+/			{ Lookup($item[1]); }
ARRAY		: /ARRAY:\S*:\d+/		{ Lookup($item[1]); }
NUMBER      	: /NUMBER:\S*:\d+/		{ Lookup($item[1]); }
PUNCT		: /PUNCT:\S*:\d+/		{ Lookup($item[1]); }
PERIOD		: /PERIOD:\S*:\d+/		{ Lookup($item[1]); }
RELOP		: /RELOP:\S*:\d+/		{ Lookup($item[1]); }
LANGLE          : /RELOP:less-than:\d+/		{ Lookup($item[1]); }
		| /OPEN:langle:\d+/		{ Lookup($item[1]); }
RANGLE          : /RELOP:greater-than:\d+/	{ Lookup($item[1]); }
		| /CLOSE:rangle:\d+/		{ Lookup($item[1]); }
METARELOP	: /METARELOP:\S*:\d+/		{ Lookup($item[1]); }
MODIFIEROP      : /MODIFIEROP:\S*:\d+/		{ Lookup($item[1]); }
MODIFIER        : /MODIFIER:\S*:\d+/		{ Lookup($item[1]); }
ARROW		: /ARROW:\S*:\d+/		{ Lookup($item[1]); }
ADDOP		: /ADDOP:\S*:\d+/		{ Lookup($item[1]); }
MULOP		: /MULOP:\S*:\d+/		{ Lookup($item[1]); }
POSTFIX		: /POSTFIX:\S*:\d+/		{ Lookup($item[1]); }
FUNCTION        : /FUNCTION:\S*:\d+/		{ Lookup($item[1]); }
OPFUNCTION      : /OPFUNCTION:\S*:\d+/		{ Lookup($item[1]); }
TRIGFUNCTION    : /TRIGFUNCTION:\S*:\d+/	{ Lookup($item[1]); }
APPLYOP		: /APPLYOP:\S*:\d+/		{ Lookup($item[1]); }
COMPOSEOP	: /COMPOSEOP:\S*:\d+/		{ Lookup($item[1]); }
SUPOP		: /SUPOP:\S*:\d+/		{ Lookup($item[1]); }
OPEN		: /OPEN:\S*:\d+/		{ Lookup($item[1]); }
SCRIPTOPEN	: /OPEN:\{:\d+/			{ Lookup($item[1]); }
CLOSE		: /CLOSE:\S*:\d+/		{ Lookup($item[1]); }
MIDDLE		: /MIDDLE:\S*:\d+/		{ Lookup($item[1]); }
VERTBAR		: /VERTBAR:\S*:\d+/		{ Lookup($item[1]); }
BIGOP		: /BIGOP:\S*:\d+/		{ Lookup($item[1]); }
SUMOP		: /SUMOP:\S*:\d+/		{ Lookup($item[1]); }
INTOP		: /INTOP:\S*:\d+/		{ Lookup($item[1]); }
LIMITOP		: /LIMITOP:\S*:\d+/		{ Lookup($item[1]); }
DIFFOP		: /DIFFOP:\S*:\d+/		{ Lookup($item[1]); }
OPERATOR	: /OPERATOR:\S*:\d+/		{ Lookup($item[1]); }
##DIFF		: /DIFF:\S*:\d+/		{ Lookup($item[1]); }
POSTSUBSCRIPT   : /POSTSUBSCRIPT:\S*:\d+/	{ Lookup($item[1]); }
POSTSUPERSCRIPT : /POSTSUPERSCRIPT:\S*:\d+/	{ Lookup($item[1]); }
FLOATSUPERSCRIPT : /FLOATSUPERSCRIPT:\S*:\d+/	{ Lookup($item[1]); }
FLOATSUBSCRIPT	: /FLOATSUBSCRIPT:\S*:\d+/	{ Lookup($item[1]); }

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
