type = $type; $this->state = $state; } } class parse_premature_eof extends parse_error { public function __construct(array $expect) { parent::__construct('Premature EOF'); } } class parse_stack { public $q; public $qs = array(); /** * Stack of semantic actions */ public $ss = array(); public function __construct($qi) { $this->q = $qi; } public function shift($q, $semantic) { $this->ss[] = $semantic; $this->qs[] = $this->q; $this->q = $q; // echo "Shift $q -- $semantic\n"; } public function top_n($n) { if (!$n) { return array(); } return array_slice($this->ss, 0 - $n); } public function pop_n($n) { if (!$n) { return array(); } $qq = array_splice($this->qs, 0 - $n); $this->q = $qq[0]; return array_splice($this->ss, 0 - $n); } public function occupied() { return !empty($this->ss); } public function index($n) { if ($n) { $this->q = $this->qs[count($this->qs) - $n]; } } public function text() { return $this->q . ' : ' . implode(' . ', array_reverse($this->qs)); } } class parse_engine { public $debug = false; public $parser; public $qi; public $rule; public $step; public $descr; /** * @var boolean */ public $accept; /** * @var parse_stack */ public $stack; public function __construct($parser) { $this->parser = $parser; $this->qi = $parser->qi; $this->rule = $parser->a; $this->step = $parser->i; $this->descr = $parser->d; $this->reset(); } public function reset() { $this->accept = false; $this->stack = new parse_stack($this->qi); $this->parser->errors = array(); } private function enter_error_tolerant_state() { while ($this->stack->occupied()) { if ($this->has_step_for('error')) { return true; } if ($this->debug) echo "Dropped an item from the stack, {" . implode(', ', $this->get_steps()) . "} left\n"; if ($this->debug) echo 'Currently in state ' . $this->state() . "\n"; $this->drop(); } return false; } private function drop() { $this->stack->pop_n(1); } /* * So that I don't get any brilliant misguided ideas: * * The "accept" step happens when we try to eat a start symbol. * That happens because the reductions up the stack at the end * finally (and symetrically) tell the parser to eat a symbol * representing what they've just shifted off the end of the stack * and reduced. However, that doesn't put the parser into any * special different state. Therefore, it's back at the start * state. * * That being said, the parser is ready to reduce an EOF to the * empty program, if given a grammar that allows them. * * So anyway, if you literally tell the parser to eat an EOF * symbol, then after it's done reducing and accepting the prior * program, it's going to think it has another symbol to deal with. * That is the EOF symbol, which means to reduce the empty program, * accept it, and then continue trying to eat the terminal EOF. * * This infinte loop quickly runs out of memory. * * That's why the real EOF algorithm doesn't try to pretend that * EOF is a terminal. Like the invented start symbol, it's special. * * Instead, we pretend to want to eat EOF, but never actually * try to get it into the parse stack. (It won't fit.) In short, * we look up what reduction is indicated at each step in the * process of rolling up the parse stack. * * The repetition is because one reduction is not guaranteed to * cascade into another and clean up the entire parse stack. * Rather, it will instead shift each partial production as it * is forced to completion by the EOF lookahead. */ public function eat_eof() { // We must reduce as if having read the EOF symbol do { // and we have to try at least once, because if nothing // has ever been shifted, then the stack will be empty // at the start. list($opcode, $operand) = $this->step_for('#'); switch ($opcode) { case 'r': $this->reduce($operand); break; case 'e': $this->premature_eof(); break; default: throw new parse_bug(); } } while ($this->stack->occupied()); // If the sentence is well-formed according to the grammar, then // this will eventually result in eating a start symbol, which // causes the "accept" instruction to fire. Otherwise, the // step('#') method will indicate an error in the syntax, which // here means a premature EOF. // // Incidentally, some tremendous amount of voodoo with the parse // stack might help find the beginning of some unfinished // production that the sentence was cut off during, but as a // general rule that would require deeper knowledge. if (!$this->accept) { throw new parse_bug(); } return $this->semantic; } private function premature_eof() { $seen = array(); $expect = $this->get_steps(); while ($this->enter_error_tolerant_state() || $this->has_step_for('error')) { if (isset($seen[$this->state()])) { // This means that it's pointless to try here. // We're guaranteed that the stack is occupied. $this->drop(); continue; } $seen[$this->state()] = true; $this->eat('error', 'Premature EOF'); if ($this->has_step_for('#')) { // Good. We can continue as normal. return; } else { // That attempt to resolve the error condition // did not work. There's no point trying to // figure out how much to slice off the stack. // The rest of the algorithm will make it happen. } } throw new parse_premature_eof($expect); } private function current_row() { return $this->step[$this->state()]; } private function step_for($type) { $row = $this->current_row(); if (isset($row[$type])) { return explode(' ', $row[$type]); } if (isset($row[''])) { return explode(' ', $row['']); } return array('e', $this->stack->q); } private function get_steps() { $out = array(); foreach($this->current_row() as $type => $row) { foreach($this->rule as $rule) { if ($rule['symbol'] == $type) { continue 2; } } list($opcode) = explode(' ', $row, 2); if ($opcode != 'e') { $out[] = $type === '' ? '$default' : $type; } } return $out; } private function has_step_for($type) { $row = $this->current_row(); return isset($row[$type]); } private function state() { return $this->stack->q; } function eat($type, $semantic) { // assert('$type == trim($type)'); if ($this->debug) echo "Trying to eat a ($type)\n"; list($opcode, $operand) = $this->step_for($type); switch ($opcode) { case 's': if ($this->debug) echo "shift $type to state $operand\n"; $this->stack->shift($operand, $semantic); // echo $this->stack->text()." shift $type
\n"; break; case 'r': if ($this->debug) echo "Reducing $type via rule $operand\n"; $this->reduce($operand); // Yes, this is tail-recursive. It's also the simplest way. return $this->eat($type, $semantic); case 'a': if ($this->stack->occupied()) { throw new parse_bug('Accept should happen with empty stack.'); } $this->accept = true; if ($this->debug) echo ("Accept\n\n"); $this->semantic = $semantic; break; case 'e': // This is thought to be the uncommon, exceptional path, so // it's OK that this algorithm will cause the stack to // flutter while the parse engine waits for an edible token. if ($this->debug) echo "($type) causes a problem.\n"; // get these before doing anything $expected = $this->get_steps(); $this->parser->errors[] = $this->descr($type, $semantic) . ' not expected, expected one of ' . implode(', ', $expected); if ($this->debug) echo "Possibilities before error fixing: {" . implode(', ', $expected) . "}\n"; if ($this->enter_error_tolerant_state() || $this->has_step_for('error')) { $this->eat('error', end($this->parser->errors)); if ($this->has_step_for($type)) { $this->eat($type, $semantic); } return false; } else { // If that didn't work, give up: throw new parse_error('Parse Error: ' . $this->descr($type, $semantic) . ' not expected, expected one of ' . implode(', ', $expected)); } break; default: throw new parse_bug("Bad parse table instruction " . htmlspecialchars($opcode)); } return true; } private function descr($type, $semantic) { if (isset($this->descr[$type])) { return $this->descr[$type]; } elseif ("$semantic" !== "") { return $type . ' (' . $semantic . ')'; } else { return $type; } } private function reduce($rule_id) { $rule = $this->rule[$rule_id]; $len = $rule['len']; $semantic = $this->perform_action($rule_id, $this->stack->top_n($len)); //echo $semantic.br(); if ($rule['replace']) { $this->stack->pop_n($len); } else { $this->stack->index($len); } $this->eat($rule['symbol'], $semantic); } private function perform_action($rule_id, $slice) { // we have this weird calling convention.... $result = null; $method = $this->parser->method[$rule_id]; //if ($this->debug) echo "rule $id: $method\n"; $this->parser->$method($slice, $result); return $result; } }