mirror of https://github.com/vitalif/lime
Cleaning up the code
parent
0e5a392f52
commit
175b67c614
|
@ -1,34 +1,41 @@
|
|||
<?php
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Let's face it: PHP is not up to lexical processing. GNU flex handles
|
||||
* it well, so I've created a little protocol for delegating the work.
|
||||
* Extend this class so that executable() gives a path to your lexical
|
||||
* analyser program.
|
||||
*/
|
||||
abstract class flex_scanner {
|
||||
/*
|
||||
Let's face it: PHP is not up to lexical processing. GNU flex handles
|
||||
it well, so I've created a little protocol for delegating the work.
|
||||
Extend this class so that executable() gives a path to your lexical
|
||||
analyser program.
|
||||
*/
|
||||
abstract function executable();
|
||||
function __construct($path) {
|
||||
if (!is_readable($path)) throw new Exception("$path is not readable.");
|
||||
|
||||
public function __construct($path) {
|
||||
if (!is_readable($path)) {
|
||||
throw new Exception("$path is not readable.");
|
||||
}
|
||||
|
||||
putenv("PHP_LIME_SCAN_STDIN=$path");
|
||||
|
||||
$scanner = $this->executable();
|
||||
$tokens = explode("\0", `$scanner < "\$PHP_LIME_SCAN_STDIN"`);
|
||||
|
||||
array_pop($tokens);
|
||||
$this->tokens = $tokens;
|
||||
$this->lineno = 1;
|
||||
}
|
||||
function next() {
|
||||
|
||||
public function next() {
|
||||
if (list($key, $token) = each($this->tokens)) {
|
||||
list($this->lineno, $type, $text) = explode("\1", $token);
|
||||
|
||||
return array($type, $text);
|
||||
}
|
||||
}
|
||||
function feed($parser) {
|
||||
|
||||
public function feed($parser) {
|
||||
while (list($type, $text) = $this->next()) {
|
||||
$parser->eat($type, $text);
|
||||
}
|
||||
|
||||
return $parser->eat_eof();
|
||||
}
|
||||
}
|
||||
|
|
121
lime.php
121
lime.php
|
@ -17,6 +17,7 @@
|
|||
*/
|
||||
|
||||
define('LIME_DIR', __DIR__);
|
||||
define('INDENT', ' ');
|
||||
|
||||
function emit($str) {
|
||||
fputs(STDERR, $str . PHP_EOL);
|
||||
|
@ -66,11 +67,11 @@ function lime_export($var) {
|
|||
$out[] = (!$i ? lime_export($k).' => ' : '') . lime_export($v);
|
||||
}
|
||||
|
||||
$result = 'array(' . PHP_EOL . preg_replace('~^~m', "\t", implode(',' . PHP_EOL, $out)) . PHP_EOL . ')';
|
||||
$result = 'array(' . PHP_EOL . preg_replace('~^~m', INDENT, implode(',' . PHP_EOL, $out)) . PHP_EOL . ')';
|
||||
} elseif (is_int($var) || is_float($var)) {
|
||||
$result = (string)$var;
|
||||
} elseif (is_string($var)) {
|
||||
$opt1 = "'" . str_replace(array('\\', "'"), array('\\\\', "\'"), $var) . "'";
|
||||
$opt1 = '\'' . str_replace(array('\\', '\''), array('\\\\', '\\\''), $var) . '\'';
|
||||
$opt2 = $opt1;
|
||||
|
||||
if (strpos($var, '$') === false) {
|
||||
|
@ -254,12 +255,16 @@ class RRC extends Exception {
|
|||
}
|
||||
|
||||
class state {
|
||||
public $id;
|
||||
public $key;
|
||||
public $close;
|
||||
public $action = array();
|
||||
|
||||
public function __construct($id, $key, $close) {
|
||||
$this->id = $id;
|
||||
$this->key = $key;
|
||||
$this->close = $close; // config key -> object
|
||||
ksort($this->close);
|
||||
$this->action = array();
|
||||
}
|
||||
|
||||
public function dump() {
|
||||
|
@ -1049,7 +1054,7 @@ class lime_language_php extends lime_language {
|
|||
$php = $this->to_php($a['code']);
|
||||
|
||||
$code .= 'function ' . $mn . '(' . LIME_CALL_PROTOCOL . ') {' . PHP_EOL .
|
||||
preg_replace('~^~m', "\t", $comment . $php) . PHP_EOL .
|
||||
rtrim(preg_replace('~^~m', INDENT, $comment . $php)) . PHP_EOL .
|
||||
'}' .
|
||||
PHP_EOL .
|
||||
PHP_EOL;
|
||||
|
@ -1063,7 +1068,7 @@ class lime_language_php extends lime_language {
|
|||
$code .= 'public $a = '.lime_export($rules, true) . ';' . PHP_EOL;
|
||||
|
||||
return 'class ' . $parser_class . ' extends lime_parser {' . PHP_EOL .
|
||||
preg_replace(array('~^~m', '~^\h+$~m'), array("\t", ''), $code) .
|
||||
preg_replace(array('~^~m', '~^\h+$~m'), array(INDENT, ''), $code) .
|
||||
'}' . PHP_EOL;
|
||||
}
|
||||
}
|
||||
|
@ -1153,12 +1158,15 @@ class lime_rewrite {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* This keeps track of one position in an rhs.
|
||||
* We specialize to handle actions and glyphs.
|
||||
*
|
||||
* If there is a name for the slot, we store it here.
|
||||
* Later on, this structure will be consulted in the formation of
|
||||
* actual production rules.
|
||||
*/
|
||||
class lime_slot {
|
||||
// This keeps track of one position in an rhs.
|
||||
// We specialize to handle actions and glyphs.
|
||||
// If there is a name for the slot, we store it here.
|
||||
// Later on, this structure will be consulted in the formation of
|
||||
// actual production rules.
|
||||
public function __construct($data, $name) {
|
||||
$this->data = $data;
|
||||
$this->name = $name;
|
||||
|
@ -1175,34 +1183,32 @@ class lime_glyph extends lime_slot {
|
|||
}
|
||||
class lime_action extends lime_slot {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* This function isn't too terribly interesting to the casual observer.
|
||||
* You're probably better off looking at parse_lime_grammar() instead.
|
||||
*
|
||||
* Ok, if you insist, I'll explain.
|
||||
*
|
||||
* The input to Lime is a CFG parser definition. That definition is
|
||||
* written in some language. (The Lime language, to be exact.)
|
||||
* Anyway, I have to parse the Lime language and compile it into a
|
||||
* very complex data structure from which a parser is eventually
|
||||
* built. What better way than to use Lime itself to parse its own
|
||||
* language? Well, it's almost that simple, but not quite.
|
||||
|
||||
* The Lime language is fairly potent, but a restricted subset of
|
||||
* its features was used to write a metagrammar. Then, I hand-translated
|
||||
* that metagrammar into another form which is easy to snarf up.
|
||||
* In the process of reading that simplified form, this function
|
||||
* builds the same sort of data structure that later gets turned into
|
||||
* a parser. The last step is to run the parser generation algorithm,
|
||||
* eval() the resulting PHP code, and voila! With no hard work, I can
|
||||
* suddenly read and comprehend the full range of the Lime language
|
||||
* without ever having written an algorithm to do so. It feels like magic.
|
||||
*/
|
||||
function lime_bootstrap() {
|
||||
|
||||
/*
|
||||
|
||||
This function isn't too terribly interesting to the casual observer.
|
||||
You're probably better off looking at parse_lime_grammar() instead.
|
||||
|
||||
Ok, if you insist, I'll explain.
|
||||
|
||||
The input to Lime is a CFG parser definition. That definition is
|
||||
written in some language. (The Lime language, to be exact.)
|
||||
Anyway, I have to parse the Lime language and compile it into a
|
||||
very complex data structure from which a parser is eventually
|
||||
built. What better way than to use Lime itself to parse its own
|
||||
language? Well, it's almost that simple, but not quite.
|
||||
|
||||
The Lime language is fairly potent, but a restricted subset of
|
||||
its features was used to write a metagrammar. Then, I hand-translated
|
||||
that metagrammar into another form which is easy to snarf up.
|
||||
In the process of reading that simplified form, this function
|
||||
builds the same sort of data structure that later gets turned into
|
||||
a parser. The last step is to run the parser generation algorithm,
|
||||
eval() the resulting PHP code, and voila! With no hard work, I can
|
||||
suddenly read and comprehend the full range of the Lime language
|
||||
without ever having written an algorithm to do so. It feels like magic.
|
||||
|
||||
*/
|
||||
|
||||
$bootstrap = LIME_DIR . '/lime.bootstrap';
|
||||
$lime = new lime();
|
||||
$lime->parser_class = 'lime_metaparser';
|
||||
|
@ -1245,31 +1251,29 @@ function lime_bootstrap() {
|
|||
eval($parser_code);
|
||||
}
|
||||
|
||||
/**
|
||||
* The voodoo is in the way I do lexical processing on grammar definition
|
||||
* files. They contain embedded bits of PHP, and it's important to keep
|
||||
* track of things like strings, comments, and matched braces. It seemed
|
||||
* like an ideal problem to solve with GNU flex, so I wrote a little
|
||||
* scanner in flex and C to dig out the tokens for me. Of course, I need
|
||||
* the tokens in PHP, so I designed a simple binary wrapper for them which
|
||||
* also contains line-number information, guaranteed to help out if you
|
||||
* write a grammar which surprises the parser in any manner.
|
||||
*/
|
||||
class voodoo_scanner extends flex_scanner {
|
||||
/*
|
||||
|
||||
The voodoo is in the way I do lexical processing on grammar definition
|
||||
files. They contain embedded bits of PHP, and it's important to keep
|
||||
track of things like strings, comments, and matched braces. It seemed
|
||||
like an ideal problem to solve with GNU flex, so I wrote a little
|
||||
scanner in flex and C to dig out the tokens for me. Of course, I need
|
||||
the tokens in PHP, so I designed a simple binary wrapper for them which
|
||||
also contains line-number information, guaranteed to help out if you
|
||||
write a grammar which surprises the parser in any manner.
|
||||
|
||||
*/
|
||||
function executable() { return LIME_DIR.'/lime_scan_tokens'; }
|
||||
}
|
||||
|
||||
/**
|
||||
* This is a good function to read because it teaches you how to interface
|
||||
* with a Lime parser. I've tried to isolate out the bits that aren't
|
||||
* instructive in that regard.
|
||||
*/
|
||||
function parse_lime_grammar($path) {
|
||||
/*
|
||||
|
||||
This is a good function to read because it teaches you how to interface
|
||||
with a Lime parser. I've tried to isolate out the bits that aren't
|
||||
instructive in that regard.
|
||||
|
||||
*/
|
||||
if (!class_exists('lime_metaparser')) lime_bootstrap();
|
||||
if (!class_exists('lime_metaparser', false)) {
|
||||
lime_bootstrap();
|
||||
}
|
||||
|
||||
$parse_engine = new parse_engine(new lime_metaparser());
|
||||
$scanner = new voodoo_scanner($path);
|
||||
|
@ -1284,10 +1288,9 @@ function parse_lime_grammar($path) {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
if ($_SERVER['argv']) {
|
||||
$code = '';
|
||||
array_shift($_SERVER['argv']); # Strip out the program name.
|
||||
array_shift($_SERVER['argv']); // Strip out the program name.
|
||||
foreach ($_SERVER['argv'] as $path) {
|
||||
$code .= parse_lime_grammar($path);
|
||||
}
|
||||
|
|
BIN
lime_scan_tokens
BIN
lime_scan_tokens
Binary file not shown.
|
@ -28,76 +28,76 @@ void php();
|
|||
%x dquote
|
||||
%x squote
|
||||
|
||||
CHAR \n|.
|
||||
CHAR \n|.
|
||||
|
||||
ALPHA [a-zA-Z]
|
||||
DIGIT [0-9]
|
||||
ALNUM {ALPHA}|{DIGIT}
|
||||
WORD {ALNUM}|_
|
||||
STOP "."
|
||||
ALPHA [a-zA-Z]
|
||||
DIGIT [0-9]
|
||||
ALNUM {ALPHA}|{DIGIT}
|
||||
WORD {ALNUM}|_
|
||||
STOP "."
|
||||
|
||||
SYM {ALPHA}{WORD}*'*
|
||||
LIT '.'
|
||||
SYM {ALPHA}{WORD}*'*
|
||||
LIT '.'
|
||||
|
||||
ESC "\"{CHAR}
|
||||
SCHAR [^\']|ESC
|
||||
DCHAR [^\"]|ESC
|
||||
COM "//"|"#"
|
||||
ESC "\"{CHAR}
|
||||
SCHAR [^\']|ESC
|
||||
DCHAR [^\"]|ESC
|
||||
COM "//"|"#"
|
||||
|
||||
CC [^*\n]
|
||||
CX "*"+{CC}+
|
||||
CT "*"+"/"
|
||||
BLOCKCMT "/*"({CC}|{CX})*{CT}
|
||||
CC [^*\n]
|
||||
CX "*"+{CC}+
|
||||
CT "*"+"/"
|
||||
BLOCKCMT "/*"({CC}|{CX})*{CT}
|
||||
|
||||
%x pragma
|
||||
|
||||
|
||||
%%
|
||||
|
||||
[[:space:]]+ {}
|
||||
#.* {}
|
||||
[[:space:]]+ {}
|
||||
#.* {}
|
||||
|
||||
{STOP} out("stop", ".");
|
||||
{SYM} tok("sym");
|
||||
{LIT} tok("lit");
|
||||
{BLOCKCMT} {}
|
||||
"/"{WORD}+ |
|
||||
"/"{WORD}+ |
|
||||
"/$" out("lambda", yytext+1);
|
||||
"%"{WORD}+ {
|
||||
out("pragma", yytext+1);
|
||||
yy_push_state(pragma);
|
||||
}
|
||||
|
||||
<*>"{" {
|
||||
<*>"{" {
|
||||
lit();
|
||||
yy_push_state(code);
|
||||
}
|
||||
|
||||
. lit();
|
||||
. lit();
|
||||
|
||||
|
||||
<pragma>{
|
||||
\n {
|
||||
out("stop", ".");
|
||||
yy_pop_state();
|
||||
}
|
||||
[[:space:]] {}
|
||||
{SYM} tok("sym");
|
||||
{LIT} tok("lit");
|
||||
. lit();
|
||||
\n {
|
||||
out("stop", ".");
|
||||
yy_pop_state();
|
||||
}
|
||||
[[:space:]] {}
|
||||
{SYM} tok("sym");
|
||||
{LIT} tok("lit");
|
||||
. lit();
|
||||
}
|
||||
|
||||
<code>{
|
||||
"}" {
|
||||
lit();
|
||||
yy_pop_state();
|
||||
}
|
||||
'{SCHAR}*' php();
|
||||
\"{DCHAR}*\" php();
|
||||
{COM}.* php();
|
||||
{BLOCKCMT} php();
|
||||
[^{}'"#/]+ php();
|
||||
. php();
|
||||
"}" {
|
||||
lit();
|
||||
yy_pop_state();
|
||||
}
|
||||
'{SCHAR}*' php();
|
||||
\"{DCHAR}*\" php();
|
||||
{COM}.* php();
|
||||
{BLOCKCMT} php();
|
||||
[^{}'"#/]+ php();
|
||||
. php();
|
||||
}
|
||||
|
||||
%%
|
||||
|
|
343
parse_engine.php
343
parse_engine.php
|
@ -1,5 +1,5 @@
|
|||
<?php
|
||||
/*
|
||||
/**
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
|
@ -15,148 +15,212 @@
|
|||
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
|
||||
|
||||
define('LIME_CALL_PROTOCOL', '$tokens, &$result');
|
||||
|
||||
abstract class lime_parser {
|
||||
}
|
||||
|
||||
class parse_error extends Exception {} # If this happens, the input doesn't match the grammar.
|
||||
class parse_bug extends Exception {} # If this happens, I made a mistake.
|
||||
/**
|
||||
* The input doesn't match the grammar
|
||||
*/
|
||||
class parse_error extends Exception {
|
||||
}
|
||||
|
||||
/**
|
||||
* Bug, I made a mistake
|
||||
*/
|
||||
class parse_bug extends Exception {}
|
||||
|
||||
class parse_unexpected_token extends parse_error {
|
||||
function __construct($type, $state) {
|
||||
parent::__construct("Unexpected token of type ($type)");
|
||||
public function __construct($type, $state) {
|
||||
parent::__construct("Unexpected token of type ({$type})");
|
||||
|
||||
$this->type = $type;
|
||||
$this->state = $state;
|
||||
}
|
||||
}
|
||||
|
||||
class parse_premature_eof extends parse_error {
|
||||
function __construct() {
|
||||
parent::__construct("Premature EOF");
|
||||
public function __construct() {
|
||||
parent::__construct('Premature EOF');
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
class parse_stack {
|
||||
function __construct($qi) {
|
||||
public $q;
|
||||
public $qs = array();
|
||||
/**
|
||||
* Stack of semantic actions
|
||||
*/
|
||||
public $ss = array();
|
||||
|
||||
public function __construct($qi) {
|
||||
$this->q = $qi;
|
||||
$this->qs = array();
|
||||
$this->ss = array();
|
||||
}
|
||||
function shift($q, $semantic) {
|
||||
|
||||
public function shift($q, $semantic) {
|
||||
$this->ss[] = $semantic;
|
||||
$this->qs[] = $this->q;
|
||||
|
||||
$this->q = $q;
|
||||
# echo "Shift $q -- $semantic<br/>\n";
|
||||
|
||||
// echo "Shift $q -- $semantic\n";
|
||||
}
|
||||
function top_n($n) {
|
||||
if (!$n) return array();
|
||||
return array_slice($this->ss, 0-$n);
|
||||
|
||||
public function top_n($n) {
|
||||
if (!$n) {
|
||||
return array();
|
||||
}
|
||||
|
||||
return array_slice($this->ss, 0 - $n);
|
||||
}
|
||||
function pop_n($n) {
|
||||
if (!$n) return array();
|
||||
$qq = array_splice($this->qs, 0-$n);
|
||||
|
||||
public function pop_n($n) {
|
||||
if (!$n) {
|
||||
return array();
|
||||
}
|
||||
|
||||
$qq = array_splice($this->qs, 0 - $n);
|
||||
$this->q = $qq[0];
|
||||
return array_splice($this->ss, 0-$n);
|
||||
|
||||
return array_splice($this->ss, 0 - $n);
|
||||
}
|
||||
function occupied() { return !empty($this->ss); }
|
||||
function index($n) {
|
||||
if ($n) $this->q = $this->qs[count($this->qs)-$n];
|
||||
|
||||
public function occupied() {
|
||||
return !empty($this->ss);
|
||||
}
|
||||
function text() {
|
||||
return $this->q." : ".implode(' . ', array_reverse($this->qs));
|
||||
|
||||
public function index($n) {
|
||||
if ($n) {
|
||||
$this->q = $this->qs[count($this->qs) - $n];
|
||||
}
|
||||
}
|
||||
|
||||
public function text() {
|
||||
return $this->q . ' : ' . implode(' . ', array_reverse($this->qs));
|
||||
}
|
||||
}
|
||||
|
||||
class parse_engine {
|
||||
function __construct($parser) {
|
||||
public $parser;
|
||||
public $qi;
|
||||
public $rule;
|
||||
public $step;
|
||||
/**
|
||||
* @var boolean
|
||||
*/
|
||||
public $accept;
|
||||
/**
|
||||
* @var parse_stack
|
||||
*/
|
||||
public $stack;
|
||||
|
||||
public function __construct($parser) {
|
||||
$this->parser = $parser;
|
||||
$this->qi = $parser->qi;
|
||||
$this->rule = $parser->a;
|
||||
$this->step = $parser->i;
|
||||
#$this->prepare_callables();
|
||||
|
||||
$this->reset();
|
||||
#$this->debug = false;
|
||||
}
|
||||
function reset() {
|
||||
|
||||
public function reset() {
|
||||
$this->accept = false;
|
||||
$this->stack = new parse_stack($this->qi);
|
||||
}
|
||||
|
||||
private function enter_error_tolerant_state() {
|
||||
while ($this->stack->occupied()) {
|
||||
if ($this->has_step_for('error')) return true;
|
||||
if ($this->has_step_for('error')) {
|
||||
return true;
|
||||
}
|
||||
|
||||
$this->drop();
|
||||
};
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
private function drop() { $this->stack->pop_n(1); }
|
||||
function eat_eof() {
|
||||
{/*
|
||||
|
||||
So that I don't get any brilliant misguided ideas:
|
||||
|
||||
The "accept" step happens when we try to eat a start symbol.
|
||||
That happens because the reductions up the stack at the end
|
||||
finally (and symetrically) tell the parser to eat a symbol
|
||||
representing what they've just shifted off the end of the stack
|
||||
and reduced. However, that doesn't put the parser into any
|
||||
special different state. Therefore, it's back at the start
|
||||
state.
|
||||
|
||||
That being said, the parser is ready to reduce an EOF to the
|
||||
empty program, if given a grammar that allows them.
|
||||
|
||||
So anyway, if you literally tell the parser to eat an EOF
|
||||
symbol, then after it's done reducing and accepting the prior
|
||||
program, it's going to think it has another symbol to deal with.
|
||||
That is the EOF symbol, which means to reduce the empty program,
|
||||
accept it, and then continue trying to eat the terminal EOF.
|
||||
|
||||
This infinte loop quickly runs out of memory.
|
||||
|
||||
That's why the real EOF algorithm doesn't try to pretend that
|
||||
EOF is a terminal. Like the invented start symbol, it's special.
|
||||
|
||||
Instead, we pretend to want to eat EOF, but never actually
|
||||
try to get it into the parse stack. (It won't fit.) In short,
|
||||
we look up what reduction is indicated at each step in the
|
||||
process of rolling up the parse stack.
|
||||
|
||||
The repetition is because one reduction is not guaranteed to
|
||||
cascade into another and clean up the entire parse stack.
|
||||
Rather, it will instead shift each partial production as it
|
||||
is forced to completion by the EOF lookahead.
|
||||
*/}
|
||||
|
||||
# We must reduce as if having read the EOF symbol
|
||||
|
||||
private function drop() {
|
||||
$this->stack->pop_n(1);
|
||||
}
|
||||
|
||||
/*
|
||||
* So that I don't get any brilliant misguided ideas:
|
||||
*
|
||||
* The "accept" step happens when we try to eat a start symbol.
|
||||
* That happens because the reductions up the stack at the end
|
||||
* finally (and symmetrically) tell the parser to eat a symbol
|
||||
* representing what they've just shifted off the end of the stack
|
||||
* and reduced. However, that doesn't put the parser into any
|
||||
* special different state. Therefore, it's back at the start
|
||||
* state.
|
||||
*
|
||||
* That being said, the parser is ready to reduce an EOF to the
|
||||
* empty program, if given a grammar that allows them.
|
||||
*
|
||||
* So anyway, if you literally tell the parser to eat an EOF
|
||||
* symbol, then after it's done reducing and accepting the prior
|
||||
* program, it's going to think it has another symbol to deal with.
|
||||
* That is the EOF symbol, which means to reduce the empty program,
|
||||
* accept it, and then continue trying to eat the terminal EOF.
|
||||
*
|
||||
* This infinite loop quickly runs out of memory.
|
||||
*
|
||||
* That's why the real EOF algorithm doesn't try to pretend that
|
||||
* EOF is a terminal. Like the invented start symbol, it's special.
|
||||
*
|
||||
* Instead, we pretend to want to eat EOF, but never actually
|
||||
* try to get it into the parse stack. (It won't fit.) In short,
|
||||
* we look up what reduction is indicated at each step in the
|
||||
* process of rolling up the parse stack.
|
||||
*
|
||||
* The repetition is because one reduction is not guaranteed to
|
||||
* cascade into another and clean up the entire parse stack.
|
||||
* Rather, it will instead shift each partial production as it
|
||||
* is forced to completion by the EOF lookahead.
|
||||
*/
|
||||
public function eat_eof() {
|
||||
// We must reduce as if having read the EOF symbol
|
||||
do {
|
||||
# and we have to try at least once, because if nothing
|
||||
# has ever been shifted, then the stack will be empty
|
||||
# at the start.
|
||||
// and we have to try at least once, because if nothing
|
||||
// has ever been shifted, then the stack will be empty
|
||||
// at the start.
|
||||
list($opcode, $operand) = $this->step_for('#');
|
||||
|
||||
switch ($opcode) {
|
||||
case 'r': $this->reduce($operand); break;
|
||||
case 'e': $this->premature_eof(); break;
|
||||
default: throw new parse_bug(); break;
|
||||
case 'r':
|
||||
$this->reduce($operand);
|
||||
break;
|
||||
case 'e':
|
||||
$this->premature_eof();
|
||||
break;
|
||||
default:
|
||||
throw new parse_bug();
|
||||
break;
|
||||
}
|
||||
} while ($this->stack->occupied());
|
||||
{/*
|
||||
If the sentence is well-formed according to the grammar, then
|
||||
this will eventually result in eating a start symbol, which
|
||||
causes the "accept" instruction to fire. Otherwise, the
|
||||
step('#') method will indicate an error in the syntax, which
|
||||
here means a premature EOF.
|
||||
|
||||
Incedentally, some tremendous amount of voodoo with the parse
|
||||
stack might help find the beginning of some unfinished
|
||||
production that the sentence was cut off during, but as a
|
||||
general rule that would require deeper knowledge.
|
||||
*/}
|
||||
if (!$this->accept) throw new parse_bug();
|
||||
|
||||
// If the sentence is well-formed according to the grammar, then
|
||||
// this will eventually result in eating a start symbol, which
|
||||
// causes the "accept" instruction to fire. Otherwise, the
|
||||
// step('#') method will indicate an error in the syntax, which
|
||||
// here means a premature EOF.
|
||||
//
|
||||
// Incidentally, some tremendous amount of voodoo with the parse
|
||||
// stack might help find the beginning of some unfinished
|
||||
// production that the sentence was cut off during, but as a
|
||||
// general rule that would require deeper knowledge.
|
||||
if (!$this->accept) {
|
||||
throw new parse_bug();
|
||||
}
|
||||
|
||||
return $this->semantic;
|
||||
}
|
||||
|
||||
private function premature_eof() {
|
||||
$seen = array();
|
||||
|
||||
while ($this->enter_error_tolerant_state()) {
|
||||
if (isset($seen[$this->state()])) {
|
||||
// This means that it's pointless to try here.
|
||||
|
@ -164,9 +228,11 @@ class parse_engine {
|
|||
$this->drop();
|
||||
continue;
|
||||
}
|
||||
|
||||
$seen[$this->state()] = true;
|
||||
|
||||
$this->eat('error', NULL);
|
||||
|
||||
$this->eat('error', null);
|
||||
|
||||
if ($this->has_step_for('#')) {
|
||||
// Good. We can continue as normal.
|
||||
return;
|
||||
|
@ -177,76 +243,101 @@ class parse_engine {
|
|||
// The rest of the algorithm will make it happen.
|
||||
}
|
||||
}
|
||||
|
||||
throw new parse_premature_eof();
|
||||
}
|
||||
private function current_row() { return $this->step[$this->state()]; }
|
||||
|
||||
private function current_row() {
|
||||
return $this->step[$this->state()];
|
||||
}
|
||||
|
||||
private function step_for($type) {
|
||||
$row = $this->current_row();
|
||||
if (!isset($row[$type])) return array('e', $this->stack->q);
|
||||
if (!isset($row[$type])) {
|
||||
return array('e', $this->stack->q);
|
||||
}
|
||||
|
||||
return explode(' ', $row[$type]);
|
||||
}
|
||||
|
||||
private function has_step_for($type) {
|
||||
$row = $this->current_row();
|
||||
return isset($row[$type]);
|
||||
}
|
||||
private function state() { return $this->stack->q; }
|
||||
|
||||
private function state() {
|
||||
return $this->stack->q;
|
||||
}
|
||||
|
||||
function eat($type, $semantic) {
|
||||
# assert('$type == trim($type)');
|
||||
# if ($this->debug) echo "Trying to eat a ($type)\n";
|
||||
// assert('$type == trim($type)');
|
||||
// if ($this->debug) echo "Trying to eat a ($type)\n";
|
||||
list($opcode, $operand) = $this->step_for($type);
|
||||
|
||||
switch ($opcode) {
|
||||
case 's':
|
||||
# if ($this->debug) echo "shift $type to state $operand\n";
|
||||
case 's':
|
||||
// if ($this->debug) echo "shift $type to state $operand\n";
|
||||
$this->stack->shift($operand, $semantic);
|
||||
# echo $this->stack->text()." shift $type<br/>\n";
|
||||
// echo $this->stack->text()." shift $type<br/>\n";
|
||||
break;
|
||||
|
||||
case 'r':
|
||||
case 'r':
|
||||
$this->reduce($operand);
|
||||
$this->eat($type, $semantic);
|
||||
# Yes, this is tail-recursive. It's also the simplest way.
|
||||
// Yes, this is tail-recursive. It's also the simplest way.
|
||||
break;
|
||||
|
||||
case 'a':
|
||||
if ($this->stack->occupied()) throw new parse_bug('Accept should happen with empty stack.');
|
||||
case 'a':
|
||||
if ($this->stack->occupied()) {
|
||||
throw new parse_bug('Accept should happen with empty stack.');
|
||||
}
|
||||
|
||||
$this->accept = true;
|
||||
#if ($this->debug) echo ("Accept\n\n");
|
||||
//if ($this->debug) echo ("Accept\n\n");
|
||||
$this->semantic = $semantic;
|
||||
break;
|
||||
|
||||
case 'e':
|
||||
# This is thought to be the uncommon, exceptional path, so
|
||||
# it's OK that this algorithm will cause the stack to
|
||||
# flutter while the parse engine waits for an edible token.
|
||||
# if ($this->debug) echo "($type) causes a problem.\n";
|
||||
case 'e':
|
||||
// This is thought to be the uncommon, exceptional path, so
|
||||
// it's OK that this algorithm will cause the stack to
|
||||
// flutter while the parse engine waits for an edible token.
|
||||
// if ($this->debug) echo "($type) causes a problem.\n";
|
||||
|
||||
if ($this->enter_error_tolerant_state()) {
|
||||
$this->eat('error', NULL);
|
||||
if ($this->has_step_for($type)) $this->eat($type, $semantic);
|
||||
$this->eat('error', null);
|
||||
if ($this->has_step_for($type)) {
|
||||
$this->eat($type, $semantic);
|
||||
}
|
||||
} else {
|
||||
# If that didn't work, give up:
|
||||
throw new parse_error("Parse Error: ($type)($semantic) not expected");
|
||||
// If that didn't work, give up:
|
||||
throw new parse_error("Parse Error: ({$type})({$semantic}) not expected");
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
throw new parse_bug("Bad parse table instruction ".htmlspecialchars($opcode));
|
||||
default:
|
||||
throw new parse_bug("Bad parse table instruction " . htmlspecialchars($opcode));
|
||||
}
|
||||
}
|
||||
|
||||
private function reduce($rule_id) {
|
||||
$rule = $this->rule[$rule_id];
|
||||
$len = $rule['len'];
|
||||
$semantic = $this->perform_action($rule_id, $this->stack->top_n($len));
|
||||
#echo $semantic.br();
|
||||
if ($rule['replace']) $this->stack->pop_n($len);
|
||||
else $this->stack->index($len);
|
||||
|
||||
//echo $semantic.br();
|
||||
if ($rule['replace']) {
|
||||
$this->stack->pop_n($len);
|
||||
} else {
|
||||
$this->stack->index($len);
|
||||
}
|
||||
|
||||
$this->eat($rule['symbol'], $semantic);
|
||||
}
|
||||
|
||||
private function perform_action($rule_id, $slice) {
|
||||
# we have this weird calling convention....
|
||||
// we have this weird calling convention....
|
||||
$result = null;
|
||||
$method = $this->parser->method[$rule_id];
|
||||
#if ($this->debug) echo "rule $id: $method\n";
|
||||
|
||||
//if ($this->debug) echo "rule $id: $method\n";
|
||||
$this->parser->$method($slice, $result);
|
||||
|
||||
return $result;
|
||||
}
|
||||
}
|
||||
|
|
57
set.so.php
57
set.so.php
|
@ -7,23 +7,52 @@ Purpose: We should really have a "set" data type. It's too useful.
|
|||
*/
|
||||
|
||||
class set {
|
||||
function __construct($list=array()) { $this->data = array_count_values($list); }
|
||||
function has($item) { return isset($this->data[$item]); }
|
||||
function add($item) { $this->data[$item] = true; }
|
||||
function del($item) { unset($this->data[$item]); return $item;}
|
||||
function all() { return array_keys($this->data); }
|
||||
function one() { return key($this->data); }
|
||||
function count() { return count($this->data); }
|
||||
function pop() { return $this->del($this->one()); }
|
||||
function union($that) {
|
||||
public function __construct(array $list = array()) {
|
||||
$this->data = array_count_values($list);
|
||||
}
|
||||
|
||||
public function has($item) {
|
||||
return isset($this->data[$item]);
|
||||
}
|
||||
|
||||
public function add($item) {
|
||||
$this->data[$item] = true;
|
||||
}
|
||||
|
||||
public function del($item) {
|
||||
unset($this->data[$item]);
|
||||
return $item;
|
||||
}
|
||||
|
||||
public function all() {
|
||||
return array_keys($this->data);
|
||||
}
|
||||
|
||||
public function one() {
|
||||
return key($this->data);
|
||||
}
|
||||
|
||||
public function count() {
|
||||
return count($this->data);
|
||||
}
|
||||
|
||||
public function pop() {
|
||||
return $this->del($this->one());
|
||||
}
|
||||
|
||||
public function union($that) {
|
||||
$progress = false;
|
||||
foreach ($that->all() as $item) if (!$this->has($item)) {
|
||||
$this->add($item);
|
||||
$progress = true;
|
||||
foreach ($that->all() as $item) {
|
||||
if (!$this->has($item)) {
|
||||
$this->add($item);
|
||||
$progress = true;
|
||||
}
|
||||
}
|
||||
|
||||
return $progress;
|
||||
}
|
||||
function text() {
|
||||
return ' { '.implode(' ', $this->all()).' } ';
|
||||
|
||||
public function text() {
|
||||
return ' { ' . implode(' ', $this->all()) . ' } ';
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue