mirror of https://github.com/vitalif/lime
Cleaning up the code
parent
0e5a392f52
commit
175b67c614
|
@ -1,34 +1,41 @@
|
||||||
<?php
|
<?php
|
||||||
|
/**
|
||||||
|
* Let's face it: PHP is not up to lexical processing. GNU flex handles
|
||||||
|
* it well, so I've created a little protocol for delegating the work.
|
||||||
|
* Extend this class so that executable() gives a path to your lexical
|
||||||
|
* analyser program.
|
||||||
|
*/
|
||||||
abstract class flex_scanner {
|
abstract class flex_scanner {
|
||||||
/*
|
|
||||||
Let's face it: PHP is not up to lexical processing. GNU flex handles
|
|
||||||
it well, so I've created a little protocol for delegating the work.
|
|
||||||
Extend this class so that executable() gives a path to your lexical
|
|
||||||
analyser program.
|
|
||||||
*/
|
|
||||||
abstract function executable();
|
abstract function executable();
|
||||||
function __construct($path) {
|
|
||||||
if (!is_readable($path)) throw new Exception("$path is not readable.");
|
public function __construct($path) {
|
||||||
|
if (!is_readable($path)) {
|
||||||
|
throw new Exception("$path is not readable.");
|
||||||
|
}
|
||||||
|
|
||||||
putenv("PHP_LIME_SCAN_STDIN=$path");
|
putenv("PHP_LIME_SCAN_STDIN=$path");
|
||||||
|
|
||||||
$scanner = $this->executable();
|
$scanner = $this->executable();
|
||||||
$tokens = explode("\0", `$scanner < "\$PHP_LIME_SCAN_STDIN"`);
|
$tokens = explode("\0", `$scanner < "\$PHP_LIME_SCAN_STDIN"`);
|
||||||
|
|
||||||
array_pop($tokens);
|
array_pop($tokens);
|
||||||
$this->tokens = $tokens;
|
$this->tokens = $tokens;
|
||||||
$this->lineno = 1;
|
$this->lineno = 1;
|
||||||
}
|
}
|
||||||
function next() {
|
|
||||||
|
public function next() {
|
||||||
if (list($key, $token) = each($this->tokens)) {
|
if (list($key, $token) = each($this->tokens)) {
|
||||||
list($this->lineno, $type, $text) = explode("\1", $token);
|
list($this->lineno, $type, $text) = explode("\1", $token);
|
||||||
|
|
||||||
return array($type, $text);
|
return array($type, $text);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
function feed($parser) {
|
|
||||||
|
public function feed($parser) {
|
||||||
while (list($type, $text) = $this->next()) {
|
while (list($type, $text) = $this->next()) {
|
||||||
$parser->eat($type, $text);
|
$parser->eat($type, $text);
|
||||||
}
|
}
|
||||||
|
|
||||||
return $parser->eat_eof();
|
return $parser->eat_eof();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
121
lime.php
121
lime.php
|
@ -17,6 +17,7 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
define('LIME_DIR', __DIR__);
|
define('LIME_DIR', __DIR__);
|
||||||
|
define('INDENT', ' ');
|
||||||
|
|
||||||
function emit($str) {
|
function emit($str) {
|
||||||
fputs(STDERR, $str . PHP_EOL);
|
fputs(STDERR, $str . PHP_EOL);
|
||||||
|
@ -66,11 +67,11 @@ function lime_export($var) {
|
||||||
$out[] = (!$i ? lime_export($k).' => ' : '') . lime_export($v);
|
$out[] = (!$i ? lime_export($k).' => ' : '') . lime_export($v);
|
||||||
}
|
}
|
||||||
|
|
||||||
$result = 'array(' . PHP_EOL . preg_replace('~^~m', "\t", implode(',' . PHP_EOL, $out)) . PHP_EOL . ')';
|
$result = 'array(' . PHP_EOL . preg_replace('~^~m', INDENT, implode(',' . PHP_EOL, $out)) . PHP_EOL . ')';
|
||||||
} elseif (is_int($var) || is_float($var)) {
|
} elseif (is_int($var) || is_float($var)) {
|
||||||
$result = (string)$var;
|
$result = (string)$var;
|
||||||
} elseif (is_string($var)) {
|
} elseif (is_string($var)) {
|
||||||
$opt1 = "'" . str_replace(array('\\', "'"), array('\\\\', "\'"), $var) . "'";
|
$opt1 = '\'' . str_replace(array('\\', '\''), array('\\\\', '\\\''), $var) . '\'';
|
||||||
$opt2 = $opt1;
|
$opt2 = $opt1;
|
||||||
|
|
||||||
if (strpos($var, '$') === false) {
|
if (strpos($var, '$') === false) {
|
||||||
|
@ -254,12 +255,16 @@ class RRC extends Exception {
|
||||||
}
|
}
|
||||||
|
|
||||||
class state {
|
class state {
|
||||||
|
public $id;
|
||||||
|
public $key;
|
||||||
|
public $close;
|
||||||
|
public $action = array();
|
||||||
|
|
||||||
public function __construct($id, $key, $close) {
|
public function __construct($id, $key, $close) {
|
||||||
$this->id = $id;
|
$this->id = $id;
|
||||||
$this->key = $key;
|
$this->key = $key;
|
||||||
$this->close = $close; // config key -> object
|
$this->close = $close; // config key -> object
|
||||||
ksort($this->close);
|
ksort($this->close);
|
||||||
$this->action = array();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public function dump() {
|
public function dump() {
|
||||||
|
@ -1049,7 +1054,7 @@ class lime_language_php extends lime_language {
|
||||||
$php = $this->to_php($a['code']);
|
$php = $this->to_php($a['code']);
|
||||||
|
|
||||||
$code .= 'function ' . $mn . '(' . LIME_CALL_PROTOCOL . ') {' . PHP_EOL .
|
$code .= 'function ' . $mn . '(' . LIME_CALL_PROTOCOL . ') {' . PHP_EOL .
|
||||||
preg_replace('~^~m', "\t", $comment . $php) . PHP_EOL .
|
rtrim(preg_replace('~^~m', INDENT, $comment . $php)) . PHP_EOL .
|
||||||
'}' .
|
'}' .
|
||||||
PHP_EOL .
|
PHP_EOL .
|
||||||
PHP_EOL;
|
PHP_EOL;
|
||||||
|
@ -1063,7 +1068,7 @@ class lime_language_php extends lime_language {
|
||||||
$code .= 'public $a = '.lime_export($rules, true) . ';' . PHP_EOL;
|
$code .= 'public $a = '.lime_export($rules, true) . ';' . PHP_EOL;
|
||||||
|
|
||||||
return 'class ' . $parser_class . ' extends lime_parser {' . PHP_EOL .
|
return 'class ' . $parser_class . ' extends lime_parser {' . PHP_EOL .
|
||||||
preg_replace(array('~^~m', '~^\h+$~m'), array("\t", ''), $code) .
|
preg_replace(array('~^~m', '~^\h+$~m'), array(INDENT, ''), $code) .
|
||||||
'}' . PHP_EOL;
|
'}' . PHP_EOL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1153,12 +1158,15 @@ class lime_rewrite {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This keeps track of one position in an rhs.
|
||||||
|
* We specialize to handle actions and glyphs.
|
||||||
|
*
|
||||||
|
* If there is a name for the slot, we store it here.
|
||||||
|
* Later on, this structure will be consulted in the formation of
|
||||||
|
* actual production rules.
|
||||||
|
*/
|
||||||
class lime_slot {
|
class lime_slot {
|
||||||
// This keeps track of one position in an rhs.
|
|
||||||
// We specialize to handle actions and glyphs.
|
|
||||||
// If there is a name for the slot, we store it here.
|
|
||||||
// Later on, this structure will be consulted in the formation of
|
|
||||||
// actual production rules.
|
|
||||||
public function __construct($data, $name) {
|
public function __construct($data, $name) {
|
||||||
$this->data = $data;
|
$this->data = $data;
|
||||||
$this->name = $name;
|
$this->name = $name;
|
||||||
|
@ -1175,34 +1183,32 @@ class lime_glyph extends lime_slot {
|
||||||
}
|
}
|
||||||
class lime_action extends lime_slot {
|
class lime_action extends lime_slot {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This function isn't too terribly interesting to the casual observer.
|
||||||
|
* You're probably better off looking at parse_lime_grammar() instead.
|
||||||
|
*
|
||||||
|
* Ok, if you insist, I'll explain.
|
||||||
|
*
|
||||||
|
* The input to Lime is a CFG parser definition. That definition is
|
||||||
|
* written in some language. (The Lime language, to be exact.)
|
||||||
|
* Anyway, I have to parse the Lime language and compile it into a
|
||||||
|
* very complex data structure from which a parser is eventually
|
||||||
|
* built. What better way than to use Lime itself to parse its own
|
||||||
|
* language? Well, it's almost that simple, but not quite.
|
||||||
|
|
||||||
|
* The Lime language is fairly potent, but a restricted subset of
|
||||||
|
* its features was used to write a metagrammar. Then, I hand-translated
|
||||||
|
* that metagrammar into another form which is easy to snarf up.
|
||||||
|
* In the process of reading that simplified form, this function
|
||||||
|
* builds the same sort of data structure that later gets turned into
|
||||||
|
* a parser. The last step is to run the parser generation algorithm,
|
||||||
|
* eval() the resulting PHP code, and voila! With no hard work, I can
|
||||||
|
* suddenly read and comprehend the full range of the Lime language
|
||||||
|
* without ever having written an algorithm to do so. It feels like magic.
|
||||||
|
*/
|
||||||
function lime_bootstrap() {
|
function lime_bootstrap() {
|
||||||
|
|
||||||
/*
|
|
||||||
|
|
||||||
This function isn't too terribly interesting to the casual observer.
|
|
||||||
You're probably better off looking at parse_lime_grammar() instead.
|
|
||||||
|
|
||||||
Ok, if you insist, I'll explain.
|
|
||||||
|
|
||||||
The input to Lime is a CFG parser definition. That definition is
|
|
||||||
written in some language. (The Lime language, to be exact.)
|
|
||||||
Anyway, I have to parse the Lime language and compile it into a
|
|
||||||
very complex data structure from which a parser is eventually
|
|
||||||
built. What better way than to use Lime itself to parse its own
|
|
||||||
language? Well, it's almost that simple, but not quite.
|
|
||||||
|
|
||||||
The Lime language is fairly potent, but a restricted subset of
|
|
||||||
its features was used to write a metagrammar. Then, I hand-translated
|
|
||||||
that metagrammar into another form which is easy to snarf up.
|
|
||||||
In the process of reading that simplified form, this function
|
|
||||||
builds the same sort of data structure that later gets turned into
|
|
||||||
a parser. The last step is to run the parser generation algorithm,
|
|
||||||
eval() the resulting PHP code, and voila! With no hard work, I can
|
|
||||||
suddenly read and comprehend the full range of the Lime language
|
|
||||||
without ever having written an algorithm to do so. It feels like magic.
|
|
||||||
|
|
||||||
*/
|
|
||||||
|
|
||||||
$bootstrap = LIME_DIR . '/lime.bootstrap';
|
$bootstrap = LIME_DIR . '/lime.bootstrap';
|
||||||
$lime = new lime();
|
$lime = new lime();
|
||||||
$lime->parser_class = 'lime_metaparser';
|
$lime->parser_class = 'lime_metaparser';
|
||||||
|
@ -1245,31 +1251,29 @@ function lime_bootstrap() {
|
||||||
eval($parser_code);
|
eval($parser_code);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The voodoo is in the way I do lexical processing on grammar definition
|
||||||
|
* files. They contain embedded bits of PHP, and it's important to keep
|
||||||
|
* track of things like strings, comments, and matched braces. It seemed
|
||||||
|
* like an ideal problem to solve with GNU flex, so I wrote a little
|
||||||
|
* scanner in flex and C to dig out the tokens for me. Of course, I need
|
||||||
|
* the tokens in PHP, so I designed a simple binary wrapper for them which
|
||||||
|
* also contains line-number information, guaranteed to help out if you
|
||||||
|
* write a grammar which surprises the parser in any manner.
|
||||||
|
*/
|
||||||
class voodoo_scanner extends flex_scanner {
|
class voodoo_scanner extends flex_scanner {
|
||||||
/*
|
|
||||||
|
|
||||||
The voodoo is in the way I do lexical processing on grammar definition
|
|
||||||
files. They contain embedded bits of PHP, and it's important to keep
|
|
||||||
track of things like strings, comments, and matched braces. It seemed
|
|
||||||
like an ideal problem to solve with GNU flex, so I wrote a little
|
|
||||||
scanner in flex and C to dig out the tokens for me. Of course, I need
|
|
||||||
the tokens in PHP, so I designed a simple binary wrapper for them which
|
|
||||||
also contains line-number information, guaranteed to help out if you
|
|
||||||
write a grammar which surprises the parser in any manner.
|
|
||||||
|
|
||||||
*/
|
|
||||||
function executable() { return LIME_DIR.'/lime_scan_tokens'; }
|
function executable() { return LIME_DIR.'/lime_scan_tokens'; }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This is a good function to read because it teaches you how to interface
|
||||||
|
* with a Lime parser. I've tried to isolate out the bits that aren't
|
||||||
|
* instructive in that regard.
|
||||||
|
*/
|
||||||
function parse_lime_grammar($path) {
|
function parse_lime_grammar($path) {
|
||||||
/*
|
if (!class_exists('lime_metaparser', false)) {
|
||||||
|
lime_bootstrap();
|
||||||
This is a good function to read because it teaches you how to interface
|
}
|
||||||
with a Lime parser. I've tried to isolate out the bits that aren't
|
|
||||||
instructive in that regard.
|
|
||||||
|
|
||||||
*/
|
|
||||||
if (!class_exists('lime_metaparser')) lime_bootstrap();
|
|
||||||
|
|
||||||
$parse_engine = new parse_engine(new lime_metaparser());
|
$parse_engine = new parse_engine(new lime_metaparser());
|
||||||
$scanner = new voodoo_scanner($path);
|
$scanner = new voodoo_scanner($path);
|
||||||
|
@ -1284,10 +1288,9 @@ function parse_lime_grammar($path) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
if ($_SERVER['argv']) {
|
if ($_SERVER['argv']) {
|
||||||
$code = '';
|
$code = '';
|
||||||
array_shift($_SERVER['argv']); # Strip out the program name.
|
array_shift($_SERVER['argv']); // Strip out the program name.
|
||||||
foreach ($_SERVER['argv'] as $path) {
|
foreach ($_SERVER['argv'] as $path) {
|
||||||
$code .= parse_lime_grammar($path);
|
$code .= parse_lime_grammar($path);
|
||||||
}
|
}
|
||||||
|
|
BIN
lime_scan_tokens
BIN
lime_scan_tokens
Binary file not shown.
|
@ -28,76 +28,76 @@ void php();
|
||||||
%x dquote
|
%x dquote
|
||||||
%x squote
|
%x squote
|
||||||
|
|
||||||
CHAR \n|.
|
CHAR \n|.
|
||||||
|
|
||||||
ALPHA [a-zA-Z]
|
ALPHA [a-zA-Z]
|
||||||
DIGIT [0-9]
|
DIGIT [0-9]
|
||||||
ALNUM {ALPHA}|{DIGIT}
|
ALNUM {ALPHA}|{DIGIT}
|
||||||
WORD {ALNUM}|_
|
WORD {ALNUM}|_
|
||||||
STOP "."
|
STOP "."
|
||||||
|
|
||||||
SYM {ALPHA}{WORD}*'*
|
SYM {ALPHA}{WORD}*'*
|
||||||
LIT '.'
|
LIT '.'
|
||||||
|
|
||||||
ESC "\"{CHAR}
|
ESC "\"{CHAR}
|
||||||
SCHAR [^\']|ESC
|
SCHAR [^\']|ESC
|
||||||
DCHAR [^\"]|ESC
|
DCHAR [^\"]|ESC
|
||||||
COM "//"|"#"
|
COM "//"|"#"
|
||||||
|
|
||||||
CC [^*\n]
|
CC [^*\n]
|
||||||
CX "*"+{CC}+
|
CX "*"+{CC}+
|
||||||
CT "*"+"/"
|
CT "*"+"/"
|
||||||
BLOCKCMT "/*"({CC}|{CX})*{CT}
|
BLOCKCMT "/*"({CC}|{CX})*{CT}
|
||||||
|
|
||||||
%x pragma
|
%x pragma
|
||||||
|
|
||||||
|
|
||||||
%%
|
%%
|
||||||
|
|
||||||
[[:space:]]+ {}
|
[[:space:]]+ {}
|
||||||
#.* {}
|
#.* {}
|
||||||
|
|
||||||
{STOP} out("stop", ".");
|
{STOP} out("stop", ".");
|
||||||
{SYM} tok("sym");
|
{SYM} tok("sym");
|
||||||
{LIT} tok("lit");
|
{LIT} tok("lit");
|
||||||
{BLOCKCMT} {}
|
{BLOCKCMT} {}
|
||||||
"/"{WORD}+ |
|
"/"{WORD}+ |
|
||||||
"/$" out("lambda", yytext+1);
|
"/$" out("lambda", yytext+1);
|
||||||
"%"{WORD}+ {
|
"%"{WORD}+ {
|
||||||
out("pragma", yytext+1);
|
out("pragma", yytext+1);
|
||||||
yy_push_state(pragma);
|
yy_push_state(pragma);
|
||||||
}
|
}
|
||||||
|
|
||||||
<*>"{" {
|
<*>"{" {
|
||||||
lit();
|
lit();
|
||||||
yy_push_state(code);
|
yy_push_state(code);
|
||||||
}
|
}
|
||||||
|
|
||||||
. lit();
|
. lit();
|
||||||
|
|
||||||
|
|
||||||
<pragma>{
|
<pragma>{
|
||||||
\n {
|
\n {
|
||||||
out("stop", ".");
|
out("stop", ".");
|
||||||
yy_pop_state();
|
yy_pop_state();
|
||||||
}
|
}
|
||||||
[[:space:]] {}
|
[[:space:]] {}
|
||||||
{SYM} tok("sym");
|
{SYM} tok("sym");
|
||||||
{LIT} tok("lit");
|
{LIT} tok("lit");
|
||||||
. lit();
|
. lit();
|
||||||
}
|
}
|
||||||
|
|
||||||
<code>{
|
<code>{
|
||||||
"}" {
|
"}" {
|
||||||
lit();
|
lit();
|
||||||
yy_pop_state();
|
yy_pop_state();
|
||||||
}
|
}
|
||||||
'{SCHAR}*' php();
|
'{SCHAR}*' php();
|
||||||
\"{DCHAR}*\" php();
|
\"{DCHAR}*\" php();
|
||||||
{COM}.* php();
|
{COM}.* php();
|
||||||
{BLOCKCMT} php();
|
{BLOCKCMT} php();
|
||||||
[^{}'"#/]+ php();
|
[^{}'"#/]+ php();
|
||||||
. php();
|
. php();
|
||||||
}
|
}
|
||||||
|
|
||||||
%%
|
%%
|
||||||
|
|
343
parse_engine.php
343
parse_engine.php
|
@ -1,5 +1,5 @@
|
||||||
<?php
|
<?php
|
||||||
/*
|
/**
|
||||||
* This program is free software; you can redistribute it and/or modify
|
* This program is free software; you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
* the Free Software Foundation; either version 2 of the License, or
|
* the Free Software Foundation; either version 2 of the License, or
|
||||||
|
@ -15,148 +15,212 @@
|
||||||
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
define('LIME_CALL_PROTOCOL', '$tokens, &$result');
|
define('LIME_CALL_PROTOCOL', '$tokens, &$result');
|
||||||
|
|
||||||
abstract class lime_parser {
|
abstract class lime_parser {
|
||||||
}
|
}
|
||||||
|
|
||||||
class parse_error extends Exception {} # If this happens, the input doesn't match the grammar.
|
/**
|
||||||
class parse_bug extends Exception {} # If this happens, I made a mistake.
|
* The input doesn't match the grammar
|
||||||
|
*/
|
||||||
|
class parse_error extends Exception {
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Bug, I made a mistake
|
||||||
|
*/
|
||||||
|
class parse_bug extends Exception {}
|
||||||
|
|
||||||
class parse_unexpected_token extends parse_error {
|
class parse_unexpected_token extends parse_error {
|
||||||
function __construct($type, $state) {
|
public function __construct($type, $state) {
|
||||||
parent::__construct("Unexpected token of type ($type)");
|
parent::__construct("Unexpected token of type ({$type})");
|
||||||
|
|
||||||
$this->type = $type;
|
$this->type = $type;
|
||||||
$this->state = $state;
|
$this->state = $state;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
class parse_premature_eof extends parse_error {
|
class parse_premature_eof extends parse_error {
|
||||||
function __construct() {
|
public function __construct() {
|
||||||
parent::__construct("Premature EOF");
|
parent::__construct('Premature EOF');
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class parse_stack {
|
class parse_stack {
|
||||||
function __construct($qi) {
|
public $q;
|
||||||
|
public $qs = array();
|
||||||
|
/**
|
||||||
|
* Stack of semantic actions
|
||||||
|
*/
|
||||||
|
public $ss = array();
|
||||||
|
|
||||||
|
public function __construct($qi) {
|
||||||
$this->q = $qi;
|
$this->q = $qi;
|
||||||
$this->qs = array();
|
|
||||||
$this->ss = array();
|
|
||||||
}
|
}
|
||||||
function shift($q, $semantic) {
|
|
||||||
|
public function shift($q, $semantic) {
|
||||||
$this->ss[] = $semantic;
|
$this->ss[] = $semantic;
|
||||||
$this->qs[] = $this->q;
|
$this->qs[] = $this->q;
|
||||||
|
|
||||||
$this->q = $q;
|
$this->q = $q;
|
||||||
# echo "Shift $q -- $semantic<br/>\n";
|
|
||||||
|
// echo "Shift $q -- $semantic\n";
|
||||||
}
|
}
|
||||||
function top_n($n) {
|
|
||||||
if (!$n) return array();
|
public function top_n($n) {
|
||||||
return array_slice($this->ss, 0-$n);
|
if (!$n) {
|
||||||
|
return array();
|
||||||
|
}
|
||||||
|
|
||||||
|
return array_slice($this->ss, 0 - $n);
|
||||||
}
|
}
|
||||||
function pop_n($n) {
|
|
||||||
if (!$n) return array();
|
public function pop_n($n) {
|
||||||
$qq = array_splice($this->qs, 0-$n);
|
if (!$n) {
|
||||||
|
return array();
|
||||||
|
}
|
||||||
|
|
||||||
|
$qq = array_splice($this->qs, 0 - $n);
|
||||||
$this->q = $qq[0];
|
$this->q = $qq[0];
|
||||||
return array_splice($this->ss, 0-$n);
|
|
||||||
|
return array_splice($this->ss, 0 - $n);
|
||||||
}
|
}
|
||||||
function occupied() { return !empty($this->ss); }
|
|
||||||
function index($n) {
|
public function occupied() {
|
||||||
if ($n) $this->q = $this->qs[count($this->qs)-$n];
|
return !empty($this->ss);
|
||||||
}
|
}
|
||||||
function text() {
|
|
||||||
return $this->q." : ".implode(' . ', array_reverse($this->qs));
|
public function index($n) {
|
||||||
|
if ($n) {
|
||||||
|
$this->q = $this->qs[count($this->qs) - $n];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public function text() {
|
||||||
|
return $this->q . ' : ' . implode(' . ', array_reverse($this->qs));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
class parse_engine {
|
class parse_engine {
|
||||||
function __construct($parser) {
|
public $parser;
|
||||||
|
public $qi;
|
||||||
|
public $rule;
|
||||||
|
public $step;
|
||||||
|
/**
|
||||||
|
* @var boolean
|
||||||
|
*/
|
||||||
|
public $accept;
|
||||||
|
/**
|
||||||
|
* @var parse_stack
|
||||||
|
*/
|
||||||
|
public $stack;
|
||||||
|
|
||||||
|
public function __construct($parser) {
|
||||||
$this->parser = $parser;
|
$this->parser = $parser;
|
||||||
$this->qi = $parser->qi;
|
$this->qi = $parser->qi;
|
||||||
$this->rule = $parser->a;
|
$this->rule = $parser->a;
|
||||||
$this->step = $parser->i;
|
$this->step = $parser->i;
|
||||||
#$this->prepare_callables();
|
|
||||||
$this->reset();
|
$this->reset();
|
||||||
#$this->debug = false;
|
|
||||||
}
|
}
|
||||||
function reset() {
|
|
||||||
|
public function reset() {
|
||||||
$this->accept = false;
|
$this->accept = false;
|
||||||
$this->stack = new parse_stack($this->qi);
|
$this->stack = new parse_stack($this->qi);
|
||||||
}
|
}
|
||||||
|
|
||||||
private function enter_error_tolerant_state() {
|
private function enter_error_tolerant_state() {
|
||||||
while ($this->stack->occupied()) {
|
while ($this->stack->occupied()) {
|
||||||
if ($this->has_step_for('error')) return true;
|
if ($this->has_step_for('error')) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
$this->drop();
|
$this->drop();
|
||||||
};
|
}
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
private function drop() { $this->stack->pop_n(1); }
|
|
||||||
function eat_eof() {
|
private function drop() {
|
||||||
{/*
|
$this->stack->pop_n(1);
|
||||||
|
}
|
||||||
So that I don't get any brilliant misguided ideas:
|
|
||||||
|
/*
|
||||||
The "accept" step happens when we try to eat a start symbol.
|
* So that I don't get any brilliant misguided ideas:
|
||||||
That happens because the reductions up the stack at the end
|
*
|
||||||
finally (and symetrically) tell the parser to eat a symbol
|
* The "accept" step happens when we try to eat a start symbol.
|
||||||
representing what they've just shifted off the end of the stack
|
* That happens because the reductions up the stack at the end
|
||||||
and reduced. However, that doesn't put the parser into any
|
* finally (and symmetrically) tell the parser to eat a symbol
|
||||||
special different state. Therefore, it's back at the start
|
* representing what they've just shifted off the end of the stack
|
||||||
state.
|
* and reduced. However, that doesn't put the parser into any
|
||||||
|
* special different state. Therefore, it's back at the start
|
||||||
That being said, the parser is ready to reduce an EOF to the
|
* state.
|
||||||
empty program, if given a grammar that allows them.
|
*
|
||||||
|
* That being said, the parser is ready to reduce an EOF to the
|
||||||
So anyway, if you literally tell the parser to eat an EOF
|
* empty program, if given a grammar that allows them.
|
||||||
symbol, then after it's done reducing and accepting the prior
|
*
|
||||||
program, it's going to think it has another symbol to deal with.
|
* So anyway, if you literally tell the parser to eat an EOF
|
||||||
That is the EOF symbol, which means to reduce the empty program,
|
* symbol, then after it's done reducing and accepting the prior
|
||||||
accept it, and then continue trying to eat the terminal EOF.
|
* program, it's going to think it has another symbol to deal with.
|
||||||
|
* That is the EOF symbol, which means to reduce the empty program,
|
||||||
This infinte loop quickly runs out of memory.
|
* accept it, and then continue trying to eat the terminal EOF.
|
||||||
|
*
|
||||||
That's why the real EOF algorithm doesn't try to pretend that
|
* This infinite loop quickly runs out of memory.
|
||||||
EOF is a terminal. Like the invented start symbol, it's special.
|
*
|
||||||
|
* That's why the real EOF algorithm doesn't try to pretend that
|
||||||
Instead, we pretend to want to eat EOF, but never actually
|
* EOF is a terminal. Like the invented start symbol, it's special.
|
||||||
try to get it into the parse stack. (It won't fit.) In short,
|
*
|
||||||
we look up what reduction is indicated at each step in the
|
* Instead, we pretend to want to eat EOF, but never actually
|
||||||
process of rolling up the parse stack.
|
* try to get it into the parse stack. (It won't fit.) In short,
|
||||||
|
* we look up what reduction is indicated at each step in the
|
||||||
The repetition is because one reduction is not guaranteed to
|
* process of rolling up the parse stack.
|
||||||
cascade into another and clean up the entire parse stack.
|
*
|
||||||
Rather, it will instead shift each partial production as it
|
* The repetition is because one reduction is not guaranteed to
|
||||||
is forced to completion by the EOF lookahead.
|
* cascade into another and clean up the entire parse stack.
|
||||||
*/}
|
* Rather, it will instead shift each partial production as it
|
||||||
|
* is forced to completion by the EOF lookahead.
|
||||||
# We must reduce as if having read the EOF symbol
|
*/
|
||||||
|
public function eat_eof() {
|
||||||
|
// We must reduce as if having read the EOF symbol
|
||||||
do {
|
do {
|
||||||
# and we have to try at least once, because if nothing
|
// and we have to try at least once, because if nothing
|
||||||
# has ever been shifted, then the stack will be empty
|
// has ever been shifted, then the stack will be empty
|
||||||
# at the start.
|
// at the start.
|
||||||
list($opcode, $operand) = $this->step_for('#');
|
list($opcode, $operand) = $this->step_for('#');
|
||||||
|
|
||||||
switch ($opcode) {
|
switch ($opcode) {
|
||||||
case 'r': $this->reduce($operand); break;
|
case 'r':
|
||||||
case 'e': $this->premature_eof(); break;
|
$this->reduce($operand);
|
||||||
default: throw new parse_bug(); break;
|
break;
|
||||||
|
case 'e':
|
||||||
|
$this->premature_eof();
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
throw new parse_bug();
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
} while ($this->stack->occupied());
|
} while ($this->stack->occupied());
|
||||||
{/*
|
|
||||||
If the sentence is well-formed according to the grammar, then
|
// If the sentence is well-formed according to the grammar, then
|
||||||
this will eventually result in eating a start symbol, which
|
// this will eventually result in eating a start symbol, which
|
||||||
causes the "accept" instruction to fire. Otherwise, the
|
// causes the "accept" instruction to fire. Otherwise, the
|
||||||
step('#') method will indicate an error in the syntax, which
|
// step('#') method will indicate an error in the syntax, which
|
||||||
here means a premature EOF.
|
// here means a premature EOF.
|
||||||
|
//
|
||||||
Incedentally, some tremendous amount of voodoo with the parse
|
// Incidentally, some tremendous amount of voodoo with the parse
|
||||||
stack might help find the beginning of some unfinished
|
// stack might help find the beginning of some unfinished
|
||||||
production that the sentence was cut off during, but as a
|
// production that the sentence was cut off during, but as a
|
||||||
general rule that would require deeper knowledge.
|
// general rule that would require deeper knowledge.
|
||||||
*/}
|
if (!$this->accept) {
|
||||||
if (!$this->accept) throw new parse_bug();
|
throw new parse_bug();
|
||||||
|
}
|
||||||
|
|
||||||
return $this->semantic;
|
return $this->semantic;
|
||||||
}
|
}
|
||||||
|
|
||||||
private function premature_eof() {
|
private function premature_eof() {
|
||||||
$seen = array();
|
$seen = array();
|
||||||
|
|
||||||
while ($this->enter_error_tolerant_state()) {
|
while ($this->enter_error_tolerant_state()) {
|
||||||
if (isset($seen[$this->state()])) {
|
if (isset($seen[$this->state()])) {
|
||||||
// This means that it's pointless to try here.
|
// This means that it's pointless to try here.
|
||||||
|
@ -164,9 +228,11 @@ class parse_engine {
|
||||||
$this->drop();
|
$this->drop();
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
$seen[$this->state()] = true;
|
$seen[$this->state()] = true;
|
||||||
|
|
||||||
$this->eat('error', NULL);
|
$this->eat('error', null);
|
||||||
|
|
||||||
if ($this->has_step_for('#')) {
|
if ($this->has_step_for('#')) {
|
||||||
// Good. We can continue as normal.
|
// Good. We can continue as normal.
|
||||||
return;
|
return;
|
||||||
|
@ -177,76 +243,101 @@ class parse_engine {
|
||||||
// The rest of the algorithm will make it happen.
|
// The rest of the algorithm will make it happen.
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
throw new parse_premature_eof();
|
throw new parse_premature_eof();
|
||||||
}
|
}
|
||||||
private function current_row() { return $this->step[$this->state()]; }
|
|
||||||
|
private function current_row() {
|
||||||
|
return $this->step[$this->state()];
|
||||||
|
}
|
||||||
|
|
||||||
private function step_for($type) {
|
private function step_for($type) {
|
||||||
$row = $this->current_row();
|
$row = $this->current_row();
|
||||||
if (!isset($row[$type])) return array('e', $this->stack->q);
|
if (!isset($row[$type])) {
|
||||||
|
return array('e', $this->stack->q);
|
||||||
|
}
|
||||||
|
|
||||||
return explode(' ', $row[$type]);
|
return explode(' ', $row[$type]);
|
||||||
}
|
}
|
||||||
|
|
||||||
private function has_step_for($type) {
|
private function has_step_for($type) {
|
||||||
$row = $this->current_row();
|
$row = $this->current_row();
|
||||||
return isset($row[$type]);
|
return isset($row[$type]);
|
||||||
}
|
}
|
||||||
private function state() { return $this->stack->q; }
|
|
||||||
|
private function state() {
|
||||||
|
return $this->stack->q;
|
||||||
|
}
|
||||||
|
|
||||||
function eat($type, $semantic) {
|
function eat($type, $semantic) {
|
||||||
# assert('$type == trim($type)');
|
// assert('$type == trim($type)');
|
||||||
# if ($this->debug) echo "Trying to eat a ($type)\n";
|
// if ($this->debug) echo "Trying to eat a ($type)\n";
|
||||||
list($opcode, $operand) = $this->step_for($type);
|
list($opcode, $operand) = $this->step_for($type);
|
||||||
|
|
||||||
switch ($opcode) {
|
switch ($opcode) {
|
||||||
case 's':
|
case 's':
|
||||||
# if ($this->debug) echo "shift $type to state $operand\n";
|
// if ($this->debug) echo "shift $type to state $operand\n";
|
||||||
$this->stack->shift($operand, $semantic);
|
$this->stack->shift($operand, $semantic);
|
||||||
# echo $this->stack->text()." shift $type<br/>\n";
|
// echo $this->stack->text()." shift $type<br/>\n";
|
||||||
break;
|
break;
|
||||||
|
case 'r':
|
||||||
case 'r':
|
|
||||||
$this->reduce($operand);
|
$this->reduce($operand);
|
||||||
$this->eat($type, $semantic);
|
$this->eat($type, $semantic);
|
||||||
# Yes, this is tail-recursive. It's also the simplest way.
|
// Yes, this is tail-recursive. It's also the simplest way.
|
||||||
break;
|
break;
|
||||||
|
case 'a':
|
||||||
case 'a':
|
if ($this->stack->occupied()) {
|
||||||
if ($this->stack->occupied()) throw new parse_bug('Accept should happen with empty stack.');
|
throw new parse_bug('Accept should happen with empty stack.');
|
||||||
|
}
|
||||||
|
|
||||||
$this->accept = true;
|
$this->accept = true;
|
||||||
#if ($this->debug) echo ("Accept\n\n");
|
//if ($this->debug) echo ("Accept\n\n");
|
||||||
$this->semantic = $semantic;
|
$this->semantic = $semantic;
|
||||||
break;
|
break;
|
||||||
|
case 'e':
|
||||||
case 'e':
|
// This is thought to be the uncommon, exceptional path, so
|
||||||
# This is thought to be the uncommon, exceptional path, so
|
// it's OK that this algorithm will cause the stack to
|
||||||
# it's OK that this algorithm will cause the stack to
|
// flutter while the parse engine waits for an edible token.
|
||||||
# flutter while the parse engine waits for an edible token.
|
// if ($this->debug) echo "($type) causes a problem.\n";
|
||||||
# if ($this->debug) echo "($type) causes a problem.\n";
|
|
||||||
if ($this->enter_error_tolerant_state()) {
|
if ($this->enter_error_tolerant_state()) {
|
||||||
$this->eat('error', NULL);
|
$this->eat('error', null);
|
||||||
if ($this->has_step_for($type)) $this->eat($type, $semantic);
|
if ($this->has_step_for($type)) {
|
||||||
|
$this->eat($type, $semantic);
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
# If that didn't work, give up:
|
// If that didn't work, give up:
|
||||||
throw new parse_error("Parse Error: ($type)($semantic) not expected");
|
throw new parse_error("Parse Error: ({$type})({$semantic}) not expected");
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
default:
|
||||||
default:
|
throw new parse_bug("Bad parse table instruction " . htmlspecialchars($opcode));
|
||||||
throw new parse_bug("Bad parse table instruction ".htmlspecialchars($opcode));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private function reduce($rule_id) {
|
private function reduce($rule_id) {
|
||||||
$rule = $this->rule[$rule_id];
|
$rule = $this->rule[$rule_id];
|
||||||
$len = $rule['len'];
|
$len = $rule['len'];
|
||||||
$semantic = $this->perform_action($rule_id, $this->stack->top_n($len));
|
$semantic = $this->perform_action($rule_id, $this->stack->top_n($len));
|
||||||
#echo $semantic.br();
|
|
||||||
if ($rule['replace']) $this->stack->pop_n($len);
|
//echo $semantic.br();
|
||||||
else $this->stack->index($len);
|
if ($rule['replace']) {
|
||||||
|
$this->stack->pop_n($len);
|
||||||
|
} else {
|
||||||
|
$this->stack->index($len);
|
||||||
|
}
|
||||||
|
|
||||||
$this->eat($rule['symbol'], $semantic);
|
$this->eat($rule['symbol'], $semantic);
|
||||||
}
|
}
|
||||||
|
|
||||||
private function perform_action($rule_id, $slice) {
|
private function perform_action($rule_id, $slice) {
|
||||||
# we have this weird calling convention....
|
// we have this weird calling convention....
|
||||||
$result = null;
|
$result = null;
|
||||||
$method = $this->parser->method[$rule_id];
|
$method = $this->parser->method[$rule_id];
|
||||||
#if ($this->debug) echo "rule $id: $method\n";
|
|
||||||
|
//if ($this->debug) echo "rule $id: $method\n";
|
||||||
$this->parser->$method($slice, $result);
|
$this->parser->$method($slice, $result);
|
||||||
|
|
||||||
return $result;
|
return $result;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
57
set.so.php
57
set.so.php
|
@ -7,23 +7,52 @@ Purpose: We should really have a "set" data type. It's too useful.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
class set {
|
class set {
|
||||||
function __construct($list=array()) { $this->data = array_count_values($list); }
|
public function __construct(array $list = array()) {
|
||||||
function has($item) { return isset($this->data[$item]); }
|
$this->data = array_count_values($list);
|
||||||
function add($item) { $this->data[$item] = true; }
|
}
|
||||||
function del($item) { unset($this->data[$item]); return $item;}
|
|
||||||
function all() { return array_keys($this->data); }
|
public function has($item) {
|
||||||
function one() { return key($this->data); }
|
return isset($this->data[$item]);
|
||||||
function count() { return count($this->data); }
|
}
|
||||||
function pop() { return $this->del($this->one()); }
|
|
||||||
function union($that) {
|
public function add($item) {
|
||||||
|
$this->data[$item] = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
public function del($item) {
|
||||||
|
unset($this->data[$item]);
|
||||||
|
return $item;
|
||||||
|
}
|
||||||
|
|
||||||
|
public function all() {
|
||||||
|
return array_keys($this->data);
|
||||||
|
}
|
||||||
|
|
||||||
|
public function one() {
|
||||||
|
return key($this->data);
|
||||||
|
}
|
||||||
|
|
||||||
|
public function count() {
|
||||||
|
return count($this->data);
|
||||||
|
}
|
||||||
|
|
||||||
|
public function pop() {
|
||||||
|
return $this->del($this->one());
|
||||||
|
}
|
||||||
|
|
||||||
|
public function union($that) {
|
||||||
$progress = false;
|
$progress = false;
|
||||||
foreach ($that->all() as $item) if (!$this->has($item)) {
|
foreach ($that->all() as $item) {
|
||||||
$this->add($item);
|
if (!$this->has($item)) {
|
||||||
$progress = true;
|
$this->add($item);
|
||||||
|
$progress = true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return $progress;
|
return $progress;
|
||||||
}
|
}
|
||||||
function text() {
|
|
||||||
return ' { '.implode(' ', $this->all()).' } ';
|
public function text() {
|
||||||
|
return ' { ' . implode(' ', $this->all()) . ' } ';
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue