#!/usr/bin/php -q $v) { $out[] = (!$i ? lime_export($k).' => ' : '') . lime_export($v); } $result = 'array(' . PHP_EOL . preg_replace('~^~m', INDENT, implode(',' . PHP_EOL, $out)) . PHP_EOL . ')'; } elseif (is_int($var) || is_float($var)) { $result = (string)$var; } elseif (is_string($var)) { $opt1 = '\'' . str_replace(array('\\', '\''), array('\\\\', '\\\''), $var) . '\''; $opt2 = $opt1; if (strpos($var, '$') === false) { $opt2 = '"' . str_replace(array('\\', '"'), array('\\\\', '\"'), $var) . '"'; } if (strlen($opt1) <= strlen($opt2)) { $result = $opt1; } else { $result = $opt2; } } elseif (is_bool($var)) { $result = $var ? 'true' : 'false'; } else { bug('Wrong type: ' . gettype($var)); } return $result; } function is_indexed(array $array) { $i = 0; foreach($array as $k => $v) { if ($k !== $i++) { return false; } } return true; } function unindent($text) { if (preg_match('{\A[\r\n]*([ \t]+)[^\r\n]*+(?:[\r\n]++(?>\1[^\r\n]*+(?:[\r\n]+|\z)|[\r\n]+)+)?\z}', rtrim($text), $match)) { $text = preg_replace('{^' . $match[1] . '}m', '', $text); } return $text; } class cf_action { protected $code; public function __construct($code) { $this->code = $code; } } /** * Base class for parse table instructions. The main idea is to make the * subclasses responsible for conflict resolution among themselves. It also * forms a sort of interface to the parse table. */ abstract class step { public $sym; public function __construct(sym $sym) { $this->sym = $sym; } public function glyph() { return $this->sym->name; } public function sane() { return true; } abstract public function instruction(); abstract public function decide($that); } class error extends step { public function sane() { return false; } public function instruction() { bug('This should not happen.'); } public function decide($that) { // An error shall remain one return array($this); } } class shift extends step { public $q; public $rule; public function __construct(sym $sym, $q, rule $rule) { parent::__construct($sym); $this->q = $q; $this->rule = $rule; } public function instruction() { return 's ' . $this->q; } public function decide($that) { // shift-shift conflicts are impossible. // shift-accept conflicts are a bug. // so we can infer: bug_unless($that instanceof reduce); // That being said, the resolution is a matter of precedence. $shift_prec = $this->rule->right_prec; $reduce_prec = $that->rule->left_prec; // If we don't have defined precedence levels for both options, // then we default to shifting: if (!($shift_prec and $reduce_prec)) { return array($this, 1); } // Otherwise, use the step with higher precedence. if ($shift_prec > $reduce_prec) { return array($this, 0); } if ($reduce_prec > $shift_prec) { return array($that, 0); } // The "nonassoc" works by giving equal precedence to both options, // which means to put an error instruction in the parse table. return array(new error($this->sym), 0); } } class reduce extends step { public function __construct($sym, rule $rule) { parent::__construct($sym); $this->rule = $rule; } public function instruction() { return 'r ' . $this->rule->id; } function decide($that) { // This means that the input grammar has a reduce-reduce conflict. // Such things are considered an error in the input. throw new RRC($this, $that); // BISON would go with the first encountered reduce thus: // return $this; } } class accept extends step { public function __construct(sym $sym) { parent::__construct($sym); } public function instruction() { return 'a ' . $this->sym->name; } public function decide($that) { return array($this, 0); } } class RRC extends Exception { public function __construct($a, $b) { parent::__construct('Reduce-Reduce Conflict'); $this->a = $a; $this->b = $b; } function make_noise() { emit(sprintf( 'Reduce-Reduce Conflict:' . PHP_EOL . '%s' . PHP_EOL . '%s' . PHP_EOL . 'Lookahead is (%s)', $this->a->rule->text(), $this->b->rule->text(), $this->a->glyph() )); } } class state { public $id; public $key; public $close; public $conflicts = 0; public $action = array(); public function __construct($id, $key, $close) { $this->id = $id; $this->key = $key; $this->close = $close; // config key -> object ksort($this->close); } public function dump() { echo ' * ' . $this->id . ' / ' . $this->key . PHP_EOL; foreach ($this->close as $config) { $config->dump(); } } public function add_shift(sym $sym, $state, $rule) { $this->add_instruction(new shift($sym, $state->id, $rule)); } public function add_reduce(sym $sym, $rule) { $this->add_instruction(new reduce($sym, $rule)); } public function add_accept(sym $sym) { $this->add_instruction(new accept($sym)); } public function add_instruction(step $step) { $this->action[] = $step; } function find_reductions($lime) { // rightmost configurations followset yields reduce. foreach($this->close as $c) { if ($c->rightmost) { foreach ($c->follow->all() as $glyph) { $this->add_reduce($lime->sym($glyph), $c->rule); } } } } function resolve_conflicts() { // For each possible lookahead, find one (and only one) step to take. $table = array(); foreach ($this->action as $step) { $glyph = $step->glyph(); if (isset($table[$glyph])) { // There's a conflict. The shifts all came first, which // simplifies the coding for the step->decide() methods. try { list($table[$glyph], $conflict) = $table[$glyph]->decide($step); $this->conflicts += $conflict; } catch (RRC $e) { emit('State ' . $this->id . ':'); $e->make_noise(); } } else { // This glyph is yet unprocessed, so the step at hand is // our best current guess at what the grammar indicates. $table[$glyph] = $step; } } // Now that we have the correct steps chosen, this routine is oddly // also responsible for turning that table into the form that will // eventually be passed to the parse engine. (So FIXME?) $out = array(); foreach ($table as $glyph => $step) { if ($step->sane()) { $out[$glyph] = $step->instruction(); } } return $out; } function segment_config() { // Filter $this->close into categories based on the symbol_after_the_dot. $f = array(); foreach ($this->close as $c) { if (($p = $c->symbol_after_the_dot)) { $f[$p->name][] = $c; } } return $f; } } class sym { public function __construct($name, $id) { $this->name = $name; $this->id = $id; $this->term = true; // Until proven otherwise. $this->rule = array(); $this->config = array(); $this->lambda = false; $this->first = new set(); $this->left_prec = $this->right_prec = 0; } public function summary() { $out = ''; foreach ($this->rule as $rule) { $out .= $rule->text() . PHP_EOL; } return $out; } } class rule { public function __construct($id, $sym, $rhs, $code, $look, $replace, $prec_sym) { bug_unless(is_int($look)); $this->id = $id; $this->sym = $sym; $this->rhs = $rhs; $this->code = $code; $this->look = $look; $this->replace = $replace; $this->prec_sym = $prec_sym; $this->left_prec = 0; $this->right_prec = 0; $this->first = array(); $this->epsilon = count($rhs); } public function lhs_glyph() { return $this->sym->name; } public function determine_precedence() { // We may eventually expand to allow explicit prec_symbol declarations. // Until then, we'll go with the rightmost terminal, which is what // BISON does. People probably expect that. The leftmost terminal // is a reasonable alternative behaviour, but I don't see the big // deal just now. $prec_sym = $this->prec_sym; if (!$prec_sym) { $prec_sym = $this->rightmost_terminal(); } if (!$prec_sym) { return; } $this->left_prec = $prec_sym->left_prec; $this->right_prec = $prec_sym->right_prec; } private function rightmost_terminal() { $symbol = null; $rhs = $this->rhs; while ($rhs) { $symbol = array_pop($rhs); if ($symbol->term) { break; } } return $symbol; } public function text() { $t = '(' . $this->id . ') ' . $this->lhs_glyph() . ' :='; foreach($this->rhs as $s) { $t .= ' ' . $s->name; } if (!$this->rhs) { $t .= ' ε'; } return $t; } public function table(lime_language $lang) { return array( 'symbol' => $this->lhs_glyph(), 'len' => $this->look, 'replace' => $this->replace, 'code' => $lang->fixup($this->code), 'text' => $this->text(), ); } public function lambda() { foreach ($this->rhs as $sym) { if (!$sym->lambda) { return false; } } return true; } public function find_first() { $dot = count($this->rhs); $last = $this->first[$dot] = new set(); while ($dot--) { $symbol_after_the_dot = $this->rhs[$dot]; $first = $symbol_after_the_dot->first->all(); bug_if(empty($first) and !$symbol_after_the_dot->lambda and $symbol_after_the_dot->name != 'error'); $set = new set($first); if ($symbol_after_the_dot->lambda) { $set->union($last); if ($this->epsilon == $dot + 1) { $this->epsilon = $dot; } } $last = $this->first[$dot] = $set; } } public function teach_symbol_of_first_set() { $go = false; foreach ($this->rhs as $sym) { if ($this->sym->first->union($sym->first)) { $go = true; } if (!$sym->lambda) { break; } } return $go; } public function lambda_from($dot) { return $this->epsilon <= $dot; } public function leftmost($follow) { return new config($this, 0, $follow); } public function dotted_text($dot) { $out = $this->lhs_glyph() . ' :='; $idx = -1; foreach($this->rhs as $idx => $s) { if ($idx == $dot) { $out .= ' .'; } $out .= ' ' . $s->name; } if (!$this->rhs) { $out .= ' ε'; } if ($dot > $idx) { $out .= ' .'; } return $out; } } class config { public function __construct($rule, $dot, $follow) { $this->rule = $rule; $this->dot = $dot; $this->key = $rule->id . '.' . $dot; $this->rightmost = count($rule->rhs) <= $dot; $this->symbol_after_the_dot = $this->rightmost ? null : $rule->rhs[$dot]; $this->_blink = array(); $this->follow = new set($follow); $this->_flink = array(); bug_unless($this->rightmost or count($rule)); } public function text() { return $this->rule->dotted_text($this->dot) . ' [ ' . implode(' ', $this->follow->all()) . ' ]'; } public function blink($config) { $this->_blink[] = $config; } public function next() { bug_if($this->rightmost); $c = new config($this->rule, $this->dot+1, array()); // Anything in the follow set for this config will also be in the next. // However, we link it backwards because we might wind up selecting a // pre-existing state, and the housekeeping is easier in the first half // of the program. We'll fix it before doing the propagation. $c->blink($this); return $c; } public function copy_links_from($that) { foreach($that->_blink as $c) { $this->blink($c); } } public function lambda() { return $this->rule->lambda_from($this->dot); } public function simple_follow() { return $this->rule->first[$this->dot + 1]->all(); } public function epsilon_follows() { return $this->rule->lambda_from($this->dot + 1); } public function fixlinks() { foreach ($this->_blink as $that) { $that->_flink[] = $this; } $this->blink = array(); } public function dump() { echo ' * '; echo $this->key . ' : '; echo $this->rule->dotted_text($this->dot); echo $this->follow->text(); foreach ($this->_flink as $c) { echo $c->key . ' / '; } echo PHP_EOL; } } class lime { public $parser_class = 'parser'; public $descr = array(); public function __construct() { $this->p_next = 1; $this->sym = array(); $this->rule = array(); $this->start_symbol_set = array(); $this->state = array(); $this->stop = $this->sym('#'); $this->expect_conflicts = null; $this->conflicts = 0; if ($err = $this->sym('error')) { $err->term = false; } $this->lang = new lime_language_php(); } function language() { return $this->lang; } function build_parser() { $this->add_start_rule(); foreach ($this->rule as $r) { $r->determine_precedence(); } $this->find_sym_lamdba(); $this->find_sym_first(); foreach ($this->rule as $rule) { $rule->find_first(); } $initial = $this->find_states(); $this->fixlinks(); // $this->dump_configurations(); $this->find_follow_sets(); foreach($this->state as $s) { $s->find_reductions($this); } $i = $this->resolve_conflicts(); $a = $this->rule_table(); $qi = $initial->id; $d = $this->descr; if ($this->expect_conflicts !== null && $this->expect_conflicts != $this->conflicts) { throw new Bug($this->expect_conflicts .' conflicts expected, got ' . $this->conflicts); } $this->compress($i); return $this->lang->ptab_to_class($this->parser_class, compact('a', 'qi', 'i', 'd')); } function compress(&$i) { foreach ($i as &$inst) { $act = reset($inst); if (substr($act, 0, 2) == 'r ') { foreach ($inst as $k => $a) { if ($a !== $act) { continue(2); } } $inst = array('' => $act); } } } function rule_table() { $s = array(); foreach ($this->rule as $i => $r) { $s[$i] = $r->table($this->lang); } return $s; } function add_rule($symbol, $rhs, $code) { $this->add_raw_rule($symbol, $rhs, $code, count($rhs), true); } function trump_up_bogus_lhs($real) { return "'{$real}'" . count($this->rule); } function add_raw_rule($lhs, $rhs, $code, $look, $replace, $prec_sym) { $sym = $this->sym($lhs); $sym->term = false; if (!$rhs) { $sym->lambda = true; } $rs = array(); foreach ($rhs as $str) { $rs[] = $this->sym($str); } $rid = count($this->rule); $r = new rule($rid, $sym, $rs, $code, $look, $replace, $prec_sym); $this->rule[$rid] = $r; $sym->rule[] = $r; } function sym($str, $description = null) { if (!isset($this->sym[$str])) { $this->sym[$str] = new sym($str, count($this->sym)); } if ($description) { $this->descr[$str] = $description; } return $this->sym[$str]; } function summary() { $out = ''; foreach ($this->sym as $sym) { if (!$sym->term) { $out .= $sym->summary(); } } return $out; } private function find_sym_lamdba() { do { $go = false; foreach ($this->sym as $sym) { if (!$sym->lambda) { foreach ($sym->rule as $rule) { if ($rule->lambda()) { $go = true; $sym->lambda = true; } } } } } while ($go); } private function teach_terminals_first_set() { foreach ($this->sym as $sym) { if ($sym->term) { $sym->first->add($sym->name); } } } private function find_sym_first() { $this->teach_terminals_first_set(); do { $go = false; foreach ($this->rule as $r) { if ($r->teach_symbol_of_first_set()) { $go = true; } } } while ($go); } function add_start_rule() { $rewrite = new lime_rewrite("'start'"); $rhs = new lime_rhs(); $rhs->add(new lime_glyph($this->deduce_start_symbol()->name, null)); $rewrite->add_rhs($rhs); $rewrite->update($this); } protected function deduce_start_symbol() { $candidate = current($this->start_symbol_set); // Did the person try to set a start symbol at all? if (!is_object($candidate)) { return $this->first_rule_lhs(); } // Do we actually have such a symbol on the left of a rule? if ($candidate->terminal) { return $this->first_rule_lhs(); } // Ok, it's a decent choice. We need to return the symbol entry. return $this->sym($candidate); } private function first_rule_lhs() { reset($this->rule); $r = current($this->rule); return $r->sym; } /** * Build an initial state. This is a recursive process which digs out * the LR(0) state graph. */ function find_states() { $start_glyph = "'start'"; $sym = $this->sym($start_glyph); $basis = array(); foreach($sym->rule as $rule) { $c = $rule->leftmost(array('#')); $basis[$c->key] = $c; } $initial = $this->get_state($basis); $initial->add_accept($sym); return $initial; } function get_state($basis) { $key = array_keys($basis); sort($key); $key = implode(' ', $key); if (isset($this->state[$key])) { // Copy all the links around... $state = $this->state[$key]; foreach($basis as $config) { $state->close[$config->key]->copy_links_from($config); } return $state; } else { $close = $this->state_closure($basis); $this->state[$key] = $state = new state(count($this->state), $key, $close); $this->build_shifts($state); return $state; } } private function state_closure($q) { // $q is a list of config. $close = array(); while ($config = array_pop($q)) { if (isset($close[$config->key])) { $close[$config->key]->copy_links_from($config); $close[$config->key]->follow->union($config->follow); continue; } $close[$config->key] = $config; $symbol_after_the_dot = $config->symbol_after_the_dot; if (!$symbol_after_the_dot) { continue; } if (!$symbol_after_the_dot->term) { foreach ($symbol_after_the_dot->rule as $r) { $station = $r->leftmost($config->simple_follow()); if ($config->epsilon_follows()) { $station->blink($config); } $q[] = $station; } } } return $close; } function build_shifts($state) { foreach ($state->segment_config() as $glyph => $segment) { $basis = array(); foreach ($segment as $preshift) { $postshift = $preshift->next(); $basis[$postshift->key] = $postshift; } $dest = $this->get_state($basis); $state->add_shift($this->sym($glyph), $dest, $segment[0]->rule); } } function fixlinks() { foreach ($this->state as $s) { foreach ($s->close as $c) { $c->fixlinks(); } } } function find_follow_sets() { $q = array(); foreach ($this->state as $s) { foreach ($s->close as $c) { $q[] = $c; } } while ($q) { $c = array_shift($q); foreach ($c->_flink as $d) { if ($d->follow->union($c->follow)) { $q[] = $d; } } } } private function set_assoc($ss, $l, $r) { $p = ($this->p_next++) * 2; foreach ($ss as $glyph) { $s = $this->sym($glyph); $s->left_prec = $p + $l; $s->right_prec = $p + $r; } } function left_assoc($ss) { $this->set_assoc($ss, 1, 0); } function right_assoc($ss) { $this->set_assoc($ss, 0, 1); } function non_assoc($ss) { $this->set_assoc($ss, 0, 0); } private function resolve_conflicts() { // For each state, try to find one and only one // thing to do for any given lookahead. $i = array(); foreach ($this->state as $s) { $i[$s->id] = $s->resolve_conflicts(); $this->conflicts += $s->conflicts; } return $i; } function dump_configurations() { foreach ($this->state as $q) { $q->dump(); } } function dump_first_sets() { foreach ($this->sym as $s) { echo ' * '; echo $s->name . ' : '; echo $s->first->text(); echo PHP_EOL; } } function add_rule_with_actions($lhs, $rhs) { // First, make sure this thing is well-formed. if(!is_object(end($rhs))) { $rhs[] = new cf_action(''); } // Now, split it into chunks based on the actions. $look = -1; $subrule = array(); $subsymbol = ''; while ($rhs) { $it = array_shift($rhs); ++$look; if (is_string($it)) { $subrule[] = $it; } else { $code = $it->code; // It's an action. // Is it the last one? if ($rhs) { // no. $subsymbol = $this->trump_up_bogus_lhs($lhs); $this->add_raw_rule($subsymbol, $subrule, $code, $look, false); $subrule = array($subsymbol); } else { // yes. $this->add_raw_rule($lhs, $subrule, $code, $look, true); } } } } function pragma($type, $args) { switch ($type) { case 'left': $this->left_assoc($args); break; case 'right': $this->right_assoc($args); break; case 'nonassoc': $this->non_assoc($args); break; case 'start': $this->start_symbol_set = $args; break; case 'class': $this->parser_class = $args[0]; break; case 'expect': if (!ctype_digit($args[0])) { emit(sprintf('Bad expect pragma: %s', $args[0])); exit(1); } $this->expect_conflicts = $args[0]; break; case 'token': $this->sym($args[0], @$args[1]); break; default: emit(sprintf('Bad Parser Pragma: (%s)', $type)); exit(1); } } } class lime_language { } class lime_language_php extends lime_language { protected function result_code($expr) { return '$result = ' . $expr . ';' . PHP_EOL; } public function default_result() { return $this->result_code('reset($tokens)'); } public function result_pos($pos) { return $this->result_code(lime_token_reference($pos)); } public function bind($name, $pos) { return '$' . $name . ' = &$tokens[' . $pos . '];' . PHP_EOL; } public function fixup($code) { return preg_replace_callback('~\$(\d+|\$)~', function ($foo) { if ($foo[1] === '$') { // always return '$result'; } return lime_token_reference($foo[1] - 1); }, $code); } function to_php($code) { return $code; } public function ptab_to_class($parser_class, $ptab) { $code = ''; $code .= 'public $qi = ' . lime_export($ptab['qi'], true) . ';' . PHP_EOL; $code .= 'public $i = ' . lime_export($ptab['i'], true) . ';' . PHP_EOL; $code .= 'public $d = ' . lime_export($ptab['d'], true) . ';' . PHP_EOL; $code .= 'public $errors = array();' . PHP_EOL; $rc = array(); $method = array(); $rules = array(); foreach($ptab['a'] as $k => $a) { $symbol = preg_replace('/[^\w]/', '', $a['symbol']); $rn = @++$rc[$symbol]; $mn = 'reduce_' . $k . '_' . $symbol . '_' . $rn; $method[$k] = $mn; $comment = '// ' . $a['text'] . PHP_EOL; $php = $this->to_php($a['code']); $code .= 'function ' . $mn . '(' . LIME_CALL_PROTOCOL . ') {' . PHP_EOL . rtrim(preg_replace('~^~m', INDENT, $comment . $php)) . PHP_EOL . '}' . PHP_EOL . PHP_EOL; unset($a['code']); unset($a['text']); $rules[$k] = $a; } $code .= 'public $method = ' . lime_export($method, true) . ';' . PHP_EOL; $code .= 'public $a = '.lime_export($rules, true) . ';' . PHP_EOL; return 'class ' . $parser_class . ' extends lime_parser {' . PHP_EOL . preg_replace(array('~^~m', '~^\h+$~m'), array(INDENT, ''), $code) . '}' . PHP_EOL; } } class lime_rhs { public $prec_glyph; function __construct() { // Construct and add glyphs and actions in whatever order. // Then, add this to a lime_rewrite. // // Don't call install_rule. // The rewrite will do that for you when you "update" with it. $this->rhs = array(); } public function add(lime_slot $slot) { $this->rhs[] = $slot; } public function set_prec_glyph($glyph) { $this->prec_glyph = $glyph; } function install_rule(lime $lime, $lhs) { // This is the part that has to break the rule into subrules if necessary. $rhs = $this->rhs; // First, make sure this thing is well-formed. if (!(end($rhs) instanceof lime_action)) { $rhs[] = new lime_action('', null); } $prec_sym = null; if ($this->prec_glyph) { $prec_sym = $lime->sym($this->prec_glyph); } // Now, split it into chunks based on the actions. $lang = $lime->language(); $result_code = $lang->default_result(); $look = -1; $subrule = array(); $subsymbol = ''; $preamble = ''; while ($rhs) { $it = array_shift($rhs); ++$look; if ($it instanceof lime_glyph) { $subrule[] = $it->data; } elseif ($it instanceof lime_action) { $code = unindent($it->data); // It's an action. // Is it the last one? if ($rhs) { // no. $subsymbol = $lime->trump_up_bogus_lhs($lhs); $action = $lang->default_result() . $preamble . $code; $lime->add_raw_rule($subsymbol, $subrule, $action, $look, false, $prec_sym); $subrule = array($subsymbol); } else { // yes. $action = $result_code . $preamble . $code; $lime->add_raw_rule($lhs, $subrule, $action, $look, true, $prec_sym); } } else { impossible(); } if ($it->name == '$') { $result_code = $lang->result_pos($look); } elseif ($it->name) { $preamble .= $lang->bind($it->name, $look); } } } } class lime_rewrite { function __construct($glyph) { // Construct one of these with the name of the lhs. // Add some rhs-es to it. // Finally, "update" the lime you're building. $this->glyph = $glyph; $this->rhs = array(); } function add_rhs(lime_rhs $rhs) { $this->rhs[] = $rhs; } function update(lime $lime) { foreach ($this->rhs as $rhs) { $rhs->install_rule($lime, $this->glyph); } } } /** * This keeps track of one position in an rhs. * We specialize to handle actions and glyphs. * * If there is a name for the slot, we store it here. * Later on, this structure will be consulted in the formation of * actual production rules. */ class lime_slot { public function __construct($data, $name) { $this->data = $data; $this->name = $name; } public function preamble($pos) { if (strlen($this->name) > 0) { return '$' . $this->name . ' = &$tokens[' . $pos . '];' . PHP_EOL; } } } class lime_glyph extends lime_slot { } class lime_action extends lime_slot { } /** * This function isn't too terribly interesting to the casual observer. * You're probably better off looking at parse_lime_grammar() instead. * * Ok, if you insist, I'll explain. * * The input to Lime is a CFG parser definition. That definition is * written in some language. (The Lime language, to be exact.) * Anyway, I have to parse the Lime language and compile it into a * very complex data structure from which a parser is eventually * built. What better way than to use Lime itself to parse its own * language? Well, it's almost that simple, but not quite. * The Lime language is fairly potent, but a restricted subset of * its features was used to write a metagrammar. Then, I hand-translated * that metagrammar into another form which is easy to snarf up. * In the process of reading that simplified form, this function * builds the same sort of data structure that later gets turned into * a parser. The last step is to run the parser generation algorithm, * eval() the resulting PHP code, and voila! With no hard work, I can * suddenly read and comprehend the full range of the Lime language * without ever having written an algorithm to do so. It feels like magic. */ function lime_bootstrap() { $bootstrap = LIME_DIR . '/lime.bootstrap'; $lime = new lime(); $lime->parser_class = 'lime_metaparser'; $rhs = array(); bug_unless(is_readable($bootstrap)); foreach(file($bootstrap) as $l) { $a = preg_split('~(?<=\s|^):(?=\s|$)~', $l, 2); if (count($a) == 2) { list($pattern, $code) = $a; $sl = new lime_rhs(); $pattern = trim($pattern); if (strlen($pattern) > 0) { foreach (explode(' ', $pattern) as $glyph) { $sl->add(new lime_glyph($glyph, null)); } } $sl->add(new lime_action($code, NULL)); $rhs[] = $sl; } else { if (preg_match('~^to (\w+)$~', $l, $r)) { $g = $r[1]; $rw = new lime_rewrite($g); foreach($rhs as $b) { $rw->add_rhs($b); } $rw->update($lime); $rhs = array(); } } } $parser_code = $lime->build_parser(); eval($parser_code); } /** * The voodoo is in the way I do lexical processing on grammar definition * files. They contain embedded bits of PHP, and it's important to keep * track of things like strings, comments, and matched braces. It seemed * like an ideal problem to solve with GNU flex, so I wrote a little * scanner in flex and C to dig out the tokens for me. Of course, I need * the tokens in PHP, so I designed a simple binary wrapper for them which * also contains line-number information, guaranteed to help out if you * write a grammar which surprises the parser in any manner. */ class voodoo_scanner extends flex_scanner { function executable() { return LIME_DIR.'/lime_scan_tokens'; } } /** * This is a good function to read because it teaches you how to interface * with a Lime parser. I've tried to isolate out the bits that aren't * instructive in that regard. */ function parse_lime_grammar($path) { if (!class_exists('lime_metaparser', false)) { lime_bootstrap(); } $parse_engine = new parse_engine(new lime_metaparser()); $scanner = new voodoo_scanner($path); try { // The result of parsing a Lime grammar is a Lime object. $lime = $scanner->feed($parse_engine); // Calling its build_parser() method gets the output PHP code. return $lime->build_parser(); } catch (parse_error $e) { die ($e->getMessage() . " in {$path} line {$scanner->lineno}." . PHP_EOL); } } if ($_SERVER['argv']) { $code = ''; if (!defined('INDENT')) { foreach($_SERVER['argv'] as $i => $opt) { if ($opt === '--indent') { $val = @$_SERVER['argv'][$i + 1]; if (!$val) { define('INDENT', ' '); } else { define('INDENT', $val == 1 ? "\t" : str_repeat(' ', $val)); } unset($_SERVER['argv'][$i], $_SERVER['argv'][$i + 1]); break; } } if (!defined('INDENT')) { define('INDENT', ' '); } } array_shift($_SERVER['argv']); // Strip out the program name. $timer = microtime(true); foreach ($_SERVER['argv'] as $path) { $code .= parse_lime_grammar($path); } echo <<