From 175b67c614bd4b744c62e8d95de3880a6d9cf033 Mon Sep 17 00:00:00 2001 From: Richard van Velzen Date: Wed, 28 Dec 2011 22:50:59 +0100 Subject: [PATCH] Cleaning up the code --- flex_token_stream.php | 33 ++-- lime.php | 121 +++++++-------- lime_scan_tokens | Bin 22050 -> 22209 bytes lime_scan_tokens.l | 78 +++++----- parse_engine.php | 343 ++++++++++++++++++++++++++---------------- set.so.php | 57 +++++-- 6 files changed, 381 insertions(+), 251 deletions(-) diff --git a/flex_token_stream.php b/flex_token_stream.php index c21e411..f65681d 100644 --- a/flex_token_stream.php +++ b/flex_token_stream.php @@ -1,34 +1,41 @@ executable(); $tokens = explode("\0", `$scanner < "\$PHP_LIME_SCAN_STDIN"`); + array_pop($tokens); $this->tokens = $tokens; $this->lineno = 1; } - function next() { + + public function next() { if (list($key, $token) = each($this->tokens)) { list($this->lineno, $type, $text) = explode("\1", $token); + return array($type, $text); } } - function feed($parser) { + + public function feed($parser) { while (list($type, $text) = $this->next()) { $parser->eat($type, $text); } + return $parser->eat_eof(); } } diff --git a/lime.php b/lime.php index b7d4e64..7f9eb6f 100755 --- a/lime.php +++ b/lime.php @@ -17,6 +17,7 @@ */ define('LIME_DIR', __DIR__); +define('INDENT', ' '); function emit($str) { fputs(STDERR, $str . PHP_EOL); @@ -66,11 +67,11 @@ function lime_export($var) { $out[] = (!$i ? lime_export($k).' => ' : '') . lime_export($v); } - $result = 'array(' . PHP_EOL . preg_replace('~^~m', "\t", implode(',' . PHP_EOL, $out)) . PHP_EOL . ')'; + $result = 'array(' . PHP_EOL . preg_replace('~^~m', INDENT, implode(',' . PHP_EOL, $out)) . PHP_EOL . ')'; } elseif (is_int($var) || is_float($var)) { $result = (string)$var; } elseif (is_string($var)) { - $opt1 = "'" . str_replace(array('\\', "'"), array('\\\\', "\'"), $var) . "'"; + $opt1 = '\'' . str_replace(array('\\', '\''), array('\\\\', '\\\''), $var) . '\''; $opt2 = $opt1; if (strpos($var, '$') === false) { @@ -254,12 +255,16 @@ class RRC extends Exception { } class state { + public $id; + public $key; + public $close; + public $action = array(); + public function __construct($id, $key, $close) { $this->id = $id; $this->key = $key; $this->close = $close; // config key -> object ksort($this->close); - $this->action = array(); } public function dump() { @@ -1049,7 +1054,7 @@ class lime_language_php extends lime_language { $php = $this->to_php($a['code']); $code .= 'function ' . $mn . '(' . LIME_CALL_PROTOCOL . ') {' . PHP_EOL . - preg_replace('~^~m', "\t", $comment . $php) . PHP_EOL . + rtrim(preg_replace('~^~m', INDENT, $comment . $php)) . PHP_EOL . '}' . PHP_EOL . PHP_EOL; @@ -1063,7 +1068,7 @@ class lime_language_php extends lime_language { $code .= 'public $a = '.lime_export($rules, true) . ';' . PHP_EOL; return 'class ' . $parser_class . ' extends lime_parser {' . PHP_EOL . - preg_replace(array('~^~m', '~^\h+$~m'), array("\t", ''), $code) . + preg_replace(array('~^~m', '~^\h+$~m'), array(INDENT, ''), $code) . '}' . PHP_EOL; } } @@ -1153,12 +1158,15 @@ class lime_rewrite { } } +/** + * This keeps track of one position in an rhs. + * We specialize to handle actions and glyphs. + * + * If there is a name for the slot, we store it here. + * Later on, this structure will be consulted in the formation of + * actual production rules. + */ class lime_slot { - // This keeps track of one position in an rhs. - // We specialize to handle actions and glyphs. - // If there is a name for the slot, we store it here. - // Later on, this structure will be consulted in the formation of - // actual production rules. public function __construct($data, $name) { $this->data = $data; $this->name = $name; @@ -1175,34 +1183,32 @@ class lime_glyph extends lime_slot { } class lime_action extends lime_slot { } + + +/** + * This function isn't too terribly interesting to the casual observer. + * You're probably better off looking at parse_lime_grammar() instead. + * + * Ok, if you insist, I'll explain. + * + * The input to Lime is a CFG parser definition. That definition is + * written in some language. (The Lime language, to be exact.) + * Anyway, I have to parse the Lime language and compile it into a + * very complex data structure from which a parser is eventually + * built. What better way than to use Lime itself to parse its own + * language? Well, it's almost that simple, but not quite. + + * The Lime language is fairly potent, but a restricted subset of + * its features was used to write a metagrammar. Then, I hand-translated + * that metagrammar into another form which is easy to snarf up. + * In the process of reading that simplified form, this function + * builds the same sort of data structure that later gets turned into + * a parser. The last step is to run the parser generation algorithm, + * eval() the resulting PHP code, and voila! With no hard work, I can + * suddenly read and comprehend the full range of the Lime language + * without ever having written an algorithm to do so. It feels like magic. + */ function lime_bootstrap() { - - /* - - This function isn't too terribly interesting to the casual observer. - You're probably better off looking at parse_lime_grammar() instead. - - Ok, if you insist, I'll explain. - - The input to Lime is a CFG parser definition. That definition is - written in some language. (The Lime language, to be exact.) - Anyway, I have to parse the Lime language and compile it into a - very complex data structure from which a parser is eventually - built. What better way than to use Lime itself to parse its own - language? Well, it's almost that simple, but not quite. - - The Lime language is fairly potent, but a restricted subset of - its features was used to write a metagrammar. Then, I hand-translated - that metagrammar into another form which is easy to snarf up. - In the process of reading that simplified form, this function - builds the same sort of data structure that later gets turned into - a parser. The last step is to run the parser generation algorithm, - eval() the resulting PHP code, and voila! With no hard work, I can - suddenly read and comprehend the full range of the Lime language - without ever having written an algorithm to do so. It feels like magic. - - */ - $bootstrap = LIME_DIR . '/lime.bootstrap'; $lime = new lime(); $lime->parser_class = 'lime_metaparser'; @@ -1245,31 +1251,29 @@ function lime_bootstrap() { eval($parser_code); } +/** + * The voodoo is in the way I do lexical processing on grammar definition + * files. They contain embedded bits of PHP, and it's important to keep + * track of things like strings, comments, and matched braces. It seemed + * like an ideal problem to solve with GNU flex, so I wrote a little + * scanner in flex and C to dig out the tokens for me. Of course, I need + * the tokens in PHP, so I designed a simple binary wrapper for them which + * also contains line-number information, guaranteed to help out if you + * write a grammar which surprises the parser in any manner. + */ class voodoo_scanner extends flex_scanner { - /* - - The voodoo is in the way I do lexical processing on grammar definition - files. They contain embedded bits of PHP, and it's important to keep - track of things like strings, comments, and matched braces. It seemed - like an ideal problem to solve with GNU flex, so I wrote a little - scanner in flex and C to dig out the tokens for me. Of course, I need - the tokens in PHP, so I designed a simple binary wrapper for them which - also contains line-number information, guaranteed to help out if you - write a grammar which surprises the parser in any manner. - - */ function executable() { return LIME_DIR.'/lime_scan_tokens'; } } +/** + * This is a good function to read because it teaches you how to interface + * with a Lime parser. I've tried to isolate out the bits that aren't + * instructive in that regard. + */ function parse_lime_grammar($path) { - /* - - This is a good function to read because it teaches you how to interface - with a Lime parser. I've tried to isolate out the bits that aren't - instructive in that regard. - - */ - if (!class_exists('lime_metaparser')) lime_bootstrap(); + if (!class_exists('lime_metaparser', false)) { + lime_bootstrap(); + } $parse_engine = new parse_engine(new lime_metaparser()); $scanner = new voodoo_scanner($path); @@ -1284,10 +1288,9 @@ function parse_lime_grammar($path) { } } - if ($_SERVER['argv']) { $code = ''; - array_shift($_SERVER['argv']); # Strip out the program name. + array_shift($_SERVER['argv']); // Strip out the program name. foreach ($_SERVER['argv'] as $path) { $code .= parse_lime_grammar($path); } diff --git a/lime_scan_tokens b/lime_scan_tokens index 37cade9cd586bd0a5f465be0eb6eb81fe5cbc8d9..70c7e0c9b2f03af77b11da064f371d9cd451d5c2 100755 GIT binary patch literal 22209 zcmeHPe|(h1nV(HIxI#z*f`(tZ)lh>%3dbHYBEfMN*SFVRWoAOcH1`-MNu*Q>C?9mfXZ8s+MqUNlv*1hj{=AGSlH!(*) zpS!=#_s#4x-+7*ydFGjC=AHL_C-42v@&y))Md%|-WDBCs2duW~pgYPGXS%S9OGUA` zQj8NrkVM|}9gsn=BLhPY(yR#C2rGeC?zGw%E(2yD56Lospm3{=Vbm-k7?@uaDE_Eh zkoFRI2=kGt9f9?dp9NA3eld6qr-2#BXB`Y)$a_&Y!+c-{&8v7jRP87g!g0)MW7rK@ z2F<@6e2DiubotA{H|y5}Ne1ReG2#ZlZ{rRA>M4F-U3hCrL!e}u;z4!EzjbLPVyYc1 zGarF@M$9#V9*w|b(u95E zXT2XAfAqFDTQ^m@SzX%kJrf#<_%AYWM+Pp*z(+Ii!x{MT42&*H9;|Oy5(z=fcId;7 z$RrIw$^TIXKAeG9X5e)hcz*`omVx~lcu58h1Mldx+SoTE#266;-C)9Ffe$~S`T62j z;A#_JxLliR19h&3kUJQ1xdhyzitaE|xE3u#YPGM<6>jiU3(rm5_qM*WeC?8bz(Aw!uSTPcT>)aDnd*`2uyK%I|Rpkw)o2I3#N7!=Wls zn=FB>8lT@&7Z5e~1brcos6qMFkPG_iLN!vOO&(-%xu~Y0bFJG~Cu)#4fZRckN8DPz zXzo1Mw36w`7{(DjnP;{^8P{owllrxQ$3W-Q4m!%`8&1!E#YiiVI7|Enm`5+h_>iH> z_Bn8N+BoM5)i*gD0Kk`U{F0(-8z}}$E-5;%fD{8}6e$K=A?YPTj3>1TVJE#*h$2$d zQ%pKch{>e6LX?t1!|9}XLd+t?pq)dCMmR`EV2&Y$<1Qq{gtsEnV~eu^>vpz%ZLtus z$R~+JVoz%*Cr;OnUx3c>XU7W>X}7Iwqo5Fp86TmwiHV=!SK^6motM$B3&@?A$X0sE z>*|!4t@YBBU9A$c)!stlV-mCVUOVwYiD?CIG4XzhX$@~F@eYY;6)$FE(N!-ot>bkN zZ;+T)@-8Hn#meR0QjAS+YqQmM`zr5S2d%ca z3+46gPdh53Z=;*6J8s^MVnq0s=DncL+F|{j-Q)3l;^VAHw`KOZhAW#|P=|=LS`JYp zF=`LlQhfHgVEj}L3$R4mt?TY=YYKf)G(C4M}xs1d{KLU8-Ke8;HG3XGU63>Q{rn8#H7c{f5j`>Aa;SeHfEs8ILt4Firufy|W zGme6p-;#3}WbcsZA7T@zQGUyi{Fc_3byzXSCwC z+fLSwBgpczbCHkB?;DZd;(V1E|4wG?w(OUAf%1RWNuH$>3Bvw;tL>55=fWoj=o`hihUoV%b^tt#q zn7W^-&mpzz0mL#D{2~g8PsE(uPnjRtZweQ%qREl7fuQVa$tI{2f*?h45!& zT*KIp7#qXb_ZiD)teLSK#vW$uW6q>ajJ=0gY`AoOT0DAE+96tAlnYhm(21@rveK>J z|5GB-brRxoOh(%0th>u~XIu1rl|i~eByNd1Q{9t3R5*%aj>#BEO9o;T96Eyf2jGn6 z_=*Z-By^*4749D(#tILVYkwRU+L^S>kYL9$m3FSpT$_m|uu{*8nZ;;fHa~#uyXml3;!@jnH_F=IRgGUgiibiGxXV*r-J8XHaHMowCi_HD>T9n*h+ zI-bF0eV|Om==STSS+$2Lhxy179(tU;*tHJ>={OuzbnQZ{zi88%M2EATR_)HuZ@Fae zkjN=Zq{9+%wp(W3*}ko3pjs!RW5U<<=XB(-1uRRoC0bF?bpmzwmyA{9M$f4^PCECQ zu0G_Fa_H|-{WEmu$l(!no->*dbDoPitg48p^Sp$kkaxX||z^0SMs zC{U>tRyhGH_vD2y?RwX*;gRiUME>7oVUpd6m|ckbF6nT}NPi>59EG?l?|vHfcQ`w* zljJOZMfb!3ge$DkbJ@zMPboPWB$3oYXhqdIkS*F7!%Re%`Axiyrh zbDYJcy?blCJ}H(gyNSYb#zGrn!=+hg$YsQDvn=n$E{!d+Mt>1~&GH_4Ef<#>xp1KW zM{|qf2cejn1?isXv(d?Nol@ugJ$11E049_p5?(B)J!G7NYxm_H&eL*!l=CNy_-8mo z_avlpl=UwJ(*jK4fptJJ!*7qXn_Re~Fm(E?ONE~IJ5LSNKvtWax;|6nkXo9Y zjz*scDIHw6PZ?W82hp-JM>EFBGz-!KjpWaG!}Hxj`a0h z^pE*1C(;*mt$M|V<(`S1TG7c_XZET5UF+Z}(Q~>9t5&Eh%Cl53=Cq+0p;w$Y0v|H1 zIj=GJTy{PZb=o4^ZRPi&rEOZxHmIq~&@0b2lb$y@ZQ*A>Lx7sC z;#_yvoo)KcG(I+g7ht{I%O220BCT1o&*eYRt39@y|vHQxgDTUKouDl=iQb+4DPv}=3R*(($EIw~IfLn6WDQX!X1 zLvRVa9G3$Gd2U?G*x8t5+1hmjOjE;;B*AXaJ9Gu=eYuW`<;k@iuz>xDXe z;PC@4cWCTEi@})6-^~YVTJ(-|71cdzXue?x-KqUMaWsciv|_-OHZME8ilpblb=elp zies8KmbRQjFa=11X2{Al{%0=b9X7SvpbFAG2gohd&AA%*LiVR&s$%$lDXNZ-$FnCm z0=ikqWsU8hFd!UwKozUR@~IDNCj8Lzk;Z? zE5>KM;9agf&{BiHS1M#fE29NS>TncO>5I}RG)iW$8?Eec6d|XGY|j;;D=}BF_8-wkb=)h#HhR>sGxB%y1}t8O0(B|>*MkCGca5woZLN&d;muqgajGND z5HSYOFK{=e?hR>zR7V#Y+UM^+s11@)eE#kSG-puPWA`nCp)%_4RNg05OzDa%MN(I! z7H4MSGq2Fdc;RPgt>iIb`Z}Dws!w}QT#y598z#_0oI{L;$w4}ugY-LmjN|NW8np-Q zQN4!AQ9V6F0`(u(4Igv%9(ok{J94NYMY|rRJky#g60#w)Pv$?k3yB@hK1#$!lZoNV zRcLSjI^3eYg*PN zcWg;t^wsx&5Iy~ePTmg7xl&GDLr`9%y$BD4_gm@}S@j;oAkSHg57_ZCE)aV&4*H#u{+y@}$YC$lTjuP7GA?8VFi z`|izR@luOk))K-4ZL=kR!dk9HEz%Cg6rWrO6valj!)KaL<7f8CZEhKfw2nE%1PSQR zSa6&HhNF)(tn?VV<`C1m$4C}uMB^FRH#AG<;d<>nwP;2oW22d?nUzLQoWQNr%dyc< z14Q1jL|-}4eU;_cFaIezf|CeEV0=p}ffQ*k(kq5SSpdv0r6kzX+Q^D>kD;Pjs7R?L zTW37JPL-st+Ehd}AC((2M&A`aGa2PHZ=l2y8=eFv*Do7>hY8GDgf@!!bi~os^0}?d zC|7bS#;1uY#z%@O#v3OYL$l;V-Xz#(=A^P7FefKLN6t#5bGqrs;S7TaVtSV7$l)ZQ z(vBu$$C9z9lQ9}t$+5Flj5bj*s-$AnhcW5>Qe~_Jx}C*w+)X{}+!lgV1?`0Ma}sij`8z|a)i{B|OdO5!sIc{K*7 zER7?DfAN+UeycbBjo+r1SnO%$T!RrYwH0&zwI$c82b>&;&iB+~V3gkElw+|Rq8Koq z8|52Eyuy1ZSvKD9&>c*wTm@mRWRl1E@fbXs{-&9+(d_@@(~%xqlFP@`89w^Sn=tGS zqjZ#jydwTPS+#tRRf#u8aUT+Qwq0L>Ijd)U%wJU6^eo@C#s3Nt_e+LH)P zn);!7j}@C=G%Gr}2s+ykole~o2mYC&cn&%H)9~y1-l}_e+WWE(+G692+EPjC9>m(r zeHxLE5~0hUWvpg@q}!V4jAbJ!+!v8A#rE$vzYo2C3fgf$+7U^;`()cP56rXjeQ{>` z;CPG}hFpBlJPcjI^m*czwJ1DNeITUa73-MbZhKpSM<{?gbEX4&!$R)^Og}mCi zLO|VBMusr{pJbDOq zz&oroTW0F@IsX4+AGku&7xXw+bDB!ejB}h06e!0q@G(MME$$XR;T4<242WzMFJT@k zG=yIg^TZOQ@EC(I7QC0zP9atUPm&S=#O}sUKd(f$0(lVI2#T#hh}B5^lBmM$_}3yL z){1MvTZ0l7A#JH}iZXE>QXUfD6yFxti^s(i;z{uj;s@e~qD4F-ekx9iQ{n|#I)BPx zpCve#8fQ7?;Bw} zjuTVGUx_b^S+Lg(F9G zK+AB>2c3^|A?QM!i$NFTybbg=oWnpb=G1*T5al}Je?GG@Iup)IVe(48R^i75Aqq)$Oyh|_YybSFtg@-MGUtDH7MBf<6BV{Y!K`Qt5eehFpbnfIzveN#4C%{o z-VS;@PL5n5N8<{NGLB*F4wfUi5@W7XtO8zzb2aE{oNGYW;9Lv37AJ2R?!d_%o*bzh zpB$+d)3=lv{ICqpPUBK`AoEJ8udYReuVT!g-! zx@vo%#=bFJgWaR{T3-XVxvG4!MfNGCredqAmJDq11Z(_(dxWo!sXnyVY!>c|ML{^Q z$CVXSH`cjpeN}dBiwy)DDc{)W!ai14oo8#vrK&IHbuF!!`f!7H%0_IqwI}Nvth_4h zlnr@Or41tMscQ%awW!+AE2|$=Uv_$`^ugPvWE(@C29&$eU9AnLTAs$$bsDzuY^`_K zVaxF*yWEdFr78dq;b!fac-hf&@-SDL3==N0L{gX)*R5upg#pI2d(Z!e$abB_kn&L^eE_I&@-Sdpgo}C zJ>vt4`ycv zvxl~oohs`nL!r-?4WoQL!b87KB(62fuQTLxa#G@uD@Hho5Pdz7Xah}_zevev{c}pZ z2<$Tm_6z72v+Tl%^?~fHCn!lRs6!vZ5y-jOF0FI9p~F&3s!*_LgCfYi20#1*;z{|e z51vPP(oXZiKL-E&nwh^OlAX1^P1eaWypS0O-#uxT$@-}*gM8ZjC*b?i_#Ub(bvOe4 z5cvFkY5aSW`A>si4gOcs__axX9Q+@Fe<+Rr-6Y?Ju`>%}sWFWoPS#%x{@1~OF^&K8 zWd4QVe;H%1C5`{XB;O1ES@1_dkE9K=hg&Eh+YkQp7^^R*<$qD;wM+}b(<<6b@*)cw8!{d6$ZPX(9*z6Y5FX=Th!wxbmM ze*u52nZHV!N!~M6fd4%BoYRu+M3imxgLe)*J|9W)7O1@B?*sqjApE1?A032$2K;Xh z!tVip#~}P%%wKnd&*vIR{T8M4n+*QyLHOn1&mV;E2Y>1y_3s0J+#vanf^P$Vms!88 z`=o-j-x=_!|C4F_Cz5^F1AZ^~e8!S&-!jRT{gaD1=}qv{`!>a&4E}Eh$zKkBC;0Md zpZ2e%sr-KM*}sd;^=BbDR49`Mj6I(*Ex5RUzV5qV0Kwv!2{N^yit*yrFo@Ui}OYyp$(ip7nV?J1fSu#euvS zLh@kR9Hf;X4QBoP_}l`YTi|mGd~SiyE%5)^0(c3UpaC>xoJ(F_FLM#r@3GprzVhS$ z%Kq!qMBpg?#~hbsu|9(TQw2Q_%=O{#5cm#_`Se>sEZ;;Q-jMPd{0IW?-T0mg|2HMt zcUx_Y9|q<6!;Zjp4&T$^ZFnGrbm$bIF1S?;%m+sFFFyD`^E%U$XiE#iDTH4k{1)Lu zgdzC<|H~1+h%ggj0m3ST8iXwf46%3 z!GL{w$;^_eQ)Y){;*FCO#_|IX#zO9mprN3mULBKT z%P~MXj+gZS94GpqERVYo=_JkRfNT7c6lv0^e~{j^pbv z+d&78AZWRE$hAX`5cY^Rc8OezOX6EacR?U`+DHSDa@6D32wFdWSD<%*@>u@sCOOX6 zeUMv&G<{HpA&#KSE5|mSa)XM5}MzD69qu$Cbi)By#*HoG&rQiNdrT>ceUSatZ zLD0VyRx>vJSYclMmA@*iRtfY=g}MJotrryLDgke%;Xeu&N=$!Jm~Zz}M>b{#bH7>f zV3{rExTKT?Z>l+WX#Fg>q7(yb{VZ6Wa5sT?P|0WE?dENOyHOv@wcw5R9iV%G*^|Ux z&T)=6S&~G)@Ov^^H+2I-vM?Q{lWU50@erH(+nIOnh#tJ!-;N0dvEp&d=Sq+;*vPDe!R0q-UGE`jB32mTH6^Vuu$KH%!Vwc7Zc zpZEaq^9QUpKF25iHn8Ur{QHf8{~ovv`NtZ#1$f(ct+rhTJ_W4(pL_aV1=j0f3s&hD zGWZ_=bN)wm*#8P~DWaPSojPg0Nvb;Mp7Nf%U=qpUS{(z`B15&_8^S$W7K1V}JY# zk>8>`zVl#tZv)?Es{dW!FJSx}Hu#?a>%)%7FpP^_Q+o^5B~pq|pJLz&)W?JoS%qQ- z@O3762eA1YRm*^Pe#>fm5%Sch3RoX3Z$}3HTi_32zs2BD{$b!g^xszud<2-E*l`>9 zIB>P8{XYlR2h0CA;HBTjJtN{Q?_JIKQu>?44D(`Y&u8&zJ^G{Gq1VUiE zdj;ltmu4J+`En-k4paVxz-Iqn3CwQ;F)zzo3%ncGFTM*Ub^+U9FTPVF4gl*z+RKFP zU>q=E@{a&tZt9PxfE!`2x!|$9A7#jQ00&I|@e1%76aEb_?>CSg`G<)&fjx3!%VfY@dMM*utT9ZWv!g9JEdewUHEC6VuyTK##dZbt#o);h$ zpKc=whW#EF_jJ^{S(+!{XR6=b5HiZcCJa`OkFfPmuc{A7jiq{q0qXG}1Cr*XYN+MS zNCX^U|B0+4;7+wsSIk{7DK+W?^(nv|H1$Ex7GEITkV*{&8(m8)%Uz+MyH4tuvC)P# z$vr;&I32zi*Vq{5E*`a)hfJE@Ff}3iHC4FeDIaWrM@i@o2Hmhc_B*-euU%TUWYN5I zj_N|!t>w$+mX*7fEm*L^xze?=Y;L*JB{!iLuuDGRf!9HopvNf7 z<(oNcW{GTnl`G^8*KID@NKa!5zpKZGI8-_q!UisWInUwH-^45BxA)L;a|%D2mxiTf z%qcF9d@j`3h%fCajWRz3n3A4uC{pd&7~YipWM7J33ch@H#RjQQ|G@-l+m|4L&r;8ZU@)D&ksG4m8)=_gRGnsM#ZN3|RMHS|d9fqP&r!&x z;^TdTCcjdGNvrX!5meqTs5muq}a(VWCRe^9jA!^w02H3LMEl*g(MK$`ikntGS_9y%_}oW^kBL8am7@+*+=8;_iFYy6xy zlp5+YlWF-DZ*eX6Z1Oc=|KdDMc@5|n{cTFKoH9AOklm}kZJEwy4=dMHy-hheE5Me@ KZQ%@oM)BW@%!42R literal 22050 zcmeHveRx#Wx$m0E4vvsyMhG=v(6Jpf*boy9ln7`b;X6X)ODxo3l1!M?B$Fn4khDAs zodh#((ncTX;Z|x*!G3b@@pw5j$8sc^n1F{bcTW^-N5(C>+F(I`{WmYwu)d zine|3^W1+9duQd{zjwXs>s{}=)|&mX9$Zki(BW_h>u`z`LF9Qza}|Pao2Oz5g;!iD za>TXb8Zibe#uaXZ41yO47*fI05K<6U1Fzn$xfmt@GcXQp89}$vk(kSFFYzf z^9~_0k7zE2QY7j{V7ZKUg5)4R2XPEXff*RjG8jUT520*^QeXxvuKZ2Cs&pE7KSLaX ztSjWL_`47f@jp4N{8u90UcVtoGB7=|5jWS?RNP!!HNCc`KC&gRDV%qUibK_9{G#P+ zaHZP8JlzOPn~6Z(G5*?TPn-@tJpN$MA9pNuHoqNydd_CXG7z0{Yny^orI4h;ir@EKP2I@B>aOU%z9gg7Z)=v z0LuCVk}&(!Iv5|a04VVOf7cXbc8wYr~a7rV=$x0bOqfXsiv^Lpa#jSReKyKA_ix z>qTX4Fwh7dVybO9BW*aMi|U4mUMcE^^P^yO4KfOg>iZjO^q{ClX{Z${xhV)yzn?WX zn^702sTb8q6Gp1WU{EY7TQa}Ie@ou2d42$4H@w2FN z{1<3YwAZ!1hk`;RW?V#T6H`CIe~G8Eb<$G?&LDMSDq9(1+`vhR+1e0QJJ2mLTOGZYd!7y4Wj+jvf|I{0NP6FZiu{K|Ge}lk!J)ijDh`{+#>Jzc^lUAK81Q_esrF)T2*v zA88TA`!rYdV%YZmN%3gEA^#y zZUn2p)aN>iJ_Xg-)|lptzxKyO0=nd|6EDYgbSSN}7qZc~qqTbj+o5atVZVg zd!Z`yUnesd?C)TkLRN-@|Fdf~EBOHo-PC7vD7%vyjcxE{#&lm6%B(V`bc55rDlvN^ z^}Kn4fk*?9(cbJHGhKU=t3C7Y*(0ac}T>;eTFFV$Q&i2NB6Wi+Z#cF*y@m(m} z$cgpC|Kn{?rOKzEJ9cY-Ne+uZmdfp5_eH1mjQ$d@EV;zTk_)<9FX&mZLSKA6TLg31 z2in^0{$aQKPhR3f+YD+4jDcYN}eqcZBkbVPe&W`^l5~@T~s6&nqqF9Lb>^+?vQrNsw>L{JMM#kB( z6e!@B{*<&1L)#wTZRK0WeC0^5#sk&fG@1;0zytkfLjPGXpHRF3#oERVeyU&N#3>@vAdzhscgI(a zDwCZe{XY6O1}rvc@VLUV|2?Ra5BlIJ)WBZ3fh0tv+A@vre;`ex zUtbp;bjH@D#Tv9&gR8%o3pt_OC$`ShUz{bekV4ZDVSSzhnzlT^;os`Z#iRgeDSMg74%{ZHLpG}LAJwvDD4YvX&#KnMXyBO z?<>u6h(0Y#pe@qRC&FByqbyXDCL8VbU{>qt^85e=)u5oRvBv@|~-pTMis%e@cV3hYhP8?KL9f0lJpyO;*o z$;D@XDj^$+8KUKR{M3?WN^qQjiY53Bft=3(UcoOg+YsY!IR-|2h)t&*O<_;E4^2D( zqE@&Yd^v3sj2Q>Ph`sLqOg+Hyr4|`+*d>~fe; zS|=+-TT-?N8{G9Cl6{5wfct^+gb!K3tC$qyn^A>%;uSc$3G@W=SnHrdw`=`@N zzMM*RC>x2!9ms!{`%o$kqP=Xlz%-j5gVx(p42Q8L7qQu`33tmRXIwuXW&c&Q_yNl zY>Z9pA0aj%5E(Clo-pjkmr?v(fMi8JO3{BsM6|c8pj-Z^GPTo|VmCI^BnNclYNYQ% z)r^oQ=8^_m>zfmsPF)$SWp;mYE_-62kBy*Ye=5gL^5DL$g($4E3Jm!}%<}5io5JiDdEf>pt`JMl)lmGQOh;Sh7+UKK55CVjV7Zm6N{?>I zb-G(VKxwhJKRA_Ap4NKG{avm{3XWk(nLD^tjtgtk+O_em-49g97N$kJov;8e70mgx z_z!3yFcz8%HT^3n)i>?+*xQc&4^G9T5g21+t`E-S;xYDXE*_$?K^Lvy!&j0P)ZUxCDO(z_ZN*yc>5P$fsLeY653 z;BcM>g}RJ|nj_A9Z_u71joRnzW+5qa>0gH1SfZtr z`R25hW+bA0j@eL{yX6NE9_z61NZn22-@a}03U(o*EF@{GNw>Z)}NtVLQi4#%6I zK*j&eHTOxFb}S|SAzC|-CYP72JL_luG-`dv-7<~sz&!>W1;1NF~%s?q8~YwJ@;ujLUQ^_bMV7zjImgYNiWx{qaVS_USJLze6t+?q=w)4 zeyC541L(>e_n`>`k7Dr_JNnW^s24gn7HLqooL>KJ2oZ}kWLmDZoiCMamR8n912Fz8k%4jcL_b%4O|)3K_)YjiteHuq$i+bes2y+(!;x`Nb!3 zH{@*O4NjF#+yGL;BW@GP5whjnMP>PepwS+OQvARpsEBEsnzBu~{xe*#O~*i=Rxs8O z?aT4=hqf^=1nJm&#ubBqFjv)FkHl-x#O*!froy9!vN5XXxBrUw7URa-P+OFj)B2xF zI9n6?w1NxDKC#G|70dOZYhAc3WEUrP{cnoG5sE&IydLe%mSbwtdi-3Ek?phHk~8lQ zcJetE97Uh-D_-Gu@Sv7BX{6A1&PU}H?ys9#06EMRjd(iwU*I8wH(c$l3)Y1jo4vs;4FO2jZ=zhIo}3Q(netW5^?|yYN-LdgLa@Fm z(irrsR;gzqXv*iO@mJ#Ug>EH+_~+zrsz@qLX8h-6O?9FU)d4+FJ6cJ2iW6+C2Uk8I znLa&OUqw}zy;4`x#K%1cieZ zrR=!sFglIe{OseM-I}ZGQ59aqbnD4#KN9%ddKxvjkLIB58qJUkW> zc%~rI{=#78b18~PNrC~7k|gExVvG0JP|u%9ms~a;QOP#_HD&z2$nu#j>%eyflIwCA z|M%1hOyLs8B>0vCrxSD>e19DLbFBCR&a3f;#ML;*;)#70{(ImlY51RkYs%A4>9r?MnWI&p}K623G_@)MriHZ#WFGId};#_W?WhkS}JXeTvan~?fDOQSA zcuQc-FvpTxYm+DMZumeMT0;4<;WTT+I;3X258-Zc4`RssD$shIL>q9fM-WoZ%KuVy zxB;lYvMp;IRci{xOTOkfc&R`+hD#q;i7$$;iyFMau}RE;$QE$|dX#r;fli1L=BkiBE`1^zOGr6k56waqE!7 z67ZIb1!A7K37kj8_r&+bm&N1aN8-og=i+~fr^O-hjCfW&FZ#rbGI#!?o&EVU1+7nk zt)=5X?I#7TyGGnD!(#0C91zxiH zlj9C>i*4LuAlkp(2UZ|{1yTaLaqg{%K~W|xv&p)PDc;TN653Cw?X!HaA(`j0@P;)xVOQp=A(bvkGCV;9pKJE zZLTxVndrBIOK5N*WM|m;i;$ipE6;1q^G3wqh;tg~G@Li%EJMgQ&s%|R#aU>cMZiTk z=Yr0~IS+Im&Qj1)oQpvh<6H{56z4L~WjM!y{w3#UcCL(Ye17z(Sjt{XKACTqaa@x! zyBrLTPg`G0Bx-Se_}>$WFl;2zmf?G3a5?4WP$C8$e$JZ2=tu-2?gw=mF5Q|4byhL9;<$ zCLeT&{CFb4Ni!QX6Eqhz2XrfFG3Z{q0QraV8Go7=Mncm7>CSvu>5Gsf();-W^6_p zI4D-@TgcxCUWBs6B>hR3;SY=#S2 z^^}fd-DW{28zA#Fn~b;C6j?NGC9iriv<2gqsZ<`w(O%n-=1!);;X~L4UN3mxA)_QC ze_=Fr;SOzatMj20=Vs`szpqEu`=^l0h3#%sa^5o5+qqCyoVv&XpLQRDObcbqdgoV| zby_mcxm?vrmU|Jh#~~}<-Gm+%M^npL=nl>KEwfmR#LyXhpcFoGAM=Eq^KKLgZL;oj z5Vr?$kDGCequSyfDN7!5I==xS)VyRI#wF+*avu2AHzQKQ>t}mtd%Rt`4(YBp)3G1C>qqoSe+?lXW-Jftn7&(;#Crq0jY&NE_J$;08vK{# z+4J*)w;DYD&g8)|3c)K+;&E)?7-E;(0N&!H{5W=$+T`eeE#MV{cbWxIAunfWEN3tH zE{s2LTX~T86nI(^uNORF<1x?E;C+HTmoO#l3M$vgKMQ^#$27(FuFKHm7{Hfr=OpoE z|ASu&{%}6U=vs8p9HgIvxGji_aiNdSyM*s=O!;og*o;j0&nWMoH;(0RMf&(}af6d} zk?H5Br!IQ|BM|%xti7N=#jUx(a6GKUKDI1JyOv&hY&ngzuOThESsv6K$IlnR1G#)$ z*1%;AT-Ly44P4g1Wer@`z-0|w*1%;AT-Ly44P4g1Wexm)(m(W8w4s zovoUS&$)NvebxW^G!Z_4Z_IfwWJ46+or1mw%=;~4kcjv0_#5APYRFq6=kW#*fB)}6 z;Jpt%XUFHM;(Uwd;`Pg*d>ZLR;InYv#=v_`;a%XPPGQ#NENBHVF8}I--^^QyeuK8W zg>V63%nr@<1%w+AW+LE*pKC2bHNs|u9SGk;_%XsEgg%7dAiRZe0l~_X^;=X@a+^12 zO+}<$kK9~VQy*vydkgbs<`qmYib&?I(-amW4{ssqfeKK)QPGffC10n(+e-CeJ(xFd z{*vi>V3Ub9)kpFwA~pETzNSj#5rqOxA(4kSW%yQ;q@#$YYw z)I~$BF7j&X@itVRj+bggUNr!s!c}<9N8|-V{_4g+UCB?|jU9eua6!sU;Yam0t(r)Cw3IMbX>tGy@FCp;#6w1l{ z4-nzSwRKqcm&kLC9`IR491w)$E59kj{q`8oFNxCsnPdAPLXo;Bsz$9&U~ z`(z9c3^utJY;yb%I{P!^j@#t;!Ltu?R#}J|zJ3YU>%g-P$}q$cto+LG25MQcvJpP3N;TaKMUI_$0K63bBr2y6jmj3d{J1Hz;Q%jr5%nJ3M+%+xS+7AfHlvh z$Pr8Vxr&!!(w`M(E7UxyFlS-qp9*t?f__1&iD^#?^X-|DBLyRaz27wPUV<&AUsB3}Hyt>3SoL#YM#%xT>gT}fgm3E%jcYzWh;L>@UFkL>T{L&F0j4)$ACAQ`e*rn z2W%Z|PcOhply?-A@vngLReGyFZvyih6${f3587}lD#%yYt@tUx-1vZw;Xr?Xi5R;d zd>`qErvq=?k8cc2%uj&4Phu?u%J?~;-2A|bF98;iFE{0v5&vXFekJe|ZJO&guv!02 z#J2j}56pM-t@;_jeAC^+-vj0sPar6NlGulfGsutQ2lIOx*b`HGIT0TL?zN>q1$^9w ze+OI+c|NzM{2zdi+Tu01^jXC7IW^-a19#iv^MLtLgH^w!z-J82^&M0G9^gH;_yFVa z?OGM$slWSyE&JpX{|{W8vB|#$d~&DeYBA%_0IaG#hn-&N~;=6@Bi<&R!GndUng z^hdUzdj{pHArDBsVR(BIE(cx*dB%HS&vyg!%PK29*GJZPG#=woMN;|)fQvgu>ho|? z{Nuplw*z|+{|z(#F<|TP;^ODP-M0S!r7fQI=?5-Hc}#Pa zGfQxtdDbTXI@0ZB&A%x|#Cd1U_xnm-hD{=*kGp{zV2_qQ*C)wuPQq=#i7l!s?B7%yt_aln{E$cw--pj}P;Y!$#7|Ee8iSi_!jUF^mV-^hz^&w$DEZ(~XENK@$2@>S462qKsGtceLnRk1Fxg}kbok*3{WqgRIf*ikMRmTKVL3NsN! z<;@Gw#z<|@&riSV0+bJiYheOXHqh7@X!hfyB;hZ;XZgH4mz0c-QN8A0RJLONyfXia zg$q|LSnXduZ+_VVzx-?rnK!9Q=wYj1b9cDPrifqOF@c$&GL1nqFMrL`JG~Pu@Cj zM7q!{wklW=*)+Vp-AH^s;^nO$R$pz8hX#-OnhX-y)sb7}A%>sF!6KQN+Bk1?R&1e} zoytsZt2bIk?x5G)+yqfGUvn5xE=hf}Bb8xTn*iFgP}}&~Iex!QrOnOMv6T&OtHB2W zSsoN-@#QCiu!K>&6{;>WYsF0olL~1H`$PD!ua@q~reXs@rBvI-hRw|l^o0?-hHI)# z3rA;`w7;M|lAkA{o>f5(b(SJ*HYbf<7HLm~g5k5H%2j5cv{mBhG^z>8aMg~8_IT{2 z=nvgbD}_!+Mfh=3&$MB=J)(U-#}PqpkO=b$O1G}A9Z`(hk#R&YU-*}9_OA?Xs%gU4 zge4e3n}SWln>yO%l;+Ws?B!9LJ&s7RShZi|2#=Od8&Jlpu4NYH5TXnlJr!;i{}+ya BVh;cS diff --git a/lime_scan_tokens.l b/lime_scan_tokens.l index 3884279..d8d9a9d 100644 --- a/lime_scan_tokens.l +++ b/lime_scan_tokens.l @@ -28,76 +28,76 @@ void php(); %x dquote %x squote -CHAR \n|. +CHAR \n|. -ALPHA [a-zA-Z] -DIGIT [0-9] -ALNUM {ALPHA}|{DIGIT} -WORD {ALNUM}|_ -STOP "." +ALPHA [a-zA-Z] +DIGIT [0-9] +ALNUM {ALPHA}|{DIGIT} +WORD {ALNUM}|_ +STOP "." -SYM {ALPHA}{WORD}*'* -LIT '.' +SYM {ALPHA}{WORD}*'* +LIT '.' -ESC "\"{CHAR} -SCHAR [^\']|ESC -DCHAR [^\"]|ESC -COM "//"|"#" +ESC "\"{CHAR} +SCHAR [^\']|ESC +DCHAR [^\"]|ESC +COM "//"|"#" -CC [^*\n] -CX "*"+{CC}+ -CT "*"+"/" -BLOCKCMT "/*"({CC}|{CX})*{CT} +CC [^*\n] +CX "*"+{CC}+ +CT "*"+"/" +BLOCKCMT "/*"({CC}|{CX})*{CT} %x pragma %% -[[:space:]]+ {} -#.* {} +[[:space:]]+ {} +#.* {} {STOP} out("stop", "."); {SYM} tok("sym"); {LIT} tok("lit"); {BLOCKCMT} {} -"/"{WORD}+ | +"/"{WORD}+ | "/$" out("lambda", yytext+1); "%"{WORD}+ { out("pragma", yytext+1); yy_push_state(pragma); } -<*>"{" { +<*>"{" { lit(); yy_push_state(code); } -. lit(); +. lit(); { -\n { - out("stop", "."); - yy_pop_state(); -} -[[:space:]] {} -{SYM} tok("sym"); -{LIT} tok("lit"); -. lit(); + \n { + out("stop", "."); + yy_pop_state(); + } + [[:space:]] {} + {SYM} tok("sym"); + {LIT} tok("lit"); + . lit(); } { -"}" { - lit(); - yy_pop_state(); -} -'{SCHAR}*' php(); -\"{DCHAR}*\" php(); -{COM}.* php(); -{BLOCKCMT} php(); -[^{}'"#/]+ php(); -. php(); + "}" { + lit(); + yy_pop_state(); + } + '{SCHAR}*' php(); + \"{DCHAR}*\" php(); + {COM}.* php(); + {BLOCKCMT} php(); + [^{}'"#/]+ php(); + . php(); } %% diff --git a/parse_engine.php b/parse_engine.php index fd54cc4..587c56e 100644 --- a/parse_engine.php +++ b/parse_engine.php @@ -1,5 +1,5 @@ type = $type; $this->state = $state; } } + class parse_premature_eof extends parse_error { - function __construct() { - parent::__construct("Premature EOF"); + public function __construct() { + parent::__construct('Premature EOF'); } } - class parse_stack { - function __construct($qi) { + public $q; + public $qs = array(); + /** + * Stack of semantic actions + */ + public $ss = array(); + + public function __construct($qi) { $this->q = $qi; - $this->qs = array(); - $this->ss = array(); } - function shift($q, $semantic) { + + public function shift($q, $semantic) { $this->ss[] = $semantic; $this->qs[] = $this->q; + $this->q = $q; - # echo "Shift $q -- $semantic
\n"; + + // echo "Shift $q -- $semantic\n"; } - function top_n($n) { - if (!$n) return array(); - return array_slice($this->ss, 0-$n); + + public function top_n($n) { + if (!$n) { + return array(); + } + + return array_slice($this->ss, 0 - $n); } - function pop_n($n) { - if (!$n) return array(); - $qq = array_splice($this->qs, 0-$n); + + public function pop_n($n) { + if (!$n) { + return array(); + } + + $qq = array_splice($this->qs, 0 - $n); $this->q = $qq[0]; - return array_splice($this->ss, 0-$n); + + return array_splice($this->ss, 0 - $n); } - function occupied() { return !empty($this->ss); } - function index($n) { - if ($n) $this->q = $this->qs[count($this->qs)-$n]; + + public function occupied() { + return !empty($this->ss); } - function text() { - return $this->q." : ".implode(' . ', array_reverse($this->qs)); + + public function index($n) { + if ($n) { + $this->q = $this->qs[count($this->qs) - $n]; + } + } + + public function text() { + return $this->q . ' : ' . implode(' . ', array_reverse($this->qs)); } } + class parse_engine { - function __construct($parser) { + public $parser; + public $qi; + public $rule; + public $step; + /** + * @var boolean + */ + public $accept; + /** + * @var parse_stack + */ + public $stack; + + public function __construct($parser) { $this->parser = $parser; $this->qi = $parser->qi; $this->rule = $parser->a; $this->step = $parser->i; - #$this->prepare_callables(); + $this->reset(); - #$this->debug = false; } - function reset() { + + public function reset() { $this->accept = false; $this->stack = new parse_stack($this->qi); } + private function enter_error_tolerant_state() { while ($this->stack->occupied()) { - if ($this->has_step_for('error')) return true; + if ($this->has_step_for('error')) { + return true; + } + $this->drop(); - }; + } + return false; } - private function drop() { $this->stack->pop_n(1); } - function eat_eof() { - {/* - - So that I don't get any brilliant misguided ideas: - - The "accept" step happens when we try to eat a start symbol. - That happens because the reductions up the stack at the end - finally (and symetrically) tell the parser to eat a symbol - representing what they've just shifted off the end of the stack - and reduced. However, that doesn't put the parser into any - special different state. Therefore, it's back at the start - state. - - That being said, the parser is ready to reduce an EOF to the - empty program, if given a grammar that allows them. - - So anyway, if you literally tell the parser to eat an EOF - symbol, then after it's done reducing and accepting the prior - program, it's going to think it has another symbol to deal with. - That is the EOF symbol, which means to reduce the empty program, - accept it, and then continue trying to eat the terminal EOF. - - This infinte loop quickly runs out of memory. - - That's why the real EOF algorithm doesn't try to pretend that - EOF is a terminal. Like the invented start symbol, it's special. - - Instead, we pretend to want to eat EOF, but never actually - try to get it into the parse stack. (It won't fit.) In short, - we look up what reduction is indicated at each step in the - process of rolling up the parse stack. - - The repetition is because one reduction is not guaranteed to - cascade into another and clean up the entire parse stack. - Rather, it will instead shift each partial production as it - is forced to completion by the EOF lookahead. - */} - - # We must reduce as if having read the EOF symbol + + private function drop() { + $this->stack->pop_n(1); + } + + /* + * So that I don't get any brilliant misguided ideas: + * + * The "accept" step happens when we try to eat a start symbol. + * That happens because the reductions up the stack at the end + * finally (and symetrically) tell the parser to eat a symbol + * representing what they've just shifted off the end of the stack + * and reduced. However, that doesn't put the parser into any + * special different state. Therefore, it's back at the start + * state. + * + * That being said, the parser is ready to reduce an EOF to the + * empty program, if given a grammar that allows them. + * + * So anyway, if you literally tell the parser to eat an EOF + * symbol, then after it's done reducing and accepting the prior + * program, it's going to think it has another symbol to deal with. + * That is the EOF symbol, which means to reduce the empty program, + * accept it, and then continue trying to eat the terminal EOF. + * + * This infinte loop quickly runs out of memory. + * + * That's why the real EOF algorithm doesn't try to pretend that + * EOF is a terminal. Like the invented start symbol, it's special. + * + * Instead, we pretend to want to eat EOF, but never actually + * try to get it into the parse stack. (It won't fit.) In short, + * we look up what reduction is indicated at each step in the + * process of rolling up the parse stack. + * + * The repetition is because one reduction is not guaranteed to + * cascade into another and clean up the entire parse stack. + * Rather, it will instead shift each partial production as it + * is forced to completion by the EOF lookahead. + */ + public function eat_eof() { + // We must reduce as if having read the EOF symbol do { - # and we have to try at least once, because if nothing - # has ever been shifted, then the stack will be empty - # at the start. + // and we have to try at least once, because if nothing + // has ever been shifted, then the stack will be empty + // at the start. list($opcode, $operand) = $this->step_for('#'); + switch ($opcode) { - case 'r': $this->reduce($operand); break; - case 'e': $this->premature_eof(); break; - default: throw new parse_bug(); break; + case 'r': + $this->reduce($operand); + break; + case 'e': + $this->premature_eof(); + break; + default: + throw new parse_bug(); + break; } } while ($this->stack->occupied()); - {/* - If the sentence is well-formed according to the grammar, then - this will eventually result in eating a start symbol, which - causes the "accept" instruction to fire. Otherwise, the - step('#') method will indicate an error in the syntax, which - here means a premature EOF. - - Incedentally, some tremendous amount of voodoo with the parse - stack might help find the beginning of some unfinished - production that the sentence was cut off during, but as a - general rule that would require deeper knowledge. - */} - if (!$this->accept) throw new parse_bug(); + + // If the sentence is well-formed according to the grammar, then + // this will eventually result in eating a start symbol, which + // causes the "accept" instruction to fire. Otherwise, the + // step('#') method will indicate an error in the syntax, which + // here means a premature EOF. + // + // Incidentally, some tremendous amount of voodoo with the parse + // stack might help find the beginning of some unfinished + // production that the sentence was cut off during, but as a + // general rule that would require deeper knowledge. + if (!$this->accept) { + throw new parse_bug(); + } + return $this->semantic; } + private function premature_eof() { $seen = array(); + while ($this->enter_error_tolerant_state()) { if (isset($seen[$this->state()])) { // This means that it's pointless to try here. @@ -164,9 +228,11 @@ class parse_engine { $this->drop(); continue; } + $seen[$this->state()] = true; - - $this->eat('error', NULL); + + $this->eat('error', null); + if ($this->has_step_for('#')) { // Good. We can continue as normal. return; @@ -177,76 +243,101 @@ class parse_engine { // The rest of the algorithm will make it happen. } } + throw new parse_premature_eof(); } - private function current_row() { return $this->step[$this->state()]; } + + private function current_row() { + return $this->step[$this->state()]; + } + private function step_for($type) { $row = $this->current_row(); - if (!isset($row[$type])) return array('e', $this->stack->q); + if (!isset($row[$type])) { + return array('e', $this->stack->q); + } + return explode(' ', $row[$type]); } + private function has_step_for($type) { $row = $this->current_row(); return isset($row[$type]); } - private function state() { return $this->stack->q; } + + private function state() { + return $this->stack->q; + } + function eat($type, $semantic) { - # assert('$type == trim($type)'); - # if ($this->debug) echo "Trying to eat a ($type)\n"; + // assert('$type == trim($type)'); + // if ($this->debug) echo "Trying to eat a ($type)\n"; list($opcode, $operand) = $this->step_for($type); + switch ($opcode) { - case 's': - # if ($this->debug) echo "shift $type to state $operand\n"; + case 's': + // if ($this->debug) echo "shift $type to state $operand\n"; $this->stack->shift($operand, $semantic); - # echo $this->stack->text()." shift $type
\n"; + // echo $this->stack->text()." shift $type
\n"; break; - - case 'r': + case 'r': $this->reduce($operand); $this->eat($type, $semantic); - # Yes, this is tail-recursive. It's also the simplest way. + // Yes, this is tail-recursive. It's also the simplest way. break; - - case 'a': - if ($this->stack->occupied()) throw new parse_bug('Accept should happen with empty stack.'); + case 'a': + if ($this->stack->occupied()) { + throw new parse_bug('Accept should happen with empty stack.'); + } + $this->accept = true; - #if ($this->debug) echo ("Accept\n\n"); + //if ($this->debug) echo ("Accept\n\n"); $this->semantic = $semantic; break; - - case 'e': - # This is thought to be the uncommon, exceptional path, so - # it's OK that this algorithm will cause the stack to - # flutter while the parse engine waits for an edible token. - # if ($this->debug) echo "($type) causes a problem.\n"; + case 'e': + // This is thought to be the uncommon, exceptional path, so + // it's OK that this algorithm will cause the stack to + // flutter while the parse engine waits for an edible token. + // if ($this->debug) echo "($type) causes a problem.\n"; + if ($this->enter_error_tolerant_state()) { - $this->eat('error', NULL); - if ($this->has_step_for($type)) $this->eat($type, $semantic); + $this->eat('error', null); + if ($this->has_step_for($type)) { + $this->eat($type, $semantic); + } } else { - # If that didn't work, give up: - throw new parse_error("Parse Error: ($type)($semantic) not expected"); + // If that didn't work, give up: + throw new parse_error("Parse Error: ({$type})({$semantic}) not expected"); } break; - - default: - throw new parse_bug("Bad parse table instruction ".htmlspecialchars($opcode)); + default: + throw new parse_bug("Bad parse table instruction " . htmlspecialchars($opcode)); } } + private function reduce($rule_id) { $rule = $this->rule[$rule_id]; $len = $rule['len']; $semantic = $this->perform_action($rule_id, $this->stack->top_n($len)); - #echo $semantic.br(); - if ($rule['replace']) $this->stack->pop_n($len); - else $this->stack->index($len); + + //echo $semantic.br(); + if ($rule['replace']) { + $this->stack->pop_n($len); + } else { + $this->stack->index($len); + } + $this->eat($rule['symbol'], $semantic); } + private function perform_action($rule_id, $slice) { - # we have this weird calling convention.... + // we have this weird calling convention.... $result = null; $method = $this->parser->method[$rule_id]; - #if ($this->debug) echo "rule $id: $method\n"; + + //if ($this->debug) echo "rule $id: $method\n"; $this->parser->$method($slice, $result); + return $result; } } diff --git a/set.so.php b/set.so.php index ef87c6c..26ab138 100644 --- a/set.so.php +++ b/set.so.php @@ -7,23 +7,52 @@ Purpose: We should really have a "set" data type. It's too useful. */ class set { - function __construct($list=array()) { $this->data = array_count_values($list); } - function has($item) { return isset($this->data[$item]); } - function add($item) { $this->data[$item] = true; } - function del($item) { unset($this->data[$item]); return $item;} - function all() { return array_keys($this->data); } - function one() { return key($this->data); } - function count() { return count($this->data); } - function pop() { return $this->del($this->one()); } - function union($that) { + public function __construct(array $list = array()) { + $this->data = array_count_values($list); + } + + public function has($item) { + return isset($this->data[$item]); + } + + public function add($item) { + $this->data[$item] = true; + } + + public function del($item) { + unset($this->data[$item]); + return $item; + } + + public function all() { + return array_keys($this->data); + } + + public function one() { + return key($this->data); + } + + public function count() { + return count($this->data); + } + + public function pop() { + return $this->del($this->one()); + } + + public function union($that) { $progress = false; - foreach ($that->all() as $item) if (!$this->has($item)) { - $this->add($item); - $progress = true; + foreach ($that->all() as $item) { + if (!$this->has($item)) { + $this->add($item); + $progress = true; + } } + return $progress; } - function text() { - return ' { '.implode(' ', $this->all()).' } '; + + public function text() { + return ' { ' . implode(' ', $this->all()) . ' } '; } }