2 // @codingStandardsIgnoreFile
3 // @ignore comment_docblock_file:file
4 // @ignore style_curly_braces:file
5 // @ignore style_string_spacing:file
6 // @ignore style_else_spacing:file
7 // @ignore comment_comment_docblock_missing:file
8 // @ignore comment_comment_eg:file
9 // @ignore production_code:file
10 // @ignore druplart_unary:file
11 // @ignore style_uppercase_constants:file
12 // @ignore comment_comment_space:file
13 // @ignore druplart_conditional_assignment:file
14 // @ignore style_paren_spacing:file
15 // @ignore style_no_tabs:file
19 * JSMinPlus version 1.4
21 * Minifies a javascript file using a javascript parser
23 * This implements a PHP port of Brendan Eich's Narcissus open source javascript engine (in javascript)
24 * References: http://en.wikipedia.org/wiki/Narcissus_(JavaScript_engine)
25 * Narcissus sourcecode: http://mxr.mozilla.org/mozilla/source/js/narcissus/
26 * JSMinPlus weblog: http://crisp.tweakblogs.net/blog/cat/716
28 * Tino Zijdel <crisp@tweakers.net>
30 * Usage: $minified = JSMinPlus::minify($script [, $filename])
32 * Versionlog (see also changelog.txt):
33 * 23-07-2011 - remove dynamic creation of OP_* and KEYWORD_* defines and declare them on top
34 * reduce memory footprint by minifying by block-scope
35 * some small byte-saving and performance improvements
36 * 12-05-2009 - fixed hook:colon precedence, fixed empty body in loop and if-constructs
37 * 18-04-2009 - fixed crashbug in PHP 5.2.9 and several other bugfixes
38 * 12-04-2009 - some small bugfixes and performance improvements
39 * 09-04-2009 - initial open sourced version 1.0
41 * Latest version of this script: http://files.tweakers.net/jsminplus/jsminplus.zip
46 /* ***** BEGIN LICENSE BLOCK *****
47 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
49 * The contents of this file are subject to the Mozilla Public License Version
50 * 1.1 (the "License"); you may not use this file except in compliance with
51 * the License. You may obtain a copy of the License at
52 * http://www.mozilla.org/MPL/
54 * Software distributed under the License is distributed on an "AS IS" basis,
55 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
56 * for the specific language governing rights and limitations under the
59 * The Original Code is the Narcissus JavaScript engine.
61 * The Initial Developer of the Original Code is
62 * Brendan Eich <brendan@mozilla.org>.
63 * Portions created by the Initial Developer are Copyright (C) 2004
64 * the Initial Developer. All Rights Reserved.
66 * Contributor(s): Tino Zijdel <crisp@tweakers.net>
67 * PHP port, modifications and minifier routine are (C) 2009-2011
69 * Alternatively, the contents of this file may be used under the terms of
70 * either the GNU General Public License Version 2 or later (the "GPL"), or
71 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
72 * in which case the provisions of the GPL or the LGPL are applicable instead
73 * of those above. If you wish to allow use of your version of this file only
74 * under the terms of either the GPL or the LGPL, and not to allow others to
75 * use your version of this file under the terms of the MPL, indicate your
76 * decision by deleting the provisions above and replace them with the notice
77 * and other provisions required by the GPL or the LGPL. If you do not delete
78 * the provisions above, a recipient may use your version of this file under
79 * the terms of any one of the MPL, the GPL or the LGPL.
81 * ***** END LICENSE BLOCK ***** */
// Token types (TOKEN_*): compared against the type reported by the tokenizer
// (e.g. in JSParser::Expression()) and against node types in parseTree().
83 define('TOKEN_END', 1);
84 define('TOKEN_NUMBER', 2);
85 define('TOKEN_IDENTIFIER', 3);
86 define('TOKEN_STRING', 4);
87 define('TOKEN_REGEXP', 5);
88 define('TOKEN_NEWLINE', 6);
89 define('TOKEN_CONDCOMMENT_START', 7);
90 define('TOKEN_CONDCOMMENT_END', 8);
// Node types (JS_*) assigned by the parser, e.g. JS_BLOCK for statement lists
// and JS_CALL for calls.
92 define('JS_SCRIPT', 100);
93 define('JS_BLOCK', 101);
94 define('JS_LABEL', 102);
95 define('JS_FOR_IN', 103);
96 define('JS_CALL', 104);
97 define('JS_NEW_WITH_ARGS', 105);
98 define('JS_INDEX', 106);
99 define('JS_ARRAY_INIT', 107);
100 define('JS_OBJECT_INIT', 108);
101 define('JS_PROPERTY_INIT', 109);
102 define('JS_GETTER', 110);
103 define('JS_SETTER', 111);
104 define('JS_GROUP', 112);
105 define('JS_LIST', 113);
// Type given to a script node by JSParser::Script() after its content has been
// handed to the minifier.
107 define('JS_MINIFIED', 999);
// Function forms passed to JSParser::FunctionDefinition() and stored on the
// function node as functionForm.
109 define('DECLARED_FORM', 0);
110 define('EXPRESSED_FORM', 1);
111 define('STATEMENT_FORM', 2);
// Operators (OP_*): each value is the operator's literal source text.
114 define('OP_SEMICOLON', ';');
115 define('OP_COMMA', ',');
116 define('OP_HOOK', '?');
117 define('OP_COLON', ':');
118 define('OP_OR', '||');
119 define('OP_AND', '&&');
120 define('OP_BITWISE_OR', '|');
121 define('OP_BITWISE_XOR', '^');
122 define('OP_BITWISE_AND', '&');
123 define('OP_STRICT_EQ', '===');
124 define('OP_EQ', '==');
125 define('OP_ASSIGN', '=');
126 define('OP_STRICT_NE', '!==');
127 define('OP_NE', '!=');
128 define('OP_LSH', '<<');
129 define('OP_LE', '<=');
130 define('OP_LT', '<');
131 define('OP_URSH', '>>>');
132 define('OP_RSH', '>>');
133 define('OP_GE', '>=');
134 define('OP_GT', '>');
135 define('OP_INCREMENT', '++');
136 define('OP_DECREMENT', '--');
137 define('OP_PLUS', '+');
138 define('OP_MINUS', '-');
139 define('OP_MUL', '*');
140 define('OP_DIV', '/');
141 define('OP_MOD', '%');
142 define('OP_NOT', '!');
143 define('OP_BITWISE_NOT', '~');
144 define('OP_DOT', '.');
145 define('OP_LEFT_BRACKET', '[');
146 define('OP_RIGHT_BRACKET', ']');
147 define('OP_LEFT_CURLY', '{');
148 define('OP_RIGHT_CURLY', '}');
149 define('OP_LEFT_PAREN', '(');
150 define('OP_RIGHT_PAREN', ')');
151 define('OP_CONDCOMMENT_END', '@*/');
// NOTE(review): 'U+' / 'U-' look like internal markers that keep unary plus and
// minus apart from the binary '+' / '-' operators - confirm against the tokenizer.
153 define('OP_UNARY_PLUS', 'U+');
154 define('OP_UNARY_MINUS', 'U-');
// Keywords (KEYWORD_*): each value is the keyword's literal source text.
157 define('KEYWORD_BREAK', 'break');
158 define('KEYWORD_CASE', 'case');
159 define('KEYWORD_CATCH', 'catch');
160 define('KEYWORD_CONST', 'const');
161 define('KEYWORD_CONTINUE', 'continue');
162 define('KEYWORD_DEBUGGER', 'debugger');
163 define('KEYWORD_DEFAULT', 'default');
164 define('KEYWORD_DELETE', 'delete');
165 define('KEYWORD_DO', 'do');
166 define('KEYWORD_ELSE', 'else');
167 define('KEYWORD_ENUM', 'enum');
168 define('KEYWORD_FALSE', 'false');
169 define('KEYWORD_FINALLY', 'finally');
170 define('KEYWORD_FOR', 'for');
171 define('KEYWORD_FUNCTION', 'function');
172 define('KEYWORD_IF', 'if');
173 define('KEYWORD_IN', 'in');
174 define('KEYWORD_INSTANCEOF', 'instanceof');
175 define('KEYWORD_NEW', 'new');
176 define('KEYWORD_NULL', 'null');
177 define('KEYWORD_RETURN', 'return');
178 define('KEYWORD_SWITCH', 'switch');
179 define('KEYWORD_THIS', 'this');
180 define('KEYWORD_THROW', 'throw');
181 define('KEYWORD_TRUE', 'true');
182 define('KEYWORD_TRY', 'try');
183 define('KEYWORD_TYPEOF', 'typeof');
184 define('KEYWORD_VAR', 'var');
185 define('KEYWORD_VOID', 'void');
186 define('KEYWORD_WHILE', 'while');
187 define('KEYWORD_WITH', 'with');
// Names that isValidIdentifier() rejects; parseTree() therefore keeps the
// quotes/brackets around them instead of emitting them as bare property names.
192 private $reserved = array(
218 // Words reserved for future use
250 // These are not reserved, but should be taken into account
251 // in isValidIdentifier (See jslint source code)
/**
 * Creates the minifier together with the parser it drives.
 *
 * The constructor is private; minify() is the code path that instantiates
 * this class.
 */
private function __construct() {
    // The parser receives this minifier so it can call back into parseTree().
    $parser = new JSParser($this);
    $this->parser = $parser;
}
/**
 * Public entry point: minifies a script.
 *
 * Forwards both arguments to min() on a JSMinPlus instance and returns
 * whatever min() returns.
 *
 * @param mixed $js       The script to minify.
 * @param mixed $filename Optional name forwarded to min(); defaults to ''.
 */
266 public static function minify($js, $filename = '') {
269 // this is a singleton
271 $instance = new JSMinPlus();
274 return $instance->min($js, $filename);
/**
 * Parses the script and serialises the resulting tree.
 *
 * On success the output of parseTree() is returned. When an Exception is
 * caught, its message is echoed followed by a newline.
 *
 * @param mixed $js       Forwarded to JSParser::parse() as first argument.
 * @param mixed $filename Forwarded to JSParser::parse() as second argument.
 */
277 private function min($js, $filename) {
// The literal 1 is parse()'s third argument, which goes to the tokenizer's
// init() - presumably the starting line number, TODO confirm.
279 $n = $this->parser->parse($js, $filename, 1);
280 return $this->parseTree($n);
282 catch (Exception $e) {
283 echo $e->getMessage() . "\n";
/**
 * Serialises a parse-tree node, and recursively its children, to minified
 * JavaScript text built up in $s.
 *
 * @param object $n               Node to serialise; its type selects the branch.
 * @param bool   $noBlockGrouping Consulted (negated) together with the statement
 *   counter $c before the block-grouping step; function bodies, case statements,
 *   do-bodies and try/catch/finally blocks are serialised with TRUE.
 * @throws Exception 'NOT IMPLEMENTED: DEBUGGER' for debugger statements and
 *   'UNKNOWN TOKEN TYPE: ...' for node types without a branch.
 */
289 public function parseTree($n, $noBlockGrouping = false) {
298 // we do nothing yet with funDecls or varDecls
299 $noBlockGrouping = true;
303 $childs = $n->treeNodes;
305 for ($c = 0, $i = 0, $j = count($childs); $i < $j; $i++) {
306 $type = $childs[$i]->type;
307 $t = $this->parseTree($childs[$i]);
312 if ($type == KEYWORD_FUNCTION && $childs[$i]->functionForm == DECLARED_FORM) {
313 // put declared functions on a new line
316 elseif ($type == KEYWORD_VAR && $type == $lastType) {
317 // multiple var-statements can go into one
// substr($t, 4) drops the first four characters (the 'var ' prefix emitted for
// var statements) so the declarations continue the previous list after a comma.
318 $t = ',' . substr($t, 4);
333 if ($c > 1 && !$noBlockGrouping) {
338 case KEYWORD_FUNCTION:
339 $s .= 'function' . ($n->name ? ' ' . $n->name : '') . '(';
340 $params = $n->params;
341 for ($i = 0, $j = count($params); $i < $j; $i++) {
342 $s .= ($i ? ',' : '') . $params[$i];
344 $s .= '){' . $this->parseTree($n->body, true) . '}';
// if statement: condition, then-part and optional else-part.
348 $s = 'if(' . $this->parseTree($n->condition) . ')';
349 $thenPart = $this->parseTree($n->thenPart);
350 $elsePart = $n->elsePart ? $this->parseTree($n->elsePart) : null;
352 // empty if-statement
353 if ($thenPart == '') {
358 // be careful and always make a block out of the thenPart; could be more optimized but is a lot of trouble
359 if ($thenPart != ';' && $thenPart[0] != '{') {
360 $thenPart = '{' . $thenPart . '}';
363 $s .= $thenPart . 'else';
365 // we could check for more, but that hardly ever applies so go for performance
366 if ($elsePart[0] != '{') {
// switch statement.
378 $s = 'switch(' . $this->parseTree($n->discriminant) . '){';
380 for ($i = 0, $j = count($cases); $i < $j; $i++) {
382 if ($case->type == KEYWORD_CASE) {
// A string label needs no separating space after the 'case' keyword.
383 $s .= 'case' . ($case->caseLabel->type != TOKEN_STRING ? ' ' : '') . $this->parseTree($case->caseLabel) . ':';
389 $statement = $this->parseTree($case->statements, true);
392 // no terminator for last statement
// for(setup;condition;update) loop; absent parts are emitted as empty strings.
402 $s = 'for(' . ($n->setup ? $this->parseTree($n->setup) : '')
403 . ';' . ($n->condition ? $this->parseTree($n->condition) : '')
404 . ';' . ($n->update ? $this->parseTree($n->update) : '') . ')';
406 $body = $this->parseTree($n->body);
// while loop.
415 $s = 'while(' . $this->parseTree($n->condition) . ')';
417 $body = $this->parseTree($n->body);
// for..in loop: the var declaration is used when present, otherwise the iterator.
426 $s = 'for(' . ($n->varDecl ? $this->parseTree($n->varDecl) : $this->parseTree($n->iterator)) . ' in ' . $this->parseTree($n->object) . ')';
428 $body = $this->parseTree($n->body);
// do..while loop; the body is always wrapped in curly braces.
437 $s = 'do{' . $this->parseTree($n->body, true) . '}while(' . $this->parseTree($n->condition) . ')';
441 case KEYWORD_CONTINUE:
442 $s = $n->value . ($n->label ? ' ' . $n->label : '');
// try with its catch clauses (optionally guarded) and optional finally block.
446 $s = 'try{' . $this->parseTree($n->tryBlock, true) . '}';
447 $catchClauses = $n->catchClauses;
448 for ($i = 0, $j = count($catchClauses); $i < $j; $i++) {
449 $t = $catchClauses[$i];
450 $s .= 'catch(' . $t->varName . ($t->guard ? ' if ' . $this->parseTree($t->guard) : '') . '){' . $this->parseTree($t->block, true) . '}';
452 if ($n->finallyBlock) {
453 $s .= 'finally{' . $this->parseTree($n->finallyBlock, true) . '}';
461 $t = $this->parseTree($n->value);
463 if ($this->isWordChar($t[0]) || $t[0] == '\\') {
// with statement.
473 $s = 'with(' . $this->parseTree($n->object) . ')' . $this->parseTree($n->body);
478 $s = $n->value . ' ';
479 $childs = $n->treeNodes;
480 for ($i = 0, $j = count($childs); $i < $j; $i++) {
482 $s .= ($i ? ',' : '') . $t->name;
483 $u = $t->initializer;
485 $s .= '=' . $this->parseTree($u);
491 case KEYWORD_INSTANCEOF:
492 $left = $this->parseTree($n->treeNodes[0]);
493 $right = $this->parseTree($n->treeNodes[1]);
497 if ($this->isWordChar(substr($left, -1))) {
503 if ($this->isWordChar($right[0]) || $right[0] == '\\') {
512 $right = $this->parseTree($n->treeNodes[0]);
516 if ($this->isWordChar($right[0]) || $right[0] == '\\') {
// void is always emitted with parentheses around its operand.
524 $s = 'void(' . $this->parseTree($n->treeNodes[0]) . ')';
527 case KEYWORD_DEBUGGER:
528 throw new Exception('NOT IMPLEMENTED: DEBUGGER');
531 case TOKEN_CONDCOMMENT_START:
532 case TOKEN_CONDCOMMENT_END:
533 $s = $n->value . ($n->type == TOKEN_CONDCOMMENT_START ? ' ' : '');
534 $childs = $n->treeNodes;
535 for ($i = 0, $j = count($childs); $i < $j; $i++) {
536 $s .= $this->parseTree($childs[$i]);
541 if ($expression = $n->expression) {
542 $s = $this->parseTree($expression);
547 $s = $n->label . ':' . $this->parseTree($n->statement);
551 $childs = $n->treeNodes;
552 for ($i = 0, $j = count($childs); $i < $j; $i++) {
553 $s .= ($i ? ',' : '') . $this->parseTree($childs[$i]);
558 $s = $this->parseTree($n->treeNodes[0]) . $n->value . $this->parseTree($n->treeNodes[1]);
562 $s = $this->parseTree($n->treeNodes[0]) . '?' . $this->parseTree($n->treeNodes[1]) . ':' . $this->parseTree($n->treeNodes[2]);
584 $s = $this->parseTree($n->treeNodes[0]) . $n->type . $this->parseTree($n->treeNodes[1]);
589 $left = $this->parseTree($n->treeNodes[0]);
590 $right = $this->parseTree($n->treeNodes[1]);
592 switch ($n->treeNodes[1]->type) {
599 $s = $left . $n->type . ' ' . $right;
603 //combine concatenated strings with same quote style
604 if ($n->type == OP_PLUS && substr($left, -1) == $right[0]) {
// Drop the closing quote of the left string and the opening quote of the right.
605 $s = substr($left, 0, -1) . substr($right, 1);
611 $s = $left . $n->type . $right;
619 $s = $n->value . $this->parseTree($n->treeNodes[0]);
625 $s = $this->parseTree($n->treeNodes[0]) . $n->value;
628 $s = $n->value . $this->parseTree($n->treeNodes[0]);
633 $s = $this->parseTree($n->treeNodes[0]) . '.' . $this->parseTree($n->treeNodes[1]);
637 $s = $this->parseTree($n->treeNodes[0]);
638 // See if we can replace named index with a dot saving 3 bytes
639 if ( $n->treeNodes[0]->type == TOKEN_IDENTIFIER &&
640 $n->treeNodes[1]->type == TOKEN_STRING &&
641 $this->isValidIdentifier(substr($n->treeNodes[1]->value, 1, -1))
643 $s .= '.' . substr($n->treeNodes[1]->value, 1, -1);
646 $s .= '[' . $this->parseTree($n->treeNodes[1]) . ']';
651 $childs = $n->treeNodes;
652 for ($i = 0, $j = count($childs); $i < $j; $i++) {
653 $s .= ($i ? ',' : '') . $this->parseTree($childs[$i]);
658 $s = $this->parseTree($n->treeNodes[0]) . '(' . $this->parseTree($n->treeNodes[1]) . ')';
662 case JS_NEW_WITH_ARGS:
// A 'new' expression is emitted with parentheses either way; they stay empty
// unless the node is a JS_NEW_WITH_ARGS.
663 $s = 'new ' . $this->parseTree($n->treeNodes[0]) . '(' . ($n->type == JS_NEW_WITH_ARGS ? $this->parseTree($n->treeNodes[1]) : '') . ')';
668 $childs = $n->treeNodes;
669 for ($i = 0, $j = count($childs); $i < $j; $i++) {
670 $s .= ($i ? ',' : '') . $this->parseTree($childs[$i]);
677 $childs = $n->treeNodes;
678 for ($i = 0, $j = count($childs); $i < $j; $i++) {
683 if ($t->type == JS_PROPERTY_INIT) {
684 // Ditch the quotes when the index is a valid identifier
685 if ( $t->treeNodes[0]->type == TOKEN_STRING &&
686 $this->isValidIdentifier(substr($t->treeNodes[0]->value, 1, -1))
688 $s .= substr($t->treeNodes[0]->value, 1, -1);
691 $s .= $t->treeNodes[0]->value;
694 $s .= ':' . $this->parseTree($t->treeNodes[1]);
697 $s .= $t->type == JS_GETTER ? 'get' : 'set';
698 $s .= ' ' . $t->name . '(';
699 $params = $t->params;
700 for ($i = 0, $j = count($params); $i < $j; $i++) {
701 $s .= ($i ? ',' : '') . $params[$i];
703 $s .= '){' . $this->parseTree($t->body, true) . '}';
// Integers made of digits 1-9 followed by three or more zeros are shortened to
// exponent notation, e.g. 5000 becomes 5e3.
711 if (preg_match('/^([1-9]+)(0{3,})$/', $s, $m)) {
712 $s = $m[1] . 'e' . strlen($m[2]);
720 case TOKEN_IDENTIFIER:
728 $n->treeNodes[0]->type,
743 $s = $this->parseTree($n->treeNodes[0]);
746 $s = '(' . $this->parseTree($n->treeNodes[0]) . ')';
751 throw new Exception('UNKNOWN TOKEN TYPE: ' . $n->type);
/**
 * Tells whether a name may be written as a bare identifier.
 *
 * parseTree() relies on this before turning obj['name'] into obj.name and
 * before dropping the quotes around an object-literal key, so only names that
 * are safe to emit unquoted may pass.
 *
 * @param string $string Candidate name (the string literal without its quotes).
 * @return bool TRUE when $string starts with an ASCII letter or underscore,
 *   continues with ASCII letters, digits or underscores only, and is not
 *   listed in $this->reserved; FALSE otherwise.
 */
private function isValidIdentifier($string) {
    // The D modifier anchors "$" at the very end of the subject. Without it
    // "$" also matches just before a trailing newline, so "name\n" would be
    // accepted and emitted as a bare (and broken) identifier.
    if (!preg_match('/^[a-zA-Z_][a-zA-Z0-9_]*$/D', $string)) {
        return false;
    }

    return !in_array($string, $this->reserved);
}
/**
 * Tells whether a character counts as a word character.
 *
 * parseTree() uses the answer to decide whether a keyword and the text next
 * to it need a separating space.
 *
 * @param string $char Character to test.
 * @return bool TRUE for '_', '$' and anything ctype_alnum() accepts.
 */
private function isWordChar($char) {
    if ($char == '_' || $char == '$') {
        return true;
    }

    return ctype_alnum($char);
}
// Precedence per operator/node type. Expression() compares the entry of the
// operator on top of its stack with the entry of the incoming token to decide
// whether to reduce first.
770 private $opPrecedence = array(
776 // The above all have to have the same precedence, see bug 330975
811 JS_NEW_WITH_ARGS => 0,
// Operand count per operator/node type; reduce() looks the popped operator up
// here to know how many operands to attach to it.
819 private $opArity = array(
857 JS_NEW_WITH_ARGS => 2,
863 TOKEN_CONDCOMMENT_START => 1,
864 TOKEN_CONDCOMMENT_END => 1,
/**
 * Creates a parser with its own tokenizer.
 *
 * @param object|null $minifier Optional minifier; when set, Script() passes
 *   every parsed scope to its parseTree() method.
 */
public function __construct($minifier = null) {
    $this->minifier = $minifier;

    $tokenizer = new JSTokenizer();
    $this->t = $tokenizer;
}
/**
 * Parses a complete script.
 *
 * Initialises the tokenizer with the three arguments, parses the input through
 * Script() with a fresh JSCompilerContext (inFunction = false) and raises a
 * syntax error when the tokenizer is not done afterwards.
 *
 * @param mixed $s First argument for the tokenizer's init(); JSMinPlus::min()
 *   passes the script here.
 * @param mixed $f Second argument for init(); JSMinPlus::min() passes the file name.
 * @param mixed $l Third argument for init(); JSMinPlus::min() passes 1 -
 *   presumably the starting line number, TODO confirm.
 */
872 public function parse($s, $f, $l) {
873 // initialize tokenizer
874 $this->t->init($s, $f, $l);
876 $x = new JSCompilerContext(false);
877 $n = $this->Script($x);
878 if (!$this->t->isDone()) {
879 throw $this->t->newSyntaxError('Syntax error');
/**
 * Parses a statement list and turns the resulting node into a script node.
 *
 * The node gets type JS_SCRIPT plus the function and variable declarations
 * collected in the context. When a minifier was passed to the constructor the
 * scope is minified right away: the node's value becomes the parseTree()
 * output, its child nodes are released and its type changes to JS_MINIFIED.
 *
 * @param JSCompilerContext $x Context of the scope being parsed.
 */
885 private function Script($x) {
886 $n = $this->Statements($x);
887 $n->type = JS_SCRIPT;
888 $n->funDecls = $x->funDecls;
889 $n->varDecls = $x->varDecls;
892 if ($this->minifier) {
893 $n->value = $this->minifier->parseTree($n);
895 // clear tree from node to save memory
896 $n->treeNodes = null;
900 $n->type = JS_MINIFIED;
/**
 * Parses consecutive statements into a new JS_BLOCK node.
 *
 * Parsing continues until the tokenizer is done or the next token is a closing
 * curly brace. The block node sits on $x->stmtStack while its statements are
 * parsed and is popped again afterwards.
 *
 * @param JSCompilerContext $x Context of the scope being parsed.
 */
906 private function Statements($x) {
907 $n = new JSNode($this->t, JS_BLOCK);
908 array_push($x->stmtStack, $n);
910 while (!$this->t->isDone() && $this->t->peek() != OP_RIGHT_CURLY) {
911 $n->addNode($this->Statement($x));
914 array_pop($x->stmtStack);
/**
 * Parses a statement list enclosed in curly braces.
 *
 * Both braces are mandatory (mustMatch); the statements in between are parsed
 * by Statements().
 *
 * @param JSCompilerContext $x Context of the scope being parsed.
 */
919 private function Block($x) {
920 $this->t->mustMatch(OP_LEFT_CURLY);
921 $n = $this->Statements($x);
922 $this->t->mustMatch(OP_RIGHT_CURLY);
/**
 * Parses a single statement, starting from the next token read from the
 * tokenizer.
 *
 * Nodes of compound statements are pushed on $x->stmtStack while their nested
 * statements are parsed (directly or through nest()), which is what the
 * break/continue and label checks below walk through.
 *
 * @param JSCompilerContext $x Context of the scope being parsed.
 */
927 private function Statement($x) {
928 $tt = $this->t->get();
931 // Cases for statements ending in a right curly return early, avoiding the
932 // common semicolon insertion magic after this switch.
934 case KEYWORD_FUNCTION:
935 return $this->FunctionDefinition(
// With more than one node on the statement stack the function is nested inside
// another statement, so it is parsed in STATEMENT_FORM rather than DECLARED_FORM.
938 count($x->stmtStack) > 1 ? STATEMENT_FORM : DECLARED_FORM
943 $n = $this->Statements($x);
944 $this->t->mustMatch(OP_RIGHT_CURLY);
// if statement: the node stays on the statement stack while both branches are parsed.
948 $n = new JSNode($this->t);
949 $n->condition = $this->ParenExpression($x);
950 array_push($x->stmtStack, $n);
951 $n->thenPart = $this->Statement($x);
952 $n->elsePart = $this->t->match(KEYWORD_ELSE) ? $this->Statement($x) : null;
953 array_pop($x->stmtStack);
// switch statement: discriminant in parentheses, then the cases in curly braces.
957 $n = new JSNode($this->t);
958 $this->t->mustMatch(OP_LEFT_PAREN);
959 $n->discriminant = $this->Expression($x);
960 $this->t->mustMatch(OP_RIGHT_PAREN);
// -1 means no default clause has been seen yet.
962 $n->defaultIndex = -1;
964 array_push($x->stmtStack, $n);
966 $this->t->mustMatch(OP_LEFT_CURLY);
968 while (($tt = $this->t->get()) != OP_RIGHT_CURLY) {
970 case KEYWORD_DEFAULT:
971 if ($n->defaultIndex >= 0) {
972 throw $this->t->newSyntaxError('More than one switch default');
976 $n2 = new JSNode($this->t);
977 if ($tt == KEYWORD_DEFAULT) {
978 $n->defaultIndex = count($n->cases);
981 $n2->caseLabel = $this->Expression($x, OP_COLON);
985 throw $this->t->newSyntaxError('Invalid switch case');
988 $this->t->mustMatch(OP_COLON);
989 $n2->statements = new JSNode($this->t, JS_BLOCK);
// Collect statements until the next case/default label or the closing brace.
990 while (($tt = $this->t->peek()) != KEYWORD_CASE && $tt != KEYWORD_DEFAULT && $tt != OP_RIGHT_CURLY) {
991 $n2->statements->addNode($this->Statement($x));
994 array_push($n->cases, $n2);
997 array_pop($x->stmtStack);
// for and for..in loops share this branch; which one it is only becomes clear
// after the part before the first ';' or 'in' has been parsed.
1001 $n = new JSNode($this->t);
1003 $this->t->mustMatch(OP_LEFT_PAREN);
1005 if (($tt = $this->t->peek()) != OP_SEMICOLON) {
1006 $x->inForLoopInit = true;
1007 if ($tt == KEYWORD_VAR || $tt == KEYWORD_CONST) {
1009 $n2 = $this->Variables($x);
1012 $n2 = $this->Expression($x);
1014 $x->inForLoopInit = false;
1017 if ($n2 && $this->t->match(KEYWORD_IN)) {
1018 $n->type = JS_FOR_IN;
1019 if ($n2->type == KEYWORD_VAR) {
1020 if (count($n2->treeNodes) != 1) {
// NOTE(review): every other error in this method is created with
// newSyntaxError(); SyntaxError() may be a typo - confirm that the tokenizer
// defines such a method.
1021 throw $this->t->SyntaxError(
1022 'Invalid for..in left-hand side',
1028 // NB: n2[0].type == IDENTIFIER and n2[0].value == n2[0].name.
1029 $n->iterator = $n2->treeNodes[0];
1037 $n->object = $this->Expression($x);
1040 $n->setup = $n2 ? $n2 : null;
1041 $this->t->mustMatch(OP_SEMICOLON);
1042 $n->condition = $this->t->peek() == OP_SEMICOLON ? null : $this->Expression($x);
1043 $this->t->mustMatch(OP_SEMICOLON);
1044 $n->update = $this->t->peek() == OP_RIGHT_PAREN ? null : $this->Expression($x);
1047 $this->t->mustMatch(OP_RIGHT_PAREN);
1048 $n->body = $this->nest($x, $n);
// Condition first, then the body.
1052 $n = new JSNode($this->t);
1054 $n->condition = $this->ParenExpression($x);
1055 $n->body = $this->nest($x, $n);
// Body first (nest() is told to expect the 'while' keyword after it), then the condition.
1059 $n = new JSNode($this->t);
1061 $n->body = $this->nest($x, $n, KEYWORD_WHILE);
1062 $n->condition = $this->ParenExpression($x);
1063 if (!$x->ecmaStrictMode) {
1064 // <script language="JavaScript"> (without version hints) may need
1065 // automatic semicolon insertion without a newline after do-while.
1066 // See http://bugzilla.mozilla.org/show_bug.cgi?id=238945.
1067 $this->t->match(OP_SEMICOLON);
1073 case KEYWORD_CONTINUE:
1074 $n = new JSNode($this->t);
// A label only belongs to the statement when it follows on the same line.
1076 if ($this->t->peekOnSameLine() == TOKEN_IDENTIFIER) {
1078 $n->label = $this->t->currentToken()->value;
1081 $ss = $x->stmtStack;
1087 throw $this->t->newSyntaxError('Label not found');
1089 } while ($ss[$i]->label != $label);
1094 throw $this->t->newSyntaxError('Invalid ' . $tt);
// Stop at the first enclosing loop; for break, an enclosing switch also qualifies.
1096 } while (!$ss[$i]->isLoop && ($tt != KEYWORD_BREAK || $ss[$i]->type != KEYWORD_SWITCH));
1099 $n->target = $ss[$i];
// try statement: block, any number of catch clauses, optional finally block.
1103 $n = new JSNode($this->t);
1104 $n->tryBlock = $this->Block($x);
1105 $n->catchClauses = array();
1107 while ($this->t->match(KEYWORD_CATCH)) {
1108 $n2 = new JSNode($this->t);
1109 $this->t->mustMatch(OP_LEFT_PAREN);
1110 $n2->varName = $this->t->mustMatch(TOKEN_IDENTIFIER)->value;
// Optional guard: catch (e if condition).
1112 if ($this->t->match(KEYWORD_IF)) {
1113 if ($x->ecmaStrictMode) {
1114 throw $this->t->newSyntaxError('Illegal catch guard');
1117 if (count($n->catchClauses) && !end($n->catchClauses)->guard) {
1118 throw $this->t->newSyntaxError('Guarded catch after unguarded');
1121 $n2->guard = $this->Expression($x);
1127 $this->t->mustMatch(OP_RIGHT_PAREN);
1128 $n2->block = $this->Block($x);
1129 array_push($n->catchClauses, $n2);
1132 if ($this->t->match(KEYWORD_FINALLY)) {
1133 $n->finallyBlock = $this->Block($x);
// A try needs at least one catch clause or a finally block.
1136 if (!count($n->catchClauses) && !$n->finallyBlock) {
1137 throw $this->t->newSyntaxError('Invalid try statement');
1142 case KEYWORD_FINALLY:
1143 throw $this->t->newSyntaxError($tt . ' without preceding try');
1146 $n = new JSNode($this->t);
1147 $n->value = $this->Expression($x);
1150 case KEYWORD_RETURN:
1151 if (!$x->inFunction) {
1152 throw $this->t->newSyntaxError('Invalid return');
1155 $n = new JSNode($this->t);
// A return value is only parsed when an expression starts on the same line.
1156 $tt = $this->t->peekOnSameLine();
1157 if ($tt != TOKEN_END && $tt != TOKEN_NEWLINE && $tt != OP_SEMICOLON && $tt != OP_RIGHT_CURLY) {
1158 $n->value = $this->Expression($x);
1166 $n = new JSNode($this->t);
1167 $n->object = $this->ParenExpression($x);
1168 $n->body = $this->nest($x, $n);
1173 $n = $this->Variables($x);
1176 case TOKEN_CONDCOMMENT_START:
1177 case TOKEN_CONDCOMMENT_END:
1178 $n = new JSNode($this->t);
1181 case KEYWORD_DEBUGGER:
1182 $n = new JSNode($this->t);
1187 $n = new JSNode($this->t, OP_SEMICOLON);
1188 $n->expression = null;
// An identifier directly followed by a colon starts a labelled statement. The
// lookahead is done with scanOperand switched off and restored right after.
1192 if ($tt == TOKEN_IDENTIFIER) {
1193 $this->t->scanOperand = false;
1194 $tt = $this->t->peek();
1195 $this->t->scanOperand = true;
1196 if ($tt == OP_COLON) {
1197 $label = $this->t->currentToken()->value;
1198 $ss = $x->stmtStack;
1199 for ($i = count($ss) - 1; $i >= 0; --$i) {
1200 if ($ss[$i]->label == $label) {
1201 throw $this->t->newSyntaxError('Duplicate label');
1206 $n = new JSNode($this->t, JS_LABEL);
1208 $n->statement = $this->nest($x, $n);
1214 $n = new JSNode($this->t, OP_SEMICOLON);
1216 $n->expression = $this->Expression($x);
1217 $n->end = $n->expression->end;
// When the tokenizer is still on the line of the current token, only end of
// input, a newline, ';' or '}' may follow the statement.
1221 if ($this->t->lineno == $this->t->currentToken()->lineno) {
1222 $tt = $this->t->peekOnSameLine();
1223 if ($tt != TOKEN_END && $tt != TOKEN_NEWLINE && $tt != OP_SEMICOLON && $tt != OP_RIGHT_CURLY) {
1224 throw $this->t->newSyntaxError('Missing ; before statement');
1228 $this->t->match(OP_SEMICOLON);
/**
 * Parses a function: optional name, parameter list and body.
 *
 * Besides function statements and expressions this also handles property
 * accessors: when the current token is not the function keyword, the node type
 * becomes JS_GETTER for 'get' and JS_SETTER otherwise.
 *
 * @param JSCompilerContext $x Enclosing context; functions in DECLARED_FORM are
 *   appended to its funDecls.
 * @param bool $requireName When truthy, a missing name is a syntax error.
 * @param int  $functionForm One of DECLARED_FORM, EXPRESSED_FORM or
 *   STATEMENT_FORM; stored on the node as functionForm.
 */
1233 private function FunctionDefinition($x, $requireName, $functionForm) {
1234 $f = new JSNode($this->t);
1236 if ($f->type != KEYWORD_FUNCTION) {
1237 $f->type = ($f->value == 'get') ? JS_GETTER : JS_SETTER;
1240 if ($this->t->match(TOKEN_IDENTIFIER)) {
1241 $f->name = $this->t->currentToken()->value;
1243 elseif ($requireName) {
1244 throw $this->t->newSyntaxError('Missing function identifier');
1247 $this->t->mustMatch(OP_LEFT_PAREN);
1248 $f->params = array();
// Formal parameters: identifiers separated by commas, up to the closing parenthesis.
1250 while (($tt = $this->t->get()) != OP_RIGHT_PAREN) {
1251 if ($tt != TOKEN_IDENTIFIER) {
1252 throw $this->t->newSyntaxError('Missing formal parameter');
1255 array_push($f->params, $this->t->currentToken()->value);
1257 if ($this->t->peek() != OP_RIGHT_PAREN) {
1258 $this->t->mustMatch(OP_COMMA);
1262 $this->t->mustMatch(OP_LEFT_CURLY);
// The body is parsed as a script of its own, in a new context flagged as inFunction.
1264 $x2 = new JSCompilerContext(true);
1265 $f->body = $this->Script($x2);
1267 $this->t->mustMatch(OP_RIGHT_CURLY);
1268 $f->end = $this->t->currentToken()->end;
1270 $f->functionForm = $functionForm;
1271 if ($functionForm == DECLARED_FORM) {
1272 array_push($x->funDecls, $f);
/**
 * Parses a comma-separated list of variable declarations.
 *
 * Every declaration is an identifier with an optional '=' initializer. Each
 * declaration node is appended to $x->varDecls.
 *
 * @param JSCompilerContext $x Context of the scope being parsed.
 */
1278 private function Variables($x) {
1279 $n = new JSNode($this->t);
1282 $this->t->mustMatch(TOKEN_IDENTIFIER);
1284 $n2 = new JSNode($this->t);
1285 $n2->name = $n2->value;
1287 if ($this->t->match(OP_ASSIGN)) {
// A token carrying an assignOp is rejected here: only a plain '=' may
// initialise a variable.
1288 if ($this->t->currentToken()->assignOp) {
1289 throw $this->t->newSyntaxError('Invalid variable initialization');
// The initializer stops at a comma so the next declaration can follow.
1292 $n2->initializer = $this->Expression($x, OP_COMMA);
1295 $n2->readOnly = $n->type == KEYWORD_CONST;
1298 array_push($x->varDecls, $n2);
1299 } while ($this->t->match(OP_COMMA));
/**
 * Parses an expression with an operator stack and an operand stack.
 *
 * Tokens are read until the end of input or until a token ends the expression.
 * Operators are pushed on $operators and folded into operand nodes by reduce()
 * according to $opPrecedence. The nesting levels recorded at entry tell whether
 * a closing bracket, or the $stop token, belongs to this expression or to an
 * enclosing one.
 *
 * @param JSCompilerContext $x Context of the scope being parsed.
 * @param mixed $stop Optional token type that ends the expression when it is
 *   met at the nesting level the call started on.
 * @return mixed The operand left on the stack once all pending operators have
 *   been reduced.
 */
1304 private function Expression($x, $stop = false) {
1305 $operators = array();
1306 $operands = array();
// Nesting levels at entry, compared against the context further down.
1309 $bl = $x->bracketLevel;
1310 $cl = $x->curlyLevel;
1311 $pl = $x->parenLevel;
1312 $hl = $x->hookLevel;
1314 while (($tt = $this->t->get()) != TOKEN_END) {
1316 $x->bracketLevel == $bl &&
1317 $x->curlyLevel == $cl &&
1318 $x->parenLevel == $pl &&
1319 $x->hookLevel == $hl
1321 // Stop only if tt matches the optional stop parameter, and that
1322 // token is not quoted by some kind of bracket.
1328 // NB: cannot be empty, Statement handled that.
1332 if ($this->t->scanOperand) {
1336 while ( !empty($operators) &&
1337 $this->opPrecedence[end($operators)->type] > $this->opPrecedence[$tt]
1339 $this->reduce($operators, $operands);
1342 array_push($operators, new JSNode($this->t));
1345 $this->t->scanOperand = true;
1346 $n = $this->Expression($x);
1348 if (!$this->t->match(OP_COLON)) {
1353 array_push($operands, $n);
1357 if ($x->hookLevel) {
1361 throw $this->t->newSyntaxError('Invalid label');
1365 if ($this->t->scanOperand) {
1369 // Use >, not >=, for right-associative ASSIGN
1370 while ( !empty($operators) &&
1371 $this->opPrecedence[end($operators)->type] > $this->opPrecedence[$tt]
1373 $this->reduce($operators, $operands);
1376 array_push($operators, new JSNode($this->t));
1377 end($operands)->assignOp = $this->t->currentToken()->assignOp;
1378 $this->t->scanOperand = true;
1382 // An in operator should not be parsed if we're parsing the head of
1383 // a for (...) loop, unless it is in the then part of a conditional
1384 // expression, or parenthesized somehow.
1385 if ($x->inForLoopInit && !$x->hookLevel &&
1386 !$x->bracketLevel && !$x->curlyLevel &&
1393 // A comma operator should not be parsed if we're parsing the then part
1394 // of a conditional expression unless it's parenthesized somehow.
1395 if ($tt == OP_COMMA && $x->hookLevel &&
1396 !$x->bracketLevel && !$x->curlyLevel &&
1401 // Treat comma as left-associative so reduce can fold left-heavy
1402 // COMMA trees into a single array.
1407 case OP_BITWISE_XOR:
1408 case OP_BITWISE_AND:
1417 case KEYWORD_INSTANCEOF:
1427 if ($this->t->scanOperand) {
// >= (rather than >) reduces operators of equal precedence first, which makes
// these operators left-associative.
1431 while ( !empty($operators) &&
1432 $this->opPrecedence[end($operators)->type] >= $this->opPrecedence[$tt]
1434 $this->reduce($operators, $operands);
// Member access is built immediately: the identifier after the dot is
// mandatory and becomes the second child of the OP_DOT node.
1437 if ($tt == OP_DOT) {
1438 $this->t->mustMatch(TOKEN_IDENTIFIER);
1439 array_push($operands, new JSNode($this->t, OP_DOT, array_pop($operands), new JSNode($this->t)));
1442 array_push($operators, new JSNode($this->t));
1443 $this->t->scanOperand = true;
1447 case KEYWORD_DELETE:
1449 case KEYWORD_TYPEOF:
1451 case OP_BITWISE_NOT:
1453 case OP_UNARY_MINUS:
1455 if (!$this->t->scanOperand) {
1459 array_push($operators, new JSNode($this->t));
1464 if ($this->t->scanOperand) {
1465 array_push($operators, new JSNode($this->t)); // prefix increment or decrement
1468 // Don't cross a line boundary for postfix {in,de}crement.
// "& 3" wraps the computed index into the range 0..3 of the tokenizer's token
// array - presumably a four-slot ring buffer, TODO confirm in JSTokenizer.
1469 $t = $this->t->tokens[($this->t->tokenIndex + $this->t->lookahead - 1) & 3];
1470 if ($t && $t->lineno != $this->t->lineno) {
1474 if (!empty($operators)) {
1475 // Use >, not >=, so postfix has higher precedence than prefix.
1476 while ($this->opPrecedence[end($operators)->type] > $this->opPrecedence[$tt]) {
1477 $this->reduce($operators, $operands);
1481 $n = new JSNode($this->t, $tt, array_pop($operands));
1483 array_push($operands, $n);
1487 case KEYWORD_FUNCTION:
1488 if (!$this->t->scanOperand) {
// Function expression: the name is optional.
1492 array_push($operands, $this->FunctionDefinition($x, false, EXPRESSED_FORM));
1493 $this->t->scanOperand = false;
1500 case TOKEN_IDENTIFIER:
1504 if (!$this->t->scanOperand) {
1508 array_push($operands, new JSNode($this->t));
1509 $this->t->scanOperand = false;
1512 case TOKEN_CONDCOMMENT_START:
1513 case TOKEN_CONDCOMMENT_END:
1514 if ($this->t->scanOperand) {
1515 array_push($operators, new JSNode($this->t));
1518 array_push($operands, new JSNode($this->t));
1522 case OP_LEFT_BRACKET:
1523 if ($this->t->scanOperand) {
1524 // Array initialiser. Parse using recursive descent, as the
1525 // sub-grammar here is not an operator grammar.
1526 $n = new JSNode($this->t, JS_ARRAY_INIT);
1527 while (($tt = $this->t->peek()) != OP_RIGHT_BRACKET) {
1528 if ($tt == OP_COMMA) {
1534 $n->addNode($this->Expression($x, OP_COMMA));
1535 if (!$this->t->match(OP_COMMA)) {
1540 $this->t->mustMatch(OP_RIGHT_BRACKET);
1541 array_push($operands, $n);
1542 $this->t->scanOperand = false;
1545 // Property indexing operator.
1546 array_push($operators, new JSNode($this->t, JS_INDEX));
1547 $this->t->scanOperand = true;
1552 case OP_RIGHT_BRACKET:
1553 if ($this->t->scanOperand || $x->bracketLevel == $bl) {
// Reduce until the JS_INDEX operator pushed for the opening bracket has been folded.
1557 while ($this->reduce($operators, $operands)->type != JS_INDEX) {
1565 if (!$this->t->scanOperand) {
1569 // Object initialiser. As for array initialisers (see above),
1570 // parse using recursive descent.
1572 $n = new JSNode($this->t, JS_OBJECT_INIT);
1573 while (!$this->t->match(OP_RIGHT_CURLY)) {
1575 $tt = $this->t->get();
1576 $tv = $this->t->currentToken()->value;
// 'get' or 'set' followed by an identifier introduces a property accessor.
1577 if (($tv == 'get' || $tv == 'set') && $this->t->peek() == TOKEN_IDENTIFIER) {
1578 if ($x->ecmaStrictMode) {
1579 throw $this->t->newSyntaxError('Illegal property accessor');
1582 $n->addNode($this->FunctionDefinition($x, true, EXPRESSED_FORM));
1586 case TOKEN_IDENTIFIER:
1589 $id = new JSNode($this->t);
1592 case OP_RIGHT_CURLY:
1593 if ($x->ecmaStrictMode) {
1594 throw $this->t->newSyntaxError('Illegal trailing ,');
1599 throw $this->t->newSyntaxError('Invalid property name');
1602 $this->t->mustMatch(OP_COLON);
1603 $n->addNode(new JSNode($this->t, JS_PROPERTY_INIT, $id, $this->Expression($x, OP_COMMA)));
1605 } while ($this->t->match(OP_COMMA));
1607 $this->t->mustMatch(OP_RIGHT_CURLY);
1611 array_push($operands, $n);
1612 $this->t->scanOperand = false;
1616 case OP_RIGHT_CURLY:
1617 if (!$this->t->scanOperand && $x->curlyLevel != $cl) {
1618 throw new Exception('PANIC: right curly botch');
1623 if ($this->t->scanOperand) {
1624 array_push($operators, new JSNode($this->t, JS_GROUP));
1627 while ( !empty($operators) &&
1628 $this->opPrecedence[end($operators)->type] > $this->opPrecedence[KEYWORD_NEW]
1630 $this->reduce($operators, $operands);
1633 // Handle () now, to regularize the n-ary case for n > 0.
1634 // We must set scanOperand in case there are arguments and
1635 // the first one is a regexp or unary+/-.
1636 $n = end($operators);
1637 $this->t->scanOperand = true;
1638 if ($this->t->match(OP_RIGHT_PAREN)) {
1639 if ($n && $n->type == KEYWORD_NEW) {
1640 array_pop($operators);
1641 $n->addNode(array_pop($operands));
// Call without arguments: the callee is paired with an empty JS_LIST.
1644 $n = new JSNode($this->t, JS_CALL, array_pop($operands), new JSNode($this->t, JS_LIST));
1647 array_push($operands, $n);
1648 $this->t->scanOperand = false;
1652 if ($n && $n->type == KEYWORD_NEW) {
1653 $n->type = JS_NEW_WITH_ARGS;
1656 array_push($operators, new JSNode($this->t, JS_CALL));
1663 case OP_RIGHT_PAREN:
1664 if ($this->t->scanOperand || $x->parenLevel == $pl) {
// Reduce until the operator pushed for the matching '(' has been folded.
1668 while (($tt = $this->reduce($operators, $operands)->type) != JS_GROUP &&
1669 $tt != JS_CALL && $tt != JS_NEW_WITH_ARGS
1674 if ($tt != JS_GROUP) {
1675 $n = end($operands);
// Normalise the arguments into a JS_LIST: a single argument is wrapped in a
// new list node, a comma node is retyped.
1676 if ($n->treeNodes[1]->type != OP_COMMA) {
1677 $n->treeNodes[1] = new JSNode($this->t, JS_LIST, $n->treeNodes[1]);
1680 $n->treeNodes[1]->type = JS_LIST;
1687 // Automatic semicolon insertion means we may scan across a newline
1688 // and into the beginning of another statement. If so, break out of
1689 // the while loop and let the t.scanOperand logic handle errors.
// After the loop: levels that differ from the ones recorded at entry mean an
// opening token was never closed.
1695 if ($x->hookLevel != $hl) {
1696 throw $this->t->newSyntaxError('Missing : in conditional expression');
1699 if ($x->parenLevel != $pl) {
1700 throw $this->t->newSyntaxError('Missing ) in parenthetical');
1703 if ($x->bracketLevel != $bl) {
1704 throw $this->t->newSyntaxError('Missing ] in index expression');
1707 if ($this->t->scanOperand) {
1708 throw $this->t->newSyntaxError('Missing operand');
1711 // Resume default mode, scanning for operands, not operators.
1712 $this->t->scanOperand = true;
// Fold whatever operators are still pending.
1715 while (count($operators)) {
1716 $this->reduce($operators, $operands);
1719 return array_pop($operands);
/**
 * Parses an expression that must be wrapped in parentheses.
 *
 * @param object $x
 *   Compiler context, passed through unchanged to Expression().
 *
 * @return mixed
 *   Whatever Expression() returned for the content between the parentheses.
 *
 * @throws Exception
 *   From mustMatch() when the opening or closing parenthesis is missing.
 */
private function ParenExpression($x) {
  $this->t->mustMatch(OP_LEFT_PAREN);
  $n = $this->Expression($x);
  $this->t->mustMatch(OP_RIGHT_PAREN);

  return $n;
}
/**
 * Statement stack and nested statement handler.
 *
 * Keeps $node on $x->stmtStack for as long as one nested statement is being
 * parsed, then removes it again.
 *
 * @param object $x
 *   Compiler context owning the stmtStack array.
 * @param mixed $node
 *   The enclosing statement node to push while the nested statement is parsed.
 * @param mixed $end
 *   Token type that has to follow the nested statement, or FALSE when nothing
 *   is required.
 *
 * @return mixed
 *   The result of statement() for the nested statement.
 *
 * @throws Exception
 *   From mustMatch() when $end is given and the next token differs.
 */
private function nest($x, $node, $end = false) {
  array_push($x->stmtStack, $node);
  $n = $this->statement($x);
  array_pop($x->stmtStack);

  // The default FALSE is not a token type, so only match when asked to.
  if ($end) {
    $this->t->mustMatch($end);
  }

  return $n;
}
/**
 * Pops one operator and folds its operands into a single operand node.
 *
 * @param array $operators
 *   Operator stack, modified in place (the top entry is removed).
 * @param array $operands
 *   Operand stack, modified in place (operands are consumed and the
 *   resulting node becomes the new top entry).
 *
 * @return mixed
 *   The node that now sits on top of $operands.
 */
private function reduce(&$operators, &$operands) {
  $n = array_pop($operators);
  $op = $n->type;
  $arity = $this->opArity[$op];
  $c = count($operands);

  // NOTE(review): an arity of -2 is taken to mark left-associative binary
  // operators whose chains are flattened, as in Narcissus - confirm against
  // the opArity table.
  if ($arity == -2) {
    // Flatten left-associative trees
    if ($c >= 2) {
      $left = $operands[$c - 2];
      if ($left->type == $op) {
        // Same operator as the left operand: extend that node instead of
        // nesting a new one; $left stays on the operand stack.
        $right = array_pop($operands);
        $left->addNode($right);

        return $left;
      }
    }
    $arity = 2;
  }

  // Always use push to add operands to n, to update start and end
  $a = array_splice($operands, $c - $arity);
  for ($i = 0; $i < $arity; $i++) {
    $n->addNode($a[$i]);
  }

  // Include closing bracket or postfix operator in [start,end]
  $te = $this->t->currentToken()->end;
  if ($n->end < $te) {
    $n->end = $te;
  }

  array_push($operands, $n);

  return $n;
}
/**
 * Mutable parsing state shared by the parser routines for one scope.
 */
class JSCompilerContext {
  // TRUE when the code being parsed is a function body.
  public $inFunction = false;
  public $inForLoopInit = false;
  public $ecmaStrictMode = false;

  // Nesting depth counters; the expression parser compares them against the
  // values it saw on entry to detect unbalanced [], {}, () and ?: constructs.
  public $bracketLevel = 0;
  public $curlyLevel = 0;
  public $parenLevel = 0;
  public $hookLevel = 0;

  // Enclosing statement nodes, innermost last.
  public $stmtStack = array();
  public $funDecls = array();
  public $varDecls = array();

  /**
   * @param bool $inFunction
   *   Initial value for the inFunction flag.
   */
  public function __construct($inFunction) {
    $this->inFunction = $inFunction;
  }

}
// Child nodes in the order they were passed to addNode().
public $treeNodes = array();
// NOTE(review): presumably the function declarations collected for the scope
// this node represents; nothing in this section writes to it - confirm.
public $funDecls = array();
// NOTE(review): presumably the matching list of variable declarations - confirm.
public $varDecls = array();
/**
 * Creates a node positioned at the tokenizer's current token.
 *
 * Any arguments after $type are appended as child nodes via addNode().
 *
 * @param object $t
 *   Tokenizer; its current token supplies type, value, line and offsets.
 * @param mixed $type
 *   Node type. A falsy value means "use the type of the current token".
 */
public function __construct($t, $type = 0) {
  if ($token = $t->currentToken()) {
    $this->type = $type ? $type : $token->type;
    $this->value = $token->value;
    $this->lineno = $token->lineno;
    $this->start = $token->start;
    $this->end = $token->end;
  }
  else {
    // No token has been read yet: only the type and the tokenizer's line
    // number are known.
    $this->type = $type;
    $this->lineno = $t->lineno;
  }

  if (($numargs = func_num_args()) > 2) {
    $args = func_get_args();
    for ($i = 2; $i < $numargs; $i++) {
      $this->addNode($args[$i]);
    }
  }
}
// we don't want to bloat our object with all kind of specific properties, so we use overloading
/**
 * Stores a value under a property name that is not directly writable.
 *
 * @param string $name
 *   Property name.
 * @param mixed $value
 *   Value to store.
 */
public function __set($name, $value) {
  $this->$name = $value;
}
/**
 * Reads a property that is not directly accessible.
 *
 * @param string $name
 *   Property name.
 *
 * @return mixed
 *   The stored value, or NULL when the property is unset or holds NULL.
 */
public function __get($name) {
  if (isset($this->$name)) {
    return $this->$name;
  }

  return null;
}
/**
 * Appends a child node and widens this node's [start, end] range to cover it.
 *
 * @param mixed $node
 *   Child node, or NULL. A NULL child is still appended to treeNodes (so the
 *   child positions stay stable) but does not affect start/end.
 */
public function addNode($node) {
  if ($node !== null) {
    if ($node->start < $this->start) {
      $this->start = $node->start;
    }
    if ($this->end < $node->end) {
      $this->end = $node->end;
    }
  }

  $this->treeNodes[] = $node;
}
// Offset into $this->source of the next unread character (see getInput()).
private $cursor = 0;
// Token slots; get() and unget() address them with an index masked by "& 3",
// i.e. at most four slots are in use.
public $tokens = array();
// Slot of the current token inside $tokens.
public $tokenIndex = 0;
// Number of tokens pushed back by unget() that get() has not replayed yet.
public $lookahead = 0;
// When TRUE, get() leaves "\n" out of the skipped whitespace and reports it
// as TOKEN_NEWLINE.
public $scanNewlines = false;
// TRUE while an operand is expected: get() then tries "/" as the start of a
// regular expression literal and reports "+" / "-" as unary operators.
public $scanOperand = true;
// Words that get() reports under their own text as token type instead of
// TOKEN_IDENTIFIER.
// NOTE(review): the elements and the end of this array literal are not
// present in this section.
private $keywords = array(
// Operator strings that the constructor joins into $this->opRegExp.
// NOTE(review): the elements and the end of this array literal are not
// present in this section.
private $opTypeNames = array(
// Operators that form a compound assignment when directly followed by "=".
private $assignOps = array('|', '^', '&', '<<', '>>', '>>>', '+', '-', '*', '/', '%');
/**
 * Builds the operator-matching regular expression.
 *
 * The pattern is an alternation of all entries of $this->opTypeNames,
 * anchored at the start of the subject. PCRE takes the first alternative
 * that matches, so the order of $this->opTypeNames decides which operator
 * wins when one is a prefix of another.
 */
public function __construct() {
  $this->opRegExp = '#^(' . implode('|', array_map('preg_quote', $this->opTypeNames)) . ')#';
}
/**
 * Loads a new source text and resets all scanner state.
 *
 * @param string $source
 *   JavaScript source to tokenize.
 * @param string $filename
 *   Name used in error messages; an empty value is shown as "[inline]".
 * @param int $lineno
 *   Line number assigned to the first line of $source.
 */
public function init($source, $filename = '', $lineno = 1) {
  $this->source = $source;
  $this->filename = $filename ? $filename : '[inline]';
  $this->lineno = $lineno;

  // Start reading at the beginning again; without this a reused tokenizer
  // would continue at the offset where the previous source ended.
  $this->cursor = 0;
  $this->tokens = array();
  $this->tokenIndex = 0;
  $this->lookahead = 0;
  $this->scanNewlines = false;
  $this->scanOperand = true;
}
/**
 * Returns unread source text starting at the cursor.
 *
 * @param int|null $chunksize
 *   Maximum number of bytes to return; a falsy value returns everything
 *   that is left.
 *
 * @return string
 *   The requested slice of the source.
 */
public function getInput($chunksize) {
  if ($chunksize) {
    return substr($this->source, $this->cursor, $chunksize);
  }

  return substr($this->source, $this->cursor);
}
/**
 * Tells whether the next token is the end-of-input marker.
 *
 * @return bool
 *   TRUE when peek() reports TOKEN_END.
 */
public function isDone() {
  return $this->peek() == TOKEN_END;
}
/**
 * Consumes the next token only if it has the given type.
 *
 * @param mixed $tt
 *   Expected token type.
 *
 * @return bool
 *   TRUE when the token matched and was consumed. Otherwise the token is
 *   pushed back with unget(), which returns nothing, so the result is FALSE.
 */
public function match($tt) {
  return $this->get() == $tt || $this->unget();
}
/**
 * Consumes the next token, which has to be of the given type.
 *
 * @param mixed $tt
 *   Required token type.
 *
 * @return mixed
 *   The token that was matched (the tokenizer's current token).
 *
 * @throws Exception
 *   When the next token has a different type.
 */
public function mustMatch($tt) {
  if (!$this->match($tt)) {
    throw $this->newSyntaxError('Unexpected token; token ' . $tt . ' expected');
  }

  return $this->currentToken();
}
/**
 * Returns the type of the next token without consuming it.
 *
 * @return mixed
 *   Type of the upcoming token. While newlines are being scanned, a
 *   pushed-back token that lies on another line than the tokenizer's current
 *   line is reported as TOKEN_NEWLINE.
 */
public function peek() {
  if ($this->lookahead) {
    // A token is already buffered: inspect it in place.
    $next = $this->tokens[($this->tokenIndex + $this->lookahead) & 3];
    if ($this->scanNewlines && $next->lineno != $this->lineno) {
      $tt = TOKEN_NEWLINE;
    }
    else {
      $tt = $next->type;
    }
  }
  else {
    // Nothing buffered: read one token and push it straight back.
    $tt = $this->get();
    $this->unget();
  }

  return $tt;
}
/**
 * Like peek(), but with newline scanning switched on for the lookup.
 *
 * @return mixed
 *   Type of the upcoming token, or TOKEN_NEWLINE when a line break comes
 *   first.
 */
public function peekOnSameLine() {
  $this->scanNewlines = true;
  $tt = $this->peek();
  $this->scanNewlines = false;

  return $tt;
}
/**
 * Returns the token most recently produced by get().
 *
 * @return mixed
 *   The token stored at the current slot, or NULL (implicit) when no token
 *   has been read yet.
 */
public function currentToken() {
  if (!empty($this->tokens)) {
    return $this->tokens[$this->tokenIndex];
  }
}
/**
 * Reads the next token and makes it the current token.
 *
 * Tokens that were pushed back with unget() are replayed first. Otherwise
 * whitespace and comments are skipped and one token is scanned from the
 * source at the cursor position.
 *
 * @param int|null $chunksize
 *   Number of bytes of source to look at per attempt. The method retries
 *   with NULL (everything that is left) when a comment or string literal
 *   does not fit into the chunk.
 *
 * @return mixed
 *   Type of the token that was read.
 *
 * @throws Exception
 *   On an unterminated string literal or an illegal token.
 */
public function get($chunksize = 1000) {
  // Replay pushed-back tokens; buffered newline tokens are skipped unless
  // newlines are currently significant.
  while ($this->lookahead) {
    $this->lookahead--;
    $this->tokenIndex = ($this->tokenIndex + 1) & 3;
    $token = $this->tokens[$this->tokenIndex];
    if ($token->type != TOKEN_NEWLINE || $this->scanNewlines) {
      return $token->type;
    }
  }

  $conditional_comment = false;

  // strip whitespace and comments
  while (true) {
    $input = $this->getInput($chunksize);

    // whitespace handling; gobble up \r as well (effectively we don't have support for MAC newlines!)
    $re = $this->scanNewlines ? '/^[ \r\t]+/' : '/^\s+/';
    if (preg_match($re, $input, $match)) {
      $spaces = $match[0];
      $spacelen = strlen($spaces);
      $this->cursor += $spacelen;
      if (!$this->scanNewlines) {
        $this->lineno += substr_count($spaces, "\n");
      }

      if ($spacelen == $chunksize) {
        continue; // complete chunk contained whitespace
      }

      $input = $this->getInput($chunksize);
      if ($input == '' || $input[0] != '/') {
        break;
      }
    }

    // Comments
    if (!preg_match('/^\/(?:\*(@(?:cc_on|if|elif|else|end))?.*?\*\/|\/[^\n]*)/s', $input, $match)) {
      if (!$chunksize) {
        break;
      }

      // retry with a full chunk fetch; this also prevents breakage of long regular expressions (which will never match a comment)
      $chunksize = null;
      continue;
    }

    // check if this is a conditional (JScript) comment
    if (!empty($match[1])) {
      // Only the opener is consumed as a token; the comment body is
      // tokenized like ordinary code.
      $match[0] = '/*' . $match[1];
      $conditional_comment = true;
      break;
    }
    else {
      $this->cursor += strlen($match[0]);
      $this->lineno += substr_count($match[0], "\n");
    }
  }

  if ($input == '') {
    $tt = TOKEN_END;
    $match = array('');
  }
  elseif ($conditional_comment) {
    $tt = TOKEN_CONDCOMMENT_START;
  }
  else {
    switch ($input[0]) {
      case '0':
        // hexadecimal
        // isset() avoids an out-of-range offset read when the source ends
        // with a lone "0".
        if (isset($input[1]) && ($input[1] == 'x' || $input[1] == 'X') && preg_match('/^0x[0-9a-f]+/i', $input, $match)) {
          $tt = TOKEN_NUMBER;
          break;
        }
        // FALL THROUGH

      case '1':
      case '2':
      case '3':
      case '4':
      case '5':
      case '6':
      case '7':
      case '8':
      case '9':
        // should always match
        preg_match('/^\d+(?:\.\d*)?(?:[eE][-+]?\d+)?/', $input, $match);
        $tt = TOKEN_NUMBER;
        break;

      case "'":
        if (preg_match('/^\'(?:[^\\\\\'\r\n]++|\\\\(?:.|\r?\n))*\'/', $input, $match)) {
          $tt = TOKEN_STRING;
        }
        else {
          if ($chunksize) {
            return $this->get(null); // retry with a full chunk fetch
          }

          throw $this->newSyntaxError('Unterminated string literal');
        }
        break;

      case '"':
        if (preg_match('/^"(?:[^\\\\"\r\n]++|\\\\(?:.|\r?\n))*"/', $input, $match)) {
          $tt = TOKEN_STRING;
        }
        else {
          if ($chunksize) {
            return $this->get(null); // retry with a full chunk fetch
          }

          throw $this->newSyntaxError('Unterminated string literal');
        }
        break;

      case '/':
        // A slash starts a regular expression literal only where an operand
        // is expected; otherwise it is the division operator.
        if ($this->scanOperand && preg_match('/^\/((?:\\\\.|\[(?:\\\\.|[^\]])*\]|[^\/])+)\/([gimy]*)/', $input, $match)) {
          $tt = TOKEN_REGEXP;
          break;
        }
        // FALL THROUGH

      case '|':
      case '^':
      case '&':
      case '<':
      case '>':
      case '+':
      case '-':
      case '*':
      case '%':
      case '=':
      case '!':
        // should always match
        preg_match($this->opRegExp, $input, $match);
        $op = $match[0];
        // isset() avoids an out-of-range offset read when the operator is
        // the last thing in the source.
        if (in_array($op, $this->assignOps) && isset($input[strlen($op)]) && $input[strlen($op)] == '=') {
          $tt = OP_ASSIGN;
          $match[0] .= '=';
        }
        else {
          $tt = $op;
          if ($this->scanOperand) {
            if ($op == OP_PLUS) {
              $tt = OP_UNARY_PLUS;
            }
            elseif ($op == OP_MINUS) {
              $tt = OP_UNARY_MINUS;
            }
          }
          $op = null;
        }
        break;

      case '.':
        if (preg_match('/^\.\d+(?:[eE][-+]?\d+)?/', $input, $match)) {
          $tt = TOKEN_NUMBER;
          break;
        }
        // FALL THROUGH

      case ';':
      case ',':
      case '?':
      case ':':
      case '~':
      case '[':
      case ']':
      case '{':
      case '}':
      case '(':
      case ')':
        // these are all single
        $match = array($input[0]);
        $tt = $input[0];
        break;

      case '@':
        // check end of conditional comment
        if (substr($input, 0, 3) == '@*/') {
          $match = array('@*/');
          $tt = TOKEN_CONDCOMMENT_END;
        }
        else {
          throw $this->newSyntaxError('Illegal token');
        }
        break;

      case "\n":
        if ($this->scanNewlines) {
          $match = array("\n");
          $tt = TOKEN_NEWLINE;
        }
        else {
          throw $this->newSyntaxError('Illegal token');
        }
        break;

      default:
        // Fast path for identifiers: word chars followed by whitespace or various other tokens.
        // Note we don't need to exclude digits in the first char, as they've already been found
        // above.
        if (!preg_match('/^[$\w]+(?=[\s\/\|\^\&<>\+\-\*%=!.;,\?:~\[\]\{\}\(\)@])/', $input, $match)) {
          // Character classes per ECMA-262 edition 5.1 section 7.6
          // Per spec, must accept Unicode 3.0, *may* accept later versions.
          // We'll take whatever PCRE understands, which should be more recent.
          $identifierStartChars = "\\p{L}\\p{Nl}" . # UnicodeLetter
                                  "\$" .
                                  "_";
          $identifierPartChars = $identifierStartChars .
                                 "\\p{Mn}\\p{Mc}" . # UnicodeCombiningMark
                                 "\\p{Nd}" . # UnicodeDigit
                                 "\\p{Pc}"; # UnicodeConnectorPunctuation
          $unicodeEscape = "\\\\u[0-9A-F-a-f]{4}";
          $identifierRegex = "/^" .
                             "(?:[$identifierStartChars]|$unicodeEscape)" .
                             "(?:[$identifierPartChars]|$unicodeEscape)*" .
                             "/uS";
          if (preg_match($identifierRegex, $input, $match)) {
            if (strpos($match[0], '\\') !== false) {
              // Per ECMA-262 edition 5.1, section 7.6 escape sequences should behave as if they were
              // the original chars, but only within the boundaries of the identifier.
              $decoded = preg_replace_callback('/\\\\u([0-9A-Fa-f]{4})/',
                array(__CLASS__, 'unicodeEscapeCallback'),
                $match[0]);

              // Since our original regex didn't de-escape the originals, we need to check for validity again.
              // No need to worry about token boundaries, as anything outside the identifier is illegal!
              if (!preg_match("/^[$identifierStartChars][$identifierPartChars]*$/u", $decoded)) {
                throw $this->newSyntaxError('Illegal token');
              }

              // Per spec it _ought_ to work to use these escapes for keywords words as well...
              // but IE rejects them as invalid, while Firefox and Chrome treat them as identifiers
              // that don't match the keyword.
              if (in_array($decoded, $this->keywords)) {
                throw $this->newSyntaxError('Illegal token');
              }

              // TODO: save the decoded form for output?
            }
          }
          else {
            throw $this->newSyntaxError('Illegal token');
          }
        }
        $tt = in_array($match[0], $this->keywords) ? $match[0] : TOKEN_IDENTIFIER;
    }
  }

  // Store the token in the next slot of the four-entry ring.
  $this->tokenIndex = ($this->tokenIndex + 1) & 3;

  if (!isset($this->tokens[$this->tokenIndex])) {
    $this->tokens[$this->tokenIndex] = new JSToken();
  }

  $token = $this->tokens[$this->tokenIndex];
  $token->type = $tt;

  if ($tt == OP_ASSIGN) {
    // For compound assignments this is the operator without "="; for a
    // plain "=" it is NULL.
    $token->assignOp = $op;
  }

  $token->start = $this->cursor;

  $token->value = $match[0];
  $this->cursor += strlen($match[0]);

  $token->end = $this->cursor;
  $token->lineno = $this->lineno;

  return $tt;
}
/**
 * Pushes the current token back so that the next get() returns it again.
 *
 * Returns nothing; match() relies on that falsy result.
 *
 * @throws Exception
 *   When a fourth token would be pushed back; the token ring has four slots.
 */
public function unget() {
  if (++$this->lookahead == 4) {
    throw $this->newSyntaxError('PANIC: too much lookahead!');
  }

  $this->tokenIndex = ($this->tokenIndex - 1) & 3;
}
/**
 * Builds (but does not throw) a parse error for the current position.
 *
 * @param string $m
 *   Description of the problem.
 *
 * @return Exception
 *   Exception whose message names the file and the current line number.
 */
public function newSyntaxError($m) {
  return new Exception('Parse error: ' . $m . ' in file \'' . $this->filename . '\' on line ' . $this->lineno);
}
/**
 * preg_replace_callback() handler that decodes one \uXXXX escape.
 *
 * @param array $m
 *   Match data; $m[1] holds the four hexadecimal digits.
 *
 * @return string
 *   The result of decoding the corresponding numeric character reference
 *   as UTF-8.
 */
public static function unicodeEscapeCallback($m) {
  return html_entity_decode('&#x' . $m[1]. ';', ENT_QUOTES, 'UTF-8');
}