2 * Author: Alex Kocharin <alex@kocharin.ru>
3 * GIT: https://github.com/rlidwka/jju
4 * License: WTFPL, grab your copy here: http://www.wtfpl.net/txt/copying/
7 // RTFM: http://www.ecma-international.org/publications/files/ECMA-ST/Ecma-262.pdf
9 var Uni = require('./unicode')
/**
 * Tell whether `x` is a hexadecimal digit (0-9, A-F, a-f).
 *
 * Callers pass a single character taken from the input, or `undefined`
 * when reading past the end of the input; `undefined` yields `false`.
 *
 * @param {string|undefined} x - one character of input
 * @returns {boolean} true when `x` is a hex digit
 */
function isHexDigit(x) {
  return (x >= '0' && x <= '9')
      || (x >= 'A' && x <= 'F')
      || (x >= 'a' && x <= 'f')
}
/**
 * Tell whether `x` is an octal digit (0-7).
 *
 * Callers pass a single character taken from the input, or `undefined`
 * when reading past the end of the input; `undefined` yields `false`.
 *
 * @param {string|undefined} x - one character of input
 * @returns {boolean} true when `x` is an octal digit
 */
function isOctDigit(x) {
  return x >= '0' && x <= '7'
}
/**
 * Tell whether `x` is a decimal digit (0-9).
 *
 * Callers pass a single character taken from the input, or `undefined`
 * when reading past the end of the input; `undefined` yields `false`.
 *
 * @param {string|undefined} x - one character of input
 * @returns {boolean} true when `x` is a decimal digit
 */
function isDecDigit(x) {
  return x >= '0' && x <= '9'
}
/**
 * Build a three-line, human-readable error message:
 *
 *   <msg> at <row>:<col>
 *   <excerpt of the offending source line>
 *   <spaces/tabs followed by a caret under the offending character>
 *
 * @param {string} input - the whole text being parsed
 * @param {string} msg - description of the problem
 * @param {number} position - index in `input` of the offending character
 * @param {number} lineno - zero-based line number of `position`
 * @param {number} column - zero-based offset of `position` from its line start
 * @param {boolean} json5 - selects the JSON5 (true) or strict-JSON (false)
 *   notion of a line terminator from the `Uni` module
 * @returns {string} the formatted message
 */
function formatError(input, msg, position, lineno, column, json5) {
  var result = msg + ' at ' + (lineno + 1) + ':' + (column + 1)
    , tmppos = position - column - 1 // one before the line start; pre-incremented below
    , srcline = ''
    , underline = ''

  var isLineTerminator = json5 ? Uni.isLineTerminator : Uni.isLineTerminatorJSON

  // output no more than 70 characters before the wrong ones
  if (tmppos < position - 70) {
    tmppos = position - 70
  }

  while (1) {
    var chr = input[++tmppos]

    // `>=` rather than `===`: if the scan ever starts beyond the end of the
    // input it must stop instead of appending "undefined" to the excerpt
    if (isLineTerminator(chr) || tmppos >= input.length) {
      if (position >= tmppos) {
        // ending line error, so show it after the last char
        underline += '^'
      }
      break
    }
    srcline += chr

    if (position === tmppos) {
      underline += '^'
    } else if (position > tmppos) {
      // keep tabs so the caret lines up however wide a tab is rendered
      underline += input[tmppos] === '\t' ? '\t' : ' '
    }

    // output no more than 78 characters on the string
    if (srcline.length > 78) break
  }

  return result + '\n' + srcline + '\n' + underline
}
// Core parser. `input` is the text and `options` the settings object; the
// nested helpers below share the cursor state (`position`, `length`, ...)
// through this closure.
// NOTE(review): this view is missing lines (e.g. the declarations of
// `position`, `lineno`, `linestart` and `stack`, which are used further down).
76 function parse(input, options) {
77 // parse as a standard JSON mode
// json5 extensions stay enabled unless `options.mode === 'json'` or
// `options.legacy` is set
78 var json5 = !(options.mode === 'json' || options.legacy)
// choose the JSON5 or strict-JSON character classifiers from ./unicode
79 var isLineTerminator = json5 ? Uni.isLineTerminator : Uni.isLineTerminatorJSON
80 var isWhiteSpace = json5 ? Uni.isWhiteSpace : Uni.isWhiteSpaceJSON
82 var length = input.length
// default token hooks: tokenStart does nothing, tokenEnd hands its first
// argument straight back, so `return tokenEnd(value, type)` works in any mode
88 var tokenStart = function() {}
89 var tokenEnd = function(v) {return v}
// NOTE(review): the next two lines read like the interior of a comment that
// describes the token object given to `options._tokenize` (its delimiters are
// not in this view)
93 type: 'whitespace'|'comment'|'key'|'literal'|'separator'|'newline',
94 value: 'number'|'string'|'whatever',
// When a tokenizer callback is supplied, swap the default hooks for ones that
// report each consumed span of input to `options._tokenize`.
98 if (options._tokenize) {
101 tokenStart = function() {
// a non-null `start` means the previous token was never closed
102 if (start !== null) throw Error('internal error, token overlap')
106 tokenEnd = function(v, type) {
// nothing is reported when the cursor did not move since `start`
107 if (start != position) {
109 raw: input.substr(start, position-start),
// copy of the current stack, so later changes to `stack` cannot alter this token
111 stack: stack.slice(0),
// `value` is only attached when a parsed value was passed in
113 if (v !== undefined) hash.value = v
114 options._tokenize.call(null, hash)
// Body of the error helper that the rest of the parser calls as `fail(msg)`
// or `fail()`; its `function` header line is not present in this view.
// It creates a SyntaxError whose message is produced by formatError() and
// which carries `row` and `column` properties.
// column = cursor offset from the start of the current line
123 var column = position - linestart
// default messages: name the offending character if there is one,
// otherwise report end of input
126 if (position < length) {
// JSON-escape the character, drop the surrounding double quotes, escape
// single quotes and unescape double quotes
// NOTE(review): presumably so the result can be wrapped in single quotes;
// the wrapping lines are not visible here
129 .stringify(input[position])
130 .replace(/^"|"$/g, '')
131 .replace(/'/g, "\\'")
132 .replace(/\\"/g, '"')
135 if (!msg) msg = 'Unexpected token ' + token
137 if (!msg) msg = 'Unexpected end of input'
// row/column are exposed as lineno + 1 and column + 1, the same numbers
// formatError() prints in the message
141 var error = SyntaxError(formatError(input, msg, position, lineno, column, json5))
142 error.row = lineno + 1
143 error.column = column + 1
// Handle a line terminator `chr` that has just been consumed from the input.
// NOTE(review): the line bookkeeping (`linestart`, `lineno`) presumably follows
// on lines not visible here.
147 function newline(chr) {
148 // account for <cr><lf>
// when "\r" is directly followed by "\n", step over the "\n" as well
149 if (chr === '\r' && input[position] === '\n') position++
// Parse one value starting at the cursor, dispatching on its first character.
// Returns the parsed value; the fall-through path returns `undefined` via
// tokenEnd(undefined).
154 function parseGeneric() {
157 while (position < length) {
159 var chr = input[position++]
// string literal; single quotes are accepted only in json5 mode
161 if (chr === '"' || (chr === '\'' && json5)) {
162 return tokenEnd(parseString(chr), 'literal')
// the opening brace/bracket is reported as a 'separator' token
164 } else if (chr === '{') {
165 tokenEnd(undefined, 'separator')
168 } else if (chr === '[') {
169 tokenEnd(undefined, 'separator')
172 } else if (chr === '-'
175 // + number Infinity NaN
176 || (json5 && (chr === '+' || chr === 'I' || chr === 'N'))
// parseNumber() re-reads the first character itself
178 return tokenEnd(parseNumber(), 'literal')
// keyword literals: the first letter selects the keyword and parseKeyword()
// verifies the remaining letters (visible for 'false' below)
180 } else if (chr === 'n') {
182 return tokenEnd(null, 'literal')
184 } else if (chr === 't') {
186 return tokenEnd(true, 'literal')
188 } else if (chr === 'f') {
189 parseKeyword('false')
190 return tokenEnd(false, 'literal')
// no value recognised: callers test the result against `undefined`
194 return tokenEnd(undefined)
// Parse an object key at the cursor. Returns the key, or `undefined` (via
// tokenEnd(undefined)) when nothing key-like is found.
199 function parseKey() {
202 while (position < length) {
204 var chr = input[position++]
// quoted key; single quotes are accepted only in json5 mode
206 if (chr === '"' || (chr === '\'' && json5)) {
207 return tokenEnd(parseString(chr), 'key')
209 } else if (chr === '{') {
210 tokenEnd(undefined, 'separator')
213 } else if (chr === '[') {
214 tokenEnd(undefined, 'separator')
217 } else if (chr === '.'
// NOTE(review): parseNumber is declared with no parameters in this file, so
// the `true` argument appears to be ignored -- confirm
220 return tokenEnd(parseNumber(true), 'key')
// NOTE(review): `&&` binds tighter than `||`, so the `\u` alternative below is
// not guarded by whatever condition precedes `&&` on the line missing from
// this view -- confirm that is intended
223 && Uni.isIdentifierStart(chr) || (chr === '\\' && input[position] === 'u')) {
224 // unicode char or a unicode sequence
// remember where the identifier began (the character just consumed)
225 var rollback = position - 1
226 var result = parseIdentifier()
// parseIdentifier() found no identifier
228 if (result === undefined) {
230 return tokenEnd(undefined)
232 return tokenEnd(result, 'key')
237 return tokenEnd(undefined)
// Advance the cursor over whitespace, line terminators and comments,
// reporting 'whitespace', 'newline' and 'comment' tokens through tokenEnd.
242 function skipWhiteSpace() {
244 while (position < length) {
245 var chr = input[position++]
// line terminators are reported as their own 'newline' token, separate from
// the surrounding whitespace
247 if (isLineTerminator(chr)) {
249 tokenEnd(undefined, 'whitespace')
253 tokenEnd(undefined, 'newline')
256 } else if (isWhiteSpace(chr)) {
// "//" or "/*" starts a comment
259 } else if (chr === '/'
261 && (input[position] === '/' || input[position] === '*')
264 tokenEnd(undefined, 'whitespace')
// consume the second comment character; '*' selects multi-line mode
267 skipComment(input[position++] === '*')
268 tokenEnd(undefined, 'comment')
276 return tokenEnd(undefined, 'whitespace')
// Advance the cursor to the end of a comment whose opening "//" or "/*" has
// already been consumed. `multi` is true for a "/* ... */" comment.
279 function skipComment(multi) {
280 while (position < length) {
281 var chr = input[position++]
283 if (isLineTerminator(chr)) {
284 // LineTerminator is an end of singleline comment
286 // let parent function deal with newline
293 } else if (chr === '*' && multi) {
294 // end of multiline comment
295 if (input[position] === '/') {
// reports a comment that is still open when the loop above runs out of input
306 fail('Unclosed multiline comment')
// Check that the input at the cursor continues with `keyword`, starting from
// the keyword's second character.
310 function parseKeyword(keyword) {
311 // keyword[0] is not checked because it should have been checked earlier by the caller
313 var len = keyword.length
314 for (var i=1; i<len; i++) {
// mismatch, or input ended in the middle of the keyword
315 if (position >= length || keyword[i] != input[position]) {
// Parse the members of an object whose opening "{" has already been consumed.
323 function parseObject() {
// the result has no prototype when options.null_prototype is set
324 var result = options.null_prototype ? Object.create(null) : {}
326 , is_non_empty = false
328 while (position < length) {
330 var item1 = parseKey()
333 var chr = input[position++]
334 tokenEnd(undefined, 'separator')
// "}" with no key before it: end of object
336 if (chr === '}' && item1 === undefined) {
// strict JSON forbids a comma before the closing brace
337 if (!json5 && is_non_empty) {
339 fail('Trailing comma in object')
// "key :" -- the value follows
343 } else if (chr === ':' && item1 !== undefined) {
346 var item2 = parseGeneric()
349 if (item2 === undefined) fail('No value found for key ' + item1)
// non-string keys: only numbers are accepted, and only in json5 mode
350 if (typeof(item1) !== 'string') {
351 if (!json5 || typeof(item1) !== 'number') {
352 fail('Wrong key type: ' + item1)
// a key that also exists on `empty_object` is treated as reserved unless
// reserved_keys is 'replace'
// NOTE(review): `empty_object` is declared on a line not visible here,
// presumably a plain `{}` -- confirm
356 if ((item1 in empty_object || empty_object[item1] != null) && options.reserved_keys !== 'replace') {
357 if (options.reserved_keys === 'throw') {
358 fail('Reserved key: ' + item1)
360 // silently ignore it
// the reviver is called as (key, value) with `this` set to null
363 if (typeof(options.reviver) === 'function') {
364 item2 = options.reviver.call(null, item1, item2)
// an `undefined` value (e.g. returned by the reviver) is not stored
367 if (item2 !== undefined) {
369 Object.defineProperty(result, item1, {
// after a member: read the character that follows it
381 var chr = input[position++]
382 tokenEnd(undefined, 'separator')
387 } else if (chr === '}') {
// Parse the elements of an array whose opening "[" has already been consumed.
403 function parseArray() {
406 while (position < length) {
// push the index of the element about to be parsed onto the shared `stack`
// (the tokenizer hook copies `stack` into each token)
408 stack.push(result.length)
409 var item = parseGeneric()
413 var chr = input[position++]
414 tokenEnd(undefined, 'separator')
416 if (item !== undefined) {
// the reviver is called as (index-as-string, value) with `this` set to null
417 if (typeof(options.reviver) === 'function') {
418 item = options.reviver.call(null, String(result.length), item)
// the reviver removed the element
420 if (item === undefined) {
422 item = true // hack for check below, not included into result
// no element where one was expected (e.g. "[1,,2]")
429 if (item === undefined) {
430 fail('Elisions are not supported')
433 } else if (chr === ']') {
// strict JSON forbids a comma before the closing bracket
434 if (!json5 && item === undefined && result.length) {
436 fail('Trailing comma in array')
// Parse a numeric literal. The caller has already consumed its first
// character, so the function starts by re-reading it.
447 function parseNumber() {
448 // rewind because we don't know first char
452 , chr = input[position++]
// to_num converts the text between `start` and the cursor into a number and
// validates it
455 var to_num = function(is_octal) {
456 var str = input.substr(start, position - start)
// octal branch: drop the "0" / "0o" prefix and parse in base 8
// NOTE(review): `/^0o?/` is case-sensitive while the prefix test further down
// also accepts "O", so "0O17" would seem to reach parseInt as "O17" and become
// NaN -- confirm
459 var result = parseInt(str.replace(/^0o?/, ''), 8)
461 var result = Number(str)
464 if (Number.isNaN(result)) {
466 fail('Bad numeric literal - "' + input.substr(start, position - start + 1) + '"')
// strict JSON grammar: optional "-", no leading zeros, digits required after
// ".", optional exponent
467 } else if (!json5 && !str.match(/^-?(0|[1-9][0-9]*)(\.[0-9]+)?(e[+-]?[0-9]+)?$/i)) {
468 // additional restrictions imposed by json
470 fail('Non-json numeric literal - "' + input.substr(start, position - start + 1) + '"')
476 // ex: -5982475.249875e+29384
// sign: "-" always, "+" only in json5 mode
478 if (chr === '-' || (chr === '+' && json5)) chr = input[position++]
// json5-only literals that start with "N" and "I"
480 if (chr === 'N' && json5) {
485 if (chr === 'I' && json5) {
486 parseKeyword('Infinity')
488 // returning +inf or -inf
// integer part starting with a non-zero digit
492 if (chr >= '1' && chr <= '9') {
493 // ex: -5982475.249875e+29384
494 // ^^^ skipping these
495 while (position < length && isDecDigit(input[position])) position++
496 chr = input[position++]
499 // special case for leading zero: 0.123456
501 chr = input[position++]
503 // new syntax, "0o777" old syntax, "0777"
504 var is_octal = chr === 'o' || chr === 'O' || isOctDigit(chr)
505 var is_hex = chr === 'x' || chr === 'X'
// octal and hex literals are json5-only; consume their digits
507 if (json5 && (is_octal || is_hex)) {
508 while (position < length
509 && (is_hex ? isHexDigit : isOctDigit)( input[position] )
// the sign is handled here and applied to the result of to_num below
513 if (input[start] === '-') {
516 } else if (input[start] === '+') {
520 return sign * to_num(is_octal)
525 // ex: -5982475.249875e+29384
526 // ^^^ skipping these
527 while (position < length && isDecDigit(input[position])) position++
528 chr = input[position++]
// exponent: "e"/"E", optional sign, digits
531 if (chr === 'e' || chr === 'E') {
532 chr = input[position++]
533 if (chr === '-' || chr === '+') position++
534 // ex: -5982475.249875e+29384
535 // ^^^ skipping these
536 while (position < length && isDecDigit(input[position])) position++
537 chr = input[position++]
540 // we have char in the buffer, so count for it
// Parse an identifier (used by parseKey for unquoted keys). The caller has
// already consumed the first character, so the function starts by rewinding.
// parseKey treats an `undefined` result as "no identifier here".
545 function parseIdentifier() {
546 // rewind because we don't know first char
551 while (position < length) {
552 var chr = input[position++]
// a "u" followed by exactly four hex digits is decoded into the character it
// names
555 && input[position] === 'u'
556 && isHexDigit(input[position+1])
557 && isHexDigit(input[position+2])
558 && isHexDigit(input[position+3])
559 && isHexDigit(input[position+4])
561 // UnicodeEscapeSequence
562 chr = String.fromCharCode(parseInt(input.substr(position+1, 4), 16))
567 // identifier started
// two classifiers from ./unicode: one for characters inside an identifier,
// one for its first character
568 if (Uni.isIdentifierPart(chr)) {
576 if (Uni.isIdentifierStart(chr)) {
// Parse a string literal whose opening quote has already been consumed.
// `endChar` is that quote character; the string ends at its next unescaped
// occurrence.
587 function parseString(endChar) {
588 // 7.8.4 of ES262 spec
591 while (position < length) {
592 var chr = input[position++]
594 if (chr === endChar) {
// backslash escape
597 } else if (chr === '\\') {
// input ended right after the backslash
598 if (position >= length) fail()
599 chr = input[position++]
// single-character escapes come from `unescapeMap` (declared on lines not
// visible here); \v and \' are rejected in strict JSON mode
601 if (unescapeMap[chr] && (json5 || (chr != 'v' && chr != "'"))) {
602 result += unescapeMap[chr]
// json5 only: backslash followed by a line terminator
604 } else if (json5 && isLineTerminator(chr)) {
// \uXXXX always, \xXX only in json5 mode
608 } else if (chr === 'u' || (chr === 'x' && json5)) {
609 // unicode/character escape sequence
// number of hex digits that must follow
610 var off = chr === 'u' ? 4 : 2
612 // validation for \uXXXX
613 for (var i=0; i<off; i++) {
614 if (position >= length) fail()
615 if (!isHexDigit(input[position])) fail('Bad escape sequence')
// the `off` digits just validated sit right before the cursor
619 result += String.fromCharCode(parseInt(input.substr(position-off, off), 16))
// json5 only: octal escape; the two tests below look ahead at up to two more
// octal digits, and a third digit is only considered when the first is below '4'
620 } else if (json5 && isOctDigit(chr)) {
621 if (chr < '4' && isOctDigit(input[position]) && isOctDigit(input[position+1])) {
624 } else if (isOctDigit(input[position])) {
// the first digit is already consumed, so advance over the remaining ones
630 position += digits - 1
631 result += String.fromCharCode(parseInt(input.substr(position-digits, digits), 8))
632 /*if (!isOctDigit(input[position])) {
633 // \0 is allowed still
636 fail('Octal literals are not supported')
648 } else if (isLineTerminator(chr)) {
652 if (!json5 && chr.charCodeAt(0) < 32) {
654 fail('Unexpected control character')
657 // SourceCharacter but not one of " or \ or LineTerminator
// Main body of the parser: parse a single top-level value, then decide
// between returning it and reporting an error.
666 var return_value = parseGeneric()
// either a value was parsed or there is still input left to look at
667 if (return_value !== undefined || position < length) {
// the whole input was consumed
670 if (position >= length) {
// top-level reviver call: empty-string key, `this` set to null
671 if (typeof(options.reviver) === 'function') {
672 return_value = options.reviver.call(null, '', return_value)
// nothing parsed and nothing left: two messages, one for whitespace-only
// input and one for empty input
681 fail('No data, only a whitespace')
683 fail('No data, empty input')
689 * parse(text, options)
691 * parse(text, reviver)
// Public entry point: normalises `input` and `options`, then delegates to the
// internal parse().
// NOTE(review): when the caller passes an options object, the defaults below
// are written onto that same object.
698 module.exports.parse = function parseJSON(input, options) {
699 // support legacy functions
// a function as second argument is handled separately (see the
// parse(text, reviver) form)
700 if (typeof(options) === 'function') {
706 if (input === undefined) {
707 // parse(stringify(x)) should be equal x
708 // with JSON functions it is not 'cause of undefined
709 // so we're fixing it
// any non-string input is converted with String()
714 if (typeof(input) !== 'string') input = String(input)
715 if (options == null) options = {}
716 if (options.reserved_keys == null) options.reserved_keys = 'ignore'
// in 'throw' and 'ignore' modes the result objects default to having no
// prototype
718 if (options.reserved_keys === 'throw' || options.reserved_keys === 'ignore') {
719 if (options.null_prototype == null) {
720 options.null_prototype = true
725 return parse(input, options)
727 // jju is a recursive parser, so JSON.parse("{{{{{{{") could blow up the stack
729 // this catch is used to skip all those internal calls
// errors that carry row/column are rebuilt as a new SyntaxError with the same
// message, row and column
730 if (err instanceof SyntaxError && err.row != null && err.column != null) {
732 err = SyntaxError(old_err.message)
733 err.column = old_err.column
734 err.row = old_err.row
// Public tokenizer entry point: installs an `_tokenize` callback on `options`
// and runs the regular parser.
// NOTE(review): the end of this function is not visible here; it presumably
// returns `tokens` -- confirm.
740 module.exports.tokenize = function tokenizeJSON(input, options) {
741 if (options == null) options = {}
// this callback is invoked by the parser for every token it reports
743 options._tokenize = function(smth) {
// prepend the entries of `options._addstack`, when given, to the token's stack
744 if (options._addstack) smth.stack.unshift.apply(smth.stack, options._addstack)
// the parsed value is kept on the token collection as `data`
749 tokens.data = module.exports.parse(input, options)