'use strict';

var CssSyntaxError = require('./error');

var constants = require('./const');
var TYPE = constants.TYPE;
var NAME = constants.NAME;
var SYMBOL_TYPE = constants.SYMBOL_TYPE;

var utils = require('./utils');
var firstCharOffset = utils.firstCharOffset;
var cmpStr = utils.cmpStr;
var isNumber = utils.isNumber;
var findWhiteSpaceStart = utils.findWhiteSpaceStart;
var findWhiteSpaceEnd = utils.findWhiteSpaceEnd;
var findCommentEnd = utils.findCommentEnd;
var findStringEnd = utils.findStringEnd;
var findNumberEnd = utils.findNumberEnd;
var findIdentifierEnd = utils.findIdentifierEnd;
var findUrlRawEnd = utils.findUrlRawEnd;

var NULL = 0;
var WHITESPACE = TYPE.WhiteSpace;
var IDENTIFIER = TYPE.Identifier;
var NUMBER = TYPE.Number;
var STRING = TYPE.String;
var COMMENT = TYPE.Comment;
var PUNCTUATOR = TYPE.Punctuator;
var CDO = TYPE.CDO;
var CDC = TYPE.CDC;
var ATRULE = TYPE.Atrule;
var FUNCTION = TYPE.Function;
var URL = TYPE.Url;
var RAW = TYPE.Raw;

var N = 10;
var F = 12;
var R = 13;
var STAR = TYPE.Asterisk;
var SLASH = TYPE.Solidus;
var FULLSTOP = TYPE.FullStop;
var PLUSSIGN = TYPE.PlusSign;
var HYPHENMINUS = TYPE.HyphenMinus;
var GREATERTHANSIGN = TYPE.GreaterThanSign;
var LESSTHANSIGN = TYPE.LessThanSign;
var EXCLAMATIONMARK = TYPE.ExclamationMark;
var COMMERCIALAT = TYPE.CommercialAt;
var QUOTATIONMARK = TYPE.QuotationMark;
var APOSTROPHE = TYPE.Apostrophe;
var LEFTPARENTHESIS = TYPE.LeftParenthesis;
var RIGHTPARENTHESIS = TYPE.RightParenthesis;
var LEFTCURLYBRACKET = TYPE.LeftCurlyBracket;
var RIGHTCURLYBRACKET = TYPE.RightCurlyBracket;
var LEFTSQUAREBRACKET = TYPE.LeftSquareBracket;
var RIGHTSQUAREBRACKET = TYPE.RightSquareBracket;

var MIN_BUFFER_SIZE = 16 * 1024;
var OFFSET_MASK = 0x00FFFFFF;
var TYPE_SHIFT = 24;
var SafeUint32Array = typeof Uint32Array !== 'undefined' ? Uint32Array : Array; // fallback on Array when TypedArray is not supported

function computeLinesAndColumns(tokenizer, source) {
    var sourceLength = source.length;
    var start = firstCharOffset(source);
    var lines = tokenizer.lines;
    var line = tokenizer.startLine;
    var columns = tokenizer.columns;
    var column = tokenizer.startColumn;

    if (lines === null || lines.length < sourceLength + 1) {
        lines = new SafeUint32Array(Math.max(sourceLength + 1024, MIN_BUFFER_SIZE));
        columns = new SafeUint32Array(lines.length);
    }

    for (var i = start; i < sourceLength; i++) {
        var code = source.charCodeAt(i);

        lines[i] = line;
        columns[i] = column++;

        if (code === N || code === R || code === F) {
            if (code === R && i + 1 < sourceLength && source.charCodeAt(i + 1) === N) {
                i++;
                lines[i] = line;
                columns[i] = column;
            }

            line++;
            column = 1;
        }
    }

    lines[i] = line;
    columns[i] = column;

    tokenizer.linesAndColumnsComputed = true;
    tokenizer.lines = lines;
    tokenizer.columns = columns;
}
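// Each entry of offsetAndType packs a token's type (upper 8 bits) and its end
// offset in the source (lower 24 bits) into a single uint32, e.g. (illustrative):
//
//   var packed = (type << TYPE_SHIFT) | endOffset;  // encode
//   packed >> TYPE_SHIFT                            // decode the type
//   packed & OFFSET_MASK                            // decode the end offset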
function tokenLayout(tokenizer, source, startPos) {
    var sourceLength = source.length;
    var offsetAndType = tokenizer.offsetAndType;
    var balance = tokenizer.balance;
    var tokenCount = 0;
    var prevType = 0;
    var offset = startPos;
    var anchor = 0;
    var balanceCloseCode = 0;
    var balanceStart = 0;
    var balancePrev = 0;

    if (offsetAndType === null || offsetAndType.length < sourceLength + 1) {
        offsetAndType = new SafeUint32Array(sourceLength + 1024);
        balance = new SafeUint32Array(sourceLength + 1024);
    }

    while (offset < sourceLength) {
        var code = source.charCodeAt(offset);
        var type = code < 0x80 ? SYMBOL_TYPE[code] : IDENTIFIER;

        balance[tokenCount] = sourceLength;

        switch (type) {
            case WHITESPACE:
                offset = findWhiteSpaceEnd(source, offset + 1);
                break;

            case PUNCTUATOR:
                switch (code) {
                    case balanceCloseCode:
                        balancePrev = balanceStart & OFFSET_MASK;
                        balanceStart = balance[balancePrev];
                        balanceCloseCode = balanceStart >> TYPE_SHIFT;
                        balance[tokenCount] = balancePrev;
                        balance[balancePrev++] = tokenCount;
                        for (; balancePrev < tokenCount; balancePrev++) {
                            if (balance[balancePrev] === sourceLength) {
                                balance[balancePrev] = tokenCount;
                            }
                        }
                        break;

                    case LEFTSQUAREBRACKET:
                        balance[tokenCount] = balanceStart;
                        balanceCloseCode = RIGHTSQUAREBRACKET;
                        balanceStart = (balanceCloseCode << TYPE_SHIFT) | tokenCount;
                        break;

                    case LEFTCURLYBRACKET:
                        balance[tokenCount] = balanceStart;
                        balanceCloseCode = RIGHTCURLYBRACKET;
                        balanceStart = (balanceCloseCode << TYPE_SHIFT) | tokenCount;
                        break;

                    case LEFTPARENTHESIS:
                        balance[tokenCount] = balanceStart;
                        balanceCloseCode = RIGHTPARENTHESIS;
                        balanceStart = (balanceCloseCode << TYPE_SHIFT) | tokenCount;
                        break;
                }

                // /*
                if (code === STAR && prevType === SLASH) {
                    type = COMMENT;
                    offset = findCommentEnd(source, offset + 1);
                    tokenCount--; // rewrite prev token
                    break;
                }

                // edge case for -.123 and +.123
                if (code === FULLSTOP && (prevType === PLUSSIGN || prevType === HYPHENMINUS)) {
                    if (offset + 1 < sourceLength && isNumber(source.charCodeAt(offset + 1))) {
                        type = NUMBER;
                        offset = findNumberEnd(source, offset + 2, false);
                        tokenCount--; // rewrite prev token
                        break;
                    }
                }

                // -->
                if (code === HYPHENMINUS && prevType === HYPHENMINUS) {
                    if (offset + 1 < sourceLength && source.charCodeAt(offset + 1) === GREATERTHANSIGN) {
                        type = CDC;
                        offset = offset + 2;
                        tokenCount--; // rewrite prev token
                        break;
                    }
                }

                // ident(
                if (code === LEFTPARENTHESIS && prevType === IDENTIFIER) {
                    offset = offset + 1;
                    tokenCount--; // rewrite prev token
                    balance[tokenCount] = balance[tokenCount + 1];
                    balanceStart--;

                    // 4 char length identifier and equal to `url(` (case insensitive)
                    if (offset - anchor === 4 && cmpStr(source, anchor, offset, 'url(')) {
                        // special case for url() because it can contain any symbols sequence with few exceptions
                        anchor = findWhiteSpaceEnd(source, offset);
                        code = source.charCodeAt(anchor);
                        if (code !== LEFTPARENTHESIS &&
                            code !== RIGHTPARENTHESIS &&
                            code !== QUOTATIONMARK &&
                            code !== APOSTROPHE) {
                            // url(
                            offsetAndType[tokenCount++] = (URL << TYPE_SHIFT) | offset;
                            balance[tokenCount] = sourceLength;

                            // ws*
                            if (anchor !== offset) {
                                offsetAndType[tokenCount++] = (WHITESPACE << TYPE_SHIFT) | anchor;
                                balance[tokenCount] = sourceLength;
                            }

                            // raw
                            type = RAW;
                            offset = findUrlRawEnd(source, anchor);
                        } else {
                            type = URL;
                        }
                    } else {
                        type = FUNCTION;
                    }

                    break;
                }

                type = code;
                offset = offset + 1;
                break;

            case NUMBER:
                offset = findNumberEnd(source, offset + 1, prevType !== FULLSTOP);

                // merge number with a preceding dot, dash or plus
                if (prevType === FULLSTOP ||
                    prevType === HYPHENMINUS ||
                    prevType === PLUSSIGN) {
                    tokenCount--; // rewrite prev token
                }

                break;

            case STRING:
                offset = findStringEnd(source, offset + 1, code);
                break;

            default:
                anchor = offset;
                offset = findIdentifierEnd(source, offset);

                // merge identifier with a preceding dash
                if (prevType === HYPHENMINUS) {
                    // rewrite prev token
                    tokenCount--;
                    // restore prev prev token type
                    // for case @-prefix-ident
                    prevType = tokenCount === 0 ? 0 : offsetAndType[tokenCount - 1] >> TYPE_SHIFT;
                }

                if (prevType === COMMERCIALAT) {
                    // rewrite prev token and change type to atrule
                    tokenCount--;
                    type = ATRULE;
                }
        }

        offsetAndType[tokenCount++] = (type << TYPE_SHIFT) | offset;
        prevType = type;
    }

    // finalize arrays
    offsetAndType[tokenCount] = offset;
    balance[tokenCount] = sourceLength;
    balance[sourceLength] = sourceLength; // prevents false positive balance match with any token
    while (balanceStart !== 0) {
        balancePrev = balanceStart & OFFSET_MASK;
        balanceStart = balance[balancePrev];
        balance[balancePrev] = sourceLength;
    }

    tokenizer.offsetAndType = offsetAndType;
    tokenizer.tokenCount = tokenCount;
    tokenizer.balance = balance;
}
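// A rough sketch of the balance array built by tokenLayout(): when a closing
// bracket is matched, the opening and closing tokens are pointed at each other,
// any still-unmatched tokens between them are pointed at the closing token, and
// tokens outside any pair keep the sourceLength sentinel. getRawLength() below
// relies on this to skip a whole balanced block in one step.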
//
// tokenizer
//

var Tokenizer = function(source, startOffset, startLine, startColumn) {
    this.offsetAndType = null;
    this.balance = null;
    this.lines = null;
    this.columns = null;

    this.setSource(source, startOffset, startLine, startColumn);
};

Tokenizer.prototype = {
    setSource: function(source, startOffset, startLine, startColumn) {
        var safeSource = String(source || '');
        var start = firstCharOffset(safeSource);

        this.source = safeSource;
        this.firstCharOffset = start;
        this.startOffset = typeof startOffset === 'undefined' ? 0 : startOffset;
        this.startLine = typeof startLine === 'undefined' ? 1 : startLine;
        this.startColumn = typeof startColumn === 'undefined' ? 1 : startColumn;
        this.linesAndColumnsComputed = false;

        this.eof = false;
        this.currentToken = -1;
        this.tokenType = 0;
        this.tokenStart = start;
        this.tokenEnd = start;

        tokenLayout(this, safeSource, start);
        this.next();
    },

    lookupType: function(offset) {
        offset += this.currentToken;

        if (offset < this.tokenCount) {
            return this.offsetAndType[offset] >> TYPE_SHIFT;
        }

        return NULL;
    },
    lookupNonWSType: function(offset) {
        offset += this.currentToken;

        for (var type; offset < this.tokenCount; offset++) {
            type = this.offsetAndType[offset] >> TYPE_SHIFT;

            if (type !== WHITESPACE) {
                return type;
            }
        }

        return NULL;
    },
    lookupValue: function(offset, referenceStr) {
        offset += this.currentToken;

        if (offset < this.tokenCount) {
            return cmpStr(
                this.source,
                this.offsetAndType[offset - 1] & OFFSET_MASK,
                this.offsetAndType[offset] & OFFSET_MASK,
                referenceStr
            );
        }

        return false;
    },
    getTokenStart: function(tokenNum) {
        if (tokenNum === this.currentToken) {
            return this.tokenStart;
        }

        if (tokenNum > 0) {
            return tokenNum < this.tokenCount
                ? this.offsetAndType[tokenNum - 1] & OFFSET_MASK
                : this.offsetAndType[this.tokenCount] & OFFSET_MASK;
        }

        return this.firstCharOffset;
    },
    getOffsetExcludeWS: function() {
        if (this.currentToken > 0) {
            if ((this.offsetAndType[this.currentToken - 1] >> TYPE_SHIFT) === WHITESPACE) {
                return this.currentToken > 1
                    ? this.offsetAndType[this.currentToken - 2] & OFFSET_MASK
                    : this.firstCharOffset;
            }
        }

        return this.tokenStart;
    },
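    // Scans forward from startToken, using the balance array to jump over
    // balanced blocks, and stops on leaving the enclosing balance pair or on
    // the first endTokenType1/endTokenType2 token (the latter optionally
    // included); returns the token count relative to the current token.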
    getRawLength: function(startToken, endTokenType1, endTokenType2, includeTokenType2) {
        var cursor = startToken;
        var balanceEnd;

        loop:
        for (; cursor < this.tokenCount; cursor++) {
            balanceEnd = this.balance[cursor];

            // balance end points to offset before start
            if (balanceEnd < startToken) {
                break loop;
            }

            // check token is stop type
            switch (this.offsetAndType[cursor] >> TYPE_SHIFT) {
                case endTokenType1:
                    break loop;

                case endTokenType2:
                    if (includeTokenType2) {
                        cursor++;
                    }
                    break loop;

                default:
                    // fast forward to the end of balanced block
                    if (this.balance[balanceEnd] === cursor) {
                        cursor = balanceEnd;
                    }
            }
        }

        return cursor - this.currentToken;
    },
    isBalanceEdge: function(pos) {
        var balanceStart = this.balance[this.currentToken];
        return balanceStart < pos;
    },

    getTokenValue: function() {
        return this.source.substring(this.tokenStart, this.tokenEnd);
    },
    substrToCursor: function(start) {
        return this.source.substring(start, this.tokenStart);
    },

    skipWS: function() {
        for (var i = this.currentToken, skipTokenCount = 0; i < this.tokenCount; i++, skipTokenCount++) {
            if ((this.offsetAndType[i] >> TYPE_SHIFT) !== WHITESPACE) {
                break;
            }
        }

        if (skipTokenCount > 0) {
            this.skip(skipTokenCount);
        }
    },
    skipSC: function() {
        while (this.tokenType === WHITESPACE || this.tokenType === COMMENT) {
            this.next();
        }
    },
    skip: function(tokenCount) {
        var next = this.currentToken + tokenCount;

        if (next < this.tokenCount) {
            this.currentToken = next;
            this.tokenStart = this.offsetAndType[next - 1] & OFFSET_MASK;
            next = this.offsetAndType[next];
            this.tokenType = next >> TYPE_SHIFT;
            this.tokenEnd = next & OFFSET_MASK;
        } else {
            this.currentToken = this.tokenCount;
            this.next();
        }
    },
    next: function() {
        var next = this.currentToken + 1;

        if (next < this.tokenCount) {
            this.currentToken = next;
            this.tokenStart = this.tokenEnd;
            next = this.offsetAndType[next];
            this.tokenType = next >> TYPE_SHIFT;
            this.tokenEnd = next & OFFSET_MASK;
        } else {
            this.currentToken = this.tokenCount;
            this.eof = true;
            this.tokenType = NULL;
            this.tokenStart = this.tokenEnd = this.source.length;
        }
    },

    eat: function(tokenType) {
        if (this.tokenType !== tokenType) {
            var offset = this.tokenStart;
            var message = NAME[tokenType] + ' is expected';

            // tweak message and offset
            if (tokenType === IDENTIFIER) {
                // when identifier is expected but there is a function or url
                if (this.tokenType === FUNCTION || this.tokenType === URL) {
                    offset = this.tokenEnd - 1;
                    message += ' but function found';
                }
            } else {
                // when test type is part of another token show error for current position + 1
                // e.g. eat(HYPHENMINUS) will fail on "-foo", but pointing at "-" is odd
                if (this.source.charCodeAt(this.tokenStart) === tokenType) {
                    offset = offset + 1;
                }
            }

            this.error(message, offset);
        }

        this.next();
    },
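    // Illustrative sketch (not executed): for the source "foo(1)" a consumer
    // of this tokenizer could read the call with the consume helpers below,
    // assuming the single-character TYPE constants equal their char codes
    // (which tokenLayout() relies on when it assigns `type = code`):
    //
    //   var fn = tokenizer.consumeFunctionName();            // 'foo'
    //   var arg = tokenizer.consume(Tokenizer.TYPE.Number);  // '1'
    //   tokenizer.eat(Tokenizer.TYPE.RightParenthesis);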
    eatNonWS: function(tokenType) {
        this.skipWS();
        this.eat(tokenType);
    },

    consume: function(tokenType) {
        var value = this.getTokenValue();

        this.eat(tokenType);

        return value;
    },
    consumeFunctionName: function() {
        var name = this.source.substring(this.tokenStart, this.tokenEnd - 1);

        this.eat(FUNCTION);

        return name;
    },
    consumeNonWS: function(tokenType) {
        this.skipWS();

        return this.consume(tokenType);
    },

    expectIdentifier: function(name) {
        if (this.tokenType !== IDENTIFIER || cmpStr(this.source, this.tokenStart, this.tokenEnd, name) === false) {
            this.error('Identifier `' + name + '` is expected');
        }

        this.next();
    },

    getLocation: function(offset, filename) {
        if (!this.linesAndColumnsComputed) {
            computeLinesAndColumns(this, this.source);
        }

        return {
            source: filename,
            offset: this.startOffset + offset,
            line: this.lines[offset],
            column: this.columns[offset]
        };
    },

    getLocationRange: function(start, end, filename) {
        if (!this.linesAndColumnsComputed) {
            computeLinesAndColumns(this, this.source);
        }

        return {
            source: filename,
            start: {
                offset: this.startOffset + start,
                line: this.lines[start],
                column: this.columns[start]
            },
            end: {
                offset: this.startOffset + end,
                line: this.lines[end],
                column: this.columns[end]
            }
        };
    },

    error: function(message, offset) {
        var location = typeof offset !== 'undefined' && offset < this.source.length
            ? this.getLocation(offset)
            : this.eof
                ? this.getLocation(findWhiteSpaceStart(this.source, this.source.length - 1))
                : this.getLocation(this.tokenStart);

        throw new CssSyntaxError(
            message || 'Unexpected input',
            this.source,
            location.offset,
            location.line,
            location.column
        );
    },

    dump: function() {
        var offset = 0;

        return Array.prototype.slice.call(this.offsetAndType, 0, this.tokenCount).map(function(item, idx) {
            var start = offset;
            var end = item & OFFSET_MASK;

            offset = end;

            return {
                idx: idx,
                type: NAME[item >> TYPE_SHIFT],
                chunk: this.source.substring(start, end),
                balance: this.balance[idx]
            };
        }, this);
    }
};

// extend with error class
Tokenizer.CssSyntaxError = CssSyntaxError;

// extend tokenizer with constants
Object.keys(constants).forEach(function(key) {
    Tokenizer[key] = constants[key];
});

// extend tokenizer with static methods from utils
Object.keys(utils).forEach(function(key) {
    Tokenizer[key] = utils[key];
});

// warm up tokenizer to eliminate code branches that never execute
// fix soft deoptimizations (insufficient type feedback)
new Tokenizer('\n\r\r\n\f//""\'\'/*\r\n\f*/1a;.\\31\t\+2{url(a);func();+1.2e3 -.4e-5 .6e+7}').getLocation();

module.exports = Tokenizer;
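// Example usage (illustrative sketch; the require path is assumed and depends
// on where this file lives in the package):
//
//   var Tokenizer = require('./tokenizer');
//   var tokenizer = new Tokenizer('.foo { color: red }');
//
//   while (!tokenizer.eof) {
//       console.log(Tokenizer.NAME[tokenizer.tokenType], JSON.stringify(tokenizer.getTokenValue()));
//       tokenizer.next();
//   }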