/**
 * Copyright 2018 Shape Security, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License")
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
|
|
|
|
/* eslint-disable no-use-before-define */
|
|
|
|
const matchPropertyValue = require('unicode-match-property-value-ecmascript');
|
|
|
|
const matchPropertyValueMappings = require('unicode-match-property-value-ecmascript/data/mappings');
|
|
|
|
const matchProperty = require('unicode-match-property-ecmascript');
|
|
|
|
const propertyAliases = require('unicode-property-aliases-ecmascript');
|
|
|
|
const { idContinueBool, idContinueLargeRegex, idStartBool, idStartLargeRegex } = require('./unicode');
|
|
|
|
/**
 * Evaluates `predicate` and coerces its result to a boolean, treating a thrown
 * exception the same as a falsy result.
 *
 * In this file it wraps the Unicode property lookups (presumably those throw
 * for unknown names -- confirm against the lookup packages).
 *
 * @param {function(): *} predicate - zero-argument function to evaluate.
 * @returns {boolean} `true` only if `predicate` returned a truthy value without throwing.
 */
const catchIsFalse = predicate => {
  try {
    return Boolean(predicate());
  } catch (e) {
    return false;
  }
};
|
|
|
|
// Characters acceptPatternCharacter refuses to treat as literals; also the
// characters that may follow a backslash as an identity escape in unicode mode.
const syntaxCharacters = '^$\\.*+?()[]{}|'.split('');
// Smaller exclusion set used by acceptExtendedPatternCharacter (non-unicode
// mode), which lets `]`, `{` and `}` stand for themselves.
const extendedSyntaxCharacters = '^$\\.*+?()[|'.split('');

// Letters that form a control escape (`\f`, `\n`, ...) and the character code
// each escape denotes.
const controlEscapeCharacters = 'fnrtv'.split('');
const controlEscapeCharacterValues = {
  'f': '\f'.charCodeAt(0),
  'n': '\n'.charCodeAt(0),
  'r': '\r'.charCodeAt(0),
  't': '\t'.charCodeAt(0),
  'v': '\v'.charCodeAt(0),
};

// ASCII letters: the characters allowed after `\c`, and (together with `_` and
// digits) the characters allowed in `\p{...}` property names and values.
const controlCharacters = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'.split('');
const hexDigits = '0123456789abcdefABCDEF'.split('');
const decimalDigits = '0123456789'.split('');
const octalDigits = '01234567'.split('');

// Pushed onto `backreferenceNames` when `\k` is not followed by a well-formed
// `<name>`. Being a unique object it can never equal a real group name, so the
// final name check fails whenever that check is performed.
const INVALID_NAMED_BACKREFERENCE_SENTINEL = {};
|
|
|
|
/**
 * Tests whether a code point may start an identifier.
 *
 * Code points below 128 are answered from the `idStartBool` lookup table; all
 * others are tested against `idStartLargeRegex`.
 *
 * @param {number} ch - code point to test.
 * @returns {boolean} the table entry or the regex test result.
 */
function isIdentifierStart(ch) {
  if (ch < 128) {
    return idStartBool[ch];
  }
  return idStartLargeRegex.test(String.fromCodePoint(ch));
}
|
|
|
|
/**
 * Tests whether a code point may continue an identifier.
 *
 * Code points below 128 are answered from the `idContinueBool` lookup table;
 * all others are tested against `idContinueLargeRegex`.
 *
 * @param {number} ch - code point to test.
 * @returns {boolean} the table entry or the regex test result.
 */
function isIdentifierPart(ch) {
  if (ch < 128) {
    return idContinueBool[ch];
  }
  return idContinueLargeRegex.test(String.fromCodePoint(ch));
}
|
|
|
|
/**
 * Mutable cursor over a regular-expression pattern, shared by the `accept*`
 * functions in this file.
 *
 * Fields:
 *  - pattern: the pattern text being validated.
 *  - unicode: whether the pattern is validated in unicode (`u` flag) mode.
 *  - index: offset, in UTF-16 code units, of the next unread character.
 *  - largestBackreference: largest numeric backreference recorded so far.
 *  - backreferenceNames: names recorded from `\k<name>` references.
 *  - groupingNames: names of the named capturing groups accepted so far.
 *  - capturingGroups: number of capturing groups accepted so far.
 */
class PatternAcceptorState {
  /**
   * @param {string} pattern - pattern text.
   * @param {boolean} unicode - validate in unicode mode.
   */
  constructor(pattern, unicode) {
    this.pattern = pattern;
    this.unicode = unicode;
    this.index = 0;
    this.largestBackreference = 0;
    this.backreferenceNames = [];
    this.groupingNames = [];
    this.capturingGroups = 0;
  }

  /** @returns {boolean} `true` once the whole pattern has been consumed. */
  empty() {
    return this.index >= this.pattern.length;
  }

  /**
   * Records a numeric backreference; only the largest one is remembered.
   * @param {number} ref - the referenced group number.
   */
  backreference(ref) {
    if (ref > this.largestBackreference) {
      this.largestBackreference = ref;
    }
  }

  /**
   * Peeks at the next character without consuming it.
   * @returns {?string} `null` at end of input; in unicode mode the full code
   *   point (possibly two code units), otherwise a single code unit.
   */
  nextCodePoint() {
    if (this.empty()) {
      return null;
    }
    if (this.unicode) {
      return String.fromCodePoint(this.pattern.codePointAt(this.index));
    }
    return this.pattern.charAt(this.index);
  }

  /** Consumes what `nextCodePoint` would return; does nothing at end of input. */
  skipCodePoint() {
    const next = this.nextCodePoint();
    if (next !== null) {
      this.index += next.length;
    }
  }

  /**
   * Consumes `str` if the unread input starts with it.
   * @param {string} str
   * @returns {boolean} whether `str` was consumed.
   */
  eat(str) {
    if (!this.match(str)) {
      return false;
    }
    this.index += str.length;
    return true;
  }

  /**
   * Consumes one identifier character, written literally or as a `\u` escape.
   * @returns {?{character: string, characterValue: number}} the character and
   *   its code point, or `null` (with `index` unchanged) if nothing was read.
   */
  eatIdentifierCodePoint() {
    const originalIndex = this.index;
    if (this.match('\\u')) {
      // Step over the backslash; acceptUnicodeEscape starts at the `u`.
      this.skipCodePoint();
      const escape = acceptUnicodeEscape(this);
      // A value that is not an integer would make String.fromCodePoint throw,
      // so treat it like an unmatched escape.
      if (!escape.matched || !Number.isInteger(escape.value)) {
        this.index = originalIndex;
        return null;
      }
      return { character: String.fromCodePoint(escape.value), characterValue: escape.value };
    }
    const character = this.nextCodePoint();
    if (character === null) {
      return null;
    }
    this.index += character.length;
    return { character, characterValue: character.codePointAt(0) };
  }

  /**
   * Consumes one character that may start an identifier (`_`, `$`, or anything
   * `isIdentifierStart` accepts).
   * @returns {?string} the character, or `null` with `index` unchanged.
   */
  eatIdentifierStart() {
    const originalIndex = this.index;
    const codePoint = this.eatIdentifierCodePoint();
    if (codePoint !== null) {
      const { character, characterValue } = codePoint;
      if (character === '_' || character === '$' || isIdentifierStart(characterValue)) {
        return character;
      }
    }
    this.index = originalIndex;
    return null;
  }

  /**
   * Consumes one character that may continue an identifier (ZWNJ, ZWJ, `$`, or
   * anything `isIdentifierPart` accepts).
   * @returns {?string} the character, or `null` with `index` unchanged.
   */
  eatIdentifierPart() {
    const originalIndex = this.index;
    const codePoint = this.eatIdentifierCodePoint();
    if (codePoint !== null) {
      const { character, characterValue } = codePoint;
      // \u200C is ZWNJ, \u200D is ZWJ
      if (character === '\u200C' || character === '\u200D' || character === '$' || isIdentifierPart(characterValue)) {
        return character;
      }
    }
    this.index = originalIndex;
    return null;
  }

  /**
   * Consumes the first of `strs` that the unread input starts with.
   * @param {...string} strs - candidates, tried in order.
   * @returns {?string} the consumed candidate, or `null` if none matched.
   */
  eatAny(...strs) {
    for (const str of strs) {
      if (this.eat(str)) {
        return str;
      }
    }
    return null;
  }

  /**
   * @param {string} str
   * @returns {boolean} whether the unread input starts with `str` (nothing is consumed).
   */
  match(str) {
    return this.pattern.startsWith(str, this.index);
  }

  /**
   * @param {...string} strs
   * @returns {boolean} whether the unread input starts with any of `strs`.
   */
  matchAny(...strs) {
    return strs.some(str => this.match(str));
  }

  /**
   * Consumes a maximal run of ASCII decimal digits.
   * @returns {?string} the digits, or `null` if the next character is not a digit.
   */
  eatNaturalNumber() {
    const start = this.index;
    let end = start;
    while (end < this.pattern.length && this.pattern[end] >= '0' && this.pattern[end] <= '9') {
      end++;
    }
    if (end === start) {
      return null;
    }
    this.index = end;
    return this.pattern.slice(start, end);
  }
}
|
|
|
|
// acceptRegex
|
|
module.exports = (pattern, { unicode = false } = {}) => {
|
|
let state = new PatternAcceptorState(pattern, unicode);
|
|
let accepted = acceptDisjunction(state);
|
|
if (accepted.matched) {
|
|
if (state.unicode) {
|
|
if (state.largestBackreference > state.capturingGroups) {
|
|
return false;
|
|
}
|
|
}
|
|
if (state.groupingNames.length > 0 || state.unicode) {
|
|
for (let backreferenceName of state.backreferenceNames) {
|
|
if (state.groupingNames.indexOf(backreferenceName) === -1) {
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return accepted.matched;
|
|
};
|
|
|
|
/**
 * Wraps an acceptor so that a failed attempt leaves no trace on the cursor.
 *
 * `index`, `largestBackreference` and `capturingGroups` are snapshotted before
 * `func` runs and restored if it reports `matched: false`. The name lists
 * (`backreferenceNames`, `groupingNames`) are not snapshotted.
 *
 * @param {function(Object): {matched: boolean}} func - acceptor to wrap.
 * @returns {function(Object): {matched: boolean}} acceptor returning `func`'s result unchanged.
 */
const backtrackOnFailure = func => state => {
  const { index, largestBackreference, capturingGroups } = state;
  const result = func(state);
  if (!result.matched) {
    state.index = index;
    state.largestBackreference = largestBackreference;
    state.capturingGroups = capturingGroups;
  }
  return result;
};
|
|
|
|
/**
 * Accepts the part of a unicode escape that follows the backslash:
 *  - `u{H...}` (unicode mode only): one or more hex digits, value at most 0x10FFFF;
 *  - `uHHHH`: exactly four hex digits. In unicode mode a lead surrogate that is
 *    directly followed by `\uHHHH` naming a trail surrogate is combined with it
 *    into a single code point.
 *
 * @param {PatternAcceptorState} state - cursor positioned at the `u`.
 * @returns {{matched: boolean, value?: number}} on success `value` is the code
 *   point (or code unit) denoted; on failure the cursor is restored.
 */
const acceptUnicodeEscape = backtrackOnFailure(state => {
  // Reads exactly four hex digits, or returns null if any is missing.
  const readFourHexDigits = cursor => {
    const digits = [0, 0, 0, 0].map(() => cursor.eatAny(...hexDigits));
    return digits.some(digit => digit === null) ? null : parseInt(digits.join(''), 16);
  };

  if (!state.eat('u')) {
    return { matched: false };
  }

  if (state.unicode && state.eat('{')) {
    const digits = [];
    while (!state.eat('}')) {
      const digit = state.eatAny(...hexDigits);
      if (digit === null) {
        return { matched: false };
      }
      digits.push(digit);
    }
    // `\u{}` has no digits: parseInt('') is NaN, which would slip past the
    // range check below, so reject it explicitly.
    if (digits.length === 0) {
      return { matched: false };
    }
    const value = parseInt(digits.join(''), 16);
    return value > 0x10FFFF ? { matched: false } : { matched: true, value };
  }

  const value = readFourHexDigits(state);
  if (value === null) {
    return { matched: false };
  }

  if (state.unicode && value >= 0xD800 && value <= 0xDBFF) {
    // Lead surrogate: try to pair it with an immediately following trail
    // surrogate escape. If that fails the lead stands on its own.
    const surrogatePair = backtrackOnFailure(subState => {
      if (!subState.eat('\\u')) {
        return { matched: false };
      }
      const trail = readFourHexDigits(subState);
      if (trail === null || trail < 0xDC00 || trail >= 0xE000) {
        return { matched: false };
      }
      return { matched: true, value: 0x10000 + ((value & 0x03FF) << 10) + (trail & 0x03FF) };
    })(state);
    if (surrogatePair.matched) {
      return surrogatePair;
    }
  }

  return { matched: true, value };
});
|
|
|
|
/**
 * Accepts a `|`-separated list of alternatives, optionally ending at a
 * terminator string (used with `')'` for groups).
 *
 * @param {PatternAcceptorState} state - cursor to advance.
 * @param {string} [terminator] - when given, the disjunction must be followed
 *   by this string, which is consumed.
 * @returns {{matched: boolean}}
 */
const acceptDisjunction = (state, terminator) => {
  const hasTerminator = terminator !== void 0;
  do {
    if (hasTerminator && state.eat(terminator)) {
      return { matched: true };
    }
    if (state.match('|')) {
      // Empty alternative: go straight to the loop condition, which eats the `|`.
      continue;
    }
    if (!acceptAlternative(state, terminator).matched) {
      return { matched: false };
    }
  } while (state.eat('|'));
  return { matched: !hasTerminator || !!state.eat(terminator) };
};
|
|
|
|
/**
 * Accepts a (possibly empty) run of terms, stopping at `|`, at end of input,
 * or just before `terminator` when one is given.
 *
 * @param {PatternAcceptorState} state - cursor to advance.
 * @param {string} [terminator] - string that ends the alternative without being consumed.
 * @returns {{matched: boolean}} `matched: false` as soon as one term is rejected.
 */
const acceptAlternative = (state, terminator) => {
  const atEnd = () =>
    state.match('|') || state.empty() || (terminator !== void 0 && state.match(terminator));
  while (!atEnd()) {
    if (!acceptTerm(state).matched) {
      return { matched: false };
    }
  }
  return { matched: true };
};
|
|
|
|
/**
 * Combines acceptors into one that tries each in order and returns the first
 * result whose `matched` is truthy.
 *
 * @param {...function(Object): {matched: boolean}} acceptors - tried left to right.
 * @returns {function(Object): {matched: boolean}} returns the winning
 *   acceptor's own result object, or `{ matched: false }` if none matched.
 */
const anyOf = (...acceptors) => state => {
  for (const acceptor of acceptors) {
    const result = acceptor(state);
    if (result.matched) {
      return result;
    }
  }
  return { matched: false };
};
|
|
|
|
/**
 * Accepts a single term: an assertion, or an atom with an optional quantifier.
 * Outside unicode mode a lookahead group may also carry a quantifier, so it is
 * tried first via acceptQuantifiableAssertion.
 *
 * @param {PatternAcceptorState} state - cursor to advance.
 * @returns {{matched: boolean}}
 */
const acceptTerm = state => {
  // non-quantified references are rolled into quantified accepts to improve performance significantly.
  const candidates = state.unicode
    ? [acceptAssertion, acceptQuantified(acceptAtom)]
    : [acceptQuantified(acceptQuantifiableAssertion), acceptAssertion, acceptQuantified(acceptAtom)];
  return anyOf(...candidates)(state);
};
|
|
|
|
/**
 * Builds an acceptor for a parenthesised group whose opening `(` is followed
 * by a specific label (for example `?:` or `?=`).
 *
 * @param {function(PatternAcceptorState): *} predicate - consumes the label
 *   and returns a truthy value if it was present.
 * @returns {function(PatternAcceptorState): {matched: boolean}} acceptor that
 *   consumes `(`, the label, a disjunction and the closing `)`; the cursor is
 *   restored on failure.
 */
const acceptLabeledGroup = predicate => backtrackOnFailure(state => {
  if (state.eat('(') && predicate(state)) {
    return acceptDisjunction(state, ')');
  }
  return { matched: false };
});
|
|
|
|
// Accepts a lookahead group, `(?=...)` or `(?!...)`. acceptTerm uses it only
// outside unicode mode, wrapped in acceptQuantified.
const acceptQuantifiableAssertion = acceptLabeledGroup(state => !!state.eatAny('?=', '?!'));
|
|
|
|
/**
 * Accepts an assertion: `^`, `$`, `\b`, `\B`, or a lookaround group.
 *
 * In unicode mode all four lookaround labels are accepted here. Outside
 * unicode mode only lookbehind labels are accepted here; lookaheads are left
 * to acceptQuantifiableAssertion.
 *
 * @param {PatternAcceptorState} state - cursor to advance.
 * @returns {{matched: boolean}}
 */
const acceptAssertion = state => {
  if (state.eatAny('^', '$', '\\b', '\\B')) {
    return { matched: true };
  }
  const eatLabel = subState => {
    const labels = subState.unicode ? ['?=', '?!', '?<=', '?<!'] : ['?<=', '?<!'];
    return !!subState.eatAny(...labels);
  };
  return acceptLabeledGroup(eatLabel)(state);
};
|
|
|
|
/**
 * Accepts a non-empty run of decimal digits.
 *
 * @param {PatternAcceptorState} state - cursor to advance.
 * @returns {{matched: boolean}} `matched` is true when at least one digit was consumed.
 */
const acceptDecimal = state => {
  const digits = state.eatNaturalNumber();
  return { matched: digits !== null };
};
|
|
|
|
/**
 * Builds an acceptor for "`acceptor`, optionally followed by a quantifier".
 *
 * Quantifiers are `*`, `+`, `?`, `{n}`, `{n,}` and `{n,m}` (with n <= m), each
 * optionally followed by `?`. If a `{` follows that does not start a valid
 * braced quantifier, unicode mode rejects the term; otherwise the term is
 * accepted without a quantifier and the `{` is left unconsumed.
 *
 * @param {function(PatternAcceptorState): {matched: boolean}} acceptor - the quantified item.
 * @returns {function(PatternAcceptorState): {matched: boolean}} acceptor that
 *   restores the cursor on failure.
 */
const acceptQuantified = acceptor => backtrackOnFailure(state => {
  if (!acceptor(state).matched) {
    return { matched: false };
  }

  if (state.match('{')) {
    const braced = backtrackOnFailure(subState => {
      subState.eat('{');
      const lower = subState.eatNaturalNumber();
      if (lower === null) {
        return { matched: false };
      }
      // `{n,}` has a comma but no upper bound, hence the digit lookahead.
      if (subState.eat(',') && subState.matchAny(...decimalDigits)) {
        const upper = subState.eatNaturalNumber();
        if (upper === null || parseInt(lower, 10) > parseInt(upper, 10)) {
          return { matched: false };
        }
      }
      if (!subState.eat('}')) {
        return { matched: false };
      }
      subState.eat('?');
      return { matched: true };
    })(state);
    if (!braced.matched) {
      return { matched: !state.unicode };
    }
    return braced;
  }

  if (state.eatAny('*', '+', '?')) {
    state.eat('?');
  }
  return { matched: true };
});
|
|
|
|
/**
 * Builds an acceptor that consumes exactly one character, provided it is not
 * in `characters`.
 *
 * @param {string[]} characters - characters that must not be accepted.
 * @returns {function(PatternAcceptorState): {matched: boolean}} fails at end
 *   of input or on an excluded character, consuming nothing.
 */
const acceptCharacterExcept = characters => state => {
  const next = state.nextCodePoint();
  if (next === null || characters.includes(next)) {
    return { matched: false };
  }
  state.skipCodePoint();
  return { matched: true };
};
|
|
|
|
// Accepts one character that is not in syntaxCharacters; acceptAtom uses it in unicode mode.
const acceptPatternCharacter = acceptCharacterExcept(syntaxCharacters);
|
|
|
|
// Accepts one character that is not in extendedSyntaxCharacters; acceptAtom
// falls back to it outside unicode mode.
const acceptExtendedPatternCharacter = acceptCharacterExcept(extendedSyntaxCharacters);
|
|
|
|
/**
 * Recognises text shaped like a braced quantifier -- `{n}`, `{n,}` or `{n,m}`
 * -- at the current position. acceptAtom uses this to reject such text where
 * an atom is expected.
 *
 * @param {PatternAcceptorState} state - cursor; advanced past the quantifier
 *   on a match, restored otherwise.
 * @returns {{matched: boolean}}
 */
const acceptInvalidBracedQuantifier = state => {
  return backtrackOnFailure(subState => {
    if (!subState.eat('{') || !acceptDecimal(subState).matched) {
      return { matched: false };
    }
    // After a comma, either the brace closes right away or a second number follows.
    if (subState.eat(',') && !subState.match('}') && !acceptDecimal(subState).matched) {
      return { matched: false };
    }
    return { matched: !!subState.eat('}') };
  })(state);
};
|
|
|
|
/**
 * Accepts a single atom: `.`, a backslash escape, a character class, a
 * non-capturing group, a capturing group, or a literal character.
 *
 * Unicode mode tries a literal first, excluding every syntax character.
 * Outside unicode mode:
 *  - a backslash directly before `c` is accepted on its own (the `c` is left
 *    for the next atom) when no regular escape matched;
 *  - text shaped like a braced quantifier is rejected at atom position;
 *  - anything not in extendedSyntaxCharacters is accepted as a literal as the
 *    last resort.
 *
 * @param {PatternAcceptorState} state - cursor to advance.
 * @returns {{matched: boolean}}
 */
const acceptAtom = state => {
  const acceptDot = subState => ({ matched: !!subState.eat('.') });
  const acceptEscape = backtrackOnFailure(
    subState => subState.eat('\\') ? acceptAtomEscape(subState) : { matched: false }
  );
  const acceptNonCapturingGroup = acceptLabeledGroup(subState => subState.eat('?:'));

  if (state.unicode) {
    return anyOf(
      acceptPatternCharacter,
      acceptDot,
      acceptEscape,
      acceptCharacterClass,
      acceptNonCapturingGroup,
      acceptGrouping
    )(state);
  }

  const acceptBackslashBeforeC = backtrackOnFailure(
    subState => ({ matched: subState.eat('\\') && subState.match('c') })
  );
  const matched = anyOf(
    acceptDot,
    acceptEscape,
    acceptBackslashBeforeC,
    acceptCharacterClass,
    acceptNonCapturingGroup,
    acceptGrouping
  )(state);
  if (matched.matched) {
    return matched;
  }
  if (acceptInvalidBracedQuantifier(state).matched) {
    return { matched: false };
  }
  return acceptExtendedPatternCharacter(state);
};
|
|
|
|
/**
 * Accepts a capturing group, `( ... )` or `(?<name> ... )`.
 *
 * On success the group is counted in `state.capturingGroups` and, if named,
 * its name is added to `state.groupingNames`. A name that is already present
 * makes the group fail.
 *
 * @param {PatternAcceptorState} state - cursor; restored on failure.
 * @returns {{matched: boolean}}
 */
const acceptGrouping = backtrackOnFailure(state => {
  if (!state.eat('(')) {
    return { matched: false };
  }

  // Optional `?<name>`; if absent or malformed the cursor returns to just after `(`.
  const groupName = backtrackOnFailure(subState => {
    if (!subState.eat('?')) {
      return { matched: false };
    }
    return acceptGroupName(subState);
  })(state);

  if (!acceptDisjunction(state, ')').matched) {
    return { matched: false };
  }

  if (groupName.matched) {
    if (state.groupingNames.includes(groupName.data)) {
      return { matched: false };
    }
    state.groupingNames.push(groupName.data);
  }
  state.capturingGroups++;
  return { matched: true };
});
|
|
|
|
/**
 * Accepts the digits of a decimal escape (the text after the backslash).
 *
 *  - `0` is accepted by itself. In unicode mode it must not be followed by
 *    another decimal digit; outside unicode mode any following digits are
 *    left unconsumed.
 *  - Any other digit starts a number that is recorded as a numeric
 *    backreference through `state.backreference`.
 *
 * @param {PatternAcceptorState} state - cursor; restored on failure.
 * @returns {{matched: boolean}}
 */
const acceptDecimalEscape = backtrackOnFailure(state => {
  const firstDecimal = state.eatAny(...decimalDigits);
  if (firstDecimal === null) {
    return { matched: false };
  }
  if (firstDecimal === '0') {
    // `\0` followed by a digit is only meaningful as a legacy octal escape,
    // which unicode mode does not have.
    if (state.unicode && state.matchAny(...decimalDigits)) {
      return { matched: false };
    }
    return { matched: true };
  }
  // we also accept octal escapes here, but it is impossible to tell if it is a octal escape until all parsing is complete.
  // octal escapes are handled in acceptCharacterEscape for classes
  const remainingDigits = state.eatNaturalNumber() || '';
  state.backreference(parseInt(firstDecimal + remainingDigits, 10));
  return { matched: true };
});
|
|
|
|
/**
 * Accepts a character-class escape (the text after the backslash): one of
 * `d D s S w W`, or -- in unicode mode only -- `p{...}` / `P{...}` enclosing a
 * valid Unicode property expression.
 *
 * @param {PatternAcceptorState} state - cursor to advance.
 * @returns {{matched: boolean}} the result carries no `value`.
 */
const acceptCharacterClassEscape = state => {
  if (state.eatAny('d', 'D', 's', 'S', 'w', 'W')) {
    return { matched: true };
  }
  if (!state.unicode) {
    return { matched: false };
  }
  return backtrackOnFailure(subState => {
    if (!subState.eat('p{') && !subState.eat('P{')) {
      return { matched: false };
    }
    if (!acceptUnicodePropertyValueExpression(subState).matched) {
      return { matched: false };
    }
    return { matched: !!subState.eat('}') };
  })(state);
};
|
|
|
|
/**
 * Consumes a Unicode property name: a run of ASCII letters and underscores.
 *
 * @param {PatternAcceptorState} state - cursor to advance.
 * @returns {{matched: boolean, data: string}} `data` is the text consumed;
 *   `matched` is false when nothing was consumed.
 */
const acceptUnicodePropertyName = state => {
  const characters = [];
  for (;;) {
    const character = state.eatAny(...controlCharacters, '_');
    if (!character) {
      break;
    }
    characters.push(character);
  }
  return { matched: characters.length > 0, data: characters.join('') };
};
|
|
|
|
/**
 * Consumes a Unicode property value: a run of ASCII letters, decimal digits
 * and underscores.
 *
 * @param {PatternAcceptorState} state - cursor to advance.
 * @returns {{matched: boolean, data: string}} `data` is the text consumed;
 *   `matched` is false when nothing was consumed.
 */
const acceptUnicodePropertyValue = state => {
  const characters = [];
  for (;;) {
    const character = state.eatAny(...controlCharacters, ...decimalDigits, '_');
    if (!character) {
      break;
    }
    characters.push(character);
  }
  return { matched: characters.length > 0, data: characters.join('') };
};
|
|
|
|
// Names of the non-binary Unicode properties, with their aliases. They need a
// `name=value` form, so acceptLoneUnicodePropertyNameOrValue rejects them when
// they appear alone inside `\p{...}`.
// https://www.ecma-international.org/ecma-262/9.0/index.html#table-nonbinary-unicode-properties
const illegalLoneUnicodePropertyNames = [
  'General_Category',
  'Script',
  'Script_Extensions',
  'scx',
  'sc',
  'gc',
];
|
|
|
|
// The 'General_Category' entry of the property-value mappings. A lone name in
// `\p{...}` is accepted when this has an entry for it
// (see acceptLoneUnicodePropertyNameOrValue).
const generalCategoryValues = matchPropertyValueMappings.get('General_Category');
|
|
|
|
/**
 * Accepts the content of `\p{...}` when it is a single word rather than
 * `name=value`.
 *
 * The word is rejected if it is listed in illegalLoneUnicodePropertyNames.
 * Otherwise it is accepted if `matchProperty` recognises it without throwing,
 * or if `generalCategoryValues` has an entry for it.
 *
 * @param {PatternAcceptorState} state - cursor to advance.
 * @returns {{matched: boolean}}
 */
const acceptLoneUnicodePropertyNameOrValue = state => {
  const loneValue = acceptUnicodePropertyValue(state);
  if (!loneValue.matched || illegalLoneUnicodePropertyNames.includes(loneValue.data)) {
    return { matched: false };
  }
  const isKnownProperty = catchIsFalse(() => matchProperty(loneValue.data));
  return { matched: isKnownProperty || generalCategoryValues.get(loneValue.data) != null };
};
|
|
|
|
/**
 * Accepts the content between the braces of `\p{...}` / `\P{...}`: either
 * `name=value`, validated with `matchPropertyValue` after resolving `name`
 * through `propertyAliases`, or a lone word handled by
 * acceptLoneUnicodePropertyNameOrValue.
 *
 * @param {PatternAcceptorState} state - cursor to advance.
 * @returns {{matched: boolean}}
 */
const acceptUnicodePropertyValueExpression = state => {
  const acceptNameEqualsValue = backtrackOnFailure(subState => {
    const name = acceptUnicodePropertyName(subState);
    if (!name.matched || !subState.eat('=')) {
      return { matched: false };
    }
    const value = acceptUnicodePropertyValue(subState);
    if (!value.matched) {
      return { matched: false };
    }
    const canonicalName = propertyAliases.get(name.data) || name.data;
    return { matched: catchIsFalse(() => matchPropertyValue(canonicalName, value.data)) };
  });
  return anyOf(
    acceptNameEqualsValue,
    backtrackOnFailure(acceptLoneUnicodePropertyNameOrValue)
  )(state);
};
|
|
|
|
/**
 * Accepts a character escape (the text after the backslash) and reports the
 * character value it denotes. The forms are tried in the order written below.
 *
 * @param {PatternAcceptorState} state - cursor to advance.
 * @returns {{matched: boolean, value?: number}}
 */
const acceptCharacterEscape = anyOf(
  // Control escapes: f n r t v.
  state => {
    const eaten = state.eatAny(...controlEscapeCharacters);
    if (eaten === null) {
      return { matched: false };
    }
    return { matched: true, value: controlEscapeCharacterValues[eaten] };
  },
  // `c` followed by an ASCII letter; the value is the letter's code modulo 32.
  backtrackOnFailure(state => {
    if (!state.eat('c')) {
      return { matched: false };
    }
    const character = state.eatAny(...controlCharacters);
    if (character === null) {
      return { matched: false };
    }
    return { matched: true, value: character.charCodeAt(0) % 32 };
  }),
  // `0` not followed by another decimal digit.
  backtrackOnFailure(state => {
    if (!state.eat('0') || state.eatAny(...decimalDigits) !== null) {
      return { matched: false };
    }
    return { matched: true, value: 0 };
  }),
  // `x` followed by exactly two hex digits.
  backtrackOnFailure(state => {
    if (!state.eat('x')) {
      return { matched: false };
    }
    const digits = [0, 0].map(() => state.eatAny(...hexDigits));
    if (digits.some(digit => digit === null)) {
      return { matched: false };
    }
    return { matched: true, value: parseInt(digits.join(''), 16) };
  }),
  // `uHHHH` / `u{H...}`.
  acceptUnicodeEscape,
  // Octal escape, outside unicode mode only: up to three digits when the first
  // is 0-3, otherwise up to two.
  backtrackOnFailure(state => {
    if (state.unicode) {
      return { matched: false };
    }
    const first = state.eatAny(...octalDigits);
    if (first === null) {
      return { matched: false };
    }
    const firstValue = parseInt(first, 8);
    if (!octalDigits.includes(state.nextCodePoint())) {
      return { matched: true, value: firstValue };
    }
    const secondValue = parseInt(state.eatAny(...octalDigits), 8);
    if (firstValue >= 4 || !octalDigits.includes(state.nextCodePoint())) {
      return { matched: true, value: firstValue << 3 | secondValue };
    }
    const thirdValue = parseInt(state.eatAny(...octalDigits), 8);
    return { matched: true, value: firstValue << 6 | secondValue << 3 | thirdValue };
  }),
  // Unicode mode: only syntax characters may be escaped to themselves...
  backtrackOnFailure(state => {
    if (!state.unicode) {
      return { matched: false };
    }
    const value = state.eatAny(...syntaxCharacters);
    if (value === null) {
      return { matched: false };
    }
    return { matched: true, value: value.charCodeAt(0) };
  }),
  // ...plus `/`.
  state => {
    if (!state.unicode || !state.eat('/')) {
      return { matched: false };
    }
    return { matched: true, value: '/'.charCodeAt(0) };
  },
  // Outside unicode mode any other character except `c` and `k` may be escaped
  // to itself.
  backtrackOnFailure(state => {
    if (state.unicode) {
      return { matched: false };
    }
    const next = state.nextCodePoint();
    if (next === null || next === 'c' || next === 'k') {
      return { matched: false };
    }
    state.skipCodePoint();
    return { matched: true, value: next.codePointAt(0) };
  })
);
|
|
|
|
/**
 * Accepts a named backreference (the text after the backslash): `k`,
 * optionally followed by `<name>`.
 *
 * A bare `k` is accepted too, but INVALID_NAMED_BACKREFERENCE_SENTINEL is then
 * recorded in place of a name. Whether the recorded entries are valid is
 * decided later by the exported acceptor.
 *
 * @param {PatternAcceptorState} state - cursor; restored if `k` is absent.
 * @returns {{matched: boolean}}
 */
const acceptGroupNameBackreference = backtrackOnFailure(state => {
  if (!state.eat('k')) {
    return { matched: false };
  }
  const name = acceptGroupName(state);
  state.backreferenceNames.push(name.matched ? name.data : INVALID_NAMED_BACKREFERENCE_SENTINEL);
  return { matched: true };
});
|
|
|
|
/**
 * Accepts a group name in angle brackets: `<`, one identifier-start
 * character, any number of identifier-part characters, then `>`.
 *
 * @param {PatternAcceptorState} state - cursor; restored on failure.
 * @returns {{matched: boolean, data?: string}} on success `data` is the name
 *   without the brackets, with any `\u` escapes already decoded.
 */
const acceptGroupName = backtrackOnFailure(state => {
  if (!state.eat('<')) {
    return { matched: false };
  }
  const start = state.eatIdentifierStart();
  if (!start) {
    return { matched: false };
  }
  const characters = [start];
  for (;;) {
    const part = state.eatIdentifierPart();
    if (!part) {
      break;
    }
    characters.push(part);
  }
  if (!state.eat('>')) {
    return { matched: false };
  }
  return { matched: true, data: characters.join('') };
});
|
|
|
|
/**
 * Accepts whatever may follow a backslash at atom position, trying in order:
 * a decimal escape, a character-class escape, a character escape, and a named
 * backreference.
 *
 * @param {PatternAcceptorState} state - cursor positioned just after the backslash.
 * @returns {{matched: boolean}}
 */
const acceptAtomEscape = anyOf(
  acceptDecimalEscape,
  acceptCharacterClassEscape,
  acceptCharacterEscape,
  acceptGroupNameBackreference
);
|
|
|
|
/**
 * Accepts a bracketed character class, `[...]` or `[^...]`.
 *
 * Class atoms are literals or backslash escapes. A range `a-b` is rejected
 * when both ends have a value and the first is greater than the second. In
 * unicode mode a range is also rejected when either end is an escape that
 * denotes a set rather than a single character (such an atom has no `value`).
 *
 * @param {PatternAcceptorState} state - cursor; restored on failure.
 * @returns {{matched: boolean}}
 */
const acceptCharacterClass = backtrackOnFailure(state => {
  if (!state.eat('[')) {
    return { matched: false };
  }
  state.eat('^');

  // Escapes allowed inside a class (the text after the backslash).
  const acceptClassEscape = anyOf(
    // `\b` means backspace inside a class.
    subState => ({ matched: !!subState.eat('b'), value: 0x0008 }),
    // `\-` is allowed in unicode mode only.
    subState => ({ matched: subState.unicode && !!subState.eat('-'), value: '-'.charCodeAt(0) }),
    // Outside unicode mode, `\c` may be followed by a decimal digit or `_`.
    backtrackOnFailure(subState => {
      if (subState.unicode || !subState.eat('c')) {
        return { matched: false };
      }
      const character = subState.eatAny(...decimalDigits, '_');
      if (character === null) {
        return { matched: false };
      }
      return { matched: true, value: character.charCodeAt(0) % 32 };
    }),
    acceptCharacterClassEscape,
    acceptCharacterEscape,
    // We special-case `\k` because `acceptCharacterEscape` rejects `\k` unconditionally,
    // deferring `\k` to acceptGroupNameBackreference, which is not called here.
    // See also https://github.com/tc39/ecma262/issues/2037. This code takes the route of
    // making it unconditionally legal, rather than legal only in the absence of a group name.
    subState => ({ matched: !subState.unicode && !!subState.eat('k'), value: 107 })
  );

  // One class atom other than a bare `-`.
  const acceptClassAtomNoDash = localState => {
    const next = localState.nextCodePoint();
    if (next === null || next === ']' || next === '-') {
      return { matched: false };
    }
    if (next !== '\\') {
      localState.skipCodePoint();
      return { matched: true, value: next.codePointAt(0) };
    }
    localState.eat('\\');
    const classEscape = acceptClassEscape(localState);
    // Outside unicode mode a backslash before `c` that formed no escape counts
    // as a literal backslash; the `c` is read as the next atom.
    if (!classEscape.matched && localState.nextCodePoint() === 'c' && !localState.unicode) {
      return { matched: true, value: '\\'.charCodeAt(0) };
    }
    return classEscape;
  };

  // One class atom, `-` included.
  const acceptClassAtom = localState => {
    if (localState.eat('-')) {
      return { matched: true, value: '-'.charCodeAt(0) };
    }
    return acceptClassAtomNoDash(localState);
  };

  // True for a matched atom that carries no single character value.
  const isUnvaluedPassedAtom = subAtom => subAtom.value === void 0 && subAtom.matched;

  // Continues after `atom`: either it begins a range `atom-other`, or it
  // stands alone and the remaining atoms follow.
  const finishClassRange = (localState, atom) => {
    if (localState.eat('-')) {
      if (localState.match(']')) {
        // Trailing `-` just before the closing bracket is a literal.
        return { matched: true };
      }
      const otherAtom = acceptClassAtom(localState);
      if (!otherAtom.matched) {
        return { matched: false };
      }
      if (isUnvaluedPassedAtom(atom) || isUnvaluedPassedAtom(otherAtom)) {
        if (localState.unicode) {
          return { matched: false };
        }
      } else if (atom.value > otherAtom.value) {
        return { matched: false };
      }
      if (localState.match(']')) {
        return { matched: true };
      }
      return acceptNonEmptyClassRanges(localState);
    }
    if (localState.match(']')) {
      return { matched: true };
    }
    return acceptNonEmptyClassRangesNoDash(localState);
  };

  const acceptNonEmptyClassRanges = localState => {
    const atom = acceptClassAtom(localState);
    return atom.matched ? finishClassRange(localState, atom) : { matched: false };
  };

  const acceptNonEmptyClassRangesNoDash = localState => {
    const atom = acceptClassAtomNoDash(localState);
    return atom.matched ? finishClassRange(localState, atom) : { matched: false };
  };

  if (state.eat(']')) {
    return { matched: true };
  }

  const value = acceptNonEmptyClassRanges(state);
  if (value.matched) {
    state.eat(']'); // cannot fail, as above will not return matched if it is not seen in advance
  }
  return value;
});
|