Add capacitorjs runtime
This commit is contained in:
parent
d0ece489ee
commit
f90c0e6c40
8362 changed files with 1502407 additions and 1 deletions
1147
node_modules/chevrotain/src/scan/lexer.ts
generated
vendored
Normal file
1147
node_modules/chevrotain/src/scan/lexer.ts
generated
vendored
Normal file
File diff suppressed because it is too large
Load diff
21
node_modules/chevrotain/src/scan/lexer_errors_public.ts
generated
vendored
Normal file
21
node_modules/chevrotain/src/scan/lexer_errors_public.ts
generated
vendored
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
import { ILexerErrorMessageProvider, IToken } from "../../api"
|
||||
|
||||
/**
 * Default implementation of the lexer error message provider.
 * It is the value used for the `errorMessageProvider` lexer config option
 * when the user does not supply one.
 */
export const defaultLexerErrorProvider: ILexerErrorMessageProvider = {
  /**
   * Message for an attempt to pop the last remaining lexer mode.
   *
   * @param token - the token whose matching triggered the pop.
   */
  buildUnableToPopLexerModeMessage(token: IToken): string {
    const offendingImage = token.image
    return `Unable to pop Lexer Mode after encountering Token ->${offendingImage}<- The Mode Stack is empty`
  },

  /**
   * Message for a run of characters that no pattern could match.
   *
   * @param fullText - the complete input text.
   * @param startOffset - offset of the first unexpected character.
   * @param length - number of characters that were skipped.
   * @param line - accepted for interface compatibility, not used in the default message.
   * @param column - accepted for interface compatibility, not used in the default message.
   */
  buildUnexpectedCharactersMessage(
    fullText: string,
    startOffset: number,
    length: number,
    line?: number,
    column?: number
  ): string {
    const unexpectedChar = fullText.charAt(startOffset)
    return `unexpected character: ->${unexpectedChar}<- at offset: ${startOffset}, skipped ${length} characters.`
  }
}
|
||||
922
node_modules/chevrotain/src/scan/lexer_public.ts
generated
vendored
Normal file
922
node_modules/chevrotain/src/scan/lexer_public.ts
generated
vendored
Normal file
|
|
@ -0,0 +1,922 @@
|
|||
import {
|
||||
analyzeTokenTypes,
|
||||
charCodeToOptimizedIndex,
|
||||
cloneEmptyGroups,
|
||||
DEFAULT_MODE,
|
||||
LineTerminatorOptimizedTester,
|
||||
performRuntimeChecks,
|
||||
performWarningRuntimeChecks,
|
||||
SUPPORT_STICKY,
|
||||
validatePatterns
|
||||
} from "./lexer"
|
||||
import {
|
||||
cloneArr,
|
||||
cloneObj,
|
||||
forEach,
|
||||
IDENTITY,
|
||||
isArray,
|
||||
isEmpty,
|
||||
isUndefined,
|
||||
keys,
|
||||
last,
|
||||
map,
|
||||
merge,
|
||||
NOOP,
|
||||
PRINT_WARNING,
|
||||
reduce,
|
||||
reject,
|
||||
timer,
|
||||
toFastProperties
|
||||
} from "../utils/utils"
|
||||
import { augmentTokenTypes } from "./tokens"
|
||||
import {
|
||||
CustomPatternMatcherFunc,
|
||||
ILexerConfig,
|
||||
ILexerDefinitionError,
|
||||
ILexingError,
|
||||
IMultiModeLexerDefinition,
|
||||
IToken,
|
||||
TokenType
|
||||
} from "../../api"
|
||||
import { defaultLexerErrorProvider } from "../scan/lexer_errors_public"
|
||||
import { clearRegExpParserCache } from "./reg_exp_parser"
|
||||
|
||||
/**
 * The value returned by `Lexer.tokenize`.
 */
export interface ILexingResult {
  /** Tokens that were added to the main token vector, in matching order. */
  tokens: IToken[]
  /** Tokens that were routed to a named group instead of the main vector, keyed by group name. */
  groups: Record<string, IToken[]>
  /** Errors recorded while lexing (unexpected characters, invalid mode pops). */
  errors: ILexingError[]
}
|
||||
|
||||
/**
 * Categories of problems that can be detected in a lexer definition.
 * The numeric values are spelled out explicitly; they are the same values
 * the implicit auto-numbering would produce.
 */
export enum LexerDefinitionErrorType {
  MISSING_PATTERN = 0,
  INVALID_PATTERN = 1,
  EOI_ANCHOR_FOUND = 2,
  UNSUPPORTED_FLAGS_FOUND = 3,
  DUPLICATE_PATTERNS_FOUND = 4,
  INVALID_GROUP_TYPE_FOUND = 5,
  PUSH_MODE_DOES_NOT_EXIST = 6,
  MULTI_MODE_LEXER_WITHOUT_DEFAULT_MODE = 7,
  MULTI_MODE_LEXER_WITHOUT_MODES_PROPERTY = 8,
  MULTI_MODE_LEXER_DEFAULT_MODE_VALUE_DOES_NOT_EXIST = 9,
  LEXER_DEFINITION_CANNOT_CONTAIN_UNDEFINED = 10,
  SOI_ANCHOR_FOUND = 11,
  EMPTY_MATCH_PATTERN = 12,
  NO_LINE_BREAKS_FLAGS = 13,
  UNREACHABLE_PATTERN = 14,
  IDENTIFY_TERMINATOR = 15,
  CUSTOM_LINE_BREAK = 16
}
|
||||
|
||||
/**
 * Minimal RegExp-like shape: any object exposing an `exec` method with the
 * custom pattern matcher signature.
 */
export interface IRegExpExec {
  /** Attempts a match; typed as a custom pattern matcher function. */
  exec: CustomPatternMatcherFunc
}
|
||||
|
||||
/**
 * Default values for every lexer config option.
 * The object is frozen at creation so the shared defaults can never be mutated;
 * the Lexer constructor merges user options on top of it into a new object.
 */
const DEFAULT_LEXER_CONFIG: ILexerConfig = Object.freeze<ILexerConfig>({
  deferDefinitionErrorsHandling: false,
  positionTracking: "full",
  lineTerminatorsPattern: /\n|\r\n?/g,
  lineTerminatorCharacters: ["\n", "\r"],
  ensureOptimizations: false,
  safeMode: false,
  errorMessageProvider: defaultLexerErrorProvider,
  traceInitPerf: false,
  skipValidations: false
})
|
||||
|
||||
export class Lexer {
|
||||
public static SKIPPED =
|
||||
"This marks a skipped Token pattern, this means each token identified by it will" +
|
||||
"be consumed and then thrown into oblivion, this can be used to for example to completely ignore whitespace."
|
||||
|
||||
public static NA = /NOT_APPLICABLE/
|
||||
public lexerDefinitionErrors: ILexerDefinitionError[] = []
|
||||
public lexerDefinitionWarning: ILexerDefinitionError[] = []
|
||||
|
||||
protected patternIdxToConfig: any = {}
|
||||
protected charCodeToPatternIdxToConfig: any = {}
|
||||
|
||||
protected modes: string[] = []
|
||||
protected defaultMode: string
|
||||
protected emptyGroups: { [groupName: string]: IToken } = {}
|
||||
|
||||
private config: ILexerConfig = undefined
|
||||
private trackStartLines: boolean = true
|
||||
private trackEndLines: boolean = true
|
||||
private hasCustom: boolean = false
|
||||
private canModeBeOptimized: any = {}
|
||||
|
||||
private traceInitPerf: boolean | number
|
||||
private traceInitMaxIdent: number
|
||||
private traceInitIndent: number
|
||||
|
||||
/**
 * Creates a Lexer from a flat list of token types (single mode) or from a
 * multi-mode definition.
 *
 * The work is wrapped in `TRACE_INIT` phases so initialization timing can be
 * traced when the `traceInitPerf` config option is enabled.
 *
 * @param lexerDefinition - the token types, or the multi-mode lexer definition.
 * @param config - lexer options; merged on top of `DEFAULT_LEXER_CONFIG`.
 * @throws Error when `config` is a boolean, when a custom `lineTerminatorsPattern`
 *   is given without custom `lineTerminatorCharacters`, when both `safeMode` and
 *   `ensureOptimizations` are set, when `positionTracking` is invalid, when
 *   definition errors were found and `deferDefinitionErrorsHandling` is off, or
 *   when `ensureOptimizations` is set and some mode cannot be optimized.
 */
constructor(
  protected lexerDefinition: TokenType[] | IMultiModeLexerDefinition,
  config: ILexerConfig = DEFAULT_LEXER_CONFIG
) {
  if (typeof config === "boolean") {
    throw Error(
      "The second argument to the Lexer constructor is now an ILexerConfig Object.\n" +
        "a boolean 2nd argument is no longer supported"
    )
  }

  // todo: defaults func?
  this.config = merge(DEFAULT_LEXER_CONFIG, config)

  // "traceInitPerf" is either a boolean switch or a maximum nesting depth to print.
  const traceSetting = this.config.traceInitPerf
  if (typeof traceSetting === "number") {
    this.traceInitMaxIdent = traceSetting
    this.traceInitPerf = true
  } else if (traceSetting === true) {
    this.traceInitMaxIdent = Infinity
    this.traceInitPerf = true
  }
  this.traceInitIndent = -1

  this.TRACE_INIT("Lexer Constructor", () => {
    let actualDefinition: IMultiModeLexerDefinition
    let isSingleMode = true

    this.TRACE_INIT("Lexer Config handling", () => {
      const usesDefaultLTPattern =
        this.config.lineTerminatorsPattern ===
        DEFAULT_LEXER_CONFIG.lineTerminatorsPattern
      const usesDefaultLTChars =
        this.config.lineTerminatorCharacters ===
        DEFAULT_LEXER_CONFIG.lineTerminatorCharacters

      if (usesDefaultLTPattern) {
        // optimized built-in implementation for the defaults definition of lineTerminators
        this.config.lineTerminatorsPattern = LineTerminatorOptimizedTester
      } else if (usesDefaultLTChars) {
        // a custom pattern requires the matching characters to be supplied as well.
        throw Error(
          "Error: Missing <lineTerminatorCharacters> property on the Lexer config.\n" +
            "\tFor details See: https://sap.github.io/chevrotain/docs/guide/resolving_lexer_errors.html#MISSING_LINE_TERM_CHARS"
        )
      }

      if (config.safeMode && config.ensureOptimizations) {
        throw Error(
          '"safeMode" and "ensureOptimizations" flags are mutually exclusive.'
        )
      }

      const tracking = this.config.positionTracking
      this.trackStartLines = /full|onlyStart/i.test(tracking)
      this.trackEndLines = /full/i.test(tracking)

      if (isArray(lexerDefinition)) {
        // Convert SingleModeLexerDefinition into a IMultiModeLexerDefinition.
        actualDefinition = <any>{ modes: {} }
        actualDefinition.modes[DEFAULT_MODE] = cloneArr(
          <TokenType[]>lexerDefinition
        )
        actualDefinition[DEFAULT_MODE] = DEFAULT_MODE
      } else {
        // no conversion needed, input should already be a IMultiModeLexerDefinition
        isSingleMode = false
        actualDefinition = cloneObj(<IMultiModeLexerDefinition>lexerDefinition)
      }
    })

    if (this.config.skipValidations === false) {
      this.TRACE_INIT("performRuntimeChecks", () => {
        const runtimeErrors = performRuntimeChecks(
          actualDefinition,
          this.trackStartLines,
          this.config.lineTerminatorCharacters
        )
        this.lexerDefinitionErrors = this.lexerDefinitionErrors.concat(
          runtimeErrors
        )
      })

      this.TRACE_INIT("performWarningRuntimeChecks", () => {
        const runtimeWarnings = performWarningRuntimeChecks(
          actualDefinition,
          this.trackStartLines,
          this.config.lineTerminatorCharacters
        )
        this.lexerDefinitionWarning = this.lexerDefinitionWarning.concat(
          runtimeWarnings
        )
      })
    }

    // for extra robustness to avoid throwing an none informative error message
    actualDefinition.modes = actualDefinition.modes || {}

    // an error of undefined TokenTypes will be detected in "performRuntimeChecks" above.
    // this transformation is to increase robustness in the case of partially invalid lexer definition.
    forEach(actualDefinition.modes, (modeTokenTypes, modeName) => {
      actualDefinition.modes[modeName] = reject<TokenType>(
        modeTokenTypes,
        (tokType) => isUndefined(tokType)
      )
    })

    let allModeNames = keys(actualDefinition.modes)

    forEach(
      actualDefinition.modes,
      (modeTokenTypes: TokenType[], modeName) => {
        this.TRACE_INIT(`Mode: <${modeName}> processing`, () => {
          this.modes.push(modeName)

          if (this.config.skipValidations === false) {
            this.TRACE_INIT(`validatePatterns`, () => {
              this.lexerDefinitionErrors = this.lexerDefinitionErrors.concat(
                validatePatterns(<TokenType[]>modeTokenTypes, allModeNames)
              )
            })
          }

          // If definition errors were encountered, the analysis phase may fail unexpectedly.
          // Considering a lexer with definition errors may never be used, there is no point
          // to performing the analysis anyhow...
          if (!isEmpty(this.lexerDefinitionErrors)) {
            return
          }

          augmentTokenTypes(modeTokenTypes)

          let analysis
          this.TRACE_INIT(`analyzeTokenTypes`, () => {
            analysis = analyzeTokenTypes(modeTokenTypes, {
              lineTerminatorCharacters: this.config.lineTerminatorCharacters,
              positionTracking: config.positionTracking,
              ensureOptimizations: config.ensureOptimizations,
              safeMode: config.safeMode,
              tracer: this.TRACE_INIT.bind(this)
            })
          })

          // store the per-mode analysis results for use during tokenization.
          this.patternIdxToConfig[modeName] = analysis.patternIdxToConfig
          this.charCodeToPatternIdxToConfig[modeName] =
            analysis.charCodeToPatternIdxToConfig
          this.emptyGroups = merge(this.emptyGroups, analysis.emptyGroups)
          this.hasCustom = analysis.hasCustom || this.hasCustom
          this.canModeBeOptimized[modeName] = analysis.canBeOptimized
        })
      }
    )

    this.defaultMode = actualDefinition.defaultMode

    const mustThrowDefinitionErrors =
      !isEmpty(this.lexerDefinitionErrors) &&
      !this.config.deferDefinitionErrorsHandling
    if (mustThrowDefinitionErrors) {
      const definitionErrorsText = map(
        this.lexerDefinitionErrors,
        (error) => error.message
      ).join("-----------------------\n")
      throw new Error(
        "Errors detected in definition of Lexer:\n" + definitionErrorsText
      )
    }

    // Reaching this point means no exception was raised for definition errors above,
    // so the collected warnings are reported now.
    forEach(this.lexerDefinitionWarning, (warningDescriptor) => {
      PRINT_WARNING(warningDescriptor.message)
    })

    this.TRACE_INIT("Choosing sub-methods implementations", () => {
      // Choose the relevant internal implementations for this specific parser.
      // These implementations should be in-lined by the JavaScript engine
      // to provide optimal performance in each scenario.
      if (SUPPORT_STICKY) {
        this.chopInput = <any>IDENTITY
        this.match = this.matchWithTest
      } else {
        this.updateLastIndex = NOOP
        this.match = this.matchWithExec
      }

      if (isSingleMode) {
        this.handleModes = NOOP
      }

      if (this.trackStartLines === false) {
        this.computeNewColumn = IDENTITY
      }

      if (this.trackEndLines === false) {
        this.updateTokenEndLineColumnLocation = NOOP
      }

      // the token factory depends on how much position information is tracked.
      const tracking = this.config.positionTracking
      if (/full/i.test(tracking)) {
        this.createTokenInstance = this.createFullToken
      } else if (/onlyStart/i.test(tracking)) {
        this.createTokenInstance = this.createStartOnlyToken
      } else if (/onlyOffset/i.test(tracking)) {
        this.createTokenInstance = this.createOffsetOnlyToken
      } else {
        throw Error(
          `Invalid <positionTracking> config option: "${this.config.positionTracking}"`
        )
      }

      if (this.hasCustom) {
        this.addToken = this.addTokenUsingPush
        this.handlePayload = this.handlePayloadWithCustom
      } else {
        this.addToken = this.addTokenUsingMemberAccess
        this.handlePayload = this.handlePayloadNoCustom
      }
    })

    this.TRACE_INIT("Failed Optimization Warnings", () => {
      // collect the names of all modes explicitly flagged as not optimizable.
      const unOptimizedModes = reduce(
        this.canModeBeOptimized,
        (failedModes, modeIsOptimizable, modeName) => {
          if (modeIsOptimizable === false) {
            failedModes.push(modeName)
          }
          return failedModes
        },
        []
      )

      if (config.ensureOptimizations && !isEmpty(unOptimizedModes)) {
        throw Error(
          `Lexer Modes: < ${unOptimizedModes.join(
            ", "
          )} > cannot be optimized.\n` +
            '\t Disable the "ensureOptimizations" lexer config flag to silently ignore this and run the lexer in an un-optimized mode.\n' +
            "\t Or inspect the console log for details on how to resolve these issues."
        )
      }
    })

    this.TRACE_INIT("clearRegExpParserCache", () => {
      clearRegExpParserCache()
    })

    this.TRACE_INIT("toFastProperties", () => {
      toFastProperties(this)
    })
  })
}
|
||||
|
||||
/**
 * Tokenizes the given text.
 *
 * @param text - the input to tokenize.
 * @param initialMode - the lexer mode to start in; defaults to the lexer's default mode.
 * @returns the matched tokens, the grouped tokens and the lexing errors.
 * @throws Error when the lexer definition contains errors (which can only
 *   still be present here if their handling was deferred at construction).
 */
public tokenize(
  text: string,
  initialMode: string = this.defaultMode
): ILexingResult {
  if (isEmpty(this.lexerDefinitionErrors)) {
    return this.tokenizeInternal(text, initialMode)
  }

  const definitionErrorsText = map(
    this.lexerDefinitionErrors,
    (error) => error.message
  ).join("-----------------------\n")

  throw new Error(
    "Unable to Tokenize because Errors detected in definition of Lexer:\n" +
      definitionErrorsText
  )
}
|
||||
|
||||
/**
 * The tokenization loop.
 *
 * Repeatedly tries the patterns of the current lexer mode at the current
 * offset. On a match a token is created (unless the pattern has no group),
 * position counters are advanced and mode push/pop is handled. When nothing
 * matches, characters are dropped until some pattern of the current mode
 * matches again and a single error covering the dropped range is recorded.
 *
 * Much of the logic is deliberately written in-line for performance reasons
 * (see the "manually in-lined" notes below).
 *
 * @param text - the input text.
 * @param initialMode - name of the lexer mode to start in.
 * @returns the main token vector, the grouped tokens and the recorded errors.
 */
private tokenizeInternal(text: string, initialMode: string): ILexingResult {
  let i,
    j,
    matchAltImage,
    longerAltIdx,
    matchedImage,
    payload,
    altPayload,
    imageLength,
    group,
    tokType,
    newToken,
    errLength,
    msg,
    match
  let orgText = text
  let orgLength = orgText.length
  let offset = 0
  let matchedTokensIndex = 0
  // initializing the tokensArray to the "guessed" size.
  // guessing too little will still reduce the number of array re-sizes on pushes.
  // guessing too large (Tested by guessing x4 too large) may cost a bit more of memory
  // but would still have a faster runtime by avoiding (All but one) array resizing.
  let guessedNumberOfTokens = this.hasCustom
    ? 0 // will break custom token pattern APIs the matchedTokens array will contain undefined elements.
    : Math.floor(text.length / 10)
  let matchedTokens = new Array(guessedNumberOfTokens)
  let errors: ILexingError[] = []
  let line = this.trackStartLines ? 1 : undefined
  let column = this.trackStartLines ? 1 : undefined
  let groups: any = cloneEmptyGroups(this.emptyGroups)
  let trackLines = this.trackStartLines
  const lineTerminatorPattern = this.config.lineTerminatorsPattern

  // state of the currently active lexer mode, (re)assigned by push_mode / pop_mode.
  let currModePatternsLength = 0
  let patternIdxToConfig = []
  let currCharCodeToPatternIdxToConfig = []

  let modeStack = []

  const emptyArray = []
  Object.freeze(emptyArray)
  let getPossiblePatterns = undefined

  // un-optimized lookup: every pattern of the current mode is a candidate.
  function getPossiblePatternsSlow() {
    return patternIdxToConfig
  }

  // optimized lookup: only the patterns registered for the next character's code are candidates.
  function getPossiblePatternsOptimized(charCode) {
    const optimizedCharIdx = charCodeToOptimizedIndex(charCode)
    const possiblePatterns = currCharCodeToPatternIdxToConfig[optimizedCharIdx]
    if (possiblePatterns === undefined) {
      return emptyArray
    } else {
      return possiblePatterns
    }
  }

  let pop_mode = (popToken) => {
    // TODO: perhaps avoid this error in the edge case there is no more input?
    if (
      modeStack.length === 1 &&
      // if we have both a POP_MODE and a PUSH_MODE this is in-fact a "transition"
      // So no error should occur.
      popToken.tokenType.PUSH_MODE === undefined
    ) {
      // if we try to pop the last mode there lexer will no longer have ANY mode.
      // thus the pop is ignored, an error will be created and the lexer will continue parsing in the previous mode.
      const popErrorMsg = this.config.errorMessageProvider.buildUnableToPopLexerModeMessage(
        popToken
      )

      errors.push({
        offset: popToken.startOffset,
        line: popToken.startLine,
        column: popToken.startColumn,
        length: popToken.image.length,
        message: popErrorMsg
      })
    } else {
      modeStack.pop()
      if (modeStack.length === 0) {
        // "transition" out of the only mode on the stack: there is no previous mode
        // to restore. The PUSH_MODE that follows (see handleModes) re-assigns all the
        // per-mode state, so nothing is restored here. Without this guard the lookup
        // below would be done with a missing mode name and reading ".length" of the
        // result would presumably throw (NOTE(review): assumes `last` yields
        // undefined for an empty array - confirm).
        return
      }
      let newMode = last(modeStack)
      patternIdxToConfig = this.patternIdxToConfig[newMode]
      currCharCodeToPatternIdxToConfig = this.charCodeToPatternIdxToConfig[
        newMode
      ]
      currModePatternsLength = patternIdxToConfig.length
      const modeCanBeOptimized =
        this.canModeBeOptimized[newMode] && this.config.safeMode === false

      if (currCharCodeToPatternIdxToConfig && modeCanBeOptimized) {
        getPossiblePatterns = getPossiblePatternsOptimized
      } else {
        getPossiblePatterns = getPossiblePatternsSlow
      }
    }
  }

  // NOTE: a regular function (not an arrow), it must be invoked with ".call(this, ...)".
  function push_mode(newMode) {
    modeStack.push(newMode)
    currCharCodeToPatternIdxToConfig = this.charCodeToPatternIdxToConfig[
      newMode
    ]

    patternIdxToConfig = this.patternIdxToConfig[newMode]
    currModePatternsLength = patternIdxToConfig.length
    const modeCanBeOptimized =
      this.canModeBeOptimized[newMode] && this.config.safeMode === false

    if (currCharCodeToPatternIdxToConfig && modeCanBeOptimized) {
      getPossiblePatterns = getPossiblePatternsOptimized
    } else {
      getPossiblePatterns = getPossiblePatternsSlow
    }
  }

  // this pattern seems to avoid a V8 de-optimization, although that de-optimization does not
  // seem to matter performance wise.
  push_mode.call(this, initialMode)

  let currConfig

  while (offset < orgLength) {
    matchedImage = null

    let nextCharCode = orgText.charCodeAt(offset)
    const chosenPatternIdxToConfig = getPossiblePatterns(nextCharCode)
    let chosenPatternsLength = chosenPatternIdxToConfig.length

    for (i = 0; i < chosenPatternsLength; i++) {
      currConfig = chosenPatternIdxToConfig[i]
      let currPattern = currConfig.pattern
      payload = null

      // manually in-lined because > 600 chars won't be in-lined in V8
      let singleCharCode = currConfig.short
      if (singleCharCode !== false) {
        if (nextCharCode === singleCharCode) {
          // single character string
          matchedImage = currPattern
        }
      } else if (currConfig.isCustom === true) {
        match = currPattern.exec(orgText, offset, matchedTokens, groups)
        if (match !== null) {
          matchedImage = match[0]
          if (match.payload !== undefined) {
            payload = match.payload
          }
        } else {
          matchedImage = null
        }
      } else {
        this.updateLastIndex(currPattern, offset)
        matchedImage = this.match(currPattern, text, offset)
      }

      if (matchedImage !== null) {
        // even though this pattern matched we must try a another longer alternative.
        // this can be used to prioritize keywords over identifiers
        longerAltIdx = currConfig.longerAlt
        if (longerAltIdx !== undefined) {
          // TODO: micro optimize, avoid extra prop access
          // by saving/linking longerAlt on the original config?
          let longerAltConfig = patternIdxToConfig[longerAltIdx]
          let longerAltPattern = longerAltConfig.pattern
          altPayload = null

          // single Char can never be a longer alt so no need to test it.
          // manually in-lined because > 600 chars won't be in-lined in V8
          if (longerAltConfig.isCustom === true) {
            match = longerAltPattern.exec(
              orgText,
              offset,
              matchedTokens,
              groups
            )
            if (match !== null) {
              matchAltImage = match[0]
              if (match.payload !== undefined) {
                altPayload = match.payload
              }
            } else {
              matchAltImage = null
            }
          } else {
            this.updateLastIndex(longerAltPattern, offset)
            matchAltImage = this.match(longerAltPattern, text, offset)
          }

          if (matchAltImage && matchAltImage.length > matchedImage.length) {
            matchedImage = matchAltImage
            payload = altPayload
            currConfig = longerAltConfig
          }
        }
        break
      }
    }

    // successful match
    if (matchedImage !== null) {
      imageLength = matchedImage.length
      group = currConfig.group
      // an undefined group means no token is created for this match.
      if (group !== undefined) {
        tokType = currConfig.tokenTypeIdx
        // TODO: "offset + imageLength" and the new column may be computed twice in case of "full" location information inside
        // createFullToken method
        newToken = this.createTokenInstance(
          matchedImage,
          offset,
          tokType,
          currConfig.tokenType,
          line,
          column,
          imageLength
        )

        this.handlePayload(newToken, payload)

        // TODO: optimize NOOP in case there are no special groups?
        if (group === false) {
          matchedTokensIndex = this.addToken(
            matchedTokens,
            matchedTokensIndex,
            newToken
          )
        } else {
          groups[group].push(newToken)
        }
      }
      text = this.chopInput(text, imageLength)
      offset = offset + imageLength

      // TODO: with newlines the column may be assigned twice
      column = this.computeNewColumn(column, imageLength)

      if (trackLines === true && currConfig.canLineTerminator === true) {
        let numOfLTsInMatch = 0
        let foundTerminator
        let lastLTEndOffset
        lineTerminatorPattern.lastIndex = 0
        // count the line terminators inside the matched image and remember where the last one ends.
        do {
          foundTerminator = lineTerminatorPattern.test(matchedImage)
          if (foundTerminator === true) {
            lastLTEndOffset = lineTerminatorPattern.lastIndex - 1
            numOfLTsInMatch++
          }
        } while (foundTerminator === true)

        if (numOfLTsInMatch !== 0) {
          line = line + numOfLTsInMatch
          column = imageLength - lastLTEndOffset
          this.updateTokenEndLineColumnLocation(
            newToken,
            group,
            lastLTEndOffset,
            numOfLTsInMatch,
            line,
            column,
            imageLength
          )
        }
      }
      // will be NOOP if no modes present
      this.handleModes(currConfig, pop_mode, push_mode, newToken)
    } else {
      // error recovery, drop characters until we identify a valid token's start point
      let errorStartOffset = offset
      let errorLine = line
      let errorColumn = column
      let foundResyncPoint = false
      while (!foundResyncPoint && offset < orgLength) {
        // drop chars until we succeed in matching something
        // Identity Func (when sticky flag is enabled)
        text = this.chopInput(text, 1)
        offset++
        for (j = 0; j < currModePatternsLength; j++) {
          let resyncConfig = patternIdxToConfig[j]
          let currPattern = resyncConfig.pattern

          // manually in-lined because > 600 chars won't be in-lined in V8
          let singleCharCode = resyncConfig.short
          if (singleCharCode !== false) {
            if (orgText.charCodeAt(offset) === singleCharCode) {
              // single character string
              foundResyncPoint = true
            }
          } else if (resyncConfig.isCustom === true) {
            foundResyncPoint =
              currPattern.exec(orgText, offset, matchedTokens, groups) !==
              null
          } else {
            this.updateLastIndex(currPattern, offset)
            foundResyncPoint = currPattern.exec(text) !== null
          }

          if (foundResyncPoint === true) {
            break
          }
        }
      }

      errLength = offset - errorStartOffset
      // at this point we either re-synced or reached the end of the input text
      msg = this.config.errorMessageProvider.buildUnexpectedCharactersMessage(
        orgText,
        errorStartOffset,
        errLength,
        errorLine,
        errorColumn
      )
      errors.push({
        offset: errorStartOffset,
        line: errorLine,
        column: errorColumn,
        length: errLength,
        message: msg
      })
    }
  }

  // When custom patterns exist the tokens array was not pre-sized (see "guessedNumberOfTokens"
  // above) and "matchedTokensIndex" is not the source of truth for its length,
  // so the array must not be truncated in that case.
  // TODO: custom tokens should not push directly??
  if (!this.hasCustom) {
    // if we guessed a too large size for the tokens array this will shrink it to the right size.
    matchedTokens.length = matchedTokensIndex
  }

  return {
    tokens: matchedTokens,
    groups: groups,
    errors: errors
  }
}
|
||||
|
||||
/**
 * Applies the lexer mode changes requested by a matched pattern.
 * A pattern may pop the current mode, push a new mode, or do both (pop first, then push).
 *
 * @param config - the matched pattern's config (`pop` and `push` are read).
 * @param pop_mode - callback that pops the current mode; receives the new token.
 * @param push_mode - function that pushes a mode; invoked with this lexer as `this`.
 * @param newToken - the token that was just matched.
 */
private handleModes(config, pop_mode, push_mode, newToken) {
  // the mode to push is read BEFORE popping, same as the original ordering.
  const modeToPush = config.push

  if (config.pop === true) {
    pop_mode(newToken)
  }

  if (modeToPush !== undefined) {
    push_mode.call(this, modeToPush)
  }
}
|
||||
|
||||
/**
 * Drops the first `length` characters of `text`.
 * The constructor replaces this method with an identity function when sticky
 * regular expressions are supported.
 */
private chopInput(text, length): string {
  const remainingText = text.substring(length)
  return remainingText
}
|
||||
|
||||
/**
 * Positions `regExp` so its next match attempt starts at `newLastIndex`.
 * The constructor replaces this method with a no-op when sticky regular
 * expressions are not supported.
 */
private updateLastIndex(regExp, newLastIndex): void {
  regExp["lastIndex"] = newLastIndex
}
|
||||
|
||||
// TODO: decrease this under 600 characters? inspect stripping comments option in TSC compiler
|
||||
/**
 * Fixes the `endLine` / `endColumn` of a token whose image contains line terminators.
 * The constructor replaces this method with a no-op when end lines are not tracked.
 *
 * @param newToken - the token to update (mutated in place).
 * @param group - the pattern's group; `undefined` means no token was created, nothing to update.
 * @param lastLTIdx - index, inside the image, of the last character of the last line terminator.
 * @param numOfLTsInMatch - number of line terminators found in the image.
 * @param line - the line number after the image.
 * @param column - the column number after the image.
 * @param imageLength - length of the matched image.
 */
private updateTokenEndLineColumnLocation(
  newToken,
  group,
  lastLTIdx,
  numOfLTsInMatch,
  line,
  column,
  imageLength
): void {
  if (group === undefined) {
    return
  }

  // a none skipped multi line Token, need to update endLine/endColumn
  const endsWithLT = lastLTIdx === imageLength - 1
  if (numOfLTsInMatch === 1 && endsWithLT === true) {
    // single LT in the last character of a token, no need to modify the endLine/EndColumn
    return
  }

  const endingLTFix = endsWithLT ? -1 : 0
  // if a token ends in a LT that last LT only affects the line numbering of following Tokens
  newToken.endLine = line + endingLTFix
  // the last LT in a token does not affect the endColumn either as the [columnStart ... columnEnd)
  // inclusive to exclusive range.
  newToken.endColumn = column - 1 + -endingLTFix
}
|
||||
|
||||
/**
 * Advances the column counter past a matched image.
 * The constructor replaces this method with an identity function when start
 * lines are not tracked.
 */
private computeNewColumn(oldColumn, imageLength) {
  const newColumn = oldColumn + imageLength
  return newColumn
}
|
||||
|
||||
// Place holder only: the constructor overwrites it with createFullToken, createStartOnlyToken
// or createOffsetOnlyToken, according to the "positionTracking" config option.
/* istanbul ignore next - place holder */
private createTokenInstance(...args: any[]): IToken {
  return null
}
|
||||
|
||||
/**
 * Token factory used with "onlyOffset" position tracking:
 * the token carries its image, start offset and token type information only.
 */
private createOffsetOnlyToken(image, startOffset, tokenTypeIdx, tokenType) {
  return {
    image: image,
    startOffset: startOffset,
    tokenTypeIdx: tokenTypeIdx,
    tokenType: tokenType
  }
}
|
||||
|
||||
/**
 * Token factory used with "onlyStart" position tracking:
 * the token carries start offset/line/column but no end position.
 */
private createStartOnlyToken(
  image,
  startOffset,
  tokenTypeIdx,
  tokenType,
  startLine,
  startColumn
) {
  return {
    image: image,
    startOffset: startOffset,
    startLine: startLine,
    startColumn: startColumn,
    tokenTypeIdx: tokenTypeIdx,
    tokenType: tokenType
  }
}
|
||||
|
||||
/**
 * Token factory used with "full" position tracking.
 * End offset and end column are inclusive (start + length - 1); the end line
 * is initialized to the start line, multi-line tokens are corrected later by
 * updateTokenEndLineColumnLocation.
 */
private createFullToken(
  image,
  startOffset,
  tokenTypeIdx,
  tokenType,
  startLine,
  startColumn,
  imageLength
) {
  return {
    image: image,
    startOffset: startOffset,
    endOffset: startOffset + imageLength - 1,
    startLine: startLine,
    endLine: startLine,
    startColumn: startColumn,
    endColumn: startColumn + imageLength - 1,
    tokenTypeIdx: tokenTypeIdx,
    tokenType: tokenType
  }
}
|
||||
|
||||
// Place holder only: the constructor overwrites it with addTokenUsingPush (custom patterns exist)
// or addTokenUsingMemberAccess (no custom patterns), according to the "hasCustom" flag.
// The returned number is a dummy value.
/* istanbul ignore next - place holder */
private addToken(tokenVector, index, tokenToAdd): number {
  return 666
}
|
||||
|
||||
/**
 * Appends the token at the end of the vector.
 * The given index is not used for the insertion and is returned unchanged.
 */
private addTokenUsingPush(tokenVector, index, tokenToAdd): number {
  tokenVector.push(tokenToAdd)
  const unchangedIndex = index
  return unchangedIndex
}
|
||||
|
||||
/**
 * Stores the token at the given index of the vector.
 *
 * @returns the next free index (index + 1).
 */
private addTokenUsingMemberAccess(tokenVector, index, tokenToAdd): number {
  tokenVector[index] = tokenToAdd
  return index + 1
}
|
||||
|
||||
// Place holder only: the constructor overwrites it with handlePayloadWithCustom or
// handlePayloadNoCustom, according to the "hasCustom" flag.
/* istanbul ignore next - place holder */
private handlePayload(token: IToken, payload: any): void {
  // intentionally empty
}
|
||||
|
||||
/**
 * Payload handler used when the lexer has no custom patterns: does nothing.
 */
private handlePayloadNoCustom(token: IToken, payload: any): void {
  // intentionally empty
}
|
||||
|
||||
/**
 * Payload handler used when the lexer has custom patterns:
 * attaches the payload to the token unless the payload is `null`.
 */
private handlePayloadWithCustom(token: IToken, payload: any): void {
  if (payload === null) {
    return
  }
  token.payload = payload
}
|
||||
|
||||
// Place holder only: the constructor overwrites it with matchWithTest (sticky RegExp supported)
// or matchWithExec (sticky RegExp not supported).
/* istanbul ignore next - place holder to be replaced with chosen alternative at runtime */
private match(pattern: RegExp, text: string, offset?: number): string {
  return null
}
|
||||
|
||||
/**
 * Matcher variant selected when sticky regular expressions are supported.
 * Runs `pattern.test` and, on success, cuts the matched image out of the text
 * using the pattern's updated `lastIndex`.
 *
 * @returns the matched image, or `null` when the pattern did not match.
 */
private matchWithTest(pattern: RegExp, text: string, offset: number): string {
  const didMatch = pattern.test(text)
  return didMatch === true ? text.substring(offset, pattern.lastIndex) : null
}
|
||||
|
||||
/**
 * Matcher variant selected when sticky regular expressions are not supported.
 *
 * @returns the full match (element 0 of the `exec` result), or `null` when the pattern did not match.
 */
private matchWithExec(pattern, text): string {
  const execResult = pattern.exec(text)
  if (execResult === null) {
    return execResult
  }
  return execResult[0]
}
|
||||
|
||||
// Duplicated from the parser's perf trace trait to allow future extraction
// of the lexer to a separate package.
/**
 * Runs an initialization phase, optionally printing its start, end and duration.
 *
 * @param phaseDesc - name of the phase, used in the printed trace lines.
 * @param phaseImpl - the phase's implementation.
 * @returns whatever `phaseImpl` produced.
 */
TRACE_INIT<T>(phaseDesc: string, phaseImpl: () => T): T {
  // No need to optimize this using NOOP pattern because
  // It is not called in a hot spot...
  if (this.traceInitPerf !== true) {
    return phaseImpl()
  }

  this.traceInitIndent++
  // one tab per nesting level.
  const indent = new Array(this.traceInitIndent + 1).join("\t")
  // only phases nested shallower than the configured maximum are printed.
  const withinTraceDepth = () => this.traceInitIndent < this.traceInitMaxIdent

  if (withinTraceDepth()) {
    console.log(`${indent}--> <${phaseDesc}>`)
  }

  const { time, value } = timer(phaseImpl)
  /* istanbul ignore next - Difficult to reproduce specific performance behavior (>10ms) in tests */
  const traceMethod = time > 10 ? console.warn : console.log

  if (withinTraceDepth()) {
    traceMethod(`${indent}<-- <${phaseDesc}> time: ${time}ms`)
  }

  this.traceInitIndent--
  return value
}
|
||||
}
|
||||
307
node_modules/chevrotain/src/scan/reg_exp.ts
generated
vendored
Normal file
307
node_modules/chevrotain/src/scan/reg_exp.ts
generated
vendored
Normal file
|
|
@ -0,0 +1,307 @@
|
|||
import { VERSION, BaseRegExpVisitor } from "regexp-to-ast"
|
||||
import {
|
||||
flatten,
|
||||
map,
|
||||
forEach,
|
||||
contains,
|
||||
PRINT_ERROR,
|
||||
PRINT_WARNING,
|
||||
find,
|
||||
isArray,
|
||||
every,
|
||||
values
|
||||
} from "../utils/utils"
|
||||
import { getRegExpAst } from "./reg_exp_parser"
|
||||
import { charCodeToOptimizedIndex, minOptimizationVal } from "./lexer"
|
||||
|
||||
// Message of the Error thrown when a complement set (e.g. /[^a]/) is found
// while computing first-char optimization indices; also used to recognize
// that specific failure in the catch handler.
const complementErrorMessage =
  "Complement Sets are not supported for first char optimization"

// Common prefix for the diagnostics printed when first-char optimization fails.
export const failedOptimizationPrefixMsg =
  'Unable to use "first char" lexer optimizations:\n'
|
||||
|
||||
// Computes the optimized start-char indices for `regExp` by parsing it to an
// AST and walking it with `firstCharOptimizedIndices`.
//
// Any failure is caught and results in an empty array:
// - complement-set failures print a warning, but only when `ensureOptimizations` is set
// - every other failure prints an error (with an extra hint when `ensureOptimizations` is set)
export function getOptimizedStartCodesIndices(
  regExp: RegExp,
  ensureOptimizations = false
): number[] {
  try {
    const ast = getRegExpAst(regExp)
    return firstCharOptimizedIndices(ast.value, {}, ast.flags.ignoreCase)
  } catch (e) {
    /* istanbul ignore next */
    // Testing this relies on the regexp-to-ast library having a bug... */
    // TODO: only the else branch needs to be ignored, try to fix with newer prettier / tsc
    if (e.message === complementErrorMessage) {
      if (ensureOptimizations) {
        const complementWarning =
          `${failedOptimizationPrefixMsg}` +
          `\tUnable to optimize: < ${regExp.toString()} >\n` +
          "\tComplement Sets cannot be automatically optimized.\n" +
          "\tThis will disable the lexer's first char optimizations.\n" +
          "\tSee: https://sap.github.io/chevrotain/docs/guide/resolving_lexer_errors.html#COMPLEMENT for details."
        PRINT_WARNING(complementWarning)
      }
    } else {
      const msgSuffix = ensureOptimizations
        ? "\n\tThis will disable the lexer's first char optimizations.\n" +
          "\tSee: https://sap.github.io/chevrotain/docs/guide/resolving_lexer_errors.html#REGEXP_PARSING for details."
        : ""
      const parsingError =
        `${failedOptimizationPrefixMsg}\n` +
        `\tFailed parsing: < ${regExp.toString()} >\n` +
        `\tUsing the regexp-to-ast library version: ${VERSION}\n` +
        "\tPlease open an issue at: https://github.com/bd82/regexp-to-ast/issues" +
        msgSuffix
      PRINT_ERROR(parsingError)
    }
  }

  return []
}
|
||||
|
||||
// Collects into `result` the optimized indices of every char code that can
// start a match of the given regexp AST node, and returns `values(result)`.
//
// - "Disjunction": the start codes of every alternative are collected.
// - "Alternative": terms are scanned left to right until the first term that
//   must consume input is reached.
// Throws `Error(complementErrorMessage)` when a complement set is encountered.
export function firstCharOptimizedIndices(ast, result, ignoreCase): number[] {
  switch (ast.type) {
    case "Disjunction":
      for (const alternative of ast.value) {
        firstCharOptimizedIndices(alternative, result, ignoreCase)
      }
      break
    case "Alternative": {
      for (const atom of ast.value) {
        // skip terms that cannot affect the first char results
        if (isTermWithoutStartCodes(atom.type)) {
          continue
        }

        switch (atom.type) {
          case "Character":
            addOptimizedIdxToResult(atom.value, result, ignoreCase)
            break
          case "Set":
            if (atom.complement === true) {
              throw Error(complementErrorMessage)
            }
            forEach(atom.value, (codeOrRange) => {
              if (typeof codeOrRange === "number") {
                addOptimizedIdxToResult(codeOrRange, result, ignoreCase)
              } else {
                addSetRangeToResult(codeOrRange, result, ignoreCase)
              }
            })
            break
          case "Group":
            firstCharOptimizedIndices(atom.value, result, ignoreCase)
            break
          /* istanbul ignore next */
          default:
            throw Error("Non Exhaustive Match")
        }

        const isOptionalQuantifier =
          atom.quantifier !== undefined && atom.quantifier.atLeast === 0
        const isMandatory =
          atom.type === "Group"
            ? // A group may be optional due to empty contents /(?:)/
              // or if everything inside it is optional /((a)?)/
              isWholeOptional(atom) === false
            : // If this term is not a group it may only be optional if it has an optional quantifier
              isOptionalQuantifier === false

        // reached a mandatory production, no more **start** codes can be found on this alternative
        if (isMandatory) {
          break
        }
      }
      break
    }
    /* istanbul ignore next */
    default:
      throw Error("non exhaustive match!")
  }

  // console.log(Object.keys(result).length)
  return values(result)
}

// True for term types that are skipped when collecting start codes.
function isTermWithoutStartCodes(termType): boolean {
  switch (termType) {
    case "EndAnchor":
    // A group back reference cannot affect potential starting char.
    // because if a back reference is the first production than automatically
    // the group being referenced has had to come BEFORE so its codes have already been added
    case "GroupBackReference":
    // assertions do not affect potential starting codes
    case "Lookahead":
    case "NegativeLookahead":
    case "StartAnchor":
    case "WordBoundary":
    case "NonWordBoundary":
      return true
    default:
      return false
  }
}

// Adds every char code of a set range (`range.from`..`range.to`, inclusive) to `result`.
function addSetRangeToResult(range, result, ignoreCase): void {
  // cannot optimize when ignoreCase is enabled: every code in the range is added one by one
  if (ignoreCase === true) {
    for (let rangeCode = range.from; rangeCode <= range.to; rangeCode++) {
      addOptimizedIdxToResult(rangeCode, result, ignoreCase)
    }
    return
  }

  // Optimization (2 orders of magnitude less work for very large ranges)
  // handle unoptimized values
  for (
    let rangeCode = range.from;
    rangeCode <= range.to && rangeCode < minOptimizationVal;
    rangeCode++
  ) {
    addOptimizedIdxToResult(rangeCode, result, ignoreCase)
  }

  // Less common charCode where we optimize for faster init time, by using larger "buckets"
  if (range.to >= minOptimizationVal) {
    const minUnOptVal =
      range.from >= minOptimizationVal ? range.from : minOptimizationVal
    const firstOptIdx = charCodeToOptimizedIndex(minUnOptVal)
    const lastOptIdx = charCodeToOptimizedIndex(range.to)

    for (let optIdx = firstOptIdx; optIdx <= lastOptIdx; optIdx++) {
      result[optIdx] = optIdx
    }
  }
}
|
||||
|
||||
// Records the optimized index of `code` in `result` (stored under its own
// value as key). When `ignoreCase` is true the other-case counterpart of the
// char is recorded as well, via `handleIgnoreCase`.
function addOptimizedIdxToResult(
  code: number,
  result: number[],
  ignoreCase: boolean
) {
  const codeOptIdx = charCodeToOptimizedIndex(code)
  result[codeOptIdx] = codeOptIdx

  if (ignoreCase !== true) {
    return
  }
  handleIgnoreCase(code, result)
}
|
||||
|
||||
// Records in `result` the optimized index of the other-case variant of `code`:
// the upper-case form when it differs from the char, otherwise the lower-case
// form when that differs. Chars without a differing case variant add nothing.
// Only the first UTF-16 unit of the converted string is considered.
function handleIgnoreCase(code: number, result: number[]) {
  const sourceChar = String.fromCharCode(code)
  const markFirstCodeOf = (text: string) => {
    const optIdx = charCodeToOptimizedIndex(text.charCodeAt(0))
    result[optIdx] = optIdx
  }

  const upperChar = sourceChar.toUpperCase()
  /* istanbul ignore else */
  if (upperChar !== sourceChar) {
    markFirstCodeOf(upperChar)
    return
  }

  const lowerChar = sourceChar.toLowerCase()
  if (lowerChar !== sourceChar) {
    markFirstCodeOf(lowerChar)
  }
}
|
||||
|
||||
// Returns the first entry of `setNode.value` (a single char code or a
// {from, to} range) that covers at least one of `targetCharCodes`;
// the result of `find` is returned as-is when no entry qualifies.
function findCode(setNode, targetCharCodes) {
  const coversSomeTarget = (codeOrRange) => {
    if (typeof codeOrRange === "number") {
      return contains(targetCharCodes, codeOrRange)
    }

    // range
    const range = <any>codeOrRange
    const firstTargetInRange = find(
      targetCharCodes,
      (targetCode) => range.from <= targetCode && targetCode <= range.to
    )
    return firstTargetInRange !== undefined
  }

  return find(setNode.value, coversSomeTarget)
}
|
||||
|
||||
// Decides whether an AST node can match without consuming anything:
// - a node whose quantifier allows zero repetitions is optional
// - a node without a (truthy) `value` is not optional
// - otherwise the node is optional only if all of its contents are
//   (every item for an array value, or the single nested value).
function isWholeOptional(ast) {
  const quantifier = ast.quantifier
  if (quantifier && quantifier.atLeast === 0) {
    return true
  }

  const contents = ast.value
  if (!contents) {
    return false
  }

  if (isArray(contents)) {
    return every(contents, isWholeOptional)
  }
  return isWholeOptional(contents)
}
|
||||
|
||||
// Regexp AST visitor that sets `found` to true once it sees a Character or
// Set node able to match one of the target char codes.
class CharCodeFinder extends BaseRegExpVisitor {
  found: boolean = false

  constructor(private targetCharCodes: number[]) {
    super()
  }

  visitChildren(node) {
    // No need to keep looking...
    if (this.found === true) {
      return
    }

    // Lookaheads do not actually consume any characters, thus finding a
    // charCode in a lookahead context does not mean that the regexp can
    // actually contain it in a match. Their children are not traversed here;
    // only the dedicated visit method for the node itself is invoked.
    if (node.type === "Lookahead") {
      this.visitLookahead(node)
      return
    }
    if (node.type === "NegativeLookahead") {
      this.visitNegativeLookahead(node)
      return
    }

    super.visitChildren(node)
  }

  visitCharacter(node) {
    const isTarget = contains(this.targetCharCodes, node.value)
    if (isTarget) {
      this.found = true
    }
  }

  visitSet(node) {
    const setListsTarget = findCode(node, this.targetCharCodes) !== undefined
    // A complement set qualifies when none of the targets is listed in it;
    // a regular set qualifies when at least one target is listed.
    const qualifies = node.complement ? !setListsTarget : setListsTarget
    if (qualifies) {
      this.found = true
    }
  }
}
|
||||
|
||||
// Reports whether `pattern` may match any of the given `charCodes`.
// - RegExp patterns are parsed to an AST and inspected with CharCodeFinder.
// - For any other pattern each item's first char code is looked up in `charCodes`.
export function canMatchCharCode(
  charCodes: number[],
  pattern: RegExp | string
) {
  if (!(pattern instanceof RegExp)) {
    const firstHit = find(<any>pattern, (char) => {
      return contains(charCodes, (<string>char).charCodeAt(0))
    })
    return firstHit !== undefined
  }

  const ast = getRegExpAst(pattern)
  const finder = new CharCodeFinder(charCodes)
  finder.visit(ast)
  return finder.found
}
|
||||
19
node_modules/chevrotain/src/scan/reg_exp_parser.ts
generated
vendored
Normal file
19
node_modules/chevrotain/src/scan/reg_exp_parser.ts
generated
vendored
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
import { RegExpParser, RegExpPattern } from "regexp-to-ast"
|
||||
|
||||
// Cache of parsed regexp ASTs, keyed by the regexp's string form.
// Replaced with a fresh object by clearRegExpParserCache().
let regExpAstCache = {}

// Single parser instance reused for every regexp that is parsed.
const regExpParser = new RegExpParser()
|
||||
|
||||
// Returns the AST of `regExp`, parsing it only the first time a given
// regexp string is seen; later calls are served from the cache.
export function getRegExpAst(regExp: RegExp): RegExpPattern {
  const cacheKey = regExp.toString()
  if (!regExpAstCache.hasOwnProperty(cacheKey)) {
    regExpAstCache[cacheKey] = regExpParser.pattern(cacheKey)
  }
  return regExpAstCache[cacheKey]
}
|
||||
|
||||
// Drops every cached regexp AST by swapping in a new, empty cache object.
export function clearRegExpParserCache() {
  const emptyCache = {}
  regExpAstCache = emptyCache
}
|
||||
159
node_modules/chevrotain/src/scan/tokens.ts
generated
vendored
Normal file
159
node_modules/chevrotain/src/scan/tokens.ts
generated
vendored
Normal file
|
|
@ -0,0 +1,159 @@
|
|||
import {
|
||||
cloneArr,
|
||||
compact,
|
||||
contains,
|
||||
difference,
|
||||
flatten,
|
||||
forEach,
|
||||
has,
|
||||
isArray,
|
||||
isEmpty,
|
||||
map
|
||||
} from "../utils/utils"
|
||||
import { TokenType } from "../../api"
|
||||
|
||||
// Checks whether a token instance matches a token type: either both carry the
// same `tokenTypeIdx`, or the token type is flagged `isParent` and its
// `categoryMatchesMap` holds `true` for the instance's `tokenTypeIdx`.
export function tokenStructuredMatcher(tokInstance, tokConstructor) {
  const instanceIdx = tokInstance.tokenTypeIdx
  return (
    instanceIdx === tokConstructor.tokenTypeIdx ||
    (tokConstructor.isParent === true &&
      tokConstructor.categoryMatchesMap[instanceIdx] === true)
  )
}
|
||||
|
||||
// Optimized tokenMatcher for grammars that do not use token categories:
// only the two `tokenTypeIdx` values are compared.
// Kept tiny on purpose so it is much more likely to be in-lined, which avoids
// the function call overhead.
export function tokenStructuredMatcherNoCategories(token, tokType) {
  return tokType.tokenTypeIdx === token.tokenTypeIdx
}
|
||||
|
||||
// Next `tokenTypeIdx` to hand out; incremented each time
// assignTokenDefaultProps() assigns an index to a token type.
export let tokenShortNameIdx = 1

// Registry mapping an assigned `tokenTypeIdx` to its token type.
export const tokenIdxToClass = {}
|
||||
|
||||
// Prepares the given token types (and all their categories) for matching:
// assigns the default properties, fills in the category maps and lists, and
// finally flags as `isParent` every token type with at least one category match.
export function augmentTokenTypes(tokenTypes: TokenType[]): void {
  // collect the parent Token Types as well.
  const allTokenTypes = expandCategories(tokenTypes)

  // add required tokenType and categoryMatches properties
  assignTokenDefaultProps(allTokenTypes)

  // fill up the categoryMatches
  assignCategoriesMapProp(allTokenTypes)
  assignCategoriesTokensProp(allTokenTypes)

  forEach(allTokenTypes, (tokType) => {
    const matchCount = tokType.categoryMatches.length
    tokType.isParent = matchCount > 0
  })
}
|
||||
|
||||
// Returns a new array holding the given token types followed by every
// category reachable from them through `CATEGORIES`, level by level.
// Categories already present in the result are not added again, which is
// what ends the search.
export function expandCategories(tokenTypes: TokenType[]): TokenType[] {
  let expanded = cloneArr(tokenTypes)
  let frontier = tokenTypes

  for (;;) {
    const directCategories = compact(
      flatten(map(frontier, (currTokType) => currTokType.CATEGORIES))
    )
    const unseenCategories = difference(directCategories, expanded)
    expanded = expanded.concat(unseenCategories)

    if (isEmpty(unseenCategories)) {
      break
    }
    frontier = unseenCategories
  }

  return expanded
}
|
||||
|
||||
// Ensures every token type carries the properties the matcher relies on:
// - `tokenTypeIdx`: a fresh index (also registered in `tokenIdxToClass`) when missing
// - `CATEGORIES`: always an array (a single category is wrapped, a missing one becomes [])
// - `categoryMatches` / `categoryMatchesMap`: initialized to empty when missing
export function assignTokenDefaultProps(tokenTypes: TokenType[]): void {
  forEach(tokenTypes, (currTokType) => {
    if (!hasShortKeyProperty(currTokType)) {
      const assignedIdx = tokenShortNameIdx++
      tokenIdxToClass[assignedIdx] = currTokType
      ;(<any>currTokType).tokenTypeIdx = assignedIdx
    }

    // CATEGORIES? : TokenType | TokenType[]
    if (!hasCategoriesProperty(currTokType)) {
      currTokType.CATEGORIES = []
    } else if (!isArray(currTokType.CATEGORIES)) {
      currTokType.CATEGORIES = [currTokType.CATEGORIES]
    }

    if (!hasExtendingTokensTypesProperty(currTokType)) {
      currTokType.categoryMatches = []
    }

    if (!hasExtendingTokensTypesMapProperty(currTokType)) {
      currTokType.categoryMatchesMap = {}
    }
  })
}
|
||||
|
||||
// Rebuilds `categoryMatches` of every token type from the keys of its
// `categoryMatchesMap`, resolving each key through `tokenIdxToClass`.
export function assignCategoriesTokensProp(tokenTypes: TokenType[]): void {
  forEach(tokenTypes, (currTokType) => {
    // start from an empty list to avoid duplications
    const matches = []
    currTokType.categoryMatches = matches
    forEach(currTokType.categoryMatchesMap, (val, key) => {
      matches.push(tokenIdxToClass[key].tokenTypeIdx)
    })
  })
}
|
||||
|
||||
// Fills in `categoryMatchesMap` by walking the category graph from each
// token type, starting every walk with an empty path.
export function assignCategoriesMapProp(tokenTypes: TokenType[]): void {
  forEach(tokenTypes, (startTokType) => {
    const emptyPath = []
    singleAssignCategoriesToksMap(emptyPath, startTokType)
  })
}
|
||||
|
||||
// Marks in `nextNode.categoryMatchesMap` every token type on `path`,
// then continues the walk into each of `nextNode`'s categories with the path
// extended by `nextNode`.
export function singleAssignCategoriesToksMap(
  path: TokenType[],
  nextNode: TokenType
): void {
  forEach(path, (visitedNode) => {
    nextNode.categoryMatchesMap[visitedNode.tokenTypeIdx] = true
  })

  // The extended path is the same for every category and is only read by the
  // recursive calls, so it is built once.
  const extendedPath = path.concat(nextNode)
  forEach(nextNode.CATEGORIES, (nextCategory) => {
    // avoids infinite loops due to cyclic categories.
    if (contains(extendedPath, nextCategory)) {
      return
    }
    singleAssignCategoriesToksMap(extendedPath, nextCategory)
  })
}
|
||||
|
||||
// True when the token type already has a `tokenTypeIdx` property.
export function hasShortKeyProperty(tokType: TokenType): boolean {
  const hasIdx = has(tokType, "tokenTypeIdx")
  return hasIdx
}
|
||||
|
||||
// True when the token type has a `CATEGORIES` property.
export function hasCategoriesProperty(tokType: TokenType): boolean {
  const hasCategories = has(tokType, "CATEGORIES")
  return hasCategories
}
|
||||
|
||||
// True when the token type has a `categoryMatches` property.
export function hasExtendingTokensTypesProperty(tokType: TokenType): boolean {
  const hasMatches = has(tokType, "categoryMatches")
  return hasMatches
}
|
||||
|
||||
// True when the token type has a `categoryMatchesMap` property.
export function hasExtendingTokensTypesMapProperty(
  tokType: TokenType
): boolean {
  const hasMatchesMap = has(tokType, "categoryMatchesMap")
  return hasMatchesMap
}
|
||||
|
||||
// Treats any object that has a `tokenTypeIdx` property as a token type.
export function isTokenType(tokType: TokenType): boolean {
  const hasIdx = has(tokType, "tokenTypeIdx")
  return hasIdx
}
|
||||
1
node_modules/chevrotain/src/scan/tokens_constants.ts
generated
vendored
Normal file
1
node_modules/chevrotain/src/scan/tokens_constants.ts
generated
vendored
Normal file
|
|
@ -0,0 +1 @@
|
|||
// Numeric constant for the EOF token type.
// NOTE(review): judging by the name this is expected to equal the tokenTypeIdx
// given to the EOF token type - confirm against where EOF is created.
export const EOF_TOKEN_TYPE = 1
|
||||
119
node_modules/chevrotain/src/scan/tokens_public.ts
generated
vendored
Normal file
119
node_modules/chevrotain/src/scan/tokens_public.ts
generated
vendored
Normal file
|
|
@ -0,0 +1,119 @@
|
|||
import { has, isString, isUndefined } from "../utils/utils"
|
||||
import { Lexer } from "./lexer_public"
|
||||
import { augmentTokenTypes, tokenStructuredMatcher } from "./tokens"
|
||||
import { IToken, ITokenConfig, TokenType } from "../../api"
|
||||
|
||||
// Returns the display label of a token type: its `LABEL` when
// hasTokenLabel() accepts it, otherwise its `name`.
export function tokenLabel(tokType: TokenType): string {
  return hasTokenLabel(tokType) ? tokType.LABEL : tokType.name
}
|
||||
|
||||
// Returns the `name` of the given token type.
export function tokenName(tokType: TokenType): string {
  const { name } = tokType
  return name
}
|
||||
|
||||
// True when `obj.LABEL` is a non-empty string.
export function hasTokenLabel(obj: TokenType): boolean {
  const label = (<any>obj).LABEL
  return isString(label) && label !== ""
}
|
||||
|
||||
// Property names looked up on the token config object by createTokenInternal().
// `parent` is only checked so that its use can be rejected.
const PARENT = "parent"
const CATEGORIES = "categories"
const LABEL = "label"
const GROUP = "group"
const PUSH_MODE = "push_mode"
const POP_MODE = "pop_mode"
const LONGER_ALT = "longer_alt"
const LINE_BREAKS = "line_breaks"
const START_CHARS_HINT = "start_chars_hint"
|
||||
|
||||
// Public entry point for creating a token type from a config object;
// all of the work is delegated to createTokenInternal().
export function createToken(config: ITokenConfig): TokenType {
  const tokenType = createTokenInternal(config)
  return tokenType
}
|
||||
|
||||
// Builds a token type from `config`:
// 1. copies `name` and, when defined, `pattern` (as PATTERN)
// 2. rejects the unsupported `parent` property by throwing a string message
// 3. copies `categories` (as CATEGORIES) and augments the new token type
// 4. copies the remaining optional properties that are present on `config`
function createTokenInternal(config: ITokenConfig): TokenType {
  const pattern = config.pattern

  const tokenType: TokenType = <any>{}
  tokenType.name = config.name

  if (!isUndefined(pattern)) {
    tokenType.PATTERN = pattern
  }

  if (has(config, PARENT)) {
    throw (
      "The parent property is no longer supported.\n" +
      "See: https://github.com/SAP/chevrotain/issues/564#issuecomment-349062346 for details."
    )
  }

  if (has(config, CATEGORIES)) {
    // casting to ANY as this will be fixed inside `augmentTokenTypes``
    tokenType.CATEGORIES = <any>config[CATEGORIES]
  }

  augmentTokenTypes([tokenType])

  // [config property, token type property] pairs, copied in this exact order
  // and only when the config actually has the property.
  const optionalProps: [string, string][] = [
    [LABEL, "LABEL"],
    [GROUP, "GROUP"],
    [POP_MODE, "POP_MODE"],
    [PUSH_MODE, "PUSH_MODE"],
    [LONGER_ALT, "LONGER_ALT"],
    [LINE_BREAKS, "LINE_BREAKS"],
    [START_CHARS_HINT, "START_CHARS_HINT"]
  ]
  optionalProps.forEach((pair) => {
    const configKey = pair[0]
    const tokenTypeKey = pair[1]
    if (has(config, configKey)) {
      ;(<any>tokenType)[tokenTypeKey] = (<any>config)[configKey]
    }
  })

  return tokenType
}
|
||||
|
||||
// The EOF token type, created with the `Lexer.NA` pattern.
export const EOF = createToken({ name: "EOF", pattern: Lexer.NA })

// Augmented explicitly once more after creation.
augmentTokenTypes([EOF])
|
||||
|
||||
// Creates a token object from the given image and position information.
// The token records both the token type itself (`tokenType`) and that
// type's `tokenTypeIdx`.
export function createTokenInstance(
  tokType: TokenType,
  image: string,
  startOffset: number,
  endOffset: number,
  startLine: number,
  endLine: number,
  startColumn: number,
  endColumn: number
): IToken {
  const tokenTypeIdx = (<any>tokType).tokenTypeIdx
  const token: IToken = {
    image: image,
    startOffset: startOffset,
    endOffset: endOffset,
    startLine: startLine,
    endLine: endLine,
    startColumn: startColumn,
    endColumn: endColumn,
    tokenTypeIdx: tokenTypeIdx,
    tokenType: tokType
  }
  return token
}
|
||||
|
||||
// Public matcher: reports whether `token` matches `tokType`, as decided by
// tokenStructuredMatcher().
export function tokenMatcher(token: IToken, tokType: TokenType): boolean {
  const isMatch = tokenStructuredMatcher(token, tokType)
  return isMatch
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue