// [hosting-page residue, not part of the module] Spaces: Running on CPU Upgrade
/**
 * Represents tokens that our language understands in parsing.
 *
 * Every key maps to the string used as a token's `type`. All values are equal
 * to their key except `Not`, which is deliberately an alias of `UnaryOperator`:
 * the `not` keyword is emitted as a unary-operator token.
 */
export const TOKEN_TYPES = Object.freeze({
  Text: "Text", // The text between Jinja statements or expressions

  NumericLiteral: "NumericLiteral", // e.g., 123
  BooleanLiteral: "BooleanLiteral", // true or false
  StringLiteral: "StringLiteral", // 'string'
  Identifier: "Identifier", // Variables, functions, etc.
  Equals: "Equals", // =
  OpenParen: "OpenParen", // (
  CloseParen: "CloseParen", // )
  OpenStatement: "OpenStatement", // {%
  CloseStatement: "CloseStatement", // %}
  OpenExpression: "OpenExpression", // {{
  CloseExpression: "CloseExpression", // }}
  OpenSquareBracket: "OpenSquareBracket", // [
  CloseSquareBracket: "CloseSquareBracket", // ]
  OpenCurlyBracket: "OpenCurlyBracket", // {
  CloseCurlyBracket: "CloseCurlyBracket", // }
  Comma: "Comma", // ,
  Dot: "Dot", // .
  Colon: "Colon", // :
  Pipe: "Pipe", // |

  CallOperator: "CallOperator", // ()
  AdditiveBinaryOperator: "AdditiveBinaryOperator", // + -
  MultiplicativeBinaryOperator: "MultiplicativeBinaryOperator", // * / %
  ComparisonBinaryOperator: "ComparisonBinaryOperator", // < > <= >= == !=
  UnaryOperator: "UnaryOperator", // not, and - + in prefix position

  // Keywords
  Set: "Set",
  If: "If",
  For: "For",
  In: "In",
  Is: "Is",
  NotIn: "NotIn",
  Else: "Else",
  EndIf: "EndIf",
  ElseIf: "ElseIf",
  EndFor: "EndFor",
  And: "And",
  Or: "Or",
  Not: "UnaryOperator", // Alias: `not` shares the UnaryOperator token type
});

/**
 * The name of any entry of `TOKEN_TYPES`; used as the `type` of a token.
 */
export type TokenType = keyof typeof TOKEN_TYPES;
/**
 * Constant lookup for keywords and known identifiers + symbols.
 *
 * Maps the source spelling of each reserved word to the token type that is
 * emitted for it. Any word that is not a key of this table is an identifier.
 */
const KEYWORDS = Object.freeze({
  set: TOKEN_TYPES.Set,
  for: TOKEN_TYPES.For,
  in: TOKEN_TYPES.In,
  is: TOKEN_TYPES.Is,
  if: TOKEN_TYPES.If,
  else: TOKEN_TYPES.Else,
  endif: TOKEN_TYPES.EndIf,
  elif: TOKEN_TYPES.ElseIf,
  endfor: TOKEN_TYPES.EndFor,
  and: TOKEN_TYPES.And,
  or: TOKEN_TYPES.Or,
  not: TOKEN_TYPES.Not,
  // Two-word operator. A single-word lookup can never hit this key (words
  // contain no spaces); tokenize() assembles it from "not" followed by "in".
  "not in": TOKEN_TYPES.NotIn,

  // Literals
  true: TOKEN_TYPES.BooleanLiteral,
  false: TOKEN_TYPES.BooleanLiteral,
});
/**
 * Represents a single token in the template.
 */
export class Token {
  /**
   * Constructs a new Token.
   * @param value The value as seen inside the source code. For string literals
   *   the lexer stores the unescaped contents, without the surrounding quotes.
   * @param type The type of token.
   */
  constructor(
    public value: string,
    public type: TokenType
  ) {}
}
/**
 * Tests whether `char` is a single word character (`[A-Za-z0-9_]`).
 *
 * The pattern is anchored so that only a one-character string can match. The
 * lexer passes `src[i]`, which is `undefined` past the end of the input; an
 * unanchored `/\w/` would coerce that to the string "undefined" and match it.
 *
 * @param char The character to test.
 * @returns `true` if `char` is exactly one word character.
 */
function isWord(char: string): boolean {
  return /^\w$/.test(char);
}
/**
 * Tests whether `char` is a single decimal digit (`0`-`9`).
 *
 * The pattern is anchored so that only a one-character string can match,
 * which also makes an out-of-range `undefined` lookup evaluate to `false`.
 *
 * @param char The character to test.
 * @returns `true` if `char` is exactly one decimal digit.
 */
function isInteger(char: string): boolean {
  return /^[0-9]$/.test(char);
}
/**
 * A data structure which contains a list of rules to test.
 *
 * Each entry pairs a literal character sequence with the token type emitted
 * when the source matches it at the cursor. The table is scanned top to bottom
 * and the first match wins, so a longer sequence must appear before any entry
 * that is a prefix of it (e.g. "{%" before "{", "<=" before "<", "==" before "=").
 */
const ORDERED_MAPPING_TABLE: [string, TokenType][] = [
  // Control sequences
  ["{%", TOKEN_TYPES.OpenStatement],
  ["%}", TOKEN_TYPES.CloseStatement],
  ["{{", TOKEN_TYPES.OpenExpression],
  ["}}", TOKEN_TYPES.CloseExpression],
  // Single character tokens
  ["(", TOKEN_TYPES.OpenParen],
  [")", TOKEN_TYPES.CloseParen],
  ["{", TOKEN_TYPES.OpenCurlyBracket],
  ["}", TOKEN_TYPES.CloseCurlyBracket],
  ["[", TOKEN_TYPES.OpenSquareBracket],
  ["]", TOKEN_TYPES.CloseSquareBracket],
  [",", TOKEN_TYPES.Comma],
  [".", TOKEN_TYPES.Dot],
  [":", TOKEN_TYPES.Colon],
  ["|", TOKEN_TYPES.Pipe],
  // Comparison operators (two-character forms first)
  ["<=", TOKEN_TYPES.ComparisonBinaryOperator],
  [">=", TOKEN_TYPES.ComparisonBinaryOperator],
  ["==", TOKEN_TYPES.ComparisonBinaryOperator],
  ["!=", TOKEN_TYPES.ComparisonBinaryOperator],
  ["<", TOKEN_TYPES.ComparisonBinaryOperator],
  [">", TOKEN_TYPES.ComparisonBinaryOperator],
  // Arithmetic operators
  ["+", TOKEN_TYPES.AdditiveBinaryOperator],
  ["-", TOKEN_TYPES.AdditiveBinaryOperator],
  ["*", TOKEN_TYPES.MultiplicativeBinaryOperator],
  ["/", TOKEN_TYPES.MultiplicativeBinaryOperator],
  ["%", TOKEN_TYPES.MultiplicativeBinaryOperator],
  // Assignment operator (must come after "==")
  ["=", TOKEN_TYPES.Equals],
];
/**
 * Maps the character that follows a backslash to the character it stands for.
 * A backslash followed by a character that is not a key here is rejected by
 * the lexer as an unexpected escape.
 */
const ESCAPE_CHARACTERS = new Map([
  ["n", "\n"], // New line
  ["t", "\t"], // Horizontal tab
  ["r", "\r"], // Carriage return
  ["b", "\b"], // Backspace
  ["f", "\f"], // Form feed
  ["v", "\v"], // Vertical tab
  ["'", "'"], // Single quote
  ['"', '"'], // Double quote
  ["\\", "\\"], // Backslash
]);
/**
 * Whitespace-control switches applied to a template before it is tokenized.
 * Both are off when omitted.
 */
export interface PreprocessOptions {
  /** Remove the first newline that directly follows a block or comment tag. */
  trim_blocks?: boolean;
  /** Strip spaces and tabs between the start of a line and a block or comment tag. */
  lstrip_blocks?: boolean;
}
/**
 * Normalises a template's whitespace and removes its comments before lexing.
 *
 * Steps, in order:
 *  1. strip one trailing newline, if present;
 *  2. collapse every `{# ... #}` comment to the placeholder `{##}`;
 *  3. apply `lstrip_blocks` and `trim_blocks` when enabled;
 *  4. delete the comment placeholders;
 *  5. apply the `-` whitespace-control markers (`{%-`, `-%}`, `{{-`, `-}}`),
 *     removing the adjacent whitespace together with the `-`.
 *
 * @param template The raw template source.
 * @param options Optional whitespace-control switches.
 * @returns The template text ready to be tokenized.
 */
function preprocess(template: string, options: PreprocessOptions = {}): string {
  // According to https://jinja.palletsprojects.com/en/3.0.x/templates/#whitespace-control
  // In the default configuration:
  //  - a single trailing newline is stripped if present
  //  - other whitespace (spaces, tabs, newlines etc.) is returned unchanged
  if (template.endsWith("\n")) {
    template = template.slice(0, -1);
  }

  // Replace all comments with a placeholder (the `s` flag lets comments span lines).
  // This ensures that comment contents don't interfere with the following options,
  // while the comment tag itself still takes part in them.
  template = template.replace(/{#.*?#}/gs, "{##}");

  if (options.lstrip_blocks) {
    // The lstrip_blocks option can also be set to strip tabs and spaces from the
    // beginning of a line to the start of a block. (Nothing will be stripped if
    // there are other characters before the start of the block.)
    template = template.replace(/^[ \t]*({[#%])/gm, "$1");
  }

  if (options.trim_blocks) {
    // If an application configures Jinja to trim_blocks, the first newline after
    // a template tag is removed automatically (like in PHP).
    template = template.replace(/([#%]})\n/g, "$1");
  }

  return template
    .replace(/{##}/g, "") // Remove comments
    .replace(/-%}\s*/g, "%}") // `-%}` eats the whitespace after the tag
    .replace(/\s*{%-/g, "{%") // `{%-` eats the whitespace before the tag
    .replace(/-}}\s*/g, "}}") // `-}}` eats the whitespace after the expression
    .replace(/\s*{{-/g, "{{"); // `{{-` eats the whitespace before the expression
}
/**
 * Generate a list of tokens from a source string.
 *
 * The source is first preprocessed (comments removed, whitespace control
 * applied). Everything outside `{% ... %}` / `{{ ... }}` becomes `Text` tokens;
 * inside them whitespace is skipped and operators, literals, keywords and
 * identifiers are emitted.
 *
 * @param source The template source.
 * @param options Whitespace-control switches forwarded to the preprocessing step.
 * @returns The tokens in source order.
 * @throws {SyntaxError} If the input ends in the middle of a token, a string
 *   contains an unknown escape sequence, or an unexpected character is found.
 */
export function tokenize(source: string, options: PreprocessOptions = {}): Token[] {
  const tokens: Token[] = [];
  const src: string = preprocess(source, options);

  let cursorPosition = 0;
  // Number of `{` object-literal brackets still open in the current tag. While
  // it is positive, "}}" closes object literals and not the expression.
  let curlyBracketDepth = 0;

  // Consumes characters while `predicate` holds and returns them, resolving
  // backslash escapes on the way. Running off the end of the input is an error.
  const consumeWhile = (predicate: (char: string) => boolean): string => {
    let str = "";
    while (predicate(src[cursorPosition])) {
      // Check for escaped characters
      if (src[cursorPosition] === "\\") {
        // Consume the backslash
        ++cursorPosition;
        // Check for end of input
        if (cursorPosition >= src.length) throw new SyntaxError("Unexpected end of input");

        // Add the escaped character
        const escaped = src[cursorPosition++];
        const unescaped = ESCAPE_CHARACTERS.get(escaped);
        if (unescaped === undefined) {
          throw new SyntaxError(`Unexpected escaped character: ${escaped}`);
        }
        str += unescaped;
        continue;
      }

      str += src[cursorPosition++];
      if (cursorPosition >= src.length) throw new SyntaxError("Unexpected end of input");
    }
    return str;
  };

  // Build each token until end of input
  main: while (cursorPosition < src.length) {
    // First, consume all text that is outside of a Jinja statement or expression
    const lastTokenType = tokens.at(-1)?.type;
    if (
      lastTokenType === undefined ||
      lastTokenType === TOKEN_TYPES.CloseStatement ||
      lastTokenType === TOKEN_TYPES.CloseExpression
    ) {
      let text = "";
      while (
        cursorPosition < src.length &&
        // Keep going until we hit the next Jinja statement or expression
        !(src[cursorPosition] === "{" && (src[cursorPosition + 1] === "%" || src[cursorPosition + 1] === "{"))
      ) {
        // Consume text
        text += src[cursorPosition++];
      }

      // There is some text to add
      if (text.length > 0) {
        tokens.push(new Token(text, TOKEN_TYPES.Text));
        continue;
      }
    }

    // Consume (and ignore) all whitespace inside Jinja statements or expressions
    consumeWhile((char) => /\s/.test(char));

    // Handle multi-character tokens
    const char = src[cursorPosition];

    // Check for unary operators.
    // No token has been pushed since `lastTokenType` was read, so it is still current.
    if (char === "-" || char === "+") {
      if (lastTokenType === TOKEN_TYPES.Text || lastTokenType === undefined) {
        throw new SyntaxError(`Unexpected character: ${char}`);
      }
      switch (lastTokenType) {
        case TOKEN_TYPES.Identifier:
        case TOKEN_TYPES.NumericLiteral:
        case TOKEN_TYPES.BooleanLiteral:
        case TOKEN_TYPES.StringLiteral:
        case TOKEN_TYPES.CloseParen:
        case TOKEN_TYPES.CloseSquareBracket:
          // Part of a binary operator
          // a - 1, 1 - 1, true - 1, "apple" - 1, (1) - 1, a[1] - 1
          // Continue parsing normally
          break;

        default: {
          // Is part of a unary operator
          // (-1), [-1], (1 + -1), not -1, -apple
          ++cursorPosition; // consume the unary operator

          // Check for numbers following the unary operator;
          // a sign directly followed by digits is folded into one numeric literal
          const num = consumeWhile(isInteger);
          tokens.push(
            new Token(`${char}${num}`, num.length > 0 ? TOKEN_TYPES.NumericLiteral : TOKEN_TYPES.UnaryOperator)
          );
          continue;
        }
      }
    }

    // Try to match one of the tokens in the mapping table (first match wins)
    for (const [sequence, type] of ORDERED_MAPPING_TABLE) {
      // Inside an object literal, "}}" is two closing brackets, not the end of
      // the expression: skip this rule so the single "}" rule matches instead.
      if (sequence === "}}" && curlyBracketDepth > 0) {
        continue;
      }

      const slice = src.slice(cursorPosition, cursorPosition + sequence.length);
      if (slice === sequence) {
        tokens.push(new Token(sequence, type));

        if (type === TOKEN_TYPES.OpenExpression || type === TOKEN_TYPES.OpenStatement) {
          // A new tag starts with no open object literals
          curlyBracketDepth = 0;
        } else if (type === TOKEN_TYPES.OpenCurlyBracket) {
          ++curlyBracketDepth;
        } else if (type === TOKEN_TYPES.CloseCurlyBracket) {
          --curlyBracketDepth;
        }

        cursorPosition += sequence.length;
        continue main;
      }
    }

    if (char === "'" || char === '"') {
      ++cursorPosition; // Skip the opening quote
      const str = consumeWhile((c) => c !== char);
      tokens.push(new Token(str, TOKEN_TYPES.StringLiteral));
      ++cursorPosition; // Skip the closing quote
      continue;
    }

    if (isInteger(char)) {
      const num = consumeWhile(isInteger);
      tokens.push(new Token(num, TOKEN_TYPES.NumericLiteral));
      continue;
    }

    if (isWord(char)) {
      const word = consumeWhile(isWord);

      // Check for special/reserved keywords
      // NOTE: We use Object.hasOwn() to avoid matching `.toString()` and other Object methods
      const type = Object.hasOwn(KEYWORDS, word) ? KEYWORDS[word as keyof typeof KEYWORDS] : TOKEN_TYPES.Identifier;

      // Special case of not in:
      // If the previous token was a "not", and this token is "in"
      // then we want to combine them into a single token.
      // `TOKEN_TYPES.Not` is the same type as a unary "-" or "+", so the value
      // must be checked too; otherwise "- in" would be rewritten to "not in".
      const previous = tokens.at(-1);
      if (type === TOKEN_TYPES.In && previous?.type === TOKEN_TYPES.Not && previous.value === "not") {
        tokens.pop();
        tokens.push(new Token("not in", TOKEN_TYPES.NotIn));
      } else {
        tokens.push(new Token(word, type));
      }

      continue;
    }

    throw new SyntaxError(`Unexpected character: ${char}`);
  }

  return tokens;
}