Files
ai-agent-deep-dive/extracted-source/node_modules/@alcalzone/ansi-tokenize/build/tokenize.js

141 lines
5.1 KiB
JavaScript

import isFullwidthCodePoint from "is-fullwidth-code-point";
import { CSI, ESCAPES, getEndCode, linkStartCodePrefix, linkStartCodePrefixCharCodes, OSC, } from "./ansiCodes.js";
// HOT PATH: Use only basic string/char code operations for maximum performance
function parseLinkCode(string, offset) {
string = string.slice(offset);
for (let index = 1; index < linkStartCodePrefixCharCodes.length; index++) {
if (string.charCodeAt(index) !== linkStartCodePrefixCharCodes[index]) {
return undefined;
}
}
// This is a link code (with or without the URL part). Find the end of it.
const endIndex = string.indexOf("\x07", linkStartCodePrefix.length);
if (endIndex === -1)
return undefined;
return string.slice(0, endIndex + 1);
}
const CC_0 = "0".charCodeAt(0);
const CC_9 = "9".charCodeAt(0);
const CC_SEMI = ";".charCodeAt(0);
const CC_M = "m".charCodeAt(0);
/**
* Scans through the given string and finds the index of the last character of an SGR sequence
* like `\x1B[38;2;123;123;123m`. This assumes that the string has been checked to start with `\x1B[`.
* Returns -1 if no valid SGR sequence is found.
*/
function findSGRSequenceEndIndex(str) {
for (let index = 2; index < str.length; index++) {
const charCode = str.charCodeAt(index);
// m marks the end of the SGR sequence
if (charCode === CC_M)
return index;
// Digits and semicolons are valid
if (charCode === CC_SEMI)
continue;
if (charCode >= CC_0 && charCode <= CC_9)
continue;
// Everything else is invalid
break;
}
return -1;
}
// HOT PATH: Use only basic string/char code operations for maximum performance
function parseSGRSequence(string, offset) {
string = string.slice(offset);
const endIndex = findSGRSequenceEndIndex(string);
if (endIndex === -1)
return;
return string.slice(0, endIndex + 1);
}
/**
* Splits compound SGR sequences like `\x1B[1;3;31m` into individual components
*/
function splitCompoundSGRSequences(code) {
if (!code.includes(";")) {
// Not a compound code
return [code];
}
const codeParts = code
// Strip off the escape sequences \x1B[ and m
.slice(2, -1)
.split(";");
const ret = [];
for (let i = 0; i < codeParts.length; i++) {
const rawCode = codeParts[i];
// Keep 8-bit and 24-bit color codes (containing multiple ";") together
if (rawCode === "38" || rawCode === "48") {
if (i + 2 < codeParts.length && codeParts[i + 1] === "5") {
// 8-bit color, followed by another number
ret.push(codeParts.slice(i, i + 3).join(";"));
i += 2;
continue;
}
else if (i + 4 < codeParts.length && codeParts[i + 1] === "2") {
// 24-bit color, followed by three numbers
ret.push(codeParts.slice(i, i + 5).join(";"));
i += 4;
continue;
}
}
// Not a (valid) 8/24-bit color code, push as is
ret.push(rawCode);
}
return ret.map((part) => `\x1b[${part}m`);
}
export function tokenize(str, endChar = Number.POSITIVE_INFINITY) {
const ret = [];
let index = 0;
let visible = 0;
while (index < str.length) {
const codePoint = str.codePointAt(index);
if (ESCAPES.has(codePoint)) {
let code;
// Peek the next code point to determine the type of ANSI sequence
const nextCodePoint = str.codePointAt(index + 1);
if (nextCodePoint === OSC) {
// ] = operating system commands, like links
code = parseLinkCode(str, index);
if (code) {
ret.push({
type: "ansi",
code: code,
endCode: getEndCode(code),
});
}
}
else if (nextCodePoint === CSI) {
// [ = control sequence introducer, like SGR sequences [...m
code = parseSGRSequence(str, index);
if (code) {
// Split compound codes into individual tokens
const codes = splitCompoundSGRSequences(code);
for (const individualCode of codes) {
ret.push({
type: "ansi",
code: individualCode,
endCode: getEndCode(individualCode),
});
}
}
}
if (code) {
index += code.length;
continue;
}
}
const fullWidth = isFullwidthCodePoint(codePoint);
const character = String.fromCodePoint(codePoint);
ret.push({
type: "char",
value: character,
fullWidth,
});
index += character.length;
visible += fullWidth ? 2 : character.length;
if (visible >= endChar) {
break;
}
}
return ret;
}
//# sourceMappingURL=tokenize.js.map