Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
| import { omit } from "../src/utils/omit"; | |
| import { isBackend, isFrontend } from "../../shared"; | |
| import { HF_HUB_URL } from "../src/lib/getDefaultTask"; | |
/** Path of the JSON file that stores the recorded tapes. */
const TAPES_FILE = "./tapes.json";

/** Data-URL prefix identifying a tape body that holds base64-encoded binary data. */
const BASE64_PREFIX = "data:application/octet-stream;base64,";
/**
 * Operating modes of the fetch recorder.
 */
enum MODE {
  /** Perform the real request and save the response as a tape. */
  RECORD = "record",
  /** Answer from previously recorded tapes only; a missing tape is an error. */
  PLAYBACK = "playback",
  /** Answer from a tape when one exists, otherwise perform the request and record it. */
  CACHE = "cache",
  /** Neither replay nor record: requests go straight to the network. */
  DISABLED = "disabled",
}
/**
 * Active recorder mode, resolved once when this module is loaded.
 *
 * An explicit `VCR_MODE` environment value always wins. When it is not set,
 * the recorder is disabled if an `HF_TOKEN` is present (so real requests can
 * be made with a token and avoid rate limits) and falls back to playback
 * otherwise.
 */
let VCR_MODE: MODE;

const env = import.meta.env;
if (env.VCR_MODE) {
  // Both of these modes persist tapes with `node:fs`, which a browser cannot do.
  const writesTapes = env.VCR_MODE === MODE.RECORD || env.VCR_MODE === MODE.CACHE;
  if (writesTapes && isFrontend) {
    // Report the mode that was actually requested (it may be "cache", not "record").
    throw new Error(`VCR_MODE=${env.VCR_MODE} is not supported in the browser`);
  }
  VCR_MODE = env.VCR_MODE as MODE;
} else {
  VCR_MODE = env.HF_TOKEN ? MODE.DISABLED : MODE.PLAYBACK;
}
// Keep a handle on the unpatched fetch, then route every global fetch call
// through the recorder, which decides whether to replay, record or pass through.
const originalFetch = globalThis.fetch;
globalThis.fetch = (...[input, init]) => vcr(originalFetch, input, init);
/**
 * Response half of a tape, as persisted in the tapes file.
 */
interface TapeResponse {
  /**
   * Stored response body: either plain text, or a data URL carrying the
   * base64-encoded bytes.
   */
  body: string;
  status: number;
  statusText: string;
  headers?: HeadersInit;
}

/**
 * A recorded HTTP exchange: the request that was made and the response it received.
 */
interface Tape {
  /** URL the request was sent to. */
  url: string;
  /** Request options that were recorded alongside the URL, if any. */
  init?: RequestInit;
  /** The recorded response. */
  response: TapeResponse;
}
/**
 * Rebuilds a `Response` from a recorded tape.
 *
 * A body that starts with the base64 data-URL prefix is decoded by fetching
 * the data URL with the unpatched fetch; any other body is used verbatim.
 *
 * @param tape - The recorded exchange to replay.
 * @returns A fresh `Response` carrying the recorded body, status and headers.
 */
async function tapeToResponse(tape: Tape): Promise<Response> {
  const { body, status, statusText, headers } = tape.response;

  // The Response constructor throws a TypeError when a body (even an empty
  // string) is combined with a "null body" status, so such tapes must be
  // replayed without a body.
  const forbidsBody = status === 204 || status === 205 || status === 304;

  let payload: BodyInit | null;
  if (forbidsBody) {
    payload = null;
  } else if (body?.startsWith(BASE64_PREFIX)) {
    payload = (await originalFetch(body)).body;
  } else {
    payload = body;
  }

  return new Response(payload, { status, statusText, headers });
}
/**
 * Computes the identifier under which a request is stored: the hex-encoded
 * SHA-256 digest of the JSON serialization of its URL, method and body.
 *
 * Headers are voluntarily skipped for now. They are not useful to distinguish
 * requests, and they bring more complexity because some of them (like "date")
 * are not deterministic and they can be provided in several formats.
 *
 * @param url - Request URL.
 * @param init - Request options; only `method` and `body` take part in the hash.
 * @returns The digest as a lowercase hexadecimal string.
 */
async function hashRequest(url: string, init: RequestInit): Promise<string> {
  // Key order matters: it determines the serialized string and therefore the hash.
  const serialized = JSON.stringify({
    url,
    method: init.method,
    body: init.body,
  });
  const bytes = new TextEncoder().encode(serialized);

  let digest: ArrayBuffer;
  if (isBackend) {
    // On the backend the implementation is loaded from node:crypto on demand.
    const nodeCrypto = await import("node:crypto");
    digest = await nodeCrypto.subtle.digest("SHA-256", bytes);
  } else {
    digest = await crypto.subtle.digest("SHA-256", bytes);
  }

  let hex = "";
  for (const byte of new Uint8Array(digest)) {
    hex += byte.toString(16).padStart(2, "0");
  }
  return hex;
}
/** Extracts the URL string from any of the input shapes accepted by `fetch`. */
function vcrRequestUrl(input: RequestInfo | URL): string {
  if (typeof input === "string") {
    return input;
  }
  return input instanceof URL ? input.href : input.url;
}

/**
 * Re-serializes a JSON document with every long base64-looking string cut to
 * its first 1000 characters; returns the text untouched when it is not valid JSON.
 */
function vcrTruncateBase64InJson(text: string): string {
  try {
    return JSON.stringify(
      JSON.parse(text, (_key: string, value: unknown): unknown => {
        const looksLikeLongBase64 =
          typeof value === "string" &&
          value.length > 1_000 &&
          // base64 heuristic
          value.length % 4 === 0 &&
          /^[a-zA-Z0-9+/]+={0,2}$/.test(value);
        return looksLikeLongBase64 ? (value as string).slice(0, 1_000) : value;
      })
    );
  } catch {
    // A JSON Content-Type does not guarantee a parsable body (e.g. an empty one);
    // keep the raw text rather than failing the recording.
    return text;
  }
}

/**
 * Fetch replacement whose behavior depends on the value of `VCR_MODE`:
 * - record: requests are made to the external API and responses are saved as tapes
 * - playback: answers are read from the tapes; if a request has not been recorded
 *   before, an error is thrown
 * - cache: same as playback, but a response that is not found is fetched and recorded
 * - disabled: requests go straight to the network, tapes are neither read nor written
 *
 * Requests whose URL starts with `HF_HUB_URL` are never replayed in playback
 * mode and never recorded.
 *
 * @param originalFetch - The unpatched fetch used for real network calls.
 * @param input - Request target, in any form accepted by `fetch`.
 * @param init - Request options; `method` and `body` take part in the tape hash.
 * @returns The live response, or a response rebuilt from a tape.
 * @throws Error in playback mode when no tape matches the request.
 */
async function vcr(
  originalFetch: typeof global.fetch,
  input: RequestInfo | URL,
  init: RequestInit = {}
): Promise<Response> {
  // Nothing to replay or record: skip hashing and loading the tapes entirely.
  if (VCR_MODE === MODE.DISABLED) {
    return originalFetch(input, init);
  }

  const url = vcrRequestUrl(input);
  const isHubRequest = url.startsWith(HF_HUB_URL);
  const hash = await hashRequest(url, init);
  const { default: tapes } = await import(TAPES_FILE);

  if (VCR_MODE === MODE.PLAYBACK && !isHubRequest) {
    if (!tapes[hash]) {
      throw new Error(`Tape not found: ${hash} (${url})`);
    }
    return tapeToResponse(tapes[hash]);
  }

  if (VCR_MODE === MODE.CACHE && tapes[hash]) {
    return tapeToResponse(tapes[hash]);
  }

  const response = await originalFetch(input, init);

  const isRecording = VCR_MODE === MODE.RECORD || VCR_MODE === MODE.CACHE;
  if (isHubRequest || !isRecording) {
    return response;
  }

  const contentType = response.headers.get("Content-Type") ?? "";
  const isJson = contentType.includes("json");
  const isText = isJson || contentType.includes("text");

  // The body is always consumed here, which is why a rebuilt response is returned below.
  const arrayBuffer = await response.arrayBuffer();

  // Binary payloads are deliberately stored as an empty body.
  // Alternative to also save binary data:
  // arrayBuffer.byteLength > 30_000
  //   ? ""
  //   : isText
  //     ? new TextDecoder().decode(arrayBuffer)
  //     : BASE64_PREFIX + base64FromBytes(new Uint8Array(arrayBuffer)),
  let body = "";
  if (isText) {
    const text = new TextDecoder().decode(arrayBuffer);
    // Check JSON payloads for base64 strings and truncate them.
    body = isJson ? vcrTruncateBase64InJson(text) : text;
  }

  const tape: Tape = {
    url,
    init: {
      // Never persist credentials in the tapes file.
      headers: init.headers && omit(init.headers as Record<string, string>, "Authorization"),
      method: init.method,
      body: typeof init.body === "string" && init.body.length < 1_000 ? init.body : undefined,
    },
    response: {
      body,
      status: response.status,
      statusText: response.statusText,
      headers: Object.fromEntries(
        // Remove varying headers as much as possible
        [...response.headers.entries()].filter(
          ([key]) => key !== "date" && key !== "content-length" && !key.startsWith("x-") && key !== "via"
        )
      ),
    },
  };
  tapes[hash] = tape;

  const { writeFileSync } = await import("node:fs");
  writeFileSync(`./test/${TAPES_FILE}`, JSON.stringify(tapes, null, 2));

  // Return a new response with an unconsumed body
  return tapeToResponse(tape);
}