diff --git a/src/pyodide/BUILD.bazel b/src/pyodide/BUILD.bazel index 7f1d43216ec..097cdf7bf26 100644 --- a/src/pyodide/BUILD.bazel +++ b/src/pyodide/BUILD.bazel @@ -82,7 +82,7 @@ copy_file( # TODO: all of these should be fixed by linking our own Pyodide or by upstreaming. PRELUDE = """ -import { newWasmModule, monotonicDateNow, wasmInstantiate } from "pyodide-internal:builtin_wrappers"; +import { newWasmModule, monotonicDateNow, wasmInstantiate, getRandomValues } from "pyodide-internal:builtin_wrappers"; // Pyodide uses `new URL(some_url, location)` to resolve the path in `loadPackage`. Setting // `location = undefined` makes this throw an error if some_url is not an absolute url. Which is what @@ -124,11 +124,7 @@ REPLACEMENTS = [ ], [ "crypto.getRandomValues(", - "Module.getRandomValues(Module, ", - ], - [ - "(shouldRunNow)", - "(Module.noInitialRun)", + "getRandomValues(Module, ", ], ] diff --git a/src/pyodide/internal/builtin_wrappers.ts b/src/pyodide/internal/builtin_wrappers.ts index 0a8ff07204f..9f45ff5f0d8 100644 --- a/src/pyodide/internal/builtin_wrappers.ts +++ b/src/pyodide/internal/builtin_wrappers.ts @@ -1,4 +1,5 @@ import { default as UnsafeEval } from 'internal:unsafe-eval'; +export { getRandomValues } from 'pyodide-internal:topLevelEntropy/lib'; let lastTime: number; let lastDelta = 0; diff --git a/src/pyodide/internal/emscriptenSetup.ts b/src/pyodide/internal/emscriptenSetup.ts deleted file mode 100644 index e9557838831..00000000000 --- a/src/pyodide/internal/emscriptenSetup.ts +++ /dev/null @@ -1,203 +0,0 @@ -// import { enterJaegerSpan } from "pyodide-internal:jaeger"; -import { reportError } from 'pyodide-internal:util'; -/** - * This file is a simplified version of the Pyodide loader: - * https://github.com/pyodide/pyodide/blob/main/src/js/pyodide.ts - * - * In particular, it drops the package lock, which disables - * `pyodide.loadPackage`. In trade we add memory snapshots here. - */ - -/** - * _createPyodideModule and pyodideWasmModule together are produced by the - * Emscripten linker - */ -import { _createPyodideModule } from 'pyodide-internal:generated/pyodide.asm'; -import pyodideWasmModule from 'pyodide-internal:generated/pyodide.asm.wasm'; - -/** - * The Python and Pyodide stdlib zipped together. The zip format is convenient - * because Python has a "ziploader" that allows one to import directly from a - * zip file. - * - * The ziploader solves bootstrapping problems around unpacking: Python comes - * with a bunch of C libs to unpack various archive formats, but they need stuff - * in this zip file to initialize their runtime state. - */ -import stdlib from 'pyodide-internal:generated/python_stdlib.zip'; - -/** - * A hook that the Emscripten runtime calls to perform the WebAssembly - * instantiation action. Once instantiated, this callback function should call - * ``successCallback()`` with the generated WebAssembly Instance object. - * - * @param wasmImports a JS object which contains all the function imports that - * need to be passed to the WebAssembly Module when instantiating - * @param successCallback A callback to indicate that instantiation was - * successful, - * @returns The return value of this function should contain the ``exports`` object of - * the instantiated WebAssembly Module, or an empty dictionary object ``{}`` if - * the instantiation is performed asynchronously, or ``false`` if instantiation - * synchronously failed. There is no way to indicate asynchronous failure. - */ -function instantiateWasm( - wasmImports: WebAssembly.Imports, - successCallback: (inst: WebAssembly.Instance, mod: WebAssembly.Module) => void -): WebAssembly.Exports { - (async function () { - // Instantiate pyodideWasmModule with wasmImports - const instance = await WebAssembly.instantiate( - pyodideWasmModule, - wasmImports - ); - successCallback(instance, pyodideWasmModule); - })(); - - return {}; -} - -/** - * This is passed as a preRun hook in EmscriptenSettings, run just before - * main(). It ensures that the file system includes the stuff that main() needs, - * most importantly the Python standard library. - * - * Put the Python + Pyodide standard libraries into a zip file in the - * appropriate location /lib/python311.zip . Python will import stuff directly - * from this zip file using ZipImporter. - * - * ZipImporter is quite useful here -- the Python runtime knows how to unpack a - * bunch of different archive formats but it is not possible to use these until - * the runtime state is initialized. So ZipImporter breaks this bootstrapping - * knot for us. - * - * We also make an empty home directory and an empty global site-packages - * directory `/lib/pythonv.vv/site-packages`. - * - * This is a simplified version of the `prepareFileSystem` function here: - * https://github.com/pyodide/pyodide/blob/main/src/js/module.ts - */ -function prepareFileSystem(Module: Module): void { - try { - const pymajor = Module._py_version_major(); - const pyminor = Module._py_version_minor(); - Module.FS.mkdirTree(`/lib/python${pymajor}.${pyminor}/site-packages`); - Module.FS.writeFile( - `/lib/python${pymajor}${pyminor}.zip`, - new Uint8Array(stdlib), - { canOwn: true } - ); - Module.FS.mkdirTree(Module.API.config.env.HOME); - } catch (e) { - reportError(e); - } -} - -/** - * A preRun hook. Make sure environment variables are visible at runtime. - */ -function setEnv(Module: Module): void { - Object.assign(Module.ENV, Module.API.config.env); -} - -/** - * The Emscripten settings object - * - * This isn't public API of Pyodide so it's a bit fiddly. - */ -function getEmscriptenSettings( - lockfile?: PackageLock, - indexURL?: string -): EmscriptenSettings { - const config = { - // jsglobals is used for the js module. - jsglobals: globalThis, - // environment variables go here - env: { - HOME: '/session', - // We don't have access to entropy at startup so we cannot support hash - // randomization. Setting `PYTHONHASHSEED` disables it. See further - // discussion in topLevelEntropy/entropy_patches.py - PYTHONHASHSEED: '111', - }, - // This is the index that we use as the base URL to fetch the wheels. - indexURL, - }; - // loadPackage initializes its state using lockFilePromise. - const lockFilePromise = lockfile ? Promise.resolve(lockfile) : undefined; - const API = { config, lockFilePromise }; - let resolveReadyPromise: (mod: Module) => void; - const readyPromise: Promise = new Promise( - (res) => (resolveReadyPromise = res) - ); - function waitForDynlibs(Module: Module): void { - Module.addRunDependency('dynlibs'); - resolveReadyPromise(Module); - } - - // Emscripten settings to control runtime instantiation. - return { - // preRun hook to set up the file system before running main - // The preRun hook gets run independently of noInitialRun, which is - // important because the file system lives outside of linear memory. - preRun: [prepareFileSystem, setEnv, waitForDynlibs], - instantiateWasm, - reportUndefinedSymbolsNoOp() {}, - API, // Pyodide requires we pass this in. - readyPromise, - }; -} - -/** - * Simple wrapper around _createPyodideModule that applies some monkey patches - * to force the environment to be detected the way we want. - * - * In the long run we should fix this in `pyodide.asm.js` instead. - * - * Returns the instantiated emscriptenModule object. - */ -async function instantiateEmscriptenModule( - emscriptenSettings: EmscriptenSettings -): Promise { - try { - // Force Emscripten to feature detect the way we want - // They used to have an `environment` setting that did this but it has been - // removed =( - // If/when we link our own Pyodide we can remove this. - // @ts-ignore - globalThis.window = {}; // makes ENVIRONMENT_IS_WEB = true - // @ts-ignore - globalThis.importScripts = 1; // makes ENVIRONMENT_IS_WORKER = false - const p = _createPyodideModule(emscriptenSettings); - // @ts-ignore - delete globalThis.window; - // @ts-ignore - delete globalThis.importScripts; - const emscriptenModule = await p; - return emscriptenModule; - } catch (e) { - console.warn('Error in instantiateEmscriptenModule'); - reportError(e); - } -} - -let emscriptenPromise: Promise; -export async function setupEmscriptenModule( - lockfile?: PackageLock, - indexURL?: string -) { - console.log('setupEmscripten Module'); - if (emscriptenPromise) { - return await emscriptenPromise; - } - const emscriptenSettings = getEmscriptenSettings(lockfile, indexURL); - // enterJaegerSpan("instantiate_emscripten", () => - instantiateEmscriptenModule(emscriptenSettings); - // ); - emscriptenPromise = emscriptenSettings.readyPromise; - console.log('Set emscriptenPromise'); - const res = await emscriptenPromise; - console.log('awaited EmscriptenPromise'); - return res; -} - -export function f() {} diff --git a/src/pyodide/internal/python.ts b/src/pyodide/internal/python.ts index f1595abd30a..a46f04c7062 100644 --- a/src/pyodide/internal/python.ts +++ b/src/pyodide/internal/python.ts @@ -1,17 +1,19 @@ Error.stackTraceLimit = Infinity; import { enterJaegerSpan } from 'pyodide-internal:jaeger'; import { + TRANSITIVE_REQUIREMENTS, SITE_PACKAGES, adjustSysPath, mountSitePackages, mountWorkerFiles, } from 'pyodide-internal:setupPackages'; +import { reportError } from 'pyodide-internal:util'; import { SHOULD_RESTORE_SNAPSHOT, finishSnapshotSetup, + getSnapshotSettings, maybeSetupSnapshotUpload, restoreSnapshot, - preloadDynamicLibs, } from 'pyodide-internal:snapshot'; import { entropyMountFiles, @@ -202,10 +204,6 @@ async function prepareWasmLinearMemory(Module: Module): Promise { // Note: if we are restoring from a snapshot, runtime is not initialized yet. mountSitePackages(Module, SITE_PACKAGES.rootInfo); entropyMountFiles(Module); - Module.noInitialRun = !SHOULD_RESTORE_SNAPSHOT; - preloadDynamicLibs(Module); - Module.getRandomValues = getRandomValues; - Module.removeRunDependency('dynlibs'); if (SHOULD_RESTORE_SNAPSHOT) { restoreSnapshot(Module); } @@ -224,8 +222,10 @@ export async function loadPyodide( lockfile: PackageLock, indexURL: string ): Promise { - const Module = await setupEmscriptenModule(lockfile, indexURL); - // Finish setting up Pyodide's ffi so we can use the nice Python interface + const emscriptenSettings = getEmscriptenSettings(lockfile, indexURL); + const Module = await enterJaegerSpan('instantiate_emscripten', () => + instantiateEmscriptenModule(emscriptenSettings) + ); await enterJaegerSpan('prepare_wasm_linear_memory', () => prepareWasmLinearMemory(Module) ); diff --git a/src/pyodide/internal/snapshot.ts b/src/pyodide/internal/snapshot.ts index 91c81fd5e19..b52739b5972 100644 --- a/src/pyodide/internal/snapshot.ts +++ b/src/pyodide/internal/snapshot.ts @@ -161,6 +161,17 @@ export function preloadDynamicLibs(Module: Module): void { } } +export function getSnapshotSettings() { + return { + preRun: [preloadDynamicLibs], + // if SNAPSHOT_SIZE is defined, start with the linear memory big enough to + // fit the snapshot. If it's not defined, this falls back to the default. + INITIAL_MEMORY: SNAPSHOT_SIZE, + // skip running main() if we have a snapshot + noInitialRun: SHOULD_RESTORE_SNAPSHOT, + }; +} + type DylinkInfo = { [name: string]: { handles: string[] }; } & { @@ -405,17 +416,12 @@ function decodeSnapshot(): void { } export function restoreSnapshot(Module: Module): void { - if (!READ_MEMORY || !SNAPSHOT_SIZE) { + if (!READ_MEMORY) { throw Error('READ_MEMORY not defined when restoring snapshot'); } - Module.growMemory(SNAPSHOT_SIZE); READ_MEMORY(Module); } -export function hasSnapshot() { - return !!READ_MEMORY; -} - let TEST_SNAPSHOT: Uint8Array | undefined = undefined; (function () { try { diff --git a/src/pyodide/types/Module.d.ts b/src/pyodide/types/Module.d.ts index 11ef8e57358..6257162d26f 100644 --- a/src/pyodide/types/Module.d.ts +++ b/src/pyodide/types/Module.d.ts @@ -39,16 +39,4 @@ interface Module { opt: object, path: string ) => WebAssembly.Exports; - growMemory(newSize: number): void; - resolveGlobalSymbol(symName: string): { sym?: (...args: number[]) => number }; - getExecutableName(): string; - HEAPU32: Uint32Array; - stringToUTF8OnStack(str: string): number; - stackAlloc(size: number): number; - exitJS(ret: number, implicit: boolean): never; - handleException(e: any): number; - addRunDependency(x: string): void; - removeRunDependency(x: string): void; - noInitialRun: boolean; - getRandomValues(Module: Module, x: Uint8Array): Uint8Array | undefined; } diff --git a/src/pyodide/types/emscripten.d.ts b/src/pyodide/types/emscripten.d.ts index 74870a58dc4..46d465901d5 100644 --- a/src/pyodide/types/emscripten.d.ts +++ b/src/pyodide/types/emscripten.d.ts @@ -8,8 +8,8 @@ interface EmscriptenSettings { ) => void ) => WebAssembly.Exports; reportUndefinedSymbolsNoOp: () => void; + noInitialRun: boolean; API: { config: API['config']; }; - readyPromise: Promise; } diff --git a/src/workerd/jsg/setup.h b/src/workerd/jsg/setup.h index aaad19b897e..12fda5a5c94 100644 --- a/src/workerd/jsg/setup.h +++ b/src/workerd/jsg/setup.h @@ -17,6 +17,7 @@ #include #include + namespace workerd::jsg { // Construct a default V8 platform, with the given background thread pool size.