diff --git a/src/pyodide/internal/emscriptenSetup.ts b/src/pyodide/internal/emscriptenSetup.ts new file mode 100644 index 00000000000..6044f582791 --- /dev/null +++ b/src/pyodide/internal/emscriptenSetup.ts @@ -0,0 +1,211 @@ +/** + * This file is intended to be executed in the Python pool (once it exists). As such, it cannot + * import anything that transitively uses C++ extension modules. It has to work in a vanilla v8 + * isolate. Also, we will have to bundle this file and all of its transitive imports into a single + * js file. + */ + +import { reportError } from 'pyodide-internal:util'; + +/** + * _createPyodideModule and pyodideWasmModule together are produced by the + * Emscripten linker + */ +import { _createPyodideModule } from 'pyodide-internal:generated/pyodide.asm'; + +/** + * A preRun hook. Make sure environment variables are visible at runtime. + */ +function setEnv(Module: Module): void { + Object.assign(Module.ENV, Module.API.config.env); +} + +function getWaitForDynlibs(resolveReadyPromise: PreRunHook): PreRunHook { + return function waitForDynlibs(Module: Module): void { + // Block the instantiation of the runtime until we can preload the dynamic libraries. The + // promise returned by _createPyodideModule won't resolve until we call + // `removeRunDependency('dynlibs')` so we use `emscriptenSettings.readyPromise` to continue + // execution when we've gotten to this point. + Module.addRunDependency('dynlibs'); + resolveReadyPromise(Module); + }; +} + +/** + * This is passed as a preRun hook in EmscriptenSettings, run just before + * main(). It ensures that the file system includes the stuff that main() needs, + * most importantly the Python standard library. + * + * Put the Python + Pyodide standard libraries into a zip file in the + * appropriate location /lib/python311.zip . Python will import stuff directly + * from this zip file using ZipImporter. + * + * ZipImporter is quite useful here -- the Python runtime knows how to unpack a + * bunch of different archive formats but it is not possible to use these until + * the runtime state is initialized. So ZipImporter breaks this bootstrapping + * knot for us. + * + * We also make an empty home directory and an empty global site-packages + * directory `/lib/pythonv.vv/site-packages`. + * + * This is a simplified version of the `prepareFileSystem` function here: + * https://github.com/pyodide/pyodide/blob/main/src/js/module.ts + */ +function getPrepareFileSystem(pythonStdlib: Uint8Array): PreRunHook { + return function prepareFileSystem(Module: Module): void { + try { + const pymajor = Module._py_version_major(); + const pyminor = Module._py_version_minor(); + Module.FS.mkdirTree(`/lib/python${pymajor}.${pyminor}/site-packages`); + Module.FS.writeFile( + `/lib/python${pymajor}${pyminor}.zip`, + new Uint8Array(pythonStdlib), + { canOwn: true } + ); + Module.FS.mkdirTree(Module.API.config.env.HOME); + } catch (e) { + reportError(e); + } + }; +} + +/** + * A hook that the Emscripten runtime calls to perform the WebAssembly + * instantiation action. Once instantiated, this callback function should call + * ``successCallback()`` with the generated WebAssembly Instance object. + * + * @param wasmImports a JS object which contains all the function imports that + * need to be passed to the WebAssembly Module when instantiating + * @param successCallback A callback to indicate that instantiation was + * successful, + * @returns The return value of this function should contain the ``exports`` object of + * the instantiated WebAssembly Module, or an empty dictionary object ``{}`` if + * the instantiation is performed asynchronously, or ``false`` if instantiation + * synchronously failed. There is no way to indicate asynchronous failure. + */ +function getInstantiateWasm( + pyodideWasmModule: WebAssembly.Module +): EmscriptenSettings['instantiateWasm'] { + return function instantiateWasm( + wasmImports: WebAssembly.Imports, + successCallback: ( + inst: WebAssembly.Instance, + mod: WebAssembly.Module + ) => void + ): WebAssembly.Exports { + (async function () { + // Instantiate pyodideWasmModule with wasmImports + const instance = await WebAssembly.instantiate( + pyodideWasmModule, + wasmImports + ); + successCallback(instance, pyodideWasmModule); + })(); + + return {}; + }; +} + +/** + * The Emscripten settings object + * + * This isn't public API of Pyodide so it's a bit fiddly. + */ +function getEmscriptenSettings( + lockfile: PackageLock, + indexURL: string, + pythonStdlib: Uint8Array, + pyodideWasmModule: WebAssembly.Module +): EmscriptenSettings { + const config = { + // jsglobals is used for the js module. + jsglobals: globalThis, + // environment variables go here + env: { + HOME: '/session', + // We don't have access to entropy at startup so we cannot support hash + // randomization. Setting `PYTHONHASHSEED` disables it. See further + // discussion in topLevelEntropy/entropy_patches.py + PYTHONHASHSEED: '111', + }, + // This is the index that we use as the base URL to fetch the wheels. + indexURL, + }; + // loadPackage initializes its state using lockFilePromise. + const lockFilePromise = lockfile ? Promise.resolve(lockfile) : undefined; + const API = { config, lockFilePromise }; + let resolveReadyPromise: (mod: Module) => void; + const readyPromise: Promise = new Promise( + (res) => (resolveReadyPromise = res) + ); + const waitForDynlibs = getWaitForDynlibs(resolveReadyPromise!); + const prepareFileSystem = getPrepareFileSystem(pythonStdlib); + const instantiateWasm = getInstantiateWasm(pyodideWasmModule); + + // Emscripten settings to control runtime instantiation. + return { + // preRun hook to set up the file system before running main + // The preRun hook gets run independently of noInitialRun, which is + // important because the file system lives outside of linear memory. + preRun: [prepareFileSystem, setEnv, waitForDynlibs], + instantiateWasm, + reportUndefinedSymbolsNoOp() {}, + readyPromise, + API, // Pyodide requires we pass this in. + }; +} + +/** + * Force Emscripten to feature detect the way we want. + * We want it to think we're the browser main thread. + */ +function* featureDetectionMonkeyPatchesContextManager() { + const global = globalThis as any; + // Make Emscripten think + global.window = {}; + global.document = { createElement() {} }; + global.sessionStorage = {}; + try { + yield; + } finally { + delete global.window; + delete global.document; + delete global.sessionStorage; + } +} + +/** + * Simple wrapper around _createPyodideModule that applies some monkey patches + * to force the environment to be detected the way we want. + * + * In the long run we should fix this in `pyodide.asm.js` instead. + * + * Returns the instantiated emscriptenModule object. + */ +export async function instantiateEmscriptenModule( + lockfile: PackageLock, + indexURL: string, + pythonStdlib: Uint8Array, + wasmModule: WebAssembly.Module +): Promise { + const emscriptenSettings = getEmscriptenSettings( + lockfile, + indexURL, + pythonStdlib, + wasmModule + ); + try { + for (const _ of featureDetectionMonkeyPatchesContextManager()) { + // Ignore the returned promise, it won't resolve until we're done preloading dynamic + // libraries. + const _promise = _createPyodideModule(emscriptenSettings); + } + + // Wait until we've executed all the preRun hooks before proceeding + const emscriptenModule = await emscriptenSettings.readyPromise; + return emscriptenModule; + } catch (e) { + console.warn('Error in instantiateEmscriptenModule'); + reportError(e); + } +} diff --git a/src/pyodide/internal/python.ts b/src/pyodide/internal/python.ts index a46f04c7062..7cf7a2111a7 100644 --- a/src/pyodide/internal/python.ts +++ b/src/pyodide/internal/python.ts @@ -1,19 +1,16 @@ -Error.stackTraceLimit = Infinity; import { enterJaegerSpan } from 'pyodide-internal:jaeger'; import { - TRANSITIVE_REQUIREMENTS, SITE_PACKAGES, adjustSysPath, mountSitePackages, mountWorkerFiles, } from 'pyodide-internal:setupPackages'; -import { reportError } from 'pyodide-internal:util'; import { SHOULD_RESTORE_SNAPSHOT, finishSnapshotSetup, - getSnapshotSettings, maybeSetupSnapshotUpload, restoreSnapshot, + preloadDynamicLibs, } from 'pyodide-internal:snapshot'; import { entropyMountFiles, @@ -33,7 +30,6 @@ import { * _createPyodideModule and pyodideWasmModule together are produced by the * Emscripten linker */ -import { _createPyodideModule } from 'pyodide-internal:generated/pyodide.asm'; import pyodideWasmModule from 'pyodide-internal:generated/pyodide.asm.wasm'; /** @@ -45,154 +41,8 @@ import pyodideWasmModule from 'pyodide-internal:generated/pyodide.asm.wasm'; * with a bunch of C libs to unpack various archive formats, but they need stuff * in this zip file to initialize their runtime state. */ -import stdlib from 'pyodide-internal:generated/python_stdlib.zip'; - -/** - * A hook that the Emscripten runtime calls to perform the WebAssembly - * instantiation action. Once instantiated, this callback function should call - * ``successCallback()`` with the generated WebAssembly Instance object. - * - * @param wasmImports a JS object which contains all the function imports that - * need to be passed to the WebAssembly Module when instantiating - * @param successCallback A callback to indicate that instantiation was - * successful, - * @returns The return value of this function should contain the ``exports`` object of - * the instantiated WebAssembly Module, or an empty dictionary object ``{}`` if - * the instantiation is performed asynchronously, or ``false`` if instantiation - * synchronously failed. There is no way to indicate asynchronous failure. - */ -function instantiateWasm( - wasmImports: WebAssembly.Imports, - successCallback: (inst: WebAssembly.Instance, mod: WebAssembly.Module) => void -): WebAssembly.Exports { - (async function () { - // Instantiate pyodideWasmModule with wasmImports - const instance = await WebAssembly.instantiate( - pyodideWasmModule, - wasmImports - ); - successCallback(instance, pyodideWasmModule); - })(); - - return {}; -} - -/** - * This is passed as a preRun hook in EmscriptenSettings, run just before - * main(). It ensures that the file system includes the stuff that main() needs, - * most importantly the Python standard library. - * - * Put the Python + Pyodide standard libraries into a zip file in the - * appropriate location /lib/python311.zip . Python will import stuff directly - * from this zip file using ZipImporter. - * - * ZipImporter is quite useful here -- the Python runtime knows how to unpack a - * bunch of different archive formats but it is not possible to use these until - * the runtime state is initialized. So ZipImporter breaks this bootstrapping - * knot for us. - * - * We also make an empty home directory and an empty global site-packages - * directory `/lib/pythonv.vv/site-packages`. - * - * This is a simplified version of the `prepareFileSystem` function here: - * https://github.com/pyodide/pyodide/blob/main/src/js/module.ts - */ -function prepareFileSystem(Module: Module): void { - try { - const pymajor = Module._py_version_major(); - const pyminor = Module._py_version_minor(); - Module.FS.mkdirTree(`/lib/python${pymajor}.${pyminor}/site-packages`); - Module.FS.writeFile( - `/lib/python${pymajor}${pyminor}.zip`, - new Uint8Array(stdlib), - { canOwn: true } - ); - Module.FS.mkdirTree(Module.API.config.env.HOME); - } catch (e) { - reportError(e); - } -} - -/** - * A preRun hook. Make sure environment variables are visible at runtime. - */ -function setEnv(Module: Module): void { - Object.assign(Module.ENV, Module.API.config.env); -} - -/** - * The Emscripten settings object - * - * This isn't public API of Pyodide so it's a bit fiddly. - */ -function getEmscriptenSettings( - lockfile: PackageLock, - indexURL: string -): EmscriptenSettings { - const config = { - // jsglobals is used for the js module. - jsglobals: globalThis, - // environment variables go here - env: { - HOME: '/session', - // We don't have access to entropy at startup so we cannot support hash - // randomization. Setting `PYTHONHASHSEED` disables it. See further - // discussion in topLevelEntropy/entropy_patches.py - PYTHONHASHSEED: '111', - }, - // This is the index that we use as the base URL to fetch the wheels. - indexURL, - }; - // loadPackage initializes its state using lockFilePromise. - const lockFilePromise = lockfile ? Promise.resolve(lockfile) : undefined; - const API = { config, lockFilePromise }; - const { preRun: snapshotPreRun, ...snapshotSettings } = getSnapshotSettings(); - // Emscripten settings to control runtime instantiation. - return { - // preRun hook to set up the file system before running main - // The preRun hook gets run independently of noInitialRun, which is - // important because the file system lives outside of linear memory. - preRun: [prepareFileSystem, setEnv, ...snapshotPreRun], - instantiateWasm, - reportUndefinedSymbolsNoOp() {}, - ...snapshotSettings, - API, // Pyodide requires we pass this in. - }; -} - -/** - * Simple wrapper around _createPyodideModule that applies some monkey patches - * to force the environment to be detected the way we want. - * - * In the long run we should fix this in `pyodide.asm.js` instead. - * - * Returns the instantiated emscriptenModule object. - */ -async function instantiateEmscriptenModule( - emscriptenSettings: EmscriptenSettings -): Promise { - try { - // Force Emscripten to feature detect the way we want - // They used to have an `environment` setting that did this but it has been - // removed =( - // If/when we link our own Pyodide we can remove this. - const global = globalThis as any; - global.window = {}; // makes ENVIRONMENT_IS_WEB = true - global.document = { createElement() {} }; - global.sessionStorage = {}; - global.importScripts = 1; // makes ENVIRONMENT_IS_WORKER = false - const p = _createPyodideModule(emscriptenSettings); - delete global.window; - delete global.document; - delete global.sessionStorage; - delete global.importScripts; - const emscriptenModule = await p; - return emscriptenModule; - } catch (e) { - console.warn('Error in instantiateEmscriptenModule'); - reportError(e); - } -} +import pythonStdlib from 'pyodide-internal:generated/python_stdlib.zip'; +import { instantiateEmscriptenModule } from 'pyodide-internal:emscriptenSetup'; /** * After running `instantiateEmscriptenModule` but before calling into any C @@ -204,6 +54,9 @@ async function prepareWasmLinearMemory(Module: Module): Promise { // Note: if we are restoring from a snapshot, runtime is not initialized yet. mountSitePackages(Module, SITE_PACKAGES.rootInfo); entropyMountFiles(Module); + Module.noInitialRun = !SHOULD_RESTORE_SNAPSHOT; + preloadDynamicLibs(Module); + Module.removeRunDependency('dynlibs'); if (SHOULD_RESTORE_SNAPSHOT) { restoreSnapshot(Module); } @@ -222,9 +75,13 @@ export async function loadPyodide( lockfile: PackageLock, indexURL: string ): Promise { - const emscriptenSettings = getEmscriptenSettings(lockfile, indexURL); const Module = await enterJaegerSpan('instantiate_emscripten', () => - instantiateEmscriptenModule(emscriptenSettings) + instantiateEmscriptenModule( + lockfile, + indexURL, + pythonStdlib, + pyodideWasmModule + ) ); await enterJaegerSpan('prepare_wasm_linear_memory', () => prepareWasmLinearMemory(Module) diff --git a/src/pyodide/internal/snapshot.ts b/src/pyodide/internal/snapshot.ts index 5d2021c4965..544884b54f9 100644 --- a/src/pyodide/internal/snapshot.ts +++ b/src/pyodide/internal/snapshot.ts @@ -161,17 +161,6 @@ export function preloadDynamicLibs(Module: Module): void { } } -export function getSnapshotSettings() { - return { - preRun: [preloadDynamicLibs], - // if SNAPSHOT_SIZE is defined, start with the linear memory big enough to - // fit the snapshot. If it's not defined, this falls back to the default. - INITIAL_MEMORY: SNAPSHOT_SIZE, - // skip running main() if we have a snapshot - noInitialRun: SHOULD_RESTORE_SNAPSHOT, - }; -} - type DylinkInfo = { [name: string]: { handles: string[] }; } & { @@ -400,6 +389,7 @@ export function restoreSnapshot(Module: Module): void { if (!READ_MEMORY) { throw Error('READ_MEMORY not defined when restoring snapshot'); } + Module.growMemory(SNAPSHOT_SIZE!); READ_MEMORY(Module); } diff --git a/src/pyodide/internal/topLevelEntropy/lib.ts b/src/pyodide/internal/topLevelEntropy/lib.ts index 8ed1420f91e..3a5880cae81 100644 --- a/src/pyodide/internal/topLevelEntropy/lib.ts +++ b/src/pyodide/internal/topLevelEntropy/lib.ts @@ -19,7 +19,7 @@ let allowed_entropy_calls_addr: number; * We make an array in Python and then get its address in JavaScript so * shouldAllowBadEntropy can check / write back the value */ -function setupShouldAllowBadEntropy(Module: Module) { +function setupShouldAllowBadEntropy(Module: Module): void { // get_bad_entropy_flag prints the address we want into stderr which is returned into res. // We parse this as an integer. const res = simpleRunPython( @@ -31,7 +31,7 @@ function setupShouldAllowBadEntropy(Module: Module) { allowed_entropy_calls_addr = Number(res); } -function shouldAllowBadEntropy(Module: Module) { +function shouldAllowBadEntropy(Module: Module): boolean { const val = Module.HEAP8[allowed_entropy_calls_addr]; if (val) { Module.HEAP8[allowed_entropy_calls_addr]--; @@ -53,7 +53,7 @@ let IN_REQUEST_CONTEXT = false; * See entropy_import_context.py where `allow_bad_entropy_calls` is used to dole * out the bad entropy. */ -export function getRandomValues(Module: Module, arr: Uint8Array) { +export function getRandomValues(Module: Module, arr: Uint8Array): void { if (IN_REQUEST_CONTEXT) { return crypto.getRandomValues(arr); } @@ -72,7 +72,7 @@ export function getRandomValues(Module: Module, arr: Uint8Array) { * after instantiating the Emscripten module but before restoring the snapshot. * Hypothetically, we could skip it for new dedicated snapshots. */ -export function entropyMountFiles(Module: Module) { +export function entropyMountFiles(Module: Module): void { Module.FS.mkdir(`/lib/python3.12/site-packages/_cloudflare`); Module.FS.writeFile( `/lib/python3.12/site-packages/_cloudflare/__init__.py`, @@ -102,7 +102,7 @@ export function entropyMountFiles(Module: Module) { * after the runtime is ready, so after restoring the snapshot in the snapshot * branch and after entropyMountFiles in the no-snapshot branch. */ -export function entropyAfterRuntimeInit(Module: Module) { +export function entropyAfterRuntimeInit(Module: Module): void { setupShouldAllowBadEntropy(Module); } @@ -110,7 +110,7 @@ export function entropyAfterRuntimeInit(Module: Module) { * This prepares us to execute the top level scope. It changes only Python state * so it doesn't need to be called when restoring from snapshot. */ -export function entropyBeforeTopLevel(Module: Module) { +export function entropyBeforeTopLevel(Module: Module): void { simpleRunPython( Module, ` @@ -125,7 +125,7 @@ let isReady = false; /** * Called to reseed rngs and turn off blocks that prevent access to rng APIs. */ -export function entropyBeforeRequest(Module: Module) { +export function entropyBeforeRequest(Module: Module): void { if (isReady) { // I think this is only ever called once, but we guard it just to be sure. return; diff --git a/src/pyodide/types/Module.d.ts b/src/pyodide/types/Module.d.ts deleted file mode 100644 index 6257162d26f..00000000000 --- a/src/pyodide/types/Module.d.ts +++ /dev/null @@ -1,42 +0,0 @@ -interface ENV { - HOME: string; -} - -interface API { - config: { - env: ENV; - }; - finalizeBootstrap: () => void; - public_api: Pyodide; - rawRun: (code: string) => [status: number, err: string]; -} - -interface LDSO { - loadedLibsByHandle: { - [handle: string]: DSO; - }; -} - -interface DSO { - name: string; - refcount: number; - global: boolean; - exports: WebAssembly.Exports; -} - -interface Module { - HEAP8: Uint8Array; - _dump_traceback: () => void; - FS: FS; - API: API; - ENV: ENV; - LDSO: LDSO; - newDSO: (path: string, opt: object | undefined, handle: string) => DSO; - _py_version_major: () => number; - _py_version_minor: () => number; - loadWebAssemblyModule: ( - mod: WebAssembly.Module, - opt: object, - path: string - ) => WebAssembly.Exports; -} diff --git a/src/pyodide/types/emscripten.d.ts b/src/pyodide/types/emscripten.d.ts index 46d465901d5..140ce8e5287 100644 --- a/src/pyodide/types/emscripten.d.ts +++ b/src/pyodide/types/emscripten.d.ts @@ -1,3 +1,29 @@ +interface ENV { + HOME: string; +} + +interface API { + config: { + env: ENV; + }; + finalizeBootstrap: () => void; + public_api: Pyodide; + rawRun: (code: string) => [status: number, err: string]; +} + +interface LDSO { + loadedLibsByHandle: { + [handle: string]: DSO; + }; +} + +interface DSO { + name: string; + refcount: number; + global: boolean; + exports: WebAssembly.Exports; +} + interface EmscriptenSettings { preRun: ((mod: Module) => void)[]; instantiateWasm: ( @@ -8,8 +34,30 @@ interface EmscriptenSettings { ) => void ) => WebAssembly.Exports; reportUndefinedSymbolsNoOp: () => void; - noInitialRun: boolean; + noInitialRun?: boolean; API: { config: API['config']; }; + readyPromise: Promise; +} + +interface Module { + HEAP8: Uint8Array; + _dump_traceback: () => void; + FS: FS; + API: API; + ENV: ENV; + LDSO: LDSO; + newDSO: (path: string, opt: object | undefined, handle: string) => DSO; + _py_version_major: () => number; + _py_version_minor: () => number; + loadWebAssemblyModule: ( + mod: WebAssembly.Module, + opt: object, + path: string + ) => WebAssembly.Exports; + growMemory(newSize: number): void; + addRunDependency(x: string): void; + removeRunDependency(x: string): void; + noInitialRun: boolean; }