Skip to content

Commit

Permalink
Refactor Python startup code to prepare for Python pools
Browse files Browse the repository at this point in the history
For Python pools, we have to run `instantiateEmscriptenModule` before we bind
the feature flags and set up the global scope and module registry. We will need
to bundle all of the relevant code into a single script with no imports. This PR
makes most of the typescript changes that will be needed to enable this.

I split part of `python.ts` into a new file called `emscriptenSetup.ts` which is
intended for execution as part of the pool. To do this, `emscriptenSetup.ts`
cannot import anything that uses C++ APIs. After this PR, it nearly satisfies
this constraint except for the use of UnsafeEval by `newWasmModule`. I will fix
that in a followup.

Aside from moving code from `python.ts` to `emscriptenSetup.ts`, the code
changes relate to the fact that `emscriptenSettings` cannot receive information
from snapshot.ts. We have to cause the relevant effects dynamically. Instead of
setting `INITIAL_MEMORY`, we use `Module.growMemory`. We patch Emscripten to
read the value of `Module.noInitialRun` directly before calling `main()`
(instead of loading it early and ignoring subsequent changes). And instead of
calling `preloadDynamicLibs()` from a prerun hook, we use `addRunDependency()`
to stall the initialization of the runtime until we have a chance to load the
dynamic libraries.
  • Loading branch information
hoodmane committed Oct 8, 2024
1 parent 0f366d5 commit 36d38e1
Show file tree
Hide file tree
Showing 6 changed files with 280 additions and 216 deletions.
211 changes: 211 additions & 0 deletions src/pyodide/internal/emscriptenSetup.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,211 @@
/**
* This file is intended to be executed in the Python pool (once it exists). As such, it cannot
* import anything that transitively uses C++ extension modules. It has to work in a vanilla v8
* isolate. Also, we will have to bundle this file and all of its transitive imports into a single
* js file.
*/

import { reportError } from 'pyodide-internal:util';

/**
* _createPyodideModule and pyodideWasmModule together are produced by the
* Emscripten linker
*/
import { _createPyodideModule } from 'pyodide-internal:generated/pyodide.asm';

/**
 * preRun hook that makes the configured environment variables visible at
 * runtime by copying every entry of `Module.API.config.env` onto `Module.ENV`.
 */
function setEnv(Module: Module): void {
  const { env } = Module.API.config;
  Object.assign(Module.ENV, env);
}

/**
 * Builds the preRun hook that stalls runtime instantiation until the dynamic
 * libraries can be preloaded.
 *
 * @param resolveReadyPromise invoked with the Module right after the
 * 'dynlibs' run dependency has been registered.
 * @returns the `waitForDynlibs` preRun hook.
 */
function getWaitForDynlibs(resolveReadyPromise: PreRunHook): PreRunHook {
  const waitForDynlibs: PreRunHook = (Module: Module): void => {
    // Registering the 'dynlibs' run dependency blocks the instantiation of the
    // runtime. The promise returned by _createPyodideModule won't resolve
    // until `removeRunDependency('dynlibs')` is called, so
    // `emscriptenSettings.readyPromise` is what lets the caller continue once
    // execution has reached this point.
    Module.addRunDependency('dynlibs');
    resolveReadyPromise(Module);
  };
  return waitForDynlibs;
}

/**
 * Builds the preRun hook (passed in EmscriptenSettings and run just before
 * main()) that puts everything main() needs into the file system, most
 * importantly the Python standard library.
 *
 * The hook:
 *  - creates an empty global site-packages directory
 *    `/lib/python<major>.<minor>/site-packages`,
 *  - writes a copy of the Python + Pyodide standard library zip to
 *    `/lib/python<major><minor>.zip`, from which Python imports directly
 *    using ZipImporter,
 *  - creates the home directory named by `Module.API.config.env.HOME`.
 *
 * ZipImporter is what makes this work: the Python runtime knows how to unpack
 * many archive formats, but none of that is usable until the runtime state is
 * initialized. ZipImporter breaks this bootstrapping knot for us.
 *
 * This is a simplified version of the `prepareFileSystem` function here:
 * https://github.com/pyodide/pyodide/blob/main/src/js/module.ts
 *
 * @param pythonStdlib bytes of the standard library zip archive.
 * @returns the `prepareFileSystem` preRun hook. Anything thrown while
 * preparing the file system is handed to `reportError`.
 */
function getPrepareFileSystem(pythonStdlib: Uint8Array): PreRunHook {
  const prepareFileSystem: PreRunHook = (Module: Module): void => {
    try {
      const major = Module._py_version_major();
      const minor = Module._py_version_minor();
      const sitePackagesDir = `/lib/python${major}.${minor}/site-packages`;
      const stdlibZipPath = `/lib/python${major}${minor}.zip`;
      const { FS } = Module;

      FS.mkdirTree(sitePackagesDir);
      // Hand the file system a fresh copy of the archive that it may take
      // ownership of (`canOwn`), leaving the caller's buffer untouched.
      FS.writeFile(stdlibZipPath, new Uint8Array(pythonStdlib), {
        canOwn: true,
      });
      FS.mkdirTree(Module.API.config.env.HOME);
    } catch (e) {
      reportError(e);
    }
  };
  return prepareFileSystem;
}

/**
 * Builds the `instantiateWasm` hook that the Emscripten runtime calls to
 * perform the WebAssembly instantiation.
 *
 * Contract of the returned hook:
 *  - `wasmImports` is the JS object holding all the function imports that
 *    must be passed to the WebAssembly Module when instantiating it.
 *  - `successCallback` must be called with the generated WebAssembly Instance
 *    (and the module) once instantiation has succeeded.
 *  - The return value should be the `exports` object of the instantiated
 *    module, an empty object `{}` if instantiation is performed
 *    asynchronously, or `false` if it synchronously failed. There is no way
 *    to indicate asynchronous failure.
 *
 * This implementation always instantiates asynchronously and therefore always
 * returns `{}`.
 *
 * @param pyodideWasmModule the compiled WebAssembly module to instantiate.
 */
function getInstantiateWasm(
  pyodideWasmModule: WebAssembly.Module
): EmscriptenSettings['instantiateWasm'] {
  return function instantiateWasm(
    wasmImports: WebAssembly.Imports,
    successCallback: (
      inst: WebAssembly.Instance,
      mod: WebAssembly.Module
    ) => void
  ): WebAssembly.Exports {
    // Instantiate pyodideWasmModule with wasmImports and report the result
    // through successCallback once it is available.
    const instantiateAndNotify = async (): Promise<void> => {
      const wasmInstance = await WebAssembly.instantiate(
        pyodideWasmModule,
        wasmImports
      );
      successCallback(wasmInstance, pyodideWasmModule);
    };
    // Deliberately not awaited: the hook has to return synchronously.
    void instantiateAndNotify();

    return {};
  };
}

/**
 * Assembles the Emscripten settings object that controls runtime
 * instantiation.
 *
 * This isn't public API of Pyodide so it's a bit fiddly.
 *
 * @param lockfile package lock used to seed `API.lockFilePromise`; when it is
 * falsy, `lockFilePromise` is left `undefined`.
 * @param indexURL base URL used to fetch the wheels.
 * @param pythonStdlib bytes of the standard library zip, written into the
 * file system by the `prepareFileSystem` preRun hook.
 * @param pyodideWasmModule compiled module handed to the `instantiateWasm`
 * hook.
 * @returns the settings object, including a `readyPromise` that resolves with
 * the Module once the `waitForDynlibs` preRun hook has run.
 */
function getEmscriptenSettings(
  lockfile: PackageLock,
  indexURL: string,
  pythonStdlib: Uint8Array,
  pyodideWasmModule: WebAssembly.Module
): EmscriptenSettings {
  // The Promise executor runs synchronously, so the resolver is assigned
  // before it is handed to getWaitForDynlibs below.
  let resolveReadyPromise!: (mod: Module) => void;
  const readyPromise = new Promise<Module>((resolve) => {
    resolveReadyPromise = resolve;
  });

  const API = {
    config: {
      // jsglobals is used for the js module.
      jsglobals: globalThis,
      // environment variables go here
      env: {
        HOME: '/session',
        // We don't have access to entropy at startup so we cannot support
        // hash randomization. Setting `PYTHONHASHSEED` disables it. See
        // further discussion in topLevelEntropy/entropy_patches.py
        PYTHONHASHSEED: '111',
      },
      // This is the index that we use as the base URL to fetch the wheels.
      indexURL,
    },
    // loadPackage initializes its state using lockFilePromise.
    lockFilePromise: lockfile ? Promise.resolve(lockfile) : undefined,
  };

  // Emscripten settings to control runtime instantiation.
  return {
    // preRun hooks, in order: set up the file system, export the environment
    // variables, then stall on the 'dynlibs' run dependency. The preRun hooks
    // get run independently of noInitialRun, which is important because the
    // file system lives outside of linear memory.
    preRun: [
      getPrepareFileSystem(pythonStdlib),
      setEnv,
      getWaitForDynlibs(resolveReadyPromise),
    ],
    instantiateWasm: getInstantiateWasm(pyodideWasmModule),
    reportUndefinedSymbolsNoOp() {},
    readyPromise,
    API, // Pyodide requires we pass this in.
  };
}

/**
 * Force Emscripten to feature detect the way we want.
 * We want it to think we're the browser main thread.
 *
 * This generator is used as a context manager: drive it with a `for...of`
 * loop and do the work that needs the patched environment inside the loop
 * body. It yields exactly once. While suspended at the `yield`, the globals
 * `window`, `document` and `sessionStorage` are replaced with minimal stubs.
 * When the loop finishes (normally or by throwing), each global is put back
 * to exactly the state it had before: restored if it existed, deleted if it
 * did not.
 */
function* featureDetectionMonkeyPatchesContextManager(): Generator<
  undefined,
  void,
  unknown
> {
  const global = globalThis as any;
  // Stubs that make Emscripten think it is running on the browser main thread.
  const stubs: Record<string, unknown> = {
    window: {},
    document: { createElement() {} },
    sessionStorage: {},
  };
  const names = Object.keys(stubs);

  // Remember what was there before so that cleanup does not destroy globals
  // that were already defined. Reading the descriptor (rather than the value)
  // avoids triggering any accessor.
  const previous = new Map<string, PropertyDescriptor | undefined>();
  for (const name of names) {
    previous.set(name, Object.getOwnPropertyDescriptor(global, name));
  }
  for (const name of names) {
    global[name] = stubs[name];
  }

  try {
    yield;
  } finally {
    for (const name of names) {
      const descriptor = previous.get(name);
      if (descriptor === undefined) {
        delete global[name];
      } else {
        Object.defineProperty(global, name, descriptor);
      }
    }
  }
}

/**
 * Simple wrapper around _createPyodideModule that applies some monkey patches
 * to force the environment to be detected the way we want.
 *
 * In the long run we should fix this in `pyodide.asm.js` instead.
 *
 * @param lockfile package lock forwarded to the Emscripten settings.
 * @param indexURL base URL used to fetch the wheels.
 * @param pythonStdlib bytes of the Python standard library zip.
 * @param wasmModule compiled Pyodide WebAssembly module.
 * @returns the emscriptenModule object, available once all the preRun hooks
 * have executed. Errors are logged with `console.warn` and handed to
 * `reportError`.
 */
export async function instantiateEmscriptenModule(
  lockfile: PackageLock,
  indexURL: string,
  pythonStdlib: Uint8Array,
  wasmModule: WebAssembly.Module
): Promise<Module> {
  const settings = getEmscriptenSettings(
    lockfile,
    indexURL,
    pythonStdlib,
    wasmModule
  );
  try {
    const patchedEnvironment = featureDetectionMonkeyPatchesContextManager();
    for (const _ of patchedEnvironment) {
      // The promise returned here is intentionally ignored: it won't resolve
      // until we're done preloading dynamic libraries.
      void _createPyodideModule(settings);
    }

    // Wait until we've executed all the preRun hooks before proceeding.
    return await settings.readyPromise;
  } catch (e) {
    console.warn('Error in instantiateEmscriptenModule');
    reportError(e);
  }
}
Loading

0 comments on commit 36d38e1

Please sign in to comment.