Skip to content

Commit

Permalink
Merge pull request #1543 from cloudflare/dominik/embed-packages-python
Browse files Browse the repository at this point in the history
Implements embedding of Python packages in workerd.
  • Loading branch information
dom96 authored Jan 18, 2024
2 parents 03a3396 + cea33f5 commit 6331b8f
Show file tree
Hide file tree
Showing 5 changed files with 99 additions and 13 deletions.
8 changes: 8 additions & 0 deletions WORKSPACE
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,14 @@ http_archive(
build_file = "//:build/BUILD.pyodide",
)

# Fetches the Python packages archive (a zip) and applies
# build/BUILD.pyodide_packages as the build file of the resulting repository.
http_archive(
    name = "pyodide_packages",
    build_file = "//:build/BUILD.pyodide_packages",
    sha256 = "f8fca2c4ecc09a57c86a2ed8d217e955d7599a83dad73fb989c86206706fdff6",
    type = "zip",
    urls = [
        "https://github.com/dom96/pyodide_packages/releases/download/v0.1/pyodide_packages_unzipped_0.1.tar.zip",
    ],
)

# ========================================================================================
# Dawn
#
Expand Down
3 changes: 3 additions & 0 deletions build/BUILD.pyodide_packages
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Expose the packages tarball so targets in other Bazel packages can reference it.
exports_files(["pyodide_packages_unzipped_0.1.tar"])
9 changes: 9 additions & 0 deletions src/pyodide/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
load("@bazel_skylib//rules:copy_file.bzl", "copy_file")
load("@bazel_skylib//rules:copy_directory.bzl", "copy_directory")
load("@bazel_skylib//rules:expand_template.bzl", "expand_template")
load("@workerd//:build/wd_js_bundle.bzl", "wd_js_bundle")

Expand All @@ -14,6 +15,12 @@ copy_file(
out = "generated/python_stdlib.zip",
)

# Copy the packages tarball from the @pyodide_packages repository into
# generated/ inside this package.
copy_file(
    name = "pyodide_packages_file.zip@rule",
    out = "generated/pyodide_packages_unzipped_0.1.tar",
    src = "@pyodide_packages//:pyodide_packages_unzipped_0.1.tar",
)

PRELUDE = """
import { newWasmModule, monotonicDateNow, wasmInstantiate } from "pyodide-internal:builtin_wrappers";
Expand All @@ -39,6 +46,7 @@ wd_js_bundle(
name = "pyodide",
builtin_modules = [
"python.js",
"generated/pyodide_packages_unzipped_0.1.tar"
] + glob(["internal/patches/*.py"]),
import_name = "pyodide",
internal_data_modules = ["generated/python_stdlib.zip"],
Expand All @@ -53,5 +61,6 @@ wd_js_bundle(
"pyodide.asm.js@rule",
"pyodide.asm.wasm@rule",
"python_stdlib.zip@rule",
"pyodide_packages_file.zip@rule"
],
)
81 changes: 68 additions & 13 deletions src/pyodide/internal/pyodide-bootstrap.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import { loadPyodide } from "pyodide:python";
import { getMetadata } from "pyodide:current-bundle";
import { lockFile } from "pyodide:package-lock.json";
import { getPatches } from "pyodide:patches";
import embeddedPackages from "pyodide:embedded_packages";

function initializePackageIndex(pyodide, lockfile) {
if (!lockfile.packages) {
Expand Down Expand Up @@ -47,6 +48,44 @@ function initializePackageIndex(pyodide, lockfile) {
};
}

/**
 * Names of the Python packages that are embedded inside workerd. A requirement
 * whose name appears in this list does not need to be installed separately.
 */
const EMBEDDED_PYTHON_PACKAGES = [
  "tqdm", "openai", "numpy", "SQLAlchemy",
  "typing_extensions", "PyYAML", "aiohttp", "aiosignal",
  "frozenlist", "async_timeout", "attrs", "six",
  "charset_normalizer", "multidict", "yarl", "idna",
  "pydantic", "certifi", "langchain", "anyio",
  "tenacity", "langsmith", "dataclasses_json", "jsonpatch",
  "requests", "sniffio", "marshmallow", "urllib3",
  "typing_inspect", "jsonpointer", "mypy_extensions", "micropip",
  "packaging",
];

export default {
async fetch(request, env) {
// The metadata is a JSON-serialised WorkerBundle (defined in pipeline.capnp).
Expand All @@ -57,6 +96,7 @@ export default {

// Loop through globals that define Python modules in the metadata passed to our Worker. For
// each one, save it in Pyodide's file system.
let hasRequirements = false;
const pythonRequirements = [];
const micropipRequirements = [];
for (const { name, value } of metadata.globals) {
Expand All @@ -67,33 +107,48 @@ export default {
}

if (value.pythonRequirement !== undefined) {
switch (name) {
case "langchain":
micropipRequirements.push("langchain<=0.0.339");
break;
case "openai":
micropipRequirements.push("openai<=0.28.1");
break;
default:
pythonRequirements.push(name);
hasRequirements = true;
if (!EMBEDDED_PYTHON_PACKAGES.includes(name)) {
pythonRequirements.push(name);
}
}
}

if (micropipRequirements.length > 0) {
await pyodide.loadPackage("micropip");
await pyodide.loadPackage("ssl");
if (hasRequirements) {
const name = "pyodide_packages_unzipped_0.1.tar";
const path = `/lib/python3.11/site-packages/${name}`;
pyodide.FS.writeFile(path, new Uint8Array(embeddedPackages), {
encoding: 'binary',
});

pyodide.runPython(`
import tarfile
import os
tar_file_path = "${path}"
containing_dir = os.path.dirname(tar_file_path)
with tarfile.open(tar_file_path, 'r') as tar:
tar.extractall(containing_dir)
`)

const micropip = pyodide.pyimport("micropip");
await micropip.install(micropipRequirements);
if (micropipRequirements.length > 0) {
// Micropip and ssl packages are contained in the tarball which is extracted above. This means
// we should be able to load micropip directly now.
await micropip.install(micropipRequirements);
}

// Apply patches that enable some packages to work.
const patches = getPatches();
// TODO(EW-8055): Why does micropip.list not work?
if (JSON.parse(micropip.freeze())["packages"]["aiohttp"] !== undefined) {
pyodide.runPython(patches["aiohttp_fetch_patch.py"]);
}
}


await pyodide.loadPackage(pythonRequirements);

const result = await pyodide.pyimport(metadata.mainModule).fetch(request);

return result;
Expand Down
11 changes: 11 additions & 0 deletions src/workerd/api/pyodide.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 +28,15 @@ kj::StringPtr getPyodidePatch(kj::StringPtr name) {
return _::lookupModule(kj::str("pyodide:internal/patches/", name));
}

// Returns the source data of the PYODIDE_BUNDLE module that holds the embedded
// Python packages tarball. The bundle's modules are scanned in order for a
// matching name; falling out of the loop without a match hits KJ_UNREACHABLE.
capnp::Data::Reader getPyodideEmbeddedPackages() {
  // TODO(later): strip the version from this.
  auto tarballModuleName = "pyodide:generated/pyodide_packages_unzipped_0.1.tar";
  for (auto entry : PYODIDE_BUNDLE->getModules()) {
    if (entry.getName() == tarballModuleName) {
      return entry.getSrc();
    }
  }
  KJ_UNREACHABLE;
}

} // namespace workerd

0 comments on commit 6331b8f

Please sign in to comment.