// Patch overview (two files under src/workerd/api/pyodide/).
//
// pyodide-test.c++: adds five KJ_TEST cases that call
// pyodide::ArtifactBundler::parsePythonScriptImports and check the returned names:
//   - "multiline-strings ignored": four inputs whose `import` / `from` lines sit inside
//     triple-quoted or backslash-continued string literals; expects 0 results.
//   - "multiline-strings with imports in-between": expects only "w" and "t". The
//     `import q` that directly follows a closing triple quote on the same line, and the
//     imports inside the FOO and BAR literals, are not reported.
//   - "import after string literal": text following an opening quote (`"import b`) is
//     not reported; expects only "a".
//   - "import after `i`": `iimport b` is not reported; expects only "a".
//   - "langchain import": expects "js", "langchain.chat_models", "openai", in that order.
//
// pyodide.c++: the visible part of the hunk adds string-quote skipping to
// parsePythonScriptImports, and changes the `default:` case to stop at a double or
// single quote (in addition to newline characters) and `continue`, so that the quote
// is handled by the code above it.
//
// NOTE(review): this copy of the patch has its line breaks collapsed and appears to
// have lost text: `kj::heapArrayBuilder(4)` and `kj::Array` carry no template
// arguments, and the second hunk jumps from the function signature straight into the
// middle of the added quote-handling code. Confirm against the original commit.
// NOTE(review): the loop `while (file[i] != quote && file[i-1] != '\\')` stops as soon
// as file[i] is the quote character, even when the preceding character is a backslash,
// which seems at odds with its own "ignore escaped quote" comment. Confirm intent.
diff --git a/src/workerd/api/pyodide/pyodide-test.c++ b/src/workerd/api/pyodide/pyodide-test.c++ index 49d069622a1..820b2515b69 100644 --- a/src/workerd/api/pyodide/pyodide-test.c++ +++ b/src/workerd/api/pyodide/pyodide-test.c++ @@ -115,5 +115,50 @@ KJ_TEST("supports backslash") { KJ_REQUIRE(result[5] == "c"); } +KJ_TEST("multiline-strings ignored") { + auto files = kj::heapArrayBuilder(4); + files.add(kj::str("FOO=\"\"\"\nimport x\nfrom y import z\n\"\"\"")); + files.add(kj::str("FOO=\'\'\'\nimport f\nfrom g import z\n\'\'\'")); + files.add(kj::str("FOO = \"\\\nimport b \\\n\"")); + files.add(kj::str("FOO=\"\"\" \nimport x\nfrom y import z\n\"\"\"")); + auto result = pyodide::ArtifactBundler::parsePythonScriptImports(files.finish()); + KJ_REQUIRE(result.size() == 0); +} + +KJ_TEST("multiline-strings with imports in-between") { + auto files = kj::heapArrayBuilder(1); + files.add(kj::str("FOO=\"\"\"\nimport x\nfrom y import z\n\"\"\"import q\nimport w\nBAR=\"\"\"\nimport e\n\"\"\"\nfrom t import u")); + auto result = pyodide::ArtifactBundler::parsePythonScriptImports(files.finish()); + KJ_REQUIRE(result.size() == 2); + KJ_REQUIRE(result[0] == "w"); + KJ_REQUIRE(result[1] == "t"); +} + +KJ_TEST("import after string literal") { + auto files = kj::heapArrayBuilder(1); + files.add(kj::str("import a\n\"import b")); + auto result = pyodide::ArtifactBundler::parsePythonScriptImports(files.finish()); + KJ_REQUIRE(result.size() == 1); + KJ_REQUIRE(result[0] == "a"); +} + +KJ_TEST("import after `i`") { + auto files = kj::heapArrayBuilder(1); + files.add(kj::str("import a\niimport b")); + auto result = pyodide::ArtifactBundler::parsePythonScriptImports(files.finish()); + KJ_REQUIRE(result.size() == 1); + KJ_REQUIRE(result[0] == "a"); +} + +KJ_TEST("langchain import") { + auto files = kj::heapArrayBuilder(1); + files.add(kj::str("from js import Response, console, URL\nfrom langchain.chat_models import ChatOpenAI\nimport openai")); + auto result = 
pyodide::ArtifactBundler::parsePythonScriptImports(files.finish()); + KJ_REQUIRE(result.size() == 3); + KJ_REQUIRE(result[0] == "js"); + KJ_REQUIRE(result[1] == "langchain.chat_models"); + KJ_REQUIRE(result[2] == "openai"); +} + } // namespace } // namespace workerd::api diff --git a/src/workerd/api/pyodide/pyodide.c++ b/src/workerd/api/pyodide/pyodide.c++ index ad5db738ec7..0d24b62025c 100644 --- a/src/workerd/api/pyodide/pyodide.c++ +++ b/src/workerd/api/pyodide/pyodide.c++ @@ -207,12 +207,14 @@ kj::Array ArtifactBundler::parsePythonScriptImports(kj::Array ArtifactBundler::parsePythonScriptImports(kj::Array` + // skip until quote, but ignore `\"`. + while (file[i] != quote && file[i-1] != '\\') { + i += skipUntil(file, {quote}, i); + } + i += 1; // skip quote. + } else { + i += 1; // skip quote. + } + + // skip until EOL so that we don't mistakenly parse and capture `"import x`. + i += skipUntil(file, {'\n', '\r', '"', '\''}, i); + break; + } default: - // Skip to the next line. - i += skipUntil(file, {'\n', '\r'}, i); + // Skip to the next line or " or ' + i += skipUntil(file, {'\n', '\r', '"', '\''}, i); + if (file[i] == '"' || file[i] == '\'') { + continue; // Allow the quotes to be handled above. + } if (file[i] != '\0') { i += skipChar(file, {'\n', '\r'}, i); // skip newline. }