From 985782776cd4c70d0de2aae87471d9580cb45fc8 Mon Sep 17 00:00:00 2001 From: Kamil Pyc Date: Mon, 12 Feb 2024 16:02:12 +0100 Subject: [PATCH 1/3] Added support for Azure bot --- action.yml | 9 + dist/37.index.js | 2 +- dist/73.index.js | 129 + dist/index.js | 89834 ++++++++++++++++++++++++++++++---------- dist/licenses.txt | 25 + package-lock.json | 1759 +- package.json | 2 + src/azure-bot.ts | 110 + src/bot-interface.ts | 10 + src/bot.ts | 9 +- src/main.ts | 42 +- src/options.ts | 14 +- src/review-comment.ts | 4 +- src/review.ts | 6 +- tsconfig.json | 1 + 15 files changed, 70778 insertions(+), 21178 deletions(-) create mode 100644 dist/73.index.js create mode 100644 src/azure-bot.ts create mode 100644 src/bot-interface.ts diff --git a/action.yml b/action.yml index 8c96b0ba..ae45a13d 100644 --- a/action.yml +++ b/action.yml @@ -144,6 +144,15 @@ inputs: required: false description: 'Disable release notes' default: 'false' + azure_api_instance_name: + required: false + description: 'For example, if your Azure instance is hosted under https://{INSTANCE_NAME}.openai.azure.com/openai/deployments/{DEPLOYMENT_NAME} use INSTANCE_NAME' + azure_api_deployment_name: + required: false + description: 'For example, if your Azure instance is hosted under https://{INSTANCE_NAME}.openai.azure.com/openai/deployments/{DEPLOYMENT_NAME} use DEPLOYMENT_NAME' + azure_api_version: + required: false + description: 'Api version like "2023-07-01-preview"' openai_base_url: required: false description: 'The url of the openai api interface.' diff --git a/dist/37.index.js b/dist/37.index.js index c349ca0a..9a8bec6e 100644 --- a/dist/37.index.js +++ b/dist/37.index.js @@ -10,7 +10,7 @@ __webpack_require__.r(__webpack_exports__); /* harmony export */ __webpack_require__.d(__webpack_exports__, { /* harmony export */ "toFormData": () => (/* binding */ toFormData) /* harmony export */ }); -/* harmony import */ var fetch_blob_from_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(2777); +/* harmony import */ var fetch_blob_from_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(7972); /* harmony import */ var formdata_polyfill_esm_min_js__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(8010); diff --git a/dist/73.index.js b/dist/73.index.js new file mode 100644 index 00000000..dd9de4c0 --- /dev/null +++ b/dist/73.index.js @@ -0,0 +1,129 @@ +"use strict"; +exports.id = 73; +exports.ids = [73]; +exports.modules = { + +/***/ 4073: +/***/ ((__unused_webpack___webpack_module__, __webpack_exports__, __webpack_require__) => { + +// ESM COMPAT FLAG +__webpack_require__.r(__webpack_exports__); + +// EXPORTS +__webpack_require__.d(__webpack_exports__, { + "fileFromPath": () => (/* binding */ fileFromPath), + "fileFromPathSync": () => (/* binding */ fileFromPathSync), + "isFile": () => (/* reexport */ isFile/* isFile */.z) +}); + +// EXTERNAL MODULE: external "fs" +var external_fs_ = __webpack_require__(7147); +// EXTERNAL MODULE: external "path" +var external_path_ = __webpack_require__(1017); +// EXTERNAL MODULE: ./node_modules/node-domexception/index.js +var node_domexception = __webpack_require__(7760); +// EXTERNAL MODULE: ./node_modules/formdata-node/lib/esm/File.js +var File = __webpack_require__(2084); +;// CONCATENATED MODULE: ./node_modules/formdata-node/lib/esm/isPlainObject.js +const getType = (value) => (Object.prototype.toString.call(value).slice(8, -1).toLowerCase()); +function isPlainObject(value) { + if (getType(value) !== "object") { + return false; + } + const pp = Object.getPrototypeOf(value); + if (pp === null || pp === undefined) { + return true; + } + const Ctor = pp.constructor && pp.constructor.toString(); + return Ctor === Object.toString(); +} +/* harmony default export */ const esm_isPlainObject = (isPlainObject); + +// EXTERNAL MODULE: ./node_modules/formdata-node/lib/esm/isFile.js +var isFile = __webpack_require__(1574); +;// CONCATENATED MODULE: ./node_modules/formdata-node/lib/esm/fileFromPath.js +var __classPrivateFieldSet = (undefined && undefined.__classPrivateFieldSet) || function (receiver, state, value, kind, f) { + if (kind === "m") throw new TypeError("Private method is not writable"); + if (kind === "a" && !f) throw new TypeError("Private accessor was defined without a setter"); + if (typeof state === "function" ? receiver !== state || !f : !state.has(receiver)) throw new TypeError("Cannot write private member to an object whose class did not declare it"); + return (kind === "a" ? f.call(receiver, value) : f ? f.value = value : state.set(receiver, value)), value; +}; +var __classPrivateFieldGet = (undefined && undefined.__classPrivateFieldGet) || function (receiver, state, kind, f) { + if (kind === "a" && !f) throw new TypeError("Private accessor was defined without a getter"); + if (typeof state === "function" ? receiver !== state || !f : !state.has(receiver)) throw new TypeError("Cannot read private member from an object whose class did not declare it"); + return kind === "m" ? f : kind === "a" ? f.call(receiver) : f ? f.value : state.get(receiver); +}; +var _FileFromPath_path, _FileFromPath_start; + + + + + + +const MESSAGE = "The requested file could not be read, " + + "typically due to permission problems that have occurred after a reference " + + "to a file was acquired."; +class FileFromPath { + constructor(input) { + _FileFromPath_path.set(this, void 0); + _FileFromPath_start.set(this, void 0); + __classPrivateFieldSet(this, _FileFromPath_path, input.path, "f"); + __classPrivateFieldSet(this, _FileFromPath_start, input.start || 0, "f"); + this.name = (0,external_path_.basename)(__classPrivateFieldGet(this, _FileFromPath_path, "f")); + this.size = input.size; + this.lastModified = input.lastModified; + } + slice(start, end) { + return new FileFromPath({ + path: __classPrivateFieldGet(this, _FileFromPath_path, "f"), + lastModified: this.lastModified, + size: end - start, + start + }); + } + async *stream() { + const { mtimeMs } = await external_fs_.promises.stat(__classPrivateFieldGet(this, _FileFromPath_path, "f")); + if (mtimeMs > this.lastModified) { + throw new node_domexception(MESSAGE, "NotReadableError"); + } + if (this.size) { + yield* (0,external_fs_.createReadStream)(__classPrivateFieldGet(this, _FileFromPath_path, "f"), { + start: __classPrivateFieldGet(this, _FileFromPath_start, "f"), + end: __classPrivateFieldGet(this, _FileFromPath_start, "f") + this.size - 1 + }); + } + } + get [(_FileFromPath_path = new WeakMap(), _FileFromPath_start = new WeakMap(), Symbol.toStringTag)]() { + return "File"; + } +} +function createFileFromPath(path, { mtimeMs, size }, filenameOrOptions, options = {}) { + let filename; + if (esm_isPlainObject(filenameOrOptions)) { + [options, filename] = [filenameOrOptions, undefined]; + } + else { + filename = filenameOrOptions; + } + const file = new FileFromPath({ path, size, lastModified: mtimeMs }); + if (!filename) { + filename = file.name; + } + return new File/* File */.$([file], filename, { + ...options, lastModified: file.lastModified + }); +} +function fileFromPathSync(path, filenameOrOptions, options = {}) { + const stats = (0,external_fs_.statSync)(path); + return createFileFromPath(path, stats, filenameOrOptions, options); +} +async function fileFromPath(path, filenameOrOptions, options) { + const stats = await external_fs_.promises.stat(path); + return createFileFromPath(path, stats, filenameOrOptions, options); +} + + +/***/ }) + +}; +; \ No newline at end of file diff --git a/dist/index.js b/dist/index.js index a6f49664..15bc10d0 100644 --- a/dist/index.js +++ b/dist/index.js @@ -1,1774 +1,5909 @@ /******/ (() => { // webpackBootstrap /******/ var __webpack_modules__ = ({ -/***/ 6959: +/***/ 9058: /***/ ((__unused_webpack_module, __webpack_exports__, __nccwpck_require__) => { "use strict"; // EXPORTS __nccwpck_require__.d(__webpack_exports__, { - "r": () => (/* binding */ Bot) + "j": () => (/* binding */ AzureBot) }); -;// CONCATENATED MODULE: external "node:http" -const external_node_http_namespaceObject = require("node:http"); -;// CONCATENATED MODULE: external "node:https" -const external_node_https_namespaceObject = require("node:https"); -;// CONCATENATED MODULE: external "node:zlib" -const external_node_zlib_namespaceObject = require("node:zlib"); -;// CONCATENATED MODULE: external "node:stream" -const external_node_stream_namespaceObject = require("node:stream"); -;// CONCATENATED MODULE: external "node:buffer" -const external_node_buffer_namespaceObject = require("node:buffer"); -;// CONCATENATED MODULE: ./node_modules/data-uri-to-buffer/dist/index.js +// NAMESPACE OBJECT: ./node_modules/openai/error.mjs +var error_namespaceObject = {}; +__nccwpck_require__.r(error_namespaceObject); +__nccwpck_require__.d(error_namespaceObject, { + "APIConnectionError": () => (APIConnectionError), + "APIConnectionTimeoutError": () => (APIConnectionTimeoutError), + "APIError": () => (APIError), + "APIUserAbortError": () => (APIUserAbortError), + "AuthenticationError": () => (AuthenticationError), + "BadRequestError": () => (BadRequestError), + "ConflictError": () => (ConflictError), + "InternalServerError": () => (InternalServerError), + "NotFoundError": () => (NotFoundError), + "OpenAIError": () => (error_OpenAIError), + "PermissionDeniedError": () => (PermissionDeniedError), + "RateLimitError": () => (RateLimitError), + "UnprocessableEntityError": () => (UnprocessableEntityError) +}); + +// EXTERNAL MODULE: ./lib/fetch-polyfill.js + 20 modules +var fetch_polyfill = __nccwpck_require__(3179); +// EXTERNAL MODULE: ./node_modules/@actions/core/lib/core.js +var core = __nccwpck_require__(2186); +// EXTERNAL MODULE: ./node_modules/langchain/dist/chains/base.js + 1 modules +var base = __nccwpck_require__(3898); +// EXTERNAL MODULE: ./node_modules/langchain/dist/chains/llm_chain.js + 2 modules +var llm_chain = __nccwpck_require__(2663); +// EXTERNAL MODULE: ./node_modules/langchain/dist/chains/api/api_chain.js + 1 modules +var api_chain = __nccwpck_require__(6159); +// EXTERNAL MODULE: ./node_modules/@langchain/core/prompts.js + 2 modules +var prompts = __nccwpck_require__(4974); +// EXTERNAL MODULE: ./node_modules/@langchain/core/dist/messages/index.js +var dist_messages = __nccwpck_require__(526); +;// CONCATENATED MODULE: ./node_modules/@langchain/core/messages.js + +;// CONCATENATED MODULE: ./node_modules/@langchain/core/dist/memory.js /** - * Returns a `Buffer` instance from the given data URI `uri`. - * - * @param {String} uri Data URI to turn into a Buffer instance - * @returns {Buffer} Buffer instance from Data URI - * @api public + * Abstract base class for memory in LangChain's Chains. Memory refers to + * the state in Chains. It can be used to store information about past + * executions of a Chain and inject that information into the inputs of + * future executions of the Chain. */ -function dataUriToBuffer(uri) { - if (!/^data:/i.test(uri)) { - throw new TypeError('`uri` does not appear to be a Data URI (must begin with "data:")'); +class memory_BaseMemory { +} +const getValue = (values, key) => { + if (key !== undefined) { + return values[key]; } - // strip newlines - uri = uri.replace(/\r?\n/g, ''); - // split the URI up into the "metadata" and the "data" portions - const firstComma = uri.indexOf(','); - if (firstComma === -1 || firstComma <= 4) { - throw new TypeError('malformed data: URI'); + const keys = Object.keys(values); + if (keys.length === 1) { + return values[keys[0]]; } - // remove the "data:" scheme and parse the metadata - const meta = uri.substring(5, firstComma).split(';'); - let charset = ''; - let base64 = false; - const type = meta[0] || 'text/plain'; - let typeFull = type; - for (let i = 1; i < meta.length; i++) { - if (meta[i] === 'base64') { - base64 = true; - } - else if (meta[i]) { - typeFull += `;${meta[i]}`; - if (meta[i].indexOf('charset=') === 0) { - charset = meta[i].substring(8); - } - } +}; +/** + * This function is used by memory classes to select the input value + * to use for the memory. If there is only one input value, it is used. + * If there are multiple input values, the inputKey must be specified. + */ +const memory_getInputValue = (inputValues, inputKey) => { + const value = getValue(inputValues, inputKey); + if (!value) { + const keys = Object.keys(inputValues); + throw new Error(`input values have ${keys.length} keys, you must specify an input key or pass only 1 key as input`); } - // defaults to US-ASCII only if type is not provided - if (!meta[0] && !charset.length) { - typeFull += ';charset=US-ASCII'; - charset = 'US-ASCII'; + return value; +}; +/** + * This function is used by memory classes to select the output value + * to use for the memory. If there is only one output value, it is used. + * If there are multiple output values, the outputKey must be specified. + * If no outputKey is specified, an error is thrown. + */ +const memory_getOutputValue = (outputValues, outputKey) => { + const value = getValue(outputValues, outputKey); + if (!value) { + const keys = Object.keys(outputValues); + throw new Error(`output values have ${keys.length} keys, you must specify an output key or pass only 1 key as output`); } - // get the encoded data portion and decode URI-encoded chars - const encoding = base64 ? 'base64' : 'ascii'; - const data = unescape(uri.substring(firstComma + 1)); - const buffer = Buffer.from(data, encoding); - // set `.type` and `.typeFull` properties to MIME type - buffer.type = type; - buffer.typeFull = typeFull; - // set the `.charset` property - buffer.charset = charset; - return buffer; -} -/* harmony default export */ const dist = (dataUriToBuffer); -//# sourceMappingURL=index.js.map -;// CONCATENATED MODULE: external "node:util" -const external_node_util_namespaceObject = require("node:util"); -// EXTERNAL MODULE: ./node_modules/fetch-blob/index.js -var fetch_blob = __nccwpck_require__(1410); -// EXTERNAL MODULE: ./node_modules/formdata-polyfill/esm.min.js -var esm_min = __nccwpck_require__(8010); -;// CONCATENATED MODULE: ./node_modules/node-fetch/src/errors/base.js -class FetchBaseError extends Error { - constructor(message, type) { - super(message); - // Hide custom error implementation details from end-users - Error.captureStackTrace(this, this.constructor); - - this.type = type; - } - - get name() { - return this.constructor.name; - } - - get [Symbol.toStringTag]() { - return this.constructor.name; - } + return value; +}; +/** + * Function used by memory classes to get the key of the prompt input, + * excluding any keys that are memory variables or the "stop" key. If + * there is not exactly one prompt input key, an error is thrown. + */ +function memory_getPromptInputKey(inputs, memoryVariables) { + const promptInputKeys = Object.keys(inputs).filter((key) => !memoryVariables.includes(key) && key !== "stop"); + if (promptInputKeys.length !== 1) { + throw new Error(`One input key expected, but got ${promptInputKeys.length}`); + } + return promptInputKeys[0]; } -;// CONCATENATED MODULE: ./node_modules/node-fetch/src/errors/fetch-error.js - - +// EXTERNAL MODULE: ./node_modules/@langchain/core/dist/load/serializable.js + 1 modules +var serializable = __nccwpck_require__(6815); +;// CONCATENATED MODULE: ./node_modules/@langchain/core/dist/chat_history.js -/** - * @typedef {{ address?: string, code: string, dest?: string, errno: number, info?: object, message: string, path?: string, port?: number, syscall: string}} SystemError -*/ /** - * FetchError interface for operational errors + * Base class for all chat message histories. All chat message histories + * should extend this class. */ -class FetchError extends FetchBaseError { - /** - * @param {string} message - Error message for human - * @param {string} [type] - Error type for machine - * @param {SystemError} [systemError] - For Node.js system error - */ - constructor(message, type, systemError) { - super(message, type); - // When err.type is `system`, err.erroredSysCall contains system error and err.code contains system error code - if (systemError) { - // eslint-disable-next-line no-multi-assign - this.code = this.errno = systemError.code; - this.erroredSysCall = systemError.syscall; - } - } +class BaseChatMessageHistory extends (/* unused pure expression or super */ null && (Serializable)) { } - -;// CONCATENATED MODULE: ./node_modules/node-fetch/src/utils/is.js /** - * Is.js - * - * Object type checks. + * Base class for all list chat message histories. All list chat message + * histories should extend this class. */ +class BaseListChatMessageHistory extends serializable/* Serializable */.i { + addUserMessage(message) { + return this.addMessage(new dist_messages/* HumanMessage */.xk(message)); + } + /** @deprecated Use addAIMessage instead */ + addAIChatMessage(message) { + return this.addMessage(new dist_messages/* AIMessage */.gY(message)); + } + addAIMessage(message) { + return this.addMessage(new dist_messages/* AIMessage */.gY(message)); + } +} -const NAME = Symbol.toStringTag; - -/** - * Check if `obj` is a URLSearchParams object - * ref: https://github.com/node-fetch/node-fetch/issues/296#issuecomment-307598143 - * @param {*} object - Object to check for - * @return {boolean} - */ -const isURLSearchParameters = object => { - return ( - typeof object === 'object' && - typeof object.append === 'function' && - typeof object.delete === 'function' && - typeof object.get === 'function' && - typeof object.getAll === 'function' && - typeof object.has === 'function' && - typeof object.set === 'function' && - typeof object.sort === 'function' && - object[NAME] === 'URLSearchParams' - ); -}; +;// CONCATENATED MODULE: ./node_modules/@langchain/core/chat_history.js -/** - * Check if `object` is a W3C `Blob` object (which `File` inherits from) - * @param {*} object - Object to check for - * @return {boolean} - */ -const isBlob = object => { - return ( - object && - typeof object === 'object' && - typeof object.arrayBuffer === 'function' && - typeof object.type === 'string' && - typeof object.stream === 'function' && - typeof object.constructor === 'function' && - /^(Blob|File)$/.test(object[NAME]) - ); -}; +;// CONCATENATED MODULE: ./node_modules/langchain/node_modules/@langchain/community/dist/stores/message/in_memory.js /** - * Check if `obj` is an instance of AbortSignal. - * @param {*} object - Object to check for - * @return {boolean} + * Class for storing chat message history in-memory. It extends the + * BaseListChatMessageHistory class and provides methods to get, add, and + * clear messages. */ -const isAbortSignal = object => { - return ( - typeof object === 'object' && ( - object[NAME] === 'AbortSignal' || - object[NAME] === 'EventTarget' - ) - ); -}; +class ChatMessageHistory extends BaseListChatMessageHistory { + constructor(messages) { + super(...arguments); + Object.defineProperty(this, "lc_namespace", { + enumerable: true, + configurable: true, + writable: true, + value: ["langchain", "stores", "message", "in_memory"] + }); + Object.defineProperty(this, "messages", { + enumerable: true, + configurable: true, + writable: true, + value: [] + }); + this.messages = messages ?? []; + } + /** + * Method to get all the messages stored in the ChatMessageHistory + * instance. + * @returns Array of stored BaseMessage instances. + */ + async getMessages() { + return this.messages; + } + /** + * Method to add a new message to the ChatMessageHistory instance. + * @param message The BaseMessage instance to add. + * @returns A promise that resolves when the message has been added. + */ + async addMessage(message) { + this.messages.push(message); + } + /** + * Method to clear all the messages from the ChatMessageHistory instance. + * @returns A promise that resolves when all messages have been cleared. + */ + async clear() { + this.messages = []; + } +} -/** - * isDomainOrSubdomain reports whether sub is a subdomain (or exact match) of - * the parent domain. - * - * Both domains must already be in canonical form. - * @param {string|URL} original - * @param {string|URL} destination - */ -const isDomainOrSubdomain = (destination, original) => { - const orig = new URL(original).hostname; - const dest = new URL(destination).hostname; +;// CONCATENATED MODULE: ./node_modules/langchain/node_modules/@langchain/community/dist/memory/chat_memory.js - return orig === dest || orig.endsWith(`.${dest}`); -}; /** - * isSameProtocol reports whether the two provided URLs use the same protocol. - * - * Both domains must already be in canonical form. - * @param {string|URL} original - * @param {string|URL} destination + * Abstract class that provides a base for implementing different types of + * memory systems. It is designed to maintain the state of an application, + * specifically the history of a conversation. This class is typically + * extended by other classes to create specific types of memory systems. */ -const isSameProtocol = (destination, original) => { - const orig = new URL(original).protocol; - const dest = new URL(destination).protocol; +class chat_memory_BaseChatMemory extends memory_BaseMemory { + constructor(fields) { + super(); + Object.defineProperty(this, "chatHistory", { + enumerable: true, + configurable: true, + writable: true, + value: void 0 + }); + Object.defineProperty(this, "returnMessages", { + enumerable: true, + configurable: true, + writable: true, + value: false + }); + Object.defineProperty(this, "inputKey", { + enumerable: true, + configurable: true, + writable: true, + value: void 0 + }); + Object.defineProperty(this, "outputKey", { + enumerable: true, + configurable: true, + writable: true, + value: void 0 + }); + this.chatHistory = fields?.chatHistory ?? new ChatMessageHistory(); + this.returnMessages = fields?.returnMessages ?? this.returnMessages; + this.inputKey = fields?.inputKey ?? this.inputKey; + this.outputKey = fields?.outputKey ?? this.outputKey; + } + /** + * Method to add user and AI messages to the chat history in sequence. + * @param inputValues The input values from the user. + * @param outputValues The output values from the AI. + * @returns Promise that resolves when the context has been saved. + */ + async saveContext(inputValues, outputValues) { + // this is purposefully done in sequence so they're saved in order + await this.chatHistory.addUserMessage(memory_getInputValue(inputValues, this.inputKey)); + await this.chatHistory.addAIChatMessage(memory_getOutputValue(outputValues, this.outputKey)); + } + /** + * Method to clear the chat history. + * @returns Promise that resolves when the chat history has been cleared. + */ + async clear() { + await this.chatHistory.clear(); + } +} - return orig === dest; -}; +;// CONCATENATED MODULE: ./node_modules/langchain/node_modules/@langchain/community/memory/chat_memory.js + +;// CONCATENATED MODULE: ./node_modules/langchain/dist/memory/buffer_memory.js -;// CONCATENATED MODULE: ./node_modules/node-fetch/src/body.js /** - * Body.js + * The `BufferMemory` class is a type of memory component used for storing + * and managing previous chat messages. It is a wrapper around + * `ChatMessageHistory` that extracts the messages into an input variable. + * This class is particularly useful in applications like chatbots where + * it is essential to remember previous interactions. Note: The memory + * instance represents the history of a single conversation. Therefore, it + * is not recommended to share the same history or memory instance between + * two different chains. If you deploy your LangChain app on a serverless + * environment, do not store memory instances in a variable, as your + * hosting provider may reset it by the next time the function is called. + * @example + * ```typescript + * // Initialize the memory to store chat history and set up the language model with a specific temperature. + * const memory = new BufferMemory({ memoryKey: "chat_history" }); + * const model = new ChatOpenAI({ temperature: 0.9 }); * - * Body interface provides common methods for Request and Response + * // Create a prompt template for a friendly conversation between a human and an AI. + * const prompt = + * PromptTemplate.fromTemplate(`The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know. + * + * Current conversation: + * {chat_history} + * Human: {input} + * AI:`); + * + * // Set up the chain with the language model, prompt, and memory. + * const chain = new LLMChain({ llm: model, prompt, memory }); + * + * // Example usage of the chain to continue the conversation. + * // The `call` method sends the input to the model and returns the AI's response. + * const res = await chain.call({ input: "Hi! I'm Jim." }); + * console.log({ res }); + * + * ``` */ +class BufferMemory extends chat_memory_BaseChatMemory { + constructor(fields) { + super({ + chatHistory: fields?.chatHistory, + returnMessages: fields?.returnMessages ?? false, + inputKey: fields?.inputKey, + outputKey: fields?.outputKey, + }); + Object.defineProperty(this, "humanPrefix", { + enumerable: true, + configurable: true, + writable: true, + value: "Human" + }); + Object.defineProperty(this, "aiPrefix", { + enumerable: true, + configurable: true, + writable: true, + value: "AI" + }); + Object.defineProperty(this, "memoryKey", { + enumerable: true, + configurable: true, + writable: true, + value: "history" + }); + this.humanPrefix = fields?.humanPrefix ?? this.humanPrefix; + this.aiPrefix = fields?.aiPrefix ?? this.aiPrefix; + this.memoryKey = fields?.memoryKey ?? this.memoryKey; + } + get memoryKeys() { + return [this.memoryKey]; + } + /** + * Loads the memory variables. It takes an `InputValues` object as a + * parameter and returns a `Promise` that resolves with a + * `MemoryVariables` object. + * @param _values `InputValues` object. + * @returns A `Promise` that resolves with a `MemoryVariables` object. + */ + async loadMemoryVariables(_values) { + const messages = await this.chatHistory.getMessages(); + if (this.returnMessages) { + const result = { + [this.memoryKey]: messages, + }; + return result; + } + const result = { + [this.memoryKey]: (0,dist_messages/* getBufferString */.zs)(messages, this.humanPrefix, this.aiPrefix), + }; + return result; + } +} +;// CONCATENATED MODULE: ./node_modules/langchain/dist/chains/conversation.js +const conversation_DEFAULT_TEMPLATE = `The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know. - - - - - - - -const pipeline = (0,external_node_util_namespaceObject.promisify)(external_node_stream_namespaceObject.pipeline); -const INTERNALS = Symbol('Body internals'); - +Current conversation: +{history} +Human: {input} +AI:`; /** - * Body mixin + * A class for conducting conversations between a human and an AI. It + * extends the {@link LLMChain} class. + * @example + * ```typescript + * const model = new ChatOpenAI({}); + * const chain = new ConversationChain({ llm: model }); * - * Ref: https://fetch.spec.whatwg.org/#body + * // Sending a greeting to the conversation chain + * const res1 = await chain.call({ input: "Hi! I'm Jim." }); + * console.log({ res1 }); * - * @param Stream body Readable stream - * @param Object opts Response options - * @return Void + * // Following up with a question in the conversation + * const res2 = await chain.call({ input: "What's my name?" }); + * console.log({ res2 }); + * ``` */ -class Body { - constructor(body, { - size = 0 - } = {}) { - let boundary = null; - - if (body === null) { - // Body is undefined or null - body = null; - } else if (isURLSearchParameters(body)) { - // Body is a URLSearchParams - body = external_node_buffer_namespaceObject.Buffer.from(body.toString()); - } else if (isBlob(body)) { - // Body is blob - } else if (external_node_buffer_namespaceObject.Buffer.isBuffer(body)) { - // Body is Buffer - } else if (external_node_util_namespaceObject.types.isAnyArrayBuffer(body)) { - // Body is ArrayBuffer - body = external_node_buffer_namespaceObject.Buffer.from(body); - } else if (ArrayBuffer.isView(body)) { - // Body is ArrayBufferView - body = external_node_buffer_namespaceObject.Buffer.from(body.buffer, body.byteOffset, body.byteLength); - } else if (body instanceof external_node_stream_namespaceObject) { - // Body is stream - } else if (body instanceof esm_min/* FormData */.Ct) { - // Body is FormData - body = (0,esm_min/* formDataToBlob */.au)(body); - boundary = body.type.split('=')[1]; - } else { - // None of the above - // coerce to string then buffer - body = external_node_buffer_namespaceObject.Buffer.from(String(body)); - } - - let stream = body; - - if (external_node_buffer_namespaceObject.Buffer.isBuffer(body)) { - stream = external_node_stream_namespaceObject.Readable.from(body); - } else if (isBlob(body)) { - stream = external_node_stream_namespaceObject.Readable.from(body.stream()); - } - - this[INTERNALS] = { - body, - stream, - boundary, - disturbed: false, - error: null - }; - this.size = size; - - if (body instanceof external_node_stream_namespaceObject) { - body.on('error', error_ => { - const error = error_ instanceof FetchBaseError ? - error_ : - new FetchError(`Invalid response body while trying to fetch ${this.url}: ${error_.message}`, 'system', error_); - this[INTERNALS].error = error; - }); - } - } - - get body() { - return this[INTERNALS].stream; - } - - get bodyUsed() { - return this[INTERNALS].disturbed; - } - - /** - * Decode response as ArrayBuffer - * - * @return Promise - */ - async arrayBuffer() { - const {buffer, byteOffset, byteLength} = await consumeBody(this); - return buffer.slice(byteOffset, byteOffset + byteLength); - } - - async formData() { - const ct = this.headers.get('content-type'); - - if (ct.startsWith('application/x-www-form-urlencoded')) { - const formData = new esm_min/* FormData */.Ct(); - const parameters = new URLSearchParams(await this.text()); +class conversation_ConversationChain extends llm_chain.LLMChain { + static lc_name() { + return "ConversationChain"; + } + constructor({ prompt, outputKey, memory, ...rest }) { + super({ + prompt: prompt ?? + new prompts/* PromptTemplate */.Pf({ + template: conversation_DEFAULT_TEMPLATE, + inputVariables: ["history", "input"], + }), + outputKey: outputKey ?? "response", + memory: memory ?? new BufferMemory(), + ...rest, + }); + } +} - for (const [name, value] of parameters) { - formData.append(name, value); - } +// EXTERNAL MODULE: ./node_modules/langchain/dist/chains/sequential_chain.js + 1 modules +var sequential_chain = __nccwpck_require__(7210); +// EXTERNAL MODULE: ./node_modules/langchain/dist/chains/combine_docs_chain.js +var combine_docs_chain = __nccwpck_require__(3608); +// EXTERNAL MODULE: ./node_modules/langchain/dist/chains/question_answering/load.js + 3 modules +var load = __nccwpck_require__(5000); +;// CONCATENATED MODULE: ./node_modules/langchain/dist/chains/chat_vector_db_chain.js - return formData; - } - const {toFormData} = await __nccwpck_require__.e(/* import() */ 37).then(__nccwpck_require__.bind(__nccwpck_require__, 4037)); - return toFormData(this.body, ct); - } - /** - * Return raw response as Blob - * - * @return Promise - */ - async blob() { - const ct = (this.headers && this.headers.get('content-type')) || (this[INTERNALS].body && this[INTERNALS].body.type) || ''; - const buf = await this.arrayBuffer(); - return new fetch_blob/* default */.Z([buf], { - type: ct - }); - } +const question_generator_template = (/* unused pure expression or super */ null && (`Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question. - /** - * Decode response as json - * - * @return Promise - */ - async json() { - const text = await this.text(); - return JSON.parse(text); - } +Chat History: +{chat_history} +Follow Up Input: {question} +Standalone question:`)); +const qa_template = (/* unused pure expression or super */ null && (`Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. - /** - * Decode response as text - * - * @return Promise - */ - async text() { - const buffer = await consumeBody(this); - return new TextDecoder().decode(buffer); - } +{context} - /** - * Decode response as buffer (non-spec api) - * - * @return Promise - */ - buffer() { - return consumeBody(this); - } +Question: {question} +Helpful Answer:`)); +/** @deprecated use `ConversationalRetrievalQAChain` instead. */ +class ChatVectorDBQAChain extends (/* unused pure expression or super */ null && (BaseChain)) { + get inputKeys() { + return [this.inputKey, this.chatHistoryKey]; + } + get outputKeys() { + return [this.outputKey]; + } + constructor(fields) { + super(fields); + Object.defineProperty(this, "k", { + enumerable: true, + configurable: true, + writable: true, + value: 4 + }); + Object.defineProperty(this, "inputKey", { + enumerable: true, + configurable: true, + writable: true, + value: "question" + }); + Object.defineProperty(this, "chatHistoryKey", { + enumerable: true, + configurable: true, + writable: true, + value: "chat_history" + }); + Object.defineProperty(this, "outputKey", { + enumerable: true, + configurable: true, + writable: true, + value: "result" + }); + Object.defineProperty(this, "vectorstore", { + enumerable: true, + configurable: true, + writable: true, + value: void 0 + }); + Object.defineProperty(this, "combineDocumentsChain", { + enumerable: true, + configurable: true, + writable: true, + value: void 0 + }); + Object.defineProperty(this, "questionGeneratorChain", { + enumerable: true, + configurable: true, + writable: true, + value: void 0 + }); + Object.defineProperty(this, "returnSourceDocuments", { + enumerable: true, + configurable: true, + writable: true, + value: false + }); + this.vectorstore = fields.vectorstore; + this.combineDocumentsChain = fields.combineDocumentsChain; + this.questionGeneratorChain = fields.questionGeneratorChain; + this.inputKey = fields.inputKey ?? this.inputKey; + this.outputKey = fields.outputKey ?? this.outputKey; + this.k = fields.k ?? this.k; + this.returnSourceDocuments = + fields.returnSourceDocuments ?? this.returnSourceDocuments; + } + /** @ignore */ + async _call(values, runManager) { + if (!(this.inputKey in values)) { + throw new Error(`Question key ${this.inputKey} not found.`); + } + if (!(this.chatHistoryKey in values)) { + throw new Error(`chat history key ${this.inputKey} not found.`); + } + const question = values[this.inputKey]; + const chatHistory = values[this.chatHistoryKey]; + let newQuestion = question; + if (chatHistory.length > 0) { + const result = await this.questionGeneratorChain.call({ + question, + chat_history: chatHistory, + }, runManager?.getChild("question_generator")); + const keys = Object.keys(result); + console.log("_call", values, keys); + if (keys.length === 1) { + newQuestion = result[keys[0]]; + } + else { + throw new Error("Return from llm chain has multiple values, only single values supported."); + } + } + const docs = await this.vectorstore.similaritySearch(newQuestion, this.k, undefined, runManager?.getChild("vectorstore")); + const inputs = { + question: newQuestion, + input_documents: docs, + chat_history: chatHistory, + }; + const result = await this.combineDocumentsChain.call(inputs, runManager?.getChild("combine_documents")); + if (this.returnSourceDocuments) { + return { + ...result, + sourceDocuments: docs, + }; + } + return result; + } + _chainType() { + return "chat-vector-db"; + } + static async deserialize(data, values) { + if (!("vectorstore" in values)) { + throw new Error(`Need to pass in a vectorstore to deserialize VectorDBQAChain`); + } + const { vectorstore } = values; + return new ChatVectorDBQAChain({ + combineDocumentsChain: await BaseChain.deserialize(data.combine_documents_chain), + questionGeneratorChain: await LLMChain.deserialize(data.question_generator), + k: data.k, + vectorstore, + }); + } + serialize() { + return { + _type: this._chainType(), + combine_documents_chain: this.combineDocumentsChain.serialize(), + question_generator: this.questionGeneratorChain.serialize(), + k: this.k, + }; + } + /** + * Creates an instance of ChatVectorDBQAChain using a BaseLanguageModel + * and other options. + * @param llm Instance of BaseLanguageModel used to generate a new question. + * @param vectorstore Instance of VectorStore used for vector operations. + * @param options (Optional) Additional options for creating the ChatVectorDBQAChain instance. + * @returns New instance of ChatVectorDBQAChain. + */ + static fromLLM(llm, vectorstore, options = {}) { + const { questionGeneratorTemplate, qaTemplate, verbose, ...rest } = options; + const question_generator_prompt = PromptTemplate.fromTemplate(questionGeneratorTemplate || question_generator_template); + const qa_prompt = PromptTemplate.fromTemplate(qaTemplate || qa_template); + const qaChain = loadQAStuffChain(llm, { prompt: qa_prompt, verbose }); + const questionGeneratorChain = new LLMChain({ + prompt: question_generator_prompt, + llm, + verbose, + }); + const instance = new this({ + vectorstore, + combineDocumentsChain: qaChain, + questionGeneratorChain, + ...rest, + }); + return instance; + } } -Body.prototype.buffer = (0,external_node_util_namespaceObject.deprecate)(Body.prototype.buffer, 'Please use \'response.arrayBuffer()\' instead of \'response.buffer()\'', 'node-fetch#buffer'); - -// In browsers, all properties are enumerable. -Object.defineProperties(Body.prototype, { - body: {enumerable: true}, - bodyUsed: {enumerable: true}, - arrayBuffer: {enumerable: true}, - blob: {enumerable: true}, - json: {enumerable: true}, - text: {enumerable: true}, - data: {get: (0,external_node_util_namespaceObject.deprecate)(() => {}, - 'data doesn\'t exist, use json(), text(), arrayBuffer(), or body instead', - 'https://github.com/node-fetch/node-fetch/issues/1000 (response)')} -}); +// EXTERNAL MODULE: ./node_modules/@langchain/core/dist/runnables/base.js + 3 modules +var runnables_base = __nccwpck_require__(1011); +;// CONCATENATED MODULE: ./node_modules/@langchain/core/dist/documents/transformers.js /** - * Consume and convert an entire Body to a Buffer. + * Abstract base class for document transformation systems. * - * Ref: https://fetch.spec.whatwg.org/#concept-body-consume-body + * A document transformation system takes an array of Documents and returns an + * array of transformed Documents. These arrays do not necessarily have to have + * the same length. * - * @return Promise + * One example of this is a text splitter that splits a large document into + * many smaller documents. */ -async function consumeBody(data) { - if (data[INTERNALS].disturbed) { - throw new TypeError(`body used already for: ${data.url}`); - } - - data[INTERNALS].disturbed = true; - - if (data[INTERNALS].error) { - throw data[INTERNALS].error; - } +class transformers_BaseDocumentTransformer extends (/* unused pure expression or super */ null && (Runnable)) { + constructor() { + super(...arguments); + Object.defineProperty(this, "lc_namespace", { + enumerable: true, + configurable: true, + writable: true, + value: ["langchain_core", "documents", "transformers"] + }); + } + /** + * Method to invoke the document transformation. This method calls the + * transformDocuments method with the provided input. + * @param input The input documents to be transformed. + * @param _options Optional configuration object to customize the behavior of callbacks. + * @returns A Promise that resolves to the transformed documents. + */ + invoke(input, _options) { + return this.transformDocuments(input); + } +} +/** + * Class for document transformers that return exactly one transformed document + * for each input document. + */ +class MappingDocumentTransformer extends (/* unused pure expression or super */ null && (transformers_BaseDocumentTransformer)) { + async transformDocuments(documents) { + const newDocuments = []; + for (const document of documents) { + const transformedDocument = await this._transformDocument(document); + newDocuments.push(transformedDocument); + } + return newDocuments; + } +} - const {body} = data; +;// CONCATENATED MODULE: ./node_modules/@langchain/core/dist/documents/index.js - // Body is null - if (body === null) { - return external_node_buffer_namespaceObject.Buffer.alloc(0); - } - /* c8 ignore next 3 */ - if (!(body instanceof external_node_stream_namespaceObject)) { - return external_node_buffer_namespaceObject.Buffer.alloc(0); - } - // Body is stream - // get ready to actually consume the body - const accum = []; - let accumBytes = 0; +;// CONCATENATED MODULE: ./node_modules/@langchain/core/documents.js - try { - for await (const chunk of body) { - if (data.size > 0 && accumBytes + chunk.length > data.size) { - const error = new FetchError(`content size at ${data.url} over limit: ${data.size}`, 'max-size'); - body.destroy(error); - throw error; - } +// EXTERNAL MODULE: ./node_modules/@langchain/core/dist/utils/tiktoken.js + 1 modules +var tiktoken = __nccwpck_require__(5550); +;// CONCATENATED MODULE: ./node_modules/@langchain/core/utils/tiktoken.js - accumBytes += chunk.length; - accum.push(chunk); - } - } catch (error) { - const error_ = error instanceof FetchBaseError ? error : new FetchError(`Invalid response body while trying to fetch ${data.url}: ${error.message}`, 'system', error); - throw error_; - } +;// CONCATENATED MODULE: ./node_modules/langchain/dist/text_splitter.js - if (body.readableEnded === true || body._readableState.ended === true) { - try { - if (accum.every(c => typeof c === 'string')) { - return external_node_buffer_namespaceObject.Buffer.from(accum.join('')); - } - return external_node_buffer_namespaceObject.Buffer.concat(accum, accumBytes); - } catch (error) { - throw new FetchError(`Could not create Buffer from response body for ${data.url}: ${error.message}`, 'system', error); - } - } else { - throw new FetchError(`Premature close of server response while trying to fetch ${data.url}`); - } +class TextSplitter extends (/* unused pure expression or super */ null && (BaseDocumentTransformer)) { + constructor(fields) { + super(fields); + Object.defineProperty(this, "lc_namespace", { + enumerable: true, + configurable: true, + writable: true, + value: ["langchain", "document_transformers", "text_splitters"] + }); + Object.defineProperty(this, "chunkSize", { + enumerable: true, + configurable: true, + writable: true, + value: 1000 + }); + Object.defineProperty(this, "chunkOverlap", { + enumerable: true, + configurable: true, + writable: true, + value: 200 + }); + Object.defineProperty(this, "keepSeparator", { + enumerable: true, + configurable: true, + writable: true, + value: false + }); + Object.defineProperty(this, "lengthFunction", { + enumerable: true, + configurable: true, + writable: true, + value: void 0 + }); + this.chunkSize = fields?.chunkSize ?? this.chunkSize; + this.chunkOverlap = fields?.chunkOverlap ?? this.chunkOverlap; + this.keepSeparator = fields?.keepSeparator ?? this.keepSeparator; + this.lengthFunction = + fields?.lengthFunction ?? ((text) => text.length); + if (this.chunkOverlap >= this.chunkSize) { + throw new Error("Cannot have chunkOverlap >= chunkSize"); + } + } + async transformDocuments(documents, chunkHeaderOptions = {}) { + return this.splitDocuments(documents, chunkHeaderOptions); + } + splitOnSeparator(text, separator) { + let splits; + if (separator) { + if (this.keepSeparator) { + const regexEscapedSeparator = separator.replace(/[/\-\\^$*+?.()|[\]{}]/g, "\\$&"); + splits = text.split(new RegExp(`(?=${regexEscapedSeparator})`)); + } + else { + splits = text.split(separator); + } + } + else { + splits = text.split(""); + } + return splits.filter((s) => s !== ""); + } + async createDocuments(texts, + // eslint-disable-next-line @typescript-eslint/no-explicit-any + metadatas = [], chunkHeaderOptions = {}) { + // if no metadata is provided, we create an empty one for each text + const _metadatas = metadatas.length > 0 ? metadatas : new Array(texts.length).fill({}); + const { chunkHeader = "", chunkOverlapHeader = "(cont'd) ", appendChunkOverlapHeader = false, } = chunkHeaderOptions; + const documents = new Array(); + for (let i = 0; i < texts.length; i += 1) { + const text = texts[i]; + let lineCounterIndex = 1; + let prevChunk = null; + let indexPrevChunk = -1; + for (const chunk of await this.splitText(text)) { + let pageContent = chunkHeader; + // we need to count the \n that are in the text before getting removed by the splitting + const indexChunk = text.indexOf(chunk, indexPrevChunk + 1); + if (prevChunk === null) { + const newLinesBeforeFirstChunk = this.numberOfNewLines(text, 0, indexChunk); + lineCounterIndex += newLinesBeforeFirstChunk; + } + else { + const indexEndPrevChunk = indexPrevChunk + (await this.lengthFunction(prevChunk)); + if (indexEndPrevChunk < indexChunk) { + const numberOfIntermediateNewLines = this.numberOfNewLines(text, indexEndPrevChunk, indexChunk); + lineCounterIndex += numberOfIntermediateNewLines; + } + else if (indexEndPrevChunk > indexChunk) { + const numberOfIntermediateNewLines = this.numberOfNewLines(text, indexChunk, indexEndPrevChunk); + lineCounterIndex -= numberOfIntermediateNewLines; + } + if (appendChunkOverlapHeader) { + pageContent += chunkOverlapHeader; + } + } + const newLinesCount = this.numberOfNewLines(chunk); + const loc = _metadatas[i].loc && typeof _metadatas[i].loc === "object" + ? { ..._metadatas[i].loc } + : {}; + loc.lines = { + from: lineCounterIndex, + to: lineCounterIndex + newLinesCount, + }; + const metadataWithLinesNumber = { + ..._metadatas[i], + loc, + }; + pageContent += chunk; + documents.push(new Document({ + pageContent, + metadata: metadataWithLinesNumber, + })); + lineCounterIndex += newLinesCount; + prevChunk = chunk; + indexPrevChunk = indexChunk; + } + } + return documents; + } + numberOfNewLines(text, start, end) { + const textSection = text.slice(start, end); + return (textSection.match(/\n/g) || []).length; + } + async splitDocuments(documents, chunkHeaderOptions = {}) { + const selectedDocuments = documents.filter((doc) => doc.pageContent !== undefined); + const texts = selectedDocuments.map((doc) => doc.pageContent); + const metadatas = selectedDocuments.map((doc) => doc.metadata); + return this.createDocuments(texts, metadatas, chunkHeaderOptions); + } + joinDocs(docs, separator) { + const text = docs.join(separator).trim(); + return text === "" ? null : text; + } + async mergeSplits(splits, separator) { + const docs = []; + const currentDoc = []; + let total = 0; + for (const d of splits) { + const _len = await this.lengthFunction(d); + if (total + _len + (currentDoc.length > 0 ? separator.length : 0) > + this.chunkSize) { + if (total > this.chunkSize) { + console.warn(`Created a chunk of size ${total}, + +which is longer than the specified ${this.chunkSize}`); + } + if (currentDoc.length > 0) { + const doc = this.joinDocs(currentDoc, separator); + if (doc !== null) { + docs.push(doc); + } + // Keep on popping if: + // - we have a larger chunk than in the chunk overlap + // - or if we still have any chunks and the length is long + while (total > this.chunkOverlap || + (total + _len > this.chunkSize && total > 0)) { + total -= await this.lengthFunction(currentDoc[0]); + currentDoc.shift(); + } + } + } + currentDoc.push(d); + total += _len; + } + const doc = this.joinDocs(currentDoc, separator); + if (doc !== null) { + docs.push(doc); + } + return docs; + } +} +class CharacterTextSplitter extends (/* unused pure expression or super */ null && (TextSplitter)) { + static lc_name() { + return "CharacterTextSplitter"; + } + constructor(fields) { + super(fields); + Object.defineProperty(this, "separator", { + enumerable: true, + configurable: true, + writable: true, + value: "\n\n" + }); + this.separator = fields?.separator ?? this.separator; + } + async splitText(text) { + // First we naively split the large input into a bunch of smaller ones. + const splits = this.splitOnSeparator(text, this.separator); + return this.mergeSplits(splits, this.keepSeparator ? "" : this.separator); + } +} +const SupportedTextSplitterLanguages = (/* unused pure expression or super */ null && ([ + "cpp", + "go", + "java", + "js", + "php", + "proto", + "python", + "rst", + "ruby", + "rust", + "scala", + "swift", + "markdown", + "latex", + "html", + "sol", +])); +class text_splitter_RecursiveCharacterTextSplitter extends (/* unused pure expression or super */ null && (TextSplitter)) { + static lc_name() { + return "RecursiveCharacterTextSplitter"; + } + constructor(fields) { + super(fields); + Object.defineProperty(this, "separators", { + enumerable: true, + configurable: true, + writable: true, + value: ["\n\n", "\n", " ", ""] + }); + this.separators = fields?.separators ?? this.separators; + this.keepSeparator = fields?.keepSeparator ?? true; + } + async _splitText(text, separators) { + const finalChunks = []; + // Get appropriate separator to use + let separator = separators[separators.length - 1]; + let newSeparators; + for (let i = 0; i < separators.length; i += 1) { + const s = separators[i]; + if (s === "") { + separator = s; + break; + } + if (text.includes(s)) { + separator = s; + newSeparators = separators.slice(i + 1); + break; + } + } + // Now that we have the separator, split the text + const splits = this.splitOnSeparator(text, separator); + // Now go merging things, recursively splitting longer texts. + let goodSplits = []; + const _separator = this.keepSeparator ? "" : separator; + for (const s of splits) { + if ((await this.lengthFunction(s)) < this.chunkSize) { + goodSplits.push(s); + } + else { + if (goodSplits.length) { + const mergedText = await this.mergeSplits(goodSplits, _separator); + finalChunks.push(...mergedText); + goodSplits = []; + } + if (!newSeparators) { + finalChunks.push(s); + } + else { + const otherInfo = await this._splitText(s, newSeparators); + finalChunks.push(...otherInfo); + } + } + } + if (goodSplits.length) { + const mergedText = await this.mergeSplits(goodSplits, _separator); + finalChunks.push(...mergedText); + } + return finalChunks; + } + async splitText(text) { + return this._splitText(text, this.separators); + } + static fromLanguage(language, options) { + return new text_splitter_RecursiveCharacterTextSplitter({ + ...options, + separators: text_splitter_RecursiveCharacterTextSplitter.getSeparatorsForLanguage(language), + }); + } + static getSeparatorsForLanguage(language) { + if (language === "cpp") { + return [ + // Split along class definitions + "\nclass ", + // Split along function definitions + "\nvoid ", + "\nint ", + "\nfloat ", + "\ndouble ", + // Split along control flow statements + "\nif ", + "\nfor ", + "\nwhile ", + "\nswitch ", + "\ncase ", + // Split by the normal type of lines + "\n\n", + "\n", + " ", + "", + ]; + } + else if (language === "go") { + return [ + // Split along function definitions + "\nfunc ", + "\nvar ", + "\nconst ", + "\ntype ", + // Split along control flow statements + "\nif ", + "\nfor ", + "\nswitch ", + "\ncase ", + // Split by the normal type of lines + "\n\n", + "\n", + " ", + "", + ]; + } + else if (language === "java") { + return [ + // Split along class definitions + "\nclass ", + // Split along method definitions + "\npublic ", + "\nprotected ", + "\nprivate ", + "\nstatic ", + // Split along control flow statements + "\nif ", + "\nfor ", + "\nwhile ", + "\nswitch ", + "\ncase ", + // Split by the normal type of lines + "\n\n", + "\n", + " ", + "", + ]; + } + else if (language === "js") { + return [ + // Split along function definitions + "\nfunction ", + "\nconst ", + "\nlet ", + "\nvar ", + "\nclass ", + // Split along control flow statements + "\nif ", + "\nfor ", + "\nwhile ", + "\nswitch ", + "\ncase ", + "\ndefault ", + // Split by the normal type of lines + "\n\n", + "\n", + " ", + "", + ]; + } + else if (language === "php") { + return [ + // Split along function definitions + "\nfunction ", + // Split along class definitions + "\nclass ", + // Split along control flow statements + "\nif ", + "\nforeach ", + "\nwhile ", + "\ndo ", + "\nswitch ", + "\ncase ", + // Split by the normal type of lines + "\n\n", + "\n", + " ", + "", + ]; + } + else if (language === "proto") { + return [ + // Split along message definitions + "\nmessage ", + // Split along service definitions + "\nservice ", + // Split along enum definitions + "\nenum ", + // Split along option definitions + "\noption ", + // Split along import statements + "\nimport ", + // Split along syntax declarations + "\nsyntax ", + // Split by the normal type of lines + "\n\n", + "\n", + " ", + "", + ]; + } + else if (language === "python") { + return [ + // First, try to split along class definitions + "\nclass ", + "\ndef ", + "\n\tdef ", + // Now split by the normal type of lines + "\n\n", + "\n", + " ", + "", + ]; + } + else if (language === "rst") { + return [ + // Split along section titles + "\n===\n", + "\n---\n", + "\n***\n", + // Split along directive markers + "\n.. ", + // Split by the normal type of lines + "\n\n", + "\n", + " ", + "", + ]; + } + else if (language === "ruby") { + return [ + // Split along method definitions + "\ndef ", + "\nclass ", + // Split along control flow statements + "\nif ", + "\nunless ", + "\nwhile ", + "\nfor ", + "\ndo ", + "\nbegin ", + "\nrescue ", + // Split by the normal type of lines + "\n\n", + "\n", + " ", + "", + ]; + } + else if (language === "rust") { + return [ + // Split along function definitions + "\nfn ", + "\nconst ", + "\nlet ", + // Split along control flow statements + "\nif ", + "\nwhile ", + "\nfor ", + "\nloop ", + "\nmatch ", + "\nconst ", + // Split by the normal type of lines + "\n\n", + "\n", + " ", + "", + ]; + } + else if (language === "scala") { + return [ + // Split along class definitions + "\nclass ", + "\nobject ", + // Split along method definitions + "\ndef ", + "\nval ", + "\nvar ", + // Split along control flow statements + "\nif ", + "\nfor ", + "\nwhile ", + "\nmatch ", + "\ncase ", + // Split by the normal type of lines + "\n\n", + "\n", + " ", + "", + ]; + } + else if (language === "swift") { + return [ + // Split along function definitions + "\nfunc ", + // Split along class definitions + "\nclass ", + "\nstruct ", + "\nenum ", + // Split along control flow statements + "\nif ", + "\nfor ", + "\nwhile ", + "\ndo ", + "\nswitch ", + "\ncase ", + // Split by the normal type of lines + "\n\n", + "\n", + " ", + "", + ]; + } + else if (language === "markdown") { + return [ + // First, try to split along Markdown headings (starting with level 2) + "\n## ", + "\n### ", + "\n#### ", + "\n##### ", + "\n###### ", + // Note the alternative syntax for headings (below) is not handled here + // Heading level 2 + // --------------- + // End of code block + "```\n\n", + // Horizontal lines + "\n\n***\n\n", + "\n\n---\n\n", + "\n\n___\n\n", + // Note that this splitter doesn't handle horizontal lines defined + // by *three or more* of ***, ---, or ___, but this is not handled + "\n\n", + "\n", + " ", + "", + ]; + } + else if (language === "latex") { + return [ + // First, try to split along Latex sections + "\n\\chapter{", + "\n\\section{", + "\n\\subsection{", + "\n\\subsubsection{", + // Now split by environments + "\n\\begin{enumerate}", + "\n\\begin{itemize}", + "\n\\begin{description}", + "\n\\begin{list}", + "\n\\begin{quote}", + "\n\\begin{quotation}", + "\n\\begin{verse}", + "\n\\begin{verbatim}", + // Now split by math environments + "\n\\begin{align}", + "$$", + "$", + // Now split by the normal type of lines + "\n\n", + "\n", + " ", + "", + ]; + } + else if (language === "html") { + return [ + // First, try to split along HTML tags + "", + "
", + "

", + "
", + "

  • ", + "

    ", + "

    ", + "

    ", + "

    ", + "

    ", + "
    ", + "", + "", + "", + "
    ", + "", + "
      ", + "
        ", + "
        ", + "