Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: langchain turbopuffer #319

Merged
merged 1 commit into from
Dec 23, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 9 additions & 4 deletions packages/crgpt-loader/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,25 +5,30 @@
"main": "dist/index.js",
"types": "dist/index.d.ts",
"scripts": {
"test": "ts-node test.ts"
"test": "ts-node test.ts",
"build": "tsc",
"publish-package": "pnpm build && npm publish --access public"
},
"keywords": [
"github",
"AI",
"loader",
"vector store",
"crgpt"
"crgpt",
"turbopuffer"
],
"author": "Matt Carey",
"license": "MIT",
"dependencies": {
"dotenv": "^16.3.1",
"ignore": "^5.3.0",
"langchain": "^0.0.204"
},
"devDependencies": {
"@types/node": "^20.10.5",
"ts-node": "^10.9.2",
"typescript": "^5.3.3"
}
},
"files": [
"dist/*"
]
}
13 changes: 2 additions & 11 deletions packages/crgpt-loader/pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

65 changes: 41 additions & 24 deletions packages/crgpt-loader/src/crgpt-loader.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
import axios, { AxiosResponse } from "axios";
import dotenv from "dotenv";
import { promises as fsPromises } from "fs";
import { Document } from "langchain/document";
import { OpenAIEmbeddings } from "langchain/embeddings/openai";
import { RecursiveCharacterTextSplitter } from "langchain/text_splitter";
import os from "os";
import path from "path";
import { tmpdir } from "os";
import { join } from "path";
import { removeFilesCommand, removeFoldersCommand } from "./constants";
import { executeCommand, openFile, savePage } from "./utils";

Expand Down Expand Up @@ -67,15 +66,15 @@ export class CRGPTLoader {
"Content-Type": "application/json",
};

await axios.post(
apiEndpoint,
{
await fetch(apiEndpoint, {
method: "POST",
headers,
body: JSON.stringify({
ids,
vectors: embeddings,
attributes,
},
{ headers }
);
}),
});
} catch (error) {
console.error("Error storing documents:", error);
throw error;
Expand All @@ -102,9 +101,7 @@ export class CRGPTLoader {
}

private async cloneRepository(): Promise<string> {
const tempDir = await fsPromises.mkdtemp(
path.join(os.tmpdir(), "CRGPTLoader-")
);
const tempDir = await fsPromises.mkdtemp(join(tmpdir(), "CRGPTLoader-"));
const cloneCommand = `git clone --depth 1 ${this.link} ${tempDir}`;
await executeCommand(cloneCommand);
return tempDir;
Expand Down Expand Up @@ -132,7 +129,7 @@ export class CRGPTLoader {
const documents: Document<{ source: string }>[] = [];

for (const entry of entries) {
const fullPath = path.join(directory, entry.name);
const fullPath = join(directory, entry.name);
if (entry.isDirectory()) {
documents.push(...(await this.createDocuments(fullPath)));
} else if (entry.isFile()) {
Expand Down Expand Up @@ -164,7 +161,10 @@ export class CRGPTLoader {
continue;
}

const { ids, vectors, attributes, next_cursor } = response.data;
// Parse the response body as JSON
const data = await response.json();
const { ids, vectors, attributes, next_cursor } = data;

savePage(dataDir, pageIndex, ids, vectors, attributes);

nextCursor = next_cursor;
Expand All @@ -179,16 +179,27 @@ export class CRGPTLoader {
private async fetchPage(
namespace: string,
cursor: string | null
): Promise<AxiosResponse> {
const apiEndpoint = `https://api.turbopuffer.com/v1/vectors/${namespace}`;
const params = cursor ? { cursor } : {};

return axios.get(apiEndpoint, {
headers: { Authorization: `Bearer ${process.env.TURBOPUFFER_API_KEY}` },
params,
maxContentLength: Infinity,
maxBodyLength: Infinity,
): Promise<Response> {
const apiEndpoint = new URL(
`https://api.turbopuffer.com/v1/vectors/${namespace}`
);

if (cursor) {
apiEndpoint.searchParams.append("cursor", cursor);
}

const response = await fetch(apiEndpoint.toString(), {
method: "GET",
headers: {
Authorization: `Bearer ${process.env.TURBOPUFFER_API_KEY}`,
},
});

if (!response.ok) {
throw new Error(`HTTP error! status: ${response.status}`);
}

return response;
}

public async delete(indexName = this.extractRepoName()): Promise<void> {
Expand All @@ -200,7 +211,13 @@ export class CRGPTLoader {
};

// Make the DELETE request
const response = await axios.delete(apiEndpoint, { headers });
const res = await fetch(apiEndpoint, {
method: "DELETE",
headers,
});

// Parse the response
const response = await res.json();

// Log the response status
console.log("Delete response:", response.data);
Expand Down
15 changes: 15 additions & 0 deletions packages/crgpt-loader/src/lc_wip/turbopufferVectorStore.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@ import { Document } from "langchain/document";
import { Embeddings } from "langchain/embeddings/base";
import { VectorStore } from "langchain/vectorstores/base";

/**
 * Configuration object accepted by `TurboPuffer.fromDocuments` (and passed
 * straight through to the `TurboPuffer` constructor). Both fields are optional.
 */
interface TurboPufferIntegrationParams {
// NOTE(review): presumably the TurboPuffer API key sent as a Bearer token — confirm against the constructor.
apiKey?: string;
// NOTE(review): presumably the TurboPuffer namespace the vectors are stored under — confirm against the constructor.
namespace?: string;
}

interface TurboPufferHeaders {
headers: {
Authorization: string;
Expand Down Expand Up @@ -185,4 +190,14 @@ export class TurboPuffer extends VectorStore {

return result;
}

/**
 * Builds a store from a list of documents in one call.
 *
 * Constructs a new instance with the given embeddings and configuration,
 * adds the documents to it via `addDocuments`, and resolves with that
 * instance once the documents have been added.
 *
 * @param docs - Documents to add to the newly created store.
 * @param embeddings - Embeddings implementation handed to the constructor.
 * @param dbConfig - Store configuration handed to the constructor.
 * @returns The newly created store, after `addDocuments` has completed.
 */
static async fromDocuments(
  docs: Document[],
  embeddings: Embeddings,
  dbConfig: TurboPufferIntegrationParams
): Promise<TurboPuffer> {
  // `new this` (rather than `new TurboPuffer`) so a subclass calling this
  // factory gets an instance of itself.
  const store = new this(embeddings, dbConfig);
  await store.addDocuments(docs);
  return store;
}
}
42 changes: 42 additions & 0 deletions packages/lc-turbopuffer/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# LangChainJS x TurboPuffer

This is a third-party integration of [TurboPuffer](https://turbopuffer.com/) as a Vector Store class for the [LangChain](https://langchain.org/) ecosystem.

Turbopuffer is a vector store built on top of object storage, so it is remarkably cheap and scalable.

Python is always going to come first as an official version so I thought I'd make a JS version for the community.

## Installation

```bash
npm install langchain-turbopuffer
```

or if you use pnpm

```bash
pnpm install langchain-turbopuffer
```

## Usage

```javascript
import { TurboPufferVectorStore } from "langchain-turbopuffer";

const embeddings = new OpenAIEmbeddings();

const vectorStore = new TurboPufferVectorStore(embeddings);

const doc = new Document({
pageContent: "This is a test",
metadata: {
source: "https://example.com",
},
});

await vectorStore.addDocuments([doc]);
```

## Contribute to the project

This is a community project, so feel free to contribute to it and bring up any issues. If you have any questions, please contact me on the Turbopuffer Slack.
31 changes: 31 additions & 0 deletions packages/lc-turbopuffer/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
{
"name": "langchain-turbopuffer",
"version": "0.0.2",
"description": "An open source and LangChain compatible Vector Store class for Turbopuffer. Store vectors in object storage for cheap.",
"main": "dist/index.js",
"types": "dist/index.d.ts",
"scripts": {
"build": "tsc",
"publish-package": "pnpm build && npm publish --access public"
},
"keywords": [
"AI",
"retriever",
"vector store",
"object store",
"turbopuffer",
"langchain"
],
"author": "Matt Carey",
"license": "MIT",
"dependencies": {
"langchain": "^0.0.204"
},
"devDependencies": {
"@types/node": "^20.10.5",
"typescript": "^5.3.3"
},
"files": [
"dist/*"
]
}
Loading
Loading