From 2abb96a8e607269bc310426cb8a0d3460a70e904 Mon Sep 17 00:00:00 2001 From: Garrett Gu Date: Mon, 7 Oct 2024 13:55:38 -0500 Subject: [PATCH] Support ndjson-format for insert & upsert ops in vectorize api --- src/cloudflare/internal/vectorize-api.ts | 59 ++++++++++++++++-------- 1 file changed, 39 insertions(+), 20 deletions(-) diff --git a/src/cloudflare/internal/vectorize-api.ts b/src/cloudflare/internal/vectorize-api.ts index 4168e320b16..c3fbc6468d2 100644 --- a/src/cloudflare/internal/vectorize-api.ts +++ b/src/cloudflare/internal/vectorize-api.ts @@ -18,6 +18,10 @@ enum Operation { type VectorizeVersion = 'v1' | 'v2'; +function toNdJson(arr: object[]) { + return arr.map((o) => JSON.stringify(o)).join('\n'); +} + /* * The Vectorize beta VectorizeIndex shares the same methods, so to keep things simple, they share one implementation. * The types here are specific to Vectorize GA, but the types here don't actually matter as they are stripped away @@ -27,7 +31,8 @@ class VectorizeIndexImpl implements Vectorize { public constructor( private readonly fetcher: Fetcher, private readonly indexId: string, - private readonly indexVersion: VectorizeVersion + private readonly indexVersion: VectorizeVersion, + private readonly useNdJson: boolean ) {} public async describe(): Promise { @@ -116,18 +121,24 @@ class VectorizeIndexImpl implements Vectorize { this.indexVersion === 'v2' ? `insert` : `binding/indexes/${this.indexId}/insert`; + let bodyVecArr = vectors.map((vec) => ({ + ...vec, + values: Array.isArray(vec.values) ? vec.values : Array.from(vec.values), + })); + + const body = this.useNdJson + ? toNdJson(bodyVecArr) + : JSON.stringify({ vectors: bodyVecArr }); + + const contentType = this.useNdJson + ? 'application/x-ndjson' + : 'application/json'; + const res = await this._send(Operation.VECTOR_INSERT, endpoint, { method: 'POST', - body: JSON.stringify({ - vectors: vectors.map((vec) => ({ - ...vec, - values: Array.isArray(vec.values) - ? vec.values - : Array.from(vec.values), - })), - }), + body, headers: { - 'content-type': 'application/json', + 'content-type': contentType, 'cf-vector-search-dim-width': String( vectors.length ? vectors[0]?.values?.length : 0 ), @@ -146,18 +157,24 @@ class VectorizeIndexImpl implements Vectorize { this.indexVersion === 'v2' ? `upsert` : `binding/indexes/${this.indexId}/upsert`; + const bodyVecArr = vectors.map((vec) => ({ + ...vec, + values: Array.isArray(vec.values) ? vec.values : Array.from(vec.values), + })); + + const body = this.useNdJson + ? toNdJson(bodyVecArr) + : JSON.stringify({ vectors: bodyVecArr }); + + const contentType = this.useNdJson + ? 'application/x-ndjson' + : 'application/json'; + const res = await this._send(Operation.VECTOR_UPSERT, endpoint, { method: 'POST', - body: JSON.stringify({ - vectors: vectors.map((vec) => ({ - ...vec, - values: Array.isArray(vec.values) - ? vec.values - : Array.from(vec.values), - })), - }), + body, headers: { - 'content-type': 'application/json', + 'content-type': contentType, 'cf-vector-search-dim-width': String( vectors.length ? vectors[0]?.values?.length : 0 ), @@ -274,11 +291,13 @@ export function makeBinding(env: { fetcher: Fetcher; indexId: string; indexVersion?: VectorizeVersion; + useNdJson?: boolean; }): Vectorize { return new VectorizeIndexImpl( env.fetcher, env.indexId, - env.indexVersion ?? 'v1' + env.indexVersion ?? 'v1', + env.useNdJson ?? false ); }