Skip to main content

Tutorial

The tutorial offers a step-by-step guide on using closevector-node for text embeddings and vector storage. It covers installation via npm, access key generation for authentication, and code samples in TypeScript for importing classes, token encryption, and data preparation. The tutorial helps you fetch Hacker News stories, convert them to a specific format, and initialize a vector store, culminating in index creation.

Installing closevector-node

npm install closevector-node

Generate your first access key

First you need to create an account on closevector.getmegaportal.com. Then you can generate an access key on the Access key Management.

Importing packages

import {
CloseVectorEmbeddings,
HNSWLib,
upsertIndex
} from 'closevector-node';
import { Document } from 'langchain/document';

Authenticate your requests

import jwt from '@tsndr/cloudflare-worker-jwt'; // you can also use any other JWT library, for example: jsonwebtoken

export async function encryptToken(object: Record<string, any>, secret: string): Promise<string> {
const token = await jwt.sign({
// 1 hour from now on
...object,
iat: Date.now(),
exp: Math.floor((Date.now() + 60 * 60 * 1000) / 1000),
}, secret);
return token;
}

export const fetchUsingToken = async function fetchUsingToken(url: string, options: RequestInit) {
const accessKey = 'your access key';
const secret = 'your secret';

let res = await fetch(url, {
...options,
headers: {
...options.headers,
Authorization: `Bearer ${accessKey}:${await encryptToken({ accessKey }, secret)}`,
},
});

if (res.status !== 200) {
let json = await res.json();
throw new Error(await (json as any).message);
}

return res;
};

Preparing your data and creating index

// get list from recent hot hacker news posts: https://hacker-news.firebaseio.com/v0/topstories.json?print=pretty

function concurrentPromise<T>(promises: (() => Promise<T>)[], limit: number): Promise<T[]> {
return new Promise((resolve, reject) => {
let results: T[] = [];
let index = 0;
let running = 0;
let done = 0;

function run() {
if (done === promises.length) {
resolve(results);
return;
}

while (running < limit && index < promises.length) {
running++;

function handler(index: number) {
return function (result: T) {
results[index] = result;
running--;
done++;
run();
}
}

promises[index]().then(handler(index)).catch((err) => {
reject(err);
});
index++;
}
}

run();
});
}

(async () => {

const hackerNewsTopStories = (await (await fetch('https://hacker-news.firebaseio.com/v0/topstories.json?print=pretty')).json());

console.log("getting posts");
const posts = await concurrentPromise(hackerNewsTopStories.map((id: number) => async () => {
return await (await fetch(`https://hacker-news.firebaseio.com/v0/item/${id}.json?print=pretty`)).json();
}), 5);

console.log("getting posts done");

let documents: Document[] = posts.map((p: any) => {
return {
pageContent: `title: ${p.title}` + "\n" + `text: ${p.text}` + "\n" + `score: ${p.score}`,
metadata: {
url: p.url,
}
}
})

const vectorStore = await HNSWLib.fromDocuments(
documents,
new CloseVectorEmbeddings({
key: accessKey,
secret: secret,
})
);

await upsertIndex(vectorStore, {
description: '',
accessKey,
secret,
onProgress: (e) => {
console.log(e)
}
});

})();