Skip to main content

OpenAI Integration

Initializing the Client

Create a single shared client instance — not one per request.

src/lib/openai.js
import OpenAI from 'openai';
import { OPENAI_API_KEY } from '../config/env.js';

// Single shared OpenAI client; this module exports it so callers reuse one
// instance instead of constructing a client per request.
const openai = new OpenAI({
apiKey: OPENAI_API_KEY,
timeout: 60_000, // per-request timeout in milliseconds (60 seconds)
maxRetries: 2, // automatic retries on transient errors (connection errors, 408, 409, 429, 5xx)
});

export default openai;

The SDK retries transient failures with exponential backoff automatically. maxRetries controls how many retries it makes (the SDK default is 2; set it to 0 to disable retries).


Chat Completions

src/services/openai.service.js
import openai from '../lib/openai.js';

/**
 * Sends a conversation to the Chat Completions API and returns the first choice.
 *
 * @param {object} options
 * @param {Array<object>} options.messages - Conversation messages, forwarded as-is.
 * @param {string} [options.model='gpt-4o'] - Model identifier.
 * @param {number} [options.temperature=0.7] - Sampling temperature (0 = most focused, 2 = very creative).
 * @param {number} [options.maxTokens=1024] - Forwarded to the API as `max_tokens`.
 * @param {?string} [options.systemPrompt=null] - When truthy, prepended as a `system` message.
 * @returns {Promise<{content: *, usage: *, finishReason: *}>} Text, token usage and
 *   finish reason taken from the first choice of the response.
 */
export async function chat({
  messages,
  model = 'gpt-4o',
  temperature = 0.7,
  maxTokens = 1024,
  systemPrompt = null,
}) {
  // Build a new array rather than mutating the caller's history.
  let conversation = messages;
  if (systemPrompt) {
    conversation = [{ role: 'system', content: systemPrompt }, ...messages];
  }

  const completion = await openai.chat.completions.create({
    model,
    messages: conversation,
    temperature,
    max_tokens: maxTokens,
  });

  const firstChoice = completion.choices[0];
  const content = firstChoice.message.content;
  const usage = completion.usage;
  const finishReason = firstChoice.finish_reason;

  return { content, usage, finishReason };
}

finish_reason values you should check:

| Value | Meaning |
| --- | --- |
| stop | Normal completion — model finished the response |
| length | Hit max_tokens limit — response was cut off |
| content_filter | Blocked by OpenAI's content policy |
| tool_calls | Model wants to call a tool |

If you see length, either increase max_tokens or trim the conversation history.

Express route

src/routes/chat.route.js
import { Router } from 'express';
import { chat } from '../services/openai.service.js';

const router = Router();

/**
 * POST / — runs a non-streaming chat completion.
 *
 * Request body: `{ messages: object[], systemPrompt?: string }`.
 * Responds with `{ reply, usage }` on success, or 400 with `{ error }` when the
 * body is malformed. Any other failure is forwarded to the Express error
 * handler through `next`.
 */
router.post('/', async (req, res, next) => {
  try {
    // req.body is undefined when no JSON body parser ran; treat that as empty.
    const { messages, systemPrompt } = req.body ?? {};

    // Reject bad client input here so it surfaces as a 400, not as a 500
    // thrown from deeper in the service or by the upstream API.
    if (!Array.isArray(messages) || messages.length === 0) {
      res.status(400).json({ error: '`messages` must be a non-empty array' });
      return;
    }
    if (systemPrompt != null && typeof systemPrompt !== 'string') {
      res.status(400).json({ error: '`systemPrompt` must be a string' });
      return;
    }

    const result = await chat({ messages, systemPrompt });
    res.json({ reply: result.content, usage: result.usage });
  } catch (err) {
    next(err);
  }
});

export default router;

Streaming with SSE

A non-streaming request holds the HTTP connection open until the entire response is ready — for a 500-word answer, that can be 10+ seconds of silence. Streaming sends each token as it is generated, giving users instant feedback.

How SSE works:

SSE is a one-way channel from server to client over a normal HTTP connection. The response content type is text/event-stream and each event is formatted as:

data: {"token": "Hello"}\n\n
data: {"token": " world"}\n\n
data: {"done": true}\n\n

The double newline \n\n terminates each event.

src/routes/chat.route.js — streaming endpoint
import openai from '../lib/openai.js';

/**
 * POST /stream — relays a chat completion to the client as Server-Sent Events.
 *
 * Request body: `{ messages: object[], systemPrompt?: string }`.
 * Events written to the response:
 *   - `{ token }` for every content delta,
 *   - `{ done: true, finishReason }` when the model reports any finish reason,
 *   - `{ error }` if the upstream call fails after the stream has started.
 * Failures before anything was written are forwarded to `next`.
 */
router.post('/stream', async (req, res, next) => {
  // Lets us cancel the upstream request when the client goes away, so tokens
  // are not generated (and billed) for a response nobody will read.
  const upstream = new AbortController();

  try {
    const { messages, systemPrompt } = req.body ?? {};

    // Validate before switching the response into SSE mode so bad input still
    // gets an ordinary JSON 400.
    if (!Array.isArray(messages) || messages.length === 0) {
      res.status(400).json({ error: '`messages` must be a non-empty array' });
      return;
    }

    res.setHeader('Content-Type', 'text/event-stream');
    res.setHeader('Cache-Control', 'no-cache');
    res.setHeader('Connection', 'keep-alive');
    res.setHeader('X-Accel-Buffering', 'no'); // disable Nginx buffering

    // 'close' also fires after a normal res.end(); only abort when the
    // connection dropped before we finished writing.
    res.on('close', () => {
      if (!res.writableEnded) {
        upstream.abort();
      }
    });

    const fullMessages = systemPrompt
      ? [{ role: 'system', content: systemPrompt }, ...messages]
      : messages;

    const stream = await openai.chat.completions.create(
      {
        model: 'gpt-4o',
        messages: fullMessages,
        stream: true,
      },
      { signal: upstream.signal },
    );

    for await (const chunk of stream) {
      // Leaving the loop also ends the SDK's underlying stream.
      if (upstream.signal.aborted) break;

      const choice = chunk.choices[0];
      if (choice?.delta?.content) {
        res.write(`data: ${JSON.stringify({ token: choice.delta.content })}\n\n`);
      }
      // Report every terminal state, not just 'stop', so the client can tell
      // a truncated ('length') or filtered answer from a complete one.
      if (choice?.finish_reason) {
        res.write(
          `data: ${JSON.stringify({ done: true, finishReason: choice.finish_reason })}\n\n`,
        );
      }
    }

    res.end();
  } catch (err) {
    if (upstream.signal.aborted) {
      // Client disconnected; there is nobody left to report the error to.
      if (!res.writableEnded) res.end();
      return;
    }
    if (res.headersSent) {
      res.write(`data: ${JSON.stringify({ error: err.message })}\n\n`);
      res.end();
    } else {
      next(err);
    }
  }
});

Consuming the stream on the frontend:

Frontend — fetch with ReadableStream
// Open the SSE stream. `fetch` is used instead of EventSource because the
// endpoint is a POST with a JSON body.
const response = await fetch('/api/chat/stream', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({ messages }),
});

// A failed request returns a normal error body, not an event stream.
if (!response.ok || !response.body) {
  throw new Error(`Chat stream request failed with status ${response.status}`);
}

const reader = response.body.getReader();
const decoder = new TextDecoder();
let buffer = ''; // holds a trailing partial event until the rest arrives
let finished = false;
let streamError = null;

while (!finished) {
  const { done, value } = await reader.read();
  if (done) break;

  // Network chunks do not line up with event boundaries: an event (or even a
  // multi-byte character) can be split across reads. `stream: true` keeps
  // partial characters inside the decoder, and only events terminated by a
  // blank line are parsed; the remainder stays in the buffer.
  buffer += decoder.decode(value, { stream: true });
  const events = buffer.split('\n\n');
  buffer = events.pop();

  for (const event of events) {
    if (!event.startsWith('data: ')) continue;
    const data = event.slice(6);

    // Tolerate the plain-text terminator some SSE producers send.
    if (data === '[DONE]') {
      finished = true;
      break;
    }

    const payload = JSON.parse(data);
    if (payload.error) {
      streamError = new Error(payload.error);
      finished = true;
      break;
    }
    if (payload.done) {
      finished = true;
      break;
    }
    if (typeof payload.token === 'string') {
      appendToChat(payload.token); // update your UI
    }
  }
}

// Stop reading once a terminal event has been seen so the connection is released.
if (finished) {
  await reader.cancel();
}
if (streamError) {
  throw streamError;
}

Vision — Sending Images

GPT-4o is multimodal. You can send images alongside text using a structured content array:

Sending an image URL
// A multimodal user message is an array of typed parts: here one text part
// followed by one image part that points at a public URL.
const questionPart = { type: 'text', text: 'What is in this image?' };

const imagePart = {
  type: 'image_url',
  image_url: {
    url: 'https://example.com/photo.jpg',
    detail: 'high', // 'low', 'high', or 'auto'
  },
};

const response = await openai.chat.completions.create({
  model: 'gpt-4o',
  messages: [{ role: 'user', content: [questionPart, imagePart] }],
});
Sending a base64 image (e.g. from file upload)
import { readFile } from 'fs/promises';

// Read the image asynchronously: a synchronous read would block the event
// loop for every other request while the file is loaded. Passing an encoding
// makes readFile return the base64 string directly.
const base64 = await readFile('./photo.jpg', { encoding: 'base64' });

// The image is embedded inline as a data URL; the MIME type in the prefix
// must match the actual file format.
const response = await openai.chat.completions.create({
  model: 'gpt-4o',
  messages: [
    {
      role: 'user',
      content: [
        { type: 'text', text: 'Describe this chart.' },
        {
          type: 'image_url',
          image_url: { url: `data:image/jpeg;base64,${base64}` },
        },
      ],
    },
  ],
});

Embeddings

Embeddings convert text into a vector (array of numbers) that represents its semantic meaning. Useful for semantic search, document similarity, and RAG (Retrieval-Augmented Generation).

src/services/embeddings.service.js
import openai from '../lib/openai.js';

/**
 * Requests an embedding for `text` from the `text-embedding-3-small` model.
 *
 * @param {string} text - Input forwarded to the embeddings endpoint.
 * @returns {Promise<number[]>} The embedding of the first result
 *   (1536 floats at this model's default dimensionality).
 */
export async function embed(text) {
  const request = {
    model: 'text-embedding-3-small',
    input: text,
  };
  const result = await openai.embeddings.create(request);
  const firstItem = result.data[0];
  return firstItem.embedding;
}

/**
 * Computes the cosine similarity of two numeric vectors.
 *
 * @param {number[]} a - First vector.
 * @param {number[]} b - Second vector; must have the same length as `a`.
 * @returns {number} A value in [-1, 1]. Returns 0 when either vector has zero
 *   magnitude (including empty vectors), where the similarity is undefined.
 * @throws {RangeError} If the vectors differ in length.
 */
export function cosineSimilarity(a, b) {
  // Comparing vectors of different dimensionality is a caller bug; without
  // this check the result is silently wrong or NaN.
  if (a.length !== b.length) {
    throw new RangeError(
      `Vectors must have the same length (got ${a.length} and ${b.length})`,
    );
  }

  let dot = 0;
  let sumSquaresA = 0;
  let sumSquaresB = 0;
  for (let i = 0; i < a.length; i += 1) {
    dot += a[i] * b[i];
    sumSquaresA += a[i] * a[i];
    sumSquaresB += b[i] * b[i];
  }

  // Avoid 0 / 0 = NaN for zero vectors.
  if (sumSquaresA === 0 || sumSquaresB === 0) {
    return 0;
  }

  return dot / (Math.sqrt(sumSquaresA) * Math.sqrt(sumSquaresB));
}

Structured Output (JSON Mode)

JSON mode

const response = await openai.chat.completions.create({
  model: 'gpt-4o',
  // JSON mode: the model emits a JSON object, but the text is only complete
  // (and therefore parseable) when the completion finishes normally. The
  // prompt must also mention JSON, as the system message below does.
  response_format: { type: 'json_object' },
  messages: [
    {
      role: 'system',
      content: 'You are a data extraction assistant. Always respond with valid JSON.',
    },
    {
      role: 'user',
      content: 'Extract name and email from: "Hi, I\'m Sarah at sarah@example.com"',
    },
  ],
});

const choice = response.choices[0];

// A completion cut off by max_tokens or the content filter can end mid-object;
// fail with a clear message instead of an opaque SyntaxError from JSON.parse.
if (choice.finish_reason !== 'stop') {
  throw new Error(`JSON response is incomplete (finish_reason: ${choice.finish_reason})`);
}

const data = JSON.parse(choice.message.content);

Schema enforcement

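With models that support Structured Outputs (gpt-4o-2024-08-06 and later, gpt-4o-mini) you can enforce an exact JSON schema — with strict: true the output is guaranteed to match the schema and contain only the fields you define: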

// JSON Schema describing the classification object the model must return.
// Every property is listed in `required` and extra keys are disallowed.
const reviewSchema = {
  type: 'object',
  properties: {
    sentiment: { type: 'string', enum: ['positive', 'neutral', 'negative'] },
    score: { type: 'number', description: 'Confidence score 0-1' },
    topics: { type: 'array', items: { type: 'string' } },
  },
  required: ['sentiment', 'score', 'topics'],
  additionalProperties: false,
};

const reviewPrompt = {
  role: 'user',
  content: 'Classify this review: "Great product, fast shipping!"',
};

const response = await openai.chat.completions.create({
  model: 'gpt-4o',
  messages: [reviewPrompt],
  response_format: {
    type: 'json_schema',
    json_schema: {
      name: 'review_classification',
      strict: true,
      schema: reviewSchema,
    },
  },
});