OpenAI Integration
Initializing the Client
Create a single shared client instance — not one per request.
import OpenAI from 'openai';
import { OPENAI_API_KEY } from '../config/env.js';
// Settings for the single process-wide client. Import this module wherever the
// API is needed instead of constructing a new OpenAI instance per request.
const clientOptions = {
  apiKey: OPENAI_API_KEY,
  // Give up on any single request after 60 seconds (value is in milliseconds).
  timeout: 60_000,
  // Automatically retry transient failures (e.g. 429 and 5xx) up to 2 times.
  maxRetries: 2,
};

const openai = new OpenAI(clientOptions);

export default openai;
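The SDK retries transient failures (connection errors, 408, 409, 429, and 5xx responses) automatically with exponential backoff. It does this twice by default; maxRetries overrides that count, and setting it to 0 disables retries.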
Chat Completions
import openai from '../lib/openai.js';
/**
 * Send a conversation to the Chat Completions API and return the first choice.
 *
 * @param {object} options
 * @param {Array<{role: string, content: unknown}>} options.messages - Conversation so far.
 * @param {string} [options.model='gpt-4o'] - Model identifier.
 * @param {number} [options.temperature=0.7] - 0 = deterministic, 2 = very creative.
 * @param {number} [options.maxTokens=1024] - Sent to the API as `max_tokens`.
 * @param {string|null} [options.systemPrompt=null] - When truthy, prepended as a system message.
 * @returns {Promise<{content: string|null, usage: object, finishReason: string}>}
 * @throws {TypeError} If `messages` is not an array.
 * @throws {Error} If there is nothing to send or the API returns no choices.
 */
export async function chat({
  messages,
  model = 'gpt-4o',
  temperature = 0.7,
  maxTokens = 1024,
  systemPrompt = null,
} = {}) {
  // Guard before spreading: a string would otherwise be spread into single
  // characters and sent to the API as "messages".
  if (!Array.isArray(messages)) {
    throw new TypeError('chat(): "messages" must be an array of message objects');
  }

  const fullMessages = systemPrompt
    ? [{ role: 'system', content: systemPrompt }, ...messages]
    : messages;

  // An empty message list is always rejected by the API; fail before the round trip.
  if (fullMessages.length === 0) {
    throw new Error('chat(): at least one message or a systemPrompt is required');
  }

  const response = await openai.chat.completions.create({
    model,
    messages: fullMessages,
    temperature,
    max_tokens: maxTokens,
  });

  const firstChoice = response.choices?.[0];
  if (!firstChoice) {
    throw new Error('chat(): the API response contained no choices');
  }

  return {
    content: firstChoice.message.content,
    usage: response.usage,
    finishReason: firstChoice.finish_reason,
  };
}
finish_reason values you should check:
| Value | Meaning |
|---|---|
| stop | Normal completion — model finished the response |
| length | Hit max_tokens limit — response was cut off |
| content_filter | Blocked by OpenAI's content policy |
| tool_calls | Model wants to call a tool |
If you see length, either increase max_tokens or trim the conversation history.
Express route
import { Router } from 'express';
import { chat } from '../services/openai.service.js';
const router = Router();

/**
 * POST / — run one non-streaming chat completion.
 *
 * Request body: { messages: Array, systemPrompt?: string }
 * Success response: { reply, usage }
 * Invalid body: 400 with { error }
 * Any failure thrown by chat() is forwarded to the Express error middleware.
 */
router.post('/', async (req, res, next) => {
  try {
    // req.body is undefined when no body parser ran; treat that as an empty body
    // so the caller gets a 400 instead of a destructuring TypeError.
    const { messages, systemPrompt } = req.body ?? {};

    if (!Array.isArray(messages) || messages.length === 0) {
      res.status(400).json({ error: '"messages" must be a non-empty array' });
      return;
    }

    const result = await chat({ messages, systemPrompt });
    res.json({ reply: result.content, usage: result.usage });
  } catch (err) {
    next(err);
  }
});

export default router;
Streaming with SSE
A non-streaming request holds the HTTP connection open until the entire response is ready — for a 500-word answer, that can be 10+ seconds of silence. Streaming sends each token as it is generated, giving users instant feedback.
How SSE works:
SSE is a one-way channel from server to client over a normal HTTP connection. The response content type is text/event-stream and each event is formatted as:
data: {"token": "Hello"}\n\n
data: {"token": " world"}\n\n
data: {"done": true}\n\n
The double newline \n\n terminates each event.
import openai from '../lib/openai.js';
/**
 * POST /stream — stream a chat completion to the client as Server-Sent Events.
 *
 * Request body: { messages: Array, systemPrompt?: string }
 * Events written (each as `data: <json>\n\n`):
 *   { token }                    one per content delta
 *   { done: true, finishReason } exactly once, when generation ends for any reason
 *   { error }                    if the upstream stream fails after SSE has started
 */
router.post('/stream', async (req, res, next) => {
  const { messages, systemPrompt } = req.body ?? {};
  if (!Array.isArray(messages) || messages.length === 0) {
    res.status(400).json({ error: '"messages" must be a non-empty array' });
    return;
  }

  const fullMessages = systemPrompt
    ? [{ role: 'system', content: systemPrompt }, ...messages]
    : messages;

  // Open the upstream stream BEFORE committing to an SSE response, so that a
  // failure here (bad key, rate limit, invalid request) still reaches the
  // error middleware with a clean response and a proper status code.
  let stream;
  try {
    stream = await openai.chat.completions.create({
      model: 'gpt-4o',
      messages: fullMessages,
      stream: true,
    });
  } catch (err) {
    next(err);
    return;
  }

  // If the client disconnects mid-stream, stop the upstream generation instead
  // of continuing to consume tokens nobody will read.
  let clientGone = false;
  res.on('close', () => {
    if (!res.writableEnded) {
      clientGone = true;
      stream.controller.abort();
    }
  });

  res.setHeader('Content-Type', 'text/event-stream');
  res.setHeader('Cache-Control', 'no-cache');
  res.setHeader('Connection', 'keep-alive');
  res.setHeader('X-Accel-Buffering', 'no'); // disable Nginx buffering
  res.flushHeaders(); // let the client see the stream open before the first token

  const sendEvent = (payload) => {
    res.write(`data: ${JSON.stringify(payload)}\n\n`);
  };

  try {
    let finishReason = null;
    for await (const chunk of stream) {
      const choice = chunk.choices[0];
      if (choice?.delta?.content) {
        sendEvent({ token: choice.delta.content });
      }
      if (choice?.finish_reason) {
        finishReason = choice.finish_reason;
      }
    }
    // Signal completion for every finish reason ('stop', 'length',
    // 'content_filter', ...), not only 'stop', so clients always get a terminator.
    if (!clientGone) {
      sendEvent({ done: true, finishReason });
    }
  } catch (err) {
    // Headers are already sent, so the error can only be reported in-band.
    if (!clientGone) {
      sendEvent({ error: err.message });
    }
  } finally {
    res.end();
  }
});
Consuming the stream on the frontend:
// Request the SSE stream and render tokens as they arrive.
const response = await fetch('/api/chat/stream', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({ messages }),
});

// A 4xx/5xx response carries a JSON error body, not an event stream.
if (!response.ok || !response.body) {
  throw new Error(`Chat stream request failed with status ${response.status}`);
}

const reader = response.body.getReader();
const decoder = new TextDecoder();

// Network chunks do not line up with SSE events: one read may contain several
// events or only part of one. Accumulate text and parse complete events only.
let buffer = '';
let finished = false;

try {
  while (!finished) {
    const { done, value } = await reader.read();
    if (done) break;

    // { stream: true } keeps a multi-byte character that is split across two
    // chunks intact instead of decoding each half as a replacement character.
    buffer += decoder.decode(value, { stream: true });

    const events = buffer.split('\n\n');
    buffer = events.pop(); // the last piece is an incomplete event (or '')

    for (const event of events) {
      if (!event.startsWith('data: ')) continue;
      const payload = JSON.parse(event.slice(6));

      if (payload.error) {
        throw new Error(payload.error);
      }
      if (payload.done) {
        finished = true; // also ends the outer read loop
        break;
      }
      appendToChat(payload.token); // update your UI
    }
  }
} finally {
  // Release the connection on every exit path. Cancelling an already-closed or
  // errored stream may reject; that is not actionable here.
  await reader.cancel().catch(() => {});
}
Vision — Sending Images
GPT-4o is multimodal. You can send images alongside text using a structured content array:
// Text part of the prompt.
const questionPart = { type: 'text', text: 'What is in this image?' };

// Image part, referenced by public URL.
const imagePart = {
  type: 'image_url',
  image_url: {
    url: 'https://example.com/photo.jpg',
    detail: 'high', // 'low', 'high', or 'auto'
  },
};

// A single user message whose content is an array mixing text and image parts.
const response = await openai.chat.completions.create({
  model: 'gpt-4o',
  messages: [{ role: 'user', content: [questionPart, imagePart] }],
});
import { readFileSync } from 'fs';
// Read the local file directly as a base64 string.
const base64 = readFileSync('./photo.jpg', 'base64');

// Local images are sent inline as a data URL in place of a remote URL.
const inlineImage = {
  type: 'image_url',
  image_url: { url: `data:image/jpeg;base64,${base64}` },
};

const userMessage = {
  role: 'user',
  content: [{ type: 'text', text: 'Describe this chart.' }, inlineImage],
};

const response = await openai.chat.completions.create({
  model: 'gpt-4o',
  messages: [userMessage],
});
Embeddings
Embeddings convert text into a vector (array of numbers) that represents its semantic meaning. Useful for semantic search, document similarity, and RAG (Retrieval-Augmented Generation).
import openai from '../lib/openai.js';
/**
 * Request an embedding for `text` from the `text-embedding-3-small` model.
 *
 * @param {string} text - Input to embed.
 * @returns {Promise<number[]>} The embedding of the first (only) result:
 *   a float array of length 1536 for this model.
 */
export async function embed(text) {
  const request = { model: 'text-embedding-3-small', input: text };
  const { data } = await openai.embeddings.create(request);
  const firstResult = data[0];
  return firstResult.embedding;
}
/**
 * Cosine similarity between two numeric vectors of equal length.
 *
 * @param {number[]} a - First vector.
 * @param {number[]} b - Second vector.
 * @returns {number} dot(a, b) / (|a| * |b|); 0 when either vector has zero
 *   magnitude (including two empty vectors), since the angle is undefined there.
 * @throws {RangeError} If the vectors have different lengths.
 */
export function cosineSimilarity(a, b) {
  // Without this check a shorter `b` yields NaN and a longer `b` is silently
  // truncated, both of which hide a caller bug.
  if (a.length !== b.length) {
    throw new RangeError(
      `cosineSimilarity(): vector lengths differ (${a.length} vs ${b.length})`,
    );
  }

  // Single pass: accumulate the dot product and both squared norms together.
  let dot = 0;
  let squaredNormA = 0;
  let squaredNormB = 0;
  for (let i = 0; i < a.length; i += 1) {
    dot += a[i] * b[i];
    squaredNormA += a[i] * a[i];
    squaredNormB += b[i] * b[i];
  }

  const denominator = Math.sqrt(squaredNormA) * Math.sqrt(squaredNormB);
  if (denominator === 0) {
    return 0; // avoid 0 / 0 = NaN for zero vectors
  }
  return dot / denominator;
}
Structured Output (JSON Mode)
JSON mode
const response = await openai.chat.completions.create({
  model: 'gpt-4o',
  // JSON mode: the model emits syntactically valid JSON, provided generation
  // runs to completion (see the finish_reason check below).
  response_format: { type: 'json_object' },
  messages: [
    {
      role: 'system',
      // JSON mode also requires the prompt itself to ask for JSON.
      content: 'You are a data extraction assistant. Always respond with valid JSON.',
    },
    {
      role: 'user',
      content: 'Extract name and email from: "Hi, I\'m Sarah at sarah@example.com"',
    },
  ],
});

const [choice] = response.choices;

// A response cut off by the token limit is a partial JSON document; fail with a
// clear message instead of an opaque SyntaxError from JSON.parse.
if (choice.finish_reason === 'length') {
  throw new Error(
    'JSON response was truncated (finish_reason: "length"); increase max_tokens and retry',
  );
}

const data = JSON.parse(choice.message.content);
Schema enforcement
With newer models you can enforce an exact JSON schema — the model is guaranteed to output only fields you define:
// JSON Schema describing the exact shape the reply must have.
const reviewSchema = {
  type: 'object',
  properties: {
    sentiment: { type: 'string', enum: ['positive', 'neutral', 'negative'] },
    score: { type: 'number', description: 'Confidence score 0-1' },
    topics: { type: 'array', items: { type: 'string' } },
  },
  required: ['sentiment', 'score', 'topics'],
  additionalProperties: false,
};

// Attach the schema through response_format with strict mode switched on.
const structuredFormat = {
  type: 'json_schema',
  json_schema: {
    name: 'review_classification',
    strict: true,
    schema: reviewSchema,
  },
};

const response = await openai.chat.completions.create({
  model: 'gpt-4o',
  messages: [{ role: 'user', content: 'Classify this review: "Great product, fast shipping!"' }],
  response_format: structuredFormat,
});