OpenAI Integration

Initializing the Client

Create a single shared client instance — not one per request.

src/lib/openai.js
import OpenAI from 'openai';
import { OPENAI_API_KEY } from '../config/env.js';

// Shared OpenAI client: configured once here and exported for reuse by the rest of the app.
const openai = new OpenAI({
  apiKey: OPENAI_API_KEY,
  timeout: 60_000,   // per-request timeout in milliseconds (60 seconds)
  maxRetries: 2,     // automatic retries on transient errors (e.g. 429 rate limits, 5xx)
});

export default openai;

The SDK retries transient failures with exponential backoff automatically (twice by default); maxRetries only changes how many attempts are made, and maxRetries: 0 disables retries.

Chat Completions

src/services/openai.service.js
import openai from '../lib/openai.js';

/**
 * Run a single (non-streaming) chat completion and return its first choice.
 *
 * @param {object} options
 * @param {Array<object>} options.messages - Conversation messages, oldest first.
 * @param {string} [options.model='gpt-4o'] - Model identifier.
 * @param {number} [options.temperature=0.7] - Sampling temperature; lower is more
 *   predictable, higher (up to 2) is more varied.
 * @param {number} [options.maxTokens=1024] - Sent to the API as `max_tokens`.
 * @param {?string} [options.systemPrompt=null] - When truthy, prepended to the
 *   conversation as a `system` message.
 * @returns {Promise<{content: *, usage: *, finishReason: *}>} Text, token usage
 *   and finish reason of the first choice.
 */
export async function chat({
  messages,
  model = 'gpt-4o',
  temperature = 0.7,
  maxTokens = 1024,
  systemPrompt = null,
}) {
  // Build a new array rather than mutating the caller's messages.
  let conversation = messages;
  if (systemPrompt) {
    conversation = [{ role: 'system', content: systemPrompt }, ...messages];
  }

  const request = {
    model,
    messages: conversation,
    temperature,
    max_tokens: maxTokens,
  };
  const completion = await openai.chat.completions.create(request);
  const firstChoice = completion.choices[0];

  return {
    content: firstChoice.message.content,
    usage: completion.usage,
    finishReason: firstChoice.finish_reason,
  };
}

finish_reason values you should check:

Value	Meaning
`stop`	Normal completion — model finished the response
`length`	Hit `max_tokens` limit — response was cut off
`content_filter`	Blocked by OpenAI's content policy
`tool_calls`	Model wants to call a tool

If you see `length`, the reply was truncated: increase `max_tokens`, or — if the conversation itself is filling the model's context window — trim the conversation history.

Express route

src/routes/chat.route.js
import { Router } from 'express';
import { chat } from '../services/openai.service.js';

const router = Router();

/**
 * POST / — run one non-streaming chat completion.
 *
 * Request body: { messages: Array, systemPrompt?: string }
 * Responses:
 *   200 { reply, usage }  on success
 *   400 { error }         when `messages` is missing, not an array, or empty
 * Any other failure is forwarded to the Express error handler via next(err).
 */
router.post('/', async (req, res, next) => {
  try {
    const { messages, systemPrompt } = req.body ?? {};

    // The body is untrusted input: reject malformed requests here instead of
    // failing with an opaque TypeError or spending an API call on them.
    if (!Array.isArray(messages) || messages.length === 0) {
      res.status(400).json({ error: '`messages` must be a non-empty array' });
      return;
    }

    const result = await chat({ messages, systemPrompt });
    res.json({ reply: result.content, usage: result.usage });
  } catch (err) {
    next(err);
  }
});

export default router;

Streaming with SSE

A non-streaming request holds the HTTP connection open until the entire response is ready — for a 500-word answer, that can be 10+ seconds of silence. Streaming sends each token as it is generated, giving users instant feedback.

How SSE works:

SSE is a one-way channel from server to client over a normal HTTP connection. The response content type is text/event-stream and each event is formatted as:

data: {"token": "Hello"}\n\n
data: {"token": " world"}\n\n
data: {"done": true}\n\n

The double newline \n\n terminates each event.

src/routes/chat.route.js — streaming endpoint
import openai from '../lib/openai.js';

/**
 * POST /stream — stream a chat completion to the client as Server-Sent Events.
 *
 * Request body: { messages: Array, systemPrompt?: string }
 * Events written, each as `data: <json>\n\n`:
 *   { token }               one per content delta
 *   { done, finishReason }  exactly once, after the upstream stream has ended
 *   { error }               if the upstream stream fails after streaming began
 * Failures before streaming starts go to the Express error handler (or 400 for
 * an invalid body).
 */
router.post('/stream', async (req, res, next) => {
  // Cancel the upstream OpenAI request as soon as the client goes away, so an
  // abandoned request stops generating (and billing) tokens.
  const upstream = new AbortController();
  res.on('close', () => upstream.abort());

  try {
    const { messages, systemPrompt } = req.body ?? {};
    if (!Array.isArray(messages) || messages.length === 0) {
      res.status(400).json({ error: '`messages` must be a non-empty array' });
      return;
    }

    const fullMessages = systemPrompt
      ? [{ role: 'system', content: systemPrompt }, ...messages]
      : messages;

    const stream = await openai.chat.completions.create(
      {
        model: 'gpt-4o',
        messages: fullMessages,
        stream: true,
      },
      { signal: upstream.signal },
    );

    // Switch to SSE only once the upstream call has succeeded; a failure above
    // then reaches the error handler without stray event-stream headers.
    res.setHeader('Content-Type', 'text/event-stream');
    res.setHeader('Cache-Control', 'no-cache');
    res.setHeader('Connection', 'keep-alive');
    res.setHeader('X-Accel-Buffering', 'no'); // disable Nginx buffering
    res.flushHeaders();

    let finishReason = null;
    for await (const chunk of stream) {
      const choice = chunk.choices[0];
      if (choice?.delta?.content) {
        res.write(`data: ${JSON.stringify({ token: choice.delta.content })}\n\n`);
      }
      if (choice?.finish_reason) {
        finishReason = choice.finish_reason;
      }
    }

    // Client disconnected mid-stream: nothing left to write to.
    if (upstream.signal.aborted) return;

    // Always signal completion, whatever the finish reason ('stop', 'length',
    // 'content_filter', ...), so the client never waits on a missing event.
    res.write(`data: ${JSON.stringify({ done: true, finishReason })}\n\n`);
    res.end();
  } catch (err) {
    // An abort triggered by the client closing the connection is not an error.
    if (upstream.signal.aborted) return;

    if (res.headersSent) {
      // Too late for an HTTP error status; report the failure in-band.
      res.write(`data: ${JSON.stringify({ error: err.message })}\n\n`);
      res.end();
    } else {
      next(err);
    }
  }
});

Consuming the stream on the frontend:

Frontend — fetch with ReadableStream
// Read the SSE stream from POST /api/chat/stream and feed each token to the UI.
const response = await fetch('/api/chat/stream', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({ messages }),
});

// fetch() only rejects on network failure; an HTTP error status still resolves.
if (!response.ok || !response.body) {
  throw new Error(`Chat stream request failed with status ${response.status}`);
}

const reader = response.body.getReader();
const decoder = new TextDecoder();
let buffer = '';      // text received so far that is not yet terminated by "\n\n"
let finished = false; // set once the server sends its `done` event

while (!finished) {
  const { done, value } = await reader.read();
  if (done) break;

  // { stream: true } keeps a multi-byte character that straddles two chunks intact.
  buffer += decoder.decode(value, { stream: true });

  // Network chunks do not line up with SSE events: only text followed by "\n\n"
  // is a complete event. Keep the trailing partial piece for the next read.
  const events = buffer.split('\n\n');
  buffer = events.pop();

  for (const event of events) {
    if (!event.startsWith('data: ')) continue;
    const payload = JSON.parse(event.slice(6));
    if (payload.error) throw new Error(payload.error);
    if (payload.done) {
      finished = true; // leave the outer read loop as well
      break;
    }
    appendToChat(payload.token); // update your UI
  }
}

Vision — Sending Images

GPT-4o is multimodal. You can send images alongside text using a structured content array:

Sending an image URL
// One user message carrying two content parts: a text question and an image
// referenced by URL.
const questionPart = { type: 'text', text: 'What is in this image?' };
const imagePart = {
  type: 'image_url',
  image_url: {
    url: 'https://example.com/photo.jpg',
    detail: 'high', // 'low', 'high', or 'auto'
  },
};

const response = await openai.chat.completions.create({
  model: 'gpt-4o',
  messages: [{ role: 'user', content: [questionPart, imagePart] }],
});

Sending a base64 image (e.g. from file upload)
import { readFileSync } from 'fs';

// Read the image from disk and inline it in the request as a base64 data URL.
const imageBytes = readFileSync('./photo.jpg');
const base64 = imageBytes.toString('base64');
const imageDataUrl = `data:image/jpeg;base64,${base64}`;

const chartMessage = {
  role: 'user',
  content: [
    { type: 'text', text: 'Describe this chart.' },
    { type: 'image_url', image_url: { url: imageDataUrl } },
  ],
};

const response = await openai.chat.completions.create({
  model: 'gpt-4o',
  messages: [chartMessage],
});

Embeddings

Embeddings convert text into a vector (array of numbers) that represents its semantic meaning. Useful for semantic search, document similarity, and RAG (Retrieval-Augmented Generation).

src/services/embeddings.service.js
import openai from '../lib/openai.js';

/**
 * Request an embedding for `text` from the `text-embedding-3-small` model.
 *
 * @param {string} text - Text to embed.
 * @returns {Promise<number[]>} The first embedding in the response
 *   (a float array, length 1536).
 */
export async function embed(text) {
  const request = { model: 'text-embedding-3-small', input: text };
  const { data } = await openai.embeddings.create(request);
  return data[0].embedding;
}

/**
 * Cosine similarity between two numeric vectors of equal length.
 *
 * @param {number[]} a - First vector.
 * @param {number[]} b - Second vector.
 * @returns {number} dot(a, b) / (|a| * |b|); 0 when either vector has zero
 *   magnitude (including empty vectors), where the ratio is undefined.
 * @throws {RangeError} If the vectors differ in length.
 */
export function cosineSimilarity(a, b) {
  // Comparing vectors of different dimensions is a caller bug; fail loudly
  // instead of returning NaN or a silently wrong score.
  if (a.length !== b.length) {
    throw new RangeError(`Vector length mismatch: ${a.length} vs ${b.length}`);
  }

  // Single pass: accumulate the dot product and both squared magnitudes together.
  let dot = 0;
  let sumSquaresA = 0;
  let sumSquaresB = 0;
  for (let i = 0; i < a.length; i += 1) {
    dot += a[i] * b[i];
    sumSquaresA += a[i] * a[i];
    sumSquaresB += b[i] * b[i];
  }

  const denominator = Math.sqrt(sumSquaresA) * Math.sqrt(sumSquaresB);
  return denominator === 0 ? 0 : dot / denominator;
}

Structured Output (JSON Mode)

JSON mode

const response = await openai.chat.completions.create({
  model: 'gpt-4o',
  response_format: { type: 'json_object' }, // JSON mode: the model emits a JSON document
  messages: [
    {
      role: 'system',
      content: 'You are a data extraction assistant. Always respond with valid JSON.',
    },
    {
      role: 'user',
      content: 'Extract name and email from: "Hi, I\'m Sarah at sarah@example.com"',
    },
  ],
});

const choice = response.choices[0];

// A reply that stopped at the token limit is cut off mid-document and is not
// parseable JSON. Fail with a clear message instead of a confusing SyntaxError
// from JSON.parse.
if (choice.finish_reason === 'length') {
  throw new Error('JSON response was truncated (finish_reason: length); raise max_tokens and retry');
}

const data = JSON.parse(choice.message.content);

Schema enforcement

With newer models you can enforce an exact JSON schema — the model is guaranteed to output only fields you define:

// JSON Schema the reply must conform to: exactly these three fields, no extras.
const reviewSchema = {
  type: 'object',
  properties: {
    sentiment: { type: 'string', enum: ['positive', 'neutral', 'negative'] },
    score: { type: 'number', description: 'Confidence score 0-1' },
    topics: { type: 'array', items: { type: 'string' } },
  },
  required: ['sentiment', 'score', 'topics'],
  additionalProperties: false,
};

const response = await openai.chat.completions.create({
  model: 'gpt-4o',
  messages: [{ role: 'user', content: 'Classify this review: "Great product, fast shipping!"' }],
  response_format: {
    type: 'json_schema',
    json_schema: {
      name: 'review_classification',
      strict: true,
      schema: reviewSchema,
    },
  },
});

Initializing the Client​

Chat Completions​

Express route​

Streaming with SSE​

Vision — Sending Images​

Embeddings​

Structured Output (JSON Mode)​

JSON mode​

Schema enforcement​