harden database interactions and improve error handling

2026-04-28 17:05:48 +01:00
parent e1f168a302
commit b9f7d1ff25
16 changed files with 980 additions and 159 deletions
@@ -4,7 +4,10 @@ import { eq } from "drizzle-orm";
 import { askAI } from "./openrouter";

 function parseJSON<T>(raw: string): T {
-  let text = raw.replace(/<think>[\s\S]*?<\/think>/gi, "").trim();
+  let text = raw
+    .replace(/<think>[\s\S]*?<\/think>/gi, "")
+    .replace(/^\s*thought\s*\n/i, "")
+    .trim();
  try {
    return JSON.parse(text);
  } catch {
@@ -9,7 +9,10 @@ function log(lessonId: string, msg: string) {
 }

 function parseJSON<T>(raw: string): T {
-  let text = raw.replace(/<think>[\s\S]*?<\/think>/gi, "").trim();
+  let text = raw
+    .replace(/<think>[\s\S]*?<\/think>/gi, "")
+    .replace(/^\s*thought\s*\n/i, "")
+    .trim();
  try {
    return JSON.parse(text);
  } catch {
@@ -195,10 +198,7 @@ Only generate branches for the 3 wrong options. Do not generate a branch for the
          }
        }

-        const BATCH = 4;
-        for (let i = 0; i < ttsTasks.length; i += BATCH) {
-          await Promise.all(ttsTasks.slice(i, i + BATCH).map((fn) => fn()));
-        }
+        await Promise.all(ttsTasks.map((fn) => fn()));

        log(lessonId, `  step ${si} branch TTS done`);
        branchSuccesses++;
@@ -20,7 +20,10 @@ async function setStage(courseId: string, stage: Stage) {
 }

 function parseJSON<T>(raw: string): T {
-  let text = raw.replace(/<think>[\s\S]*?<\/think>/gi, "").trim();
+  let text = raw
+    .replace(/<think>[\s\S]*?<\/think>/gi, "")
+    .replace(/^\s*thought\s*\n/i, "")
+    .trim();
  try {
    return JSON.parse(text);
  } catch {
@@ -14,8 +14,11 @@ function log(topicId: string, msg: string) {
 }

 function parseJSON<T>(raw: string): T {
-  // strip <think>...</think> blocks from reasoning models (deepseek-r1 etc.)
-  let text = raw.replace(/<think>[\s\S]*?<\/think>/gi, "").trim();
+  // strip reasoning preambles — <think>...</think> tags and bare "thought\n" prefix
+  let text = raw
+    .replace(/<think>[\s\S]*?<\/think>/gi, "")
+    .replace(/^\s*thought\s*\n/i, "")
+    .trim();
  try {
    return JSON.parse(text);
  } catch {
@@ -103,11 +106,7 @@ async function generateLessonAudio(
    }
  }

-  // run in batches of 4
-  const BATCH = 4;
-  for (let i = 0; i < tasks.length; i += BATCH) {
-    await Promise.all(tasks.slice(i, i + BATCH).map((fn) => fn()));
-  }
+  await Promise.all(tasks.map((fn) => fn()));

  return { steps, cost };
 }
@@ -1,17 +1,47 @@
 import { mkdir, writeFile, access } from "fs/promises";
 import { resolve } from "path";
 import { askAI } from "./openrouter";
+import { ttsLimiter } from "./limiter";

-const NARRATION_SYSTEM_PROMPT = `You are a narration script editor for an AI voice actor. Your job is to take educational lesson text and prepare it to be read aloud naturally and engagingly.
+const NARRATION_SYSTEM_PROMPT = `You are a narration script editor for an AI voice actor reading educational lesson content. Your job is to prepare text so it sounds natural, warm, and engaging when spoken aloud.

-Rules:
- Do NOT change the meaning, facts, or structure of the content. You are not rewriting it.
- Fix anything that would sound awkward or robotic when spoken: remove markdown formatting (asterisks, backticks, hashes), spell out acronyms where helpful, rephrase code snippets or technical shorthand into speakable language.
- Add square bracket cues to give the voice character and pacing. These are the only ones you may use: [pause], [long pause], [sighs], [laughs], [clears throat], [hesitates].
- Use [pause] at natural breath points — after key ideas, before a new concept, or mid-sentence where a human would pause for effect. Don't overdo it; one every few sentences at most.
- Use [sighs] or [laughs] very sparingly — only where a human narrator genuinely would. A [sighs] before a tricky concept, a [laughs] when something is ironic or light. Maybe once or twice per lesson, if at all.
- Keep the tone warm, clear, and conversational — like a knowledgeable friend explaining something, not a textbook being read aloud.
- Return ONLY the modified narration text. No commentary, no explanation, no quotes around the output.`;
+## Content rules
+- Do NOT change the meaning, facts, or structure. You are not rewriting the lesson.
+- Fix anything that sounds robotic or awkward when spoken: strip markdown (asterisks, backticks, hashes, bullet dashes), spell out acronyms where helpful, rephrase code snippets or URLs into speakable language (e.g. "the fetch function" not "\`fetch()\`").
+
+## Voice control tags
+You have a rich set of square bracket tags to shape how the voice sounds. Use them tastefully — a well-placed tag is powerful, overuse kills it.
+
+**Pacing**
+[pause] — a natural breath beat, use at transitions or after key ideas
+[long pause] — a longer held silence, use for emphasis or before something important
+[short pause] — a very brief beat
+
+**Non-verbal sounds** (use sparingly, one or two per lesson max)
+[breath] — a natural inhale, good at the start of a new thought or after a long sentence
+[sighs] — before a tricky concept, or when something is a bit of a pain
+[laughs] — when something is genuinely ironic, surprising, or lightly funny
+[chuckles] — softer than laughs, more conversational
+[exhales] — a quiet breath out, good for winding down a dense section
+[clears throat] — before jumping into something more formal or detailed
+[gasp] — for something genuinely surprising
+
+**Delivery style** (can be chained, effect lasts until next tag or end of sentence)
+[curious] — lean in, raise intrigue
+[excited] — energy up, good for "here's the cool part"
+[whispers] — draw the listener in for an aside
+[nervous] — for content where a student might feel anxious (e.g. exams)
+[calm] — reassuring, slows things down
+[sarcastic] — very sparingly, only when the tone clearly calls for it
+
+## Placement guidance
+- [pause] can go mid-sentence before a key term, or at the end of a sentence before shifting topic
+- Emotional tags go BEFORE the text they should affect, and return to neutral naturally after a sentence or two
+- Don't open with a tag — let the voice settle first
+- Avoid back-to-back tags with no words between them
+
+## Output
+Return ONLY the modified narration text. No commentary, no labels, no quotes.`;

 async function humaniseTTSText(text: string): Promise<string> {
  try {
@@ -47,26 +77,45 @@ async function callElevenLabs(
  apiKey: string,
  voiceId: string
 ): Promise<{ audio: Buffer; chunks: AudioChunk[]; cost: number } | null> {
-  const res = await fetch(
-    `https://api.elevenlabs.io/v1/text-to-speech/${voiceId}/with-timestamps`,
-    {
-      method: "POST",
-      headers: {
-        "xi-api-key": apiKey,
-        "Content-Type": "application/json",
-      },
-      body: JSON.stringify({
-        text,
-        model_id: "eleven_turbo_v2_5",
-        output_format: "mp3_44100_128",
-      }),
-      signal: AbortSignal.timeout(60_000),
+  const MAX_RETRIES = 5;
+  let delay = 2000;
+
+  let res!: Response;
+  for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
+    res = await fetch(
+      `https://api.elevenlabs.io/v1/text-to-speech/${voiceId}/with-timestamps`,
+      {
+        method: "POST",
+        headers: {
+          "xi-api-key": apiKey,
+          "Content-Type": "application/json",
+        },
+        body: JSON.stringify({
+          text,
+          model_id: "eleven_v3",
+          output_format: "mp3_44100_128",
+        }),
+        signal: AbortSignal.timeout(60_000),
+      }
+    );
+
+    if (res.ok) break;
+
+    if (res.status === 429 && attempt < MAX_RETRIES) {
+      console.warn(`[tts] ElevenLabs 429 — retry ${attempt + 1}/${MAX_RETRIES} in ${delay}ms`);
+      await new Promise((r) => setTimeout(r, delay));
+      delay *= 2;
+      continue;
    }
-  );
+
+    const errText = await res.text().catch(() => "");
+    console.error(`[tts] ElevenLabs error ${res.status}: ${errText}`);
+    return null;
+  }

  if (!res.ok) {
    const errText = await res.text().catch(() => "");
-    console.error(`[tts] ElevenLabs error ${res.status}: ${errText}`);
+    console.error(`[tts] ElevenLabs failed after ${MAX_RETRIES} retries: ${res.status} ${errText}`);
    return null;
  }

@@ -140,6 +189,8 @@ async function callFishAudio(
      format: "mp3",
      mp3_bitrate: 128,
      streaming: false,
+      normalize: false,
+      model: "s2",
    }),
    signal: AbortSignal.timeout(60_000),
  });
@@ -172,13 +223,15 @@ async function callTTS(
    const apiKey = config.fishAudioApiKey as string;
    const voiceId = (config.public.fishAudioVoiceId || config.fishAudioVoiceId) as string;
    if (!apiKey) return null;
-    return callFishAudio(text, apiKey, voiceId);
+    console.log(`[tts] queued (fish) — active: ${ttsLimiter.active}, queued: ${ttsLimiter.queued}`);
+    return ttsLimiter.run(() => callFishAudio(text, apiKey, voiceId));
  }

  const apiKey = config.elevenlabsApiKey as string;
  const voiceId = (config.public.elevenlabsVoiceId || config.elevenlabsVoiceId) as string;
  if (!apiKey) return null;
-  return callElevenLabs(text, apiKey, voiceId);
+  console.log(`[tts] queued (elevenlabs) — active: ${ttsLimiter.active}, queued: ${ttsLimiter.queued}`);
+  return ttsLimiter.run(() => callElevenLabs(text, apiKey, voiceId));
 }


@@ -191,12 +244,12 @@ export async function generateStepTTS(
    const result = await callTTS(text);
    if (!result) return null;

-    const dir = resolve(process.cwd(), `public/audio/lessons/${lessonId}`);
+    const dir = resolve(process.cwd(), `data/audio/lessons/${lessonId}`);
    await mkdir(dir, { recursive: true });
    const filename = `step_${stepIndex}.mp3`;
    await writeFile(`${dir}/${filename}`, result.audio);

-    const audioPath = `/audio/lessons/${lessonId}/${filename}`;
+    const audioPath = `/api/audio/lessons/${lessonId}/${filename}`;
    console.log(`[tts] step ${stepIndex} for lesson ${lessonId} — ${result.chunks.length} chunks | $${result.cost.toFixed(4)}`);
    return { audioPath, audioChunks: result.chunks, cost: result.cost };
  } catch (err: any) {
@@ -214,12 +267,12 @@ export async function generateQuestionTTS(
    const result = await callTTS(text);
    if (!result) return null;

-    const dir = resolve(process.cwd(), `public/audio/lessons/${lessonId}`);
+    const dir = resolve(process.cwd(), `data/audio/lessons/${lessonId}`);
    await mkdir(dir, { recursive: true });
    const filename = `step_${stepIndex}_question.mp3`;
    await writeFile(`${dir}/${filename}`, result.audio);

-    const audioPath = `/audio/lessons/${lessonId}/${filename}`;
+    const audioPath = `/api/audio/lessons/${lessonId}/${filename}`;
    return { audioPath, audioChunks: result.chunks, cost: result.cost };
  } catch (err: any) {
    console.error(`[tts] question ${stepIndex} for lesson ${lessonId} failed: ${err?.message ?? err}`);
@@ -237,12 +290,12 @@ export async function generateOptionTTS(
    const result = await callTTS(text);
    if (!result) return null;

-    const dir = resolve(process.cwd(), `public/audio/lessons/${lessonId}`);
+    const dir = resolve(process.cwd(), `data/audio/lessons/${lessonId}`);
    await mkdir(dir, { recursive: true });
    const filename = `step_${stepIndex}_option_${optionIndex}.mp3`;
    await writeFile(`${dir}/${filename}`, result.audio);

-    const audioPath = `/audio/lessons/${lessonId}/${filename}`;
+    const audioPath = `/api/audio/lessons/${lessonId}/${filename}`;
    return { audioPath, cost: result.cost };
  } catch (err: any) {
    console.error(`[tts] option ${stepIndex}/${optionIndex} for lesson ${lessonId} failed: ${err?.message ?? err}`);
@@ -282,6 +335,8 @@ export async function generateClip(
          format: "mp3",
          mp3_bitrate: 128,
          streaming: false,
+          normalize: false,
+          model: "s2",
        }),
        signal: AbortSignal.timeout(60_000),
      });
@@ -304,7 +359,7 @@ export async function generateClip(
          },
          body: JSON.stringify({
            text,
-            model_id: opts?.model ?? "eleven_turbo_v2_5",
+            model_id: opts?.model ?? "eleven_v3",
            output_format: "mp3_44100_128",
            ...(opts?.voice_settings ? { voice_settings: opts.voice_settings } : {}),
          }),
@@ -323,7 +378,7 @@ export async function generateClip(
      buffer = Buffer.from(await res.arrayBuffer());
    }

-    await mkdir(resolve(process.cwd(), "public/audio/labels"), { recursive: true });
+    await mkdir(resolve(process.cwd(), "data/audio/labels"), { recursive: true });
    await writeFile(outPath, buffer);
    return { cost };
  } catch (err: any) {
@@ -341,11 +396,11 @@ export async function generateTTSToPath(
    const result = await callTTS(text);
    if (!result) return null;

-    const dir = resolve(process.cwd(), `public/audio/lessons/${lessonId}`);
+    const dir = resolve(process.cwd(), `data/audio/lessons/${lessonId}`);
    await mkdir(dir, { recursive: true });
    await writeFile(`${dir}/${filename}`, result.audio);

-    const audioPath = `/audio/lessons/${lessonId}/${filename}`;
+    const audioPath = `/api/audio/lessons/${lessonId}/${filename}`;
    return { audioPath, audioChunks: result.chunks, cost: result.cost };
  } catch (err: any) {
    console.error(`[tts] ${filename} for lesson ${lessonId} failed: ${err?.message ?? err}`);
@@ -0,0 +1,41 @@
+class Limiter { // Concurrency gate: at most `max` tasks run at once; extra callers wait in FIFO order.
+  private running = 0; // number of slots currently taken
+  private queue: (() => void)[] = []; // wake-up callbacks of waiting callers, oldest first
+  // `max` is the number of tasks allowed to run at once. NOTE(review): not validated — a value below 1 leaves every caller queued forever.
+  constructor(private max: number) {}
+  // Waits for a free slot, runs `fn`, and returns (or rethrows) its outcome.
+  async run<T>(fn: () => Promise<T>): Promise<T> {
+    await this.acquire();
+    try {
+      return await fn();
+    } finally {
+      this.release(); // always give the slot back, even when `fn` rejects
+    }
+  }
+  // Takes a slot right away when one is free; otherwise parks the caller until release() wakes it.
+  private acquire(): Promise<void> {
+    if (this.running < this.max) {
+      this.running++;
+      return Promise.resolve();
+    }
+    return new Promise(resolve => {
+      this.queue.push(() => { this.running++; resolve(); }); // the woken caller takes the slot itself
+    });
+  }
+  // Frees one slot and, if anyone is waiting, wakes the oldest waiter.
+  private release() {
+    this.running--;
+    const next = this.queue.shift();
+    if (next) next();
+  }
+  // Number of slots currently taken.
+  get active() { return this.running; }
+  // Number of callers still waiting for a slot.
+  get queued() { return this.queue.length; }
+}
+
+// Shared by all TTS calls (ElevenLabs and Fish Audio); ElevenLabs recommends max 2-3 concurrent requests
+export const ttsLimiter = new Limiter(2);
+
+// OpenRouter concurrent request cap — askAI wraps its OpenRouter request in this limiter
+export const aiLimiter = new Limiter(4);
@@ -1,3 +1,5 @@
+import { aiLimiter } from "./limiter";
+
 interface Message {
  role: "system" | "user" | "assistant";
  content: string;
@@ -39,7 +41,8 @@ export async function askAI(messages: Message[], options: AskAIOptions = {}): Pr
    const t0 = Date.now();

    try {
-      const res = await $fetch<{ id?: string; choices: { message: { content: string } }[]; usage?: { prompt_tokens?: number; completion_tokens?: number; cost?: number } }>(
+      console.log(`[openrouter] queued — active: ${aiLimiter.active}, queued: ${aiLimiter.queued}`);
+      const res = await aiLimiter.run(() => $fetch<{ id?: string; choices: { message: { content: string } }[]; usage?: { prompt_tokens?: number; completion_tokens?: number; cost?: number } }>(
        "https://openrouter.ai/api/v1/chat/completions",
        {
          method: "POST",
@@ -57,7 +60,7 @@ export async function askAI(messages: Message[], options: AskAIOptions = {}): Pr
          },
          signal: AbortSignal.timeout(600_000),
        }
-      );
+      ));

      const elapsed = ((Date.now() - t0) / 1000).toFixed(1);
      const content = res.choices?.[0]?.message?.content;