// Revisione/server/utils/generateLesson.ts
//
// 432 lines
// 18 KiB
// TypeScript

import { db } from "../db/index";
import { courses, uploads, topics, lessons, quizQuestions } from "../db/schema";
import { eq, inArray, and } from "drizzle-orm";
import { randomUUID } from "crypto";
import { askAI } from "./openrouter";
import { generateStepTTS, generateQuestionTTS, generateOptionTTS } from "./generateTTS";
import { generateBranches } from "./generateBranches";
// One promise chain per topic so we don't double-generate.
// Key: topic id. Value: a promise that settles once the most recently queued
// generateLesson run for that topic has finished; a new run awaits the current
// value before it starts and then stores its own link in the chain.
const topicMutexes = new Map<string, Promise<void>>();
/**
 * Writes a progress line to stdout, tagged with the first eight characters
 * of the topic id so interleaved generations can be told apart.
 *
 * @param topicId Id of the topic the message belongs to.
 * @param msg     Text to print after the tag.
 */
function log(topicId: string, msg: string): void {
  const shortId = topicId.slice(0, 8);
  const tag = "[lesson:" + shortId + "]";
  console.log(tag + " " + msg);
}
/**
 * Parses a JSON payload out of a raw model reply.
 *
 * Reasoning models may prepend `<think>…</think>` blocks, and many models wrap
 * the payload in markdown fences or surround it with a sentence of prose even
 * when told not to. Candidates are tried from strictest to loosest, so any
 * reply that is already clean JSON is parsed exactly as written:
 *   1. the reply with `<think>` blocks removed,
 *   2. the same text with a leading/trailing code fence stripped,
 *   3. the body of the first fenced block found anywhere in the reply,
 *   4. the span from the first `{` / `[` to the last matching `}` / `]`.
 *
 * The result is NOT validated against `T`; callers must check its shape.
 *
 * @param raw Raw text returned by the model.
 * @returns The parsed value, typed as `T` by assertion only.
 * @throws SyntaxError (from `JSON.parse`) when no candidate is valid JSON.
 */
function parseJSON<T>(raw: string): T {
  // strip <think>...</think> blocks from reasoning models (deepseek-r1 etc.)
  const text = raw.replace(/<think>[\s\S]*?<\/think>/gi, "").trim();

  const candidates: string[] = [text];

  // reply is exactly one fenced block
  candidates.push(text.replace(/^```[a-z]*\n?/i, "").replace(/\n?```$/i, "").trim());

  // fenced block surrounded by prose
  const fencedBody = /```[a-z]*\s*([\s\S]*?)```/i.exec(text)?.[1];
  if (fencedBody !== undefined) candidates.push(fencedBody.trim());

  // bare JSON surrounded by prose: take the outermost object/array span
  const start = text.search(/[{[]/);
  if (start !== -1) {
    const closer = text.charAt(start) === "{" ? "}" : "]";
    const end = text.lastIndexOf(closer);
    if (end > start) candidates.push(text.slice(start, end + 1));
  }

  let lastError: unknown;
  for (const candidate of candidates) {
    try {
      return JSON.parse(candidate) as T;
    } catch (err: unknown) {
      lastError = err;
    }
  }
  throw lastError;
}
/**
 * Generates TTS audio for every narratable part of a lesson and records the
 * resulting paths on the step objects.
 *
 * What gets narrated:
 *  - `concept` / `example` steps: `body` and `callout` joined with a space,
 *  - `summary` steps: the `bullets` joined with ". ",
 *  - `question` steps: the question `body`, plus one clip per non-empty option.
 *
 * Steps are mutated in place (`audioPath`, `audioChunks`, `questionAudioPath`,
 * `questionAudioChunks`, `optionAudioPaths`), and the same array is returned.
 * Steps of any other type, and steps with no text, are left untouched.
 *
 * Clips are generated in batches of 4. A clip that rejects is logged and
 * skipped; it does not abort the remaining batches, and the cost of clips that
 * did succeed is still reported.
 *
 * @param steps    Lesson steps as produced by the model (mutated in place).
 * @param lessonId Id of the lesson, forwarded to the TTS helpers.
 * @param topicId  Id of the topic; used for log tagging only.
 * @returns The (mutated) steps and the summed cost of all successful clips.
 */
async function generateLessonAudio(
  steps: any[],
  lessonId: string,
  topicId: string
): Promise<{ steps: any[]; cost: number }> {
  let cost = 0;

  // build a flat list of tasks so we can batch them
  type TTSTask = () => Promise<void>;
  const tasks: TTSTask[] = [];

  for (let si = 0; si < steps.length; si++) {
    const step = steps[si];

    if (step.type === "concept" || step.type === "example" || step.type === "summary") {
      const text: string =
        step.type === "summary"
          ? (Array.isArray(step.bullets) ? step.bullets.join(". ") : "")
          : [step.body, step.callout].filter(Boolean).join(" ");
      if (!text.trim()) continue;

      tasks.push(async () => {
        const result = await generateStepTTS(text, lessonId, si);
        if (result) {
          step.audioPath = result.audioPath;
          step.audioChunks = result.audioChunks;
          cost += result.cost;
        }
        log(topicId, ` step ${si} (${step.type}) TTS done`);
      });
      continue;
    }

    if (step.type !== "question") continue;

    // All clips belonging to this one question; collected first so the
    // "done" line can be logged when the last of them actually finishes.
    const questionJobs: TTSTask[] = [];

    if (typeof step.body === "string" && step.body.trim()) {
      questionJobs.push(async () => {
        const qResult = await generateQuestionTTS(step.body, lessonId, si);
        if (qResult) {
          step.questionAudioPath = qResult.audioPath;
          step.questionAudioChunks = qResult.audioChunks;
          cost += qResult.cost;
        }
      });
    }

    if (Array.isArray(step.options)) {
      // one slot per option; stays null for options without audio
      step.optionAudioPaths = new Array(step.options.length).fill(null);
      for (let oi = 0; oi < step.options.length; oi++) {
        const optText: unknown = step.options[oi];
        if (typeof optText !== "string" || !optText.trim()) continue;
        questionJobs.push(async () => {
          const oResult = await generateOptionTTS(optText, lessonId, si, oi);
          if (oResult) {
            step.optionAudioPaths[oi] = oResult.audioPath;
            cost += oResult.cost;
          }
        });
      }
    }

    let outstanding = questionJobs.length;
    for (const job of questionJobs) {
      tasks.push(async () => {
        await job();
        outstanding -= 1;
        if (outstanding === 0) {
          log(topicId, ` step ${si} (question) TTS done`);
        }
      });
    }
  }

  // run in batches of 4
  const BATCH = 4;
  for (let i = 0; i < tasks.length; i += BATCH) {
    const outcomes = await Promise.allSettled(tasks.slice(i, i + BATCH).map((fn) => fn()));
    for (const outcome of outcomes) {
      if (outcome.status === "rejected") {
        const reason: unknown = outcome.reason;
        const message = reason instanceof Error ? reason.message : String(reason);
        console.error(`[lesson:${topicId.slice(0, 8)}] TTS task failed: ${message}`);
      }
    }
  }

  return { steps, cost };
}
/** One quiz question in the shape the quiz prompt asks the model to return. */
type GeneratedQuizQuestion = {
  question: string;
  type: "mcq" | "short_answer" | "worked";
  options: string[] | null;
  answer: string;
  explanation: string;
};

/**
 * Validates the parsed quiz payload before anything is written to the
 * database. Validation only: the payload is returned unchanged.
 *
 * @param value Whatever the model's quiz reply parsed to.
 * @returns The same value, typed as a list of quiz questions.
 * @throws Error when the payload is not a non-empty array, or an entry has no
 *         question text, no answer, an unknown `type`, or non-array `options`.
 */
function validateQuizQuestions(value: unknown): GeneratedQuizQuestion[] {
  if (!Array.isArray(value) || value.length === 0) {
    throw new Error("quiz content is not a non-empty array of questions");
  }
  const allowedTypes: string[] = ["mcq", "short_answer", "worked"];
  value.forEach((entry: unknown, i: number) => {
    const q = (entry ?? {}) as Record<string, unknown>;
    if (typeof q.question !== "string" || !q.question.trim()) {
      throw new Error(`quiz question ${i + 1} has no question text`);
    }
    if (q.answer == null || !String(q.answer).trim()) {
      throw new Error(`quiz question ${i + 1} has no answer`);
    }
    if (typeof q.type !== "string" || !allowedTypes.includes(q.type)) {
      throw new Error(`quiz question ${i + 1} has an unknown type: ${String(q.type)}`);
    }
    if (q.options != null && !Array.isArray(q.options)) {
      throw new Error(`quiz question ${i + 1} has options that are not an array`);
    }
  });
  return value as GeneratedQuizQuestion[];
}

/**
 * Generates the lesson, quiz and narration for one topic and stores them.
 *
 * Flow:
 *  1. Queue behind any run already in flight for this topic (in-process chain),
 *     then claim the topic by flipping its status `pending` → `generating`.
 *     If the claim matches no row the call returns without doing anything.
 *  2. Load the course, the ordered topic list, key concepts / analogies of
 *     earlier `ready` topics, and the extracted text of the relevant uploads.
 *  3. Ask the model for the lesson and validate that it has typed steps.
 *  4. Ask the model for the quiz and validate it — still before any DB write.
 *  5. Insert the lesson, insert the quiz questions, mark the topic `ready`.
 *  6. Generate TTS and update the lesson row with audio paths and costs.
 *     A TTS failure is logged and does not fail the lesson.
 *  7. Start branch generation without awaiting it.
 *
 * Any error thrown in steps 1-6 is logged and the topic is marked `error`;
 * the error is not re-thrown. The returned promise only rejects if that final
 * status update itself fails.
 *
 * @param topicId Id of the topic to generate a lesson for.
 */
export async function generateLesson(topicId: string): Promise<void> {
  const describeError = (err: unknown): string =>
    err instanceof Error ? err.message : String(err);

  // chain onto existing promise for this topic so only one runs at a time
  const prev = topicMutexes.get(topicId) ?? Promise.resolve();
  let resolveMutex!: () => void;
  const thisSlot = new Promise<void>((res) => { resolveMutex = res; });
  const chained = prev.then(() => thisSlot);
  topicMutexes.set(topicId, chained);
  await prev;

  try {
    // ── Step 1 — atomically claim the topic (only if still pending) ─────────
    const claimed = await db
      .update(topics)
      .set({ status: "generating" })
      .where(and(eq(topics.id, topicId), eq(topics.status, "pending")))
      .returning();
    if (claimed.length === 0) {
      log(topicId, "topic not in pending state, bailing out — already generating or ready");
      return;
    }

    // ── Step 2 — load context ───────────────────────────────────────────────
    const topic = await db.query.topics.findFirst({ where: eq(topics.id, topicId) });
    if (!topic) throw new Error(`Topic ${topicId} not found`);
    const course = await db.query.courses.findFirst({ where: eq(courses.id, topic.courseId) });
    if (!course) throw new Error(`Course ${topic.courseId} not found`);

    // completed lessons for this course (for prior knowledge context)
    const allTopics = await db.query.topics.findMany({
      where: eq(topics.courseId, topic.courseId),
      orderBy: (t, { asc }) => asc(t.order),
    });
    const completedLessons: { order: number; title: string; keyConcepts: string[]; analogiesUsed: string[] }[] = [];
    const priorTopics = allTopics.filter((t) => t.status === "ready" && t.order < topic.order);
    if (priorTopics.length > 0) {
      const priorLessons = await db.query.lessons.findMany({
        where: inArray(lessons.topicId, priorTopics.map((t) => t.id)),
      });
      const lessonByTopicId = new Map(priorLessons.map((l) => [l.topicId, l]));
      for (const t of priorTopics) {
        const l = lessonByTopicId.get(t.id);
        if (!l) continue;
        try {
          const parsed = JSON.parse(l.content) as { keyConcepts?: unknown; analogiesUsed?: unknown };
          completedLessons.push({
            order: t.order,
            title: t.title,
            keyConcepts: Array.isArray(parsed.keyConcepts) ? parsed.keyConcepts : [],
            analogiesUsed: Array.isArray(parsed.analogiesUsed) ? parsed.analogiesUsed : [],
          });
        } catch { /* skip malformed */ }
      }
    }

    // load relevant source files
    const topicRelevantFiles: string[] = (() => {
      try {
        const parsed: unknown = JSON.parse(topic.relevantFiles ?? "[]");
        return Array.isArray(parsed) ? parsed : [];
      } catch {
        return [];
      }
    })();
    const uploadRows = await db.query.uploads.findMany({
      where: eq(uploads.courseId, topic.courseId),
    });
    const relevantUploads = topicRelevantFiles.length > 0
      ? uploadRows.filter((u) => topicRelevantFiles.includes(u.filename) && u.extractedText)
      : uploadRows.filter((u) => (u.type === "past_paper" || u.type === "lab_worksheet") && u.extractedText);
    const primaryTextForLesson = relevantUploads
      .map((u) => `--- ${u.filename} ---\n${u.extractedText}`)
      .join("\n\n");
    const secondaryTextForLesson = topicRelevantFiles.length > 0
      ? uploadRows
          .filter((u) => topicRelevantFiles.includes(u.filename) && u.extractedText && u.type === "slides")
          .map((u) => `--- ${u.filename} ---\n${u.extractedText}`)
          .join("\n\n")
      : uploadRows
          .filter((u) => u.type === "slides" && u.extractedText)
          .map((u) => `--- ${u.filename} ---\n${u.extractedText}`)
          .join("\n\n");
    const topicListText = allTopics.map((t) => `${t.order + 1}. ${t.title}`).join("\n");
    const isFirst = topic.order === 0;
    const courseSubject = course.subject;

    // What the student already knows. Number and title are separated so the
    // model does not read "Lesson 2Sorting"; an empty list is stated explicitly.
    const priorKnowledgeText = isFirst
      ? `This is the very first lesson. The student knows nothing about this subject yet. Start from absolute zero.`
      : completedLessons.length > 0
        ? completedLessons.map((l) => `Lesson ${l.order + 1} — ${l.title}: ${l.keyConcepts.join(", ")}`).join("\n")
        : "(no earlier lessons have been generated yet — assume the student knows nothing about this subject)";

    // Title and description are separated; a missing description is omitted
    // instead of being rendered as "null".
    const currentLessonText = topic.description
      ? `${topic.title} — ${topic.description}`
      : topic.title;

    // The "never repeat an analogy" rule is only enforceable if the model is
    // told which analogies earlier lessons used. Empty string when there are
    // none, so the prompt is unchanged in that case.
    const priorAnalogies = [...new Set(completedLessons.flatMap((l) => l.analogiesUsed))];
    const usedAnalogiesRule = priorAnalogies.length > 0
      ? `- Analogies already used in previous lessons (do not reuse any of these): ${priorAnalogies.join("; ")}\n`
      : "";

    // ── Step 3 — generate lesson ────────────────────────────────────────────
    const lessonPrompt = `You are writing a lesson for a course on ${courseSubject}.
YOUR ONLY MEASURE OF SUCCESS:
A student who completes this lesson must be able to answer any past paper or lab question that requires knowledge of this topic. That means they must be able to DO the thing, not just understand it. If this topic involves a calculation, they must be able to perform it. If it involves an algorithm, they must be able to apply it step by step. If it involves pseudocode, they must be able to write it. Conceptual understanding alone is never the goal — competence is the goal.
WHAT THE STUDENT KNOWS:
- Basic English, everyday maths (arithmetic, simple algebra, fractions, proportions), and general school-level science
- Nothing domain-specific about ${courseSubject} unless it appears below
- Everything explicitly taught in previous lessons:
${priorKnowledgeText}
DO NOT use any technical term that does not appear in the above list or is not introduced and explained in the current lesson. This is a hard rule. It applies everywhere — questions, options, callouts, summaries.
COURSE STRUCTURE:
This course has ${allTopics.length} lessons in this order:
${topicListText}
YOUR CURRENT LESSON: ${currentLessonText}
SOURCE MATERIAL:
The following are the actual source files relevant to this topic — past papers, lab worksheets, and lecture slides. Your lesson must prepare the student to answer every question in these files that relates to this topic:
${primaryTextForLesson || "(no primary sources provided)"}
${secondaryTextForLesson ? `\nLECTURE SLIDES:\n${secondaryTextForLesson}` : ""}
TEACHING PHILOSOPHY:
OPENING:
- The very first sentence must make the student curious, smile, or feel something. Never open with a definition, a recap, or a statement of what they are about to learn.
- Open with the analogy or human moment immediately.
ANALOGIES:
- Every concept step must open with a concrete real-world analogy before any technical language.
- The analogy comes first. The technical idea is revealed through it.
- Never repeat an analogy used in a previous lesson.
${usedAnalogiesRule}- Analogies must connect to everyday life, not the subject domain.
BUILDING ON PRIOR KNOWLEDGE:
- Freely use terms and concepts from completed lessons without re-explaining them.
- Reference prior concepts as bridges: "remember how X worked — this is that same idea applied to Y."
MATHEMATICS, ALGORITHMS, AND PROCEDURES:
- Before any formula, write one sentence in plain English saying what the relationship means intuitively. Vary the phrasing — never use "In plain terms" more than once per lesson.
- After introducing a formula or algorithm, immediately show a complete worked example that matches the style of the past paper questions for this topic.
- Never show more than one formula per step.
- Never introduce a variable without saying in plain English what it represents.
- If this topic requires the student to perform a procedure step by step, there must be at least one example step that walks through the complete procedure on a concrete example, showing every step explicitly.
- If past papers ask for pseudocode on this topic, there must be a concept or example step that shows the pseudocode and explains each line.
QUESTIONS:
- Every question must be answerable using only what has been explicitly taught in this lesson up to that point, plus concepts from completed lessons.
- Questions immediately after the first concept step must be the simplest — their only job is to confirm the student understood the core analogy.
- Never ask a student to perform a full calculation in a single question. Use only:
(a) PARTIAL WORKING: Show known values and partial working, ask the student to identify the correct next step.
(b) INTERPRET THE RESULT: Give the numerical answer, ask what it means in context.
(c) SPOT THE ERROR: Show a worked example with a mistake, ask the student to identify what went wrong and why.
- Answer options must be short and scannable — under 15 words each.
- Wrong answer options must represent genuine conceptual misconceptions, not arithmetic errors.
- Never use a technical term in any answer option that has not already been taught.
RHYTHM AND PACING:
- Never place two question steps consecutively without a concept or example step between them.
- The lesson must not become more question-heavy in the second half.
- A concept or example step must always appear after the final question and before the summary.
- Every concept and example step body: 3-4 sentences maximum.
- The lesson should feel like it has a rhythm: teach, check, teach, check, show, check, land.
TONE:
- Warm, clear, occasionally witty. The most engaging teacher the student has ever had.
- Never dry, never robotic, never formal for formality's sake.
- Short sentences. Active voice. Concrete over abstract.
SUMMARY:
- Bullet count must exactly match keyConcepts count.
- Each bullet must be a complete thought that makes sense without reading the lesson.
- The final bullet must gesture forward — what will this knowledge unlock?
OUTPUT FORMAT:
Return only valid JSON with no markdown fences:
{
"keyConcepts": ["..."],
"analogiesUsed": ["..."],
"steps": [
{ "type": "concept", "title": "...", "body": "..." },
{ "type": "question", "body": "...", "options": ["...", "...", "...", "..."], "answer": "full correct answer text", "explanation": "..." },
{ "type": "example", "title": "...", "body": "...", "callout": "..." },
{ "type": "question", "body": "...", "options": ["...", "...", "...", "..."], "answer": "full correct answer text", "explanation": "..." },
{ "type": "summary", "title": "Key Takeaways", "bullets": ["...", "..."] }
]
}
Steps must interleave concept/example and question types — never two questions or two concepts in a row. Minimum 6 steps, maximum 16. Use more steps when the topic requires it to achieve full competence.`;
    log(topicId, `generating lesson for "${topic.title}"…`);
    const lessonResult = await askAI([{ role: "user", content: lessonPrompt }]);
    let costAI = lessonResult.cost;
    const lessonContent: { keyConcepts: string[]; analogiesUsed: string[]; steps: any[] } = parseJSON(lessonResult.text);

    // validate shape
    if (!Array.isArray(lessonContent.steps) || lessonContent.steps.length === 0) {
      throw new Error("lesson content has no steps");
    }
    for (const step of lessonContent.steps) {
      if (!step.type) throw new Error(`a lesson step is missing the type field`);
    }

    // ── Step 4 — generate quiz in memory (before any DB writes) ────────────
    const quizPrompt = `You are an exam question writer for a university course on ${courseSubject}.
COURSE CONTEXT:
The student has just completed a lesson on "${topic.title}" which covered: ${(lessonContent.keyConcepts ?? []).join(", ")}.
This is topic ${topic.order + 1} of ${allTopics.length} — difficulty level: ${topic.difficulty}/5.
SOURCE MATERIAL FOR THIS TOPIC (use these to match question style, difficulty, and content exactly):
${primaryTextForLesson || "(none provided)"}
Generate 4 quiz questions for this topic. Mix MCQ and short_answer types. For MCQ, provide 4 options labeled A, B, C, D.
Match the difficulty level — topic 1 should be very approachable, later topics can be more demanding.
Respond with ONLY valid JSON array, no markdown fences:
[
{
"question": "...",
"type": "mcq",
"options": ["A. ...", "B. ...", "C. ...", "D. ..."],
"answer": "A",
"explanation": "..."
},
{
"question": "...",
"type": "short_answer",
"options": null,
"answer": "...",
"explanation": "..."
}
]`;
    log(topicId, "generating quiz…");
    const quizResult = await askAI([{ role: "user", content: quizPrompt }]);
    costAI += quizResult.cost;
    // Validate here, while nothing has been written yet, so a malformed quiz
    // cannot leave a lesson row behind without its questions.
    const questions = validateQuizQuestions(parseJSON<unknown>(quizResult.text));

    // ── Step 5 — persist lesson + quiz + topic status ───────────────────────
    const lessonId = randomUUID();
    const ttsProvider = (useRuntimeConfig().ttsProvider as string | undefined)?.toLowerCase() ?? "elevenlabs";
    // better-sqlite3 doesnt support async transactions — run inserts sequentially
    // (these writes are therefore NOT atomic)
    await db.insert(lessons).values({
      id: lessonId,
      topicId: topic.id,
      content: JSON.stringify(lessonContent),
      ttsProvider,
      costAI,
      costAudio: 0,
      costBranchAI: 0,
      costBranchAudio: 0,
      costTotal: costAI,
      branchStatus: "pending",
    });
    for (const q of questions) {
      await db.insert(quizQuestions).values({
        id: randomUUID(),
        topicId: topic.id,
        question: q.question,
        type: q.type,
        options: q.options ? JSON.stringify(q.options) : null,
        answer: q.answer,
        explanation: q.explanation,
      });
    }
    await db.update(topics).set({ status: "ready" }).where(eq(topics.id, topicId));
    log(topicId, `lesson + quiz saved (${lessonId}), generating TTS…`);

    // ── Step 6 — TTS (long running; lesson is already marked ready) ─────────
    let costAudio = 0;
    try {
      const { steps: stepsWithAudio, cost: audioCost } = await generateLessonAudio(
        lessonContent.steps as any[],
        lessonId,
        topicId
      );
      lessonContent.steps = stepsWithAudio;
      costAudio = audioCost;
    } catch (err: unknown) {
      console.error(`[lesson] TTS failed for ${lessonId}: ${describeError(err)}`);
    }

    // update lesson with TTS paths + final costs
    await db.update(lessons)
      .set({
        content: JSON.stringify(lessonContent),
        costAI,
        costAudio,
        costTotal: costAI + costAudio,
        branchStatus: "pending",
      })
      .where(eq(lessons.id, lessonId));
    log(topicId, `✓ lesson ready — cost AI $${costAI.toFixed(4)}, audio $${costAudio.toFixed(4)}`);

    // ── Step 7 — fire and forget branch generation ──────────────────────────
    generateBranches(topicId, lessonId).catch((err: unknown) => {
      console.error(`[lesson] branch generation failed for ${lessonId}: ${describeError(err)}`);
    });
  } catch (err: unknown) {
    console.error(`[lesson:${topicId.slice(0, 8)}] ✗ failed: ${describeError(err)}`);
    await db.update(topics).set({ status: "error" }).where(eq(topics.id, topicId));
  } finally {
    resolveMutex();
    // if nobody queued after us, remove the entry so the map doesnt grow;
    // if someone did, the map now holds THEIR chain and must be left alone
    if (topicMutexes.get(topicId) === chained) {
      topicMutexes.delete(topicId);
    }
  }
}