Revisione/server/utils/detectUploadType.ts

50 lines
1.8 KiB
TypeScript

import { askAI } from "./openrouter";
import { useRuntimeConfig } from "#imports";
/** Categories an uploaded document can be classified as. */
type UploadType = "slides" | "past_paper" | "lab_worksheet";

/*
 * Filename heuristics.
 *
 * A match on any of these patterns decides the category without consulting the
 * model, so they are written to avoid accidental substring hits. Short tokens
 * are delimited with letter lookarounds rather than `\b`, because `\b` treats
 * `_` and digits as word characters and therefore never fires inside common
 * names such as `lec_03` or `prac2`.
 */

/**
 * Exam-like filenames: "exam" (but not "example"), "past paper" with an optional
 * space/underscore/hyphen, "specimen", "mock", "resit", or a standalone year in
 * the range 2019–2026. The year must not be part of a longer digit run, so a
 * date stamp like `20230512` does not count.
 * NOTE(review): the year window is hard-coded and will need extending over time.
 */
const PAST_PAPER_RE =
  /exam(?!ple)|past[\s_-]?paper|specimen|mock|resit|(?<!\d)20(?:19|2[0-6])(?!\d)/i;

/**
 * Lab-like filenames: "lab"/"labs"/"laboratory"/"laboratories" or "prac" as
 * standalone tokens (not inside words such as "syllabus" or "matlab"), plus
 * "worksheet", "practical" and "experiment" anywhere in the name.
 */
const LAB_RE =
  /(?<![a-z])lab(?:s|orator(?:y|ies))?(?![a-z])|worksheet|practical|experiment|(?<![a-z])prac(?![a-z])/i;

/**
 * Slide-like filenames: "lecture", "slide"/"slides", "chapter", a standalone
 * "lec" token, or "week"/"topic" followed by a digit with an optional
 * space/underscore/hyphen in between (e.g. `week3`, `week_3`, `topic-2`).
 */
const SLIDES_RE =
  /lecture|slides?|(?<![a-z])lec(?![a-z])|week[\s_-]?\d|topic[\s_-]?\d|chapter/i;
/** Category returned when neither the filename nor the model yields a usable answer. */
const FALLBACK_UPLOAD_TYPE = "slides";

/** Every label the model is asked to choose from. */
const UPLOAD_TYPE_LABELS = ["past_paper", "lab_worksheet", "slides"] as const;

/** Model id used when the runtime config does not provide a non-empty one. */
const DEFAULT_CLASSIFICATION_MODEL = "anthropic/claude-haiku-4-5";

/** Number of leading characters of the extracted text included in the prompt. */
const OPENING_TEXT_LIMIT = 1500;

/**
 * Pulls a category label out of a model reply.
 *
 * The reply is accepted when it mentions exactly one known label, so decorated
 * answers such as `"past_paper."` or `` "`slides`" `` are still understood.
 * Replies that mention no label, or more than one, are treated as unusable.
 *
 * @param reply - Raw reply from the model; anything other than a string is rejected.
 * @returns The single label mentioned in the reply, or `undefined`.
 */
function parseUploadTypeReply(reply: unknown): UploadType | undefined {
  if (typeof reply !== "string") return undefined;
  const normalized = reply.toLowerCase();
  const mentioned = UPLOAD_TYPE_LABELS.filter((label) => normalized.includes(label));
  return mentioned.length === 1 ? mentioned[0] : undefined;
}

/**
 * Classifies an uploaded document as slides, a past paper, or a lab worksheet.
 *
 * Layer 1 tests the filename against the module's regex heuristics, checking
 * past paper first, then lab worksheet, then slides; the first match wins.
 * Layer 2 runs only when no heuristic matches: it sends the filename and the
 * opening of the extracted text to the model through `askAI` and accepts the
 * reply if it names exactly one category.
 *
 * This function is best-effort: any failure in layer 2 (config lookup, the
 * `askAI` call, an unusable reply) is logged and the result defaults to
 * `"slides"` instead of rejecting.
 *
 * @param filename - Original name of the uploaded file.
 * @param extractedText - Text extracted from the document; only its first
 *   1500 characters are sent to the model.
 * @returns The detected category.
 */
export async function detectUploadType(filename: string, extractedText: string): Promise<UploadType> {
  const name = filename.toLowerCase();

  // layer 1 — filename heuristics
  if (PAST_PAPER_RE.test(name)) return "past_paper";
  if (LAB_RE.test(name)) return "lab_worksheet";
  if (SLIDES_RE.test(name)) return "slides";

  // layer 2 — AI classification
  const opening = extractedText.slice(0, OPENING_TEXT_LIMIT);
  // The continuation lines are deliberately unindented: they are part of the prompt text.
  const prompt = `You are classifying a university document. Based on the filename and the opening text, classify it as exactly one of: past_paper, lab_worksheet, or slides.
past_paper: an exam or test with questions students must answer under exam conditions
lab_worksheet: a practical worksheet with experiments, procedures, or guided tasks
slides: lecture slides or course notes presenting theory and concepts
Filename: ${filename}
Opening text: ${opening}
Respond with only one of: past_paper, lab_worksheet, slides`;

  try {
    // Read the optional override without `any`; an absent, empty or non-string
    // value falls back to the default model.
    const configuredModel: unknown = Reflect.get(useRuntimeConfig(), "openrouterClassificationModel");
    const model =
      typeof configuredModel === "string" && configuredModel.trim() !== ""
        ? configuredModel
        : DEFAULT_CLASSIFICATION_MODEL;

    const { text } = await askAI([{ role: "user", content: prompt }], { model, temperature: 0 });

    const label = parseUploadTypeReply(text);
    if (label !== undefined) return label;

    console.warn(
      `detectUploadType: unusable classifier reply for "${filename}"; defaulting to "${FALLBACK_UPLOAD_TYPE}"`
    );
  } catch (error: unknown) {
    // Classification stays best-effort, but the failure is no longer invisible.
    console.warn(
      `detectUploadType: AI classification failed for "${filename}"; defaulting to "${FALLBACK_UPLOAD_TYPE}"`,
      error
    );
  }

  return FALLBACK_UPLOAD_TYPE;
}