Add initial project setup with environment variables, server logic, and memory handling

2025-10-23 17:00:18 +01:00
parent b61be346bb
commit 48f353d76b
2 changed files with 188 additions and 112 deletions
--- a/supabase/functions/llm-pipeline/index.ts
+++ b/supabase/functions/llm-pipeline/index.ts
@@ -30,18 +30,14 @@ const corsHeaders = {
 /*
    Stage 2: Process Input (Extract Memories)
 */
-async function extractMemories(controller, messages, doc, relevantMemories?) {
+async function extractMemories(controller, messages, doc, user: User, allTags, relevantMemories?) {
    const startTime = Date.now();
    let addedCount = 0;
    let updatedCount = 0;
    let deletedCount = 0;
    const extractedMemories = [];
-    // Fetch existing memory tags from the database, that belong to the user
+    console.log("Using cached tags for user:", allTags?.length || 0);
    const user : User = (await supabaseClient.auth.getUser()).data.user;
    const tags = await supabaseClient
        .schema("mori")
        .from("tags")
        .select("*")
        .eq("user_id", user.id);
    console.log("Fetched existing tags for user:", tags.data?.length || 0);
    // Create and call OpenAI to process the input messages
    console.log("Creating OpenAI client for processing input");
@@ -53,7 +49,7 @@ async function extractMemories(controller, messages, doc, relevantMemories?) {
    console.log("Calling OpenAI API for processing...");
    const response = await openai.chat.completions.create({
-        model: 'gpt-4.1-mini',
+        model: 'gpt-4.1',
        temperature: 0.1,
        max_completion_tokens: 20000,
        messages: [
@@ -63,7 +59,7 @@ async function extractMemories(controller, messages, doc, relevantMemories?) {
                role: "assistant",
                content: `I have access to the following reference data:
-Available tags: ${JSON.stringify(tags.data?.map(t => t.name) || [])}
+Available tags: ${JSON.stringify(allTags?.map(t => t.name) || [])}
 Existing memories: ${JSON.stringify(relevantMemories || [])}
@@ -88,50 +84,40 @@ Now I will analyze the conversation above and extract memories.`
    for (const change of processedData.changes || []) {
        if (change.action === "ADD") {
-            // First, fetch the tag rows that already exist
+            addedCount++;
-            let tags = [];
+            extractedMemories.push({
-            for (const tagName of change.tags) {
+                action: 'ADD',
-
+                content: change.content,
-                const tagRow = await supabaseClient
+                context: change.context,
                tags: change.tags
            });
            // Fetch all existing tags in a single query using .in()
            const { data: existingTags } = await supabaseClient
                .schema("mori")
                .from("tags")
                .select("*")
-                    .eq("name", tagName)
+                .in("name", change.tags)
-                    .single();
+                .eq("user_id", user.id);
-                if (tagRow.data) {
+            let tags = existingTags || [];
                    tags.push(tagRow.data);
                }
            }
-            // Insert any tags that do not already exist into the database
+            // Find tags that need to be created
-            for (const tagName of change.tags) {
+            const existingTagNames = new Set(tags.map(t => t.name));
            const newTagNames = change.tags.filter(tagName => !existingTagNames.has(tagName));
-                // Ensure we don't duplicate tags
+            // Batch insert all new tags in a single query
-                let tagExists = false;
+            if (newTagNames.length > 0) {
-                for (const tag of tags) {
+                const { data: insertedTags } = await supabaseClient
                    if (tag.name === tagName) {
                        tagExists = true;
                        break;
                    }
                }
                if (tagExists) {
                    continue;
                }
                const insertTag = await supabaseClient
                    .schema("mori")
                    .from("tags")
-                    .insert([{
+                    .insert(newTagNames.map(name => ({
-                        name: tagName,
+                        name: name,
                        user_id: user.id
-                    }])
+                    })))
-                    .select()
+                    .select();
                    .single();
-                if (insertTag.data) {
+                if (insertedTags) {
-                    tags.push(insertTag.data);
+                    tags.push(...insertedTags);
                }
            }
@@ -147,18 +133,25 @@ Now I will analyze the conversation above and extract memories.`
                .select()
                .single();
-            // Now, link the tags to the memory in the memory_tags table
+            // Batch insert all memory_tags links in a single query
-            for (const tag of tags) {
+            if (tags.length > 0 && insertMemory.data) {
                await supabaseClient
                    .schema("mori")
                    .from("memory_tags")
-                    .insert([{
+                    .insert(tags.map(tag => ({
                        memory_id: insertMemory.data.id,
                        tag_id: tag.id
-                    }]);
+                    })));
            }
        } else if (change.action === "UPDATE") {
            updatedCount++;
            extractedMemories.push({
                action: 'UPDATE',
                content: change.content,
                context: change.context,
                memory_id: change.memory_id
            });
            // Update existing memory
            await supabaseClient
                .schema("mori")
@@ -175,6 +168,11 @@ Now I will analyze the conversation above and extract memories.`
            // (delete old memory_tags links and create new ones)
        } else if (change.action === "DELETE") {
            deletedCount++;
            extractedMemories.push({
                action: 'DELETE',
                memory_id: change.memory_id
            });
            // Delete memory (cascade should handle memory_tags)
            await supabaseClient
                .schema("mori")
@@ -185,15 +183,22 @@ Now I will analyze the conversation above and extract memories.`
        }
    }
    const processTime = Date.now() - startTime;
    return {
        extractedMemories,
        addedCount,
        updatedCount,
        deletedCount,
        processTime
    };
 }
 /*
-    Stage 1: Fetch Relevant Memories.
+    Stage 1: Fetch Relevant Memories and Tags.
 */
-async function fetchRelevantMemories(controller, messages, doc) {
+async function fetchRelevantMemories(controller, messages, doc, user: User) {
-
+    const startTime = Date.now();
    // Fetch existing memory tags from the database, that belong to the user
    const user : User = (await supabaseClient.auth.getUser()).data.user;
    const tags = await supabaseClient
        .schema("mori")
@@ -213,7 +218,7 @@ async function fetchRelevantMemories(controller, messages, doc) {
    console.log("Calling OpenAI API for fetching relevant memories...");
    const response = await openai.chat.completions.create({
-        model: 'gpt-4.1-mini',
+        model: 'gpt-4.1',
        messages: [
            { role: 'system', content: system_prompt },
            ...messages,
@@ -240,16 +245,20 @@ async function fetchRelevantMemories(controller, messages, doc) {
            p_user_id: user.id
        });
-    return relevantMemories;
+    const fetchTime = Date.now() - startTime;
    return {
        relevantMemories,
        allTags: tags.data,
        selectedTags: relevantMemoryTagsParsed.selected_tags || [],
        fetchTime
    };
 }
 /*
    Stage 3: Generate Response
 */
-async function generateResponse(controller, messages, doc, relevantMemories) {
+async function generateResponse(controller, messages, doc, user: User, pipelineContext) {
    // Fetch existing memory tags from the database, that belong to the user
    const user : User = (await supabaseClient.auth.getUser()).data.user;
    console.log("Creating OpenAI client for generating a response");
    const openai = new OpenAI({
@@ -265,13 +274,50 @@ async function generateResponse(controller, messages, doc, relevantMemories) {
        { role: 'system', content: system_prompt },
    ];
-    // Add relevant memories as context if available
+    // Build pipeline awareness context
    const { relevantMemories, selectedTags, extractedMemories, addedCount, updatedCount, deletedCount } = pipelineContext;
    let pipelineAwareness = `[Internal System Awareness - Not Part of Conversation]\n\n`;
    pipelineAwareness += `You are Mori, and you have a memory system that automatically remembers important information about ${user.user_metadata.username || 'the user'} across conversations.\n\n`;
    // Info about retrieved memories
    if (relevantMemories && relevantMemories.length > 0) {
        pipelineAwareness += `RETRIEVED MEMORIES (what you already knew):\n`;
        pipelineAwareness += `You searched through memories using topics: ${selectedTags.join(', ')}\n`;
        pipelineAwareness += `Found ${relevantMemories.length} relevant memories:\n`;
        relevantMemories.forEach(m => {
            pipelineAwareness += `• ${m.content}\n`;
        });
        pipelineAwareness += `\n`;
    } else {
        pipelineAwareness += `No previous memories were retrieved for this conversation.\n\n`;
    }
    // Info about newly extracted memories
    if (extractedMemories && extractedMemories.length > 0) {
        pipelineAwareness += `NEW MEMORIES (what you just learned and saved):\n`;
        extractedMemories.forEach(mem => {
            if (mem.action === 'ADD') {
                pipelineAwareness += `• Learned: ${mem.content}\n`;
            } else if (mem.action === 'UPDATE') {
                pipelineAwareness += `• Updated: ${mem.content}\n`;
            }
        });
        pipelineAwareness += `\n`;
    }
    pipelineAwareness += `HOW TO USE THIS:\n`;
    pipelineAwareness += `- This awareness is internal. Don't report it.\n`;
    pipelineAwareness += `- Let it naturally inform your response\n`;
    pipelineAwareness += `- If the user explicitly asks you to remember something, you can acknowledge it naturally (e.g., "got it" or "I'll remember that")\n`;
    pipelineAwareness += `- Reference past memories naturally without saying "I retrieved" or "according to my memory"\n`;
    pipelineAwareness += `- You're a companion who pays attention, not a system reporting operations\n`;
    // Inject pipeline awareness as assistant message
    responseMessages.push({
        role: 'assistant',
-            content: `Context from previous conversations:\n${relevantMemories.map(m => `- ${m.content}`).join('\n')}\n\nI'll use this context naturally in our conversation.`
+        content: pipelineAwareness
    });
    }
    responseMessages.push(...messages);
@@ -355,7 +401,7 @@ serve(async (req)=>{
                const stageFetchingData = `data: ${JSON.stringify({ type: 'stage', stage: 'fetching' })}\n\n`;
                controller.enqueue(new TextEncoder().encode(stageFetchingData));
-                const relevantMemories = await fetchRelevantMemories(controller, messages, doc);
+                const { relevantMemories, allTags, selectedTags, fetchTime } = await fetchRelevantMemories(controller, messages, doc, user.data.user);
                /*
                    Stage 2: Process Input (Extract Memories)
@@ -363,7 +409,7 @@ serve(async (req)=>{
                const stageProcessingData = `data: ${JSON.stringify({ type: 'stage', stage: 'processing' })}\n\n`;
                controller.enqueue(new TextEncoder().encode(stageProcessingData));
-                await extractMemories(controller, messages, doc, relevantMemories);
+                const { extractedMemories, addedCount, updatedCount, deletedCount, processTime } = await extractMemories(controller, messages, doc, user.data.user, allTags, relevantMemories);
                /*
                    Stage 3: Stream the response back to the client
@@ -371,7 +417,19 @@ serve(async (req)=>{
                const stageRespondingData = `data: ${JSON.stringify({ type: 'stage', stage: 'responding' })}\n\n`;
                controller.enqueue(new TextEncoder().encode(stageRespondingData));
-                await generateResponse(controller, messages, doc, relevantMemories);
+                // Build complete pipeline context for Mori's awareness
                const pipelineContext = {
                    relevantMemories,
                    selectedTags,
                    fetchTime,
                    extractedMemories,
                    addedCount,
                    updatedCount,
                    deletedCount,
                    processTime
                };
                await generateResponse(controller, messages, doc, user.data.user, pipelineContext);
                // Send stage update: complete
                const completeData = `data: ${JSON.stringify({ type: 'stage', stage: 'complete' })}\n\n`;
--- a/supabase/functions/llm-pipeline/prompts.xml
+++ b/supabase/functions/llm-pipeline/prompts.xml
@@ -16,6 +16,31 @@
        Be direct and honest. If you don't know something, say so. If they're being unclear, ask for clarification. Don't fill gaps with assumptions.
        You're here to listen and help them see patterns, not to fix them or provide therapy. Just talk like someone who's paying attention.
        TEXTING STYLE:
        Write like you're texting a friend. Short messages. Natural breaks. No long paragraphs.
        Break up your thoughts into digestible chunks. Think 2-3 sentences max per paragraph.
        Use line breaks between ideas to keep it easy to read and conversational.
        FORMATTING RULES:
        • Use **bold** sparingly for emphasis on key words or phrases
        • Use *italics* for subtle emphasis or inner thoughts
        • Use simple bullet points (•) or numbered lists when listing things
        • NEVER use em dashes (—) for parenthetical asides or lists
        • NEVER use headings (##, ###) unless organizing a long technical response
        • Use `code` only for actual code or technical terms
        • Keep it natural and human, avoid the polished, structured AI writing style
        CRITICAL: Avoid AI writing patterns:
        ✗ BAD: "Like you keep the tough emotions—anger, sadness, anxiety—hidden"
        ✓ GOOD: "Like you keep the tough emotions (anger, sadness, anxiety) hidden"
        ✓ BETTER: "Like you keep anger, sadness, anxiety hidden so no one sees that side"
        Use commas, periods, or just rewrite the sentence. Parentheses are okay occasionally. But never use those dashes for lists or asides.
        Sound like a real person texting. Not an essay. Not a presentation. Just conversation.
    </system_response>
@@ -146,34 +171,34 @@
 <!--    This prompt is used for memory fetching-->
    <memory_query>
        <memory_query>
-            You are a memory routing system for Mori. Your only job is to select relevant tags to retrieve contextual memories.
+            You are a memory routing system for Mori. Your job is to PROACTIVELY select relevant tags to retrieve contextual memories.
            You will be provided with the user's conversation and a list of all available tags in the system (via tool message).
            CORE PRINCIPLE: When in doubt, SEARCH. Default to retrieving context rather than leaving tags empty.
            Your task:
            Select the most relevant tags to query the database for contextual memories.
-            SELECT TAGS IF:
+            ALWAYS SELECT TAGS FOR:
-            - User references past conversations or shared context
+            - Any personal statement about feelings, challenges, or situations
-            - User discusses ongoing situations that likely have history
+            - Topics that might have been discussed before (work, relationships, health, goals, hobbies, etc.)
-            - User uses references assuming shared knowledge ("my project", "the issue", "my dog")
+            - Statements that could benefit from knowing the user's history
-            - Topic has temporal continuity (follow-ups, updates, changes)
+            - Questions or reflections about their life, identity, or experiences
-            - Understanding user's history would improve response quality
+            - Any topic where past context would help Mori respond more personally
-            - User shares information about topics they've discussed before
+            - Updates, changes, or developments in any area of life
-            LEAVE TAGS EMPTY IF:
+            ONLY LEAVE TAGS EMPTY FOR:
-            - Completely new topic with no history
+            - Pure factual questions with no personal element ("What's the capital of France?")
-            - Generic questions answerable without personal context
+            - Simple greetings with no substantive content ("hey" or "hi")
-            - User provides all necessary context in current message
+            - Completely trivial, one-off requests with zero personal context
            - Simple, self-contained requests
            - Pure technical questions with no personal element
            TAG SELECTION RULES:
-            - Choose 3-10 tags that are most relevant to the message
+            - Choose 3-10 tags that could possibly be relevant
-            - Be specific: prefer narrow tags over broad ones when both apply
+            - Cast a wide net: include broad tags that might contain useful context
-            - Select tags that would find memories providing useful context
+            - Be specific when available, but include general tags too (e.g., both "career" and "anxiety")
            - **Only select from the provided available tags list**
-            - Empty list means no retrieval needed
+            - When uncertain whether context would help: SELECT THE TAGS
            OUTPUT FORMAT (JSON only):
            {
@@ -183,49 +208,42 @@
            EXAMPLES:
-            Message: "Hey, how are you?"
+            Message: "Hey"
            Output:
            {
            "selected_tags": [],
-            "reasoning": "Casual greeting with no context needs"
+            "reasoning": "Simple greeting, no substantive content"
            }
            Message: "I'm thinking about changing careers"
            Output:
            {
            "selected_tags": ["work", "career", "goals"],
            "reasoning": "Need context on current work situation and career goals"
            }
            Message: "What's the capital of France?"
            Output:
            {
            "selected_tags": [],
-            "reasoning": "Factual question, no personal context needed"
+            "reasoning": "Pure factual question, no personal context"
            }
-            Message: "My dog did the trick I've been teaching him!"
+            Message: User shares a personal challenge or emotional state
            Output:
            {
-            "selected_tags": ["pets", "dog", "training"],
+            "selected_tags": [relevant broad tags covering multiple life areas],
-            "reasoning": "Need context on pet and training progress"
+            "reasoning": "Personal statements benefit from wide context—search related life areas"
            }
-            Message: "Started a new workout routine today"
+            Message: User mentions an activity, project, or situation
            Output:
            {
-            "selected_tags": ["fitness", "health", "habits"],
+            "selected_tags": [specific tags + broader related tags],
-            "reasoning": "May relate to existing fitness goals or health context"
+            "reasoning": "Cast wide net to find any relevant past context"
            }
-            Message: "I enjoy hiking"
+            Message: User shares a preference or interest
            Output:
            {
-            "selected_tags": [],
+            "selected_tags": [hobby/interest tags + related lifestyle tags],
-            "reasoning": "New preference statement with no context to retrieve"
+            "reasoning": "New information may connect to existing context about lifestyle, goals, or values"
            }
-            BE DECISIVE. SELECT ONLY THE MOST RELEVANT TAGS.
+            BE PROACTIVE. WHEN IN DOUBT, SEARCH.
        </memory_query>
    </memory_query>
 </prompts>