From b61be346bb0459a1a2873d65e10ab95fc6cee93f Mon Sep 17 00:00:00 2001 From: ImBenji Date: Thu, 23 Oct 2025 15:36:45 +0100 Subject: [PATCH] Add initial project setup with environment variables, server logic, and memory handling --- .gitignore | 31 ++ .idea/.gitignore | 8 + deno.json | 9 + deno.lock | 289 ++++++++++++++ gitignore | 0 index.js | 37 ++ package.json | 10 + supabase/functions/llm-pipeline/index.ts | 397 ++++++++++++++++++++ supabase/functions/llm-pipeline/prompts.xml | 231 ++++++++++++ 9 files changed, 1012 insertions(+) create mode 100644 .gitignore create mode 100644 .idea/.gitignore create mode 100644 deno.json create mode 100644 deno.lock create mode 100644 gitignore create mode 100644 index.js create mode 100644 package.json create mode 100644 supabase/functions/llm-pipeline/index.ts create mode 100644 supabase/functions/llm-pipeline/prompts.xml diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..a778333 --- /dev/null +++ b/.gitignore @@ -0,0 +1,31 @@ +# Environment variables +.env +.env.* +*.env + +# Node modules +node_modules/ + +# Deno +.deno/ + +# IDE +.idea/ +.vscode/ +*.swp +*.swo +*~ + +# OS +.DS_Store +Thumbs.db + +# Logs +*.log +npm-debug.log* +yarn-debug.log* +yarn-error.log* + +# Build outputs +dist/ +build/ \ No newline at end of file diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..13566b8 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,8 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Editor-based HTTP Client requests +/httpRequests/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml diff --git a/deno.json b/deno.json new file mode 100644 index 0000000..e582e99 --- /dev/null +++ b/deno.json @@ -0,0 +1,9 @@ +{ + "nodeModulesDir": "auto", + "imports": { + "openai": "npm:openai@4" + }, + "tasks": { + "serve": "deno run --allow-all supabase/functions/llm-pipeline/index.ts" + } +} \ No newline at end of file diff --git a/deno.lock b/deno.lock new 
file mode 100644 index 0000000..0f60284 --- /dev/null +++ b/deno.lock @@ -0,0 +1,289 @@ +{ + "version": "5", + "specifiers": { + "jsr:@supabase/functions-js@*": "2.5.0", + "npm:openai@4": "4.104.0" + }, + "jsr": { + "@supabase/functions-js@2.5.0": { + "integrity": "c48e3fc5da1510ce6dab34af3fc49cd637ea7b721d92faffd0545cbdcc548430" + } + }, + "npm": { + "@types/node-fetch@2.6.13": { + "integrity": "sha512-QGpRVpzSaUs30JBSGPjOg4Uveu384erbHBoT1zeONvyCfwQxIkUshLAOqN/k9EjGviPRmWTTe6aH2qySWKTVSw==", + "dependencies": [ + "@types/node@24.2.0", + "form-data" + ] + }, + "@types/node@18.19.130": { + "integrity": "sha512-GRaXQx6jGfL8sKfaIDD6OupbIHBr9jv7Jnaml9tB7l4v068PAOXqfcujMMo5PhbIs6ggR1XODELqahT2R8v0fg==", + "dependencies": [ + "undici-types@5.26.5" + ] + }, + "@types/node@24.2.0": { + "integrity": "sha512-3xyG3pMCq3oYCNg7/ZP+E1ooTaGB4cG8JWRsqqOYQdbWNY4zbaV0Ennrd7stjiJEFZCaybcIgpTjJWHRfBSIDw==", + "dependencies": [ + "undici-types@7.10.0" + ] + }, + "abort-controller@3.0.0": { + "integrity": "sha512-h8lQ8tacZYnR3vNQTgibj+tODHI5/+l06Au2Pcriv/Gmet0eaj4TwWH41sO9wnHDiQsEj19q0drzdWdeAHtweg==", + "dependencies": [ + "event-target-shim" + ] + }, + "agentkeepalive@4.6.0": { + "integrity": "sha512-kja8j7PjmncONqaTsB8fQ+wE2mSU2DJ9D4XKoJ5PFWIdRMa6SLSN1ff4mOr4jCbfRSsxR4keIiySJU0N9T5hIQ==", + "dependencies": [ + "humanize-ms" + ] + }, + "asynckit@0.4.0": { + "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==" + }, + "call-bind-apply-helpers@1.0.2": { + "integrity": "sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==", + "dependencies": [ + "es-errors", + "function-bind" + ] + }, + "combined-stream@1.0.8": { + "integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==", + "dependencies": [ + "delayed-stream" + ] + }, + "delayed-stream@1.0.0": { + "integrity": 
"sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==" + }, + "dunder-proto@1.0.1": { + "integrity": "sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==", + "dependencies": [ + "call-bind-apply-helpers", + "es-errors", + "gopd" + ] + }, + "es-define-property@1.0.1": { + "integrity": "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==" + }, + "es-errors@1.3.0": { + "integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==" + }, + "es-object-atoms@1.1.1": { + "integrity": "sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA==", + "dependencies": [ + "es-errors" + ] + }, + "es-set-tostringtag@2.1.0": { + "integrity": "sha512-j6vWzfrGVfyXxge+O0x5sh6cvxAog0a/4Rdd2K36zCMV5eJ+/+tOAngRO8cODMNWbVRdVlmGZQL2YS3yR8bIUA==", + "dependencies": [ + "es-errors", + "get-intrinsic", + "has-tostringtag", + "hasown" + ] + }, + "event-target-shim@5.0.1": { + "integrity": "sha512-i/2XbnSz/uxRCU6+NdVJgKWDTM427+MqYbkQzD321DuCQJUqOuJKIA0IM2+W2xtYHdKOmZ4dR6fExsd4SXL+WQ==" + }, + "form-data-encoder@1.7.2": { + "integrity": "sha512-qfqtYan3rxrnCk1VYaA4H+Ms9xdpPqvLZa6xmMgFvhO32x7/3J/ExcTd6qpxM0vH2GdMI+poehyBZvqfMTto8A==" + }, + "form-data@4.0.4": { + "integrity": "sha512-KrGhL9Q4zjj0kiUt5OO4Mr/A/jlI2jDYs5eHBpYHPcBEVSiipAvn2Ko2HnPe20rmcuuvMHNdZFp+4IlGTMF0Ow==", + "dependencies": [ + "asynckit", + "combined-stream", + "es-set-tostringtag", + "hasown", + "mime-types" + ] + }, + "formdata-node@4.4.1": { + "integrity": "sha512-0iirZp3uVDjVGt9p49aTaqjk84TrglENEDuqfdlZQ1roC9CWlPk6Avf8EEnZNcAqPonwkG35x4n3ww/1THYAeQ==", + "dependencies": [ + "node-domexception", + "web-streams-polyfill" + ] + }, + "function-bind@1.1.2": { + "integrity": "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==" + }, + "get-intrinsic@1.3.0": { + "integrity": 
"sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ==", + "dependencies": [ + "call-bind-apply-helpers", + "es-define-property", + "es-errors", + "es-object-atoms", + "function-bind", + "get-proto", + "gopd", + "has-symbols", + "hasown", + "math-intrinsics" + ] + }, + "get-proto@1.0.1": { + "integrity": "sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g==", + "dependencies": [ + "dunder-proto", + "es-object-atoms" + ] + }, + "gopd@1.2.0": { + "integrity": "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==" + }, + "has-symbols@1.1.0": { + "integrity": "sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ==" + }, + "has-tostringtag@1.0.2": { + "integrity": "sha512-NqADB8VjPFLM2V0VvHUewwwsw0ZWBaIdgo+ieHtK3hasLz4qeCRjYcqfB6AQrBggRKppKF8L52/VqdVsO47Dlw==", + "dependencies": [ + "has-symbols" + ] + }, + "hasown@2.0.2": { + "integrity": "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==", + "dependencies": [ + "function-bind" + ] + }, + "humanize-ms@1.2.1": { + "integrity": "sha512-Fl70vYtsAFb/C06PTS9dZBo7ihau+Tu/DNCk/OyHhea07S+aeMWpFFkUaXRa8fI+ScZbEI8dfSxwY7gxZ9SAVQ==", + "dependencies": [ + "ms" + ] + }, + "math-intrinsics@1.1.0": { + "integrity": "sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==" + }, + "mime-db@1.52.0": { + "integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==" + }, + "mime-types@2.1.35": { + "integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==", + "dependencies": [ + "mime-db" + ] + }, + "ms@2.1.3": { + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==" + }, + "node-domexception@1.0.0": { + "integrity": 
"sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==", + "deprecated": true + }, + "node-fetch@2.7.0": { + "integrity": "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==", + "dependencies": [ + "whatwg-url" + ] + }, + "openai@4.104.0": { + "integrity": "sha512-p99EFNsA/yX6UhVO93f5kJsDRLAg+CTA2RBqdHK4RtK8u5IJw32Hyb2dTGKbnnFmnuoBv5r7Z2CURI9sGZpSuA==", + "dependencies": [ + "@types/node@18.19.130", + "@types/node-fetch", + "abort-controller", + "agentkeepalive", + "form-data-encoder", + "formdata-node", + "node-fetch" + ], + "bin": true + }, + "tr46@0.0.3": { + "integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==" + }, + "undici-types@5.26.5": { + "integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==" + }, + "undici-types@7.10.0": { + "integrity": "sha512-t5Fy/nfn+14LuOc2KNYg75vZqClpAiqscVvMygNnlsHBFpSXdJaYtXMcdNLpl/Qvc3P2cB3s6lOV51nqsFq4ag==" + }, + "web-streams-polyfill@4.0.0-beta.3": { + "integrity": "sha512-QW95TCTaHmsYfHDybGMwO5IJIM93I/6vTRk+daHTWFPhwh+C8Cg7j7XyKrwrj8Ib6vYXe0ocYNrmzY4xAAN6ug==" + }, + "webidl-conversions@3.0.1": { + "integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==" + }, + "whatwg-url@5.0.0": { + "integrity": "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==", + "dependencies": [ + "tr46", + "webidl-conversions" + ] + } + }, + "redirects": { + "https://esm.sh/@supabase/supabase-js@2": "https://esm.sh/@supabase/supabase-js@2.76.1", + "https://esm.sh/tr46@~0.0.3?target=denonext": "https://esm.sh/tr46@0.0.3?target=denonext", + "https://esm.sh/webidl-conversions@^3.0.0?target=denonext": "https://esm.sh/webidl-conversions@3.0.1?target=denonext", + "https://esm.sh/whatwg-url@^5.0.0?target=denonext": 
"https://esm.sh/whatwg-url@5.0.0?target=denonext" + }, + "remote": { + "https://deno.land/std@0.168.0/async/abortable.ts": "80b2ac399f142cc528f95a037a7d0e653296352d95c681e284533765961de409", + "https://deno.land/std@0.168.0/async/deadline.ts": "2c2deb53c7c28ca1dda7a3ad81e70508b1ebc25db52559de6b8636c9278fd41f", + "https://deno.land/std@0.168.0/async/debounce.ts": "60301ffb37e730cd2d6f9dadfd0ecb2a38857681bd7aaf6b0a106b06e5210a98", + "https://deno.land/std@0.168.0/async/deferred.ts": "77d3f84255c3627f1cc88699d8472b664d7635990d5358c4351623e098e917d6", + "https://deno.land/std@0.168.0/async/delay.ts": "5a9bfba8de38840308a7a33786a0155a7f6c1f7a859558ddcec5fe06e16daf57", + "https://deno.land/std@0.168.0/async/mod.ts": "7809ad4bb223e40f5fdc043e5c7ca04e0e25eed35c32c3c32e28697c553fa6d9", + "https://deno.land/std@0.168.0/async/mux_async_iterator.ts": "770a0ff26c59f8bbbda6b703a2235f04e379f73238e8d66a087edc68c2a2c35f", + "https://deno.land/std@0.168.0/async/pool.ts": "6854d8cd675a74c73391c82005cbbe4cc58183bddcd1fbbd7c2bcda42b61cf69", + "https://deno.land/std@0.168.0/async/retry.ts": "e8e5173623915bbc0ddc537698fa418cf875456c347eda1ed453528645b42e67", + "https://deno.land/std@0.168.0/async/tee.ts": "3a47cc4e9a940904fd4341f0224907e199121c80b831faa5ec2b054c6d2eff5e", + "https://deno.land/std@0.168.0/http/server.ts": "e99c1bee8a3f6571ee4cdeb2966efad465b8f6fe62bec1bdb59c1f007cc4d155", + "https://deno.land/std@0.224.0/dotenv/mod.ts": "0180eaeedaaf88647318811cdaa418cc64dc51fb08354f91f5f480d0a1309f7d", + "https://deno.land/std@0.224.0/dotenv/parse.ts": "09977ff88dfd1f24f9973a338f0f91bbdb9307eb5ff6085446e7c423e4c7ba0c", + "https://deno.land/std@0.224.0/dotenv/stringify.ts": "275da322c409170160440836342eaa7cf012a1d11a7e700d8ca4e7f2f8aa4615", + "https://deno.land/x/deno_dom@v0.1.45/build/deno-wasm/deno-wasm.js": "d6841a06342eb6a2798ef28de79ad69c0f2fa349fa04d3ca45e5fcfbf50a9340", + "https://deno.land/x/deno_dom@v0.1.45/deno-dom-wasm.ts": 
"a33d160421bbb6e3104285ea5ebf33352b7ad50d82ea8765e3cf65f972b25119", + "https://deno.land/x/deno_dom@v0.1.45/src/api.ts": "0ff5790f0a3eeecb4e00b7d8fbfa319b165962cf6d0182a65ba90f158d74f7d7", + "https://deno.land/x/deno_dom@v0.1.45/src/constructor-lock.ts": "59714df7e0571ec7bd338903b1f396202771a6d4d7f55a452936bd0de9deb186", + "https://deno.land/x/deno_dom@v0.1.45/src/deserialize.ts": "1cf4096678d8afed8ed28dbad690504c4d2c28149ba768b26eacd1416873425b", + "https://deno.land/x/deno_dom@v0.1.45/src/dom/document-fragment.ts": "1c7352a3c816587ed7fad574b42636198f680f17abc3836fcfe7799b31e7718f", + "https://deno.land/x/deno_dom@v0.1.45/src/dom/document.ts": "a182727dd9179e5712e31be66f4f72b766a5b714c765a75950babe6dd756b4ee", + "https://deno.land/x/deno_dom@v0.1.45/src/dom/dom-parser.ts": "609097b426f8c2358f3e5d2bca55ed026cf26cdf86562e94130dfdb0f2537f92", + "https://deno.land/x/deno_dom@v0.1.45/src/dom/element.ts": "d5371cd83ff2128353c1975465c368ef83d7441568626b386557deba51315111", + "https://deno.land/x/deno_dom@v0.1.45/src/dom/elements/html-template-element.ts": "740b97a5378c9a14cccf3429299846eda240b613013e2d2d7f20b393897453c2", + "https://deno.land/x/deno_dom@v0.1.45/src/dom/html-collection.ts": "829a965f419f8286d5f43a12e27886d10836d519ca2d5e74cb3f2e1d35f35746", + "https://deno.land/x/deno_dom@v0.1.45/src/dom/node-list.ts": "9008303fe236e40e74f9f93e398bd173d2e9b09065932a0153dd0142c759397b", + "https://deno.land/x/deno_dom@v0.1.45/src/dom/node.ts": "3069e6fc93ac4111a136ed68199d76673339842b9751610ba06f111ba7dc10a7", + "https://deno.land/x/deno_dom@v0.1.45/src/dom/selectors/custom-api.ts": "852696bd58e534bc41bd3be9e2250b60b67cd95fd28ed16b1deff1d548531a71", + "https://deno.land/x/deno_dom@v0.1.45/src/dom/selectors/nwsapi-types.ts": "c43b36c36acc5d32caabaa54fda8c9d239b2b0fcbce9a28efb93c84aa1021698", + "https://deno.land/x/deno_dom@v0.1.45/src/dom/selectors/nwsapi.js": "985d7d8fc1eabbb88946b47a1c44c1b2d4aa79ff23c21424219f1528fa27a2ff", + 
"https://deno.land/x/deno_dom@v0.1.45/src/dom/selectors/selectors.ts": "83eab57be2290fb48e3130533448c93c6c61239f2a2f3b85f1917f80ca0fdc75", + "https://deno.land/x/deno_dom@v0.1.45/src/dom/selectors/sizzle-types.ts": "78149e2502409989ce861ed636b813b059e16bc267bb543e7c2b26ef43e4798b", + "https://deno.land/x/deno_dom@v0.1.45/src/dom/selectors/sizzle.js": "c3aed60c1045a106d8e546ac2f85cc82e65f62d9af2f8f515210b9212286682a", + "https://deno.land/x/deno_dom@v0.1.45/src/dom/utils-types.ts": "96db30e3e4a75b194201bb9fa30988215da7f91b380fca6a5143e51ece2a8436", + "https://deno.land/x/deno_dom@v0.1.45/src/dom/utils.ts": "4c6206516fb8f61f37a209c829e812c4f5a183e46d082934dd14c91bde939263", + "https://deno.land/x/deno_dom@v0.1.45/src/parser.ts": "e06b2300d693e6ae7564e53dfa5c9a9e97fdb8c044c39c52c8b93b5d60860be3", + "https://esm.sh/@supabase/auth-js@2.76.1/denonext/auth-js.mjs": "34acd2684c9861b7b753d2d06c77e548eb63d06c8a6081e3e2aa6aac01b006f9", + "https://esm.sh/@supabase/functions-js@2.76.1/denonext/functions-js.mjs": "8f74804adb4268b17cc2d677dd84269f4e2c5ab848eeaf3e5759d5ab78e18100", + "https://esm.sh/@supabase/node-fetch@2.6.15/denonext/node-fetch.mjs": "0bae9052231f4f6dbccc7234d05ea96923dbf967be12f402764580b6bf9f713d", + "https://esm.sh/@supabase/postgrest-js@2.76.1/denonext/postgrest-js.mjs": "50284739ae9a5f487dcf76bccbbdda5d08fea9962cc28c829cca12ae81dedda8", + "https://esm.sh/@supabase/realtime-js@2.76.1/denonext/realtime-js.mjs": "e5d64b84666fdc989495478299e78779d3c756d4fd900f56cf83fb20e7b568d8", + "https://esm.sh/@supabase/storage-js@2.76.1/denonext/storage-js.mjs": "71ff35507eb09ef4197cf3a7d5b1601fecccb6a9a69d09e271cbb6c98af3b3a4", + "https://esm.sh/@supabase/supabase-js@2.76.1": "99f07a2597a0be25436c7520104699b5570de0c91d8e42b1679359dba7716324", + "https://esm.sh/@supabase/supabase-js@2.76.1/denonext/supabase-js.mjs": "2f5357f9c5cbef8b66035f8c287b00891c5214832e937118105f926bd0e6ae0d", + "https://esm.sh/tr46@0.0.3/denonext/tr46.mjs": 
"5753ec0a99414f4055f0c1f97691100f13d88e48a8443b00aebb90a512785fa2", + "https://esm.sh/tr46@0.0.3?target=denonext": "19cb9be0f0d418a0c3abb81f2df31f080e9540a04e43b0f699bce1149cba0cbb", + "https://esm.sh/tslib@2.8.1/denonext/tslib.mjs": "ebce3cd5facb654623020337f867b426ba95f71596ba87acc9e6c6f4e55905ca", + "https://esm.sh/webidl-conversions@3.0.1/denonext/webidl-conversions.mjs": "54b5c2d50a294853c4ccebf9d5ed8988c94f4e24e463d84ec859a866ea5fafec", + "https://esm.sh/webidl-conversions@3.0.1?target=denonext": "4e20318d50528084616c79d7b3f6e7f0fe7b6d09013bd01b3974d7448d767e29", + "https://esm.sh/whatwg-url@5.0.0/denonext/whatwg-url.mjs": "29b16d74ee72624c915745bbd25b617cfd2248c6af0f5120d131e232a9a9af79", + "https://esm.sh/whatwg-url@5.0.0?target=denonext": "f001a2cadf81312d214ca330033f474e74d81a003e21e8c5d70a1f46dc97b02d" + }, + "workspace": { + "dependencies": [ + "npm:openai@4" + ] + } +} diff --git a/gitignore b/gitignore new file mode 100644 index 0000000..e69de29 diff --git a/index.js b/index.js new file mode 100644 index 0000000..2712a22 --- /dev/null +++ b/index.js @@ -0,0 +1,37 @@ +fetch('http://localhost:8000', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + messages: [{ role: 'user', content: 'Hello, tell me a joke' }] + }) +}) + .then(response => { + const reader = response.body.getReader(); + const decoder = new TextDecoder(); + let fullText = ''; + + function read() { + reader.read().then(({ done, value }) => { + if (done) { + console.log('\n--- Stream complete ---'); + console.log('Full response:', fullText); + return; + } + + const chunk = decoder.decode(value); + const lines = chunk.split('\n').filter(line => line.startsWith('data: ')); + + lines.forEach(line => { + const json = JSON.parse(line.replace('data: ', '')); + if (json.content) { + fullText += json.content; + console.clear(); + console.log(fullText); + } + }); + + read(); + }); + } + read(); + }); \ No newline at end of file diff --git 
// @ts-ignore
import { serve } from "https://deno.land/std@0.168.0/http/server.ts";
// @ts-ignore
import { load } from "https://deno.land/std@0.224.0/dotenv/mod.ts";
// @ts-ignore
import { DOMParser } from "https://deno.land/x/deno_dom@v0.1.45/deno-dom-wasm.ts";
// Type-only import: the target is a .d.ts file and must never be loaded at runtime.
import type { User } from "https://esm.sh/@supabase/auth-js@2.76.1/dist/module/lib/types.d.ts";

import { createClient } from "https://esm.sh/@supabase/supabase-js@2";

//

import OpenAI from "npm:openai@4";

// Load environment variables
await load({ export: true, envPath: ".env" });

// Load and parse prompts.xml.
// The URL object is passed straight to readTextFile; going through `.pathname`
// would hand over a percent-encoded (and, on Windows, slash-prefixed) string.
const xmlContent = await Deno.readTextFile(new URL('./prompts.xml', import.meta.url));
const doc = new DOMParser().parseFromString(xmlContent, 'text/html');

const corsHeaders = {
    'Access-Control-Allow-Origin': '*',
    'Access-Control-Allow-Headers': 'authorization, x-client-info, apikey, content-type'
};

/**
 * Per-request state shared by the pipeline stages.
 *
 * The Supabase client carries the caller's bearer token, so it must never be
 * stored in module scope: concurrent requests would overwrite each other's
 * client and run one user's pipeline with another user's credentials.
 */
interface PipelineContext {
    supabase: ReturnType<typeof createClient>;
    user: User;
}

// One encoder for the whole module instead of one per streamed chunk.
const encoder = new TextEncoder();

/**
 * Writes one `data: <json>` event, terminated by a blank line, to the response stream.
 *
 * @param controller Controller of the response ReadableStream.
 * @param payload    JSON-serialisable event body.
 */
function sendEvent(controller, payload) {
    controller.enqueue(encoder.encode(`data: ${JSON.stringify(payload)}\n\n`));
}

/**
 * Builds a JSON response that carries the CORS headers.
 *
 * @param body   JSON-serialisable response body.
 * @param status HTTP status code.
 */
function jsonResponse(body, status) {
    return new Response(JSON.stringify(body), {
        status,
        headers: {
            ...corsHeaders,
            'Content-Type': 'application/json'
        }
    });
}

/**
 * Returns the trimmed text of the prompt element matching `selector`,
 * or an empty string when the document or element is missing.
 */
function promptText(doc, selector) {
    return doc?.querySelector(selector)?.textContent?.trim() || '';
}

/**
 * Returns the tag rows for the given names, inserting rows for names the user
 * does not have yet. `tagsByName` is updated with every inserted row so later
 * changes in the same batch reuse it.
 *
 * @param ctx        Request context (client and authenticated user).
 * @param tagNames   Tag names proposed by the model; non-arrays are treated as empty.
 * @param tagsByName Cache of the user's tag rows keyed by name.
 * @returns Tag rows, one per distinct name that could be resolved.
 */
async function resolveTags(ctx: PipelineContext, tagNames, tagsByName) {
    const resolved = [];

    // A Set drops repeated names so the same tag is never linked twice.
    for (const tagName of new Set(Array.isArray(tagNames) ? tagNames : [])) {
        let tag = tagsByName.get(tagName);

        if (!tag) {
            const insertTag = await ctx.supabase
                .schema("mori")
                .from("tags")
                .insert([{
                    name: tagName,
                    user_id: ctx.user.id
                }])
                .select()
                .single();

            if (insertTag.error || !insertTag.data) {
                console.error("Error inserting tag:", tagName, insertTag.error);
                continue;
            }

            tag = insertTag.data;
            tagsByName.set(tagName, tag);
        }

        resolved.push(tag);
    }

    return resolved;
}

/*
    Stage 2: Process Input (Extract Memories)
 */
/**
 * Asks the model which memories to add, update or delete based on the
 * conversation, then applies those changes to the `mori` schema.
 *
 * @param ctx              Request context (client and authenticated user).
 * @param controller       Response stream controller (currently unused by this stage).
 * @param messages         Conversation messages supplied by the client.
 * @param doc              Parsed prompts document.
 * @param relevantMemories Memories returned by stage 1, passed to the model as reference data.
 * @throws {Error} When the model output is not valid JSON.
 */
async function extractMemories(ctx: PipelineContext, controller, messages, doc, relevantMemories?) {
    const { supabase, user } = ctx;

    // Fetch existing memory tags from the database, that belong to the user
    const tags = await supabase
        .schema("mori")
        .from("tags")
        .select("*")
        .eq("user_id", user.id);

    if (tags.error) {
        // Without the user's tag list, new tags cannot be told apart from
        // existing ones; skip extraction rather than risk duplicate rows.
        console.error("Error fetching tags, skipping memory extraction:", tags.error);
        return;
    }

    console.log("Fetched existing tags for user:", tags.data?.length || 0);

    // The rows fetched above are already scoped to this user, so they double as
    // the lookup table for tag reuse (no per-tag query needed).
    const tagsByName = new Map((tags.data || []).map((t) => [t.name, t]));

    // Create and call OpenAI to process the input messages
    console.log("Creating OpenAI client for processing input");
    const openai = new OpenAI({
        apiKey: Deno.env.get('OPENAI_API_KEY')
    });

    const system_prompt = promptText(doc, 'memory_extraction');

    console.log("Calling OpenAI API for processing...");
    const response = await openai.chat.completions.create({
        model: 'gpt-4.1-mini',
        temperature: 0.1,
        max_completion_tokens: 20000,
        messages: [
            { role: 'system', content: system_prompt },
            ...messages,
            {
                role: "assistant",
                // The extraction prompt tells the model to treat the message that
                // starts with this exact marker as reference-only data.
                content: `--- REFERENCE DATA (DO NOT EXTRACT FROM THIS) ---

I have access to the following reference data:

Available tags: ${JSON.stringify(tags.data?.map(t => t.name) || [])}

Existing memories: ${JSON.stringify(relevantMemories || [])}

Now I will analyze the conversation above and extract memories.`
            }
        ]
    });

    const processedContent = response.choices[0]?.message?.content || '';
    console.log("Processing complete, sending processed content to client");

    // Decode the json content
    let processedData;
    try {
        processedData = JSON.parse(processedContent);
    } catch (error) {
        console.error("Error parsing processed content:", error);
        throw new Error("Failed to parse processed content", { cause: error });
    }

    // Model output is untrusted: anything other than an array of changes is ignored.
    const changes = Array.isArray(processedData?.changes) ? processedData.changes : [];

    // Iterate over the changes and process them
    for (const change of changes) {

        if (change.action === "ADD") {
            // Resolve (and create where needed) the tag rows first
            const memoryTags = await resolveTags(ctx, change.tags, tagsByName);

            // Now, insert the memory itself
            const insertMemory = await supabase
                .schema("mori")
                .from("memories")
                .insert([{
                    content: change.content,
                    context: change.context,
                    user_id: user.id,
                }])
                .select()
                .single();

            if (insertMemory.error || !insertMemory.data) {
                console.error("Error inserting memory:", insertMemory.error);
                continue;
            }

            // Now, link the tags to the memory in the memory_tags table
            if (memoryTags.length > 0) {
                const linkTags = await supabase
                    .schema("mori")
                    .from("memory_tags")
                    .insert(memoryTags.map((tag) => ({
                        memory_id: insertMemory.data.id,
                        tag_id: tag.id
                    })));

                if (linkTags.error) {
                    console.error("Error linking tags to memory:", linkTags.error);
                }
            }

        } else if (change.action === "UPDATE") {
            if (change.memory_id == null) {
                console.error("Skipping UPDATE without memory_id");
                continue;
            }

            // Update existing memory
            const updateMemory = await supabase
                .schema("mori")
                .from("memories")
                .update({
                    content: change.content,
                    context: change.context,
                    updated_at: new Date().toISOString()
                })
                .eq("id", change.memory_id)
                .eq("user_id", user.id);

            if (updateMemory.error) {
                console.error("Error updating memory:", updateMemory.error);
            }

            // TODO: Handle tag updates if needed
            // (delete old memory_tags links and create new ones)

        } else if (change.action === "DELETE") {
            if (change.memory_id == null) {
                console.error("Skipping DELETE without memory_id");
                continue;
            }

            // Delete memory (cascade should handle memory_tags)
            const deleteMemory = await supabase
                .schema("mori")
                .from("memories")
                .delete()
                .eq("id", change.memory_id)
                .eq("user_id", user.id);

            if (deleteMemory.error) {
                console.error("Error deleting memory:", deleteMemory.error);
            }
        }
    }

}

/*
    Stage 1: Fetch Relevant Memories.
 */
/**
 * Lets the model pick the tags relevant to the conversation and loads the
 * memories attached to them through the `get_memories_by_tags` RPC.
 *
 * @param ctx        Request context (client and authenticated user).
 * @param controller Response stream controller (currently unused by this stage).
 * @param messages   Conversation messages supplied by the client.
 * @param doc        Parsed prompts document.
 * @returns The RPC result rows, or an empty array when the model selected no
 *          usable tag list or the RPC failed.
 * @throws {Error} When the model output is not valid JSON.
 */
async function fetchRelevantMemories(ctx: PipelineContext, controller, messages, doc) {
    const { supabase, user } = ctx;

    // Fetch existing memory tags from the database, that belong to the user
    const tags = await supabase
        .schema("mori")
        .from("tags")
        .select("*")
        .eq("user_id", user.id);

    if (tags.error) {
        console.error("Error fetching tags:", tags.error);
    }

    console.log("Fetched existing tags for user:", tags.data?.length || 0);

    // Create and call OpenAI to process the input messages
    console.log("Creating OpenAI client for generating a response");
    const openai = new OpenAI({
        apiKey: Deno.env.get('OPENAI_API_KEY')
    });

    const system_prompt = promptText(doc, 'memory_query');

    console.log("Calling OpenAI API for fetching relevant memories...");
    const response = await openai.chat.completions.create({
        model: 'gpt-4.1-mini',
        messages: [
            { role: 'system', content: system_prompt },
            ...messages,
            {
                role: "user",
                content: "Existing tags: " + JSON.stringify(tags.data || [])
            }
        ],
    });

    const relevantMemoryTags = response.choices[0]?.message?.content || '';
    let relevantMemoryTagsParsed;
    try {
        relevantMemoryTagsParsed = JSON.parse(relevantMemoryTags);
    } catch (error) {
        console.error("Error parsing relevant memories content:", error);
        throw new Error("Failed to parse relevant memories content", { cause: error });
    }

    // Model output is untrusted: only forward a real array to the RPC.
    const selectedTags = relevantMemoryTagsParsed?.selected_tags;
    if (!Array.isArray(selectedTags)) {
        console.error("Model returned no selected_tags array; continuing without memories");
        return [];
    }

    const { data: relevantMemories, error } = await supabase
        .rpc("get_memories_by_tags", {
            tag_names: selectedTags,
            p_user_id: user.id
        });

    if (error) {
        console.error("Error fetching memories by tags:", error);
        return [];
    }

    return relevantMemories ?? [];
}

/*
    Stage 3: Generate Response
 */
/**
 * Streams the assistant reply to the client as `content` events.
 *
 * @param ctx              Request context (client and authenticated user).
 * @param controller       Response stream controller the chunks are written to.
 * @param messages         Conversation messages supplied by the client.
 * @param doc              Parsed prompts document.
 * @param relevantMemories Memories from stage 1, injected as extra context when non-empty.
 */
async function generateResponse(ctx: PipelineContext, controller, messages, doc, relevantMemories) {
    const { user } = ctx;

    console.log("Creating OpenAI client for generating a response");
    const openai = new OpenAI({
        apiKey: Deno.env.get('OPENAI_API_KEY')
    });

    const username = user.user_metadata?.username || 'User';

    // A replacer function inserts the name verbatim; a plain replacement string
    // would expand `$&`, `$1`, ... sequences contained in a user-chosen name.
    const system_prompt = promptText(doc, 'system_response')
        .replaceAll("{{username}}", () => username);

    console.log("Calling OpenAI API for streaming response...");

    const responseMessages = [
        { role: 'system', content: system_prompt },
    ];

    // Add relevant memories as context if available
    if (relevantMemories && relevantMemories.length > 0) {
        responseMessages.push({
            role: 'assistant',
            content: `Context from previous conversations:\n${relevantMemories.map(m => `- ${m.content}`).join('\n')}\n\nI'll use this context naturally in our conversation.`
        });
    }

    responseMessages.push(...messages);

    const stream = await openai.chat.completions.create({
        model: 'gpt-4.1-mini',
        messages: responseMessages,
        stream: true
    });

    console.log("Stream created, starting to read chunks...");
    for await (const chunk of stream) {
        const content = chunk.choices[0]?.delta?.content || '';
        if (content) {
            sendEvent(controller, { type: 'content', content });
        }
    }
}

/**
 * HTTP entry point.
 *
 * Answers CORS preflights, authenticates the bearer token against Supabase,
 * validates the JSON body, and then streams the three pipeline stages
 * (fetching -> processing -> responding -> complete) as `data:` events.
 */
serve(async (req) => {

    /*
        Handle CORS preflight requests
     */
    if (req.method === 'OPTIONS') {
        return new Response('ok', {
            headers: corsHeaders
        });
    }

    /*
        Authenticate with supabase API key
     */

    // Get the token from the Authorization header
    const authHeader = req.headers.get('Authorization');
    const token = authHeader?.replace('Bearer ', '');

    if (!token) {
        return jsonResponse({ error: 'Unauthorized' }, 401);
    }

    // Initialise a Supabase client that is private to this request
    const supabase = createClient(
        Deno.env.get('SUPABASE_URL') || '',
        Deno.env.get('SUPABASE_ANON_KEY') || '',
        {
            global: {
                headers: { Authorization: `Bearer ${token}` },
            },
        }
    );

    const auth = await supabase.auth.getUser(token);

    if (auth.error || !auth.data?.user) {
        return jsonResponse({ error: 'Unauthorized' }, 401);
    }

    // The user is resolved once here and handed to every stage.
    const ctx: PipelineContext = { supabase, user: auth.data.user };

    /*
        Gearing up to process the request
     */
    let body;
    try {
        body = await req.json();
    } catch (error) {
        console.error("Error parsing request body:", error);
        return jsonResponse({ error: 'Request body must be valid JSON' }, 400);
    }

    const messages = body?.messages;
    if (!Array.isArray(messages)) {
        return jsonResponse({ error: '"messages" must be an array' }, 400);
    }

    // Create the stream that will be used throughout the pipeline
    const readable = new ReadableStream({
        async start(controller) {
            try {

                /*
                    Stage 1: Fetch Relevant Memories
                 */
                sendEvent(controller, { type: 'stage', stage: 'fetching' });

                const relevantMemories = await fetchRelevantMemories(ctx, controller, messages, doc);

                /*
                    Stage 2: Extract Relevant Memories
                 */
                sendEvent(controller, { type: 'stage', stage: 'processing' });

                await extractMemories(ctx, controller, messages, doc, relevantMemories);

                /*
                    Stage 3: Stream the response back to the client
                 */
                sendEvent(controller, { type: 'stage', stage: 'responding' });

                await generateResponse(ctx, controller, messages, doc, relevantMemories);

                // Send stage update: complete
                sendEvent(controller, { type: 'stage', stage: 'complete' });

                console.log("Stream completed, closing controller");
                controller.close();
            } catch (error) {
                console.error("Error in pipeline:", error);
                controller.error(error);
            }
        }
    });

    return new Response(readable, {
        headers: {
            ...corsHeaders,
            'Content-Type': 'text/event-stream',
            'Cache-Control': 'no-cache',
            'Connection': 'keep-alive'
        }
    });
});
+1,231 @@ + + + + + + You are Mori, a personal companion designed to help {{username}} think through things and process what's on their mind. + + Speak naturally and conversationally. Keep responses brief unless they ask for more detail. No corporate AI language, no "as an AI" disclaimers. + + MEMORY CONTEXT: You may receive relevant memories from previous conversations. Use this context naturally—reference past discussions, recall details they've shared, and build on previous topics. Never explicitly say "according to my memory" or "I recall from our previous conversation"—just use the information naturally as if you've been paying attention all along. + + If {{username}} references something you don't have context for, simply ask them to share more: "Can you remind me about that?" or "Tell me more about what happened there." No apologies, no explanations about limitations. + + Use their name naturally—{{username}}. Reference it as you would if you'd been talking for years. + + Be direct and honest. If you don't know something, say so. If they're being unclear, ask for clarification. Don't fill gaps with assumptions. + + You're here to listen and help them see patterns, not to fix them or provide therapy. Just talk like someone who's paying attention. + + + + + + You are a memory extraction system for Mori. Your role is to identify and extract **atomic, distinct facts** about the user from conversations. + + CRITICAL: At the end of the conversation, you will receive a message starting with "--- REFERENCE DATA (DO NOT EXTRACT FROM THIS) ---". This contains existing tags and memories for YOUR REFERENCE ONLY. DO NOT extract memories from this data. ONLY extract from the actual user conversation messages that appear BEFORE the reference data. + + CORE PRINCIPLE: One memory = one fact + Each memory should be so specific that it cannot be meaningfully split further. 
+ + ✓ GOOD (atomic): + - "User is 28 years old" + - "User works as a software engineer" + - "User has a dog named Max" + - "User prefers morning workouts" + - "User is learning Spanish" + + ✗ BAD (compound): + - "User is a 28-year-old software engineer who works out in the morning and has a dog" + + EXTRACT INFORMATION ABOUT: + - Demographics (age, location, occupation - separate facts) + - Education & career (institution, field, year, specific courses/projects) + - Health & wellness (conditions, symptoms, specific behaviors, habits) + - Relationships (specific people, relationship dynamics, conflicts) + - Preferences & habits (specific likes/dislikes, routines, coping mechanisms) + - Skills & experience (languages, tools, years of experience, specific projects) + - Values & beliefs (attitudes toward specific topics, worldview elements) + - Significant events (life changes, achievements, challenges) + - Goals & fears (specific aspirations or concerns) + + DO NOT EXTRACT: + - Casual small talk or filler + - Questions to Mori + - Generic opinions unconnected to the user + - Vague statements without specificity + - Information that's too obvious or contextual to be useful alone + - Information from the reference data section + + GRANULARITY RULES: + 1. **Split compound statements**: If "and" or ";" appears, consider splitting + 2. **Separate general from specific**: "Has anxiety" + "Avoids phone calls" = 2 memories + 3. **One person per memory**: Partner's hobby is separate from relationship dynamic + 4. **One time period per memory**: Past event separate from current feelings about it + 5. **Avoid redundancy**: Don't extract near-duplicates with different wording + + MEMORY RECONCILIATION: + You will be provided with existing memories in the reference data that may be relevant to the current conversation. 
+ For each potential new memory, you must decide: + + **ADD** - Completely new information not previously captured + **UPDATE** - Replaces or refines an existing memory (provide memory_id) + **DELETE** - Explicitly invalidates an existing memory (provide memory_id) + + Reconciliation rules: + - If info contradicts existing memory, UPDATE the old one + - If info is already captured accurately, don't extract anything + - Temporal facts (age, job, location) should UPDATE old versions + - If user explicitly says something changed/ended, DELETE old memory + - Don't create duplicates—check existing memories first + + TAGGING GUIDELINES: + You will be provided with existing tags in the reference data section. + - **Reuse existing tags whenever possible** to maintain consistency + - Only create new tags when no existing tag fits + - 2-4 tags per memory + - Use lowercase, specific tags + - Include both broad ("health", "career") and specific ("python", "meditation") tags + - Prefer specific over generic when both apply + + New tag rules (only when necessary): + - Use lowercase + - Be specific but not overly narrow + - Follow existing tag patterns + - 1-2 words maximum + + CONTEXT FIELD: + Keep it brief (5-10 words). Note: + - When it was mentioned ("during work discussion", "in latest message") + - Why it matters ("explains morning routine", "background for project") + + OUTPUT FORMAT: + Return **only valid JSON**, nothing else. 
+ + If memories extracted: + { + "changes": [ + { + "action": "ADD", + "content": "One atomic, self-contained fact", + "context": "Brief note on when/why mentioned", + "tags": ["specific", "relevant", "tags"] + }, + { + "action": "UPDATE", + "memory_id": "mem_12345", + "content": "Updated fact", + "context": "Brief context", + "tags": ["updated", "tags"], + "reason": "Why this replaces the old memory" + }, + { + "action": "DELETE", + "memory_id": "mem_67890", + "reason": "Why this memory is no longer valid" + } + ] + } + + If no memories to extract: + { + "changes": [], + "reason": "Brief explanation of why nothing was extracted" + } + + EXTRACTION THOROUGHNESS: + - Rich sources (long messages, reports) should yield 20-50+ changes + - Don't self-limit; extract ALL atomic facts + - Err on the side of over-extraction rather than under-extraction + - Each paragraph of substantial content likely contains multiple extractable facts + + BE PRECISE. BE THOROUGH. BE ATOMIC. + Extract every distinct, useful fact about the user from their conversation messages. Never extract facts from the reference data section; use it only to reconcile against existing memories and to reuse existing tags. + + + + + + You are a memory routing system for Mori. Your only job is to select relevant tags to retrieve contextual memories. + + You will be provided with the user's conversation and a list of all available tags in the system (via tool message). + + Your task: + Select the most relevant tags to query the database for contextual memories. 
+ + SELECT TAGS IF: + - User references past conversations or shared context + - User discusses ongoing situations that likely have history + - User uses references assuming shared knowledge ("my project", "the issue", "my dog") + - Topic has temporal continuity (follow-ups, updates, changes) + - Understanding user's history would improve response quality + - User shares information about topics they've discussed before + + LEAVE TAGS EMPTY IF: + - Completely new topic with no history + - Generic questions answerable without personal context + - User provides all necessary context in current message + - Simple, self-contained requests + - Pure technical questions with no personal element + + TAG SELECTION RULES: + - When retrieval is needed, choose 3-10 tags that are most relevant to the message + - Be specific: prefer narrow tags over broad ones when both apply + - Select tags that would find memories providing useful context + - **Only select from the provided available tags list** + - Return an empty list when no retrieval is needed + + OUTPUT FORMAT (JSON only): + { + "selected_tags": ["tag1", "tag2", "tag3"], + "reasoning": "Brief explanation of tag selection" + } + + EXAMPLES: + + Message: "Hey, how are you?" + Output: + { + "selected_tags": [], + "reasoning": "Casual greeting with no context needs" + } + + Message: "I'm thinking about changing careers" + Output: + { + "selected_tags": ["work", "career", "goals"], + "reasoning": "Need context on current work situation and career goals" + } + + Message: "What's the capital of France?" + Output: + { + "selected_tags": [], + "reasoning": "Factual question, no personal context needed" + } + + Message: "My dog did the trick I've been teaching him!" 
+ Output: + { + "selected_tags": ["pets", "dog", "training"], + "reasoning": "Need context on pet and training progress" + } + + Message: "Started a new workout routine today" + Output: + { + "selected_tags": ["fitness", "health", "habits"], + "reasoning": "May relate to existing fitness goals or health context" + } + + Message: "I enjoy hiking" + Output: + { + "selected_tags": [], + "reasoning": "New preference statement with no context to retrieve" + } + + BE DECISIVE. SELECT ONLY THE MOST RELEVANT TAGS. + + + \ No newline at end of file