handling of citations in openRouter provider #3581 (#3620)

* handling of citations in openRouter provider #3581

* Update pplx enrichToken function comment
Modify the OpenRouter enrichToken to be a generic handler function with optional params
handle _just_ Perplexity in-line citations since no other models support this functionality

* remove console log

---------

Co-authored-by: timothycarambat <rambat1010@gmail.com>
This commit is contained in:
Michał Rudziński 2025-04-15 19:57:09 +02:00 committed by GitHub
parent 716f348d6c
commit be27299897
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 68 additions and 18 deletions

View File

@ -51,6 +51,32 @@ class OpenRouterLLM {
this.log("Initialized with model:", this.model);
}
/**
* Returns true if the model is a Perplexity model.
* OpenRouter has support for a lot of models and we have some special handling for Perplexity models
* that support in-line citations.
* @returns {boolean}
*/
get isPerplexityModel() {
return this.model.startsWith("perplexity/");
}
/**
* Generic formatting of a token for the following use cases:
* - Perplexity models that return inline citations in the token text
* @param {{token: string, citations: string[]}} options - The token text and citations.
* @returns {string} - The formatted token text.
*/
enrichToken({ token, citations = [] }) {
if (!Array.isArray(citations) || citations.length === 0) return token;
return token.replace(/\[(\d+)\]/g, (match, index) => {
const citationIndex = parseInt(index) - 1;
return citations[citationIndex]
? `[[${index}](${citations[citationIndex]})]`
: match;
});
}
log(text, ...args) {
console.log(`\x1b[36m[${this.constructor.name}]\x1b[0m ${text}`, ...args);
}
@ -126,9 +152,9 @@ class OpenRouterLLM {
const cacheModelPath = path.resolve(cacheFolder, "models.json");
const availableModels = fs.existsSync(cacheModelPath)
? safeJsonParse(
fs.readFileSync(cacheModelPath, { encoding: "utf-8" }),
{}
)
fs.readFileSync(cacheModelPath, { encoding: "utf-8" }),
{}
)
: {};
return availableModels[modelName]?.maxLength || 4096;
}
@ -284,6 +310,8 @@ class OpenRouterLLM {
let fullText = "";
let reasoningText = "";
let lastChunkTime = null; // null when first token is still not received.
let pplxCitations = []; // Array of inline citations for Perplexity models (if applicable)
let isPerplexity = this.isPerplexityModel;
// Establish listener to early-abort a streaming response
// in case things go sideways or the user does not like the response.
@ -309,6 +337,7 @@ class OpenRouterLLM {
const now = Number(new Date());
const diffMs = now - lastChunkTime;
if (diffMs >= timeoutThresholdMs) {
console.log(
`OpenRouter stream did not self-close and has been stale for >${timeoutThresholdMs}ms. Closing response stream.`
@ -337,9 +366,22 @@ class OpenRouterLLM {
const reasoningToken = message?.delta?.reasoning;
lastChunkTime = Number(new Date());
// Some models will return citations (e.g. Perplexity) - we should preserve them for inline citations if applicable.
if (
isPerplexity &&
Array.isArray(chunk?.citations) &&
chunk?.citations?.length !== 0
)
pplxCitations.push(...chunk.citations);
// Reasoning models will always return the reasoning text before the token text.
// can be null or ''
if (reasoningToken) {
const formattedReasoningToken = this.enrichToken({
token: reasoningToken,
citations: pplxCitations,
});
// If the reasoning text is empty (''), we need to initialize it
// and send the first chunk of reasoning text.
if (reasoningText.length === 0) {
@ -347,11 +389,11 @@ class OpenRouterLLM {
uuid,
sources: [],
type: "textResponseChunk",
textResponse: `<think>${reasoningToken}`,
textResponse: `<think>${formattedReasoningToken}`,
close: false,
error: false,
});
reasoningText += `<think>${reasoningToken}`;
reasoningText += `<think>${formattedReasoningToken}`;
continue;
} else {
// If the reasoning text is not empty, we need to append the reasoning text
@ -360,11 +402,11 @@ class OpenRouterLLM {
uuid,
sources: [],
type: "textResponseChunk",
textResponse: reasoningToken,
textResponse: formattedReasoningToken,
close: false,
error: false,
});
reasoningText += reasoningToken;
reasoningText += formattedReasoningToken;
}
}
@ -384,12 +426,16 @@ class OpenRouterLLM {
}
if (token) {
fullText += token;
const formattedToken = this.enrichToken({
token,
citations: pplxCitations,
});
fullText += formattedToken;
writeResponseChunk(response, {
uuid,
sources: [],
type: "textResponseChunk",
textResponse: token,
textResponse: formattedToken,
close: false,
error: false,
});

View File

@ -139,16 +139,20 @@ class PerplexityLLM {
return measuredStreamRequest;
}
/**
* Enrich a token with citations if available for in-line citations.
* @param {string} token - The token to enrich.
* @param {Array} citations - The citations to enrich the token with.
* @returns {string} The enriched token.
*/
enrichToken(token, citations) {
if (Array.isArray(citations) && citations.length !== 0) {
return token.replace(/\[(\d+)\]/g, (match, index) => {
const citationIndex = parseInt(index) - 1;
return citations[citationIndex]
? `[[${index}](${citations[citationIndex]})]`
: match;
});
}
return token;
if (!Array.isArray(citations) || citations.length === 0) return token;
return token.replace(/\[(\d+)\]/g, (match, index) => {
const citationIndex = parseInt(index) - 1;
return citations[citationIndex]
? `[[${index}](${citations[citationIndex]})]`
: match;
});
}
handleStream(response, stream, responseProps) {