Docker support (#34)

* Updates for Linux for frontend/server

* frontend/server docker

* updated Dockerfile for deps related to node vectordb

* updates for collector in docker

* docker deps for ODT processing

* ignore another collector dir

* storage mount improvements; run as UID

* fix pypandoc version typo

* permissions fixes
This commit is contained in:
frasergr 2023-06-13 11:26:11 -07:00 committed by GitHub
parent ebd3a62866
commit 9f33b3dfcb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
32 changed files with 4921 additions and 200 deletions

13
.dockerignore Normal file
View File

@ -0,0 +1,13 @@
server/storage/documents/**
server/storage/vector-cache/**
server/storage/*.db
server/storage/lancedb
collector/hotdir/**
collector/v-env/**
collector/outputs/**
**/node_modules/
**/dist/
**/v-env/
**/__pycache__/
**/.env
**/.env.*

1
.gitignore vendored
View File

@ -5,6 +5,5 @@ v-env
node_modules node_modules
__pycache__ __pycache__
v-env v-env
*.lock
.DS_Store .DS_Store

View File

@ -52,9 +52,9 @@ Next, you will need some content to embed. This could be a Youtube Channel, Medi
[Go set up and run collector scripts](./collector/README.md) [Go set up and run collector scripts](./collector/README.md)
[Learn about documents](./server/documents/DOCUMENTS.md) [Learn about documents](./server/storage/documents/DOCUMENTS.md)
[Learn about vector caching](./server/vector-cache/VECTOR_CACHE.md) [Learn about vector caching](./server/storage/vector-cache/VECTOR_CACHE.md)
### Contributing ### Contributing
- create issue - create issue

View File

@ -1,5 +1,5 @@
import os import os
from whaaaaat import prompt, Separator from InquirerPy import inquirer
from scripts.youtube import youtube from scripts.youtube import youtube
from scripts.link import link, links from scripts.link import link, links
from scripts.substack import substack from scripts.substack import substack
@ -20,57 +20,46 @@ def main():
selection = input("Your selection: ") selection = input("Your selection: ")
method = methods.get(str(selection)) method = methods.get(str(selection))
else: else:
questions = [ method = inquirer.select(
{ message="What kind of data would you like to add to convert into long-term memory?",
"type": "list", choices=[
"name": "collector", {"name": "YouTube Channel", "value": "YouTube Channel"},
"message": "What kind of data would you like to add to convert into long-term memory?", {"name": "Substack", "value": "Substack"},
"choices": [ {"name": "Medium", "value": "Medium"},
"YouTube Channel", {"name": "Article or Blog Link(s)", "value": "Article or Blog Link(s)"},
"Substack", {"name": "Gitbook", "value": "Gitbook"},
"Medium", {"name": "Twitter", "value": "Twitter", "disabled": "Needs PR"},
"Article or Blog Link(s)", {"name": "Abort", "value": "Abort"},
"Gitbook", ],
Separator(), ).execute()
{"name": "Twitter", "disabled": "Needs PR"},
"Abort", if 'Article or Blog Link' in method:
], method = inquirer.select(
}, message="Do you want to scrape a single article/blog/url or many at once?",
] choices=[
method = prompt(questions).get('collector') {"name": "Single URL", "value": "Single URL"},
{"name": "Multiple URLs", "value": "Multiple URLs"},
if('Article or Blog Link' in method): {"name": "Abort", "value": "Abort"},
questions = [ ],
{ ).execute()
"type": "list", if method == 'Single URL':
"name": "collector",
"message": "Do you want to scrape a single article/blog/url or many at once?",
"choices": [
'Single URL',
'Multiple URLs',
'Abort',
],
},
]
method = prompt(questions).get('collector')
if(method == 'Single URL'):
link() link()
exit(0) exit(0)
if(method == 'Multiple URLs'): if method == 'Multiple URLs':
links() links()
exit(0) exit(0)
if(method == 'Abort'): exit(0) if method == 'Abort': exit(0)
if(method == 'YouTube Channel'): if method == 'YouTube Channel':
youtube() youtube()
exit(0) exit(0)
if(method == 'Substack'): if method == 'Substack':
substack() substack()
exit(0) exit(0)
if(method == 'Medium'): if method == 'Medium':
medium() medium()
exit(0) exit(0)
if(method == 'Gitbook'): if method == 'Gitbook':
gitbook() gitbook()
exit(0) exit(0)

View File

@ -20,6 +20,7 @@ cryptography==41.0.1
cssselect==1.2.0 cssselect==1.2.0
dataclasses-json==0.5.7 dataclasses-json==0.5.7
Deprecated==1.2.14 Deprecated==1.2.14
docx2txt==0.8
et-xmlfile==1.1.0 et-xmlfile==1.1.0
exceptiongroup==1.1.1 exceptiongroup==1.1.1
fake-useragent==1.1.3 fake-useragent==1.1.3
@ -30,6 +31,7 @@ h11==0.14.0
httpcore==0.16.3 httpcore==0.16.3
httpx==0.23.3 httpx==0.23.3
idna==3.4 idna==3.4
InquirerPy==0.3.4
importlib-metadata==6.6.0 importlib-metadata==6.6.0
importlib-resources==5.12.0 importlib-resources==5.12.0
install==1.3.5 install==1.3.5
@ -54,132 +56,13 @@ pandas==1.5.3
parse==1.19.0 parse==1.19.0
pdfminer.six==20221105 pdfminer.six==20221105
Pillow==9.5.0 Pillow==9.5.0
prompt-toolkit==1.0.14 prompt-toolkit==3.0.38
pycparser==2.21 pycparser==2.21
pydantic==1.10.8 pydantic==1.10.8
pyee==8.2.2 pyee==8.2.2
Pygments==2.15.1 Pygments==2.15.1
pyobjc==9.1.1
pyobjc-core==9.1.1
pyobjc-framework-Accounts==9.1.1
pyobjc-framework-AddressBook==9.1.1
pyobjc-framework-AdSupport==9.1.1
pyobjc-framework-AppleScriptKit==9.1.1
pyobjc-framework-AppleScriptObjC==9.1.1
pyobjc-framework-ApplicationServices==9.1.1
pyobjc-framework-AudioVideoBridging==9.1.1
pyobjc-framework-AuthenticationServices==9.1.1
pyobjc-framework-AutomaticAssessmentConfiguration==9.1.1
pyobjc-framework-Automator==9.1.1
pyobjc-framework-AVFoundation==9.1.1
pyobjc-framework-AVKit==9.1.1
pyobjc-framework-BusinessChat==9.1.1
pyobjc-framework-CalendarStore==9.1.1
pyobjc-framework-CFNetwork==9.1.1
pyobjc-framework-CloudKit==9.1.1
pyobjc-framework-Cocoa==9.1.1
pyobjc-framework-Collaboration==9.1.1
pyobjc-framework-ColorSync==9.1.1
pyobjc-framework-Contacts==9.1.1
pyobjc-framework-ContactsUI==9.1.1
pyobjc-framework-CoreAudio==9.1.1
pyobjc-framework-CoreAudioKit==9.1.1
pyobjc-framework-CoreBluetooth==9.1.1
pyobjc-framework-CoreData==9.1.1
pyobjc-framework-CoreHaptics==9.1.1
pyobjc-framework-CoreLocation==9.1.1
pyobjc-framework-CoreMedia==9.1.1
pyobjc-framework-CoreMediaIO==9.1.1
pyobjc-framework-CoreMIDI==9.1.1
pyobjc-framework-CoreML==9.1.1
pyobjc-framework-CoreMotion==9.1.1
pyobjc-framework-CoreServices==9.1.1
pyobjc-framework-CoreSpotlight==9.1.1
pyobjc-framework-CoreText==9.1.1
pyobjc-framework-CoreWLAN==9.1.1
pyobjc-framework-CryptoTokenKit==9.1.1
pyobjc-framework-DeviceCheck==9.1.1
pyobjc-framework-DictionaryServices==9.1.1
pyobjc-framework-DiscRecording==9.1.1
pyobjc-framework-DiscRecordingUI==9.1.1
pyobjc-framework-DiskArbitration==9.1.1
pyobjc-framework-DVDPlayback==9.1.1
pyobjc-framework-EventKit==9.1.1
pyobjc-framework-ExceptionHandling==9.1.1
pyobjc-framework-ExecutionPolicy==9.1.1
pyobjc-framework-ExternalAccessory==9.1.1
pyobjc-framework-FileProvider==9.1.1
pyobjc-framework-FileProviderUI==9.1.1
pyobjc-framework-FinderSync==9.1.1
pyobjc-framework-FSEvents==9.1.1
pyobjc-framework-GameCenter==9.1.1
pyobjc-framework-GameController==9.1.1
pyobjc-framework-GameKit==9.1.1
pyobjc-framework-GameplayKit==9.1.1
pyobjc-framework-ImageCaptureCore==9.1.1
pyobjc-framework-IMServicePlugIn==9.1.1
pyobjc-framework-InputMethodKit==9.1.1
pyobjc-framework-InstallerPlugins==9.1.1
pyobjc-framework-InstantMessage==9.1.1
pyobjc-framework-Intents==9.1.1
pyobjc-framework-IOBluetooth==9.1.1
pyobjc-framework-IOBluetoothUI==9.1.1
pyobjc-framework-IOSurface==9.1.1
pyobjc-framework-iTunesLibrary==9.1.1
pyobjc-framework-LatentSemanticMapping==9.1.1
pyobjc-framework-LaunchServices==9.1.1
pyobjc-framework-libdispatch==9.1.1
pyobjc-framework-libxpc==9.1.1
pyobjc-framework-LinkPresentation==9.1.1
pyobjc-framework-LocalAuthentication==9.1.1
pyobjc-framework-MapKit==9.1.1
pyobjc-framework-MediaAccessibility==9.1.1
pyobjc-framework-MediaLibrary==9.1.1
pyobjc-framework-MediaPlayer==9.1.1
pyobjc-framework-MediaToolbox==9.1.1
pyobjc-framework-Metal==9.1.1
pyobjc-framework-MetalKit==9.1.1
pyobjc-framework-MetalPerformanceShaders==9.1.1
pyobjc-framework-ModelIO==9.1.1
pyobjc-framework-MultipeerConnectivity==9.1.1
pyobjc-framework-NaturalLanguage==9.1.1
pyobjc-framework-NetFS==9.1.1
pyobjc-framework-Network==9.1.1
pyobjc-framework-NetworkExtension==9.1.1
pyobjc-framework-NotificationCenter==9.1.1
pyobjc-framework-OpenDirectory==9.1.1
pyobjc-framework-OSAKit==9.1.1
pyobjc-framework-OSLog==9.1.1
pyobjc-framework-PencilKit==9.1.1
pyobjc-framework-Photos==9.1.1
pyobjc-framework-PhotosUI==9.1.1
pyobjc-framework-PreferencePanes==9.1.1
pyobjc-framework-PushKit==9.1.1
pyobjc-framework-Quartz==9.1.1
pyobjc-framework-QuickLookThumbnailing==9.1.1
pyobjc-framework-SafariServices==9.1.1
pyobjc-framework-SceneKit==9.1.1
pyobjc-framework-ScreenSaver==9.1.1
pyobjc-framework-ScriptingBridge==9.1.1
pyobjc-framework-SearchKit==9.1.1
pyobjc-framework-Security==9.1.1
pyobjc-framework-SecurityFoundation==9.1.1
pyobjc-framework-SecurityInterface==9.1.1
pyobjc-framework-ServiceManagement==9.1.1
pyobjc-framework-Social==9.1.1
pyobjc-framework-SoundAnalysis==9.1.1
pyobjc-framework-Speech==9.1.1
pyobjc-framework-SpriteKit==9.1.1
pyobjc-framework-StoreKit==9.1.1
pyobjc-framework-SyncServices==9.1.1
pyobjc-framework-SystemConfiguration==9.1.1
pyobjc-framework-SystemExtensions==9.1.1
pyobjc-framework-UserNotifications==9.1.1
pyobjc-framework-VideoSubscriberAccount==9.1.1
pyobjc-framework-VideoToolbox==9.1.1
pyobjc-framework-Vision==9.1.1
pyobjc-framework-WebKit==9.1.1
pypandoc==1.4 pypandoc==1.4
pypdf==3.9.0
pyppeteer==1.0.2 pyppeteer==1.0.2
pyquery==2.0.0 pyquery==2.0.0
python-dateutil==2.8.2 python-dateutil==2.8.2
@ -199,6 +82,7 @@ six==1.16.0
sniffio==1.3.0 sniffio==1.3.0
soupsieve==2.4.1 soupsieve==2.4.1
SQLAlchemy==2.0.15 SQLAlchemy==2.0.15
tabulate==0.9.0
tenacity==8.2.2 tenacity==8.2.2
text-unidecode==1.3 text-unidecode==1.3
tiktoken==0.4.0 tiktoken==0.4.0
@ -212,10 +96,9 @@ uuid==1.30
w3lib==2.1.1 w3lib==2.1.1
wcwidth==0.2.6 wcwidth==0.2.6
websockets==10.4 websockets==10.4
whaaaaat==0.5.2
wrapt==1.14.1 wrapt==1.14.1
xlrd==2.0.1 xlrd==2.0.1
XlsxWriter==3.1.2 XlsxWriter==3.1.2
yarl==1.9.2 yarl==1.9.2
youtube-transcript-api==0.6.0 youtube-transcript-api==0.6.0
zipp==3.15.0 zipp==3.15.0

View File

@ -14,7 +14,7 @@ def gitbook():
primary_source = urlparse(url) primary_source = urlparse(url)
output_path = f"./outputs/gitbook-logs/{primary_source.netloc}" output_path = f"./outputs/gitbook-logs/{primary_source.netloc}"
transaction_output_dir = f"../server/documents/gitbook-{primary_source.netloc}" transaction_output_dir = f"../server/storage/documents/gitbook-{primary_source.netloc}"
if os.path.exists(output_path) == False:os.makedirs(output_path) if os.path.exists(output_path) == False:os.makedirs(output_path)
if os.path.exists(transaction_output_dir) == False: os.makedirs(transaction_output_dir) if os.path.exists(transaction_output_dir) == False: os.makedirs(transaction_output_dir)

View File

@ -36,7 +36,7 @@ def link():
output_path = f"./outputs/website-logs" output_path = f"./outputs/website-logs"
transaction_output_filename = f"article-{source.path.replace('/','_')}.json" transaction_output_filename = f"article-{source.path.replace('/','_')}.json"
transaction_output_dir = f"../server/documents/website-{source.netloc}" transaction_output_dir = f"../server/storage/documents/website-{source.netloc}"
if os.path.isdir(output_path) == False: if os.path.isdir(output_path) == False:
os.makedirs(output_path) os.makedirs(output_path)
@ -109,7 +109,7 @@ def links():
output_path = f"./outputs/website-logs" output_path = f"./outputs/website-logs"
transaction_output_filename = f"article-{source.path.replace('/','_')}.json" transaction_output_filename = f"article-{source.path.replace('/','_')}.json"
transaction_output_dir = f"../server/documents/website-{source.netloc}" transaction_output_dir = f"../server/storage/documents/website-{source.netloc}"
if os.path.isdir(output_path) == False: if os.path.isdir(output_path) == False:
os.makedirs(output_path) os.makedirs(output_path)

View File

@ -23,7 +23,7 @@ def medium():
exit(1) exit(1)
totalTokenCount = 0 totalTokenCount = 0
transaction_output_dir = f"../server/documents/medium-{handle}" transaction_output_dir = f"../server/storage/documents/medium-{handle}"
if os.path.isdir(transaction_output_dir) == False: if os.path.isdir(transaction_output_dir) == False:
os.makedirs(transaction_output_dir) os.makedirs(transaction_output_dir)

View File

@ -27,7 +27,7 @@ def substack():
print(f"{len(valid_publications)} of {len(publications)} publications are readable publically text posts - collecting those.") print(f"{len(valid_publications)} of {len(publications)} publications are readable publically text posts - collecting those.")
totalTokenCount = 0 totalTokenCount = 0
transaction_output_dir = f"../server/documents/substack-{subdomain}" transaction_output_dir = f"../server/storage/documents/substack-{subdomain}"
if os.path.isdir(transaction_output_dir) == False: if os.path.isdir(transaction_output_dir) == False:
os.makedirs(transaction_output_dir) os.makedirs(transaction_output_dir)

View File

@ -24,7 +24,7 @@ def move_source(working_dir='hotdir', new_destination_filename= ''):
return return
def write_to_server_documents(data, filename): def write_to_server_documents(data, filename):
destination = f"../server/documents/custom-documents" destination = f"../server/storage/documents/custom-documents"
if os.path.exists(destination) == False: os.makedirs(destination) if os.path.exists(destination) == False: os.makedirs(destination)
with open(f"{destination}/{filename}.json", 'w', encoding='utf-8') as file: with open(f"{destination}/{filename}.json", 'w', encoding='utf-8') as file:
json.dump(data, file, ensure_ascii=True, indent=4) json.dump(data, file, ensure_ascii=True, indent=4)

View File

@ -17,7 +17,7 @@ def youtube():
exit(1) exit(1)
channel_data = fetch_channel_video_information(channel_id) channel_data = fetch_channel_video_information(channel_id)
transaction_output_dir = f"../server/documents/youtube-{channel_data.get('channelTitle')}" transaction_output_dir = f"../server/storage/documents/youtube-{channel_data.get('channelTitle')}"
if os.path.isdir(transaction_output_dir) == False: if os.path.isdir(transaction_output_dir) == False:
os.makedirs(transaction_output_dir) os.makedirs(transaction_output_dir)

25
docker/.env.example Normal file
View File

@ -0,0 +1,25 @@
SERVER_PORT=3001
OPEN_AI_KEY=
OPEN_MODEL_PREF='gpt-3.5-turbo'
CACHE_VECTORS="true"
# Enable all below if you are using vector database: Chroma.
# VECTOR_DB="chroma"
# CHROMA_ENDPOINT='http://localhost:8000'
# Enable all below if you are using vector database: Pinecone.
VECTOR_DB="pinecone"
PINECONE_ENVIRONMENT=
PINECONE_API_KEY=
PINECONE_INDEX=
# Enable all below if you are using vector database: LanceDB.
# VECTOR_DB="lancedb"
# CLOUD DEPLOYMENT VARIABLES ONLY
# AUTH_TOKEN="hunter2" # This is the password to your application if remote hosting.
# JWT_SECRET="my-random-string-for-seeding" # Only needed if AUTH_TOKEN is set. Please generate random string at least 12 chars long.
STORAGE_DIR="./server/storage"
GOOGLE_APIS_KEY=
UID='1000'
GID='1000'

94
docker/Dockerfile Normal file
View File

@ -0,0 +1,94 @@
# Setup base image
FROM ubuntu:jammy-20230522 AS base

# Build arguments: UID/GID of the unprivileged user the image runs as.
# docker-compose.yml passes these from ${UID}/${GID}; the defaults mirror
# docker/.env.example so a plain `docker build` (no --build-arg) still works
# instead of failing inside groupadd/useradd with an empty id.
ARG ARG_UID=1000
ARG ARG_GID=1000

# Install system dependencies (shared libraries/fonts, Python, Node.js 18,
# yarn 1.22.19 and pandoc 3.1.3), then trim apt lists and icons to keep the
# layer small.
RUN DEBIAN_FRONTEND=noninteractive apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -yq --no-install-recommends \
        curl libgfortran5 python3 python3-pip tzdata netcat \
        libasound2 libatk1.0-0 libc6 libcairo2 libcups2 libdbus-1-3 libexpat1 libfontconfig1 \
        libgcc1 libglib2.0-0 libgtk-3-0 libnspr4 libpango-1.0-0 libx11-6 libx11-xcb1 libxcb1 \
        libxcomposite1 libxcursor1 libxdamage1 libxext6 libxfixes3 libxi6 libxrandr2 libxrender1 \
        libxss1 libxtst6 ca-certificates fonts-liberation libappindicator1 libnss3 lsb-release \
        xdg-utils && \
    curl -fsSL https://deb.nodesource.com/setup_18.x | bash - && \
    apt-get install -yq --no-install-recommends nodejs && \
    curl -LO https://github.com/yarnpkg/yarn/releases/download/v1.22.19/yarn_1.22.19_all.deb \
        && dpkg -i yarn_1.22.19_all.deb \
        && rm yarn_1.22.19_all.deb && \
    curl -LO https://github.com/jgm/pandoc/releases/download/3.1.3/pandoc-3.1.3-1-amd64.deb \
        && dpkg -i pandoc-3.1.3-1-amd64.deb \
        && rm pandoc-3.1.3-1-amd64.deb && \
    rm -rf /var/lib/apt/lists/* /usr/share/icons && \
    dpkg-reconfigure -f noninteractive tzdata && \
    python3 -m pip install --no-cache-dir virtualenv

# Create a group and user with specific UID and GID; /app is the user's home
# and is handed over to that user so later stages can write into it.
RUN groupadd -g $ARG_GID anythingllm && \
    useradd -u $ARG_UID -m -d /app -s /bin/bash -g anythingllm anythingllm && \
    mkdir -p /app/frontend/ /app/server/ /app/collector/ && chown -R anythingllm:anythingllm /app

# Copy the docker entrypoint and healthcheck scripts
COPY ./docker/docker-entrypoint.sh /usr/local/bin/
COPY ./docker/docker-healthcheck.sh /usr/local/bin/

# Ensure the scripts are executable
RUN chmod +x /usr/local/bin/docker-entrypoint.sh && \
    chmod +x /usr/local/bin/docker-healthcheck.sh

# Everything from here on runs as the unprivileged user, relative to /app
USER anythingllm
WORKDIR /app

# Install frontend dependencies
FROM base AS frontend-deps
COPY ./frontend/package.json ./frontend/yarn.lock ./frontend/
RUN cd ./frontend/ && yarn install && yarn cache clean

# Install server dependencies
FROM base AS server-deps
COPY ./server/package.json ./server/yarn.lock ./server/
# Drop the macOS (apple-darwin) native binaries bundled with vectordb; they are
# presumably not needed in this Linux image. `-f` keeps the build from failing
# if a vectordb release stops shipping one of these files.
RUN cd ./server/ && yarn install --production && yarn cache clean && \
    rm -f /app/server/node_modules/vectordb/x86_64-apple-darwin.node && \
    rm -f /app/server/node_modules/vectordb/aarch64-apple-darwin.node

# Build the frontend
FROM frontend-deps AS build-stage
COPY ./frontend/ ./frontend/
RUN cd ./frontend/ && yarn build && yarn cache clean

# Setup the server
FROM server-deps AS production-stage
COPY ./server/ ./server/

# Copy built static frontend files to the server public directory
COPY --from=build-stage /app/frontend/dist ./server/public

# Copy the collector
COPY ./collector/ ./collector/

# Install collector dependencies into a virtualenv under /app/collector/v-env
RUN cd /app/collector && \
    python3 -m virtualenv v-env && \
    . v-env/bin/activate && \
    pip install --no-cache-dir -r requirements.txt

# Setup the environment; putting the virtualenv first on PATH makes its
# python/pip the default for anything run in the container.
ENV NODE_ENV=production
ENV PATH=/app/collector/v-env/bin:$PATH

# Expose the server port
EXPOSE 3001

# Setup the healthcheck
HEALTHCHECK --interval=1m --timeout=10s --start-period=1m \
    CMD /bin/bash /usr/local/bin/docker-healthcheck.sh || exit 1

# Run the server
ENTRYPOINT ["docker-entrypoint.sh"]
CMD ["node", "/app/server/index.js"]

30
docker/docker-compose.yml Normal file
View File

@ -0,0 +1,30 @@
version: '3.9'
networks:
anything-llm:
driver: bridge
# chroma_net:
# external: true
services:
anything-llm:
container_name: anything-llm
image: anything-llm:latest
build:
context: ../.
dockerfile: ./docker/Dockerfile
args:
ARG_UID: ${UID}
ARG_GID: ${GID}
volumes:
- "../server/storage:/app/server/storage"
- "../collector/hotdir/:/app/collector/hotdir"
- "../collector/outputs/:/app/collector/outputs"
user: "${UID}:${GID}"
ports:
- "3001:3001"
env_file:
- .env
networks:
- anything-llm
# - chroma_net

3
docker/docker-entrypoint.sh Executable file
View File

@ -0,0 +1,3 @@
#!/usr/bin/env bash
# Container entrypoint: replace this shell with the command given as arguments
# (the image's CMD, or whatever command the container is started with), so that
# command runs in place of the wrapper script rather than as its child.
exec "$@"

View File

@ -0,0 +1,13 @@
#!/bin/bash

# Container healthcheck: succeeds (exit 0) when the server answers the ping
# endpoint with HTTP 200, fails (exit 1) otherwise.

# The server listens on SERVER_PORT when it is set and on 3001 otherwise, so
# probe the same port instead of assuming 3001.
port="${SERVER_PORT:-3001}"

# Send a request to the ping endpoint and capture only the HTTP status code
response=$(curl --write-out '%{http_code}' --silent --output /dev/null "http://localhost:${port}/api/ping")

# If the HTTP response code is 200 (OK), the server is up.
# Quoted string comparison: an empty or non-numeric result (e.g. curl missing)
# simply counts as "down" instead of raising a shell test error.
if [ "$response" = "200" ]; then
  echo "Server is up"
  exit 0
else
  echo "Server is down"
  exit 1
fi

View File

@ -3,6 +3,7 @@
"private": false, "private": false,
"version": "0.0.1-beta", "version": "0.0.1-beta",
"type": "module", "type": "module",
"license": "MIT",
"scripts": { "scripts": {
"start": "vite --open", "start": "vite --open",
"build": "vite build", "build": "vite build",

View File

@ -1,2 +1,2 @@
export const API_BASE = export const API_BASE =
import.meta.env.VITE_API_BASE || "http://localhost:3001"; import.meta.env.VITE_API_BASE || "http://localhost:3001/api";

2604
frontend/yarn.lock Normal file

File diff suppressed because it is too large Load Diff

View File

@ -10,7 +10,7 @@
}, },
"scripts": { "scripts": {
"lint": "cd server && yarn lint && cd .. && cd frontend && yarn lint", "lint": "cd server && yarn lint && cd .. && cd frontend && yarn lint",
"setup": "cd server && yarn && cd .. && yarn setup:envs && echo \"Please run yarn dev:server and yarn dev:frontend in separate terminal tabs.\"", "setup": "cd server && yarn && cd ../frontend && yarn && cd .. && yarn setup:envs && echo \"Please run yarn dev:server and yarn dev:frontend in separate terminal tabs.\"",
"setup:envs": "cd server && cp -n .env.example .env.development && cd ../collector && cp -n .env.example .env && cd ..", "setup:envs": "cd server && cp -n .env.example .env.development && cd ../collector && cp -n .env.example .env && cd ..",
"dev:server": "cd server && yarn dev", "dev:server": "cd server && yarn dev",
"dev:frontend": "cd frontend && yarn start", "dev:frontend": "cd frontend && yarn start",

9
server/.gitignore vendored
View File

@ -1,8 +1,9 @@
.env.production .env.production
.env.development .env.development
documents/* storage/documents/*
vector-cache/*.json storage/vector-cache/*.json
!documents/DOCUMENTS.md !storage/documents/DOCUMENTS.md
logs/server.log logs/server.log
*.db *.db
lancedb storage/lancedb
public/

View File

@ -5,6 +5,7 @@ process.env.NODE_ENV === "development"
const express = require("express"); const express = require("express");
const bodyParser = require("body-parser"); const bodyParser = require("body-parser");
const cors = require("cors"); const cors = require("cors");
const path = require("path");
const { validatedRequest } = require("./utils/middleware/validatedRequest"); const { validatedRequest } = require("./utils/middleware/validatedRequest");
const { reqBody } = require("./utils/http"); const { reqBody } = require("./utils/http");
const { systemEndpoints } = require("./endpoints/system"); const { systemEndpoints } = require("./endpoints/system");
@ -12,6 +13,7 @@ const { workspaceEndpoints } = require("./endpoints/workspaces");
const { chatEndpoints } = require("./endpoints/chat"); const { chatEndpoints } = require("./endpoints/chat");
const { getVectorDbClass } = require("./utils/helpers"); const { getVectorDbClass } = require("./utils/helpers");
const app = express(); const app = express();
const apiRouter = express.Router();
app.use(cors({ origin: true })); app.use(cors({ origin: true }));
app.use(bodyParser.text()); app.use(bodyParser.text());
@ -22,13 +24,13 @@ app.use(
}) })
); );
app.use("/system/*", validatedRequest); apiRouter.use("/system/*", validatedRequest);
app.use("/workspace/*", validatedRequest); apiRouter.use("/workspace/*", validatedRequest);
systemEndpoints(app); systemEndpoints(apiRouter);
workspaceEndpoints(app); workspaceEndpoints(apiRouter);
chatEndpoints(app); chatEndpoints(apiRouter);
app.post("/v/:command", async (request, response) => { apiRouter.post("/v/:command", async (request, response) => {
try { try {
const VectorDb = getVectorDbClass(); const VectorDb = getVectorDbClass();
const { command } = request.params; const { command } = request.params;
@ -56,14 +58,24 @@ app.post("/v/:command", async (request, response) => {
} }
}); });
app.use("/api", apiRouter);
if (process.env.NODE_ENV !== "development") {
app.use(express.static(path.resolve(__dirname, 'public'), {extensions: ["js"]}));
app.use("/", function (_, response) {
response.sendFile(path.join(__dirname, "public", "index.html"));
})
}
app.all("*", function (_, response) { app.all("*", function (_, response) {
response.sendStatus(404); response.sendStatus(404);
}); });
app app
.listen(process.env.SERVER_PORT || 5000, () => { .listen(process.env.SERVER_PORT || 3001, () => {
console.log( console.log(
`Example app listening on port ${process.env.SERVER_PORT || 5000}` `Example app listening on port ${process.env.SERVER_PORT || 3001}`
); );
}) })
.on("error", function (err) { .on("error", function (err) {

View File

@ -20,7 +20,7 @@ const Document = {
const db = await open({ const db = await open({
filename: `${ filename: `${
!!process.env.STORAGE_DIR ? `${process.env.STORAGE_DIR}/` : "" !!process.env.STORAGE_DIR ? `${process.env.STORAGE_DIR}/` : "storage/"
}anythingllm.db`, }anythingllm.db`,
driver: sqlite3.Database, driver: sqlite3.Database,
}); });

View File

@ -18,7 +18,7 @@ const DocumentVectors = {
const db = await open({ const db = await open({
filename: `${ filename: `${
!!process.env.STORAGE_DIR ? `${process.env.STORAGE_DIR}/` : "" !!process.env.STORAGE_DIR ? `${process.env.STORAGE_DIR}/` : "storage/"
}anythingllm.db`, }anythingllm.db`,
driver: sqlite3.Database, driver: sqlite3.Database,
}); });

View File

@ -17,7 +17,7 @@ const Workspace = {
const db = await open({ const db = await open({
filename: `${ filename: `${
!!process.env.STORAGE_DIR ? `${process.env.STORAGE_DIR}/` : "" !!process.env.STORAGE_DIR ? `${process.env.STORAGE_DIR}/` : "storage/"
}anythingllm.db`, }anythingllm.db`,
driver: sqlite3.Database, driver: sqlite3.Database,
}); });

View File

@ -15,7 +15,7 @@ const WorkspaceChats = {
const db = await open({ const db = await open({
filename: `${ filename: `${
!!process.env.STORAGE_DIR ? `${process.env.STORAGE_DIR}/` : "" !!process.env.STORAGE_DIR ? `${process.env.STORAGE_DIR}/` : "storage/"
}anythingllm.db`, }anythingllm.db`,
driver: sqlite3.Database, driver: sqlite3.Database,
}); });

View File

@ -31,7 +31,7 @@
"sqlite3": "^5.1.6", "sqlite3": "^5.1.6",
"uuid": "^9.0.0", "uuid": "^9.0.0",
"jsonwebtoken": "^8.5.1", "jsonwebtoken": "^8.5.1",
"vectordb": "0.1.5-beta" "vectordb": "0.1.5"
}, },
"devDependencies": { "devDependencies": {
"nodemon": "^2.0.22", "nodemon": "^2.0.22",

View File

@ -6,7 +6,7 @@ async function collectDocumentData(folderName = null) {
if (!folderName) throw new Error("No docPath provided in request"); if (!folderName) throw new Error("No docPath provided in request");
const folder = const folder =
process.env.NODE_ENV === "development" process.env.NODE_ENV === "development"
? path.resolve(__dirname, `../../documents/${folderName}`) ? path.resolve(__dirname, `../../storage/documents/${folderName}`)
: path.resolve(process.env.STORAGE_DIR, `documents/${folderName}`); : path.resolve(process.env.STORAGE_DIR, `documents/${folderName}`);
const dirExists = fs.existsSync(folder); const dirExists = fs.existsSync(folder);
@ -35,7 +35,7 @@ async function fileData(filePath = null) {
const fullPath = const fullPath =
process.env.NODE_ENV === "development" process.env.NODE_ENV === "development"
? path.resolve(__dirname, `../../documents/${filePath}`) ? path.resolve(__dirname, `../../storage/documents/${filePath}`)
: path.resolve(process.env.STORAGE_DIR, `documents/${filePath}`); : path.resolve(process.env.STORAGE_DIR, `documents/${filePath}`);
const fileExists = fs.existsSync(fullPath); const fileExists = fs.existsSync(fullPath);
if (!fileExists) return null; if (!fileExists) return null;
@ -47,7 +47,7 @@ async function fileData(filePath = null) {
async function viewLocalFiles() { async function viewLocalFiles() {
const folder = const folder =
process.env.NODE_ENV === "development" process.env.NODE_ENV === "development"
? path.resolve(__dirname, `../../documents`) ? path.resolve(__dirname, `../../storage/documents`)
: path.resolve(process.env.STORAGE_DIR, `documents`); : path.resolve(process.env.STORAGE_DIR, `documents`);
const dirExists = fs.existsSync(folder); const dirExists = fs.existsSync(folder);
if (!dirExists) fs.mkdirSync(folder); if (!dirExists) fs.mkdirSync(folder);
@ -63,7 +63,7 @@ async function viewLocalFiles() {
const folderPath = const folderPath =
process.env.NODE_ENV === "development" process.env.NODE_ENV === "development"
? path.resolve(__dirname, `../../documents/${file}`) ? path.resolve(__dirname, `../../storage/documents/${file}`)
: path.resolve(process.env.STORAGE_DIR, `documents/${file}`); : path.resolve(process.env.STORAGE_DIR, `documents/${file}`);
const isFolder = fs.lstatSync(folderPath).isDirectory(); const isFolder = fs.lstatSync(folderPath).isDirectory();
@ -106,7 +106,7 @@ async function cachedVectorInformation(filename = null, checkOnly = false) {
const digest = uuidv5(filename, uuidv5.URL); const digest = uuidv5(filename, uuidv5.URL);
const file = const file =
process.env.NODE_ENV === "development" process.env.NODE_ENV === "development"
? path.resolve(__dirname, `../../vector-cache/${digest}.json`) ? path.resolve(__dirname, `../../storage/vector-cache/${digest}.json`)
: path.resolve(process.env.STORAGE_DIR, `vector-cache/${digest}.json`); : path.resolve(process.env.STORAGE_DIR, `vector-cache/${digest}.json`);
const exists = fs.existsSync(file); const exists = fs.existsSync(file);
@ -130,7 +130,7 @@ async function storeVectorResult(vectorData = [], filename = null) {
); );
const folder = const folder =
process.env.NODE_ENV === "development" process.env.NODE_ENV === "development"
? path.resolve(__dirname, `../../vector-cache`) ? path.resolve(__dirname, `../../storage/vector-cache`)
: path.resolve(process.env.STORAGE_DIR, `vector-cache`); : path.resolve(process.env.STORAGE_DIR, `vector-cache`);
if (!fs.existsSync(folder)) fs.mkdirSync(folder); if (!fs.existsSync(folder)) fs.mkdirSync(folder);

View File

@ -27,7 +27,7 @@ function curateLanceSources(sources = []) {
const LanceDb = { const LanceDb = {
uri: `${ uri: `${
!!process.env.STORAGE_DIR ? `${process.env.STORAGE_DIR}/` : "./" !!process.env.STORAGE_DIR ? `${process.env.STORAGE_DIR}/` : "./storage/"
}lancedb`, }lancedb`,
name: "LanceDb", name: "LanceDb",
connect: async function () { connect: async function () {

2054
server/yarn.lock Normal file

File diff suppressed because it is too large Load Diff