Docker support (#34)
* Updates for Linux for frontend/server * frontend/server docker * updated Dockerfile for deps related to node vectordb * updates for collector in docker * docker deps for ODT processing * ignore another collector dir * storage mount improvements; run as UID * fix pypandoc version typo * permissions fixes
This commit is contained in:
parent
ebd3a62866
commit
9f33b3dfcb
13
.dockerignore
Normal file
13
.dockerignore
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
server/storage/documents/**
|
||||||
|
server/storage/vector-cache/**
|
||||||
|
server/storage/*.db
|
||||||
|
server/storage/lancedb
|
||||||
|
collector/hotdir/**
|
||||||
|
collector/v-env/**
|
||||||
|
collector/outputs/**
|
||||||
|
**/node_modules/
|
||||||
|
**/dist/
|
||||||
|
**/v-env/
|
||||||
|
**/__pycache__/
|
||||||
|
**/.env
|
||||||
|
**/.env.*
|
||||||
1
.gitignore
vendored
1
.gitignore
vendored
@ -5,6 +5,5 @@ v-env
|
|||||||
node_modules
|
node_modules
|
||||||
__pycache__
|
__pycache__
|
||||||
v-env
|
v-env
|
||||||
*.lock
|
|
||||||
.DS_Store
|
.DS_Store
|
||||||
|
|
||||||
|
|||||||
@ -52,9 +52,9 @@ Next, you will need some content to embed. This could be a Youtube Channel, Medi
|
|||||||
|
|
||||||
[Go set up and run collector scripts](./collector/README.md)
|
[Go set up and run collector scripts](./collector/README.md)
|
||||||
|
|
||||||
[Learn about documents](./server/documents/DOCUMENTS.md)
|
[Learn about documents](./server/storage/documents/DOCUMENTS.md)
|
||||||
|
|
||||||
[Learn about vector caching](./server/vector-cache/VECTOR_CACHE.md)
|
[Learn about vector caching](./server/storage/vector-cache/VECTOR_CACHE.md)
|
||||||
|
|
||||||
### Contributing
|
### Contributing
|
||||||
- create issue
|
- create issue
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
import os
|
import os
|
||||||
from whaaaaat import prompt, Separator
|
from InquirerPy import inquirer
|
||||||
from scripts.youtube import youtube
|
from scripts.youtube import youtube
|
||||||
from scripts.link import link, links
|
from scripts.link import link, links
|
||||||
from scripts.substack import substack
|
from scripts.substack import substack
|
||||||
@ -20,57 +20,46 @@ def main():
|
|||||||
selection = input("Your selection: ")
|
selection = input("Your selection: ")
|
||||||
method = methods.get(str(selection))
|
method = methods.get(str(selection))
|
||||||
else:
|
else:
|
||||||
questions = [
|
method = inquirer.select(
|
||||||
{
|
message="What kind of data would you like to add to convert into long-term memory?",
|
||||||
"type": "list",
|
choices=[
|
||||||
"name": "collector",
|
{"name": "YouTube Channel", "value": "YouTube Channel"},
|
||||||
"message": "What kind of data would you like to add to convert into long-term memory?",
|
{"name": "Substack", "value": "Substack"},
|
||||||
"choices": [
|
{"name": "Medium", "value": "Medium"},
|
||||||
"YouTube Channel",
|
{"name": "Article or Blog Link(s)", "value": "Article or Blog Link(s)"},
|
||||||
"Substack",
|
{"name": "Gitbook", "value": "Gitbook"},
|
||||||
"Medium",
|
{"name": "Twitter", "value": "Twitter", "disabled": "Needs PR"},
|
||||||
"Article or Blog Link(s)",
|
{"name": "Abort", "value": "Abort"},
|
||||||
"Gitbook",
|
],
|
||||||
Separator(),
|
).execute()
|
||||||
{"name": "Twitter", "disabled": "Needs PR"},
|
|
||||||
"Abort",
|
|
||||||
],
|
|
||||||
},
|
|
||||||
]
|
|
||||||
method = prompt(questions).get('collector')
|
|
||||||
|
|
||||||
if('Article or Blog Link' in method):
|
if 'Article or Blog Link' in method:
|
||||||
questions = [
|
method = inquirer.select(
|
||||||
{
|
message="Do you want to scrape a single article/blog/url or many at once?",
|
||||||
"type": "list",
|
choices=[
|
||||||
"name": "collector",
|
{"name": "Single URL", "value": "Single URL"},
|
||||||
"message": "Do you want to scrape a single article/blog/url or many at once?",
|
{"name": "Multiple URLs", "value": "Multiple URLs"},
|
||||||
"choices": [
|
{"name": "Abort", "value": "Abort"},
|
||||||
'Single URL',
|
],
|
||||||
'Multiple URLs',
|
).execute()
|
||||||
'Abort',
|
if method == 'Single URL':
|
||||||
],
|
|
||||||
},
|
|
||||||
]
|
|
||||||
method = prompt(questions).get('collector')
|
|
||||||
if(method == 'Single URL'):
|
|
||||||
link()
|
link()
|
||||||
exit(0)
|
exit(0)
|
||||||
if(method == 'Multiple URLs'):
|
if method == 'Multiple URLs':
|
||||||
links()
|
links()
|
||||||
exit(0)
|
exit(0)
|
||||||
|
|
||||||
if(method == 'Abort'): exit(0)
|
if method == 'Abort': exit(0)
|
||||||
if(method == 'YouTube Channel'):
|
if method == 'YouTube Channel':
|
||||||
youtube()
|
youtube()
|
||||||
exit(0)
|
exit(0)
|
||||||
if(method == 'Substack'):
|
if method == 'Substack':
|
||||||
substack()
|
substack()
|
||||||
exit(0)
|
exit(0)
|
||||||
if(method == 'Medium'):
|
if method == 'Medium':
|
||||||
medium()
|
medium()
|
||||||
exit(0)
|
exit(0)
|
||||||
if(method == 'Gitbook'):
|
if method == 'Gitbook':
|
||||||
gitbook()
|
gitbook()
|
||||||
exit(0)
|
exit(0)
|
||||||
|
|
||||||
|
|||||||
@ -20,6 +20,7 @@ cryptography==41.0.1
|
|||||||
cssselect==1.2.0
|
cssselect==1.2.0
|
||||||
dataclasses-json==0.5.7
|
dataclasses-json==0.5.7
|
||||||
Deprecated==1.2.14
|
Deprecated==1.2.14
|
||||||
|
docx2txt==0.8
|
||||||
et-xmlfile==1.1.0
|
et-xmlfile==1.1.0
|
||||||
exceptiongroup==1.1.1
|
exceptiongroup==1.1.1
|
||||||
fake-useragent==1.1.3
|
fake-useragent==1.1.3
|
||||||
@ -30,6 +31,7 @@ h11==0.14.0
|
|||||||
httpcore==0.16.3
|
httpcore==0.16.3
|
||||||
httpx==0.23.3
|
httpx==0.23.3
|
||||||
idna==3.4
|
idna==3.4
|
||||||
|
InquirerPy==0.3.4
|
||||||
importlib-metadata==6.6.0
|
importlib-metadata==6.6.0
|
||||||
importlib-resources==5.12.0
|
importlib-resources==5.12.0
|
||||||
install==1.3.5
|
install==1.3.5
|
||||||
@ -54,132 +56,13 @@ pandas==1.5.3
|
|||||||
parse==1.19.0
|
parse==1.19.0
|
||||||
pdfminer.six==20221105
|
pdfminer.six==20221105
|
||||||
Pillow==9.5.0
|
Pillow==9.5.0
|
||||||
prompt-toolkit==1.0.14
|
prompt-toolkit==3.0.38
|
||||||
pycparser==2.21
|
pycparser==2.21
|
||||||
pydantic==1.10.8
|
pydantic==1.10.8
|
||||||
pyee==8.2.2
|
pyee==8.2.2
|
||||||
Pygments==2.15.1
|
Pygments==2.15.1
|
||||||
pyobjc==9.1.1
|
|
||||||
pyobjc-core==9.1.1
|
|
||||||
pyobjc-framework-Accounts==9.1.1
|
|
||||||
pyobjc-framework-AddressBook==9.1.1
|
|
||||||
pyobjc-framework-AdSupport==9.1.1
|
|
||||||
pyobjc-framework-AppleScriptKit==9.1.1
|
|
||||||
pyobjc-framework-AppleScriptObjC==9.1.1
|
|
||||||
pyobjc-framework-ApplicationServices==9.1.1
|
|
||||||
pyobjc-framework-AudioVideoBridging==9.1.1
|
|
||||||
pyobjc-framework-AuthenticationServices==9.1.1
|
|
||||||
pyobjc-framework-AutomaticAssessmentConfiguration==9.1.1
|
|
||||||
pyobjc-framework-Automator==9.1.1
|
|
||||||
pyobjc-framework-AVFoundation==9.1.1
|
|
||||||
pyobjc-framework-AVKit==9.1.1
|
|
||||||
pyobjc-framework-BusinessChat==9.1.1
|
|
||||||
pyobjc-framework-CalendarStore==9.1.1
|
|
||||||
pyobjc-framework-CFNetwork==9.1.1
|
|
||||||
pyobjc-framework-CloudKit==9.1.1
|
|
||||||
pyobjc-framework-Cocoa==9.1.1
|
|
||||||
pyobjc-framework-Collaboration==9.1.1
|
|
||||||
pyobjc-framework-ColorSync==9.1.1
|
|
||||||
pyobjc-framework-Contacts==9.1.1
|
|
||||||
pyobjc-framework-ContactsUI==9.1.1
|
|
||||||
pyobjc-framework-CoreAudio==9.1.1
|
|
||||||
pyobjc-framework-CoreAudioKit==9.1.1
|
|
||||||
pyobjc-framework-CoreBluetooth==9.1.1
|
|
||||||
pyobjc-framework-CoreData==9.1.1
|
|
||||||
pyobjc-framework-CoreHaptics==9.1.1
|
|
||||||
pyobjc-framework-CoreLocation==9.1.1
|
|
||||||
pyobjc-framework-CoreMedia==9.1.1
|
|
||||||
pyobjc-framework-CoreMediaIO==9.1.1
|
|
||||||
pyobjc-framework-CoreMIDI==9.1.1
|
|
||||||
pyobjc-framework-CoreML==9.1.1
|
|
||||||
pyobjc-framework-CoreMotion==9.1.1
|
|
||||||
pyobjc-framework-CoreServices==9.1.1
|
|
||||||
pyobjc-framework-CoreSpotlight==9.1.1
|
|
||||||
pyobjc-framework-CoreText==9.1.1
|
|
||||||
pyobjc-framework-CoreWLAN==9.1.1
|
|
||||||
pyobjc-framework-CryptoTokenKit==9.1.1
|
|
||||||
pyobjc-framework-DeviceCheck==9.1.1
|
|
||||||
pyobjc-framework-DictionaryServices==9.1.1
|
|
||||||
pyobjc-framework-DiscRecording==9.1.1
|
|
||||||
pyobjc-framework-DiscRecordingUI==9.1.1
|
|
||||||
pyobjc-framework-DiskArbitration==9.1.1
|
|
||||||
pyobjc-framework-DVDPlayback==9.1.1
|
|
||||||
pyobjc-framework-EventKit==9.1.1
|
|
||||||
pyobjc-framework-ExceptionHandling==9.1.1
|
|
||||||
pyobjc-framework-ExecutionPolicy==9.1.1
|
|
||||||
pyobjc-framework-ExternalAccessory==9.1.1
|
|
||||||
pyobjc-framework-FileProvider==9.1.1
|
|
||||||
pyobjc-framework-FileProviderUI==9.1.1
|
|
||||||
pyobjc-framework-FinderSync==9.1.1
|
|
||||||
pyobjc-framework-FSEvents==9.1.1
|
|
||||||
pyobjc-framework-GameCenter==9.1.1
|
|
||||||
pyobjc-framework-GameController==9.1.1
|
|
||||||
pyobjc-framework-GameKit==9.1.1
|
|
||||||
pyobjc-framework-GameplayKit==9.1.1
|
|
||||||
pyobjc-framework-ImageCaptureCore==9.1.1
|
|
||||||
pyobjc-framework-IMServicePlugIn==9.1.1
|
|
||||||
pyobjc-framework-InputMethodKit==9.1.1
|
|
||||||
pyobjc-framework-InstallerPlugins==9.1.1
|
|
||||||
pyobjc-framework-InstantMessage==9.1.1
|
|
||||||
pyobjc-framework-Intents==9.1.1
|
|
||||||
pyobjc-framework-IOBluetooth==9.1.1
|
|
||||||
pyobjc-framework-IOBluetoothUI==9.1.1
|
|
||||||
pyobjc-framework-IOSurface==9.1.1
|
|
||||||
pyobjc-framework-iTunesLibrary==9.1.1
|
|
||||||
pyobjc-framework-LatentSemanticMapping==9.1.1
|
|
||||||
pyobjc-framework-LaunchServices==9.1.1
|
|
||||||
pyobjc-framework-libdispatch==9.1.1
|
|
||||||
pyobjc-framework-libxpc==9.1.1
|
|
||||||
pyobjc-framework-LinkPresentation==9.1.1
|
|
||||||
pyobjc-framework-LocalAuthentication==9.1.1
|
|
||||||
pyobjc-framework-MapKit==9.1.1
|
|
||||||
pyobjc-framework-MediaAccessibility==9.1.1
|
|
||||||
pyobjc-framework-MediaLibrary==9.1.1
|
|
||||||
pyobjc-framework-MediaPlayer==9.1.1
|
|
||||||
pyobjc-framework-MediaToolbox==9.1.1
|
|
||||||
pyobjc-framework-Metal==9.1.1
|
|
||||||
pyobjc-framework-MetalKit==9.1.1
|
|
||||||
pyobjc-framework-MetalPerformanceShaders==9.1.1
|
|
||||||
pyobjc-framework-ModelIO==9.1.1
|
|
||||||
pyobjc-framework-MultipeerConnectivity==9.1.1
|
|
||||||
pyobjc-framework-NaturalLanguage==9.1.1
|
|
||||||
pyobjc-framework-NetFS==9.1.1
|
|
||||||
pyobjc-framework-Network==9.1.1
|
|
||||||
pyobjc-framework-NetworkExtension==9.1.1
|
|
||||||
pyobjc-framework-NotificationCenter==9.1.1
|
|
||||||
pyobjc-framework-OpenDirectory==9.1.1
|
|
||||||
pyobjc-framework-OSAKit==9.1.1
|
|
||||||
pyobjc-framework-OSLog==9.1.1
|
|
||||||
pyobjc-framework-PencilKit==9.1.1
|
|
||||||
pyobjc-framework-Photos==9.1.1
|
|
||||||
pyobjc-framework-PhotosUI==9.1.1
|
|
||||||
pyobjc-framework-PreferencePanes==9.1.1
|
|
||||||
pyobjc-framework-PushKit==9.1.1
|
|
||||||
pyobjc-framework-Quartz==9.1.1
|
|
||||||
pyobjc-framework-QuickLookThumbnailing==9.1.1
|
|
||||||
pyobjc-framework-SafariServices==9.1.1
|
|
||||||
pyobjc-framework-SceneKit==9.1.1
|
|
||||||
pyobjc-framework-ScreenSaver==9.1.1
|
|
||||||
pyobjc-framework-ScriptingBridge==9.1.1
|
|
||||||
pyobjc-framework-SearchKit==9.1.1
|
|
||||||
pyobjc-framework-Security==9.1.1
|
|
||||||
pyobjc-framework-SecurityFoundation==9.1.1
|
|
||||||
pyobjc-framework-SecurityInterface==9.1.1
|
|
||||||
pyobjc-framework-ServiceManagement==9.1.1
|
|
||||||
pyobjc-framework-Social==9.1.1
|
|
||||||
pyobjc-framework-SoundAnalysis==9.1.1
|
|
||||||
pyobjc-framework-Speech==9.1.1
|
|
||||||
pyobjc-framework-SpriteKit==9.1.1
|
|
||||||
pyobjc-framework-StoreKit==9.1.1
|
|
||||||
pyobjc-framework-SyncServices==9.1.1
|
|
||||||
pyobjc-framework-SystemConfiguration==9.1.1
|
|
||||||
pyobjc-framework-SystemExtensions==9.1.1
|
|
||||||
pyobjc-framework-UserNotifications==9.1.1
|
|
||||||
pyobjc-framework-VideoSubscriberAccount==9.1.1
|
|
||||||
pyobjc-framework-VideoToolbox==9.1.1
|
|
||||||
pyobjc-framework-Vision==9.1.1
|
|
||||||
pyobjc-framework-WebKit==9.1.1
|
|
||||||
pypandoc==1.4
|
pypandoc==1.4
|
||||||
|
pypdf==3.9.0
|
||||||
pyppeteer==1.0.2
|
pyppeteer==1.0.2
|
||||||
pyquery==2.0.0
|
pyquery==2.0.0
|
||||||
python-dateutil==2.8.2
|
python-dateutil==2.8.2
|
||||||
@ -199,6 +82,7 @@ six==1.16.0
|
|||||||
sniffio==1.3.0
|
sniffio==1.3.0
|
||||||
soupsieve==2.4.1
|
soupsieve==2.4.1
|
||||||
SQLAlchemy==2.0.15
|
SQLAlchemy==2.0.15
|
||||||
|
tabulate==0.9.0
|
||||||
tenacity==8.2.2
|
tenacity==8.2.2
|
||||||
text-unidecode==1.3
|
text-unidecode==1.3
|
||||||
tiktoken==0.4.0
|
tiktoken==0.4.0
|
||||||
@ -212,7 +96,6 @@ uuid==1.30
|
|||||||
w3lib==2.1.1
|
w3lib==2.1.1
|
||||||
wcwidth==0.2.6
|
wcwidth==0.2.6
|
||||||
websockets==10.4
|
websockets==10.4
|
||||||
whaaaaat==0.5.2
|
|
||||||
wrapt==1.14.1
|
wrapt==1.14.1
|
||||||
xlrd==2.0.1
|
xlrd==2.0.1
|
||||||
XlsxWriter==3.1.2
|
XlsxWriter==3.1.2
|
||||||
|
|||||||
@ -14,7 +14,7 @@ def gitbook():
|
|||||||
|
|
||||||
primary_source = urlparse(url)
|
primary_source = urlparse(url)
|
||||||
output_path = f"./outputs/gitbook-logs/{primary_source.netloc}"
|
output_path = f"./outputs/gitbook-logs/{primary_source.netloc}"
|
||||||
transaction_output_dir = f"../server/documents/gitbook-{primary_source.netloc}"
|
transaction_output_dir = f"../server/storage/documents/gitbook-{primary_source.netloc}"
|
||||||
|
|
||||||
if os.path.exists(output_path) == False:os.makedirs(output_path)
|
if os.path.exists(output_path) == False:os.makedirs(output_path)
|
||||||
if os.path.exists(transaction_output_dir) == False: os.makedirs(transaction_output_dir)
|
if os.path.exists(transaction_output_dir) == False: os.makedirs(transaction_output_dir)
|
||||||
|
|||||||
@ -36,7 +36,7 @@ def link():
|
|||||||
output_path = f"./outputs/website-logs"
|
output_path = f"./outputs/website-logs"
|
||||||
|
|
||||||
transaction_output_filename = f"article-{source.path.replace('/','_')}.json"
|
transaction_output_filename = f"article-{source.path.replace('/','_')}.json"
|
||||||
transaction_output_dir = f"../server/documents/website-{source.netloc}"
|
transaction_output_dir = f"../server/storage/documents/website-{source.netloc}"
|
||||||
|
|
||||||
if os.path.isdir(output_path) == False:
|
if os.path.isdir(output_path) == False:
|
||||||
os.makedirs(output_path)
|
os.makedirs(output_path)
|
||||||
@ -109,7 +109,7 @@ def links():
|
|||||||
output_path = f"./outputs/website-logs"
|
output_path = f"./outputs/website-logs"
|
||||||
|
|
||||||
transaction_output_filename = f"article-{source.path.replace('/','_')}.json"
|
transaction_output_filename = f"article-{source.path.replace('/','_')}.json"
|
||||||
transaction_output_dir = f"../server/documents/website-{source.netloc}"
|
transaction_output_dir = f"../server/storage/documents/website-{source.netloc}"
|
||||||
|
|
||||||
if os.path.isdir(output_path) == False:
|
if os.path.isdir(output_path) == False:
|
||||||
os.makedirs(output_path)
|
os.makedirs(output_path)
|
||||||
|
|||||||
@ -23,7 +23,7 @@ def medium():
|
|||||||
exit(1)
|
exit(1)
|
||||||
|
|
||||||
totalTokenCount = 0
|
totalTokenCount = 0
|
||||||
transaction_output_dir = f"../server/documents/medium-{handle}"
|
transaction_output_dir = f"../server/storage/documents/medium-{handle}"
|
||||||
if os.path.isdir(transaction_output_dir) == False:
|
if os.path.isdir(transaction_output_dir) == False:
|
||||||
os.makedirs(transaction_output_dir)
|
os.makedirs(transaction_output_dir)
|
||||||
|
|
||||||
|
|||||||
@ -27,7 +27,7 @@ def substack():
|
|||||||
print(f"{len(valid_publications)} of {len(publications)} publications are readable publically text posts - collecting those.")
|
print(f"{len(valid_publications)} of {len(publications)} publications are readable publically text posts - collecting those.")
|
||||||
|
|
||||||
totalTokenCount = 0
|
totalTokenCount = 0
|
||||||
transaction_output_dir = f"../server/documents/substack-{subdomain}"
|
transaction_output_dir = f"../server/storage/documents/substack-{subdomain}"
|
||||||
if os.path.isdir(transaction_output_dir) == False:
|
if os.path.isdir(transaction_output_dir) == False:
|
||||||
os.makedirs(transaction_output_dir)
|
os.makedirs(transaction_output_dir)
|
||||||
|
|
||||||
|
|||||||
@ -24,7 +24,7 @@ def move_source(working_dir='hotdir', new_destination_filename= ''):
|
|||||||
return
|
return
|
||||||
|
|
||||||
def write_to_server_documents(data, filename):
|
def write_to_server_documents(data, filename):
|
||||||
destination = f"../server/documents/custom-documents"
|
destination = f"../server/storage/documents/custom-documents"
|
||||||
if os.path.exists(destination) == False: os.makedirs(destination)
|
if os.path.exists(destination) == False: os.makedirs(destination)
|
||||||
with open(f"{destination}/{filename}.json", 'w', encoding='utf-8') as file:
|
with open(f"{destination}/{filename}.json", 'w', encoding='utf-8') as file:
|
||||||
json.dump(data, file, ensure_ascii=True, indent=4)
|
json.dump(data, file, ensure_ascii=True, indent=4)
|
||||||
|
|||||||
@ -17,7 +17,7 @@ def youtube():
|
|||||||
exit(1)
|
exit(1)
|
||||||
|
|
||||||
channel_data = fetch_channel_video_information(channel_id)
|
channel_data = fetch_channel_video_information(channel_id)
|
||||||
transaction_output_dir = f"../server/documents/youtube-{channel_data.get('channelTitle')}"
|
transaction_output_dir = f"../server/storage/documents/youtube-{channel_data.get('channelTitle')}"
|
||||||
|
|
||||||
if os.path.isdir(transaction_output_dir) == False:
|
if os.path.isdir(transaction_output_dir) == False:
|
||||||
os.makedirs(transaction_output_dir)
|
os.makedirs(transaction_output_dir)
|
||||||
|
|||||||
25
docker/.env.example
Normal file
25
docker/.env.example
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
SERVER_PORT=3001
|
||||||
|
OPEN_AI_KEY=
|
||||||
|
OPEN_MODEL_PREF='gpt-3.5-turbo'
|
||||||
|
CACHE_VECTORS="true"
|
||||||
|
|
||||||
|
# Enable all below if you are using vector database: Chroma.
|
||||||
|
# VECTOR_DB="chroma"
|
||||||
|
# CHROMA_ENDPOINT='http://localhost:8000'
|
||||||
|
|
||||||
|
# Enable all below if you are using vector database: Pinecone.
|
||||||
|
VECTOR_DB="pinecone"
|
||||||
|
PINECONE_ENVIRONMENT=
|
||||||
|
PINECONE_API_KEY=
|
||||||
|
PINECONE_INDEX=
|
||||||
|
|
||||||
|
# Enable all below if you are using vector database: LanceDB.
|
||||||
|
# VECTOR_DB="lancedb"
|
||||||
|
|
||||||
|
# CLOUD DEPLOYMENT VARIABLES ONLY
|
||||||
|
# AUTH_TOKEN="hunter2" # This is the password to your application if remote hosting.
|
||||||
|
# JWT_SECRET="my-random-string-for-seeding" # Only needed if AUTH_TOKEN is set. Please generate a random string at least 12 chars long.
|
||||||
|
STORAGE_DIR="./server/storage"
|
||||||
|
GOOGLE_APIS_KEY=
|
||||||
|
UID='1000'
|
||||||
|
GID='1000'
|
||||||
94
docker/Dockerfile
Normal file
94
docker/Dockerfile
Normal file
@ -0,0 +1,94 @@
|
|||||||
|
# Setup base image
|
||||||
|
FROM ubuntu:jammy-20230522 AS base
|
||||||
|
|
||||||
|
# Build arguments
|
||||||
|
ARG ARG_UID
|
||||||
|
ARG ARG_GID
|
||||||
|
|
||||||
|
# Install system dependencies
|
||||||
|
RUN DEBIAN_FRONTEND=noninteractive apt-get update && \
|
||||||
|
DEBIAN_FRONTEND=noninteractive apt-get install -yq --no-install-recommends \
|
||||||
|
curl libgfortran5 python3 python3-pip tzdata netcat \
|
||||||
|
libasound2 libatk1.0-0 libc6 libcairo2 libcups2 libdbus-1-3 libexpat1 libfontconfig1 \
|
||||||
|
libgcc1 libglib2.0-0 libgtk-3-0 libnspr4 libpango-1.0-0 libx11-6 libx11-xcb1 libxcb1 \
|
||||||
|
libxcomposite1 libxcursor1 libxdamage1 libxext6 libxfixes3 libxi6 libxrandr2 libxrender1 \
|
||||||
|
libxss1 libxtst6 ca-certificates fonts-liberation libappindicator1 libnss3 lsb-release \
|
||||||
|
xdg-utils && \
|
||||||
|
curl -fsSL https://deb.nodesource.com/setup_18.x | bash - && \
|
||||||
|
apt-get install -yq --no-install-recommends nodejs && \
|
||||||
|
curl -LO https://github.com/yarnpkg/yarn/releases/download/v1.22.19/yarn_1.22.19_all.deb \
|
||||||
|
&& dpkg -i yarn_1.22.19_all.deb \
|
||||||
|
&& rm yarn_1.22.19_all.deb && \
|
||||||
|
curl -LO https://github.com/jgm/pandoc/releases/download/3.1.3/pandoc-3.1.3-1-amd64.deb \
|
||||||
|
&& dpkg -i pandoc-3.1.3-1-amd64.deb \
|
||||||
|
&& rm pandoc-3.1.3-1-amd64.deb && \
|
||||||
|
rm -rf /var/lib/apt/lists/* /usr/share/icons && \
|
||||||
|
dpkg-reconfigure -f noninteractive tzdata && \
|
||||||
|
python3 -m pip install --no-cache-dir virtualenv
|
||||||
|
|
||||||
|
# Create a group and user with specific UID and GID
|
||||||
|
RUN groupadd -g $ARG_GID anythingllm && \
|
||||||
|
useradd -u $ARG_UID -m -d /app -s /bin/bash -g anythingllm anythingllm && \
|
||||||
|
mkdir -p /app/frontend/ /app/server/ /app/collector/ && chown -R anythingllm:anythingllm /app
|
||||||
|
|
||||||
|
# Copy the docker entrypoint and healthcheck scripts
|
||||||
|
COPY ./docker/docker-entrypoint.sh /usr/local/bin/
|
||||||
|
COPY ./docker/docker-healthcheck.sh /usr/local/bin/
|
||||||
|
|
||||||
|
# Ensure the scripts are executable
|
||||||
|
RUN chmod +x /usr/local/bin/docker-entrypoint.sh && \
|
||||||
|
chmod +x /usr/local/bin/docker-healthcheck.sh
|
||||||
|
|
||||||
|
USER anythingllm
|
||||||
|
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
# Install frontend dependencies
|
||||||
|
FROM base as frontend-deps
|
||||||
|
|
||||||
|
COPY ./frontend/package.json ./frontend/yarn.lock ./frontend/
|
||||||
|
RUN cd ./frontend/ && yarn install && yarn cache clean
|
||||||
|
|
||||||
|
# Install server dependencies
|
||||||
|
FROM base as server-deps
|
||||||
|
COPY ./server/package.json ./server/yarn.lock ./server/
|
||||||
|
RUN cd ./server/ && yarn install --production && yarn cache clean && \
|
||||||
|
rm /app/server/node_modules/vectordb/x86_64-apple-darwin.node && \
|
||||||
|
rm /app/server/node_modules/vectordb/aarch64-apple-darwin.node
|
||||||
|
|
||||||
|
# Build the frontend
|
||||||
|
FROM frontend-deps as build-stage
|
||||||
|
COPY ./frontend/ ./frontend/
|
||||||
|
RUN cd ./frontend/ && yarn build && yarn cache clean
|
||||||
|
|
||||||
|
# Setup the server
|
||||||
|
FROM server-deps as production-stage
|
||||||
|
COPY ./server/ ./server/
|
||||||
|
|
||||||
|
# Copy built static frontend files to the server public directory
|
||||||
|
COPY --from=build-stage /app/frontend/dist ./server/public
|
||||||
|
|
||||||
|
# Copy the collector
|
||||||
|
COPY ./collector/ ./collector/
|
||||||
|
|
||||||
|
# Install collector dependencies
|
||||||
|
RUN cd /app/collector && \
|
||||||
|
python3 -m virtualenv v-env && \
|
||||||
|
. v-env/bin/activate && \
|
||||||
|
pip install --no-cache-dir -r requirements.txt
|
||||||
|
|
||||||
|
# Setup the environment
|
||||||
|
ENV NODE_ENV=production
|
||||||
|
ENV PATH=/app/collector/v-env/bin:$PATH
|
||||||
|
|
||||||
|
# Expose the server port
|
||||||
|
EXPOSE 3001
|
||||||
|
|
||||||
|
# Setup the healthcheck
|
||||||
|
HEALTHCHECK --interval=1m --timeout=10s --start-period=1m \
|
||||||
|
CMD /bin/bash /usr/local/bin/docker-healthcheck.sh || exit 1
|
||||||
|
|
||||||
|
# Run the server
|
||||||
|
ENTRYPOINT ["docker-entrypoint.sh"]
|
||||||
|
|
||||||
|
CMD ["node", "/app/server/index.js"]
|
||||||
30
docker/docker-compose.yml
Normal file
30
docker/docker-compose.yml
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
version: '3.9'
|
||||||
|
|
||||||
|
networks:
|
||||||
|
anything-llm:
|
||||||
|
driver: bridge
|
||||||
|
# chroma_net:
|
||||||
|
# external: true
|
||||||
|
|
||||||
|
services:
|
||||||
|
anything-llm:
|
||||||
|
container_name: anything-llm
|
||||||
|
image: anything-llm:latest
|
||||||
|
build:
|
||||||
|
context: ../.
|
||||||
|
dockerfile: ./docker/Dockerfile
|
||||||
|
args:
|
||||||
|
ARG_UID: ${UID}
|
||||||
|
ARG_GID: ${GID}
|
||||||
|
volumes:
|
||||||
|
- "../server/storage:/app/server/storage"
|
||||||
|
- "../collector/hotdir/:/app/collector/hotdir"
|
||||||
|
- "../collector/outputs/:/app/collector/outputs"
|
||||||
|
user: "${UID}:${GID}"
|
||||||
|
ports:
|
||||||
|
- "3001:3001"
|
||||||
|
env_file:
|
||||||
|
- .env
|
||||||
|
networks:
|
||||||
|
- anything-llm
|
||||||
|
# - chroma_net
|
||||||
3
docker/docker-entrypoint.sh
Executable file
3
docker/docker-entrypoint.sh
Executable file
@ -0,0 +1,3 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
exec "$@"
|
||||||
13
docker/docker-healthcheck.sh
Normal file
13
docker/docker-healthcheck.sh
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
# Send a request to the specified URL
|
||||||
|
response=$(curl --write-out '%{http_code}' --silent --output /dev/null http://localhost:3001/api/ping)
|
||||||
|
|
||||||
|
# If the HTTP response code is 200 (OK), the server is up
|
||||||
|
if [ $response -eq 200 ]; then
|
||||||
|
echo "Server is up"
|
||||||
|
exit 0
|
||||||
|
else
|
||||||
|
echo "Server is down"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
@ -3,6 +3,7 @@
|
|||||||
"private": false,
|
"private": false,
|
||||||
"version": "0.0.1-beta",
|
"version": "0.0.1-beta",
|
||||||
"type": "module",
|
"type": "module",
|
||||||
|
"license": "MIT",
|
||||||
"scripts": {
|
"scripts": {
|
||||||
"start": "vite --open",
|
"start": "vite --open",
|
||||||
"build": "vite build",
|
"build": "vite build",
|
||||||
|
|||||||
@ -1,2 +1,2 @@
|
|||||||
export const API_BASE =
|
export const API_BASE =
|
||||||
import.meta.env.VITE_API_BASE || "http://localhost:3001";
|
import.meta.env.VITE_API_BASE || "http://localhost:3001/api";
|
||||||
|
|||||||
2604
frontend/yarn.lock
Normal file
2604
frontend/yarn.lock
Normal file
File diff suppressed because it is too large
Load Diff
@ -10,7 +10,7 @@
|
|||||||
},
|
},
|
||||||
"scripts": {
|
"scripts": {
|
||||||
"lint": "cd server && yarn lint && cd .. && cd frontend && yarn lint",
|
"lint": "cd server && yarn lint && cd .. && cd frontend && yarn lint",
|
||||||
"setup": "cd server && yarn && cd .. && yarn setup:envs && echo \"Please run yarn dev:server and yarn dev:frontend in separate terminal tabs.\"",
|
"setup": "cd server && yarn && cd ../frontend && yarn && cd .. && yarn setup:envs && echo \"Please run yarn dev:server and yarn dev:frontend in separate terminal tabs.\"",
|
||||||
"setup:envs": "cd server && cp -n .env.example .env.development && cd ../collector && cp -n .env.example .env && cd ..",
|
"setup:envs": "cd server && cp -n .env.example .env.development && cd ../collector && cp -n .env.example .env && cd ..",
|
||||||
"dev:server": "cd server && yarn dev",
|
"dev:server": "cd server && yarn dev",
|
||||||
"dev:frontend": "cd frontend && yarn start",
|
"dev:frontend": "cd frontend && yarn start",
|
||||||
|
|||||||
9
server/.gitignore
vendored
9
server/.gitignore
vendored
@ -1,8 +1,9 @@
|
|||||||
.env.production
|
.env.production
|
||||||
.env.development
|
.env.development
|
||||||
documents/*
|
storage/documents/*
|
||||||
vector-cache/*.json
|
storage/vector-cache/*.json
|
||||||
!documents/DOCUMENTS.md
|
!storage/documents/DOCUMENTS.md
|
||||||
logs/server.log
|
logs/server.log
|
||||||
*.db
|
*.db
|
||||||
lancedb
|
storage/lancedb
|
||||||
|
public/
|
||||||
@ -5,6 +5,7 @@ process.env.NODE_ENV === "development"
|
|||||||
const express = require("express");
|
const express = require("express");
|
||||||
const bodyParser = require("body-parser");
|
const bodyParser = require("body-parser");
|
||||||
const cors = require("cors");
|
const cors = require("cors");
|
||||||
|
const path = require("path");
|
||||||
const { validatedRequest } = require("./utils/middleware/validatedRequest");
|
const { validatedRequest } = require("./utils/middleware/validatedRequest");
|
||||||
const { reqBody } = require("./utils/http");
|
const { reqBody } = require("./utils/http");
|
||||||
const { systemEndpoints } = require("./endpoints/system");
|
const { systemEndpoints } = require("./endpoints/system");
|
||||||
@ -12,6 +13,7 @@ const { workspaceEndpoints } = require("./endpoints/workspaces");
|
|||||||
const { chatEndpoints } = require("./endpoints/chat");
|
const { chatEndpoints } = require("./endpoints/chat");
|
||||||
const { getVectorDbClass } = require("./utils/helpers");
|
const { getVectorDbClass } = require("./utils/helpers");
|
||||||
const app = express();
|
const app = express();
|
||||||
|
const apiRouter = express.Router();
|
||||||
|
|
||||||
app.use(cors({ origin: true }));
|
app.use(cors({ origin: true }));
|
||||||
app.use(bodyParser.text());
|
app.use(bodyParser.text());
|
||||||
@ -22,13 +24,13 @@ app.use(
|
|||||||
})
|
})
|
||||||
);
|
);
|
||||||
|
|
||||||
app.use("/system/*", validatedRequest);
|
apiRouter.use("/system/*", validatedRequest);
|
||||||
app.use("/workspace/*", validatedRequest);
|
apiRouter.use("/workspace/*", validatedRequest);
|
||||||
systemEndpoints(app);
|
systemEndpoints(apiRouter);
|
||||||
workspaceEndpoints(app);
|
workspaceEndpoints(apiRouter);
|
||||||
chatEndpoints(app);
|
chatEndpoints(apiRouter);
|
||||||
|
|
||||||
app.post("/v/:command", async (request, response) => {
|
apiRouter.post("/v/:command", async (request, response) => {
|
||||||
try {
|
try {
|
||||||
const VectorDb = getVectorDbClass();
|
const VectorDb = getVectorDbClass();
|
||||||
const { command } = request.params;
|
const { command } = request.params;
|
||||||
@ -56,14 +58,24 @@ app.post("/v/:command", async (request, response) => {
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
app.use("/api", apiRouter);
|
||||||
|
|
||||||
|
if (process.env.NODE_ENV !== "development") {
|
||||||
|
app.use(express.static(path.resolve(__dirname, 'public'), {extensions: ["js"]}));
|
||||||
|
|
||||||
|
app.use("/", function (_, response) {
|
||||||
|
response.sendFile(path.join(__dirname, "public", "index.html"));
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
app.all("*", function (_, response) {
|
app.all("*", function (_, response) {
|
||||||
response.sendStatus(404);
|
response.sendStatus(404);
|
||||||
});
|
});
|
||||||
|
|
||||||
app
|
app
|
||||||
.listen(process.env.SERVER_PORT || 5000, () => {
|
.listen(process.env.SERVER_PORT || 3001, () => {
|
||||||
console.log(
|
console.log(
|
||||||
`Example app listening on port ${process.env.SERVER_PORT || 5000}`
|
`Example app listening on port ${process.env.SERVER_PORT || 3001}`
|
||||||
);
|
);
|
||||||
})
|
})
|
||||||
.on("error", function (err) {
|
.on("error", function (err) {
|
||||||
|
|||||||
@ -20,7 +20,7 @@ const Document = {
|
|||||||
|
|
||||||
const db = await open({
|
const db = await open({
|
||||||
filename: `${
|
filename: `${
|
||||||
!!process.env.STORAGE_DIR ? `${process.env.STORAGE_DIR}/` : ""
|
!!process.env.STORAGE_DIR ? `${process.env.STORAGE_DIR}/` : "storage/"
|
||||||
}anythingllm.db`,
|
}anythingllm.db`,
|
||||||
driver: sqlite3.Database,
|
driver: sqlite3.Database,
|
||||||
});
|
});
|
||||||
|
|||||||
@ -18,7 +18,7 @@ const DocumentVectors = {
|
|||||||
|
|
||||||
const db = await open({
|
const db = await open({
|
||||||
filename: `${
|
filename: `${
|
||||||
!!process.env.STORAGE_DIR ? `${process.env.STORAGE_DIR}/` : ""
|
!!process.env.STORAGE_DIR ? `${process.env.STORAGE_DIR}/` : "storage/"
|
||||||
}anythingllm.db`,
|
}anythingllm.db`,
|
||||||
driver: sqlite3.Database,
|
driver: sqlite3.Database,
|
||||||
});
|
});
|
||||||
|
|||||||
@ -17,7 +17,7 @@ const Workspace = {
|
|||||||
|
|
||||||
const db = await open({
|
const db = await open({
|
||||||
filename: `${
|
filename: `${
|
||||||
!!process.env.STORAGE_DIR ? `${process.env.STORAGE_DIR}/` : ""
|
!!process.env.STORAGE_DIR ? `${process.env.STORAGE_DIR}/` : "storage/"
|
||||||
}anythingllm.db`,
|
}anythingllm.db`,
|
||||||
driver: sqlite3.Database,
|
driver: sqlite3.Database,
|
||||||
});
|
});
|
||||||
|
|||||||
@ -15,7 +15,7 @@ const WorkspaceChats = {
|
|||||||
|
|
||||||
const db = await open({
|
const db = await open({
|
||||||
filename: `${
|
filename: `${
|
||||||
!!process.env.STORAGE_DIR ? `${process.env.STORAGE_DIR}/` : ""
|
!!process.env.STORAGE_DIR ? `${process.env.STORAGE_DIR}/` : "storage/"
|
||||||
}anythingllm.db`,
|
}anythingllm.db`,
|
||||||
driver: sqlite3.Database,
|
driver: sqlite3.Database,
|
||||||
});
|
});
|
||||||
|
|||||||
@ -31,7 +31,7 @@
|
|||||||
"sqlite3": "^5.1.6",
|
"sqlite3": "^5.1.6",
|
||||||
"uuid": "^9.0.0",
|
"uuid": "^9.0.0",
|
||||||
"jsonwebtoken": "^8.5.1",
|
"jsonwebtoken": "^8.5.1",
|
||||||
"vectordb": "0.1.5-beta"
|
"vectordb": "0.1.5"
|
||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
"nodemon": "^2.0.22",
|
"nodemon": "^2.0.22",
|
||||||
|
|||||||
@ -6,7 +6,7 @@ async function collectDocumentData(folderName = null) {
|
|||||||
if (!folderName) throw new Error("No docPath provided in request");
|
if (!folderName) throw new Error("No docPath provided in request");
|
||||||
const folder =
|
const folder =
|
||||||
process.env.NODE_ENV === "development"
|
process.env.NODE_ENV === "development"
|
||||||
? path.resolve(__dirname, `../../documents/${folderName}`)
|
? path.resolve(__dirname, `../../storage/documents/${folderName}`)
|
||||||
: path.resolve(process.env.STORAGE_DIR, `documents/${folderName}`);
|
: path.resolve(process.env.STORAGE_DIR, `documents/${folderName}`);
|
||||||
|
|
||||||
const dirExists = fs.existsSync(folder);
|
const dirExists = fs.existsSync(folder);
|
||||||
@ -35,7 +35,7 @@ async function fileData(filePath = null) {
|
|||||||
|
|
||||||
const fullPath =
|
const fullPath =
|
||||||
process.env.NODE_ENV === "development"
|
process.env.NODE_ENV === "development"
|
||||||
? path.resolve(__dirname, `../../documents/${filePath}`)
|
? path.resolve(__dirname, `../../storage/documents/${filePath}`)
|
||||||
: path.resolve(process.env.STORAGE_DIR, `documents/${filePath}`);
|
: path.resolve(process.env.STORAGE_DIR, `documents/${filePath}`);
|
||||||
const fileExists = fs.existsSync(fullPath);
|
const fileExists = fs.existsSync(fullPath);
|
||||||
if (!fileExists) return null;
|
if (!fileExists) return null;
|
||||||
@ -47,7 +47,7 @@ async function fileData(filePath = null) {
|
|||||||
async function viewLocalFiles() {
|
async function viewLocalFiles() {
|
||||||
const folder =
|
const folder =
|
||||||
process.env.NODE_ENV === "development"
|
process.env.NODE_ENV === "development"
|
||||||
? path.resolve(__dirname, `../../documents`)
|
? path.resolve(__dirname, `../../storage/documents`)
|
||||||
: path.resolve(process.env.STORAGE_DIR, `documents`);
|
: path.resolve(process.env.STORAGE_DIR, `documents`);
|
||||||
const dirExists = fs.existsSync(folder);
|
const dirExists = fs.existsSync(folder);
|
||||||
if (!dirExists) fs.mkdirSync(folder);
|
if (!dirExists) fs.mkdirSync(folder);
|
||||||
@ -63,7 +63,7 @@ async function viewLocalFiles() {
|
|||||||
|
|
||||||
const folderPath =
|
const folderPath =
|
||||||
process.env.NODE_ENV === "development"
|
process.env.NODE_ENV === "development"
|
||||||
? path.resolve(__dirname, `../../documents/${file}`)
|
? path.resolve(__dirname, `../../storage/documents/${file}`)
|
||||||
: path.resolve(process.env.STORAGE_DIR, `documents/${file}`);
|
: path.resolve(process.env.STORAGE_DIR, `documents/${file}`);
|
||||||
|
|
||||||
const isFolder = fs.lstatSync(folderPath).isDirectory();
|
const isFolder = fs.lstatSync(folderPath).isDirectory();
|
||||||
@ -106,7 +106,7 @@ async function cachedVectorInformation(filename = null, checkOnly = false) {
|
|||||||
const digest = uuidv5(filename, uuidv5.URL);
|
const digest = uuidv5(filename, uuidv5.URL);
|
||||||
const file =
|
const file =
|
||||||
process.env.NODE_ENV === "development"
|
process.env.NODE_ENV === "development"
|
||||||
? path.resolve(__dirname, `../../vector-cache/${digest}.json`)
|
? path.resolve(__dirname, `../../storage/vector-cache/${digest}.json`)
|
||||||
: path.resolve(process.env.STORAGE_DIR, `vector-cache/${digest}.json`);
|
: path.resolve(process.env.STORAGE_DIR, `vector-cache/${digest}.json`);
|
||||||
const exists = fs.existsSync(file);
|
const exists = fs.existsSync(file);
|
||||||
|
|
||||||
@ -130,7 +130,7 @@ async function storeVectorResult(vectorData = [], filename = null) {
|
|||||||
);
|
);
|
||||||
const folder =
|
const folder =
|
||||||
process.env.NODE_ENV === "development"
|
process.env.NODE_ENV === "development"
|
||||||
? path.resolve(__dirname, `../../vector-cache`)
|
? path.resolve(__dirname, `../../storage/vector-cache`)
|
||||||
: path.resolve(process.env.STORAGE_DIR, `vector-cache`);
|
: path.resolve(process.env.STORAGE_DIR, `vector-cache`);
|
||||||
|
|
||||||
if (!fs.existsSync(folder)) fs.mkdirSync(folder);
|
if (!fs.existsSync(folder)) fs.mkdirSync(folder);
|
||||||
|
|||||||
@ -27,7 +27,7 @@ function curateLanceSources(sources = []) {
|
|||||||
|
|
||||||
const LanceDb = {
|
const LanceDb = {
|
||||||
uri: `${
|
uri: `${
|
||||||
!!process.env.STORAGE_DIR ? `${process.env.STORAGE_DIR}/` : "./"
|
!!process.env.STORAGE_DIR ? `${process.env.STORAGE_DIR}/` : "./storage/"
|
||||||
}lancedb`,
|
}lancedb`,
|
||||||
name: "LanceDb",
|
name: "LanceDb",
|
||||||
connect: async function () {
|
connect: async function () {
|
||||||
|
|||||||
2054
server/yarn.lock
Normal file
2054
server/yarn.lock
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user