dockerfile cleanup; enforce text LF line endings (#81)
This commit is contained in:
parent
3945a77290
commit
4079020de0
1
.gitattributes
vendored
Normal file
1
.gitattributes
vendored
Normal file
@ -0,0 +1 @@
|
|||||||
|
* text=auto eol=lf
|
||||||
@ -1,39 +1,39 @@
|
|||||||
import requests
|
import requests
|
||||||
import xml.etree.ElementTree as ET
|
import xml.etree.ElementTree as ET
|
||||||
from scripts.link import parse_links
|
from scripts.link import parse_links
|
||||||
import re
|
import re
|
||||||
|
|
||||||
def parse_sitemap(url):
|
def parse_sitemap(url):
|
||||||
response = requests.get(url)
|
response = requests.get(url)
|
||||||
root = ET.fromstring(response.content)
|
root = ET.fromstring(response.content)
|
||||||
|
|
||||||
urls = []
|
urls = []
|
||||||
for element in root.iter('{http://www.sitemaps.org/schemas/sitemap/0.9}url'):
|
for element in root.iter('{http://www.sitemaps.org/schemas/sitemap/0.9}url'):
|
||||||
for loc in element.iter('{http://www.sitemaps.org/schemas/sitemap/0.9}loc'):
|
for loc in element.iter('{http://www.sitemaps.org/schemas/sitemap/0.9}loc'):
|
||||||
if not has_extension_to_ignore(loc.text):
|
if not has_extension_to_ignore(loc.text):
|
||||||
urls.append(loc.text)
|
urls.append(loc.text)
|
||||||
else:
|
else:
|
||||||
print(f"Skipping filetype: {loc.text}")
|
print(f"Skipping filetype: {loc.text}")
|
||||||
|
|
||||||
return urls
|
return urls
|
||||||
|
|
||||||
# Example sitemap URL https://www.nerdwallet.com/blog/wp-sitemap-news-articles-1.xml
|
# Example sitemap URL https://www.nerdwallet.com/blog/wp-sitemap-news-articles-1.xml
|
||||||
def sitemap():
|
def sitemap():
|
||||||
sitemap_url = input("Enter the URL of the sitemap: ")
|
sitemap_url = input("Enter the URL of the sitemap: ")
|
||||||
|
|
||||||
if(len(sitemap_url) == 0):
|
if(len(sitemap_url) == 0):
|
||||||
print("No valid sitemap provided!")
|
print("No valid sitemap provided!")
|
||||||
exit(1)
|
exit(1)
|
||||||
|
|
||||||
url_array = parse_sitemap(sitemap_url)
|
url_array = parse_sitemap(sitemap_url)
|
||||||
|
|
||||||
#parse links from array
|
#parse links from array
|
||||||
parse_links(url_array)
|
parse_links(url_array)
|
||||||
|
|
||||||
def has_extension_to_ignore(string):
|
def has_extension_to_ignore(string):
|
||||||
image_extensions = ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.pdf']
|
image_extensions = ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.pdf']
|
||||||
|
|
||||||
pattern = r'\b(' + '|'.join(re.escape(ext) for ext in image_extensions) + r')\b'
|
pattern = r'\b(' + '|'.join(re.escape(ext) for ext in image_extensions) + r')\b'
|
||||||
match = re.search(pattern, string, re.IGNORECASE)
|
match = re.search(pattern, string, re.IGNORECASE)
|
||||||
|
|
||||||
return match is not None
|
return match is not None
|
||||||
@ -34,12 +34,10 @@ RUN groupadd -g $ARG_GID anythingllm && \
|
|||||||
# Copy docker helper scripts
|
# Copy docker helper scripts
|
||||||
COPY ./docker/docker-entrypoint.sh /usr/local/bin/
|
COPY ./docker/docker-entrypoint.sh /usr/local/bin/
|
||||||
COPY ./docker/docker-healthcheck.sh /usr/local/bin/
|
COPY ./docker/docker-healthcheck.sh /usr/local/bin/
|
||||||
COPY ./docker/dual_boot.sh /usr/local/bin/
|
|
||||||
|
|
||||||
# Ensure the scripts are executable
|
# Ensure the scripts are executable
|
||||||
RUN chmod +x /usr/local/bin/docker-entrypoint.sh && \
|
RUN chmod +x /usr/local/bin/docker-entrypoint.sh && \
|
||||||
chmod +x /usr/local/bin/docker-healthcheck.sh && \
|
chmod +x /usr/local/bin/docker-healthcheck.sh
|
||||||
chmod 777 /usr/local/bin/dual_boot.sh
|
|
||||||
|
|
||||||
USER anythingllm
|
USER anythingllm
|
||||||
|
|
||||||
@ -91,6 +89,4 @@ HEALTHCHECK --interval=1m --timeout=10s --start-period=1m \
|
|||||||
CMD /bin/bash /usr/local/bin/docker-healthcheck.sh || exit 1
|
CMD /bin/bash /usr/local/bin/docker-healthcheck.sh || exit 1
|
||||||
|
|
||||||
# Run the server
|
# Run the server
|
||||||
ENTRYPOINT ["docker-entrypoint.sh"]
|
ENTRYPOINT ["/bin/bash", "/usr/local/bin/docker-entrypoint.sh"]
|
||||||
|
|
||||||
CMD /bin/bash /usr/local/bin/dual_boot.sh
|
|
||||||
@ -1,3 +1,5 @@
|
|||||||
#!/usr/bin/env bash
|
#!/bin/bash
|
||||||
|
node /app/server/index.js &
|
||||||
exec "$@"
|
{ FLASK_ENV=production FLASK_APP=wsgi.py cd collector && gunicorn --workers 4 --bind 0.0.0.0:8888 wsgi:api; } &
|
||||||
|
wait -n
|
||||||
|
exit $?
|
||||||
@ -1,5 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
node /app/server/index.js &
|
|
||||||
{ FLASK_ENV=production FLASK_APP=wsgi.py cd collector && gunicorn --workers 4 --bind 0.0.0.0:8888 wsgi:api; } &
|
|
||||||
wait -n
|
|
||||||
exit $?
|
|
||||||
Loading…
Reference in New Issue
Block a user