Skip to main content

Sources API Script

If you want to upload files programmatically, you can use the Sources API. It lets you upload files just as you would via the user interface. You can find the API documentation here.

caution

Under our fair use policy, please do not run the following script daily: run it at most once a week, and only if you have new files to upload. The script deletes all existing sources and uploads all files again, which puts load on the database and incurs costs for the embedding API calls required for re-indexing.

The following script is an example of how to upload files to LoyJoy using the Sources API. The script is written in Python and uses the requests library. The script uploads all PDF and HTML files from the specified directories. It also reads a CSV file containing the docid and the URL of the source. The URL is used to link the source to an internal URL, e.g. SharePoint or Moodle. The script also deletes all existing sources before uploading the new files. The script is written for Windows, but can be easily adapted for other operating systems.

#!/usr/bin/env python3

# pip3 install requests

import glob
import os
import requests
import csv
import time

# API token; sent unchanged as the "Authorization" header of every request
# below. Empty placeholder here.
AUTH_TOKEN = ""

# Semicolon-separated CSV; each row is read as two columns, "key" and "value".
CSV_FILE = "X:/.../urls.csv"
# Directories searched recursively for *.html / *.pdf files to upload. The
# glob pattern is appended directly to these strings, so keep the trailing "/".
HTML_DIR = "X:/.../html/"
PDF_DIR = "X:/.../pdf/"

# Single endpoint used for deleting (DELETE), listing (GET) and uploading
# (POST) sources.
API_URL = "https://app-stable.loyjoy.com/api/sources/upload"

# If you are behind a proxy, you can set the proxy here. The mapping is passed
# as the `proxies` argument of every requests call.
PROXY = { "http": "http://proxy-username:proxy-password@localhost:8080", "https": "http://proxy-username:proxy-password@localhost:8080" }


# Optional: Read a CSV containing the docid and the URL of the source.
# Every row is parsed into a dict with the fixed field names "key" and
# "value" (the file is expected to have no header row, because the field
# names are supplied here).
# newline="" is what the csv module asks for when opening files, so that
# quoted fields containing line breaks are parsed correctly.
with open(CSV_FILE, "r", newline="") as csv_file:
    csv_reader = csv.DictReader(
        csv_file,
        delimiter=";",
        quotechar='"',
        fieldnames=["key", "value"],
    )
    # Materialize the rows while the file is still open.
    csv_content = list(csv_reader)


# Delete all existing sources
req = requests.delete(API_URL,
                      headers={"Authorization": AUTH_TOKEN},
                      proxies=PROXY)
if not req.ok:
    # Report the failure instead of silently polling for a deletion that
    # may never have started.
    print("Error: deleting existing sources failed")
    print(req.status_code)
    print(req.text)

docs = 9999
max_tries = 10
try_count = 0

# Poll the number of sources until none are left or max_tries is reached.
while docs > 0 and try_count < max_tries:
    req = requests.get(API_URL,
                       headers={"Authorization": AUTH_TOKEN},
                       proxies=PROXY)
    try_count += 1
    if req.ok:
        # fetch number of sources in LoyJoy, while deletion is still in progress
        docs = len(req.json())
    else:
        # Keep the previous count and retry; an error body may not be JSON.
        print("Error: could not fetch the list of sources")
        print(req.status_code)
        print(req.text)
    if docs == 0:
        # Everything is gone; no reason to wait another minute.
        break
    print("Waiting for 60 seconds")
    time.sleep(60)

if docs > 0:
    print("Warning: sources still present after {} checks".format(try_count))


# Upload PDF files
paths = sorted(glob.glob(PDF_DIR + "**/*.pdf", recursive=True))

# Lookup table from CSV column "value" to CSV column "key" (the docid).
# Built once so every PDF needs a single lookup; if several rows share the
# same "value", the last one wins.
docid_by_suffix = {row["value"]: row["key"] for row in csv_content}
optional_folder = "some_folder"

for pdf in paths:
    file_name = os.path.basename(pdf)
    # The lookup key is the part of the file name between the last "_" and
    # the extension. Working on the base name keeps underscores or ".pdf" in
    # directory names, and upper-case extensions, from corrupting the key.
    suffix = os.path.splitext(file_name)[0].rsplit("_", 1)[-1]
    docid = docid_by_suffix.get(suffix)

    if docid is not None:
        # Optional: Create URL so that the chat can link the sources to internal URLs, e.g. SharePoint or Moodle
        params = {
            "url": f"https://localhost:8443/pdf/{docid}",
            "folder": optional_folder,
        }
    else:
        # No CSV entry for this file: upload without url/folder.
        params = None

    # The context manager closes the file handle once the upload finished;
    # requests takes care of URL-encoding the query parameters.
    with open(pdf, "rb") as pdf_file:
        req = requests.post(API_URL,
                            params=params,
                            files={"file": (file_name, pdf_file, "application/pdf")},
                            headers={"Authorization": AUTH_TOKEN},
                            proxies=PROXY)

    if req.status_code == 200:
        print(f"Uploaded: {pdf}")
    else:
        print(f"Error: {pdf}")
        print(req.status_code)
        print(req.text)


# Upload HTML files
paths = sorted(glob.glob(HTML_DIR + "**/*.html", recursive=True))
optional_folder = "some_folder"

for html in paths:
    file_name = os.path.basename(html)
    # The docid of an HTML source is its file name without the extension.
    docid = os.path.splitext(file_name)[0]
    # Optional: Create URL so that the chat bot can link to the source
    url = f"https://localhost:8443/html/{docid}"

    # The context manager closes the file handle once the upload finished;
    # requests takes care of URL-encoding the query parameters.
    with open(html, "rb") as html_file:
        req = requests.post(API_URL,
                            params={"url": url, "folder": optional_folder},
                            files={"file": (file_name, html_file, "text/html")},
                            headers={"Authorization": AUTH_TOKEN},
                            proxies=PROXY)

    if req.status_code == 200:
        print(f"Uploaded: {html}")
    else:
        print(f"Error: {html}")
        print(req.status_code)
        print(req.text)

print("Upload finished")