diff --git a/.gitea/workflows/datadog-sca.yml b/.gitea/workflows/datadog-sca.yml new file mode 100644 index 0000000..2c99cb9 --- /dev/null +++ b/.gitea/workflows/datadog-sca.yml @@ -0,0 +1,20 @@ +on: [push] + +name: Datadog Software Composition Analysis + +jobs: + software-composition-analysis: + runs-on: ubuntu-latest + name: Datadog SBOM Generation and Upload + steps: + - name: Checkout + uses: actions/checkout@v3 + - name: Check imported libraries are secure and compliant + id: datadog-software-composition-analysis + uses: DataDog/datadog-sca-github-action@main + with: + dd_api_key: ${{ secrets.DD_API_KEY }} + dd_app_key: ${{ secrets.DD_APP_KEY }} + dd_service: jarvis + dd_env: ci + dd_site: us5.datadoghq.com diff --git a/.gitea/workflows/datadog-static-analysis.yml b/.gitea/workflows/datadog-static-analysis.yml new file mode 100644 index 0000000..f183ff3 --- /dev/null +++ b/.gitea/workflows/datadog-static-analysis.yml @@ -0,0 +1,21 @@ +on: [push] + +name: Datadog Static Analysis + +jobs: + static-analysis: + runs-on: ubuntu-latest + name: Datadog Static Analyzer + steps: + - name: Checkout + uses: actions/checkout@v3 + - name: Check code meets quality and security standards + id: datadog-static-analysis + uses: DataDog/datadog-static-analyzer-github-action@v1 + with: + dd_api_key: ${{ secrets.DD_API_KEY }} + dd_app_key: ${{ secrets.DD_APP_KEY }} + dd_service: jarvis + dd_env: ci + dd_site: us5.datadoghq.com + cpu_count: 2 diff --git a/.github/workflows/datadog-sca.yml b/.github/workflows/datadog-sca.yml new file mode 100644 index 0000000..2c99cb9 --- /dev/null +++ b/.github/workflows/datadog-sca.yml @@ -0,0 +1,20 @@ +on: [push] + +name: Datadog Software Composition Analysis + +jobs: + software-composition-analysis: + runs-on: ubuntu-latest + name: Datadog SBOM Generation and Upload + steps: + - name: Checkout + uses: actions/checkout@v3 + - name: Check imported libraries are secure and compliant + id: datadog-software-composition-analysis + uses: DataDog/datadog-sca-github-action@main + with: + dd_api_key: ${{ secrets.DD_API_KEY }} + dd_app_key: ${{ secrets.DD_APP_KEY }} + dd_service: jarvis + dd_env: ci + dd_site: us5.datadoghq.com diff --git a/.github/workflows/datadog-static-analysis.yml b/.github/workflows/datadog-static-analysis.yml new file mode 100644 index 0000000..f183ff3 --- /dev/null +++ b/.github/workflows/datadog-static-analysis.yml @@ -0,0 +1,21 @@ +on: [push] + +name: Datadog Static Analysis + +jobs: + static-analysis: + runs-on: ubuntu-latest + name: Datadog Static Analyzer + steps: + - name: Checkout + uses: actions/checkout@v3 + - name: Check code meets quality and security standards + id: datadog-static-analysis + uses: DataDog/datadog-static-analyzer-github-action@v1 + with: + dd_api_key: ${{ secrets.DD_API_KEY }} + dd_app_key: ${{ secrets.DD_APP_KEY }} + dd_service: jarvis + dd_env: ci + dd_site: us5.datadoghq.com + cpu_count: 2 diff --git a/.gitignore b/.gitignore index 80abc65..3dea82b 100644 --- a/.gitignore +++ b/.gitignore @@ -174,3 +174,46 @@ cython_debug/ pyvenv.cfg .venv pip-selfcheck.json + + +# Logs +logs +*.log +npm-debug.log* + +# Runtime data +pids +*.pid +*.seed + +# Directory for instrumented libs generated by jscoverage/JSCover +lib-cov + +# Coverage directory used by tools like istanbul +coverage + +# nyc test coverage +.nyc_output + +# Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files) +.grunt + +# node-waf configuration +.lock-wscript + +# Compiled binary addons (http://nodejs.org/api/addons.html) +build/Release + 
+# Dependency directories +node_modules +jspm_packages + +# Optional npm cache directory +.npm + +# Optional REPL history +.node_repl_history +.next + +config.ini +*.db \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..8a58831 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,20 @@ +# Use an official Python runtime as a parent image +FROM python:3.9-slim + +# Set the working directory in the container +WORKDIR /app + +# Copy the current directory contents into the container at /app +COPY . /app + +# Install any needed packages specified in requirements.txt +RUN pip install --no-cache-dir -r requirements.txt + +# Make port 5001 available to the world outside this container +EXPOSE 5001 + +# Define environment variable +ENV FLASK_APP=main.py + +# Run app.py when the container launches +CMD ["python", "main.py"] \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..2887b5f --- /dev/null +++ b/README.md @@ -0,0 +1,5 @@ +# Jarvis + +it's actually not that smart! + + diff --git a/client.py b/client.py new file mode 100644 index 0000000..4613500 --- /dev/null +++ b/client.py @@ -0,0 +1,138 @@ +import time + +import requests + + +class LLMChatClient: + def __init__(self, base_url, api_key): + self.base_url = base_url.rstrip("/") + self.api_key = api_key + self.headers = {"X-API-Key": api_key, "Content-Type": "application/json"} + + def submit_query(self, message): + """ + Submit a query to the LLM Chat Server. + + Args: + message (str): The message to send to the server. + + Returns: + str: The query ID for the submitted query. + + Raises: + requests.RequestException: If the request fails. + + Example: + client = LLMChatClient('http://localhost:5001', 'your-api-key') + query_id = client.submit_query('What is the capital of France?') + print(f"Query ID: {query_id}") + + cURL equivalent: + curl -X POST http://localhost:5001/api/v1/query \ + -H "Content-Type: application/json" \ + -H "X-API-Key: your-api-key" \ + -d '{"message": "What is the capital of France?"}' + """ + url = f"{self.base_url}/api/v1/query" + data = {"message": message} + response = requests.post(url, json=data, headers=self.headers) + response.raise_for_status() + return response.json()["query_id"] + + def get_query_status(self, query_id): + """ + Get the status of a submitted query. + + Args: + query_id (str): The ID of the query to check. + + Returns: + dict: A dictionary containing the status and conversation history (if completed). + + Raises: + requests.RequestException: If the request fails. + + Example: + client = LLMChatClient('http://localhost:5001', 'your-api-key') + status = client.get_query_status('query-id-here') + print(f"Query status: {status['status']}") + if status['status'] == 'completed': + print(f"Conversation history: {status['conversation_history']}") + + cURL equivalent: + curl -X GET http://localhost:5001/api/v1/query_status/query-id-here \ + -H "X-API-Key: your-api-key" + """ + url = f"{self.base_url}/api/v1/query_status/{query_id}" + response = requests.get(url, headers=self.headers) + response.raise_for_status() + return response.json() + + def submit_query_and_wait(self, message, max_wait_time=300, poll_interval=2): + """ + Submit a query and wait for the result. + + Args: + message (str): The message to send to the server. + max_wait_time (int): Maximum time to wait for the result in seconds. + poll_interval (int): Time between status checks in seconds. + + Returns: + dict: The completed conversation history. 
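+                Each entry is a dict with "role" and "content" keys; the server stores
+                system, user, assistant, and tool messages in this form.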
+ + Raises: + requests.RequestException: If the request fails. + TimeoutError: If the query doesn't complete within max_wait_time. + + Example: + client = LLMChatClient('http://localhost:5001', 'your-api-key') + result = client.submit_query_and_wait('What is the capital of France?') + print(f"Conversation history: {result}") + """ + query_id = self.submit_query(message) + start_time = time.time() + + while time.time() - start_time < max_wait_time: + status = self.get_query_status(query_id) + if status["status"] == "completed": + return status["conversation_history"] + time.sleep(poll_interval) + + raise TimeoutError(f"Query did not complete within {max_wait_time} seconds") + + +class LLMChatAdminClient: + def __init__(self, base_url, admin_key): + self.base_url = base_url.rstrip("/") + self.admin_key = admin_key + self.headers = {"X-Admin-Key": admin_key, "Content-Type": "application/json"} + + def generate_api_key(self, username): + """ + Generate a new API key for a user. + + Args: + username (str): The username to generate the API key for. + + Returns: + dict: A dictionary containing the username and generated API key. + + Raises: + requests.RequestException: If the request fails. + + Example: + admin_client = LLMChatAdminClient('http://localhost:5001', 'your-admin-key') + result = admin_client.generate_api_key('new_user') + print(f"Generated API key for {result['username']}: {result['api_key']}") + + cURL equivalent: + curl -X POST http://localhost:5001/admin/generate_key \ + -H "Content-Type: application/json" \ + -H "X-Admin-Key: your-admin-key" \ + -d '{"username": "new_user"}' + """ + url = f"{self.base_url}/admin/generate_key" + data = {"username": username} + response = requests.post(url, json=data, headers=self.headers) + response.raise_for_status() + return response.json() diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..e11985b --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,16 @@ +version: '3.8' + +services: + llm-chat-server: + build: . + ports: + - "5001:5001" + volumes: + - ./llm_chat_server.db:/app/llm_chat_server.db + - ./config.ini:/app/config.ini + environment: + - FLASK_ENV=production + restart: unless-stopped + +volumes: + llm_chat_server_db: \ No newline at end of file diff --git a/index.html b/index.html index 654a579..8ebd44d 100644 --- a/index.html +++ b/index.html @@ -9,6 +9,8 @@ + +
+
INPUT:
@@ -272,6 +380,12 @@
 GPU Memory
+ Conversation History
@@ -280,10 +394,80 @@ const chatContainer = document.getElementById('chat-container'); const userInput = document.getElementById('user-input'); const sendButton = document.getElementById('send-button'); + const chatTabs = document.getElementById('chat-tabs'); - let thinkingElement = null; - let thinkingDetails = null; - let thinkingStartTime = null; + let currentChatId = null; + let chats = {}; + + function createNewChat() { + const chatId = Date.now().toString(); + chats[chatId] = { + messages: [], + thinkingSections: [] + }; + addChatTab(chatId); + switchToChat(chatId); + saveChats(); + } + + function addChatTab(chatId) { + const tab = document.createElement('button'); + tab.classList.add('chat-tab'); + tab.textContent = `Chat ${Object.keys(chats).length}`; + tab.onclick = () => switchToChat(chatId); + + const closeButton = document.createElement('span'); + closeButton.classList.add('close-tab'); + closeButton.textContent = '×'; + closeButton.onclick = (e) => { + e.stopPropagation(); + closeChat(chatId); + }; + + tab.appendChild(closeButton); + chatTabs.insertBefore(tab, chatTabs.lastElementChild); + } + + function switchToChat(chatId) { + currentChatId = chatId; + document.querySelectorAll('.chat-tab').forEach(tab => tab.classList.remove('active')); + document.querySelector(`.chat-tab:nth-child(${Object.keys(chats).indexOf(chatId) + 1})`).classList.add('active'); + renderChat(chatId); + } + + function closeChat(chatId) { + delete chats[chatId]; + saveChats(); + const tabToRemove = Array.from(chatTabs.children).find(tab => tab.textContent.includes(`Chat ${Object.keys(chats).indexOf(chatId) + 1}`)); + if (tabToRemove) { + chatTabs.removeChild(tabToRemove); + } + if (currentChatId === chatId) { + const remainingChatIds = Object.keys(chats); + if (remainingChatIds.length > 0) { + switchToChat(remainingChatIds[0]); + } else { + createNewChat(); + } + } + } + + function renderChat(chatId) { + chatContainer.innerHTML = ''; + const chat = chats[chatId]; + chat.messages.forEach(message => addMessage(message.content, message.isUser)); + chat.thinkingSections.forEach(section => { + const thinkingSection = createThinkingSection(); + section.thoughts.forEach(thought => addThought(thought.type, thought.content, thought.details, thinkingSection)); + }); + } + + function createThinkingSection() { + const section = document.createElement('div'); + section.classList.add('thinking-section'); + chatContainer.appendChild(section); + return section; + } function addMessage(message, isUser) { const messageElement = document.createElement('div'); @@ -292,65 +476,40 @@ messageElement.innerHTML = isUser ? 
message : marked.parse(message); chatContainer.appendChild(messageElement); chatContainer.scrollTop = chatContainer.scrollHeight; + + if (currentChatId) { + chats[currentChatId].messages.push({ content: message, isUser: isUser }); + saveChats(); + } } - function startThinking() { - thinkingElement = document.createElement('div'); - thinkingElement.classList.add('thought-summary', 'collapsible'); + function addThought(type, content, details = '', thinkingSection) { + const stepElement = document.createElement('div'); + stepElement.classList.add('thought-summary', 'collapsible', type); + stepElement.textContent = type.charAt(0).toUpperCase() + type.slice(1).replace('_', ' ') + ':'; + stepElement.onclick = toggleStepDetails; + + const stepDetails = document.createElement('div'); + stepDetails.classList.add('thought-details'); - const led = document.createElement('div'); - led.classList.add('led', 'blinking'); - - const textNode = document.createTextNode('Thinking...'); - - thinkingElement.appendChild(led); - thinkingElement.appendChild(textNode); - thinkingElement.onclick = toggleThinkingDetails; - - thinkingDetails = document.createElement('div'); - thinkingDetails.classList.add('thought-details'); - - chatContainer.appendChild(thinkingElement); - chatContainer.appendChild(thinkingDetails); - - thinkingStartTime = Date.now(); + if (type === 'error') { + stepElement.classList.add('error-message'); + if (content.includes('retrying')) { + stepElement.classList.add('retrying'); + } + stepDetails.innerHTML = marked.parse(content + '\n\nDetails:\n```\n' + details + '\n```'); + } else { + stepDetails.innerHTML = marked.parse(content); + } + + thinkingSection.appendChild(stepElement); + thinkingSection.appendChild(stepDetails); chatContainer.scrollTop = chatContainer.scrollHeight; - } - function addThought(step, content) { - if (thinkingDetails) { - const stepElement = document.createElement('div'); - stepElement.classList.add('thought-summary', 'collapsible'); - stepElement.textContent = step; - stepElement.onclick = toggleStepDetails; - - const stepDetails = document.createElement('div'); - stepDetails.classList.add('thought-details'); - stepDetails.innerHTML = content; - - thinkingDetails.appendChild(stepElement); - thinkingDetails.appendChild(stepDetails); - chatContainer.scrollTop = chatContainer.scrollHeight; - } - } - - function endThinking(thinkingTime) { - if (thinkingElement) { - const textNode = thinkingElement.childNodes[1]; - textNode.nodeValue = `Thinking... (${thinkingTime}s)`; - const led = thinkingElement.querySelector('.led'); - led.classList.remove('blinking'); - led.style.backgroundColor = '#0f0'; - led.style.boxShadow = '0 0 10px #0f0'; - thinkingStartTime = null; - } - } - - function toggleThinkingDetails() { - this.classList.toggle('open'); - const details = this.nextElementSibling; - if (details) { - details.style.display = details.style.display === 'none' ? 
'block' : 'none'; + if (currentChatId) { + const currentThinkingSection = chats[currentChatId].thinkingSections[chats[currentChatId].thinkingSections.length - 1]; + currentThinkingSection.thoughts.push({ type, content, details }); + saveChats(); } } @@ -362,34 +521,71 @@ } } - socket.on('thinking', (data) => { - if (!thinkingElement) startThinking(); - addThought(data.step, 'Started'); - }); + function saveChats() { + localStorage.setItem('chats', JSON.stringify(chats)); + } - socket.on('thought', (data) => { - addThought('Result', data.content); - }); - - socket.on('chat_response', (data) => { - endThinking(data.thinking_time); - addMessage(data.response, false); - }); - - socket.on('error', (data) => { - endThinking(data.thinking_time); - addMessage(`Error: ${data.message}`, false); - }); + function loadChats() { + const storedChats = localStorage.getItem('chats'); + if (storedChats) { + chats = JSON.parse(storedChats); + Object.keys(chats).forEach(chatId => addChatTab(chatId)); + if (Object.keys(chats).length > 0) { + switchToChat(Object.keys(chats)[0]); + } else { + createNewChat(); + } + } else { + createNewChat(); + } + } function sendMessage() { const message = userInput.value.trim(); - if (message) { + if (message && currentChatId) { addMessage(message, true); - socket.emit('chat_request', { message: message }); + chats[currentChatId].thinkingSections.push({ thoughts: [] }); + socket.emit('chat_request', { + message: message, + conversation_history: chats[currentChatId].messages.filter(m => !m.isUser).map(m => ({ role: 'assistant', content: m.content })) + .concat(chats[currentChatId].messages.filter(m => m.isUser).map(m => ({ role: 'user', content: m.content }))) + }); userInput.value = ''; } } + socket.on('thinking', (data) => { + if (currentChatId) { + const newThinkingSection = createThinkingSection(); + chats[currentChatId].thinkingSections.push({ thoughts: [] }); + addThought(data.step, 'Started', '', newThinkingSection); + } + }); + + socket.on('thought', (data) => { + if (currentChatId) { + const currentThinkingSection = chatContainer.querySelector('.thinking-section:last-child'); + addThought(data.type, data.content, data.details, currentThinkingSection); + } + }); + + socket.on('chat_response', (data) => { + if (currentChatId) { + addMessage(data.response, false); + } + }); + + socket.on('error', (data) => { + if (currentChatId) { + const currentThinkingSection = chatContainer.querySelector('.thinking-section:last-child'); + if (data.type === 'retrying') { + addThought('error', data.content, '', currentThinkingSection); + } else { + addThought('error', data.message, '', currentThinkingSection); + } + } + }); + sendButton.addEventListener('click', sendMessage); userInput.addEventListener('keypress', function(e) { if (e.key === 'Enter' && !e.shiftKey) { @@ -398,6 +594,16 @@ } }); + // Add new chat button + const newChatButton = document.createElement('button'); + newChatButton.id = 'new-chat-button'; + newChatButton.textContent = '+ New Chat'; + newChatButton.onclick = createNewChat; + chatTabs.appendChild(newChatButton); + + // Load chats when the page loads + loadChats(); + const chartOptions = { type: 'line', options: { @@ -570,6 +776,41 @@ window.addEventListener('resize', checkWindowSize); checkWindowSize(); // Initial check + + // Add this new function to update the conversation history + function updateConversationHistory(history) { + const conversationHistoryElement = document.getElementById('conversation-history'); + conversationHistoryElement.innerHTML = ''; + + 
history.forEach(item => { + const card = document.createElement('div'); + card.classList.add('history-card'); + + const role = document.createElement('div'); + role.classList.add('history-role'); + role.textContent = item.role.charAt(0).toUpperCase() + item.role.slice(1); + + const content = document.createElement('pre'); + content.classList.add('history-content'); + content.innerHTML = hljs.highlightAuto(item.content).value; + + card.appendChild(role); + card.appendChild(content); + conversationHistoryElement.appendChild(card); + }); + } + + // Add this new socket listener + socket.on('conversation_history', (data) => { + updateConversationHistory(data.history); + }); + + // Add event listener for the clear history button + clearHistoryButton.addEventListener('click', () => { + if (confirm('Are you sure you want to clear the conversation history?')) { + clearConversationHistory(); + } + }); \ No newline at end of file diff --git a/main.py b/main.py index 176661e..09a5729 100644 --- a/main.py +++ b/main.py @@ -1,241 +1,394 @@ -from flask import Flask, send_from_directory -from flask_socketio import SocketIO, emit -from flask_openapi3 import OpenAPI, Info -from pydantic import BaseModel -from typing import List -from models import model_manager -import structlog -import time -import psutil -import GPUtil -import threading +import configparser +import json import os +import pprint +import queue +import random +import re +import secrets +import sqlite3 +import threading +import time +import uuid +from datetime import datetime +from typing import List, Optional +import enum +import GPUtil +import ollama +import psutil +import structlog +import logging +from flask import Flask, g, jsonify, request, send_from_directory +from flask_socketio import SocketIO, emit +from pydantic import BaseModel +from werkzeug.utils import secure_filename +import base64 +from models import model_manager +from tools import DefaultToolManager +# Configure logging +logging.basicConfig(level=logging.INFO, format="%(message)s") +console_handler = logging.StreamHandler() +console_handler.setLevel(logging.INFO) logger = structlog.get_logger() +# Configuration setup +CONFIG_FILE = "config.ini" -openapi = OpenAPI(__name__, info=Info(title="LLM Chat Server", version="1.0.0")) -app = openapi +# Add this near the top of the file, after imports +processing_thread = None +processing_thread_started = False + +ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg', 'gif'} +MAX_IMAGE_SIZE = 1 * 1024 * 1024 # 1MB + + +def create_default_config(): + config = configparser.ConfigParser() + config["DEFAULT"] = { + "AdminKey": secrets.token_urlsafe(32), + "DatabasePath": "llm_chat_server.db", + } + config["SERVER_FEATURES"] = { + "EnableFrontend": "false", + "EnableChatEndpoints": "false", + "EnableAPIEndpoints": "true", + } + config["MODEL"] = {"PrimaryModel": "qwen2.5:14b"} + config["PERFORMANCE"] = {"UpdateInterval": "0.1"} + with open(CONFIG_FILE, "w") as configfile: + config.write(configfile) + + +def load_config(): + if not os.path.exists(CONFIG_FILE): + create_default_config() + + config = configparser.ConfigParser() + config.read(CONFIG_FILE) + return config + + +config = load_config() +ADMIN_KEY = config["DEFAULT"]["AdminKey"] +DATABASE = config["DEFAULT"]["DatabasePath"] +ENABLE_FRONTEND = config["SERVER_FEATURES"].getboolean("EnableFrontend") +ENABLE_CHAT_ENDPOINTS = config["SERVER_FEATURES"].getboolean("EnableChatEndpoints") +ENABLE_API_ENDPOINTS = config["SERVER_FEATURES"].getboolean("EnableAPIEndpoints") +PRIMARY_MODEL = 
config["MODEL"]["PrimaryModel"] +UPDATE_INTERVAL = config["PERFORMANCE"].getfloat("UpdateInterval") + +app = Flask(__name__) socketio = SocketIO(app, cors_allowed_origins="*") -@app.route('/') +tool_manager = DefaultToolManager() + + +# Database setup +def get_db(): + db = getattr(g, "_database", None) + if db is None: + db = g._database = sqlite3.connect(DATABASE) + db.row_factory = sqlite3.Row + return db + + +@app.teardown_appcontext +def close_connection(exception): + db = getattr(g, "_database", None) + if db is not None: + db.close() + + +class QueryStatus(enum.Enum): + QUEUED = "queued" + PROCESSING = "processing" + DONE = "done" + + +def init_db(): + with app.app_context(): + db = get_db() + db.execute(""" + CREATE TABLE IF NOT EXISTS Keys ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + username TEXT NOT NULL UNIQUE, + api_key TEXT NOT NULL UNIQUE + ); + """) + db.execute(''' + CREATE TABLE IF NOT EXISTS Queries ( + id TEXT PRIMARY KEY, + ip TEXT NOT NULL, + timestamp DATETIME DEFAULT CURRENT_TIMESTAMP, + query TEXT NOT NULL, + api_key_id INTEGER, + status TEXT NOT NULL, + conversation_history TEXT, + FOREIGN KEY (api_key_id) REFERENCES Keys (id) + ) + ''') + db.commit() + + +# Create a schema.sql file with the following content: +""" +CREATE TABLE IF NOT EXISTS Keys ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + username TEXT NOT NULL UNIQUE, + api_key TEXT NOT NULL UNIQUE +); + +CREATE TABLE IF NOT EXISTS Queries ( + id TEXT PRIMARY KEY, + ip TEXT NOT NULL, + timestamp DATETIME DEFAULT CURRENT_TIMESTAMP, + query TEXT NOT NULL, + api_key_id INTEGER, + status TEXT NOT NULL, + conversation_history TEXT, + FOREIGN KEY (api_key_id) REFERENCES Keys (id) +); +""" + + +def validate_api_key(api_key): + db = get_db() + cursor = db.cursor() + cursor.execute("SELECT id FROM Keys WHERE api_key = ?", (api_key,)) + result = cursor.fetchone() + return result[0] if result else None + + +@app.route("/") def index(): - logger.info("Serving index.html") - return send_from_directory('.', 'index.html') + if ENABLE_FRONTEND: + logger.info("Serving index.html") + return send_from_directory(".", "index.html") + else: + return jsonify({"error": "Frontend is disabled"}), 404 + class ChatRequest(BaseModel): message: str + class ChatResponse(BaseModel): response: str -@socketio.on('chat_request') -def handle_chat_request(data): - user_input = data['message'] - logger.info("Received chat request", user_input=user_input) - - start_time = time.time() - full_context = "" - try: - # Step 1: Generate a plan using the initial LLM - emit('thinking', {'step': 'Generating plan'}) - plan, plan_generation = generate_plan(user_input) - full_context += f"Plan Thinking:\n{plan_generation}" - full_context += f"Plan:\n{plan}" - emit('thought', {'content': f"Plan Thinking:\n{plan_generation}"}) - emit('thought', {'content': f"Plan:\n{plan}"}) - if plan[0].strip().lower() == "direct_answer": - final_response = plan[1] - thinking_time = round(time.time() - start_time, 2) - emit('chat_response', { - 'response': final_response, - 'thinking_time': thinking_time - }) - return - - # Step 2: Execute each step of the plan - step_results = [] - for i, step in enumerate(plan): - emit('thinking', {'step': f'Executing step {i+1}'}) - while True: - best_model, model_selection = select_best_model(step, step_results, full_context) - if best_model in model_manager.model_capabilities: - break - logger.warning(f"Selected model {best_model} is not in the list of available models. 
Retrying...") - emit('thought', {'content': f"Selected model for step {i+1}:\n{model_selection}"}) - # summary, summary_generation = summarize_context(f"Plan: {plan}\n\nSteps: {step_results}") - # emit('thought', {'content': f"Context summary:\n{summary_generation}"}) - step_result, step_execution = execute_step(step, best_model, step_results, full_context) - emit('thought', {'content': f"Step {i+1} result:\n{step_execution}"}) - emit('thought', {'content': f"Result {i+1}:\n{step_result}"}) - step_results.append(step_result) - full_context += f"Step {i+1} result:\n{step_execution}" - - # Step 3: Generate final response - emit('thinking', {'step': 'Generating final response'}) - final_response, final_generation = generate_final_response(user_input, plan, step_results) - emit('thought', {'content': f"Final response generation:\n{final_generation}"}) - +@socketio.on("chat_request") +def handle_chat_request(data): + if not ENABLE_CHAT_ENDPOINTS: + emit("error", {"message": "Chat endpoints are disabled"}) + return + + user_input = data["message"] + conversation_history = data.get("conversation_history", []) + conversation_history = [ + {"role": "system", "content": ANSWER_QUESTION_PROMPT} + ] + conversation_history + logger.info( + "Received chat request", + user_input=user_input, + conversation_history=conversation_history, + ) + + start_time = time.time() + try: + final_response = answer_question_tools(user_input, conversation_history) end_time = time.time() thinking_time = round(end_time - start_time, 2) - - emit('chat_response', { - 'response': final_response, - 'thinking_time': thinking_time - }) + + emit( + "chat_response", + {"response": final_response, "thinking_time": thinking_time}, + ) except Exception as e: logger.exception("Error during chat processing", error=str(e)) end_time = time.time() thinking_time = round(end_time - start_time, 2) - emit('error', { - 'message': f"An error occurred: {str(e)}", - 'thinking_time': thinking_time - }) + emit( + "error", + {"message": f"An error occurred: {str(e)}", "thinking_time": thinking_time}, + ) -PLAN_GENERATE_PROMPT = """ -You are building a "chain of thought" workflow for a series of LLMs to complete a task provided by a user. -Your first task is to "think" through the problem provided by the user. Probe what it would take to complete the task, see if there are hidden nuances, what constrains might be relevant, how to be efficient. -This thinking should set question the premise of the task, and sets the scene for a plan of attack to be created. -Verbalize your thoughts out loud, allow the user to see your thought process. This thought process will also be used as context for processing the generated plan. -This thought process should mimic the process of a human, and not be a simple list of steps, but should be a narrative of thought that a human would have. -Each step in the formulated plan is a step that a seperate LLM will complete. The LLM that will complete the step will be selected based on the scope of the step and the capabilities of the available models. -There are models that are good at coding and math, and there are models that are good at reasoning and planning. Some models that are generalists, multilingual, or conversational. And even some that are vision models. -Use this context of the possible models to shape each step such that a LLM can complete the step given the step and some context. -Steps should follow a logical "chain of thought" in order to best complete the overall task. 
-Steps should be self contained and be designed such that the results of one step can be passed on to the next step. -Steps should be phrased in such a way that it acts as a prompt or instruction to the LLM that will complete the step. -Each step will return a result, and a thought process. The thought process is extremely important, it is the "chain of thought" that the LLM went through to complete the step. This thought process is critical for the next step in the plan. -Consider how results from one step can be combined with results from another step and consider how the chain of thought from one step can inform the next step when designing each step. -Try and minimize the number of steps required to complete the task since running a lot of steps is expensive. -Your output should be your thought process, followed by a single line titled "STEPS", followed by each step to take, one step per line. -Do not add any sort of markdown formatting, code formatting, or any other formatting. -Do not add any preamble, postamble, or other text, only the thought process and the steps. -Consider the following example: +def answer_question_tools( + user_input: str, conversation_history: List[dict], max_retries: int = 100 +): + global tool_manager -Prompt: Write a program to reverse a string, then output ASCII block art of that reversed string. Do this in python. + # If conversation_history is empty, initialize it with the system prompt + if not conversation_history: + conversation_history = [ + {"role": "system", "content": ANSWER_QUESTION_PROMPT}, + ] -So there are two parts to this task. First, we need to reverse the input string. Then we need to print the ASCII block art for each character in the reversed string. -We should be able to reverse the string using either a simple loop, or a python slice. Slicing is simpler, so we should use that. -For the ASCII block art, the challenge is in creating a mapping between each character and its block art representation. There are a few ways to go about this: - - Find a library that converts text to block art - - Create our own mapping from characters to block art - - Create a procedurally generated mapping from characters to block art -Procedural generation could be done with an algorithm, but coming up with a good algorithm could be challenging. -Generating a dictionary could be a good approach, but there are 26 letters in the alphabet, and 10 digits, so we would need 36 different outputs for the block art. -We should search for a library that already does this, import it, and call it on the result of the string reversal. We would also need to tell the user to install the library. + logger.info( + "Starting chat", + user_input=user_input, + conversation_history=conversation_history, + ) + # Add the new user input to the conversation history + conversation_history.append({"role": "user", "content": user_input}) -We're now ready to create our plan. + emit("thinking", {"step": "Starting"}) + emit("conversation_history", {"history": conversation_history}) -STEPS -1. Write a function that takes a string and reverses it. -2. Write a function that takes a string and returns the ASCII block art for each character in the string, this must be done using a library. -3. Combine the two functions into a single program. 
+ last_thought_content = None ---- + for _ in range(max_retries): + response = ollama.chat( + model=PRIMARY_MODEL, + messages=conversation_history, + tools=tool_manager.get_tools_for_ollama_dict(), + stream=False, + ) + assistant_message = response["message"] -Now you try. -""" -_REMINADER_PT =""" -Each task you create should be should be self contained and be designed such that the results of one step can be passed on to the next step. -Try and minimize the number of steps required to complete the task. -Output only a numbered list of steps, each step should be a seperate line. -Do not output any preamble or other text, only the list of steps. -If you think a task can be completed by a single step, then you can output a single step. -If you can directly answer the question, you must begin your response with a single line containing the text "DIRECT_ANSWER" and then provide the answer to the question on the next line. + conversation_history.append(assistant_message) + emit("conversation_history", {"history": conversation_history}) + pprint.pp(assistant_message) -Here are some samples: + if "tool_calls" in assistant_message: + for tool_call in assistant_message["tool_calls"]: + tool_name = tool_call["function"]["name"] + tool_args = tool_call["function"]["arguments"] + emit( + "thought", + { + "type": "tool_call", + "content": f"Tool: {tool_name}\nArguments: {tool_args}", + }, + ) + tool_response = tool_manager.get_tool(tool_name).execute(tool_args) + conversation_history.append({"role": "tool", "content": tool_response}) + emit("conversation_history", {"history": conversation_history}) + emit("thought", {"type": "tool_result", "content": tool_response}) + else: + if "" in assistant_message["content"].lower(): + reply_content = re.search( + r"(.*?)", assistant_message["content"], re.DOTALL + ) + if reply_content: + reply_answer = reply_content.group(1).strip() + emit("thought", {"type": "answer", "content": reply_answer}) + return reply_answer + else: + current_thought_content = assistant_message["content"].strip() + emit( + "thought", {"type": "thoughts", "content": current_thought_content} + ) -Input: Write a program to reverse a string, then output the ASCII art of that reversed string. Do this in python. -Steps: -1. Define a template for a program that prints the ASCII art of the reversed string. -2. Fill in the logic to reverse the string. -3. Fill in the logic to print the ASCII art of the reversed string. -4. Output the final program. + # Check for two consecutive thoughts, with the second being empty + if last_thought_content and not current_thought_content: + emit("thought", {"type": "answer", "content": last_thought_content}) + return last_thought_content -Input: What are the oceans of the world? -Steps: -1. Use the encyclopedia tool to get the page on the oceans of the world, parse, and output the results. + last_thought_content = current_thought_content + continue -Input: What is the perfect gas law? -Steps: -DIRECT_ANSWER -The perfect gas law is the equation of state of a hypothetical ideal gas. The formula is $$PV = nRT$$ where P is pressure, V is volume, n is the number of moles, R is the ideal gas constant, and T is temperature. + return f"Max iterations reached. Last response: {assistant_message['content']}" + + +ANSWER_QUESTION_PROMPT2 = f""" +The current date is {datetime.now().strftime("%A, %B %d, %Y")}, your knowledge cutoff was December 2023. +You are Dewey, an AI assistant with access to external tools and the ability to think through complex problems. 
Your role is to assist users by leveraging tools when necessary, thinking deeply about problems, and providing accurate and helpful information, all with a cheerful, but witty personality. Here are the tools available to you: + +{tool_manager.get_tools_and_descriptions_for_prompt()} + +When addressing a query, follow these steps: + +1. Analyze: Thoroughly analyze the query and consider multiple approaches to solving it. + +2. Plan: Develop a plan of action, considering whether you need to use any tools or if you can answer directly. + +3. Execute: If you need to use a tool, call it as you would a function. If not, proceed with your reasoning. + - Analyse the given prompt and decided whether or not it can be answered by a tool. If it can, use the following functions to respond with a JSON for a function call with its proper arguments that best answers the given prompt. Respond in the format \"name\": function name, \"parameters\": dictionary of argument name and its value. Do not use variables. + +4. Reflect: After each step or tool use, reflect on the results: + - If successful, consider if the result fully answers the user's query or if additional steps are needed. + - If there were errors or the result is unsatisfactory, don't give up! Use Tree of Thoughts reasoning: + a) Generate multiple alternative approaches or modifications to your previous approach. + b) Briefly evaluate the potential of each alternative. + c) Choose the most promising alternative and execute it. + d) Repeat this process if needed, building upon your growing understanding of the problem. + e) You cannot return a final answer after an error using a tool, you must try again. + +5. Iterate: Continue this process of execution and reflection, exploring different branches of thought as needed. + +6. Conclude: When you believe you have a comprehensive answer to the user's query, provide your final answer. + +Always explain your thought process, including your reasoning for each decision and how you arrived at your conclusions. If you're providing a final answer, or need more input from the user, put your response in tags . + +Remember, complex problems often require multiple steps and iterations. Don't hesitate to break down the problem, use tools multiple times, or explore different approaches to arrive at the best solution. +Before approaching a problem, come up with a few ways you might solve it, and then choose the most promising approach. Repeat this on each iteration. """ -def generate_plan(user_input: str) -> tuple[List[str], str]: - logger.debug("Generating plan", prompt=user_input, system=PLAN_GENERATE_PROMPT) - response = model_manager.generate_text("qwen2.5:7b", user_input, max_length=1024, system=PLAN_GENERATE_PROMPT) - plan = response.split("STEPS")[1].strip() - response_no_steps = response.split("STEPS")[0].strip() - return [step.strip() for step in plan.split("\n") if step.strip()], response_no_steps +ANSWER_QUESTION_PROMPT = f""" +You are Dewey, an AI assistant with a personality that combines the wit and sarcasm of Dr. Gregory House from House MD with the helpfulness and intelligence of Jarvis from Iron Man. Today's date is {datetime.now().strftime("%A, %B %d, %Y")}. Your knowledge cutoff date is December 2023. +When responding to user queries, follow these steps: -SELECT_BEST_MODEL_PROMPT = f""" -You are a large language model whos job it is to evaluate a step that is part of a larger plan, and determine what LLM would be best suited to complete the step based on the capabilities of the LLM. 
+Analyze the user's request -The LLMs and their capabilities are as follows: -{"\n".join([f"{k}: {','.join(v)}" for k,v in model_manager.model_capabilities.items()])} +Option 1: [First interpretation of the request] +Option 2: [Second interpretation of the request] +... (up to 5 options) -You will be provided with the current step of execution, the results of the previous steps in order, and the current chain of thought so far. -If the chain of thought is too long, a summary of the current chain of thought will be provided. -Your job is to use all this information to determine which of the provided LLMs would be best suited to complete the provided step given the capabilities of the LLM. -Your response should be the full name of the LLM that should complete the step. -Reply with only one of the following values: \n{'\n'.join(list(model_manager.model_capabilities.keys()))} +Selected approach: [Choose the most promising option or combine the two best] +Break down the task into subtasks + +Option 1: [First breakdown of subtasks] +Option 2: [Second breakdown of subtasks] +... (up to 5 options) + +Selected breakdown: [Choose the most promising option or combine the two best] +For each subtask, consider available tools: +{tool_manager.get_tools_and_descriptions_for_prompt()} + +Option 1: [First approach using tools] +Option 2: [Second approach using tools] +... (up to 5 options) + +Selected tool usage: [Choose the most promising option or combine the two best] +Execute the plan + +Option 1: [First execution plan] +Option 2: [Second execution plan] +... (up to 5 options) + +Selected execution: [Choose the most promising option or combine the two best] +Review and refine the response + +Option 1: [First refined response] +Option 2: [Second refined response] +... (up to 5 options) + +Selected response: [Choose the most promising option or combine the two best] +Verify the results + +Check 1: [First verification method] +Check 2: [Second verification method] +... (up to 5 checks) + +Verification outcome: [Summarize the verification results] +Generate the final response to the user within tags: + + +[Final response goes here, incorporating the following guidelines:] +- Be conversational and engaging +- Maintain a witty and slightly sarcastic tone, reminiscent of Dr. Gregory House +- Deliver factual information with the precision and helpfulness of Jarvis +- Use clever analogies or pop culture references when appropriate +- Don't be afraid to challenge the user's assumptions, but always in a constructive manner +- Ensure the response is tailored to the user's query while showcasing your unique personality + +Remember to always be helpful, accurate, and respectful in your interactions, while maintaining your distinctive character blend of House and Jarvis. 
""" -def select_best_model(step: str, results: List[str], context: str) -> tuple[str, str]: - prompt = f"Current Step: {step}\n\nResults So Far: {results}\n\nCurrent Chain of Thought: {context}" - logger.debug("Selecting best model", prompt=prompt, system=SELECT_BEST_MODEL_PROMPT) - response = model_manager.generate_text("llama3.2:3b", prompt, max_length=50, system=SELECT_BEST_MODEL_PROMPT) - model_name = response.strip().lower() - return model_name, response - - -def summarize_context(context: str) -> tuple[str, str]: - prompt = f"Summarize the following context: {context}" - logger.debug("Summarizing context", prompt=prompt) - response = model_manager.generate_text("llama3.2:3b", prompt, max_length=300) - return response, response - -EXECUTE_STEP_PROMPT = """ -You are a large language model that has been selected to complete a step within a larger task. -You have been selected to complete this step due to your specific capabilities. -You will be provided with the job to do in this current step, the results of the previous steps in order, and the current chain of thought so far. -If the chain of thought is too long, a summary of the current chain of thought will be provided. -Your job is to use all this information to complete the step. -Your response should be in two parts. The first part should be your thought process in completing the step, how you went about solving the step, assumptions made, relation to previous steps, and challenges faced. -You must then output a line with the word "RESPONSE". -The second part should be the result of completing your step. -The second part should contain nothing except the result of completing your step. -Only complete your part of the step. Do not extrapolate beyond the bounds of the step. Do not trample on the results of previous steps. Build on the results of previous steps, and use them to inform your work. -Do not include any preamble or other text, only the result of completing your step. -Do not use any markdown formatting, code formatting, or any other formatting. -""" - -def execute_step(step: str, model: str, results: List[str], context: str) -> tuple[str, str]: - prompt = f"Current Step: {step}\n\nResults So Far: {results}\n\nCurrent Chain of Thought: {context}" - logger.debug("Executing step", step=step, model=model, prompt=prompt) - response = model_manager.generate_text(model, prompt, max_length=1024, system=EXECUTE_STEP_PROMPT) - response_step = response.split("RESPONSE")[1].strip() - response_thinking = response.split("RESPONSE")[0].strip() - return response_step, response_thinking - -def generate_final_response(user_input: str, plan: List[str], step_results: List[str]) -> tuple[str, str]: - prompt = f"Question: {user_input}\n\nPlan:\n" - for i, step in enumerate(plan): - prompt += f"{i+1}. {step}\n" - prompt += "\nResults:\n" - for i, result in enumerate(step_results): - prompt += f"Step {i+1} result: {result}\n" - prompt += "\nBased on the above information, provide a comprehensive answer to the original question." 
- logger.debug("Generating final response", prompt=prompt) - response = model_manager.generate_text("qwen2.5:7b", prompt, max_length=500) - return response, response - -UPDATE_INTERVAL = 0.1 # 100ms, configurable def get_system_resources(): cpu_load = psutil.cpu_percent() @@ -244,44 +397,442 @@ def get_system_resources(): disk_io = psutil.disk_io_counters() disk_read = disk_io.read_bytes disk_write = disk_io.write_bytes - + gpus = GPUtil.getGPUs() gpu_load = gpus[0].load * 100 if gpus else 0 gpu_memory = gpus[0].memoryUtil * 100 if gpus else 0 - + return { - 'cpu_load': cpu_load, - 'memory_usage': memory_usage, - 'disk_read': disk_read, - 'disk_write': disk_write, - 'gpu_load': gpu_load, - 'gpu_memory': gpu_memory + "cpu_load": cpu_load, + "memory_usage": memory_usage, + "disk_read": disk_read, + "disk_write": disk_write, + "gpu_load": gpu_load, + "gpu_memory": gpu_memory, } + def send_system_resources(): last_disk_read = 0 last_disk_write = 0 while True: resources = get_system_resources() - + # Calculate disk I/O rates - disk_read_rate = (resources['disk_read'] - last_disk_read) / UPDATE_INTERVAL - disk_write_rate = (resources['disk_write'] - last_disk_write) / UPDATE_INTERVAL - - socketio.emit('system_resources', { - 'cpu_load': resources['cpu_load'], - 'memory_usage': resources['memory_usage'], - 'disk_read_rate': disk_read_rate, - 'disk_write_rate': disk_write_rate, - 'gpu_load': resources['gpu_load'], - 'gpu_memory': resources['gpu_memory'] - }) - - last_disk_read = resources['disk_read'] - last_disk_write = resources['disk_write'] + disk_read_rate = (resources["disk_read"] - last_disk_read) / UPDATE_INTERVAL + disk_write_rate = (resources["disk_write"] - last_disk_write) / UPDATE_INTERVAL + + socketio.emit( + "system_resources", + { + "cpu_load": resources["cpu_load"], + "memory_usage": resources["memory_usage"], + "disk_read_rate": disk_read_rate, + "disk_write_rate": disk_write_rate, + "gpu_load": resources["gpu_load"], + "gpu_memory": resources["gpu_memory"], + }, + ) + + last_disk_read = resources["disk_read"] + last_disk_write = resources["disk_write"] time.sleep(UPDATE_INTERVAL) + +class QueryRequest(BaseModel): + message: str + + +class QueryResponse(BaseModel): + query_id: str + + +class QueryStatusResponse(BaseModel): + status: str + conversation_history: Optional[List[dict]] + + +@app.post( + "/api/v1/query" +) +def api_query(): + """ + Submit a new query to the LLM Chat Server. + + This endpoint requires authentication via an API key. 
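+    Queries are processed asynchronously: the endpoint returns a query_id immediately,
+    and the result is retrieved later via GET /api/v1/query_status/<query_id> once the
+    background worker has finished processing it.
+
+    Example (a minimal sketch using the bundled LLMChatClient from client.py):
+        client = LLMChatClient('http://localhost:5001', 'your-api-key')
+        result = client.submit_query_and_wait('What is the capital of France?')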
+ + Sample cURL: + curl -X POST http://localhost:5001/api/v1/query \ + -H "Content-Type: application/json" \ + -H "X-API-Key: your-api-key" \ + -d '{"message": "What is the capital of France?"}' + """ + if not ENABLE_API_ENDPOINTS: + return jsonify({"error": "API endpoints are disabled"}), 404 + + api_key = request.headers.get('X-API-Key') + if not api_key: + return jsonify({"error": "API key is required"}), 401 + + api_key_id = validate_api_key(api_key) + if not api_key_id: + return jsonify({"error": "Invalid API key"}), 401 + + data = request.get_json() + if not data or 'message' not in data: + return jsonify({"error": "Invalid request body"}), 400 + + user_input = data['message'] + query_id = str(uuid.uuid4()) + + try: + db = get_db() + cursor = db.cursor() + cursor.execute( + "INSERT INTO Queries (id, ip, query, api_key_id, status) VALUES (?, ?, ?, ?, ?)", + (query_id, request.remote_addr, user_input, api_key_id, QueryStatus.QUEUED.value) + ) + db.commit() + logger.info(f"Added new query to database: {query_id}") + + return jsonify({"query_id": query_id}) + except Exception as e: + logger.exception(f"Error during API query processing: {str(e)}") + return jsonify({"error": str(e)}), 500 + + +@app.get( + "/api/v1/query_status/" +) +def get_query_status(query_id: str): + """ + Get the status of a submitted query. + + This endpoint requires authentication via an API key. + + Sample cURL: + curl -X GET http://localhost:5001/api/v1/query_status/query-id-here \ + -H "X-API-Key: your-api-key" + """ + api_key = request.headers.get('X-API-Key') + if not api_key: + return jsonify({"error": "API key is required"}), 401 + + api_key_id = validate_api_key(api_key) + if not api_key_id: + return jsonify({"error": "Invalid API key"}), 401 + + try: + db = get_db() + cursor = db.cursor() + cursor.execute("SELECT status, conversation_history FROM Queries WHERE id = ?", (query_id,)) + result = cursor.fetchone() + + if result is None: + return jsonify({"error": "Query not found"}), 404 + + status, conversation_history = result + + response = {"status": status} + if status == QueryStatus.DONE.value: + response["conversation_history"] = json.loads(conversation_history) + + return jsonify(response) + except Exception as e: + logger.exception("Error retrieving query status", error=str(e)) + return jsonify({"error": str(e)}), 500 + + +def answer_question_tools_api( + user_input: str, conversation_history: List[dict], max_retries: int = 100 +): + global tool_manager + + if not conversation_history: + conversation_history = [ + {"role": "system", "content": ANSWER_QUESTION_PROMPT}, + ] + + logger.info( + "Starting API chat", + user_input=user_input, + conversation_history=conversation_history, + ) + conversation_history.append({"role": "user", "content": user_input}) + + last_thought_content = None + + for _ in range(max_retries): + response = ollama.chat( + model=PRIMARY_MODEL, + messages=conversation_history, + tools=tool_manager.get_tools_for_ollama_dict(), + stream=False, + ) + logger.info(f"API Response: {response}") + assistant_message = response["message"] + + conversation_history.append(assistant_message) + + if "tool_calls" in assistant_message: + for tool_call in assistant_message["tool_calls"]: + tool_name = tool_call["function"]["name"] + tool_args = tool_call["function"]["arguments"] + if tool_name is not None and tool_args is not None: + tool_response = tool_manager.get_tool(tool_name).execute(tool_args) + conversation_history.append({"role": "tool", "content": tool_response}) + logger.info(f"API Tool 
response: {tool_response}") + else: + logger.warning(f"Skipping tool call due to missing tool name or arguments: {tool_call}") + else: + if "" in assistant_message["content"].lower(): + reply_content = re.search( + r"(.*?)", assistant_message["content"], re.DOTALL + ) + if reply_content: + reply_answer = reply_content.group(1).strip() + conversation_history.append( + {"role": "assistant", "content": reply_answer} + ) + return conversation_history + else: + current_thought_content = assistant_message["content"].strip() + + if last_thought_content and not current_thought_content: + conversation_history.append( + {"role": "assistant", "content": last_thought_content} + ) + return conversation_history + + last_thought_content = current_thought_content + continue + + conversation_history.append( + { + "role": "assistant", + "content": f"Max iterations reached. Last response: {assistant_message['content']}", + } + ) + return conversation_history + + +def process_queries(): + logger.info("Query processing thread started") + with app.app_context(): + while True: + try: + db = get_db() + cursor = db.cursor() + + # First, check if there are any PROCESSING queries + cursor.execute( + "SELECT id FROM Queries WHERE status = ? LIMIT 1", + (QueryStatus.PROCESSING.value,) + ) + processing_query = cursor.fetchone() + if processing_query: + logger.info(f"Found processing query: {processing_query[0]}. Waiting...") + db.commit() + time.sleep(10) + continue + + # If no PROCESSING queries, get the oldest QUEUED query + cursor.execute( + "SELECT id, query FROM Queries WHERE status = ? ORDER BY timestamp ASC LIMIT 1", + (QueryStatus.QUEUED.value,) + ) + result = cursor.fetchone() + + if result: + query_id, user_input = result + logger.info(f"Processing query: {query_id}") + + # Update status to PROCESSING + cursor.execute( + "UPDATE Queries SET status = ? WHERE id = ?", + (QueryStatus.PROCESSING.value, query_id) + ) + db.commit() + logger.info(f"Updated query {query_id} status to PROCESSING") + + # Fetch conversation history if it exists + cursor.execute("SELECT conversation_history FROM Queries WHERE id = ?", (query_id,)) + conversation_history_result = cursor.fetchone() + + if conversation_history_result and conversation_history_result[0]: + conversation_history = json.loads(conversation_history_result[0]) + else: + conversation_history = [{"role": "system", "content": ANSWER_QUESTION_PROMPT}] + + logger.info(f"Starting answer_question_tools_api for query {query_id}") + final_conversation_history = answer_question_tools_api(user_input, conversation_history) + logger.info(f"Finished answer_question_tools_api for query {query_id}") + + # Update with final result and set status to DONE + db.execute("BEGIN TRANSACTION") + cursor.execute( + "UPDATE Queries SET conversation_history = ?, status = ? WHERE id = ?", + (json.dumps(final_conversation_history), QueryStatus.DONE.value, query_id) + ) + db.commit() + logger.info(f"Updated query {query_id} status to DONE") + else: + logger.info("No queued queries found. 
Waiting...") + time.sleep(5) # Wait for 5 seconds before checking again if no queries are found + except Exception as e: + logger.exception(f"Error processing query: {str(e)}") + time.sleep(1) # Wait for 1 second before retrying in case of an error + + +# Admin endpoint for generating API keys +class GenerateKeyRequest(BaseModel): + username: str + + +class GenerateKeyResponse(BaseModel): + username: str + api_key: str + + +@app.post( + "/admin/generate_key" +) +def generate_api_key(): + """ + Generate a new API key for a user. + + This endpoint requires authentication via an admin key. + + Sample cURL: + curl -X POST http://localhost:5001/admin/generate_key \ + -H "Content-Type: application/json" \ + -H "X-Admin-Key: your-admin-key" \ + -d '{"username": "new_user"}' + """ + admin_key = request.headers.get("X-Admin-Key") + if not admin_key or admin_key != ADMIN_KEY: + return jsonify({"error": "Invalid admin key"}), 401 + + data = request.get_json() + if not data or 'username' not in data: + return jsonify({"error": "Invalid request body"}), 400 + + username = data['username'] + api_key = secrets.token_urlsafe(32) + + try: + db = get_db() + cursor = db.cursor() + cursor.execute( + "INSERT INTO Keys (username, api_key) VALUES (?, ?)", (username, api_key) + ) + db.commit() + return jsonify({"username": username, "api_key": api_key}) + except sqlite3.IntegrityError: + return jsonify({"error": "Username already exists"}), 400 + except Exception as e: + logger.exception("Error generating API key", error=str(e)) + return jsonify({"error": str(e)}), 500 + + +def start_processing_thread(): + global processing_thread, processing_thread_started + if not processing_thread_started: + processing_thread = threading.Thread(target=process_queries, daemon=True) + processing_thread.start() + processing_thread_started = True + logger.info("Query processing thread started") + + +def allowed_file(filename): + return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS + + +@app.post("/api/v1/query_with_image") +def api_query_with_image(): + """ + Submit a new query to the LLM Chat Server with an optional image. + + This endpoint requires authentication via an API key. + + Sample cURL: + curl -X POST http://localhost:5001/api/v1/query_with_image \ + -H "X-API-Key: your-api-key" \ + -F "message=What's in this image?" 
\
+    -F "image=@path/to/your/image.jpg"
+    """
+    if not ENABLE_API_ENDPOINTS:
+        return jsonify({"error": "API endpoints are disabled"}), 404
+
+    api_key = request.headers.get('X-API-Key')
+    if not api_key:
+        return jsonify({"error": "API key is required"}), 401
+
+    api_key_id = validate_api_key(api_key)
+    if not api_key_id:
+        return jsonify({"error": "Invalid API key"}), 401
+
+    if 'message' not in request.form:
+        return jsonify({"error": "Message is required"}), 400
+
+    user_input = request.form['message']
+    query_id = str(uuid.uuid4())
+
+    image_base64 = None
+    if 'image' in request.files:
+        file = request.files['image']
+        if file and allowed_file(file.filename):
+            # Read and encode the image, then check the size of the actual payload
+            # (FileStorage.content_length is unreliable for multipart uploads)
+            image_data = file.read()
+            if len(image_data) > MAX_IMAGE_SIZE:
+                return jsonify({"error": "Image size exceeds 1MB limit"}), 400
+            image_base64 = base64.b64encode(image_data).decode('utf-8')
+
+    try:
+        db = get_db()
+        cursor = db.cursor()
+        cursor.execute(
+            "INSERT INTO Queries (id, ip, query, api_key_id, status) VALUES (?, ?, ?, ?, ?)",
+            (query_id, request.remote_addr, user_input, api_key_id, QueryStatus.QUEUED.value)
+        )
+        db.commit()
+        logger.info(f"Added new query with image to database: {query_id}")
+
+        # If there's an image, add it to the conversation history
+        if image_base64:
+            conversation_history = [
+                {"role": "system", "content": ANSWER_QUESTION_PROMPT},
+                {"role": "user", "content": f"[An image was uploaded with this message] {user_input}"},
+                {"role": "system", "content": f"An image was uploaded. You can analyze it using the analyze_image tool with the following base64 string: {image_base64}"}
+            ]
+            cursor.execute(
+                "UPDATE Queries SET conversation_history = ? WHERE id = ?",
+                (json.dumps(conversation_history), query_id)
+            )
+            db.commit()
+
+        return jsonify({"query_id": query_id})
+    except Exception as e:
+        logger.exception(f"Error during API query processing with image: {str(e)}")
+        return jsonify({"error": str(e)}), 500
+
+
 if __name__ == "__main__":
     logger.info("Starting LLM Chat Server")
-    threading.Thread(target=send_system_resources, daemon=True).start()
-    socketio.run(app, debug=True, host="0.0.0.0", port=5000)
\ No newline at end of file
+    init_db()  # Initialize the database
+
+    if ENABLE_FRONTEND or ENABLE_CHAT_ENDPOINTS:
+        threading.Thread(target=send_system_resources, daemon=True).start()
+        logger.info("System resources thread started")
+
+    if ENABLE_API_ENDPOINTS:
+        start_processing_thread()
+
+    logger.info("Starting Flask application")
+    socketio.run(app, debug=True, host="0.0.0.0", port=5001)
+else:
+    # This will run when the module is imported, e.g., by the reloader
+    if ENABLE_API_ENDPOINTS:
+        start_processing_thread()
\ No newline at end of file
diff --git a/models.py b/models.py
index bd43282..a876372 100644
--- a/models.py
+++ b/models.py
@@ -3,30 +3,86 @@ import structlog
 
 logger = structlog.get_logger()
 
+
 class ModelManager:
     def __init__(self):
         self.model_capabilities = {
-        "qwen2.5:7b": ["general_knowledge", "structured_output", "multilingual", "instruction_following", "structured_data"],
-        "llama3.1:8b": ["general_knowledge", "reasoning", "tool_calling", "conversation", "multilingual", "instruction_following"],
-        "qwen2.5-coder:7b": ["code_generation", "code_analysis", "instruction_following", "math_reasoning"],
-        "llama3.2:3b": ["summarization", "instruction_following", "tool_calling", "multilingual"],
-        "llava:7b": ["visual_reasoning", "visual_conversation", "visual_tool_calling", "vision", "ocr", "multimodal"],
+        "ajindal/llama3.1-storm:8b": [
+            "general_knowledge",
+            "reasoning",
+            "tool_calling",
+            "conversation",
+            "multilingual",
+            "instruction_following",
+        ],
+        "llama3.1:8b": [
+            "general_knowledge",
+            "reasoning",
+            "tool_calling",
+            "conversation",
+            "multilingual",
+            "instruction_following",
+        ],
+        "qwen2.5:7b": [
+            "general_knowledge",
+            "reasoning",
+            "tool_calling",
+            "conversation",
+            "multilingual",
+            "instruction_following",
+        ],
+        "llama3.2:3b": [
+            "summarization",
+            "instruction_following",
+            "tool_calling",
+            "multilingual",
+        ],
+        "llava:7b": [
+            "visual_reasoning",
+            "visual_conversation",
+            "visual_tool_calling",
+            "vision",
+            "ocr",
+            "multimodal",
+        ],
         }
-        logger.info("ModelManager initialized", model_capabilities=self.model_capabilities)
+        logger.info(
+            "ModelManager initialized", model_capabilities=self.model_capabilities
+        )
 
     def get_model_capabilities(self, model_name):
         capabilities = self.model_capabilities.get(model_name, [])
-        logger.debug("Retrieved model capabilities", model=model_name, capabilities=capabilities)
+        logger.debug(
+            "Retrieved model capabilities", model=model_name, capabilities=capabilities
+        )
         return capabilities
 
     def select_best_model(self, required_capability):
-        suitable_models = [model for model, capabilities in self.model_capabilities.items() if required_capability in capabilities]
-        selected_model = suitable_models[0] if suitable_models else list(self.model_capabilities.keys())[0]
-        logger.info("Selected best model", required_capability=required_capability, selected_model=selected_model)
+        suitable_models = [
+            model
+            for model, capabilities in self.model_capabilities.items()
+            if required_capability in capabilities
+        ]
+        selected_model = (
+            suitable_models[0]
+            if suitable_models
+            else list(self.model_capabilities.keys())[0]
+        )
+        logger.info(
+            "Selected best model",
+            required_capability=required_capability,
+            selected_model=selected_model,
+        )
         return selected_model
 
-    def generate_text(self, model_name, prompt, max_length=100, system="You are a helpful assistant."):
-        logger.debug("Generating text", model=model_name, prompt=prompt, max_length=max_length)
+    def generate_text(
+        self,
+        model_name,
+        prompt,
+        max_length=100,
+        system="You are a helpful assistant.",
+        tools=[],
+    ):
         # Check if model exists
         try:
             ollama.pull(model_name)
@@ -38,8 +94,16 @@ class ModelManager:
         else:
             logger.exception("Error pulling model", model=model_name, error=str(e))
             raise e
-        response = ollama.generate(model=model_name, prompt=prompt, system=system)
-        logger.debug("Text generated", model=model_name, response=response['response'])
-        return response['response']
-model_manager = ModelManager()
\ No newline at end of file
+        # NOTE: ollama.generate() accepts neither tools nor max_tokens; tool calling
+        # requires ollama.chat(), so tools is currently unused here and the output
+        # length is capped via the num_predict option instead.
+        response = ollama.generate(
+            model=model_name,
+            prompt=prompt,
+            system=system,
+            options={"num_predict": max_length},
+        )
+        logger.debug("Text generated", model=model_name, response=response["response"])
+        return response["response"]
+
+
+model_manager = ModelManager()
diff --git a/requirements.txt b/requirements.txt
index 642aa96..d371f39 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,11 +4,19 @@ aiohttp==3.10.5
 aiosignal==1.3.1
 annotated-types==0.7.0
 anyio==4.6.0
+art==6.3
 attrs==24.2.0
+beautifulsoup4==4.12.3
+bidict==0.23.1
+black==24.8.0
+blinker==1.8.2
+bs4==0.0.2
 certifi==2024.7.4
+chardet==5.2.0
 charset-normalizer==3.3.2
 click==8.1.7
 cloudpickle==3.0.0
+cssselect==1.2.0
 datasets==3.0.0
 dill==0.3.8
 diskcache==5.6.3
@@ -17,9 +25,13 @@ duckduckgo_search==6.2.6
 einops==0.8.0
 fastapi==0.115.0
filelock==3.15.4 +Flask==3.0.3 +flask-openapi3==3.1.3 +Flask-SocketIO==5.3.7 frozenlist==1.4.1 fsspec==2024.6.1 gguf==0.9.1 +GPUtil==1.4.0 h11==0.14.0 httpcore==1.0.5 httptools==0.6.1 @@ -29,6 +41,8 @@ idna==3.7 importlib_metadata==8.5.0 inquirerpy==0.3.4 interegular==0.3.3 +isort==5.13.2 +itsdangerous==2.2.0 Jinja2==3.1.4 jiter==0.5.0 jsonschema==4.23.0 @@ -36,6 +50,9 @@ jsonschema-specifications==2023.12.1 lark==1.2.2 llvmlite==0.43.0 lm-format-enforcer==0.10.6 +lxml==5.3.0 +lxml_html_clean==0.2.2 +markdownify==0.13.1 MarkupSafe==2.1.5 mistral_common==1.4.3 mpmath==1.3.0 @@ -43,6 +60,7 @@ msgpack==1.1.0 msgspec==0.18.6 multidict==6.1.0 multiprocess==0.70.16 +mypy-extensions==1.0.0 nest-asyncio==1.6.0 networkx==3.3 numba==0.60.0 @@ -60,13 +78,16 @@ nvidia-ml-py==12.560.30 nvidia-nccl-cu12==2.20.5 nvidia-nvjitlink-cu12==12.6.20 nvidia-nvtx-cu12==12.1.105 +ollama==0.3.3 openai==1.47.1 outlines==0.0.46 packaging==24.1 pandas==2.2.3 partial-json-parser==0.2.1.1.post4 +pathspec==0.12.1 pfzy==0.3.4 pillow==10.4.0 +platformdirs==4.3.6 primp==0.5.5 prometheus-fastapi-instrumentator==7.0.0 prometheus_client==0.21.0 @@ -81,10 +102,13 @@ pydantic==2.9.2 pydantic_core==2.23.4 python-dateutil==2.9.0.post0 python-dotenv==1.0.1 +python-engineio==4.9.1 +python-socketio==5.11.4 pytz==2024.2 PyYAML==6.0.2 pyzmq==26.2.0 ray==2.36.1 +readability-lxml==0.8.1 referencing==0.35.1 regex==2024.7.24 requests==2.32.3 @@ -92,9 +116,12 @@ rpds-py==0.20.0 safetensors==0.4.4 sentencepiece==0.2.0 setuptools==72.1.0 +simple-websocket==1.0.0 six==1.16.0 sniffio==1.3.1 +soupsieve==2.6 starlette==0.38.6 +structlog==24.4.0 sympy==1.13.2 tiktoken==0.7.0 tokenizers==0.19.1 @@ -113,6 +140,8 @@ vllm-flash-attn==2.6.1 watchfiles==0.24.0 wcwidth==0.2.13 websockets==13.1 +Werkzeug==3.0.4 +wsproto==1.2.0 xformers==0.0.27.post2 xxhash==3.5.0 yarl==1.12.0 diff --git a/schema.sql b/schema.sql new file mode 100644 index 0000000..5a0a29e --- /dev/null +++ b/schema.sql @@ -0,0 +1,16 @@ +CREATE TABLE IF NOT EXISTS Keys ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + username TEXT NOT NULL UNIQUE, + api_key TEXT NOT NULL UNIQUE +); + +CREATE TABLE IF NOT EXISTS Queries ( + id TEXT PRIMARY KEY, + ip TEXT NOT NULL, + timestamp DATETIME DEFAULT CURRENT_TIMESTAMP, + query TEXT NOT NULL, + api_key_id INTEGER, + status TEXT NOT NULL, + conversation_history TEXT, + FOREIGN KEY (api_key_id) REFERENCES Keys (id) +); \ No newline at end of file diff --git a/tools.py b/tools.py new file mode 100644 index 0000000..265ca79 --- /dev/null +++ b/tools.py @@ -0,0 +1,369 @@ +import subprocess +import tempfile +import time +import json +import requests +from markdownify import markdownify as md +from readability.readability import Document +import duckduckgo_search +import datetime +import random +import math +import re +import base64 +from io import BytesIO +from PIL import Image, ImageDraw, ImageFont +import ollama +import os + +class Tool: + def __init__(self, name: str, description: str, arguments: dict, returns: str): + self.name = name + self.description = description + self.arguments = arguments + self.returns = returns + + def execute(self, arguments: dict) -> str: + pass + + +class ToolManager: + def __init__(self): + self.tools = [] + + def add_tool(self, tool: Tool): + self.tools.append(tool) + + def get_tool(self, name: str) -> Tool: + for tool in self.tools: + if tool.name == name: + return tool + return None + + def get_tools_and_descriptions_for_prompt(self): + return "\n".join([f"{tool.name}: {tool.description}" for tool in self.tools]) + + def 
get_tools_for_ollama_dict(self): + return [ + { + "type": "function", + "function": { + "name": tool.name, + "description": tool.description, + "parameters": tool.arguments, + }, + } + for tool in self.tools + ] + + +class DefaultToolManager(ToolManager): + def __init__(self): + super().__init__() + self.add_tool(SearchTool()) + self.add_tool(GetReadablePageContentsTool()) + self.add_tool(CalculatorTool()) + self.add_tool(PythonCodeTool()) + self.add_tool(DateTimeTool()) + self.add_tool(RandomNumberTool()) + self.add_tool(RegexTool()) + self.add_tool(Base64Tool()) + self.add_tool(SimpleChartTool()) + self.add_tool(LLAVAImageAnalysisTool()) + + +class SearchTool(Tool): + def __init__(self): + super().__init__( + "search_web", + "Search the internet for information", + { + "type": "object", + "properties": { + "query": {"type": "string", "description": "The search query"} + }, + }, + "results:list[string]", + ) + + def execute(self, arg: dict) -> str: + try: + res = duckduckgo_search.DDGS().text(arg["query"], max_results=5) + return "\n\n".join([f"{r['title']}\n{r['body']}\n{r['href']}" for r in res]) + except Exception as e: + return f"Error searching the web: {str(e)}" + + +def get_readable_page_contents(url: str) -> str: + try: + response = requests.get(url) + response.raise_for_status() + doc = Document(response.content) + content = doc.summary() + return md(content) + except Exception as e: + return f"Error fetching readable content: {str(e)}" + + +class GetReadablePageContentsTool(Tool): + def __init__(self): + super().__init__( + "get_readable_page_contents", + "Get the contents of a web page in a readable format", + { + "type": "object", + "properties": { + "url": {"type": "string", "description": "The url of the web page"} + }, + }, + "contents:string", + ) + + def execute(self, arg: dict) -> str: + return get_readable_page_contents(arg["url"]) + + +class CalculatorTool(Tool): + def __init__(self): + super().__init__( + "calculator", + "Perform a calculation using python's eval function", + { + "type": "object", + "properties": { + "expression": { + "type": "string", + "description": "The mathematical expression to evaluate, should be a python mathematical expression", + } + }, + }, + "result:string", + ) + + def execute(self, arg: dict) -> str: + try: + return str(eval(arg["expression"])) + except Exception as e: + return f"Error executing code: {str(e)}" + + +class PythonCodeTool(Tool): + def __init__(self): + super().__init__( + "python_code", + "Execute python code using a temporary file and a subprocess. 
You must print results to stdout.", + { + "type": "object", + "properties": { + "code": { + "type": "string", + "description": "The python code to execute, can be multiline", + } + }, + }, + "result:string", + ) + + def execute(self, arg: dict) -> str: + try: + with tempfile.NamedTemporaryFile( + suffix=".py", mode="w", delete=False + ) as temp_file: + temp_file.write(arg["code"]) + temp_file.flush() + + start_time = time.time() + process = subprocess.Popen( + ["python", temp_file.name], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + stdout, stderr = process.communicate(timeout=10) # 10 second timeout + end_time = time.time() + execution_time = end_time - start_time + + result = { + "stdout": stdout, + "stderr": stderr, + "return_value": process.returncode, + "execution_time": execution_time, + } + + except subprocess.TimeoutExpired: + process.kill() + return "Error: Code execution timed out after 10 seconds" + except Exception as e: + return f"Error executing code: {str(e)}" + + return "\n".join([f"{k}:\n{v}" for k, v in result.items()]) + + +class DateTimeTool(Tool): + def __init__(self): + super().__init__( + "get_current_datetime", + "Get the current date and time", + {"type": "object", "properties": {}}, + "datetime:string" + ) + + def execute(self, arg: dict) -> str: + return datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + + +class RandomNumberTool(Tool): + def __init__(self): + super().__init__( + "generate_random_number", + "Generate a random number within a given range", + { + "type": "object", + "properties": { + "min": {"type": "number", "description": "The minimum value"}, + "max": {"type": "number", "description": "The maximum value"} + } + }, + "random_number:number" + ) + + def execute(self, arg: dict) -> str: + return str(random.uniform(arg["min"], arg["max"])) + + +class RegexTool(Tool): + def __init__(self): + super().__init__( + "regex_match", + "Perform a regex match on a given text", + { + "type": "object", + "properties": { + "text": {"type": "string", "description": "The text to search in"}, + "pattern": {"type": "string", "description": "The regex pattern to match"} + } + }, + "matches:list[string]" + ) + + def execute(self, arg: dict) -> str: + matches = re.findall(arg["pattern"], arg["text"]) + return json.dumps(matches) + + +class Base64Tool(Tool): + def __init__(self): + super().__init__( + "base64_encode_decode", + "Encode or decode a string using Base64", + { + "type": "object", + "properties": { + "action": {"type": "string", "enum": ["encode", "decode"], "description": "Whether to encode or decode"}, + "text": {"type": "string", "description": "The text to encode or decode"} + } + }, + "result:string" + ) + + def execute(self, arg: dict) -> str: + if arg["action"] == "encode": + return base64.b64encode(arg["text"].encode()).decode() + elif arg["action"] == "decode": + return base64.b64decode(arg["text"].encode()).decode() + else: + return "Invalid action. Use 'encode' or 'decode'." 
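+
+# NOTE: the two tools below exchange images as base64-encoded PNG strings:
+# generate_simple_chart returns its chart as a base64 string, and analyze_image
+# consumes the base64 image that /api/v1/query_with_image stores in the
+# conversation history.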
+
+
+class SimpleChartTool(Tool):
+    def __init__(self):
+        super().__init__(
+            "generate_simple_chart",
+            "Generate a simple bar chart image",
+            {
+                "type": "object",
+                "properties": {
+                    "data": {"type": "array", "items": {"type": "number"}, "description": "List of numerical values for the chart"},
+                    "labels": {"type": "array", "items": {"type": "string"}, "description": "Labels for each bar"}
+                }
+            },
+            "image_base64:string"
+        )
+
+    def execute(self, arg: dict) -> str:
+        data = arg["data"]
+        labels = arg["labels"]
+
+        # Create a simple bar chart
+        width, height = 400, 300
+        img = Image.new('RGB', (width, height), color='white')
+        draw = ImageDraw.Draw(img)
+
+        # Draw bars (guard against empty or all-zero data to avoid division by zero)
+        max_value = max(data, default=1) or 1
+        bar_width = width // (len(data) + 1)
+        for i, value in enumerate(data):
+            bar_height = (value / max_value) * (height - 50)
+            left = (i + 1) * bar_width
+            draw.rectangle([left, height - bar_height, left + bar_width, height], fill='blue')
+
+        # Add labels
+        font = ImageFont.load_default()
+        for i, label in enumerate(labels):
+            left = (i + 1) * bar_width + bar_width // 2
+            draw.text((left, height - 20), label, fill='black', anchor='ms', font=font)
+
+        # Convert to base64
+        buffered = BytesIO()
+        img.save(buffered, format="PNG")
+        img_str = base64.b64encode(buffered.getvalue()).decode()
+        return img_str
+
+
+class LLAVAImageAnalysisTool(Tool):
+    def __init__(self):
+        super().__init__(
+            "analyze_image",
+            "Analyze an image using the LLAVA model",
+            {
+                "type": "object",
+                "properties": {
+                    "image_base64": {"type": "string", "description": "Base64 encoded image"},
+                    "question": {"type": "string", "description": "Question about the image"}
+                }
+            },
+            "analysis:string"
+        )
+
+    def execute(self, arg: dict) -> str:
+        try:
+            # Decode base64 image
+            image_data = base64.b64decode(arg["image_base64"])
+            image = Image.open(BytesIO(image_data))
+
+            # Save image to a temporary file
+            with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp_file:
+                image.save(temp_file, format="PNG")
+                temp_file_path = temp_file.name
+
+            # Call the LLAVA model; keep_alive=0 unloads it from memory after the
+            # request (ollama.delete would remove the model from disk entirely)
+            response = ollama.chat(
+                model="llava:7b",
+                messages=[
+                    {
+                        "role": "user",
+                        "content": arg["question"],
+                        "images": [temp_file_path]
+                    }
+                ],
+                keep_alive=0
+            )
+
+            # Clean up temporary file
+            os.remove(temp_file_path)
+
+            return response['message']['content']
+        except Exception as e:
+            return f"Error analyzing image: {str(e)}"
\ No newline at end of file
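
Usage sketch (not part of the patch above): one way the DefaultToolManager introduced in tools.py could be driven through Ollama's tool-calling chat API. The model name and prompt are placeholders, and the real dispatch loop lives in the server's query processor, which is not shown in this diff.

    # Illustrative sketch only -- assumes tools.py from this patch is importable
    # and that an Ollama server with a tool-calling model is available.
    import json

    import ollama

    from tools import DefaultToolManager

    tool_manager = DefaultToolManager()

    response = ollama.chat(
        model="llama3.1:8b",  # placeholder; any model tagged "tool_calling" in models.py
        messages=[{"role": "user", "content": "What is 17 * 23?"}],
        tools=tool_manager.get_tools_for_ollama_dict(),
    )

    # Run whichever tools the model requested and print their raw string results
    for call in response["message"].get("tool_calls", []):
        tool = tool_manager.get_tool(call["function"]["name"])
        if tool is None:
            continue
        args = call["function"]["arguments"]
        if isinstance(args, str):  # arguments may arrive as a JSON string
            args = json.loads(args)
        print(f"{tool.name}: {tool.execute(args)}")

In a full round trip each tool result would then be appended to the conversation as a "tool" message and fed back into another chat call; that loop is left out of this sketch.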