tool_use #1

Open
dubey wants to merge 21 commits from tool_use into main
29 changed files with 838 additions and 6497 deletions
Showing only changes of commit a888e84079 - Show all commits

5
.gitignore vendored
View File

@ -213,4 +213,7 @@ jspm_packages
# Optional REPL history # Optional REPL history
.node_repl_history .node_repl_history
.next .next
config.ini
*.db

138
client.py Normal file
View File

@ -0,0 +1,138 @@
import time
import requests
class LLMChatClient:
    """HTTP client for the query endpoints of the LLM Chat Server.

    Every request carries the API key in the ``X-API-Key`` header.

    Args:
        base_url (str): Root URL of the server; trailing slashes are removed.
        api_key (str): API key sent with every request.
        timeout (float | tuple | None): Timeout passed to each ``requests``
            call. Defaults to 30 seconds; pass ``None`` to wait indefinitely.
    """

    def __init__(self, base_url, api_key, timeout=30):
        self.base_url = base_url.rstrip("/")
        self.api_key = api_key
        # Without an explicit timeout, requests waits forever on a stalled server.
        self.timeout = timeout
        self.headers = {"X-API-Key": api_key, "Content-Type": "application/json"}

    def submit_query(self, message):
        r"""
        Submit a query to the LLM Chat Server.

        Args:
            message (str): The message to send to the server.

        Returns:
            str: The query ID for the submitted query.

        Raises:
            requests.RequestException: If the request fails, times out, or the
                server answers with an error status.

        Example:
            client = LLMChatClient('http://localhost:5001', 'your-api-key')
            query_id = client.submit_query('What is the capital of France?')
            print(f"Query ID: {query_id}")

        cURL equivalent:
            curl -X POST http://localhost:5001/api/v1/query \
                 -H "Content-Type: application/json" \
                 -H "X-API-Key: your-api-key" \
                 -d '{"message": "What is the capital of France?"}'
        """
        url = f"{self.base_url}/api/v1/query"
        data = {"message": message}
        response = requests.post(
            url, json=data, headers=self.headers, timeout=self.timeout
        )
        response.raise_for_status()
        return response.json()["query_id"]

    def get_query_status(self, query_id):
        r"""
        Get the status of a submitted query.

        Args:
            query_id (str): The ID of the query to check.

        Returns:
            dict: A dictionary containing the status and conversation history
                (if completed).

        Raises:
            requests.RequestException: If the request fails, times out, or the
                server answers with an error status.

        Example:
            client = LLMChatClient('http://localhost:5001', 'your-api-key')
            status = client.get_query_status('query-id-here')
            print(f"Query status: {status['status']}")
            if status['status'] == 'completed':
                print(f"Conversation history: {status['conversation_history']}")

        cURL equivalent:
            curl -X GET http://localhost:5001/api/v1/query_status/query-id-here \
                 -H "X-API-Key: your-api-key"
        """
        url = f"{self.base_url}/api/v1/query_status/{query_id}"
        response = requests.get(url, headers=self.headers, timeout=self.timeout)
        response.raise_for_status()
        return response.json()

    def submit_query_and_wait(self, message, max_wait_time=300, poll_interval=2):
        """
        Submit a query and poll until it completes.

        Args:
            message (str): The message to send to the server.
            max_wait_time (int): Maximum time to wait for the result in seconds.
            poll_interval (int): Time between status checks in seconds.

        Returns:
            dict: The completed conversation history.

        Raises:
            requests.RequestException: If a request fails.
            TimeoutError: If the query doesn't complete within max_wait_time.

        Example:
            client = LLMChatClient('http://localhost:5001', 'your-api-key')
            result = client.submit_query_and_wait('What is the capital of France?')
            print(f"Conversation history: {result}")
        """
        query_id = self.submit_query(message)
        # A monotonic clock keeps the deadline immune to system clock changes.
        deadline = time.monotonic() + max_wait_time
        while time.monotonic() < deadline:
            status = self.get_query_status(query_id)
            if status["status"] == "completed":
                return status["conversation_history"]
            remaining = deadline - time.monotonic()
            if remaining <= 0:
                break
            # Never sleep beyond the deadline, even with a long poll_interval.
            time.sleep(min(poll_interval, remaining))
        raise TimeoutError(f"Query did not complete within {max_wait_time} seconds")
class LLMChatAdminClient:
    """HTTP client for the admin endpoints of the LLM Chat Server.

    Every request carries the admin key in the ``X-Admin-Key`` header.

    Args:
        base_url (str): Root URL of the server; trailing slashes are removed.
        admin_key (str): Admin key sent with every request.
        timeout (float | tuple | None): Timeout passed to each ``requests``
            call. Defaults to 30 seconds; pass ``None`` to wait indefinitely.
    """

    def __init__(self, base_url, admin_key, timeout=30):
        self.base_url = base_url.rstrip("/")
        self.admin_key = admin_key
        # Without an explicit timeout, requests waits forever on a stalled server.
        self.timeout = timeout
        self.headers = {"X-Admin-Key": admin_key, "Content-Type": "application/json"}

    def generate_api_key(self, username):
        r"""
        Generate a new API key for a user.

        Args:
            username (str): The username to generate the API key for.

        Returns:
            dict: A dictionary containing the username and generated API key.

        Raises:
            requests.RequestException: If the request fails, times out, or the
                server answers with an error status.

        Example:
            admin_client = LLMChatAdminClient('http://localhost:5001', 'your-admin-key')
            result = admin_client.generate_api_key('new_user')
            print(f"Generated API key for {result['username']}: {result['api_key']}")

        cURL equivalent:
            curl -X POST http://localhost:5001/admin/generate_key \
                 -H "Content-Type: application/json" \
                 -H "X-Admin-Key: your-admin-key" \
                 -d '{"username": "new_user"}'
        """
        url = f"{self.base_url}/admin/generate_key"
        data = {"username": username}
        response = requests.post(
            url, json=data, headers=self.headers, timeout=self.timeout
        )
        response.raise_for_status()
        return response.json()

View File

@ -1,3 +0,0 @@
{
"extends": ["next/core-web-vitals", "next/typescript"]
}

36
dewey/.gitignore vendored
View File

@ -1,36 +0,0 @@
# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
# dependencies
/node_modules
/.pnp
.pnp.js
.yarn/install-state.gz
# testing
/coverage
# next.js
/.next/
/out/
# production
/build
# misc
.DS_Store
*.pem
# debug
npm-debug.log*
yarn-debug.log*
yarn-error.log*
# local env files
.env*.local
# vercel
.vercel
# typescript
*.tsbuildinfo
next-env.d.ts

View File

@ -1,36 +0,0 @@
This is a [Next.js](https://nextjs.org) project bootstrapped with [`create-next-app`](https://nextjs.org/docs/app/api-reference/cli/create-next-app).
## Getting Started
First, run the development server:
```bash
npm run dev
# or
yarn dev
# or
pnpm dev
# or
bun dev
```
Open [http://localhost:3000](http://localhost:3000) with your browser to see the result.
You can start editing the page by modifying `app/page.tsx`. The page auto-updates as you edit the file.
This project uses [`next/font`](https://nextjs.org/docs/app/building-your-application/optimizing/fonts) to automatically optimize and load [Geist](https://vercel.com/font), a new font family for Vercel.
## Learn More
To learn more about Next.js, take a look at the following resources:
- [Next.js Documentation](https://nextjs.org/docs) - learn about Next.js features and API.
- [Learn Next.js](https://nextjs.org/learn) - an interactive Next.js tutorial.
You can check out [the Next.js GitHub repository](https://github.com/vercel/next.js) - your feedback and contributions are welcome!
## Deploy on Vercel
The easiest way to deploy your Next.js app is to use the [Vercel Platform](https://vercel.com/new?utm_medium=default-template&filter=next.js&utm_source=create-next-app&utm_campaign=create-next-app-readme) from the creators of Next.js.
Check out our [Next.js deployment documentation](https://nextjs.org/docs/app/building-your-application/deploying) for more details.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -1,64 +0,0 @@
@tailwind base;
@tailwind components;
@tailwind utilities;
:root {
--foreground-rgb: 255, 255, 255;
--background-start-rgb: 0, 0, 0;
--background-end-rgb: 0, 0, 0;
}
body {
color: rgb(var(--foreground-rgb));
background: linear-gradient(
to bottom,
transparent,
rgb(var(--background-end-rgb))
)
rgb(var(--background-start-rgb));
font-family: 'Noto Sans Mono', monospace;
}
/* Custom styles from the original index.html */
.thinking-section {
margin-bottom: 20px;
border-left: 2px solid #444;
padding-left: 10px;
}
.thought-summary {
font-weight: bold;
margin-bottom: 5px;
padding: 5px;
border-radius: 3px;
}
.thought-summary.plan { background-color: #2c3e50; }
.thought-summary.decision { background-color: #34495e; }
.thought-summary.tool_call { background-color: #16a085; }
.thought-summary.tool_result { background-color: #27ae60; }
.thought-summary.think_more { background-color: #2980b9; }
.thought-summary.answer { background-color: #8e44ad; }
.thought-details {
display: none;
margin-left: 20px;
border-left: 2px solid #444;
padding-left: 10px;
margin-bottom: 10px;
white-space: pre-wrap;
font-family: 'Noto Sans Mono', monospace;
background-color: #222;
}
.collapsible::before {
content: '▶ ';
display: inline-block;
transition: transform 0.3s;
}
.collapsible.open::before {
transform: rotate(90deg);
}
/* Add any other custom styles from the original index.html here */

View File

@ -1,31 +0,0 @@
import './globals.css'
import { Inter } from 'next/font/google'
const inter = Inter({ subsets: ['latin'] })
export const metadata = {
title: 'DWS Intelligence',
description: 'AI-powered chat application',
}
// Third-party scripts pulled from CDNs, listed in the order they are emitted in <head>.
const HEAD_SCRIPT_URLS = [
  'https://cdnjs.cloudflare.com/ajax/libs/socket.io/4.0.1/socket.io.js',
  'https://cdn.jsdelivr.net/npm/marked/marked.min.js',
  'https://cdn.jsdelivr.net/npm/moment@2.29.4/moment.min.js',
  'https://cdn.jsdelivr.net/npm/chart.js',
  'https://cdn.jsdelivr.net/npm/chartjs-adapter-moment@1.0.1/dist/chartjs-adapter-moment.min.js',
  'https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.7.0/highlight.min.js',
]

/**
 * Root layout: renders the <html>/<head>/<body> shell, loads the CDN scripts
 * and stylesheets, and applies the Inter font class to <body>.
 */
export default function RootLayout(props: { children: React.ReactNode }) {
  const { children } = props
  const scriptTags = HEAD_SCRIPT_URLS.map((src) => <script key={src} src={src}></script>)

  return (
    <html lang="en">
      <head>
        {scriptTags}
        <link
          rel="stylesheet"
          href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.7.0/styles/default.min.css"
        />
        <link
          href="https://fonts.googleapis.com/css2?family=Noto+Sans+Mono:wght@400;700&display=swap"
          rel="stylesheet"
        />
      </head>
      <body className={inter.className}>{children}</body>
    </html>
  )
}

View File

@ -1,44 +0,0 @@
'use client'
import { useState, useEffect } from 'react'
import ChatArea from '../components/ChatArea'
import Sidebar from '../components/Sidebar'
import useSocket from '../hooks/useSocket'
import useLocalStorage from '../hooks/useLocalStorage'
/**
 * Top-level page: owns the chat map (persisted through useLocalStorage), the
 * id of the active chat and the shared socket, and hands them to ChatArea and
 * Sidebar.
 */
export default function Home() {
  const [currentChatId, setCurrentChatId] = useState<string | null>(null)
  const [chats, setChats] = useLocalStorage('chats', {})
  const socket = useSocket()

  // Adds an empty chat keyed by the current timestamp and makes it active.
  function createNewChat() {
    const chatId = Date.now().toString()
    setChats(prevChats => {
      const emptyChat = { messages: [], thinkingSections: [] }
      return { ...prevChats, [chatId]: emptyChat }
    })
    setCurrentChatId(chatId)
  }

  // On mount only: select the first stored chat, or create one if none exist.
  useEffect(() => {
    const storedIds = Object.keys(chats)
    if (storedIds.length > 0) {
      setCurrentChatId(storedIds[0])
      return
    }
    createNewChat()
  }, [])

  const chatAreaProps = {
    currentChatId,
    setCurrentChatId,
    chats,
    setChats,
    createNewChat,
    socket,
  }

  return (
    <div className="flex h-screen">
      <ChatArea {...chatAreaProps} />
      <Sidebar socket={socket} />
    </div>
  )
}

View File

@ -1,133 +0,0 @@
import { useState, useEffect } from 'react'
import ChatTabs from './ChatTabs'
import ChatContainer from './ChatContainer'
import UserInput from './UserInput'
/**
 * Chat column: tab strip, message view and input box.
 *
 * Sends `chat_request` events over the socket and folds the server's
 * `thinking`, `thought` and `chat_response` events into the active chat.
 */
export default function ChatArea({ currentChatId, setCurrentChatId, chats, setChats, createNewChat, socket }) {
  const [userInput, setUserInput] = useState('')

  // Applies `update` to one chat; a no-op when the chat id is null or unknown,
  // so late socket events for a closed chat cannot crash the reducer.
  const updateChat = (chatId, update) => {
    setChats(prevChats => {
      const chat = chatId ? prevChats[chatId] : undefined
      if (!chat) return prevChats
      return { ...prevChats, [chatId]: update(chat) }
    })
  }

  const sendMessage = () => {
    const activeChat = currentChatId ? chats[currentChatId] : undefined
    // Nothing to send without text, an existing chat and a connected socket.
    if (!userInput.trim() || !activeChat || !socket) return

    const newMessage = { content: userInput, isUser: true }
    updateChat(currentChatId, chat => ({
      ...chat,
      messages: [...chat.messages, newMessage],
      thinkingSections: [...chat.thinkingSections, { thoughts: [] }]
    }))

    socket.emit('chat_request', {
      message: userInput,
      // Preserve the original turn order; the new message travels separately
      // in `message`, so only the prior messages are included here.
      conversation_history: activeChat.messages.map(m => ({
        role: m.isUser ? 'user' : 'assistant',
        content: m.content
      }))
    })
    setUserInput('')
  }

  const switchToChat = (chatId: string) => {
    setCurrentChatId(chatId);
  }

  const closeChat = (chatId: string) => {
    if (!window.confirm('Are you sure you want to close this chat?')) return

    setChats(prevChats => {
      const newChats = { ...prevChats };
      delete newChats[chatId];
      return newChats;
    });

    if (currentChatId === chatId) {
      const remainingChatIds = Object.keys(chats).filter(id => id !== chatId);
      if (remainingChatIds.length > 0) {
        switchToChat(remainingChatIds[0]);
      } else {
        createNewChat();
      }
    }
  }

  useEffect(() => {
    if (!socket) return

    // Each `thinking` event opens a new thinking section.
    const onThinking = (data) => {
      updateChat(currentChatId, chat => ({
        ...chat,
        thinkingSections: [
          ...chat.thinkingSections,
          { thoughts: [{ type: 'thinking', content: data.step }] }
        ]
      }))
    }

    // Each `thought` event is appended to the most recent thinking section.
    const onThought = (data) => {
      updateChat(currentChatId, chat => ({
        ...chat,
        thinkingSections: chat.thinkingSections.map((section, index) =>
          index === chat.thinkingSections.length - 1
            ? { ...section, thoughts: [...section.thoughts, data] }
            : section
        )
      }))
    }

    const onChatResponse = (data) => {
      updateChat(currentChatId, chat => ({
        ...chat,
        messages: [...chat.messages, { content: data.response, isUser: false }]
      }))
    }

    const onError = (data) => {
      console.error('Error:', data.message);
      // You might want to display this error to the user
    }

    socket.on('thinking', onThinking);
    socket.on('thought', onThought);
    socket.on('chat_response', onChatResponse);
    socket.on('error', onError);

    // Detach only our own handlers; `off(event)` without a handler would also
    // remove listeners registered by other components on the same socket.
    return () => {
      socket.off('thinking', onThinking);
      socket.off('thought', onThought);
      socket.off('chat_response', onChatResponse);
      socket.off('error', onError);
    };
  }, [socket, currentChatId, setChats]);

  return (
    <div className="flex flex-col flex-1">
      <ChatTabs
        chats={chats}
        currentChatId={currentChatId}
        createNewChat={createNewChat}
        switchToChat={switchToChat}
        closeChat={closeChat}
      />
      {currentChatId && (
        <ChatContainer
          currentChat={chats[currentChatId]}
          socket={socket}
        />
      )}
      <UserInput
        value={userInput}
        onChange={setUserInput}
        onSend={sendMessage}
      />
    </div>
  )
}

View File

@ -1,85 +0,0 @@
import React, { useEffect, useRef } from 'react';
import { marked } from 'marked';
interface ChatContainerProps {
currentChat: {
messages: Array<{ content: string; isUser: boolean }>;
thinkingSections: Array<{ thoughts: Array<{ type: string; content: string; details?: string }> }>;
} | null;
socket: any;
}
/**
 * Scrollable transcript for one chat: all messages first, then all thinking
 * sections. Scrolls to the bottom whenever `currentChat` changes.
 *
 * NOTE(review): assistant messages and thought contents are passed through
 * `marked` and injected with dangerouslySetInnerHTML without sanitization;
 * confirm the content source is trusted.
 */
const ChatContainer: React.FC<ChatContainerProps> = ({ currentChat, socket }) => {
  const scrollRef = useRef<HTMLDivElement>(null);

  useEffect(() => {
    const node = scrollRef.current;
    if (!node) return;
    node.scrollTop = node.scrollHeight;
  }, [currentChat]);

  if (!currentChat) return null;

  // One bubble per message; user text is rendered verbatim, assistant text as markdown.
  const messageNodes = currentChat.messages.map((message, index) => {
    const rowClass = message.isUser ? 'text-right text-cyan-300' : 'text-left text-white';
    const bubbleClass = message.isUser ? 'bg-cyan-800' : 'bg-gray-700';
    const body = message.isUser
      ? message.content
      : <div dangerouslySetInnerHTML={{ __html: marked(message.content) }} />;

    return (
      <div key={index} className={`mb-4 ${rowClass}`}>
        <div className={`inline-block p-2 rounded-lg ${bubbleClass}`}>{body}</div>
      </div>
    );
  });

  // One bordered group per thinking section, one entry per thought.
  const sectionNodes = currentChat.thinkingSections.map((section, sectionIndex) => {
    const thoughtNodes = section.thoughts.map((thought, thoughtIndex) => (
      <div key={thoughtIndex} className="mb-2">
        <div className={`font-bold ${getThoughtColor(thought.type)}`}>
          {thought.type}:
        </div>
        <div dangerouslySetInnerHTML={{ __html: marked(thought.content) }} />
        {thought.details && (
          <pre className="mt-2 p-2 bg-gray-800 rounded">
            {thought.details}
          </pre>
        )}
      </div>
    ));

    return (
      <div key={sectionIndex} className="mb-4 border-l-2 border-gray-600 pl-4">
        {thoughtNodes}
      </div>
    );
  });

  return (
    <div ref={scrollRef} className="flex-1 overflow-y-auto p-4 bg-gray-900">
      {messageNodes}
      {sectionNodes}
    </div>
  );
};
// Tailwind text colour for each known thought type (keys are lower-case).
const THOUGHT_COLORS = new Map<string, string>([
  ['plan', 'text-blue-400'],
  ['decision', 'text-green-400'],
  ['tool_call', 'text-yellow-400'],
  ['tool_result', 'text-purple-400'],
  ['think_more', 'text-pink-400'],
  ['answer', 'text-red-400'],
]);

/**
 * Returns the Tailwind text-colour class for a thought type, matched
 * case-insensitively; unknown types fall back to grey.
 */
function getThoughtColor(type: string): string {
  const color = THOUGHT_COLORS.get(type.toLowerCase());
  if (color === undefined) {
    return 'text-gray-400';
  }
  return color;
}
export default ChatContainer;

View File

@ -1,48 +0,0 @@
import React from 'react';
interface ChatTabsProps {
chats: Record<string, any>;
currentChatId: string | null;
createNewChat: () => void;
switchToChat: (chatId: string) => void;
closeChat: (chatId: string) => void;
}
/**
 * Tab strip: one tab per chat id (with a close button) followed by a
 * "+ New Chat" button. The active tab gets a lighter background.
 */
const ChatTabs: React.FC<ChatTabsProps> = (props) => {
  const { chats, currentChatId, createNewChat, switchToChat, closeChat } = props;

  const renderTab = (chatId: string) => {
    const background = chatId === currentChatId ? 'bg-gray-600' : 'bg-gray-700';

    // Stop propagation so closing a tab never triggers a handler on its parent.
    const handleClose = (e: React.MouseEvent) => {
      e.stopPropagation();
      closeChat(chatId);
    };

    return (
      <div
        key={chatId}
        className={`px-4 py-2 mr-2 rounded-t-lg flex items-center ${background}`}
      >
        <button onClick={() => switchToChat(chatId)} className="flex-grow text-left">
          Chat {chatId}
        </button>
        <button className="ml-2 text-red-500 hover:text-red-700" onClick={handleClose}>
          ×
        </button>
      </div>
    );
  };

  const tabIds = Object.keys(chats);

  return (
    <div className="flex bg-gray-800 p-2">
      {tabIds.map(renderTab)}
      <button className="px-4 py-2 bg-green-600 rounded-t-lg" onClick={createNewChat}>
        + New Chat
      </button>
    </div>
  );
};
export default ChatTabs;

View File

@ -1,129 +0,0 @@
import React, { useEffect, useRef, useState } from 'react';
import dynamic from 'next/dynamic';
// Names of the charts; each matches a `<canvas id="{name}Chart">` rendered below.
const CHART_NAMES = ['cpu', 'memory', 'disk', 'gpu', 'gpuMemory'];

// Payload key -> chart name for the fields that have an obvious target chart.
// NOTE(review): no payload key is mapped to the `disk` chart because the field
// the server sends for it is not determinable from this file — confirm.
const RESOURCE_KEY_TO_CHART = new Map<string, string>([
  ['cpu_load', 'cpu'],
  ['memory_usage', 'memory'],
  ['gpu_load', 'gpu'],
  ['gpu_memory', 'gpuMemory'],
]);

// Upper bound on retained points per chart so memory does not grow forever.
const MAX_POINTS_PER_CHART = 300;

// Builds a fresh config per chart; sharing one object would make every chart
// draw from the same dataset array.
function buildChartConfig() {
  return {
    type: 'line',
    options: {
      responsive: true,
      maintainAspectRatio: false,
      scales: {
        // NOTE(review): a `time` scale needs a date adapter registered with
        // this Chart.js instance — confirm one is loaded.
        x: {
          type: 'time',
          time: {
            unit: 'second',
          },
        },
        y: {
          beginAtZero: true,
          max: 100,
        },
      },
      animation: false,
    },
    data: {
      datasets: [{
        data: [],
        borderColor: 'rgb(75, 192, 192)',
        tension: 0.1,
      }],
    },
  };
}

// Resolves a payload key to a chart name: explicit table first, then a
// case-insensitive match with underscores removed.
function resolveChartName(key: string): string | undefined {
  const mapped = RESOURCE_KEY_TO_CHART.get(key);
  if (mapped !== undefined) return mapped;
  const normalized = key.replace(/_/g, '').toLowerCase();
  return CHART_NAMES.find((name) => name.toLowerCase() === normalized);
}

interface SidebarProps {
  socket: any;
}

/**
 * Collapsible sidebar showing live line charts fed by the socket's
 * `system_resources` events.
 */
const Sidebar: React.FC<SidebarProps> = ({ socket }) => {
  const [isCollapsed, setIsCollapsed] = useState(false);
  const chartRefs = useRef<{ [key: string]: any }>({
    cpu: null,
    memory: null,
    disk: null,
    gpu: null,
    gpuMemory: null,
  });

  // Appends one timestamped point per recognised numeric field.
  const updateCharts = (data: any) => {
    const now = new Date();
    Object.entries(data ?? {}).forEach(([key, value]) => {
      const chartName = resolveChartName(key);
      const chart = chartName ? chartRefs.current[chartName] : null;
      if (!chart || typeof value !== 'number') return;

      const points = chart.data.datasets[0].data;
      points.push({ x: now, y: value });
      if (points.length > MAX_POINTS_PER_CHART) {
        points.splice(0, points.length - MAX_POINTS_PER_CHART);
      }
      chart.update('none');
    });
  };

  useEffect(() => {
    if (!socket) return;
    const onResources = (data: any) => updateCharts(data);
    socket.on('system_resources', onResources);
    // Remove only this component's listener.
    return () => {
      socket.off('system_resources', onResources);
    };
  }, [socket]);

  useEffect(() => {
    let cancelled = false;
    const charts = chartRefs.current;

    // Chart.js touches the DOM, so load it on the client after mount.
    // `chart.js/auto` exposes the pre-registered Chart class as its default export.
    import('chart.js/auto').then((mod: any) => {
      if (cancelled) return;
      const ChartJS = mod.default;
      CHART_NAMES.forEach((chartName) => {
        const canvas = document.getElementById(`${chartName}Chart`) as HTMLCanvasElement;
        if (canvas) {
          charts[chartName] = new ChartJS(canvas, buildChartConfig());
        }
      });
    });

    return () => {
      // Prevent a late import from creating charts after unmount.
      cancelled = true;
      Object.keys(charts).forEach((chartName) => {
        charts[chartName]?.destroy();
        charts[chartName] = null;
      });
    };
  }, []);

  return (
    <div className={`${isCollapsed ? '' : 'w-80'} bg-gray-800 p-4`}>
      {/* The toggle stays outside the hidden region so the charts can be shown again. */}
      <button
        onClick={() => setIsCollapsed(!isCollapsed)}
        className="mb-4 px-4 py-2 bg-gray-700 text-white rounded-lg"
      >
        {isCollapsed ? 'Show Charts' : 'Hide Charts'}
      </button>
      {/* Canvases stay mounted while collapsed so chart instances remain valid. */}
      <div className={isCollapsed ? 'hidden' : ''}>
        <div className="mb-4">
          <h3 className="text-white mb-2">CPU Load</h3>
          <canvas id="cpuChart"></canvas>
        </div>
        <div className="mb-4">
          <h3 className="text-white mb-2">Memory Usage</h3>
          <canvas id="memoryChart"></canvas>
        </div>
        <div className="mb-4">
          <h3 className="text-white mb-2">Disk I/O</h3>
          <canvas id="diskChart"></canvas>
        </div>
        <div className="mb-4">
          <h3 className="text-white mb-2">GPU Load</h3>
          <canvas id="gpuChart"></canvas>
        </div>
        <div className="mb-4">
          <h3 className="text-white mb-2">GPU Memory</h3>
          <canvas id="gpuMemoryChart"></canvas>
        </div>
      </div>
    </div>
  );
};
export default Sidebar;

View File

@ -1,37 +0,0 @@
import React from 'react';
interface UserInputProps {
value: string;
onChange: (value: string) => void;
onSend: () => void;
}
/**
 * Message composer: a textarea plus a Send button.
 * Enter sends the message; Shift+Enter inserts a newline.
 */
const UserInput: React.FC<UserInputProps> = ({ value, onChange, onSend }) => {
  // `onKeyPress` is deprecated in React; `onKeyDown` is the supported event.
  const handleKeyDown = (e: React.KeyboardEvent<HTMLTextAreaElement>) => {
    // While an IME composition is active, Enter confirms the composition
    // and must not submit the message.
    if (e.nativeEvent.isComposing) return;
    if (e.key === 'Enter' && !e.shiftKey) {
      e.preventDefault();
      onSend();
    }
  };

  return (
    <div className="p-4 bg-gray-800">
      <textarea
        value={value}
        onChange={(e) => onChange(e.target.value)}
        onKeyDown={handleKeyDown}
        className="w-full p-2 bg-gray-700 text-white rounded-lg resize-none"
        rows={3}
        placeholder="Type your message here..."
      />
      <button
        onClick={onSend}
        className="mt-2 px-4 py-2 bg-blue-600 text-white rounded-lg"
      >
        Send
      </button>
    </div>
  );
};
export default UserInput;

View File

@ -1,23 +0,0 @@
import { useState, useEffect } from 'react'
/**
 * State hook mirrored to window.localStorage under `key`.
 *
 * The initial state is the JSON-parsed stored item, or `initialValue` when the
 * item is absent/empty or reading fails. Every change to the value (or key) is
 * written back as JSON; read and write errors are logged and otherwise ignored.
 *
 * Returns `[value, setValue]`, like useState.
 */
export default function useLocalStorage(key, initialValue) {
  // Lazy initializer: runs once, on the first render.
  const readInitial = () => {
    try {
      const raw = window.localStorage.getItem(key)
      if (!raw) {
        return initialValue
      }
      return JSON.parse(raw)
    } catch (error) {
      console.log(error)
      return initialValue
    }
  }

  const [storedValue, setStoredValue] = useState(readInitial)

  useEffect(() => {
    try {
      const serialized = JSON.stringify(storedValue)
      window.localStorage.setItem(key, serialized)
    } catch (error) {
      console.log(error)
    }
  }, [key, storedValue])

  return [storedValue, setStoredValue]
}

View File

@ -1,14 +0,0 @@
import { useEffect, useState } from 'react'
import io from 'socket.io-client'
/**
 * Opens a socket.io connection on mount and closes it on unmount.
 *
 * @param url Server to connect to. Defaults to the previously hard-coded
 *   'http://localhost:5001'; changing it closes the old connection and opens
 *   a new one.
 * @returns The socket, or null until the connection object has been created.
 */
export default function useSocket(url: string = 'http://localhost:5001') {
  const [socket, setSocket] = useState<any>(null)

  useEffect(() => {
    const newSocket = io(url)
    setSocket(newSocket)
    // Effect cleanups should return nothing, so wrap close() in a block.
    return () => {
      newSocket.close()
    }
  }, [url])

  return socket
}

View File

@ -1,4 +0,0 @@
/** @type {import('next').NextConfig} */
const nextConfig = {};
export default nextConfig;

5574
dewey/package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@ -1,31 +0,0 @@
{
"name": "dewey",
"version": "0.1.0",
"private": true,
"scripts": {
"dev": "next dev",
"build": "next build",
"start": "next start",
"lint": "next lint"
},
"dependencies": {
"autoprefixer": "^10.4.20",
"chart.js": "^3.9.1",
"chartjs-adapter-moment": "^1.0.1",
"marked": "^4.3.0",
"next": "^14.2.13",
"react": "^18.3.1",
"react-dom": "^18.3.1",
"socket.io-client": "^4.8.0"
},
"devDependencies": {
"@types/node": "^20.16.10",
"@types/react": "^18.3.10",
"@types/react-dom": "^18.3.0",
"eslint": "^8",
"eslint-config-next": "14.2.13",
"postcss": "^8.4.47",
"tailwindcss": "^3.4.13",
"typescript": "^5.6.2"
}
}

View File

@ -1,8 +0,0 @@
/** @type {import('postcss-load-config').Config} */
const config = {
plugins: {
tailwindcss: {},
},
};
export default config;

View File

@ -1,19 +0,0 @@
import type { Config } from "tailwindcss";
const config: Config = {
content: [
"./pages/**/*.{js,ts,jsx,tsx,mdx}",
"./components/**/*.{js,ts,jsx,tsx,mdx}",
"./app/**/*.{js,ts,jsx,tsx,mdx}",
],
theme: {
extend: {
colors: {
background: "var(--background)",
foreground: "var(--foreground)",
},
},
},
plugins: [],
};
export default config;

View File

@ -1,26 +0,0 @@
{
"compilerOptions": {
"lib": ["dom", "dom.iterable", "esnext"],
"allowJs": true,
"skipLibCheck": true,
"strict": true,
"noEmit": true,
"esModuleInterop": true,
"module": "esnext",
"moduleResolution": "bundler",
"resolveJsonModule": true,
"isolatedModules": true,
"jsx": "preserve",
"incremental": true,
"plugins": [
{
"name": "next"
}
],
"paths": {
"@/*": ["./*"]
}
},
"include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
"exclude": ["node_modules"]
}

602
main.py
View File

@ -1,125 +1,273 @@
from flask import Flask, send_from_directory, request import configparser
from flask_socketio import SocketIO, emit
from flask_openapi3 import OpenAPI, Info
from pydantic import BaseModel
from typing import List
from models import model_manager
import structlog
import time
import psutil
import GPUtil
import threading
import os
from tools import DefaultToolManager
import ollama
import re
import json import json
from datetime import datetime import os
import pprint import pprint
import queue
import re
import secrets
import sqlite3
import threading
import time
import uuid
from datetime import datetime
from typing import List, Optional
import GPUtil
import ollama
import psutil
import structlog
from flask import Flask, g, jsonify, request, send_from_directory
from flask_openapi3 import Info, OpenAPI
from flask_socketio import SocketIO, emit
from pydantic import BaseModel
from models import model_manager
from tools import DefaultToolManager
logger = structlog.get_logger() logger = structlog.get_logger()
# Configuration setup
CONFIG_FILE = "config.ini"
def create_default_config():
    """Write a default configuration to CONFIG_FILE, replacing any existing file.

    The file gets a freshly generated random admin key, the database path, the
    feature switches, the primary model name and the update interval.
    """
    sections = {
        "DEFAULT": {
            "AdminKey": secrets.token_urlsafe(32),
            "DatabasePath": "llm_chat_server.db",
        },
        "SERVER_FEATURES": {
            "EnableFrontend": "false",
            "EnableChatEndpoints": "false",
            "EnableAPIEndpoints": "true",
        },
        "MODEL": {"PrimaryModel": "qwen2.5:14b"},
        "PERFORMANCE": {"UpdateInterval": "0.1"},
    }

    parser = configparser.ConfigParser()
    for section_name, options in sections.items():
        parser[section_name] = options

    with open(CONFIG_FILE, "w") as handle:
        parser.write(handle)
def load_config():
    """Return the parsed contents of CONFIG_FILE.

    If the file does not exist yet, it is first created with default values.
    """
    config_missing = not os.path.exists(CONFIG_FILE)
    if config_missing:
        create_default_config()

    parser = configparser.ConfigParser()
    parser.read(CONFIG_FILE)
    return parser
# Load the configuration once at import time; load_config() creates the file
# with defaults when it is missing.
config = load_config()
# Admin secret and SQLite database path.
ADMIN_KEY = config["DEFAULT"]["AdminKey"]
DATABASE = config["DEFAULT"]["DatabasePath"]
# Feature switches, parsed from their string form into booleans.
ENABLE_FRONTEND = config["SERVER_FEATURES"].getboolean("EnableFrontend")
ENABLE_CHAT_ENDPOINTS = config["SERVER_FEATURES"].getboolean("EnableChatEndpoints")
ENABLE_API_ENDPOINTS = config["SERVER_FEATURES"].getboolean("EnableAPIEndpoints")
# Name of the model used for chat.
PRIMARY_MODEL = config["MODEL"]["PrimaryModel"]
# Update interval, parsed as a float (the generated default is 0.1).
UPDATE_INTERVAL = config["PERFORMANCE"].getfloat("UpdateInterval")
openapi = OpenAPI(__name__, info=Info(title="LLM Chat Server", version="1.0.0")) openapi = OpenAPI(__name__, info=Info(title="LLM Chat Server", version="1.0.0"))
app = openapi app = openapi
socketio = SocketIO(app, cors_allowed_origins="*") socketio = SocketIO(app, cors_allowed_origins="*")
tool_manager = DefaultToolManager() tool_manager = DefaultToolManager()
@app.route('/')
# Database setup
def get_db():
    """Return the SQLite connection cached on ``flask.g``.

    The connection to DATABASE is opened on first use within the current
    application context, stored as ``g._database``, and configured to return
    rows as ``sqlite3.Row``.
    """
    connection = getattr(g, "_database", None)
    if connection is not None:
        return connection

    connection = sqlite3.connect(DATABASE)
    connection.row_factory = sqlite3.Row
    g._database = connection
    return connection
@app.teardown_appcontext
def close_connection(exception):
    """Close the connection cached on ``flask.g``, if one was opened.

    Registered as an app-context teardown handler; ``exception`` is accepted
    but not used.
    """
    connection = getattr(g, "_database", None)
    if connection is None:
        return
    connection.close()
def init_db():
    """Create the ``Keys`` and ``Queries`` tables if they do not exist yet.

    Runs inside an application context so that get_db() can cache its
    connection on ``flask.g``. Both statements use ``IF NOT EXISTS``, so the
    function is safe to call on every start-up.
    """
    with app.app_context():
        db = get_db()
        # Keys must exist before the server is used: Queries.api_key_id
        # references it and validate_api_key() selects from it.
        # NOTE(review): column layout assumed from the reference schema kept
        # in this module — confirm it matches how keys are inserted.
        db.execute(
            """
            CREATE TABLE IF NOT EXISTS Keys (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                username TEXT NOT NULL UNIQUE,
                api_key TEXT NOT NULL UNIQUE
            )
            """
        )
        db.execute(
            """
            CREATE TABLE IF NOT EXISTS Queries (
                id TEXT PRIMARY KEY,
                ip TEXT NOT NULL,
                timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
                query TEXT NOT NULL,
                api_key_id INTEGER,
                conversation_history TEXT,
                FOREIGN KEY (api_key_id) REFERENCES Keys (id)
            )
            """
        )
        db.commit()
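# Reference schema, e.g. for a schema.sql file.
# NOTE: this does not match the Queries table that init_db() creates: here
# Queries has an INTEGER id plus a request_id column, whereas init_db() uses a
# TEXT id and has no request_id column. Reconcile the two before relying on either.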
"""
CREATE TABLE IF NOT EXISTS Keys (
id INTEGER PRIMARY KEY AUTOINCREMENT,
username TEXT NOT NULL UNIQUE,
api_key TEXT NOT NULL UNIQUE
);
CREATE TABLE IF NOT EXISTS Queries (
id INTEGER PRIMARY KEY AUTOINCREMENT,
request_id TEXT NOT NULL UNIQUE,
ip TEXT NOT NULL,
timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
query TEXT NOT NULL,
api_key_id INTEGER,
conversation_history TEXT,
FOREIGN KEY (api_key_id) REFERENCES Keys (id)
);
"""
def validate_api_key(api_key):
    """Return the ``Keys.id`` of the row whose ``api_key`` matches, else None.

    The lookup uses a parameterized query, so ``api_key`` is never
    interpolated into the SQL text.
    """
    cursor = get_db().cursor()
    cursor.execute("SELECT id FROM Keys WHERE api_key = ?", (api_key,))
    row = cursor.fetchone()
    if row:
        return row[0]
    return None
@app.route("/")
def index(): def index():
logger.info("Serving index.html") if ENABLE_FRONTEND:
return send_from_directory('.', 'index.html') logger.info("Serving index.html")
return send_from_directory(".", "index.html")
else:
return jsonify({"error": "Frontend is disabled"}), 404
class ChatRequest(BaseModel): class ChatRequest(BaseModel):
message: str message: str
class ChatResponse(BaseModel): class ChatResponse(BaseModel):
response: str response: str
@socketio.on('chat_request')
@socketio.on("chat_request")
def handle_chat_request(data): def handle_chat_request(data):
user_input = data['message'] if not ENABLE_CHAT_ENDPOINTS:
conversation_history = data.get('conversation_history', []) emit("error", {"message": "Chat endpoints are disabled"})
conversation_history = [{"role": "system", "content": ANSWER_QUESTION_PROMPT}] + conversation_history return
logger.info("Received chat request", user_input=user_input, conversation_history=conversation_history)
user_input = data["message"]
conversation_history = data.get("conversation_history", [])
conversation_history = [
{"role": "system", "content": ANSWER_QUESTION_PROMPT}
] + conversation_history
logger.info(
"Received chat request",
user_input=user_input,
conversation_history=conversation_history,
)
start_time = time.time() start_time = time.time()
try: try:
final_response = answer_question_tools(user_input, conversation_history) final_response = answer_question_tools(user_input, conversation_history)
end_time = time.time() end_time = time.time()
thinking_time = round(end_time - start_time, 2) thinking_time = round(end_time - start_time, 2)
emit('chat_response', { emit(
'response': final_response, "chat_response",
'thinking_time': thinking_time {"response": final_response, "thinking_time": thinking_time},
}) )
except Exception as e: except Exception as e:
logger.exception("Error during chat processing", error=str(e)) logger.exception("Error during chat processing", error=str(e))
end_time = time.time() end_time = time.time()
thinking_time = round(end_time - start_time, 2) thinking_time = round(end_time - start_time, 2)
emit('error', { emit(
'message': f"An error occurred: {str(e)}", "error",
'thinking_time': thinking_time {"message": f"An error occurred: {str(e)}", "thinking_time": thinking_time},
}) )
def answer_question_tools(user_input: str, conversation_history: List[dict], max_retries: int = 100):
def answer_question_tools(
user_input: str, conversation_history: List[dict], max_retries: int = 100
):
global tool_manager global tool_manager
# If conversation_history is empty, initialize it with the system prompt # If conversation_history is empty, initialize it with the system prompt
if not conversation_history: if not conversation_history:
conversation_history = [ conversation_history = [
{"role": "system", "content": ANSWER_QUESTION_PROMPT}, {"role": "system", "content": ANSWER_QUESTION_PROMPT},
] ]
logger.info("Starting chat", user_input=user_input, conversation_history=conversation_history) logger.info(
"Starting chat",
user_input=user_input,
conversation_history=conversation_history,
)
# Add the new user input to the conversation history # Add the new user input to the conversation history
conversation_history.append({"role": "user", "content": user_input}) conversation_history.append({"role": "user", "content": user_input})
emit('thinking', {'step': 'Starting'}) emit("thinking", {"step": "Starting"})
emit('conversation_history', {'history': conversation_history}) emit("conversation_history", {"history": conversation_history})
last_thought_content = None last_thought_content = None
for _ in range(max_retries): for _ in range(max_retries):
response = ollama.chat(model=PRIMARY_MODEL, messages=conversation_history, tools=tool_manager.get_tools_for_ollama_dict(), stream=False) response = ollama.chat(
assistant_message = response['message'] model=PRIMARY_MODEL,
messages=conversation_history,
tools=tool_manager.get_tools_for_ollama_dict(),
stream=False,
)
assistant_message = response["message"]
conversation_history.append(assistant_message) conversation_history.append(assistant_message)
emit('conversation_history', {'history': conversation_history}) emit("conversation_history", {"history": conversation_history})
pprint.pp(assistant_message) pprint.pp(assistant_message)
if 'tool_calls' in assistant_message: if "tool_calls" in assistant_message:
for tool_call in assistant_message['tool_calls']: for tool_call in assistant_message["tool_calls"]:
tool_name = tool_call['function']['name'] tool_name = tool_call["function"]["name"]
tool_args = tool_call['function']['arguments'] tool_args = tool_call["function"]["arguments"]
emit('thought', {'type': 'tool_call', 'content': f"Tool: {tool_name}\nArguments: {tool_args}"}) emit(
"thought",
{
"type": "tool_call",
"content": f"Tool: {tool_name}\nArguments: {tool_args}",
},
)
tool_response = tool_manager.get_tool(tool_name).execute(tool_args) tool_response = tool_manager.get_tool(tool_name).execute(tool_args)
conversation_history.append({ conversation_history.append({"role": "tool", "content": tool_response})
"role": "tool", emit("conversation_history", {"history": conversation_history})
"content": tool_response emit("thought", {"type": "tool_result", "content": tool_response})
})
emit('conversation_history', {'history': conversation_history})
emit('thought', {'type': 'tool_result', 'content': tool_response})
else: else:
if "<reply>" in assistant_message['content'].lower(): if "<reply>" in assistant_message["content"].lower():
reply_content = re.search(r'<reply>(.*?)</reply>', assistant_message['content'], re.DOTALL) reply_content = re.search(
r"<reply>(.*?)</reply>", assistant_message["content"], re.DOTALL
)
if reply_content: if reply_content:
reply_answer = reply_content.group(1).strip() reply_answer = reply_content.group(1).strip()
emit('thought', {'type': 'answer', 'content': reply_answer}) emit("thought", {"type": "answer", "content": reply_answer})
return reply_answer return reply_answer
else: else:
current_thought_content = assistant_message['content'].strip() current_thought_content = assistant_message["content"].strip()
emit('thought', {'type': 'thoughts', 'content': current_thought_content}) emit(
"thought", {"type": "thoughts", "content": current_thought_content}
)
# Check for two consecutive thoughts, with the second being empty # Check for two consecutive thoughts, with the second being empty
if last_thought_content and not current_thought_content: if last_thought_content and not current_thought_content:
emit('thought', {'type': 'answer', 'content': last_thought_content}) emit("thought", {"type": "answer", "content": last_thought_content})
return last_thought_content return last_thought_content
last_thought_content = current_thought_content last_thought_content = current_thought_content
continue continue
return f"Max iterations reached. Last response: {assistant_message['content']}" return f"Max iterations reached. Last response: {assistant_message['content']}"
ANSWER_QUESTION_PROMPT2 = f""" ANSWER_QUESTION_PROMPT2 = f"""
The current date is {datetime.now().strftime("%A, %B %d, %Y")}, your knowledge cutoff was December 2023. The current date is {datetime.now().strftime("%A, %B %d, %Y")}, your knowledge cutoff was December 2023.
You are Dewey, an AI assistant with access to external tools and the ability to think through complex problems. Your role is to assist users by leveraging tools when necessary, thinking deeply about problems, and providing accurate and helpful information, all with a cheerful, but witty personality. Here are the tools available to you: You are Dewey, an AI assistant with access to external tools and the ability to think through complex problems. Your role is to assist users by leveraging tools when necessary, thinking deeply about problems, and providing accurate and helpful information, all with a cheerful, but witty personality. Here are the tools available to you:
@ -216,9 +364,6 @@ Generate the final response to the user within <reply></reply> tags:
Remember to always be helpful, accurate, and respectful in your interactions, while maintaining your distinctive character blend of House and Jarvis. Remember to always be helpful, accurate, and respectful in your interactions, while maintaining your distinctive character blend of House and Jarvis.
""" """
PRIMARY_MODEL = "qwen2.5:14b"
UPDATE_INTERVAL = 0.1 # 100ms, configurable
def get_system_resources(): def get_system_resources():
cpu_load = psutil.cpu_percent() cpu_load = psutil.cpu_percent()
@ -227,44 +372,319 @@ def get_system_resources():
disk_io = psutil.disk_io_counters() disk_io = psutil.disk_io_counters()
disk_read = disk_io.read_bytes disk_read = disk_io.read_bytes
disk_write = disk_io.write_bytes disk_write = disk_io.write_bytes
gpus = GPUtil.getGPUs() gpus = GPUtil.getGPUs()
gpu_load = gpus[0].load * 100 if gpus else 0 gpu_load = gpus[0].load * 100 if gpus else 0
gpu_memory = gpus[0].memoryUtil * 100 if gpus else 0 gpu_memory = gpus[0].memoryUtil * 100 if gpus else 0
return { return {
'cpu_load': cpu_load, "cpu_load": cpu_load,
'memory_usage': memory_usage, "memory_usage": memory_usage,
'disk_read': disk_read, "disk_read": disk_read,
'disk_write': disk_write, "disk_write": disk_write,
'gpu_load': gpu_load, "gpu_load": gpu_load,
'gpu_memory': gpu_memory "gpu_memory": gpu_memory,
} }
def send_system_resources():
    """Continuously broadcast system resource usage over Socket.IO.

    Runs forever: on each pass it samples ``get_system_resources()``, emits a
    ``system_resources`` event carrying CPU, memory and GPU figures plus disk
    read/write rates (bytes per second), then sleeps ``UPDATE_INTERVAL``
    seconds. Meant to be run in a background daemon thread.
    """
    # The disk counters are running totals, so a rate only makes sense as a
    # difference between two samples. Until a previous sample exists we report
    # 0 instead of dividing the whole running total by the interval.
    last_disk_read = None
    last_disk_write = None

    while True:
        resources = get_system_resources()

        # Calculate disk I/O rates
        if last_disk_read is None or last_disk_write is None:
            disk_read_rate = 0.0
            disk_write_rate = 0.0
        else:
            disk_read_rate = (resources["disk_read"] - last_disk_read) / UPDATE_INTERVAL
            disk_write_rate = (
                resources["disk_write"] - last_disk_write
            ) / UPDATE_INTERVAL

        socketio.emit(
            "system_resources",
            {
                "cpu_load": resources["cpu_load"],
                "memory_usage": resources["memory_usage"],
                "disk_read_rate": disk_read_rate,
                "disk_write_rate": disk_write_rate,
                "gpu_load": resources["gpu_load"],
                "gpu_memory": resources["gpu_memory"],
            },
        )

        last_disk_read = resources["disk_read"]
        last_disk_write = resources["disk_write"]
        time.sleep(UPDATE_INTERVAL)
class QueryRequest(BaseModel):
    """Request body for submitting a query: the user's message text."""

    message: str
class QueryResponse(BaseModel):
    """Response body for an accepted query: the identifier used to poll for it."""

    query_id: str
class QueryStatusResponse(BaseModel):
    """Response body for the query status endpoint."""

    # "processing" while the query is still queued, "completed" once answered.
    status: str
    # Only present once the query has completed. The explicit ``None`` default
    # keeps the field optional regardless of how the installed pydantic
    # version treats a bare ``Optional[...]`` annotation.
    conversation_history: Optional[List[dict]] = None
@app.post(
    "/api/v1/query",
    responses={
        "200": QueryResponse,
        "401": {"description": "Unauthorized"},
        "500": {"description": "Internal Server Error"},
    },
)
def api_query(body: QueryRequest):
    """
    Submit a new query to the LLM Chat Server.

    This endpoint requires authentication via an API key.

    The query is only queued here; the returned query_id is used to poll
    the query status endpoint for the result.

    Sample cURL:
    curl -X POST http://localhost:5001/api/v1/query \
    -H "Content-Type: application/json" \
    -H "X-API-Key: your-api-key" \
    -d '{"message": "What is the capital of France?"}'
    """
    if not ENABLE_API_ENDPOINTS:
        return jsonify({"error": "API endpoints are disabled"}), 404

    api_key = request.headers.get("X-API-Key")
    if not api_key:
        return jsonify({"error": "API key is required"}), 401

    api_key_id = validate_api_key(api_key)
    if not api_key_id:
        return jsonify({"error": "Invalid API key"}), 401

    user_input = body.message
    query_id = str(uuid.uuid4())

    try:
        db = get_db()
        cursor = db.cursor()
        # The UUID goes into ``request_id`` (TEXT NOT NULL UNIQUE). ``id`` is
        # the INTEGER AUTOINCREMENT primary key in schema.sql and cannot hold
        # a UUID string, so the database assigns it.
        cursor.execute(
            "INSERT INTO Queries (request_id, ip, query, api_key_id) VALUES (?, ?, ?, ?)",
            (query_id, request.remote_addr, user_input, api_key_id),
        )
        db.commit()
        return jsonify({"query_id": query_id})
    except Exception as e:
        logger.exception("Error during API query processing", error=str(e))
        return jsonify({"error": str(e)}), 500
@app.get(
    "/api/v1/query_status/<query_id>",
    responses={
        "200": QueryStatusResponse,
        "401": {"description": "Unauthorized"},
        "404": {"description": "Query not found"},
        "500": {"description": "Internal Server Error"},
    },
)
def get_query_status(query_id: str):
    """
    Get the status of a submitted query.

    This endpoint requires authentication via an API key.

    Returns 202 with status "processing" while no conversation history has
    been stored for the query, and 200 with status "completed" plus the
    conversation history once it has.

    Sample cURL:
    curl -X GET http://localhost:5001/api/v1/query_status/query-id-here \
    -H "X-API-Key: your-api-key"
    """
    # Same gate and authentication as the query submission endpoint.
    if not ENABLE_API_ENDPOINTS:
        return jsonify({"error": "API endpoints are disabled"}), 404

    api_key = request.headers.get("X-API-Key")
    if not api_key:
        return jsonify({"error": "API key is required"}), 401

    if not validate_api_key(api_key):
        return jsonify({"error": "Invalid API key"}), 401

    try:
        db = get_db()
        cursor = db.cursor()
        # Clients hold the UUID stored in ``request_id``; ``id`` is the
        # integer primary key in schema.sql and never matches a UUID string.
        cursor.execute(
            "SELECT conversation_history FROM Queries WHERE request_id = ?",
            (query_id,),
        )
        result = cursor.fetchone()

        if result is None:
            return jsonify({"error": "Query not found"}), 404

        conversation_history = result[0]
        if conversation_history is None:
            # Row exists but the background worker has not answered it yet.
            return jsonify({"status": "processing"}), 202

        return jsonify(
            {
                "status": "completed",
                "conversation_history": json.loads(conversation_history),
            }
        )
    except Exception as e:
        logger.exception("Error retrieving query status", error=str(e))
        return jsonify({"error": str(e)}), 500
def answer_question_tools_api(
    user_input: str, conversation_history: List[dict], max_retries: int = 100
):
    """Run the tool-calling chat loop for one API query and return the transcript.

    Args:
        user_input: The user's message; appended to the history as a user turn.
        conversation_history: Prior messages. When empty, a new history
            starting with the ``ANSWER_QUESTION_PROMPT`` system message is
            used; otherwise the given list is extended in place.
        max_retries: Maximum number of model round-trips.

    Returns:
        The conversation history. Its final assistant message is the text
        inside ``<reply>...</reply>``, or the last non-empty thought when the
        model follows it with an empty message, or a "Max iterations reached"
        notice when the loop runs out.
    """
    if not conversation_history:
        conversation_history = [
            {"role": "system", "content": ANSWER_QUESTION_PROMPT},
        ]

    logger.info(
        "Starting API chat",
        user_input=user_input,
        conversation_history=conversation_history,
    )
    conversation_history.append({"role": "user", "content": user_input})

    last_thought_content = None
    # Tracked separately so the fallback message below is well defined even
    # if the loop body never runs (max_retries <= 0).
    last_content = ""

    for _ in range(max_retries):
        response = ollama.chat(
            model=PRIMARY_MODEL,
            messages=conversation_history,
            tools=tool_manager.get_tools_for_ollama_dict(),
            stream=False,
        )
        assistant_message = response["message"]
        conversation_history.append(assistant_message)
        last_content = assistant_message["content"] or ""

        if "tool_calls" in assistant_message:
            for tool_call in assistant_message["tool_calls"]:
                tool_name = tool_call["function"]["name"]
                tool_args = tool_call["function"]["arguments"]
                tool = tool_manager.get_tool(tool_name)
                if tool is None:
                    # The model asked for a tool that is not registered.
                    # Tell it so instead of crashing on ``None.execute``.
                    tool_response = f"Error: unknown tool '{tool_name}'"
                else:
                    tool_response = tool.execute(tool_args)
                conversation_history.append({"role": "tool", "content": tool_response})
        elif "<reply>" in last_content.lower():
            # Match case-insensitively, like the tag check above, so that
            # e.g. <Reply>...</Reply> is also recognised as the final answer.
            reply_content = re.search(
                r"<reply>(.*?)</reply>", last_content, re.DOTALL | re.IGNORECASE
            )
            if reply_content:
                reply_answer = reply_content.group(1).strip()
                conversation_history.append(
                    {"role": "assistant", "content": reply_answer}
                )
                return conversation_history
            # Opening tag without a closing tag: keep iterating.
        else:
            current_thought_content = last_content.strip()
            # Two consecutive thoughts with the second one empty: treat the
            # previous thought as the answer.
            if last_thought_content and not current_thought_content:
                conversation_history.append(
                    {"role": "assistant", "content": last_thought_content}
                )
                return conversation_history
            last_thought_content = current_thought_content

    conversation_history.append(
        {
            "role": "assistant",
            "content": f"Max iterations reached. Last response: {last_content}",
        }
    )
    return conversation_history
def process_queries():
    """Background worker that answers queued API queries one at a time.

    Inside an application context, loops forever: picks the oldest row in
    ``Queries`` whose ``conversation_history`` is NULL, runs
    ``answer_question_tools_api`` on it and stores the resulting history as
    JSON. Sleeps one second when the queue is empty or a database error occurs.

    A query whose processing raises is completed with an error message rather
    than left NULL. Otherwise it would be selected again on every pass and
    block all queries behind it.
    """
    with app.app_context():
        while True:
            try:
                db = get_db()
                cursor = db.cursor()
                cursor.execute(
                    "SELECT id, query FROM Queries WHERE conversation_history IS NULL ORDER BY timestamp ASC LIMIT 1"
                )
                result = cursor.fetchone()

                if not result:
                    # Wait for 1 second before checking again if no queries are found
                    time.sleep(1)
                    continue

                query_id, user_input = result
                system_message = {"role": "system", "content": ANSWER_QUESTION_PROMPT}

                try:
                    final_conversation_history = answer_question_tools_api(
                        user_input, [system_message]
                    )
                    serialized_history = json.dumps(final_conversation_history)
                except Exception as e:
                    logger.exception(
                        "Error answering query", query_id=query_id, error=str(e)
                    )
                    # Build a small, always-serializable history so the row
                    # leaves the pending queue.
                    serialized_history = json.dumps(
                        [
                            system_message,
                            {"role": "user", "content": user_input},
                            {
                                "role": "assistant",
                                "content": f"Error processing query: {e}",
                            },
                        ]
                    )

                cursor.execute(
                    "UPDATE Queries SET conversation_history = ? WHERE id = ?",
                    (serialized_history, query_id),
                )
                db.commit()
            except Exception as e:
                logger.exception("Error processing query", error=str(e))
                time.sleep(1)  # Wait for 1 second before retrying in case of an error
# Admin endpoint for generating API keys
class GenerateKeyRequest(BaseModel):
    """Request body for the admin key-generation endpoint."""

    username: str
class GenerateKeyResponse(BaseModel):
    """Response body for the admin key-generation endpoint."""

    username: str
    api_key: str
@app.post(
    "/admin/generate_key",
    responses={
        "200": GenerateKeyResponse,
        "400": {"description": "Username already exists"},
        "401": {"description": "Unauthorized"},
        "500": {"description": "Internal Server Error"},
    },
)
def generate_api_key(body: GenerateKeyRequest):
    """
    Generate a new API key for a user.

    This endpoint requires authentication via an admin key.

    Sample cURL:
    curl -X POST http://localhost:5001/admin/generate_key \
    -H "Content-Type: application/json" \
    -H "X-Admin-Key: your-admin-key" \
    -d '{"username": "new_user"}'
    """
    admin_key = request.headers.get("X-Admin-Key")
    # Constant-time comparison so response timing does not reveal how much of
    # the admin key matched. Both sides are encoded to bytes because
    # compare_digest rejects non-ASCII str. A missing or empty configured key
    # never authenticates.
    # NOTE(review): assumes ADMIN_KEY is a str (or None) - confirm against config loading.
    if (
        not admin_key
        or not ADMIN_KEY
        or not secrets.compare_digest(
            admin_key.encode("utf-8"), ADMIN_KEY.encode("utf-8")
        )
    ):
        return jsonify({"error": "Invalid admin key"}), 401

    username = body.username
    api_key = secrets.token_urlsafe(32)

    try:
        db = get_db()
        cursor = db.cursor()
        cursor.execute(
            "INSERT INTO Keys (username, api_key) VALUES (?, ?)", (username, api_key)
        )
        db.commit()
        return jsonify({"username": username, "api_key": api_key})
    except sqlite3.IntegrityError:
        # Keys.username is UNIQUE, so a repeated username lands here.
        return jsonify({"error": "Username already exists"}), 400
    except Exception as e:
        logger.exception("Error generating API key", error=str(e))
        return jsonify({"error": str(e)}), 500
if __name__ == "__main__":
    logger.info("Starting LLM Chat Server")
    init_db()  # Initialize the database

    if ENABLE_FRONTEND or ENABLE_CHAT_ENDPOINTS:
        threading.Thread(target=send_system_resources, daemon=True).start()

    if ENABLE_API_ENDPOINTS:
        # process_queries() enters its own application context, so it is the
        # thread target directly. The previous
        # ``app.app_context().push() and process_queries()`` never started the
        # worker: push() returns None and ``and`` short-circuited.
        threading.Thread(target=process_queries, daemon=True).start()

    # NOTE(review): debug=True together with host="0.0.0.0" exposes the debug
    # tooling on all interfaces, and a debug reloader would start the
    # background threads twice - confirm this is intended outside development.
    socketio.run(app, debug=True, host="0.0.0.0", port=5001)

View File

@ -3,29 +3,86 @@ import structlog
logger = structlog.get_logger() logger = structlog.get_logger()
class ModelManager: class ModelManager:
def __init__(self):
    """Build the static model-name -> capability-list table and log it."""
    # Capability set shared by the general-purpose chat models. Each model
    # gets its own copy so the lists stay independent objects.
    general_purpose = [
        "general_knowledge",
        "reasoning",
        "tool_calling",
        "conversation",
        "multilingual",
        "instruction_following",
    ]
    capabilities_by_model = {
        name: list(general_purpose)
        for name in ("ajindal/llama3.1-storm:8b", "llama3.1:8b", "qwen2.5:7b")
    }
    capabilities_by_model["llama3.2:3b"] = [
        "summarization",
        "instruction_following",
        "tool_calling",
        "multilingual",
    ]
    capabilities_by_model["llava:7b"] = [
        "visual_reasoning",
        "visual_conversation",
        "visual_tool_calling",
        "vision",
        "ocr",
        "multimodal",
    ]
    self.model_capabilities = capabilities_by_model
    logger.info(
        "ModelManager initialized", model_capabilities=self.model_capabilities
    )
def get_model_capabilities(self, model_name):
    """Return the capability list for ``model_name``, or ``[]`` if it is unknown."""
    known = self.model_capabilities
    found = known[model_name] if model_name in known else []
    logger.debug(
        "Retrieved model capabilities", model=model_name, capabilities=found
    )
    return found
def select_best_model(self, required_capability):
    """Return the first model offering ``required_capability``.

    Models are considered in table order; when none advertises the
    capability, the first model in the table is returned instead.
    """
    for candidate, offered in self.model_capabilities.items():
        if required_capability in offered:
            chosen = candidate
            break
    else:
        # Nothing matched: fall back to the first configured model.
        chosen = list(self.model_capabilities.keys())[0]

    logger.info(
        "Selected best model",
        required_capability=required_capability,
        selected_model=chosen,
    )
    return chosen
def generate_text(self, model_name, prompt, max_length=100, system="You are a helpful assistant.", tools=[]): def generate_text(
self,
model_name,
prompt,
max_length=100,
system="You are a helpful assistant.",
tools=[],
):
# Check if model exists # Check if model exists
try: try:
ollama.pull(model_name) ollama.pull(model_name)
@ -37,10 +94,16 @@ class ModelManager:
else: else:
logger.exception("Error pulling model", model=model_name, error=str(e)) logger.exception("Error pulling model", model=model_name, error=str(e))
raise e raise e
response = ollama.generate(model=model_name, prompt=prompt, system=system, tools=tools, max_tokens=max_length) response = ollama.generate(
logger.debug("Text generated", model=model_name, response=response['response']) model=model_name,
return response['response'] prompt=prompt,
system=system,
tools=tools,
max_tokens=max_length,
)
logger.debug("Text generated", model=model_name, response=response["response"])
return response["response"]
model_manager = ModelManager()
model_manager = ModelManager()

View File

@ -1,13 +0,0 @@
// Next.js configuration: enables React strict mode and customises webpack.
/** @type {import('next').NextConfig} */
const nextConfig = {
reactStrictMode: true,
webpack: (config) => {
// Mark these two packages as CommonJS externals so webpack does not bundle them.
// NOTE(review): presumably optional native add-ons of a WebSocket dependency - confirm.
config.externals.push({
'utf-8-validate': 'commonjs utf-8-validate',
'bufferutil': 'commonjs bufferutil',
})
return config
},
}
export default nextConfig;

16
schema.sql Normal file
View File

@ -0,0 +1,16 @@
-- API keys issued to users; one key per username.
CREATE TABLE IF NOT EXISTS Keys (
-- Surrogate key; referenced by Queries.api_key_id.
id INTEGER PRIMARY KEY AUTOINCREMENT,
-- UNIQUE: inserting an existing username raises an integrity error.
username TEXT NOT NULL UNIQUE,
-- The key value clients present when calling the API.
api_key TEXT NOT NULL UNIQUE
);
-- Queue and result store for queries submitted through the API.
CREATE TABLE IF NOT EXISTS Queries (
-- Integer surrogate key assigned by SQLite; it cannot hold a UUID string.
id INTEGER PRIMARY KEY AUTOINCREMENT,
-- Required, unique text identifier for the request.
-- NOTE(review): presumably the UUID handed back to API clients - confirm
-- that application code inserts and looks up by this column rather than id.
request_id TEXT NOT NULL UNIQUE,
-- Address of the client that submitted the query.
ip TEXT NOT NULL,
-- Submission time; defaults to the time of insertion.
timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
-- The user's message text.
query TEXT NOT NULL,
-- Key that submitted the query (nullable).
api_key_id INTEGER,
-- NULL until the query has been answered; afterwards the stored conversation.
-- NOTE(review): presumably JSON text - confirm against the writer.
conversation_history TEXT,
FOREIGN KEY (api_key_id) REFERENCES Keys (id)
);

121
tools.py
View File

@ -1,10 +1,12 @@
import subprocess
import tempfile
import time
import duckduckgo_search import duckduckgo_search
import requests import requests
from readability.readability import Document
from markdownify import markdownify as md from markdownify import markdownify as md
import subprocess from readability.readability import Document
import time
import tempfile
class Tool: class Tool:
def __init__(self, name: str, description: str, arguments: dict, returns: str): def __init__(self, name: str, description: str, arguments: dict, returns: str):
@ -29,13 +31,23 @@ class ToolManager:
if tool.name == name: if tool.name == name:
return tool return tool
return None return None
def get_tools_and_descriptions_for_prompt(self):
    """Return one ``name: description`` line per registered tool, newline-joined."""
    prompt_lines = []
    for registered in self.tools:
        prompt_lines.append(f"{registered.name}: {registered.description}")
    return "\n".join(prompt_lines)
def get_tools_for_ollama_dict(self):
    """Return the registered tools as a list of function-tool dicts.

    Each entry has ``type`` set to ``"function"`` and a ``function`` mapping
    with the tool's name, description and parameters (its ``arguments``).
    """
    tool_specs = []
    for registered in self.tools:
        function_spec = {
            "name": registered.name,
            "description": registered.description,
            "parameters": registered.arguments,
        }
        tool_specs.append({"type": "function", "function": function_spec})
    return tool_specs
class DefaultToolManager(ToolManager): class DefaultToolManager(ToolManager):
def __init__(self): def __init__(self):
@ -48,12 +60,22 @@ class DefaultToolManager(ToolManager):
class SearchTool(Tool):
    """Tool that searches the web through DuckDuckGo."""

    def __init__(self):
        query_schema = {"type": "string", "description": "The search query"}
        super().__init__(
            "search_web",
            "Search the internet for information",
            {"type": "object", "properties": {"query": query_schema}},
            "results:list[string]",
        )

    def execute(self, arg: dict) -> str:
        """Run the search for ``arg["query"]`` and format up to five hits.

        Each hit is rendered as title, body and URL on separate lines; hits
        are separated by a blank line.
        """
        hits = duckduckgo_search.DDGS().text(arg["query"], max_results=5)
        formatted = []
        for hit in hits:
            formatted.append(f"{hit['title']}\n{hit['body']}\n{hit['href']}")
        return "\n\n".join(formatted)
def get_readable_page_contents(url: str) -> str: def get_readable_page_contents(url: str) -> str:
try: try:
@ -64,20 +86,42 @@ def get_readable_page_contents(url: str) -> str:
return md(content) return md(content)
except Exception as e: except Exception as e:
return f"Error fetching readable content: {str(e)}" return f"Error fetching readable content: {str(e)}"
class GetReadablePageContentsTool(Tool):
    """Tool wrapper around ``get_readable_page_contents``."""

    def __init__(self):
        url_schema = {"type": "string", "description": "The url of the web page"}
        super().__init__(
            "get_readable_page_contents",
            "Get the contents of a web page in a readable format",
            {"type": "object", "properties": {"url": url_schema}},
            "contents:string",
        )

    def execute(self, arg: dict) -> str:
        """Return the readable contents of the page at ``arg["url"]``."""
        page_url = arg["url"]
        return get_readable_page_contents(page_url)
class CalculatorTool(Tool): class CalculatorTool(Tool):
def __init__(self): def __init__(self):
super().__init__("calculator", "Perform a calculation using python's eval function", {'type': 'object', 'properties': {'expression': {'type': 'string', 'description': 'The mathematical expression to evaluate, should be a python mathematical expression'}}}, "result:string") super().__init__(
"calculator",
"Perform a calculation using python's eval function",
{
"type": "object",
"properties": {
"expression": {
"type": "string",
"description": "The mathematical expression to evaluate, should be a python mathematical expression",
}
},
},
"result:string",
)
def execute(self, arg: dict) -> str: def execute(self, arg: dict) -> str:
try: try:
@ -88,30 +132,45 @@ class CalculatorTool(Tool):
class PythonCodeTool(Tool): class PythonCodeTool(Tool):
def __init__(self): def __init__(self):
super().__init__("python_code", "Execute python code", super().__init__(
{'type': 'object', 'properties': {'code': {'type': 'string', 'description': 'The python code to execute, can be multiline'}}}, "python_code",
"result:string") "Execute python code using a temporary file and a subprocess. You must print results to stdout.",
{
"type": "object",
"properties": {
"code": {
"type": "string",
"description": "The python code to execute, can be multiline",
}
},
},
"result:string",
)
def execute(self, arg: dict) -> str: def execute(self, arg: dict) -> str:
try: try:
with tempfile.NamedTemporaryFile(suffix=".py", mode="w", delete=False) as temp_file: with tempfile.NamedTemporaryFile(
temp_file.write(arg['code']) suffix=".py", mode="w", delete=False
) as temp_file:
temp_file.write(arg["code"])
temp_file.flush() temp_file.flush()
start_time = time.time() start_time = time.time()
process = subprocess.Popen(['python', temp_file.name], process = subprocess.Popen(
stdout=subprocess.PIPE, ["python", temp_file.name],
stderr=subprocess.PIPE, stdout=subprocess.PIPE,
text=True) stderr=subprocess.PIPE,
text=True,
)
stdout, stderr = process.communicate(timeout=10) # 10 second timeout stdout, stderr = process.communicate(timeout=10) # 10 second timeout
end_time = time.time() end_time = time.time()
execution_time = end_time - start_time execution_time = end_time - start_time
result = { result = {
'stdout': stdout, "stdout": stdout,
'stderr': stderr, "stderr": stderr,
'return_value': process.returncode, "return_value": process.returncode,
'execution_time': execution_time "execution_time": execution_time,
} }
except subprocess.TimeoutExpired: except subprocess.TimeoutExpired:
@ -119,5 +178,5 @@ class PythonCodeTool(Tool):
return "Error: Code execution timed out after 10 seconds" return "Error: Code execution timed out after 10 seconds"
except Exception as e: except Exception as e:
return f"Error executing code: {str(e)}" return f"Error executing code: {str(e)}"
return '\n'.join([f"{k}:\n{v}" for k, v in result.items()]) return "\n".join([f"{k}:\n{v}" for k, v in result.items()])