Compare commits

2 Commits

Author SHA1 Message Date
47059dabdc whole bunch of nonsense 2024-09-29 12:18:44 -04:00
d050549dd8 Squashed commit of the following:
commit 960c0009f4
Author: Tanishq Dubey <dubey@dws.rip>
Date:   Thu Sep 26 17:00:28 2024 -0400

    New UI

commit da9619fefb
Author: Tanishq Dubey <dubey@dws.rip>
Date:   Thu Sep 26 16:32:06 2024 -0400

    Change to retro styling
2024-09-26 17:01:00 -04:00
4 changed files with 967 additions and 261 deletions


@@ -6,63 +6,108 @@
<title>DWS Intelligence</title>
<script src="https://cdnjs.cloudflare.com/ajax/libs/socket.io/4.0.1/socket.io.js"></script>
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/moment@2.29.4/moment.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
<script src="https://cdn.jsdelivr.net/npm/chartjs-adapter-moment@1.0.1/dist/chartjs-adapter-moment.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.7.0/highlight.min.js"></script>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.7.0/styles/default.min.css">
<link href="https://fonts.googleapis.com/css2?family=Noto+Sans+Mono:wght@400;700&display=swap" rel="stylesheet">
<style>
body {
-font-family: Arial, sans-serif;
+font-family: 'Noto Sans Mono', monospace;
-max-width: 800px;
+background-color: #000;
-margin: 0 auto;
+color: #fff;
-padding: 20px;
+margin: 0;
padding: 0;
display: flex;
flex-direction: column;
height: 100vh;
overflow: hidden;
}
#chat-container {
-border: 1px solid #ccc;
+border: 2px solid #444;
-height: 400px;
+flex: 1;
overflow-y: auto;
padding: 10px;
-margin-bottom: 10px;
+background-color: #111;
box-sizing: border-box;
}
#input-container {
display: flex;
flex-direction: column;
padding: 10px;
background-color: #222;
box-sizing: border-box;
}
#user-input {
width: 100%;
padding: 10px;
background-color: #000;
color: #fff;
border: 1px solid #444;
font-family: 'Noto Sans Mono', monospace;
font-size: 16px;
margin-bottom: 10px;
box-sizing: border-box;
}
#send-button {
width: 100%;
padding: 10px;
-background-color: #4CAF50;
+background-color: #444;
-color: white;
+color: #fff;
border: none;
cursor: pointer;
font-family: 'Noto Sans Mono', monospace;
font-size: 16px;
box-sizing: border-box;
}
.message {
margin-bottom: 10px;
font-size: 16px;
}
.user-message {
text-align: right;
-color: blue;
+color: #0ff;
}
.bot-message {
text-align: left;
-color: green;
+color: #fff;
}
.bot-message pre {
background-color: #222;
padding: 10px;
border-radius: 5px;
overflow-x: auto;
}
.bot-message code {
font-family: 'Noto Sans Mono', monospace;
font-size: 14px;
}
.thinking {
font-style: italic;
color: #888;
}
.thought-summary {
cursor: pointer;
color: #888;
margin-bottom: 5px;
font-weight: bold;
margin-bottom: 5px;
padding: 5px;
border-radius: 3px;
}
.thought-summary.plan { background-color: #2c3e50; }
.thought-summary.decision { background-color: #34495e; }
.thought-summary.tool_call { background-color: #16a085; }
.thought-summary.tool_result { background-color: #27ae60; }
.thought-summary.think_more { background-color: #2980b9; }
.thought-summary.answer { background-color: #8e44ad; }
.thought-details {
display: none;
margin-left: 20px;
-border-left: 2px solid #ccc;
+border-left: 2px solid #444;
padding-left: 10px;
margin-bottom: 10px;
white-space: pre-wrap;
-font-family: monospace;
+font-family: 'Noto Sans Mono', monospace;
-background-color: #f0f0f0;
+background-color: #222;
}
.collapsible::before {
content: '▶ ';
@@ -72,23 +117,355 @@
.collapsible.open::before {
transform: rotate(90deg);
}
.led {
width: 10px;
height: 10px;
border-radius: 50%;
background-color: #f00;
margin-right: 10px;
position: relative;
}
.led::after {
content: '';
position: absolute;
top: -5px;
left: -5px;
right: -5px;
bottom: -5px;
background-color: #f00;
border-radius: 50%;
filter: blur(5px);
opacity: 0;
transition: opacity 0.5s ease-in-out;
}
.led.blinking {
animation: blink 1s step-start infinite;
}
.led.blinking::after {
animation: glow 1s ease-in-out infinite alternate;
}
@keyframes blink {
50% {
opacity: 0;
}
}
@keyframes glow {
0% {
opacity: 0;
}
100% {
opacity: 0.5;
}
}
/* PDP-11 inspired styles */
#chat-container::-webkit-scrollbar {
width: 12px;
}
#chat-container::-webkit-scrollbar-track {
background: #222;
}
#chat-container::-webkit-scrollbar-thumb {
background-color: #444;
border-radius: 6px;
border: 3px solid #222;
}
.pdp-panel {
background-color: #333;
border: 2px solid #555;
border-radius: 5px;
padding: 10px;
margin-bottom: 10px;
}
.pdp-label {
font-size: 14px;
color: #888;
margin-bottom: 5px;
}
#main-container {
display: flex;
height: 100vh;
}
#chat-area {
flex: 1;
display: flex;
flex-direction: column;
}
#sidebar {
width: 300px;
background-color: #222;
padding: 10px;
box-sizing: border-box;
overflow-y: auto;
transition: transform 0.3s ease-in-out;
}
#sidebar.collapsed {
transform: translateX(100%);
}
#sidebar-toggle {
position: fixed;
top: 10px;
right: 10px;
z-index: 1000;
background-color: #444;
color: #fff;
border: none;
padding: 5px 10px;
cursor: pointer;
}
.graph-container {
margin-bottom: 20px;
height: 150px;
}
.graph-title {
color: #888;
font-size: 14px;
margin-bottom: 5px;
}
@media (max-width: 768px) {
#sidebar {
position: fixed;
right: 0;
top: 0;
bottom: 0;
width: 100%;
max-width: 300px;
transform: translateX(100%);
}
#sidebar.collapsed {
transform: translateX(0);
}
}
.conversation-history-container {
margin-top: 20px;
background-color: #222;
border-radius: 5px;
padding: 10px;
}
#conversation-history {
color: #fff;
font-family: 'Noto Sans Mono', monospace;
font-size: 12px;
}
.history-card {
background-color: #2c3e50;
border-radius: 5px;
padding: 10px;
margin-bottom: 10px;
}
.history-role {
font-weight: bold;
margin-bottom: 5px;
}
.history-content {
white-space: pre-wrap;
word-break: break-word;
}
.error-message {
background-color: #ff6b6b;
color: #fff;
padding: 10px;
border-radius: 5px;
margin-bottom: 10px;
}
.retrying {
background-color: #feca57;
color: #333;
}
#clear-history-button {
background-color: #e74c3c;
color: white;
border: none;
padding: 10px;
margin-bottom: 10px;
cursor: pointer;
font-family: 'Noto Sans Mono', monospace;
font-size: 14px;
border-radius: 5px;
}
#clear-history-button:hover {
background-color: #c0392b;
}
#chat-tabs {
display: flex;
background-color: #222;
padding: 10px 10px 0 10px;
}
.chat-tab {
background-color: #444;
color: #fff;
border: none;
padding: 10px 20px;
margin-right: 5px;
cursor: pointer;
border-top-left-radius: 5px;
border-top-right-radius: 5px;
}
.chat-tab.active {
background-color: #666;
}
#new-chat-button {
background-color: #27ae60;
color: #fff;
border: none;
padding: 10px 20px;
cursor: pointer;
border-top-left-radius: 5px;
border-top-right-radius: 5px;
}
.close-tab {
margin-left: 10px;
color: #ff6b6b;
cursor: pointer;
}
.thinking-section {
margin-bottom: 20px;
border-left: 2px solid #444;
padding-left: 10px;
}
</style>
</head>
<body>
-<h1>DWS Intelligence</h1>
+<div id="main-container">
-<div id="chat-container"></div>
+<div id="chat-area">
-<textarea id="user-input" placeholder="Type your message here..." rows="3"></textarea>
+<div id="chat-tabs"></div>
-<button id="send-button">Send</button>
+<div id="chat-container"></div>
<div id="input-container" class="pdp-panel">
<div class="pdp-label">INPUT:</div>
<textarea id="user-input" placeholder="Type your message here..." rows="3"></textarea>
<button id="send-button">EXECUTE</button>
</div>
</div>
<button id="sidebar-toggle">Toggle Charts</button>
<div id="sidebar" class="collapsed">
<div class="graph-container">
<div class="graph-title">CPU Load</div>
<canvas id="cpuChart"></canvas>
</div>
<div class="graph-container">
<div class="graph-title">Memory Usage</div>
<canvas id="memoryChart"></canvas>
</div>
<div class="graph-container">
<div class="graph-title">Disk I/O</div>
<canvas id="diskChart"></canvas>
</div>
<div class="graph-container">
<div class="graph-title">GPU Load</div>
<canvas id="gpuChart"></canvas>
</div>
<div class="graph-container">
<div class="graph-title">GPU Memory</div>
<canvas id="gpuMemoryChart"></canvas>
</div>
<!-- Add this new section for conversation history -->
<div class="conversation-history-container">
<div class="graph-title">Conversation History</div>
<div id="conversation-history"></div>
</div>
</div>
</div>
<script>
const socket = io();
const chatContainer = document.getElementById('chat-container');
const userInput = document.getElementById('user-input');
const sendButton = document.getElementById('send-button');
const chatTabs = document.getElementById('chat-tabs');
-let thinkingElement = null;
+let currentChatId = null;
-let thinkingDetails = null;
+let chats = {};
let thinkingStartTime = null;
function createNewChat() {
const chatId = Date.now().toString();
chats[chatId] = {
messages: [],
thinkingSections: []
};
addChatTab(chatId);
switchToChat(chatId);
saveChats();
}
function addChatTab(chatId) {
const tab = document.createElement('button');
tab.classList.add('chat-tab');
tab.textContent = `Chat ${Object.keys(chats).length}`;
tab.onclick = () => switchToChat(chatId);
const closeButton = document.createElement('span');
closeButton.classList.add('close-tab');
closeButton.textContent = '×';
closeButton.onclick = (e) => {
e.stopPropagation();
closeChat(chatId);
};
tab.appendChild(closeButton);
chatTabs.insertBefore(tab, chatTabs.lastElementChild);
}
function switchToChat(chatId) {
currentChatId = chatId;
document.querySelectorAll('.chat-tab').forEach(tab => tab.classList.remove('active'));
document.querySelector(`.chat-tab:nth-child(${Object.keys(chats).indexOf(chatId) + 1})`).classList.add('active');
renderChat(chatId);
}
function closeChat(chatId) {
delete chats[chatId];
saveChats();
const tabToRemove = Array.from(chatTabs.children).find(tab => tab.textContent.includes(`Chat ${Object.keys(chats).indexOf(chatId) + 1}`));
if (tabToRemove) {
chatTabs.removeChild(tabToRemove);
}
if (currentChatId === chatId) {
const remainingChatIds = Object.keys(chats);
if (remainingChatIds.length > 0) {
switchToChat(remainingChatIds[0]);
} else {
createNewChat();
}
}
}
function renderChat(chatId) {
chatContainer.innerHTML = '';
const chat = chats[chatId];
chat.messages.forEach(message => addMessage(message.content, message.isUser));
chat.thinkingSections.forEach(section => {
const thinkingSection = createThinkingSection();
section.thoughts.forEach(thought => addThought(thought.type, thought.content, thought.details, thinkingSection));
});
}
function createThinkingSection() {
const section = document.createElement('div');
section.classList.add('thinking-section');
chatContainer.appendChild(section);
return section;
}
function addMessage(message, isUser) {
const messageElement = document.createElement('div');
@@ -97,53 +474,40 @@
messageElement.innerHTML = isUser ? message : marked.parse(message);
chatContainer.appendChild(messageElement);
chatContainer.scrollTop = chatContainer.scrollHeight;
if (currentChatId) {
chats[currentChatId].messages.push({ content: message, isUser: isUser });
saveChats();
}
}
-function startThinking() {
+function addThought(type, content, details = '', thinkingSection) {
-thinkingElement = document.createElement('div');
+const stepElement = document.createElement('div');
-thinkingElement.classList.add('thought-summary', 'collapsible');
+stepElement.classList.add('thought-summary', 'collapsible', type);
-thinkingElement.textContent = 'Thinking...';
+stepElement.textContent = type.charAt(0).toUpperCase() + type.slice(1).replace('_', ' ') + ':';
-thinkingElement.onclick = toggleThinkingDetails;
+stepElement.onclick = toggleStepDetails;
const stepDetails = document.createElement('div');
stepDetails.classList.add('thought-details');
-thinkingDetails = document.createElement('div');
+if (type === 'error') {
-thinkingDetails.classList.add('thought-details');
+stepElement.classList.add('error-message');
if (content.includes('retrying')) {
-chatContainer.appendChild(thinkingElement);
+stepElement.classList.add('retrying');
-chatContainer.appendChild(thinkingDetails);
+}
stepDetails.innerHTML = marked.parse(content + '\n\nDetails:\n```\n' + details + '\n```');
-thinkingStartTime = Date.now();
+} else {
stepDetails.innerHTML = marked.parse(content);
}
thinkingSection.appendChild(stepElement);
thinkingSection.appendChild(stepDetails);
chatContainer.scrollTop = chatContainer.scrollHeight;
}
-function addThought(step, content) {
+if (currentChatId) {
-if (thinkingDetails) {
+const currentThinkingSection = chats[currentChatId].thinkingSections[chats[currentChatId].thinkingSections.length - 1];
-const stepElement = document.createElement('div');
+currentThinkingSection.thoughts.push({ type, content, details });
-stepElement.classList.add('thought-summary', 'collapsible');
+saveChats();
stepElement.textContent = step;
stepElement.onclick = toggleStepDetails;
const stepDetails = document.createElement('div');
stepDetails.classList.add('thought-details');
stepDetails.innerHTML = content;
thinkingDetails.appendChild(stepElement);
thinkingDetails.appendChild(stepDetails);
chatContainer.scrollTop = chatContainer.scrollHeight;
}
}
function endThinking(thinkingTime) {
if (thinkingElement) {
thinkingElement.textContent = `Thinking... (${thinkingTime}s)`;
thinkingStartTime = null;
}
}
function toggleThinkingDetails() {
this.classList.toggle('open');
const details = this.nextElementSibling;
if (details) {
details.style.display = details.style.display === 'none' ? 'block' : 'none';
}
}
@@ -155,34 +519,71 @@
}
}
-socket.on('thinking', (data) => {
+function saveChats() {
-if (!thinkingElement) startThinking();
+localStorage.setItem('chats', JSON.stringify(chats));
-addThought(data.step, 'Started');
+}
});
-socket.on('thought', (data) => {
+function loadChats() {
-addThought('Result', data.content);
+const storedChats = localStorage.getItem('chats');
-});
+if (storedChats) {
chats = JSON.parse(storedChats);
-socket.on('chat_response', (data) => {
+Object.keys(chats).forEach(chatId => addChatTab(chatId));
-endThinking(data.thinking_time);
+if (Object.keys(chats).length > 0) {
-addMessage(data.response, false);
+switchToChat(Object.keys(chats)[0]);
-});
+} else {
createNewChat();
-socket.on('error', (data) => {
+}
-endThinking(data.thinking_time);
+} else {
-addMessage(`Error: ${data.message}`, false);
+createNewChat();
-});
+}
}
function sendMessage() {
const message = userInput.value.trim();
-if (message) {
+if (message && currentChatId) {
addMessage(message, true);
-socket.emit('chat_request', { message: message });
+chats[currentChatId].thinkingSections.push({ thoughts: [] });
socket.emit('chat_request', {
message: message,
conversation_history: chats[currentChatId].messages.filter(m => !m.isUser).map(m => ({ role: 'assistant', content: m.content }))
.concat(chats[currentChatId].messages.filter(m => m.isUser).map(m => ({ role: 'user', content: m.content })))
});
userInput.value = '';
}
}
socket.on('thinking', (data) => {
if (currentChatId) {
const newThinkingSection = createThinkingSection();
chats[currentChatId].thinkingSections.push({ thoughts: [] });
addThought(data.step, 'Started', '', newThinkingSection);
}
});
socket.on('thought', (data) => {
if (currentChatId) {
const currentThinkingSection = chatContainer.querySelector('.thinking-section:last-child');
addThought(data.type, data.content, data.details, currentThinkingSection);
}
});
socket.on('chat_response', (data) => {
if (currentChatId) {
addMessage(data.response, false);
}
});
socket.on('error', (data) => {
if (currentChatId) {
const currentThinkingSection = chatContainer.querySelector('.thinking-section:last-child');
if (data.type === 'retrying') {
addThought('error', data.content, '', currentThinkingSection);
} else {
addThought('error', data.message, '', currentThinkingSection);
}
}
});
sendButton.addEventListener('click', sendMessage);
userInput.addEventListener('keypress', function(e) {
if (e.key === 'Enter' && !e.shiftKey) {
@@ -190,6 +591,224 @@
sendMessage();
}
});
// Add new chat button
const newChatButton = document.createElement('button');
newChatButton.id = 'new-chat-button';
newChatButton.textContent = '+ New Chat';
newChatButton.onclick = createNewChat;
chatTabs.appendChild(newChatButton);
// Load chats when the page loads
loadChats();
const chartOptions = {
type: 'line',
options: {
responsive: true,
maintainAspectRatio: false,
animation: false,
elements: {
line: {
tension: 0
},
point: {
radius: 0
}
},
scales: {
x: {
type: 'time',
time: {
unit: 'second',
displayFormats: {
second: 'HH:mm:ss'
}
},
ticks: {
display: false
}
},
y: {
beginAtZero: true,
max: 100,
ticks: {
callback: function(value) {
return value + '%';
}
}
}
},
plugins: {
legend: {
display: false
}
}
}
};
const cpuChart = new Chart(document.getElementById('cpuChart').getContext('2d'), {
...chartOptions,
data: {
datasets: [{
label: 'CPU Load',
data: [],
borderColor: 'rgb(75, 192, 192)',
fill: false
}]
}
});
const memoryChart = new Chart(document.getElementById('memoryChart').getContext('2d'), {
...chartOptions,
data: {
datasets: [{
label: 'Memory Usage',
data: [],
borderColor: 'rgb(255, 159, 64)',
fill: false
}]
}
});
const diskChart = new Chart(document.getElementById('diskChart').getContext('2d'), {
...chartOptions,
options: {
...chartOptions.options,
scales: {
...chartOptions.options.scales,
y: {
beginAtZero: true,
ticks: {
callback: function(value) {
return (value / 1024 / 1024).toFixed(2) + ' MB/s';
}
}
}
}
},
data: {
datasets: [{
label: 'Disk Read',
data: [],
borderColor: 'rgb(54, 162, 235)',
fill: false
},
{
label: 'Disk Write',
data: [],
borderColor: 'rgb(255, 99, 132)',
fill: false
}]
}
});
const gpuChart = new Chart(document.getElementById('gpuChart').getContext('2d'), {
...chartOptions,
data: {
datasets: [{
label: 'GPU Load',
data: [],
borderColor: 'rgb(153, 102, 255)',
fill: false
}]
}
});
const gpuMemoryChart = new Chart(document.getElementById('gpuMemoryChart').getContext('2d'), {
...chartOptions,
data: {
datasets: [{
label: 'GPU Memory',
data: [],
borderColor: 'rgb(255, 206, 86)',
fill: false
}]
}
});
function updateCharts(data) {
if (sidebar.classList.contains('collapsed')) return;
const now = Date.now();
const thirtySecondsAgo = now - 30000;
function updateChart(chart, value) {
chart.data.datasets[0].data.push({x: now, y: value});
chart.data.datasets[0].data = chart.data.datasets[0].data.filter(point => point.x > thirtySecondsAgo);
chart.update('none');
}
updateChart(cpuChart, data.cpu_load);
updateChart(memoryChart, data.memory_usage);
updateChart(gpuChart, data.gpu_load);
updateChart(gpuMemoryChart, data.gpu_memory);
// Update disk chart (it has two datasets)
diskChart.data.datasets[0].data.push({x: now, y: data.disk_read_rate});
diskChart.data.datasets[1].data.push({x: now, y: data.disk_write_rate});
diskChart.data.datasets[0].data = diskChart.data.datasets[0].data.filter(point => point.x > thirtySecondsAgo);
diskChart.data.datasets[1].data = diskChart.data.datasets[1].data.filter(point => point.x > thirtySecondsAgo);
diskChart.update('none');
}
// Listen for system resource updates
socket.on('system_resources', (data) => {
updateCharts(data);
});
const sidebar = document.getElementById('sidebar');
const sidebarToggle = document.getElementById('sidebar-toggle');
sidebarToggle.addEventListener('click', () => {
sidebar.classList.toggle('collapsed');
});
function checkWindowSize() {
if (window.innerWidth <= 768) {
sidebar.classList.add('collapsed');
} else {
sidebar.classList.remove('collapsed');
}
}
window.addEventListener('resize', checkWindowSize);
checkWindowSize(); // Initial check
// Add this new function to update the conversation history
function updateConversationHistory(history) {
const conversationHistoryElement = document.getElementById('conversation-history');
conversationHistoryElement.innerHTML = '';
history.forEach(item => {
const card = document.createElement('div');
card.classList.add('history-card');
const role = document.createElement('div');
role.classList.add('history-role');
role.textContent = item.role.charAt(0).toUpperCase() + item.role.slice(1);
const content = document.createElement('pre');
content.classList.add('history-content');
content.innerHTML = hljs.highlightAuto(item.content).value;
card.appendChild(role);
card.appendChild(content);
conversationHistoryElement.appendChild(card);
});
}
// Add this new socket listener
socket.on('conversation_history', (data) => {
updateConversationHistory(data.history);
});
// Add event listener for the clear history button
clearHistoryButton.addEventListener('click', () => {
if (confirm('Are you sure you want to clear the conversation history?')) {
clearConversationHistory();
}
});
</script>
</body>
</html>
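
For orientation, the chat_request payload assembled by sendMessage() above, and the socket events this page subscribes to, look roughly like the sketch below (illustrative values only; this snippet is not part of the commit):

# Shape of the 'chat_request' payload built by sendMessage() (example values).
chat_request = {
    "message": "What is the ideal gas law?",
    "conversation_history": [
        # As written above, all prior assistant messages are listed first,
        # followed by all prior user messages.
        {"role": "assistant", "content": "..."},
        {"role": "user", "content": "..."},
    ],
}
# Server-to-client events handled by this page: 'thinking', 'thought',
# 'chat_response', 'error', 'conversation_history', and 'system_resources'.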

main.py

@@ -1,4 +1,4 @@
-from flask import Flask, send_from_directory
+from flask import Flask, send_from_directory, request
from flask_socketio import SocketIO, emit
from flask_openapi3 import OpenAPI, Info
from pydantic import BaseModel
@@ -6,15 +6,24 @@ from typing import List
from models import model_manager
import structlog
import time
import psutil
import GPUtil
import threading
import os
from tools import DefaultToolManager
import ollama
import re
import json
from datetime import datetime
import pprint
logger = structlog.get_logger()
openapi = OpenAPI(__name__, info=Info(title="LLM Chat Server", version="1.0.0"))
app = openapi
socketio = SocketIO(app, cors_allowed_origins="*")
tool_manager = DefaultToolManager()
@app.route('/')
def index():
logger.info("Serving index.html")
@@ -29,51 +38,13 @@ class ChatResponse(BaseModel):
@socketio.on('chat_request')
def handle_chat_request(data):
user_input = data['message']
-logger.info("Received chat request", user_input=user_input)
+conversation_history = data.get('conversation_history', [])
conversation_history = [{"role": "system", "content": ANSWER_QUESTION_PROMPT}] + conversation_history
logger.info("Received chat request", user_input=user_input, conversation_history=conversation_history)
start_time = time.time()
full_context = ""
try:
-# Step 1: Generate a plan using the initial LLM
+final_response = answer_question_tools(user_input, conversation_history)
emit('thinking', {'step': 'Generating plan'})
plan, plan_generation = generate_plan(user_input)
full_context += f"Plan Thinking:\n{plan_generation}"
full_context += f"Plan:\n{plan}"
emit('thought', {'content': f"Plan Thinking:\n{plan_generation}"})
emit('thought', {'content': f"Plan:\n{plan}"})
if plan[0].strip().lower() == "direct_answer":
final_response = plan[1]
thinking_time = round(time.time() - start_time, 2)
emit('chat_response', {
'response': final_response,
'thinking_time': thinking_time
})
return
# Step 2: Execute each step of the plan
step_results = []
for i, step in enumerate(plan):
emit('thinking', {'step': f'Executing step {i+1}'})
while True:
best_model, model_selection = select_best_model(step, step_results, full_context)
if best_model in model_manager.model_capabilities:
break
logger.warning(f"Selected model {best_model} is not in the list of available models. Retrying...")
emit('thought', {'content': f"Selected model for step {i+1}:\n{model_selection}"})
# summary, summary_generation = summarize_context(f"Plan: {plan}\n\nSteps: {step_results}")
# emit('thought', {'content': f"Context summary:\n{summary_generation}"})
step_result, step_execution = execute_step(step, best_model, step_results, full_context)
emit('thought', {'content': f"Step {i+1} result:\n{step_execution}"})
emit('thought', {'content': f"Result {i+1}:\n{step_result}"})
step_results.append(step_result)
full_context += f"Step {i+1} result:\n{step_execution}"
# Step 3: Generate final response
emit('thinking', {'step': 'Generating final response'})
final_response, final_generation = generate_final_response(user_input, plan, step_results)
emit('thought', {'content': f"Final response generation:\n{final_generation}"})
end_time = time.time()
thinking_time = round(end_time - start_time, 2)
@ -90,147 +61,146 @@ def handle_chat_request(data):
'thinking_time': thinking_time
})
-PLAN_GENERATE_PROMPT = """
+def answer_question_tools(user_input: str, conversation_history: List[dict], max_retries: int = 100):
-You are building a "chain of thought" workflow for a series of LLMs to complete a task provided by a user.
+global tool_manager
Your first task is to "think" through the problem provided by the user. Probe what it would take to complete the task, see if there are hidden nuances, what constrains might be relevant, how to be efficient.
-This thinking should set question the premise of the task, and sets the scene for a plan of attack to be created.
+# If conversation_history is empty, initialize it with the system prompt
-Verbalize your thoughts out loud, allow the user to see your thought process. This thought process will also be used as context for processing the generated plan.
+if not conversation_history:
-This thought process should mimic the process of a human, and not be a simple list of steps, but should be a narrative of thought that a human would have.
+conversation_history = [
-Each step in the formulated plan is a step that a seperate LLM will complete. The LLM that will complete the step will be selected based on the scope of the step and the capabilities of the available models.
+{"role": "system", "content": ANSWER_QUESTION_PROMPT},
-There are models that are good at coding and math, and there are models that are good at reasoning and planning. Some models that are generalists, multilingual, or conversational. And even some that are vision models.
+]
Use this context of the possible models to shape each step such that a LLM can complete the step given the step and some context.
-Steps should follow a logical "chain of thought" in order to best complete the overall task.
+logger.info("Starting chat", user_input=user_input, conversation_history=conversation_history)
-Steps should be self contained and be designed such that the results of one step can be passed on to the next step.
+# Add the new user input to the conversation history
-Steps should be phrased in such a way that it acts as a prompt or instruction to the LLM that will complete the step.
+conversation_history.append({"role": "user", "content": user_input})
Each step will return a result, and a thought process. The thought process is extremely important, it is the "chain of thought" that the LLM went through to complete the step. This thought process is critical for the next step in the plan.
-Consider how results from one step can be combined with results from another step and consider how the chain of thought from one step can inform the next step when designing each step.
+emit('thinking', {'step': 'Starting'})
-Try and minimize the number of steps required to complete the task since running a lot of steps is expensive.
+emit('conversation_history', {'history': conversation_history})
Your output should be your thought process, followed by a single line titled "STEPS", followed by each step to take, one step per line.
Do not add any sort of markdown formatting, code formatting, or any other formatting.
Do not add any preamble, postamble, or other text, only the thought process and the steps.
-Consider the following example:
+for iteration in range(max_retries):
response = ollama.chat(model=PRIMARY_MODEL, messages=conversation_history, tools=tool_manager.get_tools_for_ollama_dict(), stream=False)
assistant_message = response['message']
conversation_history.append(assistant_message)
emit('conversation_history', {'history': conversation_history})
pprint.pp(assistant_message)
-Prompt: Write a program to reverse a string, then output ASCII block art of that reversed string. Do this in python.
+if 'tool_calls' in assistant_message:
emit('thought', {'type': 'decision', 'content': "Tool Call\n\n" + assistant_message['content']})
for tool_call in assistant_message['tool_calls']:
tool_name = tool_call['function']['name']
tool_args = tool_call['function']['arguments']
emit('thought', {'type': 'tool_call', 'content': f"Tool: {tool_name}\nArguments: {tool_args}"})
tool_response = tool_manager.get_tool(tool_name).execute(tool_args)
conversation_history.append({
"role": "tool",
"content": tool_response
})
emit('conversation_history', {'history': conversation_history})
emit('thought', {'type': 'tool_result', 'content': tool_response})
-So there are two parts to this task. First, we need to reverse the input string. Then we need to print the ASCII block art for each character in the reversed string.
+reflection_prompt = "Reflect on the tool results. If there were any errors, propose multiple alternative approaches to solve the problem. If successful, consider if the result fully answers the user's query or if additional steps are needed."
-We should be able to reverse the string using either a simple loop, or a python slice. Slicing is simpler, so we should use that.
+conversation_history.append({
-For the ASCII block art, the challenge is in creating a mapping between each character and its block art representation. There are a few ways to go about this:
+"role": "assistant",
-- Find a library that converts text to block art
+"content": reflection_prompt
-- Create our own mapping from characters to block art
+})
-- Create a procedurally generated mapping from characters to block art
+emit('conversation_history', {'history': conversation_history})
-Procedural generation could be done with an algorithm, but coming up with a good algorithm could be challenging.
+else:
-Generating a dictionary could be a good approach, but there are 26 letters in the alphabet, and 10 digits, so we would need 36 different outputs for the block art.
+if "<answer>" in assistant_message['content'].lower():
-We should search for a library that already does this, import it, and call it on the result of the string reversal. We would also need to tell the user to install the library.
+answer_content = re.search(r'<answer>(.*?)</answer>', assistant_message['content'], re.DOTALL)
if answer_content:
final_answer = answer_content.group(1).strip()
emit('thought', {'type': 'answer', 'content': final_answer})
return final_answer
else:
emit('thought', {'type': 'decision', 'content': "Think/Plan/Decision/Action\n\n" + assistant_message['content']})
reflection_prompt = "Your last response didn't provide a final answer. Please reflect on your current understanding of the problem and consider if you need to use any tools or if you can now provide a final answer. If you're ready to give a final answer, put your response in tags <answer></answer>"
conversation_history.append({"role": "assistant", "content": reflection_prompt})
emit('conversation_history', {'history': conversation_history})
-We're now ready to create our plan.
+return f"Max iterations reached. Last response: {assistant_message['content']}"
-STEPS
+ANSWER_QUESTION_PROMPT = f"""
-1. Write a function that takes a string and reverses it.
+The current date is {datetime.now().strftime("%A, %B %d, %Y")}, your knowledge cutoff was December 2023.
-2. Write a function that takes a string and returns the ASCII block art for each character in the string, this must be done using a library.
+You are Dewey, an AI assistant with access to external tools and the ability to think through complex problems. Your role is to assist users by leveraging tools when necessary, thinking deeply about problems, and providing accurate and helpful information, all with a cheerful, but witty personality. Here are the tools available to you:
3. Combine the two functions into a single program.
----
+{tool_manager.get_tools_and_descriptions_for_prompt()}
-Now you try.
+When addressing a query, follow these steps:
"""
_REMINADER_PT ="""
Each task you create should be should be self contained and be designed such that the results of one step can be passed on to the next step.
Try and minimize the number of steps required to complete the task.
Output only a numbered list of steps, each step should be a seperate line.
Do not output any preamble or other text, only the list of steps.
If you think a task can be completed by a single step, then you can output a single step.
If you can directly answer the question, you must begin your response with a single line containing the text "DIRECT_ANSWER" and then provide the answer to the question on the next line.
-Here are some samples:
+1. Analyze: Thoroughly analyze the query and consider multiple approaches to solving it.
-Input: Write a program to reverse a string, then output the ASCII art of that reversed string. Do this in python.
+2. Plan: Develop a plan of action, considering whether you need to use any tools or if you can answer directly.
Steps:
1. Define a template for a program that prints the ASCII art of the reversed string.
2. Fill in the logic to reverse the string.
3. Fill in the logic to print the ASCII art of the reversed string.
4. Output the final program.
-Input: What are the oceans of the world?
+3. Execute: If you need to use a tool, call it as you would a function. If not, proceed with your reasoning.
Steps:
1. Use the encyclopedia tool to get the page on the oceans of the world, parse, and output the results.
-Input: What is the perfect gas law?
+4. Reflect: After each step or tool use, reflect on the results:
-Steps:
+- If successful, consider if the result fully answers the user's query or if additional steps are needed.
-DIRECT_ANSWER
+- If there were errors or the result is unsatisfactory, don't give up! Use Tree of Thoughts reasoning:
-The perfect gas law is the equation of state of a hypothetical ideal gas. The formula is $$PV = nRT$$ where P is pressure, V is volume, n is the number of moles, R is the ideal gas constant, and T is temperature.
+a) Generate multiple alternative approaches or modifications to your previous approach.
b) Briefly evaluate the potential of each alternative.
c) Choose the most promising alternative and execute it.
d) Repeat this process if needed, building upon your growing understanding of the problem.
e) You cannot return a final answer after an error using a tool, you must try again.
5. Iterate: Continue this process of execution and reflection, exploring different branches of thought as needed.
6. Conclude: When you believe you have a comprehensive answer to the user's query, provide your final answer.
Always explain your thought process, including your reasoning for each decision and how you arrived at your conclusions. If you're providing a final answer, put your response in tags <answer></answer>.
Remember, complex problems often require multiple steps and iterations. Don't hesitate to break down the problem, use tools multiple times, or explore different approaches to arrive at the best solution.
""" """
def generate_plan(user_input: str) -> tuple[List[str], str]: PRIMARY_MODEL = "llama3.1:8b"
logger.debug("Generating plan", prompt=user_input, system=PLAN_GENERATE_PROMPT)
response = model_manager.generate_text("qwen2.5:7b", user_input, max_length=1024, system=PLAN_GENERATE_PROMPT)
plan = response.split("STEPS")[1].strip()
response_no_steps = response.split("STEPS")[0].strip()
return [step.strip() for step in plan.split("\n") if step.strip()], response_no_steps
UPDATE_INTERVAL = 0.1 # 100ms, configurable
-SELECT_BEST_MODEL_PROMPT = f"""
+def get_system_resources():
-You are a large language model whos job it is to evaluate a step that is part of a larger plan, and determine what LLM would be best suited to complete the step based on the capabilities of the LLM.
+cpu_load = psutil.cpu_percent()
memory = psutil.virtual_memory()
memory_usage = memory.percent
disk_io = psutil.disk_io_counters()
disk_read = disk_io.read_bytes
disk_write = disk_io.write_bytes
gpus = GPUtil.getGPUs()
gpu_load = gpus[0].load * 100 if gpus else 0
gpu_memory = gpus[0].memoryUtil * 100 if gpus else 0
return {
'cpu_load': cpu_load,
'memory_usage': memory_usage,
'disk_read': disk_read,
'disk_write': disk_write,
'gpu_load': gpu_load,
'gpu_memory': gpu_memory
}
-The LLMs and their capabilities are as follows:
+def send_system_resources():
-{"\n".join([f"{k}: {','.join(v)}" for k,v in model_manager.model_capabilities.items()])}
+last_disk_read = 0
last_disk_write = 0
-You will be provided with the current step of execution, the results of the previous steps in order, and the current chain of thought so far.
+while True:
-If the chain of thought is too long, a summary of the current chain of thought will be provided.
+resources = get_system_resources()
Your job is to use all this information to determine which of the provided LLMs would be best suited to complete the provided step given the capabilities of the LLM.
-Your response should be the full name of the LLM that should complete the step.
+# Calculate disk I/O rates
-Reply with only one of the following values: \n{'\n'.join(list(model_manager.model_capabilities.keys()))}
+disk_read_rate = (resources['disk_read'] - last_disk_read) / UPDATE_INTERVAL
-"""
+disk_write_rate = (resources['disk_write'] - last_disk_write) / UPDATE_INTERVAL
-def select_best_model(step: str, results: List[str], context: str) -> tuple[str, str]:
+socketio.emit('system_resources', {
-prompt = f"Current Step: {step}\n\nResults So Far: {results}\n\nCurrent Chain of Thought: {context}"
+'cpu_load': resources['cpu_load'],
-logger.debug("Selecting best model", prompt=prompt, system=SELECT_BEST_MODEL_PROMPT)
+'memory_usage': resources['memory_usage'],
-response = model_manager.generate_text("llama3.2:3b", prompt, max_length=50, system=SELECT_BEST_MODEL_PROMPT)
+'disk_read_rate': disk_read_rate,
-model_name = response.strip().lower()
+'disk_write_rate': disk_write_rate,
-return model_name, response
+'gpu_load': resources['gpu_load'],
'gpu_memory': resources['gpu_memory']
})
def summarize_context(context: str) -> tuple[str, str]:
-prompt = f"Summarize the following context: {context}"
+last_disk_read = resources['disk_read']
-logger.debug("Summarizing context", prompt=prompt)
+last_disk_write = resources['disk_write']
-response = model_manager.generate_text("llama3.2:3b", prompt, max_length=300)
+time.sleep(UPDATE_INTERVAL)
return response, response
EXECUTE_STEP_PROMPT = """
You are a large language model that has been selected to complete a step within a larger task.
You have been selected to complete this step due to your specific capabilities.
You will be provided with the job to do in this current step, the results of the previous steps in order, and the current chain of thought so far.
If the chain of thought is too long, a summary of the current chain of thought will be provided.
Your job is to use all this information to complete the step.
Your response should be in two parts. The first part should be your thought process in completing the step, how you went about solving the step, assumptions made, relation to previous steps, and challenges faced.
You must then output a line with the word "RESPONSE".
The second part should be the result of completing your step.
The second part should contain nothing except the result of completing your step.
Only complete your part of the step. Do not extrapolate beyond the bounds of the step. Do not trample on the results of previous steps. Build on the results of previous steps, and use them to inform your work.
Do not include any preamble or other text, only the result of completing your step.
Do not use any markdown formatting, code formatting, or any other formatting.
"""
def execute_step(step: str, model: str, results: List[str], context: str) -> tuple[str, str]:
prompt = f"Current Step: {step}\n\nResults So Far: {results}\n\nCurrent Chain of Thought: {context}"
logger.debug("Executing step", step=step, model=model, prompt=prompt)
response = model_manager.generate_text(model, prompt, max_length=1024, system=EXECUTE_STEP_PROMPT)
response_step = response.split("RESPONSE")[1].strip()
response_thinking = response.split("RESPONSE")[0].strip()
return response_step, response_thinking
def generate_final_response(user_input: str, plan: List[str], step_results: List[str]) -> tuple[str, str]:
prompt = f"Question: {user_input}\n\nPlan:\n"
for i, step in enumerate(plan):
prompt += f"{i+1}. {step}\n"
prompt += "\nResults:\n"
for i, result in enumerate(step_results):
prompt += f"Step {i+1} result: {result}\n"
prompt += "\nBased on the above information, provide a comprehensive answer to the original question."
logger.debug("Generating final response", prompt=prompt)
response = model_manager.generate_text("qwen2.5:7b", prompt, max_length=500)
return response, response
if __name__ == "__main__": if __name__ == "__main__":
logger.info("Starting LLM Chat Server") logger.info("Starting LLM Chat Server")
socketio.run(app, debug=True) threading.Thread(target=send_system_resources, daemon=True).start()
socketio.run(app, debug=True, host="0.0.0.0", port=5001)
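
To tie this back to the front-end: the emit('thought', ...) calls in answer_question_tools drive addThought() in the HTML page, with the 'type' field selecting one of the .thought-summary classes. A rough, illustrative sketch of the event payloads (not code from the commit):

# 'thought' events emitted by answer_question_tools(); 'type' matches the CSS classes
# .thought-summary.decision / .tool_call / .tool_result / .answer ('error' is added client-side).
thought_event = {"type": "tool_call", "content": "Tool: search_web\nArguments: {'query': '...'}"}

# Final reply once an <answer>...</answer> block is found:
chat_response_event = {"response": "...", "thinking_time": 12.34}

# Emitted continuously by send_system_resources() for the sidebar charts:
system_resources_event = {
    "cpu_load": 12.5, "memory_usage": 40.2,
    "disk_read_rate": 0.0, "disk_write_rate": 0.0,
    "gpu_load": 0.0, "gpu_memory": 0.0,
}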

models.py

@@ -6,9 +6,9 @@ logger = structlog.get_logger()
class ModelManager:
def __init__(self):
self.model_capabilities = {
-"qwen2.5:7b": ["general_knowledge", "structured_output", "multilingual", "instruction_following", "structured_data"],
+"ajindal/llama3.1-storm:8b": ["general_knowledge", "reasoning", "tool_calling", "conversation", "multilingual", "instruction_following"],
"llama3.1:8b": ["general_knowledge", "reasoning", "tool_calling", "conversation", "multilingual", "instruction_following"],
-"qwen2.5-coder:7b": ["code_generation", "code_analysis", "instruction_following", "math_reasoning"],
+"qwen2.5:7b": ["general_knowledge", "reasoning", "tool_calling", "conversation", "multilingual", "instruction_following"],
"llama3.2:3b": ["summarization", "instruction_following", "tool_calling", "multilingual"],
"llava:7b": ["visual_reasoning", "visual_conversation", "visual_tool_calling", "vision", "ocr", "multimodal"],
}
@@ -25,8 +25,7 @@ class ModelManager:
logger.info("Selected best model", required_capability=required_capability, selected_model=selected_model)
return selected_model
-def generate_text(self, model_name, prompt, max_length=100, system="You are a helpful assistant."):
+def generate_text(self, model_name, prompt, max_length=100, system="You are a helpful assistant.", tools=[]):
logger.debug("Generating text", model=model_name, prompt=prompt, max_length=max_length)
# Check if model exists
try:
ollama.pull(model_name)
@@ -38,7 +37,9 @@ class ModelManager:
else:
logger.exception("Error pulling model", model=model_name, error=str(e))
raise e
response = ollama.generate(model=model_name, prompt=prompt, system=system)
response = ollama.generate(model=model_name, prompt=prompt, system=system, tools=tools, max_tokens=max_length)
logger.debug("Text generated", model=model_name, response=response['response'])
return response['response']

tools.py (new file)

@@ -0,0 +1,116 @@
import duckduckgo_search
import requests
from readability.readability import Document
from markdownify import markdownify as md
import sys
import time
import io
import subprocess
class Tool:
def __init__(self, name: str, description: str, arguments: dict, returns: str):
self.name = name
self.description = description
self.arguments = arguments
self.returns = returns
def execute(self, arguments: dict) -> str:
pass
class ToolManager:
def __init__(self):
self.tools = []
def add_tool(self, tool: Tool):
self.tools.append(tool)
def get_tool(self, name: str) -> Tool:
for tool in self.tools:
if tool.name == name:
return tool
return None
def get_tools_and_descriptions_for_prompt(self):
return "\n".join([f"{tool.name}: {tool.description}" for tool in self.tools])
def get_tools_for_ollama_dict(self):
return [{'type': 'function', 'function': {'name': tool.name, 'description': tool.description, 'parameters': tool.arguments}} for tool in self.tools]
class DefaultToolManager(ToolManager):
def __init__(self):
super().__init__()
self.add_tool(SearchTool())
self.add_tool(GetReadablePageContentsTool())
self.add_tool(CalculatorTool())
self.add_tool(PythonCodeTool())
class SearchTool(Tool):
def __init__(self):
super().__init__("search_web", "Search the internet for information", {'type': 'object', 'properties': {'query': {'type': 'string', 'description': 'The search query'}}}, "results:list[string]")
def execute(self, arg: dict) -> str:
res = duckduckgo_search.DDGS().text(arg['query'], max_results=5)
return '\n\n'.join([f"{r['title']}\n{r['body']}\n{r['href']}" for r in res])
def get_readable_page_contents(url: str) -> str:
try:
response = requests.get(url)
response.raise_for_status()
doc = Document(response.content)
content = doc.summary()
return md(content)
except Exception as e:
return f"Error fetching readable content: {str(e)}"
class GetReadablePageContentsTool(Tool):
def __init__(self):
super().__init__("get_readable_page_contents", "Get the contents of a web page in a readable format", {'type': 'object', 'properties': {'url': {'type': 'string', 'description': 'The url of the web page'}}}, "contents:string")
def execute(self, arg: dict) -> str:
return get_readable_page_contents(arg['url'])
class CalculatorTool(Tool):
def __init__(self):
super().__init__("calculator", "Perform a calculation", {'type': 'object', 'properties': {'expression': {'type': 'string', 'description': 'The mathematical expression to evaluate, should be a python mathematical expression'}}}, "result:string")
def execute(self, arg: dict) -> str:
try:
return str(exec(arg["expression"]))
except Exception as e:
return f"Error executing code: {str(e)}"
class PythonCodeTool(Tool):
def __init__(self):
super().__init__("python_code", "Execute python code", {'type': 'object', 'properties': {'code': {'type': 'string', 'description': 'The python code to execute, should be a single line of valid python'}}}, "result:string")
def execute(self, arg: dict) -> str:
try:
start_time = time.time()
process = subprocess.Popen(['python', '-c', arg['code']],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True)
stdout, stderr = process.communicate(timeout=10) # 10 second timeout
end_time = time.time()
execution_time = end_time - start_time
result = {
'stdout': stdout,
'stderr': stderr,
'return_value': process.returncode,
'execution_time': execution_time
}
except subprocess.TimeoutExpired:
process.kill()
return "Error: Code execution timed out after 10 seconds"
except Exception as e:
return f"Error executing code: {str(e)}"
return '\n'.join([f"{k}: {v}" for k, v in result.items()])
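
For reference, a minimal usage sketch of the Tool/ToolManager API defined above, mirroring how main.py calls it (illustrative only, not part of the commit):

# illustrative usage of tools.py -- not part of this commit
from tools import DefaultToolManager

tool_manager = DefaultToolManager()

# Plain-text tool listing that main.py interpolates into ANSWER_QUESTION_PROMPT.
print(tool_manager.get_tools_and_descriptions_for_prompt())

# Function-call schema passed to ollama.chat(..., tools=...) so the model can emit tool_calls.
ollama_tools = tool_manager.get_tools_for_ollama_dict()

# A returned tool call is dispatched by name with a dict of arguments,
# exactly as answer_question_tools() does it.
result = tool_manager.get_tool("python_code").execute({"code": "print(2 + 2)"})
print(result)  # stdout / stderr / return_value / execution_time, one per line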