whole bunch of nonsense

This commit is contained in:
Tanishq Dubey 2024-09-29 12:18:44 -04:00
parent d050549dd8
commit 47059dabdc
4 changed files with 535 additions and 260 deletions

View File

@ -9,6 +9,8 @@
<script src="https://cdn.jsdelivr.net/npm/moment@2.29.4/moment.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
<script src="https://cdn.jsdelivr.net/npm/chartjs-adapter-moment@1.0.1/dist/chartjs-adapter-moment.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.7.0/highlight.min.js"></script>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.7.0/styles/default.min.css">
<link href="https://fonts.googleapis.com/css2?family=Noto+Sans+Mono:wght@400;700&display=swap" rel="stylesheet">
<style>
body {
@ -86,13 +88,17 @@
color: #888;
}
.thought-summary {
cursor: pointer;
color: #fff;
margin-bottom: 5px;
font-weight: bold;
display: flex;
align-items: center;
margin-bottom: 5px;
padding: 5px;
border-radius: 3px;
}
.thought-summary.plan { background-color: #2c3e50; }
.thought-summary.decision { background-color: #34495e; }
.thought-summary.tool_call { background-color: #16a085; }
.thought-summary.tool_result { background-color: #27ae60; }
.thought-summary.think_more { background-color: #2980b9; }
.thought-summary.answer { background-color: #8e44ad; }
.thought-details {
display: none;
margin-left: 20px;
@ -238,11 +244,111 @@
transform: translateX(0);
}
}
.conversation-history-container {
margin-top: 20px;
background-color: #222;
border-radius: 5px;
padding: 10px;
}
#conversation-history {
color: #fff;
font-family: 'Noto Sans Mono', monospace;
font-size: 12px;
}
.history-card {
background-color: #2c3e50;
border-radius: 5px;
padding: 10px;
margin-bottom: 10px;
}
.history-role {
font-weight: bold;
margin-bottom: 5px;
}
.history-content {
white-space: pre-wrap;
word-break: break-word;
}
.error-message {
background-color: #ff6b6b;
color: #fff;
padding: 10px;
border-radius: 5px;
margin-bottom: 10px;
}
.retrying {
background-color: #feca57;
color: #333;
}
#clear-history-button {
background-color: #e74c3c;
color: white;
border: none;
padding: 10px;
margin-bottom: 10px;
cursor: pointer;
font-family: 'Noto Sans Mono', monospace;
font-size: 14px;
border-radius: 5px;
}
#clear-history-button:hover {
background-color: #c0392b;
}
#chat-tabs {
display: flex;
background-color: #222;
padding: 10px 10px 0 10px;
}
.chat-tab {
background-color: #444;
color: #fff;
border: none;
padding: 10px 20px;
margin-right: 5px;
cursor: pointer;
border-top-left-radius: 5px;
border-top-right-radius: 5px;
}
.chat-tab.active {
background-color: #666;
}
#new-chat-button {
background-color: #27ae60;
color: #fff;
border: none;
padding: 10px 20px;
cursor: pointer;
border-top-left-radius: 5px;
border-top-right-radius: 5px;
}
.close-tab {
margin-left: 10px;
color: #ff6b6b;
cursor: pointer;
}
.thinking-section {
margin-bottom: 20px;
border-left: 2px solid #444;
padding-left: 10px;
}
</style>
</head>
<body>
<div id="main-container">
<div id="chat-area">
<div id="chat-tabs"></div>
<div id="chat-container"></div>
<div id="input-container" class="pdp-panel">
<div class="pdp-label">INPUT:</div>
@ -272,6 +378,12 @@
<div class="graph-title">GPU Memory</div>
<canvas id="gpuMemoryChart"></canvas>
</div>
<!-- Add this new section for conversation history -->
<div class="conversation-history-container">
<div class="graph-title">Conversation History</div>
<div id="conversation-history"></div>
</div>
</div>
</div>
@ -280,10 +392,80 @@
const chatContainer = document.getElementById('chat-container');
const userInput = document.getElementById('user-input');
const sendButton = document.getElementById('send-button');
const chatTabs = document.getElementById('chat-tabs');
let thinkingElement = null;
let thinkingDetails = null;
let thinkingStartTime = null;
let currentChatId = null;
let chats = {};
/**
 * Create an empty chat, give it a tab, make it the active chat and persist it.
 * The chat id is the current timestamp rendered as a string.
 */
function createNewChat() {
    const freshId = String(Date.now());
    chats[freshId] = { messages: [], thinkingSections: [] };
    addChatTab(freshId);
    switchToChat(freshId);
    saveChats();
}
/**
 * Build the tab button (with its close "×") for a chat and insert it before
 * the last element of the tab strip.
 *
 * @param {string} chatId - Key of the chat in `chats`.
 */
function addChatTab(chatId) {
    const tab = document.createElement('button');
    tab.classList.add('chat-tab');

    // Label the tab by the chat's own position. Using the total number of
    // chats gave every tab the same label when several chats were restored at
    // once, because `chats` is already fully populated at that point.
    const chatIds = Object.keys(chats);
    const position = chatIds.indexOf(chatId) + 1;
    tab.textContent = `Chat ${position > 0 ? position : chatIds.length}`;
    tab.onclick = () => switchToChat(chatId);

    const closeButton = document.createElement('span');
    closeButton.classList.add('close-tab');
    closeButton.textContent = '×';
    closeButton.onclick = (e) => {
        // Keep the click from also triggering the tab's own switch handler.
        e.stopPropagation();
        closeChat(chatId);
    };

    tab.appendChild(closeButton);
    chatTabs.insertBefore(tab, chatTabs.lastElementChild);
}
/**
 * Make `chatId` the current chat, highlight its tab and re-render its content.
 *
 * The tab is located by position: the n-th key of `chats` is assumed to
 * correspond to the n-th child of the tab strip.
 *
 * @param {string} chatId - Key of the chat in `chats`.
 */
function switchToChat(chatId) {
    currentChatId = chatId;
    document.querySelectorAll('.chat-tab').forEach(tab => tab.classList.remove('active'));

    const position = Object.keys(chats).indexOf(chatId) + 1;
    const activeTab = document.querySelector(`.chat-tab:nth-child(${position})`);
    // The selector matches nothing when the tab strip and `chats` are out of
    // sync; skip the highlight instead of throwing on a null element.
    if (activeTab) {
        activeTab.classList.add('active');
    }

    renderChat(chatId);
}
/**
 * Close a chat: remove its stored data and its tab, then, if it was the
 * active chat, switch to the first remaining chat or start a new one.
 *
 * @param {string} chatId - Key of the chat in `chats`.
 */
function closeChat(chatId) {
    // Resolve the tab BEFORE deleting the chat. Once the key is gone,
    // indexOf() returns -1 and the tab can no longer be found. The tab is
    // looked up by position rather than by label text, because a substring
    // match on "Chat 1" would also match "Chat 10".
    const tabIndex = Object.keys(chats).indexOf(chatId);
    const tabToRemove = chatTabs.querySelectorAll('.chat-tab')[tabIndex];

    delete chats[chatId];
    saveChats();

    if (tabToRemove) {
        chatTabs.removeChild(tabToRemove);
    }

    if (currentChatId === chatId) {
        const remainingChatIds = Object.keys(chats);
        if (remainingChatIds.length > 0) {
            switchToChat(remainingChatIds[0]);
        } else {
            createNewChat();
        }
    }
}
/**
 * Redraw the chat view from the stored state of `chatId`: first every
 * message, then every thinking section with its thoughts.
 *
 * @param {string} chatId - Key of the chat in `chats`.
 */
function renderChat(chatId) {
    chatContainer.innerHTML = '';
    const chat = chats[chatId];
    if (!chat) {
        return;
    }

    // addMessage() and addThought() append to chats[currentChatId] and save
    // whenever currentChatId is set. Replaying stored entries through them
    // would therefore duplicate the history on every render, so persistence
    // is suspended for the duration of the replay.
    const activeChatId = currentChatId;
    currentChatId = null;
    try {
        chat.messages.forEach(message => addMessage(message.content, message.isUser));
        chat.thinkingSections.forEach(section => {
            const thinkingSection = createThinkingSection();
            section.thoughts.forEach(thought => addThought(thought.type, thought.content, thought.details, thinkingSection));
        });
    } finally {
        currentChatId = activeChatId;
    }
}
/**
 * Append a new, empty thinking-section container to the chat view.
 *
 * @returns {HTMLDivElement} The element that was appended.
 */
function createThinkingSection() {
    const wrapper = document.createElement('div');
    wrapper.className = 'thinking-section';
    // appendChild() hands back the node it appended.
    return chatContainer.appendChild(wrapper);
}
function addMessage(message, isUser) {
const messageElement = document.createElement('div');
@ -292,65 +474,40 @@
messageElement.innerHTML = isUser ? message : marked.parse(message);
chatContainer.appendChild(messageElement);
chatContainer.scrollTop = chatContainer.scrollHeight;
if (currentChatId) {
chats[currentChatId].messages.push({ content: message, isUser: isUser });
saveChats();
}
}
function startThinking() {
thinkingElement = document.createElement('div');
thinkingElement.classList.add('thought-summary', 'collapsible');
function addThought(type, content, details = '', thinkingSection) {
const stepElement = document.createElement('div');
stepElement.classList.add('thought-summary', 'collapsible', type);
stepElement.textContent = type.charAt(0).toUpperCase() + type.slice(1).replace('_', ' ') + ':';
stepElement.onclick = toggleStepDetails;
const stepDetails = document.createElement('div');
stepDetails.classList.add('thought-details');
const led = document.createElement('div');
led.classList.add('led', 'blinking');
const textNode = document.createTextNode('Thinking...');
thinkingElement.appendChild(led);
thinkingElement.appendChild(textNode);
thinkingElement.onclick = toggleThinkingDetails;
thinkingDetails = document.createElement('div');
thinkingDetails.classList.add('thought-details');
chatContainer.appendChild(thinkingElement);
chatContainer.appendChild(thinkingDetails);
thinkingStartTime = Date.now();
if (type === 'error') {
stepElement.classList.add('error-message');
if (content.includes('retrying')) {
stepElement.classList.add('retrying');
}
stepDetails.innerHTML = marked.parse(content + '\n\nDetails:\n```\n' + details + '\n```');
} else {
stepDetails.innerHTML = marked.parse(content);
}
thinkingSection.appendChild(stepElement);
thinkingSection.appendChild(stepDetails);
chatContainer.scrollTop = chatContainer.scrollHeight;
}
function addThought(step, content) {
if (thinkingDetails) {
const stepElement = document.createElement('div');
stepElement.classList.add('thought-summary', 'collapsible');
stepElement.textContent = step;
stepElement.onclick = toggleStepDetails;
const stepDetails = document.createElement('div');
stepDetails.classList.add('thought-details');
stepDetails.innerHTML = content;
thinkingDetails.appendChild(stepElement);
thinkingDetails.appendChild(stepDetails);
chatContainer.scrollTop = chatContainer.scrollHeight;
}
}
function endThinking(thinkingTime) {
if (thinkingElement) {
const textNode = thinkingElement.childNodes[1];
textNode.nodeValue = `Thinking... (${thinkingTime}s)`;
const led = thinkingElement.querySelector('.led');
led.classList.remove('blinking');
led.style.backgroundColor = '#0f0';
led.style.boxShadow = '0 0 10px #0f0';
thinkingStartTime = null;
}
}
function toggleThinkingDetails() {
this.classList.toggle('open');
const details = this.nextElementSibling;
if (details) {
details.style.display = details.style.display === 'none' ? 'block' : 'none';
if (currentChatId) {
const currentThinkingSection = chats[currentChatId].thinkingSections[chats[currentChatId].thinkingSections.length - 1];
currentThinkingSection.thoughts.push({ type, content, details });
saveChats();
}
}
@ -362,34 +519,71 @@
}
}
socket.on('thinking', (data) => {
if (!thinkingElement) startThinking();
addThought(data.step, 'Started');
});
/**
 * Persist every chat to localStorage under the key 'chats'.
 *
 * A storage failure (quota exceeded, storage disabled) is logged instead of
 * thrown, so a failed save cannot abort the caller mid-way through updating
 * the chat view.
 */
function saveChats() {
    try {
        localStorage.setItem('chats', JSON.stringify(chats));
    } catch (err) {
        console.error('Failed to save chats to localStorage', err);
    }
}
socket.on('thought', (data) => {
addThought('Result', data.content);
});
socket.on('chat_response', (data) => {
endThinking(data.thinking_time);
addMessage(data.response, false);
});
socket.on('error', (data) => {
endThinking(data.thinking_time);
addMessage(`Error: ${data.message}`, false);
});
/**
 * Restore chats from localStorage, rebuild their tabs and activate the first
 * one. Starts a fresh chat when nothing usable is stored.
 */
function loadChats() {
    let restored = null;
    try {
        // getItem() yields null when the key is absent; JSON.parse(null) is null.
        restored = JSON.parse(localStorage.getItem('chats'));
    } catch (err) {
        // Corrupt or unreadable data must not leave the page without a chat.
        console.error('Failed to load chats from localStorage', err);
    }

    // Only a plain object keyed by chat id is accepted as stored state.
    if (restored !== null && typeof restored === 'object' && !Array.isArray(restored)) {
        chats = restored;
    }

    const chatIds = Object.keys(chats);
    if (chatIds.length === 0) {
        createNewChat();
        return;
    }
    chatIds.forEach(chatId => addChatTab(chatId));
    switchToChat(chatIds[0]);
}
function sendMessage() {
const message = userInput.value.trim();
if (message) {
if (message && currentChatId) {
addMessage(message, true);
socket.emit('chat_request', { message: message });
chats[currentChatId].thinkingSections.push({ thoughts: [] });
socket.emit('chat_request', {
message: message,
conversation_history: chats[currentChatId].messages.filter(m => !m.isUser).map(m => ({ role: 'assistant', content: m.content }))
.concat(chats[currentChatId].messages.filter(m => m.isUser).map(m => ({ role: 'user', content: m.content })))
});
userInput.value = '';
}
}
/**
 * Return the thinking section that incoming thoughts and errors belong to:
 * the last `.thinking-section` in the chat view. When none exists, a new one
 * is created and a matching `{ thoughts: [] }` entry is registered on the
 * current chat, the same way the 'thinking' handler does.
 *
 * `.thinking-section:last-child` is deliberately not used: it matches nothing
 * once any other element (e.g. a message) has been appended after the section.
 */
function latestThinkingSection() {
    const sections = chatContainer.querySelectorAll('.thinking-section');
    if (sections.length > 0) {
        return sections[sections.length - 1];
    }
    chats[currentChatId].thinkingSections.push({ thoughts: [] });
    return createThinkingSection();
}

// A new reasoning run starts: open a fresh section and record its first step.
socket.on('thinking', (data) => {
    if (currentChatId) {
        const newThinkingSection = createThinkingSection();
        chats[currentChatId].thinkingSections.push({ thoughts: [] });
        addThought(data.step, 'Started', '', newThinkingSection);
    }
});

// An intermediate thought is appended to the most recent section.
socket.on('thought', (data) => {
    if (currentChatId) {
        addThought(data.type, data.content, data.details, latestThinkingSection());
    }
});

// The final response is shown as a regular (non-user) message.
socket.on('chat_response', (data) => {
    if (currentChatId) {
        addMessage(data.response, false);
    }
});

// Errors are rendered as 'error' thoughts. 'retrying' events carry their text
// in `content`, all others in `message`.
socket.on('error', (data) => {
    if (currentChatId) {
        const text = data.type === 'retrying' ? data.content : data.message;
        // Fall back to an empty string so the error rendering never has to
        // call string methods on undefined.
        addThought('error', text || '', '', latestThinkingSection());
    }
});
sendButton.addEventListener('click', sendMessage);
userInput.addEventListener('keypress', function(e) {
if (e.key === 'Enter' && !e.shiftKey) {
@ -398,6 +592,16 @@
}
});
// Build the "+ New Chat" button in one step and place it at the end of the tab strip.
const newChatButton = Object.assign(document.createElement('button'), {
    id: 'new-chat-button',
    textContent: '+ New Chat',
    onclick: createNewChat,
});
chatTabs.appendChild(newChatButton);

// Restore previously saved chats (or start a first one) on page load.
loadChats();
const chartOptions = {
type: 'line',
options: {
@ -570,6 +774,41 @@
window.addEventListener('resize', checkWindowSize);
checkWindowSize(); // Initial check
// Add this new function to update the conversation history
/**
 * Re-render the conversation-history panel as one card per history entry.
 *
 * @param {Array<{role: string, content: *}>} history - Entries to display.
 *     A missing list is treated as empty.
 */
function updateConversationHistory(history) {
    const conversationHistoryElement = document.getElementById('conversation-history');
    conversationHistoryElement.innerHTML = '';

    (history || []).forEach(item => {
        const card = document.createElement('div');
        card.classList.add('history-card');

        const role = document.createElement('div');
        role.classList.add('history-role');
        // Coerce first so an entry without a role cannot throw on charAt().
        const roleName = String(item.role || '');
        role.textContent = roleName.charAt(0).toUpperCase() + roleName.slice(1);

        const content = document.createElement('pre');
        content.classList.add('history-content');
        // highlightAuto() needs a string; entries may arrive with missing or
        // structured content, so normalise before highlighting.
        let text = '';
        if (typeof item.content === 'string') {
            text = item.content;
        } else if (item.content !== null && item.content !== undefined) {
            text = JSON.stringify(item.content, null, 2);
        }
        content.innerHTML = hljs.highlightAuto(text).value;

        card.appendChild(role);
        card.appendChild(content);
        conversationHistoryElement.appendChild(card);
    });
}
// Refresh the conversation-history panel whenever the server pushes a new
// 'conversation_history' event; the entries are read from `data.history`.
socket.on('conversation_history', (data) => {
updateConversationHistory(data.history);
});
// Ask for confirmation, then clear the conversation history when the clear
// button is clicked.
// NOTE(review): neither `clearHistoryButton` nor `clearConversationHistory`
// is defined in the part of the file visible here — confirm both exist
// elsewhere, otherwise this statement throws a ReferenceError at load time.
clearHistoryButton.addEventListener('click', () => {
if (confirm('Are you sure you want to clear the conversation history?')) {
clearConversationHistory();
}
});
</script>
</body>
</html>

267
main.py
View File

@ -1,4 +1,4 @@
from flask import Flask, send_from_directory
from flask import Flask, send_from_directory, request
from flask_socketio import SocketIO, emit
from flask_openapi3 import OpenAPI, Info
from pydantic import BaseModel
@ -10,15 +10,20 @@ import psutil
import GPUtil
import threading
import os
from tools import DefaultToolManager
import ollama
import re
import json
from datetime import datetime
import pprint
logger = structlog.get_logger()
openapi = OpenAPI(__name__, info=Info(title="LLM Chat Server", version="1.0.0"))
app = openapi
socketio = SocketIO(app, cors_allowed_origins="*")
tool_manager = DefaultToolManager()
@app.route('/')
def index():
logger.info("Serving index.html")
@ -33,51 +38,13 @@ class ChatResponse(BaseModel):
@socketio.on('chat_request')
def handle_chat_request(data):
user_input = data['message']
logger.info("Received chat request", user_input=user_input)
conversation_history = data.get('conversation_history', [])
conversation_history = [{"role": "system", "content": ANSWER_QUESTION_PROMPT}] + conversation_history
logger.info("Received chat request", user_input=user_input, conversation_history=conversation_history)
start_time = time.time()
full_context = ""
try:
# Step 1: Generate a plan using the initial LLM
emit('thinking', {'step': 'Generating plan'})
plan, plan_generation = generate_plan(user_input)
full_context += f"Plan Thinking:\n{plan_generation}"
full_context += f"Plan:\n{plan}"
emit('thought', {'content': f"Plan Thinking:\n{plan_generation}"})
emit('thought', {'content': f"Plan:\n{plan}"})
if plan[0].strip().lower() == "direct_answer":
final_response = plan[1]
thinking_time = round(time.time() - start_time, 2)
emit('chat_response', {
'response': final_response,
'thinking_time': thinking_time
})
return
# Step 2: Execute each step of the plan
step_results = []
for i, step in enumerate(plan):
emit('thinking', {'step': f'Executing step {i+1}'})
while True:
best_model, model_selection = select_best_model(step, step_results, full_context)
if best_model in model_manager.model_capabilities:
break
logger.warning(f"Selected model {best_model} is not in the list of available models. Retrying...")
emit('thought', {'content': f"Selected model for step {i+1}:\n{model_selection}"})
# summary, summary_generation = summarize_context(f"Plan: {plan}\n\nSteps: {step_results}")
# emit('thought', {'content': f"Context summary:\n{summary_generation}"})
step_result, step_execution = execute_step(step, best_model, step_results, full_context)
emit('thought', {'content': f"Step {i+1} result:\n{step_execution}"})
emit('thought', {'content': f"Result {i+1}:\n{step_result}"})
step_results.append(step_result)
full_context += f"Step {i+1} result:\n{step_execution}"
# Step 3: Generate final response
emit('thinking', {'step': 'Generating final response'})
final_response, final_generation = generate_final_response(user_input, plan, step_results)
emit('thought', {'content': f"Final response generation:\n{final_generation}"})
final_response = answer_question_tools(user_input, conversation_history)
end_time = time.time()
thinking_time = round(end_time - start_time, 2)
@ -94,146 +61,98 @@ def handle_chat_request(data):
'thinking_time': thinking_time
})
PLAN_GENERATE_PROMPT = """
You are building a "chain of thought" workflow for a series of LLMs to complete a task provided by a user.
Your first task is to "think" through the problem provided by the user. Probe what it would take to complete the task, see if there are hidden nuances, what constrains might be relevant, how to be efficient.
This thinking should set question the premise of the task, and sets the scene for a plan of attack to be created.
Verbalize your thoughts out loud, allow the user to see your thought process. This thought process will also be used as context for processing the generated plan.
This thought process should mimic the process of a human, and not be a simple list of steps, but should be a narrative of thought that a human would have.
Each step in the formulated plan is a step that a seperate LLM will complete. The LLM that will complete the step will be selected based on the scope of the step and the capabilities of the available models.
There are models that are good at coding and math, and there are models that are good at reasoning and planning. Some models that are generalists, multilingual, or conversational. And even some that are vision models.
Use this context of the possible models to shape each step such that a LLM can complete the step given the step and some context.
Steps should follow a logical "chain of thought" in order to best complete the overall task.
Steps should be self contained and be designed such that the results of one step can be passed on to the next step.
Steps should be phrased in such a way that it acts as a prompt or instruction to the LLM that will complete the step.
Each step will return a result, and a thought process. The thought process is extremely important, it is the "chain of thought" that the LLM went through to complete the step. This thought process is critical for the next step in the plan.
Consider how results from one step can be combined with results from another step and consider how the chain of thought from one step can inform the next step when designing each step.
Try and minimize the number of steps required to complete the task since running a lot of steps is expensive.
Your output should be your thought process, followed by a single line titled "STEPS", followed by each step to take, one step per line.
Do not add any sort of markdown formatting, code formatting, or any other formatting.
Do not add any preamble, postamble, or other text, only the thought process and the steps.
def answer_question_tools(user_input: str, conversation_history: List[dict], max_retries: int = 100):
global tool_manager
# If conversation_history is empty, initialize it with the system prompt
if not conversation_history:
conversation_history = [
{"role": "system", "content": ANSWER_QUESTION_PROMPT},
]
logger.info("Starting chat", user_input=user_input, conversation_history=conversation_history)
# Add the new user input to the conversation history
conversation_history.append({"role": "user", "content": user_input})
emit('thinking', {'step': 'Starting'})
emit('conversation_history', {'history': conversation_history})
Consider the following example:
for iteration in range(max_retries):
response = ollama.chat(model=PRIMARY_MODEL, messages=conversation_history, tools=tool_manager.get_tools_for_ollama_dict(), stream=False)
assistant_message = response['message']
conversation_history.append(assistant_message)
emit('conversation_history', {'history': conversation_history})
pprint.pp(assistant_message)
Prompt: Write a program to reverse a string, then output ASCII block art of that reversed string. Do this in python.
if 'tool_calls' in assistant_message:
emit('thought', {'type': 'decision', 'content': "Tool Call\n\n" + assistant_message['content']})
for tool_call in assistant_message['tool_calls']:
tool_name = tool_call['function']['name']
tool_args = tool_call['function']['arguments']
emit('thought', {'type': 'tool_call', 'content': f"Tool: {tool_name}\nArguments: {tool_args}"})
tool_response = tool_manager.get_tool(tool_name).execute(tool_args)
conversation_history.append({
"role": "tool",
"content": tool_response
})
emit('conversation_history', {'history': conversation_history})
emit('thought', {'type': 'tool_result', 'content': tool_response})
So there are two parts to this task. First, we need to reverse the input string. Then we need to print the ASCII block art for each character in the reversed string.
We should be able to reverse the string using either a simple loop, or a python slice. Slicing is simpler, so we should use that.
For the ASCII block art, the challenge is in creating a mapping between each character and its block art representation. There are a few ways to go about this:
- Find a library that converts text to block art
- Create our own mapping from characters to block art
- Create a procedurally generated mapping from characters to block art
Procedural generation could be done with an algorithm, but coming up with a good algorithm could be challenging.
Generating a dictionary could be a good approach, but there are 26 letters in the alphabet, and 10 digits, so we would need 36 different outputs for the block art.
We should search for a library that already does this, import it, and call it on the result of the string reversal. We would also need to tell the user to install the library.
reflection_prompt = "Reflect on the tool results. If there were any errors, propose multiple alternative approaches to solve the problem. If successful, consider if the result fully answers the user's query or if additional steps are needed."
conversation_history.append({
"role": "assistant",
"content": reflection_prompt
})
emit('conversation_history', {'history': conversation_history})
else:
if "<answer>" in assistant_message['content'].lower():
answer_content = re.search(r'<answer>(.*?)</answer>', assistant_message['content'], re.DOTALL)
if answer_content:
final_answer = answer_content.group(1).strip()
emit('thought', {'type': 'answer', 'content': final_answer})
return final_answer
else:
emit('thought', {'type': 'decision', 'content': "Think/Plan/Decision/Action\n\n" + assistant_message['content']})
reflection_prompt = "Your last response didn't provide a final answer. Please reflect on your current understanding of the problem and consider if you need to use any tools or if you can now provide a final answer. If you're ready to give a final answer, put your response in tags <answer></answer>"
conversation_history.append({"role": "assistant", "content": reflection_prompt})
emit('conversation_history', {'history': conversation_history})
We're now ready to create our plan.
return f"Max iterations reached. Last response: {assistant_message['content']}"
STEPS
1. Write a function that takes a string and reverses it.
2. Write a function that takes a string and returns the ASCII block art for each character in the string, this must be done using a library.
3. Combine the two functions into a single program.
ANSWER_QUESTION_PROMPT = f"""
The current date is {datetime.now().strftime("%A, %B %d, %Y")}, your knowledge cutoff was December 2023.
You are Dewey, an AI assistant with access to external tools and the ability to think through complex problems. Your role is to assist users by leveraging tools when necessary, thinking deeply about problems, and providing accurate and helpful information, all with a cheerful, but witty personality. Here are the tools available to you:
---
{tool_manager.get_tools_and_descriptions_for_prompt()}
Now you try.
"""
_REMINADER_PT ="""
Each task you create should be should be self contained and be designed such that the results of one step can be passed on to the next step.
Try and minimize the number of steps required to complete the task.
Output only a numbered list of steps, each step should be a seperate line.
Do not output any preamble or other text, only the list of steps.
If you think a task can be completed by a single step, then you can output a single step.
If you can directly answer the question, you must begin your response with a single line containing the text "DIRECT_ANSWER" and then provide the answer to the question on the next line.
When addressing a query, follow these steps:
Here are some samples:
1. Analyze: Thoroughly analyze the query and consider multiple approaches to solving it.
Input: Write a program to reverse a string, then output the ASCII art of that reversed string. Do this in python.
Steps:
1. Define a template for a program that prints the ASCII art of the reversed string.
2. Fill in the logic to reverse the string.
3. Fill in the logic to print the ASCII art of the reversed string.
4. Output the final program.
2. Plan: Develop a plan of action, considering whether you need to use any tools or if you can answer directly.
Input: What are the oceans of the world?
Steps:
1. Use the encyclopedia tool to get the page on the oceans of the world, parse, and output the results.
3. Execute: If you need to use a tool, call it as you would a function. If not, proceed with your reasoning.
Input: What is the perfect gas law?
Steps:
DIRECT_ANSWER
The perfect gas law is the equation of state of a hypothetical ideal gas. The formula is $$PV = nRT$$ where P is pressure, V is volume, n is the number of moles, R is the ideal gas constant, and T is temperature.
4. Reflect: After each step or tool use, reflect on the results:
- If successful, consider if the result fully answers the user's query or if additional steps are needed.
- If there were errors or the result is unsatisfactory, don't give up! Use Tree of Thoughts reasoning:
a) Generate multiple alternative approaches or modifications to your previous approach.
b) Briefly evaluate the potential of each alternative.
c) Choose the most promising alternative and execute it.
d) Repeat this process if needed, building upon your growing understanding of the problem.
e) You cannot return a final answer after an error using a tool, you must try again.
5. Iterate: Continue this process of execution and reflection, exploring different branches of thought as needed.
6. Conclude: When you believe you have a comprehensive answer to the user's query, provide your final answer.
Always explain your thought process, including your reasoning for each decision and how you arrived at your conclusions. If you're providing a final answer, put your response in tags <answer></answer>.
Remember, complex problems often require multiple steps and iterations. Don't hesitate to break down the problem, use tools multiple times, or explore different approaches to arrive at the best solution.
"""
def generate_plan(user_input: str) -> tuple[List[str], str]:
    """Ask the planning model for a plan and split it from its reasoning.

    The model output is cut at the "STEPS" marker: the text before it is the
    thought process, the text after it is the plan, one step per non-blank
    line.

    Returns:
        A tuple ``(steps, thought_process)``.

    Raises:
        IndexError: If the model output contains no "STEPS" marker.
    """
    logger.debug("Generating plan", prompt=user_input, system=PLAN_GENERATE_PROMPT)
    response = model_manager.generate_text("qwen2.5:7b", user_input, max_length=1024, system=PLAN_GENERATE_PROMPT)
    sections = response.split("STEPS")
    plan_text = sections[1].strip()
    thought_process = sections[0].strip()

    steps = []
    for line in plan_text.split("\n"):
        cleaned = line.strip()
        if cleaned:
            steps.append(cleaned)
    return steps, thought_process
SELECT_BEST_MODEL_PROMPT = f"""
You are a large language model whos job it is to evaluate a step that is part of a larger plan, and determine what LLM would be best suited to complete the step based on the capabilities of the LLM.
The LLMs and their capabilities are as follows:
{"\n".join([f"{k}: {','.join(v)}" for k,v in model_manager.model_capabilities.items()])}
You will be provided with the current step of execution, the results of the previous steps in order, and the current chain of thought so far.
If the chain of thought is too long, a summary of the current chain of thought will be provided.
Your job is to use all this information to determine which of the provided LLMs would be best suited to complete the provided step given the capabilities of the LLM.
Your response should be the full name of the LLM that should complete the step.
Reply with only one of the following values: \n{'\n'.join(list(model_manager.model_capabilities.keys()))}
"""
def select_best_model(step: str, results: List[str], context: str) -> tuple[str, str]:
    """Ask the selector model which model should run ``step``.

    Returns:
        A tuple ``(model_name, raw_response)`` where ``model_name`` is the raw
        response stripped and lower-cased.
    """
    prompt = "\n\n".join(
        [
            f"Current Step: {step}",
            f"Results So Far: {results}",
            f"Current Chain of Thought: {context}",
        ]
    )
    logger.debug("Selecting best model", prompt=prompt, system=SELECT_BEST_MODEL_PROMPT)
    raw_response = model_manager.generate_text("llama3.2:3b", prompt, max_length=50, system=SELECT_BEST_MODEL_PROMPT)
    return raw_response.strip().lower(), raw_response
def summarize_context(context: str) -> tuple[str, str]:
    """Ask the summarization model to condense ``context``.

    Returns:
        The model response twice, as ``(summary, raw_generation)``; both
        elements are the same value.
    """
    request = f"Summarize the following context: {context}"
    logger.debug("Summarizing context", prompt=request)
    summary = model_manager.generate_text("llama3.2:3b", request, max_length=300)
    return summary, summary
EXECUTE_STEP_PROMPT = """
You are a large language model that has been selected to complete a step within a larger task.
You have been selected to complete this step due to your specific capabilities.
You will be provided with the job to do in this current step, the results of the previous steps in order, and the current chain of thought so far.
If the chain of thought is too long, a summary of the current chain of thought will be provided.
Your job is to use all this information to complete the step.
Your response should be in two parts. The first part should be your thought process in completing the step, how you went about solving the step, assumptions made, relation to previous steps, and challenges faced.
You must then output a line with the word "RESPONSE".
The second part should be the result of completing your step.
The second part should contain nothing except the result of completing your step.
Only complete your part of the step. Do not extrapolate beyond the bounds of the step. Do not trample on the results of previous steps. Build on the results of previous steps, and use them to inform your work.
Do not include any preamble or other text, only the result of completing your step.
Do not use any markdown formatting, code formatting, or any other formatting.
"""
def execute_step(step: str, model: str, results: List[str], context: str) -> tuple[str, str]:
    """Run one plan step on ``model`` and split its output at "RESPONSE".

    Returns:
        A tuple ``(step_result, thought_process)``: the text after the
        "RESPONSE" marker and the text before it, both stripped.

    Raises:
        IndexError: If the model output contains no "RESPONSE" marker.
    """
    prompt = "\n\n".join(
        [
            f"Current Step: {step}",
            f"Results So Far: {results}",
            f"Current Chain of Thought: {context}",
        ]
    )
    logger.debug("Executing step", step=step, model=model, prompt=prompt)
    output = model_manager.generate_text(model, prompt, max_length=1024, system=EXECUTE_STEP_PROMPT)
    sections = output.split("RESPONSE")
    step_result = sections[1].strip()
    thought_process = sections[0].strip()
    return step_result, thought_process
def generate_final_response(user_input: str, plan: List[str], step_results: List[str]) -> tuple[str, str]:
    """Ask the model for a final answer from the question, plan and step results.

    The prompt lists the question, the numbered plan steps and the numbered
    step results, followed by an instruction to answer the original question.

    Returns:
        The model response twice, as ``(final_response, raw_generation)``;
        both elements are the same value.
    """
    plan_lines = "".join(
        f"{number}. {step}\n" for number, step in enumerate(plan, start=1)
    )
    result_lines = "".join(
        f"Step {number} result: {result}\n"
        for number, result in enumerate(step_results, start=1)
    )
    prompt = (
        f"Question: {user_input}\n\nPlan:\n"
        + plan_lines
        + "\nResults:\n"
        + result_lines
        + "\nBased on the above information, provide a comprehensive answer to the original question."
    )
    logger.debug("Generating final response", prompt=prompt)
    answer = model_manager.generate_text("qwen2.5:7b", prompt, max_length=500)
    return answer, answer
PRIMARY_MODEL = "llama3.1:8b"
UPDATE_INTERVAL = 0.1 # 100ms, configurable
@ -284,4 +203,4 @@ def send_system_resources():
if __name__ == "__main__":
logger.info("Starting LLM Chat Server")
threading.Thread(target=send_system_resources, daemon=True).start()
socketio.run(app, debug=True, host="0.0.0.0", port=5000)
socketio.run(app, debug=True, host="0.0.0.0", port=5001)

View File

@ -6,9 +6,9 @@ logger = structlog.get_logger()
class ModelManager:
def __init__(self):
self.model_capabilities = {
"qwen2.5:7b": ["general_knowledge", "structured_output", "multilingual", "instruction_following", "structured_data"],
"ajindal/llama3.1-storm:8b": ["general_knowledge", "reasoning", "tool_calling", "conversation", "multilingual", "instruction_following"],
"llama3.1:8b": ["general_knowledge", "reasoning", "tool_calling", "conversation", "multilingual", "instruction_following"],
"qwen2.5-coder:7b": ["code_generation", "code_analysis", "instruction_following", "math_reasoning"],
"qwen2.5:7b": ["general_knowledge", "reasoning", "tool_calling", "conversation", "multilingual", "instruction_following"],
"llama3.2:3b": ["summarization", "instruction_following", "tool_calling", "multilingual"],
"llava:7b": ["visual_reasoning", "visual_conversation", "visual_tool_calling", "vision", "ocr", "multimodal"],
}
@ -25,8 +25,7 @@ class ModelManager:
logger.info("Selected best model", required_capability=required_capability, selected_model=selected_model)
return selected_model
def generate_text(self, model_name, prompt, max_length=100, system="You are a helpful assistant."):
logger.debug("Generating text", model=model_name, prompt=prompt, max_length=max_length)
def generate_text(self, model_name, prompt, max_length=100, system="You are a helpful assistant.", tools=[]):
# Check if model exists
try:
ollama.pull(model_name)
@ -38,7 +37,9 @@ class ModelManager:
else:
logger.exception("Error pulling model", model=model_name, error=str(e))
raise e
response = ollama.generate(model=model_name, prompt=prompt, system=system)
response = ollama.generate(model=model_name, prompt=prompt, system=system, tools=tools, max_tokens=max_length)
logger.debug("Text generated", model=model_name, response=response['response'])
return response['response']

116
tools.py Normal file
View File

@ -0,0 +1,116 @@
import duckduckgo_search
import requests
from readability.readability import Document
from markdownify import markdownify as md
import sys
import time
import io
import subprocess
class Tool:
    """Base description of a callable tool.

    Stores the metadata given at construction time; ``execute`` is a
    placeholder that subclasses override.
    """

    def __init__(self, name: str, description: str, arguments: dict, returns: str):
        """Record the tool's name, description, argument spec and return spec."""
        self.name, self.description = name, description
        self.arguments, self.returns = arguments, returns

    def execute(self, arguments: dict) -> str:
        """Do nothing and return ``None``; subclasses supply the real behaviour."""
        return None
class ToolManager:
    """Ordered collection of tools with lookup and prompt/Ollama formatting helpers."""

    def __init__(self):
        """Start with an empty tool list."""
        self.tools = []

    def add_tool(self, tool: Tool):
        """Append ``tool`` to the end of the tool list."""
        self.tools.append(tool)

    def get_tool(self, name: str) -> Tool:
        """Return the first registered tool whose ``name`` matches, or ``None``."""
        matches = (candidate for candidate in self.tools if candidate.name == name)
        return next(matches, None)

    def get_tools_and_descriptions_for_prompt(self):
        """Return one ``name: description`` line per tool, joined by newlines."""
        lines = []
        for entry in self.tools:
            lines.append(f"{entry.name}: {entry.description}")
        return "\n".join(lines)

    def get_tools_for_ollama_dict(self):
        """Return a list of function-type tool dicts, one per registered tool."""
        specs = []
        for entry in self.tools:
            function_spec = {
                'name': entry.name,
                'description': entry.description,
                'parameters': entry.arguments,
            }
            specs.append({'type': 'function', 'function': function_spec})
        return specs
class DefaultToolManager(ToolManager):
    """ToolManager pre-populated with the search, page-reader, calculator and Python tools."""

    def __init__(self):
        """Register one instance of each built-in tool, in a fixed order."""
        super().__init__()
        builtin_tool_classes = (
            SearchTool,
            GetReadablePageContentsTool,
            CalculatorTool,
            PythonCodeTool,
        )
        for tool_class in builtin_tool_classes:
            self.add_tool(tool_class())
class SearchTool(Tool):
    """Tool named ``search_web`` that runs a text search through ``duckduckgo_search``."""

    def __init__(self):
        """Register the tool metadata and its single ``query`` string argument."""
        parameters = {'type': 'object', 'properties': {'query': {'type': 'string', 'description': 'The search query'}}}
        super().__init__("search_web", "Search the internet for information", parameters, "results:list[string]")

    def execute(self, arg: dict) -> str:
        """Search for ``arg['query']`` and format up to five hits.

        Each hit is rendered as title, body and link on separate lines; hits
        are separated by a blank line.
        """
        hits = duckduckgo_search.DDGS().text(arg['query'], max_results=5)
        formatted = []
        for hit in hits:
            formatted.append(f"{hit['title']}\n{hit['body']}\n{hit['href']}")
        return '\n\n'.join(formatted)
def get_readable_page_contents(url: str, timeout: float = 10.0) -> str:
    """Fetch a web page and return its readable content converted to Markdown.

    The page is downloaded with ``requests``, passed through readability's
    ``Document(...).summary()`` and the resulting HTML is converted with
    ``markdownify``.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely on a stalled
            connection, which would hang the calling tool.

    Returns:
        The Markdown text, or a string starting with
        ``"Error fetching readable content: "`` if anything fails. Errors are
        reported as text rather than raised so the caller always gets a string.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        doc = Document(response.content)
        content = doc.summary()
        return md(content)
    except Exception as e:
        # Deliberately broad: any failure is turned into an error string.
        return f"Error fetching readable content: {str(e)}"
class GetReadablePageContentsTool(Tool):
    """Tool named ``get_readable_page_contents`` wrapping the function of the same name."""

    def __init__(self):
        """Register the tool metadata and its single ``url`` string argument."""
        parameters = {'type': 'object', 'properties': {'url': {'type': 'string', 'description': 'The url of the web page'}}}
        super().__init__(
            "get_readable_page_contents",
            "Get the contents of a web page in a readable format",
            parameters,
            "contents:string",
        )

    def execute(self, arg: dict) -> str:
        """Return the readable contents of the page at ``arg['url']``."""
        page_url = arg['url']
        return get_readable_page_contents(page_url)
class CalculatorTool(Tool):
    """Tool named ``calculator`` that evaluates one arithmetic expression.

    The expression is parsed with ``ast`` and evaluated by a small whitelist
    interpreter instead of ``exec``/``eval``: ``exec`` always returns ``None``
    (so its result can never be the value of the expression), and both would
    run arbitrary code supplied by the model.

    Supported syntax: numeric literals, ``+ - * / // % **``, unary ``+``/``-``,
    parentheses, the public names of the ``math`` module (bare, e.g.
    ``sqrt(2)``, or qualified, e.g. ``math.sqrt(2)``) and the builtins
    ``abs``, ``round``, ``min`` and ``max``.
    """

    def __init__(self):
        """Register the tool metadata and its single ``expression`` string argument."""
        super().__init__("calculator", "Perform a calculation", {'type': 'object', 'properties': {'expression': {'type': 'string', 'description': 'The mathematical expression to evaluate, should be a python mathematical expression'}}}, "result:string")

    def execute(self, arg: dict) -> str:
        """Evaluate ``arg['expression']`` and return the value as a string.

        Returns:
            ``str(value)`` on success, or a string starting with
            ``"Error executing code: "`` if the argument is missing, the
            expression is invalid or unsupported, or evaluation fails
            (for example division by zero).
        """
        try:
            return str(self._evaluate(arg["expression"]))
        except Exception as e:
            # Deliberately broad: every failure is reported back as text.
            return f"Error executing code: {str(e)}"

    @staticmethod
    def _evaluate(expression: str):
        """Return the value of a restricted arithmetic ``expression``.

        Raises:
            SyntaxError: if the text is not a valid Python expression.
            ValueError: if the expression uses a construct outside the whitelist.
        """
        # Imported locally so this block does not depend on file-level imports.
        import ast
        import math
        import operator

        binary_ops = {
            ast.Add: operator.add,
            ast.Sub: operator.sub,
            ast.Mult: operator.mul,
            ast.Div: operator.truediv,
            ast.FloorDiv: operator.floordiv,
            ast.Mod: operator.mod,
            # NOTE(review): very large exponents can still be slow - no cap is applied.
            ast.Pow: operator.pow,
        }
        unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}
        math_names = {
            name: getattr(math, name) for name in dir(math) if not name.startswith("_")
        }
        allowed_names = dict(math_names)
        allowed_names.update({"abs": abs, "round": round, "min": min, "max": max})

        def visit(node):
            if isinstance(node, ast.Expression):
                return visit(node.body)
            if isinstance(node, ast.Constant):
                if isinstance(node.value, (int, float, complex)):
                    return node.value
                raise ValueError(f"Unsupported constant: {node.value!r}")
            if isinstance(node, ast.BinOp):
                handler = binary_ops.get(type(node.op))
                if handler is None:
                    raise ValueError(f"Unsupported operator: {type(node.op).__name__}")
                return handler(visit(node.left), visit(node.right))
            if isinstance(node, ast.UnaryOp):
                handler = unary_ops.get(type(node.op))
                if handler is None:
                    raise ValueError(f"Unsupported operator: {type(node.op).__name__}")
                return handler(visit(node.operand))
            if isinstance(node, ast.Name):
                if node.id in allowed_names:
                    return allowed_names[node.id]
                raise ValueError(f"Unknown name: {node.id}")
            if isinstance(node, ast.Attribute):
                # Only the "math.<name>" spelling is accepted; no other attribute access.
                if (
                    isinstance(node.value, ast.Name)
                    and node.value.id == "math"
                    and node.attr in math_names
                ):
                    return math_names[node.attr]
                raise ValueError("Unsupported attribute access")
            if isinstance(node, ast.Call):
                if node.keywords:
                    raise ValueError("Keyword arguments are not supported")
                function = visit(node.func)
                return function(*[visit(argument) for argument in node.args])
            raise ValueError(f"Unsupported expression element: {type(node).__name__}")

        # strip() because leading whitespace is a syntax error in eval mode.
        return visit(ast.parse(expression.strip(), mode="eval"))
class PythonCodeTool(Tool):
    """Tool named ``python_code`` that runs a code string in a child Python process.

    NOTE(review): the code comes from the model and runs with this process's
    privileges; the only limit applied here is the wall-clock timeout.
    """

    # Wall-clock limit for the child process, in seconds.
    TIMEOUT_SECONDS = 10

    def __init__(self):
        """Register the tool metadata and its single ``code`` string argument."""
        super().__init__("python_code", "Execute python code", {'type': 'object', 'properties': {'code': {'type': 'string', 'description': 'The python code to execute, should be a single line of valid python'}}}, "result:string")

    def execute(self, arg: dict) -> str:
        """Run ``arg['code']`` with ``python -c`` and report what happened.

        Returns:
            Four ``key: value`` lines (``stdout``, ``stderr``, ``return_value``
            and ``execution_time`` in seconds) on completion, an
            ``"Error: Code execution timed out ..."`` message if the time limit
            is exceeded, or a string starting with ``"Error executing code: "``
            for any other failure (including a missing ``code`` key).
        """
        start_time = time.time()
        try:
            # sys.executable runs the same interpreter as this process; a bare
            # 'python' may be missing from PATH or be a different version.
            # subprocess.run() kills and reaps the child itself on timeout.
            completed = subprocess.run(
                [sys.executable or 'python', '-c', arg['code']],
                capture_output=True,
                text=True,
                timeout=self.TIMEOUT_SECONDS,
            )
        except subprocess.TimeoutExpired:
            return f"Error: Code execution timed out after {self.TIMEOUT_SECONDS} seconds"
        except Exception as e:
            # Deliberately broad: every failure is reported back as text.
            return f"Error executing code: {str(e)}"
        execution_time = time.time() - start_time

        result = {
            'stdout': completed.stdout,
            'stderr': completed.stderr,
            'return_value': completed.returncode,
            'execution_time': execution_time,
        }
        return '\n'.join([f"{k}: {v}" for k, v in result.items()])