Checkpoint, I have tools working, but there is a better way to do this.

Tanishq Dubey 2024-09-28 11:12:43 -04:00
parent d050549dd8
commit 5923b8480b
3 changed files with 204 additions and 111 deletions

main.py

@@ -1,15 +1,18 @@
import re
from flask import Flask, send_from_directory
from flask_socketio import SocketIO, emit
from flask_openapi3 import OpenAPI, Info
from pydantic import BaseModel
from typing import List
from models import model_manager
from tools import DefaultToolManager
import structlog
import time
import psutil
import GPUtil
import threading
import os
import datetime
@@ -18,7 +21,7 @@ logger = structlog.get_logger()
openapi = OpenAPI(__name__, info=Info(title="LLM Chat Server", version="1.0.0"))
app = openapi
socketio = SocketIO(app, cors_allowed_origins="*")
tool_manager = DefaultToolManager()
@app.route('/')
def index():
logger.info("Serving index.html")
@@ -36,51 +39,9 @@ def handle_chat_request(data):
logger.info("Received chat request", user_input=user_input)
start_time = time.time()
full_context = ""
try:
# Step 1: Generate a plan using the initial LLM
emit('thinking', {'step': 'Generating plan'})
plan, plan_generation = generate_plan(user_input)
full_context += f"Plan Thinking:\n{plan_generation}"
full_context += f"Plan:\n{plan}"
emit('thought', {'content': f"Plan Thinking:\n{plan_generation}"})
emit('thought', {'content': f"Plan:\n{plan}"})
if plan[0].strip().lower() == "direct_answer":
final_response = plan[1]
thinking_time = round(time.time() - start_time, 2)
emit('chat_response', {
'response': final_response,
'thinking_time': thinking_time
})
return
# Step 2: Execute each step of the plan
step_results = []
for i, step in enumerate(plan):
emit('thinking', {'step': f'Executing step {i+1}'})
while True:
best_model, model_selection = select_best_model(step, step_results, full_context)
if best_model in model_manager.model_capabilities:
break
logger.warning(f"Selected model {best_model} is not in the list of available models. Retrying...")
emit('thought', {'content': f"Selected model for step {i+1}:\n{model_selection}"})
# summary, summary_generation = summarize_context(f"Plan: {plan}\n\nSteps: {step_results}")
# emit('thought', {'content': f"Context summary:\n{summary_generation}"})
step_result, step_execution = execute_step(step, best_model, step_results, full_context)
emit('thought', {'content': f"Step {i+1} result:\n{step_execution}"})
emit('thought', {'content': f"Result {i+1}:\n{step_result}"})
step_results.append(step_result)
full_context += f"Step {i+1} result:\n{step_execution}"
# Step 3: Generate final response
emit('thinking', {'step': 'Generating final response'})
final_response, final_generation = generate_final_response(user_input, plan, step_results)
emit('thought', {'content': f"Final response generation:\n{final_generation}"})
end_time = time.time()
thinking_time = round(end_time - start_time, 2)
final_response = answer_question(user_input)
thinking_time = round(time.time() - start_time, 2)
emit('chat_response', {
'response': final_response,
'thinking_time': thinking_time
@@ -94,83 +55,130 @@ def handle_chat_request(data):
'thinking_time': thinking_time
})
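# For reference, a minimal client-side sketch of this event flow (illustrative:
# it assumes the handler above is registered as @socketio.on('chat_request') and
# that the payload key is 'message' -- neither is shown in this hunk). It uses
# the python-socketio client package, which is not a dependency of this file:
#
#   import socketio
#   sio = socketio.Client()
#   sio.on('chat_response', lambda data: print(data['response']))
#   sio.on('thought', lambda data: print('thought:', data))
#   sio.connect('http://localhost:5000')
#   sio.emit('chat_request', {'message': 'What is the capital of France?'})
#   sio.wait()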
PLAN_GENERATE_PROMPT = """
You are building a "chain of thought" workflow for a series of LLMs to complete a task provided by a user.
Your first task is to "think" through the problem provided by the user. Probe what it would take to complete the task, see if there are hidden nuances, what constraints might be relevant, and how to be efficient.
This thinking should question the premise of the task and set the scene for a plan of attack to be created.
Verbalize your thoughts out loud, allowing the user to see your thought process. This thought process will also be used as context for processing the generated plan.
This thought process should mimic the process of a human, and not be a simple list of steps, but should be a narrative of thought that a human would have.
Each step in the formulated plan is a step that a separate LLM will complete. The LLM that will complete the step will be selected based on the scope of the step and the capabilities of the available models.
There are models that are good at coding and math, and there are models that are good at reasoning and planning. Some models are generalists, multilingual, or conversational, and some are vision models.
Use this context of the possible models to shape each step such that an LLM can complete the step given the step and some context.
Steps should follow a logical "chain of thought" in order to best complete the overall task.
Steps should be self contained and be designed such that the results of one step can be passed on to the next step.
Steps should be phrased in such a way that it acts as a prompt or instruction to the LLM that will complete the step.
Each step will return a result and a thought process. The thought process is extremely important: it is the "chain of thought" that the LLM went through to complete the step. This thought process is critical for the next step in the plan.
Consider how results from one step can be combined with results from another step and consider how the chain of thought from one step can inform the next step when designing each step.
Try to minimize the number of steps required to complete the task, since running many steps is expensive.
Your output should be your thought process, followed by a single line titled "STEPS", followed by each step to take, one step per line.
Do not add any sort of markdown formatting, code formatting, or any other formatting.
Do not add any preamble, postamble, or other text, only the thought process and the steps.
"""
ANSWER_QUESTION_PROMPT = f"""
You are Dewy, created by DWS.
The current date is {datetime.datetime.now().strftime("%A, %B %d, %Y")}. Dewy's knowledge base was last updated in April 2024.
Answer questions about events prior to and after April 2024 the way a highly informed individual in April 2024 would, and let the human know this when relevant.
You work through an iterative planning, execution, and reflection loop.
The user will start a conversation with you. Before replying, you will have a chance to think through your response.
Thinking is done on an internal scratchpad. You may generate context into this scratchpad to enrich your response.
Consider the following example:
You can generate three types of context: TOOLUSAGE, THOUGHTS, and FINAL RESPONSE.
TOOLUSAGE is when you are using a tool.
THOUGHTS is when you are thinking through your response.
FINAL RESPONSE is when you have a final response to the user.
When responding, you may only respond with one of the context types.
Do not mix context types in your response.
You must have at least one THOUGHTS.
You cannot have a TOOLUSAGE at the same time as THOUGHTS or FINAL RESPONSE.
Prompt: Write a program to reverse a string, then output ASCII block art of that reversed string. Do this in python.
So there are two parts to this task. First, we need to reverse the input string. Then we need to print the ASCII block art for each character in the reversed string.
We should be able to reverse the string using either a simple loop, or a python slice. Slicing is simpler, so we should use that.
For the ASCII block art, the challenge is in creating a mapping between each character and its block art representation. There are a few ways to go about this:
- Find a library that converts text to block art
- Create our own mapping from characters to block art
- Create a procedurally generated mapping from characters to block art
Procedural generation could be done with an algorithm, but coming up with a good algorithm could be challenging.
Generating a dictionary could be a good approach, but there are 26 letters in the alphabet, and 10 digits, so we would need 36 different outputs for the block art.
We should search for a library that already does this, import it, and call it on the result of the string reversal. We would also need to tell the user to install the library.
You will begin your response with either TOOLUSAGE, THOUGHTS, or FINAL RESPONSE.
We're now ready to create our plan.
THOUGHTS:
Thoughts can be used to generate additional context that can be used to complete your task.
STEPS
1. Write a function that takes a string and reverses it.
2. Write a function that takes a string and returns the ASCII block art for each character in the string, this must be done using a library.
3. Combine the two functions into a single program.
FINAL RESPONSE:
Once you have thought through your response, used any necessary tools, and have a final response, you will output that response to the user.
Your output should be in Markdown format.
Do not have any preamble or other text.
---
TOOLUSAGE:
The following tools are available to you. You can use these tools to help you complete your task.
Now you try.
"""
_REMAINDER_PROMPT = f"""
Each task you create should be self-contained and designed such that the results of one step can be passed on to the next step.
Try to minimize the number of steps required to complete the task.
Output only a numbered list of steps; each step should be on a separate line.
Do not output any preamble or other text, only the list of steps.
If you think a task can be completed by a single step, then you can output a single step.
If you can directly answer the question, you must begin your response with a single line containing the text "DIRECT_ANSWER" and then provide the answer to the question on the next line.
{tool_manager.get_tools_and_descriptions_for_prompt()}
You call a tool by placing the following text on a new line:
<|tool_name|arguments|>
The tool will execute and output the result.
The scratchpad will be updated with the tool's output, and you will be able to continue your thought process.
If you are using a tool, end your response with the tool call line.
Here are some samples:
Input: Write a program to reverse a string, then output the ASCII art of that reversed string. Do this in python.
Steps:
1. Define a template for a program that prints the ASCII art of the reversed string.
2. Fill in the logic to reverse the string.
3. Fill in the logic to print the ASCII art of the reversed string.
4. Output the final program.
Below are a few examples of how you can use the tools and scratchpad. Each example is separated by an <example> open and close tag. Each example is a representation of the scratchpad, and each iteration on the scratchpad is delimited by an <iteration> open and close tag.
<example>
What is the Wikipedia article of the day?
Input: What are the oceans of the world?
Steps:
1. Use the encyclopedia tool to get the page on the oceans of the world, parse, and output the results.
<iteration>
THOUGHTS
The Wikipedia article of the day is decided every day. Because it is dynamic, I will need to use a tool to search for the article.
</iteration>
Input: What is the perfect gas law?
Steps:
DIRECT_ANSWER
The perfect gas law is the equation of state of a hypothetical ideal gas. The formula is $$PV = nRT$$ where P is pressure, V is volume, n is the number of moles, R is the ideal gas constant, and T is temperature.
<iteration>
TOOLUSAGE
<|search_web|Wikipedia article of the day|>
<|RESULTS|>
Wikipedia:Today's featured article - Wikipedia -- This star symbolizes the featured content on Wikipedia. Each day, a summary (roughly 975 characters long) of one of Wikipedia's featured articles (FAs) appears at the top of the Main Page as Today's Featured Article (TFA). The Main Page is viewed about 4.7 million times daily. TFAs are scheduled by the TFA coordinators: Wehwalt, Dank and Gog ... -> https://en.wikipedia.org/wiki/Wikipedia:Today's_featured_article
Wikipedia:Today's featured article/Most viewed - Wikipedia -- This TFA STATS page is an attempt to recognise Wikipedia's most viewed today's featured articles.Articles are listed below based on page views surpassing 100,000 hits on the day of the article's appearance on the Main Page. Although Wolfgang Amadeus Mozart was Wikipedia's first Featured Article to be featured on the Main Page, page view statistics were not tracked until December 2007. -> https://en.wikipedia.org/wiki/Wikipedia:Today's_featured_article/Most_viewed
Wikipedia:Featured articles - Wikipedia -- There are 6,582 featured articles out of 6,886,376 articles on the English Wikipedia (about 0.1% or one out of every 1,040 articles). Articles that no longer meet the criteria can be proposed for improvement or removal at featured article review. On non-mobile versions of our website, a small bronze star icon () on the top right corner of an ... -> https://en.wikipedia.org/wiki/Wikipedia:Featured_articles
Wikipedia -- Wikipedia is a free online encyclopedia, created and edited by volunteers around the world and hosted by the Wikimedia Foundation. English 6,873,000+ articles 日本語 1,427,000+ 記事 -> https://www.wikipedia.org/
How does Wikipedia article of the day in this subreddit get selected ... -- The ones posted here as the article of the day are Wikipedia's selected article of the day, which can be seen on the English wiki's main page. More info about how they are selected on Wikipedia, including selection criteria and links to upcoming featured article candidates (and discussion about them) can be found on the featured articles about ... -> https://www.reddit.com/r/wikipedia/comments/hbuosu/how_does_wikipedia_article_of_the_day_in_this/
</iteration>
<iteration>
THOUGHTS
From the results, I can see that the first result provides a link to something that could be about the Wikipedia article of the day. I should use a tool to get the contents of the page and see if it answers the user's question.
</iteration>
<iteration>
TOOLUSAGE
<|get_readable_page_contents|https://en.wikipedia.org/wiki/Wikipedia:Today's_featured_article|>
<|RESULTS|>
Title: Wikipedia:Today's featured article
\nFrom Wikipedia, the free encyclopedia\n\n\n\nFeatured articles shown on the Main Page\n\n\n\nFrom today\'s featured article\n-----------------------------\n\n\n\n**[Addie Viola Smith](/wiki/Addie_Viola_Smith "Addie Viola Smith")** (18931975\\) was an American attorney who served as the U.S. [trade commissioner](/wiki/Trade_commissioner "Trade commissioner") to Shanghai from 1928 to 1939, the first female [Foreign Service officer](/wiki/Foreign_Service_officer "Foreign Service officer") in the [U.S. Foreign Service](/wiki/United_States_Foreign_Service "United States Foreign Service") to work under the [Commerce Department](/wiki/United_States_Department_of_Commerce "United States Department of Commerce"), and the first woman to serve as trade commissioner. A native of [Stockton, California](/wiki/Stockton,_California "Stockton, California"), Smith moved to Washington, D.C., in 1917\\. While working for the [United States Department of Labor](/wiki/United_States_Department_of_Labor "United States Department of Labor"), she attended the [Washington College of Law](/wiki/American_University_Washington_College_of_Law "American University Washington College of Law") part\\-time, earning a [Bachelor of Laws](/wiki/Bachelor_of_Laws "Bachelor of Laws") degree in 1920\\. She joined the Foreign Service in October that year. Posted to Beijing as a clerk, she was promoted to assistant trade commissioner in Shanghai in 1922, and to trade commissioner in 1928\\. She later held roles in the U.S. government, world organizations, and the [United Nations](/wiki/United_Nations "United Nations"). Smith met her life partner, [Eleanor Mary Hinder](/wiki/Eleanor_Hinder "Eleanor Hinder"), in 1926; they moved to Hinder\'s native Australia in 1957, where stone seats are dedicated to them at the [E.\xa0G. Waterhouse National Camellia Gardens](/wiki/Eben_Gowrie_Waterhouse#Camellias "Eben Gowrie Waterhouse"). (**[Full\xa0article...](/wiki/Addie_Viola_Smith "Addie Viola Smith")**)\n\n\n\n\n\n\nFrom tomorrow\'s featured article\n--------------------------------\n\n\n\n\nFrom the day after tomorrow\'s featured article\n----------------------------------------------\n\n\n\n![](https://login.wikimedia.org/wiki/Special:CentralAutoLogin/start?type=1x1)\n\n'
</iteration>
<iteration>
THOUGHTS
Based on the tool results, I can see that this page describes the daily featured article on Wikipedia. Today's featured article is Addie Viola Smith, at the URL https://en.wikipedia.org/wiki/Addie_Viola_Smith.
The tool response also contains a short description of the article. I will use this to answer the user's question.
</iteration>
<iteration>
FINAL RESPONSE
The Wikipedia article of the day is [Addie Viola Smith](https://en.wikipedia.org/wiki/Addie_Viola_Smith). Addie Viola Smith was an American attorney who served as the U.S. trade commissioner to Shanghai from 1928 to 1939, the first female Foreign Service officer in the U.S. Foreign Service to work under the Commerce Department, and the first woman to serve as trade commissioner.
</iteration>
</example>
Do not reference the above examples in your response.
Any response that does not conform to the above rules will be rejected.
Your response must begin with either TOOLUSAGE, THOUGHTS, or FINAL RESPONSE.
"""
def generate_plan(user_input: str) -> tuple[List[str], str]:
logger.debug("Generating plan", prompt=user_input, system=PLAN_GENERATE_PROMPT)
response = model_manager.generate_text("qwen2.5:7b", user_input, max_length=1024, system=PLAN_GENERATE_PROMPT)
plan = response.split("STEPS")[1].strip()
response_no_steps = response.split("STEPS")[0].strip()
return [step.strip() for step in plan.split("\n") if step.strip()], response_no_steps
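# Illustrative example (not a real model run): a response such as
#   "We need two parts...\nSTEPS\n1. Reverse the string\n2. Print block art"
# returns (["1. Reverse the string", "2. Print block art"], "We need two parts...").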
def answer_question(user_input: str) -> str:
scratchpad = user_input
response = model_manager.generate_text("qwen2.5:7b", user_input, max_length=1024, system=ANSWER_QUESTION_PROMPT)
logger.debug("Initial response", response=response)
emit('thinking', {'step': 'Answering Question'})
emit('thought', {'content': response})
done = False
# Loop until the model produces a FINAL RESPONSE
while not done:
# The first line of the response is the context type; the rest is the content
context_type = response.split("\n")[0].strip().lower()
content = "\n".join(response.split("\n")[1:])
emit('thought', {context_type: content})
logger.debug("Context type", context_type=context_type)
if context_type == "toolusage":
tool_name = content.split("|")[0].split("|")[0]
arguments = content.split("|")[1].split("|")[0]
emit('thinking', {'step': f'Executing tool {tool_name} with arguments {arguments}'})
tool_result = tool_manager.execute_tool(tool_name, arguments)
emit('thought', {'content': f"Tool {tool_name} result:\n{tool_result}"})
scratchpad += f"\n<|RESULTS|>\n{tool_result}"
elif context_type == "final response":
done = True
return content
elif context_type == "thoughts":
scratchpad += "\n" + content
# Generate a response based on the scratchpad
response = model_manager.generate_text("qwen2.5:7b", scratchpad, max_length=1024, system=ANSWER_QUESTION_PROMPT)
logger.debug("Generated response", response=response)
input("Press Enter to continue...")
SELECT_BEST_MODEL_PROMPT = f"""

models.py

@@ -25,7 +25,7 @@ class ModelManager:
logger.info("Selected best model", required_capability=required_capability, selected_model=selected_model)
return selected_model
def generate_text(self, model_name, prompt, max_length=100, system="You are a helpful assistant."):
def generate_text(self, model_name, prompt, max_length=100, system="You are a helpful assistant.", stream=False):
logger.debug("Generating text", model=model_name, prompt=prompt, max_length=max_length)
# Check if model exists
try:
@@ -38,7 +38,7 @@ class ModelManager:
else:
logger.exception("Error pulling model", model=model_name, error=str(e))
raise e
response = ollama.generate(model=model_name, prompt=prompt, system=system)
response = ollama.generate(model=model_name, prompt=prompt, system=system, stream=stream)
logger.debug("Text generated", model=model_name, response=response['response'])
return response['response']
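# Note: with stream=True, ollama.generate returns an iterator of chunks rather
# than a single dict, so the response['response'] access above would fail for a
# streamed call. A minimal streaming variant might look like this (a sketch,
# assuming each streamed chunk carries a 'response' fragment):
def generate_text_stream(self, model_name, prompt, system="You are a helpful assistant."):
    for chunk in ollama.generate(model=model_name, prompt=prompt, system=system, stream=True):
        yield chunk['response']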

tools.py (new file)

@@ -0,0 +1,85 @@
import duckduckgo_search
import requests
from readability.readability import Document
from markdownify import markdownify as md
class Tool:
def __init__(self, name: str, description: str, arguments: str, returns: str):
self.name = name
self.description = description
self.arguments = arguments
self.returns = returns
def execute(self, arguments: str) -> str:
raise NotImplementedError  # subclasses provide their own execution logic
class ToolManager:
def __init__(self):
self.tools = []
def add_tool(self, tool: Tool):
self.tools.append(tool)
def get_tool(self, name: str) -> Tool:
for tool in self.tools:
if tool.name == name:
return tool
return None
def get_tools_and_descriptions_for_prompt(self):
return "\n".join([f"{tool.name}: {tool.description}" for tool in self.tools])
class DefaultToolManager(ToolManager):
def __init__(self):
super().__init__()
self.add_tool(SearchTool())
self.add_tool(GetReadablePageContentsTool())
self.add_tool(CalculatorTool())
self.add_tool(PythonCodeTool())
class SearchTool(Tool):
def __init__(self):
super().__init__("search_web", "Search the internet for information -- Takes a search query as an argument", "query:string", "results:list[string]")
def execute(self, arg: str) -> str:
res = duckduckgo_search.DDGS().text(arg, max_results=5)
return [f"{r['title']}\n{r['body']}\n{r['href']}" for r in res]
def get_readable_page_contents(url: str) -> str:
try:
response = requests.get(url, timeout=30)
response.raise_for_status()
doc = Document(response.content)
content = doc.summary()
return md(content)
except Exception as e:
return f"Error fetching readable content: {str(e)}"
class GetReadablePageContentsTool(Tool):
def __init__(self):
super().__init__("get_readable_page_contents", "Get the contents of a web page in a readable format -- Takes a url as an argument", "url:string", "contents:string")
def execute(self, arg: str) -> str:
return get_readable_page_contents(arg)
class CalculatorTool(Tool):
def __init__(self):
super().__init__("calculator", "Perform a calculation -- Takes a python mathematical expression as an argument", "expression:string", "result:string")
def execute(self, arg: str) -> str:
return str(eval(arg))  # eval on raw input is unsafe; acceptable only for trusted input
class PythonCodeTool(Tool):
def __init__(self):
super().__init__("python_code", "Execute python code -- Takes a python code as an argument, code must be a single line of valid python", "code:string", "result:string")
def execute(self, arg: str) -> str:
return str(eval(arg))  # same caveat as the calculator: eval executes arbitrary code
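# Example usage (illustrative):
if __name__ == "__main__":
    manager = DefaultToolManager()
    print(manager.get_tools_and_descriptions_for_prompt())
    calc = manager.get_tool("calculator")
    print(calc.execute("2 + 2 * 3"))  # -> "8"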