SIGN IN SIGN UP

A framework to enable multimodal models to operate a computer.

0 0 1 Python
2024-01-03 20:02:23 -08:00
import sys
import os
2024-01-12 07:36:13 -08:00
import time
2024-01-05 08:00:25 -08:00
import asyncio
from prompt_toolkit.shortcuts import message_dialog
from prompt_toolkit import prompt
2024-01-07 07:15:16 -08:00
from operate.exceptions import ModelNotRecognizedException
2024-01-15 11:16:38 -08:00
import platform
2024-01-13 06:41:47 -08:00
# from operate.models.prompts import USER_QUESTION, get_system_prompt
from operate.models.prompts import (
USER_QUESTION,
get_system_prompt,
)
2024-01-15 09:58:20 -08:00
from operate.config import Config
2024-01-07 07:06:52 -08:00
from operate.utils.style import (
ANSI_GREEN,
ANSI_RESET,
ANSI_YELLOW,
ANSI_RED,
ANSI_BRIGHT_MAGENTA,
2024-01-13 06:15:49 -08:00
ANSI_BLUE,
style,
)
2024-01-13 07:04:09 -08:00
from operate.utils.operating_system import OperatingSystem
from operate.models.apis import get_next_action
# Load configuration
# Module-level Config instance shared by this file: main() sets its `verbose`
# flag and calls its `validation` method; operate() reads `verbose`.
config = Config()
2024-01-12 16:00:34 -08:00
# Module-level OperatingSystem instance; operate() calls its `press`, `write`
# and `mouse` methods to carry out the model's operations.
operating_system = OperatingSystem()
2024-02-16 16:55:07 -08:00
def main(model, terminal_prompt, voice_mode=False, verbose_mode=False):
    """
    Main function for the Self-Operating Computer.

    Resolves the objective (from the terminal argument, the microphone, or an
    interactive prompt), then repeatedly asks the model for the next
    operations and executes them until the run stops.

    Parameters:
    - model: The model used for generating responses.
    - terminal_prompt: A string representing the prompt provided in the terminal.
      When truthy it is used as the objective and the intro dialog is skipped.
    - voice_mode: A boolean indicating whether to enable voice mode.
    - verbose_mode: A boolean stored on `config.verbose`; enables debug prints.

    Returns:
    None
    """
    # Upper bound on loop_count before the run is abandoned.
    max_loop_count = 10

    mic = None

    config.verbose = verbose_mode
    config.validation(model, voice_mode)

    # Initialize `WhisperMic`, if `voice_mode` is True
    if voice_mode:
        try:
            # Imported lazily so the audio dependency is only needed in voice mode.
            from whisper_mic import WhisperMic

            # Initialize WhisperMic if import is successful
            mic = WhisperMic()
        except ImportError:
            print(
                "Voice mode requires the 'whisper_mic' module. Please install it using 'pip install -r requirements-audio.txt'"
            )
            sys.exit(1)

    # Skip message dialog if prompt was given directly
    if not terminal_prompt:
        message_dialog(
            title="Self-Operating Computer",
            text="An experimental framework to enable multimodal models to operate computers",
            style=style,
        ).run()
    else:
        print("Running direct prompt...")

    # Clear the console
    if platform.system() == "Windows":
        os.system("cls")
    else:
        print("\033c", end="")

    if terminal_prompt:  # Skip objective prompt if it was given as an argument
        objective = terminal_prompt
    elif voice_mode:
        print(
            f"{ANSI_GREEN}[Self-Operating Computer]{ANSI_RESET} Listening for your command... (speak now)"
        )
        try:
            objective = mic.listen()
        except Exception as e:
            print(f"{ANSI_RED}Error in capturing voice input: {e}{ANSI_RESET}")
            return  # Exit if voice input fails
    else:
        print(
            f"[{ANSI_GREEN}Self-Operating Computer {ANSI_RESET}|{ANSI_BRIGHT_MAGENTA} {model}{ANSI_RESET}]\n{USER_QUESTION}"
        )
        print(f"{ANSI_YELLOW}[User]{ANSI_RESET}")
        objective = prompt(style=style)

    system_prompt = get_system_prompt(model, objective)
    system_message = {"role": "system", "content": system_prompt}
    messages = [system_message]

    loop_count = 0
    session_id = None

    while True:
        if config.verbose:
            print("[Self Operating Computer] loop_count", loop_count)
        try:
            # session_id returned by one call is passed back into the next one.
            operations, session_id = asyncio.run(
                get_next_action(model, messages, objective, session_id)
            )

            stop = operate(operations, model)
            if stop:
                break

            loop_count += 1
            if loop_count > max_loop_count:
                break
        except (ModelNotRecognizedException, Exception) as e:
            # Both exception kinds are reported the same way and end the run.
            print(
                f"{ANSI_GREEN}[Self-Operating Computer]{ANSI_RED}[Error] -> {e} {ANSI_RESET}"
            )
            break
2024-01-11 07:48:17 -08:00
2024-02-16 16:55:07 -08:00
def operate(operations, model):
    """
    Execute the operations returned by the model, in order.

    Parameters:
    - operations: An iterable of dict-like operations. Each is read with
      `.get()` for "operation" (the type), "thought", and the type-specific
      keys "keys", "content", "x"/"y" or "summary".
    - model: The model name, used only in the printed status header.

    Returns:
    True when the caller should stop (a "done" operation was reached, or an
    operation type was missing or not recognized); False once every
    operation has been executed.
    """
    if config.verbose:
        print("[Self Operating Computer][operate]")

    for operation in operations:
        if config.verbose:
            print("[Self Operating Computer][operate] operation", operation)

        # wait one second before acting on each operation
        time.sleep(1)

        # A missing or non-string "operation" value becomes a type that matches
        # no branch, so it is reported through the unknown-operation path
        # below instead of raising AttributeError on `.lower()`.
        operate_type = str(operation.get("operation") or "").lower()
        operate_thought = operation.get("thought")
        operate_detail = ""
        if config.verbose:
            print("[Self Operating Computer][operate] operate_type", operate_type)

        if operate_type in ("press", "hotkey"):
            keys = operation.get("keys")
            operate_detail = keys
            operating_system.press(keys)
        elif operate_type == "write":
            content = operation.get("content")
            operate_detail = content
            operating_system.write(content)
        elif operate_type == "click":
            click_detail = {"x": operation.get("x"), "y": operation.get("y")}
            operate_detail = click_detail
            operating_system.mouse(click_detail)
        elif operate_type == "done":
            summary = operation.get("summary")
            print(
                f"[{ANSI_GREEN}Self-Operating Computer {ANSI_RESET}|{ANSI_BRIGHT_MAGENTA} {model}{ANSI_RESET}]"
            )
            print(f"{ANSI_BLUE}Objective Complete: {ANSI_RESET}{summary}\n")
            return True
        else:
            print(
                f"{ANSI_GREEN}[Self-Operating Computer]{ANSI_RED}[Error] unknown operation response :({ANSI_RESET}"
            )
            print(
                f"{ANSI_GREEN}[Self-Operating Computer]{ANSI_RED}[Error] AI response {ANSI_RESET}{operation}"
            )
            # Stop the run: remaining operations are not attempted.
            return True

        # Report the action that was just performed.
        print(
            f"[{ANSI_GREEN}Self-Operating Computer {ANSI_RESET}|{ANSI_BRIGHT_MAGENTA} {model}{ANSI_RESET}]"
        )
        print(f"{operate_thought}")
        print(f"{ANSI_BLUE}Action: {ANSI_RESET}{operate_type} {operate_detail}\n")

    return False