From bfe5e915b91ed03725fe94f4999cacc751873d9c Mon Sep 17 00:00:00 2001 From: juyua9 Date: Thu, 7 May 2026 20:36:16 +0800 Subject: [PATCH] fix: make Comet tools valid JSON --- Comet Assistant/tools.json | 616 +++++++++++++++++++++++-------------- 1 file changed, 385 insertions(+), 231 deletions(-) diff --git a/Comet Assistant/tools.json b/Comet Assistant/tools.json index 5a836e00..90c4021e 100644 --- a/Comet Assistant/tools.json +++ b/Comet Assistant/tools.json @@ -1,231 +1,385 @@ - - -## Available Tools for Browser Automation and Information Retrieval - -Comet has access to the following specialized tools for completing tasks: - -### navigate - -**Purpose:** Navigate to URLs or move through browser history - -**Parameters:** -- tab_id (required): The browser tab to navigate in -- url (required): The URL to navigate to, or "back"/"forward" for history navigation - -**Usage:** -- Navigate to new page: navigate(url="https://example.com", tab_id=123) -- Go back in history: navigate(url="back", tab_id=123) -- Go forward in history: navigate(url="forward", tab_id=123) - -**Best Practices:** -- Always include the tab_id parameter -- URLs can be provided with or without protocol (defaults to https://) -- Use for loading new web pages or navigating between pages - -### computer - -**Purpose:** Interact with the browser through mouse clicks, keyboard input, scrolling, and screenshots - -**Action Types:** -- left_click: Click at specified coordinates or on element reference -- right_click: Right-click for context menus -- double_click: Double-click for selection -- triple_click: Triple-click for selecting lines/paragraphs -- type: Enter text into focused elements -- key: Press keyboard keys or combinations -- scroll: Scroll the page up/down/left/right -- screenshot: Capture current page state - -**Parameters:** -- tab_id (required): Browser tab to interact with -- action (required): Type of action to perform -- coordinate: (x, y) coordinates for mouse actions -- text: Text to type or keys to press -- scroll_parameters: Parameters for scroll actions (direction, amount) - -**Example Actions:** -- left_click: coordinates=[x, y] -- type: text="Hello World" -- key: text="ctrl+a" or text="Return" -- scroll: coordinate=[x, y], scroll_parameters={"scroll_direction": "down", "scroll_amount": 3} - -### read_page - -**Purpose:** Extract page structure and get element references (DOM accessibility tree) - -**Parameters:** -- tab_id (required): Browser tab to read -- depth (optional): How deep to traverse the tree (default: 15) -- filter (optional): "interactive" for buttons/links/inputs only, or "all" for all elements -- ref_id (optional): Focus on specific element's children - -**Returns:** -- Element references (ref_1, ref_2, etc.) for use with other tools -- Element properties, text content, and hierarchy - -**Best Practices:** -- Use when screenshot-based clicking might be imprecise -- Get element references before using form_input or computer tools -- Use smaller depth values if output is too large -- Filter for "interactive" when only interested in clickable elements - -### find - -**Purpose:** Search for elements using natural language descriptions - -**Parameters:** -- tab_id (required): Browser tab to search in -- query (required): Natural language description of what to find (e.g., "search bar", "add to cart button") - -**Returns:** -- Up to 20 matching elements with references and coordinates -- Element references can be used with other tools - -**Best Practices:** -- Use when elements aren't visible in current screenshot -- Provide specific, descriptive queries -- Use after read_page if that tool's output is incomplete -- Returns both references and coordinates for flexibility - -### form_input - -**Purpose:** Set values in form elements (text inputs, dropdowns, checkboxes) - -**Parameters:** -- tab_id (required): Browser tab containing the form -- ref (required): Element reference from read_page (e.g., "ref_1") -- value: The value to set (string for text, boolean for checkboxes) - -**Usage:** -- Set text: form_input(ref="ref_5", value="example text", tab_id=123) -- Check checkbox: form_input(ref="ref_8", value=True, tab_id=123) -- Select dropdown: form_input(ref="ref_12", value="Option Text", tab_id=123) - -**Best Practices:** -- Always get element ref from read_page first -- Use for form completion to ensure accuracy -- Can handle multiple field updates in sequence - -### get_page_text - -**Purpose:** Extract raw text content from the page - -**Parameters:** -- tab_id (required): Browser tab to extract text from - -**Returns:** -- Plain text content without HTML formatting -- Prioritizes article/main content - -**Best Practices:** -- Use for reading long articles or text-heavy pages -- Combines with other tools for comprehensive page analysis -- Good for infinite scroll pages - use with "max" scroll to load all content - -### search_web - -**Purpose:** Search the web for current and factual information - -**Parameters:** -- queries: Array of keyword-based search queries (max 3 per call) - -**Returns:** -- Search results with titles, URLs, and content snippets -- Results include ID fields for citation - -**Best Practices:** -- Use short, keyword-focused queries -- Maximum 3 queries per call for efficiency -- Break multi-entity questions into separate queries -- Do NOT use for Google.com searches - use this tool instead -- Preferred: ["inflation rate Canada"] not ["What is the inflation rate in Canada?"] - -### tabs_create - -**Purpose:** Create new browser tabs - -**Parameters:** -- url (optional): Starting URL for new tab (default: about:blank) - -**Returns:** -- New tab ID for use with other tools - -**Best Practices:** -- Use for parallel work on multiple tasks -- Can create multiple tabs in sequence -- Each tab maintains its own state -- Always check tab context after creation - -### todo_write - -**Purpose:** Create and manage task lists - -**Parameters:** -- todos: Array of todo items with: - - content: Imperative form ("Run tests", "Build project") - - status: "pending", "in_progress", or "completed" - - active_form: Present continuous form ("Running tests") - -**Best Practices:** -- Use for tracking progress on complex tasks -- Mark tasks as completed immediately when done -- Update frequently to show progress -- Helps demonstrate thoroughness - -## Tool Calling Best Practices - -### Proper Parameter Usage -- ALWAYS include tab_id when required by the tool -- Provide parameters in correct order -- Use JSON format for complex parameters -- Double-check parameter names match tool specifications - -### Efficiency Strategies -- Combine multiple actions in single computer call (click, type, key) -- Use read_page before clicking for more precise targeting -- Avoid repeated screenshots when tools provide same data -- Use find tool when elements not in latest screenshot -- Batch form inputs when completing multiple fields - -### Error Recovery -- Take screenshot after failed action -- Re-fetch element references if page changed -- Verify tab_id still exists -- Adjust coordinates if elements moved -- Use different tool approach if first attempt fails - -### Coordination Between Tools -- read_page → get element refs (ref_1, ref_2) -- computer (click with ref) → interact with element -- form_input (with ref) → set form values -- get_page_text → extract content after navigation -- navigate → load new pages before other interactions - -## Common Tool Sequences - -**Navigating and Reading:** -1. navigate to URL -2. wait for page load -3. screenshot to see current state -4. get_page_text or read_page to extract content - -**Form Completion:** -1. navigate to form page -2. read_page to get form field references -3. form_input for each field (with values) -4. find or read_page to locate submit button -5. computer left_click to submit - -**Web Search:** -1. search_web with relevant queries -2. navigate to promising results -3. get_page_text or read_page to verify information -4. Extract and synthesize findings - -**Element Clicking:** -1. screenshot to see page -2. Option A: Use coordinates from screenshot with computer left_click -3. Option B: read_page for references, then computer left_click with ref - - - +{ + "tools": [ + { + "name": "navigate", + "description": "Navigate to URLs or move through browser history.", + "parameters": { + "type": "object", + "properties": { + "tab_id": { + "type": "integer", + "description": "The browser tab to navigate in." + }, + "url": { + "type": "string", + "description": "The URL to navigate to, or \"back\"/\"forward\" for history navigation." + } + }, + "required": [ + "tab_id", + "url" + ] + }, + "usage": [ + "navigate(url=\"https://example.com\", tab_id=123)", + "navigate(url=\"back\", tab_id=123)", + "navigate(url=\"forward\", tab_id=123)" + ], + "best_practices": [ + "Always include the tab_id parameter.", + "URLs can be provided with or without protocol; default to https:// when omitted.", + "Use for loading new web pages or navigating between pages." + ] + }, + { + "name": "computer", + "description": "Interact with the browser through mouse clicks, keyboard input, scrolling, and screenshots.", + "parameters": { + "type": "object", + "properties": { + "tab_id": { + "type": "integer", + "description": "Browser tab to interact with." + }, + "action": { + "type": "string", + "description": "Action to perform.", + "enum": [ + "left_click", + "right_click", + "double_click", + "triple_click", + "type", + "key", + "scroll", + "screenshot" + ] + }, + "coordinate": { + "type": "array", + "description": "[x, y] coordinates for mouse or scroll actions.", + "items": { + "type": "number" + }, + "minItems": 2, + "maxItems": 2 + }, + "text": { + "type": "string", + "description": "Text to type or keyboard shortcut to press." + }, + "scroll_parameters": { + "type": "object", + "description": "Parameters for scroll actions, such as direction and amount." + } + }, + "required": [ + "tab_id", + "action" + ] + }, + "action_types": [ + "left_click", + "right_click", + "double_click", + "triple_click", + "type", + "key", + "scroll", + "screenshot" + ], + "examples": [ + "left_click: coordinate=[x, y]", + "type: text=\"Hello World\"", + "key: text=\"ctrl+a\" or text=\"Return\"", + "scroll: coordinate=[x, y], scroll_parameters={\"scroll_direction\": \"down\", \"scroll_amount\": 3}" + ] + }, + { + "name": "read_page", + "description": "Extract page structure and get element references from the DOM accessibility tree.", + "parameters": { + "type": "object", + "properties": { + "tab_id": { + "type": "integer", + "description": "Browser tab to read." + }, + "depth": { + "type": "integer", + "description": "How deep to traverse the tree.", + "default": 15 + }, + "filter": { + "type": "string", + "description": "Element filter mode.", + "enum": [ + "interactive", + "all" + ] + }, + "ref_id": { + "type": "string", + "description": "Focus on a specific element's children." + } + }, + "required": [ + "tab_id" + ] + }, + "returns": [ + "Element references such as ref_1 and ref_2.", + "Element properties, text content, and hierarchy." + ], + "best_practices": [ + "Use when screenshot-based clicking might be imprecise.", + "Get element references before using form_input or computer tools.", + "Use smaller depth values if output is too large.", + "Filter for interactive when only interested in clickable elements." + ] + }, + { + "name": "find", + "description": "Search for elements using natural language descriptions.", + "parameters": { + "type": "object", + "properties": { + "tab_id": { + "type": "integer", + "description": "Browser tab to search in." + }, + "query": { + "type": "string", + "description": "Natural language description of what to find, such as search bar or add to cart button." + } + }, + "required": [ + "tab_id", + "query" + ] + }, + "returns": [ + "Up to 20 matching elements with references and coordinates." + ], + "best_practices": [ + "Use when elements are not visible in the current screenshot.", + "Provide specific, descriptive queries.", + "Use after read_page if that tool's output is incomplete.", + "Use returned references or coordinates with other tools." + ] + }, + { + "name": "form_input", + "description": "Set values in form elements, including text inputs, dropdowns, and checkboxes.", + "parameters": { + "type": "object", + "properties": { + "tab_id": { + "type": "integer", + "description": "Browser tab containing the form." + }, + "ref": { + "type": "string", + "description": "Element reference from read_page, such as ref_1." + }, + "value": { + "description": "Value to set; string for text/dropdowns or boolean for checkboxes." + } + }, + "required": [ + "tab_id", + "ref", + "value" + ] + }, + "usage": [ + "form_input(ref=\"ref_5\", value=\"example text\", tab_id=123)", + "form_input(ref=\"ref_8\", value=true, tab_id=123)", + "form_input(ref=\"ref_12\", value=\"Option Text\", tab_id=123)" + ], + "best_practices": [ + "Always get element refs from read_page first.", + "Use for accurate form completion.", + "Can handle multiple field updates in sequence." + ] + }, + { + "name": "get_page_text", + "description": "Extract raw text content from the page.", + "parameters": { + "type": "object", + "properties": { + "tab_id": { + "type": "integer", + "description": "Browser tab to extract text from." + } + }, + "required": [ + "tab_id" + ] + }, + "returns": [ + "Plain text content without HTML formatting.", + "Article or main content when available." + ], + "best_practices": [ + "Use for long articles or text-heavy pages.", + "Combine with other tools for comprehensive page analysis.", + "For infinite scroll pages, scroll to load all content before extracting." + ] + }, + { + "name": "search_web", + "description": "Search the web for current and factual information.", + "parameters": { + "type": "object", + "properties": { + "queries": { + "type": "array", + "description": "Keyword-based search queries, maximum 3 per call.", + "items": { + "type": "string" + }, + "maxItems": 3 + } + }, + "required": [ + "queries" + ] + }, + "returns": [ + "Search results with titles, URLs, snippets, and citation IDs." + ], + "best_practices": [ + "Use short, keyword-focused queries.", + "Use at most 3 queries per call.", + "Break multi-entity questions into separate queries.", + "Use this instead of navigating to Google.com.", + "Prefer queries such as 'inflation rate Canada' over full questions." + ] + }, + { + "name": "tabs_create", + "description": "Create new browser tabs.", + "parameters": { + "type": "object", + "properties": { + "url": { + "type": "string", + "description": "Starting URL for the new tab.", + "default": "about:blank" + } + } + }, + "returns": [ + "New tab ID for use with other tools." + ], + "best_practices": [ + "Use for parallel work on multiple tasks.", + "Each tab maintains its own state.", + "Check tab context after creation." + ] + }, + { + "name": "todo_write", + "description": "Create and manage task lists.", + "parameters": { + "type": "object", + "properties": { + "todos": { + "type": "array", + "description": "Todo items.", + "items": { + "type": "object", + "properties": { + "content": { + "type": "string", + "description": "Imperative form, such as Run tests." + }, + "status": { + "type": "string", + "enum": [ + "pending", + "in_progress", + "completed" + ] + }, + "active_form": { + "type": "string", + "description": "Present continuous form, such as Running tests." + } + }, + "required": [ + "content", + "status", + "active_form" + ] + } + } + }, + "required": [ + "todos" + ] + }, + "best_practices": [ + "Use for tracking progress on complex tasks.", + "Mark tasks as completed immediately when done.", + "Update frequently to show progress." + ] + } + ], + "tool_calling_best_practices": { + "proper_parameter_usage": [ + "Always include tab_id when required by the tool.", + "Provide parameters in the correct order.", + "Use JSON format for complex parameters.", + "Double-check parameter names match tool specifications." + ], + "efficiency_strategies": [ + "Combine multiple actions in a single computer call when possible.", + "Use read_page before clicking for more precise targeting.", + "Avoid repeated screenshots when another tool provides the same data.", + "Use find when elements are not in the latest screenshot.", + "Batch form inputs when completing multiple fields." + ], + "error_recovery": [ + "Take a screenshot after a failed action.", + "Re-fetch element references if the page changed.", + "Verify the tab_id still exists.", + "Adjust coordinates if elements moved.", + "Use a different tool approach if the first attempt fails." + ], + "coordination_between_tools": [ + "read_page -> get element refs.", + "computer -> interact with elements.", + "form_input -> set form values with refs.", + "get_page_text -> extract content after navigation.", + "navigate -> load new pages before other interactions." + ] + }, + "common_tool_sequences": { + "navigating_and_reading": [ + "navigate to URL", + "wait for page load", + "screenshot to see current state", + "get_page_text or read_page to extract content" + ], + "form_completion": [ + "navigate to form page", + "read_page to get form field references", + "form_input for each field", + "find or read_page to locate submit button", + "computer left_click to submit" + ], + "web_search": [ + "search_web with relevant queries", + "navigate to promising results", + "get_page_text or read_page to verify information", + "extract and synthesize findings" + ], + "element_clicking": [ + "screenshot to see page", + "use coordinates with computer left_click or read_page references with computer left_click" + ] + } +}