diff options
Diffstat (limited to 'frontend/build/web/assets/assets/scrape_synthesize_tree_structure.json')
-rw-r--r-- | frontend/build/web/assets/assets/scrape_synthesize_tree_structure.json | 375 |
1 files changed, 375 insertions, 0 deletions
diff --git a/frontend/build/web/assets/assets/scrape_synthesize_tree_structure.json b/frontend/build/web/assets/assets/scrape_synthesize_tree_structure.json new file mode 100644 index 000000000..858c55d03 --- /dev/null +++ b/frontend/build/web/assets/assets/scrape_synthesize_tree_structure.json @@ -0,0 +1,375 @@ +{ + "edges": [ + { + "arrows": "to", + "from": "agbenchmark/generate_test.py::TestWriteFile::test_method[challenge_data0]", + "id": "agbenchmark/generate_test.py::TestWriteFile::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]", + "to": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]" + }, + { + "arrows": "to", + "from": "agbenchmark/generate_test.py::TestSearch::test_method[challenge_data0]", + "id": "agbenchmark/generate_test.py::TestSearch::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestBasicRetrieval::test_method[challenge_data0]", + "to": "agbenchmark/generate_test.py::TestBasicRetrieval::test_method[challenge_data0]" + }, + { + "arrows": "to", + "from": "agbenchmark/generate_test.py::TestWriteFile::test_method[challenge_data0]", + "id": "agbenchmark/generate_test.py::TestWriteFile::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestSearch::test_method[challenge_data0]", + "to": "agbenchmark/generate_test.py::TestSearch::test_method[challenge_data0]" + }, + { + "arrows": "to", + "from": "agbenchmark/generate_test.py::TestRevenueRetrieval2::test_method[challenge_data0]", + "id": "agbenchmark/generate_test.py::TestRevenueRetrieval2::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestTestGetInformation::test_method[challenge_data0]", + "to": "agbenchmark/generate_test.py::TestTestGetInformation::test_method[challenge_data0]" + }, + { + "arrows": "to", + "from": "agbenchmark/generate_test.py::TestRevenueRetrieval::test_method[challenge_data0]", + "id": "agbenchmark/generate_test.py::TestRevenueRetrieval::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestRevenueRetrieval2::test_method[challenge_data0]", + "to": "agbenchmark/generate_test.py::TestRevenueRetrieval2::test_method[challenge_data0]" + }, + { + "arrows": "to", + "from": "agbenchmark/generate_test.py::TestBasicRetrieval::test_method[challenge_data0]", + "id": "agbenchmark/generate_test.py::TestBasicRetrieval::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestRevenueRetrieval::test_method[challenge_data0]", + "to": "agbenchmark/generate_test.py::TestRevenueRetrieval::test_method[challenge_data0]" + }, + { + "arrows": "to", + "from": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]", + "id": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestSynthesizeInfo::test_method[challenge_data0]", + "to": "agbenchmark/generate_test.py::TestSynthesizeInfo::test_method[challenge_data0]" + } + ], + "nodes": [ + { + "color": "grey", + "data": { + "category": [ + "general", + "coding", + "scrape_synthesize", + "data" + ], + "cutoff": 60, + "dependencies": [ + "TestWriteFile" + ], + "eval_id": "f219f3d3-a41b-45a9-a3d0-389832086ee8", + "ground": { + "answer": "The content of output.txt should be 'Hello World!'", + "eval": { + "type": "file" + }, + "files": [ + "output.txt" + ], + "should_contain": [ + "Hello World!" + ] + }, + "info": { + "description": "Tests if the agent can read a file.", + "difficulty": "interface", + "side_effects": [ + "" + ] + }, + "name": "TestReadFile", + "task": "Read the file called file_to_read.txt and write its content to a file called output.txt" + }, + "id": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]", + "label": "ReadFile", + "shape": "dot" + }, + { + "color": "grey", + "data": { + "category": [ + "general", + "coding", + "scrape_synthesize", + "data" + ], + "cutoff": 60, + "dependencies": [], + "eval_id": "021c695a-6cc4-46c2-b93a-f3a9b0f4d123", + "ground": { + "answer": "The word 'Washington', printed to a .txt file named anything", + "eval": { + "type": "file" + }, + "files": [ + ".txt" + ], + "should_contain": [ + "Washington" + ], + "should_not_contain": [] + }, + "info": { + "description": "Tests if the agent can write a file", + "difficulty": "interface", + "side_effects": [ + "" + ] + }, + "name": "TestWriteFile", + "task": "Write the word 'Washington' to a .txt file" + }, + "id": "agbenchmark/generate_test.py::TestWriteFile::test_method[challenge_data0]", + "label": "WriteFile", + "shape": "dot" + }, + { + "color": "grey", + "data": { + "category": [ + "scrape_synthesize", + "general" + ], + "cutoff": 60, + "dependencies": [ + "TestSearch" + ], + "eval_id": "cd96e6b2-779d-4a4a-8367-d520023e27ae", + "ground": { + "answer": "\u00a325.89", + "eval": { + "type": "file" + }, + "files": [ + ".txt" + ], + "should_contain": [ + "25.89" + ], + "should_not_contain": [] + }, + "info": { + "description": "Tests if the agent can retrieve a specific information from a website.", + "difficulty": "basic", + "side_effects": [] + }, + "name": "TestBasicRetrieval", + "task": "Write the price of the book in this url 'http://books.toscrape.com/catalogue/meditations_33/index.html' to a .txt file." + }, + "id": "agbenchmark/generate_test.py::TestBasicRetrieval::test_method[challenge_data0]", + "label": "BasicRetrieval", + "shape": "dot" + }, + { + "color": "grey", + "data": { + "category": [ + "general", + "scrape_synthesize" + ], + "cutoff": 120, + "dependencies": [ + "TestWriteFile" + ], + "eval_id": "0bb23182-b434-402b-a73e-9c226469b959", + "ground": { + "answer": "This is a Heading\nThis is a paragraph.", + "eval": { + "type": "file" + }, + "files": [ + ".txt" + ], + "should_contain": [ + "Heading", + "paragraph" + ], + "should_not_contain": [ + "The", + "the" + ] + }, + "info": { + "description": "Tests if the agent can search.", + "difficulty": "interface", + "side_effects": [ + "" + ] + }, + "name": "TestSearch", + "task": "Open 'https://silennaihin.com/random/plain.html' and paste all of the text on the page in a .txt file" + }, + "id": "agbenchmark/generate_test.py::TestSearch::test_method[challenge_data0]", + "label": "Search", + "shape": "dot" + }, + { + "color": "grey", + "data": { + "category": [ + "scrape_synthesize", + "general" + ], + "cutoff": 60, + "dependencies": [ + "TestRevenueRetrieval2" + ], + "eval_id": "1758058c-f726-484f-96fa-f05e278e5ff5", + "ground": { + "answer": "The twitter handles of the two hosts of Latent Space.", + "eval": { + "type": "file" + }, + "files": [ + "output.txt" + ], + "should_contain": [ + "swyx", + "FanaHOVA" + ], + "should_not_contain": [] + }, + "info": { + "description": "Tests if the agent can retrieve twitter handles given a vague description.", + "difficulty": "intermediate", + "side_effects": [ + "" + ] + }, + "name": "TestTestGetInformation", + "task": "Write the twitter handle of the two hosts of Latent Space to a file called output.txt" + }, + "id": "agbenchmark/generate_test.py::TestTestGetInformation::test_method[challenge_data0]", + "label": "TestGetInformation", + "shape": "dot" + }, + { + "color": "grey", + "data": { + "category": [ + "scrape_synthesize" + ], + "cutoff": 60, + "dependencies": [ + "TestRevenueRetrieval" + ], + "eval_id": "552bdf23-db40-4bd1-b123-4ed820886cc1", + "ground": { + "answer": "15 Millions\n112 Millions\n117 Millions\n204 Millions\n413 Millions\n2,014 Millions\n3,198 Millions\n4,046 Millions\n7,000 Millions\n11,759 Millions\n21,461 Millions\n24,578 Millions\n31,536 Millions\n53,823 Millions\n81,462 Millions", + "eval": { + "type": "file" + }, + "files": [ + ".txt" + ], + "should_contain": [ + "15", + "112", + "117", + "204", + "413", + "2,014", + "3,198", + "4,046", + "7,000", + "11,759", + "21,461", + "24,578", + "31,536", + "53,823", + "81,462" + ], + "should_not_contain": [] + }, + "info": { + "description": "Tests if the agent can retrieve all the revenues of Tesla since its creation.", + "difficulty": "intermediate", + "side_effects": [ + "tests if there is in fact an LLM attached" + ] + }, + "name": "TestRevenueRetrieval2", + "task": "Write tesla's revenue every year since its creation into a .txt file. Use the US notation, with a precision rounded to the nearest million dollars (for instance, $31,578 million)." + }, + "id": "agbenchmark/generate_test.py::TestRevenueRetrieval2::test_method[challenge_data0]", + "label": "RevenueRetrieval2", + "shape": "dot" + }, + { + "color": "grey", + "data": { + "category": [ + "scrape_synthesize", + "general" + ], + "cutoff": 60, + "dependencies": [ + "TestBasicRetrieval" + ], + "eval_id": "dc2114d7-1597-4c9b-bed0-a97937ad977f", + "ground": { + "answer": "It was $81.462 billion in 2022. In millions the answer is 81,462.", + "eval": { + "type": "file" + }, + "files": [ + ".txt" + ], + "should_contain": [ + "81,462" + ], + "should_not_contain": [] + }, + "info": { + "description": "Tests if the agent can retrieve Tesla's revenue in 2022.", + "difficulty": "intermediate", + "side_effects": [] + }, + "name": "TestRevenueRetrieval", + "task": "Write tesla's exact revenue in 2022 into a .txt file. Use the US notation, with a precision rounded to the nearest million dollars (for instance, $31,578 million)." + }, + "id": "agbenchmark/generate_test.py::TestRevenueRetrieval::test_method[challenge_data0]", + "label": "RevenueRetrieval", + "shape": "dot" + }, + { + "color": "grey", + "data": { + "category": [ + "scrape_synthesize", + "general" + ], + "cutoff": 240, + "dependencies": [ + "TestReadFile" + ], + "eval_id": "895ae28a-4513-44ea-a872-0164771d1597", + "ground": { + "answer": "A report highlighting elements from the 2 files.", + "eval": { + "scoring": "binary", + "template": "question", + "type": "llm" + }, + "files": [ + "output.txt" + ], + "should_contain": [ + "Is the company mentioned in the output actively addressing or capitalizing on the challenges or trends listed?" + ], + "should_not_contain": [] + }, + "info": { + "description": "Tests if the agent can generate content based on the content of 2 files.", + "difficulty": "basic", + "side_effects": [] + }, + "name": "TestSynthesizeInfo", + "task": "Create a brief report or summary highlighting how one or more companies from companies.txt are addressing or capitalizing on challenges or trends from challenges.txt. Write a file called output.txt." + }, + "id": "agbenchmark/generate_test.py::TestSynthesizeInfo::test_method[challenge_data0]", + "label": "SynthesizeInfo", + "shape": "dot" + } + ] +} |