diff options
author | Silen Naihin <silen.naihin@gmail.com> | 2023-10-20 21:13:09 -0700 |
---|---|---|
committer | Silen Naihin <silen.naihin@gmail.com> | 2023-10-20 21:13:09 -0700 |
commit | e5e0c4bf9d1e18a23ad62aaefad61e423e463adf (patch) | |
tree | 927584a38b14ba09ccaf4f2a1c5af70909862217 /benchmark | |
parent | case sensitivity, updating challenges (diff) | |
download | Auto-GPT-e5e0c4bf9d1e18a23ad62aaefad61e423e463adf.tar.gz Auto-GPT-e5e0c4bf9d1e18a23ad62aaefad61e423e463adf.tar.bz2 Auto-GPT-e5e0c4bf9d1e18a23ad62aaefad61e423e463adf.zip |
reverting new challenges
Diffstat (limited to 'benchmark')
6 files changed, 0 insertions, 107 deletions
diff --git a/benchmark/agbenchmark/challenges/verticals/scrape/6_find_autogpt_creator/artifacts_out/output.txt b/benchmark/agbenchmark/challenges/verticals/scrape/6_find_autogpt_creator/artifacts_out/output.txt deleted file mode 100644 index c37330ccc..000000000 --- a/benchmark/agbenchmark/challenges/verticals/scrape/6_find_autogpt_creator/artifacts_out/output.txt +++ /dev/null @@ -1 +0,0 @@ -Scotland
\ No newline at end of file diff --git a/benchmark/agbenchmark/challenges/verticals/scrape/6_find_autogpt_creator/data.json b/benchmark/agbenchmark/challenges/verticals/scrape/6_find_autogpt_creator/data.json deleted file mode 100644 index 3a6962350..000000000 --- a/benchmark/agbenchmark/challenges/verticals/scrape/6_find_autogpt_creator/data.json +++ /dev/null @@ -1,34 +0,0 @@ -{ - "category": [ - "scrape_synthesize", - "general" - ], - "cutoff": 60, - "dependencies": [ - "TestRevenueRetrieval" - ], - "eval_id": "1758058c-f726-484f-96fa-f05e278e5ff5", - "ground": { - "answer": "Toran is from Scotland https://uk.linkedin.com/in/toran-richards.", - "case_sensitive": true, - "eval": { - "type": "file" - }, - "files": [ - "output.txt" - ], - "should_contain": [ - "Scotland" - ], - "should_not_contain": [] - }, - "info": { - "description": "Tests if the agent can do multi step retrieval (1. who is the CEO of AutoGPT 2. find Toran is from)", - "difficulty": "intermediate", - "side_effects": [ - "" - ] - }, - "name": "TestAutoGPTCountry", - "task": "Write the country the the inventor/creator of AutoGPT is from to a file called output.txt" -} diff --git a/benchmark/agbenchmark/challenges/verticals/scrape/7_amazon_bestseller/artifacts_out/output.txt b/benchmark/agbenchmark/challenges/verticals/scrape/7_amazon_bestseller/artifacts_out/output.txt deleted file mode 100644 index 270f18b41..000000000 --- a/benchmark/agbenchmark/challenges/verticals/scrape/7_amazon_bestseller/artifacts_out/output.txt +++ /dev/null @@ -1 +0,0 @@ -Reed
\ No newline at end of file diff --git a/benchmark/agbenchmark/challenges/verticals/scrape/7_amazon_bestseller/data.json b/benchmark/agbenchmark/challenges/verticals/scrape/7_amazon_bestseller/data.json deleted file mode 100644 index 4e1327628..000000000 --- a/benchmark/agbenchmark/challenges/verticals/scrape/7_amazon_bestseller/data.json +++ /dev/null @@ -1,35 +0,0 @@ -{ - "category": [ - "scrape_synthesize", - "general" - ], - "cutoff": 60, - "dependencies": [ - "TestAutoGPTCountry", - "TestGetInformation" - ], - "eval_id": "1758058c-f726-484f-96fa-f05e278e5ff5", - "ground": { - "answer": "https://www.amazon.com/gp/bestsellers/2021/books, second book, the answer is Reed", - "case_sensitive": true, - "eval": { - "type": "file" - }, - "files": [ - "output.txt" - ], - "should_contain": [ - "Reed" - ], - "should_not_contain": [] - }, - "info": { - "description": "Tests if the agent can follow very specific instructions for retrieval.", - "difficulty": "intermediate", - "side_effects": [ - "" - ] - }, - "name": "TestAmazonBestseller", - "task": "Write the middle name of the author of the best selling book on Amazon from 2021 that got the least amount of reviews out of the first six books on the best selling list to a file called output.txt." -} diff --git a/benchmark/agbenchmark/challenges/verticals/scrape/8_forbes_list/artifacts_out/output.txt b/benchmark/agbenchmark/challenges/verticals/scrape/8_forbes_list/artifacts_out/output.txt deleted file mode 100644 index 22696ea3b..000000000 --- a/benchmark/agbenchmark/challenges/verticals/scrape/8_forbes_list/artifacts_out/output.txt +++ /dev/null @@ -1 +0,0 @@ -Head of Ventures, M&A, and Business Development
\ No newline at end of file diff --git a/benchmark/agbenchmark/challenges/verticals/scrape/8_forbes_list/data.json b/benchmark/agbenchmark/challenges/verticals/scrape/8_forbes_list/data.json deleted file mode 100644 index a4fe85c6a..000000000 --- a/benchmark/agbenchmark/challenges/verticals/scrape/8_forbes_list/data.json +++ /dev/null @@ -1,35 +0,0 @@ -{ - "category": [ - "scrape_synthesize", - "general" - ], - "cutoff": 60, - "dependencies": [ - "TestAutoGPTCountry", - "TestGetInformation" - ], - "eval_id": "1758058c-f726-484f-96fa-f05e278e5ff5", - "ground": { - "answer": "Get to https://www.forbes.com/special-report/2012/30-under-30/30-under-30_games.html, then https://www.linkedin.com/in/brianjcho/details/experience/ is the first 27 year old, then find his longest working job on Linkedin which is Riot Games.", - "case_sensitive": true, - "eval": { - "type": "file" - }, - "files": [ - "output.txt" - ], - "should_contain": [ - "Head of Ventures, M&A, and Business Development" - ], - "should_not_contain": [] - }, - "info": { - "description": "Tests if the agent can navigate the web through trials and tribulations.", - "difficulty": "intermediate", - "side_effects": [ - "" - ] - }, - "name": "TestAmazonBestseller", - "task": "Find the Forbes 30 under 30 list for 2012. For the Games category, find the company that the first 27 year old on the list spend the longest amount of time working at according to Linkedin. What was his role at this company?" -} |