aboutsummaryrefslogtreecommitdiff
path: root/benchmark
diff options
context:
space:
mode:
authorGravatar Silen Naihin <silen.naihin@gmail.com> 2023-10-20 21:13:09 -0700
committerGravatar Silen Naihin <silen.naihin@gmail.com> 2023-10-20 21:13:09 -0700
commite5e0c4bf9d1e18a23ad62aaefad61e423e463adf (patch)
tree927584a38b14ba09ccaf4f2a1c5af70909862217 /benchmark
parentcase sensitivity, updating challenges (diff)
downloadAuto-GPT-e5e0c4bf9d1e18a23ad62aaefad61e423e463adf.tar.gz
Auto-GPT-e5e0c4bf9d1e18a23ad62aaefad61e423e463adf.tar.bz2
Auto-GPT-e5e0c4bf9d1e18a23ad62aaefad61e423e463adf.zip
reverting new challenges
Diffstat (limited to 'benchmark')
-rw-r--r--benchmark/agbenchmark/challenges/verticals/scrape/6_find_autogpt_creator/artifacts_out/output.txt1
-rw-r--r--benchmark/agbenchmark/challenges/verticals/scrape/6_find_autogpt_creator/data.json34
-rw-r--r--benchmark/agbenchmark/challenges/verticals/scrape/7_amazon_bestseller/artifacts_out/output.txt1
-rw-r--r--benchmark/agbenchmark/challenges/verticals/scrape/7_amazon_bestseller/data.json35
-rw-r--r--benchmark/agbenchmark/challenges/verticals/scrape/8_forbes_list/artifacts_out/output.txt1
-rw-r--r--benchmark/agbenchmark/challenges/verticals/scrape/8_forbes_list/data.json35
6 files changed, 0 insertions, 107 deletions
diff --git a/benchmark/agbenchmark/challenges/verticals/scrape/6_find_autogpt_creator/artifacts_out/output.txt b/benchmark/agbenchmark/challenges/verticals/scrape/6_find_autogpt_creator/artifacts_out/output.txt
deleted file mode 100644
index c37330ccc..000000000
--- a/benchmark/agbenchmark/challenges/verticals/scrape/6_find_autogpt_creator/artifacts_out/output.txt
+++ /dev/null
@@ -1 +0,0 @@
-Scotland \ No newline at end of file
diff --git a/benchmark/agbenchmark/challenges/verticals/scrape/6_find_autogpt_creator/data.json b/benchmark/agbenchmark/challenges/verticals/scrape/6_find_autogpt_creator/data.json
deleted file mode 100644
index 3a6962350..000000000
--- a/benchmark/agbenchmark/challenges/verticals/scrape/6_find_autogpt_creator/data.json
+++ /dev/null
@@ -1,34 +0,0 @@
-{
- "category": [
- "scrape_synthesize",
- "general"
- ],
- "cutoff": 60,
- "dependencies": [
- "TestRevenueRetrieval"
- ],
- "eval_id": "1758058c-f726-484f-96fa-f05e278e5ff5",
- "ground": {
- "answer": "Toran is from Scotland https://uk.linkedin.com/in/toran-richards.",
- "case_sensitive": true,
- "eval": {
- "type": "file"
- },
- "files": [
- "output.txt"
- ],
- "should_contain": [
- "Scotland"
- ],
- "should_not_contain": []
- },
- "info": {
- "description": "Tests if the agent can do multi-step retrieval (1. who is the CEO of AutoGPT 2. find where Toran is from)",
- "difficulty": "intermediate",
- "side_effects": [
- ""
- ]
- },
- "name": "TestAutoGPTCountry",
- "task": "Write the country the inventor/creator of AutoGPT is from to a file called output.txt"
-}
diff --git a/benchmark/agbenchmark/challenges/verticals/scrape/7_amazon_bestseller/artifacts_out/output.txt b/benchmark/agbenchmark/challenges/verticals/scrape/7_amazon_bestseller/artifacts_out/output.txt
deleted file mode 100644
index 270f18b41..000000000
--- a/benchmark/agbenchmark/challenges/verticals/scrape/7_amazon_bestseller/artifacts_out/output.txt
+++ /dev/null
@@ -1 +0,0 @@
-Reed \ No newline at end of file
diff --git a/benchmark/agbenchmark/challenges/verticals/scrape/7_amazon_bestseller/data.json b/benchmark/agbenchmark/challenges/verticals/scrape/7_amazon_bestseller/data.json
deleted file mode 100644
index 4e1327628..000000000
--- a/benchmark/agbenchmark/challenges/verticals/scrape/7_amazon_bestseller/data.json
+++ /dev/null
@@ -1,35 +0,0 @@
-{
- "category": [
- "scrape_synthesize",
- "general"
- ],
- "cutoff": 60,
- "dependencies": [
- "TestAutoGPTCountry",
- "TestGetInformation"
- ],
- "eval_id": "1758058c-f726-484f-96fa-f05e278e5ff5",
- "ground": {
- "answer": "https://www.amazon.com/gp/bestsellers/2021/books, second book, the answer is Reed",
- "case_sensitive": true,
- "eval": {
- "type": "file"
- },
- "files": [
- "output.txt"
- ],
- "should_contain": [
- "Reed"
- ],
- "should_not_contain": []
- },
- "info": {
- "description": "Tests if the agent can follow very specific instructions for retrieval.",
- "difficulty": "intermediate",
- "side_effects": [
- ""
- ]
- },
- "name": "TestAmazonBestseller",
- "task": "Write the middle name of the author of the best selling book on Amazon from 2021 that got the least amount of reviews out of the first six books on the best selling list to a file called output.txt."
-}
diff --git a/benchmark/agbenchmark/challenges/verticals/scrape/8_forbes_list/artifacts_out/output.txt b/benchmark/agbenchmark/challenges/verticals/scrape/8_forbes_list/artifacts_out/output.txt
deleted file mode 100644
index 22696ea3b..000000000
--- a/benchmark/agbenchmark/challenges/verticals/scrape/8_forbes_list/artifacts_out/output.txt
+++ /dev/null
@@ -1 +0,0 @@
-Head of Ventures, M&A, and Business Development \ No newline at end of file
diff --git a/benchmark/agbenchmark/challenges/verticals/scrape/8_forbes_list/data.json b/benchmark/agbenchmark/challenges/verticals/scrape/8_forbes_list/data.json
deleted file mode 100644
index a4fe85c6a..000000000
--- a/benchmark/agbenchmark/challenges/verticals/scrape/8_forbes_list/data.json
+++ /dev/null
@@ -1,35 +0,0 @@
-{
- "category": [
- "scrape_synthesize",
- "general"
- ],
- "cutoff": 60,
- "dependencies": [
- "TestAutoGPTCountry",
- "TestGetInformation"
- ],
- "eval_id": "1758058c-f726-484f-96fa-f05e278e5ff5",
- "ground": {
- "answer": "Get to https://www.forbes.com/special-report/2012/30-under-30/30-under-30_games.html, then https://www.linkedin.com/in/brianjcho/details/experience/ is the first 27 year old, then find his longest working job on Linkedin which is Riot Games.",
- "case_sensitive": true,
- "eval": {
- "type": "file"
- },
- "files": [
- "output.txt"
- ],
- "should_contain": [
- "Head of Ventures, M&A, and Business Development"
- ],
- "should_not_contain": []
- },
- "info": {
- "description": "Tests if the agent can navigate the web through trials and tribulations.",
- "difficulty": "intermediate",
- "side_effects": [
- ""
- ]
- },
- "name": "TestAmazonBestseller",
- "task": "Find the Forbes 30 under 30 list for 2012. For the Games category, find the company that the first 27 year old on the list spent the longest amount of time working at according to LinkedIn. What was his role at this company?"
-}