author     Reinier van der Leer <pwuts@agpt.co>  2024-01-16 15:49:46 +0100
committer  Reinier van der Leer <pwuts@agpt.co>  2024-01-16 15:49:46 +0100
commit     0a4185a919f3f9050237eb50258ca8cc2e455c05 (patch)
tree       ef099eea1cd1cf2e0d62cc24719dc126e731f0ca
parent     refactor(benchmark): Disable Helicone integrations (diff)
download   Auto-GPT-0a4185a919f3f9050237eb50258ca8cc2e455c05.tar.gz
           Auto-GPT-0a4185a919f3f9050237eb50258ca8cc2e455c05.tar.bz2
           Auto-GPT-0a4185a919f3f9050237eb50258ca8cc2e455c05.zip
chore(benchmark): Upgrade OpenAI client lib from v0 to v1
-rw-r--r--  benchmark/agbenchmark/utils/challenge.py              8
-rw-r--r--  benchmark/notebooks/LLM Score Experimentation.ipynb  10
-rw-r--r--  benchmark/poetry.lock                                 36
-rw-r--r--  benchmark/pyproject.toml                               2
4 files changed, 33 insertions, 23 deletions
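
For readers skimming the diff, below is a minimal self-contained sketch of the call-site migration this commit applies (the prompt text is a placeholder, not taken from the diff; the model name matches the one used in challenge.py):

# v0 -> v1 migration pattern, as applied in this commit
from openai import OpenAI

# v1 client object; reads OPENAI_API_KEY from the environment by default,
# replacing the explicit `openai.api_key = os.getenv("OPENAI_API_KEY")` call
client = OpenAI()

# v0: openai.ChatCompletion.create(...) returned a dict-like response
# v1: client.chat.completions.create(...) returns a typed response object
completion = client.chat.completions.create(
    model="gpt-4",
    messages=[
        {"role": "system", "content": "Score the answer from 0 to 1."},  # placeholder prompt
    ],
)

# v0: completion["choices"][0]["message"]["content"]
# v1: attribute access instead of dict indexing
print(completion.choices[0].message.content)
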
diff --git a/benchmark/agbenchmark/utils/challenge.py b/benchmark/agbenchmark/utils/challenge.py
index 0cf796c87..4d48e0b8e 100644
--- a/benchmark/agbenchmark/utils/challenge.py
+++ b/benchmark/agbenchmark/utils/challenge.py
@@ -9,9 +9,9 @@ from abc import ABC
from pathlib import Path
from typing import Any, ClassVar, List
-import openai
import pytest
from colorama import Fore, Style
+from openai import OpenAI
from agbenchmark.agent_api_interface import run_api_agent
from agbenchmark.config import AgentBenchmarkConfig
@@ -198,7 +198,7 @@ class Challenge(ABC):
@classmethod
def llm_eval(cls, content: str, ground: Ground) -> float:
- openai.api_key = os.getenv("OPENAI_API_KEY")
+ openai_client = OpenAI()
if os.getenv("IS_MOCK"):
return 1.0
@@ -213,14 +213,14 @@ class Challenge(ABC):
prompt += END_PROMPT
- answer = openai.ChatCompletion.create(
+ answer = openai_client.chat.completions.create(
model="gpt-4",
messages=[
{"role": "system", "content": prompt},
],
)
- return float(answer["choices"][0]["message"]["content"]) # type: ignore
+ return float(answer.choices[0].message.content) # type: ignore
@classmethod
def get_scores(cls, workspace: Path) -> dict[str, Any]:
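
Note on the hunk above: the v1 OpenAI() client picks up OPENAI_API_KEY from the environment on construction, which is why the explicit openai.api_key assignment in llm_eval() is dropped rather than replaced.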
diff --git a/benchmark/notebooks/LLM Score Experimentation.ipynb b/benchmark/notebooks/LLM Score Experimentation.ipynb
index b14577fdb..745913971 100644
--- a/benchmark/notebooks/LLM Score Experimentation.ipynb
+++ b/benchmark/notebooks/LLM Score Experimentation.ipynb
@@ -7,23 +7,21 @@
"metadata": {},
"outputs": [],
"source": [
- "import openai\n",
"import os\n",
"from dotenv import load_dotenv\n",
+ "from openai import OpenAI\n",
"\n",
"load_dotenv()\n",
"\n",
- "OPENAI_API_KEY = os.getenv(\"OPENAI_API_KEY\")\n",
- "\n",
"def llm_eval(evaluation: str) -> float:\n",
- " openai.api_key = OPENAI_API_KEY\n",
- " answer = openai.ChatCompletion.create(\n",
+ " openai_client = OpenAI()\n",
+ " answer = openai_client.chat.completions.create(\n",
" model=\"gpt-4\",\n",
" messages=[\n",
" {\"role\": \"system\", \"content\": evaluation},\n",
" ],\n",
" )\n",
- " return answer[\"choices\"][0][\"message\"][\"content\"]"
+ " return answer.choices[0].message.content"
]
},
{
diff --git a/benchmark/poetry.lock b/benchmark/poetry.lock
index 290be2ba6..057b89aa4 100644
--- a/benchmark/poetry.lock
+++ b/benchmark/poetry.lock
@@ -596,6 +596,17 @@ files = [
]
[[package]]
+name = "distro"
+version = "1.9.0"
+description = "Distro - an OS platform information API"
+optional = false
+python-versions = ">=3.6"
+files = [
+ {file = "distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2"},
+ {file = "distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed"},
+]
+
+[[package]]
name = "exceptiongroup"
version = "1.2.0"
description = "Backport of PEP 654 (exception groups)"
@@ -1581,25 +1592,26 @@ signedtoken = ["cryptography (>=3.0.0)", "pyjwt (>=2.0.0,<3)"]
[[package]]
name = "openai"
-version = "0.27.10"
-description = "Python client library for the OpenAI API"
+version = "1.7.2"
+description = "The official Python library for the openai API"
optional = false
python-versions = ">=3.7.1"
files = [
- {file = "openai-0.27.10-py3-none-any.whl", hash = "sha256:beabd1757e3286fa166dde3b70ebb5ad8081af046876b47c14c41e203ed22a14"},
- {file = "openai-0.27.10.tar.gz", hash = "sha256:60e09edf7100080283688748c6803b7b3b52d5a55d21890f3815292a0552d83b"},
+ {file = "openai-1.7.2-py3-none-any.whl", hash = "sha256:8f41b90a762f5fd9d182b45851041386fed94c8ad240a70abefee61a68e0ef53"},
+ {file = "openai-1.7.2.tar.gz", hash = "sha256:c73c78878258b07f1b468b0602c6591f25a1478f49ecb90b9bd44b7cc80bce73"},
]
[package.dependencies]
-aiohttp = "*"
-requests = ">=2.20"
-tqdm = "*"
+anyio = ">=3.5.0,<5"
+distro = ">=1.7.0,<2"
+httpx = ">=0.23.0,<1"
+pydantic = ">=1.9.0,<3"
+sniffio = "*"
+tqdm = ">4"
+typing-extensions = ">=4.7,<5"
[package.extras]
-datalib = ["numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"]
-dev = ["black (>=21.6b0,<22.0)", "pytest (==6.*)", "pytest-asyncio", "pytest-mock"]
-embeddings = ["matplotlib", "numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)", "plotly", "scikit-learn (>=1.0.2)", "scipy", "tenacity (>=8.0.1)"]
-wandb = ["numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)", "wandb"]
+datalib = ["numpy (>=1)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"]
[[package]]
name = "outcome"
@@ -2748,4 +2760,4 @@ multidict = ">=4.0"
[metadata]
lock-version = "2.0"
python-versions = "^3.10"
-content-hash = "5987d20b6d95ede57bcb9182836710dee96cab081f68b7a887f3585d13489adc"
+content-hash = "e0d1f991958a5d630287c7bb668e7fdc6183630e06196cf6f507a086be10baec"
diff --git a/benchmark/pyproject.toml b/benchmark/pyproject.toml
index 121235c31..6740004b4 100644
--- a/benchmark/pyproject.toml
+++ b/benchmark/pyproject.toml
@@ -11,7 +11,7 @@ packages = [{ include = "agbenchmark" }]
python = "^3.10"
pytest = "^7.3.2"
requests = "^2.31.0"
-openai = "^0.27.8"
+openai = "^1.7.2"
pydantic = "^1.10.9"
python-dotenv = "^1.0.0"
click = "^8.1.3"