chore(benchmark): Upgrade OpenAI client lib from v0 to v1

author: Reinier van der Leer <pwuts@agpt.co> 2024-01-16 15:49:46 +0100
committer: Reinier van der Leer <pwuts@agpt.co> 2024-01-16 15:49:46 +0100
commit: 0a4185a919f3f9050237eb50258ca8cc2e455c05 (patch)
tree: ef099eea1cd1cf2e0d62cc24719dc126e731f0ca /benchmark
parent: refactor(benchmark): Disable Helicone integrations (diff)
download: Auto-GPT-0a4185a919f3f9050237eb50258ca8cc2e455c05.tar.gz
Auto-GPT-0a4185a919f3f9050237eb50258ca8cc2e455c05.tar.bz2
Auto-GPT-0a4185a919f3f9050237eb50258ca8cc2e455c05.zip
4 files changed, 33 insertions, 23 deletions
diff --git a/benchmark/agbenchmark/utils/challenge.py b/benchmark/agbenchmark/utils/challenge.py
index 0cf796c87..4d48e0b8e 100644
--- a/benchmark/agbenchmark/utils/challenge.py
+++ b/benchmark/agbenchmark/utils/challenge.py
@@ -9,9 +9,9 @@ from abc import ABC
 from pathlib import Path
 from typing import Any, ClassVar, List
 
-import openai
 import pytest
 from colorama import Fore, Style
+from openai import OpenAI
 
 from agbenchmark.agent_api_interface import run_api_agent
 from agbenchmark.config import AgentBenchmarkConfig
@@ -198,7 +198,7 @@ class Challenge(ABC):
 
     @classmethod
     def llm_eval(cls, content: str, ground: Ground) -> float:
-        openai.api_key = os.getenv("OPENAI_API_KEY")
+        openai_client = OpenAI()
         if os.getenv("IS_MOCK"):
             return 1.0
 
@@ -213,14 +213,14 @@ class Challenge(ABC):
 
         prompt += END_PROMPT
 
-        answer = openai.ChatCompletion.create(
+        answer = openai_client.chat.completions.create(
             model="gpt-4",
             messages=[
                 {"role": "system", "content": prompt},
             ],
         )
 
-        return float(answer["choices"][0]["message"]["content"])  # type: ignore
+        return float(answer.choices[0].message.content)  # type: ignore
 
     @classmethod
     def get_scores(cls, workspace: Path) -> dict[str, Any]:
diff --git a/benchmark/notebooks/LLM Score Experimentation.ipynb b/benchmark/notebooks/LLM Score Experimentation.ipynb
index b14577fdb..745913971 100644
--- a/benchmark/notebooks/LLM Score Experimentation.ipynb
+++ b/benchmark/notebooks/LLM Score Experimentation.ipynb
@@ -7,23 +7,21 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "import openai\n",
     "import os\n",
     "from dotenv import load_dotenv\n",
+    "from openai import OpenAI\n",
     "\n",
     "load_dotenv()\n",
     "\n",
-    "OPENAI_API_KEY = os.getenv(\"OPENAI_API_KEY\")\n",
-    "\n",
     "def llm_eval(evaluation: str) -> float:\n",
-    "    openai.api_key = OPENAI_API_KEY\n",
-    "    answer = openai.ChatCompletion.create(\n",
+    "    openai_client = OpenAI()\n",
+    "    answer = openai_client.chat.completions.create(\n",
     "        model=\"gpt-4\",\n",
     "        messages=[\n",
     "            {\"role\": \"system\", \"content\": evaluation},\n",
     "        ],\n",
     "    )\n",
-    "    return answer[\"choices\"][0][\"message\"][\"content\"]"
+    "    return answer.choices[0].message.content"
    ]
   },
   {
diff --git a/benchmark/poetry.lock b/benchmark/poetry.lock
index 290be2ba6..057b89aa4 100644
--- a/benchmark/poetry.lock
+++ b/benchmark/poetry.lock
@@ -596,6 +596,17 @@ files = [
 ]
 
 [[package]]
+name = "distro"
+version = "1.9.0"
+description = "Distro - an OS platform information API"
+optional = false
+python-versions = ">=3.6"
+files = [
+    {file = "distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2"},
+    {file = "distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed"},
+]
+
+[[package]]
 name = "exceptiongroup"
 version = "1.2.0"
 description = "Backport of PEP 654 (exception groups)"
@@ -1581,25 +1592,26 @@ signedtoken = ["cryptography (>=3.0.0)", "pyjwt (>=2.0.0,<3)"]
 
 [[package]]
 name = "openai"
-version = "0.27.10"
-description = "Python client library for the OpenAI API"
+version = "1.7.2"
+description = "The official Python library for the openai API"
 optional = false
 python-versions = ">=3.7.1"
 files = [
-    {file = "openai-0.27.10-py3-none-any.whl", hash = "sha256:beabd1757e3286fa166dde3b70ebb5ad8081af046876b47c14c41e203ed22a14"},
-    {file = "openai-0.27.10.tar.gz", hash = "sha256:60e09edf7100080283688748c6803b7b3b52d5a55d21890f3815292a0552d83b"},
+    {file = "openai-1.7.2-py3-none-any.whl", hash = "sha256:8f41b90a762f5fd9d182b45851041386fed94c8ad240a70abefee61a68e0ef53"},
+    {file = "openai-1.7.2.tar.gz", hash = "sha256:c73c78878258b07f1b468b0602c6591f25a1478f49ecb90b9bd44b7cc80bce73"},
 ]
 
 [package.dependencies]
-aiohttp = "*"
-requests = ">=2.20"
-tqdm = "*"
+anyio = ">=3.5.0,<5"
+distro = ">=1.7.0,<2"
+httpx = ">=0.23.0,<1"
+pydantic = ">=1.9.0,<3"
+sniffio = "*"
+tqdm = ">4"
+typing-extensions = ">=4.7,<5"
 
 [package.extras]
-datalib = ["numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"]
-dev = ["black (>=21.6b0,<22.0)", "pytest (==6.*)", "pytest-asyncio", "pytest-mock"]
-embeddings = ["matplotlib", "numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)", "plotly", "scikit-learn (>=1.0.2)", "scipy", "tenacity (>=8.0.1)"]
-wandb = ["numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)", "wandb"]
+datalib = ["numpy (>=1)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"]
 
 [[package]]
 name = "outcome"
@@ -2748,4 +2760,4 @@ multidict = ">=4.0"
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.10"
-content-hash = "5987d20b6d95ede57bcb9182836710dee96cab081f68b7a887f3585d13489adc"
+content-hash = "e0d1f991958a5d630287c7bb668e7fdc6183630e06196cf6f507a086be10baec"
diff --git a/benchmark/pyproject.toml b/benchmark/pyproject.toml
index 121235c31..6740004b4 100644
--- a/benchmark/pyproject.toml
+++ b/benchmark/pyproject.toml
@@ -11,7 +11,7 @@ packages = [{ include = "agbenchmark" }]
 python = "^3.10"
 pytest = "^7.3.2"
 requests = "^2.31.0"
-openai = "^0.27.8"
+openai = "^1.7.2"
 pydantic = "^1.10.9"
 python-dotenv = "^1.0.0"
 click = "^8.1.3"
author	Reinier van der Leer <pwuts@agpt.co>	2024-01-16 15:49:46 +0100
committer	Reinier van der Leer <pwuts@agpt.co>	2024-01-16 15:49:46 +0100
commit	0a4185a919f3f9050237eb50258ca8cc2e455c05 (patch)
tree	ef099eea1cd1cf2e0d62cc24719dc126e731f0ca /benchmark
parent	refactor(benchmark): Disable Helicone integrations (diff)
download	Auto-GPT-0a4185a919f3f9050237eb50258ca8cc2e455c05.tar.gz Auto-GPT-0a4185a919f3f9050237eb50258ca8cc2e455c05.tar.bz2 Auto-GPT-0a4185a919f3f9050237eb50258ca8cc2e455c05.zip