diff options
author | Reinier van der Leer <pwuts@agpt.co> | 2024-01-16 15:49:46 +0100 |
---|---|---|
committer | Reinier van der Leer <pwuts@agpt.co> | 2024-01-16 15:49:46 +0100 |
commit | 0a4185a919f3f9050237eb50258ca8cc2e455c05 (patch) | |
tree | ef099eea1cd1cf2e0d62cc24719dc126e731f0ca /benchmark | |
parent | refactor(benchmark): Disable Helicone integrations (diff) | |
download | Auto-GPT-0a4185a919f3f9050237eb50258ca8cc2e455c05.tar.gz Auto-GPT-0a4185a919f3f9050237eb50258ca8cc2e455c05.tar.bz2 Auto-GPT-0a4185a919f3f9050237eb50258ca8cc2e455c05.zip |
chore(benchmark): Upgrade OpenAI client lib from v0 to v1
Diffstat (limited to 'benchmark')
-rw-r--r-- | benchmark/agbenchmark/utils/challenge.py | 8 |
-rw-r--r-- | benchmark/notebooks/LLM Score Experimentation.ipynb | 10 |
-rw-r--r-- | benchmark/poetry.lock | 36 |
-rw-r--r-- | benchmark/pyproject.toml | 2 |
4 files changed, 33 insertions, 23 deletions
diff --git a/benchmark/agbenchmark/utils/challenge.py b/benchmark/agbenchmark/utils/challenge.py index 0cf796c87..4d48e0b8e 100644 --- a/benchmark/agbenchmark/utils/challenge.py +++ b/benchmark/agbenchmark/utils/challenge.py @@ -9,9 +9,9 @@ from abc import ABC from pathlib import Path from typing import Any, ClassVar, List -import openai import pytest from colorama import Fore, Style +from openai import OpenAI from agbenchmark.agent_api_interface import run_api_agent from agbenchmark.config import AgentBenchmarkConfig @@ -198,7 +198,7 @@ class Challenge(ABC): @classmethod def llm_eval(cls, content: str, ground: Ground) -> float: - openai.api_key = os.getenv("OPENAI_API_KEY") + openai_client = OpenAI() if os.getenv("IS_MOCK"): return 1.0 @@ -213,14 +213,14 @@ class Challenge(ABC): prompt += END_PROMPT - answer = openai.ChatCompletion.create( + answer = openai_client.chat.completions.create( model="gpt-4", messages=[ {"role": "system", "content": prompt}, ], ) - return float(answer["choices"][0]["message"]["content"]) # type: ignore + return float(answer.choices[0].message.content) # type: ignore @classmethod def get_scores(cls, workspace: Path) -> dict[str, Any]: diff --git a/benchmark/notebooks/LLM Score Experimentation.ipynb b/benchmark/notebooks/LLM Score Experimentation.ipynb index b14577fdb..745913971 100644 --- a/benchmark/notebooks/LLM Score Experimentation.ipynb +++ b/benchmark/notebooks/LLM Score Experimentation.ipynb @@ -7,23 +7,21 @@ "metadata": {}, "outputs": [], "source": [ - "import openai\n", "import os\n", "from dotenv import load_dotenv\n", + "from openai import OpenAI\n", "\n", "load_dotenv()\n", "\n", - "OPENAI_API_KEY = os.getenv(\"OPENAI_API_KEY\")\n", - "\n", "def llm_eval(evaluation: str) -> float:\n", - " openai.api_key = OPENAI_API_KEY\n", - " answer = openai.ChatCompletion.create(\n", + " openai_client = OpenAI()\n", + " answer = openai_client.chat.completions.create(\n", " model=\"gpt-4\",\n", " messages=[\n", " {\"role\": \"system\", 
\"content\": evaluation},\n", " ],\n", " )\n", - " return answer[\"choices\"][0][\"message\"][\"content\"]" + " return answer.choices[0].message.content" ] }, { diff --git a/benchmark/poetry.lock b/benchmark/poetry.lock index 290be2ba6..057b89aa4 100644 --- a/benchmark/poetry.lock +++ b/benchmark/poetry.lock @@ -596,6 +596,17 @@ files = [ ] [[package]] +name = "distro" +version = "1.9.0" +description = "Distro - an OS platform information API" +optional = false +python-versions = ">=3.6" +files = [ + {file = "distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2"}, + {file = "distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed"}, +] + +[[package]] name = "exceptiongroup" version = "1.2.0" description = "Backport of PEP 654 (exception groups)" @@ -1581,25 +1592,26 @@ signedtoken = ["cryptography (>=3.0.0)", "pyjwt (>=2.0.0,<3)"] [[package]] name = "openai" -version = "0.27.10" -description = "Python client library for the OpenAI API" +version = "1.7.2" +description = "The official Python library for the openai API" optional = false python-versions = ">=3.7.1" files = [ - {file = "openai-0.27.10-py3-none-any.whl", hash = "sha256:beabd1757e3286fa166dde3b70ebb5ad8081af046876b47c14c41e203ed22a14"}, - {file = "openai-0.27.10.tar.gz", hash = "sha256:60e09edf7100080283688748c6803b7b3b52d5a55d21890f3815292a0552d83b"}, + {file = "openai-1.7.2-py3-none-any.whl", hash = "sha256:8f41b90a762f5fd9d182b45851041386fed94c8ad240a70abefee61a68e0ef53"}, + {file = "openai-1.7.2.tar.gz", hash = "sha256:c73c78878258b07f1b468b0602c6591f25a1478f49ecb90b9bd44b7cc80bce73"}, ] [package.dependencies] -aiohttp = "*" -requests = ">=2.20" -tqdm = "*" +anyio = ">=3.5.0,<5" +distro = ">=1.7.0,<2" +httpx = ">=0.23.0,<1" +pydantic = ">=1.9.0,<3" +sniffio = "*" +tqdm = ">4" +typing-extensions = ">=4.7,<5" [package.extras] -datalib = ["numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", 
"pandas-stubs (>=1.1.0.11)"] -dev = ["black (>=21.6b0,<22.0)", "pytest (==6.*)", "pytest-asyncio", "pytest-mock"] -embeddings = ["matplotlib", "numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)", "plotly", "scikit-learn (>=1.0.2)", "scipy", "tenacity (>=8.0.1)"] -wandb = ["numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)", "wandb"] +datalib = ["numpy (>=1)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"] [[package]] name = "outcome" @@ -2748,4 +2760,4 @@ multidict = ">=4.0" [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "5987d20b6d95ede57bcb9182836710dee96cab081f68b7a887f3585d13489adc" +content-hash = "e0d1f991958a5d630287c7bb668e7fdc6183630e06196cf6f507a086be10baec" diff --git a/benchmark/pyproject.toml b/benchmark/pyproject.toml index 121235c31..6740004b4 100644 --- a/benchmark/pyproject.toml +++ b/benchmark/pyproject.toml @@ -11,7 +11,7 @@ packages = [{ include = "agbenchmark" }] python = "^3.10" pytest = "^7.3.2" requests = "^2.31.0" -openai = "^0.27.8" +openai = "^1.7.2" pydantic = "^1.10.9" python-dotenv = "^1.0.0" click = "^8.1.3" |