blob: 5607246a5a0da2f6e5f6be45b62330310c23508f (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
|
# Nightly benchmark workflow: workflow-level name, triggers and shared
# environment. The jobs that do the work are defined under `jobs:`.
name: AutoGPTs Nightly Benchmark

on:
  # Allow the workflow to be started by hand.
  workflow_dispatch:
  # Run automatically once a day at 02:00 (GitHub evaluates cron in UTC).
  schedule:
    - cron: '0 2 * * *'

env:
  TELEMETRY_ENVIRONMENT: autogpt-benchmark-ci
  # NOTE(review): this workflow is only triggered by `workflow_dispatch` and
  # `schedule`, so `github.event_name` is never 'push' here and this expression
  # always evaluates to false. Confirm that keeping telemetry off is intended.
  TELEMETRY_OPT_IN: ${{ github.event_name == 'push' && github.ref_name == 'master' }}
jobs:
  # Runs the agbenchmark suite against each agent in the matrix and commits
  # the generated report files to a dedicated data branch.
  benchmark:
    permissions:
      # Needed by the final step, which pushes to REPORTS_BRANCH.
      contents: write
    runs-on: ubuntu-latest
    strategy:
      matrix:
        agent-name: [ autogpt ]
      # Keep the remaining matrix entries running if one of them fails.
      fail-fast: false
    timeout-minutes: 120
    env:
      # Quoted so YAML does not read it as the float 3.1.
      min-python-version: '3.10'
      REPORTS_BRANCH: data/benchmark-reports
      # Path relative to the repository root, one folder per agent.
      REPORTS_FOLDER: ${{ format('benchmark/reports/{0}', matrix.agent-name) }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          # Full history and submodules.
          fetch-depth: 0
          submodules: true

      - name: Set up Python ${{ env.min-python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.min-python-version }}

      - name: Install Poetry
        run: curl -sSL https://install.python-poetry.org | python -

      - name: Prepare reports folder
        run: mkdir -p ${{ env.REPORTS_FOLDER }}

      - name: Benchmark ${{ matrix.agent-name }}
        run: |
          ./run agent start ${{ matrix.agent-name }}
          cd autogpts/${{ matrix.agent-name }}

          # Capture the benchmark's exit status exactly once. The
          # `|| exit_code=$?` form keeps bash's errexit (`-e`, which GitHub
          # enables for `run` scripts) from aborting the script before the
          # status can be inspected.
          exit_code=0
          poetry run agbenchmark run -N 3 \
            --test=ReadFile \
            --test=BasicRetrieval --test=RevenueRetrieval2 \
            --test=CombineCsv --test=LabelCsv --test=AnswerQuestionCombineCsv \
            --test=UrlShortener --test=TicTacToe --test=Battleship \
            --test=WebArenaTask_0 --test=WebArenaTask_21 --test=WebArenaTask_124 \
            --test=WebArenaTask_134 --test=WebArenaTask_163 \
            || exit_code=$?

          # Convert exit code 1 (some challenges failed) to exit code 0.
          # Any other non-zero status is propagated unchanged so that real
          # errors still fail the step. The saved value is used on purpose:
          # `$?` would be overwritten by each `[ ... ]` test.
          if [ "$exit_code" -eq 0 ] || [ "$exit_code" -eq 1 ]; then
            exit 0
          else
            exit "$exit_code"
          fi
        env:
          AGENT_NAME: ${{ matrix.agent-name }}
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          REQUESTS_CA_BUNDLE: /etc/ssl/certs/ca-certificates.crt
          REPORTS_FOLDER: ${{ format('../../{0}', env.REPORTS_FOLDER) }} # account for changed workdir

      - name: Push reports to data branch
        run: |
          git config --global user.name 'GitHub Actions'
          git config --global user.email 'github-actions@agpt.co'

          # Switch to the reports branch if it exists on the remote;
          # otherwise start it as a new orphan branch.
          git fetch origin ${{ env.REPORTS_BRANCH }}:${{ env.REPORTS_BRANCH }} \
            && git checkout ${{ env.REPORTS_BRANCH }} \
            || git checkout --orphan ${{ env.REPORTS_BRANCH }}

          # Discard staged/tracked changes carried over from the checkout so
          # that only the reports folder ends up in the commit.
          # NOTE(review): presumably the freshly generated report files are
          # untracked at this point and therefore survive the reset - confirm.
          git reset --hard

          git add ${{ env.REPORTS_FOLDER }}
          git commit -m "Benchmark report for ${{ matrix.agent-name }} @ $(date +'%Y-%m-%d')" \
            && git push origin ${{ env.REPORTS_BRANCH }}
|