# .github/workflows/autogpts-benchmark.yml

# Nightly benchmark run for the agents listed in the job matrix.
name: AutoGPTs Nightly Benchmark

on:
  # Scheduled run: every day at 02:00 (GitHub evaluates cron in UTC).
  schedule:
    - cron: "0 2 * * *"
  # Allows starting a run manually.
  workflow_dispatch:

# Workflow-wide environment, inherited by every step below.
env:
  # Presumably the environment label the agent tags its telemetry with
  # (consumer is outside this file -- confirm).
  TELEMETRY_ENVIRONMENT: autogpt-benchmark-ci
  # Opt in to telemetry only for runs on master. This workflow is triggered by
  # `schedule` and `workflow_dispatch` only, so a `github.event_name == 'push'`
  # condition could never be true here and telemetry would always be disabled.
  TELEMETRY_OPT_IN: ${{ github.ref_name == 'master' }}

jobs:
  # Runs the benchmark once per agent in the matrix.
  benchmark:
    runs-on: ubuntu-latest
    timeout-minutes: 120
    # Write access to repository contents; the last step pushes to
    # REPORTS_BRANCH.
    permissions:
      contents: write
    strategy:
      # Do not cancel the remaining matrix entries when one of them fails.
      fail-fast: false
      matrix:
        agent-name:
          - autogpt
    env:
      # Quoted so the version is a string and not the float 3.1.
      min-python-version: "3.10"
      REPORTS_BRANCH: data/benchmark-reports
      # Per-agent report directory, relative to the repository root.
      REPORTS_FOLDER: ${{ format('benchmark/reports/{0}', matrix.agent-name) }}
    steps:
      # Check out with submodules and the complete history (fetch-depth 0).
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          submodules: true
          fetch-depth: 0

      # Interpreter version comes from the job-level `min-python-version`.
      - name: Set up Python ${{ env.min-python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.min-python-version }}

      # Downloads the Poetry installer script and pipes it into Python.
      - name: Install Poetry
        run: curl -sSL https://install.python-poetry.org | python -

      # Create the report directory (and any missing parents) up front.
      - name: Prepare reports folder
        run: mkdir -p ${{ env.REPORTS_FOLDER }}

      # Starts the agent and runs the selected benchmark challenges against it.
      # An agbenchmark exit code of 1 (some challenges failed) is treated as a
      # successful step so the reports still get pushed; any other non-zero
      # code fails the step.
      - name: Benchmark ${{ matrix.agent-name }}
        run: |
          ./run agent start ${{ matrix.agent-name }}
          cd autogpts/${{ matrix.agent-name }}

          # The runner invokes bash with `-e`, which would abort the script as
          # soon as the benchmark returns non-zero. Suspend it for this one
          # command so the exit code can be inspected.
          set +e
          poetry run agbenchmark run -N 3 \
            --test=ReadFile \
            --test=BasicRetrieval --test=RevenueRetrieval2 \
            --test=CombineCsv --test=LabelCsv --test=AnswerQuestionCombineCsv \
            --test=UrlShortener --test=TicTacToe --test=Battleship \
            --test=WebArenaTask_0 --test=WebArenaTask_21 --test=WebArenaTask_124 \
            --test=WebArenaTask_134 --test=WebArenaTask_163
          # Capture immediately: every later command (including `[`) overwrites $?.
          EXIT_CODE=$?
          set -e

          # Convert exit code 1 (some challenges failed) to exit code 0
          if [ "$EXIT_CODE" -eq 0 ] || [ "$EXIT_CODE" -eq 1 ]; then
            exit 0
          else
            exit "$EXIT_CODE"
          fi
        env:
          AGENT_NAME: ${{ matrix.agent-name }}
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          REQUESTS_CA_BUNDLE: /etc/ssl/certs/ca-certificates.crt
          REPORTS_FOLDER: ${{ format('../../{0}', env.REPORTS_FOLDER) }}  # account for changed workdir

      # Commits the report folder to REPORTS_BRANCH and pushes it to origin.
      # The branch is fetched and checked out; if the fetch or the checkout
      # fails, a new orphan branch of that name is created instead.
      # `git reset --hard` then resets the index and tracked files, after which
      # only REPORTS_FOLDER is staged for the commit.
      # NOTE(review): when `git commit` fails (e.g. nothing staged), the push is
      # skipped and the step ends with a non-zero status.
      - name: Push reports to data branch
        run: |
          git config --global user.name 'GitHub Actions'
          git config --global user.email 'github-actions@agpt.co'
          git fetch origin ${{ env.REPORTS_BRANCH }}:${{ env.REPORTS_BRANCH }} \
            && git checkout ${{ env.REPORTS_BRANCH }} \
            || git checkout --orphan ${{ env.REPORTS_BRANCH }}
          git reset --hard
          git add ${{ env.REPORTS_FOLDER }}
          git commit -m "Benchmark report for ${{ matrix.agent-name }} @ $(date +'%Y-%m-%d')" \
            && git push origin ${{ env.REPORTS_BRANCH }}