.github/workflows/benchmark-ci.yml


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140

name: Benchmark CI

on:
  push:
    branches: [ master, development, ci-test* ]
    paths:
      - 'benchmark/**'
      - .github/workflows/benchmark-ci.yml
      - '!benchmark/reports/**'
  pull_request:
    branches: [ master, development, release-* ]
    paths:
      - 'benchmark/**'
      - '!benchmark/reports/**'
      - .github/workflows/benchmark-ci.yml

env:
  min-python-version: '3.10'

jobs:
  lint:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v3
        with:
          fetch-depth: 0

      - name: Set up Python ${{ env.min-python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ env.min-python-version }}

      - id: get_date
        name: Get date
        working-directory: ./benchmark/
        run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT

      - name: Install Poetry
        working-directory: ./benchmark/
        run: |
          curl -sSL https://install.python-poetry.org | python -

      - name: Install dependencies
        working-directory: ./benchmark/
        run: |
          export POETRY_VIRTUALENVS_IN_PROJECT=true
          poetry install -vvv

      - name: Lint with flake8
        working-directory: ./benchmark/
        run: poetry run flake8

      - name: Check black formatting
        working-directory: ./benchmark/
        run: poetry run  black . --exclude test.py --check
        if: success() || failure()

      - name: Check isort formatting
        working-directory: ./benchmark/
        run: poetry run  isort . --check
        if: success() || failure()

      - name: Check for unused imports and pass statements
        working-directory: ./benchmark/
        run: |
          cmd="poetry run autoflake --remove-all-unused-imports --recursive --ignore-init-module-imports --ignore-pass-after-docstring agbenchmark"
          $cmd --check || (echo "You have unused imports or pass statements, please run '${cmd} --in-place'" && exit 1)
        if: success() || failure()

  tests-agbenchmark:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        agent-name: [ forge ]
      fail-fast: false
    timeout-minutes: 20
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3
        with:
          fetch-depth: 0
          submodules: true

      - name: Set up Python ${{ env.min-python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ env.min-python-version }}

      - name: Install Poetry
        working-directory: ./autogpts/${{ matrix.agent-name }}/
        run: |
          curl -sSL https://install.python-poetry.org | python -

      - name: Run regression tests
        run: |
          ./run agent start ${{ matrix.agent-name }}
          sleep 10
          cd autogpts/${{ matrix.agent-name }}
          set +e # Ignore non-zero exit codes and continue execution
          echo "Running the following command: poetry run agbenchmark --maintain --mock"

          poetry run agbenchmark --maintain --mock
          EXIT_CODE=$?
          set -e  # Stop ignoring non-zero exit codes
          # Check if the exit code was 5, and if so, exit with 0 instead
          if [ $EXIT_CODE -eq 5 ]; then
            echo "regression_tests.json is empty."
          fi

          echo "Running the following command: poetry run agbenchmark --mock"
          poetry run agbenchmark --mock

          echo "Running the following command: poetry run agbenchmark --mock --category=data"
          poetry run agbenchmark --mock --category=data

          echo "Running the following command: poetry run agbenchmark --mock --category=coding"
          poetry run agbenchmark --mock --category=coding

          echo "Running the following command: poetry run agbenchmark --test=WriteFile"
          poetry run agbenchmark --test=WriteFile
          cd ../../benchmark
          poetry install
          echo "Adding the BUILD_SKILL_TREE environment variable. This will attempt to add new elements in the skill tree. If new elements are added, the CI fails because they should have been pushed"
          export BUILD_SKILL_TREE=true

          poetry run agbenchmark --mock
          poetry run pytest -vv -s tests
          
          CHANGED=$(git diff --name-only | grep -E '(agbenchmark/challenges)|(../frontend/assets)') || echo "No diffs"
          if [ ! -z "$CHANGED" ]; then
            echo "There are unstaged changes please run agbenchmark and commit those changes since they are needed."
            echo "$CHANGED"
            exit 1
          else
            echo "No unstaged changes."
          fi
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}