Test PR #42

Workflow file for this run

.github/workflows/new-entry.yml at 5d7d994

	# Workflow identity and trigger. The job-level `if` narrows this further to
	# pull-request comments that start with the /add-to-leaderboard command.
	name: New leaderboard entry

	on:
	  issue_comment:
	    # Only newly created comments start a run (not edits or deletions).
	    types:
	      - created
	    # NOTE(review): `branches` is not among the documented filters for the
	    # issue_comment event - confirm whether it has any effect here.
	    branches:
	      - main

	jobs:
	  # Validates a leaderboard submission PR (exactly two bundle files), unpacks
	  # and evaluates both experiments, builds the leaderboard entry and posts it
	  # back on the PR as a comment.
	  add-new-entry:
	    # issue_comment also fires for plain issues; github.event.issue.pull_request
	    # is only populated when the comment was made on a pull request.
	    if: ${{ github.event.issue.pull_request && startsWith(github.event.comment.body, '/add-to-leaderboard') }}
	    runs-on: ubuntu-latest
	    steps:
	      - name: Extract arguments from comment
	        id: extract_args
	        env:
	          # The comment body is user-controlled. It is handed to the shell as
	          # data through the environment instead of being expanded into the
	          # script text, which would allow script injection.
	          COMMENT_BODY: ${{ github.event.comment.body }}
	        run: |
	          echo "Extracting arguments..."
	          comment="$COMMENT_BODY"
	          # Take the first occurrence of each option; `|| true` keeps a missing
	          # option from aborting the step before the explicit check below.
	          python_version=$(echo "$comment" | grep -oP '(?<=--python )\S+' | head -n 1 || true)
	          appworld_version=$(echo "$comment" | grep -oP '(?<=--appworld )\S+' | head -n 1 || true)
	          experiment_prefix=$(echo "$comment" | grep -oP '(?<=--experiment-prefix )\S+' | head -n 1 || true)
	          replace_last_flag=$(echo "$comment" | grep -q -- '--replace-last' && echo "true" || echo "false")

	          if [[ -z "$python_version" || -z "$appworld_version" || -z "$experiment_prefix" ]]; then
	            echo "Error: --python, --appworld and --experiment-prefix are all required." >&2
	            exit 1
	          fi

	          echo "Python version: $python_version"
	          echo "Appworld version: $appworld_version"
	          # Use the shell variable: env.experiment_prefix is not populated until
	          # the next step, after it has been written to GITHUB_ENV below.
	          echo "Experiment names: ${experiment_prefix}_test_normal and ${experiment_prefix}_test_challenge"
	          echo "Replace last flag: $replace_last_flag"

	          # Export the parsed values as environment variables for later steps.
	          echo "python_version=$python_version" >> "$GITHUB_ENV"
	          echo "appworld_version=$appworld_version" >> "$GITHUB_ENV"
	          echo "experiment_prefix=$experiment_prefix" >> "$GITHUB_ENV"
	          echo "replace_last_flag=$replace_last_flag" >> "$GITHUB_ENV"

	      # The issue_comment payload carries no head ref, so the PR's branch name
	      # is looked up through the GitHub CLI.
	      - name: Obtain PR branch
	        id: get-branch
	        run: |
	          echo "branch=$(gh pr view "$PR_NO" --repo "$REPO" --json headRefName --jq '.headRefName')" >> "$GITHUB_OUTPUT"
	        env:
	          REPO: ${{ github.repository }}
	          PR_NO: ${{ github.event.issue.number }}
	          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

	      - name: Checkout PR branch
	        uses: actions/checkout@v4
	        with:
	          ref: ${{ steps.get-branch.outputs.branch }}

	      - name: Pull LFS files
	        run: |
	          echo "Fetching and checking out LFS files..."
	          git lfs install
	          git lfs pull

	      - uses: astral-sh/setup-uv@v3
	        with:
	          version: "0.4.4"

	      # From here on, comment-derived values are read as shell environment
	      # variables (exported via GITHUB_ENV above) and quoted, rather than
	      # expanded into the script with ${{ env.* }}.
	      - name: Set up Python
	        run: uv python install "$python_version"

	      - name: Install venv
	        run: uv venv

	      - name: Install dependencies
	        run: |
	          uv pip install "appworld==${appworld_version}"
	          uv run appworld install

	      - name: Fetch main branch
	        run: git fetch origin main

	      - name: Verify PR file changes
	        run: |
	          echo "Checking PR for exactly two new files..."
	          expected_files=("experiments/outputs/${experiment_prefix}_test_challenge/leaderboard.bundle" "experiments/outputs/${experiment_prefix}_test_normal/leaderboard.bundle")
	          new_files=$(git diff --name-only origin/main..HEAD)

	          echo "Expected files:"
	          printf "%s\n" "${expected_files[@]}"
	          echo "New files in the PR:"
	          echo "$new_files"

	          # Sort both lists so the comparison does not depend on listing order.
	          expected_sorted=$(printf "%s\n" "${expected_files[@]}" | sort)
	          actual_sorted=$(echo "$new_files" | sort)

	          if [[ "$expected_sorted" != "$actual_sorted" ]]; then
	            echo "Error: File list does not match the expected files."
	            echo "Expected:"
	            echo "$expected_sorted"
	            echo "Actual:"
	            echo "$actual_sorted"
	            exit 1
	          fi

	          echo "PR file check passed. The file list matches exactly."

	      - name: Unpack experiments
	        run: |
	          uv run appworld unpack "${experiment_prefix}_test_normal"
	          uv run appworld unpack "${experiment_prefix}_test_challenge"

	      # The venv created by `uv venv` is never activated, so appworld is
	      # invoked through `uv run`, matching the install and unpack steps.
	      - name: Run evaluations
	        run: |
	          uv run appworld evaluate "${experiment_prefix}_test_normal" test_normal
	          uv run appworld evaluate "${experiment_prefix}_test_challenge" test_challenge

	      # NOTE(review): replace_last_flag is passed as a literal "true"/"false"
	      # positional argument - confirm this matches the `appworld make` CLI.
	      - name: Make and add leaderboard entry
	        run: uv run appworld make "${experiment_prefix}_test_normal" "${experiment_prefix}_test_challenge" "$replace_last_flag"

	      # Posts the last element of leaderboard.json back on the PR as a
	      # fenced JSON block.
	      - name: Comment with leaderboard entry
	        if: ${{ success() }}
	        uses: actions/github-script@v6
	        with:
	          script: |
	            const fs = require('fs');
	            const entries = JSON.parse(fs.readFileSync('leaderboard.json', 'utf8'));
	            const formattedEntry = '```json\n' + JSON.stringify(entries[entries.length - 1], null, 4) + '\n```';
	            const commentBody = `### Latest Leaderboard Entry\n${formattedEntry}`;
	            const issue_number = context.issue.number;
	            await github.rest.issues.createComment({
	              ...context.repo,
	              issue_number: issue_number,
	              body: commentBody,
	            });

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Test PR #42

Workflow file

Test PR #42

Jobs

Run details

Workflow file for this run