This repository contains data and code for our paper:
# ---------------------------------------------------------------------------
# Environment setup -- run from the repository root.
# ---------------------------------------------------------------------------
# Create a dedicated conda environment named "mislead" with Python 3.10.
conda create -n mislead python=3.10
# Switch into the new environment so the install below lands in "mislead"
# rather than in whatever environment happens to be active.
# NOTE(review): inside a non-interactive script, `conda activate` requires the
# conda shell hook to be loaded first -- confirm how this file is meant to run.
conda activate mislead
# Install the package found in the current directory in editable mode.
pip install -e .

# ---------------------------------------------------------------------------
# Programming task.
# ---------------------------------------------------------------------------
# Run inside a subshell so the directory change does not leak: the next
# section starts from the repository root again. A failed `cd` aborts only
# this subshell instead of running the commands in the wrong directory.
(
  cd src/programming || exit 1
  # NOTE(review): if reward_api.py is a long-running service, it presumably has
  # to be started in a separate terminal (or backgrounded) before train.sh --
  # confirm; as written the two commands run one after the other.
  python reward_api.py
  bash train.sh
)

# ---------------------------------------------------------------------------
# QA task.
# ---------------------------------------------------------------------------
(
  cd src/qa/reward || exit 1
  bash train_judge.sh # task-specific reward training
  bash train_preference.sh # general reward training

  # Step up from src/qa/reward to src/qa, where the remaining scripts are run.
  cd .. || exit 1
  # Each script is pinned to its own GPU through CUDA_VISIBLE_DEVICES.
  # NOTE(review): same caveat as above -- if these are services, they likely
  # need to run concurrently with train.sh rather than sequentially; confirm.
  CUDA_VISIBLE_DEVICES=6 python reward_api.py # general reward
  CUDA_VISIBLE_DEVICES=7 python judge_api.py # task-specific reward
  bash train.sh
)