Skip to content

Commit

Permalink
merge
Browse files Browse the repository at this point in the history
  • Loading branch information
mikekgfb committed Apr 10, 2024
2 parents d4c597a + ebb44e0 commit e3b4080
Show file tree
Hide file tree
Showing 9 changed files with 893 additions and 208 deletions.
13 changes: 12 additions & 1 deletion .github/workflows/compile.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ jobs:
run-tinystories:
strategy:
matrix:
runner: [ubuntu-latest, macos-12, macos-14]
runner: [ubuntu-latest, macos-14]
runs-on: ${{matrix.runner}}
steps:
- name: Checkout repo
Expand Down Expand Up @@ -95,6 +95,17 @@ jobs:
python generate.py --checkpoint-path ${MODEL_PATH} --temperature 0 --dso-path ${MODEL_DIR}/${MODEL_NAME}.so > ./output_aoti
cat ./output_aoti
echo "******************************************"
echo "******** INT4 group-wise quantized *******"
echo "******************************************"
python generate.py --quant '{"linear:int4" : {"group_size": 32}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
cat ./output_eager
python generate.py --compile --quant '{"linear:int4" : {"group_size": 32}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_compiled
cat ./output_compiled
python export.py --quant '{"linear:int4" : {"group_size": 32}}' --checkpoint-path ${MODEL_PATH} --output-dso-path ${MODEL_DIR}/${MODEL_NAME}.so
python generate.py --checkpoint-path ${MODEL_PATH} --temperature 0 --dso-path ${MODEL_DIR}/${MODEL_NAME}.so > ./output_aoti
cat ./output_aoti
echo "tests complete"
echo "******************************************"
# echo "********* EAGER vs TORCH.COMPILE *********"
Expand Down
79 changes: 79 additions & 0 deletions .github/workflows/macos12.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
# Runs the tinystories smoke tests on macOS 12 (Intel) runners.
# Name is suffixed so it is distinguishable from the other "Compile main"
# workflows in the Actions UI.
name: Compile main (macos-12)

on:
  push:
    branches:
      - main
  pull_request:
  workflow_dispatch:

jobs:
  run-tinystories:
    strategy:
      matrix:
        runner: [macos-12]
    runs-on: ${{matrix.runner}}
    steps:
      - name: Checkout repo
        uses: actions/checkout@v2
      - name: Setup Python
        uses: actions/setup-python@v2
        with:
          # Quoted so YAML does not parse the version as a float.
          python-version: "3.11"
      - name: Print machine info
        run: |
          uname -a
          if [ $(uname -s) == Darwin ]; then
            sysctl machdep.cpu.brand_string
            sysctl machdep.cpu.core_count
          fi
      - name: Install requirements
        run: |
          pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cpu
          pip install -r requirements.txt
      - name: Download checkpoints
        run: |
          mkdir -p checkpoints/stories15M
          pushd checkpoints/stories15M
          wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.pt
          wget https://github.com/karpathy/llama2.c/raw/master/tokenizer.model
          popd
      - name: Run inference
        run: |
          export MODEL_PATH=checkpoints/stories15M/stories15M.pt
          export MODEL_NAME=stories15M
          export MODEL_DIR=/tmp
          python generate.py --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
          cat ./output_eager
          python generate.py --compile --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_compiled
          cat ./output_compiled
          # AOTI export/run is disabled on this runner; the cat is commented out
          # as well so the step does not fail on a missing ./output_aoti
          # (run steps execute under bash -e -o pipefail).
          # python export.py --checkpoint-path ${MODEL_PATH} --output-dso-path ${MODEL_DIR}/${MODEL_NAME}.so
          # python generate.py --checkpoint-path ${MODEL_PATH} --temperature 0 --dso-path ${MODEL_DIR}/${MODEL_NAME}.so > ./output_aoti
          # cat ./output_aoti
          echo "******************************************"
          echo "******* Emb: channel-wise quantized ******"
          echo "******************************************"
          python generate.py --quant '{"embedding" : {"bitwidth": 8, "group_size": 0}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
          cat ./output_eager
          echo "******************************************"
          echo "******** Emb: group-wise quantized *******"
          echo "******************************************"
          python generate.py --quant '{"embedding" : {"bitwidth": 8, "group_size": 8}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
          cat ./output_eager
          echo "******************************************"
          echo "******* INT8 channel-wise quantized ******"
          echo "******************************************"
          python generate.py --quant '{"linear:int8" : {"bitwidth": 8, "group_size": 0}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
          cat ./output_eager
          echo "******************************************"
          echo "******** INT8 group-wise quantized *******"
          echo "******************************************"
          python generate.py --quant '{"linear:int8" : {"bitwidth": 8, "group_size": 8}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
          cat ./output_eager

          echo "tests complete"
          echo "******************************************"
65 changes: 65 additions & 0 deletions .github/workflows/runner_et.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# Builds the ExecuTorch C++ runner and checks it can run an exported
# stories15M .pte model. Name reflects what the workflow does instead of
# duplicating the "Compile main" name used by compile.yml.
name: Compile main (runner-et)

on:
  push:
    branches:
      - main
  pull_request:
  workflow_dispatch:

jobs:
  run-tinystories:
    strategy:
      matrix:
        runner: [macos-14-xlarge]
    runs-on: ${{matrix.runner}}
    steps:
      - name: Checkout repo
        uses: actions/checkout@v2
      - name: Setup Python
        uses: actions/setup-python@v2
        with:
          # Quoted so YAML does not parse the version as a float.
          python-version: "3.11"
      - name: Print machine info
        run: |
          uname -a
          if [ $(uname -s) == Darwin ]; then
            sysctl machdep.cpu.brand_string
            sysctl machdep.cpu.core_count
          fi
      - name: Install requirements
        run: |
          echo "Installing pip packages"
          pip install wheel
          pip install cmake
          pip install ninja
          pip install zstd
          pip install -r requirements.txt
          export LLAMA_FAST_ROOT=${PWD}
          export ET_NO_PYBIND=1
          ./scripts/install_et.sh
          cmake -S ./runner-et -B build/cmake-out -G Ninja
          cmake --build ./build/cmake-out
      - name: Download checkpoints
        run: |
          mkdir -p checkpoints/stories15M
          pushd checkpoints/stories15M
          wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.pt
          wget https://github.com/karpathy/llama2.c/raw/master/tokenizer.model
          wget https://github.com/karpathy/llama2.c/raw/master/tokenizer.bin
          popd
      - name: Run inference
        run: |
          export MODEL_DIR=${PWD}/checkpoints/stories15M
          export PROMPT="Once upon a time in a land far away"
          python generate.py --checkpoint-path ${MODEL_DIR}/stories15M.pt --temperature 0 --prompt "${PROMPT}" > ${PWD}/output_eager
          cat ${PWD}/output_eager
          python export.py --checkpoint-path ${MODEL_DIR}/stories15M.pt --output-pte-path ${PWD}/stories15M.pte
          ./build/cmake-out/runner_et ${PWD}/stories15M.pte -z ${MODEL_DIR}/tokenizer.bin -i "${PROMPT}" > ${PWD}/output_et
          cat ${PWD}/output_et
          echo "Tests complete."
7 changes: 6 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,11 @@ format:

```
python utils/tokenizer.py --tokenizer-model=${MODEL_DIR}tokenizer.model
```

We will later discuss how to use this model, as described under *STANDALONE EXECUTION*, in a Python-free
environment:
```
./run ${MODEL_OUT}/model.{so,pte} -z ${MODEL_OUT}/tokenizer.bin
```

Expand Down Expand Up @@ -273,7 +278,7 @@ quantization options.

*Channelwise quantization*:

The simplest way to quantize embedding tables is with int8 groupwise
The simplest way to quantize embedding tables is with int8 "channelwise"
quantization, where each value is represented by an 8 bit integer, and
a floating point scale per group.

Expand Down
Loading

0 comments on commit e3b4080

Please sign in to comment.