diff --git a/stable_diffusion/README.md b/stable_diffusion/README.md index c8b735e59..ecbbf3549 100644 --- a/stable_diffusion/README.md +++ b/stable_diffusion/README.md @@ -207,7 +207,7 @@ We achieved this subset by following these steps: 1. download coco-2014 validation dataset: `scripts/datasets/coco-2014-validation-download.sh --output-dir /datasets/coco2014` 2. create the validation subset, and resize the images to 512x512: `scripts/datasets/coco-2014-validation-split-resize.sh --input-images-path /datasets/coco2014/val2014 --input-coco-captions /datasets/coco2014/annotations/captions_val2014.json --output-images-path /datasets/coco2014/val2014_512x512_30k --output-tsv-file /datasets/coco2014/val2014_30k.tsv` -3. generate the FID statistics: `scripts/datasets/generate-fid-statistics.sh --dataset-dir /datasets/coco2014/val2014_512x512_30k --output-file /datasets/coco2014/val2014_512x512_30k_stats.npz` +3. generate the FID statistics: `scripts/datasets/generate-fid-statistics.sh --dataset-dir /datasets/coco2014/val2014_512x512_30k --output-file /datasets/coco2014/val2014_30k_stats.npz` ## The Model Stable Diffusion v2 is a latent diffusion model which combines an autoencoder with a diffusion model that is trained in the latent space of the autoencoder. During training: diff --git a/stable_diffusion/configs/train_01x08x08.yaml b/stable_diffusion/configs/train_01x08x08.yaml index a7497cb61..73db364bf 100644 --- a/stable_diffusion/configs/train_01x08x08.yaml +++ b/stable_diffusion/configs/train_01x08x08.yaml @@ -38,7 +38,7 @@ model: enabled: True inception_weights_url: https://github.com/mseitzer/pytorch-fid/releases/download/fid_weights/pt_inception-2015-12-05-6726825d.pth cache_dir: /checkpoints/inception - gt_path: /datasets/coco2014/val2014_512x512_30k_stats.npz + gt_path: /datasets/coco2014/val2014_30k_stats.npz clip: enabled: True clip_version: "ViT-H-14" diff --git a/stable_diffusion/configs/train_32x08x02.yaml b/stable_diffusion/configs/train_32x08x02.yaml index 5fd81da1a..aaa4a4681 100644 --- a/stable_diffusion/configs/train_32x08x02.yaml +++ b/stable_diffusion/configs/train_32x08x02.yaml @@ -38,7 +38,7 @@ model: enabled: True inception_weights_url: https://github.com/mseitzer/pytorch-fid/releases/download/fid_weights/pt_inception-2015-12-05-6726825d.pth cache_dir: /checkpoints/inception - gt_path: /datasets/coco2014/val2014_512x512_30k_stats.npz + gt_path: /datasets/coco2014/val2014_30k_stats.npz clip: enabled: True clip_version: "ViT-H-14" diff --git a/stable_diffusion/configs/train_32x08x02_raw_images.yaml b/stable_diffusion/configs/train_32x08x02_raw_images.yaml index c36d2ca4f..582147325 100644 --- a/stable_diffusion/configs/train_32x08x02_raw_images.yaml +++ b/stable_diffusion/configs/train_32x08x02_raw_images.yaml @@ -38,7 +38,7 @@ model: enabled: True inception_weights_url: https://github.com/mseitzer/pytorch-fid/releases/download/fid_weights/pt_inception-2015-12-05-6726825d.pth cache_dir: /checkpoints/inception - gt_path: /datasets/coco2014/val2014_512x512_30k_stats.npz + gt_path: /datasets/coco2014/val2014_30k_stats.npz clip: enabled: True clip_version: "ViT-H-14" diff --git a/stable_diffusion/configs/train_32x08x04.yaml b/stable_diffusion/configs/train_32x08x04.yaml index f275f1e20..747aa3d50 100644 --- a/stable_diffusion/configs/train_32x08x04.yaml +++ b/stable_diffusion/configs/train_32x08x04.yaml @@ -38,7 +38,7 @@ model: enabled: True inception_weights_url: https://github.com/mseitzer/pytorch-fid/releases/download/fid_weights/pt_inception-2015-12-05-6726825d.pth cache_dir: /checkpoints/inception - gt_path: /datasets/coco2014/val2014_512x512_30k_stats.npz + gt_path: /datasets/coco2014/val2014_30k_stats.npz clip: enabled: True clip_version: "ViT-H-14" diff --git a/stable_diffusion/configs/train_32x08x08.yaml b/stable_diffusion/configs/train_32x08x08.yaml index 5773fa8ef..166a5deed 100644 --- a/stable_diffusion/configs/train_32x08x08.yaml +++ b/stable_diffusion/configs/train_32x08x08.yaml @@ -38,7 +38,7 @@ model: enabled: True inception_weights_url: https://github.com/mseitzer/pytorch-fid/releases/download/fid_weights/pt_inception-2015-12-05-6726825d.pth cache_dir: /checkpoints/inception - gt_path: /datasets/coco2014/val2014_512x512_30k_stats.npz + gt_path: /datasets/coco2014/val2014_30k_stats.npz clip: enabled: True clip_version: "ViT-H-14" diff --git a/stable_diffusion/scripts/datasets/coco-2014-validation-split-resize.sh b/stable_diffusion/scripts/datasets/coco-2014-validation-split-resize.sh index 147bdcf76..91ffe6bff 100755 --- a/stable_diffusion/scripts/datasets/coco-2014-validation-split-resize.sh +++ b/stable_diffusion/scripts/datasets/coco-2014-validation-split-resize.sh @@ -2,7 +2,7 @@ : "${INPUT_IMAGES_PATH:=/datasets/coco2014/val2014}" : "${INPUT_COCO_CAPTIONS:=/datasets/coco2014/annotations/captions_val2014.json}" -: "${OUTPUT_IMAGES_PATH:=/datasets/coco2014/val2014_512x512_30k}" +: "${OUTPUT_IMAGES_PATH:=/datasets/coco2014/val2014_30k}" : "${OUTPUT_TSV_FILE:=/datasets/coco2014/val2014_30k.tsv}" : "${NUM_SAMPLES:=30000}" diff --git a/stable_diffusion/scripts/datasets/generate-fid-statistics.sh b/stable_diffusion/scripts/datasets/generate-fid-statistics.sh index ff49e3bc8..0e43b8b88 100755 --- a/stable_diffusion/scripts/datasets/generate-fid-statistics.sh +++ b/stable_diffusion/scripts/datasets/generate-fid-statistics.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash -: "${DATASET_DIR:=/datasets/coco2014/val2014_512x512_30k}" -: "${OUTPUT_FILE:=/datasets/coco2014/val2014_512x512_30k_stats.npz}" +: "${DATASET_DIR:=/datasets/coco2014/val2014_30k}" +: "${OUTPUT_FILE:=/datasets/coco2014/val2014_30k_stats.npz}" while [ "$1" != "" ]; do case $1 in