Test #189

Open · wants to merge 7 commits into main
1 change: 1 addition & 0 deletions .devcontainer/Dockerfile
@@ -0,0 +1 @@
FROM pytorch/pytorch:1.7.0-cuda11.0-cudnn8-devel
17 changes: 17 additions & 0 deletions .devcontainer/devcontainer.json
@@ -0,0 +1,17 @@
{
    "build": { "dockerfile": "Dockerfile" },
    "runArgs": [
        "--gpus",
        "all",
        "--shm-size=100g"
    ],

    "customizations": {
        "vscode": {
            "extensions": [
                "ms-python.python",
                "ms-python.vscode-pylance"
            ]
        }
    }
}
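
Reviewer note (not part of this diff): a quick sanity check that the dev container matches the pinned base image and that the GPU flags in runArgs are passed through. The expected values in the comments are assumptions taken from the pytorch/pytorch:1.7.0-cuda11.0-cudnn8-devel tag and the '--gpus all' argument; this is a sketch, not something the PR adds.

import torch

print(torch.__version__)               # expected to start with '1.7.0'
print(torch.version.cuda)              # expected '11.0'
print(torch.backends.cudnn.version())  # expected a cuDNN 8.x build, e.g. 8003
print(torch.cuda.is_available())       # expected True when '--gpus all' reaches the container
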
14 changes: 14 additions & 0 deletions .gitignore
@@ -0,0 +1,14 @@
data/
output_dir/
QCA_data/
__pycache__/
wandb/
ADEChallengeData2016.zip
ADEChallengeData2016/
nohup.out
*.jpg
*.jpeg
*.png
*.gif
*.zip
*.pth
Empty file added a.ipynb
Empty file.
26 changes: 20 additions & 6 deletions demo/mae_visualize.ipynb

Large diffs are not rendered by default.

366 changes: 366 additions & 0 deletions mae_visualize copy.ipynb

Large diffs are not rendered by default.

19 changes: 18 additions & 1 deletion main_finetune.py
@@ -22,8 +22,9 @@
from torch.utils.tensorboard import SummaryWriter

import timm
import wandb

-assert timm.__version__ == "0.3.2" # version check
+assert timm.__version__ == "0.4.5" # version check
from timm.models.layers import trunc_normal_
from timm.data.mixup import Mixup
from timm.loss import LabelSmoothingCrossEntropy, SoftTargetCrossEntropy
@@ -38,6 +39,19 @@

from engine_finetune import train_one_epoch, evaluate

wandb.init(
    # set the wandb project where this run will be logged
    project="MAE_ViT_Tiny",
    entity="amccbn",
    group="MAE_ViT_Base",
    # track hyperparameters and run metadata
    config={
        "architecture": "Self_Supervised_pretrained_FineTunning_ViT_Base",
        "dataset": "ADE2016",
        "epochs": 200,
    }
)


def get_args_parser():
parser = argparse.ArgumentParser('MAE fine-tuning for image classification', add_help=False)
@@ -336,6 +350,9 @@ def main(args):
**{f'test_{k}': v for k, v in test_stats.items()},
'epoch': epoch,
'n_parameters': n_parameters}
wandb.log({**{f'train_{k}': v for k, v in train_stats.items()},
**{f'test_{k}': v for k, v in test_stats.items()}
})

if args.output_dir and misc.is_main_process():
if log_writer is not None:
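Reviewer note (not part of this diff): wandb.init() runs at module import time, so under torch.distributed every rank would open its own run and wandb.log() would fire once per process. A minimal sketch of one way to keep logging on rank 0 only, reusing the repo's existing misc.is_main_process() helper; init_wandb and log_epoch are illustrative names, not part of the PR.

import util.misc as misc
import wandb


def init_wandb(args):
    # Start a single run on the main process; other DDP ranks skip initialization.
    if misc.is_main_process():
        wandb.init(project="MAE_ViT_Tiny", entity="amccbn", group="MAE_ViT_Base",
                   config={"dataset": "ADE2016", "epochs": args.epochs})


def log_epoch(train_stats, test_stats):
    # Mirror the TensorBoard log_stats dict, but emit it from rank 0 only.
    if misc.is_main_process():
        wandb.log({**{f'train_{k}': v for k, v in train_stats.items()},
                   **{f'test_{k}': v for k, v in test_stats.items()}})

Both helpers would be called from main() after misc.init_distributed_mode(args), since is_main_process() relies on the process group being set up.
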
40 changes: 28 additions & 12 deletions main_pretrain.py
@@ -15,16 +15,17 @@
import os
import time
from pathlib import Path

import torch.nn as nn
import torch
import torch.backends.cudnn as cudnn
from torch.utils.tensorboard import SummaryWriter
import torchvision.transforms as transforms
import torchvision.datasets as datasets

from torchvision import datasets
import wandb
import argparse
import timm

-assert timm.__version__ == "0.3.2" # version check
+assert timm.__version__ == "0.4.5" # version check
import timm.optim.optim_factory as optim_factory

import util.misc as misc
@@ -34,7 +35,18 @@

from engine_pretrain import train_one_epoch


wandb.init(
    # set the wandb project where this run will be logged
    project="MAE_ViT_Tiny",
    entity="amccbn",
    group="MAE_ViT_Base_QCA",
    # track hyperparameters and run metadata
    config={
        "architecture": "Self_Supervised_pretrained_ViT_Base_QCA",
        "dataset": "QCA",
        "epochs": 50,
    }
)
def get_args_parser():
parser = argparse.ArgumentParser('MAE pre-training', add_help=False)
parser.add_argument('--batch_size', default=64, type=int,
@@ -72,7 +84,7 @@ def get_args_parser():
help='epochs to warmup LR')

# Dataset parameters
-parser.add_argument('--data_path', default='/datasets01/imagenet_full_size/061417/', type=str,
+parser.add_argument('--data_path', default='./ADEChallengeData2016/images/train', type=str,
help='dataset path')

parser.add_argument('--output_dir', default='./output_dir',
@@ -87,7 +99,7 @@

parser.add_argument('--start_epoch', default=0, type=int, metavar='N',
help='start epoch')
-parser.add_argument('--num_workers', default=10, type=int)
+parser.add_argument('--num_workers', default=0, type=int)
parser.add_argument('--pin_mem', action='store_true',
help='Pin CPU memory in DataLoader for more efficient (sometimes) transfer to GPU.')
parser.add_argument('--no_pin_mem', action='store_false', dest='pin_mem')
@@ -107,6 +119,7 @@ def get_args_parser():
def main(args):
misc.init_distributed_mode(args)

print('distributed: {}'.format(args.distributed))
print('job dir: {}'.format(os.path.dirname(os.path.realpath(__file__))))
print("{}".format(args).replace(', ', ',\n'))

@@ -118,14 +131,15 @@ def main(args):
np.random.seed(seed)

cudnn.benchmark = True



# simple augmentation
transform_train = transforms.Compose([
transforms.RandomResizedCrop(args.input_size, scale=(0.2, 1.0), interpolation=3), # 3 is bicubic
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])
-dataset_train = datasets.ImageFolder(os.path.join(args.data_path, 'train'), transform=transform_train)
+dataset_train = datasets.ImageFolder(os.path.join(args.data_path), transform=transform_train)
print(dataset_train)

if True: # args.distributed:
@@ -154,7 +168,7 @@ def main(args):

# define the model
model = models_mae.__dict__[args.model](norm_pix_loss=args.norm_pix_loss)

model.to(device)

model_without_ddp = model
@@ -172,7 +186,7 @@
print("effective batch size: %d" % eff_batch_size)

if args.distributed:
-model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu], find_unused_parameters=True)
+model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu], find_unused_parameters=False)
model_without_ddp = model.module

# following timm: set wd as 0 for bias and norm layers
@@ -201,7 +215,9 @@

log_stats = {**{f'train_{k}': v for k, v in train_stats.items()},
'epoch': epoch,}


wandb.log({**{f'train_{k}': v for k, v in train_stats.items()}})

if args.output_dir and misc.is_main_process():
if log_writer is not None:
log_writer.flush()
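Reviewer note (not part of this diff): datasets.ImageFolder expects root/<class_dir>/<image>, so the new --data_path default (./ADEChallengeData2016/images/train) only works if that directory still contains at least one subfolder of images; with os.path.join(args.data_path, 'train') dropped, a flat folder of JPEGs would fail to load. A small standalone check, assuming that local layout:

import torchvision.transforms as transforms
from torchvision import datasets

DATA_PATH = './ADEChallengeData2016/images/train'  # assumed layout: train/<subdir>/*.jpg

transform_train = transforms.Compose([
    transforms.RandomResizedCrop(224, scale=(0.2, 1.0), interpolation=3),  # 3 is bicubic
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

if __name__ == '__main__':
    dataset_train = datasets.ImageFolder(DATA_PATH, transform=transform_train)
    print(len(dataset_train), 'images across', len(dataset_train.classes), 'folder(s)')
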
3 changes: 3 additions & 0 deletions requirements.txt
@@ -0,0 +1,3 @@
timm==0.4.5
wandb
tensorboard
2 changes: 1 addition & 1 deletion util/misc.py
@@ -239,7 +239,7 @@ def init_distributed_mode(args):
args.distributed = True

torch.cuda.set_device(args.gpu)
-args.dist_backend = 'nccl'
+args.dist_backend = 'gloo'
print('| distributed init (rank {}): {}, gpu {}'.format(
args.rank, args.dist_url, args.gpu), flush=True)
torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
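Reviewer note (not part of this diff): hard-coding 'gloo' keeps CPU-only and Windows runs working, but it gives up NCCL's faster GPU collectives on multi-GPU Linux machines. A hedged alternative is to pick the backend from what the current torch build supports; pick_dist_backend is an illustrative name, not in the repo.

import torch
import torch.distributed as dist


def pick_dist_backend() -> str:
    # Prefer NCCL when CUDA and an NCCL build are both available,
    # otherwise fall back to Gloo (CPU-only or Windows setups).
    if torch.cuda.is_available() and dist.is_nccl_available():
        return 'nccl'
    return 'gloo'

In init_distributed_mode this would replace the literal assignment with args.dist_backend = pick_dist_backend().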