From 316c9bd7f174522dc713f95781ff476318e62062 Mon Sep 17 00:00:00 2001
From: David Neumann
Date: Thu, 20 Oct 2022 17:11:20 +0200
Subject: [PATCH] Renamed the VGG-style architectures

- Renamed VGG5, VGG7, and VGG9 to Conv-2, Conv-4, and Conv-6 respectively,
  because these are the names used in the original paper
- Renamed the VGG17 architecture to VGG19, because this is the name used in
  the original paper; although it only has 17 weight layers instead of 19
  like the real VGG19, it is still an adapted VGG19 with the same number of
  convolutional layers
---
 .vscode/settings.json                |  1 +
 CHANGELOG.md                         | 10 ++---
 CITATION.cff                         |  2 +-
 README.md                            | 30 +++++++--------
 source/lth/models/__init__.py        |  8 ++--
 source/lth/models/hyperparameters.py |  8 ++--
 source/lth/models/vgg.py             | 57 +++++++++++++++-------------
 7 files changed, 59 insertions(+), 57 deletions(-)

diff --git a/.vscode/settings.json b/.vscode/settings.json
index 7ab6cc0..6ba586f 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -28,6 +28,7 @@
         "cifar",
         "CIFAR",
         "conda",
+        "conv",
         "Conv",
         "convolutional",
         "cuda",
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2599d02..ec01692 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,17 +2,17 @@
 
 ## v0.1.0
 
-*Released on October 19, 2022*
+*Released on October 20, 2022*
 
 - Initial release
 - Implements the original lottery ticket hypothesis algorithm using magnitude pruning
 - Supports the following models:
   - LeNet-300-100
   - LeNet-5
-  - VGG5
-  - VGG7
-  - VGG9
-  - VGG17
+  - Conv-2
+  - Conv-4
+  - Conv-6
+  - VGG19
 - Supports the following datasets:
   - MNIST
   - CIFAR-10
diff --git a/CITATION.cff b/CITATION.cff
index 2735273..6967e26 100644
--- a/CITATION.cff
+++ b/CITATION.cff
@@ -24,7 +24,7 @@ keywords:
   - Lottery Ticket Hypothesis
   - Pruning
 version: 0.1.0
-date-released: 2022-10-19
+date-released: 2022-10-20
 license: MIT
 repository: https://github.com/lecode-official/pytorch-lottery-ticket-hypothesis.git
 url: https://github.com/lecode-official/pytorch-lottery-ticket-hypothesis
diff --git a/README.md b/README.md
index b97d820..dabdfc0 100644
--- a/README.md
+++ b/README.md
@@ -69,10 +69,10 @@ Currently the following models and datasets are supported:
 
 - LeNet-300-100 [[4]](#4) (`lenet-300-100`)
 - LeNet-5 [[4]](#4) (`lenet-5`)
-- VGG5 [[7]](#7) (`vgg5`)
-- VGG7 [[7]](#7) (`vgg7`)
-- VGG9 [[7]](#7) (`vgg9`)
-- VGG17 [[7]](#7) (`vgg17`)
+- Conv-2 [[1]](#1) (`conv-2`)
+- Conv-4 [[1]](#1) (`conv-4`)
+- Conv-6 [[1]](#1) (`conv-6`)
+- VGG19 [[1](#1), [7](#7)] (`vgg19`)
 
 **Datasets:**
 
@@ -151,15 +151,13 @@ If you use this software in your research, please cite it like this or use the "
 
 ## To-Do's
 
-1. The names of the VGG networks seems to be wrong, they should be renamed
-2. General clean up, so that the project can be made public
-3. Intelligently retain model checkpoint files
-4. Extensively log hyperparameters and training statistics
-5. Add support for plotting training statistics
-6. Make it possible to gracefully abort the training process
-7. Add support for macOS on ARM64
-8. Implement the ResNet-18 model
-9. Perform extensive experiments on all supported models and datasets and record the results in the read me
-10. Make it possible to redo all of the experiments from the original paper
-11. Implement the models that were used in the paper
-12. Add support for different mask-0 and mask-1 actions
+1. Intelligently retain model checkpoint files
+2. Extensively log hyperparameters and training statistics
+3. Add support for plotting training statistics
+4. Make it possible to gracefully abort the training process
+5. Add support for macOS on ARM64
+6. Implement the ResNet-18 model
+7. Perform extensive experiments on all supported models and datasets and record the results in the README
+8. Make it possible to redo all of the experiments from the original paper
+9. Add support for different mask-0 and mask-1 actions
+10. Make Dropout optional
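The renamed IDs in the README map one-to-one onto the model registry that the following hunks update. A minimal sketch of how they resolve at runtime, assuming the package is importable as `lth`: the `create_model` and `get_model_ids` names and the `create_model` signature are taken from this patch, while the argument values are simply the CIFAR-10 shape used throughout the repository.

```python
from lth.models import create_model, get_model_ids

# After this patch, the registered IDs include 'conv-2', 'conv-4', 'conv-6',
# and 'vgg19' instead of 'vgg5', 'vgg7', 'vgg9', and 'vgg17'
print(get_model_ids())

# Instantiates the Conv-2 variant for CIFAR-10: 32x32 RGB images, 10 classes
model = create_model('conv-2', input_size=(32, 32), number_of_input_channels=3, number_of_classes=10)
print(model.name)  # Conv-2
```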
diff --git a/source/lth/models/__init__.py b/source/lth/models/__init__.py
index bdb4978..2bb5a85 100644
--- a/source/lth/models/__init__.py
+++ b/source/lth/models/__init__.py
@@ -8,8 +8,6 @@
 
 import torch
 
-from lth.datasets import BaseDataset
-
 
 def model_id(new_id: str) -> Callable[[type], type]:
     """A decorator, which adds a model ID to a model class.
@@ -220,7 +218,7 @@ def get_model_classes() -> list[type]:
         module_name = os.path.splitext(os.path.basename(module_path))[0]
         model_modules.append(__import__(f'lth.models.{module_name}', fromlist=['']))
 
-    # Gets the model classes, which are all the classes in the models module and its sub-modules that inherit from BaseDataset
+    # Gets the model classes, which are all the classes in the models module and its sub-modules that inherit from BaseModel
     model_classes = []
     for module in model_modules:
         for _, module_class in inspect.getmembers(module, inspect.isclass):
@@ -246,7 +244,7 @@ def get_model_ids() -> list[str]:
     return model_ids
 
 
-def create_model(id_of_model: str, input_size: tuple, number_of_input_channels: int, number_of_classes: int) -> BaseDataset:
+def create_model(id_of_model: str, input_size: tuple, number_of_input_channels: int, number_of_classes: int) -> BaseModel:
     """Creates the model with the specified name.
 
     Args:
@@ -260,7 +258,7 @@ def create_model(id_of_model: str, input_size: tuple, number_of_input_channels:
         ValueError: When the model with the specified name could not be found, an exception is raised.
 
     Returns:
-        BaseModel: Returns the model with the specified name.
+        BaseModel: Returns the model with the specified name.
     """
 
     # Finds the class for the specified model, all models in this module must have a class-level variable containing a model identifier
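`create_model` locates a class by matching the requested ID against the class-level variable attached by the `model_id` decorator. Only the decorator's signature and docstring appear in this patch, so the attribute name and matching logic in this sketch are assumptions:

```python
from typing import Callable


def model_id(new_id: str) -> Callable[[type], type]:
    """A decorator, which adds a model ID to a model class."""

    def decorator(model_class: type) -> type:
        # Attaches the ID as a class-level variable; the attribute name used
        # here is hypothetical
        model_class.model_id = new_id
        return model_class

    return decorator


@model_id('conv-2')
class Conv2:
    pass


# A factory like create_model can now match the requested ID 'conv-2' against
# the class-level variable of each registered model class
assert Conv2.model_id == 'conv-2'
```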
""" # Finds the class for the specified model, all models in this module must have a class-level variable containing a model identifier diff --git a/source/lth/models/hyperparameters.py b/source/lth/models/hyperparameters.py index cafae23..4839314 100644 --- a/source/lth/models/hyperparameters.py +++ b/source/lth/models/hyperparameters.py @@ -26,13 +26,13 @@ def get_defaults(model_name: str, dataset_name: str, learning_rate: float, batch default_learning_rate, default_batch_size, default_number_of_epochs = 1.2e-3, 60, 50 elif model_name == 'lenet-5' and dataset_name == 'mnist': default_learning_rate, default_batch_size, default_number_of_epochs = 1.2e-3, 60, 50 - elif model_name == 'vgg5' and dataset_name == 'cifar10': + elif model_name == 'conv-2' and dataset_name == 'cifar10': default_learning_rate, default_batch_size, default_number_of_epochs = 2e-4, 60, 20 - elif model_name == 'vgg7' and dataset_name == 'cifar10': + elif model_name == 'conv-4' and dataset_name == 'cifar10': default_learning_rate, default_batch_size, default_number_of_epochs = 3e-4, 60, 25 - elif model_name == 'vgg9' and dataset_name == 'cifar10': + elif model_name == 'conv-6' and dataset_name == 'cifar10': default_learning_rate, default_batch_size, default_number_of_epochs = 3e-4, 60, 30 - elif model_name == 'vgg17' and dataset_name == 'cifar10': + elif model_name == 'vgg19' and dataset_name == 'cifar10': default_learning_rate, default_batch_size, default_number_of_epochs = 3e-4, 64, 112 learning_rate = learning_rate if learning_rate is not None else default_learning_rate diff --git a/source/lth/models/vgg.py b/source/lth/models/vgg.py index a36c622..aa5a011 100644 --- a/source/lth/models/vgg.py +++ b/source/lth/models/vgg.py @@ -1,6 +1,7 @@ """Represents a module that contains the multiple neural network models based on the VGG family of architectures first introduced by K. Simonyan and A. Zisserman in their paper "Very Deep Convolutional Networks for Large-Scale Image Recognition". VGG was named after Oxford's renowned Visual -Geometry Group (VGG). +Geometry Group (VGG). The three architectures, referred to as Conv-2, Conv-4, and Conv-6 are scaled down versions for the use with CIFAR-10 and were +introduced by Frankle et al. in their paper "The Lottery Ticket Hypothesis: Finding Sparse, Trainable Neural Networks". """ import torch @@ -9,14 +10,15 @@ from . import BaseModel -@model_id('vgg5') -class Vgg5(BaseModel): - """Represents a very small VGG-variant with only 5 weight layers. In the original paper by Frankle et al., this is referred to as Conv-2 as it has - 2 convolutional layers. +@model_id('conv-2') +class Conv2(BaseModel): + """Represents a VGG-variant scaled down for CIFAR-10 with only 2 convolutional and 3 fully-connected layers, which was introduced by Frankle et + al. in their paper "The Lottery Ticket Hypothesis: Finding Sparse, Trainable Neural Networks". They refer to this architecture as Conv-2, because + it has 2 convolutional layers. """ def __init__(self, input_size: tuple = (32, 32), number_of_input_channels: int = 3, number_of_classes: int = 10) -> None: - """Initializes a new Vgg2 instance. + """Initializes a new Conv2 instance. 
diff --git a/source/lth/models/vgg.py b/source/lth/models/vgg.py
index a36c622..aa5a011 100644
--- a/source/lth/models/vgg.py
+++ b/source/lth/models/vgg.py
@@ -1,6 +1,7 @@
 """Represents a module that contains the multiple neural network models based on the VGG family of architectures first introduced by K. Simonyan and
 A. Zisserman in their paper "Very Deep Convolutional Networks for Large-Scale Image Recognition". VGG was named after Oxford's renowned Visual
-Geometry Group (VGG).
+Geometry Group (VGG). The architectures referred to as Conv-2, Conv-4, and Conv-6 are scaled-down versions for use with CIFAR-10 and were
+introduced by Frankle et al. in their paper "The Lottery Ticket Hypothesis: Finding Sparse, Trainable Neural Networks".
 """
 
 import torch
@@ -9,14 +10,15 @@
 from . import BaseModel
 
 
-@model_id('vgg5')
-class Vgg5(BaseModel):
-    """Represents a very small VGG-variant with only 5 weight layers. In the original paper by Frankle et al., this is referred to as Conv-2 as it has
-    2 convolutional layers.
+@model_id('conv-2')
+class Conv2(BaseModel):
+    """Represents a VGG-variant scaled down for CIFAR-10 with only 2 convolutional and 3 fully-connected layers, which was introduced by Frankle et
+    al. in their paper "The Lottery Ticket Hypothesis: Finding Sparse, Trainable Neural Networks". They refer to this architecture as Conv-2, because
+    it has 2 convolutional layers.
     """
 
     def __init__(self, input_size: tuple = (32, 32), number_of_input_channels: int = 3, number_of_classes: int = 10) -> None:
-        """Initializes a new Vgg2 instance.
+        """Initializes a new Conv2 instance.
 
         Args:
             input_size (tuple, optional): A tuple containing the edge lengths of the input images, which is the input size of the first convolution of
@@ -31,7 +33,7 @@ def __init__(self, input_size: tuple = (32, 32), number_of_input_channels: int =
         super().__init__()
 
         # Exposes some information about the model architecture
-        self.name = 'VGG5'
+        self.name = 'Conv-2'
         self.pruning_rates = {
             'convolution_1': 0.1,
             'convolution_2': 0.1,
@@ -98,14 +100,15 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         return x
 
 
-@model_id('vgg7')
-class Vgg7(BaseModel):
-    """Represents a small VGG-variant with only 7 weight layers. In the original paper by Frankle et al., this is referred to as Conv-4, as it has 4
-    convolutional layers.
+@model_id('conv-4')
+class Conv4(BaseModel):
+    """Represents a VGG-variant scaled down for CIFAR-10 with only 4 convolutional and 3 fully-connected layers, which was introduced by Frankle et
+    al. in their paper "The Lottery Ticket Hypothesis: Finding Sparse, Trainable Neural Networks". They refer to this architecture as Conv-4, because
+    it has 4 convolutional layers.
     """
 
     def __init__(self, input_size: tuple = (32, 32), number_of_input_channels: int = 3, number_of_classes: int = 10) -> None:
-        """Initializes a new Vgg4 instance.
+        """Initializes a new Conv4 instance.
 
         Args:
             input_size (tuple, optional): A tuple containing the edge lengths of the input images, which is the input size of the first convolution of
@@ -120,7 +123,7 @@ def __init__(self, input_size: tuple = (32, 32), number_of_input_channels: int =
         super().__init__()
 
         # Exposes some information about the model architecture
-        self.name = 'VGG7'
+        self.name = 'Conv-4'
         self.pruning_rates = {
             'convolution_1': 0.1,
             'convolution_2': 0.1,
@@ -213,14 +216,15 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         return x
 
 
-@model_id('vgg9')
-class Vgg9(BaseModel):
-    """Represents a small VGG-variant with only 9 weight layers. In the original paper by Frankle et al., this is referred to as Conv-6, as it has 6
-    convolutional layers.
+@model_id('conv-6')
+class Conv6(BaseModel):
+    """Represents a VGG-variant scaled down for CIFAR-10 with only 6 convolutional and 3 fully-connected layers, which was introduced by Frankle et
+    al. in their paper "The Lottery Ticket Hypothesis: Finding Sparse, Trainable Neural Networks". They refer to this architecture as Conv-6, because
+    it has 6 convolutional layers.
     """
 
     def __init__(self, input_size: tuple = (32, 32), number_of_input_channels: int = 3, number_of_classes: int = 10) -> None:
-        """Initializes a new Vgg6 instance.
+        """Initializes a new Conv6 instance.
 
         Args:
             input_size (tuple, optional): A tuple containing the edge lengths of the input images, which is the input size of the first convolution of
@@ -235,7 +239,7 @@ def __init__(self, input_size: tuple = (32, 32), number_of_input_channels: int =
         super().__init__()
 
         # Exposes some information about the model architecture
-        self.name = 'VGG9'
+        self.name = 'Conv-6'
         self.pruning_rates = {
             'convolution_1': 0.15,
             'convolution_2': 0.15,
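Each renamed class keeps its per-layer `pruning_rates` dictionary (0.1 for the Conv-2 and Conv-4 convolutions, 0.15 for Conv-6). As a hedged sketch of what such a rate means under the magnitude pruning named in the changelog; the repository's actual pruning code is not part of this patch, so this helper is purely illustrative:

```python
import torch


def magnitude_prune_mask(weight: torch.Tensor, pruning_rate: float) -> torch.Tensor:
    """Returns a binary mask that zeroes out the smallest-magnitude weights."""

    number_of_weights_to_prune = int(weight.numel() * pruning_rate)
    if number_of_weights_to_prune == 0:
        return torch.ones_like(weight)

    # The pruning threshold is the magnitude of the k-th smallest weight
    threshold = weight.abs().flatten().kthvalue(number_of_weights_to_prune).values
    return (weight.abs() > threshold).float()


# A convolutional kernel pruned at the Conv-6 rate of 0.15 per iteration
mask = magnitude_prune_mask(torch.randn(64, 3, 3, 3), 0.15)
print(1.0 - mask.mean().item())  # roughly 0.15
```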
@@ -354,12 +358,13 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         return x
 
 
-@model_id('vgg17')
-class Vgg17(BaseModel):
-    """Represents a VGG-variant with 17 weight layers. In the original paper by Frankle et al. this is referred to as VGG19, because it is exactly as
-    VGG19 with the difference, that this version was adapted to CIFAR-10 and is therefore missing 2 fully-connected layers at the end, but it has 16
-    convolutional layers just as VGG19. Another difference to the original VGG19 is that after the last convolutional layer, an average pooling is
-    performed instead of max pooling. This is the same as in the original paper by Frankle et al.
+@model_id('vgg19')
+class Vgg19(BaseModel):
+    """Represents a VGG-variant, which was introduced by Frankle et al. in their paper "The Lottery Ticket Hypothesis: Finding Sparse, Trainable
+    Neural Networks". They refer to this architecture as VGG19, although it is not the same VGG19 architecture first introduced by K. Simonyan and A.
+    Zisserman in their paper "Very Deep Convolutional Networks for Large-Scale Image Recognition". This version was adapted to CIFAR-10 and is
+    therefore missing 2 fully-connected layers at the end, but it has the same 16 convolutional layers as VGG19. Another difference from the
+    original VGG19 is that after the last convolutional layer, average pooling is performed instead of max pooling.
     """
 
     def __init__(self, input_size: tuple = (32, 32), number_of_input_channels: int = 3, number_of_classes: int = 10) -> None:
@@ -378,7 +383,7 @@ def __init__(self, input_size: tuple = (32, 32), number_of_input_channels: int =
         super().__init__()
 
         # Exposes some information about the model architecture
-        self.name = 'VGG17'
+        self.name = 'VGG19'
         self.pruning_rates = {
             'convolution_1': 0.2,
             'convolution_2': 0.2,
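The docstring above calls out the final pooling layer as the other deviation from the original VGG19. A small illustration of that distinction; the channel count of 512 matches standard VGG19, but the tensor shapes here are otherwise made up:

```python
import torch
from torch import nn

# In the original VGG19, the last convolutional block is followed by max
# pooling; the adapted Vgg19 in this repository uses average pooling instead
last_convolution = nn.Conv2d(512, 512, kernel_size=3, padding=1)
original_vgg19_pooling = nn.MaxPool2d(kernel_size=2, stride=2)
adapted_vgg19_pooling = nn.AvgPool2d(kernel_size=2, stride=2)

feature_map = last_convolution(torch.randn(1, 512, 2, 2))

# Max pooling keeps the largest activation in each window, while average
# pooling keeps the mean, so the classifier receives different features
print(original_vgg19_pooling(feature_map)[0, 0])
print(adapted_vgg19_pooling(feature_map)[0, 0])
```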