diff --git a/segmentation_models/backbones/backbones_factory.py b/segmentation_models/backbones/backbones_factory.py
index 7d2a3b9e..1fff5407 100644
--- a/segmentation_models/backbones/backbones_factory.py
+++ b/segmentation_models/backbones/backbones_factory.py
@@ -4,7 +4,8 @@
 from . import inception_resnet_v2 as irv2
 from . import inception_v3 as iv3
-
+from . import darknet53 as dkn53
+from . import mobilenet_v3 as mbnv3
 
 
 class BackbonesFactory(ModelsFactory):
     _default_feature_layers = {
@@ -51,6 +52,9 @@
         'mobilenet': ('conv_pw_11_relu', 'conv_pw_5_relu', 'conv_pw_3_relu', 'conv_pw_1_relu'),
         'mobilenetv2': ('block_13_expand_relu', 'block_6_expand_relu', 'block_3_expand_relu', 'block_1_expand_relu'),
+        'mobilenetv3': ('Conv_1', 'activation_29', 'activation_15', 'activation_6'),
+        # 'mobilenetv3large': ('Conv_1', 'activation_29', 'activation_15', 'activation_6'),
+        'mobilenetv3small': ('activation_31', 'activation_22', 'activation_7', 'activation_3'),
 
         # EfficientNets
         'efficientnetb0': ('block6a_expand_activation', 'block4a_expand_activation',
                            'block3a_expand_activation', 'block2a_expand_activation'),
@@ -70,8 +74,15 @@
         'efficientnetb7': ('block6a_expand_activation', 'block4a_expand_activation',
                            'block3a_expand_activation', 'block2a_expand_activation'),
+
+        # DarkNets
+        'darknet53': ('activation_58', 'activation_37', 'activation_16', 'activation_7'),  # 204 equals conv2d_58 (14, 14, 512), 131 equals conv2d_37 (28, 28, 256)
+        # 'darknet53': (204, 131, 'activation_16', 'activation_7'),
+
 
     }
 
+
     _models_update = {
         'inceptionresnetv2': [irv2.InceptionResNetV2, irv2.preprocess_input],
         'inceptionv3': [iv3.InceptionV3, iv3.preprocess_input],
@@ -84,8 +95,12 @@
         'efficientnetb5': [eff.EfficientNetB5, eff.preprocess_input],
         'efficientnetb6': [eff.EfficientNetB6, eff.preprocess_input],
         'efficientnetb7': [eff.EfficientNetB7, eff.preprocess_input],
-    }
+        'darknet53': [dkn53.csp_darknet53, dkn53.preprocess_input],
+
+        'mobilenetv3': [mbnv3.MobileNetV3Large, mbnv3.preprocess_input],
+        'mobilenetv3small': [mbnv3.MobileNetV3Small, mbnv3.preprocess_input],
+    }
 
     # currently not supported
     _models_delete = ['resnet50v2', 'resnet101v2', 'resnet152v2',
                       'nasnetlarge', 'nasnetmobile', 'xception']
diff --git a/segmentation_models/backbones/darknet53.py b/segmentation_models/backbones/darknet53.py
new file mode 100644
index 00000000..06c5400b
--- /dev/null
+++ b/segmentation_models/backbones/darknet53.py
@@ -0,0 +1,230 @@
+import os
+
+from keras_applications.imagenet_utils import _obtain_input_shape
+from tensorflow.python.keras import Input, Model
+from tensorflow.python.keras.applications import imagenet_utils
+from tensorflow.python.keras.utils.data_utils import get_file
+from tensorflow.python.keras.utils.layer_utils import get_source_inputs
+from functools import wraps, reduce
+
+import tensorflow.keras.backend as K
+from tensorflow.keras.layers import Conv2D, DepthwiseConv2D, Concatenate, MaxPooling2D, BatchNormalization, \
+    Activation, UpSampling2D, ZeroPadding2D, GlobalAveragePooling2D, Reshape, Flatten, Softmax, GlobalMaxPooling2D, Add
+from tensorflow.keras.layers import LeakyReLU
+from tensorflow.keras.regularizers import l2
+
+BASE_WEIGHT_PATH = (
+    'https://github.com/david8862/keras-YOLOv3-model-set/'
+    'releases/download/v1.0.1/')
+
+
+def compose(*funcs):
+    """Compose arbitrarily many functions, evaluated left to right.
+
+    Reference: https://mathieularose.com/function-composition-in-python/
+    """
+    # return lambda x: reduce(lambda v, f: f(v), funcs, x)
+    if funcs:
+        return reduce(lambda f, g: lambda *a, **kw: g(f(*a, **kw)), funcs)
+    else:
+        raise ValueError('Composition of empty sequence not supported.')
+
+
+@wraps(Conv2D)
+def DarknetConv2D(*args, **kwargs):
+    """Wrapper to set Darknet parameters for Convolution2D."""
+    darknet_conv_kwargs = {'kernel_regularizer': l2(5e-4)}
+    darknet_conv_kwargs['padding'] = 'valid' if kwargs.get('strides') == (2, 2) else 'same'
+    darknet_conv_kwargs.update(kwargs)
+    return Conv2D(*args, **darknet_conv_kwargs)
+
+
+@wraps(DepthwiseConv2D)
+def DarknetDepthwiseConv2D(*args, **kwargs):
+    """Wrapper to set Darknet parameters for DepthwiseConv2D."""
+    darknet_conv_kwargs = {'kernel_regularizer': l2(5e-4)}
+    darknet_conv_kwargs['padding'] = 'valid' if kwargs.get('strides') == (2, 2) else 'same'
+    darknet_conv_kwargs.update(kwargs)
+    return DepthwiseConv2D(*args, **darknet_conv_kwargs)
+
+
+def Darknet_Depthwise_Separable_Conv2D_BN_Leaky(filters, kernel_size=(3, 3), block_id_str=None, **kwargs):
+    """Depthwise Separable Convolution2D."""
+    if not block_id_str:
+        block_id_str = str(K.get_uid())
+    no_bias_kwargs = {'use_bias': False}
+    no_bias_kwargs.update(kwargs)
+    return compose(
+        DarknetDepthwiseConv2D(kernel_size, name='conv_dw_' + block_id_str, **no_bias_kwargs),
+        BatchNormalization(name='conv_dw_%s_bn' % block_id_str),
+        LeakyReLU(alpha=0.1, name='conv_dw_%s_leaky_relu' % block_id_str),
+        Conv2D(filters, (1, 1), padding='same', use_bias=False, strides=(1, 1), name='conv_pw_%s' % block_id_str),
+        BatchNormalization(name='conv_pw_%s_bn' % block_id_str),
+        LeakyReLU(alpha=0.1, name='conv_pw_%s_leaky_relu' % block_id_str))
+
+
+def Depthwise_Separable_Conv2D_BN_Leaky(filters, kernel_size=(3, 3), block_id_str=None):
+    """Depthwise Separable Convolution2D."""
+    if not block_id_str:
+        block_id_str = str(K.get_uid())
+    return compose(
+        DepthwiseConv2D(kernel_size, padding='same', name='conv_dw_' + block_id_str),
+        BatchNormalization(name='conv_dw_%s_bn' % block_id_str),
+        LeakyReLU(alpha=0.1, name='conv_dw_%s_leaky_relu' % block_id_str),
+        Conv2D(filters, (1, 1), padding='same', use_bias=False, strides=(1, 1), name='conv_pw_%s' % block_id_str),
+        BatchNormalization(name='conv_pw_%s_bn' % block_id_str),
+        LeakyReLU(alpha=0.1, name='conv_pw_%s_leaky_relu' % block_id_str))
+
+
+def DarknetConv2D_BN_Leaky(*args, **kwargs):
+    """Darknet Convolution2D followed by BatchNormalization and LeakyReLU."""
+    no_bias_kwargs = {'use_bias': False}
+    no_bias_kwargs.update(kwargs)
+    return compose(
+        DarknetConv2D(*args, **no_bias_kwargs),
+        BatchNormalization(),
+        LeakyReLU(alpha=0.1))
+
+
+def mish(x):
+    return x * K.tanh(K.softplus(x))
+
+
+def DarknetConv2D_BN_Mish(*args, **kwargs):
+    """Darknet Convolution2D followed by BatchNormalization and Mish activation."""
+    no_bias_kwargs = {'use_bias': False}
+    no_bias_kwargs.update(kwargs)
+    return compose(
+        DarknetConv2D(*args, **no_bias_kwargs),
+        BatchNormalization(),
+        Activation(mish))
+
+
+def Spp_Conv2D_BN_Leaky(x, num_filters):
+    y1 = MaxPooling2D(pool_size=(5, 5), strides=(1, 1), padding='same')(x)
+    y2 = MaxPooling2D(pool_size=(9, 9), strides=(1, 1), padding='same')(x)
+    y3 = MaxPooling2D(pool_size=(13, 13), strides=(1, 1), padding='same')(x)
+
+    y = compose(
+        Concatenate(),
+        DarknetConv2D_BN_Leaky(num_filters, (1, 1)))([y1, y2, y3, x])
+    return y
+
+
+def resblock_body(x, num_filters, num_blocks, all_narrow=True):
+    '''A series of resblocks starting with a downsampling Convolution2D.'''
+    # Darknet uses left and top padding instead of 'same' mode
+    x = ZeroPadding2D(((1, 0), (1, 0)))(x)
+    x = DarknetConv2D_BN_Mish(num_filters, (3, 3), strides=(2, 2))(x)
+
+    res_connection = DarknetConv2D_BN_Mish(num_filters // 2 if all_narrow else num_filters, (1, 1))(x)
+    x = DarknetConv2D_BN_Mish(num_filters // 2 if all_narrow else num_filters, (1, 1))(x)
+
+    for i in range(num_blocks):
+        y = compose(
+            DarknetConv2D_BN_Mish(num_filters // 2, (1, 1)),
+            DarknetConv2D_BN_Mish(num_filters // 2 if all_narrow else num_filters, (3, 3)))(x)
+        x = Add()([x, y])
+
+    x = DarknetConv2D_BN_Mish(num_filters // 2 if all_narrow else num_filters, (1, 1))(x)
+    x = Concatenate()([x, res_connection])
+
+    return DarknetConv2D_BN_Mish(num_filters, (1, 1))(x)
+
+
+def csp_darknet53_body(x):
+    '''CSPDarknet53 body having 52 Convolution2D layers.'''
+    x = DarknetConv2D_BN_Mish(32, (3, 3))(x)
+    x = resblock_body(x, 64, 1, False)
+    x = resblock_body(x, 128, 2)
+    x = resblock_body(x, 256, 8)
+    x = resblock_body(x, 512, 8)
+    x = resblock_body(x, 1024, 4)
+    return x
+
+
+def csp_darknet53(input_shape=None,
+                  input_tensor=None,
+                  include_top=True,
+                  weights='imagenet',
+                  pooling=None,
+                  classes=1000,
+                  **kwargs):
+    """Generate a CSPDarknet53 model for ImageNet classification."""
+
+    if not (weights in {'imagenet', None} or os.path.exists(weights)):
+        raise ValueError('The `weights` argument should be either '
+                         '`None` (random initialization), `imagenet` '
+                         '(pre-training on ImageNet), '
+                         'or the path to the weights file to be loaded.')
+
+    if weights == 'imagenet' and include_top and classes != 1000:
+        raise ValueError('If using `weights` as `"imagenet"` with `include_top`'
+                         ' as true, `classes` should be 1000')
+
+    # Determine proper input shape
+    input_shape = _obtain_input_shape(input_shape,
+                                      default_size=224,
+                                      min_size=28,
+                                      data_format=K.image_data_format(),
+                                      require_flatten=include_top,
+                                      weights=weights)
+
+    if input_tensor is None:
+        img_input = Input(shape=input_shape)
+    else:
+        img_input = input_tensor
+
+    x = csp_darknet53_body(img_input)
+
+    if include_top:
+        model_name = 'cspdarknet53'
+        x = GlobalAveragePooling2D(name='avg_pool')(x)
+        x = Reshape((1, 1, 1024))(x)
+        x = DarknetConv2D(classes, (1, 1))(x)
+        x = Flatten()(x)
+        x = Softmax(name='Predictions/Softmax')(x)
+    else:
+        model_name = 'cspdarknet53_headless'
+        if pooling == 'avg':
+            x = GlobalAveragePooling2D(name='avg_pool')(x)
+        elif pooling == 'max':
+            x = GlobalMaxPooling2D(name='max_pool')(x)
+
+    # Ensure that the model takes into account
+    # any potential predecessors of `input_tensor`.
+    if input_tensor is not None:
+        inputs = get_source_inputs(input_tensor)
+    else:
+        inputs = img_input
+
+    # Create model.
+    model = Model(inputs, x, name=model_name)
+
+    # Load weights.
+    if weights == 'imagenet':
+        if include_top:
+            file_name = 'cspdarknet53_weights_tf_dim_ordering_tf_kernels_224.h5'
+            weight_path = BASE_WEIGHT_PATH + file_name
+        else:
+            file_name = 'cspdarknet53_weights_tf_dim_ordering_tf_kernels_224_no_top.h5'
+            weight_path = BASE_WEIGHT_PATH + file_name
+
+        weights_path = get_file(file_name, weight_path, cache_subdir='models')
+        model.load_weights(weights_path)
+    elif weights is not None:
+        model.load_weights(weights)
+
+    return model
+
+
+def preprocess_input(x, **kwargs):
+    """Preprocesses a numpy array encoding a batch of images.
+    # Arguments
+        x: a 4D numpy array consisting of RGB values within [0, 255].
+    # Returns
+        Preprocessed array.
+    """
+    return imagenet_utils.preprocess_input(x, mode='tf', **kwargs)
\ No newline at end of file
diff --git a/segmentation_models/backbones/mobilenet_v3.py b/segmentation_models/backbones/mobilenet_v3.py
new file mode 100644
index 00000000..d898b994
--- /dev/null
+++ b/segmentation_models/backbones/mobilenet_v3.py
@@ -0,0 +1,634 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+#
+# A tf.keras implementation of mobilenet_v3,
+# which is ported from https://github.com/keras-team/keras-applications/blob/master/keras_applications/mobilenet_v3.py
+#
+# Reference
+# [Searching for MobileNetV3](https://arxiv.org/abs/1905.02244?context=cs)
+#
+
+"""MobileNet v3 models for Keras.
+
+The following table describes the performance of MobileNets
+(MACs stands for Multiply Adds):
+
+| Classification Checkpoint                 | MACs(M) | Parameters(M) | Top1 Accuracy | Pixel1 CPU(ms) |
+|-------------------------------------------|---------|---------------|---------------|----------------|
+| [mobilenet_v3_large_1.0_224]              | 217     | 5.4           | 75.6          | 51.2           |
+| [mobilenet_v3_large_0.75_224]             | 155     | 4.0           | 73.3          | 39.8           |
+| [mobilenet_v3_large_minimalistic_1.0_224] | 209     | 3.9           | 72.3          | 44.1           |
+| [mobilenet_v3_small_1.0_224]              | 66      | 2.9           | 68.1          | 15.8           |
+| [mobilenet_v3_small_0.75_224]             | 44      | 2.4           | 65.4          | 12.8           |
+| [mobilenet_v3_small_minimalistic_1.0_224] | 65      | 2.0           | 61.9          | 12.2           |
+
+The weights for all 6 models were obtained and translated from the
+TensorFlow checkpoints found [here]
+(https://github.com/tensorflow/models/tree/master/research/
+slim/nets/mobilenet/README.md).
+
+# Reference
+This file contains building code for MobileNetV3, based on
+[Searching for MobileNetV3]
+(https://arxiv.org/pdf/1905.02244.pdf) (ICCV 2019)
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import warnings
+
+from keras_applications.imagenet_utils import _obtain_input_shape
+from keras_applications.imagenet_utils import preprocess_input as _preprocess_input
+from tensorflow.keras.utils import get_source_inputs, get_file
+from tensorflow.keras.layers import Conv2D, DepthwiseConv2D, Dense, GlobalAveragePooling2D, GlobalMaxPooling2D, Flatten, Softmax, Dropout, ZeroPadding2D
+from tensorflow.keras.layers import BatchNormalization, Add, Multiply, Reshape
+from tensorflow.keras.layers import Input, Activation, ReLU, Lambda
+from tensorflow.keras.models import Model
+from tensorflow.keras import backend as K
+
+# backend = None
+# layers = None
+# models = None
+# keras_utils = None
+from tensorflow.python.keras.applications import imagenet_utils
+
+BASE_WEIGHT_PATH = ('https://github.com/DrSlink/mobilenet_v3_keras/'
+                    'releases/download/v1.0/')
+WEIGHTS_HASHES = {
+    'large_224_0.75_float': (
+        '765b44a33ad4005b3ac83185abf1d0eb',
+        'c256439950195a46c97ede7c294261c6'),
+    'large_224_1.0_float': (
+        '59e551e166be033d707958cf9e29a6a7',
+        '12c0a8442d84beebe8552addf0dcb950'),
+    'large_minimalistic_224_1.0_float': (
+        '675e7b876c45c57e9e63e6d90a36599c',
+        'c1cddbcde6e26b60bdce8e6e2c7cae54'),
+    'small_224_0.75_float': (
+        'cb65d4e5be93758266aa0a7f2c6708b7',
+        'c944bb457ad52d1594392200b48b4ddb'),
+    'small_224_1.0_float': (
+        '8768d4c2e7dee89b9d02b2d03d65d862',
+        '5bec671f47565ab30e540c257bba8591'),
+    'small_minimalistic_224_1.0_float': (
+        '99cd97fb2fcdad2bf028eb838de69e37',
+        '1efbf7e822e03f250f45faa3c6bbe156'),
+}
+
+
+def correct_pad(backend, inputs, kernel_size):
+    """Returns a tuple for zero-padding for 2D convolution with downsampling.
+    # Arguments
+        inputs: Input tensor.
+        kernel_size: An integer or tuple/list of 2 integers.
+    # Returns
+        A tuple.
+    """
+    img_dim = 2 if backend.image_data_format() == 'channels_first' else 1
+    input_size = backend.int_shape(inputs)[img_dim:(img_dim + 2)]
+
+    if isinstance(kernel_size, int):
+        kernel_size = (kernel_size, kernel_size)
+
+    if input_size[0] is None:
+        adjust = (1, 1)
+    else:
+        adjust = (1 - input_size[0] % 2, 1 - input_size[1] % 2)
+
+    correct = (kernel_size[0] // 2, kernel_size[1] // 2)
+
+    return ((correct[0] - adjust[0], correct[0]),
+            (correct[1] - adjust[1], correct[1]))
+
+
+def preprocess_input(x):
+    """
+    "mode" option description in preprocess_input
+    mode: One of "caffe", "tf" or "torch".
+        - caffe: will convert the images from RGB to BGR,
+            then will zero-center each color channel with
+            respect to the ImageNet dataset,
+            without scaling.
+        - tf: will scale pixels between -1 and 1,
+            sample-wise.
+        - torch: will scale pixels between 0 and 1 and then
+            will normalize each channel with respect to the
+            ImageNet dataset.
+    """
+    x = _preprocess_input(x, mode='tf', backend=K)
+    # x /= 255.
+    # mean = [0.485, 0.456, 0.406]
+    # std = [0.229, 0.224, 0.225]
+
+    # x[..., 0] -= mean[0]
+    # x[..., 1] -= mean[1]
+    # x[..., 2] -= mean[2]
+    # if std is not None:
+    #     x[..., 0] /= std[0]
+    #     x[..., 1] /= std[1]
+    #     x[..., 2] /= std[2]
+
+    return x
+
+
+def relu(x):
+    return ReLU()(x)
+
+
+def hard_sigmoid(x):
+    return ReLU(6.)(x + 3.) * (1. / 6.)
+
+
+def hard_swish(x):
+    return Multiply()([Activation(hard_sigmoid)(x), x])
+
+
+# This function is taken from the original tf repo.
+# It ensures that all layers have a channel number that is divisible by 8.
+# It can be seen here:
+# https://github.com/tensorflow/models/blob/master/research/
+# slim/nets/mobilenet/mobilenet.py
+
+def _depth(v, divisor=8, min_value=None):
+    if min_value is None:
+        min_value = divisor
+    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
+    # Make sure that round down does not go down by more than 10%.
+    if new_v < 0.9 * v:
+        new_v += divisor
+    return new_v
+
+
+def _se_block(inputs, filters, se_ratio, prefix):
+    x = GlobalAveragePooling2D(name=prefix + 'squeeze_excite/AvgPool')(inputs)
+    if K.image_data_format() == 'channels_first':
+        x = Reshape((filters, 1, 1))(x)
+    else:
+        x = Reshape((1, 1, filters))(x)
+    x = Conv2D(_depth(filters * se_ratio),
+               kernel_size=1,
+               padding='same',
+               name=prefix + 'squeeze_excite/Conv')(x)
+    x = ReLU(name=prefix + 'squeeze_excite/Relu')(x)
+    x = Conv2D(filters,
+               kernel_size=1,
+               padding='same',
+               name=prefix + 'squeeze_excite/Conv_1')(x)
+    x = Activation(hard_sigmoid)(x)
+    # if K.backend() == 'theano':
+    #     # For the Theano backend, we have to explicitly make
+    #     # the excitation weights broadcastable.
+    #     x = Lambda(
+    #         lambda br: K.pattern_broadcast(br, [True, True, True, False]),
+    #         output_shape=lambda input_shape: input_shape,
+    #         name=prefix + 'squeeze_excite/broadcast')(x)
+    x = Multiply(name=prefix + 'squeeze_excite/Mul')([inputs, x])
+    return x
+
+
+def _inverted_res_block(x, expansion, filters, kernel_size, stride,
+                        se_ratio, activation, block_id):
+    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1
+    shortcut = x
+    prefix = 'expanded_conv/'
+    infilters = K.int_shape(x)[channel_axis]
+    if block_id:
+        # Expand
+        prefix = 'expanded_conv_{}/'.format(block_id)
+        x = Conv2D(_depth(infilters * expansion),
+                   kernel_size=1,
+                   padding='same',
+                   use_bias=False,
+                   name=prefix + 'expand')(x)
+        x = BatchNormalization(axis=channel_axis,
+                               epsilon=1e-3,
+                               momentum=0.999,
+                               name=prefix + 'expand/BatchNorm')(x)
+        x = Activation(activation)(x)
+
+    if stride == 2:
+        x = ZeroPadding2D(padding=correct_pad(K, x, kernel_size),
+                          name=prefix + 'depthwise/pad')(x)
+    x = DepthwiseConv2D(kernel_size,
+                        strides=stride,
+                        padding='same' if stride == 1 else 'valid',
+                        use_bias=False,
+                        name=prefix + 'depthwise/Conv')(x)
+    x = BatchNormalization(axis=channel_axis,
+                           epsilon=1e-3,
+                           momentum=0.999,
+                           name=prefix + 'depthwise/BatchNorm')(x)
+    x = Activation(activation)(x)
+
+    if se_ratio:
+        x = _se_block(x, _depth(infilters * expansion), se_ratio, prefix)
+
+    x = Conv2D(filters,
+               kernel_size=1,
+               padding='same',
+               use_bias=False,
+               name=prefix + 'project')(x)
+    x = BatchNormalization(axis=channel_axis,
+                           epsilon=1e-3,
+                           momentum=0.999,
+                           name=prefix + 'project/BatchNorm')(x)
+
+    if stride == 1 and infilters == filters:
+        x = Add(name=prefix + 'Add')([shortcut, x])
+    return x
+
+
+def MobileNetV3(stack_fn,
+                last_point_ch,
+                input_shape=None,
+                alpha=1.0,
+                model_type='large',
+                minimalistic=False,
+                include_top=True,
+                weights='imagenet',
+                input_tensor=None,
+                classes=1000,
+                pooling=None,
+                dropout_rate=0.2,
+                **kwargs):
+    """Instantiates the MobileNetV3 architecture.
+    # Arguments
+        stack_fn: a function that returns the output tensor for the
+            stacked residual blocks.
+        last_point_ch: number of channels at the last layer (before top)
+        input_shape: optional shape tuple, to be specified if you would
+            like to use a model with an input img resolution that is not
+            (224, 224, 3).
+            It should have exactly 3 input channels.
+            You can also omit this option if you would like
+            to infer input_shape from an input_tensor.
+            If you choose to include both input_tensor and input_shape then
+            input_shape will be used if they match; if the shapes
+            do not match then an error is raised.
+            E.g. `(160, 160, 3)` would be one valid value.
+        alpha: controls the width of the network. This is known as the
+            depth multiplier in the MobileNetV3 paper, but the name is kept for
+            consistency with MobileNetV1 in Keras.
+            - If `alpha` < 1.0, proportionally decreases the number
+                of filters in each layer.
+            - If `alpha` > 1.0, proportionally increases the number
+                of filters in each layer.
+            - If `alpha` = 1, default number of filters from the paper
+                are used at each layer.
+        model_type: MobileNetV3 is defined as two models: large and small. These
+            models are targeted at high and low resource use cases respectively.
+        minimalistic: in addition to large and small models this module also
+            contains so-called minimalistic models; these models have the same
+            per-layer dimensions characteristic as MobilenetV3, however, they don't
+            utilize any of the advanced blocks (squeeze-and-excite units, hard-swish,
+            and 5x5 convolutions). While these models are less efficient on CPU, they
+            are much more performant on GPU/DSP.
+        include_top: whether to include the fully-connected
+            layer at the top of the network.
+        weights: one of `None` (random initialization),
+            'imagenet' (pre-training on ImageNet),
+            or the path to the weights file to be loaded.
+        input_tensor: optional Keras tensor (i.e. output of
+            `layers.Input()`)
+            to use as image input for the model.
+        classes: optional number of classes to classify images
+            into, only to be specified if `include_top` is True, and
+            if no `weights` argument is specified.
+        pooling: optional pooling mode for feature extraction
+            when `include_top` is `False`.
+            - `None` means that the output of the model will be
+                the 4D tensor output of the
+                last convolutional layer.
+            - `avg` means that global average pooling
+                will be applied to the output of the
+                last convolutional layer, and thus
+                the output of the model will be a 2D tensor.
+            - `max` means that global max pooling will
+                be applied.
+        dropout_rate: fraction of the input units to drop on the last layer
+    # Returns
+        A Keras model instance.
+    # Raises
+        ValueError: in case of invalid model type, argument for `weights`,
+            or invalid input shape when weights='imagenet'
+    """
+    # global backend, layers, models, keras_utils
+    # backend, layers, models, keras_utils = get_submodules_from_kwargs(kwargs)
+
+    if not (weights in {'imagenet', None} or os.path.exists(weights)):
+        raise ValueError('The `weights` argument should be either '
+                         '`None` (random initialization), `imagenet` '
+                         '(pre-training on ImageNet), '
+                         'or the path to the weights file to be loaded.')
+
+    if weights == 'imagenet' and include_top and classes != 1000:
+        raise ValueError('If using `weights` as `"imagenet"` with `include_top` '
+                         'as true, `classes` should be 1000')
+
+    # Determine proper input shape
+    input_shape = _obtain_input_shape(input_shape,
+                                      default_size=224,
+                                      min_size=32,
+                                      data_format=K.image_data_format(),
+                                      require_flatten=include_top,
+                                      weights=weights)
+
+    # Determine proper input shape and default size.
+    # If both input_shape and input_tensor are used, they should match
+    # if input_shape is not None and input_tensor is not None:
+    #     try:
+    #         is_input_t_tensor = K.is_keras_tensor(input_tensor)
+    #     except ValueError:
+    #         try:
+    #             is_input_t_tensor = K.is_keras_tensor(
+    #                 get_source_inputs(input_tensor))
+    #         except ValueError:
+    #             raise ValueError('input_tensor: ', input_tensor,
+    #                              'is not type input_tensor')
+    #     if is_input_t_tensor:
+    #         if K.image_data_format == 'channels_first':
+    #             if K.int_shape(input_tensor)[1] != input_shape[1]:
+    #                 raise ValueError('input_shape: ', input_shape,
+    #                                  'and input_tensor: ', input_tensor,
+    #                                  'do not meet the same shape requirements')
+    #         else:
+    #             if K.int_shape(input_tensor)[2] != input_shape[1]:
+    #                 raise ValueError('input_shape: ', input_shape,
+    #                                  'and input_tensor: ', input_tensor,
+    #                                  'do not meet the same shape requirements')
+    #     else:
+    #         raise ValueError('input_tensor specified: ', input_tensor,
+    #                          'is not a keras tensor')
+
+    # If input_shape is None, infer shape from input_tensor
+    # if input_shape is None and input_tensor is not None:
+    #     try:
+    #         K.is_keras_tensor(input_tensor)
+    #     except ValueError:
+    #         raise ValueError('input_tensor: ', input_tensor,
+    #                          'is type: ', type(input_tensor),
+    #                          'which is not a valid type')
+
+    #     if K.is_keras_tensor(input_tensor):
+    #         if K.image_data_format() == 'channels_first':
+    #             rows = K.int_shape(input_tensor)[2]
+    #             cols = K.int_shape(input_tensor)[3]
+    #             input_shape = (3, cols, rows)
+    #         else:
+    #             rows = K.int_shape(input_tensor)[1]
+    #             cols = K.int_shape(input_tensor)[2]
+    #             input_shape = (cols, rows, 3)
+
+    # If input_shape is None and input_tensor is None, use a standard shape
+    if input_shape is None and input_tensor is None:
+        input_shape = (None, None, 3)
+
+    if K.image_data_format() == 'channels_last':
+        row_axis, col_axis = (0, 1)
+    else:
+        row_axis, col_axis = (1, 2)
+    rows = input_shape[row_axis]
+    cols = input_shape[col_axis]
+    if rows and cols and (rows < 32 or cols < 32):
+        raise ValueError('Input size must be at least 32x32; got `input_shape=' +
+                         str(input_shape) + '`')
+    if weights == 'imagenet':
+        if minimalistic is False and alpha not in [0.75, 1.0] \
+                or minimalistic is True and alpha != 1.0:
+            raise ValueError('If imagenet weights are being loaded, '
+                             'alpha can be one of `0.75`, `1.0` for non-minimalistic'
+                             ' or `1.0` for minimalistic only.')
+
+        if rows != cols or rows != 224:
+            warnings.warn('`input_shape` is undefined or non-square, '
+                          'or `rows` is not 224.'
+                          ' Weights for input shape (224, 224) will be'
+                          ' loaded as the default.')
+
+    if input_tensor is None:
+        img_input = Input(shape=input_shape)
+    else:
+        # if not K.is_keras_tensor(input_tensor):
+        #     img_input = Input(tensor=input_tensor, shape=input_shape)
+        # else:
+        #     img_input = input_tensor
+        img_input = input_tensor
+
+    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1
+
+    if minimalistic:
+        kernel = 3
+        activation = relu
+        se_ratio = None
+    else:
+        kernel = 5
+        activation = hard_swish
+        se_ratio = 0.25
+
+    x = ZeroPadding2D(padding=correct_pad(K, img_input, 3),
+                      name='Conv_pad')(img_input)
+    x = Conv2D(16,
+               kernel_size=3,
+               strides=(2, 2),
+               padding='valid',
+               use_bias=False,
+               name='Conv')(x)
+    x = BatchNormalization(axis=channel_axis,
+                           epsilon=1e-3,
+                           momentum=0.999,
+                           name='Conv/BatchNorm')(x)
+    x = Activation(activation)(x)
+
+    x = stack_fn(x, kernel, activation, se_ratio)
+
+    last_conv_ch = _depth(K.int_shape(x)[channel_axis] * 6)
+
+    # if the width multiplier is greater than 1 we
+    # increase the number of output channels
+    if alpha > 1.0:
+        last_point_ch = _depth(last_point_ch * alpha)
+
+    x = Conv2D(last_conv_ch,
+               kernel_size=1,
+               padding='same',
+               use_bias=False,
+               name='Conv_1')(x)
+    x = BatchNormalization(axis=channel_axis,
+                           epsilon=1e-3,
+                           momentum=0.999,
+                           name='Conv_1/BatchNorm')(x)
+    x = Activation(activation)(x)
+
+    if include_top:
+        x = GlobalAveragePooling2D()(x)
+        if channel_axis == 1:
+            x = Reshape((last_conv_ch, 1, 1))(x)
+        else:
+            x = Reshape((1, 1, last_conv_ch))(x)
+        x = Conv2D(last_point_ch,
+                   kernel_size=1,
+                   padding='same',
+                   name='Conv_2')(x)
+        x = Activation(activation)(x)
+        if dropout_rate > 0:
+            x = Dropout(dropout_rate)(x)
+        x = Conv2D(classes,
+                   kernel_size=1,
+                   padding='same',
+                   name='Logits')(x)
+        x = Flatten()(x)
+        x = Softmax(name='Predictions/Softmax')(x)
+    else:
+        if pooling == 'avg':
+            x = GlobalAveragePooling2D(name='avg_pool')(x)
+        elif pooling == 'max':
+            x = GlobalMaxPooling2D(name='max_pool')(x)
+
+    # Ensure that the model takes into account
+    # any potential predecessors of `input_tensor`.
+    if input_tensor is not None:
+        inputs = get_source_inputs(input_tensor)
+    else:
+        inputs = img_input
+
+    # Create model.
+    model = Model(inputs, x, name='MobilenetV3' + model_type)
+
+    # Load weights.
+    if weights == 'imagenet':
+        model_name = "{}{}_224_{}_float".format(
+            model_type, '_minimalistic' if minimalistic else '', str(alpha))
+        if include_top:
+            file_name = 'weights_mobilenet_v3_' + model_name + '.h5'
+            file_hash = WEIGHTS_HASHES[model_name][0]
+        else:
+            file_name = 'weights_mobilenet_v3_' + model_name + '_no_top.h5'
+            file_hash = WEIGHTS_HASHES[model_name][1]
+        weights_path = get_file(file_name,
+                                BASE_WEIGHT_PATH + file_name,
+                                cache_subdir='models',
+                                file_hash=file_hash)
+        model.load_weights(weights_path)
+    elif weights is not None:
+        model.load_weights(weights)
+
+    return model
+
+
+def MobileNetV3Small(input_shape=None,
+                     alpha=1.0,
+                     minimalistic=False,
+                     include_top=True,
+                     weights='imagenet',
+                     input_tensor=None,
+                     classes=1000,
+                     pooling=None,
+                     dropout_rate=0.2,
+                     **kwargs):
+    def stack_fn(x, kernel, activation, se_ratio):
+        def depth(d):
+            return _depth(d * alpha)
+        x = _inverted_res_block(x, 1, depth(16), 3, 2, se_ratio, relu, 0)
+        x = _inverted_res_block(x, 72. / 16, depth(24), 3, 2, None, relu, 1)
+        x = _inverted_res_block(x, 88. / 24, depth(24), 3, 1, None, relu, 2)
+        x = _inverted_res_block(x, 4, depth(40), kernel, 2, se_ratio, activation, 3)
+        x = _inverted_res_block(x, 6, depth(40), kernel, 1, se_ratio, activation, 4)
+        x = _inverted_res_block(x, 6, depth(40), kernel, 1, se_ratio, activation, 5)
+        x = _inverted_res_block(x, 3, depth(48), kernel, 1, se_ratio, activation, 6)
+        x = _inverted_res_block(x, 3, depth(48), kernel, 1, se_ratio, activation, 7)
+        x = _inverted_res_block(x, 6, depth(96), kernel, 2, se_ratio, activation, 8)
+        x = _inverted_res_block(x, 6, depth(96), kernel, 1, se_ratio, activation, 9)
+        x = _inverted_res_block(x, 6, depth(96), kernel, 1, se_ratio, activation, 10)
+        return x
+    return MobileNetV3(stack_fn,
+                       1024,
+                       input_shape,
+                       alpha,
+                       'small',
+                       minimalistic,
+                       include_top,
+                       weights,
+                       input_tensor,
+                       classes,
+                       pooling,
+                       dropout_rate,
+                       **kwargs)
+
+
+def MobileNetV3Large(input_shape=None,
+                     alpha=1.0,
+                     minimalistic=False,
+                     include_top=True,
+                     weights='imagenet',
+                     input_tensor=None,
+                     classes=1000,
+                     pooling=None,
+                     dropout_rate=0.2,
+                     **kwargs):
+    def stack_fn(x, kernel, activation, se_ratio):
+        def depth(d):
+            return _depth(d * alpha)
+        x = _inverted_res_block(x, 1, depth(16), 3, 1, None, relu, 0)
+        x = _inverted_res_block(x, 4, depth(24), 3, 2, None, relu, 1)
+        x = _inverted_res_block(x, 3, depth(24), 3, 1, None, relu, 2)
+        x = _inverted_res_block(x, 3, depth(40), kernel, 2, se_ratio, relu, 3)
+        x = _inverted_res_block(x, 3, depth(40), kernel, 1, se_ratio, relu, 4)
+        x = _inverted_res_block(x, 3, depth(40), kernel, 1, se_ratio, relu, 5)
+        x = _inverted_res_block(x, 6, depth(80), 3, 2, None, activation, 6)
+        x = _inverted_res_block(x, 2.5, depth(80), 3, 1, None, activation, 7)
+        x = _inverted_res_block(x, 2.3, depth(80), 3, 1, None, activation, 8)
+        x = _inverted_res_block(x, 2.3, depth(80), 3, 1, None, activation, 9)
+        x = _inverted_res_block(x, 6, depth(112), 3, 1, se_ratio, activation, 10)
+        x = _inverted_res_block(x, 6, depth(112), 3, 1, se_ratio, activation, 11)
+        x = _inverted_res_block(x, 6, depth(160), kernel, 2, se_ratio,
+                                activation, 12)
+        x = _inverted_res_block(x, 6, depth(160), kernel, 1, se_ratio,
+                                activation, 13)
+        x = _inverted_res_block(x, 6, depth(160), kernel, 1, se_ratio,
+                                activation, 14)
+        return x
+    return MobileNetV3(stack_fn,
+                       1280,
+                       input_shape,
+                       alpha,
+                       'large',
+                       minimalistic,
+                       include_top,
+                       weights,
+                       input_tensor,
+                       classes,
+                       pooling,
+                       dropout_rate,
+                       **kwargs)
+
+
+def preprocess_input(x, **kwargs):
+    """Preprocesses a numpy array encoding a batch of images.
+    # Arguments
+        x: a 4D numpy array consisting of RGB values within [0, 255].
+    # Returns
+        Preprocessed array.
+    """
+    return imagenet_utils.preprocess_input(x, mode='tf', **kwargs)
+
+
+setattr(MobileNetV3Small, '__doc__', MobileNetV3.__doc__)
+setattr(MobileNetV3Large, '__doc__', MobileNetV3.__doc__)
+
+if __name__ == '__main__':
+    input_tensor = Input(shape=(None, None, 3), name='image_input')
+    # model = MobileNetV3Small(include_top=False, input_shape=(416, 416, 3), weights=None, alpha=1.0)
+    # Use the classifier variant here: `decode_predictions` below expects
+    # (batch, 1000) class scores, which the headless model does not produce.
+    model = MobileNetV3Large(include_top=True, input_tensor=input_tensor, weights='imagenet', alpha=1.0)
+    model.summary()
+
+    import numpy as np
+    from tensorflow.keras.applications.resnet50 import decode_predictions
+    from keras_preprocessing import image
+
+    img = image.load_img('../../example/eagle.jpg', target_size=(224, 224))
+    x = image.img_to_array(img)
+    x = np.expand_dims(x, axis=0)
+    x = preprocess_input(x)
+
+    preds = model.predict(x)
+    print('Predicted:', decode_predictions(preds))
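
Reviewer note (not part of the patch): below is a minimal smoke test for the three new backbone registrations. It is a sketch under assumptions: it presumes the module-level `Backbones = BackbonesFactory()` singleton and the `get_backbone`/`get_preprocessing`/`get_feature_layers` helpers defined in the untouched part of backbones_factory.py, and it builds with `weights=None` so no download is needed.

    import numpy as np
    import tensorflow as tf
    from segmentation_models.backbones.backbones_factory import Backbones  # assumed singleton

    for name in ('mobilenetv3', 'mobilenetv3small', 'darknet53'):
        # Reset Keras' auto-naming counters: the 'activation_N' skip layers
        # registered above are auto-generated names, so they only line up
        # when each backbone is built in a fresh graph.
        tf.keras.backend.clear_session()

        # Build the headless encoder the way Unet/FPN/Linknet would.
        model = Backbones.get_backbone(name, input_shape=(224, 224, 3),
                                       weights=None, include_top=False)
        preprocess = Backbones.get_preprocessing(name)

        x = preprocess(np.random.uniform(0, 255, (1, 224, 224, 3)))
        print(name, model.predict(x).shape)

        # Every skip-connection entry must name a real layer in the model.
        for layer_name in Backbones.get_feature_layers(name, n=4):
            model.get_layer(layer_name)  # raises ValueError on a bad name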