What is the best way to create a deep neural network (with 100 layers) using custom layers in TensorFlow? - CodePudding

In the following code snippet -

class ResnetIdentityBlock(tf.keras.Model):
  """Residual block: three Conv2D + BatchNormalization stages plus an identity shortcut.

  The stages are a 1x1 convolution, a `kernel_size` convolution with
  'same' padding, and a final 1x1 convolution. The block input is added
  to the output of the last stage before the final ReLU.

  Args:
    kernel_size: Kernel size of the middle convolution.
    filters: Sequence of three filter counts, one per convolution. Because
      the input is added to the last stage's output, the third count has
      to equal the number of channels of the input tensor.
  """

  def __init__(self, kernel_size, filters):
    super(ResnetIdentityBlock, self).__init__(name='')
    filters1, filters2, filters3 = filters

    self.conv2a = tf.keras.layers.Conv2D(filters1, (1, 1))
    self.bn2a = tf.keras.layers.BatchNormalization()

    self.conv2b = tf.keras.layers.Conv2D(filters2, kernel_size, padding='same')
    self.bn2b = tf.keras.layers.BatchNormalization()

    self.conv2c = tf.keras.layers.Conv2D(filters3, (1, 1))
    self.bn2c = tf.keras.layers.BatchNormalization()

  def call(self, input_tensor, training=False):
    """Run the three stages and add the identity shortcut.

    Args:
      input_tensor: Input feature map.
      training: Forwarded to every BatchNormalization layer.

    Returns:
      relu(stages(input_tensor) + input_tensor).
    """
    x = self.conv2a(input_tensor)
    x = self.bn2a(x, training=training)
    x = tf.nn.relu(x)

    x = self.conv2b(x)
    x = self.bn2b(x, training=training)
    x = tf.nn.relu(x)

    x = self.conv2c(x)
    x = self.bn2c(x, training=training)

    # Identity shortcut: accumulate the input onto the transformed features.
    # A plain assignment here would discard everything computed above.
    x += input_tensor
    return tf.nn.relu(x)


# Instantiate one block with kernel_size=1 and filters=(1, 2, 3).
block = ResnetIdentityBlock(1, [1, 2, 3])

Can anyone tell me what the syntax in call() would look like if I had, say, 100 layers? I mean, I won't be copy-pasting the first layer 100 times and then changing a few things (like they've done in the call method). I think it will be some sort of for loop, but I want to know exactly.

CodePudding user response：

You would usually use ResNet blocks as part of a larger model. Normally, you would create some number of blocks like this:

import tensorflow as tf

class ResnetIdentityBlock(tf.keras.Model):
  """Bottleneck-style residual block with an identity skip connection.

  Applies Conv2D(1x1) -> BN -> ReLU -> Conv2D(kernel_size, 'same') -> BN
  -> ReLU -> Conv2D(1x1) -> BN, adds the original input, and applies a
  final ReLU.

  Args:
    kernel_size: Kernel size used by the middle convolution.
    filters: Three filter counts (filters1, filters2, filters3). The last
      one must match the channel count of the input, otherwise the skip
      addition cannot be performed.
  """

  def __init__(self, kernel_size, filters):
    super(ResnetIdentityBlock, self).__init__(name='')
    filters1, filters2, filters3 = filters

    self.conv2a = tf.keras.layers.Conv2D(filters1, (1, 1))
    self.bn2a = tf.keras.layers.BatchNormalization()

    self.conv2b = tf.keras.layers.Conv2D(filters2, kernel_size, padding='same')
    self.bn2b = tf.keras.layers.BatchNormalization()

    self.conv2c = tf.keras.layers.Conv2D(filters3, (1, 1))
    self.bn2c = tf.keras.layers.BatchNormalization()

  def call(self, input_tensor, training=False):
    """Compute the block output for `input_tensor`.

    Args:
      input_tensor: Feature map fed to the block.
      training: Passed on to each BatchNormalization layer.

    Returns:
      The ReLU of the summed main branch and skip connection.
    """
    x = self.conv2a(input_tensor)
    x = self.bn2a(x, training=training)
    x = tf.nn.relu(x)

    x = self.conv2b(x)
    x = self.bn2b(x, training=training)
    x = tf.nn.relu(x)

    x = self.conv2c(x)
    x = self.bn2c(x, training=training)

    # Skip connection: the input is added to the main branch rather than
    # replacing it, so the convolutions above contribute to the output.
    x += input_tensor
    return tf.nn.relu(x)


# Stem: map the 3-channel input to 64 channels, the width the blocks below output.
inputs = tf.keras.layers.Input(shape=(180, 180, 3))
x = tf.keras.layers.Conv2D(filters=64, kernel_size=9, strides=1, padding="same")(inputs)
# Keep the stem output; it is added back to the network output further down.
current_model = x

# Chain the residual blocks in a loop; increase the range for a deeper network.
for i in range(4):
  block = ResnetIdentityBlock(1, [1, 2, 64])
  # The block is constructed with name='', so give every instance a unique name.
  block._name = 'resblock' + str(i)
  x = block(x)

x = tf.keras.layers.Conv2D(filters=64, kernel_size=3, strides=1, padding="same")(x)
x = tf.keras.layers.BatchNormalization(momentum=0.5)(x)
# Long skip connection from the stem output to the end of the block stack.
output = tf.keras.layers.Add()([current_model, x])

model = tf.keras.Model(inputs, output)
print(model.summary())
# Sanity check: run a random batch through the model and print the output shape.
print(model(tf.random.normal((1, 180, 180, 3))).shape)

Model: "model_6"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
==================================================================================================
 input_14 (InputLayer)          [(None, 180, 180, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv2d_75 (Conv2D)             (None, 180, 180, 64  15616       ['input_14[0][0]']               
                                )                                                                 
                                                                                                  
 resblock0 (ResnetIdentityBlock  (None, 180, 180, 64  529        ['conv2d_75[0][0]']              
 )                              )                                                                 
                                                                                                  
 resblock1 (ResnetIdentityBlock  (None, 180, 180, 64  529        ['resblock0[0][0]']              
 )                              )                                                                 
                                                                                                  
 resblock2 (ResnetIdentityBlock  (None, 180, 180, 64  529        ['resblock1[0][0]']              
 )                              )                                                                 
                                                                                                  
 resblock3 (ResnetIdentityBlock  (None, 180, 180, 64  529        ['resblock2[0][0]']              
 )                              )                                                                 
                                                                                                  
 conv2d_88 (Conv2D)             (None, 180, 180, 64  36928       ['resblock3[0][0]']              
                                )                                                                 
                                                                                                  
 batch_normalization_69 (BatchN  (None, 180, 180, 64  256        ['conv2d_88[0][0]']              
 ormalization)                  )                                                                 
                                                                                                  
 add_6 (Add)                    (None, 180, 180, 64  0           ['conv2d_75[0][0]',              
                                )                                 'batch_normalization_69[0][0]'] 
                                                                                                  
==================================================================================================
Total params: 54,916
Trainable params: 54,252
Non-trainable params: 664
__________________________________________________________________________________________________
None
(1, 180, 180, 64)

CodePudding user response：

First, you must inherit from tf.keras.layers.Layer, not from tf.keras.Model, to define your custom layer.

Then you create a model which contains 100 of your residual layers using list comprehension.

Example:

Define Layer:

class ResnetIdentityBlock(tf.keras.layers.Layer):
  def __init__(self, kernel_size, filters):
    super(ResnetIdentityBlock, self).__init__(name='')
    filters1, filters2, filters3 = filters

    self.conv2a = tf.keras.layers.Conv2D(filters1, (1, 1))
    self.bn2a = tf.keras.layers.BatchNormalization()

    self.conv2b = tf.keras.layers.Conv2D(filters2, kernel_size, padding='same')
    self.bn2b = tf.keras.layers.BatchNormalization()

    self.conv2c = tf.keras.layers.Conv2D(filters3, (1, 1))
    self.bn2c = tf.keras.layers.BatchNormalization()

  def call(self, input_tensor, training=False):
    """Apply the three Conv2D/BatchNormalization stages and the residual addition.

    Args:
      input_tensor: Input feature map; it is also used as the shortcut.
      training: Forwarded to each BatchNormalization layer.

    Returns:
      ReLU of the last stage's output plus `input_tensor`.
    """
    x = self.conv2a(input_tensor)
    x = self.bn2a(x, training=training)
    x = tf.nn.relu(x)

    x = self.conv2b(x)
    x = self.bn2b(x, training=training)
    x = tf.nn.relu(x)

    x = self.conv2c(x)
    x = self.bn2c(x, training=training)

    # Residual addition: add the shortcut instead of overwriting x with it,
    # otherwise the layer would return relu(input_tensor) and ignore its weights.
    x += input_tensor
    return tf.nn.relu(x)

Define Model:

class ResnetCustomModel(tf.keras.Model):
  """Model that chains `num_layers` ResnetIdentityBlock instances back to back.

  Args:
    num_layers: Number of residual blocks to create.
    kernel_size: Passed to every block as its `kernel_size`.
    filters: Passed to every block as its `filters`.
  """

  def __init__(self, num_layers, kernel_size, filters):
    super().__init__(name='')
    # Create the blocks in a loop instead of writing each one out by hand;
    # all of them share the same configuration.
    blocks = []
    for _ in range(num_layers):
      blocks.append(ResnetIdentityBlock(kernel_size=kernel_size, filters=filters))
    self.res_blocks = blocks

  def call(self, input_tensor, training=False):
    """Feed the input through every block in creation order.

    `training` is accepted but is not passed explicitly to the blocks.
    """
    out = input_tensor
    for res_block in self.res_blocks:
      out = res_block(out)
    return out

Build model and view summary:

# 100 residual blocks, each with a kernel size of 3 and 32 filters per convolution.
model = ResnetCustomModel(num_layers=100, kernel_size=3, filters=(32, 32, 32))
# Build the model for inputs of shape (1, 512, 512, 32) before printing the summary.
model.build((1, 512, 512, 32))
model.summary()

To make your model independent from the input shape, you could write your own build method and define self.res_blocks depending on the input shape.

For further examples, in my Github repo DeepSaki I also implemented a residual layer and used it in a ResNet like architecture.