Dimension 2 in both shapes must be equal, but are 3 and 1-CodePudding

I am getting a ValueError while trying to make a GAN work on RGB photos in TensorFlow.

In the video that I'm following, it works in black and white (at 59:50): https://www.youtube.com/watch?v=LZov6445YAY&list=WL&index=4&t=3426s&ab_channel=SundogEducationwithFrankKane

I am trying to make it work with RGB color channels instead of black and white, but I get the error above.

I have changed:

tensor = tf.io.decode_image(dataset, channels=1, dtype=tf.dtypes.float32)

to:

tensor = tf.io.decode_image(dataset, channels=3, dtype=tf.dtypes.float32)

tensor = tf.io.decode_image(img, channels=1, dtype=tf.dtypes.float32)

to:

tensor = tf.io.decode_image(img, channels=3, dtype=tf.dtypes.float32)

dataset = np.reshape(dataset, (-1, 28, 28, 1))

to:

dataset = np.reshape(dataset, (-1, 28, 28, 3))

keras.layers.InputLayer(input_shape=(28, 28, 1)),

to:

keras.layers.InputLayer(input_shape=(28, 28, 3)),

Full error:

Traceback (most recent call last):
  File "C:\Users\m8\Desktop\idek1.py", line 153, in <module>
    dLoss = trainDStep(batch)
  File "C:\Users\m8\AppData\Local\Programs\Python\Python39\lib\site-packages\tensorflow\python\util\traceback_utils.py", line 153, in error_handler
    raise e.with_traceback(filtered_tb) from None
  File "C:\Users\m8\AppData\Local\Temp\__autograph_generated_filegoihnx0n.py", line 15, in tf__trainDStep
    x = ag__.converted_call(ag__.ld(tf).concat, ([ag__.ld(data), ag__.ld(fake)],), dict(axis=0), fscope)
ValueError: in user code:

    File "C:\Users\m8\Desktop\idek1.py", line 94, in trainDStep  *
        x = tf.concat([data, fake], axis=0)

    ValueError: Dimension 2 in both shapes must be equal, but are 3 and 1. Shapes are [28,28,3] and [28,28,1]. for '{{node concat_1}} = ConcatV2[N=2, T=DT_FLOAT, Tidx=DT_INT32](data, sequential/conv2d_transpose_2/Sigmoid, concat_1/axis)' with input shapes: [16,28,28,3], [16,28,28,1], [] and with computed input tensors: input[2] = <0>.

Full code:

import tensorflow as tf
import os
import pathlib
import numpy as np

# Seed TensorFlow's global random generator so runs are repeatable.
tf.random.set_seed(1)

# Print how many GPUs TensorFlow can see.
print(len(tf.config.list_physical_devices("GPU")))

# Let TensorFlow grow its GPU memory allocation on demand.
config = tf.compat.v1.ConfigProto()
config.gpu_options.allow_growth = True
session = tf.compat.v1.Session(config=config)

data_dir = "C:/Users/m8/Desktop/test_fro_tensorflow/train/training/"

# The image list starts with one validation image; every file found in
# data_dir is appended after it.
dataset = tf.io.read_file("C:/Users/m8/Desktop/test_fro_tensorflow/val/validation/img (1).jpg")
tensor = tf.io.decode_image(dataset, channels=3, dtype=tf.dtypes.float32)
tensor = tf.image.resize(tensor, [28, 28])
images = [tensor]

for file in os.listdir(data_dir):
    full_path = os.path.join(data_dir, file)
    img = tf.io.read_file(full_path)
    tensor = tf.io.decode_image(img, channels=3, dtype=tf.dtypes.float32)
    tensor = tf.image.resize(tensor, [28, 28])
    images.append(tensor)

# Stack once at the end instead of re-concatenating the whole array on every
# iteration.
dataset = np.stack(images).astype("float32")

print(dataset.shape)

# No division by 255 here: decoding with dtype=float32 already yields values
# in [0, 1], which is the range of the generator's sigmoid output.

BATCH_SIZE = 16

# Already (N, 28, 28, 3) after stacking; the reshape is kept as an explicit
# statement of the layout the discriminator expects.
dataset = np.reshape(dataset, (-1, 28, 28, 3))
dataset = tf.data.Dataset.from_tensor_slices(dataset)
dataset = dataset.shuffle(buffer_size=1024).batch(BATCH_SIZE)

from tensorflow import keras
from tensorflow.keras import layers

# Length of the noise vector the generator takes as input.
NOISE_DIM = 150


# Generator: maps a (NOISE_DIM,) noise vector to an image. The traceback
# earlier in this post reports its output as [16, 28, 28, 1] for a batch of 16.
generator = keras.models.Sequential([
    keras.layers.InputLayer(input_shape=(NOISE_DIM,)),
    layers.Dense(7*7*256),
    layers.Reshape(target_shape=(7, 7, 256)),
    layers.Conv2DTranspose(256, 3, activation="LeakyReLU", strides=2, padding="same"),
    layers.Conv2DTranspose(128, 3, activation="LeakyReLU", strides=2, padding="same"),
    # NOTE(review): 1 filter means a single-channel output, while the
    # discriminator below is declared with input_shape=(28, 28, 3). This is
    # the [28,28,3] vs [28,28,1] mismatch reported by tf.concat.
    layers.Conv2DTranspose(1, 3, activation="sigmoid", padding="same"),
])

generator.summary()

# Discriminator: takes (28, 28, 3) images and ends in one sigmoid unit.
discriminator = keras.models.Sequential([
    keras.layers.InputLayer(input_shape=(28, 28, 3)),
    layers.Conv2D(256, 3, activation="relu", strides=2, padding="same"),
    layers.Conv2D(128, 3, activation="relu", strides=2, padding="same"),
    # NOTE(review): this Dense layer comes before Flatten, so it receives the
    # convolutional feature map rather than a flat vector. Confirm the
    # ordering is intended.
    layers.Dense(64, activation="relu"),
    layers.Flatten(),
    layers.Dropout(0.2),
    layers.Dense(1, activation="sigmoid")
])

discriminator.summary()

# Separate Adam optimizers for the generator and the discriminator.
optimizerG = keras.optimizers.Adam(learning_rate=0.001, beta_1=0.5)
optimizerD = keras.optimizers.Adam(learning_rate=0.003, beta_1=0.5)

# The discriminator's last layer applies a sigmoid, so its outputs are
# probabilities, not logits; from_logits must therefore be False.
lossFn = keras.losses.BinaryCrossentropy(from_logits=False)

# Running accuracy trackers, one per network.
gAccMetric = tf.keras.metrics.BinaryAccuracy()
dAccMetric = tf.keras.metrics.BinaryAccuracy()

@tf.function
def trainDStep(data):
    """Run one discriminator update on a batch of real images.

    A batch of fake images the same size as ``data`` is generated from random
    noise, concatenated after the real images, and the discriminator is
    trained to output 1 for the real half and 0 for the generated half.

    Args:
        data: Batch of real images. Its trailing dimensions must match the
            generator's output, because the two are concatenated on axis 0.

    Returns:
        A dict with ``"discriminator_loss"`` (loss for this batch) and
        ``"discriminator_accuracy"`` (current value of ``dAccMetric``).
    """
    batchSize = tf.shape(data)[0]

    noise = tf.random.normal(shape=(batchSize, NOISE_DIM))
    # Labels follow the concat order below: real images first, fakes second.
    # The shape is passed as a tuple; the second positional argument of
    # tf.ones / tf.zeros is the dtype, not another dimension.
    y_true = tf.concat(
        [
            tf.ones((batchSize, 1)),
            tf.zeros((batchSize, 1)),
        ],
        axis=0,
    )

    with tf.GradientTape() as tape:
        fake = generator(noise)
        x = tf.concat([data, fake], axis=0)
        y_pred = discriminator(x)
        discriminatorLoss = lossFn(y_true, y_pred)

    # Only the discriminator's weights are updated in this step.
    grads = tape.gradient(discriminatorLoss, discriminator.trainable_weights)
    optimizerD.apply_gradients(zip(grads, discriminator.trainable_weights))

    dAccMetric.update_state(y_true, y_pred)

    return {
        "discriminator_loss": discriminatorLoss,
        "discriminator_accuracy": dAccMetric.result(),
    }

@tf.function
def trainGStep(data):
    """Run one generator update.

    Generated images are scored by the discriminator, and the generator is
    trained so that those scores move towards 1 ("real").

    Args:
        data: Batch of real images; only its leading dimension is read, to
            decide how many fake images to generate.

    Returns:
        A dict with ``"generator_loss"`` (loss for this batch) and
        ``"generator_accuracy"`` (current value of ``gAccMetric``).
    """
    batchSize = tf.shape(data)[0]
    noise = tf.random.normal(shape=(batchSize, NOISE_DIM))
    # Target label is 1 for every fake. The shape is passed as a tuple; the
    # second positional argument of tf.ones is the dtype, not a dimension.
    y_true = tf.ones((batchSize, 1))

    with tf.GradientTape() as tape:
        y_pred = discriminator(generator(noise))
        generatorLoss = lossFn(y_true, y_pred)

    # Only the generator's weights are updated in this step.
    grads = tape.gradient(generatorLoss, generator.trainable_weights)
    optimizerG.apply_gradients(zip(grads, generator.trainable_weights))

    gAccMetric.update_state(y_true, y_pred)

    return {
        "generator_loss": generatorLoss,
        "generator_accuracy": gAccMetric.result(),
    }

from matplotlib import pyplot as plt

def plotImages(model):
    """Show four images produced by ``model`` in a 2x2 grid.

    Args:
        model: Callable that maps an array of shape ``(4, NOISE_DIM)`` to a
            batch of four images with the channel axis last.
    """
    images = model(np.random.normal(size=(4, NOISE_DIM)))

    plt.figure(figsize=(9, 9))

    for i, image in enumerate(images):
        plt.subplot(2, 2, i + 1)  # subplot indices are 1-based
        image = np.asarray(image)
        # Drop the channel axis only for single-channel images; a 3-channel
        # image is passed to imshow unchanged.
        if image.shape[-1] == 1:
            image = np.squeeze(image, -1)
        plt.imshow(image, cmap="Greys_r")
        plt.axis("off")

    plt.show()

# Train for 50 epochs, alternating one discriminator step and one generator
# step per batch, and print the per-epoch averages.
for epoch in range(50):

    dLossSum = 0
    gLossSum = 0
    dAccSum = 0
    gAccSum = 0
    cnt = 0

    for batch in dataset:

        dLoss = trainDStep(batch)
        dLossSum += dLoss["discriminator_loss"]
        dAccSum += dLoss["discriminator_accuracy"]

        # The generator totals come from the generator step's own results.
        gLoss = trainGStep(batch)
        gLossSum += gLoss["generator_loss"]
        gAccSum += gLoss["generator_accuracy"]

        cnt += 1

    print("E:{}, Loss G:{:0.4f}, Loss D:{:0.4f}, Acc G:%{:0.2f}, Acc D:%{:0.2f}".format(
        epoch,
        gLossSum/cnt,
        dLossSum/cnt,
        100 * gAccSum/cnt,
        100 * dAccSum/cnt
    ))

    # Show sample generator output every second epoch.
    if epoch % 2 == 0:
        plotImages(generator)

CodePudding user response：

Let us start by inspecting your error alongside the code you have provided.

        x = tf.concat([data, fake], axis=0)

    ValueError: Dimension 2 in both shapes must be equal, but are 3 and 1. Shapes are [28,28,3] and [28,28,1]. for '{{node concat_1}} = ConcatV2[N=2, T=DT_FLOAT, Tidx=DT_INT32](data, sequential/conv2d_transpose_2/Sigmoid, concat_1/axis)' with input shapes: [16,28,28,3], [16,28,28,1], [] and with computed input tensors: input[2] = <0>.

The main takeaway from this error is that data and fake cannot be concatenated - the reason being that they don't match size-wise. As the error states,

data has the shape [28,28,3] (which is expected, as you have made the changes to have RGB inputs);
fake has the shape [28,28,1], which is not the same as the shape of data.

Our solution is to somehow fix the fake variable's shape to match that of data.

We see that fake is created in the code in the line

fake = generator(noise)

And generator is defined as

generator = keras.models.Sequential([
    keras.layers.InputLayer(input_shape=(NOISE_DIM,)),
    ...
    ...
    layers.Conv2DTranspose(1, 3, activation="sigmoid", padding="same"),
])

The last layer of the generator seems to be a Conv2DTranspose but it is using just 1 output channel (the first argument). Here is our error!

To fix it, change that layer so that it outputs 3 channels instead of 1:

    layers.Conv2DTranspose(3, 3, activation="sigmoid", padding="same"),