ValueError: `logits` and `labels` must have the same shape
Question:
I’m trying to use Imagenet V2 with transfer-learning for multiclass classification (6 classes), but getting the following error. Can anyone please help?
ValueError: `logits` and `labels` must have the same shape, received ((None, 6) vs (None, 1)).
I borrowed this code from Andrew Ng’s CNN course I took a while back but the original code was for binary classification. I tried to modify it for multiclass classification but got this error. Here’s my code:
import matplotlib.pyplot as plt
import numpy as np
import os
import tensorflow as tf
import tensorflow.keras.layers as tfl
import datetime
from tensorflow.keras.preprocessing import image_dataset_from_directory
from tensorflow.keras.layers.experimental.preprocessing import RandomFlip, RandomRotation
BATCH_SIZE = 16
IMG_SIZE = (160, 160)
training_directory = "/content/drive/MyDrive/Microscopy Data/04112028_multiclass_maiden/Training/Actin"
validation_directory = "/content/drive/MyDrive/Microscopy Data/04112028_multiclass_maiden/Validation/Actin"
train_dataset = image_dataset_from_directory(training_directory,
shuffle=True,
batch_size=BATCH_SIZE,
image_size=IMG_SIZE,
seed=42)
validation_dataset = image_dataset_from_directory(validation_directory,
shuffle=True,
batch_size=BATCH_SIZE,
image_size=IMG_SIZE,
seed=42)
Output:
Found 600 files belonging to 6 classes.
Found 600 files belonging to 6 classes.
Code Continued…
class_names = train_dataset.class_names
AUTOTUNE = tf.data.experimental.AUTOTUNE
train_dataset = train_dataset.prefetch(buffer_size=AUTOTUNE)
preprocess_input = tf.keras.applications.mobilenet_v2.preprocess_input
IMG_SHAPE = IMG_SIZE + (3,)
base_model = tf.keras.applications.MobileNetV2(input_shape=IMG_SHAPE,
include_top=True,
weights='imagenet')
def huvec_model (image_shape=IMG_SIZE, data_augmentation=data_augmenter()):
''' Define a tf.keras model for binary classification out of the MobileNetV2 model
Arguments:
image_shape -- Image width and height
data_augmentation -- data augmentation function
Returns:
Returns:
tf.keras.model
'''
input_shape = image_shape + (3,)
# Freeze the base model by making it non trainable
# base_model.trainable = None
# create the input layer (Same as the imageNetv2 input size)
# inputs = tf.keras.Input(shape=None)
# apply data augmentation to the inputs
# x = None
# data preprocessing using the same weights the model was trained on
# x = preprocess_input(None)
# set training to False to avoid keeping track of statistics in the batch norm layer
# x = base_model(None, training=None)
# Add the new Binary classification layers
# use global avg pooling to summarize the info in each channel
# x = None()(x)
#include dropout with probability of 0.2 to avoid overfitting
# x = None(None)(x)
# create a prediction layer with one neuron (as a classifier only needs one)
# prediction_layer = None
base_model = tf.keras.applications.MobileNetV2(input_shape=IMG_SHAPE,
include_top=False,
weights='imagenet')
base_model.trainable = False
inputs = tf.keras.Input(shape=input_shape)
x = data_augmentation(inputs)
x = preprocess_input(x)
x = base_model(x, training=False)
x = tf.keras.layers.GlobalAveragePooling2D()(x)
x = tfl.Dropout(.2)(x)
prediction_layer = tf.keras.layers.Dense(units = len(class_names), activation='softmax')
# YOUR CODE ENDS HERE
outputs = prediction_layer(x)
model = tf.keras.Model(inputs, outputs)
return model
model2 = huvec_model(IMG_SIZE)
base_model.trainable = True
# Let's take a look to see how many layers are in the base model
print("Number of layers in the base model: ", len(base_model.layers))
# Fine-tune from this layer onwards
fine_tune_at = 120
# Freeze all the layers before the `fine_tune_at` layer
# for layer in base_model.layers[:fine_tune_at]:
# layer.trainable = None
# Define a BinaryCrossentropy loss function. Use from_logits=True
# loss_function=None
# Define an Adam optimizer with a learning rate of 0.1 * base_learning_rate
# optimizer = None
# Use accuracy as evaluation metric
# metrics=None
base_learning_rate = 0.01
# YOUR CODE STARTS HERE
for layer in base_model.layers[:fine_tune_at]:
layer.trainable = False
loss_function=tf.keras.losses.BinaryCrossentropy(from_logits=True)
optimizer= tf.keras.optimizers.Adam(learning_rate=0.1*base_learning_rate)
metrics=['accuracy']
# YOUR CODE ENDS HERE
model2.compile(loss=loss_function,
optimizer = optimizer,
metrics=metrics)
initial_epochs = 5
history = model2.fit(train_dataset, validation_data=validation_dataset, epochs=initial_epochs)
Answers:
Looks like you yet have to one-hot-encode your labels, i.e. instead of having number i
(between 0 and 5, inclusive) for a label of an image that belongs to the i
-th class, which is of shape (None, 1)
, provide an array of all 0’s except a 1 at index i, which is of shape (None, 6)
. Then labels
has the same shape as logits
.
It is easy you need to match the logits output or you need to remove softmax or distribution at the end of the model.
Almost correct, I change a bit on un-defined data_augmentation that is working.
It will have the output but the calculation is based on output expectation try to use meanquears you see errors or use class entropy that will provide different behavior.
Somebody told it boosted up accuracy output as they are using Binary cross entropy but not this way, it will highly boosted up when using with sequences of binary see the example of ALE games ( Street Fighters )
[ Sample ]:
import os
from os.path import exists
import tensorflow as tf
import matplotlib.pyplot as plt
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
Variables
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
BATCH_SIZE = 16
IMG_SIZE = (160, 160)
PATH = 'F:\datasets\downloads\sample\cats_dogs\training'
training_directory = os.path.join(PATH, 'train')
validation_directory = os.path.join(PATH, 'validation')
train_dataset = tf.keras.utils.image_dataset_from_directory(training_directory,
shuffle=True,
batch_size=BATCH_SIZE,
image_size=IMG_SIZE,
seed=42)
validation_dataset = tf.keras.utils.image_dataset_from_directory(validation_directory,
shuffle=True,
batch_size=BATCH_SIZE,
image_size=IMG_SIZE,
seed=42)
class_names = train_dataset.class_names
print( "class_names: " + str( class_names ) )
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
Functions
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
def huvec_model (image_shape=IMG_SIZE, data_augmentation = tf.keras.Sequential([ tf.keras.layers.RandomFlip('horizontal'), tf.keras.layers.RandomRotation(0.2), ])):
# def huvec_model (image_shape=IMG_SIZE, data_augmentation=data_augmenter()):
''' Define a tf.keras model for binary classification out of the MobileNetV2 model
Arguments:
image_shape -- Image width and height
data_augmentation -- data augmentation function
Returns:
Returns:
tf.keras.model
'''
input_shape = image_shape + (3,)
base_model = tf.keras.applications.MobileNetV2(input_shape=IMG_SHAPE,
include_top=False,
weights='imagenet')
base_model.trainable = False
inputs = tf.keras.Input(shape=input_shape)
x = data_augmentation(inputs)
x = preprocess_input(x)
x = base_model(x, training=False)
x = tf.keras.layers.GlobalAveragePooling2D()(x)
x = tf.keras.layers.Dropout(.2)(x)
prediction_layer = tf.keras.layers.Dense(units = len(class_names), activation='softmax')
outputs = prediction_layer(x)
model = tf.keras.Model(inputs, outputs)
return model
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
DataSet
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
AUTOTUNE = tf.data.experimental.AUTOTUNE
train_dataset = train_dataset.prefetch(buffer_size=AUTOTUNE)
preprocess_input = tf.keras.applications.mobilenet_v2.preprocess_input
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Model Initialize
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
IMG_SHAPE = IMG_SIZE + (3,)
base_model = tf.keras.applications.MobileNetV2(input_shape=IMG_SHAPE,
include_top=True,
weights='imagenet')
base_model.summary()
model2 = huvec_model(IMG_SIZE)
base_model.trainable = True
# Let's take a look to see how many layers are in the base model
print("Number of layers in the base model: ", len(base_model.layers))
# Fine-tune from this layer onwards
fine_tune_at = 120
base_learning_rate = 0.01
for layer in base_model.layers[:fine_tune_at]:
layer.trainable = False
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Optimizer
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
optimizer = tf.keras.optimizers.Adam(learning_rate=0.1*base_learning_rate)
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Loss Fn
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
lossfn = tf.keras.losses.BinaryCrossentropy(from_logits=False)
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Model Summary
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
model2.compile(optimizer=optimizer, loss=lossfn, metrics=[ 'accuracy' ])
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Training
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
history = model2.fit(train_dataset, validation_data=validation_dataset, epochs=5)
input('...')
[ Output ]
Found the bug:
I had to redefine loss in the model compiler as loss='sparse_categorical_crossentropy'
which was initially defined as loss=tf.keras.losses.BinaryCrossentropy(from_logits=True)
.
Refer to the SO thread Changing Keras Model from Binary Classification to Multi-classification for more details.
I’m trying to use Imagenet V2 with transfer-learning for multiclass classification (6 classes), but getting the following error. Can anyone please help?
ValueError: `logits` and `labels` must have the same shape, received ((None, 6) vs (None, 1)).
I borrowed this code from Andrew Ng’s CNN course I took a while back but the original code was for binary classification. I tried to modify it for multiclass classification but got this error. Here’s my code:
import matplotlib.pyplot as plt
import numpy as np
import os
import tensorflow as tf
import tensorflow.keras.layers as tfl
import datetime
from tensorflow.keras.preprocessing import image_dataset_from_directory
from tensorflow.keras.layers.experimental.preprocessing import RandomFlip, RandomRotation
BATCH_SIZE = 16
IMG_SIZE = (160, 160)
training_directory = "/content/drive/MyDrive/Microscopy Data/04112028_multiclass_maiden/Training/Actin"
validation_directory = "/content/drive/MyDrive/Microscopy Data/04112028_multiclass_maiden/Validation/Actin"
train_dataset = image_dataset_from_directory(training_directory,
shuffle=True,
batch_size=BATCH_SIZE,
image_size=IMG_SIZE,
seed=42)
validation_dataset = image_dataset_from_directory(validation_directory,
shuffle=True,
batch_size=BATCH_SIZE,
image_size=IMG_SIZE,
seed=42)
Output:
Found 600 files belonging to 6 classes.
Found 600 files belonging to 6 classes.
Code Continued…
class_names = train_dataset.class_names
AUTOTUNE = tf.data.experimental.AUTOTUNE
train_dataset = train_dataset.prefetch(buffer_size=AUTOTUNE)
preprocess_input = tf.keras.applications.mobilenet_v2.preprocess_input
IMG_SHAPE = IMG_SIZE + (3,)
base_model = tf.keras.applications.MobileNetV2(input_shape=IMG_SHAPE,
include_top=True,
weights='imagenet')
def huvec_model (image_shape=IMG_SIZE, data_augmentation=data_augmenter()):
''' Define a tf.keras model for binary classification out of the MobileNetV2 model
Arguments:
image_shape -- Image width and height
data_augmentation -- data augmentation function
Returns:
Returns:
tf.keras.model
'''
input_shape = image_shape + (3,)
# Freeze the base model by making it non trainable
# base_model.trainable = None
# create the input layer (Same as the imageNetv2 input size)
# inputs = tf.keras.Input(shape=None)
# apply data augmentation to the inputs
# x = None
# data preprocessing using the same weights the model was trained on
# x = preprocess_input(None)
# set training to False to avoid keeping track of statistics in the batch norm layer
# x = base_model(None, training=None)
# Add the new Binary classification layers
# use global avg pooling to summarize the info in each channel
# x = None()(x)
#include dropout with probability of 0.2 to avoid overfitting
# x = None(None)(x)
# create a prediction layer with one neuron (as a classifier only needs one)
# prediction_layer = None
base_model = tf.keras.applications.MobileNetV2(input_shape=IMG_SHAPE,
include_top=False,
weights='imagenet')
base_model.trainable = False
inputs = tf.keras.Input(shape=input_shape)
x = data_augmentation(inputs)
x = preprocess_input(x)
x = base_model(x, training=False)
x = tf.keras.layers.GlobalAveragePooling2D()(x)
x = tfl.Dropout(.2)(x)
prediction_layer = tf.keras.layers.Dense(units = len(class_names), activation='softmax')
# YOUR CODE ENDS HERE
outputs = prediction_layer(x)
model = tf.keras.Model(inputs, outputs)
return model
model2 = huvec_model(IMG_SIZE)
base_model.trainable = True
# Let's take a look to see how many layers are in the base model
print("Number of layers in the base model: ", len(base_model.layers))
# Fine-tune from this layer onwards
fine_tune_at = 120
# Freeze all the layers before the `fine_tune_at` layer
# for layer in base_model.layers[:fine_tune_at]:
# layer.trainable = None
# Define a BinaryCrossentropy loss function. Use from_logits=True
# loss_function=None
# Define an Adam optimizer with a learning rate of 0.1 * base_learning_rate
# optimizer = None
# Use accuracy as evaluation metric
# metrics=None
base_learning_rate = 0.01
# YOUR CODE STARTS HERE
for layer in base_model.layers[:fine_tune_at]:
layer.trainable = False
loss_function=tf.keras.losses.BinaryCrossentropy(from_logits=True)
optimizer= tf.keras.optimizers.Adam(learning_rate=0.1*base_learning_rate)
metrics=['accuracy']
# YOUR CODE ENDS HERE
model2.compile(loss=loss_function,
optimizer = optimizer,
metrics=metrics)
initial_epochs = 5
history = model2.fit(train_dataset, validation_data=validation_dataset, epochs=initial_epochs)
Looks like you yet have to one-hot-encode your labels, i.e. instead of having number i
(between 0 and 5, inclusive) for a label of an image that belongs to the i
-th class, which is of shape (None, 1)
, provide an array of all 0’s except a 1 at index i, which is of shape (None, 6)
. Then labels
has the same shape as logits
.
It is easy you need to match the logits output or you need to remove softmax or distribution at the end of the model.
Almost correct, I change a bit on un-defined data_augmentation that is working.
It will have the output but the calculation is based on output expectation try to use meanquears you see errors or use class entropy that will provide different behavior.
Somebody told it boosted up accuracy output as they are using Binary cross entropy but not this way, it will highly boosted up when using with sequences of binary see the example of ALE games ( Street Fighters )
[ Sample ]:
import os
from os.path import exists
import tensorflow as tf
import matplotlib.pyplot as plt
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
Variables
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
BATCH_SIZE = 16
IMG_SIZE = (160, 160)
PATH = 'F:\datasets\downloads\sample\cats_dogs\training'
training_directory = os.path.join(PATH, 'train')
validation_directory = os.path.join(PATH, 'validation')
train_dataset = tf.keras.utils.image_dataset_from_directory(training_directory,
shuffle=True,
batch_size=BATCH_SIZE,
image_size=IMG_SIZE,
seed=42)
validation_dataset = tf.keras.utils.image_dataset_from_directory(validation_directory,
shuffle=True,
batch_size=BATCH_SIZE,
image_size=IMG_SIZE,
seed=42)
class_names = train_dataset.class_names
print( "class_names: " + str( class_names ) )
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
Functions
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
def huvec_model (image_shape=IMG_SIZE, data_augmentation = tf.keras.Sequential([ tf.keras.layers.RandomFlip('horizontal'), tf.keras.layers.RandomRotation(0.2), ])):
# def huvec_model (image_shape=IMG_SIZE, data_augmentation=data_augmenter()):
''' Define a tf.keras model for binary classification out of the MobileNetV2 model
Arguments:
image_shape -- Image width and height
data_augmentation -- data augmentation function
Returns:
Returns:
tf.keras.model
'''
input_shape = image_shape + (3,)
base_model = tf.keras.applications.MobileNetV2(input_shape=IMG_SHAPE,
include_top=False,
weights='imagenet')
base_model.trainable = False
inputs = tf.keras.Input(shape=input_shape)
x = data_augmentation(inputs)
x = preprocess_input(x)
x = base_model(x, training=False)
x = tf.keras.layers.GlobalAveragePooling2D()(x)
x = tf.keras.layers.Dropout(.2)(x)
prediction_layer = tf.keras.layers.Dense(units = len(class_names), activation='softmax')
outputs = prediction_layer(x)
model = tf.keras.Model(inputs, outputs)
return model
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
DataSet
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
AUTOTUNE = tf.data.experimental.AUTOTUNE
train_dataset = train_dataset.prefetch(buffer_size=AUTOTUNE)
preprocess_input = tf.keras.applications.mobilenet_v2.preprocess_input
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Model Initialize
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
IMG_SHAPE = IMG_SIZE + (3,)
base_model = tf.keras.applications.MobileNetV2(input_shape=IMG_SHAPE,
include_top=True,
weights='imagenet')
base_model.summary()
model2 = huvec_model(IMG_SIZE)
base_model.trainable = True
# Let's take a look to see how many layers are in the base model
print("Number of layers in the base model: ", len(base_model.layers))
# Fine-tune from this layer onwards
fine_tune_at = 120
base_learning_rate = 0.01
for layer in base_model.layers[:fine_tune_at]:
layer.trainable = False
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Optimizer
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
optimizer = tf.keras.optimizers.Adam(learning_rate=0.1*base_learning_rate)
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Loss Fn
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
lossfn = tf.keras.losses.BinaryCrossentropy(from_logits=False)
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Model Summary
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
model2.compile(optimizer=optimizer, loss=lossfn, metrics=[ 'accuracy' ])
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Training
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
history = model2.fit(train_dataset, validation_data=validation_dataset, epochs=5)
input('...')
[ Output ]
Found the bug:
I had to redefine loss in the model compiler as loss='sparse_categorical_crossentropy'
which was initially defined as loss=tf.keras.losses.BinaryCrossentropy(from_logits=True)
.
Refer to the SO thread Changing Keras Model from Binary Classification to Multi-classification for more details.