TensorFlow Error: ValueError("Shapes %s and %s are incompatible" % (self, other))
Question:
I’m trying to classify images of PCBs into two categories (defective and non-defective) using categorical cross-entropy as the loss function. The code is shown below:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.applications.resnet50 import preprocess_input
from keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
def create_compiled_model():
    """Build and compile a ResNet50-based classifier with a frozen backbone.

    The model is a headless ResNet50 (configured by ``RESNET50_WEIGHTS``,
    ``IMAGE_SIZE`` and ``RESNET50_POOLING_AVERAGE``) followed by a single
    ``Dense`` layer with ``NUM_CLASSES`` units, so predictions have one
    column per class.

    Returns:
        The compiled ``Sequential`` model.
    """
    backbone = ResNet50(
        include_top=False,
        weights=RESNET50_WEIGHTS,
        input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3),
        pooling=RESNET50_POOLING_AVERAGE,
    )
    # Only the new classification layer is trained; the backbone is frozen.
    backbone.trainable = False

    model = Sequential()
    model.add(backbone)
    model.add(Dense(NUM_CLASSES, activation=DENSE_LAYER_ACTIVATION))

    # `learning_rate` is the supported argument name; `lr` is a deprecated alias.
    # NOTE(review): `decay` is only accepted by older/legacy Keras optimizers -
    # confirm against the installed Keras version.
    sgd = SGD(learning_rate=0.01, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(optimizer=sgd, loss=OBJECTIVE_FUNCTION, metrics=LOSS_METRICS)
    return model
def data_splitor():
    """Load the saved arrays and split them into train/validation/test sets.

    Both splits are shuffled and both hold out a ``TRAIN_TEST_SPLIT``
    fraction: first the test set from the full data, then the validation
    set from what remains.

    Returns:
        Tuple ``(x_train, x_valid, x_test, y_train, y_valid, y_test)``.
    """
    features = np.load("/content/data/xtrain.npy")
    labels = np.load("/content/data/ytrain.npy")

    # Stage 1: hold out the test set.
    x_rest, x_test, y_rest, y_test = train_test_split(
        features, labels, test_size=TRAIN_TEST_SPLIT, shuffle=True
    )

    # Stage 2: hold out the validation set from the remaining samples.
    x_fit, x_val, y_fit, y_val = train_test_split(
        x_rest, y_rest, test_size=TRAIN_TEST_SPLIT, shuffle=True
    )

    return x_fit, x_val, x_test, y_fit, y_val, y_test
def data_generator(x, y, batch_size, seed=None, shuffle=True):
    """Return an augmenting batch iterator over the given arrays.

    Args:
        x: Image array to iterate over.
        y: Labels aligned with ``x``.
        batch_size: Number of samples per yielded batch.
        seed: Optional random seed forwarded to ``flow``.
        shuffle: Whether ``flow`` shuffles the samples.

    Returns:
        The iterator produced by ``ImageDataGenerator.flow``.
    """
    augmenter = ImageDataGenerator(
        horizontal_flip=True,
        vertical_flip=True,
        rotation_range=180,
        brightness_range=[0.3, 1.0],
        preprocessing_function=preprocess_input,
    )
    # Use the arrays passed in by the caller. The previous code read the names
    # `x_train` / `y_train`, which are not parameters of this function, so the
    # validation and test generators never received their own data.
    return augmenter.flow(x, y, batch_size=batch_size, seed=seed, shuffle=shuffle)
def run_program():
    """Train the model, plot the training curves and predict on the test split.

    Steps: split the data, build the three generators, fit with early
    stopping and best-checkpoint saving, plot accuracy and loss, then reload
    the best weights and compute predicted class indices for the test set.
    """
    x_train, x_valid, x_test, y_train, y_valid, y_test = data_splitor()

    train_generator = data_generator(x_train, y_train, BATCH_SIZE_TRAINING)
    validation_generator = data_generator(x_valid, y_valid, BATCH_SIZE_VALIDATION)

    cb_early_stopper = EarlyStopping(monitor='val_loss', patience=EARLY_STOP_PATIENCE)
    cb_checkpointer = ModelCheckpoint(
        filepath='/content/model/best.hdf5',
        monitor='val_loss',
        save_best_only=True,
        mode='auto',
    )

    model = create_compiled_model()

    # Model.fit accepts generators directly; fit_generator is deprecated.
    fit_history = model.fit(
        train_generator,
        steps_per_epoch=STEPS_PER_EPOCH_TRAINING,
        epochs=NUM_EPOCHS,
        validation_data=validation_generator,
        validation_steps=STEPS_PER_EPOCH_VALIDATION,
        callbacks=[cb_checkpointer, cb_early_stopper],
    )

    # NOTE(review): the 'acc' / 'val_acc' history keys depend on how the metric
    # is named in LOSS_METRICS - confirm they match.
    plt.figure(1, figsize=(15, 8))
    for position, key, label in ((221, 'acc', 'accuracy'), (222, 'loss', 'loss')):
        plt.subplot(position)
        plt.plot(fit_history.history[key])
        plt.plot(fit_history.history['val_' + key])
        plt.title('model ' + label)
        plt.ylabel(label)
        plt.xlabel('epoch')
        plt.legend(['train', 'valid'])
    plt.show()

    # Testing: fixed seed and no shuffling so predictions stay aligned with y_test.
    test_generator = data_generator(x_test, y_test, BATCH_SIZE_TESTING, 123, False)
    test_generator.reset()
    model.load_weights("/content/model/best.hdf5")
    # Model.predict accepts generators directly; predict_generator is deprecated.
    pred = model.predict(test_generator, steps=len(test_generator), verbose=1)
    predicted_class_indices = np.argmax(pred, axis=1)
# Entry point: run the whole pipeline with ops placed on the first GPU.
try:
    with tensorflow.device('/device:GPU:0'):
        run_program()
except RuntimeError as runtime_error:
    # Only RuntimeError is reported here; any other exception propagates.
    print(runtime_error)
And upon executing this, I get the ValueError seen below:
ValueError: in user code:
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:571 train_function *
outputs = self.distribute_strategy.run(
/usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:951 run **
return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:2290 call_for_each_replica
return self._call_for_each_replica(fn, args, kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:2649 _call_for_each_replica
return fn(*args, **kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:533 train_step **
y, y_pred, sample_weight, regularization_losses=self.losses)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/compile_utils.py:204 __call__
loss_value = loss_obj(y_t, y_p, sample_weight=sw)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/losses.py:143 __call__
losses = self.call(y_true, y_pred)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/losses.py:246 call
return self.fn(y_true, y_pred, **self._fn_kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/losses.py:1527 categorical_crossentropy
return K.categorical_crossentropy(y_true, y_pred, from_logits=from_logits)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/backend.py:4561 categorical_crossentropy
target.shape.assert_is_compatible_with(output.shape)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/tensor_shape.py:1117 assert_is_compatible_with
raise ValueError("Shapes %s and %s are incompatible" % (self, other))
ValueError: Shapes (None, 1) and (None, 2) are incompatible
I have already looked at this, this and this, but could not resolve the error.
I really appreciate the help in fixing this.
Thanks Praveen
Here is the complete traceback… link
Answers:
Your traceback link isn’t working.
However, try replacing categorical cross-entropy with binary cross entropy since you have only two classes.
It seems your y_train data has shape (None, 1) while your network outputs shape (None, 2). There are two options to solve this:
1) Change your model output to 1 unit and change the loss to binary cross-entropy
or
2) Convert your y_train data to categorical (one-hot) form. See this
If you can post your model.summary() and your dataset shapes here, it will help us to help you.
I had the same issue, but in my case I was using labels decoded to int64 format from TFRecord files. Changing my loss function from ‘CategoricalCrossentropy’ to ‘SparseCategoricalCrossentropy’ resolved the issue.
I encountered a similar issue, and the above-mentioned solutions did not work. The main reason we get this error is that we fail to establish a 1-to-1 mapping between X_train and Y_train. This means Y_train should have a shape like (no_of_sequences, no_of_classes).
Example –
Let’s say my dataset has 2000 rows and 5 features. where 1 sequence = 100 rows of data.
So before reshaping x_train will look like this
X_train.shape = (2000,5)
before feeding into LSTM we should reshape it to 3D (usually), hence
X_train.reshape(20,100,5)
On the other hand, our Y_train will initially look like this (if it is 2D, change it to 1D by flattening):
Y_train.shape = (2000, )
So, before feeding into LSTM we should change the Y_train shape like
Y_train.shape =(20, 5)
the 20 will make the 1:1 mapping with the train set, while the 5 will make the mapping with the final dense layer of the classification model, where we are supposed to use categorical-cross entropy.
Also, please note that Y_train should be 2D. So how do we reshape it like that?
- Check what the Y_train data look like.
- If the labels are strings, use a one-hot representation.
- If the labels are integers (one per class), convert them to categorical (refer).
- After converting to categorical, check the shape of Y_train again.
- If the number of columns equals the number of classes, use the following code to reduce the number of rows to 20 (like X_train):
# Keep one label row per sequence so Y has as many rows as the reshaped
# X_train: 2000 rows / 100 rows per sequence = 20 rows. The previous `% 20`
# test kept every 20th row, i.e. 100 rows instead of 20.
ROWS_PER_SEQUENCE = 100
for eachRowTemp in range(1, len(df_Y_Labels), ROWS_PER_SEQUENCE):
    Y_Label_Array.append(df_Y_Labels.loc[eachRowTemp])
Y_Label = np.asarray(Y_Label_Array)
This should work. You should also change Y_test in the same way.
Thanks @Augusto maillo the link was useful to fix error.
Multiclass classification
For multi-class classification, the labels have to be converted to a one-hot matrix. Use the tensorflow.keras.utils.to_categorical method to convert the labels to a matrix.
Syntax:
tf.keras.utils.to_categorical(
y, num_classes=None, dtype='float32'
)
Demo:
y = [0, 1, 2, 0, 2, 2, 1, 0, 1, 1, 0, 0, 1, 0, 2, 2, 0] # we have 3 classes 0, 1 & 2
y = tf.keras.utils.to_categorical(y, num_classes=3, dtype='int')
y
array([[1, 0, 0],
[0, 1, 0],
[0, 0, 1],
[1, 0, 0],
[0, 0, 1],
[0, 0, 1],
[0, 1, 0],
[1, 0, 0],
[0, 1, 0],
[0, 1, 0],
[1, 0, 0],
[1, 0, 0],
[0, 1, 0],
[1, 0, 0],
[0, 0, 1],
[0, 0, 1],
[1, 0, 0]])
I’m trying to classify images of PCBs into two categories (defective and non-defective) using categorical cross-entropy as the loss function. The code is shown below:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.applications.resnet50 import preprocess_input
from keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
def create_compiled_model():
    """Build and compile a ResNet50-based classifier with a frozen backbone.

    The model is a headless ResNet50 (configured by ``RESNET50_WEIGHTS``,
    ``IMAGE_SIZE`` and ``RESNET50_POOLING_AVERAGE``) followed by a single
    ``Dense`` layer with ``NUM_CLASSES`` units, so predictions have one
    column per class.

    Returns:
        The compiled ``Sequential`` model.
    """
    backbone = ResNet50(
        include_top=False,
        weights=RESNET50_WEIGHTS,
        input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3),
        pooling=RESNET50_POOLING_AVERAGE,
    )
    # Only the new classification layer is trained; the backbone is frozen.
    backbone.trainable = False

    model = Sequential()
    model.add(backbone)
    model.add(Dense(NUM_CLASSES, activation=DENSE_LAYER_ACTIVATION))

    # `learning_rate` is the supported argument name; `lr` is a deprecated alias.
    # NOTE(review): `decay` is only accepted by older/legacy Keras optimizers -
    # confirm against the installed Keras version.
    sgd = SGD(learning_rate=0.01, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(optimizer=sgd, loss=OBJECTIVE_FUNCTION, metrics=LOSS_METRICS)
    return model
def data_splitor():
    """Load the saved arrays and split them into train/validation/test sets.

    Both splits are shuffled and both hold out a ``TRAIN_TEST_SPLIT``
    fraction: first the test set from the full data, then the validation
    set from what remains.

    Returns:
        Tuple ``(x_train, x_valid, x_test, y_train, y_valid, y_test)``.
    """
    features = np.load("/content/data/xtrain.npy")
    labels = np.load("/content/data/ytrain.npy")

    # Stage 1: hold out the test set.
    x_rest, x_test, y_rest, y_test = train_test_split(
        features, labels, test_size=TRAIN_TEST_SPLIT, shuffle=True
    )

    # Stage 2: hold out the validation set from the remaining samples.
    x_fit, x_val, y_fit, y_val = train_test_split(
        x_rest, y_rest, test_size=TRAIN_TEST_SPLIT, shuffle=True
    )

    return x_fit, x_val, x_test, y_fit, y_val, y_test
def data_generator(x, y, batch_size, seed=None, shuffle=True):
    """Return an augmenting batch iterator over the given arrays.

    Args:
        x: Image array to iterate over.
        y: Labels aligned with ``x``.
        batch_size: Number of samples per yielded batch.
        seed: Optional random seed forwarded to ``flow``.
        shuffle: Whether ``flow`` shuffles the samples.

    Returns:
        The iterator produced by ``ImageDataGenerator.flow``.
    """
    augmenter = ImageDataGenerator(
        horizontal_flip=True,
        vertical_flip=True,
        rotation_range=180,
        brightness_range=[0.3, 1.0],
        preprocessing_function=preprocess_input,
    )
    # Use the arrays passed in by the caller. The previous code read the names
    # `x_train` / `y_train`, which are not parameters of this function, so the
    # validation and test generators never received their own data.
    return augmenter.flow(x, y, batch_size=batch_size, seed=seed, shuffle=shuffle)
def run_program():
    """Train the model, plot the training curves and predict on the test split.

    Steps: split the data, build the three generators, fit with early
    stopping and best-checkpoint saving, plot accuracy and loss, then reload
    the best weights and compute predicted class indices for the test set.
    """
    x_train, x_valid, x_test, y_train, y_valid, y_test = data_splitor()

    train_generator = data_generator(x_train, y_train, BATCH_SIZE_TRAINING)
    validation_generator = data_generator(x_valid, y_valid, BATCH_SIZE_VALIDATION)

    cb_early_stopper = EarlyStopping(monitor='val_loss', patience=EARLY_STOP_PATIENCE)
    cb_checkpointer = ModelCheckpoint(
        filepath='/content/model/best.hdf5',
        monitor='val_loss',
        save_best_only=True,
        mode='auto',
    )

    model = create_compiled_model()

    # Model.fit accepts generators directly; fit_generator is deprecated.
    fit_history = model.fit(
        train_generator,
        steps_per_epoch=STEPS_PER_EPOCH_TRAINING,
        epochs=NUM_EPOCHS,
        validation_data=validation_generator,
        validation_steps=STEPS_PER_EPOCH_VALIDATION,
        callbacks=[cb_checkpointer, cb_early_stopper],
    )

    # NOTE(review): the 'acc' / 'val_acc' history keys depend on how the metric
    # is named in LOSS_METRICS - confirm they match.
    plt.figure(1, figsize=(15, 8))
    for position, key, label in ((221, 'acc', 'accuracy'), (222, 'loss', 'loss')):
        plt.subplot(position)
        plt.plot(fit_history.history[key])
        plt.plot(fit_history.history['val_' + key])
        plt.title('model ' + label)
        plt.ylabel(label)
        plt.xlabel('epoch')
        plt.legend(['train', 'valid'])
    plt.show()

    # Testing: fixed seed and no shuffling so predictions stay aligned with y_test.
    test_generator = data_generator(x_test, y_test, BATCH_SIZE_TESTING, 123, False)
    test_generator.reset()
    model.load_weights("/content/model/best.hdf5")
    # Model.predict accepts generators directly; predict_generator is deprecated.
    pred = model.predict(test_generator, steps=len(test_generator), verbose=1)
    predicted_class_indices = np.argmax(pred, axis=1)
# Entry point: run the whole pipeline with ops placed on the first GPU.
try:
    with tensorflow.device('/device:GPU:0'):
        run_program()
except RuntimeError as runtime_error:
    # Only RuntimeError is reported here; any other exception propagates.
    print(runtime_error)
And upon executing this, I get the ValueError seen below:
ValueError: in user code:
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:571 train_function *
outputs = self.distribute_strategy.run(
/usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:951 run **
return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:2290 call_for_each_replica
return self._call_for_each_replica(fn, args, kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:2649 _call_for_each_replica
return fn(*args, **kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:533 train_step **
y, y_pred, sample_weight, regularization_losses=self.losses)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/compile_utils.py:204 __call__
loss_value = loss_obj(y_t, y_p, sample_weight=sw)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/losses.py:143 __call__
losses = self.call(y_true, y_pred)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/losses.py:246 call
return self.fn(y_true, y_pred, **self._fn_kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/losses.py:1527 categorical_crossentropy
return K.categorical_crossentropy(y_true, y_pred, from_logits=from_logits)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/backend.py:4561 categorical_crossentropy
target.shape.assert_is_compatible_with(output.shape)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/tensor_shape.py:1117 assert_is_compatible_with
raise ValueError("Shapes %s and %s are incompatible" % (self, other))
ValueError: Shapes (None, 1) and (None, 2) are incompatible
I have already looked at this, this and this, but could not resolve the error.
I really appreciate the help in fixing this.
Thanks Praveen
Here is the complete traceback… link
Your traceback link isn’t working.
However, try replacing categorical cross-entropy with binary cross entropy since you have only two classes.
It seems your y_train data has shape (None, 1) while your network outputs shape (None, 2). There are two options to solve this:
1) Change your model output to 1 unit and change the loss to binary cross-entropy
or
2) Convert your y_train data to categorical (one-hot) form. See this
If you can post your model.summary() and your dataset shapes here, it will help us to help you.
I had the same issue, but in my case I was using labels decoded to int64 format from TFRecord files. Changing my loss function from ‘CategoricalCrossentropy’ to ‘SparseCategoricalCrossentropy’ resolved the issue.
I encountered a similar issue, and the above-mentioned solutions did not work. The main reason we get this error is that we fail to establish a 1-to-1 mapping between X_train and Y_train. This means Y_train should have a shape like (no_of_sequences, no_of_classes).
Example –
Let’s say my dataset has 2000 rows and 5 features. where 1 sequence = 100 rows of data.
So before reshaping x_train will look like this
X_train.shape = (2000,5)
before feeding into LSTM we should reshape it to 3D (usually), hence
X_train.reshape(20,100,5)
On the other hand, our Y_train will initially look like this (if it is 2D, change it to 1D by flattening):
Y_train.shape = (2000, )
So, before feeding into LSTM we should change the Y_train shape like
Y_train.shape =(20, 5)
the 20 will make the 1:1 mapping with the train set, while the 5 will make the mapping with the final dense layer of the classification model, where we are supposed to use categorical-cross entropy.
Also, please note that Y_train should be 2D. So how do we reshape it like that?
- Check what the Y_train data look like.
- If the labels are strings, use a one-hot representation.
- If the labels are integers (one per class), convert them to categorical (refer).
- After converting to categorical, check the shape of Y_train again.
- If the number of columns equals the number of classes, use the following code to reduce the number of rows to 20 (like X_train):
# Keep one label row per sequence so Y has as many rows as the reshaped
# X_train: 2000 rows / 100 rows per sequence = 20 rows. The previous `% 20`
# test kept every 20th row, i.e. 100 rows instead of 20.
ROWS_PER_SEQUENCE = 100
for eachRowTemp in range(1, len(df_Y_Labels), ROWS_PER_SEQUENCE):
    Y_Label_Array.append(df_Y_Labels.loc[eachRowTemp])
Y_Label = np.asarray(Y_Label_Array)
This should work. You should also change Y_test in the same way.
Thanks @Augusto maillo the link was useful to fix error.
Multiclass classification
For multi-class classification, the labels have to be converted to a one-hot matrix. Use the tensorflow.keras.utils.to_categorical method to convert the labels to a matrix.
Syntax:
tf.keras.utils.to_categorical(
y, num_classes=None, dtype='float32'
)
Demo:
y = [0, 1, 2, 0, 2, 2, 1, 0, 1, 1, 0, 0, 1, 0, 2, 2, 0] # we have 3 classes 0, 1 & 2
y = tf.keras.utils.to_categorical(y, num_classes=3, dtype='int')
y
array([[1, 0, 0],
[0, 1, 0],
[0, 0, 1],
[1, 0, 0],
[0, 0, 1],
[0, 0, 1],
[0, 1, 0],
[1, 0, 0],
[0, 1, 0],
[0, 1, 0],
[1, 0, 0],
[1, 0, 0],
[0, 1, 0],
[1, 0, 0],
[0, 0, 1],
[0, 0, 1],
[1, 0, 0]])