y should be a 1d array, got an array of shape {} instead.".format(shape)
Question:
I am currently working on a project to detect and recognise handwritten signatures. I am able to detect the signatures and store it in a "images" folder and I am storing the labels in a "labels.txt" file.
Now, I am trying to build a model to recognise the handwriting and give me the prediction. However, I am encountering the following error
"y should be a 1d array, got an array of shape {}
instead.".format(shape)"
I referred this question but am not sure as they were using train_test_split module.
Here is my code:
Checking the labels and images:
import os
# Get the list of images
images = os.listdir("images/")
# Remove any non-image files from the list
images = [img for img in images if img.endswith(".jpg") or img.endswith(".jpeg") or img.endswith(".png")]
# Get the list of labels from the label text file
with open("labels.txt", "r") as f:
labels = f.readlines()
# Strip the newline characters from each label
labels = [label.strip() for label in labels]
# Print the number of images and labels
print("Number of images:", len(images))
print("Number of labels:", len(labels))
Loading the dataset into "images" and "labels" array
import os
import numpy as np
import cv2
def load_images_labels(images_folder, labels_file):
images = []
labels = []
# Read images from the folder
for filename in os.listdir(images_folder):
if filename.endswith('.jpeg') or filename.endswith('.jpg'):
image = cv2.imread(os.path.join(images_folder, filename))
images.append(image)
# Read labels from the file
with open(labels_file) as f:
lines = f.readlines()
for line in lines:
label = line.strip()
labels.append(label)
# Convert the lists to numpy arrays
images = np.array(images)
labels = np.array(labels)
return images, labels
# Load the images and labels from the folder and file
images, labels = load_images_labels('./images', './labels.txt')
images = images[:2]
labels = labels[:5]
print(images)
print(labels)
Building, Compiling and Training the model
import tensorflow.keras as keras
import pandas as pd
# image size should be 28*28 so reshape using .reshape(-1.28,28,1)
# y, the list of names
#array[:75]
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (
Dense,
Conv2D,
MaxPool2D,
Flatten,
Dropout,
BatchNormalization,
)
model = Sequential()
model.add(Conv2D(75, (3, 3), strides=1, padding="same", activation="relu",
input_shape=(28, 28, 1)))
model.add(BatchNormalization())
model.add(MaxPool2D((2, 2), strides=2, padding="same"))
model.add(Conv2D(50, (3, 3), strides=1, padding="same", activation="relu"))
model.add(Dropout(0.2))
model.add(BatchNormalization())
model.add(MaxPool2D((2, 2), strides=2, padding="same"))
model.add(Conv2D(25, (3, 3), strides=1, padding="same", activation="relu"))
model.add(BatchNormalization())
model.add(MaxPool2D((2, 2), strides=2, padding="same"))
model.add(Flatten())
model.add(Dense(units=512, activation="relu"))
model.add(Dropout(0.3))
model.add(Dense(units=num_classes, activation="softmax"))
# Preprocessing the images to be ready for the model
images = images.astype("float32") / 255.0
images = np.array([image.reshape(-1, 28, 28, 1) for image in images])
# One hot encode the labels
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(labels)
labels = labels.reshape(-1, 1)
onehot_encoder = OneHotEncoder()
labels = onehot_encoder.fit_transform(labels).toarray()
num_classes = labels.shape[1]
# Compile the model
model.compile(
optimizer=keras.optimizers.Adam(lr=0.001),
loss="categorical_crossentropy",
metrics=["accuracy"],
)
# Train the model
history = model.fit(x=images, y=labels, batch_size=32, epochs=10, validation_split=0.01)
# Evaluate the model
test_loss, test_accuracy = model.evaluate(images, labels, verbose=0)
print(f"Test Accuracy: {test_accuracy:.4f}")
Error Message:
--------- ValueError Traceback (most recent call
> last) Cell In[10], line 9
> 6 from sklearn.preprocessing import LabelEncoder, OneHotEncoder
> 8 label_encoder = LabelEncoder()
> ----> 9 labels = label_encoder.fit_transform(labels)
> 10 labels = labels.reshape(-1, 1)
> 12 onehot_encoder = OneHotEncoder()
>
> File
> ~/miniconda3/envs/tensorflow/lib/python3.10/site-packages/sklearn/utils/_set_output.py:142,
> in _wrap_method_output.<locals>.wrapped(self, X, *args, **kwargs)
> 140 @wraps(f)
> 141 def wrapped(self, X, *args, **kwargs):
> --> 142 data_to_wrap = f(self, X, *args, **kwargs)
> 143 if isinstance(data_to_wrap, tuple):
> 144 # only wrap the first output for cross decomposition
> 145 return (
> 146 _wrap_data_with_container(method, data_to_wrap[0], X, self),
> 147 *data_to_wrap[1:],
> 148 )
>
> File
> ~/miniconda3/envs/tensorflow/lib/python3.10/site-packages/sklearn/preprocessing/_label.py:116,
> in LabelEncoder.fit_transform(self, y)
> 103 def fit_transform(self, y):
> 104 """Fit label encoder and return encoded labels.
> 105
> 106 Parameters (...)
> 114 Encoded labels.
> 115 """
> --> 116 y = column_or_1d(y, warn=True)
> 117 self.classes_, y = _unique(y, return_inverse=True)
> 118 return y
>
> File
> ~/miniconda3/envs/tensorflow/lib/python3.10/site-packages/sklearn/utils/validation.py:1202,
> in column_or_1d(y, dtype, warn) 1193 warnings.warn( 1194
> "A column-vector y was passed when a 1d array was" 1195
> " expected. Please change the shape of y to " (...) 1198
> stacklevel=2, 1199 ) 1200 return
> _asarray_with_order(xp.reshape(y, -1), order="C", xp=xp)
> -> 1202 raise ValueError( 1203 "y should be a 1d array, got an array of shape {} instead.".format(shape) 1204 )
>
> ValueError: y should be a 1d array, got an array of shape (5, 5)
> instead.
I would really be grateful for any help that I can receive.
Answers:
So, the issue was fairly simple and it was completely my fault. I was executing this code on the Colab platform. And I forgot to execute the previous block of cell, before running "Building, Compiling and Training the model" – this cell.
I am currently working on a project to detect and recognise handwritten signatures. I am able to detect the signatures and store it in a "images" folder and I am storing the labels in a "labels.txt" file.
Now, I am trying to build a model to recognise the handwriting and give me the prediction. However, I am encountering the following error
"y should be a 1d array, got an array of shape {}
instead.".format(shape)"
I referred this question but am not sure as they were using train_test_split module.
Here is my code:
Checking the labels and images:
import os
# Get the list of images
images = os.listdir("images/")
# Remove any non-image files from the list
images = [img for img in images if img.endswith(".jpg") or img.endswith(".jpeg") or img.endswith(".png")]
# Get the list of labels from the label text file
with open("labels.txt", "r") as f:
labels = f.readlines()
# Strip the newline characters from each label
labels = [label.strip() for label in labels]
# Print the number of images and labels
print("Number of images:", len(images))
print("Number of labels:", len(labels))
Loading the dataset into "images" and "labels" array
import os
import numpy as np
import cv2
def load_images_labels(images_folder, labels_file):
images = []
labels = []
# Read images from the folder
for filename in os.listdir(images_folder):
if filename.endswith('.jpeg') or filename.endswith('.jpg'):
image = cv2.imread(os.path.join(images_folder, filename))
images.append(image)
# Read labels from the file
with open(labels_file) as f:
lines = f.readlines()
for line in lines:
label = line.strip()
labels.append(label)
# Convert the lists to numpy arrays
images = np.array(images)
labels = np.array(labels)
return images, labels
# Load the images and labels from the folder and file
images, labels = load_images_labels('./images', './labels.txt')
images = images[:2]
labels = labels[:5]
print(images)
print(labels)
Building, Compiling and Training the model
import tensorflow.keras as keras
import pandas as pd
# image size should be 28*28 so reshape using .reshape(-1.28,28,1)
# y, the list of names
#array[:75]
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (
Dense,
Conv2D,
MaxPool2D,
Flatten,
Dropout,
BatchNormalization,
)
model = Sequential()
model.add(Conv2D(75, (3, 3), strides=1, padding="same", activation="relu",
input_shape=(28, 28, 1)))
model.add(BatchNormalization())
model.add(MaxPool2D((2, 2), strides=2, padding="same"))
model.add(Conv2D(50, (3, 3), strides=1, padding="same", activation="relu"))
model.add(Dropout(0.2))
model.add(BatchNormalization())
model.add(MaxPool2D((2, 2), strides=2, padding="same"))
model.add(Conv2D(25, (3, 3), strides=1, padding="same", activation="relu"))
model.add(BatchNormalization())
model.add(MaxPool2D((2, 2), strides=2, padding="same"))
model.add(Flatten())
model.add(Dense(units=512, activation="relu"))
model.add(Dropout(0.3))
model.add(Dense(units=num_classes, activation="softmax"))
# Preprocessing the images to be ready for the model
images = images.astype("float32") / 255.0
images = np.array([image.reshape(-1, 28, 28, 1) for image in images])
# One hot encode the labels
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(labels)
labels = labels.reshape(-1, 1)
onehot_encoder = OneHotEncoder()
labels = onehot_encoder.fit_transform(labels).toarray()
num_classes = labels.shape[1]
# Compile the model
model.compile(
optimizer=keras.optimizers.Adam(lr=0.001),
loss="categorical_crossentropy",
metrics=["accuracy"],
)
# Train the model
history = model.fit(x=images, y=labels, batch_size=32, epochs=10, validation_split=0.01)
# Evaluate the model
test_loss, test_accuracy = model.evaluate(images, labels, verbose=0)
print(f"Test Accuracy: {test_accuracy:.4f}")
Error Message:
--------- ValueError Traceback (most recent call
> last) Cell In[10], line 9
> 6 from sklearn.preprocessing import LabelEncoder, OneHotEncoder
> 8 label_encoder = LabelEncoder()
> ----> 9 labels = label_encoder.fit_transform(labels)
> 10 labels = labels.reshape(-1, 1)
> 12 onehot_encoder = OneHotEncoder()
>
> File
> ~/miniconda3/envs/tensorflow/lib/python3.10/site-packages/sklearn/utils/_set_output.py:142,
> in _wrap_method_output.<locals>.wrapped(self, X, *args, **kwargs)
> 140 @wraps(f)
> 141 def wrapped(self, X, *args, **kwargs):
> --> 142 data_to_wrap = f(self, X, *args, **kwargs)
> 143 if isinstance(data_to_wrap, tuple):
> 144 # only wrap the first output for cross decomposition
> 145 return (
> 146 _wrap_data_with_container(method, data_to_wrap[0], X, self),
> 147 *data_to_wrap[1:],
> 148 )
>
> File
> ~/miniconda3/envs/tensorflow/lib/python3.10/site-packages/sklearn/preprocessing/_label.py:116,
> in LabelEncoder.fit_transform(self, y)
> 103 def fit_transform(self, y):
> 104 """Fit label encoder and return encoded labels.
> 105
> 106 Parameters (...)
> 114 Encoded labels.
> 115 """
> --> 116 y = column_or_1d(y, warn=True)
> 117 self.classes_, y = _unique(y, return_inverse=True)
> 118 return y
>
> File
> ~/miniconda3/envs/tensorflow/lib/python3.10/site-packages/sklearn/utils/validation.py:1202,
> in column_or_1d(y, dtype, warn) 1193 warnings.warn( 1194
> "A column-vector y was passed when a 1d array was" 1195
> " expected. Please change the shape of y to " (...) 1198
> stacklevel=2, 1199 ) 1200 return
> _asarray_with_order(xp.reshape(y, -1), order="C", xp=xp)
> -> 1202 raise ValueError( 1203 "y should be a 1d array, got an array of shape {} instead.".format(shape) 1204 )
>
> ValueError: y should be a 1d array, got an array of shape (5, 5)
> instead.
I would really be grateful for any help that I can receive.
So, the issue was fairly simple and it was completely my fault. I was executing this code on the Colab platform. And I forgot to execute the previous block of cell, before running "Building, Compiling and Training the model" – this cell.