y should be a 1d array, got an array of shape {} instead.".format(shape)

Question:

I am currently working on a project to detect and recognise handwritten signatures. I am able to detect the signatures and store it in a "images" folder and I am storing the labels in a "labels.txt" file.

Now, I am trying to build a model to recognise the handwriting and give me the prediction. However, I am encountering the following error

"y should be a 1d array, got an array of shape {}
instead.".format(shape)"

I referred this question but am not sure as they were using train_test_split module.

Here is my code:

Checking the labels and images:

import os

# Get the list of images
images = os.listdir("images/")

# Remove any non-image files from the list
images = [img for img in images if img.endswith(".jpg") or img.endswith(".jpeg") or img.endswith(".png")]

# Get the list of labels from the label text file
with open("labels.txt", "r") as f:
    labels = f.readlines()

# Strip the newline characters from each label
labels = [label.strip() for label in labels]

# Print the number of images and labels
print("Number of images:", len(images))
print("Number of labels:", len(labels))

Loading the dataset into "images" and "labels" array

import os
import numpy as np
import cv2

def load_images_labels(images_folder, labels_file):
    images = []
    labels = []
    
    # Read images from the folder
    for filename in os.listdir(images_folder):
        if filename.endswith('.jpeg') or filename.endswith('.jpg'):
            image = cv2.imread(os.path.join(images_folder, filename))
            images.append(image)
    
    # Read labels from the file
    with open(labels_file) as f:
        lines = f.readlines()
        for line in lines:
            label = line.strip()
            labels.append(label)
    
    # Convert the lists to numpy arrays
    images = np.array(images)
    labels = np.array(labels)
    
    return images, labels

# Load the images and labels from the folder and file
images, labels = load_images_labels('./images', './labels.txt')

images = images[:2]
labels = labels[:5]

print(images)
print(labels)

Building, Compiling and Training the model

import tensorflow.keras as keras
import pandas as pd

# image size should be 28*28 so reshape using .reshape(-1.28,28,1)
# y, the list of names

#array[:75]


from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (
    Dense,
    Conv2D,
    MaxPool2D,
    Flatten,
    Dropout,
    BatchNormalization,
)

model = Sequential()
model.add(Conv2D(75, (3, 3), strides=1, padding="same", activation="relu", 
                 input_shape=(28, 28, 1)))
model.add(BatchNormalization())
model.add(MaxPool2D((2, 2), strides=2, padding="same"))
model.add(Conv2D(50, (3, 3), strides=1, padding="same", activation="relu"))
model.add(Dropout(0.2))
model.add(BatchNormalization())
model.add(MaxPool2D((2, 2), strides=2, padding="same"))
model.add(Conv2D(25, (3, 3), strides=1, padding="same", activation="relu"))
model.add(BatchNormalization())
model.add(MaxPool2D((2, 2), strides=2, padding="same"))
model.add(Flatten())
model.add(Dense(units=512, activation="relu"))
model.add(Dropout(0.3))
model.add(Dense(units=num_classes, activation="softmax"))
# Preprocessing the images to be ready for the model
images = images.astype("float32") / 255.0
images = np.array([image.reshape(-1, 28, 28, 1) for image in images])

# One hot encode the labels
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(labels)
labels = labels.reshape(-1, 1)

onehot_encoder = OneHotEncoder()
labels = onehot_encoder.fit_transform(labels).toarray()

num_classes = labels.shape[1]

# Compile the model
model.compile(
    optimizer=keras.optimizers.Adam(lr=0.001),
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

# Train the model
history = model.fit(x=images, y=labels, batch_size=32, epochs=10, validation_split=0.01)

# Evaluate the model
test_loss, test_accuracy = model.evaluate(images, labels, verbose=0)
print(f"Test Accuracy: {test_accuracy:.4f}")

Error Message:


--------- ValueError                                Traceback (most recent call
> last) Cell In[10], line 9
>       6 from sklearn.preprocessing import LabelEncoder, OneHotEncoder
>       8 label_encoder = LabelEncoder()
> ----> 9 labels = label_encoder.fit_transform(labels)
>      10 labels = labels.reshape(-1, 1)
>      12 onehot_encoder = OneHotEncoder()
> 
> File
> ~/miniconda3/envs/tensorflow/lib/python3.10/site-packages/sklearn/utils/_set_output.py:142,
> in _wrap_method_output.<locals>.wrapped(self, X, *args, **kwargs)
>     140 @wraps(f)
>     141 def wrapped(self, X, *args, **kwargs):
> --> 142     data_to_wrap = f(self, X, *args, **kwargs)
>     143     if isinstance(data_to_wrap, tuple):
>     144         # only wrap the first output for cross decomposition
>     145         return (
>     146             _wrap_data_with_container(method, data_to_wrap[0], X, self),
>     147             *data_to_wrap[1:],
>     148         )
> 
> File
> ~/miniconda3/envs/tensorflow/lib/python3.10/site-packages/sklearn/preprocessing/_label.py:116,
> in LabelEncoder.fit_transform(self, y)
>     103 def fit_transform(self, y):
>     104     """Fit label encoder and return encoded labels.
>     105 
>     106     Parameters    (...)
>     114         Encoded labels.
>     115     """
> --> 116     y = column_or_1d(y, warn=True)
>     117     self.classes_, y = _unique(y, return_inverse=True)
>     118     return y
> 
> File
> ~/miniconda3/envs/tensorflow/lib/python3.10/site-packages/sklearn/utils/validation.py:1202,
> in column_or_1d(y, dtype, warn)    1193         warnings.warn(    1194
> "A column-vector y was passed when a 1d array was"    1195            
> " expected. Please change the shape of y to "    (...)    1198        
> stacklevel=2,    1199         )    1200     return
> _asarray_with_order(xp.reshape(y, -1), order="C", xp=xp)
> -> 1202 raise ValueError(    1203     "y should be a 1d array, got an array of shape {} instead.".format(shape)    1204 )
> 
> ValueError: y should be a 1d array, got an array of shape (5, 5)
> instead.

I would really be grateful for any help that I can receive.

Asked By: sadLife101

||

Answers:

So, the issue was fairly simple and it was completely my fault. I was executing this code on the Colab platform. And I forgot to execute the previous block of cell, before running "Building, Compiling and Training the model" – this cell.

Answered By: sadLife101