Convert folder of images with labels in CSV file into a tensorflow Dataset

Question:

This clothing dataset (from Kaggle) when downloaded looks something like the below:

  • Labels inside a .csv file
  • Images in a subdirectory
+-dataset/
  |
  +-images.csv
  |
  +-images/
    |
    +-d7ed1d64-2c65-427f-9ae4-eb4aaa3e2389.jpg
    |
    +-5c1b7a77-1fa3-4af8-9722-cd38e45d89da.jpg
    |
    +-... <additional files>

I would like to load this into a tensorflow Dataset (version: tensorflow~=2.4).

Is there some way I can convert this directory of images with labels in a separate .csv into a tf.Dataset?

Tensorflow load image dataset with image labels suggests ImageDataGenerator.flow_from_dataframe, but this is now deprecated :/

Answers:

It is possible but making it correct the first time will be managed it easy when application process or adjusting to the current process is because correcting logic later is sometimes a massive task.

Adjust the label and target directory variable fields targeted to your observation.

[ Sample ]:

import tensorflow as tf
import tensorflow_io as tfio

import pandas as pd

import matplotlib.pyplot as plt

"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
Variables
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
# list_label_actual = [ 'Pikaploy', 'Pikaploy', 'Pikaploy', 'Pikaploy', 'Pikaploy', 'Candidt Kibt', 'Candidt Kibt', 'Candidt Kibt', 'Candidt Kibt', 'Candidt Kibt' ]
list_label_actual = [ 'Candidt Kibt', 'Pikaploy' ]

"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Dataset
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
variables = pd.read_excel('F:\temp\Python\excel\Book 7.xlsx', index_col=None, header=[0])

list_label = [ ]
list_Image = [ ]
list_file_actual = [ ]

for Index, Image, Label in variables.values:
    print( Label )
    list_label.append( Label )
    
    image = tf.io.read_file( Image )
    image = tfio.experimental.image.decode_tiff(image, index=0)
    list_file_actual.append(image)
    image = tf.image.resize(image, [32,32], method='nearest')
    list_Image.append(image)


list_label = tf.cast( list_label, dtype=tf.int32 )
list_label = tf.constant( list_label, shape=( 33, 1, 1 ) )
list_Image = tf.cast( list_Image, dtype=tf.int32 )
list_Image = tf.constant( list_Image, shape=( 33, 1, 32, 32, 4 ) )

dataset = tf.data.Dataset.from_tensor_slices(( list_Image, list_label ))
list_Image = tf.constant( list_Image, shape=( 33, 32, 32, 4) ).numpy()

"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Model Initialize
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
model = tf.keras.models.Sequential([
    tf.keras.layers.InputLayer(input_shape=( 32, 32, 4 )),
    tf.keras.layers.Normalization(mean=3., variance=2.),
    tf.keras.layers.Normalization(mean=4., variance=6.),
    # tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
    # tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Dense(256, activation='relu'),
    # tf.keras.layers.Reshape((256, 225)),
    tf.keras.layers.Reshape((256, 32 * 32)),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(196, return_sequences=True, return_state=False)),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(196)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(192, activation='relu'),
    tf.keras.layers.Dense(2),
])

"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Callback
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
class custom_callback(tf.keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs={}):
        if( logs['accuracy'] >= 0.97 ):
            self.model.stop_training = True
    
custom_callback = custom_callback()

"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Optimizer
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
optimizer = tf.keras.optimizers.Nadam(
    learning_rate=0.000001, beta_1=0.9, beta_2=0.999, epsilon=1e-07,
    name='Nadam'
)

"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Loss Fn
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""                               
lossfn = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=False,
    reduction=tf.keras.losses.Reduction.AUTO,
    name='sparse_categorical_crossentropy'
)

"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Model Summary
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
model.compile(optimizer=optimizer, loss=lossfn, metrics=['accuracy'] )

"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Training
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
history = model.fit( dataset, batch_size=100, epochs=50, callbacks=[custom_callback] )

plt.figure(figsize=(6,6))
plt.title("Actors recognitions")
for i in range(len(list_Image)):
    img = tf.keras.preprocessing.image.array_to_img(
        list_Image[i],
        data_format=None,
        scale=True
    )
    img_array = tf.keras.preprocessing.image.img_to_array(img)
    img_array = tf.expand_dims(img_array, 0)
    predictions = model.predict(img_array)
    score = tf.nn.softmax(predictions[0])
    plt.subplot(6, 6, i + 1)
    plt.xticks([])
    plt.yticks([])
    plt.grid(False)
    plt.imshow(list_file_actual[i])
    plt.xlabel(str(round(score[tf.math.argmax(score).numpy()].numpy(), 2)) + ":" +  str(list_label_actual[tf.math.argmax(score)]))
    
plt.show()

input('...')

[ Output ]:
Sample

Answered By: Jirayu Kaewprateep

Based on the answers:

I have DIY created the following. I am sure there is a simpler way, but this at least is something functional. I was hoping for more built-in support though:

import os.path
from typing import Dict, Tuple

import pandas as pd
import tensorflow as tf


def get_full_dataset(
    batch_size: int = 32, image_size: Tuple[int, int] = (256, 256)
) -> tf.data.Dataset:
    data = pd.read_csv(os.path.join(DATA_ABS_PATH, "images.csv"))
    images_path = os.path.join(DATA_ABS_PATH, "images")
    data["image"] = data["image"].map(lambda x: os.path.join(images_path, f"{x}.jpg"))
    filenames: tf.Tensor = tf.constant(data["image"], dtype=tf.string)
    data["label"] = data["label"].str.lower()
    class_name_to_label: Dict[str, int] = {
        label: i for i, label in enumerate(set(data["label"]))
    }
    labels: tf.Tensor = tf.constant(
        data["label"].map(class_name_to_label.__getitem__), dtype=tf.uint8
    )
    dataset = tf.data.Dataset.from_tensor_slices((filenames, labels))

    def _parse_function(filename, label):
        jpg_image: tf.Tensor = tf.io.decode_jpeg(tf.io.read_file(filename))
        return tf.image.resize(jpg_image, size=image_size), label

    dataset = dataset.map(_parse_function)
    return dataset.batch(batch_size)