AttributeError: 'tuple' object has no attribute 'rank' when calling fit on a Keras model with custom generator

Question:

I want to build a neural network with two inputs: one for image data and one for numeric data. So I wrote a custom data generator for that. The train and validation dataframes contain 11 columns:

  1. image_name — path to the image;
  2. 9 numeric features;
  3. target — class for the item (last column).

The code for the custom generator (based on this answer):

# Size every loaded image is resized to, and the number of samples per batch.
target_size = (224, 224)
batch_size = 1

# Training images are multiplied by 1/255 and randomly augmented
# (rotation, horizontal/vertical shifts, horizontal flips).
train_datagen = ImageDataGenerator(
    horizontal_flip=True,
    height_shift_range=0.2,
    width_shift_range=0.2,
    rotation_range=20,
    rescale=1.0 / 255)

# Validation images are only rescaled, never augmented.
val_datagen = ImageDataGenerator(rescale=1.0 / 255)

# With class_mode='raw' the labels of each batch are the raw values of the
# y_col columns. Here that is every column after 'image_name', i.e. the
# numeric features followed by the target in the last position.
train_generator = train_datagen.flow_from_dataframe(
    train,
    x_col='image_name',
    y_col=train.columns[1:],
    class_mode='raw',
    target_size=target_size,
    batch_size=batch_size,
    shuffle=True)

# Same column layout as the training iterator, but in a fixed order.
validation_generator = val_datagen.flow_from_dataframe(
    validation,
    x_col='image_name',
    y_col=validation.columns[1:],
    class_mode='raw',
    target_size=target_size,
    batch_size=batch_size,
    shuffle=False)

def train_generator_func():
    """Yield ``([imgs, cols], targets)`` batches drawn from ``train_generator``.

    Each batch from ``train_generator`` is split per sample into the image,
    all label columns except the last, and the last label column (the
    target). The three pieces are collected in plain Python lists.

    The generator produces ``len(train.index)`` batches; when resumed after
    the last one it resets ``train_generator`` and finishes.
    """
    count = 0
    while count != len(train.index):
        count += 1
        data = train_generator.next()

        # data[0] holds the images, data[1] the label rows of the batch.
        samples = range(batch_size)
        imgs = [data[0][k] for k in samples]
        cols = [data[1][k][:-1] for k in samples]
        targets = [data[1][k][-1] for k in samples]

        yield [imgs, cols], targets

    train_generator.reset()
        
def validation_generator_func():
    """Yield ``([imgs, cols], targets)`` batches drawn from ``validation_generator``.

    Each batch from ``validation_generator`` is split per sample into the
    image, all label columns except the last, and the last label column (the
    target). The three pieces are collected in plain Python lists.

    The generator produces ``len(validation.index)`` batches; when resumed
    after the last one it resets ``validation_generator`` and finishes.
    """
    count = 0
    while count != len(validation.index):
        count += 1
        data = validation_generator.next()

        # data[0] holds the images, data[1] the label rows of the batch.
        samples = range(batch_size)
        imgs = [data[0][k] for k in samples]
        cols = [data[1][k][:-1] for k in samples]
        targets = [data[1][k][-1] for k in samples]

        yield [imgs, cols], targets

    validation_generator.reset()

Model building:

def mlp_model(dim):
    """Build the dense branch that processes the numeric features.

    Args:
        dim: Number of input features, passed as ``input_dim`` of the first
            layer.

    Returns:
        An uncompiled ``Sequential`` model made of a ReLU ``Dense(8)`` layer
        followed by a ReLU ``Dense(4)`` layer.
    """
    return Sequential([
        Dense(8, input_dim=dim, activation="relu"),
        Dense(4, activation="relu"),
    ])


def vgg16_model():
    """Build the image branch: VGG16 without its top plus a sigmoid head.

    The input shape is ``target_size`` with three channels appended.

    Returns:
        An uncompiled ``Model`` mapping the VGG16 input to a single sigmoid
        unit.
    """
    backbone = VGG16(
        include_top=False,
        weights='imagenet',
        input_shape=target_size + (3,))
    flattened = Flatten()(backbone.output)
    # A single sigmoid unit, because we have to predict the AUC.
    prediction = Dense(1, activation='sigmoid')(flattened)
    return Model(backbone.input, prediction)


def concatenated_model(cnn, mlp):
    """Join the image and numeric branches into one two-input model.

    Args:
        cnn: Model whose ``input``/``output`` form the image branch.
        mlp: Model whose ``input``/``output`` form the numeric branch.

    Returns:
        An uncompiled ``Model`` taking ``[cnn.input, mlp.input]`` and
        producing a single sigmoid output.
    """
    merged = concatenate([cnn.output, mlp.output])
    hidden = Dense(4, activation="relu")(merged)
    prediction = Dense(1, activation="sigmoid")(hidden)
    return Model(inputs=[cnn.input, mlp.input], outputs=prediction)


def focal_loss(alpha=0.25, gamma=2.0):
    """Create a binary focal-loss function for use with ``model.compile``.

    Args:
        alpha: Weight applied where ``y_true`` is 1; ``1 - alpha`` is applied
            where ``y_true`` is 0.
        gamma: Exponent of the ``(1 - p_t)`` modulating factor.

    Returns:
        A function ``focal_crossentropy(y_true, y_pred)`` that returns the
        weighted binary cross-entropy averaged over the last axis.
    """
    def focal_crossentropy(y_true, y_pred):
        bce = K.binary_crossentropy(y_true, y_pred)

        # Keep predictions strictly inside (0, 1) before forming p_t.
        y_pred = K.clip(y_pred, K.epsilon(), 1. - K.epsilon())
        # For 0/1 labels, p_t is the probability assigned to the true class.
        p_t = (y_true * y_pred) + ((1 - y_true) * (1 - y_pred))

        alpha_factor = y_true * alpha + ((1 - alpha) * (1 - y_true))
        modulating_factor = K.pow((1 - p_t), gamma)

        # compute the final loss and return
        return K.mean(alpha_factor * modulating_factor * bce, axis=-1)
    return focal_crossentropy

# Build the two branches; the MLP branch takes the 9 numeric features.
cnn = vgg16_model()
mlp = mlp_model(9)

model = concatenated_model(cnn, mlp)

# `learning_rate` is the supported argument name; `lr` is a deprecated alias.
opt = Adam(learning_rate=1e-5)
model.compile(loss=focal_loss(), metrics=[tf.keras.metrics.AUC()],optimizer=opt)

# Number of whole batches available per epoch for each dataframe.
nb_epochs = 2
nb_train_steps = train.shape[0]//batch_size
nb_val_steps = validation.shape[0]//batch_size

model.fit(
    train_generator_func(),
    steps_per_epoch=nb_train_steps,
    epochs=nb_epochs,
    validation_data=validation_generator_func(),
    validation_steps=nb_val_steps)

Fitting fails with the following error message:

AttributeError                            Traceback (most recent call last)
<ipython-input-53-253849fd34d6> in <module>
      9     epochs=nb_epochs,
     10     validation_data=validation_generator_func(),
---> 11     validation_steps=nb_val_steps)

d:\pyenv\keras-gpu\lib\site-packages\tensorflow\python\keras\engine\training.py in _method_wrapper(self, *args, **kwargs)
    106   def _method_wrapper(self, *args, **kwargs):
    107     if not self._in_multi_worker_mode():  # pylint: disable=protected-access
--> 108       return method(self, *args, **kwargs)
    109 
    110     # Running inside `run_distribute_coordinator` already.

d:\pyenv\keras-gpu\lib\site-packages\tensorflow\python\keras\engine\training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)
   1061           use_multiprocessing=use_multiprocessing,
   1062           model=self,
-> 1063           steps_per_execution=self._steps_per_execution)
   1064 
   1065       # Container that configures and calls `tf.keras.Callback`s.

d:\pyenv\keras-gpu\lib\site-packages\tensorflow\python\keras\engine\data_adapter.py in __init__(self, x, y, sample_weight, batch_size, steps_per_epoch, initial_epoch, epochs, shuffle, class_weight, max_queue_size, workers, use_multiprocessing, model, steps_per_execution)
   1108         use_multiprocessing=use_multiprocessing,
   1109         distribution_strategy=ds_context.get_strategy(),
-> 1110         model=model)
   1111 
   1112     strategy = ds_context.get_strategy()

d:\pyenv\keras-gpu\lib\site-packages\tensorflow\python\keras\engine\data_adapter.py in __init__(self, x, y, sample_weights, workers, use_multiprocessing, max_queue_size, model, **kwargs)
    796       return tensor_shape.TensorShape([None for _ in shape.as_list()])
    797 
--> 798     output_shapes = nest.map_structure(_get_dynamic_shape, peek)
    799     output_types = nest.map_structure(lambda t: t.dtype, peek)
    800 

d:\pyenv\keras-gpu\lib\site-packages\tensorflow\python\util\nest.py in map_structure(func, *structure, **kwargs)
    633 
    634   return pack_sequence_as(
--> 635       structure[0], [func(*x) for x in entries],
    636       expand_composites=expand_composites)
    637 

d:\pyenv\keras-gpu\lib\site-packages\tensorflow\python\util\nest.py in <listcomp>(.0)
    633 
    634   return pack_sequence_as(
--> 635       structure[0], [func(*x) for x in entries],
    636       expand_composites=expand_composites)
    637 

d:\pyenv\keras-gpu\lib\site-packages\tensorflow\python\keras\engine\data_adapter.py in _get_dynamic_shape(t)
    792       shape = t.shape
    793       # Unknown number of dimensions, `as_list` cannot be called.
--> 794       if shape.rank is None:
    795         return shape
    796       return tensor_shape.TensorShape([None for _ in shape.as_list()])

AttributeError: 'tuple' object has no attribute 'rank'

I tried looking through the Keras sources, but without any success.

If I use a modified train_generator and validation_generator (y_col='target' instead of y_col=train.columns[1:]), everything works fine.

Asked By: feeeper

||

Answers:

You need to convert all the individual objects returned by both the training and validation generators to Numpy arrays:

    yield [np.array(imgs), np.array(cols)], np.array(targets)

Alternatively, a simpler and much more efficient solution is to not iterate over the data batch at all; instead, we can take advantage of the fact that these objects are already Numpy arrays when returned by ImageDataGenerator, so we can write:

    imgs = data[0]
    cols = data[1][:,:-1]
    targets = data[1][:,-1:]
    yield [imgs, cols], targets
Answered By: today

A different solution worked for me, just posting it here.
I ran into the problem working with two very similar dataframes in one notebook, where for one of them the error occurred.
I noticed the dtypes were slightly different (int64 vs. Int64); the dataframe whose target column was stored as Int64 was the one that gave the error.

For me the following worked:

dataframe[target_col] = dataframe[target_col].astype(int)
Answered By: Datamedster