TensorFlow 2.0 ValueError while loading weights from .h5 file

Question:

I have a VAE architecture script as follows:

import numpy as np
import tensorflow as tf

from tensorflow.keras.layers import Input, Conv2D, Flatten, Dense, Conv2DTranspose, Lambda, Reshape, Layer
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import backend as K

INPUT_DIM = (64,64,3)

CONV_FILTERS = [32,64,64, 128]
CONV_KERNEL_SIZES = [4,4,4,4]
CONV_STRIDES = [2,2,2,2]
CONV_ACTIVATIONS = ['relu','relu','relu','relu']

DENSE_SIZE = 1024

CONV_T_FILTERS = [64,64,32,3]
CONV_T_KERNEL_SIZES = [5,5,6,6]
CONV_T_STRIDES = [2,2,2,2]
CONV_T_ACTIVATIONS = ['relu','relu','relu','sigmoid']

Z_DIM = 32

BATCH_SIZE = 100
LEARNING_RATE = 0.0001
KL_TOLERANCE = 0.5




class Sampling(Layer):
    def call(self, inputs):
        mu, log_var = inputs
        epsilon = K.random_normal(shape=K.shape(mu), mean=0., stddev=1.)
        return mu + K.exp(log_var / 2) * epsilon


class VAEModel(Model):


    def __init__(self, encoder, decoder, r_loss_factor, **kwargs):
        super(VAEModel, self).__init__(**kwargs)
        self.encoder = encoder
        self.decoder = decoder
        self.r_loss_factor = r_loss_factor

    def train_step(self, data):
        if isinstance(data, tuple):
            data = data[0]
        def compute_kernel(x, y):
            x_size = tf.shape(x)[0]
            y_size = tf.shape(y)[0]
            dim = tf.shape(x)[1]
            tiled_x = tf.tile(tf.reshape(x, tf.stack([x_size, 1, dim])), tf.stack([1, y_size, 1]))
            tiled_y = tf.tile(tf.reshape(y, tf.stack([1, y_size, dim])), tf.stack([x_size, 1, 1]))
            return tf.exp(-tf.reduce_mean(tf.square(tiled_x - tiled_y), axis=2) / tf.cast(dim, tf.float32))

        def compute_mmd(x, y):
            x_kernel = compute_kernel(x, x)
            y_kernel = compute_kernel(y, y)
            xy_kernel = compute_kernel(x, y)
            return tf.reduce_mean(x_kernel) + tf.reduce_mean(y_kernel) - 2 * tf.reduce_mean(xy_kernel)

        with tf.GradientTape() as tape:
            z_mean, z_log_var, z = self.encoder(data)
            reconstruction = self.decoder(z)
            reconstruction_loss = tf.reduce_mean(
                tf.square(data - reconstruction), axis = [1,2,3]
            )
            reconstruction_loss *= self.r_loss_factor
            kl_loss = 1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var)
            kl_loss = tf.reduce_sum(kl_loss, axis = 1)
            kl_loss *= -0.5

            true_samples = tf.random.normal(tf.stack([BATCH_SIZE, Z_DIM]))
            loss_mmd = compute_mmd(true_samples, z)
            

            total_loss = reconstruction_loss + loss_mmd
        grads = tape.gradient(total_loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
        return {
            "loss": total_loss,
            "reconstruction_loss": reconstruction_loss,
            "kl_loss": kl_loss,
            "mmd_loss": loss_mmd
        }
    
    def call(self, inputs):
        # Unpack the encoder outputs and decode only the sampled latent z
        z_mean, z_log_var, z = self.encoder(inputs)
        return self.decoder(z)



class VAE():
    def __init__(self):
        self.models = self._build()
        self.full_model = self.models[0]
        self.encoder = self.models[1]
        self.decoder = self.models[2]

        self.input_dim = INPUT_DIM
        self.z_dim = Z_DIM
        self.learning_rate = LEARNING_RATE
        self.kl_tolerance = KL_TOLERANCE

    def _build(self):
        vae_x = Input(shape=INPUT_DIM, name='observation_input')
        vae_c1 = Conv2D(filters = CONV_FILTERS[0], kernel_size = CONV_KERNEL_SIZES[0], strides = CONV_STRIDES[0], activation=CONV_ACTIVATIONS[0], name='conv_layer_1')(vae_x)
        vae_c2 = Conv2D(filters = CONV_FILTERS[1], kernel_size = CONV_KERNEL_SIZES[1], strides = CONV_STRIDES[1], activation=CONV_ACTIVATIONS[0], name='conv_layer_2')(vae_c1)
        vae_c3= Conv2D(filters = CONV_FILTERS[2], kernel_size = CONV_KERNEL_SIZES[2], strides = CONV_STRIDES[2], activation=CONV_ACTIVATIONS[0], name='conv_layer_3')(vae_c2)
        vae_c4= Conv2D(filters = CONV_FILTERS[3], kernel_size = CONV_KERNEL_SIZES[3], strides = CONV_STRIDES[3], activation=CONV_ACTIVATIONS[0], name='conv_layer_4')(vae_c3)

        vae_z_in = Flatten()(vae_c4)

        vae_z_mean = Dense(Z_DIM, name='mu')(vae_z_in)
        vae_z_log_var = Dense(Z_DIM, name='log_var')(vae_z_in)

        vae_z = Sampling(name='z')([vae_z_mean, vae_z_log_var])
        

        #### DECODER: 
        vae_z_input = Input(shape=(Z_DIM,), name='z_input')

        vae_dense = Dense(1024, name='dense_layer')(vae_z_input)
        vae_unflatten = Reshape((1,1,DENSE_SIZE), name='unflatten')(vae_dense)
        vae_d1 = Conv2DTranspose(filters = CONV_T_FILTERS[0], kernel_size = CONV_T_KERNEL_SIZES[0] , strides = CONV_T_STRIDES[0], activation=CONV_T_ACTIVATIONS[0], name='deconv_layer_1')(vae_unflatten)
        vae_d2 = Conv2DTranspose(filters = CONV_T_FILTERS[1], kernel_size = CONV_T_KERNEL_SIZES[1] , strides = CONV_T_STRIDES[1], activation=CONV_T_ACTIVATIONS[1], name='deconv_layer_2')(vae_d1)
        vae_d3 = Conv2DTranspose(filters = CONV_T_FILTERS[2], kernel_size = CONV_T_KERNEL_SIZES[2] , strides = CONV_T_STRIDES[2], activation=CONV_T_ACTIVATIONS[2], name='deconv_layer_3')(vae_d2)
        vae_d4 = Conv2DTranspose(filters = CONV_T_FILTERS[3], kernel_size = CONV_T_KERNEL_SIZES[3] , strides = CONV_T_STRIDES[3], activation=CONV_T_ACTIVATIONS[3], name='deconv_layer_4')(vae_d3)
        

        #### MODELS

    
        vae_encoder = Model(vae_x, [vae_z_mean, vae_z_log_var, vae_z], name = 'encoder')
        vae_decoder = Model(vae_z_input, vae_d4, name = 'decoder')

        vae_full = VAEModel(vae_encoder, vae_decoder, 10000)

        opti = Adam(lr=LEARNING_RATE)
        vae_full.compile(optimizer=opti)
        
        return (vae_full,vae_encoder, vae_decoder)

    def set_weights(self, filepath):
        self.full_model.load_weights(filepath)

    def train(self, data):

        self.full_model.fit(data, data,
                shuffle=True,
                epochs=1,
                batch_size=BATCH_SIZE)
        
    def save_weights(self, filepath):
        self.full_model.save_weights(filepath)

Problem:

vae = VAE()
vae.set_weights(filepath)

throws:

File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py", line 2200, in load_weights
    'Unable to load weights saved in HDF5 format into a subclassed '
ValueError: Unable to load weights saved in HDF5 format into a subclassed Model which has not created its variables yet. Call the Model first, then load the weights.

I am not sure what this means, since I am not that proficient in OOP. The surprising part is that this code was working until it suddenly stopped. The model trains from scratch and saves its weights to filepath, but when I load those same weights back it now throws the error above!

Asked By: Nirmal Roy


Answers:

What version of TF are you running? For a while the default saving format was HDF5, but that format cannot easily handle subclassed models, which is why you get this error. It may be solvable by first training the model on a single batch and then loading the weights (that lets the model figure out how its parts are connected, which is not stored in HDF5).

In the future I would recommend making sure that all saves are done with the TensorFlow checkpoint format instead; it will save you from extra work.
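
For example, a minimal sketch applied to the question's code (the weight file names here are placeholders, not the asker's actual paths):

vae = VAE()

# Run one tiny training pass so the subclassed model actually creates its
# variables, then load the HDF5 weights.
dummy_batch = np.zeros((BATCH_SIZE, *INPUT_DIM), dtype=np.float32)
vae.train(dummy_batch)
vae.set_weights('./vae_weights.h5')          # placeholder path

# Alternatively, save in the TensorFlow checkpoint format instead of HDF5;
# in TF 2.x a path without the .h5 suffix defaults to the TF format.
vae.full_model.save_weights('./vae_weights_tf')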

Answered By: alwaysmvp45

If you set model.built = True before loading the model weights, it works.
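
Applied to the question's VAE wrapper, that would be something along these lines (a sketch, not verified on every TF version):

vae = VAE()
vae.full_model.built = True   # mark the subclassed model as built
vae.set_weights(filepath)     # load_weights no longer raises the ValueError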

Answered By: Aaron

As alwaysmvp45 pointed out, HDF5 does not store how the layers are connected. Another way to get the layers connected is to call the model on an array of zeros with shape (1, w, h, c) before loading the weights:

model(np.zeros((1,w,h,c)))
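
For the VAE in this question, INPUT_DIM is (64, 64, 3), so the dummy forward pass could look like the following (a sketch; it assumes the model's call() runs cleanly on a single batch):

vae = VAE()
vae.full_model(np.zeros((1, 64, 64, 3), dtype=np.float32))   # builds the variables
vae.set_weights(filepath)
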
Answered By: Hoa Nguyen

I was getting the same error while loading weights via

model.load_weights("Detection_model.h5")

ValueError: Unable to load weights saved in HDF5 format into a subclassed Model which has not created its variables yet. Call the Model first, then load the weights.

I solved it by building the model before loading the weights:

model.build(input_shape = <INPUT_SHAPE>)
model.load_weights("Detection_model.h5")

PS: TensorFlow version 2.5.0.

Answered By: Shumaila Ahmed

Not sure if this has changed in more recent versions (I'm on 2.4), but I had to go this route:

# Do all the build and training 
# ...
# Save the weights
model.save('path/to/location.h5')

# delete any reference to the model
del model

# Now do the load for testing
from tensorflow import keras
model = keras.models.load_model('path/to/location.h5')

When I tried the other suggestions, I got warnings about layers not being present, and I had to rebuild the same model that I used for training. No big deal, stick it in a function somewhere, but this works better for me.

Answered By: Nigel Johnson

Before loading weights from .h5 files, you should build the model first:

model.build(input_shape=())
model.load_weights(r'your_h5_files.h5')

input_shape should be a 4-dimensional tuple; if your input is 3-dimensional, just set the first (batch) dimension to None, e.g. model.build(input_shape=(None, 224, 224, 3)).

Answered By: UltraMar1ne