How to join an encoder and a decoder

Question:

I have built the following encoder-decoder architecture, and the encoder and decoder both work fine separately:

from tensorflow.keras.layers import LSTM, Input, Reshape, Lambda
from tensorflow.keras.models import Model
from tensorflow.keras import backend as K

WORD_TO_INDEX = {"foo": 0, "bar": 1}

MAX_QUERY_WORD_COUNT = 10
QUERY_ENCODING_SIZE = 15

# ENCODER
query_encoder_input = Input(shape=(None, len(WORD_TO_INDEX)), name="query_encoder_input")
query_encoder_output = LSTM(QUERY_ENCODING_SIZE, name="query_encoder_lstm")(query_encoder_input)
query_encoder = Model(inputs=query_encoder_input, outputs=query_encoder_output)

# DECODER
query_decoder_input = Input(shape=(QUERY_ENCODING_SIZE,), name="query_decoder_input")
query_decoder_reshape = Reshape((1, QUERY_ENCODING_SIZE), name="query_decoder_reshape")(query_decoder_input)
query_decoder_lstm = LSTM(QUERY_ENCODING_SIZE, name="query_decoder_lstm", return_sequences=True, return_state=True)
recurrent_input, state_h, state_c = query_decoder_lstm(query_decoder_reshape)
states = [state_h, state_c]
query_decoder_outputs = []
for _ in range(MAX_QUERY_WORD_COUNT):
    recurrent_input, state_h, state_c = query_decoder_lstm(recurrent_input, initial_state=states)
    query_decoder_outputs.append(recurrent_input)
    states = [state_h, state_c]
query_decoder_output = Lambda(lambda x: K.concatenate(x, axis=1), name="query_decoder_concat")(query_decoder_outputs)
query_decoder = Model(inputs=query_decoder_input, outputs=query_decoder_output)

But when I try to join them together to create an autoencoder, I get an odd error and I don’t know why.

# AUTOENCODER
# apply the reshape layer to the output of the encoder
query_autoencoder_output = query_decoder.layers[1](query_encoder_output)
# rebuild the autoencoder by applying each layer of the decoder to the output of the encoder
for decoder_layer in query_decoder.layers[2:]:
    # this fails and I don't know why
    query_autoencoder_output = decoder_layer(query_autoencoder_output)
# the code never gets here
query_autoencoder = Model(inputs=query_encoder_input, outputs=query_autoencoder_output)

This throws the error:

ValueError: Shape must be rank 3 but is rank 2 for ‘{{node
query_decoder_concat/concat_1}} = ConcatV2[N=3, T=DT_FLOAT,
Tidx=DT_INT32](query_decoder_lstm/PartitionedCall_11:1,
query_decoder_lstm/PartitionedCall_11:2,
query_decoder_lstm/PartitionedCall_11:3,
query_decoder_concat/concat_1/axis)’ with input shapes: [?,1,15],
[?,15], [?,15], [].

This is the template I used for my decoder. (See the "What if I don’t want to use teacher forcing for training?" section.)

I relied on these StackOverflow questions (especially the last one) to figure out how to combine the models together.

What does this error mean and how can I fix it?

Asked By: Pro Q

||

Answers:

You can treat a model as a layer * essentially *. With an autoencoder, it’ll be as straightforward as something like this:

autoencoder = Sequential([encoder, decoder])
Answered By: M Z

If you want some extra flexibility you can subclass tf.keras.Model:

class AutoEncoder(tf.keras.Model):
    def __init__(self, encoder, decoder):
        super(AutoEncoder, self).__init__()
        self.encoder = encoder
        self.decoder = decoder

    def call(self, inputs, training=None, **kwargs):
        x = self.encoder(inputs)
        x = self.decoder(x)
        return x

ae = AutoEncoder(encoder, decoder)

ae.fit(...

Full reproducible example:

import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
from tensorflow import keras
import tensorflow as tf
physical_devices = tf.config.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(physical_devices[0], True)
import numpy as np

(xtrain, ytrain), (xtest, ytest) = keras.datasets.cifar10.load_data()

train_ix = np.where(ytrain.ravel() == 1)
test_ix = np.where(ytest.ravel() == 1)

cars_train = xtrain[train_ix]
cars_test = xtest[test_ix]

cars = np.vstack([cars_train, cars_test]).astype(np.float32)/255

X = tf.data.Dataset.from_tensor_slices(cars).batch(8)


class Encoder(keras.Model):
    def __init__(self):
        super(Encoder, self).__init__()
        self.flat = keras.layers.Flatten(input_shape=(32, 32, 3))
        self.dense1 = keras.layers.Dense(128)
        self.dense2 = keras.layers.Dense(32)

    def call(self, inputs, training=None, **kwargs):
        x = self.flat(inputs)
        x = keras.activations.selu(self.dense1(x))
        x = keras.activations.selu(self.dense2(x))
        return x


class Decoder(keras.Model):
    def __init__(self):
        super(Decoder, self).__init__()
        self.dense1 = keras.layers.Dense(128, input_shape=[32])
        self.dense2 = keras.layers.Dense(32 * 32 * 3)
        self.reshape = keras.layers.Reshape([32, 32, 3])

    def call(self, inputs, training=None, **kwargs):
        x = keras.activations.selu(self.dense1(inputs))
        x = keras.activations.sigmoid(self.dense2(x))
        x = self.reshape(x)
        return x


class AutoEncoder(keras.Model):
    def __init__(self, encoder, decoder):
        super(AutoEncoder, self).__init__()
        self.encoder = encoder
        self.decoder = decoder

    def call(self, inputs, training=None, **kwargs):
        x = self.encoder(inputs)
        x = self.decoder(x)
        return x


ae = AutoEncoder(Encoder(), Decoder())

loss_object = keras.losses.BinaryCrossentropy()

reconstruction_loss = keras.metrics.Mean(name='reconstruction_loss')

optimizer = keras.optimizers.Adam()


@tf.function
def reconstruct(inputs):
    with tf.GradientTape() as tape:
        out = ae(inputs)
        loss = loss_object(inputs, out)

    gradients = tape.gradient(loss, ae.trainable_variables)
    optimizer.apply_gradients(zip(gradients, ae.trainable_variables))

    reconstruction_loss(loss)


if __name__ == '__main__':
    template = 'Epoch {:2} Reconstruction Loss {:.4f}'
    for epoch in range(50):
        reconstruction_loss.reset_states()
        for input_batches in X:
            reconstruct(input_batches)
        print(template.format(epoch + 1, reconstruction_loss.result()))

Output:

Epoch 35 Reconstruction Loss 0.5794
Epoch 36 Reconstruction Loss 0.5793
Epoch 37 Reconstruction Loss 0.5792
Epoch 38 Reconstruction Loss 0.5791
Epoch 39 Reconstruction Loss 0.5790
Epoch 40 Reconstruction Loss 0.5789
Answered By: Nicolas Gervais

Based off of M Z’s answer, but without using Sequential, you can do this:

query_autoencoder = Model(inputs=query_encoder_input, outputs=query_decoder(query_encoder_output))
query_autoencoder.summary()

The summary also breaks down into more layers than M Z’s answer did.

Answered By: Pro Q

For anyone using Javascript and Tensorflow JS, here is an example of how to make deeper networks.

I thought I would include this since most Javascript examples of autoencoders show only one encoding and decoding layer.

function buildModel (layers)
{
    const model = tf.sequential();
    layers.forEach(layer =>
    {
        model.add(layer);
    });
    return model;
}

Then you can make a deeper network with more encoding and decoding layers with:

//               [ encoder ]   [ decoder ]
// Model layers: 20 -> 10 -> 2 -> 10 -> 20

// Encoding Layers
const encoder = [
    tf.layers.dense({ inputShape: [20], units: 10, activation: 'relu' }),
    tf.layers.dense({ units: 2, activation: 'relu' })
];

// Decoding Layers
const decoder = [
    tf.layers.dense({ units: 10, activation: 'relu' }),
    tf.layers.dense({ units: 20, activation: 'relu' })
];

// Build model
const model = buildModel([...encoder, ...decoder]);

// ... Then compile and Train ...

After training, predictions can be made with:

const predictionModel = buildModel([...encoder]);
predictionModel.predict(data);
Answered By: Broper