TensorFlow: How to use Adam optimizer properly

Question:

Somebody has already asked a similar question, but the solution given there does not work for me.

I am trying to use Adam optimizer in TensorFlow. Here is a part of my code about it:

adamOptimizer = tf.train.AdamOptimizer(learning_rate=0.001, beta1=0.9,
           beta2=0.999, epsilon=1e-08, use_locking=False, name='Adam')

print('Optimizer was created!')

# Create a variable to track the global step.
global_step = tf.Variable(0, name='global_step', trainable=False)

# Initialize variables
vars_to_init = ae.get_variables_to_init(n)
vars_to_init.append(global_step)

sess.run(tf.variables_initializer(vars_to_init))

# Create the training op
train_op = adamOptimizer.minimize(loss, global_step=global_step)

The following error is raised after train_op is used for the first time:

FailedPreconditionError (see above for traceback): Attempting to use uninitialized value pretrain_1/beta2_power
[[Node: pretrain_1/beta2_power/read = Identity[T=DT_FLOAT, _class=["loc:@autoencoder_variables/weights1"], _device="/job:localhost/replica:0/task:0/cpu:0"]]]

If I try to add the line

vars_to_init.append(beta2_power)

I get the following error:

NameError: global name 'beta2_power' is not defined

If I follow the advice from the similar question and replace sess.run(tf.variables_initializer(vars_to_init)) with sess.run(tf.initialize_all_variables()), I get the following error after running this line:

FailedPreconditionError: Attempting to use uninitialized value autoencoder_variables/biases1
[[Node: autoencoder_variables/biases1/read = Identity[T=DT_FLOAT, _class=["loc:@autoencoder_variables/biases1"], _device="/job:localhost/replica:0/task:0/cpu:0"]]]

I didn’t have any problems when I was using the Gradient Descent optimizer…

What am I doing wrong? What is the proper way to use this optimizer?

More details about the class to clarify autoencoder_variables:

class AutoEncoder(object):

  _weights_str = "weights{0}"
  _biases_str = "biases{0}"

  def __init__(self, shape, sess):

    self.__shape = shape
    self.__num_hidden_layers = len(self.__shape) - 2

    self.__variables = {}
    self.__sess = sess

    self._setup_variables()

  @property
  def shape(self):
    return self.__shape

  @property
  def num_hidden_layers(self):
    return self.__num_hidden_layers

  @property
  def session(self):
    return self.__sess

  def __getitem__(self, item):
    return self.__variables[item]

  def __setitem__(self, key, value):
    self.__variables[key] = value

  def _setup_variables(self):
    with tf.name_scope("autoencoder_variables"):
      for i in xrange(self.__num_hidden_layers + 1):
        # Train weights
        name_w = self._weights_str.format(i + 1)
        w_shape = (self.__shape[i], self.__shape[i + 1])
        a = tf.mul(4.0, tf.sqrt(6.0 / (w_shape[0] + w_shape[1])))
        w_init = tf.random_uniform(w_shape, -1 * a, a)
        self[name_w] = tf.Variable(w_init,
                                   name=name_w,
                                   trainable=True)
        # Train biases
        name_b = self._biases_str.format(i + 1)
        b_shape = (self.__shape[i + 1],)
        b_init = tf.zeros(b_shape)
        self[name_b] = tf.Variable(b_init, trainable=True, name=name_b)

        if i <= self.__num_hidden_layers:

          # Hidden layer fixed weights (after pretraining, before fine-tuning)
          self[name_w + "_fixed"] = tf.Variable(tf.identity(self[name_w]),
                                                name=name_w + "_fixed",
                                                trainable=False)

          # Hidden layer fixed biases
          self[name_b + "_fixed"] = tf.Variable(tf.identity(self[name_b]),
                                                name=name_b + "_fixed",
                                                trainable=False)

          # Pretraining output training biases
          name_b_out = self._biases_str.format(i + 1) + "_out"
          b_shape = (self.__shape[i],)
          b_init = tf.zeros(b_shape)
          self[name_b_out] = tf.Variable(b_init,
                                         trainable=True,
                                         name=name_b_out)

  def _w(self, n, suffix=""):
    return self[self._weights_str.format(n) + suffix]

  def _b(self, n, suffix=""):
    return self[self._biases_str.format(n) + suffix]

  def get_variables_to_init(self, n):
    assert n > 0
    assert n <= self.__num_hidden_layers + 1

    vars_to_init = [self._w(n), self._b(n)]

    if n <= self.__num_hidden_layers:
      vars_to_init.append(self._b(n, "_out"))

    if 1 < n <= self.__num_hidden_layers + 1:
      # Fixed matrices for learning of deeper layers
      vars_to_init.append(self._w(n - 1, "_fixed"))
      vars_to_init.append(self._b(n - 1, "_fixed"))

    return vars_to_init
Asked By: Taras Kucherenko


Answers:

The problem was that I was using one variable's value to initialize another variable, which raised an "attempting to use an uninitialized value" error during initialization.
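A minimal illustration of this failure mode, with hypothetical variable names and the TF 1.x graph-mode API: if one variable's initializer reads another variable, that other variable must already be initialized, otherwise running the first initializer fails.

import tensorflow as tf

a = tf.Variable(tf.zeros((2,)), name="a")
b = tf.Variable(tf.identity(a), name="b")  # b's initial value reads a

with tf.Session() as sess:
    # Running b's initializer before a's fails, because it has to read
    # the still-uninitialized value of a.
    sess.run(tf.variables_initializer([b]))  # FailedPreconditionError
    # Initializing a first (or using a.initialized_value()) avoids this.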

Instead of using another variable during initialization,

self[name_b + "_fixed"] = tf.Variable(tf.identity(self[name_b]),
                                            name=name_b + "_fixed",
                                            trainable=False)

I initialize it with its own initial value (b_init, defined just above):

self[name_b + "_fixed"] = tf.Variable(b_init,
                                            name=name_b + "_fixed",
                                            trainable=False)

And I copy the trained values into it once the corresponding layer has been trained:

 ae[name_w + "_fixed"] = tf.identity(ae[name_w])
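For completeness, here is a minimal, self-contained sketch of this initialize-then-copy pattern, using hypothetical names and the TF 1.x graph-mode API. The fixed variable gets its own initializer, and the trained values are copied into it with an explicit assign op once pretraining is done; this is one way to realise the copy step, alongside the tf.identity reassignment shown above.

import tensorflow as tf

# Hypothetical shapes and names, for illustration only.
w = tf.Variable(tf.random_uniform((3, 4), -0.1, 0.1), name="weights1")
# The fixed copy gets its own initializer instead of tf.identity(w),
# so it no longer depends on w being initialized first.
w_fixed = tf.Variable(tf.zeros((3, 4)), trainable=False, name="weights1_fixed")

copy_op = tf.assign(w_fixed, w)  # copies w's current values into w_fixed

with tf.Session() as sess:
    sess.run(tf.variables_initializer([w, w_fixed]))
    # ... pretrain w here ...
    sess.run(copy_op)  # freeze the trained weights into w_fixed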
Answered By: Taras Kucherenko