示例#1
0
def decoder_train_save_restore_test():
    """Train a toy seq2seq decoder, or restore it and decode from it.

    A one-sequence dataset spelling "hello" is pushed through an embedding
    layer and a ``tfa.seq2seq.BasicDecoder`` wrapped in a Keras model.

    * With ``train_mode`` enabled the model is optimised for 500 steps and
      its weights are written to ``./saved_model/model_ckpt``.
    * Otherwise the weights are loaded from that path and the same decoder
      cell / projection layer are reused for inference, either with a greedy
      sampler (``decoder_type == 1``) or with beam search. The decoded
      sequences are printed.

    ``method``, ``train_mode`` and ``decoder_type`` are local switches; edit
    them to exercise the other code paths.
    """
    vocab_size = 6
    SOS_token = 0
    EOS_token = 5

    index_to_char = {
        SOS_token: '<S>',
        1: 'h',
        2: 'e',
        3: 'l',
        4: 'o',
        EOS_token: '<E>'
    }
    # The decoder input starts with <S>; the target is the same sequence
    # shifted left by one position and terminated with <E>.
    x_data = np.array([[SOS_token, 1, 2, 3, 3, 4]], dtype=np.int32)
    y_data = np.array([[1, 2, 3, 3, 4, EOS_token]], dtype=np.int32)

    output_dim = vocab_size
    batch_size = len(x_data)
    hidden_dim = 7

    seq_length = x_data.shape[1]
    embedding_dim = 8

    embedding = tf.keras.layers.Embedding(vocab_size,
                                          embedding_dim,
                                          trainable=True)
    # embedding.weights, embedding.trainable_variables and
    # embedding.trainable_weights all give the same result.

    target = tf.convert_to_tensor(y_data)

    # Decoder cell and its initial state.
    method = 1
    if method == 1:
        # Single-layer RNN.
        decoder_cell = tf.keras.layers.LSTMCell(hidden_dim)
        init_state = decoder_cell.get_initial_state(inputs=None,
                                                    batch_size=batch_size,
                                                    dtype=tf.float32)
    else:
        # Multi-layer RNN.
        decoder_cell = tf.keras.layers.StackedRNNCells([
            tf.keras.layers.LSTMCell(hidden_dim),
            tf.keras.layers.LSTMCell(2 * hidden_dim)
        ])
        # Only the batch size of `inputs` is consulted here.
        init_state = decoder_cell.get_initial_state(
            inputs=tf.zeros_like(x_data, dtype=tf.float32))

    projection_layer = tf.keras.layers.Dense(output_dim)

    # Training sampler: TrainingSampler or ScheduledEmbeddingTrainingSampler.
    # (tfa.seq2seq.TrainingSampler is an alias of the class used below.)
    sampler = tfa.seq2seq.sampler.TrainingSampler()
    # sampler = tfa.seq2seq.sampler.ScheduledEmbeddingTrainingSampler(sampling_probability=0.2)

    decoder = tfa.seq2seq.BasicDecoder(decoder_cell,
                                       sampler,
                                       output_layer=projection_layer)

    # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)

    # The shape must be a tuple: (seq_length,) rather than the int (seq_length).
    inputs = tf.keras.Input(shape=(seq_length,))

    embedded = embedding(inputs)
    embedded = tf.reshape(embedded, [batch_size, seq_length, embedding_dim])

    # Call the decoder exactly once. The scheduled sampler additionally needs
    # the embedding matrix so it can embed its own sampled ids.
    decoder_kwargs = dict(initial_state=init_state,
                          sequence_length=[seq_length] * batch_size,
                          training=True)
    if isinstance(sampler,
                  tfa.seq2seq.sampler.ScheduledEmbeddingTrainingSampler):
        decoder_kwargs['embedding'] = embedding.weights
    outputs, last_state, last_sequence_lengths = decoder(embedded,
                                                         **decoder_kwargs)

    model = tf.keras.Model(inputs,
                           [outputs, last_state, last_sequence_lengths])
    # summary() prints by itself and returns None, so it is not wrapped in
    # print().
    model.summary()

    train_mode = False

    if train_mode:

        for step in range(500):
            with tf.GradientTape() as tape:
                outputs, last_state, last_sequence_lengths = model(x_data)

                weights = tf.ones(shape=[batch_size, seq_length])
                loss = tfa.seq2seq.sequence_loss(outputs.rnn_output, target,
                                                 weights)

            # Collected on every step, as in the original note on this line.
            trainable_variables = embedding.trainable_variables + decoder.trainable_variables
            grads = tape.gradient(loss, trainable_variables)
            optimizer.apply_gradients(zip(grads, trainable_variables))

            if step % 10 == 0:
                print(step, loss.numpy())

        # Original author's note: saving with tf.saved_model.save does not
        # work here, hence save_weights.
        model.save_weights('./saved_model/model_ckpt')

    else:
        # Do not rebind `model` to the return value: load_weights restores the
        # variables in place and does not return the model.
        model.load_weights('./saved_model/model_ckpt')

        sample_batch_size = 5

        decoder_type = 1
        if decoder_type == 1:
            # Inference sampler: GreedyEmbeddingSampler or
            # SampleEmbeddingSampler. An embedding_fn can be passed instead of
            # handing the embedding matrix to the decoder call.
            sampler = tfa.seq2seq.GreedyEmbeddingSampler()
            # sampler = tfa.seq2seq.GreedyEmbeddingSampler(embedding_fn=lambda ids: tf.nn.embedding_lookup(embedding.weights, ids))
            # sampler = tfa.seq2seq.SampleEmbeddingSampler()

            decoder = tfa.seq2seq.BasicDecoder(decoder_cell,
                                               sampler,
                                               output_layer=projection_layer,
                                               maximum_iterations=seq_length)
            if method == 1:
                # Single layer.
                init_state = decoder_cell.get_initial_state(
                    inputs=None,
                    batch_size=sample_batch_size,
                    dtype=tf.float32)
            else:
                # Multi layer.
                init_state = decoder_cell.get_initial_state(inputs=tf.zeros(
                    [sample_batch_size, hidden_dim], dtype=tf.float32))

        else:
            # Beam search.
            beam_width = 2
            decoder = tfa.seq2seq.BeamSearchDecoder(
                decoder_cell,
                beam_width,
                output_layer=projection_layer,
                maximum_iterations=seq_length)

            # Per the original author, tiling a state built for
            # sample_batch_size gives the same result as building one directly
            # for sample_batch_size * beam_width.
            if method == 1:
                init_state = tfa.seq2seq.tile_batch(
                    decoder_cell.get_initial_state(
                        inputs=None,
                        batch_size=sample_batch_size,
                        dtype=tf.float32),
                    multiplier=beam_width)
            else:
                init_state = tfa.seq2seq.tile_batch(
                    decoder_cell.get_initial_state(inputs=tf.zeros(
                        [sample_batch_size, hidden_dim], dtype=tf.float32)),
                    multiplier=beam_width)

        # For the inference decoders the first positional argument is the
        # embedding matrix rather than an embedded input sequence.
        outputs, last_state, last_sequence_lengths = decoder(
            embedding.weights,
            initial_state=init_state,
            start_tokens=tf.tile([SOS_token], [sample_batch_size]),
            end_token=EOS_token,
            training=False)

        if decoder_type == 1:
            result = tf.argmax(outputs.rnn_output, axis=-1).numpy()

            print(result)
            for i in range(sample_batch_size):
                print(''.join(index_to_char[a] for a in result[i]
                              if a != EOS_token))

        else:
            # predicted_ids is indexed as [batch, time, beam] below.
            result = outputs.predicted_ids.numpy()
            print(result.shape)
            for i in range(sample_batch_size):
                print(i)
                for j in range(beam_width):
                    print(''.join(index_to_char[a] for a in result[i, :, j]
                                  if a != EOS_token))
示例#2
0
    def train(self,
              epochs,
              augmentation=True,
              plot_progress=False,
              plot_interval=50,
              save_backups=True,
              warm_up=False):
        """Run the adversarial training loop for the generator and discriminator.

        Parameters
        ----------
        epochs : int
            Number of epochs; each epoch runs ``self.iters_per_epoch``
            iterations.
        augmentation : bool
            Forwarded to ``self.generate_input``.
        plot_progress : bool
            If true, show input / generator output / target every
            ``plot_interval`` iterations.
        plot_interval : int
            Iteration stride between progress plots.
        save_backups : bool
            If true, save the weights of ``self.G`` and ``self.D`` after every
            epoch, alternating between an "even" and an "odd" file pair.
        warm_up : bool
            If true, the target is replaced by the input, so the generator is
            trained to reproduce its input.

        Side effects: appends exponentially smoothed loss values to
        ``self.history`` and updates both networks through
        ``self.gen_optimizer`` / ``self.dis_optimizer``.
        """
        assert self.original != [], 'Training dataset was not loaded, use load_training_dataset() first'

        # Weights of the eight feature-matching terms, in the order the
        # discriminator returns its intermediate outputs.
        feature_weights = (0.1, 10, 10, 10, 10, 10, 10, 10)

        for e in range(epochs):
            for i in range(self.iters_per_epoch):

                x, y = self.generate_input(augmentation=augmentation)

                # Shift the data by x * 2 - 1; plotting undoes this with
                # (v + 1) / 2. Presumably inputs are in [0, 1] - confirm
                # against generate_input.
                x = x * 2 - 1
                y = y * 2 - 1

                if warm_up:
                    y = x

                if i % plot_interval == 0 and plot_progress:
                    plt.close()
                    fig, ax = plt.subplots(1,
                                           3,
                                           sharex=True,
                                           figsize=(16.5, 16.5))
                    panels = (('Original', x), ('Starless', self.G(x)),
                              ('Target', y))
                    for axis, (title, batch) in zip(ax, panels):
                        if self.mode == 'RGB':
                            axis.imshow((batch[0] + 1) / 2)
                        else:
                            # Non-RGB mode: show the first channel only.
                            axis.imshow((batch[0, :, :, 0] + 1) / 2,
                                        cmap='gray',
                                        vmin=0,
                                        vmax=1)
                        axis.set_title(title)

                    display.clear_output(wait=True)
                    display.display(plt.gcf())

                if i > 0:
                    print("\rEpoch: %d. Iteration %d / %d Loss %f    " %
                          (e, i, self.iters_per_epoch,
                           self.history['total'][-1]),
                          end='')
                else:
                    print("\rEpoch: %d. Iteration %d / %d " %
                          (e, i, self.iters_per_epoch),
                          end='')

                # Only the forward pass and the loss construction are
                # recorded; everything else happens after the tapes close.
                with tf.GradientTape() as gen_tape, tf.GradientTape(
                ) as dis_tape:
                    gen_output = self.G(x)

                    # The discriminator is expected to return eight
                    # intermediate outputs followed by its prediction.
                    *feats_real, predict_real = self.D(y)
                    *feats_fake, predict_fake = self.D(gen_output)

                    losses = {}

                    dis_loss = tf.reduce_mean(
                        -(tf.math.log(predict_real + 1E-8) +
                          tf.math.log(1 - predict_fake + 1E-8)))
                    losses['dis_loss'] = dis_loss

                    gen_loss_GAN = tf.reduce_mean(-tf.math.log(predict_fake +
                                                               1E-8))
                    losses['gen_loss_GAN'] = gen_loss_GAN

                    # Accumulate left to right in the same order as the
                    # original single-expression sum.
                    gen_loss = gen_loss_GAN * 0.1
                    for idx, (f_fake, f_real, weight) in enumerate(
                            zip(feats_fake, feats_real, feature_weights),
                            start=1):
                        term = tf.reduce_mean(tf.abs(f_fake - f_real))
                        losses['gen_p%d' % idx] = term
                        gen_loss = gen_loss + term * weight

                    gen_L1 = tf.reduce_mean(tf.abs(y - gen_output))
                    # Unlike the other entries, the L1 term is logged with
                    # its weight applied.
                    losses['gen_L1'] = gen_L1 * 100
                    gen_loss = gen_loss + gen_L1 * 100
                    losses['total'] = gen_loss

                # Exponential moving average of every logged quantity.
                for key, value in losses.items():
                    if key in self.history:
                        self.history[key].append(value * (1 - self._ema) +
                                                 self.history[key][-1] *
                                                 self._ema)
                    else:
                        self.history[key] = [value]

                gen_grads = gen_tape.gradient(gen_loss,
                                              self.G.trainable_variables)
                self.gen_optimizer.apply_gradients(
                    zip(gen_grads, self.G.trainable_variables))

                dis_grads = dis_tape.gradient(dis_loss,
                                              self.D.trainable_variables)
                self.dis_optimizer.apply_gradients(
                    zip(dis_grads, self.D.trainable_variables))

            if save_backups:
                # Alternate between two file pairs from one epoch to the next.
                if e % 2 == 0:
                    self.G.save_weights("./starnet_backup_G_even.h5")
                    self.D.save_weights("./starnet_backup_D_even.h5")
                else:
                    self.G.save_weights("./starnet_backup_G_odd.h5")
                    self.D.save_weights("./starnet_backup_D_odd.h5")

            if plot_progress:
                plt.close()
def run_example(filter_type, loss, out_dir, batch_size, hetero_q, hetero_r,
                learned_process, image_size, use_gpu, debug):
    """
    Exemplary code to set up and train a differentiable filter for the
    simulated disc tracking task described in the paper "How to Train Your
    Differentiable Filter".

    Parameters
    ----------
    filter_type : str
        Defines which filtering algorithm is used. Can be ekf, ukf, mcukf or pf
    loss : str
        Which loss to use for training the filter. This can be "nll" for the
        negative log likelihood, "mse" for the mean squared error or "mixed"
        for a combination of both
    out_dir : str
        Path to the directory where results and data should be written to.
    batch_size : int
        Batch size for training and testing.
    hetero_q : bool
        If true, heteroscedastic process noise is learned, else constant.
    hetero_r : bool
        If true, heteroscedastic observation noise is learned, else constant.
    learned_process : bool
        If true, a neural network is used as process model in the filter, else
        an analytical process model is used.
    image_size : int
        Width and height of the image observations
    use_gpu : bool
        If true, the training and testing is run on GPU (if one is available)
    debug : bool
        Turns on additional debug output and prints.

    Returns
    -------
    None.

    """
    if use_gpu:
        # limit tensorflow's GPU memory consumption
        gpus = tf.config.list_physical_devices('GPU')
        if gpus:
            try:
                for gpu in gpus:
                    tf.config.experimental.set_memory_growth(gpu, True)
            except RuntimeError as e:
                # Memory growth must be set before GPUs have been initialized
                print(e)
    else:
        # Hide GPU from visible devices to run on cpu
        tf.config.set_visible_devices([], 'GPU')

    # prepare the output directories (creating out_dir itself as a parent)
    train_dir = os.path.join(out_dir, 'train')
    data_dir = os.path.join(out_dir, 'data')
    os.makedirs(train_dir, exist_ok=True)
    os.makedirs(data_dir, exist_ok=True)

    # create a small dataset (if it doesn't already exist)
    name = 'example'
    if not os.path.exists(os.path.join(data_dir, 'info_' + name + '.txt')):
        c = DiscTrackingData(name, data_dir, image_size, 1000, 30, 1000,
                             rescale=1, debug=debug)
        c.create_dataset(15, 0, 0, 3.0)
    else:
        print('data already exists')

    # create a tensorflow model that combines a differentiable filter with a
    # problem context
    model = FilterApplication(filter_type, loss, batch_size, hetero_q,
                              hetero_r, learned_process, image_size,
                              debug=debug)

    # Load training and test datasets
    # we use sequence length 10 for training and validation and sequence
    # length 30 for testing
    train_files, val_files, test_files = load_data(data_dir, name)
    train_set = tf.data.TFRecordDataset(train_files)
    train_set = model.preprocess(data_dir, name, train_set, 'train', 10)
    train_set = train_set.shuffle(500)
    train_set = train_set.batch(batch_size, drop_remainder=True)

    val_set = tf.data.TFRecordDataset(val_files)
    val_set = model.preprocess(data_dir, name, val_set, 'val', 10)
    val_set = val_set.batch(batch_size, drop_remainder=True)

    test_set = tf.data.TFRecordDataset(test_files)
    test_set = model.preprocess(data_dir, name, test_set, 'test', 30)
    test_set = test_set.batch(batch_size, drop_remainder=True)

    # prepare the training
    optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
    epochs = 3
    step = 0

    # prepare a summary writer for logging information that can be viewed
    # with tensorboard
    train_summary_writer = tf.summary.create_file_writer(train_dir + '/' +
                                                         str(time.time()))
    tf.summary.experimental.set_step(step)

    # unfortunately, we cannot use keras model.fit here, since keras currently
    # does not support loss functions that receive multiple output tensors
    # (like mean and covariance of the filter's belief for computing the nll
    # loss) We thus write a custom training loop
    print("\n Start training with sequence length 10")
    for epoch in range(epochs):
        print("\nStart of epoch %d \n" % (epoch))
        print("Validating ...")
        evaluate(model, val_set, batch_size)

        for (x_batch_train, y_batch_train) in train_set:
            start = time.time()

            with tf.GradientTape() as tape:
                # sample a random disturbance of the initial state from the
                # initial covariance
                n_val = np.random.normal(loc=np.zeros((model.dim_x)),
                                         scale=model.initial_covariance,
                                         size=(batch_size, model.dim_x))
                x_batch_train = (*x_batch_train, n_val)
                # Run the forward pass of the model in training mode; the
                # writer is made the default so summaries written during the
                # call go to the training log.
                with train_summary_writer.as_default():
                    out = model(x_batch_train, training=True)

                # Compute the loss value for this minibatch.
                loss_value, metrics, metric_names = \
                    model.context.get_loss(y_batch_train, out)

            # log summaries of the metrics every 50 steps (outside the tape,
            # since these ops are not part of the loss)
            with train_summary_writer.as_default():
                with tf.summary.record_if(step % 50 == 0):
                    # a distinct loop variable keeps the dataset `name` intact
                    for i, metric_name in enumerate(metric_names):
                        tf.summary.scalar('metrics/' + metric_name,
                                          tf.reduce_mean(metrics[i]))

            # Use the gradient tape to automatically retrieve the
            # gradients of the trainable variables with respect to the loss.
            grads = tape.gradient(loss_value, model.trainable_weights)

            # Run one step of gradient descent by updating
            # the value of the variables to minimize the loss.
            optimizer.apply_gradients(zip(grads, model.trainable_weights))
            end = time.time()

            # Log every 50 batches.
            if step % 50 == 0:
                print("Training loss at step %d: %.4f (took %.3f seconds) " %
                      (step, float(loss_value), float(end-start)))
            step += 1
            tf.summary.experimental.set_step(step)

    # test the trained model on the held out data
    print("\n Testing with sequence length 30")
    evaluate(model, test_set, batch_size)
示例#4
0
import tensorflow as tf

# Create four scalar tensors: the polynomial coefficients a, b, c and the
# point w at which the derivative is evaluated.
a, b, c = tf.constant(1.), tf.constant(2.), tf.constant(3.)
w = tf.constant(4.)

# Open a gradient-recording context.
with tf.GradientTape() as tape:
    # Add w to the list of tensors tracked by the tape.
    tape.watch(w)
    # Forward computation to differentiate.
    y = a * w**2 + b * w + c

# Differentiate y with respect to w.
dy_dw = tape.gradient(y, w)
print(dy_dw)
示例#5
0
    def train_epoch(self, train, learning_rate, args):
        self.optimizer.learning_rate = learning_rate

        skipped = 0
        num_gradients = 0
        while not train.epoch_finished():
            batch = train.next_batch(args.batch_size)
            sentences = batch["sentences"]

            layers = []
            for sentence in sentences:
                layers.append(get_layers(sentence))

            for accuracy in self.accuracies.values():
                accuracy.reset_states()

            with tf.GradientTape() as tape:
                encoded_tokens = self._encoder(
                    token_ids=batch["batch_factors"][Sentence.TOKENS],
                    token_charseqs=batch["charseqs"],
                    token_charseq_ids=batch["charseq_ids"],
                    token_values=batch["batch_factors"][Sentence.TOKEN_VALUES],
                    token_additionals=[batch[x] for x in ["bert", "fasttext", "word2vec"] if x in batch],
                    training=True)

                states = []
                for i, sentence in enumerate(sentences):
                    states.append(self.State(self, sentence.empty_copy(), encoded_tokens[i][:sentence.n_tokens()], True))

                loss = 0
                for iteration in range(args.decoder_iterations):
                    # 1) New nodes
                    ori_nodes = [s.n_nodes for s in states]
                    sum_nodes = sum(ori_nodes)
                    nodes = tf.concat([s.nodes for s in states], axis=0)

                    target_ops = np.zeros([sum_nodes], np.int32)
                    target_node_values = np.zeros([sum_nodes, len(train.node_properties)], np.int32)
                    target_node_values_mask = np.zeros([sum_nodes, 1], np.int32)
#                     target_edge_values = np.zeros([sum_nodes, len(train.edge_properties)], np.int32)
#                     target_edge_values_mask = np.zeros([sum_nodes, 1], np.int32)

                    start = 0
                    for i in range(len(sentences)):
                        if iteration < len(layers[i]):
                            state, layer, sentence = states[i], layers[i][iteration], sentences[i]
                            for n, target, edge in layer:
                                n = state.node_mapping[n]
                                target_ops[start + n] = NODE_PARENT if sentence.factors[Sentence.EDGE_PARENTS][edge] == target else NODE_CHILD
                                target_node_values[start + n] = sentence.factors[Sentence.NODE_VALUES][target - sentence.n_tokens()]
                                target_node_values_mask[start + n] = 1
#                                 target_edge_values[start + n] = sentence.factors[Sentence.EDGE_VALUES][edge]
#                                 target_edge_values_mask[start + n] = 1 if n >= state.sentence.n_tokens() else 0
                                state.add_node(target_node_values[start + n], n, target)
                        start += states[i].n_nodes_cached
                    self.State.recompute_nodes(nodes, states, iteration)

                    predictions = self.layers.decoder_node_operation[iteration](nodes)
                    loss += self.loss_sce(target_ops, predictions)
                    self.accuracies["node/ops"](target_ops, predictions)
                    for i, prop in enumerate(train.node_properties):
                        predictions = self.layers.decoder_node_values[iteration][i](nodes)
                        loss += self.loss_sce(target_node_values[:, i], predictions, target_node_values_mask)
                        self.accuracies["node/" + prop](target_node_values[:, i], predictions, target_node_values_mask)
#                     for i, prop in enumerate(train.edge_properties):
#                         predictions = self.layers.decoder_edge_values[iteration][i](nodes)
#                         loss += self.loss_sce(target_edge_values[:, i], predictions, target_edge_values_mask)
#                         self.accuracies["edge/" + prop](target_edge_values[:, i], predictions, target_edge_values_mask)

                    # 2) Edges
                    sum_nodes = sum(s.n_nodes for s in states)
                    nodes = tf.concat([s.nodes for s in states], axis=0)
                    target_indices_a, target_indices_b = [], []
                    target_a_parent, target_a_child = [], []
                    target_deprel_parents, target_deprel_children = [], []
                    target_deprel_values = []

                    start = 0
                    for i in range(len(sentences)):
                        state, sentence = states[i], sentences[i]
                        offset = len(target_indices_a)
                        for j in range(ori_nodes[i], state.n_nodes):
                            target_indices_a.append(start + np.repeat(np.int32(j), state.n_nodes))
                            target_indices_b.append(start + np.arange(0, state.n_nodes, dtype=np.int32))
                            target_a_parent.append(np.zeros(state.n_nodes, np.float32))
                            target_a_child.append(np.zeros(state.n_nodes, np.float32))
                        for n_ori, n in state.node_mapping.items():
                            if n >= ori_nodes[i]:
                                for e in sentence.parents[n_ori]:
                                    if sentence.factors[Sentence.EDGE_VALUES][e][0] == Mapping.ROOT: continue
                                    p = sentence.factors[sentence.EDGE_PARENTS][e]
                                    if p in state.node_mapping:
                                        p = state.node_mapping[p]
                                        if not args.no_anchors or not sentence.factors[Sentence.EDGE_VALUES][e][0] == Mapping.ANCHOR:
                                            target_a_child[offset + n - ori_nodes[i]][p] = 1
                                        target_deprel_parents.append(start + p)
                                        target_deprel_children.append(start + n)
                                        target_deprel_values.append(sentence.factors[Sentence.EDGE_VALUES][e])
                                        state.add_edge(p, n, target_deprel_values[-1])
                                for e in sentence.children[n_ori]:
                                    if sentence.factors[Sentence.EDGE_VALUES][e][0] == Mapping.ROOT: continue
                                    c = sentence.factors[sentence.EDGE_CHILDREN][e]
                                    if c in state.node_mapping:
                                        c = state.node_mapping[c]
                                        if not args.no_anchors or not sentence.factors[Sentence.EDGE_VALUES][e][0] == Mapping.ANCHOR:
                                            target_a_parent[offset + n-ori_nodes[i]][c] = 1
                                        target_deprel_parents.append(start + n)
                                        target_deprel_children.append(start + c)
                                        target_deprel_values.append(sentence.factors[Sentence.EDGE_VALUES][e])
                                        state.add_edge(n, c, target_deprel_values[-1])
                        start += state.n_nodes

                    if not target_indices_a or not target_deprel_parents:
                        continue
                    target_indices_a = np.concatenate(target_indices_a, axis=0)
                    target_indices_b = np.concatenate(target_indices_b, axis=0)
                    target_a_parent = np.concatenate(target_a_parent, axis=0)
                    target_a_child = np.concatenate(target_a_child, axis=0)
                    target_deprel_parents = np.array(target_deprel_parents, np.int32)
                    target_deprel_children = np.array(target_deprel_children, np.int32)
                    target_deprel_values = np.array(target_deprel_values, np.int32)

                    # 2.1) Compute arcs
                    edge_parents = self.layers.decoder_edge_parents[iteration](nodes)
                    edge_children = self.layers.decoder_edge_children[iteration](nodes)

                    a_parent = tf.nn.tanh(self.layers.sum([tf.gather(edge_parents, target_indices_a),
                                                           tf.gather(edge_children, target_indices_b)]))
                    if args.highway:
                        a_parent += self.layers.decoder_edge_highway[iteration](a_parent)
                    a_parent = self.layers.decoder_edge_arc[iteration](a_parent)
                    loss += self.loss_sce(target_a_parent, a_parent)
                    self.accuracies["edge/arc"](target_a_parent, a_parent)

                    a_child = tf.nn.tanh(self.layers.sum([tf.gather(edge_parents, target_indices_b),
                                                          tf.gather(edge_children, target_indices_a)]))
                    if args.highway:
                        a_child += self.layers.decoder_edge_highway[iteration](a_child)
                    a_child = self.layers.decoder_edge_arc[iteration](a_child)
                    loss += self.loss_sce(target_a_child, a_child)
                    self.accuracies["edge/arc"](target_a_child, a_child)

                    # 2.2) Compute deprels
                    deprel_parents = self.layers.decoder_deprel_parents[iteration](nodes)
                    deprel_children = self.layers.decoder_deprel_children[iteration](nodes)
                    deprel_weights = tf.nn.tanh(self.layers.sum(
                        [tf.gather(deprel_parents, target_deprel_parents),
                         tf.gather(deprel_children, target_deprel_children)]))
                    if args.highway:
                        deprel_weights += self.layers.decoder_deprel_highway[iteration](deprel_weights)
                    for i, prop in enumerate(train.edge_properties):
                        predictions = self.layers.decoder_deprel_values[iteration][i](deprel_weights)
                        loss += self.loss_sce(target_deprel_values[:, i], predictions)
                        self.accuracies["edge/" + prop](target_deprel_values[:, i], predictions)

                    self.State.recompute_edges(ori_nodes, states, iteration)

                # Tops
                sum_nodes = sum(s.n_nodes for s in states)
                nodes = tf.concat([s.nodes for s in states], axis=0)
                target_tops = np.zeros([sum_nodes], np.int32)
                start = 0
                for i, sentence in enumerate(sentences):
                    for e in sentence.children[0]:
                        if sentence.factors[Sentence.EDGE_VALUES][e][0] == Mapping.ROOT:
                            c = sentence.factors[Sentence.EDGE_CHILDREN][e]
                            if c in states[i].node_mapping:
                                target_tops[start + states[i].node_mapping[c]] = 1
                    start += states[i].n_nodes
                predictions = self.layers.decoder_tops(nodes)
                loss += self.loss_sce(target_tops, predictions)
                self.accuracies["edge/tops"](target_tops, predictions)

            tg = tape.gradient(loss, self.layers.trainable_variables)
            tg_none = [variable.name for g, variable in zip(tg, self.layers.trainable_variables) if g is None]
            if tg_none:
                print("Skipping a batch with None gradient for variables {}".format(tg_none), file=sys.stderr, flush=True)
                continue

            if num_gradients == 0:
                gradients = [g.numpy() if not isinstance(g, tf.IndexedSlices) else [(g.values.numpy(), g.indices.numpy())] for g in tg]
            else:
                for g, ng in zip(gradients, tg):
                    if isinstance(g, list):
                        g.append((ng.values.numpy(), ng.indices.numpy()))
                    else:
                        g += ng.numpy()
            num_gradients += 1
            if num_gradients == args.batch_aggregation or len(train._permutation) == 0:
                gradients = [tf.IndexedSlices(*map(np.concatenate, zip(*g))) if isinstance(g, list) else g for g in gradients]
                self.optimizer.apply_gradients(zip(gradients, self.layers.trainable_variables))
                num_gradients = 0
                if int(self.optimizer.iterations) % 100 == 0:
                    tf.summary.experimental.set_step(self._summary_step())
                    with self.writer.as_default():
                        for name, accuracy in self.accuracies.items():
                            tf.summary.scalar("train/" + name, accuracy.result())

        tf.summary.experimental.set_step(self._summary_step())
        with self.writer.as_default():
            tf.summary.scalar("train/skipped", skipped)
def train_one_step(train_batch_i, bvae_model, genmo_optimizer,
                   infnet_optimizer, prior_optimizer, theta_optimizer,
                   encoder_grad_variable, encoder_grad_sq_variable,
                   grad_variable_dict, grad_sq_variable_dict):
    """Train Discrete VAE for 1 step.

    Gradients for the decoder, prior and encoder variables are obtained from
    `estimate_gradients` using `FLAGS.grad_type` and applied with their
    respective optimizers. When `FLAGS.grad_type == 'relax'`, the extra
    "theta" variables (control network, log-temperature, scaling factor) are
    additionally updated with the gradient of the squared encoder gradients.

    Args:
        train_batch_i: Raw batch; passed through `process_batch_input`.
        bvae_model: Model exposing `decoder_vars`, `prior_vars`,
            `encoder_vars` and `compute_grad_variance` (plus `control_nn`,
            `log_temperature_variable`, `scaling_variable` and
            `mean_learning_signal` on the 'relax' path).
        genmo_optimizer: Optimizer applied to `bvae_model.decoder_vars`.
        infnet_optimizer: Optimizer applied to `bvae_model.encoder_vars`.
        prior_optimizer: Optimizer applied to `bvae_model.prior_vars`.
        theta_optimizer: Optimizer applied to the theta variables; only used
            on the 'relax' path.
        encoder_grad_variable: Forwarded to `compute_grad_variance` together
            with `encoder_grad_sq_variable` and the encoder gradients.
        encoder_grad_sq_variable: See `encoder_grad_variable`.
        grad_variable_dict: Optional dict keyed by gradient type. When not
            None, the encoder gradient is re-estimated once per key and its
            variance reported under 'var/<key>'.
        grad_sq_variable_dict: Dict with the same keys as
            `grad_variable_dict`; only read when that dict is not None.

    Returns:
        Tuple `(encoder_grad_var, variance_dict, genmo_loss, metrics)`.
        `variance_dict` is None when `grad_variable_dict` is None, and
        `metrics` holds 'learning_signal' only on the 'relax' path.
    """
    metrics = {}
    input_batch = process_batch_input(train_batch_i)

    if FLAGS.grad_type == 'relax':
        # The tape records the gradient estimation so that the squared
        # encoder gradients can be differentiated w.r.t. the theta variables.
        with tf.GradientTape(persistent=True) as theta_tape:
            (genmo_grads, prior_grads, infnet_grads,
             genmo_loss) = estimate_gradients(input_batch, bvae_model,
                                              FLAGS.grad_type)

            # Update generative model
            genmo_vars = bvae_model.decoder_vars
            genmo_optimizer.apply_gradients(list(zip(genmo_grads, genmo_vars)))

            # Update prior
            prior_vars = bvae_model.prior_vars
            prior_optimizer.apply_gradients(list(zip(prior_grads, prior_vars)))

            # Update inference network (encoder)
            infnet_vars = bvae_model.encoder_vars
            infnet_optimizer.apply_gradients(
                list(zip(infnet_grads, infnet_vars)))

            # Theta update: the differentiation target is the element-wise
            # square of every encoder gradient.
            infnet_grads_sq = [tf.square(grad_i) for grad_i in infnet_grads]
            theta_vars = []
            if bvae_model.control_nn:
                theta_vars.extend(bvae_model.control_nn.trainable_variables)
            # Temperature / scaling are only optimized when no fixed value
            # was supplied through the corresponding flag.
            if FLAGS.temperature is None:
                theta_vars.append(bvae_model.log_temperature_variable)
            if FLAGS.scaling_factor is None:
                theta_vars.append(bvae_model.scaling_variable)
            theta_grads = theta_tape.gradient(infnet_grads_sq, theta_vars)
            theta_optimizer.apply_gradients(zip(theta_grads, theta_vars))
        # Drop the reference to the persistent tape explicitly.
        del theta_tape

        metrics['learning_signal'] = bvae_model.mean_learning_signal

    else:
        # Every other gradient type: estimate once and apply the three
        # updates; there are no theta variables to train.
        (genmo_grads, prior_grads, infnet_grads,
         genmo_loss) = estimate_gradients(input_batch, bvae_model,
                                          FLAGS.grad_type)

        genmo_vars = bvae_model.decoder_vars
        genmo_optimizer.apply_gradients(list(zip(genmo_grads, genmo_vars)))

        prior_vars = bvae_model.prior_vars
        prior_optimizer.apply_gradients(list(zip(prior_grads, prior_vars)))

        infnet_vars = bvae_model.encoder_vars
        infnet_optimizer.apply_gradients(list(zip(infnet_grads, infnet_vars)))

    # The reported variance is divided by batch_size ** 2.
    batch_size_sq = tf.cast(FLAGS.batch_size * FLAGS.batch_size, tf.float32)
    encoder_grad_var = bvae_model.compute_grad_variance(
        encoder_grad_variable, encoder_grad_sq_variable,
        infnet_grads) / batch_size_sq

    if grad_variable_dict is not None:
        variance_dict = dict()
        for k in grad_variable_dict.keys():
            # Index 2 of the returned tuple is the encoder (infnet) gradient,
            # matching the unpacking order used above.
            encoder_grads = estimate_gradients(input_batch,
                                               bvae_model,
                                               gradient_type=k)[2]
            variance_dict['var/' + k] = bvae_model.compute_grad_variance(
                grad_variable_dict[k], grad_sq_variable_dict[k],
                encoder_grads) / batch_size_sq
    else:
        variance_dict = None

    return (encoder_grad_var, variance_dict, genmo_loss, metrics)
示例#7
0
def fit_regression(network,
                   hidden_bayes=False,
                   same_noise=False,
                   max_std=0.5,
                   data="ian",
                   save=False):
    """Fit a regression network on a 1-D data set and plot its predictions.

    The model is trained for 500 full-batch steps with Adam and a
    polynomially decaying learning rate. Afterwards the training curves and
    the predictions on an evenly spaced test grid are plotted; Bayesian
    networks are sampled 20 times to draw mean and +/- 1 and 2 standard
    deviation bands.

    Args:
        network: Network identifier; must be in `ALLOWED_NETWORK_CONFIGS`
            (`MNF`, `BAYES_BY_BACKPROP` and `DENSE` are handled here).
        hidden_bayes: Forwarded to the `BNN_MNF` / `BNN_BBB` constructors.
        same_noise: Forwarded to `loss_fn`; when true, `model.reset_noise()`
            is called after every optimization step.
        max_std: Forwarded to the `BNN_MNF` / `BNN_BBB` constructors.
        data: Data set identifier; must be in `ALLOWED_DATA_CONFIGS`
            (`TOY_DATA`, `IAN_DATA` and `SAMPLE_DATA` are handled here).
        save: If true, write the figure to ``plots/{network}.pdf``; otherwise
            show it interactively.

    Raises:
        AssertionError: If `data` or `network` is not an allowed value.
    """
    # load data
    # The .npz archives are opened with a context manager so the underlying
    # file handle is closed once the arrays are read, and the `data` argument
    # is no longer overwritten by the archive object.
    if data not in ALLOWED_DATA_CONFIGS:
        raise AssertionError(
            f"'data' has to be in {ALLOWED_DATA_CONFIGS} but was set to {data}."
        )
    elif data == TOY_DATA:
        with np.load("data/train_data_regression.npz") as archive:
            x_train = archive["x_train"]
            y_train = archive["y_train"]
        x_lim, y_lim = 4.5, 70.0
        reg = 10.0  # regularization parameter lambda
    elif data == IAN_DATA:
        with np.load("data/train_data_ian_regression.npz",
                     allow_pickle=True) as archive:
            x_train = archive["x_train"]
            y_train = archive["y_train"]
        x_lim, y_lim = 12.0, 8.0
        reg = 30  # regularization parameter lambda
    elif data == SAMPLE_DATA:
        n_samples = 20
        toy_regression = ToyRegressionData()
        x_train, y_train = toy_regression.gen_data(n_samples)
        x_lim, y_lim = 4.5, 70.0
        reg = 10.0  # regularization parameter lambda

    # choose network
    if network not in ALLOWED_NETWORK_CONFIGS:
        raise AssertionError(
            f"'network' has to be in {ALLOWED_NETWORK_CONFIGS} but was set to {network}."
        )
    elif network == MNF:
        model = BNN_MNF(hidden_bayes=hidden_bayes, max_std=max_std)
        bayes = True
    elif network == BAYES_BY_BACKPROP:
        model = BNN_BBB(hidden_bayes=hidden_bayes, max_std=max_std)
        bayes = True
    elif network == DENSE:
        model = MLP()
        bayes = False

    epochs = 500
    learning_rate_fn = tf.keras.optimizers.schedules.PolynomialDecay(1e-2,
                                                                     epochs,
                                                                     1e-6,
                                                                     power=0.5)
    opt = tf.keras.optimizers.Adam(learning_rate=learning_rate_fn)

    # initialize: one loss evaluation before the training loop starts
    _, _ = loss_fn(y_train, x_train, model, bayes, reg, same_noise)

    train_losses = []
    kl_losses = []
    for i in range(epochs):
        with tf.GradientTape() as tape:
            tape.watch(model.trainable_variables)
            loss, kl_loss = loss_fn(y_train, x_train, model, bayes, reg,
                                    same_noise)
        gradients = tape.gradient(loss, model.trainable_variables)
        opt.apply_gradients(zip(gradients, model.trainable_variables))

        if same_noise:
            model.reset_noise()  # sample new epsilons

        train_losses.append(loss)
        kl_losses.append(kl_loss)

        if i % 10 == 0:
            print(f"Epoch: {i}, MSE: {loss}, KL-loss: {kl_loss}")

    # Training curves.
    plt.plot(range(epochs), train_losses)
    plt.plot(range(epochs), kl_losses)
    plt.legend(["Train loss", "KL loss"])

    # Evenly spaced test inputs covering the plotted x range.
    n_test = 500
    x_test = np.linspace(-x_lim, x_lim, n_test).reshape(n_test,
                                                        1).astype('float32')

    if bayes:
        # Repeated forward passes give a sample of predictions whose mean and
        # standard deviation are plotted.
        y_preds = []
        for _ in range(20):
            y_pred = model(x_test)
            y_preds.append(y_pred)
        plt.figure(figsize=(10, 4))
        y_preds = np.array(y_preds).reshape(20, n_test)
        y_preds_mean = np.mean(y_preds, axis=0)
        y_preds_std = np.std(y_preds, axis=0)

        plt.scatter(x_train, y_train, c="orangered")
        color_pred = (0.0, 101.0 / 255.0, 189.0 / 255.0)
        plt.plot(x_test, y_preds_mean, color=color_pred)
        plt.fill_between(x_test.reshape(n_test, ),
                         y_preds_mean - y_preds_std,
                         y_preds_mean + y_preds_std,
                         alpha=0.25,
                         color=color_pred)
        plt.fill_between(x_test.reshape(n_test, ),
                         y_preds_mean - 2.0 * y_preds_std,
                         y_preds_mean + 2.0 * y_preds_std,
                         alpha=0.35,
                         color=color_pred)

        plt.xlim(-x_lim, x_lim)
        plt.ylim(-y_lim, y_lim)
        plt.legend(["Mean function", "Observations"])

    else:
        plt.figure(figsize=(10, 4))
        y_pred = model(x_test)
        plt.scatter(x_train, y_train, c="orangered")
        color_pred = (0.0, 101.0 / 255.0, 189.0 / 255.0)
        plt.plot(x_test, y_pred, color=color_pred)
        plt.xlim(-x_lim, x_lim)
        plt.ylim(-y_lim, y_lim)
        plt.legend(["Mean function", "Observations"])

    plt.tight_layout()
    if save:
        plt.savefig(f"plots/{network}.pdf")
    else:
        plt.show()
示例#8
0
def train_step(graph, t_nodes, t_edges):
    """Run one forward/backward pass of the module-level `model`.

    Args:
        graph: Input passed to `model`.
        t_nodes: Node targets handed to `loss_function`.
        t_edges: Edge targets handed to `loss_function`.

    Returns:
        Tuple `(loss, grads, out_gs)`: the mean loss, its gradients with
        respect to `model.trainable_variables`, and the raw model output.
        No optimizer step is taken here.
    """
    with tf.GradientTape() as grad_tape:
        predicted = model(graph)
        raw_loss = loss_function(predicted, t_nodes, t_edges)
        mean_loss = tf.reduce_mean(raw_loss)
    weight_grads = grad_tape.gradient(mean_loss, model.trainable_variables)
    return mean_loss, weight_grads, predicted
示例#9
0
@tf.function(autograph=False)
def get_hessian1():
    """Return gradient entries and the matching Hessian block of `model`.

    Uses the module-level `var1`, `model` and `indices`. The first-order
    gradient is taken inside the tape context so that it is itself recorded
    and can be differentiated again by `jacobian`.

    Returns:
        Tuple `(grads, hessians)`: the gradient restricted to `indices`, and
        its Jacobian w.r.t. `var1` with columns restricted to `indices`.
    """
    params = var1
    with tf.GradientTape(persistent=True,
                         watch_accessed_variables=False) as outer_tape:
        outer_tape.watch(params)
        model_output = model(params)
        selected_grads = outer_tape.gradient(model_output, params)[indices]
    full_jacobian = outer_tape.jacobian(selected_grads,
                                        params,
                                        experimental_use_pfor=True)
    return selected_grads, full_jacobian[:, indices]


# Experiment: gradient with respect to a `sparse_read` result while automatic
# variable watching is disabled and nothing is watched explicitly.
# NOTE(review): the gradient target below is a second, separate
# `sparse_read(5)` call made after the tape has closed, so this presumably
# prints None - confirm that this is the intended demonstration.
with tf.GradientTape(watch_accessed_variables=False) as tape:
    y = var1.sparse_read(5) * 5.
grad = tape.gradient(y, var1.sparse_read(5))
print(grad)

class MyVar(tf.Variable):
    """Plain subclass of `tf.Variable` that adds no behaviour of its own."""
    pass
# One float64 MyVar per point of an evenly spaced grid on [0, 10] with
# `nparams` points; shape validation is switched off for each variable.
vars2 = [MyVar(val, dtype=tf.float64, validate_shape=False)
         for val in np.linspace(0, 10, nparams)]


def assign2(values, variables):
    """Assign `values[i]` to the i-th entry of `variables`.

    Each assignment is issued with `use_locking=False` and
    `read_value=False`. `values` is indexed by position, so it must be at
    least as long as `variables`; extra values are ignored.

    Args:
        values: Indexable collection of new values.
        variables: Iterable of objects exposing an `assign` method.
    """
    for position, target in enumerate(variables):
        new_value = values[position]
        target.assign(new_value, use_locking=False, read_value=False)

示例#10
0
def train_step(inputs):
    """Run one optimization step for the generators and discriminators.

    The module-level `model` is called once on `inputs`. Its output is read
    by position: indices 2-3 are the cycle reconstructions, 4-5 the identity
    mappings, and 6-13 the discriminator scores (real/fake for A, B, A-dot
    and B-dot). Indices 0 and 1 are not used by any loss term.

    The generator loss is the sum of four adversarial terms plus the cycle
    and identity L1 terms weighted by `hp.lambda_cycle` and
    `hp.lambda_identity`. The discriminator loss is the sum of four averaged
    real/fake terms. Both optimizers are stepped and the `gen_loss` /
    `disc_loss` trackers are updated.

    Args:
        inputs: Indexable pair; `inputs[0]` is compared with the A-side
            outputs and `inputs[1]` with the B-side outputs.
    """

    def _as_real(scores):
        # L2 distance of the scores to an all-ones target.
        return l2_loss(tf.ones_like(scores), scores)

    def _as_fake(scores):
        # L2 distance of the scores to an all-zeros target.
        return l2_loss(tf.zeros_like(scores), scores)

    def _real_fake_average(real_scores, fake_scores):
        # Mean of the "real" and "fake" terms of one discriminator.
        return (_as_real(real_scores) + _as_fake(fake_scores)) / 2

    with tf.GradientTape() as gen_tape, tf.GradientTape() as dis_tape:
        outputs = model(inputs)
        cycle_A, cycle_B = outputs[2], outputs[3]
        identity_A, identity_B = outputs[4], outputs[5]
        d_A_real, d_A_fake = outputs[6], outputs[7]
        d_B_real, d_B_fake = outputs[8], outputs[9]
        d_A_dot_real, d_A_dot_fake = outputs[10], outputs[11]
        d_B_dot_real, d_B_dot_fake = outputs[12], outputs[13]

        # Reconstruction terms.
        cycle_loss = l1_loss(inputs[0], cycle_A) + l1_loss(inputs[1], cycle_B)
        identity_loss = l1_loss(inputs[0], identity_A) + l1_loss(
            inputs[1], identity_B)

        # Generators want every "fake" score to look real.
        adversarial_loss = (_as_real(d_B_fake) + _as_real(d_A_fake) +
                            _as_real(d_A_dot_fake) + _as_real(d_B_dot_fake))
        generator_loss = (adversarial_loss +
                          hp.lambda_cycle * cycle_loss +
                          hp.lambda_identity * identity_loss)

        # Discriminators: real scores towards one, fake scores towards zero.
        discriminator_loss = (_real_fake_average(d_A_real, d_A_fake) +
                              _real_fake_average(d_B_real, d_B_fake) +
                              _real_fake_average(d_A_dot_real, d_A_dot_fake) +
                              _real_fake_average(d_B_dot_real, d_B_dot_fake))

    generator_vars = (model.generatorA2B.trainable_variables +
                      model.generatorB2A.trainable_variables)
    discriminator_vars = (model.discriminator_A.trainable_variables +
                          model.discriminator_B.trainable_variables +
                          model.discriminator_A_dot.trainable_variables +
                          model.discriminator_B_dot.trainable_variables)

    grad_gen = gen_tape.gradient(generator_loss, sources=generator_vars)
    grad_dis = dis_tape.gradient(discriminator_loss,
                                 sources=discriminator_vars)
    generator_optimizer.apply_gradients(zip(grad_gen, generator_vars))
    discriminator_optimizer.apply_gradients(zip(grad_dis, discriminator_vars))

    # Feed the running loss trackers.
    gen_loss(generator_loss)
    disc_loss(discriminator_loss)
def main():
    """Train and evaluate a conv + fully-connected classifier.

    Builds a convolutional feature extractor from the module-level
    `conv_layers` and a three-layer dense head with 100 outputs, trains both
    jointly for 50 epochs on `train_db` with Adam, and prints the accuracy on
    `test_db` after every epoch.
    """
    # [b, 32, 32, 3] => [b, 1, 1, 512]
    conv_net = Sequential(conv_layers)  # part 1: convolutional layers

    fc_net = Sequential([
        layers.Dense(256, activation=tf.nn.relu),  # part 2: dense layers
        layers.Dense(128, activation=tf.nn.relu),
        layers.Dense(100, activation=None),
    ])

    conv_net.build(input_shape=[None, 32, 32, 3])
    # The input of part 2 is the (flattened) output of part 1.
    fc_net.build(input_shape=[None, 512])
    # `learning_rate` is the supported keyword; the old `lr` alias is
    # deprecated and rejected by newer Keras releases.
    optimizer = optimizers.Adam(learning_rate=1e-4)

    # [1, 2] + [3, 4] => [1, 2, 3, 4]
    # All parameters that need gradients, from both sub-networks.
    variables = conv_net.trainable_variables + fc_net.trainable_variables

    for epoch in range(50):

        for step, (x, y) in enumerate(train_db):

            with tf.GradientTape() as tape:
                # [b, 32, 32, 3] => [b, 1, 1, 512]
                out = conv_net(x)
                # flatten, => [b, 512]
                out = tf.reshape(out, [-1, 512])
                # [b, 512] => [b, 100]
                logits = fc_net(out)
                # [b] => [b, 100]
                y_onehot = tf.one_hot(y, depth=100)
                # compute loss
                loss = tf.losses.categorical_crossentropy(y_onehot,
                                                          logits,
                                                          from_logits=True)
                loss = tf.reduce_mean(loss)

            grads = tape.gradient(loss, variables)
            optimizer.apply_gradients(zip(grads, variables))

            if step % 100 == 0:
                print(epoch, step, 'loss:', float(loss))

        # Evaluate on the test set after each epoch.
        total_num = 0
        total_correct = 0
        for x, y in test_db:

            out = conv_net(x)
            out = tf.reshape(out, [-1, 512])
            logits = fc_net(out)
            prob = tf.nn.softmax(logits, axis=1)
            pred = tf.argmax(prob, axis=1)
            pred = tf.cast(pred, dtype=tf.int32)

            correct = tf.cast(tf.equal(pred, y), dtype=tf.int32)
            correct = tf.reduce_sum(correct)

            total_num += x.shape[0]
            total_correct += int(correct)

        acc = total_correct / total_num
        print(epoch, 'acc:', acc)
示例#12
0
def main(_argv):
    """Train YOLOv3 (or the tiny variant) as configured through `FLAGS`.

    Builds the model and the training/validation input pipelines, applies the
    requested transfer-learning setup (`FLAGS.transfer`), then trains either
    with a hand-written eager loop (`FLAGS.mode == 'eager_tf'`) or through
    `model.fit`.

    Args:
        _argv: Unused positional arguments.
    """
    if FLAGS.tiny:
        model = YoloV3Tiny(FLAGS.size, training=True)
        anchors = yolo_tiny_anchors
        anchor_masks = yolo_tiny_anchor_masks
    else:
        model = YoloV3(FLAGS.size, training=True)
        anchors = yolo_anchors
        anchor_masks = yolo_anchor_masks

    # Fall back to the fake data set when no tfrecord path is given.
    train_dataset = dataset.load_fake_dataset()
    if FLAGS.dataset:
        train_dataset = dataset.load_tfrecord_dataset(FLAGS.dataset,
                                                      FLAGS.classes)
    train_dataset = train_dataset.shuffle(buffer_size=1024)  # TODO: not 1024
    train_dataset = train_dataset.batch(FLAGS.batch_size)
    # NOTE(review): the literal 80 passed to transform_targets is hard-coded;
    # confirm what it stands for against dataset.transform_targets.
    train_dataset = train_dataset.map(
        lambda x, y: (dataset.transform_images(x, FLAGS.size),
                      dataset.transform_targets(y, anchors, anchor_masks, 80)))
    train_dataset = train_dataset.prefetch(
        buffer_size=tf.data.experimental.AUTOTUNE)

    val_dataset = dataset.load_fake_dataset()
    if FLAGS.val_dataset:
        val_dataset = dataset.load_tfrecord_dataset(FLAGS.val_dataset,
                                                    FLAGS.classes)
    val_dataset = val_dataset.batch(FLAGS.batch_size)
    val_dataset = val_dataset.map(
        lambda x, y: (dataset.transform_images(x, FLAGS.size),
                      dataset.transform_targets(y, anchors, anchor_masks, 80)))

    if FLAGS.transfer != 'none':
        model.load_weights(FLAGS.weights)
        if FLAGS.transfer == 'fine_tune':
            # freeze darknet
            darknet = model.get_layer('yolo_darknet')
            freeze_all(darknet)
        elif FLAGS.transfer == 'frozen':
            # freeze everything
            # (This branch used to test FLAGS.mode, so `--transfer frozen`
            # never froze the model and fell through to the branch below.)
            freeze_all(model)
        else:
            # reset top layers
            if FLAGS.tiny:  # get initial weights
                init_model = YoloV3Tiny(FLAGS.size, training=True)
            else:
                init_model = YoloV3(FLAGS.size, training=True)

            if FLAGS.transfer == 'darknet':
                # Re-initialize every 'yolo_*' layer except the darknet
                # backbone; freeze all remaining layers.
                for l in model.layers:
                    if l.name != 'yolo_darknet' and l.name.startswith('yolo_'):
                        l.set_weights(
                            init_model.get_layer(l.name).get_weights())
                    else:
                        freeze_all(l)
            elif FLAGS.transfer == 'no_output':
                # Re-initialize only the output layers; freeze the rest.
                for l in model.layers:
                    if l.name.startswith('yolo_output'):
                        l.set_weights(
                            init_model.get_layer(l.name).get_weights())
                    else:
                        freeze_all(l)

    # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
    optimizer = tf.keras.optimizers.Adam(learning_rate=FLAGS.learning_rate)
    # One loss object per anchor mask.
    loss = [YoloLoss(anchors[mask]) for mask in anchor_masks]

    if FLAGS.mode == 'eager_tf':
        # Eager mode is great for debugging
        # Non eager graph mode is recommended for real training
        avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
        avg_val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)

        for epoch in range(1, FLAGS.epochs + 1):
            for batch, (images, labels) in enumerate(train_dataset):
                with tf.GradientTape() as tape:
                    outputs = model(images, training=True)
                    regularization_loss = tf.reduce_sum(model.losses)
                    pred_loss = []
                    for output, label, loss_fn in zip(outputs, labels, loss):
                        pred_loss.append(loss_fn(label, output))
                    total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                grads = tape.gradient(total_loss, model.trainable_variables)
                optimizer.apply_gradients(zip(grads,
                                              model.trainable_variables))

                logging.info("{}_train_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_loss.update_state(total_loss)

            # Validation pass: same loss computation, no gradient step.
            for batch, (images, labels) in enumerate(val_dataset):
                outputs = model(images)
                regularization_loss = tf.reduce_sum(model.losses)
                pred_loss = []
                for output, label, loss_fn in zip(outputs, labels, loss):
                    pred_loss.append(loss_fn(label, output))
                total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                logging.info("{}_val_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_val_loss.update_state(total_loss)

            logging.info("{}, train: {}, val: {}".format(
                epoch,
                avg_loss.result().numpy(),
                avg_val_loss.result().numpy()))

            avg_loss.reset_states()
            avg_val_loss.reset_states()
            model.save_weights('checkpoints/yolov3_train_{}.tf'.format(epoch))
    else:
        model.compile(optimizer=optimizer,
                      loss=loss,
                      run_eagerly=(FLAGS.mode == 'eager_fit'))

        callbacks = [
            ReduceLROnPlateau(verbose=1),
            EarlyStopping(patience=3, verbose=1),
            ModelCheckpoint('checkpoints/yolov3_train_{epoch}.tf',
                            verbose=1,
                            save_weights_only=True),
            TensorBoard(log_dir='logs')
        ]

        model.fit(train_dataset,
                  epochs=FLAGS.epochs,
                  callbacks=callbacks,
                  validation_data=val_dataset)
示例#13
0
def main(_argv):
    """Train YOLOv3 (or the tiny variant) as configured through `FLAGS`.

    Enables GPU memory growth, builds the model and the training/validation
    input pipelines, applies the requested transfer-learning setup
    (`FLAGS.transfer`), then trains either with a hand-written eager loop
    (`FLAGS.mode == 'eager_tf'`) or through `model.fit`.

    Args:
        _argv: Unused positional arguments.
    """
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for physical_device in physical_devices:
        tf.config.experimental.set_memory_growth(physical_device, True)

    if FLAGS.tiny:
        model = YoloV3Tiny(FLAGS.size,
                           training=True,
                           classes=FLAGS.num_classes)
        anchors = yolo_tiny_anchors
        anchor_masks = yolo_tiny_anchor_masks
    else:
        model = YoloV3(FLAGS.size, training=True, classes=FLAGS.num_classes)
        anchors = yolo_anchors
        anchor_masks = yolo_anchor_masks

    # Fall back to the fake data set when no tfrecord path is given.
    if FLAGS.dataset:
        train_dataset = dataset.load_tfrecord_dataset(FLAGS.dataset,
                                                      FLAGS.classes,
                                                      FLAGS.size)
    else:
        train_dataset = dataset.load_fake_dataset()
    train_dataset = train_dataset.shuffle(buffer_size=512)
    train_dataset = train_dataset.batch(FLAGS.batch_size)
    train_dataset = train_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))
    train_dataset = train_dataset.prefetch(
        buffer_size=tf.data.experimental.AUTOTUNE)

    if FLAGS.val_dataset:
        val_dataset = dataset.load_tfrecord_dataset(FLAGS.val_dataset,
                                                    FLAGS.classes, FLAGS.size)
    else:
        val_dataset = dataset.load_fake_dataset()
    val_dataset = val_dataset.batch(FLAGS.batch_size)
    val_dataset = val_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))

    # Configure the model for transfer learning
    if FLAGS.transfer == 'none':
        pass  # Nothing to do
    elif FLAGS.transfer in ['darknet', 'no_output']:
        # Darknet transfer is a special case that works
        # with incompatible number of classes

        # reset top layers
        # The pretrained model is built with the class count of the weights
        # file when given, otherwise with the training class count.
        if FLAGS.tiny:
            model_pretrained = YoloV3Tiny(FLAGS.size,
                                          training=True,
                                          classes=FLAGS.weights_num_classes
                                          or FLAGS.num_classes)
        else:
            model_pretrained = YoloV3(FLAGS.size,
                                      training=True,
                                      classes=FLAGS.weights_num_classes
                                      or FLAGS.num_classes)
        model_pretrained.load_weights(FLAGS.weights)

        if FLAGS.transfer == 'darknet':
            # Copy and freeze only the darknet backbone.
            model.get_layer('yolo_darknet').set_weights(
                model_pretrained.get_layer('yolo_darknet').get_weights())
            freeze_all(model.get_layer('yolo_darknet'))

        elif FLAGS.transfer == 'no_output':
            # Copy and freeze every layer except the output layers.
            for l in model.layers:
                if not l.name.startswith('yolo_output'):
                    l.set_weights(
                        model_pretrained.get_layer(l.name).get_weights())
                    freeze_all(l)

    else:
        # All other transfer require matching classes
        model.load_weights(FLAGS.weights)
        if FLAGS.transfer == 'fine_tune':
            # freeze darknet and fine tune other layers
            darknet = model.get_layer('yolo_darknet')
            freeze_all(darknet)
        elif FLAGS.transfer == 'frozen':
            # freeze everything
            freeze_all(model)

    # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
    optimizer = tf.keras.optimizers.Adam(learning_rate=FLAGS.learning_rate)
    # One loss object per anchor mask.
    loss = [
        YoloLoss(anchors[mask],
                 classes=FLAGS.num_classes,
                 ignore_thresh=FLAGS.ignore_threshold) for mask in anchor_masks
    ]

    if FLAGS.mode == 'eager_tf':
        # Eager mode is great for debugging
        # Non eager graph mode is recommended for real training
        avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
        avg_val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)

        for epoch in range(1, FLAGS.epochs + 1):
            for batch, (images, labels) in enumerate(train_dataset):
                with tf.GradientTape() as tape:
                    outputs = model(images, training=True)
                    regularization_loss = tf.reduce_sum(model.losses)
                    pred_loss = []
                    for output, label, loss_fn in zip(outputs, labels, loss):
                        pred_loss.append(loss_fn(label, output))
                    total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                grads = tape.gradient(total_loss, model.trainable_variables)
                optimizer.apply_gradients(zip(grads,
                                              model.trainable_variables))

                logging.info("{}_train_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_loss.update_state(total_loss)

            # Validation pass: same loss computation, no gradient step.
            for batch, (images, labels) in enumerate(val_dataset):
                outputs = model(images)
                regularization_loss = tf.reduce_sum(model.losses)
                pred_loss = []
                for output, label, loss_fn in zip(outputs, labels, loss):
                    pred_loss.append(loss_fn(label, output))
                total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                logging.info("{}_val_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_val_loss.update_state(total_loss)

            logging.info("{}, train: {}, val: {}".format(
                epoch,
                avg_loss.result().numpy(),
                avg_val_loss.result().numpy()))

            avg_loss.reset_states()
            avg_val_loss.reset_states()
            model.save_weights('checkpoints/yolov3_train_{}.tf'.format(epoch))
    else:
        model.compile(optimizer=optimizer,
                      loss=loss,
                      run_eagerly=(FLAGS.mode == 'eager_fit'))

        callbacks = [
            ReduceLROnPlateau(verbose=1),
            EarlyStopping(patience=FLAGS.patience, verbose=1),
            # Keep only the checkpoint with the best validation loss.
            ModelCheckpoint(os.path.join(FLAGS.output_path,
                                         'yolov3_train_best.tf'),
                            monitor='val_loss',
                            save_best_only=True,
                            verbose=1,
                            save_weights_only=True),
            TensorBoard(log_dir='logs')
        ]

        model.fit(train_dataset,
                  epochs=FLAGS.epochs,
                  callbacks=callbacks,
                  validation_data=val_dataset)
示例#14
0
def InferenceSampler_test():
    """Train a tiny LSTM decoder on "hello" and decode with InferenceSampler.

    Phase 1 trains an embedding, an LSTM cell and a dense projection with
    `TrainingSampler` for 500 steps on a single sequence. Phase 2 rebuilds
    the decoder around an `InferenceSampler` configured as a greedy
    (argmax) sampler and prints the decoded ids and characters for a batch of
    5 start tokens.
    """
    vocab_size = 6
    SOS_token = 0
    EOS_token = 5

    # x_data = np.array([[SOS_token, 3, 1, 4, 3, 2],[SOS_token, 3, 4, 2, 3, 1],[SOS_token, 1, 3, 2, 2, 1]], dtype=np.int32)
    # y_data = np.array([[3, 1, 4, 3, 2,EOS_token],[3, 4, 2, 3, 1,EOS_token],[1, 3, 2, 2, 1,EOS_token]],dtype=np.int32)
    # print("data shape: ", x_data.shape)

    index_to_char = {
        SOS_token: '<S>',
        1: 'h',
        2: 'e',
        3: 'l',
        4: 'o',
        EOS_token: '<E>'
    }
    # Input is the target shifted right by one position (teacher forcing).
    x_data = np.array([[SOS_token, 1, 2, 3, 3, 4]], dtype=np.int32)
    y_data = np.array([[1, 2, 3, 3, 4, EOS_token]], dtype=np.int32)

    output_dim = vocab_size
    batch_size = len(x_data)
    hidden_dim = 7

    seq_length = x_data.shape[1]
    embedding_dim = 8

    # Deterministic initial embedding table: 0, 1, 2, ... row by row.
    init = np.arange(vocab_size * embedding_dim).reshape(vocab_size, -1)

    embedding = tf.keras.layers.Embedding(
        vocab_size,
        embedding_dim,
        embeddings_initializer=Constant(init),
        trainable=True)
    ##### embedding.weights, embedding.trainable_variables, embedding.trainable_weights --> all give the same result

    target = tf.convert_to_tensor(y_data)

    # Decoder

    # single layer RNN
    decoder_cell = tf.keras.layers.LSTMCell(hidden_dim)
    # decoder init state:

    #init_state = [tf.zeros((batch_size,hidden_dim)), tf.ones((batch_size,hidden_dim))]   # (h,c)
    init_state = decoder_cell.get_initial_state(inputs=None,
                                                batch_size=batch_size,
                                                dtype=tf.float32)

    projection_layer = tf.keras.layers.Dense(output_dim)

    sampler = tfa.seq2seq.sampler.TrainingSampler(
    )  # alias ---> sampler = tfa.seq2seq.TrainingSampler()

    decoder = tfa.seq2seq.BasicDecoder(decoder_cell,
                                       sampler,
                                       output_layer=projection_layer)

    # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)

    for step in range(500):
        with tf.GradientTape() as tape:
            inputs = embedding(x_data)
            # Only the scheduled-sampling sampler is given the embedding
            # weights; this keeps the loop usable when the sampler is swapped.
            if isinstance(
                    sampler,
                    tfa.seq2seq.sampler.ScheduledEmbeddingTrainingSampler):
                outputs, last_state, last_sequence_lengths = decoder(
                    inputs,
                    initial_state=init_state,
                    sequence_length=[seq_length] * batch_size,
                    training=True,
                    embedding=embedding.weights)
            else:
                outputs, last_state, last_sequence_lengths = decoder(
                    inputs,
                    initial_state=init_state,
                    sequence_length=[seq_length] * batch_size,
                    training=True)

            logits = outputs.rnn_output

            # Uniform weight for every time step of every sequence.
            weights = tf.ones(shape=[batch_size, seq_length])
            loss = tfa.seq2seq.sequence_loss(logits, target, weights)

        # The variable list has to be rebuilt on every step.
        trainable_variables = embedding.trainable_variables + decoder.trainable_variables
        grads = tape.gradient(loss, trainable_variables)
        optimizer.apply_gradients(zip(grads, trainable_variables))

        if step % 10 == 0:
            print(step, loss.numpy())

    sample_batch_size = 5

    # Try out InferenceSampler:
    # configured below as a greedy-embedding sampler (argmax over the
    # projected outputs, stop on EOS, feed back the embedded sample).
    sampler = tfa.seq2seq.InferenceSampler(
        sample_fn=lambda outputs: tf.argmax(
            outputs, axis=-1, output_type=tf.int32),
        sample_shape=[],
        sample_dtype=tf.int32,
        end_fn=lambda sample_ids: tf.equal(sample_ids, EOS_token),
        next_inputs_fn=lambda ids: tf.nn.embedding_lookup(
            embedding.weights, ids))

    # Same cell and projection as in training; only the sampler differs.
    decoder = tfa.seq2seq.BasicDecoder(decoder_cell,
                                       sampler,
                                       output_layer=projection_layer,
                                       maximum_iterations=seq_length)

    init_state = decoder_cell.get_initial_state(inputs=None,
                                                batch_size=sample_batch_size,
                                                dtype=tf.float32)

    # The decoder must be given already-embedded start inputs.
    start_inputs = tf.nn.embedding_lookup(
        embedding.weights,
        tf.tile([SOS_token], [sample_batch_size]))
    outputs, last_state, last_sequence_lengths = decoder(
        start_inputs, initial_state=init_state, training=False)

    result = tf.argmax(outputs.rnn_output, axis=-1).numpy()

    print(result)
    for i in range(sample_batch_size):
        print(''.join(index_to_char[a] for a in result[i] if a != EOS_token))
示例#15
0
from mpl_toolkits.mplot3d import Axes3D


def getZ(x, y):
    """Evaluate the surface z = (2*x**2 + 3*y + 3) + (y**2 - 9*x + 6).

    Only arithmetic operators are used, so the function works elementwise on
    plain numbers, NumPy arrays and TensorFlow variables alike.
    """
    first_part = 2 * x**2 + 3 * y + 3
    second_part = y**2 - 9 * x + 6
    return first_part + second_part

# Sampling ranges used to evaluate the surface on a grid.
xrng = np.arange(-3, 3, .1)
yrng = np.arange(-4, 4, .1)

# Find the coordinates of the surface minimum with plain gradient descent,
# starting from a random point.
x = tf.Variable(tf.random.truncated_normal([1]))
y = tf.Variable(tf.random.truncated_normal([1]))
rate = 0.1  # gradient-descent step size
epoches = 1000  # number of descent steps
for epoch in range(epoches):
    # gradient() is called exactly once per tape, so a non-persistent tape is
    # enough; its resources are released right after that call.
    with tf.GradientTape() as tape:
        loss = getZ(x, y)
    grads = tape.gradient(loss, [x, y])
    print('epoch={0},x={1},y={2},loss={3}'.format(epoch, x.numpy(), y.numpy(), loss.numpy()))
    x.assign_sub(rate * grads[0])
    y.assign_sub(rate * grads[1])


# Build the grid points and evaluate the surface on them.
X, Y = np.meshgrid(xrng, yrng)
Z = getZ(X, Y)

# print(X.shape,Y.shape,Z.shape)
# print(X,Y,Z)
# auto gradient
import tensorflow as tf

# Four scalar constants: the coefficients a, b, c and the evaluation point w.
a, b, c, w = (tf.constant(value) for value in (1., 2., 3., 4.))

# Record the computation of y so it can be differentiated afterwards.
with tf.GradientTape() as tape:
    # w is a constant, so it is added to the tape's watch list explicitly.
    tape.watch(w)
    y = a * w**2 + b * w + c

# Derivative of y with respect to w.
dy_dw = tape.gradient(y, w)
print(dy_dw)
示例#17
0
def train_txt_gen_rnn(train_dat, valid_dat, vocab, embed_dim, units,
                      batch_size, seq_len, learn_rt, n_epochs,
                      early_stop_epochs, save_loc):
    """Train an RNN to predict the next words in a sequence.

    The model is trained with Adam for at most ``n_epochs`` epochs. After each
    epoch the loss on one held-out batch (the first batch of ``valid_dat``) is
    computed; whenever it is not worse than the best value seen so far a
    checkpoint is written, otherwise a "no improvement" counter grows.
    Training stops early once that counter reaches ``early_stop_epochs``.

    Args:
        train_dat: iterable yielding ``(input, target)`` batches; iterated
            once per epoch.
        valid_dat: iterable yielding ``(input, target)`` batches; only its
            first batch is used for validation.
        vocab: sized collection; its length is passed to ``rnn_spec`` as the
            dictionary size.
        embed_dim: forwarded to ``rnn_spec`` as ``embed_dim``.
        units: forwarded to ``rnn_spec`` as ``num_units``.
        batch_size: first dimension of the shape the model is built with.
        seq_len: second dimension of the shape the model is built with.
        learn_rt: learning rate of the Adam optimizer.
        n_epochs: maximum number of epochs.
        early_stop_epochs: number of consecutive non-improving epochs that
            triggers early stopping.
        save_loc: directory in which checkpoints (prefix ``ckpt``) are saved.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Epoch``, ``Train Loss``
        (loss of the last training batch of the epoch) and
        ``Validation Loss``.
    """
    start_tm = time.time()
    # Model Specification
    rnn = rnn_spec(dict_len=len(vocab), embed_dim=embed_dim, num_units=units)
    optimizer = tf.train.AdamOptimizer(learning_rate=learn_rt)
    rnn.build(tf.TensorShape([batch_size, seq_len]))
    # Single held-out batch used for validation after every epoch
    valid_x, valid_y = next(iter(valid_dat))
    # Early Stopping Placeholders
    epoch_ph = []
    epoch_tm_ph = [start_tm]
    trn_loss_ph = []
    val_loss_ph = []
    break_ph = []
    # Iterative Training
    for epoch in range(n_epochs):
        # Train
        for inp, target in train_dat:
            with tf.GradientTape() as tape:
                train_predictions = rnn(inp)
                train_loss = loss_function(target, train_predictions)
            # Differentiate only with respect to the trainable weights
            grads = tape.gradient(train_loss, rnn.trainable_variables)
            optimizer.apply_gradients(zip(grads, rnn.trainable_variables))
        # Validation: one forward pass on the held-out batch. No gradients
        # are needed here, so nothing is recorded on a tape, and the batch is
        # evaluated once instead of once per batch of valid_dat.
        valid_predictions = rnn(valid_x)
        valid_loss = loss_function(valid_y, valid_predictions)
        # Record Epoch Results
        epoch_ph.append(epoch + 1)
        trn_loss_ph.append(train_loss)
        val_loss_ph.append(valid_loss)
        epoch_sec_elapsed = str(int(time.time() - epoch_tm_ph[-1]))
        pr_str1 = 'Ep. {} Loss: Train {:.4f} Val {:.4f}'.format(
            epoch + 1, train_loss, valid_loss)
        print(pr_str1 + '  ' + epoch_sec_elapsed + ' sec.')
        epoch_tm_ph.append(time.time())
        # Early Stopping: the current loss is already part of val_loss_ph, so
        # it can only exceed the minimum when this epoch did not improve.
        best_val_loss = min(val_loss_ph)
        if valid_loss > best_val_loss:
            break_ph.append(1)
        else:
            break_ph = []
            # Model Saving
            # NOTE(review): a fresh Checkpoint object is created for every
            # save; presumably this restarts the save counter so the best
            # checkpoint overwrites the previous one - confirm before hoisting
            # it out of the loop.
            checkpoint_prefix = os.path.join(save_loc, "ckpt")
            checkpoint = tf.train.Checkpoint(optimizer=optimizer, model=rnn)
            checkpoint.save(file_prefix=checkpoint_prefix)
        if sum(break_ph) >= early_stop_epochs:
            print("Stopping after " + str(int(epoch + 1)) + " epochs.")
            print("Validation cross entropy hasn't improved in " +
                  str(int(early_stop_epochs)) + " rounds.")
            break
    # Output Training Progress
    output_df = pd.DataFrame({
        'Epoch': epoch_ph,
        'Train Loss': trn_loss_ph,
        'Validation Loss': val_loss_ph
    })
    end_tm = time.time()
    sec_elapsed = (np.float64(end_tm) - np.float64(start_tm))
    print('Execution Time: ' + seconds_to_time(sec_elapsed))
    return output_df
示例#18
0
文件: md.py 项目: ChayaSt/gimlet
    def minimize(
            self,
            method='adam',
            coordinates=None,
            max_iter=10000,
            **kwargs):
        """Minimize the energy with respect to the coordinates.

        Args:
            method: name of the optimization method. Only ``'adam'`` is
                handled; any other value leaves the coordinates untouched.
            coordinates: starting coordinates. Defaults to
                ``self.coordinates`` when ``None``.
            max_iter: upper bound on the number of optimization steps.
            **kwargs: accepted but not used by this method.

        Returns:
            None. For ``method='adam'`` the optimized coordinates are
            centered on their mean, scaled by 10 and written, together with
            ``self.atoms`` and ``self.adjacency_map``, to
            ``'caffeine_out.sdf'``.
        """

        max_iter = tf.constant(max_iter, dtype=tf.int64)

        # Identity check: comparing a tensor to None with == is not reliable.
        if coordinates is None:
            coordinates = self.coordinates

        if method == 'adam':
            # put coordinates into a variable
            coordinates = tf.Variable(coordinates)

            # keep a history of the ten most recent energies
            recent_ten = tf.zeros((10, ), dtype=tf.float32)

            # get the Adam optimizer
            # NOTE(review): a learning rate of 1000 is unusually large -
            # presumably tied to the coordinate units; confirm.
            optimizer = tf.keras.optimizers.Adam(1000)

            # init
            iter_idx = tf.constant(0, dtype=tf.int64)

            while tf.less(iter_idx, max_iter):
                with tf.GradientTape() as tape:
                    energy = self.energy(coordinates)

                print(energy)

                # slide the history window: drop the oldest, append the newest
                recent_ten = tf.concat(
                    [
                        recent_ten[1:],
                        tf.expand_dims(energy, 0)
                    ],
                    axis=0)

                grad = tape.gradient(energy, coordinates)

                # NaN gradient entries are replaced by zero so they do not
                # move the corresponding coordinates
                grad = tf.where(
                    tf.math.is_nan(grad),
                    tf.zeros_like(grad),
                    grad)

                optimizer.apply_gradients(zip([grad], [coordinates]))

                # converged: after more than 100 steps the recent energies
                # barely vary
                if tf.logical_and(
                    tf.greater(iter_idx, 100),
                    tf.less(
                        tf.math.reduce_std(recent_ten),
                        1e-3)):
                    break

                iter_idx += 1

            gin.i_o.to_sdf.write_sdf(
                [[
                    self.atoms,
                    self.adjacency_map,
                    tf.constant(10, dtype=tf.float32) * (
                        coordinates - tf.reduce_mean(coordinates, 0))
                ]],
                'caffeine_out.sdf')
示例#19
0
文件: main.py 项目: fomorians/vae
def main():
    """Train and evaluate the model on MNIST, logging summaries and checkpoints.

    Command-line arguments:
        --job-dir: directory for checkpoints and summaries (required).
        --seed: integer seed for the Python, NumPy and TensorFlow RNGs
            (default 67).

    Each epoch runs one pass over the training set with gradient updates,
    one pass over the test set without updates, writes summaries and saves
    a checkpoint into the job directory. If a checkpoint already exists in
    the job directory it is restored before training starts.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--job-dir', required=True)
    parser.add_argument('--seed', default=67, type=int)
    args = parser.parse_args()
    print('args:', args)

    # create a job directory if it doesn't already exist
    if not os.path.exists(args.job_dir):
        os.makedirs(args.job_dir)

    # enable eager execution
    tf.enable_eager_execution()

    # set random seeds for consistent execution
    random.seed(args.seed)
    np.random.seed(args.seed)
    tf.set_random_seed(args.seed)

    # define hyperparameters
    params = Params()
    print('params:', params)

    # load MNIST dataset
    ((images_train, labels_train),
     (images_test, labels_test)) = tf.keras.datasets.mnist.load_data()

    # prepare the images by casting and rescaling
    images_train = prep_images(images_train)
    images_test = prep_images(images_test)

    # compute statistics from the training set
    images_loc = images_train.mean()
    images_scale = images_train.std()

    # define datasets for sampling batches
    dataset_train = get_dataset((images_train, labels_train),
                                batch_size=params.batch_size,
                                shuffle=True)
    dataset_test = get_dataset((images_test, labels_test),
                               batch_size=params.batch_size)

    # model / optimization
    global_step = tf.train.get_or_create_global_step()
    optimizer = tf.train.AdamOptimizer(learning_rate=params.learning_rate)
    model = Model(inputs_loc=images_loc,
                  inputs_scale=images_scale,
                  inputs_shape=[28, 28, 1])
    # prior over the 2-dimensional latent: zero mean, identity scale
    latent_prior = tfp.distributions.MultivariateNormalDiag(
        loc=tf.zeros(shape=[2], dtype=tf.float32),
        scale_identity_multiplier=1.0)

    # checkpoints
    checkpoint = tf.train.Checkpoint(optimizer=optimizer,
                                     model=model,
                                     global_step=global_step)
    # resume from the most recent checkpoint in the job directory, if any
    checkpoint_path = tf.train.latest_checkpoint(args.job_dir)
    if checkpoint_path is not None:
        checkpoint.restore(checkpoint_path).assert_consumed()

    # summaries
    summary_writer = tf.contrib.summary.create_file_writer(args.job_dir,
                                                           max_queue=1,
                                                           flush_millis=1000)
    summary_writer.set_as_default()

    with trange(params.epochs) as pbar:
        for epoch in pbar:
            # training pass: a fresh running mean of the loss for this epoch
            loss_train = tfe.metrics.Mean(name='loss/train')
            for images, labels in dataset_train:
                with tf.GradientTape() as tape:
                    outputs_dist, z_dist, z = model(images,
                                                    labels,
                                                    training=True)
                    loss = losses.variational(outputs_dist, z_dist, images,
                                              latent_prior)
                    loss_train(loss)

                grads = tape.gradient(loss, model.trainable_variables)
                grads_and_vars = zip(grads, model.trainable_variables)
                optimizer.apply_gradients(grads_and_vars,
                                          global_step=global_step)

            # training summaries; grads, images and outputs_dist below are
            # the values left over from the last training batch
            with tf.contrib.summary.always_record_summaries():
                # return value is discarded; presumably called so the metric
                # emits its own summary - TODO confirm
                loss_train.result()

                tf.contrib.summary.scalar(name='grad_norm',
                                          tensor=tf.global_norm(grads))

                tf.contrib.summary.image(name='image/train',
                                         tensor=images,
                                         max_images=1,
                                         step=global_step)
                tf.contrib.summary.image(name='outputs/train',
                                         tensor=outputs_dist.mean(),
                                         max_images=1,
                                         step=global_step)

            # evaluation pass: forward only, no gradient updates
            loss_test = tfe.metrics.Mean(name='loss/eval')
            for images, labels in dataset_test:
                outputs_dist, z_dist, z = model(images, labels)
                loss = losses.variational(outputs_dist, z_dist, images,
                                          latent_prior)
                loss_test(loss)

            # evaluation summaries; images and outputs_dist come from the
            # last test batch
            with tf.contrib.summary.always_record_summaries():
                loss_test.result()

                tf.contrib.summary.image(name='image/eval',
                                         tensor=images,
                                         max_images=1,
                                         step=global_step)
                tf.contrib.summary.image(name='outputs/eval',
                                         tensor=outputs_dist.mean(),
                                         max_images=1,
                                         step=global_step)

            pbar.set_description('loss (train): {}, loss (eval): {}'.format(
                loss_train.result().numpy(),
                loss_test.result().numpy()))

            # save a checkpoint at the end of every epoch
            checkpoint_prefix = os.path.join(args.job_dir, 'ckpt')
            checkpoint.save(checkpoint_prefix)
示例#20
0
import tensorflow as tf

tf.enable_eager_execution()

# --- Gradient of z = (sum(x))**2 with respect to the input x ---------------
x = tf.ones([2, 2])

with tf.GradientTape() as t:
    t.watch(x)
    y = tf.reduce_sum(x)
    z = tf.multiply(y, y)

dz_dx = t.gradient(z, x)
# Every entry of the 2x2 gradient must equal 8.
for i in range(2):
    for j in range(2):
        assert dz_dx[i, j].numpy() == 8.0

# --- Gradient of the same z with respect to the intermediate value y -------
x = tf.ones([2, 2])

with tf.GradientTape() as t:
    t.watch(x)
    y = tf.reduce_sum(x)
    z = tf.multiply(y, y)

dz_dy = t.gradient(z, y)
assert dz_dy.numpy() == 8.0

# --- Persistent tape recording z = x**4 in two steps ------------------------
x = tf.constant(3.0)
with tf.GradientTape(persistent=True) as t:
    t.watch(x)
    y = x * x
    z = y * y
def grad(model, images, labels):
    """Return the gradients of the loss with respect to ``model.variables``."""
    # Record the loss computation on the gradient tape.
    with tf.GradientTape() as tape:
        loss_value = loss_fn(model, images, labels)
    # Replay the tape backwards to compute the gradients.
    gradients = tape.gradient(loss_value, model.variables)
    return gradients
示例#22
0
def grad(x, y):
    """Return the derivative of ``f(x, y)`` with respect to ``x``."""
    with tf.GradientTape() as tape:
        # x is watched explicitly so that it is tracked by the tape.
        tape.watch(x)
        result = f(x, y)
    d_result_dx = tape.gradient(result, x)
    return d_result_dx
# Hyper-parameters.
lr = 0.005
epochs = 500

# Weight of the L2 regularization term in the total loss.
alpha = 0.03


# Build the network: a hidden layer with 8 units followed by a single output.
w1 = tf.Variable(tf.random.truncated_normal([2, 8], mean=0, stddev=0.1), dtype=tf.float32)
b1 = tf.Variable(tf.constant(0.01, shape=[8]))

w2 = tf.Variable(tf.random.truncated_normal([8, 1], mean=0, stddev=0.1), dtype=tf.float32)
b2 = tf.Variable(tf.constant([0.01]), dtype=tf.float32)

for epoch in range(epochs):
    for idx, (batch_x, batch_y) in enumerate(train_db):
        with tf.GradientTape() as tape:
            # Forward pass: ReLU hidden layer, then a linear output layer.
            hidden = tf.nn.relu(tf.matmul(batch_x, w1) + b1)
            y = tf.matmul(hidden, w2) + b2

            # Data term: mean squared error. Using this alone would be the
            # variant without regularization.
            loss_mse = tf.reduce_mean(tf.square(batch_y - y))

            # L2 regularization over both weight matrices; equivalent to
            # tf.nn.l2_loss(w1) + tf.nn.l2_loss(w2).
            loss_l2 = tf.reduce_sum([tf.nn.l2_loss(w1) + tf.nn.l2_loss(w2)])

            # With alpha = 1 the result can no longer be plotted correctly.
            loss = loss_mse + alpha * loss_l2

        grad = tape.gradient(loss, [w1, b1, w2, b2])
示例#24
0
def compute_apply_gradients(model, x, optimizer):
    """Run one optimization step: compute the loss on ``x`` and update the model.

    The loss comes from ``compute_loss(model, x)``; its gradients with respect
    to the model's trainable variables are applied through ``optimizer``.
    """
    with tf.GradientTape() as tape:
        loss = compute_loss(model, x)
    variables = model.trainable_variables
    grads = tape.gradient(loss, variables)
    optimizer.apply_gradients(zip(grads, variables))
示例#25
0
def grad(model, inputs, targets):
    """Return ``(loss_value, gradients)`` for one batch.

    The gradients are taken with respect to the model's trainable variables.
    """
    with tf.GradientTape() as tape:
        loss_value = loss(model, inputs, targets)
    gradients = tape.gradient(loss_value, model.trainable_variables)
    return loss_value, gradients
示例#26
0
    def _train_step(self, images, kp2d, kp3d, has3d, theta):
        """Run one adversarial training step for generator and discriminator.

        Computes the generator loss (2D keypoint loss, the adversarial term
        and, when ``config.USE_3D`` is set, the 3D keypoint and pose/shape
        losses) and the discriminator loss, applies one optimizer update to
        each network and records all loss values in the loss-log metrics.

        Args:
            images: input batch for the generator; ``images.shape[0]`` is
                taken as the batch size.
            kp2d: ground-truth 2D keypoints; the first two entries of the
                last axis are compared to the prediction and the third is
                used as the per-keypoint loss weight.
            kp3d: ground-truth 3D keypoints (used only when ``USE_3D``).
            has3d: per-sample weight for the 3D keypoint loss (used only
                when ``USE_3D``).
            theta: real parameters from which the real discriminator input
                is accumulated.
        """
        tf.keras.backend.set_learning_phase(1)
        batch_size = images.shape[0]

        # both tapes record the same forward pass; one is differentiated for
        # the generator, the other for the discriminator
        with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
            generator_outputs = self.generator(images, training=True)
            # only use last computed theta (from iterative feedback loop)
            _, kp2d_pred, kp3d_pred, pose_pred, shape_pred, _ = generator_outputs[
                -1]

            # third channel of kp2d weights the 2D loss per keypoint
            vis = tf.expand_dims(kp2d[:, :, 2], -1)
            kp2d_loss = v1_loss.absolute_difference(kp2d[:, :, :2],
                                                    kp2d_pred,
                                                    weights=vis)
            kp2d_loss = kp2d_loss * self.config.GENERATOR_2D_LOSS_WEIGHT

            if self.config.USE_3D:
                has3d = tf.expand_dims(has3d, -1)

                kp3d_real = batch_align_by_pelvis(kp3d)
                kp3d_pred = batch_align_by_pelvis(
                    kp3d_pred[:, :self.config.NUM_KP3D, :])

                # flatten to one row per sample before comparing
                kp3d_real = tf.reshape(kp3d_real, [batch_size, -1])
                kp3d_pred = tf.reshape(kp3d_pred, [batch_size, -1])

                kp3d_loss = v1_loss.mean_squared_error(
                    kp3d_real, kp3d_pred, weights=has3d) * 0.5
                kp3d_loss = kp3d_loss * self.config.GENERATOR_3D_LOSS_WEIGHT
                """Calculating pose and shape loss basically makes no sense 
                    due to missing paired 3d and mosh ground truth data.
                    The original implementation has paired data for Human 3.6 M dataset
                    which was not published due to licence conflict.
                    Nevertheless with SMPLify paired data can be generated 
                    (see http://smplify.is.tue.mpg.de/ for more information)
                """
                pose_pred = tf.reshape(pose_pred, [batch_size, -1])
                shape_pred = tf.reshape(shape_pred, [batch_size, -1])
                pose_shape_pred = tf.concat([pose_pred, shape_pred], 1)

                # fake ground truth
                # all-zero weights: the pose/shape loss below evaluates with
                # zero weight for every sample
                has_smpl = tf.zeros(batch_size,
                                    tf.float32)  # do not include loss
                has_smpl = tf.expand_dims(has_smpl, -1)
                pose_shape_real = tf.zeros(pose_shape_pred.shape)

                ps_loss = v1_loss.mean_squared_error(
                    pose_shape_real, pose_shape_pred, weights=has_smpl) * 0.5
                ps_loss = ps_loss * self.config.GENERATOR_3D_LOSS_WEIGHT

            # use all poses and shapes from iterative feedback loop
            fake_disc_input = self.accumulate_fake_disc_input(
                generator_outputs)
            fake_disc_output = self.discriminator(fake_disc_input,
                                                  training=True)

            real_disc_input = self.accumulate_real_disc_input(theta)
            real_disc_output = self.discriminator(real_disc_input,
                                                  training=True)

            # generator adversarial term: squared distance of the
            # discriminator's outputs on generated samples from 1
            gen_disc_loss = tf.reduce_mean(
                tf.reduce_sum((fake_disc_output - 1)**2, axis=1))
            gen_disc_loss = gen_disc_loss * self.config.DISCRIMINATOR_LOSS_WEIGHT

            generator_loss = tf.reduce_sum([kp2d_loss, gen_disc_loss])
            if self.config.USE_3D:
                generator_loss = tf.reduce_sum(
                    [generator_loss, kp3d_loss, ps_loss])

            # discriminator terms: real outputs are pushed towards 1,
            # outputs on generated samples towards 0
            disc_real_loss = tf.reduce_mean(
                tf.reduce_sum((real_disc_output - 1)**2, axis=1))
            disc_fake_loss = tf.reduce_mean(
                tf.reduce_sum(fake_disc_output**2, axis=1))

            discriminator_loss = tf.reduce_sum(
                [disc_real_loss, disc_fake_loss])
            discriminator_loss = discriminator_loss * self.config.DISCRIMINATOR_LOSS_WEIGHT

        # each network is updated only with the gradients of its own loss
        generator_grads = gen_tape.gradient(generator_loss,
                                            self.generator.trainable_variables)
        discriminator_grads = disc_tape.gradient(
            discriminator_loss, self.discriminator.trainable_variables)
        self.generator_opt.apply_gradients(
            zip(generator_grads, self.generator.trainable_variables))
        self.discriminator_opt.apply_gradients(
            zip(discriminator_grads, self.discriminator.trainable_variables))

        # record the loss values of this step in the loss-log metrics
        self.generator_loss_log.update_state(generator_loss)
        self.kp2d_loss_log.update_state(kp2d_loss)
        self.gen_disc_loss_log.update_state(gen_disc_loss)

        if self.config.USE_3D:
            self.kp3d_loss_log.update_state(kp3d_loss)
            self.pose_shape_loss_log.update_state(ps_loss)

        self.discriminator_loss_log.update_state(discriminator_loss)
        self.disc_real_loss_log.update_state(disc_real_loss)
        self.disc_fake_loss_log.update_state(disc_fake_loss)
示例#27
0
import tensorflow as tf

# Single trainable parameter, initialised to 5.
w = tf.Variable(tf.constant(5, dtype=tf.float32))

epoch = 40
LR_BASE = 0.2  # initial learning rate
LR_DECAY = 0.99  # learning-rate decay factor
LR_STEP = 1  # number of rounds between two learning-rate updates

# Top-level loop: the only "data" is w itself, so every iteration is one
# optimisation step; the loop runs `epoch` (40) times.
for epoch in range(epoch):
    # Exponentially decayed learning rate for this step.
    lr = LR_BASE * LR_DECAY**(epoch / LR_STEP)
    # The tape records the loss computation so it can be differentiated.
    with tf.GradientTape() as tape:
        loss = tf.square(w + 1)
    # Derivative of loss with respect to w.
    grads = tape.gradient(loss, w)

    # In-place update: w = w - lr * grads
    w.assign_sub(lr * grads)
    print("After %s epoch,w is %f,loss is %f,lr is %f" %
          (epoch, w.numpy(), loss, lr))
示例#28
0
def train(model, opt, original, target):
    """Apply one optimizer step to ``model``.

    Args:
        model: object exposing ``trainable_variables``; passed to ``loss``.
        opt: optimizer whose ``apply_gradients`` performs the update.
        original: forwarded to ``loss`` as its second argument.
        target: forwarded to ``loss`` as its third argument.
    """
    # Only the forward pass has to be recorded; the gradient computation and
    # the variable update run after the tape context has been closed.
    with tf.GradientTape() as tape:
        loss_value = loss(model, original, target)
    gradients = tape.gradient(loss_value, model.trainable_variables)
    opt.apply_gradients(zip(gradients, model.trainable_variables))
def processData(device_index, start_samples, samples, federated,
                full_data_size, number_of_batches, parameter_server,
                sample_distribution):
    pause(5)  # PS server (if any) starts first
    checkpointpath1 = 'results/model{}.h5'.format(device_index)
    outfile = 'results/dump_train_variables{}.npz'.format(device_index)
    outfile_models = 'results/dump_train_model{}.npy'.format(device_index)
    global_model = 'results/model_global.npy'
    global_epoch = 'results/epoch_global.npy'

    # np.random.seed(1)
    # tf.random.set_seed(1)  # common initialization

    learning_rate = args.mu
    learning_rate_local = learning_rate

    B = np.ones((devices, devices)) - tf.one_hot(np.arange(devices), devices)
    Probabilities = B[device_index, :] / (devices - 1)
    training_signal = False

    # check for backup variables on start
    if os.path.isfile(checkpointpath1):
        train_start = False

        # backup the model and the model target
        model = models.load_model(checkpointpath1)
        data_history = []
        label_history = []
        local_model_parameters = np.load(outfile_models, allow_pickle=True)
        model.set_weights(local_model_parameters.tolist())

        dump_vars = np.load(outfile, allow_pickle=True)
        frame_count = dump_vars['frame_count']
        epoch_loss_history = dump_vars['epoch_loss_history'].tolist()
        running_loss = np.mean(epoch_loss_history[-5:])
        epoch_count = dump_vars['epoch_count']
    else:
        train_start = True
        model = create_q_model()
        data_history = []
        label_history = []
        frame_count = 0
        # Experience replay buffers
        epoch_loss_history = []
        epoch_count = 0
        running_loss = math.inf

    if parameter_server:
        epoch_global = 0

    training_end = False

    a = model.get_weights()
    # set an arbitrary optimizer, here Adam is used
    optimizer = keras.optimizers.Adam(learning_rate=args.mu, clipnorm=1.0)
    # create a data object (here radar data)
    #start = time.time()
    if args.noniid_assignment == 1:
        data_handle = RadarData_tasks(filepath, device_index, start_samples,
                                      samples, full_data_size)
    else:
        data_handle = RadarData(filepath, device_index, start_samples, samples,
                                full_data_size, args.random_data_distribution)
    #end = time.time()
    #time_count = (end - start)
    #print(time_count)
    # create a consensus object
    cfa_consensus = CFA_process(devices, device_index, args.N)

    while True:  # Run until solved
        # collect 1 batch
        frame_count += 1
        obs, labels = data_handle.getTrainingData(batch_size)
        data_batch = preprocess_observation(obs, batch_size)

        # Save data and labels in the current learning session
        data_history.append(data_batch)
        label_history.append(labels)

        if frame_count % number_of_batches == 0:
            if not parameter_server:
                epoch_count += 1
            # check scheduling for federated
            if federated:
                if epoch_count == 1 or scheduling_tx[device_index,
                                                     epoch_count] == 1:
                    training_signal = False
                else:
                    # stop all computing, just save the previous model
                    training_signal = True
                    model_weights = np.asarray(model.get_weights())
                    model.save(checkpointpath1,
                               include_optimizer=True,
                               save_format='h5')
                    np.savez(outfile,
                             frame_count=frame_count,
                             epoch_loss_history=epoch_loss_history,
                             training_end=training_end,
                             epoch_count=epoch_count,
                             loss=running_loss)
                    np.save(outfile_models, model_weights)
            # check scheduling for parameter server
            if parameter_server:
                while not os.path.isfile(global_epoch):
                    # implementing consensus
                    print("waiting")
                    pause(1)
                try:
                    epoch_global = np.load(global_epoch, allow_pickle=True)
                except:
                    pause(5)
                    print("retrying opening global epoch counter")
                    try:
                        epoch_global = np.load(global_epoch, allow_pickle=True)
                    except:
                        print("failed reading global epoch")

                if epoch_global == 0:
                    training_signal = False

                elif scheduling_tx[device_index, epoch_global] == 1:
                    if epoch_global > epoch_count:
                        epoch_count = epoch_global
                        training_signal = False
                    else:
                        training_signal = True
                else:
                    # stop all computing, just save the previous model
                    training_signal = True

                # always refresh the local model using the PS one
                stop_aggregation = False
                while not os.path.isfile(global_model):
                    # implementing consensus
                    print("waiting")
                    pause(1)
                try:
                    model_global = np.load(global_model, allow_pickle=True)
                except:
                    pause(5)
                    print("retrying opening global model")
                    try:
                        model_global = np.load(global_model, allow_pickle=True)
                    except:
                        print("halting aggregation")
                        stop_aggregation = True

                if not stop_aggregation:
                    model.set_weights(model_global.tolist())

                if training_signal:
                    model_weights = np.asarray(model.get_weights())
                    model.save(checkpointpath1,
                               include_optimizer=True,
                               save_format='h5')
                    np.savez(outfile,
                             frame_count=frame_count,
                             epoch_loss_history=epoch_loss_history,
                             training_end=training_end,
                             epoch_count=epoch_count,
                             loss=running_loss)
                    np.save(outfile_models, model_weights)
            # check schedulting for parameter server

        # Local learning update every "number of batches" batches
        time_count = 0
        if frame_count % number_of_batches == 0 and not training_signal:
            # run local batches
            for i in range(number_of_batches):
                start = time.time()
                data_sample = np.array(data_history[i])
                label_sample = np.array(label_history[i])

                # Create a mask to calculate loss
                masks = tf.one_hot(label_sample, n_outputs)

                with tf.GradientTape() as tape:
                    # Train the model on data samples
                    classes = model(data_sample, training=False)
                    # Apply the masks
                    # for k in range(batch_size):
                    #     class_v[k] = tf.argmax(classes[k])
                    # class_v = tf.reduce_sum(tf.multiply(classes, masks), axis=1)
                    # Take best action

                    # Calculate loss
                    loss = loss_function(masks, classes)

                # Backpropagation
                grads = tape.gradient(loss, model.trainable_variables)
                optimizer.apply_gradients(zip(grads,
                                              model.trainable_variables))
                end = time.time()
                time_count = time_count + (end - start) / number_of_batches
            if not parameter_server and not federated:
                print('Average batch training time {:.2f}'.format(time_count))
            del data_history
            del label_history
            data_history = []
            label_history = []

            model_weights = np.asarray(model.get_weights())
            model.save(checkpointpath1,
                       include_optimizer=True,
                       save_format='h5')
            np.savez(outfile,
                     frame_count=frame_count,
                     epoch_loss_history=epoch_loss_history,
                     training_end=training_end,
                     epoch_count=epoch_count,
                     loss=running_loss)
            np.save(outfile_models, model_weights)

            #  Consensus round
            # update local model

            cfa_consensus.update_local_model(model_weights)
            # neighbor = cfa_consensus.get_connectivity(device_index, args.N, devices) # fixed neighbor
            np.random.seed(1)
            tf.random.set_seed(1)  # common initialization
            if not train_start:
                if federated and not training_signal:
                    eps_c = 1 / (args.N + 1)
                    # apply consensus for model parameter
                    # neighbor = np.random.choice(np.arange(devices), args.N, p=Probabilities, replace=False) # choose neighbor
                    neighbor = np.random.choice(
                        indexes_tx[:, epoch_count - 1], args.N,
                        replace=False)  # choose neighbor
                    while neighbor == device_index:
                        neighbor = np.random.choice(
                            indexes_tx[:, epoch_count - 1],
                            args.N,
                            replace=False)  # choose neighbor
                    print(
                        "Consensus from neighbor {} for device {}, local loss {:.2f}"
                        .format(neighbor, device_index, loss.numpy()))

                    model.set_weights(
                        cfa_consensus.federated_weights_computing(
                            neighbor, args.N, epoch_count, eps_c, max_lag))
                    if cfa_consensus.getTrainingStatusFromNeightbor():
                        # a neighbor completed the training, with loss < target, transfer learning is thus applied (the device will copy and reuse the same model)
                        training_signal = True  # stop local learning, just do validation
            else:
                print("Consensus warm up")
                train_start = False

            # check if parameter server is enabled
            # stop_aggregation = False

            # if parameter_server:
            #     # pause(refresh_server)
            #     while not os.path.isfile(global_model):
            #         # implementing consensus
            #         print("waiting")
            #         pause(1)
            #     try:
            #         model_global = np.load(global_model, allow_pickle=True)
            #     except:
            #         pause(5)
            #         print("retrying opening global model")
            #         try:
            #             model_global = np.load(global_model, allow_pickle=True)
            #         except:
            #             print("halting aggregation")
            #             stop_aggregation = True
            #
            #     if not stop_aggregation:
            #         # print("updating from global model inside the parmeter server")
            #         for k in range(cfa_consensus.layers):
            #             # model_weights[k] = model_weights[k]+ 0.5*(model_global[k]-model_weights[k])
            #             model_weights[k] = model_global[k]
            #         model.set_weights(model_weights.tolist())
            #
            #     while not os.path.isfile(global_epoch):
            #         # implementing consensus
            #         print("waiting")
            #         pause(1)
            #     try:
            #         epoch_global = np.load(global_epoch, allow_pickle=True)
            #     except:
            #         pause(5)
            #         print("retrying opening global epoch counter")
            #         try:
            #             epoch_global = np.load(global_epoch, allow_pickle=True)
            #         except:
            #             print("halting aggregation")

            del model_weights

        #start = time.time()
        # validation tool for device 'device_index'
        if epoch_count > validation_start and frame_count % number_of_batches == 0:
            avg_cost = 0.
            for i in range(number_of_batches_for_validation):
                obs_valid, labels_valid = data_handle.getTestData(
                    batch_size, i)
                # obs_valid, labels_valid = data_handle.getRandomTestData(batch_size)
                data_valid = preprocess_observation(np.squeeze(obs_valid),
                                                    batch_size)
                data_sample = np.array(data_valid)
                label_sample = np.array(labels_valid)
                # Create a mask to calculate loss
                masks = tf.one_hot(label_sample, n_outputs)
                classes = model(data_sample, training=False)
                # Apply the masks
                # class_v = tf.reduce_sum(tf.multiply(classes, masks), axis=1)
                # class_v = np.zeros(batch_size, dtype=int)
                # for k in range(batch_size):
                #     class_v[k] = tf.argmax(classes[k]).numpy()
                # Calculate loss
                # loss = loss_function(label_sample, classes)
                loss = loss_function(masks, classes).numpy()
                avg_cost += loss / number_of_batches_for_validation  # Training loss
            epoch_loss_history.append(avg_cost)
            print("Device {} epoch count {}, validation loss {:.2f}".format(
                device_index, epoch_count, avg_cost))
            # running loss = mean over the slice [-1:], i.e. only the most recent validation loss
            running_loss = np.mean(epoch_loss_history[-1:])
        #end = time.time()
        #time_count = (end - start)
        #print(time_count)

        if running_loss < target_loss:  # Condition to consider the task solved
            print(
                "Solved for device {} at epoch {} with average loss {:.2f} !".
                format(device_index, epoch_count, running_loss))
            training_end = True
            model_weights = np.asarray(model.get_weights())
            model.save(checkpointpath1,
                       include_optimizer=True,
                       save_format='h5')
            # model_target.save(checkpointpath2, include_optimizer=True, save_format='h5')
            np.savez(outfile,
                     frame_count=frame_count,
                     epoch_loss_history=epoch_loss_history,
                     training_end=training_end,
                     epoch_count=epoch_count,
                     loss=running_loss)
            np.save(outfile_models, model_weights)

            if federated:
                dict_1 = {
                    "epoch_loss_history": epoch_loss_history,
                    "federated": federated,
                    "parameter_server": parameter_server,
                    "devices": devices,
                    "neighbors": args.N,
                    "active_devices": args.Ka_consensus,
                    "batches": number_of_batches,
                    "batch_size": batch_size,
                    "samples": samples,
                    "noniid": args.noniid_assignment,
                    "data_distribution": args.random_data_distribution
                }
            elif parameter_server:
                dict_1 = {
                    "epoch_loss_history": epoch_loss_history,
                    "federated": federated,
                    "parameter_server": parameter_server,
                    "devices": devices,
                    "active_devices": active_devices_per_round,
                    "batches": number_of_batches,
                    "batch_size": batch_size,
                    "samples": samples,
                    "noniid": args.noniid_assignment,
                    "data_distribution": args.random_data_distribution
                }
            else:
                dict_1 = {
                    "epoch_loss_history": epoch_loss_history,
                    "federated": federated,
                    "parameter_server": parameter_server,
                    "devices": devices,
                    "batches": number_of_batches,
                    "batch_size": batch_size,
                    "samples": samples,
                    "noniid": args.noniid_assignment,
                    "data_distribution": args.random_data_distribution
                }

            if federated:
                sio.savemat(
                    "results/matlab/CFA_device_{}_samples_{}_devices_{}_active_{}_neighbors_{}_batches_{}_size{}_noniid{}_run{}_distribution{}.mat"
                    .format(device_index, samples, devices, args.Ka_consensus,
                            args.N, number_of_batches, batch_size,
                            args.noniid_assignment, args.run,
                            args.random_data_distribution), dict_1)
                sio.savemat(
                    "CFA_device_{}_samples_{}_devices_{}_neighbors_{}_batches_{}_size{}.mat"
                    .format(device_index, samples, devices, args.N,
                            number_of_batches, batch_size), dict_1)
            elif parameter_server:
                sio.savemat(
                    "results/matlab/FA_device_{}_samples_{}_devices_{}_active_{}_batches_{}_size{}_noniid{}_run{}_distribution{}.mat"
                    .format(device_index, samples, devices,
                            active_devices_per_round, number_of_batches,
                            batch_size, args.noniid_assignment, args.run,
                            args.random_data_distribution), dict_1)
                sio.savemat(
                    "FA_device_{}_samples_{}_devices_{}_active_{}_batches_{}_size{}.mat"
                    .format(device_index, samples, devices,
                            active_devices_per_round, number_of_batches,
                            batch_size), dict_1)
            else:  # CL
                sio.savemat(
                    "results/matlab/CL_samples_{}_devices_{}_batches_{}_size{}_noniid{}_run{}_distribution{}.mat"
                    .format(samples, devices, number_of_batches, batch_size,
                            args.noniid_assignment, args.run,
                            args.random_data_distribution), dict_1)
            break

        if epoch_count > max_epochs:  # stop simulation
            print("Unsolved for device {} at epoch {}!".format(
                device_index, epoch_count))
            training_end = True
            model_weights = np.asarray(model.get_weights())
            model.save(checkpointpath1,
                       include_optimizer=True,
                       save_format='h5')
            # model_target.save(checkpointpath2, include_optimizer=True, save_format='h5')
            np.savez(outfile,
                     frame_count=frame_count,
                     epoch_loss_history=epoch_loss_history,
                     training_end=training_end,
                     epoch_count=epoch_count,
                     loss=running_loss)
            np.save(outfile_models, model_weights)

            if federated:
                dict_1 = {
                    "epoch_loss_history": epoch_loss_history,
                    "federated": federated,
                    "parameter_server": parameter_server,
                    "devices": devices,
                    "neighbors": args.N,
                    "active_devices": args.Ka_consensus,
                    "batches": number_of_batches,
                    "batch_size": batch_size,
                    "samples": samples,
                    "noniid": args.noniid_assignment,
                    "data_distribution": args.random_data_distribution
                }
            elif parameter_server:
                dict_1 = {
                    "epoch_loss_history": epoch_loss_history,
                    "federated": federated,
                    "parameter_server": parameter_server,
                    "devices": devices,
                    "active_devices": active_devices_per_round,
                    "batches": number_of_batches,
                    "batch_size": batch_size,
                    "samples": samples,
                    "noniid": args.noniid_assignment,
                    "data_distribution": args.random_data_distribution
                }
            else:
                dict_1 = {
                    "epoch_loss_history": epoch_loss_history,
                    "federated": federated,
                    "parameter_server": parameter_server,
                    "devices": devices,
                    "batches": number_of_batches,
                    "batch_size": batch_size,
                    "samples": samples,
                    "noniid": args.noniid_assignment,
                    "data_distribution": args.random_data_distribution
                }

            if federated:
                sio.savemat(
                    "results/matlab/CFA_device_{}_samples_{}_devices_{}_active_{}_neighbors_{}_batches_{}_size{}_noniid{}_run{}_distribution{}.mat"
                    .format(device_index, samples, devices, args.Ka_consensus,
                            args.N, number_of_batches, batch_size,
                            args.noniid_assignment, args.run,
                            args.random_data_distribution), dict_1)
                sio.savemat(
                    "CFA_device_{}_samples_{}_devices_{}_neighbors_{}_batches_{}_size{}.mat"
                    .format(device_index, samples, devices, args.N,
                            number_of_batches, batch_size), dict_1)
            elif parameter_server:
                sio.savemat(
                    "results/matlab/FA_device_{}_samples_{}_devices_{}_active_{}_batches_{}_size{}_noniid{}_run{}_distribution{}.mat"
                    .format(device_index, samples, devices,
                            active_devices_per_round, number_of_batches,
                            batch_size, args.noniid_assignment, args.run,
                            args.random_data_distribution), dict_1)
                sio.savemat(
                    "FA_device_{}_samples_{}_devices_{}_active_{}_batches_{}_size{}.mat"
                    .format(device_index, samples, devices,
                            active_devices_per_round, number_of_batches,
                            batch_size), dict_1)
            else:  # CL
                sio.savemat(
                    "results/matlab/CL_samples_{}_devices_{}_batches_{}_size{}_noniid{}_run{}_distribution{}.mat"
                    .format(samples, devices, number_of_batches, batch_size,
                            args.noniid_assignment, args.run,
                            args.random_data_distribution), dict_1)
            break
    def train_step(self, input_seq, target_seq):
        """
        Run one teacher-forced training step and update the model weights.

        The input sequence is encoded, then the decoder is unrolled over
        axis 1 of ``target_seq``. At every step the decoder receives the
        ground-truth value of the previous step as input (all zeros for the
        first step) and the mean MSE between its prediction and the current
        ground-truth slice is added to the running loss. Gradients of the
        accumulated loss are applied to the trainable variables of both the
        encoder and the decoder.

        :param input_seq: batch handed to ``self.encoder``
        :param target_seq: ground-truth tensor, read as ``target_seq[:, t]``;
            ``target_seq.shape[1]`` is the number of decoding steps
        :return: ``{'TEDM': {'Reconstruction Loss': batch_loss}}`` where
            ``batch_loss`` is the accumulated loss divided by the step count
        """

        # initialize loss
        loss = 0
        time_steps = target_seq.shape[1]

        # initialize encoder hidden state
        enc_hidden = self.encoder.encoderA.initialize_hidden_state(self.encoder.encoderA.batch_size)

        with tf.GradientTape() as tape:
            # pass through encoder
            enc_output, enc_hidden = self.encoder(input_seq, enc_hidden, True)

            # the decoder starts from the encoder state and an all-zero input
            dec_hidden = enc_hidden
            dec_input = tf.zeros(target_seq[:, 0].shape)

            # teacher forcing: feed the ground truth of step t as input of step t + 1
            for t in range(time_steps):
                prediction, dec_hidden, _ = self.decoder(dec_input, dec_hidden, enc_output, True)

                # accumulate the mean squared error of this time step
                loss += tf.reduce_mean(tf.keras.losses.MSE(target_seq[:, t], prediction))

                dec_input = target_seq[:, t]

        # calculate average batch loss (reported value; gradients use the summed loss)
        batch_loss = (loss / time_steps)

        # The decoder takes part in the forward pass, so its weights must be
        # optimised together with the encoder's; updating the encoder alone
        # would leave the decoder at its initial weights.
        variables = list(self.encoder.trainable_variables) + list(self.decoder.trainable_variables)

        # get the gradients (the non-persistent tape is released after this call)
        gradients = tape.gradient(loss, variables)

        # apply gradients to variables
        self.optimizer.apply_gradients(zip(gradients, variables))

        loss_dict = {
            'TEDM':
                {
                    'Reconstruction Loss': batch_loss
                }
        }

        return loss_dict