Exemplo n.º 1
0
 def __init__(self, env):
     """Set up the agent: policy network, optimizer and hyper-parameters.

     Args:
         env: Environment object exposing ``observation_space.shape`` and
             ``action_space.n`` (presumably a Gym environment with a
             discrete action space -- TODO confirm against the caller).
     """
     self.env = env
     # Network input/output sizes are read straight from the environment.
     self.num_obs = env.observation_space.shape[0]
     self.num_actions = env.action_space.n
     self.network = PGN(self.num_obs, self.num_actions)
     self.gamma = 0.99  # presumably the reward discount factor -- confirm
     self.lr = 1e-3
     self.train_episodes = 4
     # ``lr`` is only a deprecated alias in tf.keras optimizers (and is
     # rejected by newer Keras releases); ``learning_rate`` is the
     # supported keyword.
     self.optimizer = tf.keras.optimizers.Adam(learning_rate=self.lr)
     self.print_every = 10
Exemplo n.º 2
0
def main(_):
    """Train, load, test and/or visualise a PGN on the bouncing-balls data.

    Everything is driven by ``FLAGS``: ``is_train`` selects training versus
    loading a checkpoint, ``is_test`` writes the test result to
    ``<sample_dir>/test_out.txt`` and ``is_visualize`` writes prediction GIFs
    and an MSE convergence plot into ``sample_dir``.

    Args:
        _: Ignored positional argument (presumably the argv list handed over
            by the TensorFlow app runner -- TODO confirm).
    """
    pp.pprint(flags.FLAGS.__flags)

    if not os.path.exists(FLAGS.checkpoint_dir):
        os.makedirs(FLAGS.checkpoint_dir)
    if not os.path.exists(FLAGS.sample_dir):
        os.makedirs(FLAGS.sample_dir)

    # Fixed seeds for the three random number generators used in this script.
    random.seed(31241)
    np.random.seed(41982)
    tf.set_random_seed(1327634)

    color = True  # Must change this and the dataset Flags to the correct path to use color
    if FLAGS.is_debug:
        # Debug runs use explicitly reduced split sizes.
        reader = Bouncing_Balls_Data_Reader(
            FLAGS.dataset, FLAGS.batch_size, color=color,
            train_size=160*5, validation_size=8*5, test_size=8*5,
            num_partitions=5)
    else:
        reader = Bouncing_Balls_Data_Reader(
            FLAGS.dataset, FLAGS.batch_size, color=color)

    # The epoch argument is accepted but not used: batches are addressed by
    # index only.
    data_fn = lambda epoch, batch_index: reader.read_data(batch_index, reader.TRAIN)
    frame_shape = reader.read_data(0, reader.TRAIN).shape[2:]
    print("Frame shape: ", frame_shape)
    num_batches = reader.num_batches(reader.TRAIN)
    print("Num batches: %d" % num_batches)
    input_sequence_range = range(5, 16)
    print("Input sequence range min: %d, max: %d" % (min(input_sequence_range), max(input_sequence_range)))

    save_sample_fn = utils.gen_save_sample_fn(FLAGS.sample_dir, image_prefix="train")

    with tf.Session() as sess:
        pgn = PGN(sess, FLAGS.dataset_name, FLAGS.epoch, num_batches,
                  FLAGS.batch_size, input_sequence_range, data_fn,
                  frame_shape=frame_shape, save_sample_fn=save_sample_fn,
                  checkpoint_dir=FLAGS.checkpoint_dir,
                  lambda_adv_loss=FLAGS.lambda_adv_loss)

        if FLAGS.is_train:
            pgn.train()
        else:
            print("Loading from: %s" % (FLAGS.checkpoint_dir,))
            if pgn.load(FLAGS.checkpoint_dir):
                print(" [*] Successfully loaded")
            else:
                print(" [!] Load failed")

        if FLAGS.is_test:
            result = test.test(pgn, reader)
            result_str = pp.pformat(result)
            out_path = os.path.join(FLAGS.sample_dir, 'test_out.txt')
            # The context manager closes the file even if the write raises.
            # NOTE(review): ``unicode`` only exists on Python 2 (or via a
            # shim defined elsewhere in the file) -- confirm the interpreter.
            with open(out_path, mode='w') as fid:
                fid.write(unicode(result_str))

        if FLAGS.is_visualize:
            for i in range(3):
                # Keep only the first sequence of the size-1 test batch.
                vid_seq = reader.read_data(i, data_set_type=reader.TEST, batch_size=1)[:, 0, :, :, :]
                utils.make_prediction_gif(pgn, os.path.join(FLAGS.sample_dir, 'vis_%d.gif' % i), video_sequence=vid_seq)
            utils.plot_convergence(pgn.get_MSE_history(), "MSE Convergence",
                                   path=os.path.join(FLAGS.sample_dir, "vis_MSE_convergence.png"))
def train(params):
    """Build the PGN model and its batcher, restore a checkpoint, then train.

    Args:
        params (dict): Run configuration. This function reads the keys
            ``"mode"`` (must be ``"train"``, case-insensitive),
            ``"data_dir"``, ``"vocab_path"`` and ``"model_dir"``; the whole
            dict is also forwarded to ``PGN``, ``batcher`` and
            ``train_model``.

    Raises:
        AssertionError: If ``params["mode"]`` is not ``"train"``.
    """
    assert params["mode"].lower() == "train", "change training mode to 'train'"

    tf.compat.v1.logging.info("Building the model ...")
    model = PGN(params)

    tf.compat.v1.logging.info("Creating the batcher ...")
    b = batcher(params["data_dir"], params["vocab_path"], params)

    tf.compat.v1.logging.info("Creating the checkpoint manager")
    # Checkpoints live under <model_dir>/checkpoint; at most 11 are kept.
    checkpoint_dir = "{}/checkpoint".format(params["model_dir"])
    ckpt = tf.train.Checkpoint(step=tf.Variable(0), PGN=model)
    ckpt_manager = tf.train.CheckpointManager(ckpt,
                                              checkpoint_dir,
                                              max_to_keep=11)

    # The restore call is issued even when latest_checkpoint is None; the
    # branch below only reports which case occurred.
    ckpt.restore(ckpt_manager.latest_checkpoint)
    if ckpt_manager.latest_checkpoint:
        print("Restored from {}".format(ckpt_manager.latest_checkpoint))
    else:
        print("Initializing from scratch.")

    tf.compat.v1.logging.info("Starting the training ...")
    train_model(model, b, params, ckpt, ckpt_manager)
def test(params):
    """Restore a trained PGN and lazily yield one beam-decoded result per batch.

    This is a generator: nothing runs until the first item is requested.

    Args:
        params (dict): Run configuration. Keys read here: ``"mode"``
            (``"test"`` or ``"eval"``, case-insensitive), ``"beam_size"``,
            ``"batch_size"`` (the two must be equal), ``"vocab_path"``,
            ``"vocab_size"``, ``"embed_size"``, ``"vector_path"``,
            ``"data_dir"``, ``"checkpoint_dir"`` and ``"model_path"``.

    Yields:
        Whatever ``beam_decode`` returns for each batch from the batcher.

    Raises:
        AssertionError: On a wrong mode or when beam size != batch size.
    """
    mode = params["mode"].lower()
    assert mode in ["test", "eval"], "change training mode to 'test' or 'eval'"
    print(params["beam_size"], params["batch_size"])
    sizes_match = params["beam_size"] == params["batch_size"]
    assert sizes_match, "Beam size must be equal to batch_size, change the params"

    print("Creating the vocab ...")
    vocab = Vocab(params["vocab_path"], params["vocab_size"])

    embeddings_matrix = get_embedding(
        params["vocab_size"], params["embed_size"], vocab,
        params['vector_path'])

    tf.compat.v1.logging.info("Building the model ...")
    model = PGN(params, embeddings_matrix)

    print("Creating the batcher ...")
    batches = batcher(params["data_dir"], vocab, params)

    print("Creating the checkpoint manager")
    checkpoint_dir = "{}".format(params["checkpoint_dir"])
    checkpoint = tf.train.Checkpoint(step=tf.Variable(0), PGN=model)
    manager = tf.train.CheckpointManager(
        checkpoint, checkpoint_dir, max_to_keep=11)

    # An explicit model path takes precedence over the newest checkpoint.
    restore_path = params["model_path"] or manager.latest_checkpoint
    checkpoint.restore(restore_path)
    print("Model restored")

    for current_batch in batches:
        yield beam_decode(model, current_batch, vocab, params)
def train(params):
    """Prepare vocabulary, embeddings, model and checkpointing, then train.

    Args:
        params (dict): Run configuration. Keys read here: ``"mode"``
            (must be ``"train"``, case-insensitive), ``"vocab_path"``,
            ``"vocab_size"``, ``"embed_size"``, ``"vector_path"``,
            ``"data_dir"`` and ``"checkpoint_dir"``; the dict is also
            forwarded to ``PGN``, ``batcher`` and ``train_model``.

    Raises:
        AssertionError: If ``params["mode"]`` is not ``"train"``.
    """
    mode = params["mode"].lower()
    assert mode == "train", "change training mode to 'train'"

    print("Creating the vocab from :", params["vocab_path"])
    vocab = Vocab(params["vocab_path"], params["vocab_size"])

    print("Creating the embedding_matrix from:", params["vector_path"])
    embeddings_matrix = get_embedding(
        params["vocab_size"], params["embed_size"], vocab,
        params['vector_path'])

    tf.compat.v1.logging.info("Building the model ...")
    model = PGN(params, embeddings_matrix)

    print("Creating the batcher ...")
    batches = batcher(params["data_dir"], vocab, params)

    print("Creating the checkpoint manager")
    checkpoint_dir = "{}".format(params["checkpoint_dir"])
    checkpoint = tf.train.Checkpoint(step=tf.Variable(0), PGN=model)
    manager = tf.train.CheckpointManager(
        checkpoint, checkpoint_dir, max_to_keep=11)

    # Restore is attempted unconditionally; the message reports the outcome.
    checkpoint.restore(manager.latest_checkpoint)
    if not manager.latest_checkpoint:
        print("Initializing from scratch.")
    else:
        print("Restored from {}".format(manager.latest_checkpoint))
    tf.compat.v1.logging.info("Starting the training ...")
    train_model(model, batches, params, checkpoint, manager, "output.txt")
Exemplo n.º 6
0
    def __init__(self):
        """Build the vocabulary, the PGN model and the stop-word id list.

        All settings (device, data paths, truncation limits, embedding and
        stop-word files) are read from the module-level ``config``.
        """
        self.DEVICE = config.DEVICE

        dataset = PairDataset(config.data_path,
                              max_src_len=config.max_src_len,
                              max_tgt_len=config.max_tgt_len,
                              truncate_src=config.truncate_src,
                              truncate_tgt=config.truncate_tgt)

        self.vocab = dataset.build_vocab(embed_file=config.embed_file)

        self.model = PGN(self.vocab)
        # Read the stop words inside a context manager so the file handle is
        # closed deterministically; the set removes duplicate ids.
        with open(config.stop_word_file, encoding='utf-8') as stop_word_file:
            self.stop_word = list(
                {self.vocab[line.strip()] for line in stop_word_file})
        self.model.load_model()
        self.model.to(self.DEVICE)
    def __init__(self):
        """Load the vocabulary and dataset, build the PGN and the stop-word ids.

        A pickled vocabulary at ``config.vocab`` is reused when that file
        exists; otherwise ``None`` is handed to ``SamplesDataset``. In both
        cases the vocabulary exposed by the dataset is the one kept.
        """
        self.DEVICE = config.device

        self.vocab = None
        if os.path.exists(config.vocab):
            # NOTE(review): pickle.load executes arbitrary code from the
            # file; only point config.vocab at a trusted file.
            with open(config.vocab, 'rb') as f:
                self.vocab = pickle.load(f)

        self.dataset = SamplesDataset(config.train_data_path, vocab=self.vocab)
        self.vocab = self.dataset.vocab

        self.model = PGN(self.vocab)
        # Read the stop words inside a context manager so the file handle is
        # closed deterministically; the set removes duplicate ids.
        # NOTE(review): the file is decoded with the platform default
        # encoding -- confirm whether an explicit encoding is needed.
        with open(config.stop_word_file) as stop_word_file:
            self.stop_word = list(
                {self.vocab[line.strip()] for line in stop_word_file})
        self.model.load_model()
        self.model.to(self.DEVICE)
Exemplo n.º 8
0
def main(args):
    """Train the PGN parsing network with SGD and a per-epoch polynomial LR.

    Args:
        args: Namespace-like object. Attributes read here: ``model_path``,
            ``train_dir``, ``batch_size``, ``num_workers``, ``num_epochs``,
            ``learning_rate``, ``momentum``, ``weight_decay``, ``log_step``
            and ``save_step``.
    """
    # exist_ok avoids the race between checking for and creating the folder.
    os.makedirs(args.model_path, exist_ok=True)
    train_set = TrainImageFolder(args.train_dir)
    data_loader = torch.utils.data.DataLoader(train_set,
                                              batch_size=args.batch_size,
                                              shuffle=True,
                                              num_workers=args.num_workers)
    model = nn.DataParallel(PGN()).cuda()
    # ``reduce=False`` is deprecated; ``reduction='none'`` is the supported
    # spelling of the same per-element loss.
    criterion = nn.CrossEntropyLoss(reduction='none').cuda()
    params = list(model.parameters())

    total_step = len(data_loader)
    # NOTE(review): the loop starts at the hard-coded epoch 134, presumably
    # to resume an earlier run -- confirm, since no weights are loaded here.
    for epoch in range(134, args.num_epochs):
        lr_ = lr_poly(args.learning_rate, epoch * total_step,
                      args.num_epochs * total_step, 0.9)
        # A fresh optimizer per epoch applies the new learning rate; this
        # also discards the previous optimizer's momentum state.
        optimizer = torch.optim.SGD(params,
                                    lr=lr_,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)
        for i, (images, parse) in enumerate(data_loader):
            images = images.cuda()
            parse = parse.long().cuda()
            # The model returns seven outputs; only the first one feeds the
            # loss below.
            parsing_out1, parsing_out2, edge_out1_final, edge_out_res5, edge_out_res4, edge_out_res3, edge_out2_final = model(
                images)
            loss = criterion(parsing_out1, parse).mean()
            model.zero_grad()

            loss.backward()
            optimizer.step()

            # Print log info
            if i % args.log_step == 0:
                print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(
                    epoch, args.num_epochs, i, total_step, loss.item()))
            if (i + 1) % args.save_step == 0:
                torch.save(
                    model.state_dict(),
                    os.path.join(args.model_path,
                                 'model-{}-{}.ckpt'.format(epoch + 1, i + 1)))
Exemplo n.º 9
0
def predict():
    """Run the PGN on every image in ``LIP/testing_images`` and save label maps.

    Each image is converted to RGB, pushed through the model, reduced to a
    per-pixel argmax over the first axis of the output, resized back to the
    input size with nearest-neighbour sampling and written as a PNG into
    ``LIP/test_save/``.
    """
    model = nn.DataParallel(PGN()).cuda()
    # NOTE(review): no weights are loaded (the load_state_dict call below is
    # disabled) and the model is left in training mode -- confirm intent.
    #model.load_state_dict(torch.load('models/model-134-2539.ckpt'))
    data_dir = 'LIP/testing_images'
    # Inference only: no_grad avoids building an autograd graph per image.
    with torch.no_grad():
        for file_name in os.listdir(data_dir):
            image = Image.open(os.path.join(data_dir, file_name)).convert('RGB')
            width, height = image.size[0], image.size[1]
            # HWC image array -> CHW float tensor with a leading batch axis.
            image = torch.Tensor(
                np.array(image).astype(np.float32).transpose(
                    (2, 0, 1))).unsqueeze(0).cuda()
            label_map = Image.fromarray(
                np.argmax(model(image).cpu().detach().numpy()[0],
                          axis=0).astype(np.uint8))
            label_map = label_map.resize((width, height), Image.NEAREST)

            # splitext handles extensions of any length (e.g. ".jpeg"),
            # unlike slicing off the last four characters.
            stem = os.path.splitext(file_name)[0]
            label_map.save('LIP/test_save/' + stem + '.png',
                           quality=95,
                           subsampling=0)
Exemplo n.º 10
0
def train(dataset, val_dataset, v, start_epoch=0):
    """Train the model, evaluate it and store it.

    After every epoch the model is evaluated; when the validation loss
    improves on the best value seen so far (restored from
    ``config.losses_path`` if that file exists) the encoder, decoder,
    attention and reduce_state modules are saved. The best loss is written
    back to ``config.losses_path`` after every epoch.

    Args:
        dataset (dataset.PairDataset): The training dataset.
        val_dataset (dataset.PairDataset): The evaluation dataset.
        v (vocab.Vocab): The vocabulary built from the training dataset.
        start_epoch (int, optional): The starting epoch number. Defaults to 0.
    """

    DEVICE = torch.device("cuda" if config.is_cuda else "cpu")

    model = PGN(v)
    model.load_model()
    model.to(DEVICE)
    if config.fine_tune:
        # In fine-tuning mode, we fix the weights of all parameters except attention.wc.
        print('Fine-tuning mode.')
        for name, params in model.named_parameters():
            if name != 'attention.wc.weight':
                params.requires_grad = False
    # forward
    print("loading data")
    train_data = SampleDataset(dataset.pairs, v)
    val_data = SampleDataset(val_dataset.pairs, v)

    print("initializing optimizer")

    # Define the optimizer.
    optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)
    train_dataloader = DataLoader(dataset=train_data,
                                  batch_size=config.batch_size,
                                  shuffle=True,
                                  collate_fn=collate_fn)

    # Best validation loss so far; restored from disk when available.
    # NOTE(review): pickle.load executes arbitrary code from the file; only
    # use a losses file this program wrote itself.
    val_losses = np.inf
    if os.path.exists(config.losses_path):
        with open(config.losses_path, 'rb') as f:
            val_losses = pickle.load(f)

    # SummaryWriter: Log writer used for TensorboardX visualization.
    writer = SummaryWriter(config.log_path)
    # scheduled_sampler : A tool for choosing teacher_forcing or not
    num_epochs = len(range(start_epoch, config.epochs))
    scheduled_sampler = ScheduledSampler(num_epochs)
    if config.scheduled_sampling:
        print('scheduled_sampling mode.')

    # tqdm: A tool for drawing progress bars during training.
    with tqdm(total=config.epochs) as epoch_progress:
        for epoch in range(start_epoch, config.epochs):
            print(config_info(config))
            batch_losses = []  # Get loss of each batch.
            num_batches = len(train_dataloader)
            # set a teacher_forcing signal
            if config.scheduled_sampling:
                teacher_forcing = scheduled_sampler.teacher_forcing(
                    epoch - start_epoch)
            else:
                teacher_forcing = True
            print('teacher_forcing = {}'.format(teacher_forcing))
            with tqdm(total=num_batches) as batch_progress:
                # Iterate the loader directly: wrapping it in a second tqdm
                # drew two competing bars for the same loop.
                for batch, data in enumerate(train_dataloader):
                    x, y, x_len, y_len, oov, len_oovs = data
                    assert not np.any(np.isnan(x.numpy()))
                    if config.is_cuda:  # Training with GPUs.
                        x = x.to(DEVICE)
                        y = y.to(DEVICE)
                        x_len = x_len.to(DEVICE)
                        len_oovs = len_oovs.to(DEVICE)

                    model.train()  # Sets the module in training mode.
                    optimizer.zero_grad()  # Clear gradients.
                    # Calculate loss.  Call model forward propagation
                    loss = model(x,
                                 x_len,
                                 y,
                                 len_oovs,
                                 batch=batch,
                                 num_batches=num_batches,
                                 teacher_forcing=teacher_forcing)
                    batch_losses.append(loss.item())
                    loss.backward()  # Backpropagation.

                    # Do gradient clipping to prevent gradient explosion.
                    clip_grad_norm_(model.encoder.parameters(),
                                    config.max_grad_norm)
                    clip_grad_norm_(model.decoder.parameters(),
                                    config.max_grad_norm)
                    clip_grad_norm_(model.attention.parameters(),
                                    config.max_grad_norm)
                    optimizer.step()  # Update weights.

                    # Advance the bar once per batch so it reaches its total.
                    batch_progress.update()

                    # Refresh the bar text and record the running loss
                    # every 32 batches.
                    if (batch % 32) == 0:
                        batch_progress.set_description(f'Epoch {epoch}')
                        batch_progress.set_postfix(Batch=batch,
                                                   Loss=loss.item())
                        # Write loss for tensorboard.
                        writer.add_scalar(f'Average loss for epoch {epoch}',
                                          np.mean(batch_losses),
                                          global_step=batch)
            # Calculate average loss over all batches in an epoch.
            epoch_loss = np.mean(batch_losses)

            epoch_progress.set_description(f'Epoch {epoch}')
            epoch_progress.set_postfix(Loss=epoch_loss)
            epoch_progress.update()

            avg_val_loss = evaluate(model, val_data, epoch)

            print('training loss:{}'.format(epoch_loss),
                  'validation loss:{}'.format(avg_val_loss))

            # Update minimum evaluating loss.
            if avg_val_loss < val_losses:
                torch.save(model.encoder, config.encoder_save_name)
                torch.save(model.decoder, config.decoder_save_name)
                torch.save(model.attention, config.attention_save_name)
                torch.save(model.reduce_state, config.reduce_state_save_name)
                val_losses = avg_val_loss
            with open(config.losses_path, 'wb') as f:
                pickle.dump(val_losses, f)

    writer.close()
Exemplo n.º 11
0
class Predict():
    """Summary generator wrapping a trained PGN with greedy and beam search."""

    @timer(module='initalize predicter')
    def __init__(self):
        """Build the vocabulary, load the PGN and collect the stop-word ids.

        All settings are read from the module-level ``config``.
        """
        self.DEVICE = config.DEVICE

        dataset = PairDataset(config.data_path,
                              max_src_len=config.max_src_len,
                              max_tgt_len=config.max_tgt_len,
                              truncate_src=config.truncate_src,
                              truncate_tgt=config.truncate_tgt)

        self.vocab = dataset.build_vocab(embed_file=config.embed_file)

        self.model = PGN(self.vocab)
        # Read the stop words inside a context manager so the file handle is
        # closed deterministically; the set removes duplicate ids.
        with open(config.stop_word_file, encoding='utf-8') as stop_word_file:
            self.stop_word = list(
                {self.vocab[line.strip()] for line in stop_word_file})
        self.model.load_model()
        self.model.to(self.DEVICE)

    def greedy_search(self, x, max_sum_len, len_oovs, x_padding_masks):
        """Function which returns a summary by always picking
           the highest probability option conditioned on the previous word.

        Args:
            x (Tensor): Input sequence as the source.
            max_sum_len (int): The maximum length a summary can have.
            len_oovs (Tensor): Numbers of out-of-vocabulary tokens.
            x_padding_masks (Tensor):
                The padding masks for the input sequences
                with shape (batch_size, seq_len).

        Returns:
            summary (list): The token list of the result summary.
        """

        # Get encoder output and states. Call encoder forward propagation.
        encoder_output, encoder_states = self.model.encoder(
            replace_oovs(x, self.vocab), self.model.decoder.embedding)

        # Initialize decoder's hidden states with encoder's hidden states.
        decoder_states = self.model.reduce_state(encoder_states)

        # Initialize decoder's input at time step 0 with the SOS token.
        x_t = torch.ones(1) * self.vocab.SOS
        x_t = x_t.to(self.DEVICE, dtype=torch.int64)
        summary = [self.vocab.SOS]
        coverage_vector = torch.zeros((1, x.shape[1])).to(self.DEVICE)
        # Generate hypothesis with maximum decode step.
        while int(x_t.item()) != (self.vocab.EOS) \
                and len(summary) < max_sum_len:

            context_vector, attention_weights, coverage_vector = \
                self.model.attention(decoder_states,
                                     encoder_output,
                                     x_padding_masks,
                                     coverage_vector)
            p_vocab, decoder_states, p_gen = \
                self.model.decoder(x_t.unsqueeze(1),
                                   decoder_states,
                                   context_vector)
            final_dist = self.model.get_final_distribution(
                x, p_gen, p_vocab, attention_weights, torch.max(len_oovs))
            # Get next token with maximum probability.
            x_t = torch.argmax(final_dist, dim=1).to(self.DEVICE)
            decoder_word_idx = x_t.item()
            summary.append(decoder_word_idx)
            # The raw id is recorded above; the id fed back to the decoder
            # has OOV indexes replaced first.
            x_t = replace_oovs(x_t, self.vocab)

        return summary

    def best_k(self, beam, k, encoder_output, x_padding_masks, x, len_oovs):
        """Get best k tokens to extend the current sequence at the current time step.

        Args:
            beam (untils.Beam): The candidate beam to be extended.
            k (int): Beam size.
            encoder_output (Tensor): The lstm output from the encoder.
            x_padding_masks (Tensor):
                The padding masks for the input sequences.
            x (Tensor): Source token ids.
            len_oovs (Tensor): Number of oov tokens in a batch.

        Returns:
            best_k (list(Beam)): The list of best k candidates.

        """
        # use decoder to generate vocab distribution for the next token
        x_t = torch.tensor(beam.tokens[-1]).reshape(1, 1)
        x_t = x_t.to(self.DEVICE)

        # Get context vector from attention network.
        context_vector, attention_weights, coverage_vector = \
            self.model.attention(beam.decoder_states,
                                 encoder_output,
                                 x_padding_masks,
                                 beam.coverage_vector)

        # Replace the indexes of OOV words with the index of OOV token
        # to prevent index-out-of-bound error in the decoder.
        p_vocab, decoder_states, p_gen = \
            self.model.decoder(replace_oovs(x_t, self.vocab),
                               beam.decoder_states,
                               context_vector)

        final_dist = self.model.get_final_distribution(x, p_gen, p_vocab,
                                                       attention_weights,
                                                       torch.max(len_oovs))
        # Calculate log probabilities.
        log_probs = torch.log(final_dist.squeeze())
        # Filter forbidden tokens; only applied while the beam holds a
        # single token, i.e. for the first generated position.
        if len(beam.tokens) == 1:
            forbidden_ids = [
                self.vocab[u"台独"], self.vocab[u"吸毒"], self.vocab[u"黄赌毒"]
            ]
            log_probs[forbidden_ids] = -float('inf')
        # EOS token penalty. Follow the definition in
        # https://opennmt.net/OpenNMT/translation/beam_search/.
        log_probs[self.vocab.EOS] *= \
            config.gamma * x.size()[1] / len(beam.tokens)

        # Never emit the UNK token.
        log_probs[self.vocab.UNK] = -float('inf')
        # Get top k tokens; only their indexes are needed below.
        _, topk_idx = torch.topk(log_probs, k)

        # Extend the current hypo with top k tokens, resulting k new hypos.
        candidates = [
            beam.extend(token_id, log_probs[token_id], decoder_states,
                        coverage_vector)
            for token_id in topk_idx.tolist()
        ]

        return candidates

    def beam_search(self, x, max_sum_len, beam_width, len_oovs,
                    x_padding_masks):
        """Using beam search to generate summary.

        Args:
            x (Tensor): Input sequence as the source.
            max_sum_len (int): The maximum length a summary can have.
            beam_width (int): Beam size.
            len_oovs (Tensor): Number of out-of-vocabulary tokens.
            x_padding_masks (Tensor):
                The padding masks for the input sequences.

        Returns:
            result (list): The token ids of the best-scoring hypothesis.
        """
        # run body_sequence input through encoder. Call encoder forward propagation
        encoder_output, encoder_states = self.model.encoder(
            replace_oovs(x, self.vocab), self.model.decoder.embedding)
        coverage_vector = torch.zeros((1, x.shape[1])).to(self.DEVICE)
        # initialize decoder states with encoder forward states
        decoder_states = self.model.reduce_state(encoder_states)

        # initialize the hypothesis with a class Beam instance.
        init_beam = Beam([self.vocab.SOS], [0], decoder_states,
                         coverage_vector)

        # get the beam size and create a list for storing current candidates
        # and a list for completed hypothesis
        k = beam_width
        curr, completed = [init_beam], []
        # Loop-invariant: the largest OOV count is the same at every step.
        max_oovs = torch.max(len_oovs)

        # use beam search for max_sum_len (maximum length) steps
        for _ in range(max_sum_len):
            # get k best hypothesis when adding a new token

            topk = []
            for beam in curr:
                # When an EOS token is generated, add the hypo to the completed
                # list and decrease beam size.
                if beam.tokens[-1] == self.vocab.EOS:
                    completed.append(beam)
                    k -= 1
                    continue
                for can in self.best_k(beam, k,
                                       encoder_output, x_padding_masks, x,
                                       max_oovs):
                    # Using topk as a heap to keep track of top k candidates.
                    # Using the sequence scores of the hypos to compare
                    # and object ids to break ties.
                    add2heap(topk, (can.seq_score(), id(can), can), k)

            curr = [items[2] for items in topk]
            # stop when there are enough completed hypothesis
            if len(completed) == beam_width:
                break
        # When there are not enough completed hypotheses,
        # take whatever we have in current best k as the final candidates.
        completed += curr
        # Choose the hypothesis with the highest sequence score; on ties the
        # earliest entry wins, matching a stable descending sort.
        result = max(completed, key=lambda hypo: hypo.seq_score()).tokens
        return result

    @timer(module='doing prediction')
    def predict(self, text, tokenize=True, beam_search=True):
        """Generate summary.

        Args:
            text (str or list): Source.
            tokenize (bool, optional):
                Whether to do tokenize or not. Defaults to True.
            beam_search (bool, optional):
                Whether to use beam search or not.
                Defaults to True (beam search); False uses greedy search.

        Returns:
            str: The final summary.
        """
        if isinstance(text, str) and tokenize:
            text = list(jieba.cut(text))
        x, oov = source2ids(text, self.vocab)
        x = torch.tensor(x).to(self.DEVICE)
        len_oovs = torch.tensor([len(oov)]).to(self.DEVICE)
        # Float mask that is 1.0 wherever the token id is non-zero.
        x_padding_masks = torch.ne(x, 0).byte().float()
        if beam_search:
            summary = self.beam_search(x.unsqueeze(0),
                                       max_sum_len=config.max_dec_steps,
                                       beam_width=config.beam_size,
                                       len_oovs=len_oovs,
                                       x_padding_masks=x_padding_masks)
        else:
            summary = self.greedy_search(x.unsqueeze(0),
                                         max_sum_len=config.max_dec_steps,
                                         len_oovs=len_oovs,
                                         x_padding_masks=x_padding_masks)
        summary = outputids2words(summary, oov, self.vocab)
        return summary.replace('<SOS>', '').replace('<EOS>', '').strip()
def train(dataset, val_dataset, v, start_epoch=0):
    """Train the model, evaluate it and store it.

    After every epoch the model is evaluated. When the validation loss
    improves on the best value so far (restored from the last line of
    ``config.losses_path`` if present) the four sub-modules are saved and the
    new best loss is appended to that file. Training stops early after
    ``config.patience`` consecutive epochs without improvement.

    Args:
        dataset (dataset.PairDataset): The training dataset.
        val_dataset (dataset.PairDataset): The evaluation dataset.
        v (vocab.Vocab): The vocabulary built from the training dataset.
        start_epoch (int, optional): The starting epoch number. Defaults to 0.
    """
    # NOTE(review): anomaly detection is a debugging aid that slows training
    # down -- confirm it should stay enabled.
    torch.autograd.set_detect_anomaly(True)
    DEVICE = torch.device("cuda" if config.is_cuda else "cpu")

    model = PGN(v)
    model.load_model()
    model.to(DEVICE)
    if config.fine_tune:
        # In fine-tuning mode, we fix the weights of all parameters except attention.wc.
        logging.info('Fine-tuning mode.')
        for name, params in model.named_parameters():
            if name != 'attention.wc.weight':
                params.requires_grad = False
    # forward
    logging.info("loading data")
    train_data = dataset
    val_data = val_dataset

    logging.info("initializing optimizer")

    # Define the optimizer.
    optimizer = optim.Adagrad(
        model.parameters(),
        lr=config.learning_rate,
        initial_accumulator_value=config.initial_accumulator_value)
    # Learning-rate schedule.
    # NOTE(review): the scheduler is created but never stepped in this
    # function, so the learning rate stays constant -- confirm intent.
    scheduler = StepLR(optimizer, step_size=10, gamma=0.2)
    train_dataloader = DataLoader(dataset=train_data,
                                  batch_size=config.batch_size,
                                  shuffle=True,
                                  collate_fn=collate_fn)

    # Best validation loss so far: the last "...=<value>" line of the file.
    val_loss = np.inf
    if os.path.exists(config.losses_path):
        with open(config.losses_path, 'r') as f:
            recorded = f.readlines()
        # An existing but empty file simply means no best loss yet.
        if recorded:
            val_loss = float(recorded[-1].split("=")[-1])
            logging.info("the last best val loss is: " + str(val_loss))

    # SummaryWriter: Log writer used for TensorboardX visualization.
    writer = SummaryWriter(config.log_path)
    early_stopping_count = 0

    logging.info("start training model {}, ".format(config.model_name) +
                 "epoch : {}, ".format(config.epochs) +
                 "batch_size : {}, ".format(config.batch_size) +
                 "num batches: {}, ".format(len(train_dataloader)))

    for epoch in range(start_epoch, config.epochs):
        batch_losses = []  # Get loss of each batch.
        num_batches = len(train_dataloader)
        for batch, data in enumerate(train_dataloader):
            x, y, x_len, y_len, oov, len_oovs, img_vec = data
            assert not np.any(np.isnan(x.numpy()))
            if config.is_cuda:  # Training with GPUs.
                x = x.to(DEVICE)
                y = y.to(DEVICE)
                x_len = x_len.to(DEVICE)
                len_oovs = len_oovs.to(DEVICE)
                img_vec = img_vec.to(DEVICE)
            if batch == 0:
                # Dump the first batch of every epoch for inspection.
                logging.info("x: %s, shape: %s" % (x, x.shape))
                logging.info("y: %s, shape: %s" % (y, y.shape))
                logging.info("oov: %s" % oov)
                logging.info("img_vec: %s, shape: %s" %
                             (img_vec, img_vec.shape))

            model.train()  # Sets the module in training mode.
            optimizer.zero_grad()  # Clear gradients.

            loss = model(x,
                         y,
                         len_oovs,
                         img_vec,
                         batch=batch,
                         num_batches=num_batches)
            batch_losses.append(loss.item())
            loss.backward()  # Backpropagation.

            # Do gradient clipping to prevent gradient explosion.
            clip_grad_norm_(model.encoder.parameters(), config.max_grad_norm)
            clip_grad_norm_(model.decoder.parameters(), config.max_grad_norm)
            clip_grad_norm_(model.attention.parameters(), config.max_grad_norm)
            clip_grad_norm_(model.reduce_state.parameters(),
                            config.max_grad_norm)
            optimizer.step()  # Update weights.

            # Output and record the running loss every 100 batches.
            if (batch % 100) == 0:
                # Write loss for tensorboard.
                writer.add_scalar(f'Average_loss_for_epoch_{epoch}',
                                  np.mean(batch_losses),
                                  global_step=batch)
                logging.info('epoch: {}, batch:{}, training loss:{}'.format(
                    epoch, batch, np.mean(batch_losses)))

        # Calculate average loss over all batches in an epoch.
        epoch_loss = np.mean(batch_losses)

        avg_val_loss = evaluate(model, val_data, epoch)

        logging.info('epoch: {} '.format(epoch) +
                     'training loss:{} '.format(epoch_loss) +
                     'validation loss:{} '.format(avg_val_loss))

        # Make sure the save directory exists. makedirs also creates missing
        # parents, and a bare file name (empty dirname) needs no directory.
        save_dir = os.path.dirname(config.encoder_save_name)
        if save_dir:
            os.makedirs(save_dir, exist_ok=True)
        # Update minimum evaluating loss.
        if avg_val_loss < val_loss:
            logging.info("saving model to ../saved_model/ %s" %
                         config.model_name)
            torch.save(model.encoder, config.encoder_save_name)
            torch.save(model.decoder, config.decoder_save_name)
            torch.save(model.attention, config.attention_save_name)
            torch.save(model.reduce_state, config.reduce_state_save_name)
            val_loss = avg_val_loss
            with open(config.losses_path, 'a') as f:
                f.write(f"best val loss={val_loss}\n")
            # Patience counts consecutive epochs without improvement, so an
            # improvement starts the count over.
            early_stopping_count = 0
        else:
            early_stopping_count += 1
        if early_stopping_count >= config.patience:
            logging.info(
                f'Validation loss did not decrease for {config.patience} epochs, stop training.'
            )
            break

    writer.close()
Exemplo n.º 13
0
        (val_extended_input_tokens, val_extended_gt_tokens, val_loss_mask,
         val_index)).batch(int(global_batch_size))
    val_dist_dataset = train_strategy.experimental_distribute_dataset(
        val_tf_dataset)

# Largest token id beyond the vocabulary across the training and validation
# inputs, floored at zero; used below to size the model.
max_oovs_in_text = max(
    0,
    *(np.max(token_ids) - vocab.size() + 1
      for token_ids in (extended_input_tokens, val_extended_input_tokens)))
print('Max oovs in text :', max_oovs_in_text)

#################################################################################################
# Create the model and the loss layer, define the function for distributed training
#################################################################################################

with train_strategy.scope():
    # Model, optimizer and loss are all created inside the strategy scope.
    model = PGN(vocab=vocab, max_oovs_in_text=max_oovs_in_text)
    optimizer = tf.keras.optimizers.Adam(
        learning_rate=0.001, beta_1=0.9, beta_2=0.98, epsilon=1e-9)
    ce_loss = CELoss(alpha=1.)


# def train_step(inputs):
def pretrain_step(extended_input_tokens, extended_gt_tokens, loss_mask, idx):
    model.switch_decoding_mode('cross_entropy')

    with tf.GradientTape() as tape:
        gt_probs, greedy_seqs, coverage_losses = model(extended_input_tokens,
                                                       extended_gt_tokens,
                                                       training=True)
Exemplo n.º 14
0
# Build the validation dataset under the CPU device scope, batch it by the
# global batch size and hand it to the distribution strategy.
with tf.device('CPU'):
    val_tf_dataset = tf.data.Dataset.from_tensor_slices(
        (val_extended_input_tokens, val_extended_gt_tokens, val_loss_mask,
         val_tensor_oovs, val_index)).batch(int(global_batch_size))
    val_dist_dataset = train_strategy.experimental_distribute_dataset(
        val_tf_dataset)

# Largest extended token id in the validation inputs, minus the vocabulary
# size, plus one, floored at 0.
# NOTE(review): presumably ids >= vocab.size() index per-text OOV words -- confirm.
max_oovs_in_text = max(0, np.max(val_extended_input_tokens) - vocab.size() + 1)
print('Max oovs in text :', max_oovs_in_text)

#################################################################################################
# Create the model, define the function for distributed summary generation
#################################################################################################

# The model is created and its weights are restored inside the strategy scope.
with train_strategy.scope():
    model = PGN(vocab=vocab, max_oovs_in_text=max_oovs_in_text)
    model.load_weights(load_model_path)


def eval_step(extended_input_tokens, extended_gt_tokens, loss_mask, oovs, idx):
    """Run the model in 'evaluate' decoding mode and return its greedy sequences.

    Only ``extended_input_tokens`` and ``extended_gt_tokens`` are forwarded to
    the model; ``loss_mask``, ``oovs`` and ``idx`` are accepted so the function
    matches the element layout of the dataset but are not used here.

    Returns:
        The third of the five values the model returns in this mode.
    """
    model.switch_decoding_mode('evaluate')
    outputs = model(extended_input_tokens, extended_gt_tokens, training=False)
    # Unpack rather than index so a model returning a different number of
    # outputs still fails loudly.
    _, _, decoded, _, _ = outputs
    return decoded


@tf.function
def distributed_step(dist_inputs):
    """Run ``eval_step`` on every replica and return the per-replica result.

    ``dist_inputs`` itself is passed as ``args`` (it is not wrapped in a
    one-element tuple), so it must be a sequence that unpacks into the
    positional parameters of ``eval_step``.
    """
    return train_strategy.run(eval_step, args=dist_inputs)
Exemplo n.º 15
0
class Reinforce(object):
    def __init__(self, env):
        """Set up the policy network, optimizer and hyper-parameters.

        Args:
            env: Environment exposing ``observation_space.shape`` and
                ``action_space.n`` (presumably a Gym environment with a flat
                observation vector and discrete actions -- TODO confirm).
        """
        self.env = env
        self.num_obs = env.observation_space.shape[0]
        self.num_actions = env.action_space.n
        self.network = PGN(self.num_obs, self.num_actions)
        self.gamma = 0.99  # discount factor used when accumulating rewards
        self.lr = 1e-3
        self.train_episodes = 4
        # `lr` is a deprecated alias that newer Keras releases reject;
        # `learning_rate` is the supported keyword.
        self.optimizer = tf.keras.optimizers.Adam(learning_rate=self.lr)
        self.print_every = 10  # episodes between progress prints in train()

    def discounted_rewards(self, rewards):
        discounted_rewards = []
        sum_reward = 0
        for reward in reversed(rewards):
            sum_reward = reward + self.gamma * sum_reward
            discounted_rewards.append(sum_reward)
        return discounted_rewards.reverse()

    def loss_fn(self, prob, action, reward):
        """Policy-gradient loss for one step: ``-log pi(action | state) * reward``.

        Args:
            prob: Action probabilities used to build a categorical distribution.
            action: The action whose log-probability is evaluated.
            reward: Weight applied to the negative log-probability.
        """
        policy = tfp.distributions.Categorical(probs=prob, dtype=tf.float32)
        return -policy.log_prob(action) * reward

    def update(self, states, actions, rewards):
        """Apply one policy-gradient step per transition of a finished episode.

        Each step's loss is weighted by the discounted return from that step
        onward, not by the raw immediate reward.

        Args:
            states: Observations of the episode, in chronological order.
            actions: Actions taken at each of those observations.
            rewards: Immediate rewards received after each action.
        """
        # Discounted return of every step, accumulated from the episode end.
        running = 0
        returns = []
        for reward in reversed(rewards):
            running = reward + self.gamma * running
            returns.append(running)
        returns.reverse()

        for state, action, step_return in zip(states, actions, returns):
            with tf.GradientTape() as tape:
                prob = self.network([state], training=True)
                loss = self.loss_fn(prob, action, step_return)
            # Differentiate and step outside the tape context so the tape
            # does not record the gradient computation itself.
            grads = tape.gradient(loss, self.network.trainable_variables)
            self.optimizer.apply_gradients(
                zip(grads, self.network.trainable_variables))

    def train(self, max_episodes, max_steps):
        scores = []
        for ep in range(max_episodes):
            saved_log_probs = []
            states = []
            actions = []
            rewards = []
            state = self.env.reset()
            for t in range(max_steps):
                action = self.network.take_action(state)
                # print(action)
                # saved_log_probs.append(log_prob)
                next_state, reward, done, _ = self.env.step(action)
                states.append(state)
                actions.append(action)
                rewards.append(reward)
                state = next_state
                if done:
                    break
            scores.append(sum(rewards))
            self.update(states, actions, rewards)

            # policy_loss = []

            if ep % self.print_every == 0:
                print("Episode {}\tAverage Score: {:.2f}".format(
                    ep, np.mean(scores)))
            if np.mean(scores) >= 195.0:
                print(
                    "Environment solved in {} episodes!\tAverage Score: {:.2f}"
                    .format(ep, np.mean(scores)))
                break
        return scores

    def play(self, episodes=100, steps=200):
        for ep in range(episodes):
            self.env.render()
            state = self.env.reset()
            for t in range(steps):
                action = self.network.take_action(state)
                next_state, reward, done, _ = self.env.step(action)
                state = next_state
                if done: break