Example #1
def minibatches(data, batch_size):
    print('************************', data[0])
    x = np.array([d[0] for d in data])
    y = np.array([d[2] for d in data])
    one_hot = np.zeros((y.size, 3))
    one_hot[np.arange(y.size), y] = 1
    return get_minibatches([x, one_hot], batch_size)
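Every example on this page relies on a get_minibatches helper that is not shown here, and its signature varies between examples (compare Example #11). As a rough sketch of the usual behaviour, assumed rather than taken from the original project, such a helper shuffles the row indices once and then yields aligned slices of each input array:

import numpy as np

def get_minibatches(data, batch_size, shuffle=True):
    # Minimal sketch (assumed behaviour, not the original implementation):
    # `data` is a list of equally long arrays; yield aligned batches of each.
    n = len(data[0])
    indices = np.arange(n)
    if shuffle:
        np.random.shuffle(indices)
    for start in range(0, n, batch_size):
        batch = indices[start:start + batch_size]
        yield [np.asarray(d)[batch] for d in data]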
Example #2
    def run_epoch(self, sess, config, dataset, train_writer, merged):
        prog = Progbar(target=1 + len(dataset.train_inputs[0]) // config.batch_size)
        for i, (train_x, train_y) in enumerate(get_minibatches([dataset.train_inputs, dataset.train_targets],
                                                               config.batch_size, is_multi_feature_input=True)):

            summary, loss = self.train_on_batch(sess, train_x, train_y, merged)
            prog.update(i + 1, [("train loss", loss)])
            # train_writer.add_summary(summary, global_step=i)
        return summary, loss  # Last batch
Example #3
def minibatches(data, batch_size):
    '''

    :param data: [([feature vector of length n_feature], [0/1 flags, where 1 marks an action that may be taken], true action), ...]
    :param batch_size: size of each minibatch
    :return: minibatches with the true actions one-hot encoded as labels
    '''
    x = np.array([d[0] for d in data])
    y = np.array([d[2] for d in data])
    one_hot = np.zeros((y.size, len(data[0][1])))
    one_hot[np.arange(y.size), y] = 1
    return get_minibatches([x, one_hot], batch_size)
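The one-hot construction above uses NumPy integer-array indexing: one_hot[np.arange(y.size), y] = 1 writes a single 1 into each row, at the column given by that row's label. A tiny self-contained check with made-up labels:

import numpy as np

y = np.array([2, 0, 1])            # illustrative labels only
one_hot = np.zeros((y.size, 3))
one_hot[np.arange(y.size), y] = 1
print(one_hot)
# [[0. 0. 1.]
#  [1. 0. 0.]
#  [0. 1. 0.]]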
Example #4
                              weight_decay=l2)
    RL_optimizer = optim.Adam(RL_model.parameters(),
                              lr=args.lr_RL,
                              weight_decay=l2)
    sentence_reward_noisy = [0 for i in range(args.batchsize)]
    noisy_sentences_vec = Variable(torch.FloatTensor(1, dim).fill_(0))
    for e in range(args.epochRL):
        print("training epoch ", e)

        # random.shuffle(train_data)
        # batchcnt = (len(train_data) - 1) // args.batchsize + 1
        # for b in range(batchcnt):
        # 	# start = time.time()
        # 	datas = train_data[b * args.batchsize: (b + 1) * args.batchsize]

        mini_batches = get_minibatches(dev_datasets, args.batchsize)
        batchcnt = len(
            dev_datasets[0]) // args.batchsize  # len(list(mini_batches))
        for b, data in enumerate(mini_batches):
            if b >= batchcnt:
                break
            sentences, pos_lambda, tags, sentences_words, relation_tags, relation_names = data
            input_tensor, input_length = padding_sequence(
                sentences, pad_token=args.embedding_size)
            pos_tensor, input_length = padding_sequence(pos_lambda,
                                                        pad_token=0)
            target_tensor, target_length = padding_sequence(
                tags, pad_token=args.entity_tag_size)
            relation_target_tensor = padding_sequence_recurr(relation_tags)
            if torch.cuda.is_available():
                input_tensor = Variable(
Example #5
def train(datasets, mode):  # optimizer, criterion, args,
    # JointModel.train()
    if args.use_RL:
        mini_batches = get_bags(datasets, relations, args.batchsize)
        noisy_sentences_vec = Variable(
            torch.FloatTensor(1, args.hidden_dim).fill_(0))
        noisy_vec_mean = torch.mean(noisy_sentences_vec, 0, True)
    else:
        mini_batches = get_minibatches(datasets, args.batchsize)
    batchcnt = len(datasets[0]) // args.batchsize  # len(list(mini_batches))
    logger.info("********************%s data*********************" % mode)
    logger.info("number of batches: %s" % batchcnt)
    NER_correct, NER_total = 0., 0.
    RE_correct, RE_total = 0., 0.
    if mode != 'train':
        # NER_target_all, NER_output_all = None, None
        # RE_target_all, RE_output_all = None, None
        NER_target_all2, NER_output_all2 = [], []
        RE_target_all2, RE_output_all2 = [], []
        NER_output_logits, RE_output_logits = [], []

    for b, data in enumerate(mini_batches):
        if b >= batchcnt:
            break
        sentences, pos_lambda, tags, sentences_words, relation_tags, relation_names = data
        input_tensor, input_length = padding_sequence(sentences, pad_token=0)
        pos_tensor, input_length = padding_sequence(pos_lambda, pad_token=0)
        target_tensor, target_length = padding_sequence(
            tags, pad_token=args.entity_tag_size)  # entity tags
        relation_target_tensor = relation_tags  # padding_sequence_recurr(relation_tags)  		# relation tag
        if torch.cuda.is_available():
            input_tensor = Variable(
                torch.cuda.LongTensor(input_tensor, device=device)).cuda()
            target_tensor = Variable(
                torch.cuda.LongTensor(target_tensor, device=device)).cuda()
            if args.encoder_model == "BiLSTM":
                mask = torch.cuda.ByteTensor(
                    (1 - (target_tensor == args.entity_tag_size))).to(device)
            else:
                mask = torch.cuda.ByteTensor(
                    (1 - (input_tensor == 0))).to(device)
            pos_tensor = Variable(
                torch.cuda.FloatTensor(pos_tensor, device=device)).cuda()
            relation_target_tensor = Variable(
                torch.cuda.LongTensor(relation_target_tensor,
                                      device=device)).cuda()
        else:
            input_tensor = Variable(
                torch.LongTensor(input_tensor, device=device))
            target_tensor = Variable(
                torch.LongTensor(target_tensor, device=device))
            if args.encoder_model == "BiLSTM":
                mask = torch.ByteTensor(
                    (1 - (target_tensor == args.entity_tag_size))).to(device)
            else:
                mask = torch.ByteTensor((1 - (input_tensor == 0))).to(device)
            pos_tensor = Variable(torch.Tensor(pos_tensor, device=device))
            relation_target_tensor = Variable(
                torch.LongTensor(relation_target_tensor, device=device))

        if mode == 'train':
            optimizer.zero_grad()
            NER_active_logits, NER_active_labels, RE_output_tag, NER_output_tag, NER_output, BERT_pooled_output = JointModel(
                input_tensor, pos_tensor, target_tensor, args.batchsize,
                mask)  # , input_length, target_length
            if args.use_RL:
                mask_entity = [
                    list(map(lambda x: 1 if x in [1, 2, 4, 5] else 0, i))
                    for i in target_tensor
                ]
                if torch.cuda.is_available():
                    mask_entity = torch.cuda.ByteTensor(mask_entity).to(device)
                else:
                    mask_entity = torch.ByteTensor(mask_entity).to(device)
                NER_embedding = None
                for i in range(len(mask_entity)):
                    NER_embedding = torch.mean(NER_output[i][mask_entity[i]], 0).view(1, -1) if NER_embedding is None \
                     else torch.cat((NER_embedding, torch.mean(NER_output[i][mask_entity[i]], 0).view(1, -1)), 0)

                RE_rewards, loss_RL, noisy_sentences_vec, noisy_vec_mean = RL_model(
                    BERT_pooled_output, NER_embedding, JointModel.noysy_model,
                    RE_output_tag, relation_target_tensor, noisy_sentences_vec,
                    noisy_vec_mean)

            if not args.use_RL:
                loss_entity = criterion(NER_active_logits, NER_active_labels)
                loss_RE = criterion(RE_output_tag, relation_target_tensor)
                loss = loss_entity + loss_RE
                if args.merge_loss:
                    loss.backward()
                else:
                    loss_entity.backward(
                        retain_graph=True)  # retain_graph=True
                    loss_RE.backward(retain_graph=True)
            if args.use_RL:
                loss = loss_RL
                loss_RL.backward()
            '''
			use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False
			if use_teacher_forcing:
				# Teacher forcing: Feed the target as the next input
				for di in range(target_length):
					decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
					loss += criterion(decoder_output, target_tensor[di])
					decoder_input = target_tensor[di]  # Teacher forcing
			else:
				# Without teacher forcing: use its own predictions as the next input
				for di in range(target_length):
					decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
					topv, topi = decoder_output.topk(1)
					decoder_input = topi.squeeze().detach()  # detach from history as input
	
					loss += criterion(decoder_output, target_tensor[di])
					if decoder_input.item() == EOS_token:
						break
			'''

            optimizer.step()
        else:
            NER_active_logits, NER_active_labels, RE_output_tag, NER_output_tag, _, _ = JointModel(
                input_tensor, pos_tensor, target_tensor, args.batchsize, mask,
                True)  # , input_length, target_length
        NER_correct += (torch.argmax(NER_active_logits,
                                     -1) == NER_active_labels).sum().item()
        NER_total += len(NER_active_logits)
        # temp = 0.
        # for i in range(len(relation_target_tensor[0])):
        # 	target = torch.transpose(relation_target_tensor, 0, 1)[i]
        # 	temp += (torch.argmax(RE_output_tag, -1) == target).sum().item()
        RE_correct += (torch.argmax(
            RE_output_tag, -1) == relation_target_tensor).sum().item()
        RE_total += len(RE_output_tag)
        if mode != 'train':
            NER_target_all2.append(target_tensor.cpu().tolist(
            ))  # target_tensor, NER_active_labels .numpy()
            NER_output_all2.append(
                torch.argmax(
                    NER_output_tag,
                    -1).cpu().tolist())  # NER_output_tag, NER_active_logits
            NER_output_logits.append(NER_output_tag.detach().cpu().tolist())
            RE_output_all2.append(
                torch.argmax(RE_output_tag, -1).cpu().tolist())
            RE_target_all2.append(
                relation_target_tensor.detach().cpu().tolist())
            RE_output_logits.append(RE_output_tag.cpu().tolist())
            if b % args.print_batch == 0:
                logger.info(
                    'seq-seq model: (%d %.2f%%), NER acc: %.4f, RE acc: %.4f' %
                    (b, float(b) / batchcnt * 100, NER_correct / NER_total,
                     RE_correct / RE_total))
            '''if not args.do_train:
				if NER_target_all is None:
					NER_target_all = NER_active_labels.to('cpu')
					NER_output_all = NER_active_logits.to('cpu')
				else:
					NER_target_all = torch.cat((NER_target_all.to('cpu'), NER_active_labels.to('cpu')), dim=0)
					NER_output_all = torch.cat((NER_output_all.to('cpu'), NER_active_logits.to('cpu')), dim=0)
				if RE_target_all is None:
					RE_target_all = relation_target_tensor.to('cpu')
					RE_output_all = RE_output_tag.to('cpu')
				else:
					RE_target_all = torch.cat((RE_target_all.to('cpu'), relation_target_tensor.to('cpu')), dim=0)
					RE_output_all = torch.cat((RE_output_all.to('cpu'), RE_output_tag.to('cpu')), dim=0)'''
        if mode == 'train':
            out_losses.append(loss.item())
            if b % args.print_batch == 0:
                logger.info(
                    'seq-seq model: (%d %.2f%%), loss_NER: %.4f, loss_RE: %.4f, NER acc: %.4f, RE acc: %.4f'
                    % (b, float(b) / batchcnt * 100, loss_entity.item(),
                       loss_RE.item(), NER_correct / NER_total,
                       RE_correct / RE_total))

    if mode != 'train':
        cal_F_score(RE_output_all2, RE_target_all2, NER_target_all2,
                    NER_output_all2, args.batchsize)
        if args.do_train:
            if mode == 'test' or (mode == 'dev' and e == args.epochRL - 1):
                with open(
                        args.output_dir + 'predict_%s_epoch_%s.json' %
                    (mode, e), "a+") as fw:
                    json.dump(
                        {
                            "RE_predict": RE_output_all2,
                            "RE_actual": RE_target_all2,
                            "RE_output_logits": RE_output_logits,
                            "NER_predict": NER_output_all2,
                            "NER_actual": NER_target_all2,
                            "NER_output_logits": NER_output_logits
                        }, fw)
        else:
            with open(args.output_dir + 'predict_%s.json' % mode, "a+") as fw:
                json.dump(
                    {
                        "RE_predict": RE_output_all2,
                        "RE_actual": RE_target_all2,
                        "RE_output_logits": RE_output_logits,
                        "NER_predict": NER_output_all2,
                        "NER_actual": NER_target_all2,
                        "NER_output_logits": NER_output_logits
                    }, fw)
            # np.save('pred_res/RE_predict', RE_output_all2)  # RE_output_all.to('cpu').detach().numpy()
            # np.save('pred_res/RE_actual', RE_target_all2)
            # np.save('pred_res/NER_predict', NER_output_all2)
            # np.save('pred_res/NER_actual', NER_target_all2)
            '''NER_pred_res = metrics.classification_report(NER_target_all2, NER_output_all2)
			logger.info('NER Prediction results: \n{}'.format(NER_pred_res))
			RE_pred_res = metrics.classification_report(RE_target_all2, RE_output_all2)
			logger.info('RE Prediction results: \n{}'.format(RE_pred_res))'''
    else:
        np.save(args.output_dir + "loss_train", out_losses)
Example #6
def train(save_dir='saved_weights',
          parser_name='parser',
          num_epochs=5,
          max_iters=-1,
          print_every_iters=10):
    """
    Trains the model.

    parser_name is the string prefix used for the filename where the parser is
    saved after every epoch
    """

    # load dataset
    load_existing_dump = False
    print('Loading dataset for training')
    dataset = load_datasets(load_existing_dump)
    # HINT: Look in the ModelConfig class for the model's hyperparameters
    config = dataset.model_config

    print('Loading embeddings')
    word_embeddings, pos_embeddings, dep_embeddings = load_embeddings(config)
    # TODO: For Optional Task, add Twitter and Wikipedia embeddings (do this last)

    if False:
        # Switch to True if you want to print examples of feature types
        print('words: ', len(dataset.word2idx))
        print('examples: ', [(k, v)
                             for i, (k,
                                     v) in enumerate(dataset.word2idx.items())
                             if i < 30])
        print('\n')
        print('POS-tags: ', len(dataset.pos2idx))
        print(dataset.pos2idx)
        print('\n')
        print('dependencies: ', len(dataset.dep2idx))
        print(dataset.dep2idx)
        print('\n')
        print("some hyperparameters")
        print(vars(config))

    # load parser object (used for Task 2)
    parser = ParserModel(config, word_embeddings, pos_embeddings, dep_embeddings)

    # Uncomment the following parser for Task 3
    # parser = AnotherParserModel(config, word_embeddings, pos_embeddings, dep_embeddings)

    device = torch.device(
        "cuda") if torch.cuda.is_available() else torch.device("cpu")
    parser.to(device)

    # set save_dir for model
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    # create object for loss function
    loss_fn = F.cross_entropy

    # create an object for an optimizer that updates the weights of our parser
    # model.  Be sure to set the learning rate based on the parameters!
    optimizer = optim.Adam(parser.parameters(), lr=config.lr)

    for epoch in range(1, num_epochs + 1):

        ###### Training #####

        # load training set in minibatches
        for i, (train_x, train_y) in enumerate(get_minibatches([dataset.train_inputs,
                                                                dataset.train_targets], \
                                                               config.batch_size,
                                                               is_multi_feature_input=True)):

            word_inputs_batch, pos_inputs_batch, dep_inputs_batch = train_x

            # Convert the numpy data to pytorch's tensor representation.  They're
            # numpy objects initially.  NOTE: In general, when using Pytorch,
            # you want to send them to the device that will do the computation
            # (either a GPU or CPU).  You do this by saying "obj.to(device)"
            # where we've already created the device for you (see above where we
            # did this for the parser).  This ensures your data is running on
            # the processor you expect it to!
            word_inputs_batch = torch.from_numpy(np.array(word_inputs_batch)).to(device)
            pos_inputs_batch = torch.from_numpy(np.array(pos_inputs_batch)).to(device)
            dep_inputs_batch = torch.from_numpy(np.array(dep_inputs_batch)).to(device)

            # Convert the labels from 1-hot vectors to a list of which index was
            # 1, which is what Pytorch expects.  HINT: look for the "argmax"
            # function in numpy.
            labels = np.argmax(train_y, axis=1)

            # Convert the label to pytorch's tensor
            labels = torch.from_numpy(labels).to(device)

            # This is just a quick hack so you can cut training short to see how
            # things are working.  In the final model, make sure to use all the data!
            if max_iters >= 0 and i > max_iters:
                break

            # Some debugging information for you
            if i == 0 and epoch == 1:
                print("size of word inputs: ", word_inputs_batch.size())
                print("size of pos inputs: ", pos_inputs_batch.size())
                print("size of dep inputs: ", dep_inputs_batch.size())
                print("size of labels: ", labels.size())

            #
            #### Backprop & Update weights ####
            #

            # Before the backward pass, use the optimizer object to zero all of
            # the gradients for the variables
            optimizer.zero_grad()

            # For the current batch of inputs, run a full forward pass through the
            # data and get the outputs for each item's prediction.
            # These are the raw outputs, which represent the activations for
            # prediction over valid transitions.
            outputs = parser.forward(word_inputs_batch, pos_inputs_batch, dep_inputs_batch)

            # Compute the loss for the outputs with the labels.  Note that for
            # your particular loss (cross-entropy) it will compute the softmax
            # for you, so you can safely pass in the raw activations.
            loss = loss_fn(outputs, labels)

            # Backward pass: compute gradient of the loss with respect to model parameters
            loss.backward()

            # Perform 1 update using the optimizer
            optimizer.step()

            # Every 10 batches, print out some reporting so we can see convergence
            if i % print_every_iters == 0:
                print ('Epoch: %d [%d], loss: %1.3f, acc: %1.3f' \
                       % (epoch, i, loss.item(),
                          int((outputs.argmax(1)==labels).sum())/len(labels)))

        print("End of epoch")

        # save model
        save_file = os.path.join(save_dir, '%s-epoch-%d.mdl' % (parser_name,
                                                                epoch))
        print('Saving current state of model to %s' % save_file)
        torch.save(parser, save_file)

        ###### Validation #####
        print('Evaluating on validation data after epoch %d' % epoch)

        # Once we're in test/validation time, we need to indicate that we are in
        # "evaluation" mode.  This will turn off things like Dropout so that
        # we're not randomly zero-ing out weights when it might hurt performance
        parser.eval()

        # Compute the current model's UAS score on the validation (development)
        # dataset.  Note that we can use this held-out data to tune the
        # hyper-parameters of the model but we should never look at the test
        # data until we want to report the very final result.
        compute_dependencies(parser, device, dataset.valid_data, dataset)
        valid_UAS = get_UAS(dataset.valid_data)
        print("- validation UAS: {:.2f}".format(valid_UAS * 100.0))

        # Once we're done with test/validation, we need to indicate that we are back in
        # "train" mode.  This will turn back on things like Dropout
        parser.train()

    return parser
Example #7
	encoder = torch.load(args.modelPath+"model_encoder_epoch24.pkl", map_location=device)
	decoder = torch.load(args.modelPath+"model_decoder_epoch24.pkl", map_location=device)

	if torch.cuda.is_available():
		encoder = encoder.cuda()
		decoder = decoder.cuda()
	encoder.eval()
	decoder.eval()

	# encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate, weight_decay=l2)  # SGD
	# decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate, weight_decay=l2)
	# RE_optimizer = optim.Adam(RE_model.parameters(), lr=learning_rate, weight_decay=l2)

	# ********************Train data*********************
	if args.test:
		mini_batches = get_minibatches(train_datasets, args.batchsize)
		batchcnt = len(dev_datasets[0]) // args.batchsize  # len(list(mini_batches))
		for b, data in enumerate(mini_batches):
			if b >= batchcnt:
				break
			sentences, tags = data
			input_tensor, input_length = padding_sequence(sentences, pad_token=args.embedding_size)
			target_tensor, target_length = padding_sequence(tags, pad_token=args.entity_tag_size)
			if torch.cuda.is_available():
				input_tensor = Variable(torch.cuda.LongTensor(input_tensor, device=device)).cuda()
				target_tensor = Variable(torch.cuda.LongTensor(target_tensor, device=device)).cuda()
			else:
				input_tensor = Variable(torch.LongTensor(input_tensor, device=device))
				target_tensor = Variable(torch.LongTensor(target_tensor, device=device))

			RE_output = eval_model(encoder, decoder, input_tensor, args.batchsize)
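Examples #4, #5, #7 and #12 all call a padding_sequence helper that is not shown. Judging only from the call sites, it appears to pad every sequence in a batch to the length of the longest one, using pad_token as the filler, and to return the padded batch together with the original lengths. A minimal sketch under that assumption (not the project's actual code):

def padding_sequence(sequences, pad_token):
    # Assumed behaviour: right-pad each sequence to the batch maximum with
    # pad_token and also return the original (unpadded) lengths.
    lengths = [len(seq) for seq in sequences]
    max_len = max(lengths)
    padded = [list(seq) + [pad_token] * (max_len - len(seq)) for seq in sequences]
    return padded, lengths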
Example #8
def minibatches(data, batch_size):
    x = np.array([d[0] for d in data])
    y = np.array([d[2] for d in data])
    one_hot = np.zeros((y.size, 3))
    one_hot[np.arange(y.size), y] = 1
    return get_minibatches([x, one_hot], batch_size)
Example #9
def minibatches(data, batch_size, n_classes):
    x = np.array([d[0] for d in data])
    y = np.array([d[2] for d in data])
    one_hot = np.zeros((y.size, n_classes))
    one_hot[np.arange(y.size), y] = 1
    return get_minibatches([x, one_hot], batch_size)
Example #10
def train(save_dir='saved_weights',
          parser_name='parser',
          num_epochs=5,
          max_iters=-1,
          print_every_iters=10):
    """
    Trains the model.

    parser_name is the string prefix used for the filename where the parser is
    saved after every epoch
    """

    # load dataset
    load_existing_dump = False
    print('Loading dataset for training')
    dataset = load_datasets(load_existing_dump)
    config = dataset.model_config

    print('Loading embeddings')
    word_embeddings, pos_embeddings, dep_embeddings = load_embeddings(config)

    if False:
        # Switch to True if you want to print examples of feature types
        print('words: ', len(dataset.word2idx))
        print('examples: ',
              [(k, v)
               for i, (k, v) in enumerate(dataset.word2idx.items()) if i < 30])
        print('\n')
        print('POS-tags: ', len(dataset.pos2idx))
        print(dataset.pos2idx)
        print('\n')
        print('dependencies: ', len(dataset.dep2idx))
        print(dataset.dep2idx)
        print('\n')
        print("some hyperparameters")
        print(vars(config))

    # load parser object
    parser = ParserModel(config, word_embeddings, pos_embeddings,
                         dep_embeddings)
    device = torch.device(
        "cuda") if torch.cuda.is_available() else torch.device("cpu")
    parser.to(device)

    # set save_dir for model
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    # create object for loss function
    loss_fn = nn.CrossEntropyLoss()

    # create an object for an optimizer that updates the weights of the parser model.
    optimizer = torch.optim.SGD(parser.parameters(), lr=config.lr)

    loss_list = []
    acc_list = []
    uas_list = []
    for epoch in range(1, num_epochs + 1):

        ###### Training #####

        # load training set in minibatches
        for i, (train_x, train_y) in enumerate(get_minibatches([dataset.train_inputs,
                                                                dataset.train_targets], \
                                                               config.batch_size,
                                                               is_multi_feature_input=True)):

            word_inputs_batch, pos_inputs_batch, dep_inputs_batch = train_x

            # Convert the numpy data to pytorch's tensor representation.  They're
            # numpy objects initially.
            word_inputs_batch = torch.tensor(word_inputs_batch).to(device)
            pos_inputs_batch = torch.tensor(pos_inputs_batch).to(device)
            dep_inputs_batch = torch.tensor(dep_inputs_batch).to(device)

            # Convert the labels from 1-hot vectors to a list of which index was
            # 1, which is what Pytorch expects.
            labels = np.argmax(train_y, axis=1)

            # Convert the label to pytorch's tensor
            labels = torch.tensor(labels).to(device)  # keep labels on the same device as the inputs

            if max_iters >= 0 and i > max_iters:
                break
            if i == 0 and epoch == 1:
                print("size of word inputs: ", word_inputs_batch.size())
                print("size of pos inputs: ", pos_inputs_batch.size())
                print("size of dep inputs: ", dep_inputs_batch.size())
                print("size of labels: ", labels.size())

            #### Backprop & Update weights ####

            # Before the backward pass, use the optimizer object to zero all of
            # the gradients for the variables

            optimizer.zero_grad()

            # For the current batch of inputs, run a full forward pass through the
            # data and get the outputs for each item's prediction.
            # These are the raw outputs, which represent the activations for
            # prediction over valid transitions.

            outputs = parser(word_inputs_batch, pos_inputs_batch,
                             dep_inputs_batch)  # TODO

            # Compute the loss for the outputs with the labels.

            loss = loss_fn(outputs, labels)

            # Backward pass: compute gradient of the loss with respect to model parameters

            loss.backward()
            # Perform 1 update using the optimizer

            optimizer.step()
            # Every 10 batches, print out some reporting so I can see convergence
            if i % print_every_iters == 0:
                print ('Epoch: %d [%d], loss: %1.3f, acc: %1.3f' \
                       % (epoch, i, loss.item(),
                          int((outputs.argmax(1)==labels).sum())/len(labels)))

        print("End of epoch")

        # save model
        save_file = os.path.join(save_dir,
                                 '%s-epoch-%d.mdl' % (parser_name, epoch))
        print('Saving current state of model to %s' % save_file)
        torch.save(parser, save_file)

        ###### Validation #####
        print('Evaluating on validation data after epoch %d' % epoch)

        # Once we're in test/validation time, we need to indicate that we are in
        # "evaluation" mode.  This will turn off things like Dropout so that
        # we're not randomly zero-ing out weights when it might hurt performance
        parser.eval()

        # Compute the current model's UAS score on the validation (development)
        # dataset.
        compute_dependencies(parser, device, dataset.valid_data, dataset)
        valid_UAS = get_UAS(dataset.valid_data)
        print("- validation UAS: {:.2f}".format(valid_UAS * 100.0))
        loss_list.append(loss.item())
        acc_list.append(int((outputs.argmax(1) == labels).sum()) / len(labels))
        uas_list.append(valid_UAS * 100.0)

        # Once we're done with test/validation, we need to indicate that we are back in
        # "train" mode.  This will turn back on things like Dropout
        parser.train()

    score = pd.DataFrame({'loss': loss_list, 'acc': acc_list, 'uas': uas_list})
    score.to_csv(r"score.csv", index=True, header=True)

    return parser
Example #11
def minibatches(dataX, dataY, sentLen, mask, batch_size):
    # x = np.array([d[0] for d in data])
    # y = np.array([d[2] for d in data])
    # one_hot = np.zeros((y.size, 3))
    # one_hot[np.arange(y.size), y] = 1
    return get_minibatches(dataX, dataY, sentLen, mask, batch_size)
Example #12
def trainEpoches(encoder,
                 decoder,
                 criterion,
                 print_every=10,
                 learning_rate=0.001,
                 l2=0.0001):
    start = time.time()
    out_losses = []
    print_loss_total = 0  # Reset every print_every
    # plot_loss_total = 0  # Reset every plot_every

    encoder_optimizer = optim.Adam(encoder.parameters(),
                                   lr=learning_rate,
                                   weight_decay=l2)  # SGD
    decoder_optimizer = optim.Adam(decoder.parameters(),
                                   lr=learning_rate,
                                   weight_decay=l2)
    # training_pairs = [tensorsFromPair(random.choice(pairs))
    # 				  for i in range(n_iters)]

    # for iter in range(1, n_iters + 1):
    # training_pair = training_pairs[iter - 1]
    # for epoch in range(epoches):
    # i = 0
    mini_batches = get_minibatches(train_datasets, BATCH)
    batches_size = len(train_datasets[0]) // BATCH  # len(list(mini_batches))
    for i, data in enumerate(mini_batches):
        if i == batches_size:
            break
        # for i, data in enumerate(train_dataloader, 1):
        sentences, tags = data
        input_tensor, input_length = padding_sequence(sentences,
                                                      pad_token=EMBEDDING_SIZE)
        target_tensor, target_length = padding_sequence(tags,
                                                        pad_token=TAG_SIZE)
        if torch.cuda.is_available():
            input_tensor = Variable(
                torch.cuda.LongTensor(input_tensor, device=device)).cuda()
            target_tensor = Variable(
                torch.cuda.LongTensor(target_tensor, device=device)).cuda()
        else:
            input_tensor = Variable(
                torch.LongTensor(input_tensor, device=device))
            target_tensor = Variable(
                torch.LongTensor(target_tensor, device=device))

        loss = train(input_tensor, target_tensor, encoder, decoder,
                     encoder_optimizer, decoder_optimizer,
                     criterion)  # , input_length, target_length
        out_losses.append(loss)
        print_loss_total += loss
        # plot_loss_total += loss

        if i % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print(' (%d %d%%) %.4f' %
                  (i, float(i) / batches_size * 100, print_loss_avg))
            # print('%s (%d %d%%) %.4f' % (timeSince(start, float(i) / batches_size),
            # i, float(i) / batches_size * 100, print_loss_avg))

        # plot_loss_avg = plot_loss_total / plot_every
        # plot_losses.append(plot_loss_avg)
        # plot_loss_total = 0
        # i += 1
    np.save("loss", out_losses)
    if epoch % 10 == 0:
        model_name = "./model/model_encoder_epoch" + str(epoch) + ".pkl"
        torch.save(encoder, model_name)
        model_name = "./model/model_decoder_epoch" + str(epoch) + ".pkl"
        torch.save(decoder, model_name)
        print("Model has been saved")
Example #13
def train(save_dir='saved_weights',
          parser_name='parser',
          num_epochs=5,
          max_iters=-1,
          print_every_iters=10,
          layer_num=1):
    """
    Trains the model.

    parser_name is the string prefix used for the filename where the parser is
    saved after every epoch
    """

    # load dataset
    load_existing_dump = False
    print('Loading dataset for training')
    dataset = load_datasets(load_existing_dump)
    # HINT: Look in the ModelConfig class for the model's hyperparameters
    config = dataset.model_config

    print('Loading embeddings')
    word_embeddings, pos_embeddings, dep_embeddings = load_embeddings(config)
    # TODO: For Task 3, add Twitter and Wikipedia embeddings (do this last)

    if False:
        # Switch to True if you want to print examples of feature types
        print('words: ', len(dataset.word2idx))
        print('examples: ', [(k, v) for i, (k, v) in enumerate(dataset.word2idx.items()) if i < 30])
        print('\n')
        print('POS-tags: ', len(dataset.pos2idx))
        print(dataset.pos2idx)
        print('\n')
        print('dependencies: ', len(dataset.dep2idx))
        print(dataset.dep2idx)
        print('\n')
        print("some hyperparameters")
        print(vars(config))

    # load parser object
    if layer_num <= 1:
        parser = ParserModel(config, word_embeddings, pos_embeddings, dep_embeddings)
    else:
        parser = MultiLayer_ParserModel(config, word_embeddings, pos_embeddings, dep_embeddings, layer_num)
    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
    parser.to(device)

    # set save_dir for model
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    # create object for loss function
    loss_fn = nn.CrossEntropyLoss()

    # create an object for an optimizer that updates the weights of our parser model
    optimizer = torch.optim.Adam(parser.parameters(), lr=config.lr)

    # initialize lists to plot data
    loss_list, acc_list, uas_list = [], [], []

    for epoch in range(1, num_epochs + 1):

        ###### Training #####

        # load training set in minibatches
        for i, (train_x, train_y) in enumerate(get_minibatches([dataset.train_inputs, dataset.train_targets], config.batch_size,
                                                               is_multi_feature_input=True)):

            word_inputs_batch, pos_inputs_batch, dep_inputs_batch = train_x

            # Convert the numpy data to pytorch's tensor representation.
            word_inputs_batch = torch.tensor(word_inputs_batch).to(device)
            pos_inputs_batch = torch.tensor(pos_inputs_batch).to(device)
            dep_inputs_batch = torch.tensor(dep_inputs_batch).to(device)

            # Convert the labels from 1-hot vectors to a list of which index was 1, then to pytorch tensor
            labels = torch.tensor(np.argmax(train_y, axis=1)).to(device)

            # This is just a quick hack so you can cut training short to see how things are working
            if max_iters >= 0 and i > max_iters:
                break

            # Some debugging information for you
            if i == 0 and epoch == 1:
                print("size of word inputs: ", word_inputs_batch.size())
                print("size of pos inputs: ", pos_inputs_batch.size())
                print("size of dep inputs: ", dep_inputs_batch.size())
                print("size of labels: ", labels.size())

            #### Backprop & Update weights ####

            # Before the backward pass, use the optimizer object to zero all of the gradients for the variables
            optimizer.zero_grad()

            # For the current batch of inputs, run a full forward pass through the data and get the outputs for each item's prediction
            outputs = parser(word_inputs_batch, pos_inputs_batch, dep_inputs_batch)

            # Compute the loss for the outputs with the labels
            loss = loss_fn(outputs, labels)

            # Backward pass: compute gradient of the loss with respect to model parameters
            loss.backward()

            # Perform 1 update using the optimizer
            optimizer.step()

            # Every 10 batches, print out some reporting so we can see convergence
            if i % print_every_iters == 0:
                print ('Epoch: %d [%d], loss: %1.3f, acc: %1.3f' \
                       % (epoch, i, loss.item(),
                          int((outputs.argmax(1)==labels).sum())/len(labels)))

        print("End of epoch")

        # save model
        save_file = os.path.join(save_dir, '%s-epoch-%d.mdl' % (parser_name,
                                                                epoch))
        print('Saving current state of model to %s' % save_file)
        torch.save(parser, save_file)

        ###### Validation #####
        print('Evaluating on validation data after epoch %d' % epoch)

        # Once we're in test/validation time, we need to indicate that we are in "evaluation" mode
        parser.eval()

        # Compute the current model's UAS score on the validation (development) dataset
        compute_dependencies(parser, device, dataset.valid_data, dataset)
        valid_UAS = get_UAS(dataset.valid_data)
        print("- validation UAS: {:.2f}".format(valid_UAS * 100.0))

        # Append the computed values to plotting lists
        loss_list.append(loss.item())
        acc_list.append(int((outputs.argmax(1)==labels).sum())/len(labels))
        uas_list.append(valid_UAS*100.0)

        # Once we're done with test/validation, we need to indicate that we are back in "train" mode
        parser.train()

    # Plot the data!
    epoch_size = np.arange(1, num_epochs + 1)

    loss_plot = {"Epoch":epoch_size, "Loss":np.array(loss_list)}
    seaborn.lineplot(x="Epoch", y="Loss", data=loss_plot)
    plot.xlabel("Epoch")
    plot.ylabel("Loss")
    plot.title("Training Loss vs Time")
    plot.show()

    acc_plot = {"Epoch":epoch_size, "Accuracy":np.array(acc_list)}
    seaborn.lineplot(x="Epoch", y="Accuracy", data=acc_plot)
    plot.xlabel("Epoch")
    plot.ylabel("Accuracy")
    plot.title("Training Accuracy vs Time")
    plot.show()

    uas_plot = {"Epoch":epoch_size, "UAS":np.array(uas_list)}
    seaborn.lineplot(x="Epoch", y="UAS", data=uas_plot)
    plot.xlabel("Epoch")
    plot.ylabel("UAS")
    plot.title("Unlabeled Attachment Score vs Time")
    plot.show()

    return parser