Example #1
def train(save_dir='saved_weights',
          parser_name='parser',
          num_epochs=5,
          max_iters=-1,
          print_every_iters=10):
    """
    Trains the model.

    parser_name is the string prefix used for the filename where the parser is
    saved after every epoch
    """

    # load dataset
    load_existing_dump = False
    print('Loading dataset for training')
    dataset = load_datasets(load_existing_dump)
    # HINT: Look in the ModelConfig class for the model's hyperparameters
    config = dataset.model_config

    print('Loading embeddings')
    word_embeddings, pos_embeddings, dep_embeddings = load_embeddings(config)
    # TODO: For Optional Task, add Twitter and Wikipedia embeddings (do this last)

    if False:
        # Switch to True if you want to print examples of feature types
        print('words: ', len(dataset.word2idx))
        print('examples: ', [(k, v)
                             for i, (k,
                                     v) in enumerate(dataset.word2idx.items())
                             if i < 30])
        print('\n')
        print('POS-tags: ', len(dataset.pos2idx))
        print(dataset.pos2idx)
        print('\n')
        print('dependencies: ', len(dataset.dep2idx))
        print(dataset.dep2idx)
        print('\n')
        print("some hyperparameters")
        print(vars(config))

    # load parser object (used for Task 2)
    parser = ParserModel(config, word_embeddings, pos_embeddings, dep_embeddings)

    # Uncomment the following parser for Task 3
    # parser = AnotherParserModel(config, word_embeddings, pos_embeddings, dep_embeddings)

    device = torch.device(
        "cuda") if torch.cuda.is_available() else torch.device("cpu")
    parser.to(device)

    # set save_dir for model
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    # create object for loss function
    loss_fn = F.cross_entropy

    # create object for an optimizer that updates the weights of our parser
    # model.  Be sure to set the learning rate based on the parameters!
    optimizer = optim.Adam(parser.parameters(), lr=config.lr)

    for epoch in range(1, num_epochs + 1):

        ###### Training #####

        # load training set in minibatches
        for i, (train_x, train_y) in enumerate(get_minibatches([dataset.train_inputs,
                                                                dataset.train_targets], \
                                                               config.batch_size,
                                                               is_multi_feature_input=True)):

            word_inputs_batch, pos_inputs_batch, dep_inputs_batch = train_x

            # Convert the numpy data to pytorch's tensor representation.  They're
            # numpy objects initially.  NOTE: In general, when using Pytorch,
            # you want to send them to the device that will do the computation
            # (either a GPU or CPU).  You do this by saying "obj.to(device)"
            # where we've already created the device for you (see above where we
            # did this for the parser).  This ensures your data is running on
            # the processor you expect it to!
            word_inputs_batch = torch.from_numpy(np.array(word_inputs_batch)).to(device)
            pos_inputs_batch = torch.from_numpy(np.array(pos_inputs_batch)).to(device)
            dep_inputs_batch = torch.from_numpy(np.array(dep_inputs_batch)).to(device)

            # Convert the labels from 1-hot vectors to a list of which index was
            # 1, which is what Pytorch expects.  HINT: look for the "argmax"
            # function in numpy.
            labels = np.argmax(train_y, axis=1)

            # Convert the label to pytorch's tensor
            labels = torch.from_numpy(labels).to(device)

            # This is just a quick hack so you can cut training short to see how
            # things are working.  In the final model, make sure to use all the data!
            if max_iters >= 0 and i > max_iters:
                break

            # Some debugging information for you
            if i == 0 and epoch == 1:
                print("size of word inputs: ", word_inputs_batch.size())
                print("size of pos inputs: ", pos_inputs_batch.size())
                print("size of dep inputs: ", dep_inputs_batch.size())
                print("size of labels: ", labels.size())

            #
            #### Backprop & Update weights ####
            #

            # Before the backward pass, use the optimizer object to zero all of
            # the gradients for the variables
            optimizer.zero_grad()

            # For the current batch of inputs, run a full forward pass through the
            # data and get the outputs for each item's prediction.
            # These are the raw outputs, which represent the activations for
            # prediction over valid transitions.
            outputs = parser.forward(word_inputs_batch, pos_inputs_batch, dep_inputs_batch)

            # Compute the loss for the outputs with the labels.  Note that for
            # your particular loss (cross-entropy) it will compute the softmax
            # for you, so you can safely pass in the raw activations.
            loss = loss_fn(outputs, labels)

            # Backward pass: compute gradient of the loss with respect to model parameters
            loss.backward()

            # Perform 1 update using the optimizer
            optimizer.step()

            # Every 10 batches, print out some reporting so we can see convergence
            if i % print_every_iters == 0:
                print('Epoch: %d [%d], loss: %1.3f, acc: %1.3f'
                      % (epoch, i, loss.item(),
                         int((outputs.argmax(1) == labels).sum()) / len(labels)))

        print("End of epoch")

        # save model
        save_file = os.path.join(save_dir, '%s-epoch-%d.mdl' % (parser_name,
                                                                epoch))
        print('Saving current state of model to %s' % save_file)
        torch.save(parser, save_file)

        ###### Validation #####
        print('Evaluating on validation data after epoch %d' % epoch)

        # Once we're in test/validation time, we need to indicate that we are in
        # "evaluation" mode.  This will turn off things like Dropout so that
        # we're not randomly zero-ing out weights when it might hurt performance
        parser.eval()

        # Compute the current model's UAS score on the validation (development)
        # dataset.  Note that we can use this held-out data to tune the
        # hyper-parameters of the model but we should never look at the test
        # data until we want to report the very final result.
        compute_dependencies(parser, device, dataset.valid_data, dataset)
        valid_UAS = get_UAS(dataset.valid_data)
        print("- validation UAS: {:.2f}".format(valid_UAS * 100.0))

        # Once we're done with test/validation, we need to indicate that we are back in
        # "train" mode.  This will turn back on things like Dropout
        parser.train()

    return parser
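
A minimal usage sketch, assuming the helpers this function relies on (load_datasets, load_embeddings, get_minibatches, compute_dependencies, get_UAS) are defined in the same module; the file names below are illustrative.

if __name__ == '__main__':
    # Train with the defaults used in the example; max_iters=-1 keeps all minibatches.
    parser = train(save_dir='saved_weights',
                   parser_name='parser',
                   num_epochs=5,
                   max_iters=-1,
                   print_every_iters=10)

    # torch.save(parser, ...) above stores the whole module, so a checkpoint can be
    # reloaded with torch.load (the model class definition must be importable):
    # parser = torch.load('saved_weights/parser-epoch-5.mdl')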
Example #2
def train(save_dir='saved_weights',
          parser_name='parser',
          num_epochs=5,
          max_iters=-1,
          print_every_iters=10):
    """
    Trains the model.

    parser_name is the string prefix used for the filename where the parser is
    saved after every epoch
    """

    # load dataset
    load_existing_dump = False
    print('Loading dataset for training')
    dataset = load_datasets(load_existing_dump)
    config = dataset.model_config

    print('Loading embeddings')
    word_embeddings, pos_embeddings, dep_embeddings = load_embeddings(config)

    if False:
        # Switch to True if you want to print examples of feature types
        print('words: ', len(dataset.word2idx))
        print('examples: ',
              [(k, v)
               for i, (k, v) in enumerate(dataset.word2idx.items()) if i < 30])
        print('\n')
        print('POS-tags: ', len(dataset.pos2idx))
        print(dataset.pos2idx)
        print('\n')
        print('dependencies: ', len(dataset.dep2idx))
        print(dataset.dep2idx)
        print('\n')
        print("some hyperparameters")
        print(vars(config))

    # load parser object
    parser = ParserModel(config, word_embeddings, pos_embeddings,
                         dep_embeddings)
    device = torch.device(
        "cuda") if torch.cuda.is_available() else torch.device("cpu")
    parser.to(device)

    # set save_dir for model
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    # create object for loss function
    loss_fn = nn.CrossEntropyLoss()

    # create object for an optimizer that updates the weights of the parser model.
    optimizer = torch.optim.SGD(parser.parameters(), lr=config.lr)

    loss_list = []
    acc_list = []
    uas_list = []
    for epoch in range(1, num_epochs + 1):

        ###### Training #####

        # load training set in minibatches
        for i, (train_x, train_y) in enumerate(get_minibatches([dataset.train_inputs,
                                                                dataset.train_targets], \
                                                               config.batch_size,
                                                               is_multi_feature_input=True)):

            word_inputs_batch, pos_inputs_batch, dep_inputs_batch = train_x

            # Convert the numpy data to pytorch's tensor representation.  They're
            # numpy objects initially.
            word_inputs_batch = torch.tensor(word_inputs_batch).to(device)
            pos_inputs_batch = torch.tensor(pos_inputs_batch).to(device)
            dep_inputs_batch = torch.tensor(dep_inputs_batch).to(device)

            # Convert the labels from 1-hot vectors to a list of which index was
            # 1, which is what Pytorch expects.
            labels = np.argmax(train_y, axis=1)

            # Convert the label to pytorch's tensor (and move it to the same device as the model)
            labels = torch.tensor(labels).to(device)

            if max_iters >= 0 and i > max_iters:
                break
            if i == 0 and epoch == 1:
                print("size of word inputs: ", word_inputs_batch.size())
                print("size of pos inputs: ", pos_inputs_batch.size())
                print("size of dep inputs: ", dep_inputs_batch.size())
                print("size of labels: ", labels.size())

            #### Backprop & Update weights ####

            # Before the backward pass, use the optimizer object to zero all of
            # the gradients for the variables
            optimizer.zero_grad()

            # For the current batch of inputs, run a full forward pass through the
            # data and get the outputs for each item's prediction.
            # These are the raw outputs, which represent the activations for
            # prediction over valid transitions.
            outputs = parser(word_inputs_batch, pos_inputs_batch,
                             dep_inputs_batch)

            # Compute the loss for the outputs with the labels.
            loss = loss_fn(outputs, labels)

            # Backward pass: compute gradient of the loss with respect to model parameters
            loss.backward()

            # Perform 1 update using the optimizer
            optimizer.step()

            # Every 10 batches, print out some reporting so we can see convergence
            if i % print_every_iters == 0:
                print('Epoch: %d [%d], loss: %1.3f, acc: %1.3f'
                      % (epoch, i, loss.item(),
                         int((outputs.argmax(1) == labels).sum()) / len(labels)))

        print("End of epoch")

        # save model
        save_file = os.path.join(save_dir,
                                 '%s-epoch-%d.mdl' % (parser_name, epoch))
        print('Saving current state of model to %s' % save_file)
        torch.save(parser, save_file)

        ###### Validation #####
        print('Evaluating on validation data after epoch %d' % epoch)

        # Once we're in test/validation time, we need to indicate that we are in
        # "evaluation" mode.  This will turn off things like Dropout so that
        # we're not randomly zero-ing out weights when it might hurt performance
        parser.eval()

        # Compute the current model's UAS score on the validation (development)
        # dataset.
        compute_dependencies(parser, device, dataset.valid_data, dataset)
        valid_UAS = get_UAS(dataset.valid_data)
        print("- validation UAS: {:.2f}".format(valid_UAS * 100.0))
        loss_list.append(loss.item())
        acc_list.append(int((outputs.argmax(1) == labels).sum()) / len(labels))
        uas_list.append(valid_UAS * 100.0)

        # Once we're done with test/validation, we need to indicate that we are back in
        # "train" mode.  This will turn back on things like Dropout
        parser.train()

    score = pd.DataFrame({'loss': loss_list, 'acc': acc_list, 'uas': uas_list})
    score.to_csv(r"score.csv", index=True, header=True)

    return parser
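
A short follow-up sketch, assuming pandas and matplotlib are installed: reading back the score.csv written above and plotting the per-epoch curves (the column names match the DataFrame created in the example).

import pandas as pd
import matplotlib.pyplot as plt

scores = pd.read_csv('score.csv', index_col=0)  # first column is the index written by to_csv(index=True)

scores[['loss', 'acc']].plot(marker='o')        # training loss and accuracy per epoch
plt.xlabel('Epoch (0-based row index)')
plt.show()

scores['uas'].plot(marker='o')                  # validation UAS per epoch
plt.xlabel('Epoch (0-based row index)')
plt.ylabel('UAS (%)')
plt.show()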
Example #3
    if opt.pretrained:
        state_dict = torch.load(opt.mainpath + '/output/' +
                                str(opt.modelpath) + "model.weights" +
                                "pretrained")
        model.load_state_dict(state_dict['model'])
        del state_dict

    if opt.multigpu:
        print('multi')
        print(torch.cuda.device_count())
        if torch.cuda.device_count() > 1:
            print("Let's use", torch.cuda.device_count(), "GPUs!")
            # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs
            model = nn.DataParallel(model)

    model = model.to(device)

    print("took {:.2f} seconds\n".format(time.time() - start))

    print(80 * "=")
    print("TRAINING")
    print(80 * "=")
    output_path = opt.mainpath + "/output/" + str(
        opt.outputname) + "model.weights"

    parser.embedding_shape = embeddings.shape[0]

    embeddings_shape = embeddings.shape[1]

    del embeddings
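
A hedged note on this snippet: a state_dict saved from a model wrapped in nn.DataParallel has every key prefixed with "module.", so loading such a checkpoint into an unwrapped model fails with missing/unexpected keys. A common workaround (a sketch; checkpoint_path is illustrative, not from the original code) is to strip the prefix before load_state_dict:

state_dict = torch.load(checkpoint_path, map_location=device)['model']
# Strip the "module." prefix that nn.DataParallel adds to parameter names, if present.
state_dict = {k[len('module.'):] if k.startswith('module.') else k: v
              for k, v in state_dict.items()}
model.load_state_dict(state_dict)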
Example #4
def train(save_dir='saved_weights',
          parser_name='parser',
          num_epochs=5,
          max_iters=-1,
          print_every_iters=10,
          layer_num=1):
    """
    Trains the model.

    parser_name is the string prefix used for the filename where the parser is
    saved after every epoch
    """

    # load dataset
    load_existing_dump = False
    print('Loading dataset for training')
    dataset = load_datasets(load_existing_dump)
    # HINT: Look in the ModelConfig class for the model's hyperparameters
    config = dataset.model_config

    print('Loading embeddings')
    word_embeddings, pos_embeddings, dep_embeddings = load_embeddings(config)
    # TODO: For Task 3, add Twitter and Wikipedia embeddings (do this last)

    if False:
        # Switch to True if you want to print examples of feature types
        print('words: ', len(dataset.word2idx))
        print('examples: ', [(k, v) for i, (k, v) in enumerate(dataset.word2idx.items()) if i < 30])
        print('\n')
        print('POS-tags: ', len(dataset.pos2idx))
        print(dataset.pos2idx)
        print('\n')
        print('dependencies: ', len(dataset.dep2idx))
        print(dataset.dep2idx)
        print('\n')
        print("some hyperparameters")
        print(vars(config))

    # load parser object
    if layer_num <= 1:
        parser = ParserModel(config, word_embeddings, pos_embeddings, dep_embeddings)
    else:
        parser = MultiLayer_ParserModel(config, word_embeddings, pos_embeddings, dep_embeddings, layer_num)
    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
    parser.to(device)

    # set save_dir for model
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    # create object for loss function
    loss_fn = nn.CrossEntropyLoss()

    # create object for an optimizer that updates the weights of our parser model
    optimizer = torch.optim.Adam(parser.parameters(), lr=config.lr)

    # initialize lists to plot data
    loss_list, acc_list, uas_list = [], [], []

    for epoch in range(1, num_epochs + 1):

        ###### Training #####

        # load training set in minibatches
        for i, (train_x, train_y) in enumerate(get_minibatches([dataset.train_inputs, dataset.train_targets], config.batch_size,
                                                               is_multi_feature_input=True)):

            word_inputs_batch, pos_inputs_batch, dep_inputs_batch = train_x

            # Convert the numpy data to pytorch's tensor representation.
            word_inputs_batch = torch.tensor(word_inputs_batch).to(device)
            pos_inputs_batch = torch.tensor(pos_inputs_batch).to(device)
            dep_inputs_batch = torch.tensor(dep_inputs_batch).to(device)

            # Convert the labels from 1-hot vectors to a list of which index was 1, then to pytorch tensor
            labels = torch.tensor(np.argmax(train_y, axis=1)).to(device)

            # This is just a quick hack so you can cut training short to see how things are working
            if max_iters >= 0 and i > max_iters:
                break

            # Some debugging information for you
            if i == 0 and epoch == 1:
                print("size of word inputs: ", word_inputs_batch.size())
                print("size of pos inputs: ", pos_inputs_batch.size())
                print("size of dep inputs: ", dep_inputs_batch.size())
                print("size of labels: ", labels.size())

            #### Backprop & Update weights ####

            # Before the backward pass, use the optimizer object to zero all of the gradients for the variables
            optimizer.zero_grad()

            # For the current batch of inputs, run a full forward pass through the data and get the outputs for each item's prediction
            outputs = parser(word_inputs_batch, pos_inputs_batch, dep_inputs_batch)

            # Compute the loss for the outputs with the labels
            loss = loss_fn(outputs, labels)

            # Backward pass: compute gradient of the loss with respect to model parameters
            loss.backward()

            # Perform 1 update using the optimizer
            optimizer.step()

            # Every 10 batches, print out some reporting so we can see convergence
            if i % print_every_iters == 0:
                print('Epoch: %d [%d], loss: %1.3f, acc: %1.3f'
                      % (epoch, i, loss.item(),
                         int((outputs.argmax(1) == labels).sum()) / len(labels)))

        print("End of epoch")

        # save model
        save_file = os.path.join(save_dir, '%s-epoch-%d.mdl' % (parser_name,
                                                                epoch))
        print('Saving current state of model to %s' % save_file)
        torch.save(parser, save_file)

        ###### Validation #####
        print('Evaluating on validation data after epoch %d' % epoch)

        # Once we're in test/validation time, we need to indicate that we are in "evaluation" mode
        parser.eval()

        # Compute the current model's UAS score on the validation (development) dataset
        compute_dependencies(parser, device, dataset.valid_data, dataset)
        valid_UAS = get_UAS(dataset.valid_data)
        print("- validation UAS: {:.2f}".format(valid_UAS * 100.0))

        # Append the computed values to plotting lists
        loss_list.append(loss.item())
        acc_list.append(int((outputs.argmax(1)==labels).sum())/len(labels))
        uas_list.append(valid_UAS*100.0)

        # Once we're done with test/validation, we need to indicate that we are back in "train" mode
        parser.train()

    # Plot the data!
    epoch_size = np.arange(1, num_epochs + 1)

    loss_plot = {"Epoch":epoch_size, "Loss":np.array(loss_list)}
    seaborn.lineplot(x="Epoch", y="Loss", data=loss_plot)
    plot.xlabel("Epoch")
    plot.ylabel("Loss")
    plot.title("Training Loss vs Time")
    plot.show()

    acc_plot = {"Epoch":epoch_size, "Accuracy":np.array(acc_list)}
    seaborn.lineplot(x="Epoch", y="Accuracy", data=acc_plot)
    plot.xlabel("Epoch")
    plot.ylabel("Accuracy")
    plot.title("Training Accuracy vs Time")
    plot.show()

    uas_plot = {"Epoch":epoch_size, "UAS":np.array(uas_list)}
    seaborn.lineplot(x="Epoch", y="UAS", data=uas_plot)
    plot.xlabel("Epoch")
    plot.ylabel("UAS")
    plot.title("Unlabeled Attachment Score vs Time")
    plot.show()

    return parser
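
A minimal, self-contained sketch, assuming `plot` in the example above is an alias for matplotlib.pyplot and that the figures should be written to disk rather than shown interactively: the same seaborn lineplot call with savefig in place of show. The values in `uas_plot` below are placeholders purely for illustration, not results from the example.

import numpy as np
import seaborn
import matplotlib.pyplot as plot  # the alias the example appears to use

# Placeholder values standing in for the uas_list collected during training.
uas_plot = {"Epoch": np.arange(1, 6), "UAS": np.array([70.0, 74.0, 76.0, 77.0, 77.5])}

seaborn.lineplot(x="Epoch", y="UAS", data=uas_plot)
plot.xlabel("Epoch")
plot.ylabel("UAS")
plot.title("Unlabeled Attachment Score vs Time")
plot.savefig("uas_vs_epoch.png", dpi=150, bbox_inches="tight")  # write to disk instead of plot.show()
plot.clf()  # clear the figure so a later plot starts fresh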