def getLSTMModel(item_train_dl, item_valid_dl, item_vocab, user_train_dl,
                 user_valid_dl, user_vocab, embedding_dim, hidden_dim):
    print('Finding the best LSTM Models....')
    lstm_item_model = LSTM(item_vocab, embedding_dim, hidden_dim)
    lstm_user_model = LSTM(user_vocab, embedding_dim, hidden_dim)

    lstm_item_output = train_LSTMmodel(lstm_item_model,
                                       item_train_dl,
                                       item_valid_dl,
                                       1,
                                       epochs=30,
                                       lr=0.01)
    lstm_user_output = train_LSTMmodel(lstm_user_model,
                                       user_train_dl,
                                       user_valid_dl,
                                       2,
                                       epochs=30,
                                       lr=0.01)

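    # train_LSTMmodel presumably checkpoints the best weights to the paths below
    # (the 1/2 suffixes appear to match the third argument passed above); fresh
    # models are built and the saved state is loaded back before returning.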
    item_model_PATH = 'model/model1.pt'
    user_model_PATH = 'model/model2.pt'

    item_LSTMmodel = LSTM(item_vocab, embedding_dim, hidden_dim)
    user_LSTMmodel = LSTM(user_vocab, embedding_dim, hidden_dim)

    item_LSTMmodel.load_state_dict(torch.load(item_model_PATH))
    user_LSTMmodel.load_state_dict(torch.load(user_model_PATH))

    return item_LSTMmodel, user_LSTMmodel
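
# Hypothetical usage of getLSTMModel; the DataLoaders, vocabularies and the
# embedding/hidden sizes below are placeholders, not values from the original:
# item_model, user_model = getLSTMModel(item_train_dl, item_valid_dl, item_vocab,
#                                       user_train_dl, user_valid_dl, user_vocab,
#                                       embedding_dim=50, hidden_dim=100)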
Example #2
def Train(train_loader, val_loader, weight_pos):
    print("Start Training!")
    if sys.argv[1] == "LSTM":
        model = LSTM(NUM_TASKS, BATCH_SIZE, DIM_EMB).cuda()
    elif sys.argv[1] == "CNN":
        model = CNN(NUM_TASKS, BATCH_SIZE, DIM_EMB).cuda()
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
    loss_criterion = nn.BCEWithLogitsLoss(pos_weight=weight_pos.cuda())
    last_val_score = 0.0
    for epoch in range(N_EPOCH):
        print("epoch " + str(epoch) + ": ")
        total_loss = 0.0
        for x, y in train_loader:
            x = x.cuda()
            y = y.cuda()
            model.zero_grad()
            logits = model(x)
            loss = loss_criterion(logits, y)
            # accumulate a plain float so the autograd graph of each batch is freed
            total_loss += loss.item()
            loss.backward()
            optimizer.step()
        print(f"loss on epoch {epoch} = {total_loss}")
        val_score = Val(val_loader, model)
        print(f"val_score on epoch {epoch} = {val_score}")
        if val_score <= last_val_score: break
        last_val_score = val_score
    return model
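
# pos_weight for BCEWithLogitsLoss is typically the per-task negative/positive
# ratio. A sketch of how weight_pos might be derived, assuming `labels` is a
# (num_samples, NUM_TASKS) 0/1 tensor (this helper is not part of the original):
def compute_pos_weight(labels):
    pos = labels.sum(dim=0)
    neg = labels.shape[0] - pos
    return neg / pos.clamp(min=1)

# model = Train(train_loader, val_loader, compute_pos_weight(train_labels))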
Example #3
def main(hparams):
    datamodule = DrivingDataMadule('v0.1', 1000, 60, 1000)

    # model = LSTM.load_from_checkpoint(
    #     "/home/sepehr/PycharmProjects/Neuropad/DAD/model/lightning_logs/version_31/checkpoints/checkpoint.ckpt"
    # )
    model = LSTM()
    trainer = pl.Trainer(gpus=-1,
                         max_epochs=100,
                         accelerator='ddp',
                         callbacks=[LSTMCallback()],
                         precision=16,
                         num_nodes=1)
    trainer.fit(model=model, datamodule=datamodule)
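
# The real LSTMCallback is defined elsewhere in the project. A minimal sketch of
# a Lightning callback playing a similar role (name and behaviour are assumptions):
class SimpleLSTMCallback(pl.Callback):
    def on_train_end(self, trainer, pl_module):
        # runs once after trainer.fit() completes
        print(f"Finished training after {trainer.current_epoch + 1} epochs")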
Example #4
def get_model():
    if exp_model == 'MLP':
        return MLP(hist_len, pred_len, in_dim)
    elif exp_model == 'LSTM':
        return LSTM(hist_len, pred_len, in_dim, city_num, batch_size, device)
    elif exp_model == 'GRU':
        return GRU(hist_len, pred_len, in_dim, city_num, batch_size, device)
    elif exp_model == 'nodesFC_GRU':
        return nodesFC_GRU(hist_len, pred_len, in_dim, city_num, batch_size, device)
    elif exp_model == 'GC_LSTM':
        return GC_LSTM(hist_len, pred_len, in_dim, city_num, batch_size, device, graph.edge_index)
    elif exp_model == 'PM25_GNN':
        return PM25_GNN(hist_len, pred_len, in_dim, city_num, batch_size, device, graph.edge_index, graph.edge_attr, wind_mean, wind_std)
    elif exp_model == 'PM25_GNN_nosub':
        return PM25_GNN_nosub(hist_len, pred_len, in_dim, city_num, batch_size, device, graph.edge_index, graph.edge_attr, wind_mean, wind_std)
    else:
        raise Exception('Wrong model name!')
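
# Hypothetical usage of the factory; exp_model and the remaining arguments
# (hist_len, pred_len, in_dim, city_num, batch_size, device, graph, ...) are
# module-level globals assumed to be populated from a config elsewhere:
# exp_model = 'GRU'
# model = get_model().to(device)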
Example #5
test_iter = BucketIterator(test,
                           batch_size=opt.batch_size,
                           sort=False,
                           train=False,
                           shuffle=False)

# endregion

# %%
# region Define the model
# if opt.notrain:
#     model = torch.load(opt.weight_datapath + "model.pt")
#     model.state_dict = torch.load(opt.weight_datapath + './state.pt')
if not opt.notrain:
    if opt.model == 'LSTM':
        model = LSTM().to(device)
    elif opt.model == 'GRU':
        model = GRU().to(device)
    elif opt.model == 'AGRU':
        model = AGRU().to(device)
    elif opt.model == 'SharedGRU':
        model = SharedGRU().to(device)
    elif opt.model == 'SharedAGRU':
        model = SharedAGRU().to(device)
    elif opt.model == 'CNN':
        model = CNN().to(device)
    # copy the pretrained TITLE vectors into the embedding layer (the model is
    # already on `device`, so no extra .to(device) call is needed here)
    model.text_embedding_layer.weight.data.copy_(TITLE.vocab.vectors)
    for para in model.text_embedding_layer.parameters():
        para.requires_grad = False
    # FIXME speedup
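    # One way to address the FIXME above: with the embedding weights frozen, the
    # optimizer (built elsewhere in this script) only needs the trainable
    # parameters, e.g. (a sketch, not part of the original):
    # trainable_params = [p for p in model.parameters() if p.requires_grad]
    # optimizer = torch.optim.Adam(trainable_params, lr=1e-3)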
Example #6
TEXT.build_vocab(train, max_size=10000)
LABEL.build_vocab(train)

BATCH_SIZE = 1

train_iterator, valid_iterator = data.BucketIterator.splits(
    (train, val),
    batch_size=BATCH_SIZE,
    sort_key=lambda x: len(x.text),
    repeat=False)

INPUT_DIM = len(TEXT.vocab)
EMBEDDING_DIM = 100
HIDDEN_DIM = 256
OUTPUT_DIM = 1
model = LSTM(INPUT_DIM, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM, BATCH_SIZE)
optimizer = optim.SGD(model.parameters(), lr=1e-3)
criterion = nn.BCEWithLogitsLoss()
device = torch.device('cuda')

model = model.to(device)
criterion = criterion.to(device)


def binary_accuracy(preds, y):
    """
    Returns accuracy per batch, i.e. if you get 8/10 right, this returns 0.8, NOT 8
    """
    # round predictions to the closest integer
    rounded_preds = torch.round(torch.sigmoid(preds))
    correct = (rounded_preds == y).float()  # convert into float for division
    acc = correct.sum() / len(correct)
    return acc
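
# A sketch of how binary_accuracy is typically used in a training epoch; the
# .text/.label batch attributes and the squeeze(1) assume the TEXT/LABEL fields
# above and a single-logit output, so treat this as illustrative only:
def train_epoch(model, iterator, optimizer, criterion):
    model.train()
    epoch_loss, epoch_acc = 0.0, 0.0
    for batch in iterator:
        optimizer.zero_grad()
        predictions = model(batch.text).squeeze(1)
        loss = criterion(predictions, batch.label)
        acc = binary_accuracy(predictions, batch.label)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
        epoch_acc += acc.item()
    return epoch_loss / len(iterator), epoch_acc / len(iterator)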
Example #7
def main():
    global epoch
    # Get arguments, setup,  prepare data and print some info
    args = parse()

    log_path = os.path.join("logs", args.name)
    if not os.path.exists(log_path):
        os.makedirs(log_path)
    writer = SummaryWriter(log_path)

    if args.task == 'babi':
        train_dataset = bAbIDataset(args.dataset_path, args.babi_task)
        val_dataset = bAbIDataset(args.dataset_path,
                                  args.babi_task,
                                  train=False)
    else:
        raise NotImplementedError

    # Setting up the Model
    if args.model == 'lstm':
        model = LSTM(40,
                     train_dataset.num_vocab,
                     100,
                     args.device,
                     sentence_size=max(train_dataset.sentence_size,
                                       train_dataset.query_size))
        print("Using LSTM")
    else:
        # model = REN(args.num_blocks, train_dataset.num_vocab, 100, args.device, train_dataset.sentence_size,
        #             train_dataset.query_size).to(args.device)
        model = RecurrentEntityNetwork(train_dataset.num_vocab,
                                       device=args.device,
                                       sequence_length=max(
                                           train_dataset.sentence_size,
                                           train_dataset.query_size))
        print("Using EntNet")
    if args.multi:  # TODO: What's this?
        model = torch.nn.DataParallel(model, device_ids=args.gpu_range)

    if args.optimizer == 'adam':
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=args.lr,
                                     weight_decay=args.weight_decay)
    elif args.optimizer == 'sgd':
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=args.lr,
                                    weight_decay=args.weight_decay)
    else:
        raise Exception("Invalid optimizer")
    if args.cyc_lr:
        cycle_momentum = args.optimizer == 'sgd'
        lr_scheduler = torch.optim.lr_scheduler.CyclicLR(
            optimizer,
            5e-5,
            args.lr,
            cycle_momentum=cycle_momentum,
            step_size_up=args.cyc_step_size_up)
    else:
        lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                       step_size=25,
                                                       gamma=0.5)

    # Before we are getting started, let's get ready to give some feedback
    print("Dataset size: ", len(train_dataset))
    print("Sentence size:", train_dataset.sentence_size)
    print("Vocab set", [
        str(i) + ': ' + str(train_dataset.vocab[i])
        for i in range(len(train_dataset.vocab))
    ])

    # Prepare Visdom
    Visdom.start()
    lr_plt = Visdom.Plot2D("Current learning rate",
                           store_interval=1,
                           xlabel="Epochs",
                           ylabel="Learning Rate")
    # TODO: Check legend
    train_loss = Visdom.Plot2D("Loss on Train Data",
                               store_interval=1,
                               xlabel="iteration",
                               ylabel="loss",
                               legend=['one', 2, 'three'])
    train_accuracy = Visdom.Plot2D("Accuracy on Train Data",
                                   store_interval=1,
                                   xlabel="iteration",
                                   ylabel="accuracy")
    validation_loss = Visdom.Plot2D("Loss on Validation Set",
                                    store_interval=1,
                                    xlabel="epoch",
                                    ylabel="loss")
    validation_accuracy = Visdom.Plot2D("Accuracy on Validation Set",
                                        store_interval=1,
                                        xlabel="epoch",
                                        ylabel="accuracy")
    babi_text_plt = Visdom.Text("Network Output")
    train_plots = {'loss': train_loss, 'accuracy': train_accuracy}
    val_plots = {'text': babi_text_plt}

    epoch = 0

    # Register Variables and plots to save
    saver = Saver(os.path.join(args.output_path, args.name),
                  short_interval=args.save_interval)
    saver.register('train_loss', StateSaver(train_loss))
    saver.register('train_accuracy', StateSaver(train_accuracy))
    saver.register('validation_loss', StateSaver(validation_loss))
    saver.register('validation_accuracy', StateSaver(validation_accuracy))
    saver.register('lr_plot', StateSaver(lr_plt))
    saver.register("model", StateSaver(model))
    saver.register("optimizer", StateSaver(optimizer))
    saver.register("epoch", GlobalVarSaver('epoch'))
    # saver.register("train_dataset", StateSaver(train_dataset))
    # saver.register("val_dataset", StateSaver(val_dataset))

    eval_on_start = False
    print("Given model argument to load from: ", args.load_model)
    # TODO: Load learning rate scheduler
    if args.load_model:
        if not saver.load(args.load_model):
            #  model.reset_parameters()
            print('Not loading, something went wrong', args.load_model)
            pass
        else:
            eval_on_start = False

    start_epoch = epoch
    end_epoch = start_epoch + args.epochs
    model.to(args.device)

    # TODO: Use saver only on full epochs or use it on certain iteration
    """ TRAIN START """
    # Eval on Start
    if eval_on_start:
        val_result = val_dataset.eval(args, model, plots=val_plots)
        validation_loss.add_point(0, val_result['loss'])
        validation_accuracy.add_point(0, val_result['accuracy'])
        saver.write(epoch)
    for epoch in range(start_epoch, end_epoch):
        train_result = train_dataset.test(args,
                                          model,
                                          optimizer,
                                          epoch=epoch,
                                          plots=train_plots,
                                          scheduler=lr_scheduler)
        val_result = val_dataset.eval(args,
                                      model,
                                      epoch=epoch + 1,
                                      plots=val_plots)
        validation_loss.add_point(epoch, val_result['loss'])
        validation_accuracy.add_point(epoch, val_result['accuracy'])

        current_lr = None
        for param_group in optimizer.param_groups:
            current_lr = param_group['lr']
            break
        lr_plt.add_point(epoch, current_lr if current_lr else 0)

        saver.tick(epoch + 1)
        if not args.cyc_lr:
            lr_scheduler.step()

        # TODO: Add writer
        # Log
        if epoch % args.save_interval == 0 or epoch == args.epochs - 1:
            for param_group in optimizer.param_groups:
                log_lr = param_group['lr']
                break

            log = 'Epoch: [{epoch}]\t Train Loss {tl} Acc {ta}\t Val Loss {vl} Acc {va} lr {lr}'.format(
                epoch=epoch,
                tl=round(train_result['loss'], 3),
                ta=round(train_result['accuracy'], 3),
                vl=round(val_result['loss'], 3),
                va=round(val_result['accuracy'], 3),
                lr=log_lr)
            print(log)
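
# Hypothetical invocation; parse() is defined elsewhere, so the exact flag
# spellings below are guesses based on the attribute names used above:
# python main.py --name babi_run --task babi --babi_task 1 \
#     --dataset_path ./data/babi --model lstm --optimizer adam \
#     --lr 1e-3 --epochs 100 --device cuda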
Example #8
def main():

    parser = argparse.ArgumentParser()

    ## Required parameters
    parser.add_argument(
        "--data_dir",
        default=None,
        type=str,
        required=True,
        help=
        "The input data dir. Should contain the .tsv files (or other data files) for the task."
    )
    parser.add_argument("--word_embedding_path",
                        default=None,
                        type=str,
                        required=True,
                        help="Word Embedding Path")
    parser.add_argument("--model",
                        default='CNN',
                        type=str,
                        required=True,
                        help="CNN/LSTM/LSTM+Attention")
    parser.add_argument("--output_dir",
                        default=None,
                        type=str,
                        required=True,
                        help="The result path")
    parser.add_argument("--output_name",
                        default=None,
                        type=str,
                        required=True,
                        help="The result path")
    parser.add_argument(
        "--max_length",
        default=25,
        type=int,
        help="Maximum sequence length, sequences longer than this are truncated"
    )
    parser.add_argument("--epochs",
                        default=15,
                        type=int,
                        help="Number of epochs to train for")
    parser.add_argument("--learning_rate",
                        default=0.001,
                        type=float,
                        dest="learning_rate",
                        help="Learning rate for optimizer")
    parser.add_argument(
        "--device",
        default="cuda:0",
        dest="device",
        help="Device to use for training and evaluation e.g. (cpu, cuda:0)")
    parser.add_argument(
        "--dropout",
        default=0.1,
        type=float,
        dest="dropout",
        help=
        "Dropout (not keep_prob, but probability of ZEROING during training, i.e. keep_prob = 1 - dropout)"
    )
    parser.add_argument("--batch_size",
                        default=64,
                        type=int,
                        help="Batch size")
    parser.add_argument("--filter_sizes",
                        default=[1, 2, 3, 4, 5],
                        type=list,
                        help="The filter sizes(CNN model)")
    parser.add_argument("--num_filters",
                        default=50,
                        type=int,
                        help="The number of filters(CNN model)")
    parser.add_argument(
        "--hidden_size",
        default=64,
        type=int,
        help="The number of hidden_size(LSTM/LSTM_Attention model)")
    parser.add_argument(
        "--layer_num",
        default=1,
        type=int,
        help="The number of layer_num(LSTM/LSTM_Attention model)")
    parser.add_argument(
        "--bidirectional",
        default=True,
        type=lambda s: s.lower() in ('true', '1', 'yes'),
        help="Whether the LSTM/LSTM_Attention model is bidirectional")
    parser.add_argument("--attention_size",
                        default=32,
                        type=int,
                        help="The dim of attention(LSTM_Attention model)")
    parser.add_argument(
        "--model_size",
        default=128,
        type=int,
        help="The size of transformer's model(Transformer model)")
    parser.add_argument("--num_heads",
                        default=4,
                        type=int,
                        help="The number of heads(Transformer model)")
    parser.add_argument("--num_blocks",
                        default=2,
                        type=int,
                        help="The number of block(Transformer model)")

    args = parser.parse_args()

    print('......................Loading Data......................')
    x_trainval, y_trainval, x_test, y_test = get_data(args.data_dir)
    x_train, x_val, y_train, y_val = train_test_split(x_trainval,
                                                      y_trainval,
                                                      test_size=0.2,
                                                      random_state=66)
    word_embedding = load_word_embedding(args.word_embedding_path)
    x_train, x_val, x_test, average_len, vocab = pre_process(
        x_train, x_val, x_test)
    vocab = set(vocab)

    #Create the dictionary word_to_index and target_to_index
    word_to_id = {word: index for index, word in enumerate(vocab)}
    target_to_id = {
        target: index
        for index, target in enumerate(set(y_trainval))
    }
    id_to_target = {value: key for key, value in target_to_id.items()}

    #Define some hyperparameters
    embedding_dim = 300
    vocab_size = len(vocab)
    output_size = 3
    pre_trained_enbedding = torch.zeros(vocab_size, embedding_dim)

    for key, value in word_to_id.items():
        if key in word_embedding and (key != '<pad>'):
            pre_trained_enbedding[value, :] = torch.from_numpy(
                word_embedding[key])
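    # Words missing from the pretrained embeddings (and '<pad>') keep the zero
    # rows from the torch.zeros initialisation above.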

    #Transform data from text to tensor and put them in the Dataloader(for batch)
    train_loader = prepare_data(x_train, y_train, average_len, word_to_id,
                                target_to_id, vocab, args.batch_size)
    val_loader = prepare_data(x_val, y_val, average_len, word_to_id,
                              target_to_id, vocab, args.batch_size)
    test_loader = prepare_data(x_test, y_test, average_len, word_to_id,
                               target_to_id, vocab, args.batch_size)

    #Build Model
    if args.model == 'CNN':
        model = CNN(vocab_size, embedding_dim, pre_trained_enbedding,
                    args.filter_sizes, args.num_filters, args.dropout,
                    output_size).to(args.device)
    if args.model == 'LSTM':
        model = LSTM(vocab_size, embedding_dim, pre_trained_enbedding,
                     args.hidden_size, args.layer_num, args.bidirectional,
                     output_size).to(args.device)
    if args.model == 'LSTM_Attention':
        model = LSTM_Attention(vocab_size, embedding_dim,
                               pre_trained_enbedding, args.hidden_size,
                               args.layer_num, args.bidirectional,
                               args.attention_size,
                               output_size).to(args.device)
    if args.model == 'Transformer':
        model = TransformerModel(vocab_size, average_len, args.batch_size,
                                 embedding_dim, pre_trained_enbedding,
                                 args.model_size, args.num_heads,
                                 args.num_blocks, args.dropout,
                                 output_size).to(args.device)

    print(model)
    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=args.learning_rate)

    #Training Data
    print('......................Training Data......................')
    print('          ')
    losses = []
    best_recall_score = 0.0

    with open(
            os.path.join(
                args.output_dir,
                args.model + args.output_name + '_training_result.csv'),
            'w') as csvfile:
        fieldnames = [
            'Epoch', 'Loss', 'train_accuracy_score', 'train_recall_score',
            'train_f1_score', 'val_accuracy_score', 'val_recall_score',
            'val_f1_score', 'test_accuracy_score', 'test_recall_score',
            'test_f1_score'
        ]
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()

        for epoch in range(args.epochs):
            total_loss = 0
            for batch_x, batch_y in train_loader:
                if torch.cuda.is_available():
                    batch_x, batch_y = batch_x.cuda(), batch_y.cuda()
                optimizer.zero_grad()
                logits = model(batch_x)
                loss = loss_function(logits, torch.max(batch_y, 1)[0])
                loss.backward()
                optimizer.step()
                total_loss += loss.item()
            losses.append(total_loss)

            # print('......................Epoch: %d, Loss: %f......................' %(epoch,total_loss))
            # print('......................Training Data Performance......................')
            train_accuracy_score, train_recall_score, train_f1_score = evaluate(
                train_loader, model, id_to_target)
            # print('......................Validation Data Performance......................')
            val_accuracy_score, val_recall_score, val_f1_score = evaluate(
                val_loader, model, id_to_target)

            if val_recall_score > best_recall_score:
                best_recall_score = val_recall_score
                # real_model = model.module
                torch.save(model.state_dict(), args.model + '2model_best.pth')
            # print('......................Test Data Performance......................')
            test_accuracy_score, test_recall_score, test_f1_score = evaluate(
                test_loader, model, id_to_target)
            writer.writerow({
                'Epoch': epoch,
                'Loss': total_loss,
                'train_accuracy_score': train_accuracy_score,
                'train_recall_score': train_recall_score,
                'train_f1_score': train_f1_score,
                'val_accuracy_score': val_accuracy_score,
                'val_recall_score': val_recall_score,
                'val_f1_score': val_f1_score,
                'test_accuracy_score': test_accuracy_score,
                'test_recall_score': test_recall_score,
                'test_f1_score': test_f1_score
            })
            print('              ')

    if args.model == 'CNN':
        model = CNN(vocab_size, embedding_dim, pre_trained_enbedding,
                    args.filter_sizes, args.num_filters, args.dropout,
                    output_size)
    if args.model == 'LSTM':
        model = LSTM(vocab_size, embedding_dim, pre_trained_enbedding,
                     args.hidden_size, args.layer_num, args.bidirectional,
                     output_size)
    if args.model == 'LSTM_Attention':
        model = LSTM_Attention(vocab_size, embedding_dim,
                               pre_trained_enbedding, args.hidden_size,
                               args.layer_num, args.bidirectional,
                               args.attention_size, output_size)
    if args.model == 'Transformer':
        model = TransformerModel(vocab_size, average_len, args.batch_size,
                                 embedding_dim, pre_trained_enbedding,
                                 args.model_size, args.num_heads,
                                 args.num_blocks, args.dropout, output_size)

    checkpoint = torch.load(args.model + '2model_best.pth')
    model.load_state_dict(checkpoint)
    model.to(args.device)

    print('......................Test Data Performance......................')
    test_accuracy_score, test_recall_score, test_f1_score = evaluate(
        test_loader, model, id_to_target)
    print('The accuracy is:%f ' % test_accuracy_score)
    print('The macro_recall is:%f ' % test_recall_score)
    print('The macro_F_score is:%f ' % test_f1_score)
    print('              ')
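
# Hypothetical invocation (the script name and paths are placeholders; the flags
# come from the argparse definitions above):
# python run_classifier.py --data_dir ./data --word_embedding_path ./emb/vectors.txt \
#     --model LSTM --output_dir ./results --output_name run1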
Example #9
    print('Start generating the result Excel file...')
    df = pd.read_excel('./data/sourceData/test.xlsx')
    new_df = df[['_id', '_id_x', '_id_y', 'nick_name', 'content']]
    nationalism_predictions = []
    for index, each in tqdm(new_df.iterrows()):
        nationalism_predictions.append(
            weibo_id_prediction_dic.get(int(each["_id"]), ""))
    new_df['{}_prediction'.format(
        CURRENT_MODEL_NAME)] = nationalism_predictions
    new_df.to_excel('./predictionResults/{}_prediction_result.xlsx'.format(
        CURRENT_MODEL_NAME))


if __name__ == "__main__":
    if IS_TRAIN:
        model = LSTM(INPUT_DIM, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM,
                     BATCH_SIZE)
    else:
        model = torch.load(
            './trainedModel/best_{}_model.pkl'.format(CURRENT_MODEL_NAME))
        print('load model successfully')
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
    criterion = nn.BCEWithLogitsLoss()
    device = torch.device('cuda')
    model = model.to(device)
    criterion = criterion.to(device)
    if IS_TRAIN:
        for i in range(5):
            train(model, train_iterator, optimizer, criterion)
    else:
        test(model, test_iterator)
Example #10
from model.LSTM import LSTM
from Tool.get_preprocess_data import *

train_x, test_x, train_y, test_y = get_train_test_set()
embedding_matrix, word_list = get_embedding_matrix()
lstm = LSTM(embedding_matrix, train_x, train_y, test_x, test_y)
lstm.run()