Example #1
def run_task(data_dir, task_id):
    """
    Train and test for each task
    """
    print("Train and test for task %d ..." % task_id)

    print("We are going to use this")

    # Parse data
    train_files = glob.glob('%s/qa%d_*_train.txt' % (data_dir, task_id))
    test_files = glob.glob('%s/qa%d_*_test.txt' % (data_dir, task_id))

    dictionary = {"nil": 0}
    train_story, train_questions, train_qstory = parse_babi_task(
        train_files, dictionary, False)
    test_story, test_questions, test_qstory = parse_babi_task(
        test_files, dictionary, False)

    general_config = BabiConfig(train_story, train_questions, dictionary)

    # Build an empty (untrained) model; without this, memory, model and loss
    # would be undefined in the test() call below
    memory, model, loss = build_model(general_config)

    # Training is left disabled in this example; uncomment to train before testing
    # if general_config.linear_start:
    #     train_linear_start(train_story, train_questions, train_qstory, memory, model, loss, general_config)
    # else:
    #     train(train_story, train_questions, train_qstory, memory, model, loss, general_config)

    test(test_story, test_questions, test_qstory, memory, model, loss,
         general_config)
Example #2
def run_task(data_dir, task_id):
    """
    Train and test for each task
    """
    print("Train and test for task %d ..." % task_id)

    # Parse data
    train_files = glob.glob('%s/qa%d_*_train.txt' % (data_dir, task_id))
    test_files = glob.glob('%s/qa%d_*_test.txt' % (data_dir, task_id))

    dictionary = {"nil": 0}
    train_story, train_questions, train_qstory = parse_babi_task(
        train_files, dictionary, False)
    test_story, test_questions, test_qstory = parse_babi_task(
        test_files, dictionary, False)

    general_config = BabiConfig(train_story, train_questions, dictionary)

    memory, model, loss = build_model(general_config)

    if general_config.linear_start:
        train_linear_start(train_story, train_questions, train_qstory, memory,
                           model, loss, general_config)
    else:
        train(train_story, train_questions, train_qstory, memory, model, loss,
              general_config)

    test(test_story, test_questions, test_qstory, memory, model, loss,
         general_config)
Example #3
def main():
    train_story_set = StoryDataset(sis_train, vocab)
    # val_story_set = StoryDataset(sis_val, vocab)
    # test_story_set = StoryDataset(sis_test, vocab)

    train_loader = DataLoader(train_story_set,
                              shuffle=False,
                              batch_size=BATCH_SIZE,
                              collate_fn=collate_story,
                              pin_memory=False)
    # imgs of shape [BS, 5, 3, 224, 224]
    # sents BS * 5  * MAX_LEN

    model_v1 = ModelV1(vocab)

    # Learning rate is the most sensitive value to set,
    # will need to test what works well past 400 instances
    optimizer = torch.optim.Adam(model_v1.parameters(),
                                 lr=0.001)  # .001 for 400
    isTraining = True

    if isTraining:
        train(10, model_v1, train_loader, optimizer)
    else:
        model_v1.load_state_dict(torch.load('./Training/7'))
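        # note: the val/test story sets above are commented out, so the
        # training set is reused here as the test loader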
        test_loader = DataLoader(train_story_set,
                                 shuffle=False,
                                 batch_size=BATCH_SIZE,
                                 collate_fn=collate_story)
        test(model_v1, test_loader, device, vocab)
Example #4
def main():
    model = Seq2Seq(input_dim=40, vocab_size=len(LETTER_LIST), hidden_dim=128)
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss(reduction='none')
    nepochs = 25
    batch_size = 64 if DEVICE == 'cuda' else 1

    speech_train, speech_valid, speech_test, transcript_train, transcript_valid = load_data(
    )
    character_text_train = transform_letter_to_index(transcript_train,
                                                     LETTER_LIST)
    character_text_valid = transform_letter_to_index(transcript_valid,
                                                     LETTER_LIST)

    train_dataset = Speech2TextDataset(speech_train, character_text_train)
    # val_dataset =
    test_dataset = Speech2TextDataset(speech_test, None, False)

    train_loader = DataLoader(train_dataset,
                              batch_size=batch_size,
                              shuffle=True,
                              collate_fn=collate)
    # val_loader =
    test_loader = DataLoader(test_dataset,
                             batch_size=batch_size,
                             shuffle=False,
                             collate_fn=collate_test)

    for epoch in range(nepochs):
        train(model, train_loader, criterion, optimizer, epoch)
        # val()
        test(model, test_loader, epoch)
Example #5
def run():
    parser = GooeyParser()
    subs = parser.add_subparsers(help='commands', dest='commands')

    train_parser = subs.add_parser('train', help='Configure model training')
    param_group = train_parser.add_argument_group("Model parameter option", gooey_options={'show_border': True, 'columns': 2})
    args_param.add(param_group)
    data_group = train_parser.add_argument_group("Data Options", gooey_options={'show_border': True}, )
    args_data.add(data_group)
    save_group = train_parser.add_argument_group("Save option", gooey_options={'show_border': True, 'columns': 2})
    args_save.add(save_group)


    test_parser = subs.add_parser('test', help='Configure model testing')
    data_group = test_parser.add_argument_group("Data Options", gooey_options={'show_border': True}, )
    args_data.add(data_group)
    load_group = test_parser.add_argument_group("Load option", gooey_options={'show_border': True, 'columns': 1})
    args_load.add(load_group, model_savefiles())
    save_group = test_parser.add_argument_group("Save option", gooey_options={'show_border': True, 'columns': 2})
    args_save.add(save_group)

    args = parser.parse_args()
    X, Y = load_data(args.data_path)

    if args.commands == 'train':
        train(args, X, Y, save_dir)
    else:
        with open(save_dir + args.load_model, 'rb') as f:
            model = pickle.load(f)
        test(args, X, Y, save_dir, model)
Example #6
def running_stats(model, num):
    model1 = deepcopy(model)
    print('Running inference in eval mode ...')
    test(model1, test_loader)
    run_mean = deepcopy(model1[num].running_mean)
    run_var = deepcopy(model1[num].running_var)
    print('Running inference in train mode ...')
    test(model1, test_loader, is_train=True)
    print('Mean relation mean ',
          torch.mean(model1[num].running_mean / run_mean))
    print('Mean relation var ', torch.mean(model1[num].running_var / run_var))
Example #7
def test_model(data_dir, model_file, log_path, rnd_seed):
    memn2n = MemN2N(data_dir, model_file, log_path, rnd_seed)
    memn2n.load_model()
    #_, _, memn2n.loss = build_model(memn2n.general_config)
    # Read test data
    print("Reading test data from %s ..." % memn2n.data_dir)
    test_data_path = glob.glob('%s/qa*_*_test.txt' % memn2n.data_dir)
    test_story, test_questions, test_qstory = \
      parse_babi_task(test_data_path,
                      memn2n.general_config.dictionary,
                      False)
    test(test_story, test_questions, test_qstory, memn2n.memory, memn2n.model,
         memn2n.loss, memn2n.general_config)
Example #8
def tester() -> None:
    train_on_gpu = torch.cuda.is_available()
    test_data = torch.load("test_clean.pt")
    test_loader = torch.utils.data.DataLoader(test_data,
                                              batch_size=64,
                                              pin_memory=True)

    model = SiAudNet()
    if train_on_gpu:
        model = model.cuda()
    model.load_state_dict(torch.load("model_siaudnet.pt"))
    test(model, train_on_gpu, test_loader)
    print()
Example #9
def main():
    model = Seq2Seq(input_dim=40, vocab_size=len(LETTER_LIST), hidden_dim=128)
    optimizer = optim.SGD(model.parameters(), lr=1e-4, weight_decay=5e-4)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.93)
    criterion = nn.CrossEntropyLoss(reduction='none')
    init_epoch = 0
    nepochs = 50
    batch_size = 64 if DEVICE == 'cuda' else 1

    speech_train, speech_valid, speech_test, transcript_train, transcript_valid = load_data(
    )
    character_text_train = transform_letter_to_index(transcript_train,
                                                     LETTER_LIST)
    character_text_valid = transform_letter_to_index(transcript_valid,
                                                     LETTER_LIST)

    train_dataset = Speech2TextDataset(speech_train, character_text_train)
    val_dataset = Speech2TextDataset(speech_valid, character_text_valid)
    test_dataset = Speech2TextDataset(speech_test, None, False)

    train_loader = DataLoader(train_dataset,
                              batch_size=batch_size,
                              shuffle=True,
                              collate_fn=collate_train_val)
    val_loader = DataLoader(val_dataset,
                            batch_size=batch_size,
                            shuffle=True,
                            collate_fn=collate_train_val)
    test_loader = DataLoader(test_dataset,
                             batch_size=batch_size,
                             shuffle=False,
                             collate_fn=collate_test)

    val_distances = []
    exp = 27
    # model.load_state_dict(torch.load('BestModel9.pth'))

    with open('stats_{}'.format(exp), 'w') as file:

        file.write('Experiment: {}\n'.format(exp))

    for epoch in range(init_epoch, nepochs):
        train(model, train_loader, criterion, optimizer, scheduler, epoch, exp)
        val_distances.append(val(model, val_loader, epoch, exp))
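        # keep a checkpoint whenever this epoch achieves the lowest validation distance so far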
        if val_distances[-1] == min(val_distances):
            torch.save(model.state_dict(), 'BestModel{}.pth'.format(exp))

        if epoch % 3 == 0 or epoch == nepochs - 1:
            test(model, test_loader, exp)
Example #10
def predict_with_best_net(run_name, sampled=False):
    path = 'model/VGG_paras_{}.pkl'.format(run_name)
    net = my_VGG(requeires_grad=False)
    if torch.cuda.is_available():
        net.load_state_dict(torch.load(path))
        net = net.cuda()
    else:
        net.load_state_dict(torch.load(path, map_location='cpu'))

    _, _, _, _, image_test_list, test_filenames = load_file_list(sampled=sampled)

    criterion = nn.CrossEntropyLoss()

    prediction = test(net, criterion, image_test_list, is_test=True, sampled=sampled, is_file_type=True,
                      model_type='vgg')

    prediction_dict = {}
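    # map each test file's numeric id (the filename stem) to its predicted label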
    for i in range(len(test_filenames)):
        id = int(test_filenames[i].split('.', 1)[0])
        prediction_dict[id] = prediction[i]

    if not os.path.exists('result'):
        os.makedirs('result')

    file_out = open('./result/result_VGG_{}.csv'.format(run_name), 'w')
    file_out.write('id,label\n')
    for key in sorted(prediction_dict.keys()):
        file_out.write('{},{}\n'.format(key, prediction_dict[key]))

    file_out.close()
Example #11
def save():
    sampled = False
    run_name = 'trainable_worker50'
    path = 'model/ResNet_paras_{}.pkl'.format(run_name)
    net = my_ResNet(requeires_grad=False)
    if torch.cuda.is_available():
        net.load_state_dict(torch.load(path))
        net = net.cuda()
    else:
        net.load_state_dict(torch.load(path, map_location='cpu'))

    _, _, images_val_list, labels_val, _, _ = load_file_list(sampled=sampled)

    criterion = nn.CrossEntropyLoss()

    prediction = test(net, criterion, images_val_list, is_test=True, sampled=sampled, is_file_type=True,
                      model_type='resnet', my_path='./train')

    prediction_dict = {}
    for i in range(len(images_val_list)):
        print('{}, label = {}, prediction = {}'.format(images_val_list[i], 1 - labels_val[i], prediction[i]))
        prediction_dict[images_val_list[i]] = (1 - labels_val[i], prediction[i])

    if not os.path.exists('val_analysis'):
        os.makedirs('val_analysis')

    save_object(prediction_dict, 'val_analysis/val_prediction_result.pkl')
Example #12
def get_accuracies(network, loader, device, model_name=None):
    if model_name is not None:
        print(model_name)
    total_len = len(loader.dataset)
    test_loss, correct = test(network, loader, device=device, is_print=False)
    print(
        f'Accuracy in eval mode:  test loss: {np.mean(test_loss):.4f}, accuracy: {np.sum(correct)}/{total_len} ({100. * np.sum(correct)/total_len:.2f}%)'
    )
    test_loss, correct = test(network,
                              loader,
                              device=device,
                              is_train=True,
                              is_print=False)
    print(
        f'Accuracy in train mode: test loss: {np.mean(test_loss):.4f}, accuracy: {np.sum(correct)}/{total_len} ({100. * np.sum(correct)/total_len:.2f}%)'
    )
    print('____________________________________________')
Example #13
def main():
    
    # Train classifiers using training samples
    train_pos_dir = "VJ_dataset\\trainset\\faces"
    train_neg_dir = "VJ_dataset\\trainset\\non-faces"
    rounds = 10
    
    cascade = train( train_pos_dir, train_neg_dir, rounds )
    
    # Save training results
    save_cascade(cascade)
    
    # Test with test samples
    test_pos_dir = "VJ_dataset\\testset\\faces"
    test_neg_dir = "VJ_dataset\\testset\\faces"
    
    test( test_pos_dir, test_neg_dir, cascade, 10 )
Example #14
def main(epochs, batch_size, input_size, hidden_size, num_layers, spatial_num, drop_out, logged=False):
    DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    print('DEVICE: ', DEVICE)

    date = '06_03'
    group = 1
    sorted_ = True
    # sorted_ = False

    # load data from '.npy' file
    # x_train, x_test, y_train, y_test = load_group_eeg_data(date, group, sorted_=sorted_)
    x_train, x_test, y_train, y_test = load_combined_eeg_data(date, sorted_=sorted_)
    # x: (N, C, T)  N: trials  C: channels  T: times 
    train_num, test_num = x_train.shape[0], x_test.shape[0]
    
    # make dataset for train and test
    train_data = MyDataset(x_train, x_test, y_train, y_test)
    test_data = MyDataset(x_train, x_test, y_train, y_test, train=False)
    train_loader = DataLoader(train_data, batch_size=batch_size)
    test_loader = DataLoader(test_data, batch_size=batch_size)

    # model initiation
    # model = LSTM(num_classes=2, input_size=64, hidden_size=256, num_layers=2)
    
    # model = LSTM_CNN(num_classes=2, channels=x_train.shape[1], input_size=input_size, hidden_size=hidden_size, 
    #                  num_layers=num_layers, spatial_num=spatial_num, drop_out=drop_out)
    
    # model = LSTM_CNN_Half(num_classes=2, batch_size=batch_size, T=x_train.shape[-1],
    #                       C=x_train.shape[-2], input_size=input_size, hidden_size=hidden_size,
    #                       num_layers=num_layers, spatial_num=spatial_num)
    
    model = LSTM_CNN_Spatial(num_classes=2, batch_size=batch_size, T=x_train.shape[-1],
                          C=x_train.shape[-2], input_size=input_size, hidden_size=hidden_size,
                          num_layers=num_layers, spatial_num=spatial_num)
    
    model = model.to(DEVICE)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters())

    log = []
    if logged:
        log.append(f'{epochs}\t{batch_size}\t{input_size}\t{hidden_size}\t'
                   f'{num_layers}\t{spatial_num}\t{drop_out}\t')
    train(model, criterion, optimizer, train_loader, DEVICE, train_num, epochs, logged)
    test(model, criterion, test_loader, DEVICE, test_num, log, logged)
Example #15
def run_joint_tasks(data_dir):
    """
    Train and test for all tasks but the trained model is built using training data from all tasks.
    """
    print("Jointly train and test for all tasks ...")
    tasks = range(20)

    # Parse training data
    train_data_path = []
    for t in tasks:
        train_data_path += glob.glob('%s/qa%d_*_train.txt' % (data_dir, t + 1))

    dictionary = {"nil": 0}
    train_story, train_questions, train_qstory = parse_babi_task(
        train_data_path, dictionary, False)

    # Parse test data for each task so that the dictionary covers all words before training
    for t in tasks:
        test_data_path = glob.glob('%s/qa%d_*_test.txt' % (data_dir, t + 1))
        parse_babi_task(test_data_path, dictionary,
                        False)  # ignore output for now

    general_config = BabiConfigJoint(train_story, train_questions, dictionary)
    memory, model, loss = build_model(general_config)

    if general_config.linear_start:
        train_linear_start(train_story, train_questions, train_qstory, memory,
                           model, loss, general_config)
    else:
        train(train_story, train_questions, train_qstory, memory, model, loss,
              general_config)

    # Test on each task
    for t in tasks:
        print("Testing for task %d ..." % (t + 1))
        test_data_path = glob.glob('%s/qa%d_*_test.txt' % (data_dir, t + 1))
        dc = len(dictionary)
        test_story, test_questions, test_qstory = parse_babi_task(
            test_data_path, dictionary, False)
        assert dc == len(
            dictionary
        )  # make sure that the dictionary already covers all words

        test(test_story, test_questions, test_qstory, memory, model, loss,
             general_config)
Example #16
def main():
    model = Seq2Seq(input_dim=40,
                    vocab_size=len(LETTER_LIST),
                    hidden_dim=256,
                    isAttended=True)
    #     print(model)
    optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=5e-5)
    # scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.1)
    criterion = nn.CrossEntropyLoss(reduction='none')
    nepochs = 18
    batch_size = 64 if DEVICE == 'cuda' else 1

    speech_train, speech_valid, speech_test, transcript_train, transcript_valid = load_data(
    )
    character_text_train = transform_letter_to_index(transcript_train,
                                                     LETTER_LIST)
    character_text_valid = transform_letter_to_index(transcript_valid,
                                                     LETTER_LIST)

    train_dataset = Speech2TextDataset(speech_train, character_text_train)
    val_dataset = Speech2TextDataset(speech_valid, character_text_valid)

    test_dataset = Speech2TextDataset(speech_test, None, False)

    train_loader = DataLoader(train_dataset,
                              batch_size=batch_size,
                              shuffle=True)  #, collate_fn=collate_train)
    val_loader = DataLoader(val_dataset, batch_size=batch_size,
                            shuffle=True)  #, collate_fn=collate_train)
    test_loader = DataLoader(test_dataset,
                             batch_size=batch_size,
                             shuffle=False)  #, collate_fn=collate_test)

    for epoch in range(nepochs):
        train(model, train_loader, criterion, optimizer, epoch)
        # scheduler.step()
        val(model, val_loader, criterion, epoch)
        # Test and Save results
        test_preds = test(model, test_loader)
        test_preds = test_preds.cpu().numpy()
        results = []
        for i in range(test_preds.shape[0]):
            result = ""
            for j in range(test_preds.shape[1]):
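                # assumed special indices in LETTER_LIST: 0 and 33 are skipped
                # (e.g. padding/<sos>) and 34 terminates the sequence (<eos>)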
                if (test_preds[i, j] == 0 or (test_preds[i, j] == 33)):
                    continue
                if (test_preds[i, j] == 34):
                    break
                result = result + index2letter[test_preds[i, j]]
            results.append(result)
        name = "Epoch_" + str(epoch) + "_LAS_submission.csv"
        ids = list(range(len(test_dataset)))
        ids.insert(0, 'Id')
        results.insert(0, 'Predicted')
        with open(name, 'w') as f:
            writer = csv.writer(f)
            writer.writerows(zip(ids, results))
Example #17
def test_model(data_dir, model_file, log_path, rnd_seed):
  memn2n = MemN2N(data_dir, model_file, log_path, rnd_seed)
  memn2n.load_model()
  #_, _, memn2n.loss = build_model(memn2n.general_config)
  # Read test data
  print("Reading test data from %s ..." % memn2n.data_dir)
  test_data_path = glob.glob('%s/qa*_*_test.txt' % memn2n.data_dir)
  test_story, test_questions, test_qstory = \
    parse_babi_task(test_data_path, 
                    memn2n.general_config.dictionary, 
                    False)
  test(test_story, 
       test_questions, 
       test_qstory, 
       memn2n.memory, 
       memn2n.model, 
       memn2n.loss, 
       memn2n.general_config)
Example #18
def run(args):
    if USE_CUDA:
        print("Using cuda...")
    else:
        print("Suggest using cuda, break now...")

    phone_map = make_phone_map()
    phone2index, index2phone, index2char = make_phone_char()
    label = make_label(phone2index)
    if args.test:
        test(args.test, args.feature, args.model, args.hidden, args.layer,
             args.output, index2char, index2phone, phone_map, phone2index)
    elif args.loss:
        train_loss(args.loss)
    else:
        train(args.feature, label, args.epochs, args.model, args.layer,
              args.hidden, args.save, args.postfix, index2char, index2phone,
              phone_map, phone2index)
Example #19
def main():
    # Check device available
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print("Running on: {}".format(device))
    # parse and print arguments
    args = make_args_parser()
    print_args(args)
    # Load both source and target domain datasets
    source_dataloader = datasets.get_source_domain(args.source)
    target_dataloader = datasets.get_target_domain(args.target)
    # Create directory to save model's checkpoints
    try:
        model_root = MODEL_CHECKPOINTS + args.source + '-' + args.target
        os.makedirs(model_root)
    except OSError as e:
        if e.errno == errno.EEXIST:
            pass
        else:
            raise
    # Init model
    if args.model == 'MultibranchLeNet':
        net = models.MultibranchLeNet()
        architectures = ['conv1_d', 'conv1_t', 'conv2_d', 'conv2_t']
    if device.type == 'cuda':
        net.cuda()
    # Init losses
    class_loss = torch.nn.NLLLoss()
    domain_loss = torch.nn.NLLLoss()
    if device.type == 'cuda':
        class_loss.cuda()
        domain_loss.cuda()
    # Init optimizer
    optimizer = optim.SGD(net.parameters(),
                          lr=constants.LR,
                          momentum=constants.MOMENTUM)
    # Init all parameters to be optimized using Backpropagation
    for param in net.parameters():
        param.requires_grad = True
    # Train model
    for epoch in range(constants.N_EPOCHS):
        train_test.train(net, class_loss, domain_loss, source_dataloader,
                         target_dataloader, optimizer, epoch, model_root,
                         device)
        train_test.test(net, source_dataloader, target_dataloader, device)
Example #20
def main():

    args = parser.parse_args()

    use_cuda = not args.no_cuda and torch.cuda.is_available()

    torch.manual_seed(args.seed)

    device = torch.device("cuda" if use_cuda else "cpu")

    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

    train_loader = torch.utils.data.DataLoader(datasets.MNIST(
        '/tmp',
        train=True,
        download=True,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307, ), (0.3081, ))
        ])),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               **kwargs)
    test_loader = torch.utils.data.DataLoader(datasets.MNIST(
        '/tmp',
        train=False,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307, ), (0.3081, ))
        ])),
                                              batch_size=args.test_batch_size,
                                              shuffle=True,
                                              **kwargs)

    model = fc.Net().to(device)
    optimizer = optim.SGD(model.parameters(), lr=args.lr)

    for epoch in range(1, args.epochs + 1):
        train(args, model, device, train_loader, optimizer, args.epochs, epoch)
        test(args, model, device, test_loader)

    print_nonzeros(model)
Example #21
def get_accuracies_pd(models, loader, device):
    columns = ['Accuracy eval', 'Accuracy train', 'Loss eval', 'Loss train']
    index = [m for m in models]
    data = pd.DataFrame(index=index, columns=columns)
    total_len = len(loader.dataset)
    for i, model in enumerate(models):
        test_loss, correct = test(models[model],
                                  loader,
                                  device=device,
                                  is_print=False)
        data.loc[model, 'Loss eval'] = np.mean(test_loss)
        data.loc[model, 'Accuracy eval'] = 100. * np.sum(correct) / total_len
        test_loss, correct = test(models[model],
                                  loader,
                                  device=device,
                                  is_train=True,
                                  is_print=False)
        data.loc[model, 'Loss train'] = np.mean(test_loss)
        data.loc[model, 'Accuracy train'] = 100. * np.sum(correct) / total_len
    return data.rename_axis('Model Name', axis=1)
Example #22
def main(nets, net_name, data_info, batch_size, epochs, num_iteration, logged=False):
    # num_classes = date_info['num_classes']
    subject_id = data_info['subject_id']
    edge_type = data_info['edge_type']
    feature_type = data_info['feature_type']
    num_features = data_info['num_features']
    sorted_ = data_info['sorted']

    train_loader = gen_dataloader(data_info['train_lis'],
                                  batch_size=batch_size)
    test_loader = gen_dataloader(data_info['test_lis'],
                                 batch_size=batch_size)

    for i in trange(num_iteration):
        # model initiation
        if net_name == 'tag_jk' or net_name == 'tag_jk_learn':
            model = nets[net_name](num_features, 4).to(device)
        elif net_name == 'tag_lstm':
            model = nets[net_name]().to(device)
        else:
            model = nets[net_name](num_features).to(device)

        # criterion = torch.nn.BCELoss()
        # criterion = torch.nn.CrossEntropyLoss(weight=torch.tensor([1.0, 0.5]).to(device))
        criterion = torch.nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters())

        log = []
        if logged:
            if len(subject_id) > 2:
                log.append(f'{subject_id}\t{sorted_}\t{edge_type:<6s}\t{feature_type:<6s}\t'
                           f'{net_name:<8s}\t{batch_size:<4d}\t{epochs:<4d}\t')
            else:
                log.append(f's{subject_id:02d}\t{edge_type:<6s}\t{feature_type:<6s}\t'
                           f'{net_name:<8s}\t{batch_size:<4d}\t{epochs:<4d}\t')

        train(model, criterion, optimizer, train_loader, device,
              data_info['train_num'], epochs, logged)

        test(model, criterion, test_loader, device,
             data_info['test_num'], log, logged)
Example #23
def imputation_eval(model_class, data, opts):
    if model_class == MAGIC:
        data.x = data.y = data.x.t()
        data.nonzeromask = data.nonzeromask.t()
    criterion = torch.nn.MSELoss()
    kf = KFold(n_splits=3, random_state=opts.seed, shuffle=True)
    loss_test = []
    if opts.dataset == 'Ecoli':
        indices = np.indices([data.x.size(0), data.x.size(1)]).reshape(2, -1)
    else:
        indices = np.array(data.x.cpu().data.numpy().nonzero())
    for k, train_test_indices in enumerate(kf.split(np.arange(len(
            indices[0])))):
        print('Fold number: {:d}'.format(k))
        train_index, test_index = train_test_indices
        eval_data = copy.deepcopy(data)
        eval_data.train_mask = index_to_mask(
            [indices[0, train_index], indices[1, train_index]],
            eval_data.x.size()).to(opts.device)
        eval_data.test_mask = index_to_mask(
            [indices[0, test_index], indices[1, test_index]],
            eval_data.x.size()).to(opts.device)
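        # zero out the held-out (test-fold) entries so only train-fold values are visible during training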
        eval_data.x = eval_data.x * eval_data.train_mask
        if model_class == MAGIC:
            pred = model_class().fit_transform(
                (eval_data.x * eval_data.train_mask).cpu().data.numpy())
            loss_test.append(
                scimse(pred * eval_data.test_mask.cpu().data.numpy(),
                       (eval_data.y * eval_data.test_mask).cpu().data.numpy()))
        else:
            model = model_class(eval_data.num_features, opts).to(opts.device)
            optimizer = torch.optim.Adam(model.parameters(),
                                         lr=opts.learning_rate)
            best_loss = 1e9
            for epoch in range(1, opts.epochs + 1):
                loss_train = train_epoch(model,
                                         eval_data,
                                         optimizer,
                                         opts,
                                         criterion=criterion)
                if loss_train < best_loss:
                    best_loss = loss_train
                    best_model = copy.deepcopy(model)
                if epoch % 10 == 0:
                    print('Epoch number: {:03d}, Train_loss: {:.5f}'.format(
                        epoch, loss_train))
            loss_test.append(test(best_model, eval_data, None, criterion,
                                  opts))
            print('Loss: {:.5f}, TestLoss: {:.5f}'.format(
                loss_train, loss_test[k]))
    print('Average+-std Error for test RNA values: {:.5f}+-{:.5f}'.format(
        np.mean(loss_test), np.std(loss_test)))
    return np.mean(loss_test)
Example #24
def main(m):
    best_error = 100
    opt = parser_params()

    if opt.dataset == 'cifar10':
        train_loader, test_loader = cifar10_dataloaders(
            batch_size=opt.batch_size, num_workers=opt.num_workers)
        n_cls = 10
    else:
        raise NotImplementedError(opt.dataset)

    print(opt.model[m])
    model = model_dict[opt.model[m]](num_classes=n_cls)

    optimizer = optim.SGD(model.parameters(),
                          lr=opt.learning_rate,
                          momentum=opt.momentum,
                          weight_decay=opt.weight_decay)
    criterion = nn.CrossEntropyLoss()

    if torch.cuda.is_available():
        model = model.cuda()
        criterion = criterion.cuda()
        cudnn.benchmark = True

    for epoch in range(1, opt.epochs + 1):

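        # assumed warm-up: model index 4 uses a smaller learning rate for its first epoch only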
        if m == 4 and epoch == 1:
            opt.learning_rate = 0.01
        else:
            opt.learning_rate = 0.1

        adjust_learning_rate(epoch, opt, optimizer)
        print("==> training...")

        train_error, train_loss = train(epoch, train_loader, model, criterion,
                                        optimizer, list_loss_train[m])
        print('epoch {} | train_loss: {}'.format(epoch, train_loss))
        print('epoch {} | train_error: {}'.format(epoch, train_error))

        test_error, test_loss = test(test_loader, model, criterion,
                                     list_loss_test[m])
        print('epoch {} | test_loss: {}'.format(epoch, test_loss))
        print('epoch {} | test_error: {}'.format(epoch, test_error))
        print('iterations: {}'.format(epoch * len(train_loader)))

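        # track the lowest test error seen across epochs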
        if best_error > test_error:
            best_error = test_error

    print('Min error: ', best_error)
Example #25
def run_joint_tasks(data_dir):
    """
    Train and test for all tasks but the trained model is built using training data from all tasks.
    """
    print("Jointly train and test for all tasks ...")
    tasks = range(20)

    # Parse training data
    train_data_path = []
    for t in tasks:
        train_data_path += glob.glob('%s/qa%d_*_train.txt' % (data_dir, t + 1))

    dictionary = {"nil": 0}
    train_story, train_questions, train_qstory = parse_babi_task(train_data_path, dictionary, False)

    # Parse test data for each task so that the dictionary covers all words before training
    for t in tasks:
        test_data_path = glob.glob('%s/qa%d_*_test.txt' % (data_dir, t + 1))
        parse_babi_task(test_data_path, dictionary, False)  # ignore output for now

    general_config = BabiConfigJoint(train_story, train_questions, dictionary)
    memory, model, loss = build_model(general_config)

    if general_config.linear_start:
        train_linear_start(train_story, train_questions, train_qstory, memory, model, loss, general_config)
    else:
        train(train_story, train_questions, train_qstory, memory, model, loss, general_config)

    # Test on each task
    for t in tasks:
        print("Testing for task %d ..." % (t + 1))
        test_data_path = glob.glob('%s/qa%d_*_test.txt' % (data_dir, t + 1))
        dc = len(dictionary)
        test_story, test_questions, test_qstory = parse_babi_task(test_data_path, dictionary, False)
        assert dc == len(dictionary)  # make sure that the dictionary already covers all words

        test(test_story, test_questions, test_qstory, memory, model, loss, general_config)
Example #26
def main():
    model = Seq2Seq(input_dim=40, vocab_size=len(LETTER_LIST), hidden_dim=128)
    learningRate = 0.001
    weightDecay = 5e-5
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=learningRate,
                                 weight_decay=weightDecay)
    criterion = nn.CrossEntropyLoss(reduction='none')
    nepochs = 40
    batch_size = 64 if DEVICE == 'cuda' else 1

    speech_train, speech_valid, speech_test, transcript_train, transcript_valid = load_data(
    )
    character_text_train = transform_letter_to_index(transcript_train,
                                                     LETTER_LIST)
    character_text_valid = transform_letter_to_index(transcript_valid,
                                                     LETTER_LIST)

    train_dataset = Speech2TextDataset(speech_train, character_text_train)
    val_dataset = Speech2TextDataset(speech_valid, character_text_valid)
    test_dataset = Speech2TextDataset(speech_test, None, False)

    train_loader = DataLoader(train_dataset,
                              batch_size=batch_size,
                              shuffle=True,
                              collate_fn=collate_train)
    val_loader = DataLoader(val_dataset,
                            batch_size=batch_size,
                            shuffle=False,
                            collate_fn=collate_train)
    test_loader = DataLoader(test_dataset,
                             batch_size=batch_size,
                             shuffle=False,
                             collate_fn=collate_test)

    model.train()
    model.load_state_dict(torch.load('./new1.pth'))
    model.to(DEVICE)

    scheduler = StepLR(optimizer, step_size=10, gamma=0.5)
    for epoch in range(nepochs):
        train(model, train_loader, criterion, optimizer, epoch)
        scheduler.step()

    model.eval()
    data_list = test(model, test_loader)

    save_to_csv(data_list)
    print('done')
Example #27
def run_task(data_dir, task_id):
    """
    Train and test for each task
    """
    print("Train and test for task %d ..." % task_id)

    # Parse data
    train_files = glob.glob('%s/qa%d_*_train.txt' % (data_dir, task_id))
    test_files = glob.glob('%s/qa%d_*_test.txt' % (data_dir, task_id))

    dictionary = {"nil": 0}
    train_story, train_questions, train_qstory = parse_babi_task(train_files, dictionary, False)
    test_story, test_questions, test_qstory = parse_babi_task(test_files, dictionary, False)

    general_config = BabiConfig(train_story, train_questions, dictionary)

    memory, model, loss = build_model(general_config)

    if general_config.linear_start:
        train_linear_start(train_story, train_questions, train_qstory, memory, model, loss, general_config)
    else:
        train(train_story, train_questions, train_qstory, memory, model, loss, general_config)

    test(test_story, test_questions, test_qstory, memory, model, loss, general_config)
Example #28
def run_tableQA(data_path, model_file):
    """
    Train and test for table QA
    """

    # Parse data
    train_files = glob.glob(data_path.format('train'))
    test_files = glob.glob(data_path.format('test'))
    # SV: init dict with pre-trained vectors, e.g. from fastText
    # dictionary = fasttext.load_model(EMBEDDINGS_MODEL_PATH)
    dictionary = {"nil": 0}
    train_story, train_questions, train_qstory = parse_babi_task(
        train_files, dictionary, False)
    test_story, test_questions, test_qstory = parse_babi_task(
        test_files, dictionary, False)
    # print(test_questions)
    print('Dictionary:', len(dictionary))
    general_config = BabiConfig(train_story, train_questions, dictionary)

    memory, model, loss = build_model(general_config)

    if general_config.linear_start:
        train_linear_start(train_story, train_questions, train_qstory, memory,
                           model, loss, general_config)
    else:
        train(train_story, train_questions, train_qstory, memory, model, loss,
              general_config)

    test(test_story, test_questions, test_qstory, memory, model, loss,
         general_config)

    # save_model
    with gzip.open(model_file, "wb") as f:
        print("Saving model to file %s ..." % model_file)
        reversed_dict = dict((ix, w) for w, ix in dictionary.items())
        pickle.dump((reversed_dict, memory, model, loss, general_config), f)
Example #29
def predict_with_best_net(run_name, sampled=False, hidden_num=50):
    path = 'model/Mixture_paras_{}.pkl'.format(run_name)
    net = Mixture_Model(hidden_num=hidden_num)
    if torch.cuda.is_available():
        net.load_state_dict(torch.load(path))
        net = net.cuda()
    else:
        net.load_state_dict(torch.load(path, map_location='cpu'))

    _, _, _, _, images_test, test_filenames = get_data(
        'cache/embedding_resnet_nonshuffle.pkl',
        'cache/embedding_vgg_nonshuffle.pkl')

    images_test = torch.from_numpy(images_test)

    criterion = nn.CrossEntropyLoss()

    prediction = test(net,
                      criterion,
                      images_test,
                      is_test=True,
                      sampled=sampled,
                      is_file_type=False,
                      dim=2)

    prediction_dict = {}
    for i in range(len(test_filenames)):
        id = int(test_filenames[i].split('.', 1)[0])
        prediction_dict[id] = prediction[i]

    if not os.path.exists('result'):
        os.makedirs('result')

    file_out = open('./result/result_Mixture_{}.csv'.format(run_name), 'w')
    file_out.write('id,label\n')
    for key in sorted(prediction_dict.keys()):
        file_out.write('{},{}\n'.format(key, prediction_dict[key]))

    file_out.close()
Example #30
def run_epochs(model, train_dataloader, validation_dataloader, optimizer,
               scheduler, epochs):
    start_time = time.time()

    train_losses, train_accs = [], []
    test_losses, test_accs = [], []

    for epoch in range(epochs):
        print('======== Epoch %d ========' % epoch)

        train_loss, train_acc = train(model, train_dataloader, optimizer,
                                      scheduler)
        train_losses.append(train_loss)
        train_accs.append(train_acc)

        test_loss, test_acc = test(model, validation_dataloader)
        test_losses.append(test_loss)
        test_accs.append(test_acc)

    print("Total training took %.2f seconds" % (time.time() - start_time))

    return train_losses, train_accs, test_losses, test_accs
Example #31
def run_test(data_dir, task_id, memn2n):
    print("Test for task %d ..." % task_id)
    test_files = None
    if type(data_dir) is tuple:
        test_files = glob.glob('%s/qa%d_valid.txt' % (data_dir[1], task_id))
    else:
        test_files = glob.glob('%s/qa%d_*_test.txt' % (data_dir, task_id))

    test_story, test_questions, test_qstory = parse_babi_task(
        test_files, memn2n.general_config.dictionary, False)
    """
    reversed_dict = None
    memory = None
    model = None
    loss = None
    general_config = None

    with gzip.open(model_file, "rb") as f:
        self.reversed_dict, self.memory, self.model, self.loss, self.general_config = pickle.load(f)
    """
    return test(test_story, test_questions, test_qstory, memn2n.memory,
                memn2n.model, memn2n.loss, memn2n.general_config)
Example #32
def run(model, optimizer, criterion, train_loader, dev_loader, nepochs):
    train_losses, train_accs = [], []
    test_losses, test_accs = [], []
    epochs = []

    for e in range(nepochs):
        print('----- EPOCH %d ------- \n' % e)
        start_time = time.time()

        # Train
        train_loss, train_acc = train(train_loader, model, criterion,
                                      optimizer)
        train_losses.append(train_loss)
        train_accs.append(train_acc)

        # Test
        test_loss, test_acc = test(dev_loader, model, criterion)
        test_losses.append(test_loss)
        test_accs.append(test_acc)

    print(train_loss, train_acc, test_loss, test_acc)
    return train_losses, train_accs, test_losses, test_accs
Example #33
def result_gen(test_loader, model_num):
    model = Seq2Seq(input_dim=40,
                    vocab_size=len(LETTER_LIST),
                    hidden_dim=128,
                    value_size=128,
                    key_size=256,
                    is_attended=True)

    model.load_state_dict(torch.load('model_{}'.format(model_num + 1)))
    model.eval()

    model = model.to(DEVICE)

    test_text = test(model, test_loader)

    test_text_str = []

    for cur_text in test_text:
        test_text_str.append(transform_index_to_letter(cur_text, LETTER_LIST))

    res_df = pd.DataFrame(test_text_str)
    res_df.to_csv('result_{}.csv'.format(model_num + 1),
                  index=True,
                  header=False)