Пример #1
0
def run_task(data_directory, task_id):
    """
    Train a model on one task and then evaluate it on that task's test files.

    :param data_directory: Directory that is searched for the
        ``qa<task_id>_*_train.txt`` and ``qa<task_id>_*_test.txt`` files.
    :param task_id: Integer id of the task to run.
    """
    print("Train and test for task %d ..." % task_id)

    # Collect the files that belong to this task (train first, then test).
    train_pattern = '%s/qa%d_*_train.txt' % (data_directory, task_id)
    test_pattern = '%s/qa%d_*_test.txt' % (data_directory, task_id)
    train_files = glob.glob(train_pattern)
    test_files = glob.glob(test_pattern)

    # One vocabulary object is handed to both parse calls and to the config.
    vocab = {"nil": 0}

    # story:     (SENTENCE_SIZE, STORY_SIZE, NUM_STORIES)
    # questions: (14 (see parser.py), NUM_SAMPLES)
    # qstory:    (SENTENCE_SIZE, NUM_SAMPLES)
    story_tr, questions_tr, qstory_tr = parse_babi_task(train_files, vocab, False)
    story_te, questions_te, qstory_te = parse_babi_task(test_files, vocab, False)

    config = BabiConfig(story_tr, questions_tr, vocab)
    memory, model, loss = build_model(config)

    # The configuration decides which training routine is used.
    fit = train_linear_start if config.linear_start else train
    fit(story_tr, questions_tr, qstory_tr, memory, model, loss, config)

    test(story_te, questions_te, qstory_te, memory, model, loss, config)
Пример #2
0
def loadModel():
    """Load the model into the module-level ``model`` and time a single test run.

    Prints the elapsed time of ``mltest.load_model()`` and of one
    ``mltest.test`` call on the training entry named ``"75119"``.

    Relies on the module-level ``train_data`` and ``trainNameList``.
    ``trainNameList.index`` raises ``ValueError`` if ``"75119"`` is missing.
    """
    global model, train_data, trainNameList

    # time.clock() was removed in Python 3.8; time.perf_counter() is the
    # documented replacement for measuring elapsed intervals.
    start = time.perf_counter()
    model = mltest.load_model()
    end = time.perf_counter()
    print('loadModel time: %s Seconds' % (end - start))

    start = time.perf_counter()
    sample = train_data[trainNameList.index("75119")]
    mltest.test(model, FloorPlan(sample, train=True))
    end = time.perf_counter()
    print('test Model time: %s Seconds' % (end - start))
Пример #3
0
def train():
    """Build the training graph and optimise it in an endless loop.

    Whenever the global step returned by the session is a multiple of 100,
    the mean of the losses collected since the previous report is printed
    and ``model.test.test`` is run. The loop itself never terminates.
    """
    graph = Graph(is_train=True)
    graph.create_model()

    supervisor = tf.train.Supervisor(
        logdir=get_path(args.logdir),
        global_step=graph.global_step,
        saver=graph.saver,
        save_model_secs=600,
    )
    session = supervisor.PrepareSession()

    recent_losses = []
    while True:
        feed = get_data(graph.inputs_ph)
        _, batch_loss, step = session.run(
            [graph.train_op, graph.loss, graph.global_step], feed)
        recent_losses.append(batch_loss)

        # Nothing to report between multiples of 100 steps.
        if step % 100 != 0:
            continue

        print('Loss\t%s.' % np.mean(recent_losses))
        recent_losses = []

        # Imported lazily, only once a report is actually due.
        from model.test import test
        test()
Пример #4
0
def run_task(data_directory, task_id):
    """
    Run the full parse / build / train / test pipeline for one task.

    :param data_directory: Directory containing the task's train and test files.
    :param task_id: Number of the task whose files are selected.
    """
    print("Train and test for task %d ..." % task_id)

    # Find this task's data files.
    train_files = glob.glob(
        '%s/qa%d_*_train.txt' % (data_directory, task_id))
    test_files = glob.glob(
        '%s/qa%d_*_test.txt' % (data_directory, task_id))

    dictionary = {"nil": 0}

    # Shapes of the parsed arrays:
    #   story     -> (SENTENCE_SIZE, STORY_SIZE, NUM_STORIES)
    #   questions -> (14 (see parser.py), NUM_SAMPLES)
    #   qstory    -> (SENTENCE_SIZE, NUM_SAMPLES)
    train_story, train_questions, train_qstory = parse_babi_task(
        train_files, dictionary, False)
    test_story, test_questions, test_qstory = parse_babi_task(
        test_files, dictionary, False)

    general_config = BabiConfig(train_story, train_questions, dictionary)
    memory, model, loss = build_model(general_config)

    # Trailing arguments shared by the training and testing calls.
    shared = (memory, model, loss, general_config)

    if not general_config.linear_start:
        train(train_story, train_questions, train_qstory, *shared)
    else:
        train_linear_start(train_story, train_questions, train_qstory, *shared)

    test(test_story, test_questions, test_qstory, *shared)
Пример #5
0
        ), batch_size=1000, shuffle=True)

    visualize_dataset_tensorboard(train_loader, writer)

    # Hyper-parameters of this run.
    lr = 0.01
    epochs = 1
    model = Net()
    # model_state_dict = torch.load(str(URLs.RESULTS_PATH / 'model.pth'))
    # model.load_state_dict(model_state_dict)
    opt = optim.SGD(model.parameters(), lr, momentum=0.5)
    # opt_state_dict = torch.load(str(URLs.RESULTS_PATH / 'optimizer.pth'))
    # opt.load_state_dict(opt_state_dict)
    # Lists passed to train()/test() below and to plot_perf() at the end.
    train_losses = []
    train_counter = []
    test_losses = []
    # One x-position per evaluation: test() is called once before training
    # and once after every epoch, i.e. epochs + 1 times in total.
    test_counter = [i * len(train_loader.dataset) for i in range(epochs + 1)]
    device = torch.device("cpu")

    # Use the first CUDA device when one is available.
    if torch.cuda.is_available():
        device = torch.device('cuda:0')
        model.cuda()

    start_time = time.time()
    # Evaluation before any training has happened.
    test(model, test_loader, device, test_losses)
    for epoch in range(1, epochs + 1):
        train(epoch, model, opt, train_loader, device, train_losses, train_counter, 10, writer)
        test(model, test_loader, device, test_losses)
    writer.close()
    # NOTE(review): "%M:%S" only shows minutes and seconds, so the printed
    # value wraps around for runs of one hour or longer.
    print(f'Running time: {time.strftime("%M:%S", time.gmtime(time.time() - start_time))} sec')
    plot_perf(train_counter, train_losses, test_counter, test_losses)
Пример #6
0
def main():
    """Entry point: dispatch on ``args.mode``.

    * ``'training'``          -> ``train(args)``
    * ``'check_performance'`` -> ``check_dev_performance(args)``
    * ``'inference'``         -> ``test(args)`` followed by ``print_scores``
    * anything else           -> score-combination path described below.

    The score-combination path reads six extra values from ``other`` (the
    second value returned by ``get_args()``), in this order: ``topK``,
    ``alpha``, ``beta``, ``gamma``, the index of the held-out fold and a
    sub-mode string. Sub-mode ``'train'`` runs a grid search over
    (alpha, beta, gamma) on the training folds, ``'test'`` scores the
    held-out fold and any other value scores all topics.
    """
    args, other = get_args()

    experiment = args.experiment
    anserini_path = args.anserini_path
    datasets_path = os.path.join(args.data_path, 'datasets')

    # Make sure the relative 'log' directory exists.
    if not os.path.isdir('log'):
        os.mkdir('log')

    if args.mode == 'training':
        train(args)

    elif args.mode == 'check_performance':
        check_dev_performance(args)

    elif args.mode == 'inference':
        scores = test(args)
        print_scores(scores)
    else:
        # Fold definitions and relevance judgements are looked up inside the
        # Anserini checkout.
        folds_path = os.path.join(anserini_path, 'src', 'main', 'resources',
                                  'fine_tuning', args.folds_file)
        qrels_path = os.path.join(anserini_path, 'src', 'main', 'resources',
                                  'topics-and-qrels', args.qrels_file)

        # Extra arguments; an IndexError/ValueError here means that fewer
        # than six values were supplied or that one is not numeric.
        topK = int(other[0])
        alpha = float(other[1])
        beta = float(other[2])
        gamma = float(other[3])
        test_folder_set = int(other[4])
        mode = other[5]

        # Divide topics according to fold parameters
        train_topics, test_topics, all_topics = [], [], []
        with open(folds_path) as f:
            folds = json.load(f)
        for i in range(0, len(folds)):
            all_topics.extend(folds[i])
            # The fold at index test_folder_set is held out for testing;
            # every other fold goes to the training topics.
            if i != test_folder_set:
                train_topics.extend(folds[i])
            else:
                test_topics.extend(folds[i])

        if args.interactive:
            sentid2text = query_sents(args)
            test(args)  # inference over each sentence

        # Interactive runs read the collection and the predictions from the
        # paths given on the command line instead of the default locations.
        collection_path = os.path.join(
            datasets_path, args.collection +
            '.csv') if not args.interactive else args.interactive_path
        predictions_path = os.path.join(
            args.data_path, 'predictions', 'predict.' +
            experiment) if not args.interactive else os.path.join(
                args.data_path, 'predictions', args.predict_path)

        top_doc_dict, doc_bm25_dict, sent_dict, q_dict, doc_label_dict = eval_bm25(
            collection_path)
        score_dict = load_bert_scores(predictions_path, q_dict, sent_dict)

        if args.interactive:
            # Dump the first 100 and the last 100 entries of the ranking as
            # tab-separated rows; the last column is 'BM25' when doc[3] > 0
            # and 'BERT' otherwise.
            top_rank_docs = visualize_scores(collection_path, score_dict)
            with open(os.path.join(args.data_path, 'query_sent_scores.csv'),
                      'w') as scores_file:
                for doc in top_rank_docs[:100]:
                    scores_file.write('{}\t{}\t{}\t{}\t{}\n'.format(
                        doc[0], sentid2text[doc[0]], doc[1], doc[2],
                        'BM25' if doc[3] > 0 else 'BERT'))
                for doc in top_rank_docs[-100:]:
                    scores_file.write('{}\t{}\t{}\t{}\t{}\n'.format(
                        doc[0], sentid2text[doc[0]], doc[1], doc[2],
                        'BM25' if doc[3] > 0 else 'BERT'))

        if not os.path.isdir('runs'):
            os.mkdir('runs')

        if mode == 'train':
            topics = train_topics if not args.interactive else list(
                q_dict.keys())
            # Grid search for best parameters
            # np.arange uses a half-open interval, so alpha, beta and gamma
            # are exclusive upper bounds (up to float rounding).
            for a in np.arange(0.0, alpha, 0.1):
                for b in np.arange(0.0, beta, 0.1):
                    for g in np.arange(0.0, gamma, 0.1):
                        # presumably writes the run file read back through
                        # `base` below — TODO confirm in calc_q_doc_bert
                        calc_q_doc_bert(score_dict,
                                        'run.' + experiment + '.cv.train',
                                        topics, top_doc_dict, doc_bm25_dict,
                                        topK, a, b, g)
                        base = 'runs/run.' + experiment + '.cv.train'
                        # NOTE(review): the command line is assembled by
                        # string formatting and executed through the shell;
                        # its stdout is redirected to 'eval.base' in the
                        # current working directory.
                        os.system(
                            '{}/eval/trec_eval.9.0.4/trec_eval -M1000 -m map {} {}> eval.base'
                            .format(anserini_path, qrels_path, base))
                        with open('eval.base', 'r') as f:
                            for line in f:
                                # Each line must hold exactly three
                                # tab-separated fields; the third is the score.
                                metric, qid, score = line.split('\t')
                                map_score = float(score)
                                print(test_folder_set, round(a, 2),
                                      round(b, 2), round(g, 2), map_score)

        elif mode == 'test':
            topics = test_topics if not args.interactive else list(
                q_dict.keys())
            calc_q_doc_bert(
                score_dict,
                'run.' + experiment + '.cv.test.' + str(test_folder_set),
                topics, top_doc_dict, doc_bm25_dict, topK, alpha, beta, gamma)
        else:
            # Any other sub-mode scores every topic of every fold.
            topics = all_topics if not args.interactive else list(
                q_dict.keys())
            calc_q_doc_bert(score_dict, 'run.' + experiment + '.cv.all',
                            topics, top_doc_dict, doc_bm25_dict, topK, alpha,
                            beta, gamma)
Пример #7
0
def detect():
    """Run the graph-based bot-detection model on the request's JSON body.

    The decoded JSON is printed, passed to ``test`` and the result of that
    call is returned converted to ``str``.
    """
    payload = request.get_json()
    print(payload)
    verdict = test(payload)
    return str(verdict)
Пример #8
0
def main():
    """Entry point: dispatch on ``args.mode``.

    * ``'training'``  -> ``train(args)``
    * ``'inference'`` -> ``test(args)``
    * anything else   -> interactive querying when ``args.interactive`` is
      set, otherwise the cross-validation path described below.

    The cross-validation path reads six extra values from ``other`` (the
    second value returned by ``get_args()``), in this order: ``topK``,
    ``alpha``, ``beta``, ``gamma``, the index of the held-out fold and a
    sub-mode string. Sub-mode ``'train'`` runs a grid search over
    (alpha, beta, gamma) on the training folds and ``'test'`` scores the
    held-out fold; any other sub-mode does nothing further.
    """
    args, other = get_args()

    experiment = args.experiment
    anserini_path = args.anserini_path
    datasets_path = os.path.join(args.data_path, 'datasets')

    if args.mode == 'training':
        train(args)
    elif args.mode == 'inference':
        test(args)
    else:
        if args.interactive:
            # TODO: sync with HiCAL
            from utils.query import query_sents, visualize_scores

            # Neither sentid2text nor hits is used again in this function.
            sentid2text, hits = query_sents(args, K=10)
            test(args)

        else:
            # Fold definitions and relevance judgements are named after the
            # collection and live below args.data_path.
            folds_path = os.path.join(args.data_path, 'folds',
                                      '{}-folds.json'.format(args.collection))
            qrels_path = os.path.join(args.data_path, 'qrels',
                                      'qrels.{}.txt'.format(args.collection))

            # Extra arguments; an IndexError/ValueError here means that
            # fewer than six values were supplied or that one is not numeric.
            topK = int(other[0])
            alpha = float(other[1])
            beta = float(other[2])
            gamma = float(other[3])
            test_folder_set = int(other[4])
            mode = other[5]

            # Divide topics according to fold parameters
            train_topics, test_topics, all_topics = [], [], []
            with open(folds_path) as f:
                folds = json.load(f)
            for i in range(0, len(folds)):
                all_topics.extend(folds[i])
                # The fold at index test_folder_set is held out for testing;
                # every other fold goes to the training topics.
                if i != test_folder_set:
                    train_topics.extend(folds[i])
                else:
                    test_topics.extend(folds[i])

            collection_path = os.path.join(
                datasets_path, '{}_sents.csv'.format(args.collection))
            predictions_path = os.path.join(args.data_path, 'predictions',
                                            'predict.' + experiment)

            top_doc_dict, doc_bm25_dict, sent_dict, q_dict, doc_label_dict = eval_bm25(
                collection_path)
            score_dict = load_bert_scores(predictions_path, q_dict, sent_dict)

            if not os.path.isdir('runs'):
                os.mkdir('runs')

            if mode == 'train':
                # Grid search: np.arange uses a half-open interval, so alpha,
                # beta and gamma are exclusive upper bounds (up to float
                # rounding).
                for a in np.arange(0.0, alpha, 0.1):
                    for b in np.arange(0.0, beta, 0.1):
                        for g in np.arange(0.0, gamma, 0.1):
                            # presumably writes the run file read back
                            # through `base` below — TODO confirm in
                            # calc_q_doc_bert
                            calc_q_doc_bert(score_dict,
                                            'run.' + experiment + '.cv.train',
                                            train_topics, top_doc_dict,
                                            doc_bm25_dict, topK, a, b, g)
                            base = 'runs/run.' + experiment + '.cv.train'
                            # NOTE(review): the command line is assembled by
                            # string formatting and executed through the
                            # shell; its stdout is redirected to 'eval.base'
                            # in the current working directory.
                            os.system(
                                '{}/eval/trec_eval.9.0.4/trec_eval -M1000 -m map {} {}> eval.base'
                                .format(anserini_path, qrels_path, base))
                            with open('eval.base', 'r') as f:
                                for line in f:
                                    # Each line must hold exactly three
                                    # tab-separated fields; the third is the
                                    # score.
                                    metric, qid, score = line.split('\t')
                                    map_score = float(score)
                                    print(test_folder_set, round(a, 2),
                                          round(b, 2), round(g, 2), map_score)

            elif mode == 'test':
                calc_q_doc_bert(
                    score_dict,
                    'run.' + experiment + '.cv.test.' + str(test_folder_set),
                    test_topics, top_doc_dict, doc_bm25_dict, topK, alpha,
                    beta, gamma)
Пример #9
0
# Model and training objective.
model = GMAN(SE, args, bn_decay=0.1)
loss_criterion = nn.MSELoss()

# Adam optimiser whose learning rate is multiplied by 0.9 every
# args.decay_epoch scheduler steps.
optimizer = optim.Adam(model.parameters(), args.learning_rate)
scheduler = optim.lr_scheduler.StepLR(
    optimizer, step_size=args.decay_epoch, gamma=0.9)
parameters = count_parameters(model)
log_string(log, 'trainable parameters: {:,}'.format(parameters))

if __name__ == '__main__':
    start = time.time()
    loss_train, loss_val = train(
        model, args, log, loss_criterion, optimizer, scheduler)
    plot_train_val_loss(loss_train, loss_val, 'figure/train_val_loss.png')
    trainPred, valPred, testPred = test(args, log)
    elapsed_minutes = (time.time() - start) / 60
    log_string(log, 'total time: %.1fmin' % elapsed_minutes)
    log.close()

    # (file stem, tensor to dump, tensor whose last dimension fixes the
    # number of columns). Every prediction is reshaped like its ground truth.
    dumps = [
        ('trainPred', trainPred, trainY),
        ('trainY', trainY, trainY),
        ('valPred', valPred, valY),
        ('valY', valY, valY),
        ('testPred', testPred, testY),
        ('testY', testY, testY),
    ]

    # Flatten everything to 2-D first, then write; nothing is written if any
    # of the conversions fails.
    flattened = [
        (stem, tensor.numpy().reshape(-1, reference.shape[-1]))
        for stem, tensor, reference in dumps
    ]

    # Save training, validation and testing data to disk
    for stem, array in flattened:
        np.savetxt('./figure/' + stem + '.txt', array, fmt='%s')
Пример #10
0
    # A single object is loaded from './models'; it is indexed below with the
    # keys "model_<i>_args", "model_<i>_kwargs" and "model_<i>_state_dict".
    models = to.load('./models', map_location=device)
    # Evaluate models 2..10; model i is tested with test_classes = range(i).
    for i in range(2, 11):
        test_classes = range(i)
        ds_test = tv.datasets.MNIST(root='./',
                                    train=False,
                                    transform=tv.transforms.ToTensor(),
                                    download=True)
        # presumably keeps only the samples whose label is in test_classes —
        # TODO confirm in filter_classes
        ds_test.targets, ds_test.data = filter_classes(ds_test, test_classes)
        dl_test = to.utils.data.DataLoader(ds_test, batch_size=256)
        model_name = "model_%d_" % i
        # Rebuild the network from its stored constructor arguments, then
        # restore its weights.
        args = models[model_name + "args"]
        kwargs = models[model_name + "kwargs"]
        model = NN(*args, **kwargs).to(device)
        model.load_state_dict(models[model_name + "state_dict"])
        print("Testing model %d..." % i)
        # Only the first element of test()'s result is used.
        acc, _ = test(model, dl_test, closed_set_accuracy, device)
        print("Testing accuracy  %f" % acc)

if mode == "sample":
    # Print one sample of the shuffled MNIST test split as a flat JSON list.
    # The dataset is first passed through filter_classes with range(9).
    batch_size = 1
    train_classes = range(9)
    sample_set = tv.datasets.MNIST(
        root='./',
        train=False,
        transform=tv.transforms.ToTensor(),
        download=True,
    )
    sample_set.targets, sample_set.data = filter_classes(
        sample_set, train_classes)
    sample_loader = to.utils.data.DataLoader(
        sample_set, batch_size=batch_size, shuffle=True)

    # islice limits the iteration to the first batch only.
    for images, labels in islice(sample_loader, 1):
        flat_images = images.view(batch_size, -1).cpu().numpy()
        labels = labels.cpu().numpy()
        print(json.dumps(flat_images[0].tolist()))
Пример #11
0
 def createTest(hill, city):
     """Call ``test(1, hill, city)`` and bind the result to a local name.

     NOTE(review): the visible code neither returns nor otherwise uses the
     result, so the function returns ``None``; the snippet may be truncated.
     """
     vytvorenyTest = test(1, hill, city)
Пример #12
0
    # Results go into a fresh directory whose name encodes the local
    # timestamp (month/day-hour/minute) and the main hyper-parameters.
    time_str = time.strftime("%m%d-%H%M", time.localtime(time.time()))
    rootdir = "{}/{}/{}-semi-{}-fixed-{}-ratio-{}-lr-{}/".format(
        "/data/yangy/data_prepare/result", hp['dataname'], time_str,
        str(hp['semi']), str(hp['fixed']), str(hp['ratio']), str(args.lr))
    os.makedirs(rootdir, exist_ok=True)
    hp['rootdir'] = rootdir

    # Store the hyper-parameters next to the results.
    np.save('{}parameter.npy'.format(rootdir), hp)

    # Load the models
    my_models = load_model(hp)

    # Load the data
    train_data, test_data = load_data(hp)

    # Pre-train the models (currently disabled)
    #my_models = pre_train(hp, my_models, train_data, test_data)

    # Evaluate the pre-trained models (currently disabled)
    #result = test(test_data,hp,my_models,'pretrain')

    # Train the models
    my_models = train(hp, my_models, train_data)

    # Save the models
    save_model(my_models, rootdir)

    # Test the models
    result = test(test_data, hp, my_models, 'final')
Пример #13
0
# For fix slurm cannot load PYTHONPATH
import sys
sys.path.insert(0, '/ihome/hdaqing/saz31/sanqiang/text_simplification')

import os
from model.test import test
from model.model_config import SubTestWikiEightRefConfig, SubTestWikiEightRefConfigV2, SubTestWikiEightRefConfigV2Sing
from util.arguments import get_args

args = get_args()

if __name__ == '__main__':
    mapper = {}
    # Checkpoints are searched below the configured output folder.
    path = '/zfs1/hdaqing/saz31/text_simplification/' + args.output_folder
    index_suffix = '.index'
    step_prefix = 'ckpt-'
    # Every configuration is evaluated, in this order, for each checkpoint
    # that has no result file yet.
    config_classes = (
        SubTestWikiEightRefConfig,
        SubTestWikiEightRefConfigV2,
        SubTestWikiEightRefConfigV2Sing,
    )

    for root, dirs, files in os.walk(path):
        # Only directories with 'model' in their path hold checkpoints.
        if 'model' not in root:
            continue
        for filename in files:
            if not filename.endswith(index_suffix):
                continue

            # The training step sits between 'ckpt-' and the '.index' suffix.
            step_begin = filename.index(step_prefix) + len(step_prefix)
            step_end = filename.rindex(index_suffix)
            step = filename[step_begin:step_end]

            resultpath = root + '/../result/eightref_test/joshua_target_' + step + '.txt'
            if os.path.exists(resultpath):
                continue

            ckpt = root + '/' + filename[:-len(index_suffix)]
            for config_class in config_classes:
                test(config_class(), ckpt)
Пример #14
0
def pre_train(hp, models, train_data, test_data):
    """Pre-train the view models with an MSE loss, testing after every epoch.

    All models are moved to the GPU and optimised jointly with one Adam
    optimiser. After each epoch the models are saved to
    ``hp['modelpath'] + str(epoch + 1) + '/'``, ``hp['rootdir']`` is set to
    that directory and ``test(test_data, hp, models, 'pretrain')`` is run.

    :param hp: Hyper-parameter mapping. Reads ``'pre_lr'``, ``'pre_size'``,
        ``'pre_epoch'`` and ``'modelpath'``; ``'rootdir'`` is overwritten.
    :param models: Indexable collection of models; ``models[0]`` is fed
        ``x1``/``bag1`` and ``models[1]`` is fed ``x2``/``bag2``.
    :param train_data: Indexable with (at least) three subsets, any of which
        may be ``None``. Subset 0 trains both models, subset 1 only
        ``models[1]`` and subset 2 only ``models[0]``.
    :param test_data: Passed through unchanged to ``test``.
    :return: The ``models`` object, trained in place.
    """
    print("----------start pre-training models----------")
    view_num = len(models)
    par = []
    for i in range(view_num):
        models[i].cuda()
        models[i].train()
        par.append({'params': models[i].parameters()})

    optimizer = optim.Adam(par, lr=hp['pre_lr'])
    scheduler = StepLR(optimizer, step_size=10, gamma=0.5)
    batch_size = hp['pre_size']
    loss_func = nn.MSELoss()

    for epoch in range(hp['pre_epoch']):
        # NOTE(review): PyTorch >= 1.1 expects scheduler.step() to be called
        # after optimizer.step(); the original order is kept here so that the
        # learning-rate schedule does not silently change.
        scheduler.step()
        running_loss = 0.0
        data_num = 0
        # Back to training mode for every epoch.
        for i in range(view_num):
            models[i].train()
        for i in range(3):
            data = train_data[i]
            # Identity check: `== None` is evaluated element-wise by
            # array/tensor-like objects and is not a reliable "missing" test.
            if data is None:
                continue
            bag_num = len(data)
            data_num += bag_num
            # Ceiling division: the last batch may be smaller than batch_size.
            max_step = (bag_num + batch_size - 1) // batch_size

            for step in range(max_step):
                # get data
                step_data = get_batch(
                    data,
                    list(
                        range(step * batch_size,
                              min((step + 1) * batch_size, bag_num))), hp)
                x1, x2, bag1, bag2, y = step_data
                b_y = Variable(y).cuda()
                loss = 0
                # Subsets 0 and 2 contribute a loss term from models[0].
                if i == 0 or i == 2:
                    x_img = Variable(x1).cuda()
                    h1, _, _ = models[0](x_img, bag1)
                    loss += loss_func(h1, b_y)
                # Subsets 0 and 1 contribute a loss term from models[1].
                if i == 0 or i == 1:
                    x_text = Variable(x2).cuda()
                    h2, _, _ = models[1](x_text, bag2)
                    loss += loss_func(h2, b_y)

                # Weight the batch loss by the batch size (taken from x2).
                running_loss += loss.data * x2.size(0)

                # backward
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
        # epoch loss
        epoch_loss = running_loss / data_num
        print('epoch {}/{} | Loss: {:.9f}'.format(epoch, hp['pre_epoch'],
                                                  epoch_loss))

        # Snapshot the models of this epoch and evaluate them.
        rootpath = "{}{}/".format(hp['modelpath'], str(epoch + 1))
        os.makedirs(rootpath, exist_ok=True)
        save_model(models, rootpath)
        hp['rootdir'] = rootpath
        test(test_data, hp, models, 'pretrain')

    print("----------end pre-training models----------")
    return models
Пример #15
0
#-*- coding:utf-8 -*-
import model.train as train
import model.test as test
import tensorflow as tf

if __name__ == '__main__':
    # Train first, then evaluate in the same process.
    train.train()
    # Clear the default graph left behind by training before test runs.
    tf.reset_default_graph()
    test.test()