def benchmark(weights_filename, r, verbose="True"):
    filepath = 'model/weights/' + weights_filename  # filepath of weights
    num_images = 729  # number of testing images to benchmark on (<=729)
    # verbose arrives as a string (e.g. from a command-line argument); convert to bool
    verbose = (verbose == "True")
    
    # Compile model
    opt = tf.keras.optimizers.Adam(learning_rate=0.001)
    # Peak Signal-to-Noise Ratio
    def PSNR(y_true, y_pred):
        max_pixel = 1.0
        return tf.image.psnr(y_true, y_pred, max_val=max_pixel)
    model = espcn(r)
    model.compile(optimizer=opt, loss='mse', metrics=[PSNR])
    # Initialize testing generator
    testing_generator = DataGenerator('LRbicx' + str(r), batch_size=1, dictionary="test")
    # Load weights
    model.load_weights(filepath)
    # Calculate average PSNR of all testing data
    average_psnr = 0
    for i in range(0, num_images):
        lr, hr = testing_generator[i]
        sr = model.predict(lr)
        result = psnr(sr[0], hr[0])
        average_psnr += result
        if verbose:
            print('Image: ' + str(i) + ', PSNR: ' + str(result) + ', Average: ' + str(average_psnr/(i+1)))
    print("Average PSNR: " + str(average_psnr/num_images))
Example #2
def regulate(model, iters, batch_size, data_dir, dataset, embed_path,
             save_iter, saver, sess):
    data = DataGenerator(data_dir, dataset=dataset, portion='tra')

    optimizer = tf.train.AdamOptimizer(learning_rate=1e-3)
    train_op = optimizer.minimize(model.loss, var_list=[model.embed])
    var_list = [v.name for v in tf.trainable_variables()]
    sess.run(tf.global_variables_initializer())

    for i in range(iters):
        if i % (data.size // batch_size) == 0:  # reshuffle once per epoch
            data.shuffle()

        seqs, lens, gold = data.get_batch(i)
        loss, acc, w, _ = sess.run(
            [model.loss, model.accuracy, var_list, train_op], {
                model.x_ph: seqs,
                model.y_ph: gold,
                model.len_ph: lens
            })

        if ((i + 1) % 100 == 0):
            print('| Iter {:3}: loss {:.4f}, acc {:.4f} |'.format(
                i + 1, loss, acc))

        if ((i + 1) == save_iter):
            weight_dict = {}
            for key, value in zip(var_list, w):
                weight_dict[key] = value

            embed = weight_dict['embed:0']
            np.save(embed_path, embed)
Example #3
def main():
    batch_size = 1

    coord = tf.train.Coordinator()
    with tf.name_scope('create_inputs'):
        reader = DataGenerator(coord)
        input_batch = reader.dequeue(batch_size)

    sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))
    init = tf.global_variables_initializer()
    sess.run(init)

    threads = reader.start_threads(sess)
    net = define_net(input_batch)
    queue_size = reader.queue_size
    for step in range(10000):
        print('size queue =', queue_size.eval(session=sess))
        print(sess.run(net))

        # Slow this thread down. If you comment this line out, you will dequeue faster
        # than you enqueue, so the queue will not reach its maximum size (32 by default).
        time.sleep(1)

    coord.request_stop()
    print("stop requested.")
    for thread in threads:
        thread.join()
Example #4
def train(model, iters, batch_size, data_dir, dataset, ckpt_path, save_iter,
          saver, sess):
    data = DataGenerator(data_dir, dataset=dataset, portion='tra')

    optimizer = tf.train.AdamOptimizer(learning_rate=1e-3)
    train_op = optimizer.minimize(model.loss)
    sess.run(tf.global_variables_initializer())

    for i in range(iters):
        if i % (data.size // batch_size) == 0:  # reshuffle once per epoch
            data.shuffle()

        seqs, lens, gold = data.get_batch(i)
        loss, acc, _ = sess.run([model.loss, model.accuracy, train_op], {
            model.x_ph: seqs,
            model.y_ph: gold,
            model.len_ph: lens
        })

        if ((i + 1) % 100 == 0):
            print('| Iter {:3}: loss {:.4f}, acc {:.4f} |'.format(
                i + 1, loss, acc))

        if ((i + 1) == save_iter):
            saver.save(sess, ckpt_path, write_meta_graph=False)
            print('| Saved to {}'.format(ckpt_path))
            break
Example #5
def fit_model(model,
              train_indexes,
              valid_indexes,
              test_indexes,
              const,
              p,
              verbose=1):
    ''' train_history has train and valid '''
    train_gen = DataGenerator(train_indexes, const['batch_size'],
                              const['datadir'])
    valid_gen = DataGenerator(valid_indexes, const['batch_size'],
                              const['datadir'])
    test_gen = DataGenerator(test_indexes, const['batch_size'],
                             const['datadir'])
    callbacks = []
    # tensorboard_logdir_path = p2logdir_path(folder_path=const['tensorboard_dir'],p=p)
    # callbacks.append( tensorboard(tensorboard_logdir_path, batch_size=const['batch_size']) )
    callbacks.append(EvaluateData(test_gen, log_word='test'))
    callbacks.append(PredictData(test_gen, denormalize, log_word='test'))
    callbacks.append(PredictData(valid_gen, denormalize, log_word='val'))
    callbacks.append(PredictData(train_gen, denormalize, log_word=''))
    history = model.fit_generator(generator=train_gen,
                                  epochs=const['epochs'],
                                  steps_per_epoch=len(train_gen),
                                  validation_data=valid_gen,
                                  validation_steps=len(valid_gen),
                                  verbose=verbose,
                                  callbacks=callbacks)
    return history.history
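
fit_generator above only requires that the generator implement __len__ and __getitem__ (the keras.utils.Sequence protocol). A minimal, illustrative sketch of such a generator is shown below; the constructor arguments mirror the calls above, but the loading logic is a placeholder and not the actual implementation:

import numpy as np
from tensorflow.keras.utils import Sequence

class DataGenerator(Sequence):
    def __init__(self, indexes, batch_size, datadir):
        self.indexes = list(indexes)
        self.batch_size = batch_size
        self.datadir = datadir  # a real implementation would load samples from here

    def __len__(self):
        # number of batches per epoch
        return int(np.ceil(len(self.indexes) / self.batch_size))

    def __getitem__(self, i):
        batch = self.indexes[i * self.batch_size:(i + 1) * self.batch_size]
        # placeholder arrays; a real generator would read and preprocess files
        x = np.zeros((len(batch), 1), dtype=np.float32)
        y = np.zeros((len(batch), 1), dtype=np.float32)
        return x, y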
Example #6
def main():
    args = parse_args()
    gen = DataGenerator(args.num_vocab, args.max_length, args.num_examples)
    train_set, test_set = gen.get_tarin_test_datasets()
    train_loader = DataLoader(train_set,
                              args.batch_size,
                              shuffle=True,
                              pin_memory=True)
    test_loader = DataLoader(test_set, args.batch_size, shuffle=True, pin_memory=True)
    if not args.attention:
        Model = Network
    else:
        Model = AttnNetwork
    network = Model(args.num_vocab + 2, args.num_vocab + 2, args.emb_dim,
                    args.hidden_dim, args.max_length, args.num_layers,
                    args.drop, args.rnn_type, args.tied, args.device)
    # optimizer = optim.SGD(network.parameters(), args.lr, 0.99)
    optimizer = optim.RMSprop(network.parameters(), args.lr)
    scheduler = optim.lr_scheduler.StepLR(optimizer, 1, args.lr_decay)

    best_loss = float('inf')
    for epoch in range(args.epochs):
        for param_group in optimizer.param_groups:
            print('#%d: \tLR %f' % (epoch, param_group['lr']))
        acc, loss = single_step(network, train_loader, optimizer,
                                .8 - epoch / args.epochs)
        print("#%d: \tTRAIN  loss:%.03f \t acc:%.03f" % (epoch, loss, acc))
        acc, loss = single_step(network, test_loader)
        print("#%d: \tTEST  loss:%.03f \t acc:%.03f" % (epoch, loss, acc))
        scheduler.step()

        if best_loss > loss:
            best_loss = loss
            print('Saving model to %s' % args.save)
            torch.save(network, args.save)
Example #7
def test(model, batch_size, data_dir, dataset, sess):
    data = DataGenerator(data_dir, dataset=dataset, portion='test')

    iters = int(np.ceil(1.0 * data.size / batch_size))
    preds = 0

    for i in range(iters):
        seqs, lens, gold = data.get_batch(i)
        pred = sess.run(model.correct, {
            model.x_ph: seqs,
            model.y_ph: gold,
            model.len_ph: lens
        })

        if (i + 1) * batch_size < data.size:
            preds += int(np.sum(pred))
        else:
            # the last batch may be padded; only count predictions for real examples
            remain = data.size - i * batch_size
            preds += int(np.sum(pred[:remain]))

        if i % 100 == 0:
            sys.stdout.write('\r{}/{}'.format(i, iters))
            sys.stdout.flush()

    accuracy = 100. * preds / data.size
    print('\rTest accuracy: {:.2f}'.format(accuracy))
Example #8
def train(conf):
    gan = KernelGAN(conf)
    learner = Learner()
    data = DataGenerator(conf, gan)
    for iteration in tqdm.tqdm(range(conf.max_iters), ncols=60):
        g_in, d_in = data[iteration]
        gan.train(g_in, d_in)
        learner.update(iteration, gan)
    gan.finish()
Example #9
def estimate_kernel(img_file):
    conf = config_kernelGAN(img_file)
    kgan = KernelGAN(conf)
    learner = Learner()
    data = DataGenerator(conf, kgan)
    for iteration in tqdm.tqdm(range(conf.max_iters), ncols=70):
        g_in, d_in, _ = data[iteration]
        kgan.train(g_in, d_in)
        learner.update(iteration, kgan)
    kgan.finish()
Example #10
def train(conf):
    sr_net = DBPISR(conf)
    learner = Learner()
    data = DataGenerator(conf, sr_net)
    for iteration in tqdm.tqdm(range(conf.max_iters), ncols=60):
        g_in = data[iteration]
        sr_net.train(g_in)
        learner.update(iteration, sr_net)

    sr_net.finish(data.input_image)
Example #11
def visualize_algorithm(algorithm, data):
    """ Responsible for user interaction, using cli  """
    g = DataGenerator()
    options = ['bubble_sort', 'insertion_sort', 'merge_sort', 'quick_sort', 'selection_sort']
    options_data = {'random': g.random(), 'reversed' : g.reversed(), 'partsorted' : g.part_sorted()}
    
    algorithm = algorithm.lower() if algorithm.lower() in options else 'bubble_sort'
    data = options_data[data.lower()] if data.lower() in options_data else options_data['random']
    
    v = Visualizer(data)
    v.visualize_algorithm(algorithm)
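
For example, calling the function with an algorithm name and a data preset animates that algorithm; unrecognized arguments fall back to bubble sort and random data:

# Illustrative usage; assumes DataGenerator and Visualizer are importable in this module
visualize_algorithm('merge_sort', 'reversed')   # animate merge sort on reversed data
visualize_algorithm('heap_sort', 'unknown')     # falls back to bubble_sort on random data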
Example #12
def main():

    NUM_THREADS = multiprocessing.cpu_count()
    COMMON_PATH = os.path.join(os.path.expanduser("~"),
                               'local_tensorflow_content')

    pickle_file = 'titles_CBOW_data.pkl'
    pickle_file_path = os.path.join(os.path.expanduser("~"), pickle_file)
    dataGen = DataGenerator(pickle_file_path)

    model_config, training_config = {}, {}
    model_config['vocab_size'] = dataGen.vocab_size
    model_config['batch_size'] = 32
    model_config['context_window'] = 2
    model_config['embedding_size'] = 128
    model_config['neg_sample_size'] = 2
    model_config['learning_rate'] = 0.0005
    model_config['model_name'] = 'word2vec'
    batches = dataGen.generate_sequence(model_config['batch_size'])
    model = word2vec(**model_config)

    use_gpu = False
    if use_gpu:
        training_config['sess_config'] = tf.ConfigProto(
            log_device_placement=False,
            gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.5))
    else:
        os.environ['CUDA_VISIBLE_DEVICES'] = '-1'  # hide all GPUs so TensorFlow runs on CPU only
        training_config['sess_config'] = tf.ConfigProto(
            intra_op_parallelism_threads=NUM_THREADS)

    training_config['model_path'] = create_local_model_path(
        COMMON_PATH, model_config['model_name'])
    training_config['log_path'] = create_local_log_path(
        COMMON_PATH, model_config['model_name'])
    generate_tensorboard_script(
        training_config['log_path']
    )  # create the script to start a tensorboard session

    training_config['epoch_num'] = 20000
    training_config['display_steps'] = 1000
    training_config['saving_steps'] = 1 * training_config['display_steps']
    training_config['num_batches'] = int(dataGen.data_size *
                                         training_config['epoch_num'] /
                                         model_config['batch_size'])
    print('total #batches: {}, vocab_size: {}'.format(
        training_config['num_batches'], model_config['vocab_size']))

    model.train(batches, training_config, restore_model=False)
Example #13
def main(params):
    data = polyvore_dataset()
    transforms = data.get_data_transforms()
    X_train, X_test, y_train, y_test, n_classes = data.create_dataset()
    train_set = (X_train, y_train, transforms['train'])
    test_set = (X_test, y_test, transforms['test'])
    dataset_size = {'train': len(y_train), 'test': len(y_test)}
    params = {
        'batch_size': Config['batch_size'],
        'n_classes': n_classes,
        'shuffle': True
    }
    train_generator = DataGenerator(train_set, dataset_size, params)
    test_generator = DataGenerator(test_set, dataset_size, params)
Example #14
def compile_model():
    model = create_model()

    model.compile(optimizer=optimizers.Adam(), loss=deepball_loss_function, metrics=[deepball_precision])

    model_checkpoint = callbacks.ModelCheckpoint(filepath='footballcnn.h5', verbose=1)

    train_datagen = DataGenerator(file_path=IMAGE_PATH, config_path=CONFIG_PATH)

    model.fit(x=train_datagen, epochs=6, callbacks=[model_checkpoint])

    model.save_weights('footballcnn.h5')

    """"img = preprocessing.image.load_img('1frame1199.jpg', target_size=(360, 640, 3))
    input1 = preprocessing.image.img_to_array(img)
    input1 = input1.reshape([1, 360, 640, 3])
    input1 = input1 / 255.
    b = model.predict(input1)
    print(b.shape)
    b = b[0, :, :, 0]
    b = np.expand_dims(b, axis=2)
    preprocessing.image.save_img('pred.jpg', b)
    c = np.unravel_index(b.argmax(), b.shape)
    print(c)"""

    return
Example #15
def train(args):
    if args.load_trained:
        last_epoch, arch, model, tokenizer, scores = load_checkpoint(args.pytorch_dump_path)
    else:
        # May load local file or download from huggingface
        model, tokenizer = load_pretrained_model_tokenizer(base_model=args.local_model,
                                                           base_tokenizer=args.local_tokenizer,
                                                           device=args.device)
        last_epoch = 1

    train_dataset = DataGenerator(args.data_path, args.data_name, args.batch_size, tokenizer, 'train', args.device)
    validate_dataset = DataGenerator(args.data_path, args.data_name, args.batch_size, tokenizer, 'dev', args.device)
    test_dataset = DataGenerator(args.data_path, args.data_name, args.batch_size, tokenizer, 'test', args.device)

    optimizer = init_optimizer(model, args.learning_rate, args.warmup_proportion,
                               args.num_train_epochs, train_dataset.data_size, args.batch_size)

    model.train()
    best_score = 0
    step = 0
    for epoch in range(last_epoch, args.num_train_epochs + 1):
        print('Epoch: {}'.format(epoch))
        tr_loss = 0
        while True:
            batch = train_dataset.load_batch()
            if batch is None:
                break
            tokens_tensor, segments_tensor, mask_tensor, label_tensor = batch[:4]
            loss = model(tokens_tensor, segments_tensor, mask_tensor, label_tensor)
            loss.backward()
            tr_loss += loss.item()
            optimizer.step()
            model.zero_grad()

            if args.eval_steps > 0 and step % args.eval_steps == 0:
                print('Step: {}'.format(step))
                best_score = eval_select(model, tokenizer, validate_dataset, test_dataset, args.pytorch_dump_path,
                                         best_score, epoch)

            step += 1

        print('[train] loss: {}'.format(tr_loss))
        best_score = eval_select(model, tokenizer, validate_dataset, test_dataset, args.pytorch_dump_path, best_score,
                                 epoch)

    scores = test(args, split='test')
    print_scores(scores)
Example #16
def main():
    model_dir = "/home/tensor/tensor/scene/DataSet/checkpoints/"
    train_image_dir = "/home/tensor/tensor/scene/DataSet/train/"
    validate_image_dir = "/home/tensor/tensor/scene/DataSet/validation/"
    pretrained_model_path = "/home/tensor/tensor/scene/DataSet/pre_trained/inception_resnet_v2.ckpt"
    datagen = DataGenerator(train_image_dir, validate_image_dir)
    model = ModelFactory(datagen, net='INCEPTION_RESNET_V2', model_dir=model_dir, fine_tune=True,
                         pretrained_path=pretrained_model_path)
    with tf.Session() as session:
        model.train(session)
Example #17
def main(images_path, labels_path):
    keras.backend.clear_session()

    data_df = get_data(images_path, labels_path)

    raw_train, valid = split_data(data_df)

    model = create_model(num_classes=28, input_shape=input_shape)
    model.compile(loss="binary_crossentropy",
                  optimizer=Adam(),
                  metrics=["acc", f1])
    # model.compile(loss=[_focal_loss(gamma=2,alpha=0.75)], optimizer=Adam(), metrics=["acc", f1])

    epochs = 50
    batch_size = 64
    checkpointer = ModelCheckpoint("../working/InceptionResNetV2.model",
                                   verbose=2,
                                   save_best_only=True)
    early_stopping = EarlyStopping(monitor="val_loss", patience=2)
    reduce_lr = ReduceLROnPlateau(monitor="val_loss", patience=1, factor=0.1)

    train_generator = DataGenerator.create_train(raw_train,
                                                 batch_size,
                                                 DEFAULT_IMG_SIZE_WHC,
                                                 augument=True)
    validation_generator = DataGenerator.create_train(valid,
                                                      100,
                                                      DEFAULT_IMG_SIZE_WHC,
                                                      augument=False)

    train_steps = raw_train.shape[0] // batch_size
    valid_steps = valid.shape[0] // batch_size

    # train model
    history = model.fit_generator(
        train_generator,
        steps_per_epoch=train_steps,
        validation_data=next(validation_generator),
        validation_steps=valid_steps,
        epochs=epochs,
        verbose=1,
        callbacks=[checkpointer, reduce_lr],
    )
Example #18
def main():
    datagen = DataGenerator(FLAGS.train_json, FLAGS.train_image_dir,
                            FLAGS.validate_json, FLAGS.validate_image_dir)
    model = ModelFactory(datagen,
                         net='INCEPTION_RESNET_V2',
                         model_dir=FLAGS.model_dir,
                         fine_tune=True,
                         pretrained_path=FLAGS.pretrained_model_path)
    with tf.Session() as session:
        model.train(session)
Example #19
def predict_to_file(infile, out_file="result.json"):

    def save_json(record_dict):
        with open(out_file, 'w', encoding='utf-8') as fw:
            fw.write(json.dumps(record_dict, ensure_ascii=False, indent=4))

    model=torch.load("dureder_model").to(device)
    model.eval()
    
    R = {}

    ignore_id = set()
    if resume_predict:
        with open(out_file, 'r') as load_f:
            R = json.load(load_f)
        for key in R:
            ignore_id.add(key)


    data_gen = DataGenerator(infile, tokenizer, device)
    batch_cnt = 0
    for batch in data_gen.batchIterNoAnswer(predict_batch_size, ignore_id):
        batch_cnt += 1

        batch_id = batch["batch_id"]
        batch_pair_ids = batch["batch_pair_ids"]
        batch_token_type_ids = batch["batch_token_type_ids"]
        batch_attention_mask = batch["batch_attention_mask"]
        batch_context_len = batch["batch_context_len"]
        batch_question_len = batch["batch_question_len"]
        batch_context_ids = batch["batch_context_ids"]

        p1, p2 = model(batch_pair_ids, batch_token_type_ids, batch_attention_mask,
                       batch_context_len, batch_question_len)
        answers = get_batch_predict_answer(batch_context_ids, p1, p2)
        print(answers)
        for i in range(len(batch_id)):
            R[batch_id[i]] = answers[i]
        if batch_cnt % 200 == 0:
            save_json(R)

    save_json(R)
Example #20
def train(model, r, batch_size, epochs):
    # Fit model
    training_generator = DataGenerator('LRbicx' + str(r),
                                       batch_size=batch_size)
    model.fit_generator(generator=training_generator, epochs=epochs, verbose=1)
    # Save weights
    # Filepath where the weights will be saved to
    filepath = 'model/weights/r' + str(r) + 'bs' + str(
        batch_size) + 'epochs' + str(epochs) + 'weights.h5'
    model.save_weights(filepath)
    print("Saved weights at : " + filepath)
Example #21
def main():
    print('loading modules ...')
    gen = DataGenerator('protain/all/train/', 'protain/all/test/', 'protain/all/train.csv')
    model = Resnet(resnet_layers=4, channels=[4, 16, 16, 32, 32])
    print('Done')
    epoch = 10

    for i in range(epoch):
        val_x, val_y = gen.get_validation_set()
        bar = tqdm(gen.get_batch(), total=len(gen.train_ids) // 8)
        for x, y in bar:
            loss = model.train(x, y)
            bar.set_description('loss = {:.5f}'.format(loss))
        preds = np.array([[int(y >= 0.5) for y in model.predict([x])[0]] for x in tqdm(val_x)])
        print('[epoch {}]: f1_macro = {}'.format(i, f1_score(val_y, preds, average='macro')))

    preds_test = [(name, [i for i, y in enumerate(model.predict([x])[0]) if y >= 0.5]) for name, x in gen.get_test_set()]
    with open('submission.csv', 'w') as f:
        f.write('Id,Predicted\n')
        for id_, preds in preds_test:
            f.write('{},{}\n'.format(id_, ' '.join(list(map(str, preds)))))
Example #22
def train(infile):
    if not resume_train:
        encode_model = AlbertModel.from_pretrained(pretrained).to(device)
        model = BertBidaf(tokenizer, encode_model, device)
    else:
        model = torch.load("dureder_model")
    model = model.to(device)
    data_gen = DataGenerator(infile, tokenizer, device)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    criterion = nn.CrossEntropyLoss()

    model.train()

    for e in range(epoch):
        batch_cnt = 0
        for batch in data_gen.batchIter(train_batch_size):
            batch_cnt += 1

            batch_pair_ids = batch["batch_pair_ids"]
            batch_token_type_ids = batch["batch_token_type_ids"]
            batch_attention_mask = batch["batch_attention_mask"]
            batch_start = batch["batch_start"]
            batch_end = batch["batch_end"]
            batch_context_len = batch["batch_context_len"]
            batch_question_len = batch["batch_question_len"]

            p1, p2 = model(batch_pair_ids, batch_token_type_ids, batch_attention_mask,
                           batch_context_len, batch_question_len)
            optimizer.zero_grad()
            batch_loss = criterion(p1, batch_start) + criterion(p2, batch_end)
            print(e, batch_cnt * train_batch_size, batch_loss.item())
            # print(get_batch_predict_answer(batch_pair_ids, p1, p2))
            # print(batch_ans)
            batch_loss.backward()
            optimizer.step()

            if batch_cnt % 20 == 0:
                torch.save(model, 'dureder_model')

        torch.save(model, 'dureder_model')
Example #23
def test(model, testDirectory, classList, INPUT_FRAMES=64, batchSize=10):
        
    print("\n\n\ngenerating Annotation List...")
    annotationList = generateDatasetList(testDirectory, INPUT_FRAMES, classList=classList)
    print("creating data generator...")
    dataGenerator = DataGenerator(annotationList, INPUT_FRAMES, batch_size=batchSize)
    print("starting test...\n")
    out_logits = model.predict_generator(dataGenerator, steps=None, max_queue_size=10,
                                         workers=1, use_multiprocessing=False, verbose=1)
    out_logits = out_logits[:len(annotationList)]
    predictions = getPredictions(out_logits)
    output = generateFormatedOutput(predictions, annotationList, classList)
    writeJsontoFile("results.json", output)
    np.save("logits.npy", out_logits)
Example #24
def main():
    batch_size = 32
    num_classes = 4
    epochs = 100
    save_dir = os.path.join(os.getcwd(), 'saved_models')
    model_name = 'orientation-inception.h5'

    data_train, data_test = load_data()

    # Use Google Inception v3 model
    model = InceptionV3(
        include_top=False,
        weights=None,
        input_shape=(192, 192, 3),
        pooling='softmax',
        classes=4,
    )

    # initiate RMSprop optimizer
    opt = keras.optimizers.rmsprop(lr=0.0001, decay=1e-6)

    # Let's train the model using RMSprop
    model.compile(loss='categorical_crossentropy',
                  optimizer=opt,
                  metrics=['accuracy'])

    if not os.path.isdir(save_dir):
        os.makedirs(save_dir)
    checkpointer = ModelCheckpoint(
        filepath=os.path.join(save_dir, 'checkpoint.h5'),
        verbose=1,
        save_best_only=True,
    )
    early_stopping = EarlyStopping(monitor='val_loss', patience=2)
    train_generator = DataGenerator(data_train)
    val_generator = DataGenerator(data_test)
    model.fit_generator(
        train_generator.flow(batch_size=batch_size),
        epochs=epochs,
        validation_data=val_generator.flow(batch_size=batch_size),
        shuffle=True,
        callbacks=[checkpointer, early_stopping],
    )

    # Save model and weights
    model_path = os.path.join(save_dir, model_name)
    model.save(model_path)
    print('Saved trained model at %s' % model_path)

    # Score trained model.
    scores = model.evaluate_generator(
        val_generator.flow(batch_size=batch_size))
    print('Test loss:', scores[0])
    print('Test accuracy:', scores[1])
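
This example assumes the generator exposes a flow() method that yields batches indefinitely, in the style of Keras' ImageDataGenerator. A minimal, purely illustrative sketch follows; the (images, labels) layout of data_train is an assumption, not the actual implementation:

import numpy as np

class DataGenerator:
    def __init__(self, data):
        # assumed layout: data = (images, labels) as NumPy arrays
        self.x, self.y = data

    def flow(self, batch_size=32):
        # yield shuffled batches forever, as fit_generator / evaluate_generator expect
        n = len(self.x)
        while True:
            order = np.random.permutation(n)
            for start in range(0, n, batch_size):
                idx = order[start:start + batch_size]
                yield self.x[idx], self.y[idx]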
Example #25
def __init__(self):
    try:
        from faker import Faker
        from faker.config import DEFAULT_PROVIDERS
        self.fake = Faker()
        self.providers = DEFAULT_PROVIDERS
    except ImportError:
        # faker is optional; fall back gracefully when it is not installed
        self.fake = None
        self.providers = None
    self.outfile = StringIO()
    self.section = None
    self.section_titles = []
    self.section_structure = []
    self.data_gen = DataGenerator()
Example #26
def main():
    conf = load_conf()
    wandb.init(project=conf.proj_name, config=dict(conf))

    agent = Agent(embed_hidden=conf.embed_hidden,
                  enc_stacks=conf.enc_stacks,
                  ff_hidden=conf.ff_hidden,
                  enc_heads=conf.enc_heads,
                  query_hidden=conf.query_hidden,
                  att_hidden=conf.att_hidden,
                  crit_hidden=conf.crit_hidden,
                  n_history=conf.n_history,
                  p_dropout=conf.p_dropout)
    wandb.watch(agent)

    dataset = DataGenerator()

    trainer = Trainer(conf, agent, dataset)
    trainer.run()

    # Save trained agent
    torch.save(agent.state_dict(), conf.model_path)

    if conf.test:
        device = torch.device(conf.device)
        # Load trained agent
        agent.load_state_dict(torch.load(conf.model_path))
        agent.eval()
        agent = agent.to(device)

        running_reward = 0
        for _ in range(conf.test_steps):
            input_batch = dataset.test_batch(conf.batch_size,
                                             conf.max_len,
                                             conf.dimension,
                                             shuffle=False)
            input_batch = torch.Tensor(input_batch).to(device)

            tour, *_ = agent(input_batch)

            reward = reward_fn(input_batch, tour)

            # Find best solution
            j = reward.argmin()
            best_tour = tour[j][:-1].tolist()

            # Log
            running_reward += reward[j]

            # Display
            print('Reward (before 2 opt)', reward[j])
            opt_tour, opt_length = dataset.loop2opt(
                input_batch.cpu()[0][best_tour])
            print('Reward (with 2 opt)', opt_length)
            dataset.visualize_2D_trip(opt_tour)

        wandb.run.summary["test_reward"] = running_reward / conf.test_steps
Example #27
def train(conf):
    gan = KernelGAN(conf)
    learner = Learner()
    data = DataGenerator(conf, gan)
    dataloader = DataLoader(data, batch_size=batch_size, shuffle=False)
    timer = 0
    for i_batch, sample_batched in enumerate(tqdm.tqdm(dataloader)):
        g_in, d_in = sample_batched
        gan.train(g_in, d_in)
        learner.update(i_batch * batch_size, gan)
        if learner.flag:
            timer += 1
        if timer > 10:
            break
    gan.finish()
Example #28
def main():

    # parameters used for model training and prediction
    mp = model_params()

    # parameters for building BERT
    bp = bert_params(with_pool=True)

    datagen = DataGenerator(bp,
                            batch_size=mp.batch_size,
                            num_neg=mp.num_neg,
                            shuffle=mp.shuffle)

    # other optimizers could be tried later
    optimizer = Adagrad(learning_rate=mp.learning_rate)
    my_model = Bert4QA(bp)

    # main training class
    t = TrainOrPredict(mp)

    # final_model is the trained model
    final_model = t.train(my_model, optimizer, datagen)

    data = datagen.data_faq
    tokenizer = datagen.tokenizer

    # after training, inspect the results
    real_query_text = "月球和地球是什么关系?"

    question_score = {}
    for query_name in data.query_dict.keys():
        query_text = data.query_dict[query_name]
        token_ids, segment_ids = tokenizer.encode(real_query_text, query_text)
        question_score[query_name] = final_model.predict(
            [token_ids, segment_ids])
    question_score = {k: v.numpy() for k, v in question_score.items()}
    qs = dict(sorted(question_score.items(), key=lambda x: x[1], reverse=True))
    c = 0
    for k, v in qs.items():
        c += 1
        print(k, data.query_dict[k], v)
        if c == 10: break

    return final_model
Example #29
def main():
    if os.path.exists(JOB_NAME):
        raise AssertionError("Job name already exists")
    else:
        os.mkdir(JOB_NAME)
        with open(os.path.join(JOB_NAME, "train_params.txt"), 'w') as f:
            f.write("META_LEARNER " + str(META_LEARNER) + '\n')
            f.write("FUNCTION " + str(FUNCTION_TRAIN) + '\n')
            f.write("K_TRAIN " + str(K_TRAIN) + '\n')
            f.write("SGD_STEPS_TRAIN " + str(SGD_STEPS_TRAIN) + '\n')
            f.write("NOISE_PERCENT_TRAIN " + str(NOISE_PERCENT_TRAIN) + '\n')
            f.write("ITERATIONS_TRAIN " + str(ITERATIONS_TRAIN) + '\n')
            f.write("OUTER_LR_TRAIN " + str(OUTER_LR_TRAIN) + '\n')
            f.write("INNER_LR_TRAIN " + str(INNER_LR_TRAIN) + '\n')
            f.write("AVERAGER_SIZE_TRAIN " + str(AVERAGER_SIZE_TRAIN) + '\n')

    model = Net()
    if META_LEARNER == "reptile":
        learning_alg = Reptile(lr_inner=INNER_LR_TRAIN,
                               lr_outer=OUTER_LR_TRAIN,
                               sgd_steps_inner=SGD_STEPS_TRAIN)
    elif META_LEARNER == "maml":
        learning_alg = MAML(lr_inner=INNER_LR_TRAIN,
                            lr_outer=OUTER_LR_TRAIN,
                            sgd_steps_inner=SGD_STEPS_TRAIN)
    else:
        learning_alg = Insect(lr_inner=INNER_LR_TRAIN,
                              lr_outer=OUTER_LR_TRAIN,
                              sgd_steps_inner=SGD_STEPS_TRAIN,
                              averager=AVERAGER_SIZE_TRAIN)
    meta_train_data = DataGenerator(function=FUNCTION_TRAIN,
                                    size=ITERATIONS_TRAIN,
                                    K=K_TRAIN,
                                    noise_percent=NOISE_PERCENT_TRAIN)
    learning_alg.train(model, meta_train_data)

    torch.save(model, os.path.join(JOB_NAME, "trained_model.pth"))
    test(model)
Example #30
def evaluate(base_model, function, K, noise_percent, SGD_steps, inner_lr, runs=100):
    all_losses = []
    test_tasks = DataGenerator(function, runs, K, noise_percent).shuffled_set()

    for test_task in test_tasks:
        xeval, yeval = test_task.eval_set(size=100)

        model = copy.deepcopy(base_model)
        optim = torch.optim.SGD(model.parameters(), lr=inner_lr)
        losses = []
        
        predicted = model(xeval)
        losses.append(F.mse_loss(predicted, yeval).item())

        for i in range(SGD_steps):
            inner_train(model, test_task, optim)
            predicted = model(xeval)
            losses.append(F.mse_loss(predicted, yeval).item())

        norm_losses = np.array(losses) / test_task.amp * 100
        all_losses.append(norm_losses)

    return np.array(all_losses)
Example #31
# -*- coding: utf-8 -*-

import json
from data import DataParser, CSVParser, DataGenerator, DEPDataParser
import mturk, pickle
from pprint import pprint
import nltk
from random import shuffle


parser = DataParser()
csv_parser = CSVParser()

data_generator = DataGenerator()
dep_data_generator = DEPDataParser()

####
# INPUT FOR THE SCRIPT
####

main_data_file = "data/data_all.json"
split_values = {"train": 0.7, "validate": 0.1, "test": 0.2}
fix_file = "fix_turk_result.csv"

####

# This is main data (from Tim)
with open(main_data_file) as data_file:
    data = json.load(data_file)

# Segment it into json
Example #32
# -*- coding: utf-8 -*-

import json
from data import DataParser, CSVParser, DataGenerator, DEPDataParser
import mturk
from pprint import pprint


parser = DataParser()
csv_parser = CSVParser()

data_generator = DataGenerator()
dep_data_generator = DEPDataParser()

####
# INPUT FOR THE SCRIPT
####

main_data_file = "data/data_all.json"

####


# This is main data (from Tim)
with open(main_data_file) as data_file:
    data = json.load(data_file)

# Segment it into json
segmented_data = parser.parse_data(data)

annotated_paragraphs = []
Example #33
def train(args):
    path = args.save_dir
    try:
        os.makedirs(path)
    except OSError as exc:
        if exc.errno == errno.EEXIST and os.path.isdir(path):
            pass
        else:
            raise

    with open(args.vocab, 'r') as fin:
        vocab = json.load(fin)
        args.vocab_size = len(vocab)

    args.docs_looped = True
    train = DataGenerator(args, '../data/processed/train.npy')
    args.docs_looped = False
    val = DataGenerator(args, '../data/processed/test.npy')

    args.iterations_per_epoch = int(train.samples / args.batch_size)
    args.iterations_per_val = int(val.samples / args.batch_size)

    if args.init_from is not None:
        # check if all necessary files exist
        assert os.path.isdir(args.init_from), \
            " %s must be a path" % args.init_from
        assert os.path.isfile(os.path.join(args.init_from, "config.pkl")), \
            "config.pkl file does not exist in path %s" % args.init_from
        ckpt = tf.train.get_checkpoint_state(args.init_from)
        assert ckpt, "No checkpoint found"
        assert ckpt.model_checkpoint_path, "No model path found in checkpoint"

        # open old config and check if models are compatible
        with open(os.path.join(args.init_from, 'config.pkl'), 'rb') as f:
            saved_model_args = pickle.load(f)
        need_be_same = ["model", "rnn_size", "num_layers", "seq_length"]
        for checkme in need_be_same:
            assert vars(saved_model_args)[checkme] == vars(args)[checkme], \
                "Command line argument and saved model disagree on '%s' " % checkme

    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        pickle.dump(args, f)

    model = Model(args)

    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        saver = tf.train.Saver(tf.all_variables())

        # restore model
        if args.init_from is not None:
            saver.restore(sess, ckpt.model_checkpoint_path)

        for e in range(args.num_epochs):
            sess.run(tf.assign(model.lr,
                               args.learning_rate * (args.decay_rate ** e)))
            train.reset()

            state = model.initial_state.eval()
            pbar = tqdm(range(args.iterations_per_epoch))
            for b in pbar:
                x, y = next(train)

                feed = {model.input_data: x,
                        model.targets: y,
                        model.initial_state: state}

                train_loss, state, _ = sess.run([model.cost,
                                                 model.final_state,
                                                 model.train_op],
                                                feed)

                if b % 10 == 0:
                    pbar.set_description('train_loss: {:.3f}'
                                         .format(train_loss))

            print('Checkpoint')
            checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
            saver.save(sess, checkpoint_path,
                       global_step=e * args.iterations_per_epoch)

            print('Validation...')
            # we can restore training states
            # train_states = state[:]

            val_loss = 0.0
            state = model.initial_state.eval()

            val.reset()
            pbar = tqdm(range(args.iterations_per_val))
            for b in pbar:
                x, y = next(val)

                feed = {model.input_data: x,
                        model.targets: y,
                        model.initial_state: state}

                loss, state = sess.run([model.cost, model.final_state],
                                       feed)

                pbar.set_description('val_loss: {:.3f}'
                                     .format(loss))

                val_loss += loss

            val_loss /= args.iterations_per_val
            print('Mean val_loss is {:.3f}'.format(val_loss))