Example #1
def run_KMC(model, num_iterations, foldername, atom_features_bool,
            pairs_features_bool, num_atom_features, num_pairs_features,
            num_atoms, molecule_size_normalizer, cycle_size_normalizer,
            max_num_of_bonds, num_timesteps, validation_percentage):
    # Run the whole Kinetic Monte Carlo process using the trained "model"
    data_loader = DataLoader(foldername, atom_features_bool,
                             pairs_features_bool, num_atom_features,
                             num_pairs_features, num_atoms,
                             molecule_size_normalizer, cycle_size_normalizer,
                             max_num_of_bonds, num_timesteps,
                             validation_percentage)
    # Get the initial input for the model
    Xtest_input_atom, Xtest_input_pairs, Xtest_atom_graph, Xtest_mask, Xtest_extract_pairs, Ytest, Ytest_time = data_loader.get_data_no_generator(
        1, 1, 'train')
    bond_change = {}
    first_frame = get_first_frame(Xtest_input_pairs, Xtest_extract_pairs,
                                  num_atoms)
    atom_types = Xtest_input_atom[0, :, 0]
    adjacency_matrix = first_frame.copy()
    time = [0]
    for i in range(num_iterations):
        if i % 10 == 0:
            print(i)
        # Get the "reactivity scores" from the model
        results_probs, result_time = model.predict([
            Xtest_input_atom, Xtest_input_pairs, Xtest_atom_graph, Xtest_mask,
            Xtest_extract_pairs
        ])
        # From the "reactivity scores" pick a reaction and the time before this reaction using the Kinetic Monte Carlo algorithm.
        adjacency_matrix, bond_change, time_new = run_step_KMC(
            results_probs[0], result_time[0, 0], Xtest_extract_pairs,
            adjacency_matrix, bond_change, time[-1])
        time.append(time_new)
        # Update the system with the picked reaction and recalculate the input of the model.
        Xtest_input_atom, Xtest_input_pairs, Xtest_atom_graph = get_new_input(
            adjacency_matrix, atom_features_bool, pairs_features_bool,
            molecule_size_normalizer, cycle_size_normalizer, num_atoms,
            num_atom_features, num_pairs_features, max_num_of_bonds,
            atom_types)
        Xtest_input_pairs = tf.gather_nd(Xtest_input_pairs,
                                         Xtest_extract_pairs)
    return bond_change, first_frame, time
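# The selection step above follows the standard kinetic Monte Carlo (Gillespie)
# recipe. Below is a minimal sketch of that step, assuming `rates` holds the
# model's non-negative reactivity scores and `pairs[k]` gives the atom pair for
# score k; this helper is illustrative only, not the repository's run_step_KMC.
import numpy as np

def run_step_KMC_sketch(rates, pairs, adjacency_matrix, bond_change, t_prev):
    rates = np.asarray(rates, dtype=np.float64).ravel()
    total_rate = rates.sum()
    # Pick one reaction with probability proportional to its rate.
    chosen = np.random.choice(len(rates), p=rates / total_rate)
    i, j = pairs[chosen]
    # Toggle the chosen bond (break it if present, form it otherwise).
    adjacency_matrix[i, j] = 1 - adjacency_matrix[i, j]
    adjacency_matrix[j, i] = adjacency_matrix[i, j]
    bond_change.setdefault((i, j), []).append(t_prev)
    # Advance time by an exponentially distributed waiting time 1/R * ln(1/u).
    t_new = t_prev + np.log(1.0 / np.random.uniform()) / total_rate
    return adjacency_matrix, bond_change, t_new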
Example #2
def train(args):
    batch_size = 16
    shape = (200, 100, 3)
    loader = DataLoader(args.path, args.samples, shape[:2], augment=args.aug)
    exp_path = "./"

    model = siamese_model(shape)
    optim = tf.keras.optimizers.Adam(learning_rate=0.0001)

    loss = 'binary_crossentropy'
    metrics = ['binary_accuracy', 'acc']

    model.compile(loss=loss, optimizer=optim, metrics=metrics)

    model.summary()

    tb = tf.keras.callbacks.TensorBoard(
        log_dir=os.path.join(exp_path, args.exp_name, "logs"),
        histogram_freq=0,
        write_graph=True,
        write_images=True)

    checkpoint = tf.keras.callbacks.ModelCheckpoint(
        os.path.join(exp_path, args.exp_name, "ckpt"),
        monitor='val_acc',
        verbose=1,
        save_weights_only=True,
        save_best_only=True,
        mode='max')
    callbacks_list = [checkpoint, tf.keras.callbacks.TerminateOnNaN(), tb]

    history = model.fit(
        loader.generate_epoch_train(batch_size),
        validation_data=loader.generate_epoch_val(batch_size),
        validation_steps=loader.val_size // batch_size,
        steps_per_epoch=loader.train_size // batch_size,
        epochs=700,
        callbacks=callbacks_list)

    acc = history.history['val_binary_accuracy']
    tacc = history.history['binary_accuracy']
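    # Illustrative follow-up (assumption: matplotlib is installed; this plot
    # is not part of the original script): visualize the accuracy curves
    # captured above.
    import matplotlib.pyplot as plt
    plt.plot(tacc, label='train binary_accuracy')
    plt.plot(acc, label='val binary_accuracy')
    plt.xlabel('epoch')
    plt.ylabel('accuracy')
    plt.legend()
    plt.savefig(os.path.join(exp_path, args.exp_name, 'accuracy.png'))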
Example #3
def test(args):
    batch_size = 16
    shape = (200, 100, 3)
    loader = DataLoader(args.path, 50, shape[:2], test=True)
    exp_path = "./"

    model = siamese_model(shape)
    optim = tf.keras.optimizers.Adam(learning_rate=0.0001)

    loss = 'binary_crossentropy'
    metrics = ['binary_accuracy', 'acc']

    model.compile(loss=loss,
                  optimizer=optim,
                  metrics=metrics)

    model.load_weights(os.path.join(exp_path, args.exp_name, "ckpt"))
    print("Model Loaded Successifully")
    model.summary()

    # model.evaluate returns loss/metric values, not a History object.
    results = model.evaluate(loader.generate_test(batch_size),
                             steps=loader.test_size * 50 // batch_size)
    print(results)
    sleep(5)
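# Both train() and test() read attributes off an `args` namespace that is not
# defined in this excerpt. A minimal argparse sketch of the flags they use
# (names inferred from the calls above; defaults are illustrative assumptions):
import argparse

def parse_args_sketch():
    parser = argparse.ArgumentParser()
    parser.add_argument('--path', required=True,
                        help='dataset root handed to DataLoader')
    parser.add_argument('--samples', type=int, default=100,
                        help='sample count passed to DataLoader in train()')
    parser.add_argument('--aug', action='store_true',
                        help='enable augmentation (DataLoader augment flag)')
    parser.add_argument('--exp_name', default='siamese',
                        help='experiment folder holding ckpt and logs')
    return parser.parse_args()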
Example #4
def train(cfg):
    tf_logger = SummaryWriter('tf_logs/' + cfg['model_id'])

    # train and test share the same set of documents
    documents = load_documents(cfg['data_folder'] +
                               cfg['{}_documents'.format(cfg['mode'])])

    # train data
    train_data = DataLoader(cfg, documents)
    valid_data = DataLoader(cfg, documents, mode='dev')

    model = KAReader(cfg)
    model = model.to(torch.device('cuda'))

    # Use a list, not filter(): Adam would exhaust a filter iterator, leaving
    # nothing for the gradient-clipping call below.
    trainable = [p for p in model.parameters() if p.requires_grad]
    optim = torch.optim.Adam(trainable, lr=cfg['learning_rate'])

    if cfg['lr_schedule']:
        scheduler = torch.optim.lr_scheduler.MultiStepLR(optim, [30],
                                                         gamma=0.5)

    model.train()
    best_val_f1 = 0
    best_val_hits = 0
    for epoch in range(cfg['num_epoch']):
        batcher = train_data.batcher(shuffle=True)
        train_loss = []
        for feed in batcher:
            loss, pred, pred_dist = model(feed)
            train_loss.append(loss.item())
            # acc, max_acc = cal_accuracy(pred, feed['answers'].cpu().numpy())
            # train_acc.append(acc)
            # train_max_acc.append(max_acc)
            optim.zero_grad()
            loss.backward()
            if cfg['gradient_clip'] != 0:
                torch.nn.utils.clip_grad_norm_(trainable, cfg['gradient_clip'])
            optim.step()
        tf_logger.add_scalar('avg_batch_loss', np.mean(train_loss), epoch)

        val_f1, val_hits = test(model, valid_data, cfg['eps'])
        if cfg['lr_schedule']:
            scheduler.step()
        tf_logger.add_scalar('eval_f1', val_f1, epoch)
        tf_logger.add_scalar('eval_hits', val_hits, epoch)
        if val_f1 > best_val_f1:
            best_val_f1 = val_f1
        if val_hits > best_val_hits:
            best_val_hits = val_hits
            torch.save(
                model.state_dict(),
                'model/{}/{}_best.pt'.format(cfg['name'], cfg['model_id']))
        print('evaluation best f1:{} current:{}'.format(best_val_f1, val_f1))
        print('evaluation best hits:{} current:{}'.format(
            best_val_hits, val_hits))

    print('save final model')
    torch.save(model.state_dict(),
               'model/{}/{}_final.pt'.format(cfg['name'], cfg['model_id']))

    # model_save_path = 'model/{}/{}_best.pt'.format(cfg['name'], cfg['model_id'])
    # model.load_state_dict(torch.load(model_save_path))

    print('\n..........Finished training, testing the final model.......')

    test_data = DataLoader(cfg, documents, mode='test')
    model.eval()
    test(model, test_data, cfg['eps'])
Example #5
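# The head of this test() function is missing from the excerpt; only its tail
# survives below. A hypothetical reconstruction under stated assumptions: the
# model returns (loss, pred, pred_dist) as in train(), and a helper
# (cal_eval_metric, a name invented here purely for illustration) maps
# predictions and gold answers to per-example F1 and hits scores.
def test(model, test_data, eps):
    model.eval()  # switched back to model.train() in the tail below
    f1s, hits = [], []
    with torch.no_grad():
        for feed in test_data.batcher():
            _, pred, pred_dist = model(feed)
            batch_f1, batch_hits = cal_eval_metric(pred, pred_dist,
                                                   feed['answers'], eps)
            f1s.extend(batch_f1)
            hits.extend(batch_hits)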
    print('how many eval samples......', len(f1s))
    print('avg_f1', np.mean(f1s))
    print('avg_hits', np.mean(hits))

    model.train()
    return np.mean(f1s), np.mean(hits)


if __name__ == "__main__":
    # config_file = sys.argv[2]
    cfg = get_config()
    random.seed(cfg['seed'])
    np.random.seed(cfg['seed'])
    torch.manual_seed(cfg['seed'])
    torch.cuda.manual_seed_all(cfg['seed'])
    if cfg['mode'] == 'train':
        train(cfg)
    elif cfg['mode'] == 'test':
        documents = load_documents(cfg['data_folder'] +
                                   cfg['{}_documents'.format(cfg['mode'])])
        test_data = DataLoader(cfg, documents, mode='test')
        model = KAReader(cfg)
        model = model.to(torch.device('cuda'))
        model_save_path = 'model/{}/{}_best.pt'.format(cfg['name'],
                                                       cfg['model_id'])
        model.load_state_dict(torch.load(model_save_path))
        model.eval()
        test(model, test_data, cfg['eps'])
    else:
        assert False, "--train or --test?"
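# get_config() is not shown in this excerpt. A minimal stand-in, assuming the
# config is a YAML file whose keys match those indexed above ('seed', 'mode',
# 'data_folder', 'name', 'model_id', 'eps', ...); the argument position and
# file format are assumptions (the commented-out hint above reads sys.argv[2]),
# and the real loader may differ.
import sys
import yaml  # assumption: PyYAML is available

def get_config_sketch(path=None):
    path = path or sys.argv[1]
    with open(path) as f:
        return yaml.safe_load(f)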
Example #6
from data_generator import DataLoader
from rp_net import RPNet

import numpy as np
import tensorflow as tf

import cv2

if __name__ == "__main__":
    tf.random.set_seed(123)
    print(tf.executing_eagerly())

    loader = DataLoader("rcnn_data")
    num_anchors = len(loader.anchor_sizes)

    # Label anchors; 0.7 / 0.30 look like the usual RPN positive/negative IoU
    # thresholds (the first argument's meaning isn't shown in this excerpt).
    anchor_labels = loader.label_anchors(8, 0.7, 0.30)
    scale = loader.anchor_scale

    images = loader.images
    images = np.reshape(images, (-1, 256, 256, 3))
    images = images.astype(np.float32) / 255

    output_width = int(256 / scale)
    output_height = int(256 / scale)

    target_cls = loader.anchor_cls
    target_cls = np.reshape(target_cls,
                            (-1, output_height, output_width, num_anchors))
    target_cls = target_cls.astype(np.float32)

    target_reg = loader.anchor_reg
    myLosses = {
        'prediction_probs': "categorical_crossentropy",
        'prediction_time': 'mse'
    }
    myLossesWeights = {'prediction_probs': 10**5, 'prediction_time': 10**-13}

    depthlist = [5]  # range(5,3,-1)
    hiddenlist = [32]
    ratelist = {"a": 0.001}
    nbdense = 2
    # Select how to load and preprocess the training and validation data
    # according to the chosen path: use a data generator or not, whichever
    # fits the available computational resources.
    if use_bucket == 0:
        data_loader = DataLoader(foldername, atom_features_bool,
                                 pairs_features_bool, num_atom_features,
                                 num_pairs_features, num_atoms,
                                 molecule_size_normalizer,
                                 cycle_size_normalizer, max_num_of_bonds,
                                 num_timesteps, validation_percentage)
    else:
        data_loader = DataLoaderBucket(foldername, atom_features_bool,
                                       pairs_features_bool, num_atom_features,
                                       num_pairs_features, num_atoms,
                                       molecule_size_normalizer,
                                       cycle_size_normalizer, max_num_of_bonds,
                                       num_timesteps, validation_percentage)

    if use_generator:
        data = tf.data.Dataset.from_generator(
            data_loader.get_data_with_generator,
            args=[num_examples_per_epoch, batch_size],
            output_types=((tf.float32, tf.float32, tf.int32, tf.float32,