def read_and_split_data_from_file(file_name):
    """Read a dataset from *file_name* and split it 70/20/10 into
    train/validate/test partitions.

    Returns (X0_train, Z_train, X0_validate, Z_validate, X0_test, Z_test,
    data_info), where data_info is the metadata dict from read_data()
    augmented with the split fractions that were used.
    """
    data_X0, data_Z, data_info = read_data(file_name)

    # Record the split fractions alongside the rest of the metadata.
    data_info.update(train_size=0.7, validate_size=0.2, test_size=0.1)

    splits = train_test_split(
        X=data_X0,
        Z=data_Z,
        train_size=data_info['train_size'],
        validate_size=data_info['validate_size'],
    )
    X0_train, Z_train, X0_validate, Z_validate, X0_test, Z_test = splits

    return X0_train, Z_train, X0_validate, Z_validate, X0_test, Z_test, data_info
# Exemplo n.º 2
def load_CartPole_data():
    """Load the pre-recorded CartPole rollouts and split them.

    Returns (train_data, val_data, test_data), each a list of
    (input_map, states, actions) triples as produced by
    data.train_test_split().
    """
    states = np.load('Data/RL_Data/CP_states_df.npy')
    actions = np.load(
        'Data/RL_Data/CP_selected_action_sequence_df.npy')
    # CartPole carries no input map; pair each episode with a zero placeholder.
    input_maps = np.zeros((len(states), 1))

    triples = list(zip(input_maps, states, actions))
    train_data, val_data, test_data = data.train_test_split(triples)

    # Every episode must pair each visited state with exactly one action.
    assert all(len(ep_states) == len(ep_actions)
               for _, ep_states, ep_actions in train_data), \
        'Lengths of states and actions are not equal'

    return train_data, val_data, test_data
# Exemplo n.º 3
def load_FrozenLake_data():
    """Load the pre-recorded FrozenLake rollouts and split them.

    Returns (train_data, val_data, test_data), each a list of
    (input_map, states, actions) triples as produced by
    data.train_test_split().
    """
    input_maps = data.load_np_data('Data/RL_Data/FL_input_map_df.npy')
    # Episodes are ragged object arrays, hence allow_pickle=True.
    states = np.load('Data/RL_Data/FL_states_df.npy',
                     allow_pickle=True)
    actions = np.load(
        'Data/RL_Data/FL_selected_action_sequence_df.npy', allow_pickle=True)

    triples = list(zip(input_maps, states, actions))
    train_data, val_data, test_data = data.train_test_split(triples)

    # Every episode must pair each visited state with exactly one action.
    assert all(len(triple[1]) == len(triple[2]) for triple in train_data), \
        'Lengths of states and actions are not equal'

    return train_data, val_data, test_data
# Exemplo n.º 4
def train_test(list_of_objects, object_names, target):
    """Split every object in *list_of_objects* and concatenate the results.

    Each object is split via train_test_split(obj, object_names, target);
    the per-object train/test inputs and outputs are accumulated into four
    flat lists.

    Returns:
        (input_train, input_test, output_train, output_test)
    """
    input_train, input_test = [], []
    output_train, output_test = [], []

    for obj in list_of_objects:
        # Delegate the actual splitting to the shared helper.
        in_tr, in_te, out_tr, out_te = train_test_split(obj, object_names, target)

        # Accumulate this object's split into the flat result lists.
        input_train.extend(in_tr)
        input_test.extend(in_te)
        output_train.extend(out_tr)
        output_test.extend(out_te)

    return input_train, input_test, output_train, output_test
# Exemplo n.º 5
def dataset_sequences_experiment(exp_params, path_dict, reuse_sequences=None):
    """Run a 'dataset sequences' experiment end to end.

    Loads (or reuses) ground-truth sequences, splits them into train/test,
    saves them, then runs the standard-vs-dense comparison.

    Args:
        exp_params: dict of experiment parameters; copied here, then
            augmented with 'n_emissions' and parsed sub-parameter dicts.
        path_dict: dict of paths; must provide 'gt_dir' (existence checked).
        reuse_sequences: optional (train_X, test_X) tuple; when given and
            well-formed, dataset loading and splitting are skipped.

    Raises:
        Exception: if train and test sequences disagree on the number of
            emissions.
    """
    
    t_exp = Timer()
    # Work on a copy so the caller's dict is not mutated.
    exp_params = dict(exp_params)
    ident = dict_get(exp_params, 'dataset_ident', default='', cast=str)
    start_time = t_exp.tic("Starting a 'dataset sequences' experiment. (%s)" % str(ident))
    
    # Get parameters
    t = Timer()
    t.tic("Parsing parameters ...")
    
    train_perc = dict_get(exp_params, 'train_perc', default=1., cast=float)
    gt_dir = dict_get(path_dict, 'gt_dir', default=None)
    check_dir(gt_dir)
    ds_params = dict_get(exp_params, 'dataset_params', default=dict(), cast=dict)
    # Load + split unless a valid (train, test) pair was handed in.
    if reuse_sequences is None or type(reuse_sequences) != tuple or len(reuse_sequences) != 2:
        gt_sequences, _, _ = get_dataset_sequences(ident, ds_params, gt_dir)
        train_X, test_X = train_test_split(gt_sequences, train_perc)
    else:
        train_X, test_X = reuse_sequences
        timestamp_msg("Reusing sequences ...")
    
    # Check gt_sequences
    _, _, n_train_emissions = check_sequences(train_X)
    n_test_emissions = None
    if test_X is not None and len(test_X) > 0:
        _, _, n_test_emissions = check_sequences(test_X)
    _save_data(path_dict, train_X, test_X)
    # Train and test must agree on the emission alphabet size.
    if n_test_emissions is not None and n_train_emissions != n_test_emissions:
        raise Exception("Number of emissions in train and test sequence differs")
    exp_params['n_emissions'] = n_train_emissions
    
    exp_params = _parse_base_parameters(exp_params, path_dict)
    exp_params = _parse_standard_and_dense(exp_params, path_dict, exp_params['n_emissions']) 
    _exp_params = _save_experiment_parameters(exp_params, path_dict)
    t.toc("Parameters parsed. Using parameters: %s" % str(_exp_params))
    
    # When 'fair' standard parameters were configured, pass both standard
    # parameter sets as a tuple.
    if 'fair_standard_params' in exp_params: 
        _standard_vs_dense(train_X, test_X, (exp_params['standard_params'], exp_params['fair_standard_params']),
                           exp_params['dense_params'])
    else:
        _standard_vs_dense(train_X, test_X, exp_params['standard_params'], exp_params['dense_params'])
    
    fin_time, diff = t_exp.toc("Finished a 'dataset sequences' experiment.")
# Exemplo n.º 6
def main(dataset, saved_model_path, _config, _log):
    """Evaluate a saved policy on the test split of *dataset*.

    Loads a TF SavedModel, re-wraps its flat loss to accept nested
    structures, and runs 1000 evaluation steps, logging and printing the
    loss each step.
    """
    policy = tf.saved_model.load(saved_model_path)
    # The exported loss takes flattened tensor lists; wrap it so callers
    # can pass (possibly nested) structures.
    flat_loss = policy.loss
    policy.loss = lambda *structs: flat_loss(*tf.nest.flatten(structs))
    learner = Learner(policy=policy, **_config['learner'])

    # Only the test paths are used; the train split is discarded.
    _, test_paths = data.train_test_split(**dataset)

    embed_controller = embed.embed_controller_discrete  # TODO: configure
    data_config = dict(_config['data'], embed_controller=embed_controller)
    test_data = data.make_source(filenames=test_paths, **data_config)
    test_manager = train_lib.TrainManager(learner, test_data,
                                          dict(train=False))

    # NOTE(review): total_steps is never incremented, so every log entry is
    # recorded at step 0 — confirm whether this is intended.
    total_steps = 0

    for _ in range(1000):
        # now test
        test_stats = test_manager.step()
        train_lib.log_stats(ex, test_stats, total_steps)
        test_loss = test_stats['loss'].numpy()
        print(f'test_loss={test_loss:.4f}')
# Exemplo n.º 7
def load_data(features_dict):
    """Load and preprocess a MovieLens ratings dataset.

    Loads the `movielens/<dataset>-ratings` train split, drops neutral
    (3.0-star) ratings so the label can be binarized, builds vocabularies
    for the categorical features, and returns batched train/test splits.

    Args:
        features_dict: mapping describing the features to keep; forwarded
            to prepare_dataset() and features_by_type().

    Returns:
        (train, test, vocabularies) where train/test are batched datasets
        and vocabularies maps categorical feature name -> vocabulary.
    """
    dataset = f'movielens/{FLAGS.dataset}-ratings'
    ratings = tfds.load(dataset, split='train', data_dir=FLAGS.data_dir)

    # Prepare for binarization: drop neutral ratings. Dataset.filter()
    # returns a NEW dataset, so the result must be re-assigned — the
    # original code discarded it, silently keeping 3.0-star ratings.
    ratings = ratings.filter(lambda x: x['user_rating'] != 3.0)

    ratings = prepare_dataset(ratings, features_dict)

    # Cache for efficiency
    ratings = ratings.cache(tempfile.NamedTemporaryFile().name)

    features = features_by_type(features_dict)
    categorical_features = features['string'] + features['integer']
    vocabularies = get_vocabularies(ratings, categorical_features)

    train, test = train_test_split(ratings, train_size=0.8, seed=FLAGS.seed)

    train_size = len(train)
    train = train.shuffle(train_size).batch(FLAGS.train_batch_size)
    test = test.batch(FLAGS.eval_batch_size)

    return train, test, vocabularies
# Exemplo n.º 8
                        metavar="/path/to/numpy/image/file",
                        help='Path to numpy image file')
    parser.add_argument('--masks',
                        required=False,
                        metavar="/path/to/numpy/mask/file",
                        help='Path to numpy mask file')
    args = parser.parse_args()

    # Only these four sub-commands are supported.
    assert args.command in [
        'imgmask', 'img', 'split', 'reset'
    ], "command must be in ['imgmask', 'img', 'split', 'reset']"

    train_directory = os.path.join(args.outdir, 'training_data')
    test_directory = os.path.join(args.outdir, 'testing_data')

    if args.command == 'imgmask':
        # NOTE(review): this assert is always truthy (a non-empty list), so
        # it does NOT actually verify that --images/--masks were supplied.
        assert [args.images, args.masks
                ], 'imgmask requires the --images and --masks arguments'
        np_to_imgmask(args.images, args.masks, args.outdir)

    if args.command == 'img':
        assert args.images, "img requires the --images argument"
        np_to_img(args.images, args.outdir)

    if args.command == 'split':
        # Partition the files under outdir into the train/test directories.
        train_test_split(train_directory, test_directory, args.outdir,
                         TRAIN_PERCENT)

    if args.command == 'reset':
        # reset() presumably reverts a previous split — confirm in its definition.
        reset(train_directory, test_directory, args.outdir)
# Exemplo n.º 9
# Load the run configuration. yaml.safe_load() avoids constructing
# arbitrary Python objects from the file; the original yaml.load(f) with
# no Loader is deprecated since PyYAML 5.1 and a TypeError in PyYAML >= 6.
with open(CONFIGFILE, "r") as f:
    config = yaml.safe_load(f)


ap = ArgumentParser()
ap.add_argument('--inspect_data', action='store_true', default=False,
                help="plot training data for inspection")
ap.add_argument('--train', action='store_true', default=False,
                help="Run training")
ap.add_argument('--test', action='store_true', default=False,
                help="Run test")
args = ap.parse_args()

# Load, clean, and split the milk-production series.
df = load_data()
milk = clean_data(df)
train, test = train_test_split(milk)

if args.inspect_data:
    print("RAW DATA")
    print(df.head())
    milk.plot()
    plt.show()
elif args.train:
    model = LSTMPredictor(config)
    model.fit(train['Milk Production'].values.reshape(1, -1))
    model.close()
elif args.test:
    model = LSTMPredictor(config)
    # NOTE(review): inference is fed the *train* series even in the --test
    # branch — confirm this is the intended warm-up input for forecasting.
    y_pred = model.infer(train['Milk Production'].values.reshape(1, -1), 12)
    y_pred = list(y_pred)
    test = test.copy()
# Exemplo n.º 10
print(opt)

# Locations of the raw dataset and its preprocessed cache.
root_dir = "/home/szhang67/data/raw_data"
preprocessed_dir = "/home/szhang67/data/preprocessed"

cuda = opt.cuda
if cuda and not torch.cuda.is_available():
    raise Exception("No GPU found, please run without --cuda")

# Seed the CPU (and, when CUDA is enabled, GPU) RNGs for reproducibility.
torch.manual_seed(opt.seed)
if cuda:
    torch.cuda.manual_seed(opt.seed)

print('===> Loading datasets')
train_set, test_set = train_test_split(opt.upscale_factor, root_dir,
                                       preprocessed_dir, opt.split_ratio)
# Shuffle only the training loader; keep test iteration order fixed.
training_data_loader = DataLoader(dataset=train_set,
                                  num_workers=opt.threads,
                                  batch_size=opt.batchSize,
                                  shuffle=True)
testing_data_loader = DataLoader(dataset=test_set,
                                 num_workers=opt.threads,
                                 batch_size=opt.testBatchSize,
                                 shuffle=False)

print('===> Building model')
model = Net()
criterion = nn.MSELoss()

if cuda:
    model = model.cuda()
# Exemplo n.º 11
def train(
    dirpath,
    pairs,
    test_pairs=None,
    train_val_split_ratio=0.95,
    batch_size=512,
    num_workers=8,
    seed=1234,
    args=None,
):
    """Train a Seq2Seq model on (source, target) polynomial pairs.

    Args:
        dirpath: directory for checkpoints, pickled vocabs and eval output.
        pairs: pairs used to build the vocabularies and the train/val split.
        test_pairs: optional held-out pairs; when given, the final model is
            evaluated and the score written to `eval.txt`.
        train_val_split_ratio: fraction of `pairs` used for training.
        batch_size: DataLoader batch size.
        num_workers: DataLoader worker count.
        seed: RNG seed passed to set_seed().
        args: argparse.Namespace of extra model/trainer options; defaults to
            an empty Namespace. (The previous default, a mutable dict `{}`,
            could never work: both `vars(args)` and
            `pl.Trainer.from_argparse_args(args)` below require a Namespace
            and raise TypeError on a plain dict.)

    Returns:
        The trained Seq2Seq model, moved back to `device`.
    """
    import argparse

    if args is None:
        args = argparse.Namespace()

    set_seed(seed)

    src_lang, trg_lang = PolynomialLanguage.create_vocabs(pairs)
    # NOTE(review): the keyword is `train_test_split_ratio` while the local
    # parameter is `train_val_split_ratio` — confirm against the project's
    # train_test_split signature.
    train_pairs, val_pairs = train_test_split(
        pairs, train_test_split_ratio=train_val_split_ratio)

    train_tensors = pairs_to_tensors(train_pairs, src_lang, trg_lang)
    val_tensors = pairs_to_tensors(val_pairs, src_lang, trg_lang)

    collate_fn = Collater(src_lang, trg_lang)
    train_dataloader = DataLoader(
        SimpleDataset(train_tensors),
        batch_size=batch_size,
        collate_fn=collate_fn,
        num_workers=num_workers,
    )
    val_dataloader = DataLoader(
        SimpleDataset(val_tensors),
        batch_size=batch_size,
        collate_fn=collate_fn,
        num_workers=num_workers,
    )

    # Persist the vocabularies so inference can rebuild the model later.
    save_to_pickle = {
        "src_lang.pickle": src_lang,
        "trg_lang.pickle": trg_lang,
    }
    for k, v in save_to_pickle.items():
        with open(os.path.join(dirpath, k), "wb") as fo:
            pickle.dump(v, fo)

    model = Seq2Seq(src_lang, trg_lang, **vars(args)).to(device)

    # Keep only the single best checkpoint by validation loss.
    checkpoint_callback = ModelCheckpoint(
        monitor="val_loss",
        dirpath=dirpath,
        filename="model",
        save_top_k=1,
        mode="min",
    )

    trainer = pl.Trainer.from_argparse_args(
        args,
        default_root_dir=dirpath,
        callbacks=[checkpoint_callback],
    )
    trainer.fit(model, train_dataloader, val_dataloader)  # pylint: disable=no-member

    # not sure why, but after trainer.fit, the model is sent to cpu, so we'll
    # need to send it back to device so evaluate doesn't break
    model.to(device)

    if test_pairs:
        final_score = evaluate(model, test_pairs, batch_size=batch_size)
        with open(os.path.join(dirpath, "eval.txt"), "w") as fo:
            fo.write(f"{final_score:.4f}\n")

    return model