Example No. 1
 def __init__(self, fields, reader=None, sep='\t', seed=None):
     self.fields = fields
     self.reader = reader  # expected to be a GlobReader
     self.sep = sep
     self.seed = seed
     if seed is None or seed <= 0:
         self.seed = U.get_seed()
     else:
         self.seed = seed
     self.rng = np.random.RandomState(self.seed + U.string2rand('FieldParser'))
Example No. 2
def split_data(data, target, seed=None, test_size=.1):
    '''
    Split the data and target into train and test sets.
    params:
        data: the data to be split
        target: the target values to be split alongside the data
        seed (optional): seed for the split; falls back to get_seed()
        test_size (optional): fraction held out for testing. Default 10%
    '''
    seed = seed if seed else get_seed()
    return cv.train_test_split(data,
                               target,
                               test_size=test_size,
                               random_state=seed)
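A minimal usage sketch for split_data, assuming the function above is importable and using scikit-learn's iris dataset purely as illustrative input:

from sklearn.datasets import load_iris

# Illustrative only: 90/10 split of the iris data with an explicit seed.
iris = load_iris()
X_train, X_test, y_train, y_test = split_data(iris.data, iris.target, seed=42)
print(X_train.shape, X_test.shape)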
Example No. 3
 def __init__(self, file_name, chunk_size=1000, shuf=True, regex=None, seed=None, bucket=None, verbose=True):
     self.file_name = file_name
     self.chunk_size = chunk_size
     if chunk_size is None or chunk_size <= 0:  # read entire file as one chunk
         self.chunk_size = np.iinfo(np.int32).max
     self.shuf = shuf
     self.bucket = bucket
     self.verbose = verbose
     self.regex = regex
     if regex:
         self.regex = re.compile(regex)
     if seed is None or seed <= 0:
         self.seed = U.get_seed()
     else:
         self.seed = seed
     self.rng = np.random.RandomState(self.seed + U.string2rand('ChunkReader'))
Example No. 4
 def __init__(self, file_pattern, chunk_size=1000, shuf=True, regex=None, seed=None, bucket=None, verbose=True):
     self.file_pattern = file_pattern
     self.file_names = glob.glob(self.file_pattern)
     self.file_names.sort()
     self.chunk_size = chunk_size
     self.shuf = shuf
     self.bucket = bucket
     self.verbose = verbose
     self.regex = regex
     if seed is None or seed <= 0:
         self.seed = U.get_seed()
     else:
         self.seed = seed
     self.rng = np.random.RandomState(self.seed + U.string2rand('GlobReader'))
     self.num_files = None
     self.bpf = None
     self.prev_file = ''
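Examples 1, 3 and 4 all follow the same pattern: a base seed (falling back to U.get_seed()) is offset by a hash of the component name, so each reader draws from its own reproducible random stream. A minimal sketch of that idea, with a hypothetical string2rand stand-in (the real U.string2rand may differ):

import hashlib
import numpy as np

def string2rand(name):
    # Hypothetical stand-in: map a component name to a stable integer offset.
    return int(hashlib.md5(name.encode()).hexdigest(), 16) % (2 ** 20)

base_seed = 1234
rng_a = np.random.RandomState(base_seed + string2rand('ChunkReader'))
rng_b = np.random.RandomState(base_seed + string2rand('GlobReader'))
# Same base seed, but each component gets its own deterministic stream.
print(rng_a.randint(100), rng_b.randint(100))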
Example No. 5
def train_regression(data, target, models=None,
                     scorer=None, verbose=False, seed=None):
    '''
    Helper that always splits the data the same way and trains each model.
    :return: list of tuples (model id, dict of model)
    :params:
        data: training data, without the target values
        target: target values for the model
        models: list of models to be trained; defaults to a standard set
        scorer: scorer used to rank the results
        verbose: print progress information
        seed: random seed for the split; falls back to get_seed()
    '''
    seed = seed if seed else get_seed()

    # get the models to test
    if not models:
        models = [ExtraTreesClassifier(random_state=seed),
                  GradientBoostingClassifier(random_state=seed),
                  RandomForestClassifier(random_state=seed),
                  LogisticRegressionCV(),
                  RidgeClassifierCV(),
                  LinearSVC(random_state=seed),
                  SVC(random_state=seed),
                  SGDClassifier(random_state=seed),
                  GaussianNB()]

    # choose accuracy if no scorer was passed
    if not scorer:
        scorer = accuracy_score

    # split in train and validation set
    X_train, X_val, y_train, y_val = split_data(data, target, seed)
    print_time('Created train and validation')
    print_time('Size train: {} test:{}'.
               format(X_train.shape, X_val.shape))
    return train_all_models(X_train, X_val, y_train, y_val,
                            models, scorer, verbose)
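A hedged usage sketch for train_regression, using scikit-learn's breast-cancer dataset purely as illustrative input (note that, despite the name, the default models are classifiers):

from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegressionCV

# Illustrative only: rank two models on a binary classification dataset.
X, y = load_breast_cancer(return_X_y=True)
results = train_regression(X, y,
                           models=[RandomForestClassifier(random_state=42),
                                   LogisticRegressionCV()],
                           verbose=True, seed=42)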
Example No. 6
# Load config parameters
locals().update(config)

# Set up model and prediction function
x = tensor.tensor3('inputs', dtype='float64')
y = tensor.tensor3('targets', dtype='float64')

model = 'bs'
with open('gru_best.pkl', 'r') as picklefile:
    model = load(picklefile)
y_hat, cost, cells = nn_fprop(x, y, frame_length, hidden_size, num_layers, model)
predict_fn = theano.function([x], y_hat)

# Generate
print "generating audio..."
seed = get_seed(hdf5_file, [seed_index])
sec = 16000
samples_to_generate = sec*secs_to_generate
num_frames_to_generate = samples_to_generate/frame_length + seq_length #don't include seed
predictions = []
prev_input = seed
for i in range(num_frames_to_generate):
    prediction = predict_fn(prev_input)
    predictions.append(prediction)
    pred_min = numpy.min(predictions)
    pred_max = numpy.max(predictions)
    prev_input = rescale(prediction, pred_max, pred_min) 
actually_generated = numpy.asarray(predictions)[seq_length:,:,:,:] #cut off seed
last_frames = actually_generated[:,:,-1,:]
make_wav(output_filename, actually_generated.flatten())
print str(secs_to_generate)+' seconds of audio generated'
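The rescale helper used in the loop above is not shown here; a minimal sketch of what such a min/max normalisation might look like, offered only as an assumption about its intent:

import numpy

def rescale(frame, frame_max, frame_min):
    # Hypothetical: map values into [0, 1] using the running min/max,
    # guarding against a zero range.
    span = frame_max - frame_min
    if span == 0:
        return numpy.zeros_like(frame)
    return (frame - frame_min) / span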
Example No. 7
# Fourier magnitude map
modlmap = enmap.modlmap(shape, wcs)

# Unlensed CMB theory
theory = cosmology.default_theory()
cltt2d = theory.uCl('TT', modlmap)
clte2d = theory.uCl('TE', modlmap)
clee2d = theory.uCl('EE', modlmap)
power = np.zeros((3, 3, shape[0], shape[1]))
power[0, 0] = cltt2d
power[1, 1] = clee2d
power[1, 2] = clte2d
power[2, 1] = clte2d

# Unlensed CMB generator
mgen = maps.MapGen((3, ) + shape, wcs, power)

for j, task in enumerate(my_tasks):
    print(f'Rank {rank} performing task {task} as index {j}')
    cmb = mgen.get_map(
        seed=cutils.get_seed('lensed', task, False)
    )  # unlensed map ; you can save it if you want, before lensing it
    cmb = lens_map(cmb)  # do the lensing
    dcmb = cmb.resample(
        (3, dNpix, dNpix)
    )  # downsample the lensed CMB map (if saving unlensed, do the same to it)
    # For some reason, I was saving the Fourier transforms... probably since it is easier to apply a beam later
    kmap = enmap.map2harm(dcmb, iau=False)
    enmap.write_map(f'{savedir}lensed_kmap_real_{task:06d}.fits', kmap.real)
    enmap.write_map(f'{savedir}lensed_kmap_imag_{task:06d}.fits', kmap.imag)
Example No. 8
              saveload.Load(load_path), plotter,
              saveload.Checkpoint(last_path, save_separately=['log']),
              ] + track_best('dev_cost', save_path)

if learning_rate_decay not in (0, 1):
    extensions.append(SharedVariableModifier(step_rules[0].learning_rate,
                                             lambda n, lr: numpy.cast[theano.config.floatX](learning_rate_decay * lr), after_epoch=True, after_batch=False))

print 'number of parameters in the model: ' + str(tensor.sum([p.size for p in cg.parameters]).eval())
# Finally build the main loop and train the model
main_loop = MainLoop(data_stream=train_stream, algorithm=algorithm,
                     model=Model(cost), extensions=extensions)
main_loop.run()

# Generate
print "generating audio..."
seed = get_seed(hdf5_file, [400])
seed_influence_length = frame_length * 3
sec = 16000
samples_to_generate = sec*secs_to_generate
num_frames_to_generate = samples_to_generate/frame_length + seed_influence_length
generated_seq = []
prev_input = seed
for x in range(0, num_frames_to_generate):
    prediction = predict_fn(prev_input)
    generated_seq.append(prediction) #NEED TO OVERLAP/AVG?
    prev_input = prediction
actually_generated = numpy.asarray(generated_seq).flatten()[seed_influence_length:]
filename = str(frame_length)+str(seq_length)+'.wav'
make_wav(filename, actually_generated)
Example No. 9
 def sign_up(total_count, needed_count):
     # build a seed from the word list and split it into Shamir shares;
     # needed_count of total_count shares are required to recover it
     dict_path = os.path.join(utils.root_path(), 'english.txt')
     seed = utils.get_seed(dict_path, 12)
     keys = shamir.crypto_keys(needed_count, total_count, seed)
     WalletManager.create_wallet(seed.encode())
     return keys
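A hypothetical call, assuming a 3-of-5 share scheme:

# Illustrative only: split the new wallet's seed into 5 shares,
# any 3 of which suffice to recover it.
shares = sign_up(total_count=5, needed_count=3)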
Example No. 10
        "-m",
        "--model",
        help="Save model in different file",
        action="store_true",
    )
    parser.add_argument(
        "--split",
        metavar="[1-99]",
        help="Choose size of split",
        choices=(range(1, 100)),
        type=split_size,
        default=80,
    )

    args = parser.parse_args()
    get_seed(args.seed)
    input_n = 13
    output_n = 2
    hidden_layers = args.hidden_layer

    n = neuralNetwork(input_n, output_n, hidden_layers, args.learningrate,
                      sigmoid, args.bias)
    fit(args, n)
    print()
    if args.visu:
        fig1, ax1 = display(n.loss, n.val_loss, "Loss Trend", "loss",
                            "val_loss")
        fig2, ax2 = display(n.acc, n.val_acc, "Accuracy Trend", "acc",
                            "val_acc")
        plt.show()
    save_model(args.model, n)
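The split_size function passed as the argparse type for --split is not shown; a minimal sketch of such a validator, assuming it only checks that the value is an integer percentage between 1 and 99:

import argparse

def split_size(value):
    # Hypothetical validator for the --split argument.
    ivalue = int(value)
    if not 1 <= ivalue <= 99:
        raise argparse.ArgumentTypeError('split must be between 1 and 99')
    return ivalue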
Example No. 11
def _train(args, pretrain_args):
    """Train the language model.

    Creates train/valid/test models, runs training epochs, saves model and
    writes results to database if specified.
    """
    start_time = time.time()
    print('Training', ', '.join(args.speakers), '...')

    # randomly sample validation set monte_carlo_cv_num times
    for num in range(args.monte_carlo_cv_num):
        # get seed used to sub-sample validation dataset (use 42 for 1st run)
        seed = utils.get_seed(num)

        # get train/valid/test data and convert to sequences
        train_data, valid_data, test_data, id_to_word = data_reader.get_data(
            args, seed=seed)
        # set configurations/hyperparameters for model
        config, test_config = utils.set_config(args, id_to_word)

        # initialize word embeddings
        init_embed = utils.init_embedding(id_to_word,
                                          dim=args.embed_size,
                                          init_scale=args.init_scale,
                                          embed_path=args.embed_path)

        with tf.Graph().as_default():
            # initializer used to initialize TensorFlow variables
            initializer = tf.random_uniform_initializer(
                -config['init_scale'], config['init_scale'])
            # create Train model
            with tf.name_scope('Train'):
                with tf.variable_scope('Model',
                                       reuse=None,
                                       initializer=initializer):
                    m_train = model.Model(args,
                                          is_training=True,
                                          config=config,
                                          init_embed=init_embed,
                                          name='Train')
                    m_train.build_graph()

            # create Valid model
            with tf.name_scope('Valid'):
                with tf.variable_scope('Model',
                                       reuse=True,
                                       initializer=initializer):
                    m_valid = model.Model(args,
                                          is_training=False,
                                          config=config,
                                          init_embed=init_embed,
                                          name='Valid')
                    m_valid.build_graph()

            # create Test model
            with tf.name_scope('Test'):
                with tf.variable_scope('Model',
                                       reuse=True,
                                       initializer=initializer):
                    m_test = model.Model(args,
                                         is_training=False,
                                         config=test_config,
                                         init_embed=init_embed,
                                         name='Test')
                    m_test.build_graph()

            # create summaries to be viewed in TensorBoard
            tb_summaries = utils.TensorBoardSummaries()
            tb_summaries.create_ops()

            init = tf.global_variables_initializer()

            # if pretrained, must create dict to initialize TF Saver
            if pretrain_args:
                # get trainable variables and convert to dict for Saver
                reuse_vars = tf.get_collection(
                    tf.GraphKeys.TRAINABLE_VARIABLES)
                reuse_vars_dict = dict([(var.op.name, var)
                                        for var in reuse_vars])
                # create saver for TF session (see function for addl details)
                saver = utils.create_tf_saver(args, pretrain_args,
                                              reuse_vars_dict)
            else:
                saver = tf.train.Saver()

            # ppls dict has perplexities that are stored in results database
            ppls = {}
            ppls, _ = _update_ppls(ppls, initialize=True)

            with tf.Session() as sess:
                sess.run(init)

                if args.load_path != '':
                    print('Restoring model...')
                    saver.restore(sess, args.load_path)

                for epoch in range(config['max_epoch']):
                    print('Epoch: {0} Learning rate: {1:.3f}\n'.format(
                        epoch + 1, sess.run(m_train.lr)))
                    for i, speaker in enumerate(args.speakers):
                        print('Training {0} ...'.format(speaker))

                        # run epoch on training data
                        train_perplexity = _run_epoch(
                            sess,
                            m_train,
                            args,
                            train_data,
                            i,
                            tb_summaries,
                            id_to_word,
                            train_op=m_train.train_op,
                            verbose=True)
                        print('Epoch: {0} Train Perplexity: {1:.3f}'.format(
                            epoch + 1, train_perplexity))
                        ppls, _ = _update_ppls(ppls,
                                               epoch=epoch + 1,
                                               speaker=speaker,
                                               ppl=train_perplexity,
                                               dataset='train')

                        print('Validating...')
                        # run epoch on validation data
                        valid_perplexity = _run_epoch(sess,
                                                      m_valid,
                                                      args,
                                                      valid_data,
                                                      i,
                                                      tb_summaries,
                                                      id_to_word,
                                                      verbose=True)
                        print('Epoch: {0} Valid Perplexity: {1:.3f}'.format(
                            epoch + 1, valid_perplexity))
                        ppls, improved = _update_ppls(ppls,
                                                      epoch=epoch + 1,
                                                      speaker=speaker,
                                                      ppl=valid_perplexity,
                                                      dataset='valid')

                        if improved:
                            # save model if valid ppl is lower than current
                            # best valid ppl
                            if args.save_path != '':
                                print('Saving model to {0}.'.format(
                                    args.save_path))
                                saver.save(sess, args.save_path)

                for i, speaker in enumerate(args.speakers):
                    print('Testing {0} ...'.format(speaker))
                    print('Restoring best model for testing...')
                    saver.restore(sess, args.save_path)
                    # run model on test data
                    test_perplexity = _run_epoch(sess, m_test, args, test_data,
                                                 i)
                    ppls['test_ppl_' + speaker] = test_perplexity
                    print('Test Perplexity: {0:.3f}'.format(test_perplexity))

            if args.insert_db == 'True':
                # write params/config/results to sql database
                results_db.insert_results(args, config, start_time, ppls)
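The comment in this example notes that the first Monte Carlo run uses seed 42; a minimal sketch of a get_seed helper consistent with that comment (the real utils.get_seed may differ):

import random

def get_seed(run_num):
    # Hypothetical: fix the first run at 42 for comparability, then draw
    # fresh seeds for later Monte Carlo resamples of the validation set.
    if run_num == 0:
        return 42
    return random.randint(0, 2 ** 31 - 1)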