Example #1
def main(args):
    steps_per_epoch = NUM_IMAGES // args.batch_size
    training_steps = (NUM_IMAGES * args.num_epochs) // args.batch_size
    train_imsize = nfnet_params[args.variant]["train_imsize"]
    test_imsize = nfnet_params[args.variant]["test_imsize"]
    aug_base_name = "cutmix_mixup_randaugment"
    augment_name = f"{aug_base_name}_{nfnet_params[args.variant]['RA_level']}"
    max_lr = 0.1 * args.batch_size / 256
    eval_preproc = "resize_crop_32"

    model = NFNet(
        num_classes=1000,
        variant=args.variant,
        label_smoothing=args.label_smoothing,
        ema_decay=args.ema_decay,
    )
    model.build((1, 224, 224, 3))
    lr_decayed_fn = tf.keras.experimental.CosineDecay(
        initial_learning_rate=max_lr,
        decay_steps=training_steps - 5 * steps_per_epoch,
    )
    lr_schedule = WarmUp(
        initial_learning_rate=max_lr,
        decay_schedule_fn=lr_decayed_fn,
        warmup_steps=5 * steps_per_epoch,
    )
    optimizer = tfa.optimizers.SGDW(learning_rate=lr_schedule,
                                    weight_decay=2e-5,
                                    momentum=0.9)
    model.compile(
        optimizer=optimizer,
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=[
            tf.keras.metrics.SparseCategoricalAccuracy(name="top_1_acc"),
            tf.keras.metrics.SparseTopKCategoricalAccuracy(k=5,
                                                           name="top_5_acc"),
        ],
    )
    ds_train = load(
        Split(2),
        is_training=True,
        batch_dims=(args.batch_size, ),  # dtype=tf.bfloat16,
        image_size=(train_imsize, train_imsize),
        augment_name=augment_name,
    )
    # ds_valid = load(Split(3), is_training=False, batch_dims=(256, ), augment_name="cutmix")
    ds_test = load(
        Split(4),
        is_training=False,
        batch_dims=(25, ),  # dtype=tf.bfloat16,
        image_size=(test_imsize, test_imsize),
        eval_preproc=eval_preproc,
    )
    model.fit(
        ds_train,
        validation_data=ds_test,
        epochs=args.num_epochs,
        steps_per_epoch=steps_per_epoch,
        callbacks=[tf.keras.callbacks.TensorBoard()],
    )
Example #2
 def test_read_large_dataset(self):
     dataset = Dataset('/home/karthik/PycharmProjects/cmps242/project/yelp_dataset_challenge_academic_dataset', 8,
                       1000, -1)
     dataset_stats = dataset.load()
     term_freq_prod_inv_doc_freq = dataset_stats.top_term_freq_prod_inv_doc_freq(50)
     for term, freq in term_freq_prod_inv_doc_freq.items():
         print('term:%s tf-idf:%s idf:%s' % (term, str(freq), str(dataset_stats.inverse_doc_freq(term))))
Example #3
def run_training(args):
    out_dir = pathlib.Path(args.directory)
    sentences = dataset.load(args.source)

    if args.epoch is not None:
        start = args.epoch + 1
        storage = load(out_dir, args.epoch)
        sentences = itertools.islice(sentences, start, None)
    else:
        start = 0
        storage = init(args)
        if (out_dir / meta_name).exists():
            if input('Overwrite? [y/N]: ').strip().lower() != 'y':
                exit(1)
        with (out_dir / meta_name).open('wb') as f:
            np.save(f, [storage])

    batchsize = 5000
    for i, sentence in enumerate(sentences, start):
        if i % batchsize == 0:
            print()
            serializers.save_npz(str(out_dir / model_name(i)), storage.model)
            serializers.save_npz(str(out_dir / optimizer_name(i)),
                                 storage.optimizer)
        else:
            print(util.progress('batch {}'.format(i // batchsize),
                                (i % batchsize) / batchsize, 100),
                  end='')
        train(storage.model, storage.optimizer, generate_data(sentence),
              generate_label(sentence),
              generate_attr(sentence, storage.mappings))
Example #4
def main(args):
    steps_per_epoch = NUM_IMAGES // args.batch_size
    test_imsize = nfnet_params[args.variant]["test_imsize"]
    eval_preproc = "resize_crop_32"

    model = NFNet(
        num_classes=1000,
        variant=args.variant,
    )
    model.build((1, test_imsize, test_imsize, 3))
    model.compile(
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=[
            tf.keras.metrics.SparseCategoricalAccuracy(name="top_1_acc"),
            tf.keras.metrics.SparseTopKCategoricalAccuracy(k=5,
                                                           name="top_5_acc"),
        ],
    )
    ds_test = load(
        Split(4),
        is_training=False,
        batch_dims=(args.batch_size, ),  # dtype=tf.bfloat16,
        image_size=(test_imsize, test_imsize),
        eval_preproc=eval_preproc,
    )
    model.load_weights(f"{args.variant}_NFNet/{args.variant}_NFNet")
    model.evaluate(ds_test, steps=steps_per_epoch)
Example #5
def download_poster_links(save_every=20, pbar=True):
    ids = dataset.load('links.csv', usecols=[
        'movieId', 'imdbId'], index_col='movieId')
    ids = _only_unfinished(ids)
    posters = pd.DataFrame()
    posters.index.name = 'movieId'

    iterator = enumerate(ids.iterrows())
    if pbar:
        iterator = tqdm(iterator, total=len(ids.index))

    for i, row in iterator:
        movieId, (imdbId,) = row
        try:
            link = movieposters.get_poster(id=imdbId)
        except (movieposters.MovieNotFound, movieposters.PosterNotFound):
            link = np.nan
        posters.at[movieId, 'poster'] = link
        if (i + 1) % save_every == 0:
            save_poster_links(posters)
            # once saved, not clearing the df will result in dupes saved later
            posters = posters.iloc[0:0]

    if (i + 1) % save_every != 0:  # didn't save the last batch
        save_poster_links(posters)
Example #6
def main():
    with tf.Session() as session:
        network = network_mod.restore(session)
        dataset = dataset_mod.load()
        cost, accuracy = run_test(session, network, dataset)

        print(f"Test Cost: {cost:0.3f} | Test Acc: {100*accuracy:3.1f}")
Example #7
def run_simulation(num_rounds: int, num_clients: int, fraction_fit: float):
    """Start a FL simulation."""
    # This will hold all the processes which we are going to create
    processes = []

    # Start the server
    server_process = Process(
        target=start_server, args=(num_rounds, num_clients, fraction_fit)
    )
    server_process.start()
    processes.append(server_process)

    # Optionally block the script here for a second or two so the server has time to start
    time.sleep(2)

    # Load the dataset partitions
    partitions = dataset.load(num_clients)

    # Start all the clients
    for partition in partitions:
        client_process = Process(target=start_client, args=(partition,))
        client_process.start()
        processes.append(client_process)

    # Block until all processes are finished
    for p in processes:
        p.join()
Example #8
 def test_iris(self):
     x, y = dataset.load('iris')
     self.assertEqual(x.shape, (150, 4))
     self.assertEqual(y.shape, (150, ))
     self.assertEqual(np.min(x), 0)
     self.assertEqual(np.max(x), 1)
     self.assertEqual(np.min(y), 0)
     self.assertEqual(np.max(y), 2)
Example #9
 def test_load(self):
     x, y = dataset.load()
     self.assertEqual(x.shape, (70000, 28, 28, 1))
     self.assertEqual(y.shape, (70000, ))
     self.assertEqual(np.min(x), 0)
     self.assertEqual(np.max(x), 1)
     self.assertEqual(np.min(y), 0)
     self.assertEqual(np.max(y), 9)
Example #10
def get_title(movieId):
    global _TITLES_BY_ID  # noqa
    try:
        return _TITLES_BY_ID.at[movieId, 'title']  # noqa
    except NameError:
        _TITLES_BY_ID = dataset.load(
            'movies.csv', index_col='movieId', usecols=['movieId', 'title'])
        return _TITLES_BY_ID.at[movieId, 'title']
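The module-level global above implements a lazy, one-time load of the titles table. An equivalent, arguably tidier way to express the same caching (a sketch assuming the same `dataset.load` helper) is `functools.lru_cache`:

import functools

import dataset  # the same helper module used throughout these examples


@functools.lru_cache(maxsize=None)
def _titles_by_id():
    # Loaded once on first call, then served from the cache.
    return dataset.load('movies.csv', index_col='movieId',
                        usecols=['movieId', 'title'])


def get_title(movieId):
    return _titles_by_id().at[movieId, 'title']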
Example #11
def get_model():
    if os.path.isfile(model_path):
        model = load_model(model_path)
    else:
        data = dataset.load()
        model = create_net(data)

    return model
Example #12
def dump_dset(ps):
    ps.max_val = 10000
    ps.num_samples = 1000  # 100000
    ps.num_shards = 10
    fs = list(qd.dump(ps))
    ps.dim_batch = 100
    for i, _ in enumerate(qd.load(ps, fs).map(adapter)):
        pass
    print(f'dumped {i + 1} batches of {ps.dim_batch} samples each')
    return fs
Example #13
def submit(predicted, filename='submission.csv'):
    U.log('Converting predictions into submission file.')
    if U.on_kaggle():
        U.log('Running on Kaggle.')
        sample = pd.read_csv(
            '/kaggle/input/data-science-bowl-2019/sample_submission.csv')
    else:
        U.log('Running locally.')
        [sample] = load(Subset.Sample)
    sample['accuracy_group'] = predicted.astype(int)
    sample.to_csv(filename, index=False)
    return filename
Example #14
 def load_ev(config=setup_config(), args=args):
     dl_ev = torch.utils.data.DataLoader(dataset.load(
         name=args.dataset,
         root=config['dataset'][args.dataset]['root'],
         classes=config['dataset'][args.dataset]['classes']['eval'],
         transform=dataset.utils.make_transform(
             **config['transform_parameters'], is_train=False)),
                                         batch_size=args.sz_batch,
                                         shuffle=False,
                                         num_workers=args.nb_workers,
                                         pin_memory=True)
     return dl_ev
Example #15
def start_game(mode):
    global game_mode, game_state, moves, field, winner, features, labels
    game_mode = mode
    if game_mode != training:
        game_state = game_inprocess
        if game_mode == pvai:
            features, labels = load()  #Reloading data
    else:
        game_state = game_intraining
    field = [[0, 0, 0], [0, 0, 0], [0, 0, 0]]
    moves = 0
    winner = ''
Example #16
def load(config, options=None):
    label = None
    description = None
    dataset = None
    model = None
    training = None
    weights_hdf5 = None  # avoid a NameError below when 'weightsHdf5' is absent from config

    if options is None:
        options = {}

    if 'base' not in options or options['base'] is None:
        options['base'] = os.getcwd()

    if not options['base'].startswith('/'):
        options['base'] = os.path.join(os.getcwd(), options['base'])

    if 'label' in config:
        label = config['label']

    if 'description' in config:
        description = config['description']

    if 'model' in config:
        model = model_utils.load(config['model'], options)

    if 'training' in config:
        training = training_utils.load(config['training'], options)

    if 'dataset' in config:
        dataset = dataset_utils.load(config['dataset'], model, training, options)

    if 'weightsHdf5' in config:
        weights_hdf5 = config['weightsHdf5']

        if not weights_hdf5.startswith('/'):
            weights_hdf5 = os.path.join(options['base'], weights_hdf5)

    project = Project(
        label=label,
        description=description,
        weights_hdf5=weights_hdf5,
        dataset=dataset,
        model=model,
        training=training,
        options=options)

    if 'loadWeights' in options and options['loadWeights']:
        project.model.load_weights_hdf5(project.weights_hdf5)

    return project
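For reference, the loader above walks a configuration of the following shape (keys taken from the branches of the function; the values shown here are hypothetical placeholders):

config = {
    'label': 'demo-project',
    'description': 'Example project definition',
    'model': {},        # passed on to model_utils.load
    'training': {},     # passed on to training_utils.load
    'dataset': {},      # passed on to dataset_utils.load
    'weightsHdf5': 'weights/best.hdf5',
}
project = load(config, options={'base': 'projects/demo', 'loadWeights': False})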
Example #17
def main(generators, image_file):
    input_image = load(image_file)
    input_image = tf.image.resize(input_image, (768, 1024))
    gen_image = recuperacion(generators, image_file)
    
    plt.subplot(1,2,1); plt.imshow(input_image)
    plt.subplot(1,2,2); plt.imshow(gen_image)
    plt.show()
    
    if not os.path.exists('imagenes_generadas'):
        os.mkdir('imagenes_generadas')  # Create the directory if it doesn't exist.

    gen_image = tf.cast(gen_image * 255, tf.uint8)
    gen_image = tf.image.encode_jpeg(gen_image)
    tf.io.write_file(f'imagenes_generadas/imagen_generada_{str(time.time())}.jpg', gen_image)
Example #18
def create_random_submission(
    test_csv: str = "data/test.csv",
    output_file: str = "submission.csv",
):
    dataset = load(test_csv, preprocess=False)
    try:
        ids = dataset["id"]
    except KeyError:
        try:
            ids = dataset["comment_id"]
        except KeyError:
            ids = dataset["comment_text"]
    all_predictions = [random.randint(0, 1) for _ in range(len(ids))]
    df = pd.DataFrame(columns=["id", "prediction"],
                      data=zip(ids, all_predictions))
    df.to_csv(output_file)
Example #19
def run_test(args):
    out_dir = pathlib.Path(args.directory)
    sentences = dataset.load(args.source)
    storage = load(out_dir, args.epoch)
    y_sum = None
    zs_sum = None
    
    for i, sentence in enumerate(itertools.islice(sentences, 100)):
        y_mat, zs_mat = test(
            storage.model,
            generate_data(sentence),
            generate_label(sentence),
            generate_attr(
                sentence,
                storage.mappings
            )
        )
        if i == 0:
            y_sum = y_mat
            zs_sum = zs_mat
        else:
            y_sum += y_mat
            for z_sum, z_mat in zip(zs_sum, zs_mat):
                z_sum += z_mat
        
        prec, rec, f = statistic.f_measure(y_sum)
        print('== segmentation ==')
        print('precision:', prec)
        print('recall:', rec)
        print('F-measure:', f)
        for k, z_sum in zip(storage.mappings._fields, zs_sum):
            prec, rec, f = statistic.f_measure_micro_average(z_sum)
            print('== {} =='.format(k))
            print('precision:', prec)
            print('recall:', rec)
            print('F-measure:', f)
            

        print('expect:', '/'.join(
            info.surface_form for info in sentence)
        )
        print('actual:', '/'.join(
            y for (y, zs) in generate(
                storage.model,
                generate_data(sentence)
            )
        ))
Example #20
def run_training(args):
    out_dir = pathlib.Path(args.directory)
    sentences = dataset.load(args.source)
    
    if args.epoch is not None:
        start = args.epoch + 1
        storage = load(out_dir, args.epoch)
        sentences = itertools.islice(sentences, start, None)
    else:
        start = 0
        storage = init(args)        
        if (out_dir/meta_name).exists():
            if input('Overwrite? [y/N]: ').strip().lower() != 'y':
                exit(1)
        with (out_dir/meta_name).open('wb') as f:
            np.save(f, [storage])
        
    batchsize = 5000
    for i, sentence in enumerate(sentences, start):
        if i % batchsize == 0:
            print()
            serializers.save_npz(
                str(out_dir/model_name(i)),
                storage.model
            )
            serializers.save_npz(
                str(out_dir/optimizer_name(i)),
                storage.optimizer
            )
        else:
            print(
                util.progress(
                    'batch {}'.format(i // batchsize),
                    (i % batchsize) / batchsize, 100),
                end=''
            )
        train(storage.model,
              storage.optimizer,
              generate_data(sentence),
              generate_label(sentence),
              generate_attr(
                  sentence,
                  storage.mappings
              )
        )
Example #21
def main():
    cifar = dataset.load(10000)
    X, Y = cifar.data, cifar.target
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y,
                                                        test_size=0.2,
                                                        random_state=0)
    logistic = linear_model.LogisticRegression(C=6000.0)
    rbm = BernoulliRBM(n_components=100,
                       learning_rate=0.025,
                       batch_size=10,
                       n_iter=100,
                       verbose=True)

    classifier = Pipeline(steps=[('rbm', rbm), ('logistic', logistic)])

    # Training RBM-Logistic Pipeline
    classifier.fit(X_train, Y_train)

    # Training Logistic regression
    logistic_classifier = linear_model.LogisticRegression(C=100.0)
    logistic_classifier.fit(X_train, Y_train)

    Y_predicted_rbm = classifier.predict(X_test)
    Y_predicted_raw = logistic_classifier.predict(X_test)

    # Evaluate classifiers
    print()
    print("Logistic regression using RBM features:\n%s\n" % (
        metrics.classification_report(
            Y_test,
            Y_predicted_rbm,
            target_names=cifar.target_names)))

    print("Logistic regression using raw pixel features:\n%s\n" % (
        metrics.classification_report(
            Y_test,
            Y_predicted_raw,
            target_names=cifar.target_names)))

    print("Confusion matrix RBM features:\n%s" % metrics.confusion_matrix(Y_test, Y_predicted_rbm))
    print("Confusion matrix raw pixel features:\n%s" % metrics.confusion_matrix(Y_test, Y_predicted_raw))

    # Plot RBM features
    plot(rbm, 100)
Example #22
def generate_samples(model_path, rows, cols, channels, sample_size):
    img_size = rows * cols

    # Load model
    json_file = open(model_path + ".json", 'r')
    json_model = json_file.read()
    json_file.close()
    model = model_from_json(json_model)
    model.load_weights(model_path + ".h5")

    # Load dataset
    (_, sketches) = dataset.load("sketches")
    sketches = sketches[0:sample_size]

    # Normalize to [-1, 1] and flatten
    sketches = sketches / 127.5 - 1.
    sketches = np.expand_dims(sketches, axis=3)
    sketches = sketches.reshape(sketches.shape[0], img_size)

    # Generate samples
    gen_imgs = model.predict(sketches)

    # Rescale images 0 - 1
    gen_imgs = 0.5 * gen_imgs + 0.5

    # Reshape images
    sketches = sketches.reshape((sample_size, rows, cols, channels))
    gen_imgs = gen_imgs.reshape((sample_size, rows, cols, channels))

    # Make directories
    if not os.path.exists("model_results/input"):
        os.makedirs("model_results/input")

    if not os.path.exists("model_results/output"):
        os.makedirs("model_results/output")

    # Save images
    for i in range(len(sketches)):
        img = image.array_to_img(sketches[i])
        img.save('model_results/input/' + str(i) + '.png')
        
    for i in range(len(gen_imgs)):
        img = image.array_to_img(gen_imgs[i])
        img.save('model_results/output/' + str(i) + '.png')
Example #23
def main():
    cifar = dataset.load(10000)
    X, Y = cifar.data, cifar.target
    X_train, X_test, Y_train, Y_test = train_test_split(X,
                                                        Y,
                                                        test_size=0.2,
                                                        random_state=0)
    logistic = linear_model.LogisticRegression(C=6000.0)
    rbm = BernoulliRBM(n_components=100,
                       learning_rate=0.025,
                       batch_size=10,
                       n_iter=100,
                       verbose=True)

    classifier = Pipeline(steps=[('rbm', rbm), ('logistic', logistic)])

    # Training RBM-Logistic Pipeline
    classifier.fit(X_train, Y_train)

    # Training Logistic regression
    logistic_classifier = linear_model.LogisticRegression(C=100.0)
    logistic_classifier.fit(X_train, Y_train)

    Y_predicted_rbm = classifier.predict(X_test)
    Y_predicted_raw = logistic_classifier.predict(X_test)

    # Evaluate classifiers
    print()
    print("Logistic regression using RBM features:\n%s\n" %
          (metrics.classification_report(
              Y_test, Y_predicted_rbm, target_names=cifar.target_names)))

    print("Logistic regression using raw pixel features:\n%s\n" %
          (metrics.classification_report(
              Y_test, Y_predicted_raw, target_names=cifar.target_names)))

    print("Confusion matrix RBM features:\n%s" %
          metrics.confusion_matrix(Y_test, Y_predicted_rbm))
    print("Confusion matrix raw pixel features:\n%s" %
          metrics.confusion_matrix(Y_test, Y_predicted_raw))

    # Plot RBM features
    plot(rbm, 100)
Example #24
def train(name, resume):

    # paths
    log_path = "logs/{}.json".format(name)
    out_path = "snapshots/" + name + ".{epoch:06d}.h5"
    echo('log path', log_path)
    echo('out path', out_path)

    lib.log.info(log_path, {'_commandline': {'name': name, 'resume': resume}})

    # init
    echo('train', (name, resume))
    session = tf.Session('')
    K.set_session(session)
    K.set_learning_phase(1)

    # dataset
    echo('dataset loading...')
    (x_train, y_train), (x_test, y_test) = dataset.load()

    # model building
    echo('model building...')
    model = lib.model.build()
    model.summary()
    if resume:
        echo('Resume Learning from {}'.format(resume))
        model.load_weights(resume, by_name=True)

    # training
    echo('start learning...')
    callbacks = [
        lib.log.JsonLog(log_path),
        keras.callbacks.ModelCheckpoint(out_path,
                                        monitor='val_loss',
                                        save_weights_only=True)
    ]
    model.fit(x_train,
              y_train,
              batch_size=30,
              epochs=10,
              callbacks=callbacks,
              validation_data=(x_test, y_test))
Example #25
def main():
    ds = dataset.load()
    dates = dataset.get_dates_array()
    import jordicolomer_autoregressive
    for m in range(1,2):
        params = {'m':m, 'dates':dates}
        print 'jordicolomer_autoregressive',m,evaluate_all(ds, jordicolomer_autoregressive.predict, params),'\n'
    #exit(0)
    import jordicolomer_average
    for n in range(1,11):
        params = {'n':n, 'dates':dates}
        print 'jordicolomer_average',n,evaluate_all(ds, jordicolomer_average.predict, params),'\n'
    import jordicolomer_averageweekly
    for n in range(1,11):
        params = {'n':n, 'dates':dates}
        print 'jordicolomer_averageweekly',n,evaluate_all(ds, jordicolomer_averageweekly.predict, params),'\n'
    import jordicolomer_averageWeeklyWithTrend
    for m in range(1,20):
        params = {'m':m, 'dates':dates}
        print 'jordicolomer_averageWeeklyWithTrend',m,evaluate_all(ds, jordicolomer_averageWeeklyWithTrend.predict, params),'\n'
Example #26
def test(theta):
    print 'loading data...'
    _, _, dataTe = dataset.load(name='mnist.pkl.gz')

    print 'building the graph...'
    # fprop
    x = T.matrix('x', 'float32')
    F = models.create_mlp(x, theta)
    # zero-one loss
    y = T.ivector('y')
    ell = loss.create_zeroone(F, y)
    # all in one graph
    f_graph = function(
        inputs=[],
        outputs=ell,
        givens={x: dataTe[0], y: dataTe[1]}
    )

    print 'fire the graph...'
    er = f_graph()
    print 'error rate = %5.4f' % (er,)
Example #27
def run(hidden,
        layer,
        dropout,
        learning_rate,
        iteration,
        save,
        train=None,
        test=None):
    if train:
        dataset_id = train.split('/')[-1].split('.')[0]

        pre_processing = PreProcessing(open(train, 'r'), dataset_id)
        dataset = process(pre_processing)

        encoder_embeddings = WordEmbedding(source=dataset.pairs)
        decoder_embeddings = WordEmbedding(source=dataset.pairs)

        encoder = EncoderRNN(encoder_embeddings, hidden,
                             layer).to(settings.device)
        decoder = DecoderRNN(hidden, decoder_embeddings, dropout,
                             layer).to(settings.device)

        model = Model(
            encoder=encoder,
            decoder=decoder,
            learning_rate=learning_rate,
        )
        model.summary()
        model.train(dataset, n_iter=iteration, save_every=save)

    if test:

        dataset = load(test)

        model = Model.load(test)

        while True:
            decoded_words = model.evaluate(str(input("> ")), dataset)
            print(' '.join(decoded_words))
Example #28
def compute_qini(parameters):

    X_original, t_original, y_original = dataset.load(parameters['dataset_id'])
    X, t, y = dataset.shuffled(X_original,
                               t_original,
                               y_original,
                               seed=parameters['shuffle_seed'])

    ((X_train, t_train, y_train),
     (X_test, t_test,
      y_test)) = dataset.train_test_split(X, t, y, train_proportion=2 / 3)

    rfc = RandomForestClassifier(
        n_estimators=parameters['n_estimators'],
        criterion=parameters['criterion'],
        max_depth=parameters['max_depth'],
        min_samples_split=parameters['min_samples_split'],
        min_samples_leaf=parameters['min_samples_leaf'])

    rfc.fit(X_train, y_train, t_train)
    uplift_test = rfc.predict_uplift(X_test)

    return qini_q(y_test, uplift_test, t_test)
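The `parameters` dictionary consumed above needs at least the keys referenced in the function body; a hypothetical call (all values are placeholders, and the criterion name depends on the uplift random-forest implementation being used) could look like:

parameters = {
    'dataset_id': 'hillstrom',   # hypothetical dataset identifier
    'shuffle_seed': 42,
    'n_estimators': 100,
    'criterion': 'gini',         # placeholder; uplift forests often define their own criteria
    'max_depth': 10,
    'min_samples_split': 100,
    'min_samples_leaf': 50,
}
print(compute_qini(parameters))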
Example #29
def load_data(fn, data_path, split_date):
    ratings_ = dataset.load(fn, path=data_path, delim=',')
    ratings = dataset.parse_timestamp(ratings_)
    # rename ratings columns
    ratings = ratings.rename(
        columns={
            "userId": "user_id",
            "movieId": "item_id",
            "rating": "rating",
            "datetime": "datetime"
        })
    # MovieLens data stats
    print("ratings columns: {}".format(ratings.columns))
    print("No of rows in ratings df: {}".format(ratings.shape[0]))
    print("Min datetime: {}, max datetime: {}".format(
        ratings["datetime"].min(), ratings["datetime"].max()))
    split_time = pd.to_datetime(split_date, format='%Y-%m-%d %H:%M:%S.%f')
    # split train/test folds
    train_df, test_df = dataset.split(ratings, split_time)
    print("Size of train dataset: {} & size of test dataset: {}".format(
        train_df.shape[0], test_df.shape[0]))
    print(ratings.head(5))
    return train_df, test_df
Example #30
def train(name, resume):

    # paths
    log_path = "logs/{}.json".format(name)
    out_path = "snapshots/" + name + ".{epoch:06d}.h5"
    echo('log path', log_path)
    echo('out path', out_path)

    lib.log.info(log_path, {'_commandline': {'name': name, 'resume': resume}})

    # init
    echo('train', (name, resume))
    session = tf.Session('')
    K.set_session(session)
    K.set_learning_phase(1)

    # dataset
    echo('dataset loading...')
    (x_train, y_train), (x_test, y_test) = dataset.load()

    # model building
    echo('model building...')
    model = lib.model.build()
    model.summary()
    if resume:
        echo('Resume Learning from {}'.format(resume))
        model.load_weights(resume, by_name=True)

    # training
    echo('start learning...')
    callbacks = [
        lib.log.JsonLog(log_path),
        keras.callbacks.ModelCheckpoint(out_path, monitor='val_loss', save_weights_only=True)
    ]
    model.fit(x_train, y_train, batch_size=30, epochs=10,
              callbacks=callbacks,
              validation_data=(x_test, y_test))
Example #31
def run_test(args):
    out_dir = pathlib.Path(args.directory)
    sentences = dataset.load(args.source)
    storage = load(out_dir, args.epoch)
    y_sum = None
    zs_sum = None

    for i, sentence in enumerate(itertools.islice(sentences, 100)):
        y_mat, zs_mat = test(storage.model, generate_data(sentence),
                             generate_label(sentence),
                             generate_attr(sentence, storage.mappings))
        if i == 0:
            y_sum = y_mat
            zs_sum = zs_mat
        else:
            y_sum += y_mat
            for z_sum, z_mat in zip(zs_sum, zs_mat):
                z_sum += z_mat

        prec, rec, f = statistic.f_measure(y_sum)
        print('== segmentation ==')
        print('precision:', prec)
        print('recall:', rec)
        print('F-measure:', f)
        for k, z_sum in zip(storage.mappings._fields, zs_sum):
            prec, rec, f = statistic.f_measure_micro_average(z_sum)
            print('== {} =='.format(k))
            print('precision:', prec)
            print('recall:', rec)
            print('F-measure:', f)

        print('expect:', '/'.join(info.surface_form for info in sentence))
        print(
            'actual:', '/'.join(
                y
                for (y,
                     zs) in generate(storage.model, generate_data(sentence))))
Example #32
def main():
    ds = dataset.load()
    dates = dataset.get_dates_array()
    import jordicolomer_autoregressive
    for m in range(1, 2):
        params = {'m': m, 'dates': dates}
        print 'jordicolomer_autoregressive', m, evaluate_all(
            ds, jordicolomer_autoregressive.predict, params), '\n'
    #exit(0)
    import jordicolomer_average
    for n in range(1, 11):
        params = {'n': n, 'dates': dates}
        print 'jordicolomer_average', n, evaluate_all(
            ds, jordicolomer_average.predict, params), '\n'
    import jordicolomer_averageweekly
    for n in range(1, 11):
        params = {'n': n, 'dates': dates}
        print 'jordicolomer_averageweekly', n, evaluate_all(
            ds, jordicolomer_averageweekly.predict, params), '\n'
    import jordicolomer_averageWeeklyWithTrend
    for m in range(1, 20):
        params = {'m': m, 'dates': dates}
        print 'jordicolomer_averageWeeklyWithTrend', m, evaluate_all(
            ds, jordicolomer_averageWeeklyWithTrend.predict, params), '\n'
Example #33
def recuperacion(generators, image_file):
    """
    Generates an image of a landscape recovered after a fire.
    The generator transforms the image piece by piece.
        Args:
        - generators: A list of generator models that take an image and
                      produce its recovered version.
        - image_file: The path of the image you want to transform.

        Returns:
            The resulting image stored in a `Tensor`
            of float32 values in the range 0-1.
    """
    img_prueba = load(image_file)
    img_prueba = tf.image.resize(img_prueba, (768, 1024))
    resultado = np.zeros((768, 1024, 3))
    x, y, _ = img_prueba.shape
    fila = 0
    columna = 0
    intervalo_y = (y - 256) // 5
    intervalo_x = (x - 256) // 3
    while 256 + fila < x:
        while 256 + columna < y:
            #graficar(img_prueba/255, img_prueba[: , 0+columna:256+columna]/255 )
            part = img_prueba[fila:256 + fila, columna:256 + columna]
            part = (part * 2) - 1
            part_gen = generar_imagen(generators, part[None, ...])
            part = resultado[fila:256 + fila, columna:256 + columna]
            part[part == 0] = part_gen[part == 0]
            part[:] = np.mean([part, part_gen], axis=0)
            columna += intervalo_y
        columna = 0
        fila += intervalo_x
    resultado = (resultado + 1) / 2
    resultado = tf.cast(resultado, tf.float32)
    return resultado
Example #34
  cfg.read(sys.argv[1])
  print 'train:', cfg.get('data', 'train')
  print 'test:', cfg.get('data', 'test')  
  print 'batch:', cfg.get('cnn', 'batch')
  print 'epochs:', cfg.get('cnn', 'epochs')
  print 'embdims:', cfg.get('cnn', 'embdims')
  print 'filters:', cfg.get('cnn', 'filters')
  print 'filtlen:', cfg.get('cnn', 'filtlen')
  print 'hidden:', cfg.get('cnn', 'hidden')
  print 'dropout:', cfg.get('cnn', 'dropout')
  print 'learnrt:', cfg.get('cnn', 'learnrt')

  # learn alphabets from training examples
  dataset = dataset.DatasetProvider(cfg.get('data', 'train'))
  # now load training examples and labels
  train_x1, train_x2, train_y = dataset.load(cfg.get('data', 'train'))
  maxlen = max([len(seq) for seq in train_x1])
  # now load test examples and labels
  test_x1, test_x2, test_y = dataset.load(cfg.get('data', 'test'), maxlen=maxlen)
  
  init_vectors = None
  # TODO: what are we doing for index 0 (oov words)?
  # use pre-trained word embeddings?
  if cfg.has_option('data', 'embed'):
    print 'embeddings:', cfg.get('data', 'embed')
    word2vec = word2vec_model.Model(cfg.get('data', 'embed'))
    init_vectors = [word2vec.select_vectors(dataset.word2int)]
  
  # turn x and y into numpy array among other things
  classes = len(set(train_y))
  train_x1 = pad_sequences(train_x1, maxlen=maxlen)
Example #35
		pass
	
	def share_data(self, data, dtype):
		if(data.dtype != np.dtype(dtype)):
			data = data.astype(dtype)
		return tn.shared(data, borrow=borrow)
	

if __name__ == '__main__':
	data_file = 'mnist.pkl.gz'
	learning_rate = 0.005
	epochs = 10000
	batch_size = 500
	borrow = True
	
	data = dataset.load(data_file, True)
	train_set, valid_set, test_set = data
	m, n = train_set[0].shape
	k = np.max(train_set[1]) + 1
	print('data:', train_set[0].shape, train_set[1].shape, m, n, k)
	
	classifier = Softmaxclassifier(n_in = n, n_out = k)
	
	trainer = SoftmaxclassifierTrainer(
		train_set, 
		m, n, k,
		valid_data = valid_set,
		classifier = classifier
	)
	
	del(data)
Example #36
from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.layers.embeddings import Embedding
from keras.layers.recurrent import LSTM, SimpleRNN, GRU

NFOLDS = 10
BATCH = 50
EPOCHS = 5
EMBDIMS = 300

if __name__ == "__main__":

  dataset = dataset.DatasetProvider()
  x, y = dataset.load()
  print 'x shape:', x.shape
  print 'y shape:', y.shape
  
  scores = []
  folds = sk.cross_validation.KFold(len(y), n_folds=NFOLDS, shuffle=True)

  for fold_num, (train_indices, test_indices) in enumerate(folds):
    train_x = x[train_indices]
    train_y = y[train_indices]
    test_x = x[test_indices]
    test_y = y[test_indices]
    
    model = k.models.Sequential()
    model.add(LSTM(128, input_length=205845, input_dim=300))
    # model.add(Dense(128, input_shape=(EMBDIMS,)))
Example #37
#!/usr/bin/env python
# coding: utf-8

# Active Learning (Uncertainty Sampling)
# This code is available under the MIT License.
# (c)2013 Nakatani Shuyo / Cybozu Labs Inc.

import numpy
import dataset
from sklearn.linear_model import LogisticRegression

categories = ['crude', 'money-fx', 'trade', 'interest', 'ship', 'wheat', 'corn']
doclist, labels, voca, vocalist = dataset.load(categories)
print "document size : %d" % len(doclist)
print "vocabulary size : %d" % len(voca)

data = numpy.zeros((len(doclist), len(voca)))
for j, doc in enumerate(doclist):
    for i, c in doc.iteritems():
        data[j, i] = c

def activelearn(data, label, strategy, train):
    print strategy

    N, D = data.shape
    train = list(train) # copy initial indexes of training
    pool = range(N)
    for x in train: pool.remove(x)

    predict = None
    precisions = []
Example #38
def main(_):
    graph = tf.Graph()

    with graph.as_default():
        with graph.device(device_for_node_cpu):
            print('-' * 120)
            print('C2S task                  = {t}'.format(t=FLAGS.task))
            print('    data                  = {data}'.format(data=FLAGS.data))
            print('    max_epochs            = {max_epochs}'.format(max_epochs=FLAGS.max_epochs))
            print('    batch_size            = {batch_size}'.format(batch_size=FLAGS.batch_size))
            print('    learning_rate         = {learning_rate}'.format(learning_rate=FLAGS.learning_rate))
            print('    decay                 = {decay}'.format(decay=FLAGS.decay))
            print('    beta1                 = {beta1}'.format(beta1=FLAGS.beta1))
            print('    beta2                 = {beta2}'.format(beta2=FLAGS.beta2))
            print('    epsilon               = {epsilon}'.format(epsilon=FLAGS.epsilon))
            print('    pow                   = {pow}'.format(pow=FLAGS.pow))
            print('    regularization        = {regularization}'.format(regularization=FLAGS.regularization))
            print('    max_gradient_norm     = {max_gradient_norm}'.format(max_gradient_norm=FLAGS.max_gradient_norm))
            print('    use_inputs_prob_decay = {use_inputs_prob_decay}'.format(
                    use_inputs_prob_decay=FLAGS.use_inputs_prob_decay))
            print('-' * 120)
            train_set, test_set, idx2word_history, word2idx_history, idx2word_target, word2idx_target = dataset.load(
                    mode=FLAGS.task, text_data_fn=FLAGS.data
            )

            print('Input vocabulary size:  ', len(idx2word_history))
            print('Output vocabulary size: ', len(idx2word_target))
            print('-' * 120)

            train(train_set, test_set, idx2word_history, word2idx_history, idx2word_target, word2idx_target)
Example #39
def dft_data_load():
    return dataset.load(name='mnist.pkl.gz')
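Consistent with the other `mnist.pkl.gz` examples on this page (e.g. Examples #26, #35 and #43), the returned object unpacks into three splits:

train_set, valid_set, test_set = dft_data_load()
train_x, train_y = train_set  # features and labels for the training split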
Example #40
from dataset import load
from sklearn import cross_validation
import argparse
import tensorflow as tf

parser = argparse.ArgumentParser()
parser.add_argument("-t", "--train", action="store_true", help="Train model with dataset otherwise load it.")
args = parser.parse_args()
# Load our dataset
X, y = load()

# Split dataset into train / test
X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, y, test_size=0.25)

# Initialize the variables (i.e. assign their default value)
init = tf.global_variables_initializer()

# Build fully connected DNN
with tf.Session() as session:
    # Run the initializer
    session.run(init)
    with tf.device('/gpu:0'):
        # Enable logging
        tf.logging.set_verbosity(tf.logging.INFO)
        # Create our classifier
        feature_columns = [tf.contrib.layers.real_valued_column("", dimension=1024)]
        classifier = tf.contrib.learn.DNNClassifier(feature_columns=feature_columns, hidden_units=[400, 400, 400, 400, 400, 400], n_classes=4, model_dir="model")
        if args.train:
            print('Training...')
            classifier.fit(X_train, y_train, steps=2000)
            print('Done !')
Example #41
    direct_model = inducer.learn(d.str,xtrain,ytrain)
    #print direct_model
    loo_pred = loo_model.predict(xtest)
    #print loo_pred
    if np.isnan(loo_pred).any():
        print 'loo is wrong'
        exit()
    
    direct_pred = direct_model.predict(xtest)
    #print direct_pred
    if np.isnan(direct_pred).any():
        print 'direct is wrong'
    return np.abs(loo_pred-direct_pred)

if __name__ == '__main__':
    d = dataset.load('../data/vehicle.mat')
    x = d.x
    y = d.y
    s = nx.complete_graph(d.str.n_vars)
    cv = LeaveOneOut(d.str.n_instances)
    print cv
    options = {}
 #   cgn = ml_cgn_inducer.wcGJAN_bn_learner(d.str, x, y,options)
 #   ml_model = ml_cgn_inducer.learn_parameters(d.str,x,y,cgn,options)
    bma_ind = bma_gct_inducer.bma_gct_inducer(options)
    
    #cliques,separators = bma_gct_inducer.wcGJAN_ct_learner(d.str, x, y,options)
    print "Learning main model"
    bma_model = bma_ind.learn(d.str,x,y)
    print "Main model induced"
    print bma_model
Example #42
  print 'train:', train_file
  print 'test:', test_file
  print 'batch:', cfg.get('cnn', 'batch')
  print 'epochs:', cfg.get('cnn', 'epochs')
  print 'embdims:', cfg.get('cnn', 'embdims')
  print 'filters:', cfg.get('cnn', 'filters')
  print 'filtlen:', cfg.get('cnn', 'filtlen')
  print 'hidden:', cfg.get('cnn', 'hidden')
  print 'dropout:', cfg.get('cnn', 'dropout')
  print 'learnrt:', cfg.get('cnn', 'learnrt')

  # learn alphabets from training examples
  dataset = dataset.DatasetProvider(train_file)
  # now load training examples and labels
  train_left, train_larg, train_middle, \
    train_rarg, train_right, train_y = dataset.load(train_file)
  left_maxlen = max([len(seq) for seq in train_left])
  larg_maxlen = max([len(seq) for seq in train_larg])
  middle_maxlen = max([len(seq) for seq in train_middle])
  rarg_maxlen = max([len(seq) for seq in train_rarg])
  right_maxlen = max([len(seq) for seq in train_right])

  # now load test examples and labels
  test_left, test_larg, test_middle, test_rarg, test_right, test_y = \
    dataset.load(test_file, left_maxlen=left_maxlen, larg_maxlen=larg_maxlen,
                 middle_maxlen=middle_maxlen, rarg_maxlen=rarg_maxlen,
                 right_maxlen=right_maxlen)
  
  # turn x and y into numpy array among other things
  classes = len(set(train_y))
  train_left = pad_sequences(train_left, maxlen=left_maxlen)
Example #43
def train(itMax=100, szBatch=256, lr=0.01, vaFreq=10,
          init_theta=dft_init_theta,
          mo_create=models.create_mlp):
    print 'loading data...'
    dataTr, dataVa, _ = dataset.load(name='mnist.pkl.gz')

    print 'building graph...'
    # fprop: the MLP model
    x = T.matrix('x', 'float32')
    theta = init_theta()
    F = mo_create(x, theta)
    # fprop: the loss
    y = T.ivector('y')
    ell = loss.create_logistic(F, y)
    # bprop
    dtheta = T.grad(ell, wrt=theta)
    # the graph for training
    ibat = T.lscalar('ibat')
    fg_tr = function(
        inputs=[ibat],
        outputs=ell,
        updates=zip(theta, optim.update_gd(theta, dtheta)),
        givens={
            x: dataset.get_batch(ibat, dataTr[0], szBatch=szBatch),
            y: dataset.get_batch(ibat, dataTr[1], szBatch=szBatch)
        }
    )
    # the graph for validation
    ell_zo = loss.create_zeroone(F, y)
    fg_va = function(
        inputs=[],
        outputs=ell_zo,
        givens={
            x: dataVa[0],
            y: dataVa[1]
        }
    )


    print 'Fire the graph...'
    trLoss, er_va = [], []
    N = dataTr[0].get_value(borrow=True).shape[0]
    numBatch = (N + szBatch) / szBatch
    print '#batch = %d' % (numBatch,)
    for i in xrange(itMax):
        ibat = i % numBatch
        tmpLoss = fg_tr(ibat)
        print 'training: iteration %d, ibat = %d, loss = %6.5f' % (i, ibat, tmpLoss)
        trLoss.append(tmpLoss)
        if i%vaFreq == 0:
            tmp_er = fg_va()
            print 'validation: iteration %d, error rate = %6.5f' % (i, tmp_er)
            er_va.append(tmp_er)

    # plot
    import matplotlib.pyplot as plt
    plt.subplot(1, 2, 1)
    plt.plot(range(1, len(trLoss)+1), trLoss, 'ro-')
    plt.subplot(1, 2, 2)
    plt.plot([i*vaFreq for i in range(len(er_va))], er_va, 'bx-')
    plt.show(block=True)
    # return the parameters
    return theta
Example #44
  print 'test:', cfg.get('data', 'test')
  print 'batch:', cfg.get('lstm', 'batch')
  print 'epochs:', cfg.get('lstm', 'epochs')
  print 'embdims:', cfg.get('lstm', 'embdims')
  print 'units:', cfg.get('lstm', 'units')
  print 'dropout:', cfg.get('lstm', 'dropout')
  print 'udropout:', cfg.get('lstm', 'udropout')
  print 'wdropout:', cfg.get('lstm', 'wdropout')
  print 'learnrt:', cfg.get('lstm', 'learnrt')
  
  # learn alphabet from training data
  dataset = \
    dataset.DatasetProvider([cfg.get('data', 'train'),
                             cfg.get('data', 'test')])
  # now load training examples and labels
  train_x, train_y = dataset.load(cfg.get('data', 'train'))
  # now load test examples and labels
  test_x, test_y = dataset.load(cfg.get('data', 'test'))

  # turn x and y into numpy array among other things
  maxlen = max([len(seq) for seq in train_x + test_x])
  train_x = pad_sequences(train_x, maxlen=maxlen)
  train_y = pad_sequences(train_y, maxlen=maxlen)
  test_x = pad_sequences(test_x, maxlen=maxlen)
  test_y = pad_sequences(test_y, maxlen=maxlen)

  train_y =  np.array([to_categorical(seq, 3) for seq in train_y])
  test_y =  np.array([to_categorical(seq, 3) for seq in test_y])
  
  print 'train_x shape:', train_x.shape
  print 'train_y shape:', train_y.shape
Example #45
  test_file = os.path.join(base, cfg.get('data', 'test'))
  print 'train:', train_file
  print 'test:', test_file
  print 'batch:', cfg.get('cnn', 'batch')
  print 'epochs:', cfg.get('cnn', 'epochs')
  print 'embdims:', cfg.get('cnn', 'embdims')
  print 'filters:', cfg.get('cnn', 'filters')
  print 'filtlen:', cfg.get('cnn', 'filtlen')
  print 'hidden:', cfg.get('cnn', 'hidden')
  print 'dropout:', cfg.get('cnn', 'dropout')
  print 'learnrt:', cfg.get('cnn', 'learnrt')

  # learn alphabet from training examples
  dataset = dataset.DatasetProvider(train_file)
  # now load training examples and labels
  train_x, train_y = dataset.load(train_file)
  maxlen = max([len(seq) for seq in train_x])
  # now load test examples and labels
  test_x, test_y = dataset.load(test_file, maxlen=maxlen)

  init_vectors = None
  # TODO: what are we doing for index 0 (oov words)?
  # use pre-trained word embeddings?
  if cfg.has_option('data', 'embed'):
    print 'embeddings:', cfg.get('data', 'embed')
    embed_file = os.path.join(base, cfg.get('data', 'embed'))
    word2vec = word2vec.Model(embed_file)
    init_vectors = [word2vec.select_vectors(dataset.word2int)]

  # turn x and y into numpy array among other things
  classes = len(set(train_y))
Example #46
def train(dataset, gpu, num_layer=4, epoch=40, batch=64):
    nb_epochs = epoch
    batch_size = batch
    patience = 20
    lr = 0.001
    l2_coef = 0.0
    hid_units = 512

    adj, diff, feat, labels, num_nodes = load(dataset)

    feat = torch.FloatTensor(feat).cuda()
    diff = torch.FloatTensor(diff).cuda()
    adj = torch.FloatTensor(adj).cuda()
    labels = torch.LongTensor(labels).cuda()

    ft_size = feat[0].shape[1]
    max_nodes = feat[0].shape[0]

    model = Model(ft_size, hid_units, num_layer)
    optimiser = torch.optim.Adam(model.parameters(),
                                 lr=lr,
                                 weight_decay=l2_coef)

    model.cuda()

    cnt_wait = 0
    best = 1e9

    itr = (adj.shape[0] // batch_size) + 1
    for epoch in range(nb_epochs):
        epoch_loss = 0.0
        train_idx = np.arange(adj.shape[0])
        np.random.shuffle(train_idx)

        for idx in range(0, len(train_idx), batch_size):
            model.train()
            optimiser.zero_grad()

            batch = train_idx[idx:idx + batch_size]
            mask = num_nodes[idx:idx + batch_size]

            lv1, gv1, lv2, gv2 = model(adj[batch], diff[batch], feat[batch],
                                       mask)

            lv1 = lv1.view(batch.shape[0] * max_nodes, -1)
            lv2 = lv2.view(batch.shape[0] * max_nodes, -1)

            batch = torch.LongTensor(
                np.repeat(np.arange(batch.shape[0]), max_nodes)).cuda()

            loss1 = local_global_loss_(lv1, gv2, batch, 'JSD', mask)
            loss2 = local_global_loss_(lv2, gv1, batch, 'JSD', mask)
            # loss3 = global_global_loss_(gv1, gv2, 'JSD')
            loss = loss1 + loss2  #+ loss3
            epoch_loss += loss.item()  # accumulate a plain float so the graph is not retained
            loss.backward()
            optimiser.step()

        epoch_loss /= itr

        # print('Epoch: {0}, Loss: {1:0.4f}'.format(epoch, epoch_loss))

        if epoch_loss < best:
            best = epoch_loss
            best_t = epoch
            cnt_wait = 0
            torch.save(model.state_dict(), f'{dataset}-{gpu}.pkl')
        else:
            cnt_wait += 1

        if cnt_wait == patience:
            break

    model.load_state_dict(torch.load(f'{dataset}-{gpu}.pkl'))

    features = feat.cuda()
    adj = adj.cuda()
    diff = diff.cuda()
    labels = labels.cuda()

    embeds = model.embed(features, adj, diff, num_nodes)

    x = embeds.cpu().numpy()
    y = labels.cpu().numpy()

    from sklearn.svm import LinearSVC
    from sklearn.metrics import accuracy_score
    params = {'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000]}
    kf = StratifiedKFold(n_splits=10, shuffle=True, random_state=None)
    accuracies = []
    for train_index, test_index in kf.split(x, y):

        x_train, x_test = x[train_index], x[test_index]
        y_train, y_test = y[train_index], y[test_index]
        classifier = GridSearchCV(LinearSVC(),
                                  params,
                                  cv=5,
                                  scoring='accuracy',
                                  verbose=0)
        classifier.fit(x_train, y_train)
        accuracies.append(accuracy_score(y_test, classifier.predict(x_test)))
    print(np.mean(accuracies), np.std(accuracies))
Example #47
File: pdf.py Project: mastbaum/lxid
    # normalize within r bin... generalize me
    h = np.apply_along_axis(lambda x: x/np.sum(x), 1, h)

    # set zero-content bins to 0.1 * minimum nonzero bin
    min_val = np.min(h[h > 0])
    h[(h == 0)] = min_val / 10

    return h, e


if __name__ == '__main__':
    import dataset
    import glob

    try:
        d = dataset.load('tl208')
    except IOError:
        d = dataset.Dataset('tl208', filenames=glob.glob('/home/mastbaum/snoplus/tl208/data/pdf/tl208/run0/av_tl208-0.root'))

    d.append(glob.glob('/home/mastbaum/snoplus/tl208/data/pdf/tl208/run1/av_tl208-*.root'))

    cut = dataset.Cut(e=(2.555,2.718))
    d.apply_cuts([cut])

    events = d.cut[cut.as_tuple()]['events']

    h, e = make_pdf(events, ['r', 'pmt_t_res'], (10, 500,))

    import matplotlib.pyplot as plt
    import matplotlib.cm as cm
    from matplotlib.colors import LogNorm
Example #48
def main():
    with tf.Session() as session:
        dataset = dataset_mod.load()
        train(session, dataset)
Example #49
  # settings file specified as command-line argument
  cfg = ConfigParser.ConfigParser()
  cfg.read(sys.argv[1])
  print_config(cfg)
  base = os.environ['DATA_ROOT']
  train_file = os.path.join(base, cfg.get('data', 'train'))
  test_file = os.path.join(base, cfg.get('data', 'test'))

  # learn alphabet from training examples
  dataset = dataset.DatasetProvider(train_file)
  print 'input alphabet size:', len(dataset.input2int)
  print 'output alphabet size:', len(dataset.output2int)

  # now load training examples and labels
  train_x, train_y = dataset.load(train_file)
  maxlen_x = max([len(seq) for seq in train_x])
  maxlen_y = max([len(seq) for seq in train_y])

  # turn x and y into numpy array among other things
  train_x = pad_sequences(train_x, maxlen=maxlen_x)
  train_y = pad_sequences(train_y, maxlen=maxlen_y)
  print train_y.shape
  print train_y

  # convert train_y into (num_examples, maxlen, alphabet_size)
  # train_y = to_categorical(np.array(train_y), classes)

  model = Sequential()
  model.add(Embedding(input_dim=len(dataset.input2int),
                      output_dim=cfg.getint('cnn', 'embdims'),