Example #1
def go(dset=None, path=KINECT_PATH, threed=False, skip=1):
    if dset is None:
        dataset.load_random_dataset(path)
    else:
        dataset.load_dataset(dset)
    for rgbs, depths in dataset.iter(skip=skip):
        once(rgbs, depths, threed=threed)
Example #2
def load_test_dataset():
    """ 
    Loads and returns the testing dataset and labels.
    returns: test_x, test_y (all numpy arrays)
    """
    test_x = load_dataset(TEST_X)
    test_y = load_dataset(TEST_Y)
    return test_x, test_y
Example #3
def go(dset=None):
    if dset is None:
        dataset.load_random_dataset()
    else:
        dataset.load_dataset(dset)

    while True:
        dataset.advance()
        once()
Example #4
def load_all_datasets():
    """ 
    Loads and returns the training and testing datasets and labels.
    returns: train_x, train_y, test_x, test_y (all numpy arrays)
    """
    train_x = load_dataset(TRAIN_X)
    train_y = load_dataset(TRAIN_Y)
    test_x = load_dataset(TEST_X)
    test_y = load_dataset(TEST_Y)
    return train_x, train_y, test_x, test_y
Example #5
def main(product):
    TRAIN_FILE = "../data/ABSA-15_{}_Train_Data.xml".format(product)
    TEST_FILE = "../data/ABSA15_{}_Test.xml".format(product)

    # load data set
    training_reviews = load_dataset(TRAIN_FILE)
    testing_reviews = load_dataset(TEST_FILE)

    # build vocab
    vocab = build_vocab(training_reviews, TOPN=1000)
    vocab_index = list2dict(vocab)

    cate_index = get_all_categories(training_reviews)
    cates = dict2list(cate_index)
    n_cates = len(cates)

    train_X = get_X(training_reviews, vocab_index)
    test_X = get_X(testing_reviews, vocab_index)

    train_labels = get_labels(training_reviews, cate_index)
    test_labels = get_labels(testing_reviews, cate_index)

    # transform to a mono-label problem
    M = len(train_X)
    X = []
    Y = []
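    # Expand each multi-labelled review into one (x, y) pair per gold category so the
    # problem becomes single-label; reviews with no category get the extra index n_cates.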
    for i in range(M):
        if not train_labels[i]:
            Y.append(n_cates)  # category index from 0 to n_cates-1, n_cates is for None-label
            X.append(train_X[i])
        else:
            for y in train_labels[i]:
                Y.append(y)
                X.append(list(train_X[i]))

    clf_model = MultinomialNB()
    clf_model.fit(X, np.array(Y))

    # predict
    output = predict(test_X, clf_model, threshold=0.2)

    # evaluation
    p, r, f = microF1(output, test_labels)

    # output
    out_dir = "../data/bow_nb/"
    out_file = out_dir + "laptop.txt"
    with open(out_file, 'w') as out:
        out.write("Precision:\t{}\nRecall:\t{}\nF1:\t{}\n".format(p, r, f))
        print("{}\n{}\n{}".format(p, r, f))
Example #6
def main():
    #load data set
    training_reviews = load_dataset(TRAIN_FILE)
    testing_reviews = load_dataset(TEST_FILE)

    #load doc2vec model
    doc2vec_model = Doc2Vec.load(DOC2VEC_MODEL)

    cate_index = get_all_categories(training_reviews)
    cates = dict2list(cate_index)
    n_cates = len(cates)

    train_X = get_X(training_reviews, doc2vec_model)
    test_X = get_X(testing_reviews, doc2vec_model)

    train_labels = get_labels(training_reviews, cate_index)
    test_labels = get_labels(testing_reviews, cate_index)

    labelwise_acc = []
    labelwise_output = []
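    # Train one one-vs-rest libsvm classifier per category
    # (-s 0: C-SVC, -t 2: RBF kernel, -b 1: probability estimates).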

    for cate in range(n_cates):
        # train a binary model
        train_Y = get_Y(train_labels, cate)
        prob = svm_problem(train_Y, train_X)
        param = svm_parameter("-s 0 -t 2 -b 1")
        m = svm_train(prob, param)

        # test
        test_Y = get_Y(test_labels, cate)
        p_label, p_acc, p_val = svm_predict(test_Y, test_X, m, '-b 1')

        labelwise_acc.append(p_acc)
        labelwise_output.append(p_label)

    # evaluation
    p, r, f = microF1(labelwise_output, test_labels)

    # output
    out_dir = "../data/use_doc2vec/"
    out_file = out_dir + "laptop.txt"
    labelwise_acc = [(cates[i], labelwise_acc[i][0]) for i in range(n_cates)]
    labelwise_acc = sorted(labelwise_acc, key=lambda x:x[1])
    with open(out_file, 'w') as out:
        out.write("Precision:\t{}\nRecall:\t{}\nF1:\t{}\n".format(p, r, f))
        print("{}\n{}\n{}".format(p, r, f))
        for cate_i in range(n_cates):
            out.write("{}:\t{}\n".format(labelwise_acc[cate_i][0], labelwise_acc[cate_i][1]))
Example #7
def train_regression_model():
    num_batches = 1848
    total_iterations = 90000
    batch_size = 64

    num_epochs = ceil(total_iterations / num_batches)
    initial_epoch = 0
    regression_model_file = 'model_%02d.hdf5' % initial_epoch

    regression_model = None
    if os.path.exists(regression_model_file):
        print('Loading from saved file.')
        regression_model = model.get_regression_model(regression_model_file)
    else:
        print('Start training from scratch.')
        regression_model = model.create_regression_model()
    regression_model.summary()

    progbar = ProgbarLogger('steps')
    checkpoint = ModelCheckpoint('model_{epoch:02d}.hdf5',
                                 verbose=1,
                                 monitor='loss')
    terminate = TerminateOnNaN()
    callbacks = [checkpoint, progbar, terminate]

    regression_model.fit_generator(
        generator=dataset.load_dataset(training_dir),
        steps_per_epoch=num_batches,
        epochs=num_epochs,
        callbacks=callbacks,
        initial_epoch=initial_epoch,
        verbose=1)
Example #8
def prepare_data(dataset, pca_n):
    global n_classes, X, y, pp, X_tr, X_inv
    n_classes = len(dataset)
    X, y = load_dataset(dataset)
    pp = Preprocess(pca_n)
    X_tr = pp.fit_transform(X)
    X_inv = pp.inverse_transform(X_tr)
Example #9
File: SVM_utils.py Project: 1tux/Bat-Lab
def get_df_from_file_path(file_path, net="NET1"):
    REAL_BEHAVIORAL_DATA = config.Config.get("REAL_BEHAVIORAL_DATA")
    CACHED_BEHAVIORAL_DATA = config.Config.get("CACHED_BEHAVIORAL_DATA")
    if REAL_BEHAVIORAL_DATA:
        cache_file_path1 = file_path.replace(".mat", "net1.csv")
        cache_file_path2 = file_path.replace(".mat", "net3.csv")
        cache_file_path = {
            "NET1": cache_file_path1,
            "NET3": cache_file_path2
        }[net]
        if CACHED_BEHAVIORAL_DATA and os.path.isfile(cache_file_path):
            print("loading cached file....")
            df = pd.read_csv(cache_file_path)
        else:
            print("parsing real data file")
            df, df2 = behavior_parse.parse_matlab_file(file_path)
            print("storing to cache...")
            df.to_csv(cache_file_path1)
            df2.to_csv(cache_file_path2)
            df = df if cache_file_path == cache_file_path1 else df2
    else:
        df = dataset.load_dataset()

    if "Unnamed: 0" in df.columns:
        df.drop(columns=["Unnamed: 0"], inplace=True)
    return df
Example #10
def test(dataset, batch_size, filters, context):
    datasets = {
        "facades": True,
        "cityscapes": False,
        "maps": False,
        "edges2shoes": False,
        "edges2handbags": False
    }
    mx.random.seed(int(time.time()))

    print("Loading dataset...", flush=True)
    validating_set = load_dataset(dataset,
                                  "val",
                                  batch_size,
                                  is_reversed=datasets[dataset])

    net_g = UnetGenerator(3, filters)
    net_g.load_parameters("model/{}.generator.params".format(dataset),
                          ctx=context)

    print("Testing...", flush=True)
    for batch in validating_set:
        real_in = batch.data[0].as_in_context(context)
        real_out = batch.data[1].as_in_context(context)
        fake_out = net_g(real_in)
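        # Plot three rows per batch: input, ground truth, and generated output,
        # with one column per sample.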

        for i in range(batch_size):
            plt.subplot(3, batch_size, i + 1)
            visualize(real_in[i])
            plt.subplot(3, batch_size, i + batch_size + 1)
            visualize(real_out[i])
            plt.subplot(3, batch_size, i + batch_size * 2 + 1)
            visualize(fake_out[i])
        plt.show()
Example #11
File: train.py Project: tcliu0/dncnn
def main(_):
    if not os.path.exists(FLAGS.train_dir):
        os.makedirs(FLAGS.train_dir)

    if not os.path.exists(FLAGS.log_dir):
        os.makedirs(FLAGS.log_dir)
    file_handler = logging.FileHandler(os.path.join(FLAGS.log_dir, "log.txt"))
    logging.getLogger().addHandler(file_handler)
    
    dataset = load_dataset(FLAGS.small_train_set)

    denoise = get_model(FLAGS.model)(FLAGS)

    train_dir = os.path.join(FLAGS.train_dir, denoise.model_name)
    if not os.path.exists(train_dir):
        os.makedirs(train_dir)

    print(vars(FLAGS))
    with open(os.path.join(FLAGS.log_dir, "flags.json"), 'w') as fout:
        json.dump(FLAGS.__flags, fout)

    with tf.Session() as sess:
        _, epoch = initialize_model(sess, denoise, train_dir)

        denoise.train(sess, dataset, epoch)
Example #12
def main(train_set_filename, test_set_filename, config_filename):
    train_set = load_dataset(train_set_filename)
    test_set = load_dataset(test_set_filename)
    config = load_config(config_filename)
    if config.model == Model.ID3:
        tree = decisiontree.id3(train_set, config.max_depth)
        print(decisiontree.show_decision_tree(tree))
        predictor = assessment.make_predictor(tree, decisiontree.predict)
        print_whole_assessment(predictor, test_set)
    elif config.model == Model.RF:
        forest = randomforest.train(train_set, config.num_trees,
                                    config.max_depth, config.example_ratio,
                                    config.feature_ratio)
        randomforest.print_forest(forest)
        predictor = assessment.make_predictor(forest, randomforest.predict)
        print_whole_assessment(predictor, test_set)
Example #13
def generate(network_pkl, out_dir):
    if os.path.exists(out_dir):
        raise ValueError('{} already exists'.format(out_dir))
    misc.init_output_logging()
    np.random.seed(config.random_seed)
    tfutil.init_tf(config.tf_config)
    with tf.device('/gpu:0'):
        G, D, Gs = misc.load_pkl(network_pkl)
    training_set = dataset.load_dataset(data_dir=config.data_dir,
                                        verbose=True,
                                        **config.dataset)
    # grid_size, grid_reals, grid_labels, grid_latents = train.setup_snapshot_image_grid(G, training_set, **config.grid)
    number_of_images = 1000
    grid_labels = np.zeros([number_of_images, training_set.label_size],
                           dtype=training_set.label_dtype)
    grid_latents = misc.random_latents(number_of_images, G)
    total_kimg = config.train.total_kimg
    sched = train.TrainingSchedule(total_kimg * 1000, training_set,
                                   **config.sched)
    grid_fakes = Gs.run(grid_latents,
                        grid_labels,
                        minibatch_size=sched.minibatch // config.num_gpus)
    os.makedirs(out_dir)
    # print(np.min(grid_fakes), np.mean(grid_fakes), np.max(grid_fakes))
    # misc.save_image_grid(grid_fakes, 'fakes.png', drange=[-1,1], grid_size=grid_size)
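    # Each generated image comes out as CHW floats in [-1, 1]; convert it to an
    # HWC uint8 array before writing it to disk.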
    for i, img in enumerate(grid_fakes):
        img = img.transpose((1, 2, 0))
        img = np.clip(img, -1, 1)
        img = (1 + img) / 2
        img = skimage.img_as_ubyte(img)
        imageio.imwrite(os.path.join(out_dir, '{}.png'.format(i)),
                        img[..., :3])
        if img.shape[-1] > 3:
            np.save(os.path.join(out_dir, '{}.npy'.format(i)), img)
Example #14
def main(verbose):
    dataset = load_dataset(
        glob('../data/trump_tweet_data_archive/condensed_*.json.zip'), verbose)
    corpus, sequences, next_chars, c2i, i2c, nc = seq_data(
        dataset, SEQ_LEN, SEQ_STEP, verbose)

    if verbose:
        print(f'corpus length: {len(corpus)}')
        print(f'num characters: {nc}')
        print(f'number of sequences: {len(sequences)}')

    # The data is shuffled so the validation data isn't simply the latest 20% of tweets
    X, y = vec_data(sequences, next_chars, SEQ_LEN, nc, c2i, verbose)
    # Split off the last 20% as validation data for pretty graphs
    n = len(X)
    num_val = int(PERCENT_VALIDATION * n)
    X_val = X[n - num_val:]
    y_val = y[n - num_val:]

    X_train = X[:n - num_val]
    y_train = y[:n - num_val]

    if verbose:
        print(f'Number validation samples: {num_val}')

    model = build_model(SEQ_LEN, nc, verbose)
    history = train_model(model, X_train, y_train, X_val, y_val, verbose)
    plot_model_loss(BASENAME, history, verbose)
    # Save the trained model so we don't have to wait 25 hours to generate another 10 tweet sample
    save_model(model, BASENAME, verbose)
    # Generate sample tweets using 10 random seeds from the corpus.
    generate(BASENAME, model, corpus, c2i, i2c, nc, 10, verbose)
Example #15
def main():
    train_data,test_data,user_bundle_data,user_item_data,bundle_item_data,\
    item_num,user_num,bundle_num,user_bundle_mat = dataset.load_dataset()
    train_dataset = data_prep.CreateData(train_data, bundle_num,
                                         user_bundle_mat, args.train_neg_num,
                                         True)
    test_dataset = data_prep.CreateData(test_data, bundle_num, user_bundle_mat,
                                        0, False)

    train_loader = data.DataLoader(train_dataset,
                                   batch_size=args.batch_size,
                                   shuffle=True,
                                   num_workers=4)
    test_loader = data.DataLoader(test_dataset,
                                  batch_size=99 + 1,
                                  shuffle=False,
                                  num_workers=0)

    ub_graph, ui_graph, bi_graph = get_graph(train_data, user_item_data,
                                             bundle_item_data, item_num,
                                             user_num, bundle_num)
    graph = [ub_graph, ui_graph, bi_graph]

    #print(args)

    model = IHBR(args, item_num, user_num, bundle_num, graph,
                 device).to(device)
    op = optim.Adam(model.parameters(), lr=3e-4, weight_decay=1e-7)
    loss_func = nn.BCEWithLogitsLoss()
    loss = train(model, args.epochs, train_loader, op, device, loss_func,
                 test_loader)
Example #16
def questao21():
    dset = load_dataset('dataset1.csv')
    xo = dset.T[1].astype(float)  # second column
    x = dset.T[1].astype(float)  # second column
    yo = dset.T[2].astype(float)  # third column
    y = dset.T[2].astype(float)  # third column

    # z-score normalization helped with visualization and is required for clustering
    #x = [z_score(x, xi) for xi in x]
    #y = [z_score(y, yi) for yi in y]
    #centros_iniciais = [(z_score(xo, 1), z_score(yo, 2)), (z_score(xo, 4), z_score(yo, 2))]

    centros_iniciais = [(1, 2), (4, 2)]
    pontos = zip(x, y)

    clusters, iteracoes = kmeans(pontos, 2, centros_iniciais=centros_iniciais)

    cluster1 = clusters[0].pontos
    cluster2 = clusters[1].pontos
    plt.plot([xi[0] for xi in cluster1], [yi[1] for yi in cluster1], 'ro')
    plt.plot([clusters[0].centroide[0]], [clusters[0].centroide[1]], 'r*')
    plt.plot([xi[0] for xi in cluster2], [yi[1] for yi in cluster2], 'go')
    plt.plot([clusters[1].centroide[0]], [clusters[1].centroide[1]], 'g*')
    plt.savefig('grupo1.png')
    print "Novos centróides:", clusters[0].centroide, " e ", clusters[
        1].centroide
Example #17
def plot_goal_reached_distribution(runs_dir, img_dir, filename):
    """

    :param runs_dir:
    :param img_dir:
    :param filename:
    """
    dataset_states = load_dataset(runs_dir)

    time_steps = np.arange(dataset_states.step.max() + 1)

    states_subset = dataset_states[["step", "goal_reached"]]
    last_steps = states_subset.groupby("run").map(
        lambda x: x.isel(sample=[-1]))

    false_label, false_samples = False, last_steps.where(
        last_steps.goal_reached == False, drop=True)
    true_label, true_samples = True, last_steps.where(
        last_steps.goal_reached == True, drop=True)

    plt.figure(figsize=(7.8, 4.8), constrained_layout=True)
    plt.hist([true_samples.step, false_samples.step],
             bins=time_steps,
             label=[true_label, false_label],
             stacked=True,
             alpha=0.9)
    plt.ylim(0, plt.ylim()[1] + 1)
    plt.legend()

    plt.xlim(0, dataset_states.step.max() + 1)
    plt.xlabel('timestep', fontsize=11)
    plt.ylabel('runs', fontsize=11)

    save_visualisation(filename, img_dir)
Example #18
def plot_sensors(goal_object, runs_dir, video_dir, filename, run_id=0):
    """

    :param goal_object:
    :param runs_dir:
    :param video_dir:
    :param filename:
    :param run_id
    """
    dataset_states = load_dataset(runs_dir)
    run_states = dataset_states.where(dataset_states.run == run_id, drop=True)

    marxbot = viz.DatasetSource(run_states)

    # Create the visualizations
    env = viz.FuncAnimationEnv([
        viz.GridLayout((1, 3), [
            viz.TrajectoryViz(marxbot, goal_object=goal_object),
            viz.LaserScannerViz(marxbot),
            viz.ControlSignalsViz(marxbot)
        ],
                       suptitle='Run %d' % run_id)
    ],
                               sources=[marxbot])
    env.show(figsize=(14, 4))

    video_path = os.path.join(video_dir, '%s-%d.mp4' % (filename, run_id))
    env.save(video_path, dpi=300)
Example #19
def load_processed_dataset(diags):
    from keras.utils import np_utils
    xy = load_dataset()
    X = xy["x"]
    annotation = load_annotation()
    X = np.concatenate((X, annotation), axis=2)
    Y = xy["y"]
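    # Collapse the selected diagnosis columns into a single binary label:
    # 1 if any diagnosis in `diags` is present, 0 otherwise.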

    Y_new = np.zeros(Y.shape[0])
    for i in range(Y.shape[0]):
        for j in diags:
            if Y[i, j] == 1:
                Y_new[i] = 1
    Y = np_utils.to_categorical(Y_new, 2)

    X_train, X_test, Y_train, Y_test = train_test_split(X,
                                                        Y,
                                                        test_size=0.25,
                                                        random_state=42)

    X_train, X_val, Y_train, Y_val = train_test_split(X_train,
                                                      Y_train,
                                                      test_size=0.25,
                                                      random_state=42)

    return X_train, X_val, X_test, Y_train, Y_val, Y_test
Example #20
def run_sswe_u(window_size,
               training_file,
               vocab_size,
               embedding_size,
               alpha=0.5,
               num_negative_samples=15):
    model = model_sswe_u(window_size, vocab_size, embedding_size)
    sswe_u_loss = custom_loss(alpha=alpha)
    model.compile(optimizer=Adagrad(lr=0.01),
                  loss=sswe_u_loss,
                  metrics=['accuracy'])

    print(model.summary())

    inputs, labels = load_dataset(window_size,
                                  training_file,
                                  vocab_size,
                                  num_negative_samples=num_negative_samples)
    print(labels.shape)

    model.fit([inputs[:, 0, :], inputs[:, 1, :]],
              labels,
              epochs=2,
              batch_size=10000,
              shuffle=True)
    weights = model.get_layer('embedding').get_weights()[0]
    np.save('word_embedding.npy', weights)
    return weights
Example #22
def save_train_dataset_as_nifti(results_dir=os.path.join(
    paths.results_folder, "final"),
                                out_dir=os.path.join(paths.results_folder,
                                                     "training_set_results")):
    if not os.path.isdir(out_dir):
        os.mkdir(out_dir)
    a = load_dataset()
    for fold in range(5):
        working_dir = os.path.join(results_dir, "fold%d" % fold, "validation")
        ids_in_fold = os.listdir(working_dir)
        ids_in_fold.sort()
        ids_in_fold = [
            i for i in ids_in_fold
            if os.path.isdir(os.path.join(working_dir, i))
        ]
        ids_in_fold_as_int = [int(i) for i in ids_in_fold]
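        # Map each fold's validation predictions back to the original image geometry
        # and save them as NIfTI volumes with the original spacing/direction/origin.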
        for pat_id in ids_in_fold_as_int:
            pat_in_dataset = a[pat_id]
            seg_pred = np.load(
                os.path.join(working_dir, "%03.0d" % pat_id,
                             "segs.npz"))['seg_pred']
            b = convert_to_original_coord_system(seg_pred, pat_in_dataset)
            sitk_img = sitk.GetImageFromArray(b)
            sitk_img.SetSpacing(pat_in_dataset['spacing'])
            sitk_img.SetDirection(pat_in_dataset['direction'])
            sitk_img.SetOrigin(pat_in_dataset['origin'])
            sitk.WriteImage(
                sitk_img,
                os.path.join(out_dir, pat_in_dataset['name'] + ".nii.gz"))
Example #23
def save_test_set_as_nifti(results_dir=os.path.join(paths.results_folder,
                                                    "final"),
                           out_dir=os.path.join(paths.results_folder,
                                                "test_set_results")):
    if not os.path.isdir(out_dir):
        os.mkdir(out_dir)
    a = load_dataset(folder=paths.preprocessed_testing_data_folder)
    for pat in a.keys():
        probs = []
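        # Average the softmax outputs of the five folds and take the argmax over
        # classes as the ensembled segmentation.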
        for fold in range(5):
            working_dir = os.path.join(results_dir, "fold%d" % fold,
                                       "pred_test_set")
            res = np.load(os.path.join(working_dir, "%03.0d" % pat,
                                       "segs.npz"))
            probs.append(res['softmax_ouput'][None])
        prediction = np.vstack(probs).mean(0).argmax(0)
        prediction_new = convert_to_brats_seg(prediction)
        np.savez_compressed(os.path.join(out_dir, "%03.0d.npz" % pat),
                            seg=prediction)
        b = convert_to_original_coord_system(prediction_new, a[pat])
        sitk_img = sitk.GetImageFromArray(b)
        sitk_img.SetSpacing(a[pat]['spacing'])
        sitk_img.SetDirection(a[pat]['direction'])
        sitk_img.SetOrigin(a[pat]['origin'])
        sitk.WriteImage(sitk_img,
                        os.path.join(out_dir, a[pat]['name'] + ".nii.gz"))
Example #24
def import_ds(ds, type=None, parent_id=None):
    if type == 'parent':
        ds['is_parent'] = 'true'
    else:
        ds.pop('is_parent', None)

    if type == 'child':
        ds['parent_dataset'] = parent_id
    else:
        ds.pop('parent_dataset', None)

    dataset_dummy = create_dummy_dataset()
    dataset_dummy['title'] = ds['title']
    dataset_dummy['owner_org'] = owner_org

    # first run to get name created.
    ds_created = dataset_dummy.create(create_url, api_key)

    # then update the dataset with all info
    dataset_full = load_dataset(ds_created)
    map_dataset(dataset_full, ds)
    dataset_full._update(update_url, api_key)

    # add resource
    resources = ds.get('distribution', [])
    for res in resources:
        resource = Resource()
        map_resource(resource, res, dataset_full['id'])
        # skip and report empty resource
        if resource['url']:
            res_created = resource.create(resource_url, api_key)
        else:
            logging.info('   Empty resource skipped for: %s' % ds['title'])

    return dataset_full
Example #25
def debug(folders, n_components, r = None, max_dimension = 1):
    X,y = load_dataset(folders)
    p = Preprocess(n_components)
    X = p.fit_transform(X)
    
    if r is None:
        distances = PairwiseDistances(X.tolist())
        distances = ExplicitDistances(distances)
        n_samples = len(X)
        r_candidates = sorted(set(np.array(distances.distances).flatten()))
        for r2 in r_candidates:
            print r2
            cx = vietoris_rips(X.tolist(), max_dimension, r2)
            cords = mds_plot(X, y)
            lines_plot(cx, cords)
            plt.show()
    else:
        cx = vietoris_rips(X.tolist(), max_dimension, r)
        actual_max_dimension = len(max(cx, key=len)) - 1
        for d in range(actual_max_dimension, 2, -1):
            sx_d = filter_simplices(cx, d)
            print "dimension", d, ":", len(sx_d), "simplices"
            for i, sx in enumerate(sx_d):
                print i, "..."
                cords = mds_plot(X, y)
                edges = list(combinations(sx, 2))
                lines_plot(edges, cords, color=np.random.rand(3,))
                plt.show()
Example #26
def main(argv, input_break, target_break, apply_target, S, max_epoch, lr, B=1):
    if (len(argv) != 3):
        print('Error en la entrada')

    [_, model, data] = argv

    # read and preprocess the data
    (input, target) = load_dataset(data, input_break, target_break,
                                   apply_target)

    if (not path.exists(model + '.p')):
        # training
        errors, W = train(input, target, S, max_epoch, lr, B)

        plot_error(errors)
        pickle.dump(errors, open(model + '_errors.p', 'wb'))
        pickle.dump(W, open(model + '.p', 'wb'))
    else:
        # testing

        # load the trained model
        W = pickle.load(open(model + '.p', 'rb'))

        # run the test
        r, Y, Z = test(input, target, S, W)

        if Z.shape[1] == 1:
            print('precisión: {}  (aciertos/total)'.format(r))
        else:
            print('error cuadratico medio: {}'.format(r))
Example #27
def convert_quant(model):
    """
    Convert Keras model to quantized tflite

    reference:
    https://www.tensorflow.org/lite/performance/post_training_quantization
    """
    (x_train, _), (_, _), (_, _) = dataset.load_dataset(config.DATASET_PATH)
    x_train = x_train.astype('float32')

    # Calibration for 1 epoch
    def representative_dataset_gen():
        for i in range(len(x_train)):
            # Get sample input data as a numpy array in a method of your choosing.
            # Format NHW
            yield [x_train[i][tf.newaxis, ..., tf.newaxis]]

    model_path = "trained_models/" + config.NETWORK + ".h5"
    converter = tf.compat.v1.lite.TFLiteConverter.from_keras_model_file(
        model_path)
    converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
    converter.inference_input_type = tf.int8
    converter.inference_output_type = tf.int8
    converter.representative_dataset = representative_dataset_gen
    tflite_quant_model = converter.convert()
    open("trained_models/" + config.NETWORK + "_quant.tflite",
         "wb").write(tflite_quant_model)
Example #28
def test(model):
    train_iter, test_iter = load_dataset('spam', batchsize)
    print('Starting testing')
    truth_res = []
    pred_res = []
    avg_loss = 0.0
    avg_acc = 0.0
    i = 0
    for batch in test_iter:
        sentence = batch.sentence
        label = batch.label
        print("Batch :%d" % (i))

        i = i + 1
        truth_res += list(label.data)

        model.zero_grad()
        model.batch_size = np.shape(sentence)[1]
        model.hidden = model.init_hidden()
        tag_scores = model(sentence)
        pred_label = tag_scores.cpu().data.max(1)[1].numpy()

        pred_res += [x for x in pred_label]
        acc = get_accuracy(truth_res, pred_res)
        avg_acc += acc
    t = avg_acc / i
    print('Test Classification accuracy')
    print(t)
Example #29
def generate_features_main():
    print "\nLoading dataset"
    X_train, X_test, y_train, y_test = dataset.load_dataset()

    print "Creating features of train set ({} images)".format(len(X_train))
    features_train = np.array([get_features(x) for x in tqdm(X_train)])

    print "Creating features of test set  ({} images)".format(len(X_test))
    features_test = np.array([get_features(x) for x in tqdm(X_test)])

    print "Flattening individual annotated images"
    y_train = flatten_images(np.array(y_train))
    y_test = flatten_images(np.array(y_test))

    print "Flattening all pixels"
    features_train = np.concatenate(features_train)
    features_test = np.concatenate(features_test)

    y_train = y_train.flatten()
    y_test = y_test.flatten()

    print "Features train {0}, Features test {1}".format(features_train.shape, features_test.shape)
    print "Labels train {0}, Labels test {1}".format(y_train.shape, y_test.shape)

    print "Writing X to file"
    write_features((features_train,features_test))

    print "Writing Y to file"
    write_y((y_train, y_test))
    print "Done."
Example #30
    def go_(self, **kw):
        d = load_dataset(kw.get('dims', 1),
                         kw.get('n', 10),
                         spline=kw.get('spline', 5))
        crit = kw.get('criterion', torch.nn.MSELoss)
        optim = kw.get('optimizer', torch.optim.Adam)
        optim_args = kw.get(
            'optim_args', {
                "lr": kw.get('optim_lr', 1e-3),
                "weight_decay": kw.get('optim_weight_decay', 1e-5),
            })
        model_args = kw.get('model_args', (kw.get(
            'dims', 1), kw.get('model_Nodes', 10), kw.get('model_dimOut', 1)))
        epochs = kw.get('epochs', 1)

        def mb():
            m = model(*model_args, crit(), optim, optim_args, epochs)
            return m

        # run algo
        algo = algorithm(d, mb)
        print(algo.data.path)
        res = algo.run()

        # save algo
        s = saver()
        return s.save(kw.get('name', self.name + '/' + str(kw.get('pset_i', 0))), \
                      algo, model_args, crit, optim, optim_args, epochs)
Example #31
def run(fold=0):
    print fold
    I_AM_FOLD = fold
    all_data = load_dataset(folder=paths.preprocessed_validation_data_folder)

    use_patients = all_data
    experiment_name = "final"
    results_folder = os.path.join(paths.results_folder, experiment_name,
                                  "fold%d" % I_AM_FOLD)
    write_images = False
    save_npy = True

    INPUT_PATCH_SIZE = (None, None, None)
    BATCH_SIZE = 2
    n_repeats = 2
    num_classes = 4

    x_sym = T.tensor5()

    net, seg_layer = build_net(x_sym,
                               INPUT_PATCH_SIZE,
                               num_classes,
                               4,
                               16,
                               batch_size=BATCH_SIZE,
                               do_instance_norm=True)
    output_layer = seg_layer

    results_out_folder = os.path.join(results_folder, "pred_val_set")
    if not os.path.isdir(results_out_folder):
        os.mkdir(results_out_folder)

    with open(
            os.path.join(results_folder, "%s_Params.pkl" % (experiment_name)),
            'r') as f:
        params = cPickle.load(f)
        lasagne.layers.set_all_param_values(output_layer, params)

    print "compiling theano functions"
    output = softmax_helper(
        lasagne.layers.get_output(output_layer,
                                  x_sym,
                                  deterministic=False,
                                  batch_norm_update_averages=False,
                                  batch_norm_use_averages=False))
    pred_fn = theano.function([x_sym], output)
    _ = pred_fn(
        np.random.random((BATCH_SIZE, 4, 176, 192, 176)).astype(np.float32))

    run_validation_mirroring(pred_fn,
                             results_out_folder,
                             use_patients,
                             write_images=write_images,
                             hasBrainMask=False,
                             BATCH_SIZE=BATCH_SIZE,
                             num_repeats=n_repeats,
                             preprocess_fn=preprocess,
                             save_npy=save_npy,
                             save_proba=False)
Example #32
def main(num_seeds, verbose):
    # Load the dataset
    dataset = load_dataset(
        glob('../data/trump_tweet_data_archive/condensed_*.json.zip'), verbose)
    # We don't need to vectorize the data, but we do need to chunk it into redundant sequences
    corpus, _, _, c2i, i2c, nc = seq_data(dataset, SEQ_LEN, SEQ_STEP, verbose)
    model = load_model(BASENAME, verbose)
    generate(BASENAME, model, corpus, c2i, i2c, nc, num_seeds, verbose)
Example #33
def load_chunked_dataset(time_window=1,freq=256):
    """
    This function loads the dataset as load_dataset does, then chunks it and returns it.
    """
    X,y = load_dataset()
    features,target = chunking(X,y,time_window,freq)
    return features,target
Example #34
    def _iterate_reals(self, minibatch_size):
        dataset_obj = dataset.load_dataset(data_dir=config.data_dir,
                                           **self._dataset_args)
        while True:
            images, _labels = dataset_obj.get_minibatch_np(minibatch_size)
            if self._mirror_augment:
                images = misc.apply_mirror_augment(images)
            yield images
Example #35
def load_chunked_datasetFFT():
    """
    This function loads the dataset as load_dataset does, then chunks it and returns it.
    """
    X,y = load_dataset()
    features,target = chunking_FFT(X,y)
    return features,target
Example #36
def main(config):

    if config.task == 'train':
        config.train = 1
    else:
        config.train = 0

    if config.dataset == 'life':
        config.task = 'regression'
        config.experiment = 'train-test'
    else:
        config.task = 'classification'
        config.experiment = 'doublecv'

    config.expt_name = "Exp" + str(
        config.experiment
    ) + "_" + config.mod_split + "_" + config.build_model + "_" + config.last_layer

    # Create save directories
    utils.create_directories(config)
    data = load_dataset(config)

    if config.experiment == 'mar_doublecv' or config.experiment == 'doublecv':
        n_feature_sets = len(data.keys()) - 1
    elif config.dataset == 'life':
        n_feature_sets = int(len(data.keys()) / 2) - 1

    X = [np.array(data['{}'.format(i)]) for i in range(n_feature_sets)]
    y = np.array(data['y'])

    X_test = None
    y_test = None

    if config.task == 'classification':
        config.n_classes = len(set(y))

    if config.dataset == 'life':
        X_test = [
            np.array(data['{}_test'.format(i)]) for i in range(n_feature_sets)
        ]
        y_test = np.array(data['y_test'])

    config.n_feature_sets = n_feature_sets
    config.feature_split_lengths = [i.shape[1] for i in X]

    if config.verbose > 0:
        print('Dataset used ', config.dataset)
        print('Number of feature sets ', n_feature_sets)
        [
            print('Shape of feature set {} {}'.format(e,
                                                      np.array(i).shape))
            for e, i in enumerate(X)
        ]

    trainer.train(X, y, config, X_test, y_test)

    print(config.expt_name)
    print(config.dataset)
Example #37
def train():
    tf.random.set_seed(22)
    np.random.seed(22)
    data_iter = dataset.load_dataset()

    # Build the multi-input model by passing the input shapes as a list
    generator = Generator()
    generator.build(input_shape=[(None, z_dim), (None, 10)])
    discriminator = Discriminator()
    discriminator.build(input_shape=[(None, 28, 28, 1), (None, 10)])

    g_optimizer = tf.optimizers.Adam(learning_rate=learning_rate, beta_1=0.5)
    d_optimizer = tf.optimizers.Adam(learning_rate=learning_rate, beta_1=0.5)

    for epoch in range(epochs):
        for i in range(int(60000 / batch_size / epochs_d)):
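            # Sample a batch of latent noise vectors and random one-hot class codes;
            # both the generator and the discriminator are conditioned on the codes.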

            batch_z = tf.random.uniform([batch_size, z_dim],
                                        minval=0.,
                                        maxval=1.)
            batch_c = []
            for k in range(batch_size):
                batch_c.append(np.random.randint(0, 10))
            batch_c = tf.one_hot(tf.convert_to_tensor(batch_c), 10)

            # train D
            for epoch_d in range(epochs_d):
                batch_data = next(data_iter)
                batch_x = batch_data[0]
                batch_y = batch_data[1]
                with tf.GradientTape() as tape:
                    d_loss = d_loss_fn(generator, discriminator, batch_z,
                                       batch_c, batch_x, batch_y, is_training)
                grads = tape.gradient(d_loss,
                                      discriminator.trainable_variables)
                d_optimizer.apply_gradients(
                    zip(grads, discriminator.trainable_variables))

            # train G
            with tf.GradientTape() as tape:
                g_loss = g_loss_fn(generator, discriminator, batch_z, batch_c,
                                   is_training)
            grads = tape.gradient(g_loss, generator.trainable_variables)
            g_optimizer.apply_gradients(
                zip(grads, generator.trainable_variables))

        print('epoch : {epoch} d-loss : {d_loss} g-loss : {g_loss}'.format(
            epoch=epoch, d_loss=d_loss, g_loss=g_loss))

        z = tf.random.uniform([100, z_dim], minval=0., maxval=1.)
        c = []
        for i in range(10):
            for j in range(10):
                c.append(i)
        c = tf.one_hot(tf.convert_to_tensor(c), 10)
        fake_image = generator([z, c], training=False)
        img_path = os.path.join('images', 'infogan-%d-final.png' % epoch)
        saver.save_image(fake_image.numpy(), img_path, 10)
Example #38
def LDA_process(dataset):
    fea, link, label = load_dataset(dataset)
    corpus = matutils.Dense2Corpus(fea, documents_columns=False)
    num_topics = 100
    print 'performing lda...'
    model = models.LdaModel(corpus, num_topics=num_topics, passes=10)
    topic_fea = matutils.corpus2dense(model[corpus], num_topics)
    topic_fea = topic_fea.transpose()
    np.save('dataset/'+dataset+'/lda_fea', topic_fea)
Example #39
def visualize(name):
    fea, link, label = dataset.load_dataset(name)
    label = np.argmax(label, axis=1)
    label = label.astype("float")
    label = label + 1
    label = label / max(label)
    link = link.tocsc()
    g = nx.Graph(link)
    nx.draw_networkx(g, node_size=100, with_labels=False, node_color=label)
    plt.show()
Example #40
def main(product):
    FILE = "../data/ABSA-15_{}_Train_Data.xml".format(product)
    reviews = load_dataset(FILE)
    FILE = "../data/ABSA15_{}_Test.xml".format(product)
    reviews += load_dataset(FILE)

    entities = set()
    attributes = set()

    for rv in reviews:
        for stc in rv.sentences:
            for opi in stc.opinions:
                cate = opi.category
                entity, attribute = cate.split('#')
                entities.add(entity)
                attributes.add(attribute)

    list2file(entities, "../data/{}.entity".format(product))
    list2file(attributes, "../data/{}.attribute".format(product))
Example #41
def community_label_entropy(name):
    fea, link, label = dataset.load_dataset(name)
    c_fea = get_c_fea(name)
    cl = c_fea.transpose().dot(label)
    l = cl.shape[0]
    entropy = []
    for i in range(l):
        x = cl[i,:]
        entropy.append(stats.entropy(x[x.nonzero()]))
    return np.mean(entropy)
Example #42
def get_c_fea(name):
    fea, link, label = dataset.load_dataset(name)
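    # Build a one-hot community-membership feature matrix from a Louvain
    # partition (community.best_partition) of the link graph.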
    num_inst = link.shape[0]
    g = nx.Graph(link)
    partition = community.best_partition(g)
    communities = partition.values()
    loc_fea = np.zeros((num_inst, max(communities)+1))
    for i, v in enumerate(communities):
        loc_fea[i, v] = 1
    return loc_fea
Example #43
def main():
    optparser = OptionParser()
    optparser.add_option("--train", dest="train_file",
            help="training file name")
    optparser.add_option("--test", dest="test_file",
            help="testing file")
    optparser.add_option('--pro', dest='product')

    (options, args) = optparser.parse_args()
    save_as_mimlmix_format.PATH = "./{}/data/".format(options.product)

    train_reviews = load_dataset(options.train_file)
    test_reviews = load_dataset(options.test_file)

    n_cates, cate_index = get_categories(train_reviews + test_reviews)
    vocab_size = 1000
    vocab_index = get_vocab(train_reviews, vocab_size)

    train_bags = [extract_unigram(vocab_index, vocab_size, review)\
            for review in train_reviews]
    train_labels = [extract_labels(cate_index, review)\
            for review in train_reviews]

    test_bags = [extract_unigram(vocab_index, vocab_size, review)\
            for review in test_reviews]
    test_labels = [extract_labels(cate_index, review)\
            for review in test_reviews]

    save_label_id(cate_index)
    save_view_info(view_name="ngram", dim=vocab_size,\
            data_format="sparse", view_type="discrete")
    features = train_bags + test_bags
    save_sparse_feature(corpus_name=options.product, view_name="ngram", features=features)
    labels = train_labels + test_labels
    save_label(options.product, labels)
    save_partition(len(train_labels), len(test_labels))

    #word2vec
    word2vec_feat(train_reviews+test_reviews)

    print("Done")
Example #44
def evaluate(dataset_name, fl, ratio):
    print dataset_name, fl.__name__, ratio
    d = dataset.load_dataset(dataset_name)
    fea = d.data
    label = d.target
    fea = fl(fea)
    ss = StratifiedShuffleSplit(label, 3, test_size=(1-ratio), random_state=0)
    svc = LinearSVC()
    for train, test in ss:
        svc.fit(fea[train,:], label[train,:])
        predict = svc.predict(fea[test, :])
        acc = accuracy_score(label[test, :], predict)
        print acc
Example #45
def evaluate(dataset, model):
    kfold = load_cv(dataset)
    fea, link, label = load_dataset(dataset)
    errors = []
    for train, test in kfold:
        tmp_label = label.copy()
        tmp_label[test,:] = 0
        tmp_label = model.fit_predict(fea, link, train, tmp_label)
        error = np.abs(tmp_label[test, :]-label[test, :]).sum() / 2 / tmp_label.shape[0]
        errors.append(error)
        print error
    print 'mean', np.mean(errors)
    return errors
Example #46
def load_dataset_for_previous_run(run_id, **kwargs): # => dataset_obj, mirror_augment
    result_subdir = locate_result_subdir(run_id)

    # Parse config.txt.
    parsed_cfg = dict()
    with open(os.path.join(result_subdir, 'config.txt'), 'rt') as f:
        for line in f:
            if line.startswith('dataset =') or line.startswith('train ='):
                exec(line, parsed_cfg, parsed_cfg)
    dataset_cfg = parsed_cfg.get('dataset', dict())
    train_cfg = parsed_cfg.get('train', dict())
    mirror_augment = train_cfg.get('mirror_augment', False)

    # Handle legacy options.
    if 'h5_path' in dataset_cfg:
        dataset_cfg['tfrecord_dir'] = dataset_cfg.pop('h5_path').replace('.h5', '')
    if 'mirror_augment' in dataset_cfg:
        mirror_augment = dataset_cfg.pop('mirror_augment')
    if 'max_labels' in dataset_cfg:
        v = dataset_cfg.pop('max_labels')
        if v is None: v = 0
        if v == 'all': v = 'full'
        dataset_cfg['max_label_size'] = v
    if 'max_images' in dataset_cfg:
        dataset_cfg.pop('max_images')

    # Handle legacy dataset names.
    v = dataset_cfg['tfrecord_dir']
    v = v.replace('-32x32', '').replace('-32', '')
    v = v.replace('-128x128', '').replace('-128', '')
    v = v.replace('-256x256', '').replace('-256', '')
    v = v.replace('-1024x1024', '').replace('-1024', '')
    v = v.replace('celeba-hq', 'celebahq')
    v = v.replace('cifar-10', 'cifar10')
    v = v.replace('cifar-100', 'cifar100')
    v = v.replace('mnist-rgb', 'mnistrgb')
    v = re.sub('lsun-100k-([^-]*)', 'lsun-\\1-100k', v)
    v = re.sub('lsun-full-([^-]*)', 'lsun-\\1-full', v)
    dataset_cfg['tfrecord_dir'] = v

    # Load dataset.
    dataset_cfg.update(kwargs)
    dataset_obj = dataset.load_dataset(data_dir=config.data_dir, **dataset_cfg)
    return dataset_obj, mirror_augment
Example #47
def stats(name):
    fea, link, label = dataset.load_dataset(name)
    g = nx.Graph(link)
    components = nx.connected_components(g)
    num_node = link.shape[0]
    num_link = link.sum() / 2
    density = float(2 * num_link) / num_node
    ratio = float(len(components[0])) / link.shape[0]
    row, col = link.nonzero()
    label = np.argmax(label, axis=1)
    homogeneity = float((label[row] == label[col]).sum()) / len(row)
    info = {
        "name": name,
        "ratio": ratio,
        "homogeneity": homogeneity,
        "num_node": num_node,
        "num_link": num_link,
        "density": density,
    }
    print info
Example #48
def teste_dataset(dataset, titulo):
    dataset = load_dataset(dataset)
    if len(dataset) > 100:
        np.random.shuffle(dataset)
        dataset = dataset[:100]
    pontos = [[float(linha[0]), float(linha[1])] for linha in dataset]
    num_classes, criterio_parada = 4, 0.01
    iteracoes_km = []
    iteracoes_kmpp = []
    for i in range(30):
        clusters, iteracoes = kmeans(pontos, num_classes, criterio_parada)
        iteracoes_km.append(iteracoes)
        clusters, iteracoes = kmeanspp(pontos, num_classes, criterio_parada)
        iteracoes_kmpp.append(iteracoes)
    fig, axes = plt.subplots(nrows=1, ncols=2)
    axes[0].boxplot(iteracoes_km, labels=['K-means'])
    axes[0].set_title(titulo)
    axes[1].boxplot(iteracoes_kmpp, labels=['K-means++'])
    axes[1].set_title(titulo)
    plt.show()
Example #49
from textgen import generate_c2w2c_text, generate_word_lstm_text
from util import info, Timer

sys.setrecursionlimit(40000)
MIN_LR        = 0.0001
MAX_PATIENCE  = 1


params = model_params.from_cli_args()
params.print_params()
print ''

use_unk = params.mode == 'WORD'

print 'Loading training data...'
training_dataset = load_dataset(params.training_dataset, params.train_data_limit, use_unk)
training_dataset.print_stats()
print ''

print 'Loading test data...'
test_dataset = load_dataset(params.test_dataset, params.test_data_limit, use_unk)
test_dataset.print_stats()
print ''

# Vocabularies
V_C = make_char_vocabulary([training_dataset])
V_W = training_dataset.vocabulary

print 'V_C statistics:'
print '  - Distinct characters: %d' % V_C.size
print ''
Example #50
def dump(dataset):
    print ("dumping",dataset)
    with open(dataset+".pickle","wb") as f:
        pickle.dump(load_dataset(dataset),f)
Example #51
def prepare_cv(dataset, train_ratio = 0.1):
    fea, link, label = load_dataset(dataset)
    cv = ShuffleSplit(fea.shape[0], 10, test_size=1-train_ratio, indices=False, random_state=0)
    pickle.dump(cv, open('benchmark/cv/'+dataset,'wb'))
Example #52
# -*- coding: utf-8 -*-
import feature_extraction
import dataset
import pickle
import numpy as np
from searcher import Searcher

if __name__ == '__main__':
    # TODO: load experiment using params
    train_path = ""
    test_path = ""
    # loads images from given paths
    train = dataset.load_dataset(train_path)
    test = dataset.load_dataset(test_path)

    # extracts descriptors for train and test sets
    train_descriptors = {item.path:feature_extraction.extract_descriptors(item.data) for item in train}
    test_descriptors = {item.path:feature_extraction.extract_descriptors(item.data) for item in test}

    # creates codebook (default size=300) based on train samples
    codebook = feature_extraction.create_codebook(np.concatenate(train_descriptors.values()))

    # generate feature vectors for train and test based on previously calculated codebook
    train_features = {key:feature_extraction.extract_features(codebook, train_descriptors[key]) for key in train_descriptors}
    test_features = {key:feature_extraction.extract_features(codebook, test_descriptors[key]) for key in test_descriptors}

    # TODO: create a similarity matrix using all features

    # persists features, codebook and similarity matrix
    pickle.dump(train_features, open("train_features.pk", "wb"))
    pickle.dump(test_features, open("test_features.pk", "wb"))
Example #53
    def fit_transform(self, x):
        """ Override fit_transform to avoid calling transform
            twice on the standard scaler (in fit and transform)
        """
        return self.pca.fit_transform(self.scaler.fit_transform(x))

    def inverse_transform(self, x, only_pca=False):
        """ First undo the PCA transformation, then undo the scaling unless only_pca """
        if only_pca:
            return self.pca.inverse_transform(x)
        return self.scaler.inverse_transform(self.pca.inverse_transform(x))



if __name__ == '__main__':
    from dataset import load_dataset
    x,y = load_dataset(['../data/tea_cup', '../data/spoon'])

    # Test fit -> transform vs fit_transform
    p = Preprocess(0.7)
    p.fit(x)
    x2 = p.transform(x)
    x3 = p.fit_transform(x)
    assert np.all(np.isclose(x2, x3))

    # Test inverse transform.
    # With all PCA components retained, the inverse should be equal original.
    p2 = Preprocess()
    x4 = p2.fit_transform(x)
    x5 = p2.inverse_transform(x4)
    assert np.all(np.isclose(x5, x))
Example #54
    for review in reviews:
        for sent in review.sentences:
            sent_str = ' '.join(sent.words)
            yield sent_str

    for sent in linesentence:
        yield sent


train_file = "../../data/ABSA-15_Laptops_Train_Data.xml"
test_file = "../../data/ABSA15_Laptops_Test.xml"

logging.basicConfig(format='%(asctime)s : %(threadName)s : %(levelname)s : %(message)s', level=logging.INFO)
logging.info("running %s" % " ".join(sys.argv))

train_reviews = load_dataset(train_file)
#train_sents = sent_iter(train_reviews)
test_reviews = load_dataset(test_file)
#test_sents = sent_iter(test_reviews)

unlabeled_sents = LineSentence("../../data/laptop.unlabeled.txt")

model_1 = Sent2Vec(sent_iter(train_reviews, unlabeled_sents),\
        model_file="../../models/laptop.word2vec.model")

model_1.save_sent2vec_format("../../models/laptop.sent2vec.model")

model_2 = Sent2Vec(sent_iter(train_reviews+test_reviews, unlabeled_sents),
        model_file="../../models/laptop.word2vec.model")
model_2.save_sent2vec_format("../../models/laptop_with_test.sentenc2vec.model")
Example #55
def main(num_epochs=100):
    print("Loading data...")
    X_train, y_train, X_val, y_val, X_test, y_test = load_dataset()

    input_var = T.tensor4('inputs')
    target_var = T.ivector('targets')

    print("Building model and compiling functions...")
    network = build_cnn(input_var)

    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
    loss = loss.mean()

    params = lasagne.layers.get_all_params(network, trainable=True)
    updates = lasagne.updates.nesterov_momentum(
            loss, params, learning_rate=0.01, momentum=0.9)

    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(test_prediction,
                                                            target_var)
    test_loss = test_loss.mean()
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                      dtype=theano.config.floatX)

    train_fn = theano.function([input_var, target_var], loss, updates=updates)
    val_fn = theano.function([input_var, target_var], [test_loss, test_acc])

    print("Starting training...")
    for epoch in range(num_epochs):
        train_err = 0
        train_batches = 0
        start_time = time.time()
        for batch in iterate_minibatches(X_train, y_train, 500, shuffle=True):
            inputs, targets = batch
            train_err += train_fn(inputs, targets)
            train_batches += 1

        val_err = 0
        val_acc = 0
        val_batches = 0
        for batch in iterate_minibatches(X_val, y_val, 500, shuffle=False):
            inputs, targets = batch
            err, acc = val_fn(inputs, targets)
            val_err += err
            val_acc += acc
            val_batches += 1

        print("Epoch {} of {} took {:.3f}s".format(
            epoch + 1, num_epochs, time.time() - start_time))
        print("  training loss:\t\t{:.6f}".format(train_err / train_batches))
        print("  validation loss:\t\t{:.6f}".format(val_err / val_batches))
        print("  validation accuracy:\t\t{:.2f} %".format(
            val_acc / val_batches * 100))

    test_err = 0
    test_acc = 0
    test_batches = 0
    for batch in iterate_minibatches(X_test, y_test, 500, shuffle=False):
        inputs, targets = batch
        err, acc = val_fn(inputs, targets)
        test_err += err
        test_acc += acc
        test_batches += 1
    print("Final results:")
    print("  test loss:\t\t\t{:.6f}".format(test_err / test_batches))
    print("  test accuracy:\t\t{:.2f} %".format(
        test_acc / test_batches * 100))

    # Dump the network weights to a file like this:
    np.savez(os.path.join(checkpoint_path, 'model.npz'), *lasagne.layers.get_all_param_values(network))
Example #56
def main(product):
    TRAIN_FILE = "../data/ABSA-15_{}_Train_Data.xml".format(product)
    TEST_FILE = "../data/ABSA15_{}_Test.xml".format(product)

    # load data set
    training_reviews = load_dataset(TRAIN_FILE)
    testing_reviews = load_dataset(TEST_FILE)

    # build vocab
    vocab = build_vocab(training_reviews, TOPN=1000)
    vocab_index = list2dict(vocab)

    cate_index = get_all_categories(training_reviews)
    cates = dict2list(cate_index)
    n_cates = len(cates)

    print "Loading alignment model"
    align_model = load_align_model("s2t64.actual.ti.final")

    print "Get prior"
    prior = get_prior(training_reviews)

    print "Training level 2 model..."
    lev2_model = train_pola_clf(training_reviews, vocab_index, cate_index)

    print "Predicting..."
    results = []
    for review in testing_reviews:
        for sent in review.sentences:
            pairs_predict = predict(sent, align_model, prior, lev2_model, vocab_index, cate_index)
            results.append(pairs_predict)

    print "Evaluation"
    opinions = []
    for review in testing_reviews:
        for sent in review.sentences:
            #opis = [(cate_index[opi.category], opi.polarity) for opi in sent.opinions]
            opis = []
            for opi in sent.opinions:
                if opi.category in cate_index:
                    opis.append((cate_index[opi.category], opi.polarity))
            opinions.append(opis)
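    # Micro-averaged precision/recall/F1, first over the predicted category sets,
    # then over full (category, polarity) pairs.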

    TP1 = 0.0
    FP1 = 0.0
    FN1 = 0.0
    for i in range(len(opinions)):
        o = set([pair[0] for pair in results[i]])
        g = set([pair[0] for pair in opinions[i]])
        TP1 += len(o & g)
        FP1 += len(o - g)
        FN1 += len(g - o)
    
    p = TP1 / (TP1 + FP1)
    r = TP1 / (TP1 + FN1)
    if p + r == 0:
        f = 0
    else:
        f = 2. * p * r / (p + r)

    print p, r, f
    
    TP2 = 0.0
    FP2 = 0.0
    FN2 = 0.0
    for i in range(len(opinions)):
        o = set(results[i])
        g = set(opinions[i])
        TP2 += len(o & g)
        FP2 += len(o - g)
        FN2 += len(g - o)

    p = TP2 / (TP2 + FP2)
    r = TP2 / (TP2 + FN2)
    if p + r == 0:
        f = 0
    else:
        f = 2. * p * r / (p + r)

    print p, r, f
Example #57
    parser.add_argument('--datadir', type=str, default='data')
    args = parser.parse_args()
    if args.gpu >= 0:
        cuda.check_cuda_available()
    xp = cuda.cupy if args.gpu >= 0 else np

    batchsize = 100
    n_epoch = args.epoch
    n_units = 1000

    # create result dir
    log_fn, result_dir = create_result_dir(args)

    # Prepare dataset
    print('load CIFAR10 dataset')
    dataset = load_dataset(args.datadir)
    x_train, y_train, x_test, y_test = dataset
    x_train = x_train.astype(np.float32) / 255.0
    y_train = y_train.astype(np.int32)
    x_test = x_test.astype(np.float32) / 255.0
    y_test = y_test.astype(np.int32)
    N = x_train.shape[0]
    N_test = x_test.shape[0]
    
    models = []
    model = VGG_mini()
    
    if args.gpu >= 0:
        cuda.get_device(args.gpu).use()
        model.to_gpu()
Example #58
            'k': k,
            'momentum': momentum,
            'mae': mae,
            'rmse': rmse,
            'lrate': current_l_w
        }

        config_result['results'].append(iteration_result)

        print(iteration_str.format(j, k, current_l_w, momentum, mae, rmse))

        with open('{}_{}.json'.format(config_name, name), 'wt') as res_output:
            res_output.write(json.dumps(config_result, indent=4))

if __name__ == "__main__":
    experiments = read_experiment(sys.argv[1])

    for experiment in experiments:
        name = experiment['name']
        train_path = experiment['train_path']
        test_path = experiment['test_path']
        sep = experiment['sep']
        configs = experiment['configs']

        all_users, all_movies, tests = load_dataset(train_path, test_path,
                                                    sep, user_based=True)

        for config in configs:
            run(name, train_path, config, all_users, all_movies, tests,
                None, sep)
Example #59
File: combine.py Project: rjc362/pa4
import numpy as np
import sys
import math

from scipy.misc import imsave, imread
from scipy.sparse.linalg import lsqr
import cv2
import time

from util import pyrup, save_mesh, form_poisson_equation, pyrdown

from dataset import load_dataset

assert len(sys.argv) > 2
data = load_dataset(sys.argv[1])
mode = sys.argv[2]
assert mode in ('normals', 'depth', 'both')

alpha = data.right_alpha
depth_weight = None
depth = None
K_right = None
normals = None
albedo = None

tic = time.time()
if mode in ('normals', 'both'):
    albedo = imread(data.albedo_png)
    normals = np.load(data.normals_npy)

if mode in ('depth', 'both'):