Example #1
import utils
import os
from argparse import ArgumentParser

arguments = ArgumentParser()
arguments.add_argument('--base', type=str, default='')
arguments.add_argument('--pattern', type=str, default='')
args = arguments.parse_args()

base_dir = args.base
merged_res = {}
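# read every '<pattern>_<i>' pickle for i in 0..99 and merge the contents into one dict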
for i in range(100):
    try:
        res = utils.read_pickle(
            os.path.join(base_dir, '{0}_{1}'.format(args.pattern, i)))
    except Exception:  # skip indices whose pickle is missing or unreadable
        continue
    for k, v in res.items():
        merged_res[k] = v

utils.write_pickle(merged_res,
                   os.path.join(base_dir, '{0}_merged'.format(args.pattern)))
Example #2
File: skl.py Project: lansiz/neuron
def load_data():
    return utils.read_pickle('train_data.pkl'), utils.read_pickle(
        'test_data.pkl')
Example #3
File: d11.py Project: yangsenwxy/TReNDS
def run(seed):

    # create folders for scores models and preds
    folder_models = './models/domain1_var1/scores/'
    if not os.path.exists(folder_models):
        os.makedirs(folder_models)

    folder_preds = './predicts/domain1_var1/scores/'
    if not os.path.exists(folder_preds):
        os.makedirs(folder_preds)

    print('Loading data...')

    # load biases
    ic_bias = read_pickle('./data/biases/ic_biases.pickle')
    ic_bias_site = read_pickle('./data/biases/ic_biases_site.pickle')
    fnc_bias = read_pickle('./data/biases/fnc_biases.pickle')
    fnc_bias_site = read_pickle('./data/biases/fnc_biases_site.pickle')
    pca_bias = read_pickle('./data/biases/200pca_biases.pickle')
    pca_bias_site = read_pickle('./data/biases/200pca_biases_site.pickle')

    # load classifier output and add extra site2 ids
    extra_site = pd.DataFrame()
    extra_site['Id'] = np.load('./predicts/classifier/site2_test_new_9735.npy')

    # load competition data
    ids_df = pd.read_csv('./data/raw/reveal_ID_site2.csv')
    fnc_df = pd.read_csv('./data/raw/fnc.csv')
    loading_df = pd.read_csv('./data/raw/loading.csv')
    labels_df = pd.read_csv('./data/raw/train_scores.csv')

    ids_df = ids_df.append(extra_site)
    print('Detected Site2 ids count: ', ids_df['Id'].nunique())

    # load created features
    agg_df = pd.read_csv('./data/features/agg_feats.csv')
    im_df = pd.read_csv('./data/features/im_feats.csv')
    dl_df = pd.read_csv('./data/features/dl_feats.csv')

    pca_df = pd.read_csv('./data/features/200pca_feats/200pca_3d_k0.csv')
    for i in range(1, 6):
        part = pd.read_csv(
            './data/features/200pca_feats/200pca_3d_k{}.csv'.format(i))
        del part['Id']
        pca_df = pd.concat((pca_df, part), axis=1)

    # merge data
    ic_cols = list(loading_df.columns[1:])
    fnc_cols = list(fnc_df.columns[1:])
    agg_cols = list(agg_df.columns[1:])
    im_cols = list(im_df.columns[1:])
    pca_cols = list(pca_df.columns[1:])
    dl_cols = list(dl_df.columns[1:])

    df = fnc_df.merge(loading_df, on='Id')
    df = df.merge(agg_df, how='left', on='Id')
    df = df.merge(im_df, how='left', on='Id')
    df = df.merge(pca_df, how='left', on='Id')
    df = df.merge(dl_df, how='left', on='Id')
    df = df.merge(labels_df, how='left', on='Id')

    del loading_df, fnc_df, agg_df, im_df, pca_df
    gc.collect()

    # split train and test
    df.loc[df['Id'].isin(labels_df['Id']), 'is_test'] = 0
    df.loc[~df['Id'].isin(labels_df['Id']), 'is_test'] = 1

    train = df.query('is_test==0')
    del train['is_test']
    test = df.query('is_test==1')
    del test['is_test']
    y = train['domain1_var1'].copy().reset_index(drop=True)
    d11_index = list(train['domain1_var1'].dropna().index)

    # apply biases
    for c in ic_bias_site.keys():
        test.loc[~test['Id'].isin(ids_df['Id']), c] += ic_bias[c]
        test.loc[test['Id'].isin(ids_df['Id']), c] += ic_bias_site[c]

    for c in fnc_bias_site.keys():
        test.loc[~test['Id'].isin(ids_df['Id']), c] += fnc_bias[c]
        test.loc[test['Id'].isin(ids_df['Id']), c] += fnc_bias_site[c]

    for c in pca_bias_site.keys():
        test.loc[~test['Id'].isin(ids_df['Id']), c] += pca_bias[c]
        test.loc[test['Id'].isin(ids_df['Id']), c] += pca_bias_site[c]

    # save df for scaling
    df_scale = pd.concat([train, test], axis=0)

    # I. Create fnc score
    print('Creating FNC score...')

    # prepare datasets for fnc score
    train_for_score, test_for_score = scale_select_data(
        train, test, df_scale, fnc_cols)

    # define models
    names = ['ENet', 'BRidge']
    names = [name + '_fnc_seed{}'.format(seed) for name in names]
    pack = [
        ElasticNet(alpha=0.05, l1_ratio=0.5, random_state=0),
        BayesianRidge()
    ]

    # train models
    zoo = TrendsModelSklearn(pack, seed=seed)
    zoo.fit([train_for_score] * 2, y)
    score_blend = zoo.blend_oof()
    pred = zoo.predict([test_for_score] * 2, names)

    # save oof, pred, models
    np.save(folder_preds + 'fnc_score_seed{}.npy'.format(seed), score_blend)
    np.save(folder_preds + 'fnc_score_test_seed{}.npy'.format(seed), pred)
    zoo.save_models(names, folder=folder_models)

    # II. Create agg score
    print('Creating AGG score...')

    # prepare datasets for agg score
    train_for_score, test_for_score = scale_select_data(
        train, test, df_scale, agg_cols)

    # define models
    names = ['ENet', 'Huber']
    names = [name + '_agg_seed{}'.format(seed) for name in names]
    pack = [
        ElasticNet(alpha=0.05, l1_ratio=0.3, random_state=0),
        HuberRegressor(epsilon=2.5, alpha=1)
    ]

    # train models
    zoo = TrendsModelSklearn(pack, seed=seed)
    zoo.fit([train_for_score] * 2, y)
    score_blend = zoo.blend_oof()
    pred = zoo.predict([test_for_score] * 2, names)

    # save oof, pred, models
    np.save(folder_preds + 'agg_score_seed{}.npy'.format(seed), score_blend)
    np.save(folder_preds + 'agg_score_test_seed{}.npy'.format(seed), pred)
    zoo.save_models(names, folder=folder_models)

    # III. Create pca score
    print('Creating PCA score...')

    # prepare datasets for pca score
    train_for_score, test_for_score = scale_select_data(
        train, test, df_scale, pca_cols)

    # define models
    names = ['ENet', 'BRidge']
    names = [name + '_pca_seed{}'.format(seed) for name in names]
    pack = [
        ElasticNet(alpha=0.2, l1_ratio=0.2, random_state=0),
        BayesianRidge()
    ]

    # train models
    zoo = TrendsModelSklearn(pack, seed=seed)
    zoo.fit([train_for_score] * 2, y)
    score_blend = zoo.blend_oof()
    pred = zoo.predict([test_for_score] * 2, names)

    # save oof, pred, models
    np.save(folder_preds + 'pca_score_seed{}.npy'.format(seed), score_blend)
    np.save(folder_preds + 'pca_score_test_seed{}.npy'.format(seed), pred)
    zoo.save_models(names, folder=folder_models)

    # IV. Create im score
    print('Creating IM score...')

    # prepare datasets for im score
    train_for_score, test_for_score = scale_select_data(
        train, test, df_scale, im_cols)

    # define models
    names = ['ENet', 'BRidge']
    names = [name + '_im_seed{}'.format(seed) for name in names]
    pack = [
        ElasticNet(alpha=0.2, l1_ratio=0.2, random_state=0),
        BayesianRidge()
    ]

    # train models
    zoo = TrendsModelSklearn(pack, seed=seed)
    zoo.fit([train_for_score] * 2, y)
    score_blend = zoo.blend_oof()
    pred = zoo.predict([test_for_score] * 2, names)

    # save oof, pred, models
    np.save(folder_preds + 'im_score_seed{}.npy'.format(seed), score_blend)
    np.save(folder_preds + 'im_score_test_seed{}.npy'.format(seed), pred)
    zoo.save_models(names, folder=folder_models)

    # V. Create dl score
    print('Creating DL score...')

    # prepare datasets for dl score
    train_for_score, test_for_score = scale_select_data(
        train, test, df_scale, dl_cols)

    # define models
    names = ['ENet', 'BRidge']
    names = [name + '_dl_seed{}'.format(seed) for name in names]
    pack = [
        ElasticNet(alpha=0.2, l1_ratio=0.2, random_state=0),
        BayesianRidge()
    ]

    # train models
    zoo = TrendsModelSklearn(pack, seed=seed)
    zoo.fit([train_for_score] * 2, y)
    score_blend = zoo.blend_oof()
    pred = zoo.predict([test_for_score] * 2, names)

    # save oof, pred, models
    np.save(folder_preds + 'dl_score_seed{}.npy'.format(seed), score_blend)
    np.save(folder_preds + 'dl_score_test_seed{}.npy'.format(seed), pred)
    zoo.save_models(names, folder=folder_models)

    # VI. Training and predicting procedure
    print('Training has started...')
    print('Reading scores from ', folder_preds)

    # add scores
    for prefix in ['fnc', 'agg', 'im', 'pca', 'dl']:
        train.loc[d11_index, prefix + '_score'] = np.load(
            folder_preds + '{}_score_seed{}.npy'.format(prefix, seed))
        test.loc[:, prefix + '_score'] = np.load(
            folder_preds + '{}_score_test_seed{}.npy'.format(prefix, seed))
    score_cols = [c for c in train.columns if c.endswith('_score')]
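    # the blended group scores become extra features for the final models below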

    # save df for scaling
    df_scale = pd.concat([train, test], axis=0)

    # create different datasets
    # linear
    linear_cols = sorted(
        list(set(ic_cols + fnc_cols + pca_cols) - set(['IC_20'])))
    train_linear, test_linear = scale_select_data(train, test, df_scale,
                                                  linear_cols)

    # kernel
    kernel_cols = sorted(list(set(ic_cols + pca_cols) - set(['IC_20'])))
    train_kernel, test_kernel = scale_select_data(train=train,
                                                  test=test,
                                                  df_scale=df_scale,
                                                  cols=kernel_cols,
                                                  scale_factor=0.2,
                                                  scale_cols=pca_cols,
                                                  sc=MinMaxScaler())

    # score
    sc_cols = sorted(list(set(ic_cols + score_cols) - set(['IC_20'])))
    train_sc, test_sc = scale_select_data(train, test, df_scale, sc_cols)

    # learning process on different datasets
    names = ['GP', 'SVM1', 'SVM2', 'OMP', 'KR']
    names = [name + '_seed{}'.format(seed) for name in names]
    pack = [
        GaussianProcessRegressor(DotProduct(), random_state=0),
        NuSVR(C=5, kernel='rbf'),
        NuSVR(C=5, kernel='rbf'),
        OrthogonalMatchingPursuitCV(),
        KernelRidge(kernel='poly', degree=2, alpha=10)
    ]

    zoo = TrendsModelSklearn(pack, seed=seed)
    zoo.fit([train_sc] * 2 + [train_kernel] + [train_linear] * 2, y)
    de_blend = zoo.blend_oof()
    preds = zoo.predict([test_sc] * 2 + [test_kernel] + [test_linear] * 2,
                        names,
                        is_blend=True)

    # rewrite folders for models and preds
    folder_models = './models/domain1_var1/stack/'
    if not os.path.exists(folder_models):
        os.makedirs(folder_models)

    folder_preds = './predicts/domain1_var1/stack/'
    if not os.path.exists(folder_preds):
        os.makedirs(folder_preds)

    print('Saving models to', folder_models)
    print('Saving predictions to', folder_preds)

    # save oofs and models
    zoo.save_oofs(names, folder=folder_preds)
    zoo.save_models(names, folder=folder_models)

    # stacking predictions
    print('Stacking predictions...')
    d11_prediction = pd.DataFrame()
    d11_prediction['Id'] = test['Id'].values
    d11_prediction['pred'] = preds
    d11_prediction.to_csv(folder_preds +
                          'domain1_var1_stack_seed{}.csv'.format(seed),
                          index=False)
    print('domain1_var1 seed pred is saved as',
          folder_preds + 'domain1_var1_stack_seed{}.csv'.format(seed))
Example #4
import logging
import pickle
import numpy
import pandas as pd
from argparse import ArgumentParser
from datetime import datetime
from stattools import grangercausalitytests
import utils

logging.basicConfig(format='%(asctime)s %(message)s', level=logging.DEBUG)

arguments = ArgumentParser()
arguments.add_argument('--path', type=str, default="flattened-timeseries")
arguments.add_argument('--index', type=int)
args = arguments.parse_args()

results = {}
lags = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 14, 21, 30, 60, 90]
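# candidate lags to test for Granger causality between word pairs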
df = utils.read_pickle(args.path)


def pairwise_granger(words):
    global df
    (word1, word2) = words
    if word1 not in df or word2 not in df:
        return (word1, word2, None)
    best_lag, res = grangercausalitytests(numpy.transpose(
        [df[word2], df[word1]]),
                                          lags,
                                          verbose=False)
    causal_lag = None
    f_pvalue = res[0]['params_ftest'][1]
    lr_pvalue = res[0]['lrtest'][1]
    if f_pvalue < 0.01 and lr_pvalue < 0.01:
Example #5

import json
import zipfile
from collections import defaultdict

import numpy as np

import utils


# Sample images for learners.
def sample_zip(fn_in, fn_out, rate=0.01, seed=42):
    np.random.seed(seed)
    with zipfile.ZipFile(fn_in) as fin, zipfile.ZipFile(fn_out, "w") as fout:
        sampled = filter(lambda _: np.random.rand() < rate, fin.filelist)
        for zInfo in sampled:
            fout.writestr(zInfo, fin.read(zInfo))


sample_zip("train2014.zip", "train2014_sample.zip")
sample_zip("val2014.zip", "val2014_sample.zip")

# Load prepared embeddings.
train_img_embeds = utils.read_pickle("train_img_embeds.pickle")
train_img_fns = utils.read_pickle("train_img_fns.pickle")
val_img_embeds = utils.read_pickle("val_img_embeds.pickle")
val_img_fns = utils.read_pickle("val_img_fns.pickle")

# Check shapes.
print(train_img_embeds.shape, len(train_img_fns))
print(val_img_embeds.shape, len(val_img_fns))


# Extract captions.
def get_captions_for_fns(fns, zip_fn, zip_json_path):
    zf = zipfile.ZipFile(zip_fn)
    j = json.loads(zf.read(zip_json_path).decode("utf8"))
    id_to_fn = {img["id"]: img["file_name"] for img in j["images"]}
    fn_to_caps = defaultdict(list)
Example #6
    cv2.waitKey(0)
    cv2.destroyAllWindows()


if __name__ == '__main__':
    args = utils.get_options()

    input_image_dirpath = osp.join(osp.dirname(__file__), args.in_dir)
    # recognize any extensions
    image_paths, image_names = utils.get_file_paths(input_image_dirpath, "*")

    # read camera parameters
    camera_param_filepath = osp.join(osp.dirname(__file__),
                                     args.camera_param_path)
    cameraMatrix, distCoeffs, rvecs, tvecs, stdDevIn, stdDevEx = \
        utils.read_pickle(camera_param_filepath)

    # read parameters from arguments
    dictionary = utils.get_aruco_dict(args.aruco_dict)
    squareL = args.square_length
    markerL = args.marker_length
    tb = args.v_margin
    lr = args.h_margin
    pixels_per_mm = args.pixels_per_mm
    # read parameters from configuration pickle file
    if args.input_board_cfg_pkl:
        board_cfg_pkl_path = osp.join(osp.dirname(__file__),
                                      args.board_cfg_pkl_path)
        board_cfg = utils.read_pickle(board_cfg_pkl_path)
        dictionary = utils.get_aruco_dict(board_cfg['dict_label'])
        squareL = board_cfg['square_length']
Example #7
def train(args):
    # for creating the Visdom object
    DEFAULT_PORT = 8097
    DEFAULT_HOSTNAME = "http://localhost"
    viz = Visdom(DEFAULT_HOSTNAME, DEFAULT_PORT, ipv6=False)

    hyparam_list = [
        ("model", args.model_name),
        ("cube", args.cube_len),
        ("bs", args.batch_size),
        ("g_lr", args.g_lr),
        ("d_lr", args.d_lr),
        ("z", args.z_dis),
        ("bias", args.bias),
        ("sl", args.soft_label),
    ]

    hyparam_dict = OrderedDict(((arg, value) for arg, value in hyparam_list))
    log_param = make_hyparam_string(hyparam_dict)
    print(log_param)

    # for using tensorboard
    if args.use_tensorboard:
        import tensorflow as tf

        summary_writer = tf.summary.FileWriter(args.output_dir + args.log_dir +
                                               log_param)

        def inject_summary(summary_writer, tag, value, step):
            summary = tf.Summary(
                value=[tf.Summary.Value(tag=tag, simple_value=value)])
            summary_writer.add_summary(summary, global_step=step)

        inject_summary = inject_summary

    # dataset definition
    dsets_path = args.input_dir + args.data_dir + "train/"
    print(dsets_path)

    x_train = np.load("voxels_3DMNIST_16.npy")
    dataset = x_train.reshape(-1,
                              args.cube_len * args.cube_len * args.cube_len)
    print(dataset.shape)
    dset_loaders = torch.utils.data.DataLoader(dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=1)

    # define the discriminator and generator
    D = _D(args)
    G = _G(args)

    D_solver = optim.Adam(D.parameters(), lr=args.d_lr, betas=args.beta)
    G_solver = optim.Adam(G.parameters(), lr=args.g_lr, betas=args.beta)

    if torch.cuda.is_available():
        print("using cuda")
        D.cuda()
        G.cuda()

    criterion = nn.BCELoss()

    pickle_path = "." + args.pickle_dir + log_param
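    # restore generator/discriminator weights and optimizer state from a checkpoint, if one exists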
    read_pickle(pickle_path, G, G_solver, D, D_solver)

    for epoch in range(args.n_epochs):
        epoch_start_time = time.time()
        print("epoch %d started" % (epoch))
        for i, X in enumerate(dset_loaders):

            X = var_or_cuda(X)
            X = X.type(torch.cuda.FloatTensor)
            if X.size()[0] != int(args.batch_size):
                #print("batch_size != {} drop last incompatible batch".format(int(args.batch_size)))
                continue

            Z = generateZ(args)
            real_labels = var_or_cuda(torch.ones(args.batch_size)).view(
                -1, 1, 1, 1, 1)
            fake_labels = var_or_cuda(torch.zeros(args.batch_size)).view(
                -1, 1, 1, 1, 1)

            if args.soft_label:
                real_labels = var_or_cuda(
                    torch.Tensor(args.batch_size).uniform_(0.9, 1.1)).view(
                        -1, 1, 1, 1, 1)  ####
                #fake_labels = var_or_cuda(torch.Tensor(args.batch_size).uniform_(0, 0.3)).view(-1,1,1,1,1)
                fake_labels = var_or_cuda(torch.zeros(args.batch_size)).view(
                    -1, 1, 1, 1, 1)  #####
            # ============= Train the discriminator =============#
            d_real = D(X)
            d_real_loss = criterion(d_real, real_labels)

            fake = G(Z)
            d_fake = D(fake)
            d_fake_loss = criterion(d_fake, fake_labels)

            d_loss = d_real_loss + d_fake_loss

            d_real_acu = torch.ge(d_real.squeeze(), 0.5).float()
            d_fake_acu = torch.le(d_fake.squeeze(), 0.5).float()
            d_total_acu = torch.mean(torch.cat((d_real_acu, d_fake_acu), 0))

            # update D only while its accuracy is at or below the threshold,
            # so the discriminator does not overpower the generator
            if d_total_acu <= args.d_thresh:
                D.zero_grad()
                d_loss.backward()
                D_solver.step()

            # =============== Train the generator ===============#

            Z = generateZ(args)

            fake = G(Z)
            d_fake = D(fake)
            g_loss = criterion(d_fake, real_labels)

            D.zero_grad()
            G.zero_grad()
            g_loss.backward()
            G_solver.step()
            #######
            #print(fake.shape)
            #print(fake.cpu().data[:8].squeeze().numpy().shape)

            # =============== logging each iteration ===============#
            iteration = str(G_solver.state_dict()['state'][
                G_solver.state_dict()['param_groups'][0]['params'][0]]['step'])
            #print(type(iteration))
            #iteration = str(i)
            # plot sample voxels to Visdom every 300 iterations
            if int(iteration) % 300 == 0:
                #pickle_save_path = args.output_dir + args.pickle_dir + log_param
                #save_new_pickle(pickle_save_path, iteration, G, G_solver, D, D_solver)
                samples = fake.cpu().data[:8].squeeze().numpy()

                #print(samples.shape)
                for s in range(8):
                    plotVoxelVisdom(samples[s, ...], viz,
                                    "Iteration:{:.4}".format(iteration))

#                 image_path = args.output_dir + args.image_dir + log_param
#                 if not os.path.exists(image_path):
#                     os.makedirs(image_path)

#                 SavePloat_Voxels(samples, image_path, iteration)
# =============== each epoch save model or save image ===============#
            print(
                'Iter-{}; , D_loss : {:.4}, G_loss : {:.4}, D_acu : {:.4}, D_lr : {:.4}'
                .format(iteration, d_loss.item(), g_loss.item(),
                        d_total_acu.item(),
                        D_solver.state_dict()['param_groups'][0]["lr"]))

        epoch_end_time = time.time()

        if (epoch + 1) % args.image_save_step == 0:

            samples = fake.cpu().data[:8].squeeze().numpy()

            image_path = args.output_dir + args.image_dir + log_param
            if not os.path.exists(image_path):
                os.makedirs(image_path)

            SavePloat_Voxels(samples, image_path, iteration)

        if (epoch + 1) % args.pickle_step == 0:
            pickle_save_path = args.output_dir + args.pickle_dir + log_param
            save_new_pickle(pickle_save_path, iteration, G, G_solver, D,
                            D_solver)

        print("epoch time", (epoch_end_time - epoch_start_time) / 60)
        print("epoch %d ended" % (epoch))
        print("################################################")
Example #8
# # sample images for faster training
# def sample_zip(fn_in, fn_out, rate=0.01, seed=42):
#     np.random.seed(seed)
#     with zipfile.ZipFile(fn_in) as fin, zipfile.ZipFile(fn_out, "w") as fout:
#         sampled = filter(lambda _: np.random.rand() < rate, fin.filelist)
#         for zInfo in sampled:
#             fout.writestr(zInfo, fin.read(zInfo))

# sample_zip(train2014_zip, "../data/coco/train2014_sample_yoloV2.zip", rate = 0.01, seed = 42)
# sample_zip(val2014_zip, "../data/coco/val2014_sample_yoloV2.zip", rate = 0.01, seed = 42)

# In[12]:

# load prepared embeddings
train_img_embeds = utils.read_pickle(
    "../data/coco/extracted/train_img_embeds_yoloV2_{}.pickle".format(action))
train_img_fns = utils.read_pickle(
    "../data/coco/extracted/train_img_fns_yoloV2_{}.pickle".format(action))
val_img_embeds = utils.read_pickle(
    "../data/coco/extracted/val_img_embeds_yoloV2_{}.pickle".format(action))
val_img_fns = utils.read_pickle(
    "../data/coco/extracted/val_img_fns_yoloV2_{}.pickle".format(action))
# check shapes
print("training data: ", train_img_embeds.shape, len(train_img_fns))
print("valicatoin data: ", val_img_embeds.shape, len(val_img_fns))

# In[13]:

# check prepared samples of images
list(filter(lambda x: x.endswith("_sample_yoloV2.zip"), os.listdir(".")))
Example #9
    parser.add_argument(
        '--size', type=int,
        nargs='?', default=32, help='the embedding size')

    parser.add_argument(
        '--num_walks', type=int,
        nargs='?', default=10, help='the number of random walks to originate from each vertex')

    parser.add_argument(
        '--walk_length', type=int,
        nargs='?', default=80, help='the length of each random walk')

    args = parser.parse_args()
    size = args.size
    print('learning embeddings of dimension {}'.format(args.size))
    x = utils.read_pickle(args.x_path[0])
    g = BipartiteGraph(x)
    print('walk path: {}'.format(args.walk_path))
    print('x path: {}'.format(args.x_path))
    if args.walk_path == "":
        print('generating new random walk dataset')
        print('building edges')
        g.build_edge_array()
        print('generating walks')
        walks = g.generate_walks(args.num_walks, args.walk_length)
        df = pd.DataFrame(walks)
        walk_path = 'local_resources/walks_thresh10_num_{}_length_{}'.format(args.num_walks, args.walk_length)
        df.to_csv(walk_path, index=False, header=None)
    else:
        print('learning embeddings')
        walks = pd.read_csv(args.walk_path,
Example #10
def main(game, level, player_img, use_graph, draw_all_labels, draw_dup_labels, draw_path, show_score):

    # Create the Level
    level_obj = Level.generate_level_from_file(game, level)

    # Level saved files
    state_graph_file = "level_saved_files_%s/enumerated_state_graphs/%s/%s.gpickle" % (player_img, game, level)

    if game == "generated" and os.path.exists("level_saved_files_%s/generated_level_paths/%s.pickle" % (player_img, level)):
        generated_level_path_coords = read_pickle("level_saved_files_%s/generated_level_paths/%s.pickle" % (player_img, level))
    else:
        generated_level_path_coords = None

    if use_graph and os.path.exists(state_graph_file):
        print("***** USING ENUMERATED STATE GRAPH *****")
        state_graph = nx.read_gpickle(state_graph_file)
    else:
        print("***** USING MANUAL CONTROLS *****")
        state_graph = None

    edge_actions_dict = None if state_graph is None else nx.get_edge_attributes(state_graph, 'action')

    # Background
    FPS = 40  # frame rate
    ANI = 4  # animation cycles
    WORLD_X = min(level_obj.width, MAX_WIDTH)
    WORLD_Y = min(level_obj.height, MAX_HEIGHT)
    clock = pygame.time.Clock()
    pygame.init()
    world = pygame.display.set_mode([WORLD_X, WORLD_Y])
    BACKGROUND_COLOR = COLORS.get('DARK_GRAY')

    # Player
    player_model = Player(player_img, level_obj)
    player_view = PlayerView(player_img)
    player_list = pygame.sprite.Group()
    player_list.add(player_view)

    # Level
    platform_sprites = get_sprites(level_obj.get_platform_coords(), 'block_tile.png')
    goal_sprites = get_sprites(level_obj.get_goal_coords(), 'goal_tile.png')
    bonus_sprites = get_sprites(level_obj.get_bonus_coords(), 'bonus_tile.png')
    one_way_platform_sprites = get_sprites(level_obj.get_one_way_platform_coords(), 'one_way_block_tile.png')
    hazard_sprites = get_sprites(level_obj.get_hazard_coords(), 'hazard_tile.png')
    wall_sprites = get_sprites(level_obj.get_wall_coords(), 'block_tile.png')
    collected_bonus_tile_coords_dict = {}

    # Camera
    camera = Camera(Camera.camera_function, level_obj.width, level_obj.height, WORLD_X, WORLD_Y)

    # Setup drawing metatile labels
    if draw_all_labels or draw_dup_labels:
        metatile_labels, font_color, label_padding = \
            setup_metatile_labels(game, level, player_img, draw_all_labels, draw_dup_labels)

    # Setup drawing solution path
    if draw_path:
        path_font_color = COLORS.get('GREEN')
        start_font_color = COLORS.get('BLUE')
        goal_font_color = COLORS.get('RED')

        if generated_level_path_coords is not None:
            path_coords = generated_level_path_coords
            start_coord = generated_level_path_coords[0]
            goal_coord = generated_level_path_coords[-1]

        elif os.path.exists(state_graph_file):
            graph = nx.read_gpickle(state_graph_file)
            shortest_path_dict = shortest_path_xy(graph)
            path_coords = shortest_path_dict.get("path_coords")
            start_coord = shortest_path_dict.get("start_coord")
            goal_coord = shortest_path_dict.get("goal_coord")

        else:
            error_exit("No enumerated state graph available to draw solution path")

    # Input handling
    input_handler = Inputs()

    # Main Loop
    main = True

    while main:
        input_handler.onLoop()
        
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                main = False

            if event.type == pygame.KEYDOWN:
                if event.key == ord('q'):
                    pygame.quit()
                    main = False
                    sys.exit()
                elif event.key == ord('r'):
                    player_model.reset()
                    collected_bonus_tile_coords_dict = {}
                    platform_sprites = get_sprites(level_obj.get_platform_coords(), 'block_tile.png')

            input_handler.onEvent(event)

        if not main:
            break

        world.fill(BACKGROUND_COLOR)
        camera.update(player_view)  # set camera to track player

        # Update Player model and view
        player_model.update(action=input_handler.getAction(),
                            precomputed_graph=state_graph, edge_actions_dict=edge_actions_dict)

        player_view.update(player_model.state.x, player_model.state.y,
                           player_model.half_player_w, player_model.half_player_h)

        # Update the current score
        hit_bonus_coord = player_model.get_hit_bonus_coord()
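        # hit_bonus_coord is a direction code ('N', 'NE' or 'NW'), or '' if no bonus tile was hit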

        if hit_bonus_coord != '':
            hit_bonus_coord_x = player_model.state.x // TILE_DIM
            hit_bonus_coord_y = player_model.state.y // TILE_DIM - 1
            if hit_bonus_coord == 'N':
                pass
            elif hit_bonus_coord == 'NE':
                hit_bonus_coord_x += 1
            elif hit_bonus_coord == 'NW':
                hit_bonus_coord_x -= 1
            else:
                error_exit("unrecognized hit bonus coord")

            hit_bonus_coord_xy = (hit_bonus_coord_x * TILE_DIM, hit_bonus_coord_y * TILE_DIM)

            if hit_bonus_coord_xy not in level_obj.get_bonus_coords():
                error_exit("hit bonus tile that is not there: " + str(hit_bonus_coord_xy))

            if collected_bonus_tile_coords_dict.get(hit_bonus_coord_xy) is None:
                collected_bonus_tile_coords_dict[hit_bonus_coord_xy] = 1
                platform_sprites.add(Tile(hit_bonus_coord_xy[0], hit_bonus_coord_xy[1], 'block_tile.png'))

        score = len(collected_bonus_tile_coords_dict) * 10

        # Draw sprites
        entities_to_draw = []
        entities_to_draw += list(bonus_sprites)  # draw bonus tiles
        entities_to_draw += list(platform_sprites)  # draw platforms tiles
        entities_to_draw += list(one_way_platform_sprites)  # draw one-way platform tiles
        entities_to_draw += list(hazard_sprites)
        entities_to_draw += list(wall_sprites)
        entities_to_draw += list(player_list)  # draw player
        entities_to_draw += list(goal_sprites)  # draw goal tiles

        for e in entities_to_draw:
            world.blit(e.image, camera.apply(e))

        # Draw metatile labels
        if draw_all_labels or draw_dup_labels:
            for coord in level_obj.get_all_possible_coords():  # draw metatile border outlines
                tile_rect = pygame.Rect(coord[0], coord[1], TILE_DIM, TILE_DIM)
                tile_rect = camera.apply_to_rect(tile_rect)  # adjust based on camera
                pygame.draw.rect(world, font_color, tile_rect, 1)

            for label in metatile_labels:  # draw metatile labels
                surface, label_x, label_y = label
                label_x, label_y = camera.apply_to_coord((label_x, label_y))
                world.blit(surface, (label_x + label_padding[0], label_y + label_padding[1]))

        # Draw level solution path
        if draw_path:
            for coord in path_coords:
                if coord == start_coord:
                    color = start_font_color
                elif coord == goal_coord:
                    color = goal_font_color
                else:
                    color = path_font_color
                coord = eval(coord)
                path_component = pygame.Rect(coord[0], coord[1], 2, 2)
                path_component = camera.apply_to_rect(path_component)
                pygame.draw.rect(world, color, path_component, 1)

        # Draw text labels
        label_rect_pairs = []
        if player_model.goal_reached():
            score += 50
            labels = [
                ("You Win!", 50, COLORS.get('GREEN')),
                ("Score: %d" % score, 30, COLORS.get('YELLOW')),
                ("Press 'R' to replay or 'Q' to quit", 30, COLORS.get('YELLOW'))
            ]
            label_rect_pairs = get_label_rect_pairs(center_x=WORLD_X/2, labels=labels)

        elif player_model.is_dead():
            labels = [
                ("Game Over", 50, COLORS.get('RED')),
                ("Score: %d" % score, 30, COLORS.get('YELLOW')),
                ("Press 'R' to replay or 'Q' to quit", 30, COLORS.get('YELLOW'))
            ]
            label_rect_pairs = get_label_rect_pairs(center_x=WORLD_X / 2, labels=labels)

        elif show_score:
            labels = [("Score: %d" % score, 50, COLORS.get('YELLOW'))]
            label_rect_pairs = get_label_rect_pairs(center_x=WORLD_X / 2, labels=labels)

        for label, label_rect in label_rect_pairs:
            world.blit(label, label_rect)

        pygame.display.flip()
        clock.tick(FPS)
Example #11
        else:
            # Use randomly drawn datasets with at least one success for
            # categories that have too few successful experiments for training
            run_non_meta_model(base_model,
                               common_params,
                               model_params,
                               category,
                               success=True)

    # Gradient Boosting
    base_model = GradientBoosting
    model_params = gradientboosting_params
    for category in categories:
        if '4_ii' not in category and '5_ii' not in category:
            # Use regular randomly drawn datasets for categories
            # that have sufficient successful experiments for training
            run_non_meta_model(base_model, common_params, model_params,
                               category)
        else:
            # Use randomly drawn datasets with at least one success for
            # categories that have too few successful experiments for training
            run_non_meta_model(base_model,
                               common_params,
                               model_params,
                               category,
                               success=True)

    # Use cv_stats.pkl to plot all graphs
    cv_stats = read_pickle(common_params['stats_path'])
    plot_all_graphs(cv_stats)
Example #12
def main(trial, levels, num_sol, asp, state_graph):

    if not (asp or state_graph):
        utils.error_exit(
            "Must specify at least one validation test to run: --asp or --state_graph"
        )

    # Get file formats
    config_formats = TRIAL_CONFIG_FORMATS.get(trial)
    if config_formats is None:
        utils.error_exit("--trial must be one of %s" %
                         str(list(TRIAL_CONFIG_FORMATS.keys())))
    prolog_file_format = "level_saved_files_block/prolog_files/%s.pl"
    model_str_file_format = "level_saved_files_block/generated_level_model_strs/%s.txt"
    assignments_dict_file_format = "level_saved_files_block/generated_level_assignments_dicts/%s.pickle"

    # Initialize validation counts
    asp_checked_count = 0
    asp_valid_count = 0
    state_graph_checked_count = 0
    state_graph_valid_count = 0

    # Validate each solver run
    for level in levels:
        for config_file_format in config_formats:
            for sol in range(num_sol):
                prolog_file = prolog_file_format % level
                prolog_filename = utils.get_basepath_filename(
                    prolog_file, 'pl')
                config_file = config_file_format % level
                config_filename = utils.get_basepath_filename(
                    config_file, 'json')
                answer_set_filename = '_'.join(
                    [prolog_filename, config_filename,
                     'a%d' % sol])

                if asp:
                    # Determine ASP checks to perform based on config file contents
                    config_file_contents = utils.read_json(config_file)
                    config = config_file_contents['config']
                    require_all_platforms_reachable = True
                    require_all_bonus_tiles_reachable = True
                    if config.get(
                            'require_all_platforms_reachable') is not None:
                        require_all_platforms_reachable = eval(
                            config['require_all_platforms_reachable'])
                    if config.get(
                            'require_all_bonus_tiles_reachable') is not None:
                        require_all_bonus_tiles_reachable = eval(
                            config['require_all_bonus_tiles_reachable'])

                    prolog_file_info = get_prolog_file_info(prolog_file)
                    tile_ids = get_tile_ids_dictionary(prolog_file_info)
                    model_str_file = model_str_file_format % answer_set_filename

                    if os.path.exists(model_str_file):
                        model_str = utils.read_txt(model_str_file)
                        asp_valid = Solver.asp_is_valid(
                            check_path=True,
                            check_onground=require_all_platforms_reachable,
                            check_bonus=require_all_bonus_tiles_reachable,
                            model_str=model_str,
                            player_img='block',
                            answer_set_filename=answer_set_filename,
                            tile_ids=tile_ids,
                            save=False)
                        status = "ASP VALID" if asp_valid else "ASP INVALID"
                        print("%s: %s" % (answer_set_filename, status))
                        asp_checked_count += 1
                        asp_valid_count += 1 if asp_valid else 0

                if state_graph:
                    assignments_dict_file = assignments_dict_file_format % answer_set_filename
                    if os.path.exists(assignments_dict_file):
                        assignments_dict = utils.read_pickle(
                            assignments_dict_file)
                        valid_path = Solver.get_state_graph_valid_path(
                            assignments_dict=assignments_dict,
                            player_img='block',
                            prolog_filename=prolog_filename,
                            answer_set_filename=answer_set_filename,
                            save=True)
                        status = "GRAPH VALID" if valid_path else "GRAPH INVALID"
                        print("%s: %s" % (answer_set_filename, status))
                        state_graph_checked_count += 1
                        state_graph_valid_count += 1 if valid_path is not None else 0

    # Print validation results summary
    if asp:
        print("ASPs Checked: %d" % asp_checked_count)
        print("ASPs Valid: %d" % asp_valid_count)

    if state_graph:
        print("State Graphs Checked: %d" % state_graph_checked_count)
        print("State Graphs Valid: %d" % state_graph_valid_count)
Example #13
import utils
# import lfw_dataset
# import numpy as np
# data, attr = lfw_dataset.load_lfw_dataset(dimx = 36, dimy = 36)

# data = np.float32(data) / 255.

# utils.save_pickle(data, "data.pickle")
# utils.save_pickle(data, "attr.pickle")

data = utils.read_pickle("data.pickle")
attr = utils.read_pickle("attr.pickle")

IMG_SHAPE = data.shape[1:]
CODE_SIZE = 256
Example #14
    Xsamp = Xsamp[:, vertices]
    ysamp = ysingle[vertices, :]
    Xout, yout = prune_disconnected(Xsamp, ysamp)

    utils.persist_sparse_data(folder, Xout, yout[:, 32:])
    return Xout, yout[:, 32:]


def prune_disconnected(X, y):
    keep = np.where(X.sum(axis=1) > 0)[0]
    Xkeep = X[keep, :]
    Xkeep = Xkeep[:, keep]
    ykeep = y[keep, :]
    return Xkeep, ykeep


if __name__ == '__main__':
    X = utils.read_pickle('../../local_resources/blogcatalog/X.p')
    y = utils.read_pickle('../../local_resources/blogcatalog/y.p')
    xpath = '../../local_resources/blogcatalog_121_sample/X.p'
    ypath = '../../local_resources/blogcatalog_121_sample/y.p'
    folder = '../../local_resources/blogcatalog_121_sample'
    Xsamp, ysamp = sample_graph(X, y, folder)
    print(X.sum())
    print('number of vertices connected to one or more other vertices: ',
          sum(Xsamp.sum(axis=1) > 0))
    print('label distribution: ', ysamp.sum(axis=0))
    print(Xsamp.sum())
    print(Xsamp.shape)
    print(ysamp.shape)
Example #15
import utils
# import func
import pickle
import numpy as np

from func import caption_tokens_to_indices

train_img_embeds = utils.read_pickle("train_img_embeds.pickle")
train_img_fns = utils.read_pickle("train_img_fns.pickle")
val_img_embeds = utils.read_pickle("val_img_embeds.pickle")
val_img_fns = utils.read_pickle("val_img_fns.pickle")
train_captions = utils.read_pickle("train_captions.pickle")
val_captions = utils.read_pickle("val_captions.pickle")
vocab = utils.read_pickle("vocabs.pickle")

# swap the keys and values of vocab
vocab_inverse = {value: key for key, value in vocab.items()}

train_captions_index = np.array(
    caption_tokens_to_indices(train_captions, vocab))
val_captions_index = np.array(caption_tokens_to_indices(val_captions, vocab))

# train_captions = func.get_captions_for_fns(train_img_fns, "captions_train-val2014.zip",
#                                       "annotations/captions_train2014.json")

# val_captions = func.get_captions_for_fns(val_img_fns, "captions_train-val2014.zip",
#                                        "annotations/captions_val2014.json")

# with open("val_captions.pickle", "wb") as fn:
#     pickle.dump(val_captions, fn)
Example #16
def run(seed):

    # create folders for scores models and preds
    folder_models = './models/age/scores/'
    if not os.path.exists(folder_models):
        os.makedirs(folder_models)

    folder_preds = './predicts/age/scores/'
    if not os.path.exists(folder_preds):
        os.makedirs(folder_preds)

    print('Loading data...')

    # load biases
    ic_bias = read_pickle('./data/biases/ic_biases.pickle')
    ic_bias_site = read_pickle('./data/biases/ic_biases_site.pickle')
    fnc_bias = read_pickle('./data/biases/fnc_biases.pickle')
    fnc_bias_site = read_pickle('./data/biases/fnc_biases_site.pickle')
    pca_bias = read_pickle('./data/biases/200pca_biases.pickle')
    pca_bias_site = read_pickle('./data/biases/200pca_biases_site.pickle')

    # load classifier output and add extra site2 ids
    extra_site = pd.DataFrame()
    extra_site['Id'] = np.load('./predicts/classifier/site2_test_new_9735.npy')

    # load competition data
    ids_df = pd.read_csv('./data/raw/reveal_ID_site2.csv')
    fnc_df = pd.read_csv('./data/raw/fnc.csv')
    loading_df = pd.read_csv('./data/raw/loading.csv')
    labels_df = pd.read_csv('./data/raw/train_scores.csv')

    ids_df = ids_df.append(extra_site)
    print('Detected Site2 ids count: ', ids_df['Id'].nunique())

    # load created features
    agg_df = pd.read_csv('./data/features/agg_feats.csv')
    im_df = pd.read_csv('./data/features/im_feats.csv')
    dl_df = pd.read_csv('./data/features/dl_feats.csv')

    pca_df = pd.read_csv('./data/features/200pca_feats/200pca_3d_k0.csv')
    for i in range(1, 6):
        part = pd.read_csv(
            './data/features/200pca_feats/200pca_3d_k{}.csv'.format(i))
        del part['Id']
        pca_df = pd.concat((pca_df, part), axis=1)

    # merge data
    ic_cols = list(loading_df.columns[1:])
    fnc_cols = list(fnc_df.columns[1:])
    agg_cols = list(agg_df.columns[1:])
    im_cols = list(im_df.columns[1:])
    pca_cols = list(pca_df.columns[1:])
    dl_cols = list(dl_df.columns[1:])

    df = fnc_df.merge(loading_df, on='Id')
    df = df.merge(agg_df, how='left', on='Id')
    df = df.merge(im_df, how='left', on='Id')
    df = df.merge(pca_df, how='left', on='Id')
    df = df.merge(dl_df, how='left', on='Id')
    df = df.merge(labels_df, how='left', on='Id')

    del loading_df, fnc_df, agg_df, im_df, pca_df
    gc.collect()

    # split train and test
    df.loc[df['Id'].isin(labels_df['Id']), 'is_test'] = 0
    df.loc[~df['Id'].isin(labels_df['Id']), 'is_test'] = 1

    train = df.query('is_test==0')
    del train['is_test']
    test = df.query('is_test==1')
    del test['is_test']
    y = train['age'].copy().reset_index(drop=True)

    # apply biases
    for c in ic_bias_site.keys():
        test.loc[~test['Id'].isin(ids_df['Id']), c] += ic_bias[c]
        test.loc[test['Id'].isin(ids_df['Id']), c] += ic_bias_site[c]

    for c in fnc_bias_site.keys():
        test.loc[~test['Id'].isin(ids_df['Id']), c] += fnc_bias[c]
        test.loc[test['Id'].isin(ids_df['Id']), c] += fnc_bias_site[c]

    for c in pca_bias_site.keys():
        test.loc[~test['Id'].isin(ids_df['Id']), c] += pca_bias[c]
        test.loc[test['Id'].isin(ids_df['Id']), c] += pca_bias_site[c]

    # save df for scaling
    df_scale = pd.concat([train, test], axis=0)

    # I. Create fnc score
    print('Creating FNC score...')

    # prepare datasets for fnc score
    train_for_score, test_for_score = scale_select_data(
        train, test, df_scale, fnc_cols)

    # define models
    names = ['RGF', 'ENet', 'BRidge', 'Huber', 'OMP']
    names = [name + '_fnc_seed{}'.format(seed) for name in names]
    pack = [
        RGFRegressor(max_leaf=1000, reg_depth=5, normalize=True),
        ElasticNet(alpha=0.05, l1_ratio=0.5, random_state=0),
        BayesianRidge(),
        HuberRegressor(epsilon=2.5, alpha=1),
        OrthogonalMatchingPursuit(n_nonzero_coefs=300)
    ]

    # train models
    zoo = TrendsModelSklearn(pack, seed=seed)
    zoo.fit([train_for_score] * 5, y)
    score_blend = zoo.blend_oof()
    pred = zoo.predict([test_for_score] * 5, names)

    # save oof, pred, models
    np.save(folder_preds + 'fnc_score_seed{}.npy'.format(seed), score_blend)
    np.save(folder_preds + 'fnc_score_test_seed{}.npy'.format(seed), pred)
    zoo.save_models(names, folder=folder_models)

    # II. Create agg score
    print('Creating AGG score...')

    # prepare datasets for agg score
    train_for_score, test_for_score = scale_select_data(
        train, test, df_scale, agg_cols)

    # define models
    names = ['RGF', 'ENet', 'Huber']
    names = [name + '_agg_seed{}'.format(seed) for name in names]
    pack = [
        RGFRegressor(max_leaf=1000,
                     reg_depth=5,
                     min_samples_leaf=100,
                     normalize=True),
        ElasticNet(alpha=0.05, l1_ratio=0.3, random_state=0),
        HuberRegressor(epsilon=2.5, alpha=1)
    ]

    # train models
    zoo = TrendsModelSklearn(pack, seed=seed)
    zoo.fit([train_for_score] * 3, y)
    score_blend = zoo.blend_oof()
    pred = zoo.predict([test_for_score] * 3, names)

    # save oof, pred, models
    np.save(folder_preds + 'agg_score_seed{}.npy'.format(seed), score_blend)
    np.save(folder_preds + 'agg_score_test_seed{}.npy'.format(seed), pred)
    zoo.save_models(names, folder=folder_models)

    # III. Create pca score
    print('Creating PCA score...')

    # prepare datasets for pca score
    train_for_score, test_for_score = scale_select_data(
        train, test, df_scale, pca_cols)

    # define models
    names = ['RGF', 'ENet', 'BRidge', 'OMP']
    names = [name + '_pca_seed{}'.format(seed) for name in names]
    pack = [
        RGFRegressor(max_leaf=1000,
                     reg_depth=5,
                     min_samples_leaf=100,
                     normalize=True),
        ElasticNet(alpha=0.2, l1_ratio=0.2, random_state=0),
        BayesianRidge(),
        OrthogonalMatchingPursuit()
    ]

    # train models
    zoo = TrendsModelSklearn(pack, seed=seed)
    zoo.fit([train_for_score] * 4, y)
    score_blend = zoo.blend_oof()
    pred = zoo.predict([test_for_score] * 4, names)

    # save oof, pred, models
    np.save(folder_preds + 'pca_score_seed{}.npy'.format(seed), score_blend)
    np.save(folder_preds + 'pca_score_test_seed{}.npy'.format(seed), pred)
    zoo.save_models(names, folder=folder_models)

    # IV. Create im score
    print('Creating IM score...')

    # prepare datasets for im score
    train_for_score, test_for_score = scale_select_data(
        train, test, df_scale, im_cols)

    # define models
    names = ['RGF', 'ENet', 'BRidge', 'OMP']
    names = [name + '_im_seed{}'.format(seed) for name in names]
    pack = [
        RGFRegressor(max_leaf=1000,
                     reg_depth=5,
                     min_samples_leaf=100,
                     normalize=True),
        ElasticNet(alpha=0.2, l1_ratio=0.2, random_state=0),
        BayesianRidge(),
        OrthogonalMatchingPursuit()
    ]

    # train models
    zoo = TrendsModelSklearn(pack, seed=seed)
    zoo.fit([train_for_score] * 4, y)
    score_blend = zoo.blend_oof()
    pred = zoo.predict([test_for_score] * 4, names)

    # save oof, pred, models
    np.save(folder_preds + 'im_score_seed{}.npy'.format(seed), score_blend)
    np.save(folder_preds + 'im_score_test_seed{}.npy'.format(seed), pred)
    zoo.save_models(names, folder=folder_models)

    # V. Create dl score
    print('Creating DL score...')

    # prepare datasets for dl score
    train_for_score, test_for_score = scale_select_data(
        train, test, df_scale, dl_cols)

    # define models
    names = ['RGF', 'ENet', 'BRidge']
    names = [name + '_dl_seed{}'.format(seed) for name in names]
    pack = [
        RGFRegressor(max_leaf=1000,
                     reg_depth=5,
                     min_samples_leaf=100,
                     normalize=True),
        ElasticNet(alpha=0.2, l1_ratio=0.2, random_state=0),
        BayesianRidge()
    ]

    # train models
    zoo = TrendsModelSklearn(pack, seed=seed)
    zoo.fit([train_for_score] * 3, y)
    score_blend = zoo.blend_oof()
    pred = zoo.predict([test_for_score] * 3, names)

    # save oof, pred, models
    np.save(folder_preds + 'dl_score_seed{}.npy'.format(seed), score_blend)
    np.save(folder_preds + 'dl_score_test_seed{}.npy'.format(seed), pred)
    zoo.save_models(names, folder=folder_models)

    # VI. Training and predicting procedure
    print('Training has started...')
    print('Reading scores from ', folder_preds)

    # add scores
    for prefix in ['fnc', 'agg', 'im', 'pca', 'dl']:
        train[prefix +
              '_score'] = np.load(folder_preds +
                                  '{}_score_seed{}.npy'.format(prefix, seed))
        test[prefix + '_score'] = np.load(
            folder_preds + '{}_score_test_seed{}.npy'.format(prefix, seed))
    score_cols = [c for c in train.columns if c.endswith('_score')]

    # save df for scaling
    df_scale = pd.concat([train, test], axis=0)

    # create different datasets
    # linear
    linear_cols = sorted(
        list(
            set(ic_cols + fnc_cols + pca_cols + agg_cols + im_cols) -
            set(['IC_20'])))
    train_linear, test_linear = scale_select_data(train, test, df_scale,
                                                  linear_cols)

    # kernel
    kernel_cols = sorted(list(set(ic_cols + pca_cols) - set(['IC_20'])))
    train_kernel, test_kernel = scale_select_data(train=train,
                                                  test=test,
                                                  df_scale=df_scale,
                                                  cols=kernel_cols,
                                                  scale_cols=pca_cols)

    # score
    sc_cols = sorted(list(set(ic_cols + score_cols) - set(['IC_20'])))
    train_sc, test_sc = scale_select_data(train, test, df_scale, sc_cols)

    # dl
    dict_cols = sorted(
        list(
            set(ic_cols + fnc_cols + dl_cols + im_cols + agg_cols) -
            set(['IC_20'])))
    train_dl, test_dl = scale_select_data(train, test, df_scale, dict_cols)

    # learning process on different datasets
    names = ['MLP', 'RGF', 'SVM', 'BR', 'OMP', 'EN', 'KR']
    names = [name + '_seed{}'.format(seed) for name in names]
    pack = [
        MLPRegressor(activation='tanh', random_state=0),
        RGFRegressor(max_leaf=1500, loss='Abs'),
        NuSVR(C=10, nu=0.4, kernel='rbf'),
        BayesianRidge(),
        OrthogonalMatchingPursuitCV(),
        ElasticNet(alpha=0.5, l1_ratio=0.7, random_state=0),
        KernelRidge(kernel='poly', alpha=0.5)
    ]

    zoo = TrendsModelSklearn(pack, seed=seed)
    zoo.fit([train_sc] * 2 + [train_kernel] + [train_linear] * 2 +
            [train_dl] * 2, y)
    de_blend = zoo.blend_oof()
    preds = zoo.predict([test_sc] * 2 + [test_kernel] + [test_linear] * 2 +
                        [test_dl] * 2,
                        names,
                        is_blend=False)

    # rewrite folders for models and preds
    folder_models = './models/age/stack/'
    if not os.path.exists(folder_models):
        os.makedirs(folder_models)

    folder_preds = './predicts/age/stack/'
    if not os.path.exists(folder_preds):
        os.makedirs(folder_preds)

    print('Saving models to', folder_models)
    print('Saving predictions to', folder_preds)

    # save oofs and models
    zoo.save_oofs(names, folder=folder_preds)
    zoo.save_models(names, folder=folder_models)

    # stacking predictions
    print('Stacking predictions...')
    folds = KFold(n_splits=10, shuffle=True, random_state=0)
    stack = pd.DataFrame(zoo.oof_preds).T
    stack.columns = names
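    # out-of-fold predictions of the base models are the features for the stacking regressors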

    model_stacker_rgf = RGFRegressor(max_leaf=1000,
                                     reg_depth=25,
                                     verbose=False)
    rgf_pred = cross_val_predict(model_stacker_rgf,
                                 stack,
                                 y.dropna(),
                                 cv=folds,
                                 n_jobs=-1)

    model_stacker_br = BayesianRidge()
    br_pred = cross_val_predict(model_stacker_br,
                                stack,
                                y.dropna(),
                                cv=folds,
                                n_jobs=-1)

    model_stacker_rgf.fit(stack, y.dropna())
    model_stacker_br.fit(stack, y.dropna())

    # save models
    save_pickle(model_stacker_br,
                folder_models + 'BRidge_stack_seed{}'.format(seed))
    save_pickle(model_stacker_rgf,
                folder_models + 'RGF_stack_seed{}'.format(seed))
    print('Final age NMAE: {:.5f}'.format(
        NMAE(y, 0.75 * br_pred + 0.25 * rgf_pred)))

    test_preds = pd.DataFrame(preds).T
    test_preds.columns = names

    age_prediction = pd.DataFrame()
    age_prediction['Id'] = test['Id'].values
    age_prediction['pred'] = 0.25 * model_stacker_rgf.predict(
        test_preds) + 0.75 * model_stacker_br.predict(test_preds)
    age_prediction.to_csv(folder_preds + 'age_stack_seed{}.csv'.format(seed),
                          index=False)
    print('age seed pred is saved as',
          folder_preds + 'age_stack_seed{}.csv'.format(seed))
Example #17
def batch_size_scenario():
    """
    Generate embeddings using different batch sizes for the ~1000 vertex polblogs network
    :return:
    """
    import visualisation
    s = datetime.datetime.now()
    y_path = '../../local_resources/political_blogs/y.p'
    x_path = '../../local_resources/political_blogs/X.p'
    y = utils.read_pickle(y_path)
    log_path = '../../local_resources/tf_logs/polblogs/'
    walk_path = '../../local_resources/political_blogs/walks_n1_l10.csv'
    size = 2  # dimensionality of the embedding
    batch_sizes = [1, 2, 4, 8, 16, 32, 64, 128]
    embeddings = []
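    # train one embedding per batch size, then compare them with downstream classifiers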
    for batch_size in batch_sizes:
        params = Params(walk_path,
                        batch_size=batch_size,
                        embedding_size=size,
                        neg_samples=5,
                        skip_window=5,
                        num_pairs=1500,
                        statistics_interval=10.0,
                        initial_learning_rate=0.1,
                        save_path=log_path,
                        epochs=5,
                        concurrent_steps=4)

        path = '../../local_resources/political_blogs/embeddings/Win_batch_{}_{}.csv'.format(
            batch_size, utils.get_timestamp())

        embedding_in, embedding_out = HCE.main(params)

        visualisation.plot_poincare_embedding(
            embedding_in, y,
            '../../results/political_blogs/figs/poincare_polar_Win_batch_{}_{}.pdf'
            .format(batch_size, utils.get_timestamp()))
        visualisation.plot_poincare_embedding(
            embedding_out, y,
            '../../results/political_blogs/figs/poincare_polar_Wout_batch_{}_{}.pdf'
            .format(batch_size, utils.get_timestamp()))
        df_in = pd.DataFrame(data=embedding_in,
                             index=np.arange(embedding_in.shape[0]))
        df_in.to_csv(path, sep=',')
        df_out = pd.DataFrame(data=embedding_out,
                              index=np.arange(embedding_out.shape[0]))
        df_out.to_csv(
            '../../local_resources/political_blogs/embeddings/Wout_batch_{}_{}.csv'
            .format(batch_size, utils.get_timestamp()),
            sep=',')
        print('political blogs embedding generated in: ',
              datetime.datetime.now() - s)
        embeddings.append(embedding_in)

    x, y = utils.read_data(x_path, y_path, threshold=0)

    names = [[str(batch_size)] for batch_size in batch_sizes]
    n_folds = 10
    results = run_detectors.run_all_datasets(embeddings, y, names, classifiers,
                                             n_folds)
    all_results = utils.merge_results(results, n_folds)
    results, tests = utils.stats_test(all_results)
    # tests[0]/tests[1] presumably hold the macro- and micro-averaged significance tests;
    # write them to separate files so the second does not overwrite the first
    tests[0].to_csv('../../results/political_blogs/batch_size_pvalues_macro' +
                    utils.get_timestamp() + '.csv')
    tests[1].to_csv('../../results/political_blogs/batch_size_pvalues_micro' +
                    utils.get_timestamp() + '.csv')
    print('macro', results[0])
    print('micro', results[1])
    macro_path = ('../../results/political_blogs/batch_size_macro' +
                  utils.get_timestamp() + '.csv')
    micro_path = ('../../results/political_blogs/batch_size_micro' +
                  utils.get_timestamp() + '.csv')
    results[0].to_csv(macro_path, index=True)
    results[1].to_csv(micro_path, index=True)

    return path
Example #18
def train(args):
    # WGAN-GP related params
    lambda_gp = 10  # gradient penalty coefficient
    n_critic = 5  # critic updates per generator update

    hyparam_list = [
        ("model", args.model_name),
        ("cube", args.cube_len),
        ("bs", args.batch_size),
        ("g_lr", args.g_lr),
        ("d_lr", args.d_lr),
        ("z", args.z_dis),
        ("bias", args.bias),
    ]

    hyparam_dict = OrderedDict(((arg, value) for arg, value in hyparam_list))
    log_param = make_hyparam_string(hyparam_dict)
    print(log_param)

    #define different paths
    pickle_path = "." + args.pickle_dir + log_param
    image_path = args.output_dir + args.image_dir + log_param
    pickle_save_path = args.output_dir + args.pickle_dir + log_param

    N = None  # None for the whole dataset
    VOL_SIZE = 64
    train_path = pathlib.Path("../Vert_dataset")
    dataset = VertDataset(train_path,
                          n=N,
                          transform=transforms.Compose(
                              [ResizeTo(VOL_SIZE),
                               transforms.ToTensor()]))
    print('Number of samples: ', len(dataset))
    dset_loaders = torch.utils.data.DataLoader(dataset,
                                               batch_size=args.batch_size,
                                               shuffle=False,
                                               num_workers=0)
    print('Number of batches: ', len(dset_loaders))

    #  Build the model
    D = _D(args)
    G = _G(args)

    #Create the solvers
    D_solver = optim.Adam(D.parameters(), lr=args.d_lr, betas=args.beta)
    G_solver = optim.Adam(G.parameters(), lr=args.g_lr, betas=args.beta)

    if torch.cuda.device_count() > 1:
        D = nn.DataParallel(D)
        G = nn.DataParallel(G)
        print("Using {} GPUs".format(torch.cuda.device_count()))
        D.cuda()
        G.cuda()

    elif torch.cuda.is_available():
        print("using cuda")
        D.cuda()
        G.cuda()

    #Load checkpoint if available
    read_pickle(pickle_path, G, G_solver, D, D_solver)

    G_losses = []
    D_losses = []

    for epoch in range(args.n_epochs):
        epoch_start_time = time.time()
        print("epoch %d started" % (epoch))
        for i, X in enumerate(dset_loaders):
            #print(X.shape)
            X = X.view(-1, args.cube_len * args.cube_len * args.cube_len)
            X = var_or_cuda(X)
            X = X.type(torch.cuda.FloatTensor)
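            # sample one latent vector per real example in the batch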
            Z = generateZ(num_samples=X.size(0), z_size=args.z_size)

            #Train the critic
            d_loss, Wasserstein_D, gp = train_critic(X, Z, D, G, D_solver,
                                                     G_solver)

            # Train the generator every n_critic steps
            if i % n_critic == 0:
                Z = generateZ(num_samples=X.size(0), z_size=args.z_size)
                g_loss = train_gen(Z, D, G, D_solver, G_solver)

            # log every iteration; the global step is read from the generator optimizer state
            iteration = str(G_solver.state_dict()['state'][
                G_solver.state_dict()['param_groups'][0]['params'][0]]['step'])
            print('Iter-{}; D_loss: {:.4}, G_loss: {:.4}, W-distance: {:.4}, GP: {:.4}'.format(
                iteration, d_loss.item(), g_loss.item(), Wasserstein_D.item(), gp.item()))
        ## End of epoch
        epoch_end_time = time.time()

        #Plot the losses each epoch
        G_losses.append(g_loss.item())
        D_losses.append(d_loss.item())
        plot_losess(G_losses, D_losses, epoch)

        if (epoch + 1) % args.image_save_step == 0:
            print("Saving voxels")
            Z = generateZ(num_samples=8, z_size=args.z_size)
            gen_output = G(Z)
            samples = gen_output.cpu().data[:8].squeeze().numpy()
            samples = samples.reshape(-1, args.cube_len, args.cube_len,
                                      args.cube_len)
            Save_Voxels(samples, image_path, iteration)

        if (epoch + 1) % args.pickle_step == 0:
            print("Pickeling the model")
            save_new_pickle(pickle_save_path, iteration, G, G_solver, D,
                            D_solver)

        print("epoch time", (epoch_end_time - epoch_start_time) / 60)
        print("epoch %d ended" % (epoch))
        print("################################################")
Example #19
from __future__ import print_function
import numpy as np
import random
# import argparse
# import matplotlib as mpl
# mpl.use('Agg', warn=False)
import matplotlib.pyplot as plt
from nn_mnist_jellyfish import NeuralNetwork
# import mnist
import skl
import utils

imgs1 = skl.get_imgs_by_number(0)
imgs2 = skl.get_imgs_by_number(3)

strength_matrix = utils.read_pickle('pkl/nn_mnist_jellyfish_0.pkl')

# for label, img in imgs1:
propagated_1 = []
propagated_2 = []
r = range(np.min([len(imgs1), len(imgs2)]))
index = random.choice(r)
for i in range(10000):
    propagated_1.append(
        NeuralNetwork.validate(imgs1[index][1], strength_matrix))
    propagated_2.append(
        NeuralNetwork.validate(imgs2[index][1], strength_matrix))
propagated_1 = np.array(propagated_1)
propagated_2 = np.array(propagated_2)

fig, axes = plt.subplots(2, 1, figsize=(3, 3))
Example #20
# Create model
msg_net = nn.Sequential(nn.Linear(4, args.hs_1), nn.Tanh(),
                        nn.Linear(args.hs_1, args.hs_1), nn.Tanh(),
                        nn.Linear(args.hs_1, args.hs_1), nn.Tanh(),
                        nn.Linear(args.hs_1, args.d))
aggr_net = nn.Sequential(nn.Linear(args.d + 1, args.hs_1), nn.Tanh(),
                         nn.Linear(args.hs_1, args.hs_1), nn.Tanh(),
                         nn.Linear(args.hs_1, args.hs_1), nn.Tanh(),
                         nn.Linear(args.hs_1, 1))

model = ModelDirichlet(aggr_net, msg_net)
model.apply(utils.weights_init)
F = DynamicsFunction(model).to(device)
print("Num. of params: {:d}".format(utils.get_parameters_count(model)))

data = utils.read_pickle(['t', 'x', 'u', 'bcs_dicts'], args.data_path)
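# in this project, read_pickle appears to take the list of keys to pull from the pickled dict and returns a dict-like object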
dataset = utils.generate_torchgeom_dataset(data)
bcs_dicts = data['bcs_dicts']

if args.batch_size is None:
    batch_size = len(dataset)
else:
    batch_size = args.batch_size

loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

optimizer = optim.Rprop(F.parameters(), lr=args.lr, step_sizes=(1e-8, 10.))
loss_fn = nn.MSELoss()

# Training
ts = dataset[0].t.shape[0]  # assumes the same time grid for all simulations
Example #21
import argparse
import datetime
import numpy as np
import skl
import utils
import sys
# NeuralNetwork is assumed to come from one of the project's nn_* modules; its import was cut from this snippet

parser = argparse.ArgumentParser()
parser.add_argument('-n', action="store", dest="number")
parser.add_argument('-i',
                    action="store",
                    dest="iterations",
                    default=25000,
                    help="default: 25000")
args = parser.parse_args()
number = int(args.number)
iterations = int(args.iterations)

best_images = utils.read_pickle('best_images.pkl')

# sigmoid-shaped strength function: equals 0.05 at x = 0 and saturates near 0.95 for large positive x
pf = lambda x: (1 / (1 + np.exp(-1 * 10 * x)) - .5) * 1.8 + .05
nn = NeuralNetwork(strength_function=pf, image_scale=8)

img = best_images[number]
print('%s' % number)

start_time = datetime.datetime.now()

for i in range(iterations):
    nn.propagate_once(img, gray_max=16)

end_time = datetime.datetime.now()
print('start time:', start_time, 'stop time: ', end_time)
Example #22
# the opening of this list was truncated in the snippet; the name is inferred from its use below
validators = [
    lambda img, matrix: NeuralNetwork.validate_linear(
        img, matrix, power=3, weight=100), lambda img, matrix: NeuralNetwork.
    validate_threshold(img, matrix, power=3, threshhold=.2, weight=10),
    lambda img, matrix: NeuralNetwork.validate_threshold_2(
        img, matrix, power=3, weight=100)
]
# print('threshhold %s power %s weight %s' % (threshhold, power, weight))

if num >= 0:
    imgs = skl.get_imgs_by_number(num)
else:
    imgs = skl.get_imgs_by_number()
# print('test imgs #', size)

strength_matrix_l = [
    utils.read_pickle('pkl/nn_growable_' + str(i) + '.pkl') for i in range(10)
]
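# one trained strength matrix per digit (0-9); the digit whose matrix scores highest is taken as the prediction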

correct = .0
trails = .0
for i in range(iterations):
    label, img = random.choice(imgs)
    scores_a = np.array([
        validators[0](img, strength_matrix)
        for strength_matrix in strength_matrix_l
    ])
    if label == random.choice(np.where(scores_a == scores_a.max())[0]):
        correct += 1
        if not (i % 1000):
            # print(label, scores_a)
            pass
Example #23
args = parser.parse_args()
iterations = int(args.iterations)
num = int(args.num)

'''
threshhold = 0.8
weight = 100
print('threshhold %s weight %s' % (threshhold, weight))
'''

if num >= 0:
    imgs = skl.get_imgs_by_number(num)
else:
    imgs = skl.get_imgs_by_number()

strength_matrix_l = [
    utils.read_pickle('pkl/nn_meshed_' + str(i) + '.pkl') for i in range(10)
]

correct = .0
trails = .0
for i in range(iterations):
    trails += 1
    label, img = random.choice(imgs)
    scores_a = np.array([
        NeuralNetwork.validate(img, strength_matrix, gray_max=16.)
        for strength_matrix in strength_matrix_l
    ])
    if label == random.choice(np.where(scores_a == scores_a.max())[0]):
        correct += 1
        if not (i % 10) and i > 0:
            # print(round(correct / trails * 100, 2), label, scores_a)
            pass


Example #24
import pickle
from utils import sample
from utils import read_pickle
from utils import print_sample
import numpy as np
## Generate Names
# the pickle stores a tuple whose first element holds the (ix_to_char, char_to_ix) mappings
ix_to_char, char_to_ix = read_pickle('datasets/ix_char_ix.pickle')[0]
def generate(parameters, seed, names=20):
    for name in range(names):
        # Sample indices and print them
        sampled_indices = sample(parameters, char_to_ix, seed)
        print_sample(sampled_indices, ix_to_char)
        seed += 1

def main():
    print('Program Loaded!')
    seed = np.random.randint(100)    
    sel = input('1 for Boys Names\n2 for Girls Names\n3 for Combined\n>>>')
    if sel in ['1','2','3']:
        names = int(input('ENTER NUMBER OF NAMES TO BE DISPLAYED\n>>>'))
        print('>>>')
        if sel == '1':
            parameters = read_pickle('datasets/boy_params.txt')[0]
            generate(parameters, seed, names)
        if sel == '2':
            parameters = read_pickle('datasets/girl_params.txt')[0]
            generate(parameters, seed, names)
        if sel == '3':
            parameters = read_pickle('datasets/c_params.txt')[0]
            generate(parameters, seed, names)  # pass the requested count here as well
Example #25
device = torch.device(args.device)

msg_net = nn.Sequential(nn.Linear(6, args.hs_1), nn.Tanh(),
                        nn.Linear(args.hs_1, args.hs_1), nn.Tanh(),
                        nn.Linear(args.hs_1, args.hs_1), nn.Tanh(),
                        nn.Linear(args.hs_1, args.d))
aggr_net = nn.Sequential(nn.Linear(args.d + 2, args.hs_1), nn.Tanh(),
                         nn.Linear(args.hs_1, args.hs_1), nn.Tanh(),
                         nn.Linear(args.hs_1, args.hs_1), nn.Tanh(),
                         nn.Linear(args.hs_1, 2))

model = Model(aggr_net, msg_net)
F = DynamicsFunction(model).to(device)
F.load_state_dict(torch.load(args.model_path, map_location=device))

data = utils.read_pickle(['t', 'x', 'u'], args.data_path)
dataset = utils.generate_torchgeom_dataset(data)
loader = DataLoader(dataset, batch_size=1, shuffle=False)

# Loss
loss_fn = nn.MSELoss()

# Testing
diffs_over_time = []
losses = torch.zeros(len(loader))

inds_of_sims_to_show = set([0])

with torch.no_grad():
    for i, dp in enumerate(loader):
        edge_index = dp.edge_index
Example #26
import sys
import tensorflow as tf
# read_pickle and the BIGAN class are assumed to be imported from the project's helper modules (cut from this snippet)


# Initialize the arguments
try:
    NUM_EPOCHS = int(sys.argv[1])
    NUM_OUTLIERS = int(sys.argv[2])
    # bool('False') would still be True, so parse the flag explicitly
    is_trainable = sys.argv[3].lower() in ('1', 'true', 'yes')
except (IndexError, ValueError):
    NUM_EPOCHS = 10
    NUM_OUTLIERS = 60  # estimated number of outliers
    is_trainable = True
BATCH_SIZE = 32 # 64

# Read the pickle file
X_train = read_pickle('../audio_data/X_train4d.pkl')
X_test = read_pickle('../audio_data/X_test4d.pkl')
Y_train = read_pickle('../audio_data/Y_train1d.pkl')
print("The shape of X_train/X_test/Y_train: ", X_train.shape, X_test.shape, Y_train.shape)

# Instantiate the model
bigan = BIGAN(X_train.shape[1], X_train.shape[2], X_train.shape[3])

if is_trainable:
    # Training the BiGAN
    bigan.train_by_batch(X_train, epochs=NUM_EPOCHS, batch_size=BATCH_SIZE)
    #bilstm.train_all(X_train_, Y_train_, BATCH_SIZE, NUM_EPOCHS)
else:
    # Restore the checkpoint
    checkpoint_dir = './runs/checkpoint_bigan'
    checkpoint = tf.train.Checkpoint()
Example #27
import matplotlib.pyplot as plt
import numpy as np
import random
# import argparse
import matplotlib as mpl
mpl.use('Agg', warn=False)
# import mnist
# import seaborn as sns
# import sys
import skl
import utils

iters = 1 * 10**4
num = 6

imgs_l = [skl.get_imgs_by_number(i) for i in range(10)]
strength_matrix_l = [
    utils.read_pickle('pkl/nn_meshed_' + str(i) + '.pkl') for i in range(10)
]
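# one trained strength matrix per digit class (0-9)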

fig, axes = plt.subplots(1, 1, figsize=(9, 3), sharex=True, sharey=True)
# axes = axes.flatten()

imgs = imgs_l[num]
index = random.choice(range(len(imgs)))
index = 3  # override the random choice with a fixed test image for reproducibility
print('index of testing img: ', index)
img = imgs[index][1]
# skl.show(img)

# for j, matrix in enumerate(strength_matrix_l):
if True:
    results_l = [[], [], [], [], [], [], [], [], [], []]
Example #28
def train(args):
    hyparam_list = [("model", args.model_name), ("cube", args.cube_len),
                    ("bs", args.batch_size), ("g_lr", args.g_lr),
                    ("d_lr", args.d_lr), ("z", args.z_dis),
                    ("bias", args.bias), ("sl", args.soft_label)]

    hyparam_dict = OrderedDict(((arg, value) for arg, value in hyparam_list))
    log_param = make_hyparam_string(hyparam_dict)
    print(log_param)

    # for using tensorboard
    if args.use_tensorboard:
        import tensorflow as tf

        summary_writer = tf.summary.FileWriter(args.output_dir + args.log_dir +
                                               log_param)

        def inject_summary(summary_writer, tag, value, step):
            summary = tf.Summary(
                value=[tf.Summary.Value(tag=tag, simple_value=value)])
            summary_writer.add_summary(summary, global_step=step)

        inject_summary = inject_summary

    # dataset definition
    dsets_path = args.input_dir + args.data_dir
    print(dsets_path)
    dsets = SetDataset(dsets_path, args)
    dset_loaders = torch.utils.data.DataLoader(dsets,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=1)

    # model define
    D = _D(args)
    G = _G(args)

    D_solver = optim.Adam(D.parameters(), lr=args.d_lr, betas=args.beta)
    G_solver = optim.Adam(G.parameters(), lr=args.g_lr, betas=args.beta)

    if args.lrsh:
        D_scheduler = MultiStepLR(D_solver, milestones=[500, 1000])

    if torch.cuda.is_available():
        print("using cuda")
        D.cuda()
        G.cuda()

    criterion = nn.BCELoss()

    # pickle_path = args.pickle_dir
    pickle_path = 'pickle'
    read_pickle(pickle_path, G, G_solver, D, D_solver)

    for epoch in range(args.n_epochs):
        for i, X in enumerate(dset_loaders):
            #   print(X.size())

            X = var_or_cuda(X)

            if X.size()[0] != int(args.batch_size):
                # print("batch_size != {} drop last incompatible batch".format(int(args.batch_size)))
                continue

            Z = generateZ(args)
            real_labels = var_or_cuda(torch.ones(args.batch_size))
            fake_labels = var_or_cuda(torch.zeros(args.batch_size))

            if args.soft_label:  # soft labels, a common trick to stabilise GAN training
                real_labels = var_or_cuda(
                    torch.Tensor(args.batch_size).uniform_(0.7, 1.2))
                fake_labels = var_or_cuda(
                    torch.Tensor(args.batch_size).uniform_(0, 0.3))

            # ============= Train the discriminator =============#
            d_real = D(X)
            d_real_loss = criterion(d_real, real_labels)

            fake = G(Z)
            d_fake = D(fake)
            d_fake_loss = criterion(d_fake, fake_labels)

            d_loss = d_real_loss + d_fake_loss

            d_real_acu = torch.ge(d_real.squeeze(), 0.5).float()
            d_fake_acu = torch.le(d_fake.squeeze(), 0.5).float()
            d_total_acu = torch.mean(torch.cat((d_real_acu, d_fake_acu), 0))

            if d_total_acu <= args.d_thresh:  # update D only while its accuracy is below the threshold, so it does not overpower G
                D.zero_grad()
                d_loss.backward()
                D_solver.step()

            # build graph
            """
            with summary_writer as w:
                w.add_graph(D, X)
                w.add_graph(G, Z)
                w.close()
            exit()
            """

            # =============== Train the generator ===============#

            Z = generateZ(args)

            fake = G(Z)
            d_fake = D(fake)
            g_loss = criterion(d_fake, real_labels)

            D.zero_grad()
            G.zero_grad()
            g_loss.backward()
            G_solver.step()

            # ==================== Save Good Results ================#
            iteration = str(G_solver.state_dict()['state'][
                G_solver.state_dict()['param_groups'][0]['params'][0]]['step'])

            if g_loss < 0.7:

                samples = fake.cpu().data[:8].squeeze().numpy()
                image_path = args.output_dir + args.image_dir + log_param
                if not os.path.exists(image_path):
                    os.makedirs(image_path)
                SavePloat_Voxels(samples, image_path, iteration)

        # =============== logging at the end of each epoch ===============#
        if args.use_tensorboard:
            log_save_path = args.output_dir + args.log_dir + log_param
            if not os.path.exists(log_save_path):
                os.makedirs(log_save_path)

            info = {
                'loss/loss_D(x)': d_real_loss,
                'loss/loss_D(G(z))': d_fake_loss,
                'loss/loss_D': d_loss,
                'loss/loss_G': g_loss,
                'loss/acc_D': d_total_acu
            }

            for tag, value in info.items():
                inject_summary(summary_writer, tag, value, iteration)

            summary_writer.flush()

        # =============== each epoch save model or save image ===============#
        print(
            'Epoch:{}, Iter-{}; , D_loss : {:.4}, G_loss : {:.4}, D_acu : {:.4}, D_lr : {:.4}'
            .format(epoch, iteration, d_loss, g_loss, d_total_acu,
                    D_solver.state_dict()['param_groups'][0]["lr"]))

        if (epoch + 1) % args.image_save_step == 0:

            samples = fake.cpu().data[:8].squeeze().numpy()

            image_path = args.output_dir + args.image_dir + log_param
            if not os.path.exists(image_path):
                os.makedirs(image_path)

            SavePloat_Voxels(samples, image_path, iteration)

        if (epoch + 1) % args.pickle_step == 0:
            print('saving pickle')
            pickle_save_path = args.output_dir + args.pickle_dir
            save_new_pickle(pickle_save_path, iteration, G, G_solver, D,
                            D_solver)

        if args.lrsh:

            try:
                D_scheduler.step()
            except Exception as e:
                print("lr scheduling failed", e)
def get_best_params():
    cv_output = read_pickle('cv_output.pickle')
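    # cv_output is a tuple; only top_feat_params and top_model_feat_params are used here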
    best_model_params, top_feat_params, top_model_feat_params, *_ = cv_output

    return top_feat_params, top_model_feat_params
Example #30
import os
import numpy as np
import utils
import seaborn as sns
import matplotlib.pyplot as plt
plt.style.use("seaborn-colorblind")


LOAD_DIR = "results/env_2d"
LOAD_FILE = "1a_medium_norm_lambda.pickle"
LOAD_PATH = os.path.join(LOAD_DIR, LOAD_FILE)

NORMS = [0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
LAMBDAS = [0.5, 0.1, 0.05, 0.01, 0.005, 0.001]
RUNS = 20

results = utils.read_pickle(LOAD_PATH)
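# results maps (norm, lambda_1, run_idx) keys to per-run metrics; index 0 appears to hold the accuracy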
results_array = np.zeros((len(NORMS), len(LAMBDAS)))

for i, norm in enumerate(NORMS):
    for j, lambda_1 in enumerate(LAMBDAS):
        accuracies = []
        for run_idx in range(RUNS):
            key = (norm, lambda_1, run_idx)
            if key in results:
                accuracies.append(results[key][0])