import os
from argparse import ArgumentParser

import utils

# Merge per-shard pickle results (pattern_0 ... pattern_99) into a single pickle.
arguments = ArgumentParser()
arguments.add_argument('--base', type=str, default='')
arguments.add_argument('--pattern', type=str, default='')
args = arguments.parse_args()

base_dir = args.base
merged_res = {}
for i in range(100):
    try:
        res = utils.read_pickle(
            os.path.join(base_dir, '{0}_{1}'.format(args.pattern, i)))
    except Exception:  # skip shards that are missing or unreadable
        continue
    for k, v in res.items():  # dict.iteritems() is Python 2 only
        merged_res[k] = v

utils.write_pickle(merged_res,
                   os.path.join(base_dir, '{0}_merged'.format(args.pattern)))
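# The snippets in this section rely on a small `utils` module for pickle I/O
# that is not shown here. Below is a minimal sketch of what read_pickle /
# write_pickle are assumed to do for the simple call sites; note that some
# later snippets use utils variants with different signatures (a key-list
# argument, model-checkpoint helpers), so this covers only the simplest case
# and is not the real module.
import pickle


def read_pickle(path):
    """Load and return a single pickled object from `path`."""
    with open(path, 'rb') as f:
        return pickle.load(f)


def write_pickle(obj, path):
    """Pickle `obj` to `path`, overwriting any existing file."""
    with open(path, 'wb') as f:
        pickle.dump(obj, f, protocol=pickle.HIGHEST_PROTOCOL)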
def load_data():
    return (utils.read_pickle('train_data.pkl'),
            utils.read_pickle('test_data.pkl'))
def run(seed): # create folders for scores models and preds folder_models = './models/domain1_var1/scores/' if not os.path.exists(folder_models): os.makedirs(folder_models) folder_preds = './predicts/domain1_var1/scores/' if not os.path.exists(folder_preds): os.makedirs(folder_preds) print('Loading data...') # load biases ic_bias = read_pickle('./data/biases/ic_biases.pickle') ic_bias_site = read_pickle('./data/biases/ic_biases_site.pickle') fnc_bias = read_pickle('./data/biases/fnc_biases.pickle') fnc_bias_site = read_pickle('./data/biases/fnc_biases_site.pickle') pca_bias = read_pickle('./data/biases/200pca_biases.pickle') pca_bias_site = read_pickle('./data/biases/200pca_biases_site.pickle') # load classifier and add extra sites2 extra_site = pd.DataFrame() extra_site['Id'] = np.load('./predicts/classifier/site2_test_new_9735.npy') # load competiton data ids_df = pd.read_csv('./data/raw/reveal_ID_site2.csv') fnc_df = pd.read_csv('./data/raw/fnc.csv') loading_df = pd.read_csv('./data/raw/loading.csv') labels_df = pd.read_csv('./data/raw/train_scores.csv') ids_df = ids_df.append(extra_site) print('Detected Site2 ids count: ', ids_df['Id'].nunique()) # load created features agg_df = pd.read_csv('./data/features/agg_feats.csv') im_df = pd.read_csv('./data/features/im_feats.csv') dl_df = pd.read_csv('./data/features/dl_feats.csv') pca_df = pd.read_csv('./data/features/200pca_feats/200pca_3d_k0.csv') for i in range(1, 6): part = pd.read_csv( './data/features/200pca_feats/200pca_3d_k{}.csv'.format(i)) del part['Id'] pca_df = pd.concat((pca_df, part), axis=1) # merge data ic_cols = list(loading_df.columns[1:]) fnc_cols = list(fnc_df.columns[1:]) agg_cols = list(agg_df.columns[1:]) im_cols = list(im_df.columns[1:]) pca_cols = list(pca_df.columns[1:]) dl_cols = list(dl_df.columns[1:]) df = fnc_df.merge(loading_df, on='Id') df = df.merge(agg_df, how='left', on='Id') df = df.merge(im_df, how='left', on='Id') df = df.merge(pca_df, how='left', on='Id') df = df.merge(dl_df, how='left', on='Id') df = df.merge(labels_df, how='left', on='Id') del loading_df, fnc_df, agg_df, im_df, pca_df gc.collect() # split train and test df.loc[df['Id'].isin(labels_df['Id']), 'is_test'] = 0 df.loc[~df['Id'].isin(labels_df['Id']), 'is_test'] = 1 train = df.query('is_test==0') del train['is_test'] test = df.query('is_test==1') del test['is_test'] y = train['domain1_var1'].copy().reset_index(drop=True) d11_index = list(train['domain1_var1'].dropna().index) # apply biases for c in ic_bias_site.keys(): test.loc[~test['Id'].isin(ids_df['Id']), c] += ic_bias[c] test.loc[test['Id'].isin(ids_df['Id']), c] += ic_bias_site[c] for c in fnc_bias_site.keys(): test.loc[~test['Id'].isin(ids_df['Id']), c] += fnc_bias[c] test.loc[test['Id'].isin(ids_df['Id']), c] += fnc_bias_site[c] for c in pca_bias_site.keys(): test.loc[~test['Id'].isin(ids_df['Id']), c] += pca_bias[c] test.loc[test['Id'].isin(ids_df['Id']), c] += pca_bias_site[c] # save df for scaling df_scale = pd.concat([train, test], axis=0) # I. 
Create fnc score print('Creating FNC score...') # prepare datasets for fnc score train_for_score, test_for_score = scale_select_data( train, test, df_scale, fnc_cols) # define models names = ['ENet', 'BRidge'] names = [name + '_fnc_seed{}'.format(seed) for name in names] pack = [ ElasticNet(alpha=0.05, l1_ratio=0.5, random_state=0), BayesianRidge() ] # train models zoo = TrendsModelSklearn(pack, seed=seed) zoo.fit([train_for_score] * 2, y) score_blend = zoo.blend_oof() pred = zoo.predict([test_for_score] * 2, names) # save oof, pred, models np.save(folder_preds + 'fnc_score_seed{}.npy'.format(seed), score_blend) np.save(folder_preds + 'fnc_score_test_seed{}.npy'.format(seed), pred) zoo.save_models(names, folder=folder_models) # II. Create agg score print('Creating AGG score...') # prepare datasets for agg score train_for_score, test_for_score = scale_select_data( train, test, df_scale, agg_cols) # define models names = ['ENet', 'Huber'] names = [name + '_agg_seed{}'.format(seed) for name in names] pack = [ ElasticNet(alpha=0.05, l1_ratio=0.3, random_state=0), HuberRegressor(epsilon=2.5, alpha=1) ] # train models zoo = TrendsModelSklearn(pack, seed=seed) zoo.fit([train_for_score] * 2, y) score_blend = zoo.blend_oof() pred = zoo.predict([test_for_score] * 2, names) # save oof, pred, models np.save(folder_preds + 'agg_score_seed{}.npy'.format(seed), score_blend) np.save(folder_preds + 'agg_score_test_seed{}.npy'.format(seed), pred) zoo.save_models(names, folder=folder_models) # III. Create pca score print('Creating PCA score...') # prepare datasets for pca score train_for_score, test_for_score = scale_select_data( train, test, df_scale, pca_cols) # define models names = ['ENet', 'BRidge'] names = [name + '_pca_seed{}'.format(seed) for name in names] pack = [ ElasticNet(alpha=0.2, l1_ratio=0.2, random_state=0), BayesianRidge() ] # train models zoo = TrendsModelSklearn(pack, seed=seed) zoo.fit([train_for_score] * 2, y) score_blend = zoo.blend_oof() pred = zoo.predict([test_for_score] * 2, names) # save oof, pred, models np.save(folder_preds + 'pca_score_seed{}.npy'.format(seed), score_blend) np.save(folder_preds + 'pca_score_test_seed{}.npy'.format(seed), pred) zoo.save_models(names, folder=folder_models) # IV. Create im score print('Creating IM score...') # prepare datasets for pca score train_for_score, test_for_score = scale_select_data( train, test, df_scale, im_cols) # define models names = ['ENet', 'BRidge'] names = [name + '_im_seed{}'.format(seed) for name in names] pack = [ ElasticNet(alpha=0.2, l1_ratio=0.2, random_state=0), BayesianRidge() ] # train models zoo = TrendsModelSklearn(pack, seed=seed) zoo.fit([train_for_score] * 2, y) score_blend = zoo.blend_oof() pred = zoo.predict([test_for_score] * 2, names) # save oof, pred, models np.save(folder_preds + 'im_score_seed{}.npy'.format(seed), score_blend) np.save(folder_preds + 'im_score_test_seed{}.npy'.format(seed), pred) zoo.save_models(names, folder=folder_models) # V. 
Create dl score print('Creating DL score...') # prepare datasets for pca score train_for_score, test_for_score = scale_select_data( train, test, df_scale, dl_cols) # define models names = ['ENet', 'BRidge'] names = [name + '_dl_seed{}'.format(seed) for name in names] pack = [ ElasticNet(alpha=0.2, l1_ratio=0.2, random_state=0), BayesianRidge() ] # train models zoo = TrendsModelSklearn(pack, seed=seed) zoo.fit([train_for_score] * 2, y) score_blend = zoo.blend_oof() pred = zoo.predict([test_for_score] * 2, names) # save oof, pred, models np.save(folder_preds + 'dl_score_seed{}.npy'.format(seed), score_blend) np.save(folder_preds + 'dl_score_test_seed{}.npy'.format(seed), pred) zoo.save_models(names, folder=folder_models) # VI. Training and predicting procedure print('Training has started...') print('Reading scores from ', folder_preds) # add scores for prefix in ['fnc', 'agg', 'im', 'pca', 'dl']: train.loc[d11_index, prefix + '_score'] = np.load( folder_preds + '{}_score_seed{}.npy'.format(prefix, seed)) test.loc[:, prefix + '_score'] = np.load( folder_preds + '{}_score_test_seed{}.npy'.format(prefix, seed)) score_cols = [c for c in train.columns if c.endswith('_score')] # save df for scaling df_scale = pd.concat([train, test], axis=0) # create differents datasets # linear linear_cols = sorted( list(set(ic_cols + fnc_cols + pca_cols) - set(['IC_20']))) train_linear, test_linear = scale_select_data(train, test, df_scale, linear_cols) # kernel kernel_cols = sorted(list(set(ic_cols + pca_cols) - set(['IC_20']))) train_kernel, test_kernel = scale_select_data(train=train, test=test, df_scale=df_scale, cols=kernel_cols, scale_factor=0.2, scale_cols=pca_cols, sc=MinMaxScaler()) # score sc_cols = sorted(list(set(ic_cols + score_cols) - set(['IC_20']))) train_sc, test_sc = scale_select_data(train, test, df_scale, sc_cols) # learning process on different datasets names = ['GP', 'SVM1', 'SVM2', 'OMP', 'KR'] names = [name + '_seed{}'.format(seed) for name in names] pack = [ GaussianProcessRegressor(DotProduct(), random_state=0), NuSVR(C=5, kernel='rbf'), NuSVR(C=5, kernel='rbf'), OrthogonalMatchingPursuitCV(), KernelRidge(kernel='poly', degree=2, alpha=10) ] zoo = TrendsModelSklearn(pack, seed=seed) zoo.fit([train_sc] * 2 + [train_kernel] + [train_linear] * 2, y) de_blend = zoo.blend_oof() preds = zoo.predict([test_sc] * 2 + [test_kernel] + [test_linear] * 2, names, is_blend=True) # rewrite folders for models and preds folder_models = './models/domain1_var1/stack/' if not os.path.exists(folder_models): os.makedirs(folder_models) folder_preds = './predicts/domain1_var1/stack/' if not os.path.exists(folder_preds): os.makedirs(folder_preds) print('Saving models to', folder_models) print('Saving predictions to', folder_preds) # save oofs and models zoo.save_oofs(names, folder=folder_preds) zoo.save_models(names, folder=folder_models) # stacking predictions print('Stacking predictions...') d11_prediction = pd.DataFrame() d11_prediction['Id'] = test['Id'].values d11_prediction['pred'] = preds d11_prediction.to_csv(folder_preds + 'domain1_var1_stack_seed{}.csv'.format(seed), index=False) print('domain1_var1 seed pred is saved as', folder_preds + 'domain1_var1_stack_seed{}.csv'.format(seed))
import logging
import pickle
from argparse import ArgumentParser
from datetime import datetime

import numpy
import pandas as pd
from stattools import grangercausalitytests

import utils

logging.basicConfig(format='%(asctime)s %(message)s', level=logging.DEBUG)

arguments = ArgumentParser()
arguments.add_argument('--path', type=str, default="flattened-timeseries")
arguments.add_argument('--index', type=int)
args = arguments.parse_args()

results = {}
lags = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 14, 21, 30, 60, 90]
df = utils.read_pickle(args.path)


def pairwise_granger(words):
    global df
    (word1, word2) = words
    if word1 not in df or word2 not in df:
        return (word1, word2, None)
    best_lag, res = grangercausalitytests(numpy.transpose(
        [df[word2], df[word1]]), lags, verbose=False)
    causal_lag = None
    f_pvalue = res[0]['params_ftest'][1]
    lr_pvalue = res[0]['lrtest'][1]
    if f_pvalue < 0.01 and lr_pvalue < 0.01:
# Sample images for learners.
def sample_zip(fn_in, fn_out, rate=0.01, seed=42):
    np.random.seed(seed)
    with zipfile.ZipFile(fn_in) as fin, zipfile.ZipFile(fn_out, "w") as fout:
        sampled = filter(lambda _: np.random.rand() < rate, fin.filelist)
        for zInfo in sampled:
            fout.writestr(zInfo, fin.read(zInfo))


sample_zip("train2014.zip", "train2014_sample.zip")
sample_zip("val2014.zip", "val2014_sample.zip")

# Load prepared embeddings.
train_img_embeds = utils.read_pickle("train_img_embeds.pickle")
train_img_fns = utils.read_pickle("train_img_fns.pickle")
val_img_embeds = utils.read_pickle("val_img_embeds.pickle")
val_img_fns = utils.read_pickle("val_img_fns.pickle")

# Check shapes.
print(train_img_embeds.shape, len(train_img_fns))
print(val_img_embeds.shape, len(val_img_fns))


# Extract captions.
def get_captions_for_fns(fns, zip_fn, zip_json_path):
    zf = zipfile.ZipFile(zip_fn)
    j = json.loads(zf.read(zip_json_path).decode("utf8"))
    id_to_fn = {img["id"]: img["file_name"] for img in j["images"]}
    fn_to_caps = defaultdict(list)
cv2.waitKey(0)
cv2.destroyAllWindows()


if __name__ == '__main__':
    args = utils.get_options()
    input_image_dirpath = osp.join(osp.dirname(__file__), args.in_dir)
    # recognize any extensions
    image_paths, image_names = utils.get_file_paths(input_image_dirpath, "*")

    # read camera parameters
    camera_param_filepath = osp.join(osp.dirname(__file__),
                                     args.camera_param_path)
    cameraMatrix, distCoeffs, rvecs, tvecs, stdDevIn, stdDevEx = \
        utils.read_pickle(camera_param_filepath)

    # read parameters from arguments
    dictionary = utils.get_aruco_dict(args.aruco_dict)
    squareL = args.square_length
    markerL = args.marker_length
    tb = args.v_margin
    lr = args.h_margin
    pixels_per_mm = args.pixels_per_mm

    # read parameters from configuration pickle file
    if args.input_board_cfg_pkl:
        board_cfg_pkl_path = osp.join(osp.dirname(__file__),
                                      args.board_cfg_pkl_path)
        board_cfg = utils.read_pickle(board_cfg_pkl_path)
        dictionary = utils.get_aruco_dict(board_cfg['dict_label'])
        squareL = board_cfg['square_length']
def train(args): #for creating the visdom object DEFAULT_PORT = 8097 DEFAULT_HOSTNAME = "http://localhost" viz = Visdom(DEFAULT_HOSTNAME, DEFAULT_PORT, ipv6=False) hyparam_list = [ ("model", args.model_name), ("cube", args.cube_len), ("bs", args.batch_size), ("g_lr", args.g_lr), ("d_lr", args.d_lr), ("z", args.z_dis), ("bias", args.bias), ("sl", args.soft_label), ] hyparam_dict = OrderedDict(((arg, value) for arg, value in hyparam_list)) log_param = make_hyparam_string(hyparam_dict) print(log_param) # for using tensorboard if args.use_tensorboard: import tensorflow as tf summary_writer = tf.summary.FileWriter(args.output_dir + args.log_dir + log_param) def inject_summary(summary_writer, tag, value, step): summary = tf.Summary( value=[tf.Summary.Value(tag=tag, simple_value=value)]) summary_writer.add_summary(summary, global_step=step) inject_summary = inject_summary # datset define dsets_path = args.input_dir + args.data_dir + "train/" print(dsets_path) x_train = np.load("voxels_3DMNIST_16.npy") dataset = x_train.reshape(-1, args.cube_len * args.cube_len * args.cube_len) print(dataset.shape) dset_loaders = torch.utils.data.DataLoader(dataset, batch_size=args.batch_size, shuffle=True, num_workers=1) # model define D = _D(args) G = _G(args) D_solver = optim.Adam(D.parameters(), lr=args.d_lr, betas=args.beta) G_solver = optim.Adam(G.parameters(), lr=args.g_lr, betas=args.beta) if torch.cuda.is_available(): print("using cuda") D.cuda() G.cuda() criterion = nn.BCELoss() pickle_path = "." + args.pickle_dir + log_param read_pickle(pickle_path, G, G_solver, D, D_solver) for epoch in range(args.n_epochs): epoch_start_time = time.time() print("epoch %d started" % (epoch)) for i, X in enumerate(dset_loaders): X = var_or_cuda(X) X = X.type(torch.cuda.FloatTensor) if X.size()[0] != int(args.batch_size): #print("batch_size != {} drop last incompatible batch".format(int(args.batch_size))) continue Z = generateZ(args) real_labels = var_or_cuda(torch.ones(args.batch_size)).view( -1, 1, 1, 1, 1) fake_labels = var_or_cuda(torch.zeros(args.batch_size)).view( -1, 1, 1, 1, 1) if args.soft_label: real_labels = var_or_cuda( torch.Tensor(args.batch_size).uniform_(0.9, 1.1)).view( -1, 1, 1, 1, 1) #### #fake_labels = var_or_cuda(torch.Tensor(args.batch_size).uniform_(0, 0.3)).view(-1,1,1,1,1) fake_labels = var_or_cuda(torch.zeros(args.batch_size)).view( -1, 1, 1, 1, 1) ##### # ============= Train the discriminator =============# d_real = D(X) d_real_loss = criterion(d_real, real_labels) fake = G(Z) d_fake = D(fake) d_fake_loss = criterion(d_fake, fake_labels) d_loss = d_real_loss + d_fake_loss d_real_acu = torch.ge(d_real.squeeze(), 0.5).float() d_fake_acu = torch.le(d_fake.squeeze(), 0.5).float() d_total_acu = torch.mean(torch.cat((d_real_acu, d_fake_acu), 0)) #if 1: if d_total_acu <= args.d_thresh: D.zero_grad() d_loss.backward() D_solver.step() # =============== Train the generator ===============# Z = generateZ(args) fake = G(Z) d_fake = D(fake) g_loss = criterion(d_fake, real_labels) D.zero_grad() G.zero_grad() g_loss.backward() G_solver.step() ####### #print(fake.shape) #print(fake.cpu().data[:8].squeeze().numpy().shape) # =============== logging each iteration ===============# iteration = str(G_solver.state_dict()['state'][ G_solver.state_dict()['param_groups'][0]['params'][0]]['step']) #print(type(iteration)) #iteration = str(i) #saving the model and a image each 100 iteration if int(iteration) % 300 == 0: #pickle_save_path = args.output_dir + args.pickle_dir + log_param #save_new_pickle(pickle_save_path, 
                # iteration, G, G_solver, D, D_solver)
                samples = fake.cpu().data[:8].squeeze().numpy()
                # print(samples.shape)
                for s in range(8):
                    plotVoxelVisdom(samples[s, ...], viz,
                                    "Iteration:{:.4}".format(iteration))
                # image_path = args.output_dir + args.image_dir + log_param
                # if not os.path.exists(image_path):
                #     os.makedirs(image_path)
                # SavePloat_Voxels(samples, image_path, iteration)

            # =============== each epoch save model or save image ===============#
            print(
                'Iter-{}; , D_loss : {:.4}, G_loss : {:.4}, D_acu : {:.4}, D_lr : {:.4}'
                .format(iteration, d_loss.item(), g_loss.item(),
                        d_total_acu.item(),
                        D_solver.state_dict()['param_groups'][0]["lr"]))

        epoch_end_time = time.time()

        if (epoch + 1) % args.image_save_step == 0:
            samples = fake.cpu().data[:8].squeeze().numpy()
            image_path = args.output_dir + args.image_dir + log_param
            if not os.path.exists(image_path):
                os.makedirs(image_path)
            SavePloat_Voxels(samples, image_path, iteration)

        if (epoch + 1) % args.pickle_step == 0:
            pickle_save_path = args.output_dir + args.pickle_dir + log_param
            save_new_pickle(pickle_save_path, iteration, G, G_solver, D,
                            D_solver)

        print("epoch time", (epoch_end_time - epoch_start_time) / 60)
        print("epoch %d ended" % (epoch))
        print("################################################")
# # sample images for faster training
# def sample_zip(fn_in, fn_out, rate=0.01, seed=42):
#     np.random.seed(seed)
#     with zipfile.ZipFile(fn_in) as fin, zipfile.ZipFile(fn_out, "w") as fout:
#         sampled = filter(lambda _: np.random.rand() < rate, fin.filelist)
#         for zInfo in sampled:
#             fout.writestr(zInfo, fin.read(zInfo))
# sample_zip(train2014_zip, "../data/coco/train2014_sample_yoloV2.zip", rate=0.01, seed=42)
# sample_zip(val2014_zip, "../data/coco/val2014_sample_yoloV2.zip", rate=0.01, seed=42)

# In[12]:

# load prepared embeddings
train_img_embeds = utils.read_pickle(
    "../data/coco/extracted/train_img_embeds_yoloV2_{}.pickle".format(action))
train_img_fns = utils.read_pickle(
    "../data/coco/extracted/train_img_fns_yoloV2_{}.pickle".format(action))
val_img_embeds = utils.read_pickle(
    "../data/coco/extracted/val_img_embeds_yoloV2_{}.pickle".format(action))
val_img_fns = utils.read_pickle(
    "../data/coco/extracted/val_img_fns_yoloV2_{}.pickle".format(action))

# check shapes
print("training data: ", train_img_embeds.shape, len(train_img_fns))
print("validation data: ", val_img_embeds.shape, len(val_img_fns))

# In[13]:

# check prepared samples of images
list(filter(lambda x: x.endswith("_sample_yoloV2.zip"), os.listdir(".")))
parser.add_argument('--size', type=int, nargs='?', default=32,
                    help='the embedding size')
parser.add_argument('--num_walks', type=int, nargs='?', default=10,
                    help='the number of random walks to originate from each vertex')
parser.add_argument('--walk_length', type=int, nargs='?', default=80,
                    help='the length of each random walk')
args = parser.parse_args()

size = args.size
print('learning embeddings of dimension {}'.format(args.size))

x = utils.read_pickle(args.x_path[0])
g = BipartiteGraph(x)

print('walk path: {}'.format(args.walk_path))
print('x path: {}'.format(args.x_path))

if args.walk_path == "":
    print('generating new random walk dataset')
    print('building edges')
    g.build_edge_array()
    print('generating walks')
    walks = g.generate_walks(args.num_walks, args.walk_length)
    df = pd.DataFrame(walks)
    walk_path = 'local_resources/walks_thresh10_num_{}_length_{}'.format(
        args.num_walks, args.walk_length)
    df.to_csv(walk_path, index=False, header=None)
else:
    print('learning embeddings')
    walks = pd.read_csv(args.walk_path,
def main(game, level, player_img, use_graph, draw_all_labels, draw_dup_labels, draw_path, show_score): # Create the Level level_obj = Level.generate_level_from_file(game, level) # Level saved files state_graph_file = "level_saved_files_%s/enumerated_state_graphs/%s/%s.gpickle" % (player_img, game, level) if game == "generated" and os.path.exists("level_saved_files_%s/generated_level_paths/%s.pickle" % (player_img, level)): generated_level_path_coords = read_pickle("level_saved_files_%s/generated_level_paths/%s.pickle" % (player_img, level)) else: generated_level_path_coords = None if use_graph and os.path.exists(state_graph_file): print("***** USING ENUMERATED STATE GRAPH *****") state_graph = nx.read_gpickle(state_graph_file) else: print("***** USING MANUAL CONTROLS *****") state_graph = None edge_actions_dict = None if state_graph is None else nx.get_edge_attributes(state_graph, 'action') # Background FPS = 40 # frame rate ANI = 4 # animation cycles WORLD_X = min(level_obj.width, MAX_WIDTH) WORLD_Y = min(level_obj.height, MAX_HEIGHT) clock = pygame.time.Clock() pygame.init() world = pygame.display.set_mode([WORLD_X, WORLD_Y]) BACKGROUND_COLOR = COLORS.get('DARK_GRAY') # Player player_model = Player(player_img, level_obj) player_view = PlayerView(player_img) player_list = pygame.sprite.Group() player_list.add(player_view) # Level platform_sprites = get_sprites(level_obj.get_platform_coords(), 'block_tile.png') goal_sprites = get_sprites(level_obj.get_goal_coords(), 'goal_tile.png') bonus_sprites = get_sprites(level_obj.get_bonus_coords(), 'bonus_tile.png') one_way_platform_sprites = get_sprites(level_obj.get_one_way_platform_coords(), 'one_way_block_tile.png') hazard_sprites = get_sprites(level_obj.get_hazard_coords(), 'hazard_tile.png') wall_sprites = get_sprites(level_obj.get_wall_coords(), 'block_tile.png') collected_bonus_tile_coords_dict = {} # Camera camera = Camera(Camera.camera_function, level_obj.width, level_obj.height, WORLD_X, WORLD_Y) # Setup drawing metatile labels if draw_all_labels or draw_dup_labels: metatile_labels, font_color, label_padding = \ setup_metatile_labels(game, level, player_img, draw_all_labels, draw_dup_labels) # Setup drawing solution path if draw_path: path_font_color = COLORS.get('GREEN') start_font_color = COLORS.get('BLUE') goal_font_color = COLORS.get('RED') if generated_level_path_coords is not None: path_coords = generated_level_path_coords start_coord = generated_level_path_coords[0] goal_coord = generated_level_path_coords[-1] elif os.path.exists(state_graph_file): graph = nx.read_gpickle(state_graph_file) shortest_path_dict = shortest_path_xy(graph) path_coords = shortest_path_dict.get("path_coords") start_coord = shortest_path_dict.get("start_coord") goal_coord = shortest_path_dict.get("goal_coord") else: error_exit("No enumerated state graph available to draw solution path") # Input handling input_handler = Inputs() # Main Loop main = True while main: input_handler.onLoop() for event in pygame.event.get(): if event.type == pygame.QUIT: pygame.quit() main = False if event.type == pygame.KEYDOWN: if event.key == ord('q'): pygame.quit() main = False sys.exit() elif event.key == ord('r'): player_model.reset() collected_bonus_tile_coords_dict = {} platform_sprites = get_sprites(level_obj.get_platform_coords(), 'block_tile.png') input_handler.onEvent(event) if not main: break world.fill(BACKGROUND_COLOR) camera.update(player_view) # set camera to track player # Update Player model and view player_model.update(action=input_handler.getAction(), 
precomputed_graph=state_graph, edge_actions_dict=edge_actions_dict) player_view.update(player_model.state.x, player_model.state.y, player_model.half_player_w, player_model.half_player_h) # Update the current score hit_bonus_coord = player_model.get_hit_bonus_coord() if hit_bonus_coord is not '': hit_bonus_coord_x = player_model.state.x // TILE_DIM hit_bonus_coord_y = player_model.state.y // TILE_DIM - 1 if hit_bonus_coord == 'N': pass elif hit_bonus_coord == 'NE': hit_bonus_coord_x += 1 elif hit_bonus_coord == 'NW': hit_bonus_coord_x -= 1 else: error_exit("unrecognized hit bonus coord") hit_bonus_coord_xy = (hit_bonus_coord_x * TILE_DIM, hit_bonus_coord_y * TILE_DIM) if hit_bonus_coord_xy not in level_obj.get_bonus_coords(): error_exit("hit bonus tile that is not there: " + str(hit_bonus_coord_xy)) if collected_bonus_tile_coords_dict.get(hit_bonus_coord_xy) is None: collected_bonus_tile_coords_dict[hit_bonus_coord_xy] = 1 platform_sprites.add(Tile(hit_bonus_coord_xy[0], hit_bonus_coord_xy[1], 'block_tile.png')) score = len(collected_bonus_tile_coords_dict) * 10 # Draw sprites entities_to_draw = [] entities_to_draw += list(bonus_sprites) # draw bonus tiles entities_to_draw += list(platform_sprites) # draw platforms tiles entities_to_draw += list(one_way_platform_sprites) # draw one-way platform tiles entities_to_draw += list(hazard_sprites) entities_to_draw += list(wall_sprites) entities_to_draw += list(player_list) # draw player entities_to_draw += list(goal_sprites) # draw goal tiles for e in entities_to_draw: world.blit(e.image, camera.apply(e)) # Draw metatile labels if draw_all_labels or draw_dup_labels: for coord in level_obj.get_all_possible_coords(): # draw metatile border outlines tile_rect = pygame.Rect(coord[0], coord[1], TILE_DIM, TILE_DIM) tile_rect = camera.apply_to_rect(tile_rect) # adjust based on camera pygame.draw.rect(world, font_color, tile_rect, 1) for label in metatile_labels: # draw metatile labels surface, label_x, label_y = label label_x, label_y = camera.apply_to_coord((label_x, label_y)) world.blit(surface, (label_x + label_padding[0], label_y + label_padding[1])) # Draw level solution path if draw_path: for coord in path_coords: if coord == start_coord: color = start_font_color elif coord == goal_coord: color = goal_font_color else: color = path_font_color coord = eval(coord) path_component = pygame.Rect(coord[0], coord[1], 2, 2) path_component = camera.apply_to_rect(path_component) pygame.draw.rect(world, color, path_component, 1) # Draw text labels label_rect_pairs = [] if player_model.goal_reached(): score += 50 labels = [ ("You Win!", 50, COLORS.get('GREEN')), ("Score: %d" % score, 30, COLORS.get('YELLOW')), ("Press 'R' to replay or 'Q' to quit", 30, COLORS.get('YELLOW')) ] label_rect_pairs = get_label_rect_pairs(center_x=WORLD_X/2, labels=labels) elif player_model.is_dead(): labels = [ ("Game Over", 50, COLORS.get('RED')), ("Score: %d" % score, 30, COLORS.get('YELLOW')), ("Press 'R' to replay or 'Q' to quit", 30, COLORS.get('YELLOW')) ] label_rect_pairs = get_label_rect_pairs(center_x=WORLD_X / 2, labels=labels) elif show_score: labels = [("Score: %d" % score, 50, COLORS.get('YELLOW'))] label_rect_pairs = get_label_rect_pairs(center_x=WORLD_X / 2, labels=labels) for label, label_rect in label_rect_pairs: world.blit(label, label_rect) pygame.display.flip() clock.tick(FPS)
        else:
            # Use randomly drawn datasets with at least one success for
            # categories that have too few successful experiments for training
            run_non_meta_model(base_model,
                               common_params,
                               model_params,
                               category,
                               success=True)

    # Gradient Boosting
    base_model = GradientBoosting
    model_params = gradientboosting_params
    for category in categories:
        if '4_ii' not in category and '5_ii' not in category:
            # Use regular randomly drawn datasets for categories
            # that have sufficient successful experiments for training
            run_non_meta_model(base_model, common_params, model_params,
                               category)
        else:
            # Use randomly drawn datasets with at least one success for
            # categories that have too few successful experiments for training
            run_non_meta_model(base_model,
                               common_params,
                               model_params,
                               category,
                               success=True)

    # Use cv_stats.pkl to plot all graphs
    cv_stats = read_pickle(common_params['stats_path'])
    plot_all_graphs(cv_stats)
def main(trial, levels, num_sol, asp, state_graph): if not (asp or state_graph): utils.error_exit( "Must specify at least one validation test to run: --asp or --state_graph" ) # Get file formats config_formats = TRIAL_CONFIG_FORMATS.get(trial) if config_formats is None: utils.error_exit("--trial must be one of %s" % str(list(TRIAL_CONFIG_FORMATS.keys()))) prolog_file_format = "level_saved_files_block/prolog_files/%s.pl" model_str_file_format = "level_saved_files_block/generated_level_model_strs/%s.txt" assignments_dict_file_format = "level_saved_files_block/generated_level_assignments_dicts/%s.pickle" # Initialize validation counts asp_checked_count = 0 asp_valid_count = 0 state_graph_checked_count = 0 state_graph_valid_count = 0 # Validate each solver run for level in levels: for config_file_format in config_formats: for sol in range(num_sol): prolog_file = prolog_file_format % level prolog_filename = utils.get_basepath_filename( prolog_file, 'pl') config_file = config_file_format % level config_filename = utils.get_basepath_filename( config_file, 'json') answer_set_filename = '_'.join( [prolog_filename, config_filename, 'a%d' % sol]) if asp: # Determine ASP checks to perform based on config file contents config_file_contents = utils.read_json(config_file) config = config_file_contents['config'] require_all_platforms_reachable = True require_all_bonus_tiles_reachable = True if config.get( 'require_all_platforms_reachable') is not None: require_all_platforms_reachable = eval( config['require_all_platforms_reachable']) if config.get( 'require_all_bonus_tiles_reachable') is not None: require_all_bonus_tiles_reachable = eval( config['require_all_bonus_tiles_reachable']) prolog_file_info = get_prolog_file_info(prolog_file) tile_ids = get_tile_ids_dictionary(prolog_file_info) model_str_file = model_str_file_format % answer_set_filename if os.path.exists(model_str_file): model_str = utils.read_txt(model_str_file) asp_valid = Solver.asp_is_valid( check_path=True, check_onground=require_all_platforms_reachable, check_bonus=require_all_bonus_tiles_reachable, model_str=model_str, player_img='block', answer_set_filename=answer_set_filename, tile_ids=tile_ids, save=False) status = "ASP VALID" if asp_valid else "ASP INVALID" print("%s: %s" % (answer_set_filename, status)) asp_checked_count += 1 asp_valid_count += 1 if asp_valid else 0 if state_graph: assignments_dict_file = assignments_dict_file_format % answer_set_filename if os.path.exists(assignments_dict_file): assignments_dict = utils.read_pickle( assignments_dict_file) valid_path = Solver.get_state_graph_valid_path( assignments_dict=assignments_dict, player_img='block', prolog_filename=prolog_filename, answer_set_filename=answer_set_filename, save=True) status = "GRAPH VALID" if valid_path else "GRAPH INVALID" print("%s: %s" % (answer_set_filename, status)) state_graph_checked_count += 1 state_graph_valid_count += 1 if valid_path is not None else 0 # Print validation results summary if asp: print("ASPs Checked: %d" % asp_checked_count) print("ASPs Valid: %d" % asp_valid_count) if state_graph: print("State Graphs Checked: %d" % state_graph_checked_count) print("State Graphs Valid: %d" % state_graph_valid_count)
import utils

# import lfw_dataset
# import numpy as np
# data, attr = lfw_dataset.load_lfw_dataset(dimx=36, dimy=36)
# data = np.float32(data) / 255.
# utils.save_pickle(data, "data.pickle")
# utils.save_pickle(data, "attr.pickle")

data = utils.read_pickle("data.pickle")
attr = utils.read_pickle("attr.pickle")

IMG_SHAPE = data.shape[1:]
CODE_SIZE = 256
    Xsamp = Xsamp[:, vertices]
    ysamp = ysingle[vertices, :]
    Xout, yout = prune_disconnected(Xsamp, ysamp)
    utils.persist_sparse_data(folder, Xout, yout[:, 32:])
    return Xout, yout[:, 32:]


def prune_disconnected(X, y):
    keep = np.where(X.sum(axis=1) > 0)[0]
    Xkeep = X[keep, :]
    Xkeep = Xkeep[:, keep]
    ykeep = y[keep, :]
    return Xkeep, ykeep


if __name__ == '__main__':
    X = utils.read_pickle('../../local_resources/blogcatalog/X.p')
    y = utils.read_pickle('../../local_resources/blogcatalog/y.p')
    xpath = '../../local_resources/blogcatalog_121_sample/X.p'
    ypath = '../../local_resources/blogcatalog_121_sample/y.p'
    folder = '../../local_resources/blogcatalog_121_sample'
    Xsamp, ysamp = sample_graph(X, y, folder)
    print(X.sum())
    print('number of vertices connected to one or more other vertices: ',
          sum(Xsamp.sum(axis=1) > 0))
    print('label distribution: ', ysamp.sum(axis=0))
    print(Xsamp.sum())
    print(Xsamp.shape)
    print(ysamp.shape)
import utils
# import func
import pickle
import numpy as np
from func import caption_tokens_to_indices

train_img_embeds = utils.read_pickle("train_img_embeds.pickle")
train_img_fns = utils.read_pickle("train_img_fns.pickle")
val_img_embeds = utils.read_pickle("val_img_embeds.pickle")
val_img_fns = utils.read_pickle("val_img_fns.pickle")
train_captions = utils.read_pickle("train_captions.pickle")
val_captions = utils.read_pickle("val_captions.pickle")
vocab = utils.read_pickle("vocabs.pickle")

# swap the key value of vocab
vocab_inverse = {value: key for key, value in vocab.items()}

train_captions_index = np.array(
    caption_tokens_to_indices(train_captions, vocab))
val_captions_index = np.array(caption_tokens_to_indices(val_captions, vocab))

# train_captions = func.get_captions_for_fns(train_img_fns, "captions_train-val2014.zip",
#                                            "annotations/captions_train2014.json")
# val_captions = func.get_captions_for_fns(val_img_fns, "captions_train-val2014.zip",
#                                          "annotations/captions_val2014.json")
# with open("val_captions.pickle", "wb") as fn:
#     pickle.dump(val_captions, fn)
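# Sanity-check sketch: since `vocab_inverse` maps indices back to tokens, one
# converted caption can be decoded back to text. This assumes
# train_captions_index[0][0] is a sequence of vocabulary indices; the exact
# nesting of the caption arrays is an assumption, not taken from the source.
first_caption_indices = train_captions_index[0][0]
decoded = [vocab_inverse[idx] for idx in first_caption_indices]
print(" ".join(str(tok) for tok in decoded))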
def run(seed): # create folders for scores models and preds folder_models = './models/age/scores/' if not os.path.exists(folder_models): os.makedirs(folder_models) folder_preds = './predicts/age/scores/' if not os.path.exists(folder_preds): os.makedirs(folder_preds) print('Loading data...') # load biases ic_bias = read_pickle('./data/biases/ic_biases.pickle') ic_bias_site = read_pickle('./data/biases/ic_biases_site.pickle') fnc_bias = read_pickle('./data/biases/fnc_biases.pickle') fnc_bias_site = read_pickle('./data/biases/fnc_biases_site.pickle') pca_bias = read_pickle('./data/biases/200pca_biases.pickle') pca_bias_site = read_pickle('./data/biases/200pca_biases_site.pickle') # load classifier and add extra sites2 extra_site = pd.DataFrame() extra_site['Id'] = np.load('./predicts/classifier/site2_test_new_9735.npy') # load competiton data ids_df = pd.read_csv('./data/raw/reveal_ID_site2.csv') fnc_df = pd.read_csv('./data/raw/fnc.csv') loading_df = pd.read_csv('./data/raw/loading.csv') labels_df = pd.read_csv('./data/raw/train_scores.csv') ids_df = ids_df.append(extra_site) print('Detected Site2 ids count: ', ids_df['Id'].nunique()) # load created features agg_df = pd.read_csv('./data/features/agg_feats.csv') im_df = pd.read_csv('./data/features/im_feats.csv') dl_df = pd.read_csv('./data/features/dl_feats.csv') pca_df = pd.read_csv('./data/features/200pca_feats/200pca_3d_k0.csv') for i in range(1, 6): part = pd.read_csv( './data/features/200pca_feats/200pca_3d_k{}.csv'.format(i)) del part['Id'] pca_df = pd.concat((pca_df, part), axis=1) # merge data ic_cols = list(loading_df.columns[1:]) fnc_cols = list(fnc_df.columns[1:]) agg_cols = list(agg_df.columns[1:]) im_cols = list(im_df.columns[1:]) pca_cols = list(pca_df.columns[1:]) dl_cols = list(dl_df.columns[1:]) df = fnc_df.merge(loading_df, on='Id') df = df.merge(agg_df, how='left', on='Id') df = df.merge(im_df, how='left', on='Id') df = df.merge(pca_df, how='left', on='Id') df = df.merge(dl_df, how='left', on='Id') df = df.merge(labels_df, how='left', on='Id') del loading_df, fnc_df, agg_df, im_df, pca_df gc.collect() # split train and test df.loc[df['Id'].isin(labels_df['Id']), 'is_test'] = 0 df.loc[~df['Id'].isin(labels_df['Id']), 'is_test'] = 1 train = df.query('is_test==0') del train['is_test'] test = df.query('is_test==1') del test['is_test'] y = train['age'].copy().reset_index(drop=True) # apply biases for c in ic_bias_site.keys(): test.loc[~test['Id'].isin(ids_df['Id']), c] += ic_bias[c] test.loc[test['Id'].isin(ids_df['Id']), c] += ic_bias_site[c] for c in fnc_bias_site.keys(): test.loc[~test['Id'].isin(ids_df['Id']), c] += fnc_bias[c] test.loc[test['Id'].isin(ids_df['Id']), c] += fnc_bias_site[c] for c in pca_bias_site.keys(): test.loc[~test['Id'].isin(ids_df['Id']), c] += pca_bias[c] test.loc[test['Id'].isin(ids_df['Id']), c] += pca_bias_site[c] # save df for scaling df_scale = pd.concat([train, test], axis=0) # I. 
Create fnc score print('Creating FNC score...') # prepare datasets for fnc score train_for_score, test_for_score = scale_select_data( train, test, df_scale, fnc_cols) # define models names = ['RGF', 'ENet', 'BRidge', 'Huber', 'OMP'] names = [name + '_fnc_seed{}'.format(seed) for name in names] pack = [ RGFRegressor(max_leaf=1000, reg_depth=5, normalize=True), ElasticNet(alpha=0.05, l1_ratio=0.5, random_state=0), BayesianRidge(), HuberRegressor(epsilon=2.5, alpha=1), OrthogonalMatchingPursuit(n_nonzero_coefs=300) ] # train models zoo = TrendsModelSklearn(pack, seed=seed) zoo.fit([train_for_score] * 5, y) score_blend = zoo.blend_oof() pred = zoo.predict([test_for_score] * 5, names) # save oof, pred, models np.save(folder_preds + 'fnc_score_seed{}.npy'.format(seed), score_blend) np.save(folder_preds + 'fnc_score_test_seed{}.npy'.format(seed), pred) zoo.save_models(names, folder=folder_models) # II. Create agg score print('Creating AGG score...') # prepare datasets for agg score train_for_score, test_for_score = scale_select_data( train, test, df_scale, agg_cols) # define models names = ['RGF', 'ENet', 'Huber'] names = [name + '_agg_seed{}'.format(seed) for name in names] pack = [ RGFRegressor(max_leaf=1000, reg_depth=5, min_samples_leaf=100, normalize=True), ElasticNet(alpha=0.05, l1_ratio=0.3, random_state=0), HuberRegressor(epsilon=2.5, alpha=1) ] # train models zoo = TrendsModelSklearn(pack, seed=seed) zoo.fit([train_for_score] * 3, y) score_blend = zoo.blend_oof() pred = zoo.predict([test_for_score] * 3, names) # save oof, pred, models np.save(folder_preds + 'agg_score_seed{}.npy'.format(seed), score_blend) np.save(folder_preds + 'agg_score_test_seed{}.npy'.format(seed), pred) zoo.save_models(names, folder=folder_models) # III. Create pca score print('Creating PCA score...') # prepare datasets for pca score train_for_score, test_for_score = scale_select_data( train, test, df_scale, pca_cols) # define models names = ['RGF', 'ENet', 'BRidge', 'OMP'] names = [name + '_pca_seed{}'.format(seed) for name in names] pack = [ RGFRegressor(max_leaf=1000, reg_depth=5, min_samples_leaf=100, normalize=True), ElasticNet(alpha=0.2, l1_ratio=0.2, random_state=0), BayesianRidge(), OrthogonalMatchingPursuit() ] # train models zoo = TrendsModelSklearn(pack, seed=seed) zoo.fit([train_for_score] * 4, y) score_blend = zoo.blend_oof() pred = zoo.predict([test_for_score] * 4, names) # save oof, pred, models np.save(folder_preds + 'pca_score_seed{}.npy'.format(seed), score_blend) np.save(folder_preds + 'pca_score_test_seed{}.npy'.format(seed), pred) zoo.save_models(names, folder=folder_models) # IV. Create im score print('Creating IM score...') # prepare datasets for pca score train_for_score, test_for_score = scale_select_data( train, test, df_scale, im_cols) # define models names = ['RGF', 'ENet', 'BRidge', 'OMP'] names = [name + '_im_seed{}'.format(seed) for name in names] pack = [ RGFRegressor(max_leaf=1000, reg_depth=5, min_samples_leaf=100, normalize=True), ElasticNet(alpha=0.2, l1_ratio=0.2, random_state=0), BayesianRidge(), OrthogonalMatchingPursuit() ] # train models zoo = TrendsModelSklearn(pack, seed=seed) zoo.fit([train_for_score] * 4, y) score_blend = zoo.blend_oof() pred = zoo.predict([test_for_score] * 4, names) # save oof, pred, models np.save(folder_preds + 'im_score_seed{}.npy'.format(seed), score_blend) np.save(folder_preds + 'im_score_test_seed{}.npy'.format(seed), pred) zoo.save_models(names, folder=folder_models) # V. 
Create dl score print('Creating DL score...') # prepare datasets for pca score train_for_score, test_for_score = scale_select_data( train, test, df_scale, dl_cols) # define models names = ['RGF', 'ENet', 'BRidge'] names = [name + '_dl_seed{}'.format(seed) for name in names] pack = [ RGFRegressor(max_leaf=1000, reg_depth=5, min_samples_leaf=100, normalize=True), ElasticNet(alpha=0.2, l1_ratio=0.2, random_state=0), BayesianRidge() ] # train models zoo = TrendsModelSklearn(pack, seed=seed) zoo.fit([train_for_score] * 3, y) score_blend = zoo.blend_oof() pred = zoo.predict([test_for_score] * 3, names) # save oof, pred, models np.save(folder_preds + 'dl_score_seed{}.npy'.format(seed), score_blend) np.save(folder_preds + 'dl_score_test_seed{}.npy'.format(seed), pred) zoo.save_models(names, folder=folder_models) # VI. Training and predicting procedure print('Training has started...') print('Reading scores from ', folder_preds) # add scores for prefix in ['fnc', 'agg', 'im', 'pca', 'dl']: train[prefix + '_score'] = np.load(folder_preds + '{}_score_seed{}.npy'.format(prefix, seed)) test[prefix + '_score'] = np.load( folder_preds + '{}_score_test_seed{}.npy'.format(prefix, seed)) score_cols = [c for c in train.columns if c.endswith('_score')] # save df for scaling df_scale = pd.concat([train, test], axis=0) # create differents datasets # linear linear_cols = sorted( list( set(ic_cols + fnc_cols + pca_cols + agg_cols + im_cols) - set(['IC_20']))) train_linear, test_linear = scale_select_data(train, test, df_scale, linear_cols) # kernel kernel_cols = sorted(list(set(ic_cols + pca_cols) - set(['IC_20']))) train_kernel, test_kernel = scale_select_data(train=train, test=test, df_scale=df_scale, cols=kernel_cols, scale_cols=pca_cols) # score sc_cols = sorted(list(set(ic_cols + score_cols) - set(['IC_20']))) train_sc, test_sc = scale_select_data(train, test, df_scale, sc_cols) # dl dict_cols = sorted( list( set(ic_cols + fnc_cols + dl_cols + im_cols + agg_cols) - set(['IC_20']))) train_dl, test_dl = scale_select_data(train, test, df_scale, dict_cols) # learning process on different datasets names = ['MLP', 'RGF', 'SVM', 'BR', 'OMP', 'EN', 'KR'] names = [name + '_seed{}'.format(seed) for name in names] pack = [ MLPRegressor(activation='tanh', random_state=0), RGFRegressor(max_leaf=1500, loss='Abs'), NuSVR(C=10, nu=0.4, kernel='rbf'), BayesianRidge(), OrthogonalMatchingPursuitCV(), ElasticNet(alpha=0.5, l1_ratio=0.7, random_state=0), KernelRidge(kernel='poly', alpha=0.5) ] zoo = TrendsModelSklearn(pack, seed=seed) zoo.fit([train_sc] * 2 + [train_kernel] + [train_linear] * 2 + [train_dl] * 2, y) de_blend = zoo.blend_oof() preds = zoo.predict([test_sc] * 2 + [test_kernel] + [test_linear] * 2 + [test_dl] * 2, names, is_blend=False) # rewrite folders for models and preds folder_models = './models/age/stack/' if not os.path.exists(folder_models): os.makedirs(folder_models) folder_preds = './predicts/age/stack/' if not os.path.exists(folder_preds): os.makedirs(folder_preds) print('Saving models to', folder_models) print('Saving predictions to', folder_preds) # save oofs and models zoo.save_oofs(names, folder=folder_preds) zoo.save_models(names, folder=folder_models) # stacking predictions print('Stacking predictions...') folds = KFold(n_splits=10, shuffle=True, random_state=0) stack = pd.DataFrame(zoo.oof_preds).T stack.columns = names model_stacker_rgf = RGFRegressor(max_leaf=1000, reg_depth=25, verbose=False) rgf_pred = cross_val_predict(model_stacker_rgf, stack, y.dropna(), cv=folds, n_jobs=-1) model_stacker_br 
= BayesianRidge()
    br_pred = cross_val_predict(model_stacker_br,
                                stack,
                                y.dropna(),
                                cv=folds,
                                n_jobs=-1)
    model_stacker_rgf.fit(stack, y.dropna())
    model_stacker_br.fit(stack, y.dropna())

    # save models
    save_pickle(model_stacker_br,
                folder_models + 'BRidge_stack_seed{}'.format(seed))
    save_pickle(model_stacker_rgf,
                folder_models + 'RGF_stack_seed{}'.format(seed))

    print('Final age NMAE: {:.5f}'.format(
        NMAE(y, 0.75 * br_pred + 0.25 * rgf_pred)))

    test_preds = pd.DataFrame(preds).T
    test_preds.columns = names

    age_prediction = pd.DataFrame()
    age_prediction['Id'] = test['Id'].values
    age_prediction['pred'] = 0.25 * model_stacker_rgf.predict(
        test_preds) + 0.75 * model_stacker_br.predict(test_preds)
    age_prediction.to_csv(folder_preds + 'age_stack_seed{}.csv'.format(seed),
                          index=False)
    print('age seed pred is saved as',
          folder_preds + 'age_stack_seed{}.csv'.format(seed))
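# The '_seed{}' suffixes above suggest run() is executed for several seeds and
# the per-seed stack predictions are combined afterwards. A hypothetical driver
# under that assumption; the seed list is illustrative, not from the source.
if __name__ == '__main__':
    for seed in (0, 1, 2):
        run(seed)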
def batch_size_scenario(): """ Generate embeddings using different batch sizes for the ~1000 vertex polblogs network :return: """ import visualisation s = datetime.datetime.now() y_path = '../../local_resources/political_blogs/y.p' x_path = '../../local_resources/political_blogs/X.p' y = utils.read_pickle(y_path) log_path = '../../local_resources/tf_logs/polblogs/' walk_path = '../../local_resources/political_blogs/walks_n1_l10.csv' size = 2 # dimensionality of the embedding batch_sizes = [1, 2, 4, 8, 16, 32, 64, 128] embeddings = [] for batch_size in batch_sizes: params = Params(walk_path, batch_size=batch_size, embedding_size=size, neg_samples=5, skip_window=5, num_pairs=1500, statistics_interval=10.0, initial_learning_rate=0.1, save_path=log_path, epochs=5, concurrent_steps=4) path = '../../local_resources/political_blogs/embeddings/Win_batch_{}_{}.csv'.format( batch_size, utils.get_timestamp()) embedding_in, embedding_out = HCE.main(params) visualisation.plot_poincare_embedding( embedding_in, y, '../../results/political_blogs/figs/poincare_polar_Win_batch_{}_{}.pdf' .format(batch_size, utils.get_timestamp())) visualisation.plot_poincare_embedding( embedding_out, y, '../../results/political_blogs/figs/poincare_polar_Wout_batch_{}_{}.pdf' .format(batch_size, utils.get_timestamp())) df_in = pd.DataFrame(data=embedding_in, index=np.arange(embedding_in.shape[0])) df_in.to_csv(path, sep=',') df_out = pd.DataFrame(data=embedding_out, index=np.arange(embedding_out.shape[0])) df_out.to_csv( '../../local_resources/political_blogs/embeddings/Wout_batch_{}_{}.csv' .format(batch_size, utils.get_timestamp()), sep=',') print('political blogs embedding generated in: ', datetime.datetime.now() - s) embeddings.append(embedding_in) x, y = utils.read_data(x_path, y_path, threshold=0) names = [[str(batch_size)] for batch_size in batch_sizes] n_folds = 10 results = run_detectors.run_all_datasets(embeddings, y, names, classifiers, n_folds) all_results = utils.merge_results(results, n_folds) results, tests = utils.stats_test(all_results) tests[0].to_csv('../../results/political_blogs/batch_size_pvalues' + utils.get_timestamp() + '.csv') tests[1].to_csv('../../results/political_blogs/batch_size_pvalues' + utils.get_timestamp() + '.csv') print('macro', results[0]) print('micro', results[1]) macro_path = '../../results/political_blogs/batch_size_macro' + utils.get_timestamp( ) + '.csv' micro_path = '../../results/political_blogs/batch_size_micro' + utils.get_timestamp( ) + '.csv' results[0].to_csv(macro_path, index=True) results[1].to_csv(micro_path, index=True) return path
def train(args): #WSGAN related params lambda_gp = 10 n_critic = 5 hyparam_list = [ ("model", args.model_name), ("cube", args.cube_len), ("bs", args.batch_size), ("g_lr", args.g_lr), ("d_lr", args.d_lr), ("z", args.z_dis), ("bias", args.bias), ] hyparam_dict = OrderedDict(((arg, value) for arg, value in hyparam_list)) log_param = make_hyparam_string(hyparam_dict) print(log_param) #define different paths pickle_path = "." + args.pickle_dir + log_param image_path = args.output_dir + args.image_dir + log_param pickle_save_path = args.output_dir + args.pickle_dir + log_param N = None # None for the whole dataset VOL_SIZE = 64 train_path = pathlib.Path("../Vert_dataset") dataset = VertDataset(train_path, n=N, transform=transforms.Compose( [ResizeTo(VOL_SIZE), transforms.ToTensor()])) print('Number of samples: ', len(dataset)) dset_loaders = torch.utils.data.DataLoader(dataset, batch_size=args.batch_size, shuffle=False, num_workers=0) print('Number of batches: ', len(dset_loaders)) # Build the model D = _D(args) G = _G(args) #Create the solvers D_solver = optim.Adam(D.parameters(), lr=args.d_lr, betas=args.beta) G_solver = optim.Adam(G.parameters(), lr=args.g_lr, betas=args.beta) if torch.cuda.device_count() > 1: D = nn.DataParallel(D) G = nn.DataParallel(G) print("Using {} GPUs".format(torch.cuda.device_count())) D.cuda() G.cuda() elif torch.cuda.is_available(): print("using cuda") D.cuda() G.cuda() #Load checkpoint if available read_pickle(pickle_path, G, G_solver, D, D_solver) G_losses = [] D_losses = [] for epoch in range(args.n_epochs): epoch_start_time = time.time() print("epoch %d started" % (epoch)) for i, X in enumerate(dset_loaders): #print(X.shape) X = X.view(-1, args.cube_len * args.cube_len * args.cube_len) X = var_or_cuda(X) X = X.type(torch.cuda.FloatTensor) Z = generateZ(num_samples=X.size(0), z_size=args.z_size) #Train the critic d_loss, Wasserstein_D, gp = train_critic(X, Z, D, G, D_solver, G_solver) # Train the generator every n_critic steps if i % n_critic == 0: Z = generateZ(num_samples=X.size(0), z_size=args.z_size) g_loss = train_gen(Z, D, G, D_solver, G_solver) #Log each iteration iteration = str(G_solver.state_dict()['state'][ G_solver.state_dict()['param_groups'][0]['params'][0]]['step']) print('Iter-{}; , D_loss : {:.4}, G_loss : {:.4}, WSdistance : {:.4}, GP : {:.4}'.format(iteration, d_loss.item(), \ g_loss.item(), Wasserstein_D.item(), gp.item() )) ## End of epoch epoch_end_time = time.time() #Plot the losses each epoch G_losses.append(g_loss.item()) D_losses.append(d_loss.item()) plot_losess(G_losses, D_losses, epoch) if (epoch + 1) % args.image_save_step == 0: print("Saving voxels") Z = generateZ(num_samples=8, z_size=args.z_size) gen_output = G(Z) samples = gen_output.cpu().data[:8].squeeze().numpy() samples = samples.reshape(-1, args.cube_len, args.cube_len, args.cube_len) Save_Voxels(samples, image_path, iteration) if (epoch + 1) % args.pickle_step == 0: print("Pickeling the model") save_new_pickle(pickle_save_path, iteration, G, G_solver, D, D_solver) print("epoch time", (epoch_end_time - epoch_start_time) / 60) print("epoch %d ended" % (epoch)) print("################################################")
from __future__ import print_function

import numpy as np
import random
# import argparse
# import matplotlib as mpl
# mpl.use('Agg', warn=False)
import matplotlib.pyplot as plt

from nn_mnist_jellyfish import NeuralNetwork
# import mnist
import skl
import utils

imgs1 = skl.get_imgs_by_number(0)
imgs2 = skl.get_imgs_by_number(3)
strength_matrix = utils.read_pickle('pkl/nn_mnist_jellyfish_0.pkl')

# for label, img in imgs1:
propagated_1 = []
propagated_2 = []
r = range(np.min([len(imgs1), len(imgs2)]))
index = random.choice(r)

for i in range(10000):
    propagated_1.append(
        NeuralNetwork.validate(imgs1[index][1], strength_matrix))
    propagated_2.append(
        NeuralNetwork.validate(imgs2[index][1], strength_matrix))

propagated_1 = np.array(propagated_1)
propagated_2 = np.array(propagated_2)

fig, axes = plt.subplots(2, 1, figsize=(3, 3))
# Create model
msg_net = nn.Sequential(
    nn.Linear(4, args.hs_1), nn.Tanh(),
    nn.Linear(args.hs_1, args.hs_1), nn.Tanh(),
    nn.Linear(args.hs_1, args.hs_1), nn.Tanh(),
    nn.Linear(args.hs_1, args.d))

aggr_net = nn.Sequential(
    nn.Linear(args.d + 1, args.hs_1), nn.Tanh(),
    nn.Linear(args.hs_1, args.hs_1), nn.Tanh(),
    nn.Linear(args.hs_1, args.hs_1), nn.Tanh(),
    nn.Linear(args.hs_1, 1))

model = ModelDirichlet(aggr_net, msg_net)
model.apply(utils.weights_init)
F = DynamicsFunction(model).to(device)
print("Num. of params: {:d}".format(utils.get_parameters_count(model)))

data = utils.read_pickle(['t', 'x', 'u', 'bcs_dicts'], args.data_path)
dataset = utils.generate_torchgeom_dataset(data)
bcs_dicts = data['bcs_dicts']

if args.batch_size is None:
    batch_size = len(dataset)
else:
    batch_size = args.batch_size

loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
optimizer = optim.Rprop(F.parameters(), lr=args.lr, step_sizes=(1e-8, 10.))
loss_fn = nn.MSELoss()

# Training
ts = dataset[0].t.shape[0]  # assumes the same time grid for all sim-s.
import argparse
import datetime

import numpy as np

import skl
import utils
import sys
# NeuralNetwork is assumed to be imported elsewhere in the original script.

parser = argparse.ArgumentParser()
parser.add_argument('-n', action="store", dest="number")
parser.add_argument('-i',
                    action="store",
                    dest="iterations",
                    default=25000,
                    help="default: 25000")
args = parser.parse_args()

number = int(args.number)
iterations = int(args.iterations)

best_images = utils.read_pickle('best_images.pkl')

pf = lambda x: (1 / (1 + np.exp(-1 * 10 * x)) - .5) * 1.8 + .05
nn = NeuralNetwork(strength_function=pf, image_scale=8)

img = best_images[number]
print('%s' % number)

start_time = datetime.datetime.now()
for i in range(iterations):
    nn.propagate_once(img, gray_max=16)
end_time = datetime.datetime.now()

print('start time:', start_time, 'stop time: ', end_time)
    lambda img, matrix: NeuralNetwork.validate_linear(
        img, matrix, power=3, weight=100),
    lambda img, matrix: NeuralNetwork.validate_threshold(
        img, matrix, power=3, threshhold=.2, weight=10),
    lambda img, matrix: NeuralNetwork.validate_threshold_2(
        img, matrix, power=3, weight=100)
]
# print('threshhold %s power %s weight %s' % (threshhold, power, weight))

if num >= 0:
    imgs = skl.get_imgs_by_number(num)
else:
    imgs = skl.get_imgs_by_number()
# print('test imgs #', size)

strength_matrix_l = [
    utils.read_pickle('pkl/nn_growable_' + str(i) + '.pkl') for i in range(10)
]

correct = .0
trails = .0
for i in range(iterations):
    label, img = random.choice(imgs)
    scores_a = np.array([
        validators[0](img, strength_matrix)
        for strength_matrix in strength_matrix_l
    ])
    if label == random.choice(np.where(scores_a == scores_a.max())[0]):
        correct += 1
    if not (i % 1000):
        # print(label, scores_a)
        pass
args = parser.parse_args()
iterations = int(args.iterations)
num = int(args.num)
'''
threshhold = 0.8
weight = 100
print('threshhold %s weight %s' % (threshhold, weight))
'''

if num >= 0:
    imgs = skl.get_imgs_by_number(num)
else:
    imgs = skl.get_imgs_by_number()

strength_matrix_l = [
    utils.read_pickle('pkl/nn_meshed_' + str(i) + '.pkl') for i in range(10)
]

correct = .0
trails = .0
for i in range(iterations):
    trails += 1
    label, img = random.choice(imgs)
    scores_a = np.array([
        NeuralNetwork.validate(img, strength_matrix, gray_max=16.)
        for strength_matrix in strength_matrix_l
    ])
    if label == random.choice(np.where(scores_a == scores_a.max())[0]):
        correct += 1
    if not (i % 10) and i > 0:
        # print(round(correct / trails * 100, 2), label, scores_a)
        pass

if num >= 0:
import pickle

import numpy as np

from utils import sample
from utils import read_pickle
from utils import print_sample

## Generate Names
ix_to_char, char_to_ix = read_pickle('datasets/ix_char_ix.pickle')[0]


def generate(parameters, seed, names=20):
    for name in range(names):
        # Sample indices and print them
        sampled_indices = sample(parameters, char_to_ix, seed)
        print_sample(sampled_indices, ix_to_char)
        seed += 1


def main():
    print('Program Loaded!')
    seed = np.random.randint(100)
    sel = input('1 for Boys Names\n2 for Girls Names\n3 for Combined\n>>>')
    if sel in ['1', '2', '3']:
        names = int(input('ENTER NUMBER OF NAMES TO BE DISPLAYED\n>>>'))
        print('>>>')
        if sel == '1':
            parameters = read_pickle('datasets/boy_params.txt')[0]
            generate(parameters, seed, names)
        if sel == '2':
            parameters = read_pickle('datasets/girl_params.txt')[0]
            generate(parameters, seed, names)
        if sel == '3':
            parameters = read_pickle('datasets/c_params.txt')[0]
            generate(parameters, seed, names)  # pass the requested count here as well
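# The script defines main() but its entry point is not shown; a minimal guard,
# assuming the script is meant to be run directly:
if __name__ == '__main__':
    main()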
device = torch.device(args.device)

msg_net = nn.Sequential(
    nn.Linear(6, args.hs_1), nn.Tanh(),
    nn.Linear(args.hs_1, args.hs_1), nn.Tanh(),
    nn.Linear(args.hs_1, args.hs_1), nn.Tanh(),
    nn.Linear(args.hs_1, args.d))

aggr_net = nn.Sequential(
    nn.Linear(args.d + 2, args.hs_1), nn.Tanh(),
    nn.Linear(args.hs_1, args.hs_1), nn.Tanh(),
    nn.Linear(args.hs_1, args.hs_1), nn.Tanh(),
    nn.Linear(args.hs_1, 2))

model = Model(aggr_net, msg_net)
F = DynamicsFunction(model).to(device)
F.load_state_dict(torch.load(args.model_path, map_location=device))

data = utils.read_pickle(['t', 'x', 'u'], args.data_path)
dataset = utils.generate_torchgeom_dataset(data)
loader = DataLoader(dataset, batch_size=1, shuffle=False)

# Loss
loss_fn = nn.MSELoss()

# Testing
diffs_over_time = []
losses = torch.zeros(len(loader))
inds_of_sims_to_show = set([0])

with torch.no_grad():
    for i, dp in enumerate(loader):
        edge_index = dp.edge_index
import sys

import tensorflow as tf

# Initialize the arguments
try:
    NUM_EPOCHS = int(sys.argv[1])
    NUM_OUTLIERS = int(sys.argv[2])
    # note: bool() of any non-empty string is True
    is_trainable = bool(sys.argv[3])
except (IndexError, ValueError):  # fall back to defaults when CLI args are missing or invalid
    NUM_EPOCHS = 10
    NUM_OUTLIERS = 60  # Estimation of #outliers
    is_trainable = True
BATCH_SIZE = 32  # 64

# Read the pickle file
X_train = read_pickle('../audio_data/X_train4d.pkl')
X_test = read_pickle('../audio_data/X_test4d.pkl')
Y_train = read_pickle('../audio_data/Y_train1d.pkl')
print("The shape of X_train/X_test/Y_train: ", X_train.shape, X_test.shape,
      Y_train.shape)

# Instantiate the model
bigan = BIGAN(X_train.shape[1], X_train.shape[2], X_train.shape[3])

if is_trainable:
    # Training the BiGAN
    bigan.train_by_batch(X_train, epochs=NUM_EPOCHS, batch_size=BATCH_SIZE)
    # bilstm.train_all(X_train_, Y_train_, BATCH_SIZE, NUM_EPOCHS)
else:
    # Restore the checkpoint
    checkpoint_dir = './runs/checkpoint_bigan'
    checkpoint = tf.train.Checkpoint()
import numpy as np
import random
# import argparse
import matplotlib as mpl
mpl.use('Agg')  # select the backend before importing pyplot; the `warn` kwarg no longer exists
import matplotlib.pyplot as plt
# import mnist
# import seaborn as sns
# import sys

import skl
import utils

iters = 1 * 10**4
num = 6

imgs_l = [skl.get_imgs_by_number(i) for i in range(10)]
strength_matrix_l = [
    utils.read_pickle('pkl/nn_meshed_' + str(i) + '.pkl') for i in range(10)
]

fig, axes = plt.subplots(1, 1, figsize=(9, 3), sharex=True, sharey=True)
# axes = axes.flatten()

imgs = imgs_l[num]
index = random.choice(range(len(imgs)))
index = 3
print('index of testing img: ', index)
img = imgs[index][1]
# skl.show(img)

# for j, matrix in enumerate(strength_matrix_l):
if True:
    results_l = [[], [], [], [], [], [], [], [], [], []]
def train(args): hyparam_list = [("model", args.model_name), ("cube", args.cube_len), ("bs", args.batch_size), ("g_lr", args.g_lr), ("d_lr", args.d_lr), ("z", args.z_dis), ("bias", args.bias), ("sl", args.soft_label)] hyparam_dict = OrderedDict(((arg, value) for arg, value in hyparam_list)) log_param = make_hyparam_string(hyparam_dict) print(log_param) # for using tensorboard if args.use_tensorboard: import tensorflow as tf summary_writer = tf.summary.FileWriter(args.output_dir + args.log_dir + log_param) def inject_summary(summary_writer, tag, value, step): summary = tf.Summary( value=[tf.Summary.Value(tag=tag, simple_value=value)]) summary_writer.add_summary(summary, global_step=step) inject_summary = inject_summary # datset define dsets_path = args.input_dir + args.data_dir print(dsets_path) dsets = SetDataset(dsets_path, args) dset_loaders = torch.utils.data.DataLoader(dsets, batch_size=args.batch_size, shuffle=True, num_workers=1) # model define D = _D(args) G = _G(args) D_solver = optim.Adam(D.parameters(), lr=args.d_lr, betas=args.beta) G_solver = optim.Adam(G.parameters(), lr=args.g_lr, betas=args.beta) if args.lrsh: D_scheduler = MultiStepLR(D_solver, milestones=[500, 1000]) if torch.cuda.is_available(): print("using cuda") D.cuda() G.cuda() criterion = nn.BCELoss() # pickle_path = args.pickle_dir pickle_path = 'pickle' read_pickle(pickle_path, G, G_solver, D, D_solver) for epoch in range(args.n_epochs): for i, X in enumerate(dset_loaders): # print(X.size()) X = var_or_cuda(X) if X.size()[0] != int(args.batch_size): # print("batch_size != {} drop last incompatible batch".format(int(args.batch_size))) continue Z = generateZ(args) real_labels = var_or_cuda(torch.ones(args.batch_size)) fake_labels = var_or_cuda(torch.zeros(args.batch_size)) if args.soft_label: real_labels = var_or_cuda( torch.Tensor(args.batch_size).uniform_(0.7, 1.2)) fake_labels = var_or_cuda( torch.Tensor(args.batch_size).uniform_(0, 0.3)) # ============= Train the discriminator =============# d_real = D(X) d_real_loss = criterion(d_real, real_labels) fake = G(Z) d_fake = D(fake) d_fake_loss = criterion(d_fake, fake_labels) d_loss = d_real_loss + d_fake_loss d_real_acu = torch.ge(d_real.squeeze(), 0.5).float() d_fake_acu = torch.le(d_fake.squeeze(), 0.5).float() d_total_acu = torch.mean(torch.cat((d_real_acu, d_fake_acu), 0)) if d_total_acu <= args.d_thresh: D.zero_grad() d_loss.backward() D_solver.step() # build graph """ with summary_writer as w: w.add_graph(D, X) w.add_graph(G, Z) w.close() exit() """ # =============== Train the generator ===============# Z = generateZ(args) fake = G(Z) d_fake = D(fake) g_loss = criterion(d_fake, real_labels) D.zero_grad() G.zero_grad() g_loss.backward() G_solver.step() # ==================== Save Good Results ================# iteration = str(G_solver.state_dict()['state'][ G_solver.state_dict()['param_groups'][0]['params'][0]]['step']) if g_loss < 0.7: samples = fake.cpu().data[:8].squeeze().numpy() image_path = args.output_dir + args.image_dir + log_param if not os.path.exists(image_path): os.makedirs(image_path) SavePloat_Voxels(samples, image_path, iteration) # =============== logging each iteration ===============# if args.use_tensorboard: log_save_path = args.output_dir + args.log_dir + log_param if not os.path.exists(log_save_path): os.makedirs(log_save_path) info = { 'loss/loss_D(x)': d_real_loss, 'loss/loss_D(G(z))': d_fake_loss, 'loss/loss_D': d_loss, 'loss/loss_G': g_loss, 'loss/acc_D': d_total_acu } for tag, value in info.items(): inject_summary(summary_writer, 
                                   tag, value, iteration)

                summary_writer.flush()

        # =============== each epoch save model or save image ===============#
        print(
            'Epoch:{}, Iter-{}; , D_loss : {:.4}, G_loss : {:.4}, D_acu : {:.4}, D_lr : {:.4}'
            .format(epoch, iteration, d_loss, g_loss, d_total_acu,
                    D_solver.state_dict()['param_groups'][0]["lr"]))

        if (epoch + 1) % args.image_save_step == 0:
            samples = fake.cpu().data[:8].squeeze().numpy()
            image_path = args.output_dir + args.image_dir + log_param
            if not os.path.exists(image_path):
                os.makedirs(image_path)
            SavePloat_Voxels(samples, image_path, iteration)

        if (epoch + 1) % args.pickle_step == 0:
            print('saving pickle')
            pickle_save_path = args.output_dir + args.pickle_dir
            save_new_pickle(pickle_save_path, iteration, G, G_solver, D,
                            D_solver)

        if args.lrsh:
            try:
                D_scheduler.step()
            except Exception as e:
                print("fail lr scheduling", e)
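# train() expects an argparse-style namespace carrying the flags referenced
# above (model_name, cube_len, batch_size, g_lr, d_lr, use_tensorboard, ...).
# A hypothetical entry point, assuming a parser is defined elsewhere in the
# project; `get_args` is an assumed helper, not part of the original code.
if __name__ == '__main__':
    args = get_args()  # assumed to return the argparse.Namespace used above
    train(args)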
def get_best_params():
    cv_output = read_pickle('cv_output.pickle')
    best_model_params, top_feat_params, top_model_feat_params, *_ = cv_output
    return top_feat_params, top_model_feat_params
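# Illustrative call of the helper above; assumes cv_output.pickle exists in the
# working directory and unpacks in the order shown there.
top_feat_params, top_model_feat_params = get_best_params()
print(top_feat_params)
print(top_model_feat_params)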
import os

import numpy as np
import utils
import seaborn as sns
import matplotlib.pyplot as plt

plt.style.use("seaborn-colorblind")

LOAD_DIR = "results/env_2d"
LOAD_FILE = "1a_medium_norm_lambda.pickle"
LOAD_PATH = os.path.join(LOAD_DIR, LOAD_FILE)

NORMS = [0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
LAMBDAS = [0.5, 0.1, 0.05, 0.01, 0.005, 0.001]
RUNS = 20

results = utils.read_pickle(LOAD_PATH)
results_array = np.zeros((len(NORMS), len(LAMBDAS)))
for i, norm in enumerate(NORMS):
    for j, lambda_1 in enumerate(LAMBDAS):
        accuracies = []
        for run_idx in range(RUNS):
            key = (norm, lambda_1, run_idx)
            if key in results:
                accuracies.append(results[key][0])