def main():
    args = parse_args()
    if args.plot_type == 'cluster_count':
        data_file = 'clustering_counts.json'
        axes_titles = ('Layer index', 'Components count')
    elif args.plot_type == 'entropy':
        data_file = 'entropy.json'
        axes_titles = ('Layer index', 'Differential entropy')
    elif args.plot_type == 'relative_entropy':
        data_file = 'entropy_relative.json'
        axes_titles = ('Layer index', 'Relative entropy')
    else:
        raise ValueError("Invalid plot_type value")

    net_plot_data = {}
    for root_dir in args.root_dirs:
        net_data_path = op.join(root_dir, data_file)
        main.logger.info("Reading plot data from: %s" % net_data_path)
        net_plot_data.update(fs_utils.read_json(net_data_path))

    plot_clustering_results(net_plot_data, args.keys, args.labels, args.output,
                            plot_title=args.plot_title, axes_titles=axes_titles,
                            separate_legend=args.separate_legend)

def save_sum_thresh_to_metadata_file(sum_thresh, out_arr_file):
    metadata_file = op.join(op.dirname(out_arr_file), 'metadata.json')
    key_name = op.splitext(op.basename(out_arr_file))[0]

    if op.exists(metadata_file):
        existing_struct = fs_utils.read_json(metadata_file)
        existing_struct[key_name] = sum_thresh
    else:
        existing_struct = {key_name: sum_thresh}

    fs_utils.write_json(existing_struct, metadata_file)

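# Usage sketch for save_sum_thresh_to_metadata_file (the directory, file names and
# threshold values below are hypothetical, not taken from the repo): repeated calls for
# arrays written to one directory accumulate their thresholds under the arrays'
# basenames in a shared metadata.json.
def _demo_save_sum_thresh(tmp_dir='/tmp/results'):
    fs_utils.create_dir_if_not_exists(tmp_dir)
    save_sum_thresh_to_metadata_file(0.95, op.join(tmp_dir, 'act_layer0.npy'))
    save_sum_thresh_to_metadata_file(0.90, op.join(tmp_dir, 'act_layer1.npy'))
    # metadata.json in tmp_dir now contains: {"act_layer0": 0.95, "act_layer1": 0.9}
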
def __init__(self, celeb_images, batch_size, crop_size, out_size, scale_img, read_precomputed=True):
    assert scale_img in self.__scale_img_choices__

    self.crop_size = crop_size
    self.out_size = out_size
    self.scale_img = scale_img
    self.celeb_images = celeb_images
    self.batch_size = batch_size

    self.celeb_files_list_test, self.celeb_files_list_train = None, None
    self.celeb_train_arr, self.celeb_test_arr = None, None

    if read_precomputed:
        if op.exists(op.join(celeb_images, 'train.npy')):
            self.logger.info("Dataset exists in precomputed numpy array form, reading from them ...")
            self.celeb_train_arr = np.load(op.join(celeb_images, 'train.npy'))
            self.celeb_test_arr = np.load(op.join(celeb_images, 'test.npy'))
            self.train_size, self.test_size = self.celeb_train_arr.shape[0], self.celeb_test_arr.shape[0]
        else:
            self.celeb_files_list_train = fs_utils.read_json(op.join(celeb_images, 'train.json'))
            self.celeb_files_list_test = fs_utils.read_json(op.join(celeb_images, 'test.json'))
            self.train_size = len(self.celeb_files_list_train)
            self.test_size = len(self.celeb_files_list_test)
    else:
        raise ValueError("Currently, read_precomputed must be True")

    self.logger.info("Celeb faces train/test dataset sizes = (%d, %d)" % (self.train_size, self.test_size))

def __init__(self, train, transform=None, corrupted_labels_path=None):
    if train:
        self.ds_pickle = fs_utils.read_pickle(op.join(base_settings.MINI_IMAGENET_ROOT, 'train.pickle'))
    else:
        self.ds_pickle = fs_utils.read_pickle(op.join(base_settings.MINI_IMAGENET_ROOT, 'test.pickle'))

    if not train and corrupted_labels_path is not None:
        raise ValueError("Don't pass corrupted_labels_path for dataset in test mode")

    if corrupted_labels_path is not None:
        self.ds_pickle['labels'] = np.array(fs_utils.read_json(corrupted_labels_path), dtype=np.long)

    self.transform = transform

def main():
    args = parse_args()
    celeb_ds = celeb.CelebDataset(CELEB_DS_SETTINGS['ds_path'], 64, CELEB_DS_SETTINGS['crop_size'],
                                  CELEB_DS_SETTINGS['image_size'], False, read_precomputed=True)
    img_list = fs_utils.read_json(args.json_file_list)

    out_arr = iterate_and_collect_examples(celeb_ds, img_list)
    np.save(args.out_npy_file, out_arr)

def main():
    args = parse_args()
    train_config = fs_utils.read_json(op.join(op.dirname(args.vae_model_path), 'config.json'))
    input_shape = (args.how_many_per_cluster, train_config['ds']['image_size'],
                   train_config['ds']['image_size'], 3)
    vae_model_obj = train_vae.create_model(train_config, input_shape, False)

    if args.how_many_per_cluster and args.grid_size:
        raise ValueError("Specify either --how_many_per_cluster or --grid_size")

    if args.grid_size and args.mode not in {'joint_mixture', 'factorial_mixture'}:
        raise ValueError("Specify --grid_size only if mode is 'joint_mixture' or 'factorial_mixture'")

    if (args.mode.startswith('joint') or args.mode == 'factorial') and args.unobs_dims_num:
        raise ValueError("--unobs_dims_num option can be given only if mode is [conditional|marginal]")

    if args.mode == 'conditional' or args.mode == 'marginal':
        fs_utils.create_dir_if_not_exists(args.out_vis_path)
        unobs_dims_num = args.unobs_dims_num if args.unobs_dims_num else 1
    else:
        unobs_dims_num = None

    if args.mode.startswith('joint'):
        sampler = latent_space_sampler.LatentSpaceSampler(args.trace_pkl_path)
    else:
        sampler = conditional_latent_space_sampler.ConditionalLatentSpaceSampler(args.trace_pkl_path)

    sample_generator = SampleGenerator(train_config, vae_model_obj, sampler, args.vae_model_path,
                                       args.mode, unobs_dims_num)
    clusters_limit = args.clusters_limit if args.clusters_limit else None
    grid_size = [int(x) for x in args.grid_size.split('x')] if args.grid_size else None

    sample_generator.generate_samples_from_latent_space(args.how_many_per_cluster, grid_size,
                                                        args.out_vis_path, clusters_limit)

def train_vae(args):
    if args.restore_model_path:
        train_args = fs_utils.read_json(op.join(op.dirname(args.restore_model_path), 'config.json'))
        train_args['restore_model_path'] = args.restore_model_path

        if 'reg_type' not in train_args and args.reg_type.startswith('mmd'):
            raise ValueError("No reg_type in restored config, specified reg_type == %s" % args.reg_type)
        if 'delta' not in train_args:
            train_args.update({'delta': None})
        if 'arch' not in train_args:
            train_args.update({'arch': 'standard'})

        dataset = dataset_restore_func(train_args, args.ds_type)
    else:
        dataset = dataset_create_func(args)
        train_args = {'latent_dim': args.latent_dim, 'beta': args.beta, 'ds': dataset.settings(),
                      'gc': args.gc if args.gc else None, 'delta': args.delta if args.delta else None,
                      'reg_type': args.reg_type, 'arch': args.arch}

    dataset.batch_size = args.batch_size
    input_shape = (dataset.batch_size, dataset.img_size(), dataset.img_size(), 3)

    z_sigma_sq = 1.
    if train_args['reg_type'].startswith('mmd'):
        z_sigma_sq = 2.

    train_args.update({'z_sigma_sq': z_sigma_sq})
    vae_model_obj = create_model(train_args, input_shape, True)

    fs_utils.create_dir_if_not_exists(args.out_weights_dir)
    fs_utils.write_json(train_args, op.join(args.out_weights_dir, 'config.json'))

    trainer = trainers.VaeTrainer(dataset, args.out_weights_dir, vae_model_obj, **train_args)
    trainer.train(args.epochs_num)

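# Illustrative shape of the config.json written by train_vae above when training from
# scratch (values are hypothetical; keys mirror the train_args dict built in the function).
# The sampling and visualization entry points in this section read this file back from
# the model directory.
_example_train_config = {
    'latent_dim': 64,
    'beta': 4.,
    'ds': {'image_size': 64},   # dataset.settings(); 'image_size' is consumed by the sampling script
    'gc': None,
    'delta': None,
    'reg_type': 'mmd',          # any value starting with 'mmd' sets z_sigma_sq to 2.
    'arch': 'standard',
    'z_sigma_sq': 2.,
}
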
def main():
    args = parse_args()
    train_config = fs_utils.read_json(op.join(op.dirname(args.vae_model_path), 'config.json'))
    if 'delta' not in train_config:
        train_config.update({'delta': None})

    dataset, write_order = get_dataset_from_train_config(train_config, with_write_order=True)
    input_shape = (args.how_many, dataset.img_size(), dataset.img_size(), 3)

    vae_model_obj = create_model(train_config, input_shape, False)
    visualizer = VaeVisualizer(vae_model_obj, args.how_many, dataset, train_config, write_order)

    visualizer.reconstruct(args.out_vis_path, args.vae_model_path)
    visualizer.generate(args.out_vis_path, args.vae_model_path)

def main():
    args = parse_args()
    fs_utils.create_dir_if_not_exists(args.out_results_dir)

    if 'eigact' in args.in_results_root_dir:
        in_results_dirs = [args.in_results_root_dir]
    else:
        in_results_dirs = [op.join(args.in_results_root_dir, p) for p in os.listdir(args.in_results_root_dir)
                           if op.isdir(op.join(args.in_results_root_dir, p))]

    dims_dict = {}
    for in_dir in in_results_dirs:
        out_dir = op.join(args.out_results_dir, "%s_results" % op.basename(in_dir))
        fs_utils.create_dir_if_not_exists(out_dir)

        main.logger.info("Processing %s" % in_dir)
        clustering_result_list = process_single_model_dir(in_dir)

        data_dim = clustering_result_list[-1]['dim']
        dims_dict[op.basename(in_dir)] = data_dim

        main.logger.info("Clusters num from last iteration: %d" % clustering_result_list[-1]['clusters_num'])
        plot_ll_curves({'model': clustering_result_list}, op.join(out_dir, 'll_plot.png'))
        plot_ll_curves({'model': clustering_result_list}, op.join(out_dir, 'clusters_dynamics.png'),
                       key='clusters_num')

    if op.exists(op.join(args.in_results_root_dir, 'metadata.json')):
        metadata_dict = fs_utils.read_json(op.join(args.in_results_root_dir, 'metadata.json'))
        new_metadata_dict = {k: (metadata_dict[k], int(dims_dict[k])) for k in metadata_dict}
        fs_utils.write_json(new_metadata_dict, op.join(args.out_results_dir, 'metadata.json'))
    else:
        main.logger.info("Metadata file does not exist in %s" % args.in_results_root_dir)

def main():
    args = parse_args()
    beta_traces = calculate_diagonality_of_representation.list_traces_for_betas(
        args.clustering_results_root_dir, args.init_iteration, args.interval)

    if not op.exists(args.out_results_file):
        beta_relative_entropies = {
            beta: estimate_entropy_from_clustering.do_entropy_estimation_for_traces(
                beta_trace_paths, args.samples_num, 'relative')
            for beta, beta_trace_paths in beta_traces.items()}
    else:
        beta_relative_entropies = {float(k): v for k, v in fs_utils.read_json(args.out_results_file).items()}

    fs_utils.write_json(beta_relative_entropies, args.out_results_file)

def main():
    args = parse_args()
    if not op.exists(args.out_results_file):
        beta_traces = calculate_diagonality_of_representation.list_traces_for_betas(
            args.clustering_results_root_dir, args.init_iteration, args.interval)
        beta_cluster_counts = {
            beta: [len(fs_utils.read_pickle(p)['cluster_assignment']) for p in beta_paths]
            for beta, beta_paths in beta_traces.items()}
    else:
        beta_cluster_counts = {float(k): v for k, v in fs_utils.read_json(args.out_results_file).items()}

    fs_utils.write_json(beta_cluster_counts, args.out_results_file)

def main():
    args = parse_args()
    train_config = fs_utils.read_json(op.join(op.dirname(args.vae_model_path), 'config.json'))
    if 'delta' not in train_config:
        train_config.update({'delta': None})

    dataset = visualize_vae.get_dataset_from_train_config(train_config)
    input_shape = (None, dataset.img_size(), dataset.img_size(), 3)

    vae_model_obj = train_vae.create_model(train_config, input_shape, trainable=False)
    vae_model_obj.load_params(args.vae_model_path, only_dec=False)
    main.logger.info("Loaded encoder params from path: %s" % args.vae_model_path)

    latent_codes = do_latent_codes_predictions(dataset, vae_model_obj)
    np.save(args.out_npy_arr, latent_codes)

def __read_corrupted_labels(path):
    from utils import fs_utils
    corrupted_labels_list = fs_utils.read_json(path)
    return [int(label) for label in corrupted_labels_list]

def __keys_to_float(json_path):
    return {float(k): v for k, v in fs_utils.read_json(json_path).items()}

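# The snippets above lean on a small fs_utils module for JSON/pickle I/O and directory
# creation. Its actual implementation is not shown in this section; the following is a
# minimal sketch consistent with how it is called here (read_json(path), write_json(obj,
# path), read_pickle(path), create_dir_if_not_exists(path)), under the assumption that it
# is a thin wrapper over the standard library.
import json
import os
import pickle


def read_json(path):
    # Parse a JSON file and return the resulting object.
    with open(path, 'r') as f:
        return json.load(f)


def write_json(obj, path):
    # Serialize obj to JSON at the given path.
    with open(path, 'w') as f:
        json.dump(obj, f, indent=2)


def read_pickle(path):
    # Load a pickled object from disk.
    with open(path, 'rb') as f:
        return pickle.load(f)


def create_dir_if_not_exists(path):
    # Create the directory (and parents) if it is not already present.
    os.makedirs(path, exist_ok=True)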