def main():
    args = parse_args()
    if args.plot_type == 'cluster_count':
        data_file = 'clustering_counts.json'
        axes_titles = ('Layer index', 'Components count')
    elif args.plot_type == 'entropy':
        data_file = 'entropy.json'
        axes_titles = ('Layer index', 'Differential entropy')
    elif args.plot_type == 'relative_entropy':
        data_file = 'entropy_relative.json'
        axes_titles = ('Layer index', 'Relative entropy')
    else:
        raise ValueError("Invalid plot_type value")

    net_plot_data = {}
    for root_dir in args.root_dirs:
        net_data_path = op.join(root_dir, data_file)

        main.logger.info("Reading plot data from: %s" % net_data_path)
        net_plot_data.update(fs_utils.read_json(net_data_path))

    plot_clustering_results(net_plot_data,
                            args.keys,
                            args.labels,
                            args.output,
                            plot_title=args.plot_title,
                            axes_titles=axes_titles,
                            separate_legend=args.separate_legend)
def save_sum_thresh_to_metadata_file(sum_thresh, out_arr_file):
    metadata_file = op.join(op.dirname(out_arr_file), 'metadata.json')
    key_name = op.splitext(op.basename(out_arr_file))[0]
    if op.exists(metadata_file):
        existing_struct = fs_utils.read_json(metadata_file)
        existing_struct[key_name] = sum_thresh
    else:
        existing_struct = {key_name: sum_thresh}

    fs_utils.write_json(existing_struct, metadata_file)
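# NOTE: fs_utils.read_json / fs_utils.write_json are used throughout these
# snippets. A minimal sketch of what such helpers could look like, assuming
# they are thin wrappers around the standard json module (the project's
# utils.fs_utils implementation may differ):
import json


def read_json(path):
    with open(path, 'r') as f:
        return json.load(f)


def write_json(obj, path):
    with open(path, 'w') as f:
        json.dump(obj, f, indent=2)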
    def __init__(self,
                 celeb_images,
                 batch_size,
                 crop_size,
                 out_size,
                 scale_img,
                 read_precomputed=True):
        assert scale_img in self.__scale_img_choices__

        self.crop_size = crop_size
        self.out_size = out_size
        self.scale_img = scale_img
        self.celeb_images = celeb_images

        self.batch_size = batch_size

        self.celeb_files_list_test, self.celeb_files_list_train = None, None
        self.celeb_train_arr, self.celeb_test_arr = None, None

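        # Prefer precomputed numpy arrays (train.npy / test.npy) when available;
        # otherwise fall back to JSON lists of image file paths (train.json / test.json).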
        if read_precomputed:
            if op.exists(op.join(celeb_images, 'train.npy')):
                self.logger.info(
                    "Dataset exists in precomputed numpy array form, reading from them ..."
                )
                self.celeb_train_arr = np.load(
                    op.join(celeb_images, 'train.npy'))
                self.celeb_test_arr = np.load(op.join(celeb_images,
                                                      'test.npy'))
                self.train_size, self.test_size = self.celeb_train_arr.shape[
                    0], self.celeb_test_arr.shape[0]
            else:
                self.celeb_files_list_train = fs_utils.read_json(
                    op.join(celeb_images, 'train.json'))
                self.celeb_files_list_test = fs_utils.read_json(
                    op.join(celeb_images, 'test.json'))
                self.train_size, self.test_size = len(
                    self.celeb_files_list_train), len(self.celeb_files_list_test)
        else:
            raise ValueError("Currently, read_precomputed must be True")

        self.logger.info("Celeb faces train/test dataset sizes = (%d, %d)" %
                         (self.train_size, self.test_size))
    def __init__(self, train, transform=None, corrupted_labels_path=None):
        if train:
            self.ds_pickle = fs_utils.read_pickle(op.join(base_settings.MINI_IMAGENET_ROOT, 'train.pickle'))
        else:
            self.ds_pickle = fs_utils.read_pickle(op.join(base_settings.MINI_IMAGENET_ROOT, 'test.pickle'))

        if not train and corrupted_labels_path is not None:
            raise ValueError("Don't pass corrupted_labels_path for dataset in test mode")

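        # Optionally replace the original labels with a precomputed corrupted
        # assignment read from JSON (e.g. for label-noise experiments).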
        if corrupted_labels_path is not None:
            self.ds_pickle['labels'] = np.array(fs_utils.read_json(corrupted_labels_path), dtype=np.int64)

        self.transform = transform
def main():
    args = parse_args()

    celeb_ds = celeb.CelebDataset(CELEB_DS_SETTINGS['ds_path'],
                                  64,
                                  CELEB_DS_SETTINGS['crop_size'],
                                  CELEB_DS_SETTINGS['image_size'],
                                  False,
                                  read_precomputed=True)

    img_list = fs_utils.read_json(args.json_file_list)
    out_arr = iterate_and_collect_examples(celeb_ds, img_list)

    np.save(args.out_npy_file, out_arr)
def main():
    args = parse_args()
    train_config = fs_utils.read_json(
        op.join(op.dirname(args.vae_model_path), 'config.json'))

    input_shape = (args.how_many_per_cluster, train_config['ds']['image_size'],
                   train_config['ds']['image_size'], 3)
    vae_model_obj = train_vae.create_model(train_config, input_shape, False)

    if args.how_many_per_cluster and args.grid_size:
        raise ValueError(
            "Specify either --how_many_per_cluster or --grid_size, not both")

    if args.grid_size and (args.mode
                           not in {'joint_mixture', 'factorial_mixture'}):
        raise ValueError(
            "Specify --grid_size only if mode is 'joint_mixture' or 'factorial_mixture'"
        )

    if (args.mode.startswith('joint')
            or args.mode.startswith('factorial')) and args.unobs_dims_num:
        raise ValueError(
            "--unobs_dims_num option can be given only if mode is [conditional|marginal]"
        )

    if args.mode == 'conditional' or args.mode == 'marginal':
        fs_utils.create_dir_if_not_exists(args.out_vis_path)
        unobs_dims_num = args.unobs_dims_num if args.unobs_dims_num else 1
    else:
        unobs_dims_num = None

    if args.mode.startswith('joint'):
        sampler = latent_space_sampler.LatentSpaceSampler(args.trace_pkl_path)
    else:
        sampler = conditional_latent_space_sampler.ConditionalLatentSpaceSampler(
            args.trace_pkl_path)

    sample_generator = SampleGenerator(train_config, vae_model_obj, sampler,
                                       args.vae_model_path, args.mode,
                                       unobs_dims_num)
    clusters_limit = args.clusters_limit if args.clusters_limit else None
    if args.grid_size:
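        # e.g. a --grid_size value of '8x8' is parsed into [8, 8]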
        grid_size = [int(x) for x in args.grid_size.split('x')]
    else:
        grid_size = None

    sample_generator.generate_samples_from_latent_space(
        args.how_many_per_cluster, grid_size, args.out_vis_path,
        clusters_limit)
def train_vae(args):
    if args.restore_model_path:
        train_args = fs_utils.read_json(
            op.join(op.dirname(args.restore_model_path), 'config.json'))
        train_args['restore_model_path'] = args.restore_model_path
        if 'reg_type' not in train_args and args.reg_type.startswith('mmd'):
            raise ValueError(
                "No reg_type in restored config, specified reg_type == %s" %
                args.reg_type)

        if 'delta' not in train_args:
            train_args.update({'delta': None})

        if 'arch' not in train_args:
            train_args.update({'arch': 'standard'})

        dataset = dataset_restore_func(train_args, args.ds_type)
    else:
        dataset = dataset_create_func(args)

        train_args = {
            'latent_dim': args.latent_dim,
            'beta': args.beta,
            'ds': dataset.settings(),
            'gc': args.gc if args.gc else None,
            'delta': args.delta if args.delta else None,
            'reg_type': args.reg_type,
            'arch': args.arch
        }

    dataset.batch_size = args.batch_size
    input_shape = (dataset.batch_size, dataset.img_size(), dataset.img_size(),
                   3)

    z_sigma_sq = 1.
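    # use a wider latent prior variance when an MMD-based regularizer is active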
    if train_args['reg_type'].startswith('mmd'):
        z_sigma_sq = 2.

    train_args.update({'z_sigma_sq': z_sigma_sq})

    vae_model_obj = create_model(train_args, input_shape, True)
    fs_utils.create_dir_if_not_exists(args.out_weights_dir)

    fs_utils.write_json(train_args, op.join(args.out_weights_dir,
                                            'config.json'))
    trainer = trainers.VaeTrainer(dataset, args.out_weights_dir, vae_model_obj,
                                  **train_args)

    trainer.train(args.epochs_num)
def main():
    args = parse_args()
    train_config = fs_utils.read_json(op.join(op.dirname(args.vae_model_path), 'config.json'))
    if 'delta' not in train_config:
        train_config.update({'delta': None})

    dataset, write_order = get_dataset_from_train_config(train_config, with_write_order=True)

    input_shape = (args.how_many, dataset.img_size(), dataset.img_size(), 3)
    vae_model_obj = create_model(train_config, input_shape, False)

    visualizer = VaeVisualizer(vae_model_obj, args.how_many, dataset, train_config, write_order)

    visualizer.reconstruct(args.out_vis_path, args.vae_model_path)
    visualizer.generate(args.out_vis_path, args.vae_model_path)
def main():
    args = parse_args()

    fs_utils.create_dir_if_not_exists(args.out_results_dir)

    if 'eigact' in args.in_results_root_dir:
        in_results_dirs = [args.in_results_root_dir]
    else:
        in_results_dirs = [
            op.join(args.in_results_root_dir, p)
            for p in os.listdir(args.in_results_root_dir)
            if op.isdir(op.join(args.in_results_root_dir, p))
        ]

    dims_dict = {}

    for in_dir in in_results_dirs:
        out_dir = op.join(args.out_results_dir,
                          "%s_results" % op.basename(in_dir))
        fs_utils.create_dir_if_not_exists(out_dir)

        main.logger.info("Processing %s" % in_dir)
        clustering_result_list = process_single_model_dir(in_dir)
        data_dim = clustering_result_list[-1]['dim']

        dims_dict[op.basename(in_dir)] = data_dim

        main.logger.info("Clusters num from last iteration: %d" %
                         clustering_result_list[-1]['clusters_num'])
        plot_ll_curves({'model': clustering_result_list},
                       op.join(out_dir, 'll_plot.png'))
        plot_ll_curves({'model': clustering_result_list},
                       op.join(out_dir, 'clusters_dynamics.png'),
                       key='clusters_num')

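    # Augment each metadata entry with the data dimensionality recovered above,
    # writing (original_value, dim) pairs to the output metadata.json.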
    if op.exists(op.join(args.in_results_root_dir, 'metadata.json')):
        metadata_dict = fs_utils.read_json(
            op.join(args.in_results_root_dir, 'metadata.json'))
        new_metadata_dict = {
            k: (metadata_dict[k], int(dims_dict[k]))
            for k in metadata_dict
        }
        fs_utils.write_json(new_metadata_dict,
                            op.join(args.out_results_dir, 'metadata.json'))
    else:
        main.logger.info("Metadata file does not exist in %s" %
                         args.in_results_root_dir)
def main():
    args = parse_args()
    beta_traces = calculate_diagonality_of_representation.list_traces_for_betas(
        args.clustering_results_root_dir, args.init_iteration, args.interval)

    if not op.exists(args.out_results_file):
        beta_relative_entropies = {
            beta:
            estimate_entropy_from_clustering.do_entropy_estimation_for_traces(
                beta_trace_paths, args.samples_num, 'relative')
            for beta, beta_trace_paths in beta_traces.items()
        }
    else:
        beta_relative_entropies = {
            float(k): v
            for k, v in fs_utils.read_json(args.out_results_file).items()
        }

    fs_utils.write_json(beta_relative_entropies, args.out_results_file)
def main():
    args = parse_args()
    if not op.exists(args.out_results_file):
        beta_traces = calculate_diagonality_of_representation.list_traces_for_betas(
            args.clustering_results_root_dir, args.init_iteration,
            args.interval)
        beta_cluster_counts = {
            beta: [
                len(fs_utils.read_pickle(p)['cluster_assignment'])
                for p in beta_paths
            ]
            for beta, beta_paths in beta_traces.items()
        }
    else:
        beta_cluster_counts = {
            float(k): v
            for k, v in fs_utils.read_json(args.out_results_file).items()
        }
    fs_utils.write_json(beta_cluster_counts, args.out_results_file)
def main():
    args = parse_args()
    train_config = fs_utils.read_json(
        op.join(op.dirname(args.vae_model_path), 'config.json'))
    if 'delta' not in train_config:
        train_config.update({'delta': None})

    dataset = visualize_vae.get_dataset_from_train_config(train_config)
    input_shape = (None, dataset.img_size(), dataset.img_size(), 3)

    vae_model_obj = train_vae.create_model(train_config,
                                           input_shape,
                                           trainable=False)

    vae_model_obj.load_params(args.vae_model_path, only_dec=False)
    main.logger.info("Loaded encoder params from path: %s" %
                     args.vae_model_path)

    latent_codes = do_latent_codes_predictions(dataset, vae_model_obj)
    np.save(args.out_npy_arr, latent_codes)
    def __read_corrupted_labels(path):
        from utils import fs_utils

        corrupted_labels_list = fs_utils.read_json(path)
        return [int(label) for label in corrupted_labels_list]
    def __keys_to_float(json_path):
        return {float(k): v for k, v in fs_utils.read_json(json_path).items()}