Example #1
def main():
    torch.set_default_dtype(torch.float64)

    save_dir = os.path.join(args.output_dir, args.manifold)
    check_mkdir(save_dir, increment=False)

    # generate the samples on the manifold, uniformly around "origin"
    man = build_manifold(args.manifold)[0]
    if args.use_rs:
        samples = gen_samples_rs(man, args.num_nodes, args.radius)
    elif args.use_gen2:
        samples = gen_samples2(man, args.num_nodes, args.radius)
    else:
        samples = gen_samples(man, args.num_nodes, args.radius)
    torch.save(samples, os.path.join(save_dir, 'xs.pt'))
    dists = squareform1(man.pdist(samples))
    plot_distances(dists, save_dir)

    # create the graph
    g = nx.Graph()
    # np.argwhere over the square matrix yields both (i, j) and (j, i) plus
    # the diagonal; nx.Graph deduplicates edges and the self-loops (from the
    # zero diagonal) are removed explicitly
    g.add_edges_from(np.argwhere(dists.numpy() < args.radius))
    g.remove_edges_from(nx.selfloop_edges(g))

    # plots
    plot_degree_distribution(g, save_dir)
    plot_points(man, samples, args.radius, save_dir)
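Note: the main() snippets on this page are excerpts. They assume the surrounding script's module-level imports (os, numpy as np, torch, networkx as nx) and a parsed args namespace providing the command-line options they read.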
Example #2
def grid_fn(fn, man, radius, output_dir):
    # sample
    xs = fn(man, args.num_nodes, radius)
    pdists = man.pdist(xs)

    # save and plot the pdists
    path = os.path.join(output_dir, 'pdists.npy')
    np.save(path, pdists.numpy())
    path = os.path.join(output_dir, 'pdists.pdf')
    plot_distances(pdists, path, axvline=radius, xmax=2 * radius)

    # save and plot the distances from zero too
    zeros = man.zero(len(xs))
    zero_dists = man.dist(zeros, xs)
    path = os.path.join(output_dir, 'zero_dists.npy')
    np.save(path, zero_dists.numpy())  # convert the tensor, as for pdists
    path = os.path.join(output_dir, 'zero_dists.pdf')
    plot_distances(zero_dists, path, xmax=radius)

    pdists = squareform1(pdists).numpy()
    thresholds = np.linspace(radius / 10, radius, args.num_thresholds)
    for threshold in thresholds:
        # create the graph
        g = nx.Graph()
        g.add_edges_from(np.argwhere(pdists < threshold))
        g.remove_edges_from(nx.selfloop_edges(g))

        # save it
        save_dir = os.path.join(output_dir, f'{threshold:.2f}')
        check_mkdir(save_dir, increment=False)
        torch.save(xs, os.path.join(save_dir, 'xs.pt'))
        nx.write_edgelist(g, os.path.join(save_dir, 'graph.edges.gz'))
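grid_fn is not invoked directly in this example; Example #6 below submits it to a process pool, once per (dimension, manifold, radius) combination. For each radius it sweeps args.num_thresholds connection thresholds between radius/10 and radius, writing one graph per threshold into its own subdirectory.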
Example #3
def main():
    torch.set_default_dtype(torch.float64)

    save_dir = os.path.join(args.output_dir, args.manifold)
    check_mkdir(save_dir, increment=False)
    man = build_manifold(args.manifold)[0]

    # generate the samples on the manifold, uniformly around "origin"
    if args.use_rs:
        samples = gen_samples_rs(man, args.num_nodes, args.radius,
                                 np.sqrt(args.curvature_r_squared))
    else:
        samples = gen_samples_exp(man, args.num_nodes, args.radius)
    plot_points(man, samples, args.radius, save_dir)

    # the pairwise distances
    torch.save(samples, os.path.join(save_dir, 'xs.pt'))
    dists = squareform1(pdists(man, samples))
    plot_distances(dists, save_dir)

    # create the graph
    g = nx.Graph()
    g.add_edges_from(np.argwhere(dists.numpy() < args.radius))
    g.remove_edges_from(nx.selfloop_edges(g))
    # save it
    filename = '{}_n{}_r{:.2f}'.format(args.manifold, args.num_nodes,
                                       args.radius)
    filename = filename.replace('.', 'p') + '.edges.gz'
    nx.write_edgelist(g, os.path.join(save_dir, filename))

    # plots
    plot_degree_distribution(g, save_dir)
    plot_graph_distances(g, save_dir)
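Compared with Example #1, this variant also writes the graph to disk, encoding the manifold, node count, and radius into the file name (dots replaced by 'p' so the radius value does not interfere with the file extension).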
Example #4
    def __init__(self, **kwargs):
        self.__dict__.update(default_attrs_)
        self.__dict__.update(kwargs)

        # SANITY CHECKS
        if not self.embedding or not self.optimizer or not self.objective_fn:
            raise ValueError('`embedding`, `optimizer`, and `objective_fn` '
                             'must be specified to construct a TrainingEngine.')
        if self.burnin_lower_lr and self.burnin_higher_lr:
            raise ValueError('`burnin_lower_lr` and `burnin_higher_lr` are '
                             'mutually exclusive.')

        # DEFAULTS:
        # - one optimizer
        if not isinstance(self.optimizer, (tuple, list)):  # For legacy code.
            self.optimizer = [self.optimizer]
        # - one lr scheduler
        if self.lr_scheduler is not None and \
                not isinstance(self.lr_scheduler, (tuple, list)):
            self.lr_scheduler = [self.lr_scheduler]
        # - no burn-in epochs
        if self.burnin_epochs is None:
            self.burnin_epochs = 0
        # - do not perturb
        if self.perturb_every_epochs is None:
            self.perturb_every_epochs = self.n_epochs + 1
        # - do not stabilize
        if self.stabilize_every_epochs is None:
            self.stabilize_every_epochs = self.n_epochs + 1
        # - Pearson R and the average distortion as the default metrics
        if self.metrics is None:
            self.metrics = ['pearsonr', 'average_distortion']
        # - the first in the metrics list as the main metric
        if self.main_metric_idx is None:
            self.main_metric_idx = 0
        # - do not evaluate
        if self.val_every_epochs is None:
            self.val_every_epochs = self.n_epochs + 1
        # - save metrics at the last validation
        if self.save_metrics_every_epochs is None:
            self.save_metrics_every_epochs = self.n_epochs // \
                    self.val_every_epochs * self.val_every_epochs
        # - save at the very end
        if self.save_every_epochs is None:
            self.save_every_epochs = self.n_epochs
        # - temporary save dir
        if self.save_dir is None:
            self.save_dir = tempfile.gettempdir()
            check_mkdir(self.save_dir)
            logger.info('The save dir is (%s)', self.save_dir)

        # load the states if the snapshot path is given
        if self.snapshot_path:
            self._load()
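Per the sanity checks above, only embedding, optimizer, and objective_fn are required; everything else falls back to default_attrs_ or to the defaults derived in __init__. A minimal construction sketch follows, assuming a hypothetical embedding object with an xs parameter (mirroring Example #8 below) and a hypothetical stress_loss objective; it is not the library's prescribed usage.

# Hypothetical sketch: only the three required keyword arguments are
# mandated by the sanity checks in __init__ above.
engine = TrainingEngine(
    embedding=embedding,                           # assumed embedding object
    optimizer=torch.optim.Adam([embedding.xs], lr=1e-2),
    objective_fn=stress_loss,                      # assumed loss callable
    n_epochs=100,    # several of the defaults above are derived from n_epochs
)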
Example #5
def main():
    torch.set_default_dtype(torch.float64)
    logging.getLogger().setLevel(logging.DEBUG)

    save_dir = os.path.join(args.output_dir, args.manifold)
    check_mkdir(save_dir, increment=False)
    man = build_manifold(args.manifold)[0]

    # generate the samples
    xs = random_walk_graph(man, args.num_nodes, args.radius)
    torch.save(xs, os.path.join(save_dir, 'xs.pt'))
    plot_points(man, xs, args.radius, save_dir)

    # the pairwise distances
    pdists = man.pdist(xs)
    xmax = args.dist_limit if args.dist_limit else 2 * args.radius
    plot_distances(pdists, os.path.join(save_dir, 'pdists.pdf'), xmax)
    pdists = squareform1(pdists)

    # the distances from 0
    zeros = man.zero(len(xs))
    zero_dists = man.dist(zeros, xs)
    xmax = args.radius
    plot_distances(zero_dists, os.path.join(save_dir, 'zero_dists.pdf'), xmax)

    # create the graph
    g = nx.Graph()
    threshold = args.link_distance if args.link_distance else args.radius
    g.add_edges_from(np.argwhere(pdists.numpy() < threshold))
    g.remove_edges_from(nx.selfloop_edges(g))

    # save it
    filename = '{}_n{}_r{:.2f}_ld{:.2f}'.format(args.manifold, args.num_nodes,
                                                args.radius, threshold)
    filename = filename.replace('.', 'p') + '.edges.gz'
    nx.write_edgelist(g, os.path.join(save_dir, filename))

    # plots
    plot_degree_distribution(g, save_dir)
    plot_graph_distances(g, save_dir)
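Unlike the i.i.d. sampling in Examples #1 and #3, this variant grows the point set by a random walk and decouples the link threshold (args.link_distance, falling back to args.radius) from the sampling radius.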
Example #6
def main():
    torch.set_default_dtype(torch.float64)

    if args.gen_fn == 'rw':
        # NOTE: arguments submitted to the ProcessPoolExecutor below must be
        # picklable, which a lambda is not; functools.partial is, assuming
        # random_walk_graph accepts burnin and take_every as keywords.
        fn = functools.partial(random_walk_graph,
                               burnin=args.burnin,
                               take_every=args.take_every)
    elif args.gen_fn == 'rs':
        fn = gen_samples_rs
    elif args.gen_fn == 'exp':
        fn = gen_samples
    else:
        raise ValueError(f'unknown gen_fn: {args.gen_fn}')

    with ProcessPoolExecutor(max_workers=args.num_cpus) as pool:
        futures = []
        for dim in args.dims:
            for manifold in args.manifolds:
                man, = build_manifold(manifold, dim)
                for radius in args.radii:
                    save_dir = os.path.join(args.output_dir, str(dim),
                                            f'{radius:.2f}', manifold)
                    check_mkdir(save_dir, increment=False)
                    f = pool.submit(grid_fn, fn, man, radius, save_dir)
                    futures.append(f)
        for f in futures:
            f.result()  # block until completion; re-raises worker exceptions
Example #7
def make_exp_dir(*args):
    save_dir = os.path.join(*args)
    check_mkdir(save_dir, increment=False)
    return save_dir
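A hypothetical call, reusing the output-directory layout from the other examples ('run1' is an illustrative leaf name):

exp_dir = make_exp_dir(args.output_dir, args.manifold, 'run1')  # created on disk, path returned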
Example #8
def main():
    args = parse_args()
    if args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    config = parse_config(args.config)
    set_seeds(args.random_seed)
    save_dir = check_mkdir(config['save_dir_root'], increment=True)
    copyfile(args.config, os.path.join(save_dir, 'config.yaml'))

    # torch settings
    torch.set_default_dtype(torch.float64)  # use double precision
    if torch.cuda.is_available():  # place everything on CUDA
        # NOTE: We rely on this in several parts of the code.
        torch.set_default_tensor_type(torch.cuda.DoubleTensor)
    if args.detect_anomaly:
        torch.autograd.set_detect_anomaly(True)

    # prepare data
    gpdists, g = load_graph_pdists(config['input_graph'],
                                   cache_dir=config.get('cache_dir'))
    # recover the node count n from the n*(n-1)/2 condensed distances
    n_nodes = nnm1d2_to_n(len(gpdists))
    if 'preprocess' in config:
        gpdists = config['preprocess'](gpdists)
    # keep the distances of large graphs on the CPU to save GPU memory
    dataset = GraphDataset(gpdists if n_nodes < 5000 else gpdists.to('cpu'))

    # the embedding
    embedding = config['embedding'](n_nodes)

    # the optimizers
    optimizers = []
    lr_schedulers = []
    if 'embedding_optimizer' in config:
        emb_optim = config['embedding_optimizer'](embedding.xs)
        optimizers.append(emb_optim)
        if 'embedding_lr_scheduler' in config:
            lr_schedulers.append(config['embedding_lr_scheduler'](emb_optim))
    if 'curvature_optimizer' in config:
        curv_optim = config['curvature_optimizer'](embedding.curvature_params)
        optimizers.append(curv_optim)
        if 'curvature_lr_scheduler' in config:
            lr_schedulers.append(config['curvature_lr_scheduler'](curv_optim))

    # prepare training
    training_args = dict(embedding=embedding,
                         optimizer=optimizers,
                         lr_scheduler=lr_schedulers,
                         objective_fn=config['objective_fn'],
                         save_dir=save_dir)
    training_args.update(config['training_params'])

    # use the right training engine
    if isinstance(embedding, ProductManifoldEmbedding):
        from graphembed.products import TrainingEngine
    elif 'min_alpha' in training_args or 'max_alpha' in training_args:
        from graphembed.train_da import TrainingEngine
    else:
        from graphembed.train import TrainingEngine

    # use a with-block to make sure the threads are closed even if the
    # process is killed
    with ThreadPoolExecutor(max_workers=args.num_workers) as pool:
        if g is not None:
            with Timer('constructing FastPrecision', loglevel=logging.INFO):
                fp = FastPrecision(g)
            training_args['lazy_metrics'] = {
                'Layer_Mean_F1': \
                    lambda p: pool.submit(fp.layer_mean_f1_scores, p),
            }  # yapf: disable
        training_engine = TrainingEngine(**training_args)

        # train
        with Timer('training', loglevel=logging.INFO):
            training_engine(dataset)
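Note that parse_config must yield callables for several entries: config['embedding'] is called with the node count, config['preprocess'] (if present) with the distance tensor, and each optimizer or scheduler factory with the parameters or optimizer it should manage.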
Example #9
def make_exp_dir(*args):
    from graphembed.utils import check_mkdir
    save_dir = os.path.join(*args)
    check_mkdir(save_dir, increment=False)
    return save_dir
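This is the same helper as in Example #7; the only difference is the function-local import, which defers the dependency on graphembed.utils until the first call.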