def main():
    torch.set_default_dtype(torch.float64)
    save_dir = os.path.join(args.output_dir, args.manifold)
    check_mkdir(save_dir, increment=False)

    # generate the samples on the manifold, uniformly around "origin"
    man = build_manifold(args.manifold)[0]
    if args.use_rs:
        samples = gen_samples_rs(man, args.num_nodes, args.radius)
    elif args.use_gen2:
        samples = gen_samples2(man, args.num_nodes, args.radius)
    else:
        samples = gen_samples(man, args.num_nodes, args.radius)
    torch.save(samples, os.path.join(save_dir, 'xs.pt'))

    # the pairwise distances, expanded into a square matrix
    dists = squareform1(man.pdist(samples))
    plot_distances(dists, save_dir)

    # create the graph by linking all pairs closer than the radius
    g = nx.Graph()
    g.add_edges_from(np.argwhere(dists.numpy() < args.radius))
    g.remove_edges_from(nx.selfloop_edges(g))

    # plots
    plot_degree_distribution(g, save_dir)
    plot_points(man, samples, args.radius, save_dir)
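# For orientation: `squareform1` (used above and in the other scripts) is
# assumed to mirror scipy's `squareform` for torch tensors, expanding the
# condensed pairwise-distance vector returned by `man.pdist` (length
# n*(n-1)/2) into a symmetric n-by-n matrix with a zero diagonal. A minimal
# sketch under that assumption; the real helper lives elsewhere in the repo:
import math

import torch


def squareform1_sketch(condensed):
    n = (1 + math.isqrt(1 + 8 * len(condensed))) // 2  # invert m = n*(n-1)/2
    out = condensed.new_zeros(n, n)
    i, j = torch.triu_indices(n, n, offset=1)  # row-major upper triangle
    out[i, j] = condensed
    out[j, i] = condensed
    return out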
def grid_fn(fn, man, radius, output_dir):
    # sample
    xs = fn(man, args.num_nodes, radius)
    pdists = man.pdist(xs)

    # save and plot the pdists
    path = os.path.join(output_dir, 'pdists.npy')
    np.save(path, pdists.numpy())
    path = os.path.join(output_dir, 'pdists.pdf')
    plot_distances(pdists, path, axvline=radius, xmax=2 * radius)

    # save and plot the distances from zero too
    zeros = man.zero(len(xs))
    zero_dists = man.dist(zeros, xs)
    path = os.path.join(output_dir, 'zero_dists.npy')
    np.save(path, zero_dists.numpy())  # convert explicitly, as for pdists
    path = os.path.join(output_dir, 'zero_dists.pdf')
    plot_distances(zero_dists, path, xmax=radius)

    # threshold the pairwise distances at several link radii
    pdists = squareform1(pdists).numpy()
    thresholds = np.linspace(radius / 10, radius, args.num_thresholds)
    for threshold in thresholds:
        # create the graph
        g = nx.Graph()
        g.add_edges_from(np.argwhere(pdists < threshold))
        g.remove_edges_from(nx.selfloop_edges(g))

        # save it
        save_dir = os.path.join(output_dir, f'{threshold:.2f}')
        check_mkdir(save_dir, increment=False)
        torch.save(xs, os.path.join(save_dir, 'xs.pt'))
        nx.write_edgelist(g, os.path.join(save_dir, 'graph.edges.gz'))
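# The thresholded graphs written by `grid_fn` can be read back with networkx;
# the concrete path below is illustrative (dim 2, sampling radius 1.00, a
# manifold named 'hyperbolic', link threshold 0.50), matching the directory
# layout built by the grid driver's main():
#
#   import networkx as nx
#   g = nx.read_edgelist('out/2/1.00/hyperbolic/0.50/graph.edges.gz',
#                        nodetype=int)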
def main():
    torch.set_default_dtype(torch.float64)
    save_dir = os.path.join(args.output_dir, args.manifold)
    check_mkdir(save_dir, increment=False)
    man = build_manifold(args.manifold)[0]

    # generate the samples on the manifold, uniformly around "origin"
    if args.use_rs:
        samples = gen_samples_rs(man, args.num_nodes, args.radius,
                                 np.sqrt(args.curvature_r_squared))
    else:
        samples = gen_samples_exp(man, args.num_nodes, args.radius)
    plot_points(man, samples, args.radius, save_dir)

    # the pairwise distances
    torch.save(samples, os.path.join(save_dir, 'xs.pt'))
    dists = squareform1(pdists(man, samples))
    plot_distances(dists, save_dir)

    # create the graph
    g = nx.Graph()
    g.add_edges_from(np.argwhere(dists.numpy() < args.radius))
    g.remove_edges_from(nx.selfloop_edges(g))

    # save it
    filename = '{}_n{}_r{:.2f}'.format(args.manifold, args.num_nodes,
                                       args.radius)
    filename = filename.replace('.', 'p') + '.edges.gz'
    nx.write_edgelist(g, os.path.join(save_dir, filename))

    # plots
    plot_degree_distribution(g, save_dir)
    plot_graph_distances(g, save_dir)
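# A sketch of what `gen_samples_exp` is assumed to do: sample tangent vectors
# uniformly in the radius-`radius` ball at the origin and push them onto the
# manifold via the exponential map. The names `man.dim` and `man.exp` are
# assumptions (only `man.zero`, `man.dist`, and `man.pdist` appear in these
# scripts). Note that uniformity in the tangent ball is not uniformity w.r.t.
# the manifold volume; the rejection-sampling variant `gen_samples_rs`
# presumably accounts for the curvature-dependent density:
import torch


def gen_samples_exp_sketch(man, num_nodes, radius):
    dirs = torch.randn(num_nodes, man.dim)
    dirs = dirs / dirs.norm(dim=1, keepdim=True)  # random directions
    radii = radius * torch.rand(num_nodes, 1)**(1.0 / man.dim)  # uniform ball
    return man.exp(man.zero(num_nodes), radii * dirs)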
def __init__(self, **kwargs):
    self.__dict__.update(default_attrs_)
    self.__dict__.update(kwargs)

    # SANITY CHECKS
    if not self.embedding or not self.optimizer or not self.objective_fn:
        raise ValueError('`embedding`, `optimizer`, and `objective_fn` '
                         'must be specified to construct a TrainingEngine.')
    if self.burnin_lower_lr and self.burnin_higher_lr:
        raise ValueError('`burnin_lower_lr` and `burnin_higher_lr` are '
                         'mutually exclusive.')

    # DEFAULTS:
    # - one optimizer
    if not isinstance(self.optimizer, (tuple, list)):  # for legacy code
        self.optimizer = [self.optimizer]
    # - one lr scheduler
    if self.lr_scheduler is not None and \
            not isinstance(self.lr_scheduler, (tuple, list)):
        self.lr_scheduler = [self.lr_scheduler]
    # - no burn-in epochs
    if self.burnin_epochs is None:
        self.burnin_epochs = 0
    # - do not perturb
    if self.perturb_every_epochs is None:
        self.perturb_every_epochs = self.n_epochs + 1
    # - do not stabilize
    if self.stabilize_every_epochs is None:
        self.stabilize_every_epochs = self.n_epochs + 1
    # - Pearson R and the average distortion as the default metrics
    if self.metrics is None:
        self.metrics = ['pearsonr', 'average_distortion']
    # - the first in the metrics list as the main metric
    if self.main_metric_idx is None:
        self.main_metric_idx = 0
    # - do not evaluate
    if self.val_every_epochs is None:
        self.val_every_epochs = self.n_epochs + 1
    # - save metrics at the last validation
    if self.save_metrics_every_epochs is None:
        self.save_metrics_every_epochs = \
                self.n_epochs // self.val_every_epochs * self.val_every_epochs
    # - save at the very end
    if self.save_every_epochs is None:
        self.save_every_epochs = self.n_epochs
    # - temporary save dir
    if self.save_dir is None:
        self.save_dir = tempfile.gettempdir()
    check_mkdir(self.save_dir)
    logger.info('The save dir is (%s)', self.save_dir)

    # load the states if the snapshot path is given
    if self.snapshot_path:
        self._load()
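# A hypothetical construction of a TrainingEngine, only to illustrate the
# keyword arguments handled by __init__ above. The toy embedding and the
# quadratic objective are illustrative stand-ins, and the `objective_fn`
# signature is an assumption, not the project's actual interface:
import torch


class _ToyEmbedding(torch.nn.Module):

    def __init__(self, n_nodes, dim=2):
        super().__init__()
        self.xs = torch.nn.Parameter(torch.randn(n_nodes, dim))


emb = _ToyEmbedding(n_nodes=100)
engine = TrainingEngine(
        embedding=emb,
        optimizer=torch.optim.Adam([emb.xs], lr=1e-2),
        objective_fn=lambda pred, target: ((pred - target)**2).mean(),
        n_epochs=1000,
        val_every_epochs=100,  # evaluate every 100 epochs
        metrics=['pearsonr'])  # overrides the default metric list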
def main():
    torch.set_default_dtype(torch.float64)
    logging.getLogger().setLevel(logging.DEBUG)
    save_dir = os.path.join(args.output_dir, args.manifold)
    check_mkdir(save_dir, increment=False)
    man = build_manifold(args.manifold)[0]

    # generate the samples
    xs = random_walk_graph(man, args.num_nodes, args.radius)
    torch.save(xs, os.path.join(save_dir, 'xs.pt'))
    plot_points(man, xs, args.radius, save_dir)

    # the pairwise distances
    pdists = man.pdist(xs)
    xmax = args.dist_limit if args.dist_limit else 2 * args.radius
    plot_distances(pdists, os.path.join(save_dir, 'pdists.pdf'), xmax=xmax)
    pdists = squareform1(pdists)

    # the distances from 0
    zeros = man.zero(len(xs))
    zero_dists = man.dist(zeros, xs)
    plot_distances(zero_dists, os.path.join(save_dir, 'zero_dists.pdf'),
                   xmax=args.radius)

    # create the graph
    threshold = args.link_distance if args.link_distance else args.radius
    g = nx.Graph()
    g.add_edges_from(np.argwhere(pdists.numpy() < threshold))
    g.remove_edges_from(nx.selfloop_edges(g))

    # save it
    filename = '{}_n{}_r{:.2f}_ld{:.2f}'.format(
            args.manifold, args.num_nodes, args.radius, threshold)
    filename = filename.replace('.', 'p') + '.edges.gz'
    nx.write_edgelist(g, os.path.join(save_dir, filename))

    # plots
    plot_degree_distribution(g, save_dir)
    plot_graph_distances(g, save_dir)
def main():
    torch.set_default_dtype(torch.float64)

    if args.gen_fn == 'rw':
        # NOTE: lambdas cannot be pickled for ProcessPoolExecutor workers, so
        # bind the extra arguments with functools.partial instead (this
        # assumes `random_walk_graph` accepts `burnin` and `take_every` as
        # keyword arguments).
        fn = functools.partial(random_walk_graph,
                               burnin=args.burnin,
                               take_every=args.take_every)
    elif args.gen_fn == 'rs':
        fn = gen_samples_rs
    elif args.gen_fn == 'exp':
        fn = gen_samples
    else:
        raise ValueError(f'Unknown gen_fn: {args.gen_fn}')

    with ProcessPoolExecutor(max_workers=args.num_cpus) as pool:
        futures = []
        for dim in args.dims:
            for manifold in args.manifolds:
                man, = build_manifold(manifold, dim)
                for radius in args.radii:
                    save_dir = os.path.join(args.output_dir, str(dim),
                                            f'{radius:.2f}', manifold)
                    check_mkdir(save_dir, increment=False)
                    futures.append(
                            pool.submit(grid_fn, fn, man, radius, save_dir))
        # block until all grid cells finish; propagates worker exceptions
        for f in futures:
            f.result()
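# Hypothetical invocation of this grid script; the flag names follow the
# `args` fields used above (the actual argument parser is not shown here):
#
#   python grid.py --gen_fn rs --dims 2 3 --manifolds hyperbolic spherical \
#       --radii 0.5 1.0 2.0 --num_nodes 1000 --num_thresholds 10 \
#       --num_cpus 8 --output_dir out/grid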
def make_exp_dir(*args):
    save_dir = os.path.join(*args)
    check_mkdir(save_dir, increment=False)
    return save_dir
def main():
    args = parse_args()
    if args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)
    config = parse_config(args.config)
    set_seeds(args.random_seed)
    save_dir = check_mkdir(config['save_dir_root'], increment=True)
    copyfile(args.config, os.path.join(save_dir, 'config.yaml'))

    # torch settings
    torch.set_default_dtype(torch.float64)  # use double precision
    if torch.cuda.is_available():
        # place everything on CUDA
        # NOTE: We rely on this in several parts of the code.
        torch.set_default_tensor_type(torch.cuda.DoubleTensor)
    if args.detect_anomaly:
        torch.autograd.set_detect_anomaly(True)

    # prepare data
    gpdists, g = load_graph_pdists(
            config['input_graph'], cache_dir=config.get('cache_dir'))
    n_nodes = nnm1d2_to_n(len(gpdists))
    if 'preprocess' in config:
        gpdists = config['preprocess'](gpdists)
    # keep large pairwise-distance tensors on the CPU
    dataset = GraphDataset(gpdists if n_nodes < 5000 else gpdists.to('cpu'))

    # the embedding
    embedding = config['embedding'](n_nodes)

    # the optimizers
    optimizers = []
    lr_schedulers = []
    if 'embedding_optimizer' in config:
        emb_optim = config['embedding_optimizer'](embedding.xs)
        optimizers.append(emb_optim)
        if 'embedding_lr_scheduler' in config:
            lr_schedulers.append(config['embedding_lr_scheduler'](emb_optim))
    if 'curvature_optimizer' in config:
        curv_optim = config['curvature_optimizer'](embedding.curvature_params)
        optimizers.append(curv_optim)
        if 'curvature_lr_scheduler' in config:
            lr_schedulers.append(config['curvature_lr_scheduler'](curv_optim))

    # prepare training
    training_args = dict(
            embedding=embedding,
            optimizer=optimizers,
            lr_scheduler=lr_schedulers,
            objective_fn=config['objective_fn'],
            save_dir=save_dir)
    training_args.update(config['training_params'])

    # use the right training engine
    if isinstance(embedding, ProductManifoldEmbedding):
        from graphembed.products import TrainingEngine
    elif 'min_alpha' in training_args or 'max_alpha' in training_args:
        from graphembed.train_da import TrainingEngine
    else:
        from graphembed.train import TrainingEngine

    # use a with-block to make sure the threads are closed even if the
    # process is killed
    with ThreadPoolExecutor(max_workers=args.num_workers) as pool:
        if g is not None:
            with Timer('constructing FastPrecision', loglevel=logging.INFO):
                fp = FastPrecision(g)
            training_args['lazy_metrics'] = {
                'Layer_Mean_F1':
                        lambda p: pool.submit(fp.layer_mean_f1_scores, p),
            }  # yapf: disable
        training_engine = TrainingEngine(**training_args)

        # train
        with Timer('training', loglevel=logging.INFO):
            training_engine(dataset)
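# For reference, `nnm1d2_to_n` used above recovers the node count n from the
# length m = n*(n-1)/2 of the condensed pairwise-distance vector; a one-line
# sketch of the assumed behavior:
import math


def nnm1d2_to_n_sketch(m):
    return (1 + math.isqrt(1 + 8 * m)) // 2  # since 1 + 8*m == (2*n - 1)**2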
def make_exp_dir(*args):
    from graphembed.utils import check_mkdir
    save_dir = os.path.join(*args)
    check_mkdir(save_dir, increment=False)
    return save_dir