# Evaluation-script preamble: resolve the experiment folder, reload the run's
# metadata and learned embeddings, and build the supervised ground-truth matrix.
# NOTE(review): reconstructed from a whitespace-collapsed paste — the nesting of
# the two `if` blocks is inferred from the statement order; confirm against the
# original file.

# If no explicit folder was given but a run id was, derive the folder from the
# global configuration's save directory.
if args.folder == "" and args.id != "":
    global_config = config.ConfigurationFile("./DATA/config.conf")
    saving_folder = global_config["save_folder"]
    args.folder = os.path.join(saving_folder, args.id)

# Reopen the run's JSON log in "continue" mode to read back its parameters.
log_in = logger.JSONLogger(os.path.join(args.folder, "log.json"), mod="continue")
dataset_name = log_in["dataset"]
n_gaussian = log_in["n_gaussian"]  # number of communities / mixture components
print("EVALUATE SUPERVISED CLUSTERING ON ")
print("\t Dataset -> ", dataset_name)
print("\t Number of communities -> ", n_gaussian)
size = log_in["size"]
# print("Loading Corpus ")
X, Y = data_loader.load_corpus(dataset_name)

# Embeddings saved by the training run: initial state and final state.
# NOTE(review): the trained file is indexed with [0] — presumably it was saved
# as a tuple/list whose first element is the embedding tensor; verify.
representations_init = torch.load(
    os.path.join(args.folder, "embeddings_init.t7"))
representations = torch.load(os.path.join(args.folder, "embeddings.t7"))[0]
print("\t Representation matrix shape -> ", representations.size())

# Multi-label ground truth: row i has a 1 in column y iff node i belongs to
# community y (Y[i] is assumed to be the collection of node i's communities).
ground_truth = torch.LongTensor(
    [[1 if (y in Y[i]) else 0 for y in range(n_gaussian)]
     for i in range(len(X))])
print("\t Distribution of communities -> ", ground_truth.sum(0))

# Optionally move everything to the GPU for the evaluation that follows.
if (args.cuda):
    representations = representations.cuda()
    representations_init = representations_init.cuda()
    ground_truth = ground_truth.cuda()
# Train 2-D Poincaré-ball embeddings of the LFR1 benchmark graph with
# Riemannian SGD. NOTE(review): reconstructed from a whitespace-collapsed
# paste; the inner training-loop body is cut off by the chunk boundary.
import torch
import os
from torch import nn
from torch.utils.data import DataLoader
from rcome.manifold.poincare_ball import PoincareBallApproximation, PoincareBallExact
from rcome.optim_tools import rsgd
from rcome.data_tools import data_loader, corpora
from rcome.embedding_tools.losses import tree_embedding_criterion
from rcome.visualisation_tools.plot_tools import plot_poincare_disc_embeddings

# Undirected LFR1 graph: X is the node structure, Y the associated labels
# (exact container types are defined by rcome.data_tools.data_loader).
X, Y = data_loader.load_corpus("LFR1", directed=False)
dataset = corpora.NeigbhorFlatCorpus(X, Y)
dataloader = DataLoader(dataset, batch_size=20, shuffle=True)

# One 2-D embedding per node; max_norm keeps points strictly inside the
# unit disc (required for Poincaré-ball operations).
model = nn.Embedding(len(X), 2, max_norm=0.999)
# Shrink the random init so all points start near the origin of the disc.
model.weight.data[:] = model.weight.data * 1e-2
model.cuda()

manifold = PoincareBallExact
# Riemannian SGD with learning rate 1e-1 on the chosen manifold.
optimizer = rsgd.RSGD(model.parameters(), 1e-1, manifold=manifold)

# Target index 0 for each element of a full batch (matches batch_size=20).
default_gt = torch.zeros(20).long()
criterion = nn.CrossEntropyLoss(reduction="sum")

# 50 epochs over the neighbour pairs; loss accumulation per epoch.
for i in range(50):
    tloss = 0.
    for x, y in dataloader:
        optimizer.zero_grad()
        pe_x = model(x.long().cuda())
        # NOTE(review): the rest of the loop body (loss computation,
        # backward, optimizer step) lies beyond this chunk's boundary.
help="Do print information") parser.add_argument('--plot', dest="plot", default=False, action="store_true", help="draw figures at each iteration (can be time consuming)") # Optimisation parser.add_argument('--cuda', dest="cuda", action="store_true", default=False, help="Optimize on GPU (nvidia only)") parser.add_argument('--num-threads', dest="num_threads", type=int, default=1, help="Number of threads for pytorch dataloader (can fail on windows)") args = parser.parse_args() torch.set_default_tensor_type(torch.DoubleTensor) if(args.verbose): print("Loading Corpus ") X, Y = data_loader.load_corpus(args.dataset, directed=False) dataset_l1 = corpora.NeigbhorFlatCorpus(X, Y) dataset_l2 = corpora.RandomContextSizeFlat(X, Y, precompute=args.walk_by_node, path_len=args.walk_lenght, context_size=args.context_size) dataset_l3 = corpora_tools.from_indexable(torch.arange(0, len(X), 1).unsqueeze(-1)) dataloader_l1 = DataLoader(dataset_l1, batch_size=args.batch_size, shuffle=True, drop_last=False, num_workers= args.num_threads if(args.num_threads > 1) else 0 ) def collate_fn_simple(tensor_list): v = torch.cat(tensor_list, 0)
print(directory, " has already been evaluated") # continue else: pass if (0 == 1): pass else: print(directory) # set info vars print('Loading Corpus') dataset_name = logger_object["dataset"] n_communities = nb_communities_dict[dataset_name] # loading corpus and representation X, Y = data_loader.load_corpus(dataset_name, directed=True) print("Size ", len(X)) D = corpora.RandomWalkCorpus(X, Y) with io.open(os.path.join(path_to_experiment, "embeddings.txt")) as embedding_file: V = [] for line in embedding_file: splitted_line = line.split() V.append([ float(splitted_line[i + 1]) for i in range(len(splitted_line) - 1) ]) representations = torch.Tensor(V) #### unsupervised evaluation ####
from torch import nn
from rcome.data_tools import data_loader
from rcome.embedding_tools import losses

# Load the DBLP co-authorship graph as an undirected structure; `nodes` holds
# per-node adjacency, `communities` the community labels.
nodes, communities = data_loader.load_corpus('dblp', directed=False)

# One learnable embedding per node.
# Fixes: the original called nn.Embbedding(len(nodes)) — "Embbedding" is a
# typo (AttributeError), and nn.Embedding also requires the embedding
# dimension as its second positional argument. Dimension 2 matches the other
# experiments in this project — TODO confirm the intended dimensionality.
nodes_representation = nn.Embedding(len(nodes), 2)

for edges in nodes:
    for l in edges:
        # TODO: the original loop body was the bare no-op expression `nodes`;
        # the intended per-edge update has not been implemented yet.
        pass
import torch import os from torch import nn from torch.utils.data import DataLoader from rcome.manifold.poincare_ball import PoincareBallApproximation, PoincareBallExact from rcome.optim_tools import rsgd from rcome.data_tools import data_loader, corpora from rcome.embedding_tools.losses import tree_embedding_criterion from rcome.visualisation_tools.plot_tools import plot_poincare_disc_embeddings X, Y = data_loader.load_corpus("dblp", directed=False) dataset = corpora.NeigbhorFlatCorpus(X, Y) dataloader = DataLoader(dataset, batch_size=5, shuffle=True) model = nn.Embedding(len(X), 2, max_norm=0.999) model.weight.data[:] = model.weight.data * 1e-2 model.cuda() manifold = PoincareBallExact optimizer = rsgd.RSGD(model.parameters(), 1e-1, manifold=manifold) default_gt = torch.zeros(20).long() criterion = nn.CrossEntropyLoss(reduction="sum") for i in range(50): tloss = 0. for x, y in dataloader: