def plot_ground_truth():
    # Ground truth: node i lives at grid coordinate (i // 10, i % 10),
    # i.e. the ids 0..99 laid out on a 10x10 grid.
    grid = np.arange(100).reshape([10, 10])  # renamed from `map` to avoid shadowing the builtin
    coords = np.array([[i, j] for i in range(10) for j in range(10)])
    plot_embeddings(grid, coords, file=FINAL_EMBEDDING_PATH + '/ground_truth.png')
def main(args):
    # Read a weighted directed graph and embed it with SDNE.
    nx_G = nx.read_edgelist(args.input, create_using=nx.DiGraph(),
                            nodetype=None, data=[("weight", int)])
    model = SDNE(nx_G, hidden_size=[512, 256, 256])
    model.train(batch_size=3000, epochs=50, verbose=2)
    embeddings = model.get_embeddings()
    # plot_embeddings expects string node keys.
    embeddings = {str(k): v for k, v in embeddings.items()}
    plot_embeddings(embeddings, args.label_file)
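# This script (and the LINE / DeepWalk / node2vec / Struc2Vec ones below)
# calls a plot_embeddings(embeddings, label_file) helper that is not shown
# here. A minimal sketch of what such a helper could look like (an
# assumption, not this project's actual code): read node labels, stack the
# vectors, project to 2-D with t-SNE, and scatter one color per class.
# read_node_label_sketch is a hypothetical helper.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE


def read_node_label_sketch(label_file):
    # Assumed format, one node per line: "<node_id> <label>".
    nodes, labels = [], []
    with open(label_file) as f:
        for line in f:
            node, label = line.split()[:2]
            nodes.append(node)
            labels.append(label)
    return nodes, labels


def plot_embeddings_sketch(embeddings, label_file):
    nodes, labels = read_node_label_sketch(label_file)
    emb = np.array([embeddings[n] for n in nodes])
    pos = TSNE(n_components=2).fit_transform(emb)
    for c in sorted(set(labels)):
        idx = [i for i, l in enumerate(labels) if l == c]
        plt.scatter(pos[idx, 0], pos[idx, 1], label=c, s=10)
    plt.legend()
    plt.show()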
def main():
    trainer = ParticleTrainer(embedding=USE_EMBEDDING,
                              store_embeddings=PLOT_EMBEDDING_HISTORY)
    trainer.train()
    if USE_EMBEDDING and PLOT_EMBEDDING_HISTORY:
        embedding_history = trainer.embedding_history
        embedding_map = trainer.embedding_map.numpy()
        # Dump one snapshot image per recorded training step.
        for i, embedding in enumerate(embedding_history):
            plot_embeddings(embedding_map, embedding, annotate=False,
                            file=EMBEDDING_HISTORY_PATH + f'/{i:02d}.png')
def main(args):
    # nx_G = read_graph()
    # Note: `data` must be an iterable of (name, type) pairs; the original
    # (("weight", int)) is just ("weight", int) with redundant parentheses,
    # which networkx cannot unpack. Use a list instead.
    nx_G = nx.read_edgelist(args.input, create_using=nx.DiGraph(),
                            nodetype=int, data=[("weight", int)])
    line = Line(nx_G, batch_size=1024, embedding_size=128, order='second')
    line.train(epoch=50, verbose=2)
    _embeddings = line.get_embeddings()
    _embeddings = {str(k): v for k, v in _embeddings.items()}
    plot_embeddings(_embeddings, args.label_file)
def main(args):
    nx_G = read_graph()
    G = deepwalk(nx_G, args.directed, args.p, args.q)
    walks = G.simulate_walks(args.num_walks, args.walk_length)
    model = learning_walks(walks)  # see the sketch after this function
    _embeddings = {}
    for word in nx_G.nodes():
        _embeddings[str(word)] = model.wv[str(word)]
    plot_embeddings(_embeddings, args.label_file)
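# learning_walks is not defined in this file. A plausible sketch, assuming it
# fits a gensim skip-gram Word2Vec over the walk corpus, as DeepWalk and
# node2vec do (gensim >= 4 argument names; the defaults here are
# illustrative, not the project's):
from gensim.models import Word2Vec


def learning_walks_sketch(walks, dimensions=128, window_size=10, workers=4,
                          epochs=5):
    # Word2Vec expects sequences of tokens, so stringify the node ids.
    walks = [[str(node) for node in walk] for walk in walks]
    return Word2Vec(walks, vector_size=dimensions, window=window_size,
                    min_count=0, sg=1, workers=workers, epochs=epochs)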
def main(args):
    nx_G = read_graph()
    G = node2vec_walk(nx_G, args.directed, args.p, args.q)
    G.preprocess_transition_probs()
    walks = G.simulate_walks(args.num_walks, args.walk_length)
    model = learning_walks(walks)
    _embeddings = {}
    for v in nx_G.nodes():
        _embeddings[str(v)] = model.wv[str(v)]
    plot_embeddings(_embeddings, args.label_file)
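# The DeepWalk / node2vec mains above expect an args namespace with input,
# label_file, directed, p, q, num_walks, and walk_length. A minimal parser
# that would satisfy them (a sketch; the defaults follow common node2vec
# settings and are assumptions, not this project's configuration):
import argparse


def parse_args_sketch():
    parser = argparse.ArgumentParser(description='Run graph embedding.')
    parser.add_argument('--input', required=True, help='input edgelist path')
    parser.add_argument('--label-file', dest='label_file', required=True,
                        help='node label file for plotting')
    parser.add_argument('--directed', action='store_true',
                        help='treat the graph as directed')
    parser.add_argument('--p', type=float, default=1.0,
                        help='return parameter')
    parser.add_argument('--q', type=float, default=1.0,
                        help='in-out parameter')
    parser.add_argument('--num-walks', dest='num_walks', type=int, default=10,
                        help='walks per node')
    parser.add_argument('--walk-length', dest='walk_length', type=int,
                        default=80, help='steps per walk')
    return parser.parse_args()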
def main():
    trainer = ParticleTrainer(embedding=True)
    trainer.load_snapshot(BEST_EMBEDDED_SNAPSHOT_PATH)
    embedding_map = trainer.embedding_map.numpy()
    embedding_matrix = trainer.net.embedding.weight
    embedding_matrix = embedding_matrix.cpu().detach().numpy()
    plot_ground_truth()
    plot_embeddings(
        embedding_map,
        embedding_matrix,
        # Parenthesized so the conditional clearly covers the whole path.
        file=(FINAL_EMBEDDING_PATH + f'/{basename(SNAPSHOT)}.png'
              if SAVE_FILE else None),
    )
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx
from sklearn.manifold import TSNE

from struc2vec import Struc2Vec
from utils import evaluate_embeddings, plot_embeddings

if __name__ == "__main__":
    G = nx.read_edgelist('./data/Wiki_edgelist.txt', create_using=nx.DiGraph(),
                         nodetype=None, data=[('weight', int)])
    model = Struc2Vec(G, 20, 80, workers=4, verbose=40, opt3_num_layers=5)
    model.train(embed_size=256)
    embeddings = model.get_embeddings()
    evaluate_embeddings(embeddings, "./data/wiki_labels.txt")
    plot_embeddings(embeddings, "./data/wiki_labels.txt")
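# evaluate_embeddings comes from the same utils module as plot_embeddings and
# is not shown. A common recipe for it, offered here only as a sketch
# (assumption: multi-class node classification on a held-out split, as in the
# usual Wiki benchmark): train a linear classifier on the embeddings and
# report micro/macro F1. read_node_label_sketch is the hypothetical helper
# from the earlier sketch.
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score


def evaluate_embeddings_sketch(embeddings, label_file, train_ratio=0.8):
    nodes, labels = read_node_label_sketch(label_file)
    X = np.array([embeddings[n] for n in nodes])
    X_tr, X_te, y_tr, y_te = train_test_split(X, labels,
                                              train_size=train_ratio,
                                              random_state=0)
    clf = LogisticRegression(max_iter=1000).fit(X_tr, y_tr)
    pred = clf.predict(X_te)
    return {'micro-f1': f1_score(y_te, pred, average='micro'),
            'macro-f1': f1_score(y_te, pred, average='macro')}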
    feature_less=FEATURE_LESS,
)
model.compile(optimizer=Adam(0.01), loss='categorical_crossentropy',
              weighted_metrics=['categorical_crossentropy', 'acc'])

NB_EPOCH = 200
PATIENCE = 200  # early stopping patience

val_data = (model_input, y_val, val_mask)
mc_callback = ModelCheckpoint('./best_model.h5',
                              monitor='val_weighted_categorical_crossentropy',
                              save_best_only=True,
                              save_weights_only=True)

# train
print("start training")
model.fit(model_input, y_train, sample_weight=train_mask,
          validation_data=val_data, batch_size=A.shape[0], epochs=NB_EPOCH,
          shuffle=False, verbose=2, callbacks=[mc_callback])

# test
model.load_weights('./best_model.h5')
eval_results = model.evaluate(model_input, y_test,
                              sample_weight=test_mask, batch_size=A.shape[0])
print('Done.\n'
      'Test loss: {}\n'
      'Test weighted_loss: {}\n'
      'Test accuracy: {}'.format(*eval_results))

# The last layer's output tensor is already wired to model.input, so it can
# feed a new Model directly; the Lambda wrapper in the original was a no-op
# that ignored its argument.
embedding_model = Model(model.input, outputs=model.layers[-1].output)
embedding_weights = embedding_model.predict(model_input, batch_size=A.shape[0])
y = np.genfromtxt("{}{}.content".format('../data/cora/', 'cora'),
                  dtype=np.dtype(str))[:, -1]
plot_embeddings(embedding_weights, np.arange(A.shape[0]), y)
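# The full-batch masked training above follows the Planetoid/GCN convention:
# one "batch" is the whole graph, and sample_weight zeroes out nodes that are
# not in the current split. A sketch of how such masks are typically built
# (an assumption about the omitted preprocessing, not this script's code):
import numpy as np


def build_mask_sketch(idx, n_nodes):
    # 1.0 for nodes in the split, 0.0 elsewhere; used as a per-sample weight.
    mask = np.zeros(n_nodes, dtype=np.float32)
    mask[list(idx)] = 1.0
    return mask


# e.g. the standard Cora split trains on 140 labeled nodes:
# train_mask = build_mask_sketch(range(140), A.shape[0])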
words, word2ind, freqs = utils.extractDictionary(corpus, limit=20000)
data = utils.extractWordContextPairs(corpus, windowSize, word2ind)
del corpus

# Small random init, scaled down by the embedding dimension.
U0 = (np.random.rand(len(words), embDim) - 0.5) / embDim
V0 = (np.random.rand(len(words), embDim) - 0.5) / embDim

seq = sampling.createSamplingSequence(freqs)
contextFunction = lambda c: sampling.sampleContext(c, seq, negativesCount)

if len(sys.argv) > 1 and sys.argv[1] == 'cumulative':
    U, V = w2v_sgd.stochasticGradientDescend(data, np.copy(U0), np.copy(V0),
                                             contextFunction,
                                             grads.lossAndGradientCumulative)
else:
    U, V = w2v_sgd.stochasticGradientDescend(data, np.copy(U0), np.copy(V0),
                                             contextFunction,
                                             grads.lossAndGradientBatched)

np.save('w2v-U', U)
np.save('w2v-V', V)

# Concatenate the input and output vectors, reduce to 2-D with a truncated
# SVD, then L2-normalize each row for plotting.
E = np.concatenate([U, V], axis=1)
E_reduced = utils.SVD_k_dim(E, k=2)
E_normalized_2d = E_reduced / np.linalg.norm(E_reduced, axis=1)[:, np.newaxis]

# Sample words (Bulgarian): 'January October week year media market goods
# business firm budget petrol oil'.
sampleWords = 'януари октомври седмица година медии пазар стоки бизнес фирма бюджет петрол нефт'.split()
utils.plot_embeddings(E_normalized_2d, word2ind, sampleWords, 'embeddings')
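# utils.SVD_k_dim is not shown. A minimal sketch of a truncated SVD to k
# dimensions (an assumption about its behavior, not the course code itself):
import numpy as np


def svd_k_dim_sketch(E, k=2):
    # Thin SVD; keep the top-k singular directions, scaled by their singular
    # values, as the reduced coordinates.
    U, S, Vt = np.linalg.svd(E, full_matrices=False)
    return U[:, :k] * S[:k]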
def hard_triplet_baseline_exp(device='3', lr=1e-3, n_epochs=300, n_classes=10,
                              n_samples=12, margin=0.3, log_interval=50):
    """
    :param device: CUDA device id to run on
    :param lr: learning rate
    :param n_epochs: number of training epochs
    :param n_classes: classes per batch (for the balanced batch sampler)
    :param n_samples: samples per class per batch
    :param margin: triplet loss margin
    :param log_interval: batches between log messages
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = str(device)

    # Get the mean and std of dataset train/a.
    standarizer = TaskbStandarizer(data_manager=Dcase18TaskbData())
    mu, sigma = standarizer.load_mu_sigma(mode='train', device='a')

    # Get the normalized train and test datasets.
    train_dataset = DevSet(mode='train', device='a',
                           transform=Compose([Normalize(mean=mu, std=sigma),
                                              ToTensor()]))
    test_dataset = DevSet(mode='test', device='a',
                          transform=Compose([Normalize(mean=mu, std=sigma),
                                             ToTensor()]))

    # Balanced batches: n_classes classes x n_samples samples each.
    train_batch_sampler = BalanceBatchSampler(dataset=train_dataset,
                                              n_classes=n_classes,
                                              n_samples=n_samples)
    train_batch_loader = DataLoader(dataset=train_dataset,
                                    batch_sampler=train_batch_sampler,
                                    num_workers=1)
    test_batch_sampler = BalanceBatchSampler(dataset=test_dataset,
                                             n_classes=n_classes,
                                             n_samples=n_samples)
    test_batch_loader = DataLoader(dataset=test_dataset,
                                   batch_sampler=test_batch_sampler,
                                   num_workers=1)

    model = networks.embedding_net_shallow()
    model = model.cuda()
    loss_fn = OnlineTripletLoss(
        margin=margin,
        triplet_selector=utils.RandomNegativeTripletSelector(margin=margin))
    optimizer = optim.Adam(model.parameters(), lr=lr)
    scheduler = lr_scheduler.StepLR(optimizer=optimizer, step_size=30, gamma=0.5)

    fit(train_loader=train_batch_loader, val_loader=test_batch_loader,
        model=model, loss_fn=loss_fn, optimizer=optimizer, scheduler=scheduler,
        n_epochs=n_epochs, log_interval=log_interval,
        metrics=[AverageNoneZeroTripletsMetric()])

    # Visualize the learned embedding space for both splits.
    train_embedding_tl, train_labels_tl = utils.extract_embeddings(
        train_batch_loader, model)
    utils.plot_embeddings(train_embedding_tl, train_labels_tl)
    test_embedding_tl, test_labels_tl = utils.extract_embeddings(
        test_batch_loader, model)
    utils.plot_embeddings(test_embedding_tl, test_labels_tl)
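# utils.extract_embeddings pairs each sample's embedding with its label. A
# sketch of the usual PyTorch inference loop (an assumption; the real helper
# may instead call a dedicated model.get_embedding method):
import numpy as np
import torch


def extract_embeddings_sketch(dataloader, model):
    model.eval()
    embeddings, labels = [], []
    with torch.no_grad():
        for data, target in dataloader:
            data = data.cuda()
            embeddings.append(model(data).cpu().numpy())
            labels.append(target.numpy())
    return np.concatenate(embeddings), np.concatenate(labels)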