def plot_ground_truth():
    # 10x10 grid of ids 0..99 plus the (row, col) coordinate of every cell.
    grid = np.arange(100).reshape(10, 10)
    coords = np.array([[i, j] for i in range(10) for j in range(10)])

    plot_embeddings(grid,
                    coords,
                    file=FINAL_EMBEDDING_PATH + '/ground_truth.png')
Example #2
def main(args):
    # Build a directed, weighted graph from the edge list.
    nx_G = nx.read_edgelist(args.input, create_using=nx.DiGraph(),
                            nodetype=None, data=[("weight", int)])
    model = SDNE(nx_G, hidden_size=[512, 256, 256])
    model.train(batch_size=3000, epochs=50, verbose=2)

    embeddings = model.get_embeddings()
    # Normalize node keys to strings to match the label file.
    embeddings = {str(k): v for k, v in embeddings.items()}
    plot_embeddings(embeddings, args.label_file)
Example #3
def main():
    trainer = ParticleTrainer(embedding=USE_EMBEDDING, store_embeddings=PLOT_EMBEDDING_HISTORY)
    trainer.train()

    if USE_EMBEDDING and PLOT_EMBEDDING_HISTORY:
        embedding_history = trainer.embedding_history
        embedding_map = trainer.embedding_map.numpy()
        for i, embedding in enumerate(embedding_history):
            plot_embeddings(embedding_map, embedding, annotate=False, file=EMBEDDING_HISTORY_PATH + f'/{i:02d}.png')
Example #4
def main(args):
    # nx_G = read_graph()
    nx_G = nx.read_edgelist(args.input,
                            create_using=nx.DiGraph(),
                            nodetype=int,
                            data=[("weight", int)])  # data must be a list of (attr, type) pairs
    line = Line(nx_G, batch_size=1024, embedding_size=128, order='second')
    line.train(epoch=50, verbose=2)
    _embeddings = line.get_embeddings()
    _embeddings = {str(k): v for k, v in _embeddings.items()}
    plot_embeddings(_embeddings, args.label_file)
Example #5
def main(args):
    nx_G = read_graph()
    G = deepwalk(nx_G, args.directed, args.p, args.q)
    walks = G.simulate_walks(args.num_walks, args.walk_length)
    model = learning_walks(walks)

    _embeddings = {}
    for word in nx_G.nodes():
        _embeddings[str(word)] = model.wv[str(word)]

    plot_embeddings(_embeddings, args.label_file)
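Note: learning_walks is not shown in this listing. Given the model.wv[str(node)] lookups above, it is almost certainly a gensim Word2Vec fit over the walks; a minimal sketch with hypothetical defaults (use size= instead of vector_size= on gensim < 4.0):

from gensim.models import Word2Vec

def learning_walks(walks, dimensions=128, window=5, workers=4):
    # Each walk is a "sentence" of node ids; skip-gram Word2Vec learns one
    # vector per node.
    sentences = [[str(node) for node in walk] for walk in walks]
    return Word2Vec(sentences, vector_size=dimensions, window=window,
                    min_count=0, sg=1, workers=workers)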
Example #6
def main(args):
    nx_G = read_graph()
    G = node2vec_walk(nx_G, args.directed, args.p, args.q)
    # Precompute alias tables for the p/q-biased transition probabilities.
    G.preprocess_transition_probs()
    walks = G.simulate_walks(args.num_walks, args.walk_length)
    model = learning_walks(walks)

    _embeddings = {}
    for v in nx_G.nodes():
        _embeddings[str(v)] = model.wv[str(v)]

    plot_embeddings(_embeddings, args.label_file)
Example #7
def main():
    trainer = ParticleTrainer(embedding=True)
    trainer.load_snapshot(BEST_EMBEDDED_SNAPSHOT_PATH)

    embedding_map = trainer.embedding_map.numpy()
    embedding_matrix = trainer.net.embedding.weight
    embedding_matrix = embedding_matrix.cpu().detach().numpy()

    plot_ground_truth()

    plot_embeddings(
        embedding_map,
        embedding_matrix,
        file=(FINAL_EMBEDDING_PATH +
              f'/{basename(SNAPSHOT)}.png') if SAVE_FILE else None,
    )
Example #8
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx
from sklearn.manifold import TSNE

from struc2vec import Struc2Vec
from utils import evaluate_embeddings, plot_embeddings

if __name__ == "__main__":
    G = nx.read_edgelist('./data/Wiki_edgelist.txt', create_using=nx.DiGraph(), nodetype=None,
                         data=[('weight', int)])
    model = Struc2Vec(G, 20, 80, workers=4, verbose=40, opt3_num_layers=5)  # positional args: walk_length=20, num_walks=80
    model.train(embed_size=256)
    embeddings = model.get_embeddings()    

    evaluate_embeddings(embeddings, "./data/wiki_labels.txt")
    plot_embeddings(embeddings, "./data/wiki_labels.txt")
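For reference, a minimal sketch of what utils.plot_embeddings does here, inferred from the TSNE/matplotlib imports above and reusing them (the one-"node label"-pair-per-line format of wiki_labels.txt is an assumption):

def plot_embeddings_sketch(embeddings, label_file):
    # Read "node label" pairs from the label file.
    nodes, labels = [], []
    with open(label_file) as f:
        for line in f:
            if not line.strip():
                continue
            node, label = line.split()[:2]
            nodes.append(node)
            labels.append(label)
    # Project the embedding vectors to 2-D with t-SNE, then draw one scatter
    # series per class so the legend carries the label names.
    pos = TSNE(n_components=2).fit_transform(np.array([embeddings[n] for n in nodes]))
    for label in sorted(set(labels)):
        idx = [i for i, l in enumerate(labels) if l == label]
        plt.scatter(pos[idx, 0], pos[idx, 1], label=label, s=8)
    plt.legend()
    plt.show()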
Example #9
    # NOTE: the start of this example is truncated in the source; the model
    # construction above this line is missing (GCN is assumed from context):
    model = GCN(...,  # preceding arguments lost in the snippet
                feature_less=FEATURE_LESS, )
    model.compile(optimizer=Adam(0.01), loss='categorical_crossentropy',
                  weighted_metrics=['categorical_crossentropy', 'acc'])

    NB_EPOCH = 200
    PATIENCE = 200  # early stopping patience

    val_data = (model_input, y_val, val_mask)
    mc_callback = ModelCheckpoint('./best_model.h5',
                                  monitor='val_weighted_categorical_crossentropy',
                                  save_best_only=True,
                                  save_weights_only=True)

    # train
    print("start training")
    model.fit(model_input, y_train, sample_weight=train_mask, validation_data=val_data,
              batch_size=A.shape[0], epochs=NB_EPOCH, shuffle=False, verbose=2, callbacks=[mc_callback])
    # test
    model.load_weights('./best_model.h5')
    eval_results = model.evaluate(
        model_input, y_test, sample_weight=test_mask, batch_size=A.shape[0])
    print('Done.\n'
          'Test loss: {}\n'
          'Test weighted_loss: {}\n'
          'Test accuracy: {}'.format(*eval_results))

    # Take the output of the last layer as the node embeddings.
    embedding_model = Model(inputs=model.input, outputs=model.layers[-1].output)
    embedding_weights = embedding_model.predict(model_input, batch_size=A.shape[0])
    y = np.genfromtxt("{}{}.content".format('../data/cora/', 'cora'), dtype=np.dtype(str))[:, -1]
    plot_embeddings(embedding_weights, np.arange(A.shape[0]), y)
Example #10
words, word2ind, freqs = utils.extractDictionary(corpus, limit=20000)
data = utils.extractWordContextPairs(corpus, windowSize, word2ind)

del corpus

U0 = (np.random.rand(len(words), embDim) - 0.5) / embDim
V0 = (np.random.rand(len(words), embDim) - 0.5) / embDim

seq = sampling.createSamplingSequence(freqs)
contextFunction = lambda c: sampling.sampleContext(c, seq, negativesCount)
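# (Assumption: the sampling module is not shown in this listing.
#  createSamplingSequence presumably builds the standard word2vec
#  negative-sampling table, in which word i appears in proportion to
#  freq(i) ** 0.75, and sampleContext draws negativesCount negatives
#  from it for each (center, context) pair.)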

if len(sys.argv) > 1 and sys.argv[1] == 'cumulative':
    U, V = w2v_sgd.stochasticGradientDescend(data, np.copy(U0), np.copy(V0),
                                             contextFunction, grads.lossAndGradientCumulative)
else:
    U, V = w2v_sgd.stochasticGradientDescend(data, np.copy(U0), np.copy(V0),
                                             contextFunction, grads.lossAndGradientBatched)

np.save('w2v-U', U)
np.save('w2v-V', V)

# Concatenate the input (U) and output (V) vectors into one embedding per word.
E = np.concatenate([U, V], axis=1)

E_reduced = utils.SVD_k_dim(E, k=2)
E_normalized_2d = E_reduced / np.linalg.norm(E_reduced, axis=1)[:, np.newaxis]

# Sample Bulgarian words: January, October, week, year, media, market,
# goods, business, firm, budget, petrol, oil.
sampleWords = 'януари октомври седмица година медии пазар стоки бизнес фирма бюджет петрол нефт'.split()

utils.plot_embeddings(E_normalized_2d, word2ind, sampleWords, 'embeddings')
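utils.SVD_k_dim is also not shown; judging from the call above, it projects E onto its top-k singular directions. A minimal truncated-SVD sketch (the exact behavior is an assumption):

def SVD_k_dim(X, k=2):
    # Keep only the top-k singular directions of X.
    U, s, Vt = np.linalg.svd(X, full_matrices=False)
    return U[:, :k] * s[:k]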


Example #11
def hard_triplet_baseline_exp(device='3',
                              lr=1e-3,
                              n_epochs=300,
                              n_classes=10,
                              n_samples=12,
                              margin=0.3,
                              log_interval=50):
    """

    :param device:
    :param lr:
    :param n_epochs:
    :param n_classes:
    :param n_samples:
    :return:
    """

    os.environ['CUDA_VISIBLE_DEVICES'] = str(device)

    # get the mean and std of the train split, device 'a'
    standarizer = TaskbStandarizer(data_manager=Dcase18TaskbData())
    mu, sigma = standarizer.load_mu_sigma(mode='train', device='a')

    # get the normalized train dataset
    train_dataset = DevSet(mode='train',
                           device='a',
                           transform=Compose(
                               [Normalize(mean=mu, std=sigma),
                                ToTensor()]))
    test_dataset = DevSet(mode='test',
                          device='a',
                          transform=Compose(
                              [Normalize(mean=mu, std=sigma),
                               ToTensor()]))

    train_batch_sampler = BalanceBatchSampler(dataset=train_dataset,
                                              n_classes=n_classes,
                                              n_samples=n_samples)
    train_batch_loader = DataLoader(dataset=train_dataset,
                                    batch_sampler=train_batch_sampler,
                                    num_workers=1)

    test_batch_sampler = BalanceBatchSampler(dataset=test_dataset,
                                             n_classes=n_classes,
                                             n_samples=n_samples)
    test_batch_loader = DataLoader(dataset=test_dataset,
                                   batch_sampler=test_batch_sampler,
                                   num_workers=1)

    model = networks.embedding_net_shallow()
    model = model.cuda()
    loss_fn = OnlineTripletLoss(
        margin=margin,
        triplet_selector=utils.RandomNegativeTripletSelector(margin=margin))
    optimizer = optim.Adam(model.parameters(), lr=lr)
    scheduler = lr_scheduler.StepLR(optimizer=optimizer,
                                    step_size=30,
                                    gamma=0.5)

    fit(train_loader=train_batch_loader,
        val_loader=test_batch_loader,
        model=model,
        loss_fn=loss_fn,
        optimizer=optimizer,
        scheduler=scheduler,
        n_epochs=n_epochs,
        log_interval=log_interval,
        metrics=[AverageNoneZeroTripletsMetric()])

    train_embedding_tl, train_labels_tl = utils.extract_embeddings(
        train_batch_loader, model)
    utils.plot_embeddings(train_embedding_tl, train_labels_tl)
    test_embedding_tl, test_labels_tl = utils.extract_embeddings(
        test_batch_loader, model)
    utils.plot_embeddings(test_embedding_tl, test_labels_tl)
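Note: utils.extract_embeddings is not shown in this listing. The usual implementation in online-triplet codebases runs the trained net over a loader and stacks the results; a minimal sketch, assuming the loaders yield (batch, labels) tensors and the model lives on the GPU:

import numpy as np
import torch

def extract_embeddings(dataloader, model):
    # Forward every batch through the net, collecting one embedding row and
    # one label per sample.
    model.eval()
    embeddings, labels = [], []
    with torch.no_grad():
        for data, target in dataloader:
            embeddings.append(model(data.cuda()).cpu().numpy())
            labels.append(target.numpy())
    return np.concatenate(embeddings), np.concatenate(labels)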