Example #1
import numpy as np
import pandas as pd

from openke.data import TrainDataLoader
from openke.module.model import TransE


def generate():
    # dataloader for the training triples
    train_dataloader = TrainDataLoader(in_path='./data/kg/',
                                       nbatches=100,
                                       threads=8,
                                       sampling_mode="normal",
                                       bern_flag=1,
                                       filter_flag=1,
                                       neg_ent=25,
                                       neg_rel=0)

    # define the model
    transe = TransE(ent_tot=train_dataloader.get_ent_tot(),
                    rel_tot=train_dataloader.get_rel_tot(),
                    dim=Config.entity_embedding_dim,  # Config is project-specific, defined elsewhere
                    p_norm=1,
                    norm_flag=True)

    transe.load_checkpoint('./data/kg/transe.ckpt')
    entity_embedding = transe.get_parameters()['ent_embeddings.weight']
    entity_embedding[0] = 0  # zero the row for entity id 0 (treated as a placeholder)
    np.save('./data/kg/entity.npy', entity_embedding)
    # zeros, not empty: rows of entities outside sub_kg stay zero instead
    # of holding uninitialized memory
    context_embedding = np.zeros_like(entity_embedding)
    # head and tail entity ids of every triple (columns 0 and 1)
    relation = pd.read_table('./data/sub_kg/triple2id.txt',
                             header=None)[[0, 1]]
    # ids of the entities in the subgraph
    entity = pd.read_table('./data/sub_kg/entity2name.txt',
                           header=None)[[0]].to_numpy().flatten()

    for e in entity:
        # triples in which e appears as head or as tail
        df = pd.concat(
            [relation[relation[0] == e], relation[relation[1] == e]])
        # e plus every entity that shares a triple with e
        context = list(set(np.append(df.to_numpy().flatten(), e)))
        context_embedding[e] = np.mean(entity_embedding[context, :], axis=0)

    np.save('./data/kg/context.npy', context_embedding)
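
A minimal consumer sketch (not part of the scraped example): the two matrices saved by generate() can be loaded back with numpy and indexed by entity id.

import numpy as np

# rows are indexed by entity id; row 0 was zeroed out in generate()
entity_embedding = np.load('./data/kg/entity.npy')
context_embedding = np.load('./data/kg/context.npy')
print(entity_embedding.shape, context_embedding.shape)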
Example #2
from openke.config import Trainer
from openke.module.model import TransE, TransR
from openke.module.loss import MarginLoss
from openke.module.strategy import NegativeSampling
from openke.data import TrainDataLoader

# the snippet starts mid-call, so everything down to rand_init=False is
# reconstructed; the path, dims and the TransE margin are assumed values
train_dataloader = TrainDataLoader(in_path="./data/kg/", nbatches=100,
                                   threads=8, sampling_mode="normal",
                                   bern_flag=1, filter_flag=1,
                                   neg_ent=25, neg_rel=0)

transe = TransE(ent_tot=train_dataloader.get_ent_tot(),
                rel_tot=train_dataloader.get_rel_tot(),
                dim=200,
                p_norm=1,
                norm_flag=True)

model_e = NegativeSampling(model=transe,
                           loss=MarginLoss(margin=5.0),
                           batch_size=train_dataloader.get_batch_size())

# TransR starts from the pretrained TransE embeddings, hence rand_init=False
transr = TransR(ent_tot=train_dataloader.get_ent_tot(),
                rel_tot=train_dataloader.get_rel_tot(),
                dim_e=200,
                dim_r=200,
                p_norm=1,
                norm_flag=True,
                rand_init=False)

model_r = NegativeSampling(model=transr,
                           loss=MarginLoss(margin=4.0),
                           batch_size=train_dataloader.get_batch_size())

# pretrain transe
# trainer = Trainer(model = model_e, data_loader = train_dataloader, train_times = 1000, alpha = 0.5, use_gpu = False)
trainer = Trainer(model=model_e,
                  data_loader=train_dataloader,
                  train_times=1000,
                  alpha=1.0,
                  use_gpu=False)
trainer.run()
parameters = transe.get_parameters()
transe.save_parameters("./result/transr_transe.json")

# train transr, seeding its entity embeddings with the pretrained TransE weights
# transr.set_parameters(parameters)
transr.ent_embeddings = transe.ent_embeddings
trainer = Trainer(model=model_r,
                  data_loader=train_dataloader,
                  train_times=1000,
                  alpha=0.1,
                  use_gpu=False)
trainer.run()
transr.save_checkpoint('./checkpoint/transr.ckpt')

# .epoch and .loss appear to come from a customized Trainer that records
# them during run(); stock OpenKE's Trainer does not expose these attributes
epoch = trainer.epoch
loss = trainer.loss
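
A minimal evaluation sketch (not part of the scraped example; the data path is assumed to match the training one): the saved checkpoint can be scored with OpenKE's standard link-prediction tester.

from openke.config import Tester
from openke.data import TestDataLoader

# rank each test triple's head/tail against all entities
test_dataloader = TestDataLoader("./data/kg/", "link")
transr.load_checkpoint('./checkpoint/transr.ckpt')
tester = Tester(model=transr, data_loader=test_dataloader, use_gpu=False)
tester.run_link_prediction(type_constrain=False)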
Example #3
import os
import pathlib
import pickle

from openke.config import Trainer
from openke.module.model import TransE
from openke.module.loss import MarginLoss
from openke.module.strategy import NegativeSampling
from openke.data import TrainDataLoader

# phase and the *2id paths are not defined in the snippet; the values
# below are assumptions so the example runs end to end
phase = 'train'
entity2id_path = './benchmarks/dbpedia50/entity2id.txt'
relation2id_path = './benchmarks/dbpedia50/relation2id.txt'

# dataloader for training (the snippet starts mid-call; in_path and the
# batch settings are assumed)
train_dataloader = TrainDataLoader(
    in_path='./benchmarks/dbpedia50/',
    nbatches=100,
    threads=8,
    sampling_mode="normal",
    bern_flag=1,
    filter_flag=1,
    neg_ent=25,
    neg_rel=5)

# define the model
transe = TransE(ent_tot=train_dataloader.get_ent_tot(),
                rel_tot=train_dataloader.get_rel_tot(),
                dim=200,
                p_norm=2,
                norm_flag=True)

save_path = os.path.join('checkpoint', phase, 'transe.ckpt')
transe.load_checkpoint(save_path)
params = transe.get_parameters()  # one call instead of two
rel_emb = params['rel_embeddings.weight']
ent_emb = params['ent_embeddings.weight']

# map entity / relation names back to their embedding rows
e_emb, r_emb = dict(), dict()
with open(entity2id_path, 'r', encoding='utf-8') as f:
    next(f)  # skip the leading count line
    for line in f:
        tmp = line.rstrip('\n').split('\t')
        entity = ''.join(tmp[:-1])  # the name itself may contain tabs
        e_emb[entity] = ent_emb[int(tmp[-1]), :]  # the id is the last field

with open(relation2id_path, 'r', encoding='utf-8') as f:
    next(f)  # skip the leading count line
    for line in f:
        tmp = line.rstrip('\n').split('\t')
        r_emb[tmp[0]] = rel_emb[int(tmp[-1]), :]
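
# Illustrative use of the lookup dicts (not in the original snippet;
# requires numpy imported as np):
#   a, b = list(e_emb)[:2]
#   sim = np.dot(e_emb[a], e_emb[b]) / (
#       np.linalg.norm(e_emb[a]) * np.linalg.norm(e_emb[b]))
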
# define the loss function
model = NegativeSampling(model=transe,
                         loss=MarginLoss(),
                         batch_size=train_dataloader.get_batch_size())

# train the model
trainer = Trainer(model=model,
                  data_loader=train_dataloader,
                  train_times=1000,
                  alpha=0.01,
                  use_gpu=True,
                  opt_method='adagrad')
trainer.run()
transe.save_checkpoint('./checkpoint/transe.ckpt')

embeddings = transe.get_parameters()
directory = './models/dbpedia50/transe300/'
pathlib.Path(directory).mkdir(exist_ok=True, parents=True)

other_name_map = {
    'ent_embeddings.weight': 'entities.p',
    'rel_embeddings.weight': 'relations.p'
}


def save_torch_embedding_as_numpy(embedding, filename):
    # despite the name, this pickles the (already numpy) array as-is
    with open(filename, "wb") as f:
        pickle.dump(embedding, f)


for emb_name, filename in other_name_map.items():
    save_torch_embedding_as_numpy(embeddings[emb_name],
                                  os.path.join(directory, filename))
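
A minimal load-back sketch (paths taken from the snippet above): the pickled files hold plain numpy matrices.

import pickle

# rows of entities.p are indexed by entity id, as in entity2id.txt
with open('./models/dbpedia50/transe300/entities.p', 'rb') as f:
    entities = pickle.load(f)
print(entities.shape)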