Example #1
    '/m/028cl7': [5, 1, '/m/03lty', '/music/genre/subgenre', '/m/028cl7', 'tail'],
    '/m/03rg5x': [4, 1, '/m/0cbd2', '/people/profession/people_with_this_profession', '/m/03rg5x', 'tail']
}

#### LOAD DATASET
print("Loading dataset...")
complex_dataset = Dataset(name=dataset_name, separator="\t", load=True)

### LOAD TRAINED ORIGINAL MODEL
print("Loading original trained model...")
original_model = ComplEx(dataset=complex_dataset,
                         dimension=dimension,
                         init_random=True,
                         init_size=init)  # type: ComplEx
original_model.load_state_dict(torch.load(model_path))
original_model.to('cuda')

### FOR EACH ENTITY, PERFORM A KELPIE ANALYSIS
for entity_to_explain in entity_2_params:
    train_degree, test_degree, head, relation, tail, perspective = entity_2_params[
        entity_to_explain]
    print(
        "\nWorking with entity %s (train degree %i; test degree %i): explaining fact <%s, %s, %s>."
        % (entity_to_explain, train_degree, test_degree, head, relation, tail))

    # get the ids of the elements of the fact to explain and the perspective entity
    head_id, relation_id, tail_id = complex_dataset.get_id_for_entity_name(head), \
                                    complex_dataset.get_id_for_relation_name(relation), \
                                    complex_dataset.get_id_for_entity_name(tail)

Example #2
    '/m/01tnbn': [72, 7, '/m/0cbd2', '/people/profession/people_with_this_profession', '/m/01tnbn', 'tail'],
    '/m/028d4v': [69, 11, '/m/028d4v', '/people/person/profession', '/m/0dxtg', 'head'],
    '/m/03193l': [43, 5, '/m/03193l', '/common/topic/webpage./common/webpage/category', '/m/08mbj32', 'head'],
    '/m/0g2ff': [29, 4, '/m/0g2ff', '/music/performance_role/regular_performances./music/group_membership/role', '/m/0mkg', 'head'],
    '/m/0269kx': [21, 1, '/m/0h52w', '/protected_sites/natural_or_cultural_site_designation/sites./protected_sites/natural_or_cultural_site_listing/listed_site', '/m/0269kx', 'tail'],
    '/m/028cl7': [5, 1, '/m/03lty', '/music/genre/subgenre', '/m/028cl7', 'tail'],
    '/m/03rg5x': [4, 1, '/m/0cbd2', '/people/profession/people_with_this_profession', '/m/03rg5x', 'tail']
}
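# each value of entity_2_params is a list [train degree, test degree, head, relation, tail, perspective],
# i.e. the fact to explain plus degree information for the entity under analysis
# (see the unpacking inside the loop below)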

#### LOAD DATASET
print("Loading dataset...")
complex_dataset = Dataset(name=dataset_name, separator="\t", load=True)

### LOAD TRAINED ORIGINAL MODEL
print("Loading original trained model...")
original_model = ComplEx(dataset=complex_dataset, dimension=dimension, init_random=True, init_size=init) # type: ComplEx
original_model.load_state_dict(torch.load(model_path))
original_model.to('cuda')

### FOR EACH ENTITY, PERFORM A KELPIE ANALYSIS
for entity_to_explain in entity_2_params:
    train_degree, test_degree, head, relation, tail, perspective = entity_2_params[entity_to_explain]
    print("\nWorking with entity %s (train degree %i; test degree %i): explaining fact <%s, %s, %s>." %
          (entity_to_explain, train_degree, test_degree, head, relation, tail))

    # get the ids of the elements of the fact to explain and the perspective entity
    head_id, relation_id, tail_id = complex_dataset.get_id_for_entity_name(head), \
                                    complex_dataset.get_id_for_relation_name(relation), \
                                    complex_dataset.get_id_for_entity_name(tail)

    original_entity_id = head_id if perspective == 'head' else tail_id

Example #3

# get the ids of the elements of the fact to explain and the perspective entity
head_id, relation_id, tail_id = original_dataset.get_id_for_entity_name(head), \
                                original_dataset.get_id_for_relation_name(relation), \
                                original_dataset.get_id_for_entity_name(tail)
original_entity_id = head_id if args.perspective == "head" else tail_id

# create the fact to explain as a numpy array of its ids
original_triple = (head_id, relation_id, tail_id)
original_sample = numpy.array(original_triple)

# check that the fact to explain is actually a test fact
assert(original_sample in original_dataset.test_samples)


#############   INITIALIZE MODELS AND THEIR STRUCTURES
print("Loading model at location %s..." % args.model_path)
# instantiate and load the original model from filesystem
original_model = ComplEx(dataset=original_dataset,
                         dimension=args.dimension,
                         init_random=True,
                         init_size=args.init) # type: ComplEx
original_model.load_state_dict(torch.load(args.model_path))
original_model.to('cuda')

kelpie_dataset = KelpieDataset(dataset=original_dataset, entity_id=original_entity_id)


############ EXTRACT TEST FACTS AND TRAINING FACTS

print("Extracting train and test samples for the original and the kelpie entities...")
# extract all training facts and test facts involving the entity to explain
# and replace the id of the entity to explain with the id of the fake kelpie entity
original_test_samples = kelpie_dataset.original_test_samples
kelpie_test_samples = kelpie_dataset.kelpie_test_samples
kelpie_train_samples = kelpie_dataset.kelpie_train_samples
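
The excerpt stops right after the sample sets are extracted. As a sketch only, assuming a kelpie_model wrapped as in Example #8 below and the predict_samples interface used in Example #4, the two entities could then be ranked on their test facts and compared:

# sketch: rank both entities on their test facts (head and tail ranks per fact)
_, original_ranks, _ = original_model.predict_samples(original_test_samples)
_, kelpie_ranks, _ = kelpie_model.predict_samples(samples=kelpie_test_samples, original_mode=False)

original_mean_rank = sum(a + b for (a, b) in original_ranks) / (2 * len(original_ranks))
kelpie_mean_rank = sum(a + b for (a, b) in kelpie_ranks) / (2 * len(kelpie_ranks))
print("Original entity mean rank: %f; Kelpie entity mean rank: %f" % (original_mean_rank, kelpie_mean_rank))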
Example #4
def rbo(original_model: ComplEx, kelpie_model: KelpieComplEx,
        original_samples: numpy.array, kelpie_samples: numpy.array):

    _, original_ranks, original_predictions = original_model.predict_samples(
        original_samples)
    _, kelpie_ranks, kelpie_predictions = kelpie_model.predict_samples(
        samples=kelpie_samples, original_mode=False)

    all_original_ranks = []
    for (a, b) in original_ranks:
        all_original_ranks.append(a)
        all_original_ranks.append(b)

    all_kelpie_ranks = []
    for (a, b) in kelpie_ranks:
        all_kelpie_ranks.append(a)
        all_kelpie_ranks.append(b)

    original_mrr = mrr(all_original_ranks)
    kelpie_mrr = mrr(all_kelpie_ranks)
    original_h1 = hits_k(all_original_ranks, 1)
    kelpie_h1 = hits_k(all_kelpie_ranks, 1)

    rbos = []
    for i in range(len(original_samples)):
        _original_sample = original_samples[i]
        _kelpie_sample = kelpie_samples[i]

        original_target_head, _, original_target_tail = _original_sample
        kelpie_target_head, _, kelpie_target_tail = _kelpie_sample

        original_target_head_index = int(original_ranks[i][0] - 1)
        original_target_tail_index = int(original_ranks[i][1] - 1)
        kelpie_target_head_index = int(kelpie_ranks[i][0] - 1)
        kelpie_target_tail_index = int(kelpie_ranks[i][1] - 1)

        # get head and tail predictions
        original_head_predictions = original_predictions[i][0]
        kelpie_head_predictions = kelpie_predictions[i][0]
        original_tail_predictions = original_predictions[i][1]
        kelpie_tail_predictions = kelpie_predictions[i][1]

        assert original_head_predictions[original_target_head_index] == original_target_head
        assert kelpie_head_predictions[kelpie_target_head_index] == kelpie_target_head
        assert original_tail_predictions[original_target_tail_index] == original_target_tail
        assert kelpie_tail_predictions[kelpie_target_tail_index] == kelpie_target_tail

        # replace the target head id with the same placeholder value (-1) in both rankings,
        # so that the two lists match at the position of the target head
        original_head_predictions[original_target_head_index] = -1
        kelpie_head_predictions[kelpie_target_head_index] = -1
        # cut each list right after the position at which its target head was ranked
        original_head_predictions = original_head_predictions[:original_target_head_index + 1]
        kelpie_head_predictions = kelpie_head_predictions[:kelpie_target_head_index + 1]

        # replace the target tail id with the same placeholder value (-1) in both rankings,
        # so that the two lists match at the position of the target tail
        original_tail_predictions[original_target_tail_index] = -1
        kelpie_tail_predictions[kelpie_target_tail_index] = -1
        # cut each list right after the position at which its target tail was ranked
        original_tail_predictions = original_tail_predictions[:original_target_tail_index + 1]
        kelpie_tail_predictions = kelpie_tail_predictions[:kelpie_target_tail_index + 1]

        rbos.append(
            ranking_similarity.rank_biased_overlap(original_head_predictions,
                                                   kelpie_head_predictions))
        rbos.append(
            ranking_similarity.rank_biased_overlap(original_tail_predictions,
                                                   kelpie_tail_predictions))

    avg_rbo = float(sum(rbos)) / float(len(rbos))
    return avg_rbo, original_mrr, kelpie_mrr, original_h1, kelpie_h1
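
The mrr, hits_k and ranking_similarity.rank_biased_overlap helpers used above are imported from elsewhere in the repository and are not part of the excerpt. A minimal sketch of what rbo assumes about the two metric helpers (standard MRR and Hits@K over a flat list of 1-based ranks) looks like this:

def mrr(ranks):
    # mean reciprocal rank of a flat list of 1-based ranks
    return sum(1.0 / rank for rank in ranks) / float(len(ranks))

def hits_k(ranks, k):
    # fraction of ranks that fall within the top k positions
    return sum(1 for rank in ranks if rank <= k) / float(len(ranks))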
Example #6
torch.backends.cudnn.deterministic = True

if args.load is not None:
    model_path = args.load
else:
    model_path = os.path.join(MODEL_PATH,
                              "_".join(["ComplEx", args.dataset]) + ".pt")
    if not os.path.isdir(MODEL_PATH):
        os.makedirs(MODEL_PATH)

print("Loading %s dataset..." % args.dataset)
dataset = Dataset(name=args.dataset, separator="\t", load=True)

print("Initializing model...")
model = ComplEx(dataset=dataset,
                dimension=args.dimension,
                init_random=True,
                init_size=args.init)  # type: ComplEx
model.to('cuda')
if args.load is not None:
    model.load_state_dict(torch.load(model_path))

print("Training model...")
optimizer = MultiClassNLLptimizer(model=model,
                                  optimizer_name=args.optimizer,
                                  batch_size=args.batch_size,
                                  learning_rate=args.learning_rate,
                                  decay1=args.decay1,
                                  decay2=args.decay2,
                                  regularizer_name='N3',
                                  regularizer_weight=args.reg)
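
The args namespace read above comes from a command-line parser that the excerpt does not show. A minimal argparse sketch covering just the attributes this snippet accesses (the default values here are illustrative, not the repository's):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--dataset", required=True, help="name of the dataset to train on")
parser.add_argument("--dimension", type=int, default=1000, help="embedding dimension")
parser.add_argument("--init", type=float, default=1e-3, help="initialization scale")
parser.add_argument("--optimizer", default="Adagrad", help="optimizer name")
parser.add_argument("--batch_size", type=int, default=1000)
parser.add_argument("--learning_rate", type=float, default=1e-1)
parser.add_argument("--decay1", type=float, default=0.9, help="first decay rate, if applicable")
parser.add_argument("--decay2", type=float, default=0.999, help="second decay rate, if applicable")
parser.add_argument("--reg", type=float, default=0.0, help="N3 regularizer weight")
parser.add_argument("--load", default=None, help="path of a previously trained model to resume from")
args = parser.parse_args()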
Example #7
def compute_fact_relevance(model: ComplEx,
                           dataset: Dataset,
                           sample_to_explain,
                           perspective="head",
                           perturbation_step=0.05,
                           lambd=1):
    head_id, relation_id, tail_id = sample_to_explain
    entity_to_explain_id = head_id if perspective == "head" else tail_id

    # get the embedding of the head entity, of the relation, and of the tail entity of the fact to explain
    head_embedding = model.entity_embeddings[head_id].detach().reshape(1, model.dimension * 2)
    relation_embedding = model.relation_embeddings[relation_id].detach().reshape(1, model.dimension * 2)
    tail_embedding = model.entity_embeddings[tail_id].detach().reshape(1, model.dimension * 2)

    # set the requires_grad flag of the embedding of the entity to explain to true
    entity_to_explain_embedding = head_embedding if perspective == "head" else tail_embedding
    entity_to_explain_embedding.requires_grad = True

    # compute the score of the fact, and extract the gradient of the embedding of the entity to explain
    # then, perturbate the embedding of the entity to explain
    score = model.score_embeddings(head_embedding, relation_embedding, tail_embedding)
    score.backward()
    gradient = entity_to_explain_embedding.grad[0]
    perturbed_entity_to_explain_embedding = \
        entity_to_explain_embedding.detach() - perturbation_step * gradient.detach()

    # extract all training samples containing the entity to explain, and compute their scores
    samples_containing_entity_to_explain = numpy.array([
        (h, r, t) for (h, r, t) in dataset.train_samples
        if entity_to_explain_id in [h, t]
    ])
    original_scores = model.score(samples_containing_entity_to_explain)

    # extract the embeddings for the head entities, relations, and tail entities
    # of all training samples containing the entity to explain;
    head_embeddings = model.entity_embeddings[samples_containing_entity_to_explain[:, 0]]
    relation_embeddings = model.relation_embeddings[samples_containing_entity_to_explain[:, 1]]
    tail_embeddings = model.entity_embeddings[samples_containing_entity_to_explain[:, 2]]

    # for the entity to explain, use the perturbed embedding rather than the original one
    for i in range(samples_containing_entity_to_explain.shape[0]):
        (h, r, t) = samples_containing_entity_to_explain[i]
        if h == entity_to_explain_id:
            head_embeddings[i] = perturbed_entity_to_explain_embedding
        elif t == entity_to_explain_id:
            tail_embeddings[i] = perturbed_entity_to_explain_embedding

    # compute the scores of all training samples containing the entity to explain
    # using its perturbed embedding rather than the original one
    perturbed_scores = model.score_embeddings(head_embeddings, relation_embeddings,
                                              tail_embeddings).detach().cpu().numpy()

    # now for each training sample containing the entity to explain you have
    # both the original score and the score computed with the perturbed embedding
    # so you can compute the relevance of that training sample as original_score - lambda * perturbed_score
    sample_2_relevance = {}
    for i in range(samples_containing_entity_to_explain.shape[0]):
        sample_2_relevance[tuple(samples_containing_entity_to_explain[i])] = (
            original_scores[i] - lambd * perturbed_scores[i])[0]

    most_relevant_samples = sorted(sample_2_relevance.items(),
                                   key=lambda x: x[1],
                                   reverse=True)

    return most_relevant_samples
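
For illustration only, reusing variable names from the earlier excerpts (original_model, original_dataset and original_sample are not defined in this snippet), compute_fact_relevance could be called like this to list the most relevant training facts of the head entity:

# sketch: rank the training facts involving the head entity by estimated relevance
most_relevant = compute_fact_relevance(model=original_model,
                                       dataset=original_dataset,
                                       sample_to_explain=original_sample,
                                       perspective="head")
for (h, r, t), relevance in most_relevant[:10]:
    print("(%d, %d, %d): relevance %f" % (h, r, t, relevance))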
Example #8
# get the ids of the elements of the fact to explain and the perspective entity
head_id, relation_id, tail_id = original_dataset.get_id_for_entity_name(head), \
                                original_dataset.get_id_for_relation_name(relation), \
                                original_dataset.get_id_for_entity_name(tail)
original_entity_id = head_id if args.perspective == "head" else tail_id

# create the fact to explain as a numpy array of its ids
original_sample_tuple = (head_id, relation_id, tail_id)
original_sample = numpy.array(original_sample_tuple)

# check that the fact to explain is actually a test fact
assert(original_sample in original_dataset.test_samples)


#############   INITIALIZE MODELS AND THEIR STRUCTURES
print("Loading model at location %s..." % args.model_path)
# instantiate and load the original model from filesystem
original_model = ComplEx(dataset=original_dataset, dimension=args.dimension, init_random=True, init_size=args.init) # type: ComplEx
original_model.load_state_dict(torch.load(args.model_path))
original_model.to('cuda')

print("Wrapping the original model in a Kelpie explainable model...")
# use model_to_explain to initialize the Kelpie model
kelpie_dataset = KelpieDataset(dataset=original_dataset, entity_id=original_entity_id)
kelpie_model = KelpieComplEx(model=original_model, dataset=kelpie_dataset, init_size=1e-3) # type: KelpieComplEx
kelpie_model.to('cuda')


############ EXTRACT TEST FACTS AND TRAINING FACTS

print("Extracting train and test samples for the original and the kelpie entities...")
# extract all training facts and test facts involving the entity to explain
# and replace the id of the entity to explain with the id of the fake kelpie entity