Пример #1
0
def teacher(e1, e2, num_paths, env, path=None):
    """Build demonstration episodes from ``e1`` to ``e2`` via intermediates.

    A knowledge base is loaded from ``path``.  ``num_paths`` intermediate
    entities are requested from ``kb.pickRandomIntermediatesBetween`` and,
    for each of them, ``BFS`` is run from ``e1`` to the intermediate and from
    the intermediate to ``e2``.  When both searches report success the two
    halves are joined, cycles are cut out of the joined path, and every hop
    of the cleaned path becomes one ``Transition``.

    Args:
        e1: Name of the start entity.
        e2: Name of the target entity; looked up in ``env.entity2id_``.
        num_paths: Number of intermediate entities to request and try.
        env: Object exposing ``entity2id_``, ``relation2id_`` and
            ``idx_state``.
        path: File holding one whitespace-separated ``ent1 rel ent2`` triple
            per line.  Blank lines are ignored.

    Returns:
        A list with one episode per joined path.  Each episode is a list of
        ``Transition`` objects created with ``reward=1``.
    """
    # The context manager closes the file even when reading fails.
    with open(path) as triple_file:
        content = triple_file.readlines()
    kb = KB()
    for line in content:
        fields = line.split()
        if not fields:
            continue  # tolerate blank lines such as a trailing newline
        ent1, rel, ent2 = fields
        kb.addRelation(ent1, rel, ent2)

    intermediates = kb.pickRandomIntermediatesBetween(e1, e2, num_paths)
    res_entity_lists = []
    res_path_lists = []
    for i in range(num_paths):
        mid = intermediates[i]
        suc1, entity_list1, path_list1 = BFS(kb, e1, mid)
        suc2, entity_list2, path_list2 = BFS(kb, mid, e2)
        if suc1 and suc2:
            # Skip the first entity of the second half so the joining entity
            # is not listed twice (assumes BFS returns both endpoints).
            res_entity_lists.append(entity_list1 + entity_list2[1:])
            res_path_lists.append(path_list1 + path_list2)
    print('BFS found paths:', len(res_path_lists))

    # ---------- clean the path --------
    res_entity_lists_new = []
    res_path_lists_new = []
    for entities, relations in zip(res_entity_lists, res_path_lists):
        # Interleave as entity, relation, entity, ... so that one slice
        # removes entities together with the relations between them.
        rel_ents = [
            entities[i // 2] if i % 2 == 0 else relations[i // 2]
            for i in range(len(entities) + len(relations))
        ]

        # Entities that occur more than once, most frequent first.
        duplicate_ents = [
            ent for ent, count in Counter(entities).most_common()
            if count > 1
        ]
        for ent in duplicate_ents:
            # Only even slots hold entities.  Looking at odd slots as well
            # would mis-cut the path if a relation shared an entity's name.
            ent_idx = [
                i for i in range(0, len(rel_ents), 2) if rel_ents[i] == ent
            ]
            if len(ent_idx) > 1:
                # Cut the cycle: keep everything before the first visit and
                # everything from the last visit onwards.
                rel_ents = rel_ents[:ent_idx[0]] + rel_ents[ent_idx[-1]:]

        res_entity_lists_new.append(rel_ents[0::2])
        res_path_lists_new.append(rel_ents[1::2])

    print(res_entity_lists_new)
    print(res_path_lists_new)

    good_episodes = []
    targetID = env.entity2id_[e2]
    for entity_seq, relation_seq in zip(res_entity_lists_new,
                                        res_path_lists_new):
        good_episode = []
        for i in range(len(entity_seq) - 1):
            currID = env.entity2id_[entity_seq[i]]
            nextID = env.entity2id_[entity_seq[i + 1]]
            state_curr = [currID, targetID, 0]
            state_next = [nextID, targetID, 0]
            actionID = env.relation2id_[relation_seq[i]]
            good_episode.append(
                Transition(state=env.idx_state(state_curr),
                           action=actionID,
                           next_state=env.idx_state(state_next),
                           reward=1))
        good_episodes.append(good_episode)
    return good_episodes
Пример #2
0
def sampling(path_threshold=2, path=None, top_k=5):
    """Collect frequent BFS relation paths for the training triples.

    The knowledge base is loaded from ``path`` and the training triples from
    the module-level ``relationPath``.  For every training triple the link
    between its two entities is removed from the knowledge base, ``BFS`` is
    run between them, and the link is added back.  Each found path (its
    relations joined with ``' -> '``) and the triple's own relation are
    counted.  Entries counted at least ``path_threshold`` times are written,
    most frequent first, to two files whose names are built from the
    module-level ``dataPath``:

    * ``demo_path.txt`` - the first ``top_k`` entries, one per line;
    * ``demo_path_stat.txt`` - every entry with its count, tab-separated.

    Args:
        path_threshold: Minimum count an entry needs to be kept.
        path: File holding one whitespace-separated ``ent1 rel ent2`` triple
            per line.  Blank lines are ignored.
        top_k: Number of entries written to ``demo_path.txt``; ``None``
            writes all of them.

    Returns:
        None.
    """
    with open(path) as kb_file:
        content = kb_file.readlines()
    kb = KB()
    for line in content:
        fields = line.split()
        if not fields:
            continue  # tolerate blank lines such as a trailing newline
        ent1, rel, ent2 = fields
        kb.addRelation(ent1, rel, ent2)

    # Positive samples (h, r, t) of this task.
    with open(relationPath) as train_file:
        train_data = train_file.readlines()

    demo_path_dict = {}
    for episode, line in enumerate(train_data):
        sample = line.split()
        if not sample:
            continue  # blank line: nothing to sample
        ent1 = sample[0]
        rel = sample[1]
        ent2 = sample[2]

        # Temporarily remove the current link; otherwise BFS would only
        # return the triple's own relation as the path.
        kb.removePath(ent1, ent2)
        try:
            _, _, path_list = BFS(kb, ent1, ent2)
            path_str = ' -> '.join(path_list)
        except Exception:
            # BFS is opaque here; any failure (including a non-joinable
            # path_list) is treated as "no path".
            print('Episode %d' % episode)
            print('Cannot find a path')
            continue
        finally:
            # Restore the triple on failure as well, so that later episodes
            # search the complete knowledge base.
            kb.addRelation(ent1, rel, ent2)

        demo_path_dict[path_str] = demo_path_dict.get(path_str, 0) + 1
        demo_path_dict[rel] = demo_path_dict.get(rel, 0) + 1

    # Keep only entries seen at least path_threshold times, most frequent
    # first (ties keep their first-seen order because the sort is stable).
    frequent = [(name, count) for name, count in demo_path_dict.items()
                if count >= path_threshold]
    demo_path_list = sorted(frequent, key=lambda item: item[1], reverse=True)
    print('BFS found paths:', len(demo_path_list))

    with open(dataPath + 'demo_path.txt', 'w') as out:
        out.writelines(name + '\n' for name, _ in demo_path_list[:top_k])
    print('demo path saved')

    with open(dataPath + 'demo_path_stat.txt', 'w') as out:
        out.writelines(name + '\t' + str(count) + '\n'
                       for name, count in demo_path_list)
    print('demo path stat saved')
Пример #3
0
def teacher(e1,
            e2,
            num_paths,
            env,
            path=None,
            output_mode=0,
            relation=None,
            knowledge_base=None):
    """Build demonstration episodes from ``e1`` to ``e2`` via intermediates.

    ``num_paths`` intermediate entities are requested from
    ``kb.pickRandomIntermediatesBetween`` and, for each of them, ``BFS`` is
    run from ``e1`` to the intermediate and from the intermediate to ``e2``.
    When both searches report success the two halves are joined, cycles are
    cut out of the joined path, and every hop of the cleaned path becomes one
    ``Transition``.

    Args:
        e1: Name of the start entity.
        e2: Name of the target entity; looked up in ``env.entity2id_``.
        num_paths: Number of intermediate entities to request and try.
        env: Object exposing ``entity2id_``, ``relation2id_`` and
            ``idx_state``.
        path: File holding one whitespace-separated ``ent1 rel ent2`` triple
            per line (entity and relation names, not ids).  Blank lines are
            ignored.  Only read when ``knowledge_base`` is ``None``.
        output_mode: ``1`` stores the raw ``[entity_id, target_id, 0]`` lists
            in the transitions; any other value stores
            ``env.idx_state(state, relation=relation)``.
        relation: Forwarded to ``env.idx_state`` as ``relation``.
        knowledge_base: Already populated knowledge base to use instead of
            loading one from ``path``.

    Returns:
        A list with one episode per joined path.  Each episode is a list of
        ``Transition`` objects created with ``reward=1``.
    """
    if knowledge_base is None:
        # The context manager closes the file even when reading fails.
        with open(path) as triple_file:
            content = triple_file.readlines()
        kb = KB()
        for line in content:
            fields = line.split()
            if not fields:
                continue  # tolerate blank lines such as a trailing newline
            ent1, rel, ent2 = fields
            kb.addRelation(ent1, rel, ent2)
    else:
        kb = knowledge_base

    intermediates = kb.pickRandomIntermediatesBetween(e1, e2, num_paths)
    res_entity_lists = []
    res_path_lists = []
    for i in range(num_paths):
        mid = intermediates[i]
        # Head entity -> i-th intermediate, then intermediate -> tail entity.
        suc1, entity_list1, path_list1 = BFS(kb, e1, mid)
        suc2, entity_list2, path_list2 = BFS(kb, mid, e2)
        if suc1 and suc2:
            # Skip the first entity of the second half so the joining entity
            # is not listed twice (assumes BFS returns both endpoints).
            res_entity_lists.append(entity_list1 + entity_list2[1:])
            res_path_lists.append(path_list1 + path_list2)
    print('BFS found paths:', len(res_path_lists))

    # ---------- clean the path --------
    res_entity_lists_new = []
    res_path_lists_new = []
    for entities, relations in zip(res_entity_lists, res_path_lists):
        # Interleave as entity, relation, entity, ... so that one slice
        # removes entities together with the relations between them.
        rel_ents = [
            entities[i // 2] if i % 2 == 0 else relations[i // 2]
            for i in range(len(entities) + len(relations))
        ]

        # Entities that occur more than once, most frequent first.
        duplicate_ents = [
            ent for ent, count in Counter(entities).most_common()
            if count > 1
        ]
        for ent in duplicate_ents:
            # Only even slots hold entities.  Looking at odd slots as well
            # would mis-cut the path if a relation shared an entity's name.
            ent_idx = [
                i for i in range(0, len(rel_ents), 2) if rel_ents[i] == ent
            ]
            if len(ent_idx) > 1:
                # Cut the cycle out of the path: keep everything before the
                # first visit and everything from the last visit onwards.
                rel_ents = rel_ents[:ent_idx[0]] + rel_ents[ent_idx[-1]:]

        # Split the interleaved sequence back into entities and relations.
        res_entity_lists_new.append(rel_ents[0::2])
        res_path_lists_new.append(rel_ents[1::2])

    print(res_entity_lists_new)
    print(res_path_lists_new)

    good_episodes = []
    targetID = env.entity2id_[e2]
    for entity_seq, relation_seq in zip(res_entity_lists_new,
                                        res_path_lists_new):
        good_episode = []
        for i in range(len(entity_seq) - 1):
            currID = env.entity2id_[entity_seq[i]]
            nextID = env.entity2id_[entity_seq[i + 1]]
            state_curr = [currID, targetID, 0]
            state_next = [nextID, targetID, 0]
            actionID = env.relation2id_[relation_seq[i]]
            if output_mode == 1:
                state = state_curr
                next_state = state_next
            else:
                state = env.idx_state(state_curr, relation=relation)
                next_state = env.idx_state(state_next, relation=relation)
            good_episode.append(
                Transition(state=state,
                           action=actionID,
                           next_state=next_state,
                           reward=1))
        good_episodes.append(good_episode)
    return good_episodes
Пример #4
0
def teacher(e1,
            e2,
            env,
            path=None,
            random_mechanism=True,
            num_paths=5):
    """Build demonstration episodes from ``e1`` to ``e2`` with ``BFS``.

    The knowledge base is loaded from ``path``.  Two modes exist:

    * ``random_mechanism is True``: ``num_paths`` intermediate entities are
      requested from ``kb.pickRandomIntermediatesBetween``; for each one
      ``BFS`` is run from ``e1`` to the intermediate and from the
      intermediate to ``e2``, and the two halves are joined when both
      searches report success.
    * otherwise: a single ``BFS`` from ``e1`` to ``e2`` is used.

    The ``print`` calls and ``range`` are written so the block behaves the
    same under Python 2 and Python 3.

    Args:
        e1: Name of the start entity.
        e2: Name of the target entity; looked up in ``env.entity2id_``.
        env: Object exposing ``entity2id_``, ``relation2id_`` and
            ``idx_state``.
        path: File holding one whitespace-separated ``ent1 rel ent2`` triple
            per line.  Blank lines are ignored.
        random_mechanism: Selects the mode described above.  The check is an
            identity test against ``True``, so other truthy values select the
            single-``BFS`` mode.
        num_paths: Number of intermediate entities to request and try in the
            random mode.

    Returns:
        ``(good_episodes, path_str)`` on success, where ``good_episodes`` is a
        list of episodes (each a list of ``Transition`` objects created with
        ``reward=1``) and ``path_str`` is the found relations joined with
        ``' -> '`` in single-``BFS`` mode or ``False`` in random mode.
        ``(False, False)`` when no path could be found.
    """
    # The context manager closes the file even when reading fails.
    with open(path) as triple_file:
        content = triple_file.readlines()
    kb = KB()
    for line in content:
        fields = line.split()
        if not fields:
            continue  # tolerate blank lines such as a trailing newline
        ent1, rel, ent2 = fields
        kb.addRelation(ent1, rel, ent2)

    res_entity_lists_new = []
    res_path_lists_new = []
    if random_mechanism is True:
        path_str = False  # no single path string exists in this mode
        intermediates = kb.pickRandomIntermediatesBetween(e1, e2, num_paths)
        for i in range(num_paths):
            try:
                mid = intermediates[i]
                suc1, entity_list1, path_list1 = BFS(kb, e1, mid)
                suc2, entity_list2, path_list2 = BFS(kb, mid, e2)
                if suc1 and suc2:
                    # Skip the first entity of the second half so the
                    # joining entity is not listed twice (assumes BFS
                    # returns both endpoints).
                    res_entity_lists_new.append(
                        entity_list1 + entity_list2[1:])
                    res_path_lists_new.append(path_list1 + path_list2)
            except Exception:
                # BFS is opaque here; any failure for this intermediate is
                # reported and the next intermediate is tried.
                print('Cannot find a path')

        if not res_path_lists_new:
            print('Cannot find a path')
            return False, False
        print('BFS found paths: %d' % len(res_path_lists_new))
    else:
        try:
            _, entity_list, path_list = BFS(kb, e1, e2)
            # Joining inside the try also turns a non-joinable path_list
            # into the "no path" result below.
            path_str = ' -> '.join(path_list)
        except Exception:
            print('Cannot find a path')
            return False, False
        res_entity_lists_new.append(entity_list)
        res_path_lists_new.append(path_list)

    good_episodes = []
    # The environment supplies the id mappings and the state encoding.
    targetID = env.entity2id_[e2]
    for entity_seq, relation_seq in zip(res_entity_lists_new,
                                        res_path_lists_new):
        good_episode = []
        for i in range(len(entity_seq) - 1):
            currID = env.entity2id_[entity_seq[i]]
            nextID = env.entity2id_[entity_seq[i + 1]]
            state_curr = [currID, targetID, 0]
            state_next = [nextID, targetID, 0]
            actionID = env.relation2id_[relation_seq[i]]
            # One (state, action, next_state, reward) record per hop.
            good_episode.append(
                Transition(state=env.idx_state(state_curr),
                           action=actionID,
                           next_state=env.idx_state(state_next),
                           reward=1))
        good_episodes.append(good_episode)
    return good_episodes, path_str