def teacher(e1, e2, num_paths, env, path=None):
    """Build demonstration episodes from ``e1`` to ``e2`` via random intermediates.

    The knowledge graph is loaded from ``path`` (one whitespace-separated
    ``entity relation entity`` triple per line).  Up to ``num_paths``
    intermediate entities are requested from the KB; for each one, a path
    ``e1 -> intermediate -> e2`` is searched with two BFS calls.  Loops in the
    concatenated path are cut out, and every remaining hop is turned into a
    ``Transition`` with reward 1.

    Args:
        e1: Name of the start entity.
        e2: Name of the target entity.
        num_paths: Number of intermediate entities to try.
        env: Object providing the ``entity2id_`` / ``relation2id_`` mappings
            and the ``idx_state`` method.
        path: Path of the triple file used to build the knowledge base.

    Returns:
        A list of episodes (one per successful path); each episode is a list
        of ``Transition`` objects.
    """
    # The context manager closes the file even if reading fails.
    with open(path) as f:
        content = f.readlines()

    kb = KB()
    for line in content:
        if not line.strip():
            continue  # tolerate blank lines in the triple file
        ent1, rel, ent2 = line.rsplit()
        kb.addRelation(ent1, rel, ent2)

    # kb.removePath(e1, e2)

    intermediates = kb.pickRandomIntermediatesBetween(e1, e2, num_paths)

    res_entity_lists = []
    res_path_lists = []
    # Slice instead of indexing range(num_paths) so that a KB returning fewer
    # intermediates than requested does not raise IndexError.
    for mid in intermediates[:num_paths]:
        suc1, entity_list1, path_list1 = BFS(kb, e1, mid)
        suc2, entity_list2, path_list2 = BFS(kb, mid, e2)
        if suc1 and suc2:
            # entity_list2[0] is the intermediate itself, already the last
            # element of entity_list1.
            res_entity_lists.append(entity_list1 + entity_list2[1:])
            res_path_lists.append(path_list1 + path_list2)

    print('BFS found paths:', len(res_path_lists))

    # ---------- clean the path --------
    res_entity_lists_new = []
    res_path_lists_new = []
    for entities, relations in zip(res_entity_lists, res_path_lists):
        entities = list(entities)
        relations = list(relations)

        # Entities visited more than once, most frequent first.
        duplicate_ents = [
            item for item in Counter(entities).items() if item[1] != 1
        ]
        duplicate_ents.sort(key=lambda x: x[1], reverse=True)

        for ent, _count in duplicate_ents:
            # Search entity positions only, so a relation that happens to
            # share a name with an entity can never misalign the cut.
            positions = [i for i, x in enumerate(entities) if x == ent]
            if len(positions) > 1:
                first, last = positions[0], positions[-1]
                # Drop everything between the first and the last visit:
                # relations[k] is the hop leaving entities[k].
                entities = entities[:first] + entities[last:]
                relations = relations[:first] + relations[last:]

        res_entity_lists_new.append(entities)
        res_path_lists_new.append(relations)

    print(res_entity_lists_new)
    print(res_path_lists_new)

    good_episodes = []
    targetID = env.entity2id_[e2]
    for entities, relations in zip(res_entity_lists_new, res_path_lists_new):
        good_episode = []
        for curr_ent, next_ent, rel_name in zip(entities, entities[1:], relations):
            currID = env.entity2id_[curr_ent]
            nextID = env.entity2id_[next_ent]
            state_curr = [currID, targetID, 0]
            state_next = [nextID, targetID, 0]
            actionID = env.relation2id_[rel_name]
            good_episode.append(
                Transition(state=env.idx_state(state_curr),
                           action=actionID,
                           next_state=env.idx_state(state_next),
                           reward=1))
        good_episodes.append(good_episode)
    return good_episodes
def sampling(path_threshold=2, path=None):
    """Mine frequent BFS paths for the task's training triples and save them.

    The knowledge base is built from the triple file at ``path``.  For every
    training triple read from the module-level ``relationPath`` file, the
    direct link between its two entities is temporarily removed, a BFS path
    between them is searched, and both the found relation path and the
    triple's own relation are counted.  Entries counted at least
    ``path_threshold`` times are written, most frequent first, to
    ``dataPath + 'demo_path.txt'`` (top five, path only) and
    ``dataPath + 'demo_path_stat.txt'`` (all, with counts).

    Args:
        path_threshold: Minimum count for a path to be kept.
        path: Path of the triple file used to build the knowledge base.

    Returns:
        None.  Results are written to the two files named above.
    """
    with open(path) as f:
        content = f.readlines()

    kb = KB()
    for line in content:
        if not line.strip():
            continue  # tolerate blank lines in the triple file
        # rsplit() without arguments splits on any whitespace
        ent1, rel, ent2 = line.rsplit()
        kb.addRelation(ent1, rel, ent2)

    # positive samples (h, r, t) of this task
    with open(relationPath) as f:
        train_data = f.readlines()

    demo_path_dict = {}
    for episode, line in enumerate(train_data):
        sample = line.split()
        if not sample:
            continue  # blank line, nothing to sample
        ent1, rel, ent2 = sample[0], sample[1], sample[2]

        # Temporarily remove the current triple; otherwise BFS would just
        # return the direct relation as the path.
        kb.removePath(ent1, ent2)
        try:
            _, _, path_list = BFS(kb, ent1, ent2)
            path_str = ' -> '.join(path_list)
        except Exception:
            # Deliberately broad: any failure here is treated as "no path".
            print('Episode %d' % episode)
            print('Cannot find a path')
            continue
        finally:
            # Put the triple back even when no path was found, so that later
            # samples still search the complete graph.
            kb.addRelation(ent1, rel, ent2)

        for key in (path_str, rel):
            demo_path_dict[key] = demo_path_dict.get(key, 0) + 1

    # Keep only paths found at least path_threshold times.
    demo_path_dict = {
        k: v for k, v in demo_path_dict.items() if v >= path_threshold
    }
    demo_path_list = sorted(demo_path_dict.items(),
                            key=lambda x: x[1],
                            reverse=True)
    print('BFS found paths:', len(demo_path_list))

    with open(dataPath + 'demo_path.txt', 'w') as f:
        for item in demo_path_list[:5]:
            f.write(item[0] + '\n')
    print('demo path saved')

    with open(dataPath + 'demo_path_stat.txt', 'w') as f:
        for item in demo_path_list:
            f.write(item[0] + '\t' + str(item[1]) + '\n')
    print('demo path stat saved')
def teacher(e1, e2, num_paths, env, path=None, output_mode=0, relation=None,
            knowledge_base=None):
    """Build demonstration episodes from ``e1`` to ``e2`` via random intermediates.

    Up to ``num_paths`` intermediate entities are requested from the knowledge
    base; for each one, a path ``e1 -> intermediate -> e2`` is searched with
    two BFS calls.  Loops in the concatenated path are cut out, and every
    remaining hop is turned into a ``Transition`` with reward 1.

    Args:
        e1: Name of the head (start) entity.
        e2: Name of the tail (target) entity.
        num_paths: Number of intermediate entities to try.
        env: Object providing the ``entity2id_`` / ``relation2id_`` mappings
            and the ``idx_state`` method.
        path: Path of the triple file (one whitespace-separated
            ``entity relation entity`` triple per line).  Only read when
            ``knowledge_base`` is None.
        output_mode: When 1, transitions carry the raw
            ``[entity_id, target_id, 0]`` lists as states; otherwise states
            are passed through ``env.idx_state``.
        relation: Forwarded to ``env.idx_state`` as ``relation=``.
        knowledge_base: Optional pre-built knowledge base to search instead
            of loading one from ``path``.

    Returns:
        A list of episodes (one per successful path); each episode is a list
        of ``Transition`` objects.
    """
    if knowledge_base is None:
        # The context manager closes the file even if reading fails.
        with open(path) as f:
            content = f.readlines()
        kb = KB()
        for line in content:
            if not line.strip():
                continue  # tolerate blank lines in the triple file
            # Each line is a triple of strings, not numeric ids.
            ent1, rel, ent2 = line.rsplit()
            kb.addRelation(ent1, rel, ent2)
    else:
        kb = knowledge_base

    # kb.removePath(e1, e2)

    intermediates = kb.pickRandomIntermediatesBetween(e1, e2, num_paths)

    res_entity_lists = []
    res_path_lists = []
    # Slice instead of indexing range(num_paths) so that a KB returning fewer
    # intermediates than requested does not raise IndexError.
    for mid in intermediates[:num_paths]:
        # head entity -> intermediate
        suc1, entity_list1, path_list1 = BFS(kb, e1, mid)
        # intermediate -> tail entity
        suc2, entity_list2, path_list2 = BFS(kb, mid, e2)
        if suc1 and suc2:
            # entity_list2[0] is the intermediate itself, already the last
            # element of entity_list1.
            res_entity_lists.append(entity_list1 + entity_list2[1:])
            res_path_lists.append(path_list1 + path_list2)

    print('BFS found paths:', len(res_path_lists))

    # ---------- clean the path --------
    res_entity_lists_new = []
    res_path_lists_new = []
    for entities, relations in zip(res_entity_lists, res_path_lists):
        entities = list(entities)
        relations = list(relations)

        # (entity, count) pairs for entities visited more than once,
        # most frequent first.
        duplicate_ents = [
            item for item in Counter(entities).items() if item[1] != 1
        ]
        duplicate_ents.sort(key=lambda x: x[1], reverse=True)

        for ent, _count in duplicate_ents:
            # Search entity positions only, so a relation that happens to
            # share a name with an entity can never misalign the cut.
            positions = [i for i, x in enumerate(entities) if x == ent]
            if len(positions) > 1:
                first, last = positions[0], positions[-1]
                # Remove the ring between the first and the last visit:
                # relations[k] is the hop leaving entities[k].
                entities = entities[:first] + entities[last:]
                relations = relations[:first] + relations[last:]

        res_entity_lists_new.append(entities)
        res_path_lists_new.append(relations)

    print(res_entity_lists_new)
    print(res_path_lists_new)

    good_episodes = []  # one list of transitions per successful path
    targetID = env.entity2id_[e2]
    for entities, relations in zip(res_entity_lists_new, res_path_lists_new):
        good_episode = []
        for curr_ent, next_ent, rel_name in zip(entities, entities[1:], relations):
            currID = env.entity2id_[curr_ent]
            nextID = env.entity2id_[next_ent]
            state_curr = [currID, targetID, 0]
            state_next = [nextID, targetID, 0]
            actionID = env.relation2id_[rel_name]
            if output_mode == 1:
                transition = Transition(state=state_curr,
                                        action=actionID,
                                        next_state=state_next,
                                        reward=1)
            else:
                transition = Transition(
                    state=env.idx_state(state_curr, relation=relation),
                    action=actionID,
                    next_state=env.idx_state(state_next, relation=relation),
                    reward=1)
            good_episode.append(transition)
        good_episodes.append(good_episode)
    return good_episodes
def teacher(e1, e2, env, path=None, random_mechanism=True, num_paths=5):
    """Build demonstration episodes from ``e1`` to ``e2`` using BFS.

    The knowledge base is built from the triple file at ``path``.  With
    ``random_mechanism`` set to ``True``, ``num_paths`` random intermediate
    entities are requested and a path ``e1 -> intermediate -> e2`` is searched
    for each; otherwise a single direct BFS path from ``e1`` to ``e2`` is
    used.  Every hop of every path becomes a ``Transition`` with reward 1.

    Args:
        e1: Name of the start entity.
        e2: Name of the target entity.
        env: Object providing the ``entity2id_`` / ``relation2id_`` mappings
            and the ``idx_state`` method.
        path: Path of the triple file (one whitespace-separated
            ``entity relation entity`` triple per line).
        random_mechanism: Selects the random-intermediate strategy only when
            it is exactly ``True`` (identity check); any other value selects
            the direct BFS strategy.
        num_paths: Number of intermediates to try in the random strategy.

    Returns:
        ``(good_episodes, path_str)`` on success, where ``good_episodes`` is a
        list of episodes (lists of ``Transition``) and ``path_str`` is
        ``False`` in the random strategy or the ``' -> '``-joined relation
        path in the direct strategy.  ``(False, False)`` when no path could
        be found.
    """
    # The context manager closes the file even if reading fails.
    with open(path) as f:
        content = f.readlines()

    kb = KB()
    for line in content:
        if not line.strip():
            continue  # tolerate blank lines in the triple file
        # rsplit() without arguments splits on any whitespace
        ent1, rel, ent2 = line.rsplit()
        kb.addRelation(ent1, rel, ent2)

    res_entity_lists_new = []
    res_path_lists_new = []

    if random_mechanism is True:
        path_str = False  # no single path string exists in this strategy
        intermediates = kb.pickRandomIntermediatesBetween(e1, e2, num_paths)
        for i in range(num_paths):
            try:
                suc1, entity_list1, path_list1 = BFS(kb, e1, intermediates[i])
                suc2, entity_list2, path_list2 = BFS(kb, intermediates[i], e2)
                if suc1 and suc2:
                    # entity_list2[0] repeats the intermediate; skip it.
                    res_entity_lists_new.append(entity_list1 + entity_list2[1:])
                    res_path_lists_new.append(path_list1 + path_list2)
            except Exception:
                # Deliberately broad: any failure for this intermediate is
                # reported and the next one is tried.
                print('Cannot find a path')

        if len(res_path_lists_new) == 0:
            print('Cannot find a path')
            return False, False
        # Single-argument print gives the same output on Python 2 and 3.
        print('BFS found paths: %d' % len(res_path_lists_new))
    else:
        try:
            _suc, entity_list, path_list = BFS(kb, e1, e2)
            # join() also raises when BFS yields no usable path list, which
            # routes that case to the handler below.
            path_str = ' -> '.join(path_list)
        except Exception:
            print('Cannot find a path')
            return False, False
        res_entity_lists_new.append(entity_list)
        res_path_lists_new.append(path_list)

    good_episodes = []
    # we need the environment here
    targetID = env.entity2id_[e2]
    for entities, relations in zip(res_entity_lists_new, res_path_lists_new):
        good_episode = []
        for i in range(len(entities) - 1):
            currID = env.entity2id_[entities[i]]
            nextID = env.entity2id_[entities[i + 1]]
            state_curr = [currID, targetID, 0]
            state_next = [nextID, targetID, 0]
            actionID = env.relation2id_[relations[i]]
            # (state, action, next_state, reward)
            good_episode.append(
                Transition(state=env.idx_state(state_curr),
                           action=actionID,
                           next_state=env.idx_state(state_next),
                           reward=1))
        good_episodes.append(good_episode)
    return good_episodes, path_str