def insert_tirp_to_tree(tirp_obj_class_1, tirp_obj_class_0, min_ver_support):
    """
    Recursively merge a class-1 branch into a class-0 tree.

    Every child of ``tirp_obj_class_1`` whose ``get_exist_in_class_0()`` is
    falsy is appended to the child list of its parent inside
    ``tirp_obj_class_0`` after ``set_class_0_properties(min_ver_support)`` has
    been called on it. The function then recurses with that child as the new
    class-1 branch, always passing the same class-0 tree.

    Children may be ``TIRP`` instances or plain dicts; a dict is turned into a
    ``TIRP`` by replacing the instance ``__dict__`` with it (presumably the
    dicts come from JSON-deserialized trees -- verify against callers).

    NOTE(review): the nesting below was reconstructed from a source whose
    indentation was lost; confirm the placement of the statements that follow
    the ``while`` loop against the canonical file.

    :param tirp_obj_class_1: branch whose children are inserted
    :param tirp_obj_class_0: tree that receives the missing children
    :param min_ver_support: forwarded to ``set_class_0_properties``
    :return: combined branch (the ``tirp_obj_class_0`` tree)
    """
    # Iterate over a shallow copy of the class-1 child list.
    childs = tirp_obj_class_1.get_childes().copy()
    for child in childs:
        # Normalise the child to a TIRP instance.
        if type(child) != TIRP:
            child_tirp = TIRP()
            child_tirp.__dict__.clear()
            child_tirp.__dict__.update(child)
        else:
            child_tirp = child
        if not child_tirp.get_exist_in_class_0():
            curr_tirp = tirp_obj_class_0
            childs_class_0 = tirp_obj_class_0.get_childes()
            # Go down the class-0 tree until reaching the level whose size is
            # exactly one less than the child's size (its parent level).
            while curr_tirp.get_tirp_size() + 1 < child_tirp.get_tirp_size():
                # Convert any dict children of this level to TIRP objects.
                # Note that ``curr_tirp`` is reused as a temporary here.
                childes_new = list()
                for child_class_0 in childs_class_0:
                    if type(child_class_0) != TIRP:
                        curr_tirp = TIRP()
                        curr_tirp.__dict__.clear()
                        curr_tirp.__dict__.update(child_class_0)
                        childes_new.append(curr_tirp)
                    else:
                        childes_new.append(child_class_0)
                childs_class_0 = childes_new
                # Pick the node whose symbols and relations are a prefix of
                # the child's. A TIRP of size k has k * (k - 1) / 2 relations.
                # If nothing matches, ``curr_tirp`` is left at the last node
                # iterated.
                for curr_tirp in childs_class_0:
                    curr_size = curr_tirp.get_tirp_size()
                    new_symbols = child_tirp.get_symbols()[:curr_size]
                    num_of_new_rels = int(curr_size * (curr_size - 1) / 2)
                    new_rels = child_tirp.get_rels()[:num_of_new_rels]
                    if curr_tirp.get_symbols() == new_symbols:
                        if curr_tirp.get_rels() == new_rels:
                            # if curr_tirp.get_tirp_size()+1 < child_tirp.get_tirp_size() and curr_tirp.get_exist_in_class_0():
                            childs_class_0 = curr_tirp.get_childes()
                            break
            # The right level to insert at has been found.
            if not curr_tirp.get_exist_in_class_0():
                # The parent is itself flagged as not existing in class 0, so
                # its child list may already hold an entry equal to this
                # child; drop the first such entry before re-adding it below.
                for i in range(0, len(childs_class_0)):
                    if type(childs_class_0[i]) != TIRP:
                        tirp = TIRP()
                        tirp.__dict__.clear()
                        tirp.__dict__.update(childs_class_0[i])
                    else:
                        tirp = childs_class_0[i]
                    if tirp.get_symbols() == child_tirp.get_symbols() and tirp.get_rels() == child_tirp.get_rels():
                        del childs_class_0[i]
                        break
            child_tirp.set_class_0_properties(min_ver_support)
            childs_class_0.append(child_tirp)
        # Recurse so that the descendants of this child are merged as well.
        tirp_obj_class_0 = insert_tirp_to_tree(child_tirp, tirp_obj_class_0,
                                               min_ver_support)
    return tirp_obj_class_0
def get_sub_tree(TIRP, states, states_by_name, path, class_name,
                 min_ver_support, class_1_tirp_file_name, to_add_entities):
    """
    Parse the chunk file of a root TIRP and attach the result as its children.

    The chunk file is named after the first symbol of ``TIRP`` (translated
    through ``states_by_name`` when that mapping is non-empty) and lives under
    ``<path>/chunks`` for ``'class_0'`` or ``<path>/chunks1`` otherwise.

    Note that the ``TIRP`` parameter is an instance and shadows any
    module-level name ``TIRP`` inside this function.

    :param TIRP: root TIRP object that receives the parsed children
    :param states: forwarded to ``ParseOutputFile.parse_output_file``
    :param states_by_name: optional mapping from first symbol to file stem
    :param path: data-set directory
    :param class_name: ``'class_0'`` selects ``chunks``; anything else
        selects ``chunks1``
    :param min_ver_support: forwarded to the parser
    :param class_1_tirp_file_name: forwarded to the parser
    :param to_add_entities: forwarded to the parser
    :return: the same ``TIRP`` object, after ``set_childes`` was called on it
    """
    leading_symbol = TIRP.get_symbols()[0]
    stem = states_by_name[leading_symbol] if states_by_name else leading_symbol
    chunk_file = stem + '.txt'

    chunk_dir = '/chunks' if class_name == 'class_0' else '/chunks1'
    chunk_path = path + chunk_dir + '/' + chunk_file

    parsed_children = ParseOutputFile.parse_output_file(
        chunk_path, 7, states, path, class_name, min_ver_support,
        class_1_tirp_file_name, to_add_entities)
    TIRP.set_childes(TIRPs_in_output_file=parsed_children)
    return TIRP
def parse_main_index(path, dir_path_class_0, dir_path_class_1, states,
                     states_by_name, class_name, min_ver_support,
                     second_class_output_file_name, to_add_entities):
    """
    Parse ``main_Index.txt`` and return the root TIRPs as JSON strings.

    The index is read in records of three lines: two lines handed to
    ``ParseOutputFile.parse_TIRP`` and a third holding the chunk file name.
    For every record:

    * if a chunk file with that name exists, the root is marked as having
      children, its sub-tree is built with ``get_sub_tree`` on a deep copy and
      written as JSON to ``path + dir_path_class_0 + file_name`` (or the
      class-1 directory);
    * otherwise, when parsing class 0 and a second-class output exists, the
      matching class-1 TIRP is looked up (first as a per-root tree file, then
      in ``root.txt``) and passed to ``set_class_1_properties``.

    The module-level ``TIRP_per_file`` mapping is cleared and then refilled
    with ``unique name -> chunk file name``.

    :param path: data-set directory
    :param dir_path_class_0: directory (relative to ``path``) for class-0 trees
    :param dir_path_class_1: directory (relative to ``path``) for class-1 trees
    :param states: forwarded to the parser
    :param states_by_name: forwarded to ``get_sub_tree``
    :param class_name: ``'class_0'`` reads ``chunks``; anything else ``chunks1``
    :param min_ver_support: forwarded to the parser
    :param second_class_output_file_name: ``'File does not exist'`` disables
        the class-1 lookup
    :param to_add_entities: when falsy, class-1 data is read from
        ``/tempChunks1_with_entities/`` and the supporting instances and
        entities of the root are reset to empty lists
    :return: list of JSON strings, one per root TIRP
    """
    TIRP_per_file.clear()
    root_elements = []
    is_class_0 = class_name == 'class_0'
    path_to_chunks = path + ('/chunks/' if is_class_0 else '/chunks1/')

    with open(path_to_chunks + 'main_Index.txt') as fp:
        line1 = fp.readline()
        while line1:
            line2 = fp.readline()
            file_name = fp.readline().rstrip()
            tirp = ParseOutputFile.parse_TIRP(
                line1, line2, states, path, class_name, min_ver_support,
                file_name, second_class_output_file_name, to_add_entities)

            if os.path.isfile(path_to_chunks + file_name):
                tirp.set_childes(has_childs=True)
                # Build the full tree on a copy so that the root serialized
                # into ``root_elements`` below stays free of the sub-tree.
                tirp_with_childs = copy.deepcopy(tirp)
                tirp_with_childs = get_sub_tree(
                    tirp_with_childs, states, states_by_name, path,
                    class_name, min_ver_support, file_name, to_add_entities)
                tirp_with_childs_json = json.dumps(
                    tirp_with_childs, default=lambda x: x.__dict__)
                if is_class_0:
                    path_to_file = path + dir_path_class_0 + file_name
                else:
                    path_to_file = path + dir_path_class_1 + file_name
                with open(path_to_file, "w") as fs:
                    fs.write(tirp_with_childs_json)
            elif (is_class_0 and
                  second_class_output_file_name != 'File does not exist'):
                if not to_add_entities:
                    # Take the class-1 TIRP from the tree with the entities.
                    class_1_dir = path + '/tempChunks1_with_entities/'
                else:
                    class_1_dir = path + dir_path_class_1

                class_1_match = None
                if os.path.isfile(class_1_dir + file_name):
                    with open(class_1_dir + file_name, "r") as fr:
                        tirp_dict = json.load(fr)
                    class_1_match = TIRP()
                    class_1_match.__dict__.clear()
                    class_1_match.__dict__.update(tirp_dict)
                else:
                    with open(class_1_dir + 'root.txt', "r") as fr:
                        for line in fr:
                            candidate = TIRP()
                            candidate.__dict__.clear()
                            candidate.__dict__.update(json.loads(line))
                            # ``get_rels`` must be *called*: comparing the
                            # bound method itself with the relations of
                            # ``tirp`` is always False, so no root was ever
                            # matched through this fallback.
                            if (candidate.get_symbols() == tirp.get_symbols()
                                    and candidate.get_rels() == tirp.get_rels()):
                                class_1_match = candidate
                                break

                if class_1_match is not None:
                    tirp.set_class_1_properties(class_1_match)
                    if not to_add_entities:
                        tirp.set_supporting_instances(list())
                        tirp.set_supporting_entitie(list())

            root_elements.append(
                json.dumps(tirp, default=lambda x: x.__dict__))
            TIRP_per_file[tirp.get_unique_name()] = file_name
            line1 = fp.readline()
    return root_elements
def find_Path_of_tirps(symbols, rels, data_set_path, states,
                       states_by_name=None, to_add_entities=None):
    """
    Return the chain of TIRPs leading from a root to the requested TIRP.

    ``symbols`` is split on ``'-'`` and ``rels`` on ``'.'`` (empty parts are
    dropped). When ``to_add_entities`` is falsy the relation characters are
    translated through a fixed table and the symbols through ``states``, and
    trees are read from ``/tempChunks/``; otherwise the tree file name is
    looked up in ``states_by_name`` and trees are read from
    ``/tempChunks_with_entities/``.

    If the root has a tree file, the tree is descended one level at a time,
    following the child whose symbols and relations are a prefix of the
    request. If the root has no tree file, ``root.txt`` is scanned for an
    exact match.

    :param symbols: ``'-'``-separated symbols of the requested TIRP
    :param rels: ``'.'``-separated relations of the requested TIRP
    :param data_set_path: data-set directory
    :param states: mapping applied to every symbol when ``to_add_entities``
        is falsy
    :param states_by_name: mapping from the first symbol to the tree file
        stem, used when ``to_add_entities`` is truthy
    :param to_add_entities: selects the directory and the translation mode
    :return: list of TIRP objects from the root down to the requested TIRP
        (a one-element list for a root found in ``root.txt``), or ``None``
        when the TIRP cannot be found or any exception occurs (the exception
        is printed)
    """
    try:
        relations_dict = {
            "<": "before",
            "m": "meets",
            "o": "overlaps",
            "f": "finished by",
            "c": "contains",
            "=": "equals",
            "s": "starts",
            "-": 7
        }
        rels = [rel for rel in rels.split('.') if rel]
        symbols = [symbol for symbol in symbols.split('-') if symbol]
        if not to_add_entities:
            # The file name uses the raw first symbol, so it must be taken
            # before the symbols are translated.
            file_name = symbols[0] + '.txt'
            dir_path = '/tempChunks/'
            rels = [relations_dict[rel] for rel in rels]
            symbols = [states[symbol] for symbol in symbols]
        else:
            file_name = states_by_name[symbols[0]] + '.txt'
            dir_path = '/tempChunks_with_entities/'
        tirp_size = len(symbols)
        base_dir = data_set_path + dir_path

        if not os.path.isfile(base_dir + file_name):
            # No tree file for this root: look for an exact match among the
            # roots, which are stored one JSON object per line.
            with open(base_dir + 'root.txt', "r") as fr:
                for line in fr:
                    tirp_obj = TIRP()
                    tirp_obj.__dict__.clear()
                    tirp_obj.__dict__.update(json.loads(line))
                    if (tirp_obj.get_symbols() == symbols
                            and tirp_obj.get_rels() == rels):
                        return [tirp_obj]
            return None

        with open(base_dir + file_name, "r") as fr:
            tirp_dict = json.load(fr)
        tirp_obj = TIRP()
        tirp_obj.__dict__.clear()
        tirp_obj.__dict__.update(tirp_dict)
        tirps_path = [tirp_obj]

        if tirp_size > 1:
            childs = tirp_obj.get_childes()
            while len(tirps_path) < tirp_size:
                for child in childs:
                    curr_tirp = TIRP()
                    curr_tirp.__dict__.clear()
                    curr_tirp.__dict__.update(child)
                    curr_size = curr_tirp.get_tirp_size()
                    # A TIRP of size k has k * (k - 1) / 2 relations.
                    num_of_new_rels = int(curr_size * (curr_size - 1) / 2)
                    if (curr_tirp.get_symbols() == symbols[:curr_size]
                            and curr_tirp.get_rels() == rels[:num_of_new_rels]):
                        tirps_path.append(curr_tirp)
                        childs = curr_tirp.get_childes()
                        break
                else:
                    # No child continues the requested prefix. Without this
                    # exit the ``while`` condition could never change and the
                    # loop would spin forever.
                    return None
        return tirps_path
    except Exception as e:
        # Best-effort lookup: any failure (missing file, unknown symbol or
        # relation, malformed JSON) is reported and signalled with None.
        print(e)
        return None
def _class_1_tirp_from_dict(tirp_dict):
    """Build a TIRP whose attributes are exactly the entries of ``tirp_dict``."""
    tirp_obj = TIRP()
    tirp_obj.__dict__.clear()
    tirp_obj.__dict__.update(tirp_dict)
    return tirp_obj


def _read_class_1_roots(root_file):
    """Load ``root_file`` (one JSON object per line) as a list of TIRPs."""
    with open(root_file, "r") as fr:
        return [_class_1_tirp_from_dict(json.loads(line)) for line in fr]


def _write_class_1_roots(root_file, root_tirps):
    """Replace the content of ``root_file`` with one JSON line per root TIRP."""
    # Serialize everything before the file is truncated so that a
    # serialization error cannot leave an empty or partial root file behind.
    lines = ["%s\n" % json.dumps(root_tirp, default=lambda x: x.__dict__)
             for root_tirp in root_tirps]
    with open(root_file, "w") as fw:
        fw.writelines(lines)


def _write_class_1_tree(tree_file, tirp_obj):
    """Overwrite ``tree_file`` with the JSON form of ``tirp_obj``."""
    # Serialize first for the same reason as in ``_write_class_1_roots``.
    serialized = json.dumps(tirp_obj, default=lambda x: x.__dict__)
    with open(tree_file, "w") as fw:
        fw.write(serialized)


def find_tirp_in_class_1(path, class_0_tirp, class_1_tirp_file_name,
                         to_add_entities):
    """
    Find the class-1 counterpart of a class-0 TIRP and flag it on disk.

    Class-1 data is read from ``<path>/tempChunks1_with_entities/`` when
    ``to_add_entities`` is truthy, else from ``<path>/tempChunks1/``.

    * Tree file exists, class-0 TIRP of size 1: the tree root is flagged with
      ``set_exist_in_class_0()`` and written back; the first root in
      ``root.txt`` with the same first symbol is flagged too and ``root.txt``
      is rewritten. The tree root is returned.
    * Tree file exists, larger TIRP: the tree is descended along children
      whose symbols and relations are a prefix of the class-0 TIRP. When a
      child of equal size is reached it is flagged, stored back in its
      parent's child list, the tree file is rewritten and the child returned.
    * Tree file missing, class-0 TIRP of size 1: the first root in
      ``root.txt`` with the same first symbol is flagged, ``root.txt`` is
      rewritten and that root is returned.

    :param path: data-set directory
    :param class_0_tirp: the class-0 TIRP to look for
    :param class_1_tirp_file_name: name of the class-1 tree file of the root
    :param to_add_entities: selects the class-1 directory
    :return: the matching class-1 TIRP object, or ``None`` when there is none
    """
    if to_add_entities:
        dir_path = path + '/tempChunks1_with_entities/'
    else:
        dir_path = path + '/tempChunks1/'
    path_to_class_1_tirp = dir_path + class_1_tirp_file_name
    root_file = dir_path + 'root.txt'

    if not os.path.isfile(path_to_class_1_tirp):
        # Without a tree file only a root (size 1) can possibly be matched.
        if class_0_tirp.get_tirp_size() != 1:
            return None
        root_elements_class_1 = _read_class_1_roots(root_file)
        for root_element in root_elements_class_1:
            if (root_element.get_symbols()[0]
                    == class_0_tirp.get_symbols()[0]):
                root_element.set_exist_in_class_0()
                _write_class_1_roots(root_file, root_elements_class_1)
                return root_element
        return None

    with open(path_to_class_1_tirp, "r") as fr:
        class_1_tirp = _class_1_tirp_from_dict(json.load(fr))
    class_0_tirp_size = class_0_tirp.get_tirp_size()
    class_1_tirp_size = class_1_tirp.get_tirp_size()

    if class_0_tirp_size == 1:  # root element
        class_1_tirp.set_exist_in_class_0()
        _write_class_1_tree(path_to_class_1_tirp, class_1_tirp)
        root_elements_class_1 = _read_class_1_roots(root_file)
        for root_element in root_elements_class_1:
            if (root_element.get_symbols()[0]
                    == class_1_tirp.get_symbols()[0]):
                root_element.set_exist_in_class_0()
                break
        # The root file is rewritten even when no root matched.
        _write_class_1_roots(root_file, root_elements_class_1)
        return class_1_tirp

    father = class_1_tirp
    childs = class_1_tirp.get_childes()
    found = True
    while class_1_tirp_size < class_0_tirp_size and len(childs) > 0 and found:
        found = False
        for index, child in enumerate(childs):
            curr_tirp = _class_1_tirp_from_dict(child)
            curr_size = curr_tirp.get_tirp_size()
            # A TIRP of size k has k * (k - 1) / 2 relations.
            num_of_new_rels = int(curr_size * (curr_size - 1) / 2)
            if not (curr_tirp.get_symbols()
                    == class_0_tirp.get_symbols()[:curr_size]
                    and curr_tirp.get_rels()
                    == class_0_tirp.get_rels()[:num_of_new_rels]):
                continue
            if curr_tirp.get_tirp_size() == class_0_tirp.get_tirp_size():
                curr_tirp.set_exist_in_class_0()
                # ``childs`` is the parent's own list, so storing the flagged
                # TIRP in it updates the tree that is serialized below.
                childs[index] = curr_tirp
                father.update_childs(childs)
                _write_class_1_tree(path_to_class_1_tirp, class_1_tirp)
                return curr_tirp
            # Prefix matches but the node is smaller: go one level down.
            father = curr_tirp
            childs = curr_tirp.get_childes()
            class_1_tirp_size = curr_tirp.get_tirp_size()
            found = True
            break
    return None