Пример #1
0
def insert_tirp_to_tree(tirp_obj_class_1, tirp_obj_class_0, min_ver_support):
    """
    Recursively merge the children of a class-1 node into a class-0 branch.

    Every child of ``tirp_obj_class_1`` that is not flagged as existing in
    class 0 is given class-0 properties and appended to a child list reached
    by walking down from ``tirp_obj_class_0``. The function then recurses
    into every child (flagged or not), always passing the class-0 branch on.

    Children may be ``TIRP`` instances or dict-like objects; the latter are
    wrapped in a ``TIRP`` whose ``__dict__`` is replaced by their content.

    :param tirp_obj_class_1: class-1 node whose children are merged
    :param tirp_obj_class_0: class-0 branch that receives the children
        (presumably modified in place through the lists returned by
        ``get_childes()`` -- confirm against the TIRP class)
    :param min_ver_support: forwarded to ``set_class_0_properties``
    :return: the class-0 branch after the merge
    """

    # Iterate over a copy of the class-1 child list.
    childs = tirp_obj_class_1.get_childes().copy()
    for child in childs:
        # Wrap non-TIRP children in a TIRP carrying the same attributes.
        if type(child) != TIRP:
            child_tirp = TIRP()
            child_tirp.__dict__.clear()
            child_tirp.__dict__.update(child)
        else:
            child_tirp = child
        if not child_tirp.get_exist_in_class_0():
            # Start at the top of the class-0 branch.
            curr_tirp = tirp_obj_class_0
            childs_class_0 = tirp_obj_class_0.get_childes()
            while curr_tirp.get_tirp_size() + 1 < child_tirp.get_tirp_size(
            ):  # descend until curr_tirp is one level above the child to insert
                # Work on a TIRP-only view of the current child list; the
                # list obtained from get_childes() is not modified here.
                childes_new = list()
                for child_class_0 in childs_class_0:
                    if type(child_class_0) != TIRP:
                        curr_tirp = TIRP()
                        curr_tirp.__dict__.clear()
                        curr_tirp.__dict__.update(child_class_0)
                        childes_new.append(curr_tirp)
                    else:
                        childes_new.append(child_class_0)
                childs_class_0 = childes_new
                # Find the entry whose symbols and relations equal the
                # leading part of the child's; a pattern of size n is
                # compared on n symbols and n * (n - 1) / 2 relations.
                # NOTE(review): if nothing matches, childs_class_0 is left
                # unchanged and curr_tirp stays bound to the last entry, so
                # the while loop may not terminate -- confirm callers
                # guarantee that a matching ancestor always exists.
                for curr_tirp in childs_class_0:
                    curr_size = curr_tirp.get_tirp_size()
                    new_symbols = child_tirp.get_symbols()[:curr_size]
                    num_of_new_rels = int(curr_size * (curr_size - 1) / 2)
                    new_rels = child_tirp.get_rels()[:num_of_new_rels]
                    if curr_tirp.get_symbols() == new_symbols:
                        if curr_tirp.get_rels() == new_rels:
                            # if curr_tirp.get_tirp_size()+1 < child_tirp.get_tirp_size() and curr_tirp.get_exist_in_class_0():
                            childs_class_0 = curr_tirp.get_childes()
                            break  # matching ancestor found; continue from its children
            # When the located node is itself not flagged as class 0, remove
            # the first existing entry with the same symbols and relations
            # (presumably so the append below does not create a duplicate).
            if not curr_tirp.get_exist_in_class_0():
                for i in range(0, len(childs_class_0)):
                    if type(childs_class_0[i]) != TIRP:
                        tirp = TIRP()
                        tirp.__dict__.clear()
                        tirp.__dict__.update(childs_class_0[i])
                    else:
                        tirp = childs_class_0[i]
                    if tirp.get_symbols() == child_tirp.get_symbols(
                    ) and tirp.get_rels() == child_tirp.get_rels():
                        del childs_class_0[i]
                        break
            child_tirp.set_class_0_properties(min_ver_support)
            childs_class_0.append(child_tirp)
        # Recurse for every child, regardless of the class-0 flag.
        tirp_obj_class_0 = insert_tirp_to_tree(child_tirp, tirp_obj_class_0,
                                               min_ver_support)
    return tirp_obj_class_0
Пример #2
0
def get_sub_tree(TIRP, states, states_by_name, path, class_name,
                 min_ver_support, class_1_tirp_file_name, to_add_entities):
    """
    Parse the chunk file of a TIRP and attach the result as its children.

    The chunk file is named after the first symbol of ``TIRP`` (looked up in
    ``states_by_name`` when that mapping is non-empty) plus ``.txt``. It is
    read from ``<path>/chunks/`` for ``class_0`` and from ``<path>/chunks1/``
    for any other class name.

    :param TIRP: the TIRP object to extend (note: the parameter name shadows
        the ``TIRP`` class inside this function)
    :param states: forwarded to ``ParseOutputFile.parse_output_file``
    :param states_by_name: optional mapping from first symbol to file stem
    :param path: data set directory
    :param class_name: ``'class_0'`` selects ``chunks``, otherwise ``chunks1``
    :param min_ver_support: forwarded to the parser
    :param class_1_tirp_file_name: forwarded to the parser
    :param to_add_entities: forwarded to the parser
    :return: the same ``TIRP`` object after ``set_childes`` was called on it
    """
    first_symbol = TIRP.get_symbols()[0]
    stem = states_by_name[first_symbol] if states_by_name else first_symbol
    chunk_name = stem + '.txt'

    chunks_dir = '/chunks' if class_name == 'class_0' else '/chunks1'
    chunk_file = path + chunks_dir + '/' + chunk_name

    parsed_children = ParseOutputFile.parse_output_file(
        chunk_file, 7, states, path, class_name, min_ver_support,
        class_1_tirp_file_name, to_add_entities)
    TIRP.set_childes(TIRPs_in_output_file=parsed_children)
    return TIRP
Пример #3
0
def parse_main_index(path, dir_path_class_0, dir_path_class_1, states,
                     states_by_name, class_name, min_ver_support,
                     second_class_output_file_name, to_add_entities):
    """
    Parse ``main_Index.txt`` of one class and serialise its root TIRPs.

    The index is consumed in records of three lines: two lines handed to
    ``ParseOutputFile.parse_TIRP`` followed by the name of the chunk file of
    that root. For each record:

    * if the chunk file exists, a deep copy of the root is extended with its
      sub tree (``get_sub_tree``) and written as JSON to
      ``path + dir_path_class_X + file_name``; the root appended to the
      result is only marked with ``set_childes(has_childs=True)``;
    * otherwise, for ``class_0`` with an existing second-class output file,
      the matching class-1 TIRP is looked up (first as a per-TIRP file, then
      line by line in ``root.txt``) and its properties are attached.

    The module-level ``TIRP_per_file`` mapping is cleared and refilled with
    ``unique name -> chunk file name``.

    :param path: data set directory
    :param dir_path_class_0: output sub directory for class-0 sub trees
    :param dir_path_class_1: output sub directory for class-1 sub trees
    :param states: forwarded to the parser
    :param states_by_name: forwarded to ``get_sub_tree``
    :param class_name: ``'class_0'`` reads ``chunks``, otherwise ``chunks1``
    :param min_ver_support: forwarded to the parser
    :param second_class_output_file_name: forwarded to the parser; the value
        ``'File does not exist'`` disables the class-1 lookup
    :param to_add_entities: when falsy, class-1 data is taken from
        ``/tempChunks1_with_entities/`` and the supporting instances and
        entities of the root are reset to empty lists after attaching it
    :return: list of JSON strings, one per root TIRP
    """

    def tirp_from_dict(attributes):
        # Build a TIRP whose attributes are exactly those of the given dict.
        obj = TIRP()
        obj.__dict__.clear()
        obj.__dict__.update(attributes)
        return obj

    def attach_class_1(target, class_1_obj):
        # Copy the class-1 properties onto the class-0 root.
        target.set_class_1_properties(class_1_obj)
        if not to_add_entities:
            target.set_supporting_instances(list())
            target.set_supporting_entitie(list())

    TIRP_per_file.clear()
    root_elements = list()
    if class_name == 'class_0':
        path_to_chuncks = path + '/chunks/'
    else:
        path_to_chuncks = path + '/chunks1/'
    with open(path_to_chuncks + 'main_Index.txt') as fp:
        line1 = fp.readline()
        while line1:
            line2 = fp.readline()
            file_name = fp.readline().rstrip()
            tirp = ParseOutputFile.parse_TIRP(line1, line2, states, path,
                                              class_name, min_ver_support,
                                              file_name,
                                              second_class_output_file_name,
                                              to_add_entities)
            if os.path.isfile(path_to_chuncks + file_name):
                tirp.set_childes(has_childs=True)
                # The sub tree is attached to a copy so that the root entry
                # returned to the caller does not receive the parsed children.
                tirp_with_childs = copy.deepcopy(tirp)
                tirp_with_childs = get_sub_tree(tirp_with_childs, states,
                                                states_by_name, path,
                                                class_name, min_ver_support,
                                                file_name, to_add_entities)
                tirp_with_childs_json = json.dumps(
                    tirp_with_childs, default=lambda x: x.__dict__)
                if class_name == 'class_0':
                    path_to_file = path + dir_path_class_0 + file_name
                else:
                    path_to_file = path + dir_path_class_1 + file_name
                with open(path_to_file, "w") as fs:
                    fs.write(tirp_with_childs_json)
            elif (class_name == 'class_0' and
                  second_class_output_file_name != 'File does not exist'):
                if not to_add_entities:  # take the tirp with the entities
                    dir_path_class_1_new = '/tempChunks1_with_entities/'
                else:
                    dir_path_class_1_new = dir_path_class_1
                class_1_file = path + dir_path_class_1_new + file_name
                if os.path.isfile(class_1_file):
                    with open(class_1_file, "r") as fr:
                        tirp_obj = tirp_from_dict(json.load(fr))
                    attach_class_1(tirp, tirp_obj)
                else:
                    with open(path + dir_path_class_1_new + 'root.txt',
                              "r") as fr:
                        for line in fr:
                            tirp_obj_class_1 = tirp_from_dict(
                                json.loads(line))
                            # Both getters must be *called*; comparing the
                            # bound method itself to a list is always False.
                            if (tirp_obj_class_1.get_symbols() ==
                                    tirp.get_symbols() and
                                    tirp_obj_class_1.get_rels() ==
                                    tirp.get_rels()):
                                attach_class_1(tirp, tirp_obj_class_1)
                                break
            s = json.dumps(tirp, default=lambda x: x.__dict__)
            root_elements.append(s)
            TIRP_name = tirp.get_unique_name()
            TIRP_per_file[TIRP_name] = file_name
            line1 = fp.readline()
        return root_elements
Пример #4
0
def find_Path_of_tirps(symbols,
                       rels,
                       data_set_path,
                       states,
                       states_by_name=None,
                       to_add_entities=None):
    """
    Return the chain of TIRPs leading from a root to the requested pattern.

    ``symbols`` is split on ``'-'`` and ``rels`` on ``'.'``; empty tokens are
    dropped. When ``to_add_entities`` is falsy the relation tokens are
    translated through the relation table below, the symbols through
    ``states``, and the tree file ``<first raw symbol>.txt`` is read from
    ``/tempChunks/``. Otherwise the tokens are used as given and the file
    ``states_by_name[<first symbol>] + '.txt'`` is read from
    ``/tempChunks_with_entities/``.

    If the tree file exists, its root is the first element of the path and
    the tree is descended one level at a time, each time taking the child
    whose symbols and relations equal the leading part of the request. If
    the tree file does not exist, ``root.txt`` in the same directory is
    searched for a root with exactly the requested symbols and relations.

    :param symbols: ``'-'`` separated symbols of the requested pattern
    :param rels: ``'.'`` separated relations of the requested pattern
    :param data_set_path: data set directory
    :param states: mapping used to translate symbols (without entities)
    :param states_by_name: mapping from symbol to file stem (with entities)
    :param to_add_entities: selects the directory and translation mode
    :return: list of TIRP objects from the root down to the requested
        pattern, a one-element list when the pattern is found in
        ``root.txt``, or ``None`` when the pattern is not found or any
        error occurs (the error is printed)
    """
    try:
        relations_dict = {
            "<": "before",
            "m": "meets",
            "o": "overlaps",
            "f": "finished by",
            "c": "contains",
            "=": "equals",
            "s": "starts",
            "-": 7
        }
        rels = list(filter(None, rels.split('.')))
        symbols = list(filter(None, symbols.split('-')))
        if not to_add_entities:
            # The file name uses the raw first symbol, before translation.
            file_name = symbols[0] + '.txt'
            dir_path = '/tempChunks/'
            rels = [relations_dict[rel] for rel in rels]
            symbols = [states[symbol] for symbol in symbols]
        else:
            file_name = states_by_name[symbols[0]] + '.txt'
            dir_path = '/tempChunks_with_entities/'
        tirp_size = len(symbols)
        tirp_file = data_set_path + dir_path + file_name

        if os.path.isfile(tirp_file):
            with open(tirp_file, "r") as fr:
                tirp_dict = json.load(fr)
            tirp_obj = TIRP()
            tirp_obj.__dict__.clear()
            tirp_obj.__dict__.update(tirp_dict)
            tirps_path = [tirp_obj]
            if tirp_size > 1:
                childs = tirp_obj.get_childes()
                while len(tirps_path) < tirp_size:
                    for child in childs:
                        curr_tirp = TIRP()
                        curr_tirp.__dict__.clear()
                        curr_tirp.__dict__.update(child)
                        curr_size = curr_tirp.get_tirp_size()
                        # A pattern of size n is compared on n symbols and
                        # n * (n - 1) / 2 relations.
                        new_symbols = symbols[:curr_size]
                        num_of_new_rels = int(curr_size * (curr_size - 1) /
                                              2)
                        new_rels = rels[:num_of_new_rels]
                        if (curr_tirp.get_symbols() == new_symbols and
                                curr_tirp.get_rels() == new_rels):
                            tirps_path.append(curr_tirp)
                            childs = curr_tirp.get_childes()
                            break
                    else:
                        # No child continues the requested pattern. Without
                        # this exit the while loop would spin forever on the
                        # same child list; report "not found" like the
                        # root.txt branch does.
                        return None
            return tirps_path

        with open(data_set_path + dir_path + 'root.txt', "r") as fr:
            for line in fr:
                tirp_dict = json.loads(line)
                tirp_obj = TIRP()
                tirp_obj.__dict__.clear()
                tirp_obj.__dict__.update(tirp_dict)
                if (tirp_obj.get_symbols() == symbols and
                        tirp_obj.get_rels() == rels):
                    return [tirp_obj]
        return None

    except Exception as e:
        # Best effort: callers receive None on any failure.
        print(e)
        return None
Пример #5
0
def find_tirp_in_class_1(path, class_0_tirp, class_1_tirp_file_name,
                         to_add_entities):
    """
    Locate the class-1 counterpart of a class-0 TIRP and flag it on disk.

    The class-1 data is read from ``<path>/tempChunks1_with_entities/`` when
    ``to_add_entities`` is truthy and from ``<path>/tempChunks1/`` otherwise.

    * Tree file exists, class-0 TIRP has size 1: the tree root is flagged
      with ``set_exist_in_class_0``, the tree file is rewritten, the first
      entry of ``root.txt`` with the same first symbol is flagged as well
      and ``root.txt`` is rewritten. The tree root is returned.
    * Tree file exists, larger TIRP: the tree is descended along the children
      whose symbols and relations equal the leading part of the class-0
      TIRP. When a child of the same size is reached it is flagged, stored
      back in its parent's child list, the tree file is rewritten and the
      child is returned.
    * Tree file missing, class-0 TIRP has size 1: the first entry of
      ``root.txt`` with the same first symbol is flagged, ``root.txt`` is
      rewritten and that entry is returned.

    :param path: data set directory
    :param class_0_tirp: the class-0 TIRP to look for
    :param class_1_tirp_file_name: name of the class-1 tree file
    :param to_add_entities: selects the class-1 directory
    :return: the matching class-1 TIRP, or ``None`` when there is none
    """

    def as_tirp(attributes):
        # TIRP whose attributes are exactly those of the given mapping.
        wrapped = TIRP()
        wrapped.__dict__.clear()
        wrapped.__dict__.update(attributes)
        return wrapped

    def to_json(obj):
        return json.dumps(obj, default=lambda x: x.__dict__)

    def read_roots():
        # One JSON document per line of root.txt.
        with open(roots_file, "r") as roots_in:
            return [as_tirp(json.loads(row)) for row in roots_in.readlines()]

    def rewrite_roots(roots):
        # root.txt is removed and then re-created line by line.
        os.remove(roots_file)
        with open(roots_file, "a") as roots_out:
            for root in roots:
                roots_out.write("%s\n" % to_json(root))

    sub_dir = ('/tempChunks1_with_entities/'
               if to_add_entities else '/tempChunks1/')
    dir_path = path + sub_dir
    roots_file = dir_path + 'root.txt'
    tree_file = dir_path + class_1_tirp_file_name

    if not os.path.isfile(tree_file):
        # Without a tree file only a root element can be matched.
        if class_0_tirp.get_tirp_size() != 1:
            return None
        roots = read_roots()
        for root in roots:
            if root.get_symbols()[0] == class_0_tirp.get_symbols()[0]:
                root.set_exist_in_class_0()
                rewrite_roots(roots)
                return root
        return None

    with open(tree_file, "r") as tree_in:
        class_1_tirp = as_tirp(json.load(tree_in))
        wanted_size = class_0_tirp.get_tirp_size()
        reached_size = class_1_tirp.get_tirp_size()

        if wanted_size == 1:  # root element
            class_1_tirp.set_exist_in_class_0()
            with open(tree_file, "w") as tree_out:
                tree_out.write(to_json(class_1_tirp))
            roots = read_roots()
            for root in roots:
                if root.get_symbols()[0] == class_1_tirp.get_symbols()[0]:
                    root.set_exist_in_class_0()
                    break
            # root.txt is rewritten even when no entry was flagged.
            rewrite_roots(roots)
            return class_1_tirp

        parent = class_1_tirp
        level = class_1_tirp.get_childes()
        descended = True
        while reached_size < wanted_size and len(level) > 0 and descended:
            descended = False
            for position, raw_child in enumerate(level):
                candidate = as_tirp(raw_child)
                width = candidate.get_tirp_size()
                # A pattern of size n is compared on n symbols and
                # n * (n - 1) / 2 relations.
                prefix_symbols = class_0_tirp.get_symbols()[:width]
                rel_count = int(width * (width - 1) / 2)
                prefix_rels = class_0_tirp.get_rels()[:rel_count]
                if (candidate.get_symbols() == prefix_symbols and
                        candidate.get_rels() == prefix_rels):
                    if candidate.get_tirp_size(
                    ) == class_0_tirp.get_tirp_size():
                        # Target reached: flag it and persist the tree.
                        candidate.set_exist_in_class_0()
                        level[position] = candidate
                        parent.update_childs(level)
                        with open(tree_file, "w") as tree_out:
                            tree_out.write(to_json(class_1_tirp))
                        return candidate
                    # Ancestor of the target: continue one level deeper.
                    parent = candidate
                    level = candidate.get_childes()
                    reached_size = candidate.get_tirp_size()
                    descended = True
                    break
        return None