Пример #1
0
def gain_txt(path):
    """
    Read the txt file at ``path`` and return its text as a list of characters.

    Every line feed in the text is first expanded to carriage return +
    line feed, so each line break contributes two elements to the result.
    :param path: path of the txt file, handed to normal_util.read_txt
    :return: list of single-character strings
    """
    text = normal_util.read_txt(path)
    with_crlf = text.replace("\n", "\r\n")
    return list(with_crlf)
Пример #2
0
def gain_relation_contact_entity(entity_T, path):
    """
    Walk the ann file line by line and dispatch each line to the entity or
    relation handler.

    :param entity_T: object passed through to gain_entity / gain_relation
        (presumably the entity lookup those helpers fill in; verify there)
    :param path: path of the ann file
    :return: None
    """
    for line in normal_util.read_txt(path).split("\n"):
        # Lines of at most one character are skipped.
        if len(line) <= 1:
            continue
        # The first tab-separated column decides which handler gets the line.
        record_id = line.split("\t")[0]
        handler = gain_relation if filter_relation(record_id) else gain_entity
        handler(entity_T, line)
Пример #3
0
def clean_txt(path):
    """
    Rewrite the file in place with all spaces removed.

    Both the ASCII space and the ideographic space (U+3000) are stripped
    from every line; lines that end up empty are dropped entirely.
    :param path: path of the file to clean; it is overwritten as UTF-8
    """
    rows = normal_util.read_txt(path).split("\n")
    last_position = len(rows) - 1

    pieces = []
    for position, row in enumerate(rows):
        stripped = row.replace(" ", "").replace("\u3000", "")
        if not stripped:
            continue
        pieces.append(stripped)
        # A kept line is followed by a line break unless it is the very
        # last element of the split (so a trailing break survives only if
        # something, even an empty line, came after it).
        if position < last_position:
            pieces.append("\n")

    with open(path, "w", encoding="utf-8") as f:
        f.write("".join(pieces))
Пример #4
0
def read_file(paths):
    """
    Collect the last tab-separated field of every line in the file.

    :param paths: path handed to normal_util.read_txt
    :return: list holding the last tab-separated field of each line, in
        file order; lines whose last field is empty are left out
    """
    text = normal_util.read_txt(paths)
    last_fields = (row.split("\t")[-1] for row in text.split("\n"))
    return [field for field in last_fields if len(field) != 0]
Пример #5
0
def gain_label(path, word_count):
    """
    Build the per-character label list described by an ann file.

    Every position starts as "O". For each annotation line whose id (first
    tab-separated column) contains "T", the positions from the start offset
    up to, but not including, the end offset are set to that id.
    :param path: path of the ann file
    :param word_count: number of characters in the txt the labels belong to
    :return: list of ``word_count`` labels
    :raises IndexError: if an annotation span reaches past ``word_count``
    :raises ValueError: if a start/end offset is not a plain integer
    """
    label_list = ["O"] * word_count
    for line in normal_util.read_txt(path).split("\n"):
        # Split once per line instead of re-splitting for every column read.
        columns = line.split("\t")
        if len(columns) <= 1 or "T" not in columns[0]:
            continue
        label_no = columns[0]
        # Second column is space-separated: name, start offset, end offset.
        span_fields = columns[1].split(" ")
        start_index = int(span_fields[1])
        end_index = int(span_fields[2])
        for i in range(start_index, end_index):
            label_list[i] = label_no
    return label_list
Пример #6
0
def read_txt(path):
    """
    Read the file at ``path``, split it into lines and parse them with
    read_content.

    :param path: path handed to normal_util.read_txt
    :return: tuple of the three values produced by read_content
        (presumably labels, contents and a length; verify against
        read_content)
    """
    lines = normal_util.read_txt(path).split("\n")
    tags, texts, size = read_content(lines)
    return tags, texts, size
Пример #7
0
def copy(path, path_name):
    """
    Read the content at ``path`` and hand it to normal_util.write_content
    together with ``path_name``.

    :param path: source path, read via normal_util.read_txt
    :param path_name: second argument of normal_util.write_content
        (presumably the destination path; verify against normal_util)
    """
    normal_util.write_content(normal_util.read_txt(path), path_name)