Python xml2pos示例

编程语言: Python

命名空间/包名称: src.features.make_features

方法/功能: xml2pos

hotexamples.com的示例: 3

Python xml2pos - 共找到3个示例。这些是从开源项目中提取的、评价最高的 src.features.make_features.xml2pos 真实 Python 代码示例。您可以为这些示例评分，以帮助我们提高示例质量。

示例#1

显示文件

    def __iter__(self):
        """Stream one ``(doc_id, para_id, tokens)`` tuple per JSON line.

        Each line of the annotation file is parsed as a JSON object with the
        keys ``'id'``, ``'paragraphID'`` and ``'annotation'``.  The annotation
        is passed to ``mf.xml2lemmas`` and ``mf.xml2pos``; every pair of
        same-index entries is run through ``utils.posFilterString`` and the
        surviving tokens are collected into one flat list for the line.

        Yields:
            tuple: ``(doc_id, para_id, para_list)`` where ``para_list`` holds
            all filtered tokens of that line (possibly empty).
        """
        # NOTE(review): the slice restricts reading to the first entry of
        # ``file_list`` only -- confirm this is intentional.
        for fname in self.file_list[0:1]:
            path = os.path.join(self.annotation_dir, fname)
            with open(path) as handle:
                timer = LoopTimer(update_after=100)

                for raw in handle:
                    if self.print_status:
                        timer.update("Lemma Para Stream")

                    record = json.loads(raw)

                    doc_id = record['id']
                    para_id = record['paragraphID']
                    xml = record['annotation']

                    # Presumably both helpers return one entry per sentence,
                    # aligned by index -- verify against make_features.
                    lemma_groups = mf.xml2lemmas(xml)
                    pos_groups = mf.xml2pos(xml)

                    para_list = []
                    for idx, lemmas in enumerate(lemma_groups):
                        # Only the filtered tokens are needed here; the
                        # filtered POS tags are discarded.
                        kept, _ = utils.posFilterString(lemmas, pos_groups[idx])

                        if len(kept) > 0:
                            para_list.extend(kept)
                    yield doc_id, para_id, para_list

示例#2

显示文件

    def __iter__(self):
        """Stream POS bigrams, tagged with document id and a per-document counter.

        Each line of the annotation file is parsed as a JSON object with the
        keys ``'annotation'`` and ``'id'``.  A counter is reset to 0 whenever
        the id differs from the previous line's id and incremented otherwise,
        so it numbers the consecutive lines that share an id.

        The annotation is passed to ``mf.xml2words`` and ``mf.xml2pos``; every
        pair of same-index entries is run through ``utils.posFilterString``.
        For each entry whose filtered token list is non-empty, the bigrams of
        the filtered POS tags (``utils.makeBigrams``) are yielded.

        Yields:
            tuple: ``(doc_id, para_num, pos_bigrams)``.
        """
        # NOTE(review): the slice restricts reading to the first entry of
        # ``file_list`` only -- confirm this is intentional.
        for filename in self.file_list[0:1]:
            sent_file = os.path.join(self.annotation_dir, filename)
            with open(sent_file) as file:
                lc = LoopTimer(update_after=100)

                # A unique sentinel never equals any decoded JSON value, so
                # the first record is always treated as a new document --
                # even when its id is JSON ``null`` (None).  With ``None`` as
                # the initial value that case hit ``para_num += 1`` before
                # ``para_num`` was ever assigned.
                last_id = object()
                para_num = 0
                for line in file:
                    if self.print_status:
                        lc.update("Posbigram Sent Stream")

                    data = json.loads(line)

                    xml = data['annotation']
                    doc_id = data['id']
                    if last_id != doc_id:
                        para_num = 0
                    else:
                        para_num += 1
                    last_id = doc_id

                    # Presumably both helpers return one entry per sentence,
                    # aligned by index -- verify against make_features.
                    token_list = mf.xml2words(xml)
                    pos_list = mf.xml2pos(xml)

                    for i, tokens in enumerate(token_list):
                        token_cleaned, pos_cleaned = utils.posFilterString(tokens, pos_list[i])

                        # The tokens only decide whether anything survived
                        # the filter; the payload is built from the POS tags.
                        if len(token_cleaned) > 0:
                            yield doc_id, para_num, utils.makeBigrams(pos_cleaned)

示例#3

显示文件

    def __iter__(self):
        """Stream one ``(doc_id, tokens)`` tuple per run of lines sharing an id.

        Each line of the annotation file is parsed as a JSON object with the
        keys ``'id'`` and ``'annotation'``.  Filtered tokens (obtained from
        ``mf.xml2lemmas`` / ``mf.xml2pos`` via ``utils.posFilterString``) are
        accumulated across consecutive lines with the same id.  When the id
        changes, the accumulated list is yielded under the previous id, and
        whatever remains is yielded once the file is exhausted.  Nothing is
        yielded while the accumulated list is empty.

        Yields:
            tuple: ``(doc_id, abs_list)`` with a non-empty flat token list.
        """
        # NOTE(review): the slice restricts reading to the first entry of
        # ``file_list`` only -- confirm this is intentional.
        for fname in self.file_list[0:1]:
            path = os.path.join(self.annotation_dir, fname)
            with open(path) as handle:
                timer = LoopTimer(update_after=100)
                collected = []

                prev_id = None
                for raw in handle:
                    if self.print_status:
                        timer.update("Lemma Doc Stream")

                    record = json.loads(raw)

                    doc_id = record['id']
                    xml = record['annotation']

                    # The id changed: flush what was gathered for the previous
                    # id and start a fresh list, so the yielded list is never
                    # mutated afterwards.
                    if prev_id != doc_id and len(collected) > 0:
                        yield prev_id, collected
                        collected = []

                    prev_id = doc_id
                    # Presumably both helpers return one entry per sentence,
                    # aligned by index -- verify against make_features.
                    lemma_groups = mf.xml2lemmas(xml)
                    pos_groups = mf.xml2pos(xml)

                    for idx, lemmas in enumerate(lemma_groups):
                        # Only the filtered tokens are needed here; the
                        # filtered POS tags are discarded.
                        kept, _ = utils.posFilterString(lemmas, pos_groups[idx])

                        if len(kept) > 0:
                            collected.extend(kept)
                # Flush the final group once the file is exhausted.
                if len(collected) > 0:
                    yield prev_id, collected