Python find_sentence示例，docutil.str_util.find_sentence Python示例

示例#1

0

显示文件

文件： etree_util.py 项目： nengnengwu/recodoc2

def get_sentence(element, element_text, text_context, xtext=XTEXT):
    """Return the sentence of ``text_context`` surrounding ``element_text``.

    All occurrences of ``element_text`` in ``text_context`` are located with
    ``find_list``. When there is more than one, the occurrence is selected by
    counting the siblings that come before ``element`` under its parent and
    whose normalized text also contains ``element_text``.

    :param element: node being located; ``getparent()`` is called on it when
        ``element_text`` occurs more than once.
    :param element_text: text to look for inside ``text_context``.
    :param text_context: text in which the sentence is searched.
    :param xtext: callable used to extract the text of each sibling.
    :return: the value of ``find_sentence`` for the selected occurrence, or
        ``''`` when ``element_text`` does not occur in ``text_context``.
    """
    indexes = find_list(text_context, element_text)
    if not indexes:
        return ''

    occurrence = 0
    if len(indexes) > 1:
        for child in element.getparent():
            if child == element:
                break
            # An earlier sibling holds the same text, so the occurrence
            # belonging to ``element`` is further along in the context.
            if element_text in normalize(xtext(child)):
                occurrence += 1
        if occurrence >= len(indexes):
            # Something went wrong: more matching siblings than occurrences.
            # Fall back to the first occurrence. The offsets in ``indexes``
            # refer to ``text_context``, so that is the text to search
            # (the previous code passed ``element_text`` here by mistake).
            occurrence = 0

    start = indexes[occurrence]
    return find_sentence(text_context, start, start + len(element_text))

示例#2

0

显示文件

文件： etree_util.py 项目： bartdag/recodoc2

def get_sentence(element, element_text, text_context, xtext=XTEXT):
    """Find the sentence in ``text_context`` that holds ``element_text``.

    ``find_list`` gives the offsets of every occurrence of ``element_text``
    in ``text_context``. With several occurrences, the one to use is picked
    by counting the preceding siblings of ``element`` whose normalized text
    contains ``element_text`` as well.

    :param element: node being located; its ``getparent()`` is used only
        when ``element_text`` occurs more than once.
    :param element_text: text to look for inside ``text_context``.
    :param text_context: text in which the sentence is searched.
    :param xtext: callable used to extract the text of each sibling.
    :return: the value of ``find_sentence`` for the chosen occurrence, or
        ``""`` when ``element_text`` is absent from ``text_context``.
    """
    indexes = find_list(text_context, element_text)
    size = len(indexes)
    length = len(element_text)
    if size == 0:
        return ""
    if size == 1:
        return find_sentence(text_context, indexes[0], indexes[0] + length)

    child_index_in_parent = 0
    for child in element.getparent():
        if child == element:
            break
        # A previous sibling has the same text, so the first offset is not
        # the one that belongs to ``element``.
        if normalize(xtext(child)).find(element_text) != -1:
            child_index_in_parent += 1

    if child_index_in_parent < size:
        start = indexes[child_index_in_parent]
    else:
        # Something went wrong: fall back to the first occurrence.
        start = indexes[0]

    # The offsets come from ``text_context``; the fallback used to search
    # ``element_text`` with them, which pointed into the wrong string.
    return find_sentence(text_context, start, start + length)

示例#3

0

显示文件

文件： tests.py 项目： bartdag/recodoc2

    def test_find_sentence(self):
        """Check that ``su.find_sentence`` returns the sentence around a span."""
        paragraph = "Hello world. This is luis. Come and get me."
        # (word to locate, sentence expected around its first occurrence)
        expectations = (
            ("world", "Hello world."),
            ("is", "This is luis."),
            ("Come", "Come and get me."),
        )
        for word, sentence in expectations:
            start = paragraph.find(word)
            self.assertEqual(sentence, su.find_sentence(paragraph, start, start + len(word)))

        # Text without any sentence terminator comes back whole.
        unterminated = "Hello world"
        self.assertEqual("Hello world", su.find_sentence(unterminated, 0, len("Hello")))

示例#4

0

显示文件

    def test_find_sentence(self):
        """Exercise ``su.find_sentence`` on a multi-sentence and a bare text."""

        def sentence_around(text, word):
            # Sentence reported for the first occurrence of ``word``.
            begin = text.find(word)
            return su.find_sentence(text, begin, begin + len(word))

        paragraph = 'Hello world. This is luis. Come and get me.'
        self.assertEqual('Hello world.', sentence_around(paragraph, 'world'))
        self.assertEqual('This is luis.', sentence_around(paragraph, 'is'))
        self.assertEqual('Come and get me.',
                         sentence_around(paragraph, 'Come'))

        # No terminating period: the whole text is the sentence.
        bare = 'Hello world'
        end = len('Hello')
        self.assertEqual('Hello world', su.find_sentence(bare, 0, end))

示例#5

0

显示文件

文件： actions.py 项目： nengnengwu/recodoc2

def process_matches(text, matches, single_refs, kinds, kinds_hierarchies,
                    save_index, find_context):
    """Create and save a ``SingleCodeReference`` for each valid match.

    Each match is a ``(parent, children)`` pair where ``parent`` is indexed
    as ``(start, end, kind)``. Matches rejected by ``is_valid_match`` are
    collected in a set that is handed back to ``is_valid_match`` on later
    iterations. Parents whose kind equals ``IGNORE_KIND`` are skipped.

    :param text: text the match offsets refer to.
    :param matches: iterable of ``(parent, children)`` pairs.
    :param single_refs: list receiving every created reference.
    :param kinds: mapping from a match kind to the reference ``kind_hint``.
    :param kinds_hierarchies: forwarded to ``process_children_matches``.
    :param save_index: when true, store the running index on the reference.
    :param find_context: when true, store the sentence and paragraph too.
    :return: ``True`` if at least one match was skipped for ``IGNORE_KIND``.
    """
    rejected = set()
    position = 0
    skipped_any = False

    for candidate in matches:
        if not is_valid_match(candidate, matches, rejected):
            rejected.add(candidate)
            continue

        span, nested = candidate
        snippet = text[span[0]:span[1]]
        kind = span[2]
        if kind == IGNORE_KIND:
            skipped_any = True
            continue

        reference = SingleCodeReference(content=snippet,
                                        kind_hint=kinds[kind])
        if save_index:
            reference.index = position
        if find_context:
            reference.sentence = find_sentence(text, span[0], span[1])
            reference.paragraph = find_paragraph(text, span[0], span[1])
        reference.save()
        single_refs.append(reference)

        # Children share the index of the parent reference just saved.
        process_children_matches(text, matches, nested, position,
                                 single_refs, kinds, kinds_hierarchies,
                                 save_index, find_context)
        position += 1

    return skipped_any

示例#6

0

显示文件

文件： actions.py 项目： bartdag/recodoc2

def process_matches(text, matches, single_refs, kinds, kinds_hierarchies,
        save_index, find_context):
    """Turn the valid matches found in ``text`` into saved code references.

    A match is a ``(parent, children)`` pair; ``parent[0]`` and ``parent[1]``
    delimit the matched text and ``parent[2]`` is its kind. Invalid matches
    (per ``is_valid_match``) are remembered in a set passed to subsequent
    ``is_valid_match`` calls. Matches of kind ``IGNORE_KIND`` produce no
    reference and do not advance the index.

    :param text: text the match offsets refer to.
    :param matches: iterable of ``(parent, children)`` pairs.
    :param single_refs: list to which each new reference is appended.
    :param kinds: mapping from a match kind to the reference ``kind_hint``.
    :param kinds_hierarchies: passed through to ``process_children_matches``.
    :param save_index: if true, set ``index`` on each main reference.
    :param find_context: if true, set ``sentence`` and ``paragraph`` too.
    :return: whether any match was ignored because of ``IGNORE_KIND``.
    """
    discarded = set()
    ref_index = 0
    ignored_some = False

    for item in matches:
        if is_valid_match(item, matches, discarded):
            head, descendants = item
            begin, end = head[0], head[1]
            matched_text = text[begin:end]
            kind_id = head[2]
            if kind_id == IGNORE_KIND:
                ignored_some = True
                continue

            ref = SingleCodeReference(
                    content=matched_text,
                    kind_hint=kinds[kind_id])
            if save_index:
                ref.index = ref_index
            if find_context:
                ref.sentence = find_sentence(text, begin, end)
                ref.paragraph = find_paragraph(text, begin, end)
            ref.save()
            single_refs.append(ref)

            # Child references are recorded under the same index.
            process_children_matches(text, matches, descendants, ref_index,
                    single_refs, kinds, kinds_hierarchies, save_index,
                    find_context)
            ref_index += 1
        else:
            discarded.add(item)

    return ignored_some

示例#7

0

显示文件

文件： actions.py 项目： nengnengwu/recodoc2

def process_children_matches(text, matches, children, index, single_refs,
                             kinds, kinds_hierarchies, save_index,
                             find_context):
    """Create and save one ``SingleCodeReference`` per child match.

    Each child is indexed as ``(start, end, kind)``. Its parent reference is
    looked up with ``find_parent_reference`` among the references already in
    ``single_refs``.

    :param text: text the child offsets refer to.
    :param matches: not used by this function.
    :param children: iterable of child matches.
    :param index: value stored as ``index`` when ``save_index`` is true.
    :param single_refs: list receiving every created child reference.
    :param kinds: mapping from a match kind to the reference ``kind_hint``.
    :param kinds_hierarchies: forwarded to ``find_parent_reference``.
    :param save_index: when true, store ``index`` on each child reference.
    :param find_context: when true, store the sentence and paragraph too.
    """
    for position, span in enumerate(children):
        snippet = text[span[0]:span[1]]
        kind = span[2]
        owner = find_parent_reference(kind, single_refs, kinds_hierarchies)
        reference = SingleCodeReference(content=snippet,
                                        kind_hint=kinds[kind],
                                        child_index=position,
                                        parent_reference=owner)
        if save_index:
            reference.index = index
        if find_context:
            reference.sentence = find_sentence(text, span[0], span[1])
            reference.paragraph = find_paragraph(text, span[0], span[1])
        reference.save()
        single_refs.append(reference)

示例#8

0

显示文件

文件： actions.py 项目： bartdag/recodoc2

def process_children_matches(text, matches, children, index, single_refs,
        kinds, kinds_hierarchies, save_index, find_context):
    """Save a child ``SingleCodeReference`` for every entry of ``children``.

    A child entry provides its start offset, end offset and kind at
    positions 0, 1 and 2. The parent reference comes from
    ``find_parent_reference``, searched in ``single_refs``.

    :param text: text the child offsets refer to.
    :param matches: not used by this function.
    :param children: iterable of child matches.
    :param index: value stored as ``index`` when ``save_index`` is true.
    :param single_refs: list to which each child reference is appended.
    :param kinds: mapping from a match kind to the reference ``kind_hint``.
    :param kinds_hierarchies: passed through to ``find_parent_reference``.
    :param save_index: if true, set ``index`` on each child reference.
    :param find_context: if true, set ``sentence`` and ``paragraph`` too.
    """
    child_position = 0
    for entry in children:
        begin, end = entry[0], entry[1]
        matched_text = text[begin:end]
        parent_ref = find_parent_reference(entry[2], single_refs,
                kinds_hierarchies)
        fields = dict(
                content=matched_text,
                kind_hint=kinds[entry[2]],
                child_index=child_position,
                parent_reference=parent_ref)
        child_ref = SingleCodeReference(**fields)
        if save_index:
            child_ref.index = index
        if find_context:
            child_ref.sentence = find_sentence(text, begin, end)
            child_ref.paragraph = find_paragraph(text, begin, end)
        child_ref.save()
        single_refs.append(child_ref)
        child_position += 1