Пример #1
0
def get_sentence(element, element_text, text_context, xtext=XTEXT):
    """Return the sentence of ``text_context`` that surrounds ``element_text``.

    ``find_list`` supplies the start offsets of ``element_text`` inside
    ``text_context``. When there are several occurrences, the one to use is
    chosen by counting the siblings that precede ``element`` in its parent
    and whose normalized text also contains ``element_text``.

    :param element: node being located; must provide ``getparent()``.
    :param element_text: text of ``element`` to look for.
    :param text_context: larger text in which the occurrences are searched.
    :param xtext: callable returning the raw text of a sibling node.
    :return: ``''`` when there is no occurrence, otherwise the result of
        ``find_sentence`` on ``text_context`` for the selected occurrence.
    """
    indexes = find_list(text_context, element_text)
    size = len(indexes)
    if size == 0:
        return ''

    occurrence = 0
    if size > 1:
        parent = element.getparent()
        # An element without a parent has no preceding siblings to inspect,
        # so the first occurrence is kept instead of iterating over None.
        siblings = parent if parent is not None else ()
        for child in siblings:
            if child == element:
                break
            # A preceding sibling holding the same text means the earlier
            # occurrence belongs to it, not to ``element``.
            if element_text in normalize(xtext(child)):
                occurrence += 1
        if occurrence >= size:
            # More matching siblings than occurrences: something went wrong,
            # fall back to the first occurrence. The lookup still has to run
            # against ``text_context`` because the offsets refer to it.
            occurrence = 0

    start = indexes[occurrence]
    return find_sentence(text_context, start, start + len(element_text))
Пример #2
0
def get_sentence(element, element_text, text_context, xtext=XTEXT):
    """Find the sentence in ``text_context`` that contains ``element_text``.

    The start offsets of ``element_text`` within ``text_context`` come from
    ``find_list``. If the text occurs more than once, the occurrence is
    picked by counting how many siblings located before ``element`` (in its
    parent) have a normalized text that also contains ``element_text``.

    :param element: node being located; must provide ``getparent()``.
    :param element_text: text of ``element`` to look for.
    :param text_context: larger text in which the occurrences are searched.
    :param xtext: callable returning the raw text of a sibling node.
    :return: ``""`` when no occurrence exists, otherwise whatever
        ``find_sentence`` returns for the selected occurrence.
    """
    indexes = find_list(text_context, element_text)
    size = len(indexes)
    if size == 0:
        return ""

    text_length = len(element_text)
    if size == 1:
        return find_sentence(text_context, indexes[0], indexes[0] + text_length)

    parent = element.getparent()
    matching_before = 0
    # A parentless element has no siblings; keep the first occurrence rather
    # than failing while iterating over None.
    if parent is not None:
        for child in parent:
            if child == element:
                break
            # Each earlier sibling with the same text consumes one occurrence.
            if element_text in normalize(xtext(child)):
                matching_before += 1

    if matching_before >= size:
        # Something went wrong: fall back to the first occurrence. The
        # offsets index into ``text_context``, so that is the text to search.
        matching_before = 0

    start = indexes[matching_before]
    return find_sentence(text_context, start, start + text_length)
Пример #3
0
    def get_text(self, element, complex_text=False):
        """Return the normalized text of ``element`` without its bad children.

        Works on a deep copy so the caller's element is left untouched:
        every item reported by ``self.first_filter`` that is contained in
        the copy is removed from it before the text is collected.

        Copying the whole subtree is costly, but it yields a faithful text
        representation for hierarchical sections.

        :param element: element whose text is wanted.
        :param complex_text: when true, the text parts are gathered by
            ``get_recursive_text``; otherwise by the ``.//text()`` XPath.
        :return: the collected parts joined with newlines, then normalized.
        """
        clone = deepcopy(element)
        for unwanted in self.first_filter(clone):
            if unwanted in clone:
                clone.remove(unwanted)

        if not complex_text:
            fragments = clone.xpath('.//text()')
        else:
            fragments = []
            get_recursive_text(clone, fragments)

        return normalize('\n'.join(fragments))
Пример #4
0
    def get_text(self, element, complex_text=False):
        """Compute the normalized text of ``element``, ignoring bad children.

        The element is deep-copied first; the items returned by
        ``self.first_filter`` that are contained in the copy are removed
        from the copy, and the text is then extracted from what remains.

        The copy makes this expensive, but it gives a good text
        representation of a hierarchical section.

        :param element: element whose text is wanted.
        :param complex_text: if true, ``get_recursive_text`` fills the list
            of text parts; if false, the ``.//text()`` XPath provides them.
        :return: the text parts joined by newlines and passed to
            ``normalize``.
        """
        working_copy = deepcopy(element)
        to_drop = self.first_filter(working_copy)
        for candidate in to_drop:
            if candidate in working_copy:
                working_copy.remove(candidate)

        if complex_text:
            pieces = []
            get_recursive_text(working_copy, pieces)
            joined = "\n".join(pieces)
        else:
            joined = "\n".join(working_copy.xpath(".//text()"))
        return normalize(joined)
Пример #5
0
 def get_text_from_parent(self, parent, index=0, complex_text=False):
     """Return the normalized text of the element selected from ``parent``.

     The element is looked up with ``self.get_element(parent, index)``.
     When that lookup yields ``None`` an empty string is normalized
     instead.

     :param parent: node passed to ``self.get_element``.
     :param index: position passed to ``self.get_element``.
     :param complex_text: forwarded to ``self.get_text``.
     """
     target = self.get_element(parent, index)
     raw = '' if target is None else self.get_text(target, complex_text)
     return normalize(raw)
Пример #6
0
 def get_text(self, element):
     """Return the text given by ``self.xtext(element)``, normalized."""
     raw_text = self.xtext(element)
     return normalize(raw_text)
Пример #7
0
 def get_text_from_parent(self, parent, index=0):
     """Return the normalized text of the element selected from ``parent``.

     The element comes from ``self.get_element(parent, index)`` and its
     text from ``self.xtext``. When no element is found (``None``), an
     empty string is normalized instead.
     """
     target = self.get_element(parent, index)
     if target is None:
         return normalize('')
     return normalize(self.xtext(target))
Пример #8
0
def get_text(element, xtext=XTEXT):
    """Return the normalized text of ``element``.

    :param element: node handed to ``xtext``.
    :param xtext: callable extracting the raw text; defaults to ``XTEXT``.
    """
    raw_text = xtext(element)
    return normalize(raw_text)
Пример #9
0
def get_text(element, xtext=XTEXT):
    """Extract the text of ``element`` with ``xtext`` and normalize it.

    :param element: node whose text is wanted.
    :param xtext: text-extraction callable, ``XTEXT`` unless overridden.
    :return: the value produced by ``normalize`` for the extracted text.
    """
    extracted = xtext(element)
    normalized = normalize(extracted)
    return normalized
Пример #10
0
 def get_text_from_parent(self, parent, index=0, complex_text=False):
     """Fetch an element from ``parent`` and return its normalized text.

     ``self.get_element(parent, index)`` selects the element and
     ``self.get_text`` (called with ``complex_text``) provides its text.
     If no element is selected (``None``), the empty string is
     normalized and returned.
     """
     found = self.get_element(parent, index)
     if found is None:
         return normalize("")
     return normalize(self.get_text(found, complex_text))
Пример #11
0
 def get_text(self, element):
     """Extract ``element``'s text via ``self.xtext`` and normalize it."""
     extracted = self.xtext(element)
     return normalize(extracted)
Пример #12
0
 def get_text_from_parent(self, parent, index=0):
     """Fetch an element from ``parent`` and return its normalized text.

     :param parent: node passed to ``self.get_element``.
     :param index: position passed to ``self.get_element``.
     :return: ``normalize`` applied to ``self.xtext`` of the element, or
         to an empty string when ``self.get_element`` returns ``None``.
     """
     found = self.get_element(parent, index)
     raw = self.xtext(found) if found is not None else ""
     return normalize(raw)