def get_sentence(element, element_text, text_context, xtext=XTEXT):
    """Return the sentence of ``text_context`` in which ``element`` appears.

    ``find_list(text_context, element_text)`` supplies the candidate
    positions (presumably the start offsets of every occurrence of
    ``element_text`` in ``text_context`` -- confirm against ``find_list``).
    When there are several candidates, the preceding siblings of
    ``element`` whose normalized text also contains ``element_text`` are
    counted, and that count selects which occurrence belongs to ``element``.

    :param element: tree node exposing ``getparent()`` (lxml-style).
    :param element_text: text of ``element`` to look for.
    :param text_context: larger text the sentence is extracted from.
    :param xtext: callable returning the text of a node; its result is
        passed through ``normalize`` before being searched.
    :return: the result of ``find_sentence`` for the selected occurrence,
        or ``''`` when ``element_text`` does not occur in ``text_context``.
    """
    indexes = find_list(text_context, element_text)
    size = len(indexes)
    if size == 0:
        return ''

    occurrence = 0
    if size > 1:
        parent = element.getparent()
        # A root element has no parent; keep the first occurrence instead
        # of failing while iterating over None.
        if parent is not None:
            for child in parent:
                if child == element:
                    break
                # An earlier sibling carries the same text, so the first
                # index is not the good one.
                if normalize(xtext(child)).find(element_text) != -1:
                    occurrence += 1
        if occurrence >= size:
            # Something went wrong: more matching siblings than
            # occurrences. Fall back to the first occurrence. The offsets
            # in ``indexes`` refer to ``text_context``, so that is the text
            # that must be searched (the previous code passed
            # ``element_text`` here).
            occurrence = 0

    start = indexes[occurrence]
    return find_sentence(text_context, start, start + len(element_text))
def get_sentence(element, element_text, text_context, xtext=XTEXT):
    """Return the sentence of ``text_context`` in which ``element`` appears.

    ``find_list(text_context, element_text)`` supplies the candidate
    positions (presumably the start offsets of every occurrence of
    ``element_text`` in ``text_context`` -- confirm against ``find_list``).
    When there are several candidates, the preceding siblings of
    ``element`` whose normalized text also contains ``element_text`` are
    counted, and that count selects which occurrence belongs to ``element``.

    :param element: tree node exposing ``getparent()`` (lxml-style).
    :param element_text: text of ``element`` to look for.
    :param text_context: larger text the sentence is extracted from.
    :param xtext: callable returning the text of a node; its result is
        passed through ``normalize`` before being searched.
    :return: the result of ``find_sentence`` for the selected occurrence,
        or ``""`` when ``element_text`` does not occur in ``text_context``.
    """
    indexes = find_list(text_context, element_text)
    size = len(indexes)
    if size == 0:
        return ""

    occurrence = 0
    if size > 1:
        parent = element.getparent()
        # A root element has no parent; keep the first occurrence instead
        # of failing while iterating over None.
        if parent is not None:
            for child in parent:
                if child == element:
                    break
                # An earlier sibling carries the same text, so the first
                # index is not the good one.
                if normalize(xtext(child)).find(element_text) != -1:
                    occurrence += 1
        if occurrence >= size:
            # Something went wrong: more matching siblings than
            # occurrences. Fall back to the first occurrence. The offsets
            # in ``indexes`` refer to ``text_context``, so that is the text
            # that must be searched (the previous code passed
            # ``element_text`` here).
            occurrence = 0

    start = indexes[occurrence]
    return find_sentence(text_context, start, start + len(element_text))
def test_find_sentence(self):
    """find_sentence returns the sentence enclosing a given span."""
    p1 = "Hello world. This is luis. Come and get me."
    # (expected sentence, word whose first occurrence delimits the span).
    # Note that the first occurrence of "is" lies inside "This".
    cases = (
        ("Hello world.", "world"),
        ("This is luis.", "is"),
        ("Come and get me.", "Come"),
    )
    for expected, word in cases:
        start = p1.find(word)
        self.assertEqual(
            expected, su.find_sentence(p1, start, start + len(word)))

    # Text without any sentence terminator.
    p2 = "Hello world"
    self.assertEqual("Hello world", su.find_sentence(p2, 0, len("Hello")))
def test_find_sentence(self):
    """find_sentence returns the sentence enclosing a given span."""
    p1 = 'Hello world. This is luis. Come and get me.'
    # (expected sentence, word whose first occurrence delimits the span).
    # Note that the first occurrence of 'is' lies inside 'This'.
    cases = (
        ('Hello world.', 'world'),
        ('This is luis.', 'is'),
        ('Come and get me.', 'Come'),
    )
    for expected, word in cases:
        start = p1.find(word)
        self.assertEqual(
            expected, su.find_sentence(p1, start, start + len(word)))

    # Text without any sentence terminator.
    p2 = 'Hello world'
    self.assertEqual('Hello world', su.find_sentence(p2, 0, len('Hello')))
def process_matches(text, matches, single_refs, kinds, kinds_hierarchies,
                    save_index, find_context):
    """Create and save a SingleCodeReference for every valid match.

    Each match is unpacked as ``(parent, children)``; ``parent[0]`` and
    ``parent[1]`` delimit the matched slice of ``text`` and ``parent[2]``
    is the key used to look up the kind in ``kinds``.

    :param text: text the matches were found in.
    :param matches: iterable of ``(parent, children)`` matches.
    :param single_refs: list that receives every saved reference.
    :param kinds: mapping from a match's kind key to its kind hint.
    :param kinds_hierarchies: forwarded to ``process_children_matches``.
    :param save_index: when true, store the running index on references.
    :param find_context: when true, store the enclosing sentence and
        paragraph on references.
    :return: True if at least one valid match had kind ``IGNORE_KIND``
        (such matches are skipped), False otherwise.
    """
    rejected = set()
    position = 0
    avoided = False

    for match in matches:
        if not is_valid_match(match, matches, rejected):
            # Remembered so later validity checks can take it into account.
            rejected.add(match)
            continue

        parent, children = match
        begin, end = parent[0], parent[1]
        snippet = text[begin:end]
        kind_key = parent[2]

        if kind_key == IGNORE_KIND:
            # Ignored matches produce no reference and do not advance
            # the running index.
            avoided = True
            continue

        reference = SingleCodeReference(
            content=snippet, kind_hint=kinds[kind_key])
        if save_index:
            reference.index = position
        if find_context:
            reference.sentence = find_sentence(text, begin, end)
            reference.paragraph = find_paragraph(text, begin, end)
        reference.save()
        single_refs.append(reference)

        # Children share the index of their parent match.
        process_children_matches(
            text, matches, children, position, single_refs, kinds,
            kinds_hierarchies, save_index, find_context)
        position += 1

    return avoided
def process_matches(text, matches, single_refs, kinds, kinds_hierarchies,
                    save_index, find_context):
    """Create and save a SingleCodeReference for every valid match.

    Each match is unpacked as ``(parent, children)``; ``parent[0]`` and
    ``parent[1]`` delimit the matched slice of ``text`` and ``parent[2]``
    is the key used to look up the kind in ``kinds``.

    :param text: text the matches were found in.
    :param matches: iterable of ``(parent, children)`` matches.
    :param single_refs: list that receives every saved reference.
    :param kinds: mapping from a match's kind key to its kind hint.
    :param kinds_hierarchies: forwarded to ``process_children_matches``.
    :param save_index: when true, store the running index on references.
    :param find_context: when true, store the enclosing sentence and
        paragraph on references.
    :return: True if at least one valid match had kind ``IGNORE_KIND``
        (such matches are skipped), False otherwise.
    """
    rejected = set()
    position = 0
    avoided = False

    for match in matches:
        if not is_valid_match(match, matches, rejected):
            # Remembered so later validity checks can take it into account.
            rejected.add(match)
            continue

        parent, children = match
        begin, end = parent[0], parent[1]
        snippet = text[begin:end]
        kind_key = parent[2]

        if kind_key == IGNORE_KIND:
            # Ignored matches produce no reference and do not advance
            # the running index.
            avoided = True
            continue

        reference = SingleCodeReference(
            content=snippet, kind_hint=kinds[kind_key])
        if save_index:
            reference.index = position
        if find_context:
            reference.sentence = find_sentence(text, begin, end)
            reference.paragraph = find_paragraph(text, begin, end)
        reference.save()
        single_refs.append(reference)

        # Children share the index of their parent match.
        process_children_matches(
            text, matches, children, position, single_refs, kinds,
            kinds_hierarchies, save_index, find_context)
        position += 1

    return avoided
def process_children_matches(text, matches, children, index, single_refs,
                             kinds, kinds_hierarchies, save_index,
                             find_context):
    """Create and save a SingleCodeReference for each child of a match.

    For every child, ``child[0]`` and ``child[1]`` delimit the matched
    slice of ``text`` and ``child[2]`` is the key used both to look up the
    kind in ``kinds`` and to locate the parent reference.

    :param text: text the matches were found in.
    :param matches: not used by this function.
    :param children: iterable of child matches.
    :param index: index stored on each child when ``save_index`` is true.
    :param single_refs: list searched for the parent reference and
        extended with every saved child reference.
    :param kinds: mapping from a child's kind key to its kind hint.
    :param kinds_hierarchies: forwarded to ``find_parent_reference``.
    :param save_index: when true, store ``index`` on each reference.
    :param find_context: when true, store the enclosing sentence and
        paragraph on each reference.
    """
    for position, child in enumerate(children):
        begin, end = child[0], child[1]
        snippet = text[begin:end]
        kind_key = child[2]

        # Looked up before the child is appended, so a child can never be
        # resolved as its own parent.
        owner = find_parent_reference(kind_key, single_refs,
                                      kinds_hierarchies)
        reference = SingleCodeReference(
            content=snippet,
            kind_hint=kinds[kind_key],
            child_index=position,
            parent_reference=owner)

        if save_index:
            reference.index = index
        if find_context:
            reference.sentence = find_sentence(text, begin, end)
            reference.paragraph = find_paragraph(text, begin, end)

        reference.save()
        single_refs.append(reference)
def process_children_matches(text, matches, children, index, single_refs,
                             kinds, kinds_hierarchies, save_index,
                             find_context):
    """Create and save a SingleCodeReference for each child of a match.

    For every child, ``child[0]`` and ``child[1]`` delimit the matched
    slice of ``text`` and ``child[2]`` is the key used both to look up the
    kind in ``kinds`` and to locate the parent reference.

    :param text: text the matches were found in.
    :param matches: not used by this function.
    :param children: iterable of child matches.
    :param index: index stored on each child when ``save_index`` is true.
    :param single_refs: list searched for the parent reference and
        extended with every saved child reference.
    :param kinds: mapping from a child's kind key to its kind hint.
    :param kinds_hierarchies: forwarded to ``find_parent_reference``.
    :param save_index: when true, store ``index`` on each reference.
    :param find_context: when true, store the enclosing sentence and
        paragraph on each reference.
    """
    for position, child in enumerate(children):
        begin, end = child[0], child[1]
        snippet = text[begin:end]
        kind_key = child[2]

        # Looked up before the child is appended, so a child can never be
        # resolved as its own parent.
        owner = find_parent_reference(kind_key, single_refs,
                                      kinds_hierarchies)
        reference = SingleCodeReference(
            content=snippet,
            kind_hint=kinds[kind_key],
            child_index=position,
            parent_reference=owner)

        if save_index:
            reference.index = index
        if find_context:
            reference.sentence = find_sentence(text, begin, end)
            reference.paragraph = find_paragraph(text, begin, end)

        reference.save()
        single_refs.append(reference)