def _add_code_ref(self, index, code_ref_element, page, load, s_code_references):
    """Parse the code references held by one element and save them.

    The element's text is extracted and cleaned. Text shorter than two
    characters, or made only of digits, is skipped. Each reference parsed
    from the remaining text is annotated with its location and context,
    saved, and appended to ``s_code_references``.

    :param index: Value stored as ``index`` on every parsed reference.
    :param code_ref_element: Element holding the candidate reference text.
    :param page: Object whose ``file_path`` is copied onto each reference.
    :param load: Object whose ``tree.getpath`` gives the element's path.
    :param s_code_references: Collection each saved reference is appended to.
    """
    raw = self.xcoderef.get_text(code_ref_element)
    candidate = clean_breaks(raw).strip()

    # Not significant
    if len(candidate) < 2 or candidate.isdigit():
        return

    paragraph = get_text_context(code_ref_element)
    sentence = get_sentence(code_ref_element, candidate, paragraph)
    # The kind lookup may also rewrite the text that gets parsed below.
    candidate, kind_hint = self._get_code_ref_kind(code_ref_element, candidate)
    element_path = load.tree.getpath(code_ref_element)

    references = parse_single_code_references(
        candidate, kind_hint, self.kind_strategies, self.kinds)
    for reference in references:
        reference.xpath = element_path
        reference.file_path = page.file_path
        reference.source = DOCUMENT_SOURCE
        reference.index = index
        reference.sentence = sentence
        reference.paragraph = paragraph
        release = self.document.project_release
        reference.project = release.project
        reference.project_release = release
        reference.save()
        s_code_references.append(reference)
def _parse_paragraphs(self, message, load, text_paragraphs):
    """Parse and save the code references found in each paragraph.

    Every paragraph is merged into a single text and parsed in strict
    mode with the ``'unknown'`` kind as the hint. Each reference receives
    an index of ``para_index * 1000`` plus its position within the
    paragraph, is linked to ``message`` and, when available, to
    ``load.entry``, and is then saved.

    :param message: Object providing ``file_path`` and ``url``; also stored
        as the local context of each reference.
    :param load: Object providing ``code_words`` and ``entry``.
    :param text_paragraphs: Iterable of paragraphs to scan.
    """
    for para_index, paragraph in enumerate(text_paragraphs):
        merged = merge_lines(paragraph, False)
        references = parse_single_code_references(
            merged,
            self.kinds['unknown'],
            self.kind_strategies,
            self.kinds,
            find_context=True,
            strict=True,
            code_words=load.code_words,
        )
        # Each paragraph gets its own block of 1000 index values.
        for position, reference in enumerate(references, start=para_index * 1000):
            reference.file_path = message.file_path
            reference.url = message.url
            reference.index = position
            reference.project = self.channel.project
            reference.local_context = message
            entry = load.entry
            if entry is not None:
                reference.global_context = entry
            reference.save()
def _process_title_references(self, message, load):
    """Parse and save the code references found in a message title.

    The title is used as both the sentence and the paragraph of every
    reference. References are parsed in strict mode with the
    ``'unknown'`` kind as the hint, receive an index of ``-1``, and are
    linked to ``message`` and, when available, to ``load.entry`` before
    being saved.

    :param message: Object providing ``title``, ``xpath`` and ``file_path``.
    :param load: Object providing ``entry``.
    """
    title = message.title
    unknown_kind = self.kinds['unknown']
    location = message.xpath

    references = parse_single_code_references(
        title, unknown_kind, self.kind_strategies, self.kinds, strict=True)
    for reference in references:
        reference.xpath = location
        reference.file_path = message.file_path
        reference.source = CHANNEL_SOURCE
        reference.index = -1
        reference.sentence = title
        reference.paragraph = title
        reference.title_context = message
        reference.local_context = message
        entry = load.entry
        if entry is not None:
            reference.global_context = entry
        reference.save()
def _process_title_references(self, page, load, section):
    """Parse and save the code references found in a section title.

    The title is used as both the sentence and the paragraph of every
    reference. References are parsed in strict mode with the
    ``'unknown'`` kind as the hint, receive an index of ``-1``, and are
    linked to the section, its mid context and the page before being
    saved.

    :param page: Object providing ``file_path``; also stored as the global
        context of each reference.
    :param load: Not used by this method.
    :param section: Object providing ``title`` and ``xpath``; also stored
        as the title and local context of each reference.
    """
    title = section.title
    unknown_kind = self.kinds['unknown']
    location = section.xpath

    references = parse_single_code_references(
        title, unknown_kind, self.kind_strategies, self.kinds, strict=True)
    for reference in references:
        reference.xpath = location
        reference.file_path = page.file_path
        reference.source = DOCUMENT_SOURCE
        reference.index = -1
        reference.sentence = title
        reference.paragraph = title
        reference.title_context = section
        reference.local_context = section
        reference.mid_context = self._get_mid_context(section)
        reference.global_context = page
        reference.save()
def _parse_paragraphs(self, message, load, text_paragraphs):
    """Parse and save the code references found in each paragraph.

    Every paragraph is merged into a single text and parsed in strict
    mode with the ``'unknown'`` kind as the hint. Each reference receives
    an index of ``para_index * 1000`` plus its position within the
    paragraph, is linked to ``message`` and, when available, to
    ``load.entry``, and is then saved.

    :param message: Object providing ``file_path`` and ``url``; also stored
        as the local context of each reference.
    :param load: Object providing ``code_words`` and ``entry``.
    :param text_paragraphs: Iterable of paragraphs to scan.
    """
    for para_index, paragraph in enumerate(text_paragraphs):
        merged = merge_lines(paragraph, False)
        references = parse_single_code_references(
            merged,
            self.kinds['unknown'],
            self.kind_strategies,
            self.kinds,
            find_context=True,
            strict=True,
            code_words=load.code_words,
        )
        # Each paragraph gets its own block of 1000 index values.
        for position, reference in enumerate(references, start=para_index * 1000):
            reference.file_path = message.file_path
            reference.url = message.url
            reference.index = position
            reference.project = self.channel.project
            reference.local_context = message
            entry = load.entry
            if entry is not None:
                reference.global_context = entry
            reference.save()