def find_reference_section(docbody):
    """Locate the first line of the reference section in a document body.

    Every title pattern is tried in turn. For a given pattern the document
    is walked from its last line towards its first; each line matching the
    pattern is handed, together with the five lines that follow it, to
    find_numeration(). As soon as one pattern has produced a result, the
    remaining patterns are not tried.

    @param docbody: (list) of strings - the full document body.
    @return: (dictionary) - the details produced by find_numeration()
        (this function itself reads its 'title' and 'marker' entries),
        extended here with:
            'start_line'   : (integer) - index in docbody of the matched
                             title line,
            'title_string' : (string) - text captured by the 'title' group
                             of the matching pattern.
        -- OR -- (None) - when the reference section could not be found.
    """
    def _would_lose(field, kept, candidate):
        # True when the result kept so far carries a non-empty `field`
        # while the new candidate does not; such a candidate is discarded.
        return bool(
            kept
            and field in kept
            and kept[field]
            and not candidate[field]
        )

    best = None

    for pattern in get_reference_section_title_patterns():
        # Scan the document bottom-up for a line matching this pattern.
        for offset, text in enumerate(reversed(docbody)):
            hit = pattern.match(text)
            if not hit:
                continue

            heading = hit.group('title')
            position = len(docbody) - 1 - offset
            candidate, found_title = find_numeration(
                docbody[position:position + 6], heading)

            if candidate:
                # A rejected candidate moves straight on to the next line,
                # deliberately bypassing the found_title check below.
                if _would_lose('title', best, candidate) \
                   or _would_lose('marker', best, candidate):
                    continue

                best = candidate
                best['start_line'] = position
                best['title_string'] = heading

            if found_title:
                break

        # One successful pattern is enough.
        if best:
            break

    return best
def find_reference_section(docbody):
    """Locate the first line of the reference section in a document body.

    The document is walked once, from its last line towards its first.
    Each line is checked against the reference-section title patterns via
    regex_match_list() (presumably yielding a match object for a matching
    pattern - confirm against that helper). A matching line is handed,
    together with the five lines that follow it, to find_numeration().
    The walk ends early when find_numeration() reports that the title
    was found.

    @param docbody: (list) of strings - the full document body.
    @return: (dictionary) - the details produced by find_numeration()
        (this function itself reads its 'title' and 'marker' entries),
        extended here with:
            'start_line'   : (integer) - index in docbody of the matched
                             title line,
            'title_string' : (string) - text captured by the 'title' group
                             of the match.
        -- OR -- (None) - when the reference section could not be found.
    """
    def _would_lose(field, kept, candidate):
        # True when the result kept so far carries a non-empty `field`
        # while the new candidate does not; such a candidate is discarded.
        return bool(
            kept
            and field in kept
            and kept[field]
            and not candidate[field]
        )

    best = None
    title_patterns = get_reference_section_title_patterns()

    # Scan the document bottom-up for a reference section title.
    for offset, text in enumerate(reversed(docbody)):
        hit = regex_match_list(text, title_patterns)
        if not hit:
            continue

        heading = hit.group('title')
        position = len(docbody) - 1 - offset
        candidate, found_title = find_numeration(
            docbody[position:position + 6], heading)

        if candidate:
            # A rejected candidate moves straight on to the next line,
            # deliberately bypassing the found_title check below.
            if _would_lose('title', best, candidate) \
               or _would_lose('marker', best, candidate):
                continue

            best = candidate
            best['start_line'] = position
            best['title_string'] = heading

        if found_title:
            break

    return best
def test_reference_section_title_pattern(self):
    # Sanity check: more than two reference-section title patterns must be
    # returned by refextract_re.get_reference_section_title_patterns().
    patterns = refextract_re.get_reference_section_title_patterns()
    # assertTrue is used instead of the deprecated assert_ alias, which
    # was removed from unittest.TestCase in Python 3.12.
    self.assertTrue(len(patterns) > 2)
def test_reference_section_title_pattern(self):
    # Sanity check: more than two reference-section title patterns must be
    # returned by refextract_re.get_reference_section_title_patterns().
    patterns = refextract_re.get_reference_section_title_patterns()
    # assertTrue is used instead of the deprecated assert_ alias, which
    # was removed from unittest.TestCase in Python 3.12.
    self.assertTrue(len(patterns) > 2)