def test_level_setter_does_not_overwrite_title():
    """Changing the level must leave the template inside the title intact."""
    section = Section('={{t}}=\nb')
    title_template = section.templates[0]
    # First assignment is only there to check that it has no effect.
    section.level = 1
    section.level = 2
    assert section.string == '=={{t}}==\nb'
    assert title_template.string == '{{t}}'
def find_section_definitions(word: str, section: wtp.Section,
                             locale: str) -> List[Definitions]:
    """Collect the definitions listed in *section*, with their sub-levels.

    Each kept definition is appended to the result as processed text.  When
    the definition has sub-definitions, a tuple holding them is appended right
    after it; sub-sub-definitions are stored the same way, as a tuple placed
    after their parent inside that tuple.

    For the "es" locale the contents of *section* are overwritten so that
    definition lists become numbered lists before the lookup.

    Returns an empty list for French verb-form sections and when no list
    matches ``section_patterns[locale]``.
    """
    definitions: List[Definitions] = []

    # French verb-form sections are not searched for definitions.
    if locale == "fr" and section.title.strip().startswith(
            "{{S|verbe|fr|flexion"):
        return definitions

    # "es" uses definition lists, which the parser does not handle well:
    # rewrite them as numbered lists first.
    if locale == "es":
        definition_lists = section.get_lists(pattern="[:;]")
        if definition_lists:
            joined = "".join(entry.string for entry in definition_lists)
            section.contents = re.sub(r";[0-9]+[ |:]+", "# ", joined)
            section.contents = re.sub(r":;[a-z]:+[\s]+", "## ",
                                      section.contents)

    matched_lists = section.get_lists(pattern=section_patterns[locale])
    if not matched_lists:
        return definitions

    for a_list in matched_lists:
        for idx, code in enumerate(a_list.items):
            # Drop ignorable entries, unless the word is explicitly kept.
            must_keep = word in words_to_keep[locale]
            if not must_keep and any(
                    ignore_me in code.lower()
                    for ignore_me in definitions_to_ignore[locale]):
                continue

            # Transform and clean the Wikicode.
            definition = process_templates(word, clean(code), locale)

            # Skip empty definitions.
            if not definition:
                continue
            # [SV] Skip almost empty definitions.
            if locale == "sv" and len(definition) < 2:
                continue

            definitions.append(definition)

            # Gather the sub-definitions attached to this item, if any.
            subdefinitions: List[SubDefinitions] = []
            for sublist in a_list.sublists(
                    i=idx, pattern=sublist_patterns[locale]):
                for idx2, subcode in enumerate(sublist.items):
                    subdefinitions.append(
                        process_templates(word, clean(subcode), locale))

                    # Third level: sub-sub-definitions of this sub-item.
                    subsubdefinitions: List[str] = []
                    for subsublist in sublist.sublists(
                            i=idx2, pattern=sublist_patterns[locale]):
                        subsubdefinitions.extend(
                            process_templates(word, clean(subsubcode), locale)
                            for subsubcode in subsublist.items)
                    if subsubdefinitions:
                        subdefinitions.append(tuple(subsubdefinitions))
            if subdefinitions:
                definitions.append(tuple(subdefinitions))

    return definitions
def test_level_setter_does_not_overwrite_title(self):
    # Changing the level must leave the template inside the title intact.
    section = Section('={{t}}=\nb')
    title_template = section.templates[0]
    # First assignment is only there to check that it has no effect.
    section.level = 1
    section.level = 2
    self.assertEqual('=={{t}}==\nb', section.string)
    self.assertEqual('{{t}}', title_template.string)
def test_unbalanced_equalsigns_in_title(self):
    # Each case: (wikitext, expected level, expected title) for headings
    # whose runs of equal signs are not balanced.
    cases = (
        ('====== == \n', 2, '==== '),
        ('== ====== \n', 2, ' ===='),
        ('======== \n', 3, '=='),
    )
    for wikitext, expected_level, expected_title in cases:
        section = Section(wikitext)
        self.assertEqual(expected_level, section.level)
        self.assertEqual(expected_title, section.title)
def test_unbalanced_equalsigns_in_title():
    """Check level and title when the heading's equal signs are unbalanced."""
    # Each case: (wikitext, expected level, expected title).
    cases = (
        ('====== == \n', 2, '==== '),
        ('== ====== \n', 2, ' ===='),
        ('======== \n', 3, '=='),
    )
    for wikitext, expected_level, expected_title in cases:
        section = Section(wikitext)
        assert expected_level == section.level
        assert expected_title == section.title
def find_section_definitions(word: str, section: wtp.Section, locale: str) -> List[Definitions]:
    """Find definitions from the given *section*, with any sub-definitions.

    NOTE(review): the visible body stops after the "es" list rewrite and has
    no final ``return``; this block appears to be truncated -- confirm against
    the full function before relying on it.
    """
    definitions: List[Definitions] = []
    # Do not look for definitions in a French verb-form section.
    if locale == "fr" and section.title.strip().startswith(
            "{{S|verbe|fr|flexion"):
        return definitions
    # Likewise, return nothing for Spanish sections whose title starts with
    # "Forma adjetiva" or "Forma verbal".
    if locale == "es" and section.title.strip().startswith(
            ("Forma adjetiva", "Forma verbal")):
        return definitions
    # "es" uses definition lists, which are not well supported by the parser:
    # replace them with numbered lists.
    if locale == "es":
        if lists := section.get_lists(pattern="[:;]"):
            sec = "".join(a_list.string for a_list in lists)
            # ";<digits>" markers become first-level "# " items ...
            section.contents = re.sub(r";[0-9]+[ |:]+", "# ", sec)
            # ... and ":;<letter>:" markers become second-level "## " items.
            section.contents = re.sub(r":;[\s]*[a-z]:+[\s]+", "## ", section.contents)
def find_section_definitions(word: str, section: wtp.Section,
                             locale: str) -> List[Definitions]:
    """Collect the definitions listed in *section*, with their sub-levels.

    Each kept definition is appended to the result as processed text.  When
    the definition has sub-definitions, a tuple holding them is appended right
    after it; sub-sub-definitions are stored the same way, as a tuple placed
    after their parent inside that tuple.

    Returns an empty list for French verb-form sections and when no list
    matches ``section_patterns[locale]``.
    """
    definitions: List[Definitions] = []

    # French verb-form sections are not searched for definitions.
    if locale == "fr" and section.title.strip().startswith(
            "{{S|verbe|fr|flexion"):
        return definitions

    matched_lists = section.get_lists(pattern=section_patterns[locale])
    if not matched_lists:
        return definitions

    for a_list in matched_lists:
        for idx, code in enumerate(a_list.items):
            # Drop ignorable entries, unless the word is explicitly kept.
            must_keep = word in words_to_keep[locale]
            if not must_keep and any(
                    ignore_me in code.lower()
                    for ignore_me in definitions_to_ignore[locale]):
                continue

            # Transform and clean the Wikicode.
            definition = process_templates(word, clean(code), locale)

            # Skip empty definitions.
            if not definition:
                continue
            # [SV] Skip almost empty definitions.
            if locale == "sv" and len(definition) < 2:
                continue

            definitions.append(definition)

            # Gather the sub-definitions attached to this item, if any.
            subdefinitions: List[SubDefinitions] = []
            for sublist in a_list.sublists(
                    i=idx, pattern=sublist_patterns[locale]):
                for idx2, subcode in enumerate(sublist.items):
                    subdefinitions.append(
                        process_templates(word, clean(subcode), locale))

                    # Third level: sub-sub-definitions of this sub-item.
                    subsubdefinitions: List[str] = []
                    for subsublist in sublist.sublists(
                            i=idx2, pattern=sublist_patterns[locale]):
                        subsubdefinitions.extend(
                            process_templates(word, clean(subsubcode), locale)
                            for subsubcode in subsublist.items)
                    if subsubdefinitions:
                        subdefinitions.append(tuple(subsubdefinitions))
            if subdefinitions:
                definitions.append(tuple(subdefinitions))

    return definitions
def find_etymology(word: str, locale: str, parsed_section: wtp.Section) -> str:
    """Return the etymology text extracted from *parsed_section*.

    "ca", "es" and "pt" have dedicated extraction rules and return whatever
    text they produce.  "en" returns the first non-empty processed item; if
    there is none it continues with the generic lookup, which every other
    locale uses as well.  The generic lookup returns the first non-empty
    processed list item that matches no entry of
    ``definitions_to_ignore[locale]``, or ``""`` when nothing qualifies.
    """
    etyl: str

    if locale == "ca":
        return process_templates(word, clean(parsed_section.contents), locale)

    if locale == "en":
        skipped_prefixes = ("===Etymology", "{{PIE root")
        for item in parsed_section.get_lists(pattern=("", ))[0].items:
            if item.lstrip().startswith(skipped_prefixes):
                continue
            etyl = process_templates(word, clean(item), locale)
            if etyl:
                return etyl
        # Nothing usable: carry on with the generic lookup below.
    elif locale == "es":
        etyl = parsed_section.get_lists(pattern=("", ))[0].items[1]
        return process_templates(word, clean(etyl), locale)
    elif locale == "pt":
        uses_template_title = (
            parsed_section.title.strip() == "{{etimologia|pt}}")
        try:
            if uses_template_title:
                candidate_lists = parsed_section.get_lists()
            else:
                # "Etimologia" title section
                candidate_lists = parsed_section.get_lists(pattern=("^:", ))
            etyl = candidate_lists[0].items[0]
        except IndexError:
            # No such list: take the second item of the catch-all list.
            etyl = parsed_section.get_lists(pattern=("", ))[0].items[1]
        return process_templates(word, clean(etyl), locale)

    # Generic lookup over every item of every list of the section.
    for wiki_list in parsed_section.get_lists():
        for etymology in wiki_list.items:
            if any(ignore_me in etymology.lower()
                   for ignore_me in definitions_to_ignore[locale]):
                continue
            etyl = process_templates(word, clean(etymology), locale)
            if etyl:
                return etyl

    return ""
def find_etymology(word: str, locale: str, parsed_section: wtp.Section) -> List[Definitions]:
    """Find the etymology.

    NOTE(review): only the "ca"/"no" and "en" branches are visible here; any
    other locale falls off the end of the visible body without a ``return``.
    This block appears to be truncated -- confirm against the full function.
    """
    definitions: List[Definitions] = []
    etyl: str
    if locale in {"ca", "no"}:
        # The whole section contents is processed as a single entry.
        definitions.append(
            process_templates(word, clean(parsed_section.contents), locale))
        return definitions
    elif locale == "en":
        # Drop the items that start with the section heading or with a
        # "{{PIE root" template.
        items = [
            item for item in parsed_section.get_lists(pattern=("", ))[0].items
            if not item.lstrip().startswith(("===Etymology", "{{PIE root"))
        ]
        for item in items:
            # Keep only the items that are non-empty once processed.
            if etyl := process_templates(word, clean(item), locale):
                definitions.append(etyl)
        return definitions
def test_template_at_the_start(self):
    # A template located at the very beginning of the section is found.
    templates = Section('{{t}}').templates
    first_template = templates[0]
    self.assertEqual(first_template.string, '{{t}}')
def test_section_heading_tabs(self):
    # Tabs inside and after the heading: the title keeps its tabs and the
    # contents is empty.
    section = Section('=\tt\t=\t')
    self.assertEqual(section.string, '=\tt\t=\t')
    self.assertEqual(section.title, '\tt\t')
    self.assertEqual(section.contents, '')
def test_set_lead_contents(self):
    # Setting the contents of a heading-less section replaces its string.
    lead = Section('lead')
    lead.contents = 'newlead'
    self.assertEqual('newlead', lead.string)
def test_set_level(self):
    # Lowering the level from 3 to 2 rewrites the heading markers only.
    section = Section('=== t ===\ntext')
    section.level = 2
    self.assertEqual('== t ==\ntext', section.string)
def find_etymology(word: str, locale: str,
                   parsed_section: wtp.Section) -> List[Definitions]:
    """Return the etymology entries extracted from *parsed_section*.

    "ca"/"no", "en", "es"/"it" and "pt" have dedicated extraction rules.  All
    other locales walk every list of the section: each item that matches no
    entry of ``definitions_to_ignore[locale]`` is appended as processed text,
    followed by a tuple of its processed sub-items when it has any.
    """
    definitions: List[Definitions] = []
    etyl: str

    if locale in ("ca", "no"):
        # The whole section contents is processed as a single entry.
        definitions.append(
            process_templates(word, clean(parsed_section.contents), locale))
        return definitions

    if locale == "en":
        skipped_prefixes = ("===Etymology", "{{PIE root")
        for item in parsed_section.get_lists(pattern=("", ))[0].items:
            if item.lstrip().startswith(skipped_prefixes):
                continue
            etyl = process_templates(word, clean(item), locale)
            if etyl:
                definitions.append(etyl)
        return definitions

    if locale in ("es", "it"):
        # The first item of the catch-all list is skipped.
        for raw_item in parsed_section.get_lists(pattern=("", ))[0].items[1:]:
            etyl = process_templates(word, clean(raw_item.strip()), locale)
            if etyl:
                definitions.append(etyl)
        return definitions

    if locale == "pt":
        uses_template_title = (
            parsed_section.title.strip() == "{{etimologia|pt}}")
        try:
            if uses_template_title:
                candidate_lists = parsed_section.get_lists()
            else:
                # "Etimologia" title section
                candidate_lists = parsed_section.get_lists(pattern=("^:", ))
            etyl = candidate_lists[0].items[0]
        except IndexError:
            # No such list: take the second item of the catch-all list.
            etyl = parsed_section.get_lists(pattern=("", ))[0].items[1]
        definitions.append(process_templates(word, clean(etyl), locale))
        return definitions

    # Generic lookup: every list item, then its sub-items.
    for wiki_list in parsed_section.get_lists():
        for idx, section_item in enumerate(wiki_list.items):
            if any(ignore_me in section_item.lower()
                   for ignore_me in definitions_to_ignore[locale]):
                continue
            definitions.append(
                process_templates(word, clean(section_item), locale))

            subdefinitions: List[SubDefinitions] = []
            for sublist in wiki_list.sublists(i=idx):
                subdefinitions.extend(
                    process_templates(word, clean(subcode), locale)
                    for subcode in sublist.items)
            if subdefinitions:
                definitions.append(tuple(subdefinitions))

    return definitions
def test_trailing_space_setter():
    """Setting contents keeps the space that trails the heading."""
    section = Section('=t= \no')
    section.contents = 'n'
    assert section.string == '=t= \nn'
def test_set_title(self):
    # The title is read back exactly as assigned, surrounding spaces included.
    section = Section('== section ==\ntext.')
    section.title = ' newtitle '
    self.assertEqual(' newtitle ', section.title)
def test_set_contents():
    """Contents is read back exactly as assigned, spaces included."""
    section = Section('== title ==\ntext.')
    section.contents = ' newcontents '
    assert section.contents == ' newcontents '
def test_set_level():
    """Lowering the level from 3 to 2 rewrites the heading markers only."""
    section = Section('=== t ===\ntext')
    section.level = 2
    assert section.string == '== t ==\ntext'
def test_set_lead_contents():
    """Setting the contents of a heading-less section replaces its string."""
    lead = Section('lead')
    lead.contents = 'newlead'
    assert lead.string == 'newlead'
def test_nolevel7(self):
    # Seven equal signs on each side: the level stays at 6 and the extra
    # sign on each side ends up in the title.
    section = Section('======= h6 =======\n')
    self.assertEqual(6, section.level)
    self.assertEqual('= h6 =', section.title)
def test_template_at_the_start():
    """A template located at the very beginning of the section is found."""
    templates = Section('{{t}}').templates
    first_template = templates[0]
    assert first_template.string == '{{t}}'
def test_setting_lead_section_contents():
    """Setting contents on a multi-line lead section replaces all of it."""
    lead = Section('a\nb')
    lead.contents = 'c'
    assert lead.string == 'c'
def test_trailing_space_setter(self):
    # Setting contents keeps the space that trails the heading.
    section = Section('=t= \no')
    section.contents = 'n'
    self.assertEqual('=t= \nn', section.string)
def test_del_title(self):
    # Deleting the title removes the heading line and leaves the contents.
    section = Section('== section ==\ntext.')
    del section.title
    self.assertEqual('text.', section.string)
    self.assertIsNone(section.title)
    # Deleting again: no change, no exception.
    del section.title
def test_setting_lead_section_contents(self):
    # Setting contents on a multi-line lead section replaces all of it.
    lead = Section('a\nb')
    lead.contents = 'c'
    self.assertEqual('c', lead.string)
def test_lead_set_title(self):
    # Assign a title to a section that has no heading.  There is no
    # assertion: the test passes as long as the assignment does not raise.
    # NOTE(review): confirm whether an expected outcome should be asserted.
    lead = Section('lead text')
    lead.title = ' newtitle '
def test_level6(self):
    # Six equal signs on each side with " == " in between: level 6, and the
    # inner " == " is the title.
    section = Section('====== == ======\n')
    self.assertEqual(6, section.level)
    self.assertEqual(' == ', section.title)
def test_set_contents(self):
    # Contents is read back exactly as assigned, spaces included.
    section = Section('== title ==\ntext.')
    section.contents = ' newcontents '
    self.assertEqual(' newcontents ', section.contents)
def test_leadsection(self):
    # Text that does not start with a heading: level 0 and no title.
    section = Section('lead text. \n== section ==\ntext.')
    self.assertEqual(0, section.level)
    self.assertIsNone(section.title)
def test_section_heading_tabs():
    """Tabs in the heading: the title keeps them and the contents is empty."""
    section = Section('=\tt\t=\t')
    assert section.string == '=\tt\t=\t'
    assert section.title == '\tt\t'
    assert section.contents == ''