def extract_category():
    """Return the lines of the wiki text that begin with a category link.

    The text comes from ``load_wiki_data()`` and is split on ``"\\n"``.
    A line is kept, unmodified, when it starts with ``[[Category:`` followed
    by at least one character and a later ``]]``.

    Returns:
        list[str]: the matching lines, in their original order.
    """
    category_pattern = re.compile(r'\[\[Category:.+\]\]')
    return [
        row
        for row in load_wiki_data().split("\n")
        # .match anchors at the start of the line, so a category link that
        # appears mid-line is not selected.
        if category_pattern.match(row)
    ]
def extract_country_data():
    """Parse the ``{{基礎情報 国 ...}}`` template of the wiki text into a dict.

    The text returned by ``load_wiki_data()`` is scanned for balanced
    ``{{ ... }}`` templates (nested templates are handled by a recursive
    pattern from the third-party ``regex`` module).  The first template that
    starts with ``{{基礎情報 国`` is split into lines; its first and last lines
    (the template opener and the closing ``}}``) are dropped and the rest is
    read as ``|key = value`` fields.

    A line that is not a ``|key = value`` line is treated as a continuation
    of the previous field and appended to its value after a newline.

    Returns:
        dict[str, str]: field name -> raw field value, in template order.

    Raises:
        IndexError: if the text contains no ``{{基礎情報 国`` template.
    """
    re_curly_brackets = r"(?<rec>\{\{(?:[^{}]+|(?&rec))*\}\})"
    # Non-greedy key: split at the FIRST " = " so that a value which itself
    # contains " = " stays intact instead of leaking into the key.
    re_field = re.compile(r"^\|(.+?) \= (.+)")

    templates = regex.findall(re_curly_brackets, load_wiki_data(), regex.VERBOSE)
    infobox = next(
        (data for data in templates if data.startswith("{{基礎情報 国")),
        None,
    )
    if infobox is None:
        # Same exception type the bare ``[0]`` lookup used to raise, but with
        # a message that says what was actually missing.
        raise IndexError("no {{基礎情報 国 ...}} template found in the wiki data")

    dic_country_data = {}
    current_key = None
    for e in infobox.split("\n")[1:-1]:
        m = re_field.match(e)
        if m:
            current_key = m.group(1)
            dic_country_data[current_key] = m.group(2)
        elif current_key is not None:
            dic_country_data[current_key] += "\n" + e
        # A continuation line before any field has nothing to attach to;
        # it is skipped rather than crashing on an unbound key.
    return dic_country_data
# Print the name of every media file referenced in the wiki text.
import re

from Chap03_020 import load_wiki_data

# Matches "ファイル:<name>|" or "File:<name>|" (case-insensitive); group 2 is
# the file name, i.e. everything up to the first following "|".
re_media = re.compile(r"(ファイル|File):(.+?)\|", re.I)

for line in load_wiki_data().split("\n"):
    # finditer instead of search: a single line may reference several files,
    # and search would report only the first of them.
    for m in re_media.finditer(line):
        print(m.group(2))