示例#1
0
def extract_category():
    """Return the article lines that begin with a ``[[Category:...]]`` link.

    The article text is obtained from ``load_wiki_data()`` and split on
    newline characters. A line is kept, whole and unmodified, when the
    category pattern matches at the start of that line.

    Returns:
        list[str]: the matching lines, in their original order.
    """
    category_link = re.compile(r'\[\[Category:.+\]\]')
    article_lines = load_wiki_data().split("\n")
    return [row for row in article_lines if category_link.match(row)]
示例#2
0
def extract_country_data():
    """Parse the ``{{基礎情報 国 ...}}`` template of the article into a dict.

    The article text is obtained from ``load_wiki_data()``. Balanced
    ``{{...}}`` spans are located with a recursive pattern (this relies on the
    third-party ``regex`` module), and the first span that starts with
    ``{{基礎情報 国`` is parsed line by line.

    A line of the form ``|name = value`` starts a new field. Any other line
    is treated as a continuation and appended, joined with a newline, to the
    most recently started field.

    Returns:
        dict: field name -> field value, in order of appearance.

    Raises:
        IndexError: if the article contains no ``{{基礎情報 国`` template.
    """
    re_curly_brackets = r"(?<rec>\{\{(?:[^{}]+|(?&rec))*\}\})"
    templates = regex.findall(re_curly_brackets, load_wiki_data(), regex.VERBOSE)
    infoboxes = [t for t in templates if re.match(r"^\{\{基礎情報 国", t)]
    if not infoboxes:
        # Same exception type as the former bare ``[0]``, but with a message.
        raise IndexError("no '{{基礎情報 国' template found in the article")

    # Drop the first line (the template opener) and the last line
    # (presumably the closing braces -- confirm against the data).
    country_data = infoboxes[0].split("\n")[1:-1]

    # The key is matched non-greedily so that it ends at the FIRST " = ";
    # a greedy key would swallow part of any value that itself contains " = ".
    re_field = re.compile(r"^\|(.+?) \= (.+)")

    dic_country_data = {}
    current_key = None
    for e in country_data:
        m = re_field.match(e)
        if m:
            current_key = m.group(1)
            dic_country_data[current_key] = m.group(2)
        elif current_key is not None:
            dic_country_data[current_key] += "\n" + e
        # else: a continuation line before any field has been seen has
        # nothing to attach to, so it is skipped instead of raising
        # UnboundLocalError.
    return dic_country_data
示例#3
0
from Chap03_020 import load_wiki_data
import re


# Print the name of every media file referenced in the article text.
# A reference is "ファイル:" or "File:" (case-insensitive) followed by the
# file name, which is captured lazily up to the next "|".
re_media = re.compile(r"(ファイル|File):(.+?)\|", re.I)
for line in load_wiki_data().split("\n"):
    # finditer rather than search: one line may reference several files,
    # and search would report only the first of them.
    for m in re_media.finditer(line):
        print(m.group(2))