Пример #1
0
def get_kiso(filter_regex):
    """Parse the 基礎情報 template found in the wiki text into a dict.

    The text returned by ``get_text()`` is searched for the template, the
    template body is optionally cleaned with ``filter_regex`` and then cut
    into fields at every newline followed by "|".

    Args:
        filter_regex: Pattern handed to ``re.sub``; every match is removed
            from the template body before it is split into fields. Pass
            ``None`` to leave the body untouched.

    Returns:
        A dict mapping each field name to its value. A chunk that contains
        no " = " separator is stored under the key "self" (a later such
        chunk overwrites an earlier one). An empty dict is returned when
        the template is not present in the text.
    """
    wiki_text = get_text()

    # DOTALL lets ".+" run across the line breaks inside the template body.
    pattern = r'{{基礎情報(.+)\n}}\n'
    template_re = re.compile(pattern, flags=(re.MULTILINE | re.DOTALL))

    bodies = template_re.findall(wiki_text)
    if not bodies:
        # No template in the text: report "no fields" instead of failing
        # with an IndexError on bodies[0].
        return {}

    body = bodies[0]
    if filter_regex is not None:
        body = re.sub(filter_regex, "", body)

    result_dict = {}
    for field in body.split('\n|'):
        # Split on the first " = " only, so a value that itself contains
        # " = " is kept whole rather than truncated.
        name, separator, value = field.partition(" = ")
        if separator:
            result_dict[name] = value
        else:
            result_dict["self"] = name
    return result_dict
Пример #2
0
import re
from knock20 import get_text
# Regular expression
# MULTILINE: "^" also matches at the beginning of every line
# DOTALL: "." matches any character, including a newline
# 1. ^\|(.+?)  a line starting with "|", followed by the field name
# 2. \s=\s(.+?)  the value after " = ", as short as possible
# 3. (?=\n(\||\}))  lookahead: the value stops right before a line that
#    starts with "|" or "}"

pattern = re.compile(
    r'^\|(.+?)\s=\s(.+?)(?=\n(\||\}))',
    flags=re.MULTILINE | re.DOTALL,
)
s = get_text()

# Later occurrences of a field name overwrite earlier ones.
basic_info = {}
for found in pattern.finditer(s):
    field_name, field_value = found.group(1, 2)
    basic_info[field_name] = field_value

for entry in basic_info.items():
    print("{}: {}".format(*entry))
Пример #3
0
import re
from knock20 import get_text

# Regular expression
# 1. The line starts with "[[Category:"
# 2. (.+?) captures the category name (any characters, as few as possible)
# 3. (\|.+)? optionally matches a trailing "|***" part (zero or one time)
# 4. The line ends with the closing "]]"
pattern = re.compile(r'^\[\[Category:(.+?)(\|.+)?\]\]$')

# Check the text one line at a time.
for line in get_text().split("\n"):
    found = pattern.search(line)
    if found is None:
        # Not a category line.
        continue
    # Print the first group only (the part described in item 2 above).
    print(found.group(1))
Пример #4
0
import re
from knock20 import get_text

# Collect the section titles of the wiki text, grouped by the number of "="
# signs (2 to 5) that surround the title.
wiki_text = get_text()
level_dict = {}
for i in range(2, 6):
    # Anchor with ^/$ under MULTILINE instead of consuming the newline on
    # both sides of the heading: a consumed newline cannot start the next
    # match, so a heading directly following another one of the same level
    # would be skipped, and a heading on the first or last line of the text
    # would never match.
    regex_str = r'^={%d}([^=]+)={%d}$' % (i, i)
    level_list = re.findall(regex_str, wiki_text, flags=re.MULTILINE)
    level_dict[i] = level_list
print(level_dict)