Exemplo n.º 1
0
def main():
    """Print every section heading found in the 'イギリス' text with its level.

    The text returned by ``mywiki.extrac_text_from_title`` is scanned line by
    line for ``== Heading ==`` style lines. For each one the heading name is
    printed followed by the number of leading ``=`` characters minus one.
    """
    # Raw string: ``\s`` in a normal literal is an invalid escape sequence.
    heading_pattern = re.compile(r'^(=+)\s*(.*?)\s*(=+)$')
    for text in mywiki.extrac_text_from_title(u'イギリス').split('\n'):
        heading = heading_pattern.search(text)
        if heading is None:
            continue
        # "==" maps to level 1, "===" to level 2, and so on.
        print(heading.group(2), len(heading.group(1)) - 1)
Exemplo n.º 2
0
def main():
    """Print the ``name = value`` fields of the 'イギリス' text, emphasis removed.

    The text returned by ``mywiki.extrac_text_from_title`` is split wherever a
    newline is followed by ``|`` or ``}``. Each chunk of the form
    ``name = value`` is stored in a dict with runs of two to five apostrophes
    stripped from the value. The pairs are then printed ordered by value.
    """
    # Raw strings: ``\s`` in a normal literal is an invalid escape sequence.
    field_pattern = re.compile(r"^(.*?)\s=\s(.*)", re.S)
    emphasis_pattern = re.compile(r"'{2,5}")

    chunks = re.split(r'\n[\|}]', mywiki.extrac_text_from_title(u'イギリス'))
    var_dict = {}
    for text in chunks:
        field = field_pattern.search(text)
        if field is not None:
            var_dict[field.group(1)] = emphasis_pattern.sub("", field.group(2))

    # NOTE(review): output is ordered by value (x[1]), not by field name;
    # kept as in the original -- confirm this is the intended ordering.
    for k, v in sorted(var_dict.items(), key=lambda x: x[1]):
        print(k, v)
Exemplo n.º 3
0
def main():
    """Print the ``|name = value`` fields of the 'イギリス' text, sorted by name.

    Every line of the text returned by ``mywiki.extrac_text_from_title`` that
    starts with ``|name = value`` is collected into a dict, with the value
    passed through ``removeInnerLinkMarkup`` (defined elsewhere). The pairs
    are then printed in field-name order.
    """
    # Raw string: ``\|`` and ``\s`` are invalid escapes in a normal literal.
    field_pattern = re.compile(r"^\|(.*?)\s=\s(.*)")

    var_dict = {}
    for text in mywiki.extrac_text_from_title(u'イギリス').split('\n'):
        field = field_pattern.search(text)
        if field is not None:
            var_dict[field.group(1)] = removeInnerLinkMarkup(field.group(2))

    for k, v in sorted(var_dict.items(), key=lambda x: x[0]):
        print(k, v)
Exemplo n.º 4
0
def main():
    """Print the URL of the file named in the '国旗画像' field of 'イギリス'.

    The ``|name = value`` lines of the text returned by
    ``mywiki.extrac_text_from_title`` are collected into a dict (values passed
    through ``removeInnerLinkMarkup``). The value stored under '国旗画像' is
    then looked up as ``File:<value>`` through the English Wikipedia API
    (``action=query``, ``prop=imageinfo``, ``iiprop=url``) and the ``"url"``
    entry of whatever ``json_search`` returns for the response is printed.

    Raises:
        KeyError: if the text contains no '国旗画像' field.
    """
    # Raw string: ``\|`` and ``\s`` are invalid escapes in a normal literal.
    field_pattern = re.compile(r"^\|(.*?)\s=\s(.*)")

    var_dict = {}
    for text in mywiki.extrac_text_from_title(u'イギリス').split('\n'):
        field = field_pattern.search(text)
        if field is not None:
            var_dict[field.group(1)] = removeInnerLinkMarkup(field.group(2))

    url = "https://en.wikipedia.org/w/api.php"
    payload = {
        "action": "query",
        "titles": "File:{}".format(var_dict[u'国旗画像']),
        "prop": "imageinfo",
        "format": "json",
        "iiprop": "url"
    }
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    json_data = requests.get(url, params=payload, timeout=10).json()
    print(json_search(json_data)["url"])
Exemplo n.º 5
0
def main():
    """Print the category names declared in the 'イギリス' text.

    A line of the text returned by ``mywiki.extrac_text_from_title`` counts as
    a category declaration when it consists solely of ``[[Category:Name]]`` or
    ``[[Category:Name|suffix]]``; only ``Name`` is printed.
    """
    # Raw string: ``\[``, ``\|`` and ``\]`` are invalid escapes in a normal
    # literal.
    category_pattern = re.compile(r'^\[\[Category:(.*?)(|\|.*)\]\]$')
    for text in mywiki.extrac_text_from_title(u'イギリス').split('\n'):
        category = category_pattern.search(text)
        if category is None:
            continue
        print(category.group(1))
Exemplo n.º 6
0
def main():
    """Print the media file names referenced in the 'イギリス' text.

    For each line of the text returned by ``mywiki.extrac_text_from_title``,
    the first occurrence of ``File:<name>|`` or ``ファイル:<name>|`` is located
    and ``<name>`` (everything up to the first ``|``) is printed. Lines with
    no such reference are skipped.
    """
    # Raw string: ``\|`` is an invalid escape sequence in a normal literal.
    media_pattern = re.compile(r'(File|ファイル):(.*?)\|')
    for text in mywiki.extrac_text_from_title(u'イギリス').split('\n'):
        media_file = media_pattern.search(text)
        if media_file is None:
            continue
        print(media_file.group(2))