def main():
    """Print the URL of the image named by the '国旗画像' info field.

    The text obtained from ``k20.get_json`` is stripped of markup, turned
    into a dict by ``k25.get_info_dict``, and the part of the '国旗画像'
    value before the first '|' is looked up through the MediaWiki
    ``imageinfo`` API on en.wikipedia.org.

    NOTE(review): assumes ``k20.get_json`` returns a str and that
    ``k25.get_info_dict`` returns a mapping of str to str -- confirm
    against those modules.

    Raises:
        KeyError: if the info dict has no '国旗画像' entry, or the API
            response carries no ``imageinfo`` for the requested title.
    """
    path = 'jawiki-country.json'
    # Removed by this pattern: the characters [ ] ' * { }, anything between
    # '<' and '>' or between '{' and '}', the literal 'ファイル:' prefix and
    # parenthesised '(&...)' runs.
    p = re.compile(r'[\[\]\'\'\*\{\}]|<(.*?)>|\{(.*?)\}|ファイル:|\(\&(.*?)\)')
    # After the markup is gone, drop everything from 'http' to end of line.
    text = re.sub(r'http(.*?)\n', '\n', re.sub(p, '', k20.get_json(path)))
    dic = k25.get_info_dict(text)
    params = {'action': 'query',
              'prop': 'imageinfo',
              'format': 'json',
              'iiprop': 'url',
              'titles': 'Image:{0}'.format(dic[u'国旗画像'].split('|')[0])}
    # Bare endpoint only: the whole query is carried by ``params``, so no
    # conflicting 'titles' / 'prop' values are sent alongside it.
    uri = 'https://en.wikipedia.org/w/api.php'
    api = requests.get(uri, params=params).json()
    # One title was requested, so 'pages' holds one entry. Take it by
    # position instead of by a hard-coded page id, which is only valid for
    # one specific file.
    page = next(iter(api['query']['pages'].values()))
    print(page['imageinfo'][0]['url'])
def main():
    """Print one category name per line.

    Every ``[[Category:...]]`` link in the text obtained from
    ``k20.get_json`` is matched, and only the part before the first '|'
    is printed.

    NOTE(review): assumes ``k20.get_json`` returns a str -- confirm.
    """
    path = 'jawiki-country.json'
    # Raw string: '\[' and '\]' are regex escapes, not Python string
    # escapes, and a non-raw literal triggers an invalid-escape warning.
    links = re.findall(r'\[\[Category:(.*?)\]\]', k20.get_json(path))
    print('\n'.join(item.split('|')[0] for item in links))
def main():
    """Print the info dict built from the text with brackets and quotes removed.

    NOTE(review): assumes ``k20.get_json`` returns a str; the shape of what
    ``k25.get_info_dict`` returns is not visible here -- confirm.
    """
    raw = k20.get_json('jawiki-country.json')
    # Delete every '[', ']' and "'" character before parsing.
    cleaned = re.sub(r'[\[\]\']', '', raw)
    info = k25.get_info_dict(cleaned)
    print(info)
def main():
    """Print each section heading followed by its level, one per line.

    A heading is text enclosed by runs of two or more '=' characters. The
    level is the length of the opening run minus one, and all whitespace is
    removed from the heading text before printing.

    NOTE(review): assumes ``k20.get_json`` returns a str -- confirm.
    """
    source = k20.get_json('jawiki-country.json')
    rows = []
    for markers, title in re.findall('(={2,})(.*?)={2,}', source):
        name = re.sub(r'\s', '', title)
        level = len(markers) - 1
        rows.append(name + str(level))
    print('\n'.join(rows))
def main():
    """Print one referenced media file name per line.

    Matches ``[[File:...|`` and ``[[ファイル:...|`` links in the text
    obtained from ``k20.get_json`` and prints the text between the prefix
    and the first '|'.

    NOTE(review): assumes ``k20.get_json`` returns a str -- confirm.
    """
    path = 'jawiki-country.json'
    # Raw string: '\[' and '\|' are regex escapes, not Python string
    # escapes, and a non-raw literal triggers an invalid-escape warning.
    pattern = re.compile(r'\[\[(File|ファイル):(.*?)\|')
    # findall yields (prefix, name) tuples because the pattern has two
    # groups; only the name is printed.
    matches = re.findall(pattern, k20.get_json(path))
    print('\n'.join(name for _prefix, name in matches))
def main():
    """Print the info dict built from the text after stripping markup.

    NOTE(review): assumes ``k20.get_json`` returns a str; the shape of what
    ``k25.get_info_dict`` returns is not visible here -- confirm.
    """
    raw = k20.get_json('jawiki-country.json')
    # Removed by this pattern: the characters [ ] ' * { }, anything between
    # '<' and '>' or between '{' and '}', the literal 'ファイル:' prefix and
    # parenthesised '(&...)' runs.
    markup = re.compile(r'[\[\]\'\'\*\{\}]|<(.*?)>|\{(.*?)\}|ファイル:|\(\&(.*?)\)')
    without_markup = markup.sub('', raw)
    # Then drop everything from 'http' up to the end of its line.
    cleaned = re.sub(r'http(.*?)\n', '\n', without_markup)
    info = k25.get_info_dict(cleaned)
    print(info)