def data(wtext):
    """Collect the text variants for every infobox key found in ``wtext``.

    The keys are whatever ``wtext_help.data_keys(wtext)`` yields.  Each key
    maps to a dict holding three entries:

    * ``'plain_text'`` -- ``parse.get_simple_text(wtext, key)``
    * ``'wiki_links'`` -- ``parse.extract_page_links(wtext, key)``
    * ``'raw_text'``   -- ``parse.get_simple_text(wtext, key, clean=False)``

    Returns the resulting dict of dicts (empty when there are no keys).
    """
    # Register all keys first so a key that is yielded more than once is
    # still only processed a single time below.
    fields = dict.fromkeys(wtext_help.data_keys(wtext))
    for field in fields:
        fields[field] = {
            'plain_text': parse.get_simple_text(wtext, field),
            'wiki_links': parse.extract_page_links(wtext, field),
            'raw_text': parse.get_simple_text(wtext, field, clean=False),
        }
    return fields
def page_name(wtext):
    """Return the cleaned page name for ``wtext``.

    The name is looked up with ``parse.get_simple_text`` under the keys
    ``'name'``, ``'show_name'``, ``'season_name'`` and ``'film name'``.
    When that lookup yields a falsy value, ``extract_page_name(wtext)`` is
    used instead.  Whichever value is chosen is passed through
    ``clean_help.clean_text`` before being returned.
    """
    name_keys = ['name', 'show_name', 'season_name', 'film name']
    # ``or`` only evaluates the fallback when the infobox lookup is falsy.
    raw_name = parse.get_simple_text(wtext, name_keys) or extract_page_name(wtext)
    return clean_help.clean_text(raw_name)
def get_image_filename(wtext):
    """Return the UTF-8 encoded image filename referenced by ``wtext``.

    The filename is looked up with ``parse.get_simple_text`` under the keys
    ``'image'``, ``'img'`` and ``'cover'``.  A non-empty result is normalized
    by:

    * replacing spaces with underscores,
    * dropping ``[[`` / ``]]`` link brackets when ``[[`` is present,
    * removing a single leading ``File:`` prefix.

    Returns the normalized filename encoded as UTF-8, or the falsy lookup
    result unchanged when no filename was found.
    """
    filename = parse.get_simple_text(wtext, ['image', 'img', 'cover'])
    if not filename:
        return filename

    # All edits are done on the text value and the encoding happens last:
    # mixing ``str`` arguments with an already-encoded value raises
    # TypeError on Python 3.
    filename = filename.replace(' ', '_')
    if '[[' in filename:
        filename = filename.replace('[[', '').replace(']]', '')

    prefix = 'File:'
    if filename.startswith(prefix):
        # Slice instead of ``replace`` so only the leading prefix is removed
        # and a later "File:" inside the name is left intact.
        filename = filename[len(prefix):]

    return filename.encode('utf-8')
def get_caption(wtext):
    """Return what ``parse.get_simple_text`` finds under ``'caption'`` in ``wtext``."""
    caption_key = 'caption'
    return parse.get_simple_text(wtext, caption_key)
def get_alt(wtext):
    """Return what ``parse.get_simple_text`` finds under ``'alt'`` in ``wtext``."""
    alt_key = 'alt'
    return parse.get_simple_text(wtext, alt_key)