def test_redirect(self):
    """Check that a redirect page resolves to the page it points at.

    A uniquely named target page and a redirect to it are created, then the
    text reached through the redirect is compared with the target's text.
    Both pages are deleted afterwards, also when the assertion fails.
    """
    redirect_to_name = "Redirect here {}".format(uuid4())
    redirect_from_name = "Redirect from {}".format(uuid4())
    content = "Some content {}".format(uuid4())
    create_wiki_page(page_name=redirect_to_name, content=content, summary="Unit test")
    create_redirect_wiki_page(page_name=redirect_from_name, redirect_to=redirect_to_name, summary="Unit test")
    redirect_from = get_wiki_page(page_name=redirect_from_name)
    redirect_to = get_wiki_page(page_name=redirect_to_name)
    try:
        self.assertEqual(redirect_from.redirects_to().text(), redirect_to.text())
    finally:
        # cleanup - runs even if the assertion failed, so no test pages are left behind
        try:
            redirect_from.delete(reason="Unit test finished")
        finally:
            redirect_to.delete(reason="Unit test finished")
def handle_categories(browse, create_category_pages):
    """
    Build the category links for the Hebrew subjects of a record.

    :param browse: mapping with the record's browse data; its 'subject' entry, when truthy, is split with str_to_list
    :param create_category_pages: when truthy, create_wiki_page is called for every category that gets linked
    :return: wiki markup holding one CR-prefixed [[קטגוריה:...]] link per subject, "" when there are none
    """
    subject_entries = {}
    if browse.get('subject'):
        subject_entries = str_to_list(browse['subject'])

    # Keyed by the id taken from the third match group, so an id that shows up
    # more than once contributes a single term (the last one seen).
    term_by_nnl = {}
    for entry in subject_entries:
        found = re.search(AUTHORITY_ID_PATTERN, entry)
        if not found:
            continue
        nnl, term = found.group(3)[5:], found.group(1)
        if is_hebrew(term):  # only Hebrew terms become categories
            term_by_nnl[nnl] = term

    links = []
    for term in term_by_nnl.values():
        category_page_name = "קטגוריה:{}".format(term)
        links.append(CR + '[[{}]]'.format(category_page_name))
        if create_category_pages:
            create_wiki_page(category_page_name, "Creating empty category page", "")
    return "".join(links)
def test_page_creation_deletion(self):
    """Round-trip a wiki page: create it, read it back, delete it and check that it is gone."""
    page_name = "New page {}".format(uuid4())
    content = "Some content {}".format(uuid4())

    # start from a clean slate in case a page with this name is already there
    self._delete_page(page_name)

    # creation, then fetch the page and compare the stored text
    created = create_wiki_page(page_name=page_name, content=content, summary="Unit test")
    print("Created page", created)
    fetched = get_wiki_page(page_name)
    self.assertEqual(content, fetched.text())

    # deletion - an exception would be thrown here if the page did not exist
    fetched.delete(reason="Unit test finished")
    print("Deleted page", fetched)
    self.assertFalse(get_wiki_page(page_name).text())
def create_page_from_dictionary(item_dict, debug=None, create_category_pages=False, site=None):
    """
    Create a wiki page from a dictionary that describes a primo item.

    :param item_dict: primo item as a dictionary/json; the 'control', 'display' and 'links' sections are
        read, 'sort' is used as a fallback for the title and 'browse' is used when present
    :param debug: if truthy the markup is only printed; if not debug then actually create the pages
    :param create_category_pages: whether to create empty category pages when encountered
    :param site: handed to create_redirect_wiki_page / create_wiki_page as their first argument
    :return: page content in wiki markup
    """
    control = item_dict['control']
    document_id = control['recordid']
    sourcerecordid = control['sourcerecordid']
    originalsourceid = control['originalsourceid']
    display = item_dict['display']

    try:
        title = display['title']
        if isinstance(title, list):
            # getting the main title short, the full title is displayed in another location in the page
            title = title[0]
    except (KeyError, IndexError):
        # no usable display title - fall back to the sort title
        title = trim(item_dict['sort']['title'])

    item_type = display['type'].lower()
    try:
        type_entry = type_dict[item_type]
        display_type = type_entry[1] + " "  # hebrew type as a definite article, e.g. כתב העת
        creation_verb = type_entry[2]
    except (KeyError, IndexError, TypeError):
        # an unknown type must not abort the page: drop the type prefix and the verb
        display_type = ""
        creation_verb = ""
        print("Unrecognized type '{}'".format(item_type))

    # creators and contributors are merged into one ';'-separated field
    creators_field = display.get('creator')
    contributor = display.get('contributor')
    if contributor:
        creators_field = creators_field + ';' + contributor if creators_field else contributor

    creator = None
    authors_to_id = {}
    if creators_field:
        # 'browse' (and its 'author' entry) may be missing; names then stay without an authority id
        browse_authors = (item_dict.get('browse') or {}).get('author')
        if browse_authors:
            authors_to_id = entries_to_authority_id(str_to_list(browse_authors))
        names = [person_name(authors_to_id, name.strip()) for name in creators_field.split(";")]
        # dict.fromkeys drops duplicates while keeping the first-seen order, so the output is stable
        creator = comma_and(", ".join(dict.fromkeys(names)))

    summary = display.get('lds20')
    comments = str_to_list(display.get('lds05'))
    comments_section = CR.join("* " + comment for comment in comments) if comments else None

    # handle digital images: thumbnail display + links to digital images
    links = item_dict["links"]
    rosetta_link = links.get("linktorsrc")
    view_online = ''
    if rosetta_link:
        # handling the 'view' button & thumbnail image
        thumb_value = generate_thumb_link(rosetta_link)
        if thumb_value:
            view_online = VIEW_ONLINE.replace('{IE}', thumb_value[0]).replace('{title}', title)

    creationdate = display.get('creationdate')
    ispartof = display.get('ispartof')
    performed_by = str_to_list(display.get('lds35'))  # list
    performed_by_str = None
    if performed_by:
        performed_by_str = ", ".join(person_name(authors_to_id, performer) for performer in performed_by)
    source = display['source']

    lib_link = display.get('lds21')
    if not lib_link:
        lib_link = links['linktorsrc']
    # strip whatever precedes the URL; when there is no "http" at all the value is kept whole
    http_at = lib_link.find("http")
    if http_at > 0:
        lib_link = lib_link[http_at:]

    # Building page's Wikicode
    parts = [
        "{{DISPLAYTITLE:%s}}\n" % title,
        "{}'''{}''' {}".format(display_type, title, creation_verb),
    ]
    if creator:
        parts.append(" על ידי {}".format(creator))
    if creationdate:
        parts.append(" בשנת {}".format(creationdate))
    parts.append(CR)
    if summary:
        parts.append(CR + summary + CR)
    if view_online:
        parts.append(view_online + CR)
    parts.append("==פרטים כלליים==" + CR)
    if performed_by_str:
        parts.append(LIST_ITEM.format("שם מבצע", performed_by_str))
    if ispartof:
        parts.append(LIST_ITEM.format("מתוך", ispartof))
    if comments_section:
        parts.append(comments_section)
    parts.append(CR + "==מידע נוסף==" + CR)
    parts.append(LIST_ITEM.format("מקור", source))
    parts.append("* מספר מערכת: [{} {}]\n".format(lib_link, sourcerecordid))
    parts.append("== קישורים נוספים ==\n")
    alef_link = ALEF_LINK.format(originalsourceid, sourcerecordid)
    parts.append("* [{} הפריט בקטלוג הספרייה הלאומית]\n".format(alef_link))
    browse = item_dict.get('browse')
    if browse:
        # NOTE(review): this runs before the debug check, so create_category_pages
        # takes effect in debug mode as well - confirm that is intended
        parts.append(handle_categories(browse, create_category_pages))
    content = "".join(parts)

    if debug:
        print(content)
    else:
        title = clean_title(title)
        if is_hebrew(title):
            title = limit_length(title)
            # NOTE(review): the redirect is created for Hebrew titles only - confirm this
            # nesting against the intended behaviour for non-Hebrew titles
            create_redirect_wiki_page(site, page_name=clean_title(title), redirect_to=document_id,
                                      summary="Creating redirect page for {}".format(document_id))
        create_wiki_page(site, page_name=document_id, summary="Created from primo", content=content)
    return content
def create_page_from_node(person_node, records_list, debug=None, create_category_pages=False, site=None):
    """
    Create a person page from a neo4j node

    :param person_node: neo4j node; its 'id', 'person_name_heb' and 'data' (a JSON string) entries are read
    :param records_list: nested mapping: relation -> record type -> list of record mappings
    :param debug: if truthy the markup is only printed, otherwise the wiki page is created
    :param create_category_pages: accepted for interface compatibility; not used by this function
    :param site: handed to create_wiki_page as its first argument
    :return: page content in wiki markup
    """
    wiki_page_name = person_node['id']
    person_name = simple_person_name(person_node['person_name_heb'])
    this_record = json.loads(person_node['data'])
    birth_date = date8_to_heb_date(get_if_exists(this_record, '046', 0, 'f'))
    death_date = date8_to_heb_date(get_if_exists(this_record, '046', 0, 'g'))
    birth_place = get_if_exists(this_record, '370', 0, 'a')
    death_place = get_if_exists(this_record, '370', 0, 'b')
    other_names = get_if_exists(this_record, '400')
    other_names_value = BR.join(simple_person_name(other_name['a']) for other_name in other_names)
    occupation = get_if_exists(this_record, '374', 0, 'a')
    gender = get_if_exists(this_record, '375', 0, 'a')  # MALE/FEMALE
    female = (gender or "").lower() == "female"

    value_image_url = ""
    content = "{{DISPLAYTITLE:%s}}\n" % person_name

    # one list of markup lines per page section; headings depend on the person's gender
    audio = ["==פריטי שמע==", OPENDIV]
    video = ["==פריטי וידאו==", OPENDIV]
    books_by = ["==ספרים שכתבה==" if female else "==ספרים שכתב==", OPENDIV]
    books_about = ["==ספרים אודותיה==" if female else "==ספרים אודותיו==", OPENDIV]
    images = ["==גלריית תמונות==", '{| class="wikitable" border="1"']
    images_desc = ['|-']
    other = ["==אחר==", OPENDIV]

    for record_rel, records_by_type in records_list.items():
        for record_type, records in records_by_type.items():
            # getting record type (it is the same for every record in this group)
            if record_type == "other":
                item_type = "other"
            else:
                type_entry = type_dict.get(record_type.lower())
                # an unknown record type is listed under "other" instead of failing on None[3]
                item_type = type_entry[3] if type_entry else "other"

            for record in records:
                # temporary - skip over non-hebrew records
                if item_type == 'print' and record['language'] and record['language'] != 'heb':
                    continue

                rosetta_link = record['rosetta'] or ''
                if rosetta_link:
                    # handling the 'view' button & thumbnail image
                    view_online = VIEW_ONLINE.format(extract_link(rosetta_link))
                    thumb_value = generate_thumb(rosetta_link) or ''
                else:
                    view_online = ''
                    thumb_value = ''
                # the two placeholders are filled first, the record's own fields afterwards
                content_item = ITEM.replace('{view}', view_online).replace('{thumb}', thumb_value)
                content_item = content_item.format(**record)

                if record_rel == 'portrait_of':
                    value_image_url = (
                        '<img src="http://rosetta.nli.org.il/delivery/DeliveryManagerServlet?'
                        'dps_pid={}&dps_func=stream"'
                        ' style="max-height: 500px; max-width: 300px;"/>'
                    ).format(record['fl'])

                if item_type == 'print':
                    if record_rel == 'subject_of':
                        books_about.append(content_item)
                    else:
                        books_by.append(content_item)
                elif item_type == 'audio':
                    audio.append(content_item)
                elif item_type == 'video':
                    video.append(content_item)
                elif item_type == 'photograph':
                    # images_desc starts with one row marker, so this admits at most six images
                    if len(images_desc) <= 6 and record.get('fl'):
                        images.append(GALLERY_ITEM.format(record['fl']))
                        images_desc.append('| {}'.format(record['title']))
                else:
                    other.append(content_item)

    # each marker in TEMPLATE is kept and gets its value appended right after it
    template = TEMPLATE
    for marker, value in (
            (template_name, person_name),
            (template_birth_date, birth_date),
            (template_death_date, death_date),
            (template_birth_place, birth_place),
            (template_death_place, death_place),
            (template_other_names, other_names_value),
            (template_occupation, occupation),
            (template_image_url, value_image_url),
    ):
        template = template.replace(marker, marker + value)
    content += template

    notes = []
    for field in ('670', '678', '680'):
        field_notes = this_record.get(field)
        if field_notes:
            notes += field_notes
    if notes:
        content += CR
        content += "".join(note['a'] + BR for note in notes if note.get('a') and note.get('a') != "LCN")

    images_desc.append('|}')
    content += (
        CR
        + CR.join(books_by) + CLOSEDIV + CR
        + CR.join(books_about) + CLOSEDIV + CR
        + CR.join(audio) + CLOSEDIV + CR
        + CR.join(video) + CLOSEDIV + CR
        + CR.join(images + images_desc) + CR
        + CR.join(other) + CLOSEDIV + CR
    )

    if debug:
        print(content)
    else:
        # only the page itself is created; a redirect page named "אישיות:" + person_name
        # is not created by this function
        create_wiki_page(site, page_name=wiki_page_name, summary="Created from primo", content=content)
    return content
def create_page_from_node(person_node, records_list, debug=None, create_category_pages=False, site=None):
    """
    Create a person page from a neo4j node

    :param person_node: neo4j node; its 'id', 'person_name_heb' and 'data' (a JSON string) entries are read
    :param records_list: nested mapping: relation -> record type -> list of record mappings
    :param debug: if truthy the markup is only printed, otherwise the wiki page is created
    :param create_category_pages: accepted for interface compatibility; not used by this function
    :param site: handed to create_wiki_page as its first argument
    :return: page content in wiki markup
    """
    wiki_page_name = person_node['id']
    person_name = simple_person_name(person_node['person_name_heb'])
    this_record = json.loads(person_node['data'])
    birth_date = date8_to_heb_date(get_if_exists(this_record, '046', 0, 'f'))
    death_date = date8_to_heb_date(get_if_exists(this_record, '046', 0, 'g'))
    birth_place = get_if_exists(this_record, '370', 0, 'a')
    death_place = get_if_exists(this_record, '370', 0, 'b')
    other_names = get_if_exists(this_record, '400')
    other_names_value = BR.join(simple_person_name(other_name['a']) for other_name in other_names)
    occupation = get_if_exists(this_record, '374', 0, 'a')
    gender = get_if_exists(this_record, '375', 0, 'a')  # MALE/FEMALE
    female = (gender or "").lower() == "female"

    value_image_url = ""
    content = "{{DISPLAYTITLE:%s}}\n" % person_name

    # one list of markup lines per page section; headings depend on the person's gender
    audio = ["==פריטי שמע==", OPENDIV]
    video = ["==פריטי וידאו==", OPENDIV]
    books_by = ["==ספרים שכתבה==" if female else "==ספרים שכתב==", OPENDIV]
    books_about = ["==ספרים אודותיה==" if female else "==ספרים אודותיו==", OPENDIV]
    images = ["==גלריית תמונות==", '{| class="wikitable" border="1"']
    images_desc = ['|-']
    other = ["==אחר==", OPENDIV]

    for record_rel, records_by_type in records_list.items():
        for record_type, records in records_by_type.items():
            # getting record type (it is the same for every record in this group)
            if record_type == "other":
                item_type = "other"
            else:
                type_entry = type_dict.get(record_type.lower())
                # an unknown record type is listed under "other" instead of failing on None[3]
                item_type = type_entry[3] if type_entry else "other"

            for record in records:
                # temporary - skip over non-hebrew records
                if item_type == 'print' and record['language'] and record['language'] != 'heb':
                    continue

                rosetta_link = record['rosetta'] or ''
                if rosetta_link:
                    # handling the 'view' button & thumbnail image
                    view_online = VIEW_ONLINE.format(extract_link(rosetta_link))
                    thumb_value = generate_thumb(rosetta_link) or ''
                else:
                    view_online = ''
                    thumb_value = ''
                # the two placeholders are filled first, the record's own fields afterwards
                content_item = ITEM.replace('{view}', view_online).replace('{thumb}', thumb_value)
                content_item = content_item.format(**record)

                if record_rel == 'portrait_of':
                    value_image_url = (
                        '<img src="http://rosetta.nli.org.il/delivery/DeliveryManagerServlet?'
                        'dps_pid={}&dps_func=stream"'
                        ' style="max-height: 500px; max-width: 300px;"/>'
                    ).format(record['fl'])

                if item_type == 'print':
                    if record_rel == 'subject_of':
                        books_about.append(content_item)
                    else:
                        books_by.append(content_item)
                elif item_type == 'audio':
                    audio.append(content_item)
                elif item_type == 'video':
                    video.append(content_item)
                elif item_type == 'photograph':
                    # images_desc starts with one row marker, so this admits at most six images
                    if len(images_desc) <= 6 and record.get('fl'):
                        images.append(GALLERY_ITEM.format(record['fl']))
                        images_desc.append('| {}'.format(record['title']))
                else:
                    other.append(content_item)

    # each marker in TEMPLATE is kept and gets its value appended right after it
    template = TEMPLATE
    for marker, value in (
            (template_name, person_name),
            (template_birth_date, birth_date),
            (template_death_date, death_date),
            (template_birth_place, birth_place),
            (template_death_place, death_place),
            (template_other_names, other_names_value),
            (template_occupation, occupation),
            (template_image_url, value_image_url),
    ):
        template = template.replace(marker, marker + value)
    content += template

    notes = []
    for field in ('670', '678', '680'):
        field_notes = this_record.get(field)
        if field_notes:
            notes += field_notes
    if notes:
        content += CR
        content += "".join(note['a'] + BR for note in notes if note.get('a') and note.get('a') != "LCN")

    images_desc.append('|}')
    content += (
        CR
        + CR.join(books_by) + CLOSEDIV + CR
        + CR.join(books_about) + CLOSEDIV + CR
        + CR.join(audio) + CLOSEDIV + CR
        + CR.join(video) + CLOSEDIV + CR
        + CR.join(images + images_desc) + CR
        + CR.join(other) + CLOSEDIV + CR
    )

    if debug:
        print(content)
    else:
        # only the page itself is created; a redirect page named "אישיות:" + person_name
        # is not created by this function
        create_wiki_page(site, page_name=wiki_page_name, summary="Created from primo", content=content)
    return content
def create_page_from_dictionary(item_dict, debug=None, create_category_pages=False, site=None):
    """
    Create a wiki page from a dictionary that describes a primo item.

    :param item_dict: primo item as a dictionary/json; the 'control', 'display' and 'links' sections are
        read, 'sort' is used as a fallback for the title and 'browse' is used when present
    :param debug: if truthy the markup is only printed; if not debug then actually create the pages
    :param create_category_pages: whether to create empty category pages when encountered
    :param site: handed to create_redirect_wiki_page / create_wiki_page as their first argument
    :return: page content in wiki markup
    """
    control = item_dict['control']
    document_id = control['recordid']
    sourcerecordid = control['sourcerecordid']
    originalsourceid = control['originalsourceid']
    display = item_dict['display']

    try:
        title = display['title']
        if isinstance(title, list):
            # getting the main title short, the full title is displayed in another location in the page
            title = title[0]
    except (KeyError, IndexError):
        # no usable display title - fall back to the sort title
        title = trim(item_dict['sort']['title'])

    item_type = display['type'].lower()
    try:
        type_entry = type_dict[item_type]
        display_type = type_entry[1] + " "  # hebrew type as a definite article, e.g. כתב העת
        creation_verb = type_entry[2]
    except (KeyError, IndexError, TypeError):
        # an unknown type must not abort the page: drop the type prefix and the verb
        display_type = ""
        creation_verb = ""
        print("Unrecognized type '{}'".format(item_type))

    # creators and contributors are merged into one ';'-separated field
    creators_field = display.get('creator')
    contributor = display.get('contributor')
    if contributor:
        creators_field = creators_field + ';' + contributor if creators_field else contributor

    creator = None
    authors_to_id = {}
    if creators_field:
        # 'browse' (and its 'author' entry) may be missing; names then stay without an authority id
        browse_authors = (item_dict.get('browse') or {}).get('author')
        if browse_authors:
            authors_to_id = entries_to_authority_id(str_to_list(browse_authors))
        names = [person_name(authors_to_id, name.strip()) for name in creators_field.split(";")]
        # dict.fromkeys drops duplicates while keeping the first-seen order, so the output is stable
        creator = comma_and(", ".join(dict.fromkeys(names)))

    summary = display.get('lds20')
    comments = str_to_list(display.get('lds05'))
    comments_section = CR.join("* " + comment for comment in comments) if comments else None

    # handle digital images: thumbnail display + links to digital images
    links = item_dict["links"]
    rosetta_link = links.get("linktorsrc")
    view_online = ''
    if rosetta_link:
        # handling the 'view' button & thumbnail image
        thumb_value = generate_thumb_link(rosetta_link)
        if thumb_value:
            view_online = VIEW_ONLINE.replace('{IE}', thumb_value[0]).replace('{title}', title)

    creationdate = display.get('creationdate')
    ispartof = display.get('ispartof')
    performed_by = str_to_list(display.get('lds35'))  # list
    performed_by_str = None
    if performed_by:
        performed_by_str = ", ".join(person_name(authors_to_id, performer) for performer in performed_by)
    source = display['source']

    lib_link = display.get('lds21')
    if not lib_link:
        lib_link = links['linktorsrc']
    # strip whatever precedes the URL; when there is no "http" at all the value is kept whole
    http_at = lib_link.find("http")
    if http_at > 0:
        lib_link = lib_link[http_at:]

    # Building page's Wikicode
    parts = [
        "{{DISPLAYTITLE:%s}}\n" % title,
        "{}'''{}''' {}".format(display_type, title, creation_verb),
    ]
    if creator:
        parts.append(" על ידי {}".format(creator))
    if creationdate:
        parts.append(" בשנת {}".format(creationdate))
    parts.append(CR)
    if summary:
        parts.append(CR + summary + CR)
    if view_online:
        parts.append(view_online + CR)
    parts.append("==פרטים כלליים==" + CR)
    if performed_by_str:
        parts.append(LIST_ITEM.format("שם מבצע", performed_by_str))
    if ispartof:
        parts.append(LIST_ITEM.format("מתוך", ispartof))
    if comments_section:
        parts.append(comments_section)
    parts.append(CR + "==מידע נוסף==" + CR)
    parts.append(LIST_ITEM.format("מקור", source))
    parts.append("* מספר מערכת: [{} {}]\n".format(lib_link, sourcerecordid))
    parts.append("== קישורים נוספים ==\n")
    alef_link = ALEF_LINK.format(originalsourceid, sourcerecordid)
    parts.append("* [{} הפריט בקטלוג הספרייה הלאומית]\n".format(alef_link))
    browse = item_dict.get('browse')
    if browse:
        # NOTE(review): this runs before the debug check, so create_category_pages
        # takes effect in debug mode as well - confirm that is intended
        parts.append(handle_categories(browse, create_category_pages))
    content = "".join(parts)

    if debug:
        print(content)
    else:
        title = clean_title(title)
        if is_hebrew(title):
            title = limit_length(title)
            # NOTE(review): the redirect is created for Hebrew titles only - confirm this
            # nesting against the intended behaviour for non-Hebrew titles
            create_redirect_wiki_page(site, page_name=clean_title(title), redirect_to=document_id,
                                      summary="Creating redirect page for {}".format(document_id))
        create_wiki_page(site, page_name=document_id, summary="Created from primo", content=content)
    return content