def process_constituent_row(constituent, current_id):
    """Add the alternate name carried by the current row to a constituent.

    ``row``, ``indices``, ``save``, ``CONSTITUENTTYPES``,
    ``ELASTICSEARCH_INDEX`` and ``elasticsearch_connection`` are not defined
    in this function; they are resolved from the surrounding scope.

    Args:
        constituent: dict accumulated so far for ``current_id``.
        current_id: id of the constituent the previous row belonged to.

    Returns:
        ``(constituent, current_id)`` to be passed back in with the next row.
    """
    row_constituent_id = row[indices['constituent_id_index']]
    constituent_type = CONSTITUENTTYPES.get(int(row[indices['type_id_index']]))

    if row_constituent_id != current_id:
        # One constituent usually spans several rows. A changed id means the
        # previous constituent is complete, so hand it to save() before
        # loading the next one from Elasticsearch.
        save(constituent)
        current_id = row_constituent_id
        found = elasticsearch_connection.item_exists(
            row_constituent_id, constituent_type, ELASTICSEARCH_INDEX)
        if not found:
            print("%s could not be found!" % row_constituent_id)
            return ({}, current_id)
        constituent = elasticsearch_connection.get_item(
            row_constituent_id, constituent_type, ELASTICSEARCH_INDEX)

    alternate_names = constituent.setdefault('altnames', [])
    alternate_names.append({
        'name': row[indices['altname_index']],
        'type': row[indices['name_type_index']],
    })
    return (constituent, current_id)
def process_media_row(media, current_id):
    """Attach the constituent described by the current row to a media item.

    ``row``, ``indices``, ``save``, ``MEDIATYPES``, ``CONSTITUENTTYPES``,
    ``get_media_url`` and ``elasticsearch_connection`` are not defined in this
    function; they are resolved from the surrounding scope.

    Rows whose media type key is 4 or 5 are returned untouched. When the row's
    id differs from ``current_id`` the previous item is passed to ``save`` and
    the new one is fetched from Elasticsearch. If it cannot be found, this row
    and every following row for the same id are skipped.

    Args:
        media: dict accumulated so far for ``current_id``.
        current_id: id of the media item the previous row belonged to.

    Returns:
        ``(media, current_id)`` to be passed back in with the next row.
    """
    media_id = row[indices['id_index']]
    media_type_key = int(row[indices['media_type_id_index']])
    media_type = MEDIATYPES.get(media_type_key)

    # for now, ignore Microfilm and Document media types
    if media_type_key in (4, 5):
        return (media, current_id)

    if media_id != current_id:
        # a new id means the previous item is complete: save it, then load
        # the next one
        save(media)
        current_id = media_id
        media = {}
        if elasticsearch_connection.item_exists(media_id, media_type):
            media = elasticsearch_connection.get_item(media_id, media_type)
        else:
            print("%s could not be found!" % media_id)
            return (media, current_id)

    if not media:
        # The item for this id was not found when its first row was seen.
        # Without this guard the media['roles'] lookup below raises KeyError
        # on the second row of a missing item.
        return (media, current_id)

    if 'relateditems' not in media:
        media['relateditems'] = {}

    constituent_id = row[indices['constituent_id_index']]
    display_name = row[indices['display_name_index']]
    remarks = row[indices['remarks_index']]
    description = remarks if remarks != "NULL" else ""
    display_date = row[indices['display_date_index']]
    if display_date == "NULL":
        display_date = ""
    thumbnail_url = get_media_url(row[indices['thumb_path_index']],
                                  row[indices['thumb_file_index']])

    role = row[indices['role_index']]
    # update the set of roles for this media
    if role not in media['roles']:
        # make sure Photographer is first
        if role == "Photographer":
            media['roles'].insert(0, role)
        else:
            media['roles'].append(role)

    constituent_dict = {
        'role': role,
        'roleid': row[indices['role_id_index']],
        'id': constituent_id,
        'displayname': display_name,
        'displaydate': display_date,
        'displaytext': display_name,
        'description': description,
        'thumbnail': thumbnail_url,
    }

    constituent_type_key = int(row[indices['constituent_type_id_index']])
    constituent_type = CONSTITUENTTYPES.get(constituent_type_key)

    related = media['relateditems'].setdefault(constituent_type, [])
    related.append(constituent_dict)
    # keep the related items sorted
    related.sort(key=operator.itemgetter('displaytext'))
    return (media, current_id)
def process_constituent_row(constituent, current_id):
    """Copy every column of the current row into the constituent document.

    ``row``, ``indices``, ``columns``, ``save`` and ``CONSTITUENTTYPES`` are
    not defined in this function; they are resolved from the surrounding
    scope.

    Args:
        constituent: dict accumulated so far for ``current_id``.
        current_id: id of the constituent the previous row belonged to.

    Returns:
        ``(constituent, current_id)`` to be passed back in with the next row.
    """
    row_constituent_id = row[indices["constituent_id_index"]]
    constituent_type = CONSTITUENTTYPES.get(int(row[indices["type_id_index"]]))

    # could have multiple rows for the same constituent; only start a fresh
    # document (after saving the old one) when the id changes
    if row_constituent_id != current_id:
        save(constituent)
        current_id = row_constituent_id
        constituent = {}

    constituent["type"] = constituent_type

    for position, column in enumerate(columns):
        field = column.lower()
        cell = row[position]

        # cleanup: digit strings -> int, "NULL" -> None, otherwise drop ",,"
        if cell.isdigit():
            cell = int(cell)
        elif cell == "NULL":
            cell = None
        else:
            cell = cell.replace(",,", "")

        # a begin/end date of 0 is stored as None
        if cell == 0 and field in ("begindate", "enddate"):
            cell = None

        constituent[field] = cell

    constituent["displaytext"] = constituent["displayname"]
    return (constituent, current_id)
def process_site_row(site, current_id):
    """Attach the constituent described by the current row to a site.

    ``row``, ``indices``, ``save``, ``CONSTITUENTTYPES``, ``get_media_url``
    and ``elasticsearch_connection`` are not defined in this function; they
    are resolved from the surrounding scope.

    When the row's site id differs from ``current_id`` the previous site is
    passed to ``save`` and the new one is fetched from Elasticsearch. If it
    cannot be found, this row and every following row for the same id are
    skipped.

    Args:
        site: dict accumulated so far for ``current_id``.
        current_id: id of the site the previous row belonged to.

    Returns:
        ``(site, current_id)`` to be passed back in with the next row.
    """
    site_id = row[indices['site_id_index']]

    if site_id != current_id:
        # will likely have multiple rows for one site because of many related
        # constituents; only get a new site if we have a new site id, but
        # first save the old site to elasticsearch
        save(site)
        current_id = site_id
        site = {}
        if elasticsearch_connection.item_exists(site_id, 'sites'):
            site = elasticsearch_connection.get_item(site_id, 'sites')
        else:
            print("%s could not be found!" % site_id)
            return (site, current_id)

    if not site:
        # The site for this id was not found when its first row was seen.
        # Without this guard the site['roles'] lookup below raises KeyError
        # on the second row of a missing site.
        return (site, current_id)

    if 'relateditems' not in site:
        site['relateditems'] = {}

    constituent_id = row[indices['constituent_id_index']]
    display_name = row[indices['display_name_index']]
    display_date = row[indices['display_date_index']]
    if display_date == "NULL":
        display_date = ""
    thumbnail_url = get_media_url(row[indices['thumb_path_index']],
                                  row[indices['thumb_file_index']])

    role = row[indices['role_index']]
    # update the set of roles for this site
    if role not in site['roles']:
        # make sure Tomb Owner is first
        if role == "Tomb Owner":
            site['roles'].insert(0, role)
        else:
            site['roles'].append(role)

    remarks = row[indices['remarks_index']]
    description = remarks if remarks != "NULL" else ""

    constituent_dict = {
        'role': role,
        'id': constituent_id,
        'displayname': display_name,
        'displaydate': display_date,
        'displaytext': display_name,
        'description': description,
        'thumbnail': thumbnail_url,
    }

    constituent_type_key = int(row[indices['constituent_type_id_index']])
    constituent_type = CONSTITUENTTYPES.get(constituent_type_key)

    # add to array of people for easier searching
    if constituent_type_key in (1, 3):
        site['people'].append(display_name)

    related = site['relateditems'].setdefault(constituent_type, [])
    related.append(constituent_dict)
    # keep the related items sorted
    related.sort(key=operator.itemgetter('displaytext'))

    if role == 'Tomb Owner':
        site['tombowner'] = "Yes"
    return (site, current_id)
def process_constituent_row(constituent, current_id):
    """Attach the published document described by the current row to a constituent.

    ``row``, ``indices``, ``save``, ``CONSTITUENTTYPES``, ``get_media_url``
    and ``elasticsearch_connection`` are not defined in this function; they
    are resolved from the surrounding scope.

    A date column holding the "NULL" placeholder (any letter case) is stored
    as an empty string, matching how the other date fields in this file are
    normalised.

    Args:
        constituent: dict accumulated so far for ``current_id``.
        current_id: id of the constituent the previous row belonged to.

    Returns:
        ``(constituent, current_id)`` to be passed back in with the next row.
    """
    constituent_id = row[indices["constituent_id_index"]]
    type_key = int(row[indices["type_id_index"]])
    constituent_type = CONSTITUENTTYPES.get(type_key)

    if constituent_id != current_id:
        # will likely have multiple rows for one constituent because of many
        # related published documents; only get a new constituent if we have a
        # new constituent id, but first save the old one to elasticsearch
        save(constituent)
        current_id = constituent_id
        constituent = {}
        if elasticsearch_connection.item_exists(constituent_id, constituent_type):
            constituent = elasticsearch_connection.get_item(constituent_id, constituent_type)
        else:
            print("%s could not be found!" % constituent_id)
            return (constituent, current_id)

    if "relateditems" not in constituent:
        constituent["relateditems"] = {}

    reference_id = row[indices["reference_id_index"]]
    title = row[indices["title_index"]]
    boiler_text = row[indices["boiler_text_index"]]
    date = row[indices["date_index"]]
    if date.lower() == "null":
        # do not leak the database's NULL placeholder into the document
        date = ""
    main_url = get_media_url(row[indices["path_index"]], row[indices["file_index"]])

    pubdocs = constituent["relateditems"].setdefault("pubdocs", [])
    pubdocs.append(
        {
            "id": reference_id,
            "boilertext": boiler_text,
            "displaytext": title,
            "date": date,
            "url": main_url,
        }
    )
    # keep the related items sorted
    pubdocs.sort(key=operator.itemgetter("displaytext"))
    return (constituent, current_id)
def process_constituent_row(constituent, current_id):
    """Copy every column of the current row into the constituent document.

    ``row``, ``indices``, ``columns``, ``save`` and ``CONSTITUENTTYPES`` are
    not defined in this function; they are resolved from the surrounding
    scope.

    Args:
        constituent: dict accumulated so far for ``current_id``.
        current_id: id of the constituent the previous row belonged to.

    Returns:
        ``(constituent, current_id)`` to be passed back in with the next row.
    """
    def clean(field, raw):
        # digit strings become ints, "NULL" becomes None, anything else has
        # every ",," removed
        if raw.isdigit():
            cleaned = int(raw)
        elif raw == "NULL":
            cleaned = None
        else:
            cleaned = raw.replace(',,', '')
        # a begin/end date of 0 is stored as None
        if field in ('begindate', 'enddate') and cleaned == 0:
            return None
        return cleaned

    this_id = row[indices['constituent_id_index']]
    constituent_type = CONSTITUENTTYPES.get(int(row[indices['type_id_index']]))

    # could have multiple rows
    if this_id != current_id:
        save(constituent)
        current_id = this_id
        constituent = {}

    constituent['type'] = constituent_type

    # walk the columns of this row
    for position, column in enumerate(columns):
        field = column.lower()
        constituent[field] = clean(field, row[position])

    constituent['displaytext'] = constituent['displayname']
    return (constituent, current_id)
def process_constituent_row(constituent, current_id):
    """Attach the object described by the current row to a constituent.

    ``row``, ``indices``, ``save``, ``CONSTITUENTTYPES``, ``CLASSIFICATIONS``,
    ``get_media_url`` and ``elasticsearch_connection`` are not defined in this
    function; they are resolved from the surrounding scope.

    Args:
        constituent: dict accumulated so far for ``current_id``.
        current_id: id of the constituent the previous row belonged to.

    Returns:
        ``(constituent, current_id)`` to be passed back in with the next row.
    """
    this_id = row[indices['constituent_id_index']]
    constituent_type = CONSTITUENTTYPES.get(int(row[indices['type_id_index']]))

    if this_id != current_id:
        # Several rows (one per related object) belong to one constituent.
        # On an id change, save the finished constituent and load the next.
        save(constituent)
        current_id = this_id
        if not elasticsearch_connection.item_exists(this_id, constituent_type):
            print("%s could not be found!" % this_id)
            return ({}, current_id)
        constituent = elasticsearch_connection.get_item(this_id, constituent_type)

    related = constituent.setdefault('relateditems', {})

    classification_key = int(row[indices['classification_id_index']])
    classification = CLASSIFICATIONS.get(classification_key)
    object_id = int(row[indices['object_id_index']])
    thumbnail_url = get_media_url(row[indices['thumb_path_index']],
                                  row[indices['thumb_file_index']])

    date = row[indices['object_date_index']]
    if date.lower() == "null":
        date = ""

    title = row[indices['object_title_index']]
    number = row[indices['object_number_index']]
    if classification == "diarypages" and title.lower() == "null":
        # untitled diary pages take whatever follows the first "_" of the
        # object number (the whole number when there is no "_")
        title = number.split('_', 1)[-1]
    if title.lower() == "null":
        title = "[No Title]"

    bucket = related.setdefault(classification, [])
    bucket.append({
        'id': object_id,
        'title': title,
        'displaytext': title,
        'classificationid': classification_key,
        'number': number,
        'date': date,
        'thumbnail': thumbnail_url,
    })
    # keep the related items sorted
    bucket.sort(key=operator.itemgetter('displaytext'))
    return (constituent, current_id)
def process_pub_row(pub, current_id):
    """Attach the constituent described by the current row to a published document.

    ``row``, ``indices``, ``save``, ``CONSTITUENTTYPES``, ``get_media_url``
    and ``elasticsearch_connection`` are not defined in this function; they
    are resolved from the surrounding scope.

    When the row's pub id differs from ``current_id`` the previous pub is
    passed to ``save`` and the new one is fetched from Elasticsearch. If it
    cannot be found, this row and every following row for the same id are
    skipped.

    Args:
        pub: dict accumulated so far for ``current_id``.
        current_id: id of the pub the previous row belonged to.

    Returns:
        ``(pub, current_id)`` to be passed back in with the next row.
    """
    pub_id = row[indices['pub_id_index']]

    if pub_id != current_id:
        # will likely have multiple rows for one pub because of many related
        # constituents; only get a new pub if we have a new pub id, but first
        # save the old pub to elasticsearch
        save(pub)
        current_id = pub_id
        pub = {}
        if elasticsearch_connection.item_exists(pub_id, 'pubdocs'):
            pub = elasticsearch_connection.get_item(pub_id, 'pubdocs')
        else:
            print("%s could not be found!" % pub_id)
            return (pub, current_id)

    if not pub:
        # The pub for this id was not found when its first row was seen.
        # Without this guard the pub['roles'] lookup below raises KeyError
        # on the second row of a missing pub.
        return (pub, current_id)

    if 'relateditems' not in pub:
        pub['relateditems'] = {}

    constituent_id = row[indices['constituent_id_index']]
    display_name = row[indices['display_name_index']]
    display_date = row[indices['display_date_index']]
    if display_date == "NULL":
        display_date = ""
    thumbnail_url = get_media_url(row[indices['thumb_path_index']],
                                  row[indices['thumb_file_index']])
    alpha_sort = row[indices['alpha_sort_index']]

    role = row[indices['role_index']]
    # update the set of roles for this pub
    if role not in pub['roles']:
        pub['roles'].append(role)

    if role == "Author":
        pub["authors"].append(alpha_sort)

    remarks = row[indices['remarks_index']]
    description = remarks if remarks != "NULL" else ""

    constituent_dict = {
        'role': role,
        'id': constituent_id,
        'displayname': display_name,
        'displaydate': display_date,
        'displaytext': display_name,
        'description': description,
        'thumbnail': thumbnail_url,
    }

    constituent_type_key = int(row[indices['constituent_type_id_index']])
    constituent_type = CONSTITUENTTYPES.get(constituent_type_key)

    related = pub['relateditems'].setdefault(constituent_type, [])
    related.append(constituent_dict)
    # keep the related items sorted
    related.sort(key=operator.itemgetter('displaytext'))
    return (pub, current_id)
def process_object_row(object, current_id):
    """Attach the constituent described by the current row to an object.

    ``row``, the ``*_index`` variables, ``save``, ``CLASSIFICATIONS``,
    ``CONSTITUENTTYPES`` and ``elasticsearch_connection`` are not defined in
    this function; they are resolved from the surrounding scope.

    Args:
        object: dict accumulated so far for ``current_id``.
        current_id: id of the object the previous row belonged to.

    Returns:
        ``(object, current_id)`` to be passed back in with the next row.
    """
    object_id = row[id_index]
    classification = CLASSIFICATIONS.get(int(row[classification_id_index]))

    if object_id != current_id:
        # One object may span several rows (one per related constituent);
        # save the finished object before loading the next.
        save(object)
        current_id = object_id
        if not elasticsearch_connection.item_exists(object_id, classification):
            print("%s could not be found!" % object_id)
            return ({}, current_id)
        object = elasticsearch_connection.get_item(object_id, classification)

    related = object.setdefault('relateditems', {})

    constituent_id = row[constituent_id_index]
    display_name = row[display_name_index]
    display_date = row[display_date_index]
    if display_date == "NULL":
        display_date = ""

    entry = {
        'role': row[role_index],
        'roleid': row[role_id_index],
        'id': constituent_id,
        'displayname': display_name,
        'displaydate': display_date,
        'displaytext': display_name,
    }

    constituent_type = CONSTITUENTTYPES.get(int(row[constituent_type_id_index]))
    related.setdefault(constituent_type, []).append(entry)

    role_id = entry['roleid']
    # parse out any constituents "Mentioned on this page" (RoleID==48)
    if role_id == '48':
        object.setdefault('mentioned', {}).setdefault('people', []).append(entry)
    # parse out any "Author" (RoleID==50)
    if role_id == '50':
        object.setdefault('author', []).append(entry)

    return (object, current_id)
def process_object_row(object, current_id):
    """Attach the constituent described by the current row to an object.

    ``row``, ``indices``, ``save``, ``CLASSIFICATIONS``, ``CONSTITUENTTYPES``,
    ``get_media_url`` and ``elasticsearch_connection`` are not defined in this
    function; they are resolved from the surrounding scope.

    When the row's id differs from ``current_id`` the previous object is
    passed to ``save`` and the new one is fetched from Elasticsearch. If it
    cannot be found, this row and every following row for the same id are
    skipped.

    Args:
        object: dict accumulated so far for ``current_id``.
        current_id: id of the object the previous row belonged to.

    Returns:
        ``(object, current_id)`` to be passed back in with the next row.
    """
    object_id = row[indices['id_index']]
    classification_key = int(row[indices['classification_id_index']])
    classification = CLASSIFICATIONS.get(classification_key)

    if object_id != current_id:
        # may have multiple rows for one object because of many related
        # constituents
        save(object)
        current_id = object_id
        object = {}
        if elasticsearch_connection.item_exists(object_id, classification):
            object = elasticsearch_connection.get_item(object_id, classification)
        else:
            print("%s could not be found!" % object_id)
            return (object, current_id)

    if not object:
        # The object for this id was not found when its first row was seen.
        # Without this guard the object['roles'] lookup below raises KeyError
        # on the second row of a missing object.
        return (object, current_id)

    if 'relateditems' not in object:
        object['relateditems'] = {}

    constituent_id = row[indices['constituent_id_index']]
    display_name = row[indices['display_name_index']]
    remarks = row[indices['remarks_index']]
    description = remarks if remarks != "NULL" else ""
    display_date = row[indices['display_date_index']]
    if display_date == "NULL":
        display_date = ""
    thumbnail_url = get_media_url(row[indices['thumb_path_index']],
                                  row[indices['thumb_file_index']])

    role = row[indices['role_index']]
    # update the set of roles for this object
    if role not in object['roles']:
        object['roles'].append(role)

    constituent_dict = {
        'role': role,
        'roleid': row[indices['role_id_index']],
        'id': constituent_id,
        'displayname': display_name,
        'displaydate': display_date,
        'displaytext': display_name,
        'description': description,
        'thumbnail': thumbnail_url,
    }

    constituent_type_key = int(row[indices['constituent_type_id_index']])
    constituent_type = CONSTITUENTTYPES.get(constituent_type_key)

    related = object['relateditems'].setdefault(constituent_type, [])
    related.append(constituent_dict)
    # keep the related items sorted
    related.sort(key=operator.itemgetter('displaytext'))
    return (object, current_id)
def process_pub_row(pub, current_id):
    """Attach the constituent described by the current row to a published document.

    ``row``, ``indices``, ``save``, ``CONSTITUENTTYPES``, ``get_media_url``
    and ``elasticsearch_connection`` are not defined in this function; they
    are resolved from the surrounding scope.

    When the row's pub id differs from ``current_id`` the previous pub is
    passed to ``save`` and the new one is fetched from Elasticsearch. If it
    cannot be found, this row and every following row for the same id are
    skipped.

    Args:
        pub: dict accumulated so far for ``current_id``.
        current_id: id of the pub the previous row belonged to.

    Returns:
        ``(pub, current_id)`` to be passed back in with the next row.
    """
    pub_id = row[indices['pub_id_index']]

    if pub_id != current_id:
        # will likely have multiple rows for one pub because of many related
        # constituents; only get a new pub if we have a new pub id, but first
        # save the old pub to elasticsearch
        save(pub)
        current_id = pub_id
        pub = {}
        if elasticsearch_connection.item_exists(pub_id, 'pubdocs'):
            pub = elasticsearch_connection.get_item(pub_id, 'pubdocs')
        else:
            print("%s could not be found!" % pub_id)
            return (pub, current_id)

    if not pub:
        # The pub for this id was not found when its first row was seen.
        # Without this guard the pub['roles'] lookup below raises KeyError
        # on the second row of a missing pub.
        return (pub, current_id)

    if 'relateditems' not in pub:
        pub['relateditems'] = {}

    constituent_id = row[indices['constituent_id_index']]
    display_name = row[indices['display_name_index']]
    display_date = row[indices['display_date_index']]
    if display_date == "NULL":
        display_date = ""
    thumbnail_url = get_media_url(row[indices['thumb_path_index']],
                                  row[indices['thumb_file_index']])
    alpha_sort = row[indices['alpha_sort_index']]

    role = row[indices['role_index']]
    # update the set of roles for this pub
    if role not in pub['roles']:
        pub['roles'].append(role)

    if role == "Author":
        pub["authors"].append(alpha_sort)

    remarks = row[indices['remarks_index']]
    description = remarks if remarks != "NULL" else ""

    constituent_dict = {
        'role': role,
        'id': constituent_id,
        'displayname': display_name,
        'displaydate': display_date,
        'displaytext': display_name,
        'description': description,
        'thumbnail': thumbnail_url,
    }

    constituent_type_key = int(row[indices['constituent_type_id_index']])
    constituent_type = CONSTITUENTTYPES.get(constituent_type_key)

    related = pub['relateditems'].setdefault(constituent_type, [])
    related.append(constituent_dict)
    # keep the related items sorted
    related.sort(key=operator.itemgetter('displaytext'))
    return (pub, current_id)
def process_object_row(object, current_id):
    """Attach the constituent described by the current row to an object.

    ``row``, ``indices``, ``save``, ``CLASSIFICATIONS``, ``CONSTITUENTTYPES``,
    ``get_media_url`` and ``elasticsearch_connection`` are not defined in this
    function; they are resolved from the surrounding scope.

    When the row's id differs from ``current_id`` the previous object is
    passed to ``save`` and the new one is fetched from Elasticsearch. If it
    cannot be found, this row and every following row for the same id are
    skipped.

    Args:
        object: dict accumulated so far for ``current_id``.
        current_id: id of the object the previous row belonged to.

    Returns:
        ``(object, current_id)`` to be passed back in with the next row.
    """
    object_id = row[indices['id_index']]
    classification_key = int(row[indices['classification_id_index']])
    classification = CLASSIFICATIONS.get(classification_key)

    if object_id != current_id:
        # may have multiple rows for one object because of many related
        # constituents
        save(object)
        current_id = object_id
        object = {}
        if elasticsearch_connection.item_exists(object_id, classification):
            object = elasticsearch_connection.get_item(object_id, classification)
        else:
            print("%s could not be found!" % object_id)
            return (object, current_id)

    if not object:
        # The object for this id was not found when its first row was seen.
        # Without this guard the object['roles'] lookup below raises KeyError
        # on the second row of a missing object.
        return (object, current_id)

    if 'relateditems' not in object:
        object['relateditems'] = {}

    constituent_id = row[indices['constituent_id_index']]
    display_name = row[indices['display_name_index']]
    remarks = row[indices['remarks_index']]
    description = remarks if remarks != "NULL" else ""
    display_date = row[indices['display_date_index']]
    if display_date == "NULL":
        display_date = ""
    thumbnail_url = get_media_url(row[indices['thumb_path_index']],
                                  row[indices['thumb_file_index']])

    role = row[indices['role_index']]
    # update the set of roles for this object
    if role not in object['roles']:
        object['roles'].append(role)

    constituent_dict = {
        'role': role,
        'roleid': row[indices['role_id_index']],
        'id': constituent_id,
        'displayname': display_name,
        'displaydate': display_date,
        'displaytext': display_name,
        'description': description,
        'thumbnail': thumbnail_url,
    }

    constituent_type_key = int(row[indices['constituent_type_id_index']])
    constituent_type = CONSTITUENTTYPES.get(constituent_type_key)

    related = object['relateditems'].setdefault(constituent_type, [])
    related.append(constituent_dict)
    # keep the related items sorted
    related.sort(key=operator.itemgetter('displaytext'))
    return (object, current_id)
def process_constituent_row(constituent, current_id):
    """Attach the object described by the current row to a constituent.

    ``row``, ``indices``, ``save``, ``CONSTITUENTTYPES``, ``CLASSIFICATIONS``,
    ``get_media_url`` and ``elasticsearch_connection`` are not defined in this
    function; they are resolved from the surrounding scope.

    Args:
        constituent: dict accumulated so far for ``current_id``.
        current_id: id of the constituent the previous row belonged to.

    Returns:
        ``(constituent, current_id)`` to be passed back in with the next row.
    """
    row_constituent_id = row[indices["constituent_id_index"]]
    constituent_type = CONSTITUENTTYPES.get(int(row[indices["type_id_index"]]))

    if row_constituent_id != current_id:
        # The id changed, so the previous constituent has all its related
        # objects: save it, then load the next one.
        save(constituent)
        current_id = row_constituent_id
        if not elasticsearch_connection.item_exists(row_constituent_id, constituent_type):
            print("%s could not be found!" % row_constituent_id)
            return ({}, current_id)
        constituent = elasticsearch_connection.get_item(row_constituent_id, constituent_type)

    related_items = constituent.setdefault("relateditems", {})

    classification_key = int(row[indices["classification_id_index"]])
    classification = CLASSIFICATIONS.get(classification_key)
    object_id = int(row[indices["object_id_index"]])
    thumbnail_url = get_media_url(row[indices["thumb_path_index"]], row[indices["thumb_file_index"]])

    raw_date = row[indices["object_date_index"]]
    date = raw_date if raw_date.lower() != "null" else ""

    object_title = row[indices["object_title_index"]]
    object_number = row[indices["object_number_index"]]
    title_missing = object_title.lower() == "null"
    if title_missing and classification == "diarypages":
        # use the part of the object number after its first "_"
        object_title = object_number[object_number.find("_") + 1:]
    if object_title.lower() == "null":
        object_title = "[No Title]"

    same_class_items = related_items.setdefault(classification, [])
    same_class_items.append(
        {
            "id": object_id,
            "title": object_title,
            "displaytext": object_title,
            "classificationid": classification_key,
            "number": object_number,
            "date": date,
            "thumbnail": thumbnail_url,
        }
    )
    # keep the related items sorted
    same_class_items.sort(key=operator.itemgetter("displaytext"))
    return (constituent, current_id)
def process_constituent_row(constituent, current_id):
    """Attach the media item described by the current row to a constituent.

    ``row``, ``indices``, ``save``, ``CONSTITUENTTYPES``, ``MEDIATYPES``,
    ``get_media_url`` and ``elasticsearch_connection`` are not defined in this
    function; they are resolved from the surrounding scope.

    Args:
        constituent: dict accumulated so far for ``current_id``.
        current_id: id of the constituent the previous row belonged to.

    Returns:
        ``(constituent, current_id)`` to be passed back in with the next row.
    """
    this_id = row[indices["constituent_id_index"]]
    constituent_type = CONSTITUENTTYPES.get(int(row[indices["type_id_index"]]))

    if this_id != current_id:
        # One constituent spans several rows (one per related media item);
        # save the finished constituent before loading the next.
        save(constituent)
        current_id = this_id
        if not elasticsearch_connection.item_exists(this_id, constituent_type):
            print("%s could not be found!" % this_id)
            return ({}, current_id)
        constituent = elasticsearch_connection.get_item(this_id, constituent_type)

    related = constituent.setdefault("relateditems", {})

    media_type_key = int(row[indices["media_type_id_index"]])
    media_type = MEDIATYPES.get(media_type_key)
    media_master_id = row[indices["media_master_id_index"]]
    thumbnail_url = get_media_url(row[indices["thumb_path_index"]], row[indices["thumb_file_index"]])
    main_url = get_media_url(row[indices["main_path_index"]], row[indices["main_file_index"]])
    caption = row[indices["caption_index"]]

    # this is a bit of a hack because the MediaFormats for videos (in the TMS
    # database) does not correctly identify the type of video, so make sure
    # we are only using videos that are mp4s
    if media_type_key == 3 and not row[indices["main_file_index"]].endswith("mp4"):
        return (constituent, current_id)

    bucket = related.setdefault(media_type, [])

    is_primary = row[indices["primary_display_index"]] == "1"
    if is_primary:
        # add primary photo as a top level item as well
        constituent["primarydisplay"] = {
            "thumbnail": thumbnail_url,
            "main": main_url,
            "displaytext": caption,
        }

    bucket.append(
        {
            "id": media_master_id,
            "displaytext": caption,
            "primarydisplay": is_primary,
            "thumbnail": thumbnail_url,
            "main": main_url,
        }
    )
    return (constituent, current_id)
def process_constituent_row(constituent, current_id):
    """Attach the site described by the current row to a constituent.

    ``row``, ``indices``, ``save``, ``CONSTITUENTTYPES``,
    ``ELASTICSEARCH_INDEX``, ``get_media_url``, ``create_thumbnail_url`` and
    ``elasticsearch_connection`` are not defined in this function; they are
    resolved from the surrounding scope.

    Args:
        constituent: dict accumulated so far for ``current_id``.
        current_id: id of the constituent the previous row belonged to.

    Returns:
        ``(constituent, current_id)`` to be passed back in with the next row.
    """
    this_id = row[indices['constituent_id_index']]
    constituent_type = CONSTITUENTTYPES.get(int(row[indices['type_id_index']]))

    if this_id != current_id:
        # One constituent spans several rows (one per related site); save the
        # finished constituent before loading the next.
        save(constituent)
        current_id = this_id
        found = elasticsearch_connection.item_exists(
            this_id, constituent_type, ELASTICSEARCH_INDEX)
        if not found:
            print("%s could not be found!" % this_id)
            return ({}, current_id)
        constituent = elasticsearch_connection.get_item(
            this_id, constituent_type, ELASTICSEARCH_INDEX)

    related = constituent.setdefault('relateditems', {})

    site_id = row[indices['site_id_index']]
    site_name = row[indices['site_name_index']]
    site_number = row[indices['site_number_index']]

    drs_id = row[indices['drs_id']]
    if drs_id.lower() == "null":
        drs_id = ""
    has_manifest = drs_id != ""

    thumbnail_url = get_media_url(row[indices['thumb_path_index']],
                                  row[indices['thumb_file_index']])
    if drs_id and not thumbnail_url:
        # no thumbnail from the media path: build one from the drs id
        thumbnail_url = create_thumbnail_url(drs_id)

    site_entry = {
        'id': site_id,
        'sitename': site_name,
        'sitenumber': site_number,
        'displaytext': "%s, %s" % (site_name, site_number),
        'thumbnail': thumbnail_url,
        'has_manifest': has_manifest,
    }

    sites = related.setdefault('sites', [])
    sites.append(site_entry)
    # keep the related items sorted
    sites.sort(key=operator.itemgetter('displaytext'))
    return (constituent, current_id)
def process_constituent_row(constituent, current_id):
    """Attach the published document described by the current row to a constituent.

    ``row``, ``indices``, ``save``, ``CONSTITUENTTYPES``, ``get_media_url``
    and ``elasticsearch_connection`` are not defined in this function; they
    are resolved from the surrounding scope.

    Args:
        constituent: dict accumulated so far for ``current_id``.
        current_id: id of the constituent the previous row belonged to.

    Returns:
        ``(constituent, current_id)`` to be passed back in with the next row.
    """
    this_id = row[indices['constituent_id_index']]
    constituent_type = CONSTITUENTTYPES.get(int(row[indices['type_id_index']]))

    if this_id != current_id:
        # One constituent spans several rows (one per related publication);
        # save the finished constituent before loading the next.
        save(constituent)
        current_id = this_id
        if not elasticsearch_connection.item_exists(this_id, constituent_type):
            print("%s could not be found!" % this_id)
            return ({}, current_id)
        constituent = elasticsearch_connection.get_item(this_id, constituent_type)

    related = constituent.setdefault('relateditems', {})

    reference_id = row[indices['reference_id_index']]
    title = row[indices['title_index']]
    boiler_text = row[indices['boiler_text_index']]

    date = row[indices['date_index']]
    if date.lower() == "null":
        date = ""

    main_url = get_media_url(row[indices['path_index']],
                             row[indices['file_index']])

    pubdocs = related.setdefault("pubdocs", [])
    pubdocs.append({
        'id': reference_id,
        'boilertext': boiler_text,
        'displaytext': title,
        'date': date,
        'url': main_url,
    })
    # keep the related items sorted
    pubdocs.sort(key=operator.itemgetter('displaytext'))
    return (constituent, current_id)
def process_site_row(site, current_id):
    """Attach the constituent described by the current row to a site.

    ``row``, the ``*_index`` variables, ``save``, ``CONSTITUENTTYPES`` and
    ``elasticsearch_connection`` are not defined in this function; they are
    resolved from the surrounding scope.

    Args:
        site: dict accumulated so far for ``current_id``.
        current_id: id of the site the previous row belonged to.

    Returns:
        ``(site, current_id)`` to be passed back in with the next row.
    """
    row_site_id = row[site_id_index]

    if row_site_id != current_id:
        # One site spans several rows (one per related constituent); save the
        # finished site before loading the next.
        save(site)
        current_id = row_site_id
        if not elasticsearch_connection.item_exists(row_site_id, 'sites'):
            print("%s could not be found!" % row_site_id)
            return ({}, current_id)
        site = elasticsearch_connection.get_item(row_site_id, 'sites')

    related = site.setdefault('relateditems', {})

    constituent_id = row[constituent_id_index]
    display_name = row[display_name_index]
    display_date = row[display_date_index]
    if display_date == "NULL":
        display_date = ""

    role = row[role_index]
    entry = {
        'role': role,
        'id': constituent_id,
        'displayname': display_name,
        'displaydate': display_date,
        'displaytext': display_name,
    }

    constituent_type = CONSTITUENTTYPES.get(int(row[constituent_type_id_index]))
    related.setdefault(constituent_type, []).append(entry)

    if role == 'Tomb Owner':
        site['tombowner'] = True

    return (site, current_id)
def process_constituent_row(constituent, current_id):
    """Attach the site described by the current row to a constituent.

    ``row``, ``indices``, ``save``, ``CONSTITUENTTYPES``, ``get_media_url``
    and ``elasticsearch_connection`` are not defined in this function; they
    are resolved from the surrounding scope.

    Args:
        constituent: dict accumulated so far for ``current_id``.
        current_id: id of the constituent the previous row belonged to.

    Returns:
        ``(constituent, current_id)`` to be passed back in with the next row.
    """
    row_constituent_id = row[indices["constituent_id_index"]]
    constituent_type = CONSTITUENTTYPES.get(int(row[indices["type_id_index"]]))

    if row_constituent_id != current_id:
        # The id changed, so the previous constituent has all its related
        # sites: save it, then load the next one.
        save(constituent)
        current_id = row_constituent_id
        if not elasticsearch_connection.item_exists(row_constituent_id, constituent_type):
            print("%s could not be found!" % row_constituent_id)
            return ({}, current_id)
        constituent = elasticsearch_connection.get_item(row_constituent_id, constituent_type)

    related_items = constituent.setdefault("relateditems", {})

    site_id = row[indices["site_id_index"]]
    site_name = row[indices["site_name_index"]]
    site_number = row[indices["site_number_index"]]
    thumbnail_url = get_media_url(row[indices["thumb_path_index"]], row[indices["thumb_file_index"]])

    site_entry = {
        "id": site_id,
        "sitename": site_name,
        "sitenumber": site_number,
        "displaytext": "%s, %s" % (site_name, site_number),
        "thumbnail": thumbnail_url,
    }

    sites = related_items.setdefault("sites", [])
    sites.append(site_entry)
    # keep the related items sorted
    sites.sort(key=operator.itemgetter("displaytext"))
    return (constituent, current_id)
def process_constituent_row(constituent, current_id):
    """Add the alternate name carried by the current row to a constituent.

    ``row``, ``indices``, ``save``, ``CONSTITUENTTYPES`` and
    ``elasticsearch_connection`` are not defined in this function; they are
    resolved from the surrounding scope.

    Args:
        constituent: dict accumulated so far for ``current_id``.
        current_id: id of the constituent the previous row belonged to.

    Returns:
        ``(constituent, current_id)`` to be passed back in with the next row.
    """
    this_id = row[indices["constituent_id_index"]]
    constituent_type = CONSTITUENTTYPES.get(int(row[indices["type_id_index"]]))

    if this_id != current_id:
        # One constituent spans several rows; save the finished constituent
        # before loading the next from Elasticsearch.
        save(constituent)
        current_id = this_id
        if not elasticsearch_connection.item_exists(this_id, constituent_type):
            print("%s could not be found!" % this_id)
            return ({}, current_id)
        constituent = elasticsearch_connection.get_item(this_id, constituent_type)

    names = constituent.setdefault("altnames", [])
    names.append(
        {
            "name": row[indices["altname_index"]],
            "type": row[indices["name_type_index"]],
        }
    )
    return (constituent, current_id)
def process_site_row(site, current_id):
    """Attach the constituent described by the current row to a site.

    ``row`` and ``indices`` are free names resolved from the surrounding
    scope rather than parameters. The site loaded from Elasticsearch is
    indexed directly by ``'roles'`` and ``'people'``, so both keys must
    already be present on it.

    Args:
        site: dict accumulated so far for ``current_id``.
        current_id: id of the site that ``site`` belongs to.

    Returns:
        A ``(site, current_id)`` tuple for the site the current row belongs
        to; the dict is empty when that site cannot be found.
    """
    site_id = row[indices['site_id_index']]

    if site_id != current_id:
        # A site usually spans several rows (one per related constituent),
        # so the previous site is only saved once a new site id shows up.
        save(site)
        current_id = site_id
        if not elasticsearch_connection.item_exists(site_id, 'sites', ELASTICSEARCH_INDEX):
            print("%s could not be found!" % site_id)
            return ({}, current_id)
        site = elasticsearch_connection.get_item(site_id, 'sites', ELASTICSEARCH_INDEX)

    related_items = site.setdefault('relateditems', {})

    constituent_id = row[indices['constituent_id_index']]
    display_name = row[indices['display_name_index']]

    raw_display_date = row[indices['display_date_index']]
    display_date = raw_display_date if raw_display_date != "NULL" else ""

    raw_drs_id = row[indices['drs_id']]
    drs_id = "" if raw_drs_id.lower() == "null" else raw_drs_id
    has_manifest = drs_id != ""

    thumbnail_url = get_media_url(row[indices['thumb_path_index']],
                                  row[indices['thumb_file_index']])
    if drs_id and not thumbnail_url:
        thumbnail_url = create_thumbnail_url(drs_id)

    role = row[indices['role_index']]
    # update the set of roles for this site, keeping Tomb Owner at the front
    known_roles = site['roles']
    if role not in known_roles:
        if role == "Tomb Owner":
            known_roles.insert(0, role)
        else:
            known_roles.append(role)

    remarks = row[indices['remarks_index']]
    description = "" if remarks == "NULL" else remarks

    constituent_entry = {
        'role': role,
        'id': constituent_id,
        'displayname': display_name,
        'displaydate': display_date,
        'displaytext': display_name,
        'description': description,
        'thumbnail': thumbnail_url,
        'has_manifest': has_manifest,
    }

    constituent_type_key = int(row[indices['constituent_type_id_index']])
    constituent_type = CONSTITUENTTYPES.get(constituent_type_key)

    # add to array of people for easier searching
    if constituent_type_key in (1, 3):
        site['people'].append(display_name)

    same_type_items = related_items.setdefault(constituent_type, [])
    same_type_items.append(constituent_entry)
    # keep the related items sorted
    same_type_items.sort(key=operator.itemgetter('displaytext'))

    if role == 'Tomb Owner':
        site['tombowner'] = "Yes"

    return (site, current_id)
def process_constituent_row(constituent, current_id):
    """Attach the media item described by the current row to a constituent.

    ``row`` and ``indices`` are not parameters: they are free names that must
    be supplied by the enclosing (or module) scope, as are ``save``,
    ``elasticsearch_connection``, ``CONSTITUENTTYPES``, ``MEDIATYPES`` and
    ``get_media_url``.

    Args:
        constituent: dict accumulated so far for ``current_id``.
        current_id: id of the constituent that ``constituent`` belongs to.

    Returns:
        A ``(constituent, current_id)`` tuple reflecting the constituent the
        current row belongs to. If that constituent cannot be found in
        Elasticsearch, the returned dict is empty. Rows with media type key 3
        whose main file does not end in ``mp4`` add no media entry.
    """
    def cell(index_name):
        # Row value for ``index_name``, with the literal "null" (any case) mapped to "".
        value = row[indices[index_name]]
        return "" if value.lower() == "null" else value

    constituent_id = row[indices['constituent_id_index']]
    type_key = int(row[indices['type_id_index']])
    constituent_type = CONSTITUENTTYPES.get(type_key)

    if constituent_id != current_id:
        # will likely have multiple rows for one constituent because of many related photos
        # only get a new constituent if we have a new constituent id,
        # but first save the old constituent to elasticsearch
        save(constituent)
        current_id = constituent_id
        constituent = {}
        if elasticsearch_connection.item_exists(constituent_id, constituent_type):
            constituent = elasticsearch_connection.get_item(constituent_id, constituent_type)
        else:
            # Single-argument call form: valid and identical in output on
            # both Python 2 and Python 3.
            print("%s could not be found!" % constituent_id)
            return (constituent, current_id)

    related_items = constituent.setdefault('relateditems', {})

    media_type_key = int(row[indices['media_type_id_index']])
    media_type = MEDIATYPES.get(media_type_key)
    number = cell('rendition_number_index')
    media_master_id = row[indices['media_master_id_index']]
    thumbnail_url = get_media_url(row[indices['thumb_path_index']],
                                  row[indices['thumb_file_index']])
    main_url = get_media_url(row[indices['main_path_index']],
                             row[indices['main_file_index']])
    description = cell('description_index')
    mediaview = cell('media_view_index')
    caption = cell('caption_index')
    display_text = ": ".join([mediaview, caption])

    # this is a bit of a hack because the MediaFormats for videos (in the TMS
    # database) does not correctly identify the type of video
    # so, make sure we are only using videos that are mp4s
    if media_type_key == 3 and not row[indices['main_file_index']].endswith('mp4'):
        return (constituent, current_id)

    media_items = related_items.setdefault(media_type, [])

    is_primary = row[indices['primary_display_index']] == '1'

    # add primary photo as a top level item as well
    if is_primary:
        constituent['primarydisplay'] = {
            'thumbnail': thumbnail_url,
            'main': main_url,
            'displaytext': display_text,
            'number': number,
            'description': description,
        }

    media_items.append({
        'id': media_master_id,
        'displaytext': display_text,
        'primarydisplay': is_primary,
        'thumbnail': thumbnail_url,
        'main': main_url,
        'number': number,
        'description': description,
    })

    return (constituent, current_id)
def process_constituent_row(constituent, current_id):
    """Attach the media item in the current row to a constituent and record IIIF data.

    ``row`` and ``indices`` are free names resolved from the surrounding
    scope rather than parameters. Besides updating ``constituent``, rows with
    a non-empty ``drs_id`` also update the ``CONSTITUENT_RELATIONS`` mapping
    with the resource, label and metadata read from the media item's manifest.

    Args:
        constituent: dict accumulated so far for ``current_id``.
        current_id: id of the constituent that ``constituent`` belongs to.

    Returns:
        A ``(constituent, current_id)`` tuple for the constituent the current
        row belongs to; the dict is empty when that constituent cannot be
        found.
    """
    def null_to_blank(index_name):
        # Row value for ``index_name``; the literal "null" (any case) becomes "".
        raw = row[indices[index_name]]
        return "" if raw.lower() == "null" else raw

    constituent_id = row[indices['constituent_id_index']]
    type_key = int(row[indices['type_id_index']])
    constituent_type = CONSTITUENTTYPES.get(type_key)

    if constituent_id != current_id:
        # A constituent usually spans several rows (one per related photo),
        # so the previous one is only saved once a new constituent id shows up.
        save(constituent)
        current_id = constituent_id
        if not elasticsearch_connection.item_exists(constituent_id, constituent_type, ELASTICSEARCH_INDEX):
            print("%s could not be found!" % constituent_id)
            return ({}, current_id)
        constituent = elasticsearch_connection.get_item(constituent_id, constituent_type, ELASTICSEARCH_INDEX)

    related_items = constituent.setdefault('relateditems', {})

    media_type_key = int(row[indices['media_type_id_index']])
    media_type = MEDIATYPES.get(media_type_key)
    number = null_to_blank('rendition_number_index')
    media_master_id = row[indices['media_master_id_index']]
    main_url = get_media_url(row[indices['main_path_index']],
                             row[indices['main_file_index']])
    description = null_to_blank('description_index')
    mediaview = null_to_blank('media_view_index')
    caption = null_to_blank('caption_index')
    display_text = ": ".join([mediaview, caption])
    drs_id = null_to_blank('drs_id')
    has_manifest = drs_id != ""
    primary_display = row[indices['primary_display_index']] == '1'

    thumbnail_url = get_media_url(row[indices['thumb_path_index']],
                                  row[indices['thumb_file_index']])
    if drs_id and not thumbnail_url:
        thumbnail_url = create_thumbnail_url(drs_id)

    # this is a bit of a hack because the MediaFormats for videos (in the TMS
    # database) does not correctly identify the type of video
    # so, make sure we are only using videos that are mp4s
    if media_type_key == 3 and not row[indices['main_file_index']].endswith('mp4'):
        return (constituent, current_id)

    media_items = related_items.setdefault(media_type, [])

    # add primary photo as a top level item as well
    if primary_display:
        constituent['primarydisplay'] = {
            'thumbnail': thumbnail_url,
            'main': main_url,
            'displaytext': display_text,
            'number': number,
            'description': description,
            'has_manifest': has_manifest,
            'media_id': media_master_id,
        }

    media_items.append({
        'id': media_master_id,
        'displaytext': display_text,
        'primarydisplay': primary_display,
        'thumbnail': thumbnail_url,
        'main': main_url,
        'number': number,
        'description': description,
        'has_manifest': has_manifest,
        'drs_id': drs_id,
    })

    if has_manifest:
        manifest_doc = elasticsearch_connection.get_item(
            media_type + '-' + media_master_id, 'manifest', ELASTICSEARCH_IIIF_INDEX)
        manifest = manifest_doc['manifest']
        resource = manifest['sequences'][0]['canvases'][0]['images'][0]['resource']
        canvas_label = manifest['description']
        # add photo manifest-level metadata as canvas-level metadata for constituent
        canvas_metadata = manifest['metadata']

        if constituent_id in CONSTITUENT_RELATIONS:
            relation = CONSTITUENT_RELATIONS[constituent_id]
            relation['resources'].append(resource)
            relation['drs_ids'].append(drs_id)
            relation['canvas_labels'].append(canvas_label)
            relation['canvas_metadatas'].append(canvas_metadata)
        else:
            metadata = add_metadata_to_manifest(constituent)
            CONSTITUENT_RELATIONS[constituent_id] = {
                'description': constituent['remarks'],
                'label': constituent['displaytext'],
                'resources': [resource],
                'type': constituent_type,
                'drs_ids': [drs_id],
                'canvas_labels': [canvas_label],
                'canvas_metadatas': [canvas_metadata],
                'metadata': metadata,
            }

        if primary_display:
            CONSTITUENT_RELATIONS[constituent_id]['startCanvas'] = drs_id

    return (constituent, current_id)