def save(media):
    """Index a media record in Elasticsearch under its media type.

    Nothing is indexed when ``media`` is falsy or has no ``'id'`` key.
    A record whose ``'mediatype'`` is missing or empty is reported on
    stdout and skipped.

    Args:
        media: dict-like media record; it is serialized with
            ``json.dumps`` and its ``'mediatype'`` value is passed as the
            third argument to ``add_or_update_item``.
    """
    if not media or 'id' not in media:
        return

    # .get() so that a record without the key at all is treated the same
    # as one with an empty value instead of raising KeyError.
    mediatype = media.get('mediatype')
    if not mediatype:
        # TODO: ignore for now, but this should send an email notification
        # that there is missing data so that the classifications.py file
        # can be updated.
        # Single-argument print(...) emits the same text on Python 2 and 3.
        print("%s is missing a media type, ignoring for now" % (media['id'],))
        return

    elasticsearch_connection.add_or_update_item(
        media['id'], json.dumps(media), mediatype)
def save(object):
    """Index an object record in Elasticsearch under its classification.

    Nothing is indexed when ``object`` is falsy or has no ``'id'`` key.
    A record whose ``'classification'`` is missing or empty is reported on
    stdout and skipped.

    Args:
        object: dict-like record; it is serialized with ``json.dumps`` and
            its ``'classification'`` value is passed as the third argument
            to ``add_or_update_item``. (The parameter name shadows the
            builtin but is kept so keyword callers keep working.)
    """
    if not object or 'id' not in object:
        return

    # .get() so an absent key is handled like an empty value rather than
    # raising KeyError.
    classification = object.get('classification')
    if not classification:
        # TODO: ignore for now, but this should send an email notification
        # that there is missing data so that the classifications.py file
        # can be updated.
        # Single-argument print(...) emits the same text on Python 2 and 3.
        print("%s is missing a classification, ignoring for now" % (object['id'],))
        return

    elasticsearch_connection.add_or_update_item(
        object['id'], json.dumps(object), classification)
def save(constituent):
    """Index a constituent record in Elasticsearch under its type.

    Nothing is indexed when ``constituent`` is falsy or has no ``'id'``
    key. A record whose ``'type'`` is missing or empty is reported on
    stdout and skipped.

    Args:
        constituent: dict-like record; it is serialized with
            ``json.dumps`` and its ``'type'`` value is passed as the third
            argument to ``add_or_update_item``.
    """
    if not constituent or 'id' not in constituent:
        return

    # .get() so an absent key is handled like an empty value rather than
    # raising KeyError.
    constituent_type = constituent.get('type')
    if not constituent_type:
        # Single-argument print(...) emits the same text on Python 2 and 3.
        print("%s is missing a type, ignoring for now" % (constituent['id'],))
        return

    elasticsearch_connection.add_or_update_item(
        constituent['id'], json.dumps(constituent), constituent_type)
def save(object):
    """Send an object record to Elasticsearch, typed by its classification.

    Records that are falsy or lack an ``'id'`` key are ignored silently.
    Records with a missing or empty ``'classification'`` are reported on
    stdout and not indexed.

    Args:
        object: dict-like record. The whole record is serialized with
            ``json.dumps``; ``object['classification']`` is passed as the
            third argument to ``add_or_update_item``. (The name shadows
            the builtin but is part of the existing signature.)
    """
    if not object or 'id' not in object:
        return

    # Treat "key absent" the same as "value empty"; the original indexing
    # expression raised KeyError for the former.
    classification = object.get('classification')
    if not classification:
        # TODO: ignore for now, but this should send an email notification
        # that there is missing data so that the classifications.py file
        # can be updated.
        # A parenthesized single argument prints identically on Python 2
        # and Python 3.
        print("%s is missing a classification, ignoring for now" % (object['id'],))
        return

    elasticsearch_connection.add_or_update_item(
        object['id'], json.dumps(object), classification)
def save_manifest(manifest, id):
    """Index a manifest as type 'manifest' in the IIIF index.

    The manifest is skipped when it is falsy or has no ``'id'`` key. Note
    that the document is stored under the ``id`` argument, not under
    ``manifest['id']``.

    Args:
        manifest: dict-like manifest, serialized with ``json.dumps``.
        id: identifier passed as the first argument to
            ``add_or_update_item``.
    """
    if not manifest or 'id' not in manifest:
        return

    payload = json.dumps(manifest)
    elasticsearch_connection.add_or_update_item(
        id, payload, 'manifest', ELASTICSEARCH_IIIF_INDEX)
def save(site):
    """Index a site record as type 'sites' in ELASTICSEARCH_INDEX.

    Falsy records and records without an ``'id'`` key are skipped.

    Args:
        site: dict-like site record, serialized with ``json.dumps`` and
            stored under ``site['id']``.
    """
    if not site or 'id' not in site:
        return

    site_id = site['id']
    payload = json.dumps(site)
    elasticsearch_connection.add_or_update_item(
        site_id, payload, 'sites', ELASTICSEARCH_INDEX)
def save(site):
    """Index a site record as type 'sites'.

    Falsy records and records without an ``'id'`` key are skipped. No
    index argument is passed to ``add_or_update_item`` here.

    Args:
        site: dict-like site record, serialized with ``json.dumps`` and
            stored under ``site['id']``.
    """
    if not site or 'id' not in site:
        return

    site_id = site['id']
    payload = json.dumps(site)
    elasticsearch_connection.add_or_update_item(site_id, payload, 'sites')
def save(pub):
    """Index a published document as type 'pubdocs' in ELASTICSEARCH_INDEX.

    Falsy records and records without an ``'id'`` key are skipped.

    Args:
        pub: dict-like publication record, serialized with ``json.dumps``
            and stored under ``pub['id']``.
    """
    if not pub or 'id' not in pub:
        return

    pub_id = pub['id']
    payload = json.dumps(pub)
    elasticsearch_connection.add_or_update_item(
        pub_id, payload, 'pubdocs', ELASTICSEARCH_INDEX)
def create_library():
    """Rebuild the 'library' documents from the indexed 'pubdocs'.

    Steps, all against ELASTICSEARCH_INDEX:

    1. Search the 'library' type (up to 500 hits) and delete each hit.
    2. Count the 'pubdocs' documents, then page through them 20 at a time.
    3. For every pubdoc that has a PDF, add an entry to each of its
       authors' library records (one record per author, keyed by the
       author name with spaces removed) and write the record back after
       every addition, with its docs sorted by ``'sorttext'``.

    Documents without authors are filed under 'No Author'.
    """
    print("Creating Digital Library...")
    # for some reason the library isn't always fully populated. see if a
    # time delay helps
    time.sleep(3)

    # Set rather than list: membership is checked once per (doc, author).
    seen_author_ids = set()
    page_size = 20
    offset = 0
    es = elasticsearch_connection.get_connection()
    es_index = ELASTICSEARCH_INDEX

    # delete library
    # NOTE(review): only the first 500 library hits are deleted; if more
    # author records than that can exist, older ones would survive - confirm.
    existing = es.search(index=es_index, doc_type='library', body={
        "size": 500,
        "stored_fields": ["_id", "name"],
        "query": {
            "match_all": {}
        }
    })['hits']['hits']
    for hit in existing:
        elasticsearch_connection.delete(hit['_id'], 'library', ELASTICSEARCH_INDEX)

    total = es.search(index=es_index, doc_type='pubdocs', body={
        "size": 0,
        "query": {
            "match_all": {}
        }
    })['hits']['total']

    # NOTE(review): from/size paging with no explicit sort; presumably the
    # hit order is stable enough between pages - verify.
    while offset < total:
        page = es.search(index=es_index, doc_type='pubdocs', body={
            "size": page_size,
            "from": offset,
            "query": {
                "match_all": {}
            }
        })
        for hit in page['hits']['hits']:
            result = hit['_source']

            # Skip docs with no usable PDF: key absent, empty string, or
            # null (a null URL would produce a dead library entry).
            if not result.get('pdf'):
                continue

            # if this doc has no authors (key absent, null or empty list),
            # set the author to 'No Author' and proceed
            authors = result.get('authors') or ['No Author']

            # Prefer 'notes' for sorting; fall back to the title when
            # 'notes' is null or not present at all.
            sort_text = result.get('notes')
            if sort_text is None:
                sort_text = result['title']

            for author in authors:
                author_id = author.replace(' ', '')

                # see if this author already exists
                if author_id in seen_author_ids:
                    author_data = elasticsearch_connection.get_item(
                        author_id, 'library', ELASTICSEARCH_INDEX)
                else:
                    seen_author_ids.add(author_id)
                    # Sort name: lower-cased, stripped, with non-ASCII
                    # characters dropped after NFD decomposition.
                    sortauthor = author.lower().strip()
                    sortauthor = str(
                        unicodedata.normalize('NFD', sortauthor).encode(
                            'ascii', 'ignore').decode("utf-8"))
                    author_data = {
                        'name': author,
                        'sortname': sortauthor,
                        'docs': [],
                    }

                author_data['docs'].append({
                    'displaytext': result['boilertext'],
                    'sorttext': sort_text,
                    'format': result['format'],
                    # add file size
                    'url': result['pdf']
                })
                author_data['docs'].sort(key=operator.itemgetter('sorttext'))

                data = json.dumps(author_data)
                elasticsearch_connection.add_or_update_item(
                    author_id, data, 'library', ELASTICSEARCH_INDEX)

        offset = offset + page_size

    print("Finished Digital Library...")
def save(constituent):
    """Send a constituent record to Elasticsearch, typed by its "type".

    Records that are falsy or lack an "id" key are ignored silently.
    Records with a missing or empty "type" are reported on stdout and not
    indexed.

    Args:
        constituent: dict-like record. The whole record is serialized
            with ``json.dumps``; its "type" value is passed as the third
            argument to ``add_or_update_item``.
    """
    if not constituent or "id" not in constituent:
        return

    # Treat "key absent" the same as "value empty"; plain indexing raised
    # KeyError for the former.
    constituent_type = constituent.get("type")
    if not constituent_type:
        # A parenthesized single argument prints identically on Python 2
        # and Python 3.
        print("%s is missing a type, ignoring for now" % (constituent["id"],))
        return

    elasticsearch_connection.add_or_update_item(
        constituent["id"], json.dumps(constituent), constituent_type)
def save(pub):
    """Index a published document as type 'pubdocs'.

    Falsy records and records without an ``'id'`` key are skipped. No
    index argument is passed to ``add_or_update_item`` here.

    Args:
        pub: dict-like publication record, serialized with ``json.dumps``
            and stored under ``pub['id']``.
    """
    if not pub or 'id' not in pub:
        return

    pub_id = pub['id']
    payload = json.dumps(pub)
    elasticsearch_connection.add_or_update_item(pub_id, payload, 'pubdocs')
def create_library():
    """Rebuild the 'library' documents from the indexed 'pubdocs'.

    Steps, against ``elasticsearch_connection.ELASTICSEARCH_INDEX``:

    1. Search the 'library' type (up to 500 hits) and delete each hit.
    2. Count the 'pubdocs' documents, then page through them 20 at a time.
    3. For every pubdoc that has a PDF, add an entry to each of its
       authors' library records (one record per author, keyed by the
       author name with spaces removed) and write the record back after
       every addition, with its docs sorted by ``'displaytext'``.

    Documents without authors are filed under 'No Author'.
    """
    # Single-argument print(...) emits the same text on Python 2 and 3,
    # whereas the bare print statement is a syntax error on Python 3.
    print("Creating Digital Library...")

    # Set rather than list: membership is checked once per (doc, author).
    seen_author_ids = set()
    page_size = 20
    offset = 0
    es = elasticsearch_connection.get_connection()
    es_index = elasticsearch_connection.ELASTICSEARCH_INDEX

    # delete library
    # NOTE(review): only the first 500 library hits are deleted; if more
    # author records than that can exist, older ones would survive - confirm.
    existing = es.search(index=es_index, doc_type='library', body={
        "size": 500,
        "fields": ["_id", "name"],
        "query": {
            "match_all": {}
        }
    })['hits']['hits']
    for hit in existing:
        elasticsearch_connection.delete(hit['_id'], 'library')

    total = es.search(index=es_index, doc_type='pubdocs', body={
        "size": 0,
        "query": {
            "match_all": {}
        }
    })['hits']['total']

    # NOTE(review): from/size paging with no explicit sort; presumably the
    # hit order is stable enough between pages - verify.
    while offset < total:
        page = es.search(index=es_index, doc_type='pubdocs', body={
            "size": page_size,
            "from": offset,
            "query": {
                "match_all": {}
            }
        })
        for hit in page['hits']['hits']:
            result = hit['_source']

            # Skip docs with no usable PDF: key absent, empty string, or
            # null (a null URL would produce a dead library entry).
            if not result.get('pdf'):
                continue

            # if this doc has no authors (key absent, null or empty list),
            # set the author to 'No Author' and proceed
            authors = result.get('authors') or ['No Author']

            for author in authors:
                author_id = author.replace(' ', '')

                # see if this author already exists
                if author_id in seen_author_ids:
                    author_data = elasticsearch_connection.get_item(author_id, 'library')
                else:
                    seen_author_ids.add(author_id)
                    author_data = {
                        'name': author,
                        'docs': [],
                    }

                author_data['docs'].append({
                    'displaytext': result['boilertext'],
                    'format': result['format'],
                    # add file size
                    'url': result['pdf']
                })
                author_data['docs'].sort(key=operator.itemgetter('displaytext'))

                data = json.dumps(author_data)
                elasticsearch_connection.add_or_update_item(author_id, data, 'library')

        offset = offset + page_size

    print("Finished Digital Library...")
def save(manifest):
    """Index a manifest as type 'manifest' in ELASTICSEARCH_INDEX.

    Falsy manifests and manifests without an ``'id'`` key are skipped.

    Args:
        manifest: dict-like manifest, serialized with ``json.dumps`` and
            stored under ``manifest['id']``.
    """
    if not manifest or 'id' not in manifest:
        return

    manifest_id = manifest['id']
    payload = json.dumps(manifest)
    elasticsearch_connection.add_or_update_item(
        manifest_id, payload, 'manifest', ELASTICSEARCH_INDEX)