def make_doi_metadata_by_uuid(self, uuid, oc_item=None):
    """Makes EZID metadata for a DOI from an Open Context item.

    (The original docstring wrongly said "ARK id"; this method builds
    DOI metadata via metaDOI.)

    :param uuid: UUID of the Open Context item
    :param oc_item: optional pre-loaded OCitem instance; when None a new
        OCitem is instantiated and checked for existence
    :return: dict of DOI metadata including a '_target' URI, or None if
        the item does not exist
    """
    metadata = None
    if oc_item is None:
        oc_item = OCitem()
        # check_exists() loads the manifest and sets oc_item.exists
        oc_item.check_exists(uuid)
    if oc_item.exists:
        oc_item.generate_json_ld()
        meta_doi = metaDOI()
        if 'dc-terms:title' in oc_item.json_ld:
            meta_doi.title = oc_item.json_ld['dc-terms:title']
        # Prefer the issued date for the publication year, fall back to
        # the modified date, then to the current year.
        if 'dc-terms:issued' in oc_item.json_ld:
            meta_doi.publicationyear = oc_item.json_ld['dc-terms:issued']
        elif 'dc-terms:modified' in oc_item.json_ld:
            meta_doi.publicationyear = oc_item.json_ld['dc-terms:modified']
        else:
            meta_doi.publicationyear = str(datetime.datetime.now().year)
        # Contributors take precedence; creators are used only when no
        # contributors were found.
        creator_list = []
        for dc_item in oc_item.json_ld.get('dc-terms:contributor', []):
            creator_list.append(str(dc_item['label']))
        if 'dc-terms:creator' in oc_item.json_ld and not creator_list:
            for dc_item in oc_item.json_ld['dc-terms:creator']:
                creator_list.append(str(dc_item['label']))
        meta_doi.make_creator_list(creator_list)
        metadata = meta_doi.make_metadata_dict()
        metadata['_target'] = oc_item.json_ld['id']
    return metadata
def make_ark_metadata_by_uuid(self, uuid, oc_item=None):
    """Makes EZID metadata for an ARK id from an Open Context item.

    :param uuid: UUID of the Open Context item
    :param oc_item: optional pre-loaded OCitem instance; when None a new
        OCitem is instantiated and checked for existence
    :return: dict of ARK metadata including a '_target' URI, or None if
        the item does not exist
    """
    metadata = None
    if oc_item is None:
        oc_item = OCitem()
        # check_exists() loads the manifest and sets oc_item.exists
        oc_item.check_exists(uuid)
    if oc_item.exists:
        oc_item.generate_json_ld()
        meta_ark = metaARK()
        if 'dc-terms:title' in oc_item.json_ld:
            meta_ark.what = oc_item.json_ld['dc-terms:title']
        # Prefer the issued date for the 'when' field, fall back to the
        # modified date, then to the current year.
        if 'dc-terms:issued' in oc_item.json_ld:
            meta_ark.when = oc_item.json_ld['dc-terms:issued']
        elif 'dc-terms:modified' in oc_item.json_ld:
            meta_ark.when = oc_item.json_ld['dc-terms:modified']
        else:
            meta_ark.when = str(datetime.datetime.now().year)
        # Contributors take precedence; creators are used only when no
        # contributors were found.
        who_list = []
        for who_item in oc_item.json_ld.get('dc-terms:contributor', []):
            who_list.append(str(who_item['label']))
        if 'dc-terms:creator' in oc_item.json_ld and not who_list:
            for who_item in oc_item.json_ld['dc-terms:creator']:
                who_list.append(str(who_item['label']))
        meta_ark.make_who_list(who_list)
        metadata = meta_ark.make_metadata_dict()
        metadata['_target'] = oc_item.json_ld['id']
    return metadata
def make_save_ark_by_uuid(self, uuid, metadata=None):
    """Makes and saves an ARK identifier for an item by its uuid.

    :param uuid: UUID of the Open Context item
    :param metadata: optional dict of ARK metadata; when None it is
        generated from the item's JSON-LD
    :return: True if an ARK id was minted and saved, False otherwise
    """
    ok = False
    oc_uri = None
    arks = StableIdentifer.objects.filter(uuid=uuid, stable_type='ark')[:1]
    if len(arks) < 1:
        # the item doesn't yet have an ARK id, so make one!
        oc_item = OCitem()
        oc_item.check_exists(uuid)  # sets oc_item.exists
        if oc_item.exists:
            if metadata is None:
                metadata = self.make_ark_metadata_by_uuid(uuid, oc_item)
            if isinstance(metadata, dict):
                # Prefer the target URI already in the metadata; otherwise
                # build the canonical Open Context URI for the item.
                if '_target' in metadata:
                    oc_uri = metadata['_target']
                else:
                    oc_uri = URImanagement.make_oc_uri(oc_item.manifest.uuid,
                                                       oc_item.item_type)
                if isinstance(oc_uri, str):
                    print('Make ARK id for: ' + oc_uri)
                    ark_id = self.ezid.mint_identifier(oc_uri, metadata, 'ark')
                    if isinstance(ark_id, str):
                        # success! we have an ARK id!
                        # Store it without the 'ark:/' scheme prefix.
                        stable_id = ark_id.replace('ark:/', '')
                        ok = self.save_oc_item_stable_id(oc_item, stable_id, 'ark')
    return ok
def make_save_doi_by_uuid(self, uuid, metadata=None):
    """Makes and saves a DOI identifier for an item by its uuid.

    :param uuid: UUID of the Open Context item
    :param metadata: optional dict of DOI metadata; when None it is
        generated from the item's JSON-LD
    :return: True if a DOI was minted and saved, False otherwise
    """
    ok = False
    oc_uri = None
    dois = StableIdentifer.objects.filter(uuid=uuid, stable_type='doi')[:1]
    if len(dois) < 1:
        # the item doesn't yet have a DOI, so make one!
        oc_item = OCitem()
        oc_item.check_exists(uuid)  # sets oc_item.exists
        if oc_item.exists:
            if metadata is None:
                metadata = self.make_doi_metadata_by_uuid(uuid, oc_item)
            if isinstance(metadata, dict):
                # Prefer the target URI already in the metadata; otherwise
                # build the canonical Open Context URI for the item.
                if '_target' in metadata:
                    oc_uri = metadata['_target']
                else:
                    oc_uri = URImanagement.make_oc_uri(oc_item.manifest.uuid,
                                                       oc_item.item_type)
                if isinstance(oc_uri, str):
                    print('Make DOI id for: ' + oc_uri)
                    ezid_response = self.ezid.mint_identifier(oc_uri, metadata, 'doi')
                    if self.do_test:
                        print('EZID response: ' + str(ezid_response))
                    if isinstance(ezid_response, str):
                        if '|' in ezid_response:
                            # EZID can return multiple pipe-delimited ids;
                            # save only the 'doi:' one(s).
                            for resp_id in ezid_response.split('|'):
                                if 'doi:' in resp_id:
                                    ok = self.save_oc_item_stable_id(oc_item, resp_id, 'doi')
                        else:
                            ok = self.save_oc_item_stable_id(oc_item, ezid_response, 'doi')
    return ok
def save_item_and_rels(self, uuid, archive_proj_uuid, do_rels=True):
    """ saves an item based on its uuid, and optionally ALSO saves related items

        archive_proj_uuid is the uuid for the project we're archiving now.
        An item in that archive may actually come from another project, but
        is included in this archive because of a dependency through referencing
        (context, people)

        :param uuid: UUID of the item to serialize and save
        :param archive_proj_uuid: UUID of the project being archived
        :param do_rels: when True, recursively save related items; the
            recursive calls pass False, so related items are only followed
            one level deep
        :return: True if the item is (or already was) saved, else False
    """
    if uuid in self.saved_uuids:
        # we have a memory of this already saved
        item_saved = True
    else:
        item_saved = False
        archive_proj = self.get_proj_manifest_obj(archive_proj_uuid)
        oc_item = OCitem(True)  # use canonical URIs
        exists = oc_item.check_exists(uuid)
        if exists and archive_proj is not None:
            act_dirs = []
            # the archive project slug is the directory
            act_dirs.append(archive_proj.slug)
            item_type = oc_item.manifest.item_type
            if item_type != 'projects':
                # put items of different types into different directories
                act_dirs.append(item_type)
            file_name = oc_item.manifest.uuid + '.json'
            file_exists = self.check_exists(act_dirs, file_name)
            if file_exists:
                # we already saved it
                item_saved = True
            else:
                # we have not made the file, so make it now
                oc_item.generate_json_ld()
                self.save_serialized_json(act_dirs, file_name, oc_item.json_ld)
                # re-check on disk to confirm the write actually succeeded
                new_file_exists = self.check_exists(act_dirs, file_name)
                if new_file_exists:
                    # we saved the new file!
                    item_saved = True
                    self.saved_uuids.append(oc_item.manifest.uuid)
                else:
                    # we have a problem with this file
                    item_saved = False
                    print('ERROR! Did not save: ' + oc_item.manifest.uuid)
                    self.error_uuids.append(oc_item.manifest.uuid)
            if do_rels:
                # NOTE(review): if the file already existed, generate_json_ld()
                # was never called here, so oc_item.json_ld may be unset —
                # presumably get_related_uuids tolerates that; verify.
                rel_uuids = self.get_related_uuids(oc_item.json_ld)
                for rel_uuid in rel_uuids:
                    rel_saved = self.save_item_and_rels(rel_uuid,
                                                        archive_proj_uuid,
                                                        False)
    return item_saved
def save_item_and_rels(self, uuid, archive_proj_uuid, do_rels=True):
    """Saves an item based on its uuid, and optionally ALSO saves related items.

    archive_proj_uuid is the uuid for the project we're archiving now.
    An item in that archive may actually come from another project, but
    is included in this archive because of a dependency through
    referencing (context, people).

    :param uuid: UUID of the item to serialize and save
    :param archive_proj_uuid: UUID of the project being archived
    :param do_rels: when True, recursively save related items; recursive
        calls pass False, so relations are followed only one level deep
    :return: True if the item is (or already was) saved, else False
    """
    if uuid in self.saved_uuids:
        # we have a memory of this already saved
        return True
    item_saved = False
    archive_proj = self.get_proj_manifest_obj(archive_proj_uuid)
    oc_item = OCitem(True)  # use canonical URIs
    exists = oc_item.check_exists(uuid)
    if exists and archive_proj is not None:
        # The archive project slug is the directory; items of types other
        # than 'projects' go into per-type subdirectories.
        act_dirs = [archive_proj.slug]
        item_type = oc_item.manifest.item_type
        if item_type != 'projects':
            act_dirs.append(item_type)
        file_name = oc_item.manifest.uuid + '.json'
        if self.check_exists(act_dirs, file_name):
            # we already saved it
            item_saved = True
        else:
            # we have not made the file, so make it now
            oc_item.generate_json_ld()
            self.save_serialized_json(act_dirs, file_name, oc_item.json_ld)
            # re-check on disk to confirm the write actually succeeded
            if self.check_exists(act_dirs, file_name):
                item_saved = True
                self.saved_uuids.append(oc_item.manifest.uuid)
            else:
                # we have a problem with this file
                item_saved = False
                print('ERROR! Did not save: ' + oc_item.manifest.uuid)
                self.error_uuids.append(oc_item.manifest.uuid)
        if do_rels:
            # NOTE(review): when the file already existed, generate_json_ld()
            # was never called, so oc_item.json_ld may be unset — presumably
            # get_related_uuids tolerates that; verify.
            for rel_uuid in self.get_related_uuids(oc_item.json_ld):
                self.save_item_and_rels(rel_uuid, archive_proj_uuid, False)
    return item_saved
def make_save_ark_by_uuid(self, uuid, metadata=None):
    """Mints and saves an ARK identifier for the item with this uuid.

    Returns None when the item already has an ARK or does not exist.
    Raises RuntimeError when metadata or the target URI cannot be made,
    and RuntimeWarning when EZID fails to mint the ARK. Otherwise
    returns the result of saving the minted stable id.
    """
    existing_arks = StableIdentifer.objects.filter(
        uuid=uuid,
        stable_type='ark'
    )[:1]
    if len(existing_arks) > 0:
        print('uuid {} has an ARK'.format(uuid))
        return None
    # No ARK yet for this item, so mint one.
    oc_item = OCitem()
    oc_item.check_exists(uuid)
    if not oc_item.exists:
        print('uuid {} does not exist'.format(uuid))
        return None
    if metadata is None:
        metadata = self.make_ark_metadata_by_uuid(uuid, oc_item)
    if not isinstance(metadata, dict):
        raise RuntimeError('Cannot make metadata for {}'.format(uuid))
    # Prefer the target URI recorded in the metadata; otherwise build
    # the canonical Open Context URI for the item.
    if '_target' in metadata:
        target_uri = metadata['_target']
    else:
        target_uri = URImanagement.make_oc_uri(
            oc_item.manifest.uuid,
            oc_item.item_type
        )
    if not isinstance(target_uri, str):
        raise RuntimeError(
            'Invalid URI for {} item_type {}'.format(
                oc_item.manifest.uuid,
                oc_item.item_type
            )
        )
    print('Make ARK id for: ' + target_uri)
    minted_id = self.ezid.mint_identifier(target_uri, metadata, 'ark')
    if not isinstance(minted_id, str):
        raise RuntimeWarning('EZID failed minting an ARK for {}'.format(target_uri))
    # Success! Store the id without the 'ark:/' scheme prefix.
    return self.save_oc_item_stable_id(
        oc_item,
        minted_id.replace('ark:/', ''),
        'ark'
    )
def add_project_archive_dir_metadata(self, project_uuid, archive_dir, deposition_id):
    """Adds metadata about a project to a Zenodo deposition.

    :param project_uuid: UUID of the project manifest item
    :param archive_dir: directory whose contents file describes the deposit
    :param deposition_id: id of the Zenodo deposition to update
    :return: True on a successful metadata update, otherwise the raw
        update result (False) or None if preconditions were not met
    """
    ok = None
    dir_dict = self.arch_files_obj.get_dict_from_file(
        archive_dir,
        self.dir_content_file_json
    )
    proj_item = OCitem(True)  # use canonical URIs
    if proj_item.check_exists(project_uuid) and isinstance(dir_dict, dict):
        proj_item.generate_json_ld()
        zenodo_meta = ArchiveMetadata().make_zenodo_proj_media_files_metadata(
            proj_item.json_ld,
            dir_dict,
            self.dir_content_file_json
        )
        ok = self.zenodo.update_metadata(deposition_id, zenodo_meta)
        if ok is not False:
            ok = True
            print('Metadata created and updated for: ' + str(deposition_id))
    return ok
def html_view_new(request, uuid): request = RequestNegotiation().anonymize_request(request) # Handle some content negotiation for the item. req_neg = RequestNegotiation('text/html') req_neg.supported_types = [] if 'HTTP_ACCEPT' in request.META: req_neg.check_request_support(request.META['HTTP_ACCEPT']) if not req_neg.supported: # The client may be wanting a non-HTML representation, so # use the following function to get it. return items_graph(request, uuid, item_type=ITEM_TYPE) # Proceed with constructing the HTML item ocitem = OCitemNew() if 'hashes' in request.GET: ocitem.assertion_hashes = True exists = ocitem.check_exists(uuid) if not exists: # Did not find a record for the table, check for redirects r_url = RedirectURL() r_ok = r_url.get_direct_by_type_id(ITEM_TYPE, uuid) if r_ok: # found a redirect!! return redirect(r_url.redirect, permanent=r_url.permanent) # raise Http404 raise Http404 # Construnct item the JSON-LD ocitem.generate_json_ld() rp = RootPath() base_url = rp.get_baseurl() proj_content = ProjectContent(ocitem.manifest.uuid, ocitem.manifest.slug, ocitem.json_ld) html_temp = HTMLtemplate() html_temp.proj_context_json_ld = ocitem.proj_context_json_ld html_temp.proj_content = proj_content.get_project_content() html_temp.read_jsonld_dict(ocitem.json_ld) template = loader.get_template('projects/view.html') context = {'item': html_temp, 'base_url': base_url, 'user': request.user} response = HttpResponse(template.render(context, request)) patch_vary_headers(response, ['accept', 'Accept', 'content-type']) return response
def make_save_doi_by_uuid(self, uuid, metadata=None): """ makes an saves an DOI identifier by a uuid """ ok = False oc_uri = None dois = StableIdentifer.objects.filter(uuid=uuid, stable_type='doi')[:1] if len(dois) < 1: # the item doesn't yet have an ARK id, so make one! oc_item = OCitem() exists = oc_item.check_exists(uuid) if oc_item.exists: if metadata is None: metadata = self.make_doi_metadata_by_uuid(uuid, oc_item) if isinstance(metadata, dict): if '_target' in metadata: oc_uri = metadata['_target'] else: oc_uri = URImanagement.make_oc_uri( oc_item.manifest.uuid, oc_item.item_type) if isinstance(oc_uri, str): print('Make DOI id for: ' + oc_uri) ezid_response = self.ezid.mint_identifier( oc_uri, metadata, 'doi') if self.do_test: print('EZID response: ' + str(ezid_response)) if isinstance(ezid_response, str): if '|' in ezid_response: resp_ex = ezid_response.split('|') for resp_id in resp_ex: if 'doi:' in resp_id: ok = self.save_oc_item_stable_id( oc_item, resp_id, 'doi') else: pass else: ok = self.save_oc_item_stable_id( oc_item, ezid_response, 'doi') return ok
def items_graph(request, identifier, return_media=None, item_type=None):
    """Django view: returns an item as JSON, JSON-LD, GeoJSON, or an RDF
    serialization, chosen by content negotiation.

    :param request: Django HttpRequest
    :param identifier: item identifier (uuid or slug)
    :param return_media: optional media type to force as the response type
    :param item_type: optional expected item type; a mismatch raises 404
    """
    # The new Open Context OCitem generator
    # that better integrates caching
    oc_item = OCitem()
    if 'hashes' in request.GET:
        # expose assertion hashes in the generated JSON-LD
        oc_item.assertion_hashes = True
    if not oc_item.check_exists(identifier):
        # Did not find a record for the table, check for redirects
        r_ok = False
        if item_type:
            r_url = RedirectURL()
            r_ok = r_url.get_direct_by_type_id(item_type, identifier)
        if r_ok:
            # found a redirect!!
            return redirect(r_url.redirect, permanent=r_url.permanent)
        # raise Http404
        raise Http404
    if item_type and item_type != oc_item.manifest.item_type:
        # We have a rare case where the item_type is wrong, even though we found
        # something in the manifest, so throw an error.
        raise Http404
    oc_item.generate_json_ld()
    req_neg = RequestNegotiation('application/json')
    req_neg.supported_types = ['application/ld+json']
    if (not item_type
            or item_type not in ['persons', 'types', 'predicates', 'tables']):
        # We don't have specified item type, or the item_type is
        # not for a resource that's lacking a geospatial component. Therefore,
        # support GeoJSON as a media type.
        req_neg.supported_types.append('application/vnd.geo+json')
    req_neg.supported_types += RDF_SERIALIZATIONS
    if 'HTTP_ACCEPT' in request.META:
        req_neg.check_request_support(request.META['HTTP_ACCEPT'])
    if return_media:
        # caller forces a specific output media type
        req_neg.check_request_support(return_media)
        req_neg.use_response_type = return_media
    # Associate the request media type with the request so we can
    # make sure that different representations of this resource get different
    # cache responses.
    request.content_type = req_neg.use_response_type
    if not req_neg.supported:
        # client wanted a mimetype we don't support
        response = HttpResponse(req_neg.error_message,
                                content_type=req_neg.use_response_type + "; charset=utf8",
                                status=415)
        patch_vary_headers(response, ['accept', 'Accept', 'content-type'])
        return response
    # Check first if the output is requested to be an RDF format
    graph_output = None
    if req_neg.use_response_type in RDF_SERIALIZATIONS:
        json_ld = oc_item.json_ld
        # We're making an RDF graph serialization, so consolidate all the
        # context resources so we don't have to make Web requests to generate
        # the graph
        consolidated_contexts = consolidate_contexts(oc_item.json_ld)
        json_ld['@context'] = consolidated_contexts
        # Now make and serialize the graph
        graph_output = graph_serialize(req_neg.use_response_type, json_ld)
    if graph_output:
        # Return with some sort of graph output
        response = HttpResponse(graph_output,
                                content_type=req_neg.use_response_type + "; charset=utf8")
        patch_vary_headers(response, ['accept', 'Accept', 'content-type'])
        return response
    # We're outputing JSON
    if (req_neg.use_response_type == 'application/ld+json'
            or return_media == 'application/ld+json'):
        # A hack to remove non-point features so JSON-LD will validate.
        json_ld = strip_non_point_features(oc_item.json_ld)
    else:
        json_ld = oc_item.json_ld
    json_output = json.dumps(json_ld, indent=4, ensure_ascii=False)
    if 'callback' in request.GET:
        # JSONP: wrap the JSON in the caller-supplied callback function
        funct = request.GET['callback']
        response = HttpResponse(funct + '(' + json_output + ');',
                                content_type='application/javascript' + "; charset=utf8")
        patch_vary_headers(response, ['accept', 'Accept', 'content-type'])
        return response
    else:
        response = HttpResponse(json_output,
                                content_type=req_neg.use_response_type + "; charset=utf8")
        patch_vary_headers(response, ['accept', 'Accept', 'content-type'])
        return response
def match_california_site(self, site_uuid):
    """ Attempts to match California site name with a tDAR site key word

        Looks up the item's alternate site/place names in its JSON-LD,
        queries the tDAR API for each one, and for exact (case-insensitive)
        matches saves a LinkEntity and LinkAnnotation. Applies incremental
        backoff on tDAR request failures and exits the whole process after
        too many failures.

        :param site_uuid: UUID of the site item to match
        :return: number of exact matches found
    """
    found_matches = 0
    oc_item = OCitem()
    exists = oc_item.check_exists(site_uuid)
    if exists:
        # check whether we already have a tDAR subject annotation
        la_check = LinkAnnotation.objects\
            .filter(subject=site_uuid,
                    predicate_uri='dc-terms:subject',
                    object_uri__contains=self.TDAR_VOCAB)[:1]
        if exists and len(la_check) < 1:
            # we don't already have a tDAR id for this item, continue with matches
            # first, generate the item's JSON-LD
            oc_item.generate_json_ld()
            # collect alternate site/place names as lookup keywords
            request_keywords = []
            if 'oc-gen:has-obs' in oc_item.json_ld:
                if isinstance(oc_item.json_ld['oc-gen:has-obs'], list):
                    for obs in oc_item.json_ld['oc-gen:has-obs']:
                        if 'oc-pred:52-alternate-site-or-place-name' in obs:
                            if isinstance(obs['oc-pred:52-alternate-site-or-place-name'], list):
                                for name_obj in obs['oc-pred:52-alternate-site-or-place-name']:
                                    if 'xsd:string' in name_obj:
                                        if isinstance(name_obj['xsd:string'], str):
                                            name_str = name_obj['xsd:string']
                                            request_keywords.append(name_str)
            print('Checking names in tDAR: ' + '; '.join(request_keywords))
            for keyword in request_keywords:
                tdar_api = tdarAPI()
                results = tdar_api.get_site_keyword(keyword)
                if isinstance(results, list):
                    for result in results[:self.max_results]:
                        # assume it is a spurious match
                        match_real = False
                        lw_result = result['label'].lower()
                        lw_keyword = keyword.lower()
                        if lw_result == lw_keyword:
                            # the trinomial and the tDAR result exactly match
                            match_real = True
                        if match_real:
                            print('FOUND ' + result['label'])
                            found_matches += 1
                            # OK! Found a match, first save the linked entity
                            # in the link entity table
                            le_check = False
                            try:
                                le_check = LinkEntity.objects.get(uri=result['id'])
                            except LinkEntity.DoesNotExist:
                                le_check = False
                            if le_check is False:
                                le = LinkEntity()
                                le.uri = result['id']
                                le.label = result['label']
                                le.alt_label = result['label']
                                le.vocab_uri = self.TDAR_VOCAB
                                le.ent_type = 'type'
                                le.save()
                            # Now save the link annotation
                            la = LinkAnnotation()
                            la.subject = oc_item.manifest.uuid
                            la.subject_type = oc_item.manifest.item_type
                            la.project_uuid = oc_item.manifest.project_uuid
                            la.source_id = 'tdar-api-lookup'
                            la.predicate_uri = self.DC_TERMS_SUBJECT
                            la.object_uri = result['id']
                            la.save()
                        else:
                            print('Almost! ' + result['label'] +
                                  ' is not exactly: ' + keyword)
                if tdar_api.request_error:
                    # tDAR request failed: back off, and bail out of the
                    # whole process after too many cumulative failures
                    self.request_error = True
                    print('HTTP request to tDAR failed!')
                    self.error_wait += self.base_wait
                    if self.error_wait > self.max_wait:
                        print('Too many failures, quiting...')
                        sys.exit('Quitting process')
                    else:
                        # sleep some minutes before trying again
                        print('Will try again in ' + str(self.error_wait) + ' seconds...')
                        sleep(self.error_wait)
                else:
                    self.request_error = False
                    if self.error_wait >= self.base_wait:
                        print('HTTP requests resumed OK, will continue.')
                        self.error_wait = 0
    return found_matches
def save_media_files(self, man_obj, license_uri):
    """ saves media files

        Downloads (or reuses cached copies of) all binary files for a
        media manifest item into the current archive directory for the
        item's project and license, and records them in that directory's
        contents dict.

        :param man_obj: Manifest object for the media item
        :param license_uri: URI of the license governing the files
        :return: True when all of the item's files are accounted for
    """
    ok = False
    if isinstance(man_obj, Manifest):
        # first get metadata about the media item, especially creator + contribution information
        oc_item = OCitem(True)  # use canonical URIs
        exists = oc_item.check_exists(man_obj.uuid)
        # NOTE(review): generate_json_ld() is called even if check_exists()
        # found nothing — presumably man_obj came from the manifest so the
        # item always exists; verify.
        oc_item.generate_json_ld()
        project_uuid = man_obj.project_uuid
        part_num = self.get_current_part_num(license_uri, project_uuid)
        act_dir = self.make_act_files_dir_name(part_num,
                                               license_uri,
                                               project_uuid)
        dir_dict = self.dir_contents[project_uuid][act_dir]
        # now get the item media files!
        item_files_dict = self.get_item_media_files(man_obj)
        new_files = []
        files_ok = 0
        for file_uri_key, item_file_dict in item_files_dict.items():
            # print('Checking ' + file_uri_key)
            file_name = item_file_dict['filename']
            found = self.check_file_exists_in_all_project_dirs(project_uuid,
                                                               file_name)
            if found:
                files_ok += 1
            else:
                # fall back to checking the current directory's contents record
                file_name = item_file_dict['filename']
                for dir_file in dir_dict['files']:
                    if dir_file['filename'] == file_name:
                        found = True
                        files_ok += 1
                        break
            if found is False:
                # we have a new file to save
                # Now set the full path to cache the file
                self.bin_file_obj.full_path_cache_dir = self.arch_files_obj.prep_directory(act_dir)
                # now retrieve and save the file
                # check first if there's a file in the temp-cache directory (from previous attempts)
                ok = self.copy_file_from_temp_cache(act_dir, file_name)
                if ok is False:
                    ok = self.bin_file_obj.get_cache_remote_file_content(file_name,
                                                                         file_uri_key)
                if ok:
                    files_ok += 1
                    # record category and citation metadata for the new file
                    dir_dict = self.record_associated_categories(dir_dict,
                                                                 oc_item.json_ld)
                    dir_dict = self.record_citation_people(dir_dict,
                                                           oc_item.json_ld)
                    new_files.append(item_file_dict)
                else:
                    self.errors.append(item_file_dict)
        dir_dict['size'] = self.arch_files_obj.get_directory_size(act_dir)
        print('Adding files for ' + man_obj.uuid + ' ' + str(len(new_files)))
        dir_dict['files'] += new_files
        # persist the updated directory contents record
        self.arch_files_obj.save_serialized_json(act_dir,
                                                 self.dir_content_file_json,
                                                 dir_dict)
        self.dir_contents[project_uuid][act_dir] = dir_dict
        if len(item_files_dict) == files_ok:
            # we have saved the expected number of files for this item
            ok = True
    return ok
def match_california_site(self, site_uuid):
    """ Attempts to match California site name with a tDAR site key word

        Reads the site item's alternate site/place names from its JSON-LD,
        queries the tDAR API for each, and saves LinkEntity / LinkAnnotation
        records for exact (case-insensitive) label matches. On repeated tDAR
        request failures, waits with incremental backoff and eventually
        terminates the process via sys.exit.

        :param site_uuid: UUID of the site item to match
        :return: number of exact matches found
    """
    found_matches = 0
    oc_item = OCitem()
    exists = oc_item.check_exists(site_uuid)
    if exists:
        # skip items that already have a tDAR subject annotation
        la_check = LinkAnnotation.objects\
            .filter(subject=site_uuid,
                    predicate_uri='dc-terms:subject',
                    object_uri__contains=self.TDAR_VOCAB)[:1]
        if exists and len(la_check) < 1:
            # we don't already have a tDAR id for this item, continue with matches
            # first, generate the item's JSON-LD
            oc_item.generate_json_ld()
            # gather alternate site/place names to use as search keywords
            request_keywords = []
            if 'oc-gen:has-obs' in oc_item.json_ld:
                if isinstance(oc_item.json_ld['oc-gen:has-obs'], list):
                    for obs in oc_item.json_ld['oc-gen:has-obs']:
                        if 'oc-pred:52-alternate-site-or-place-name' in obs:
                            if isinstance(
                                    obs['oc-pred:52-alternate-site-or-place-name'],
                                    list):
                                for name_obj in obs[
                                        'oc-pred:52-alternate-site-or-place-name']:
                                    if 'xsd:string' in name_obj:
                                        if isinstance(name_obj['xsd:string'], str):
                                            name_str = name_obj['xsd:string']
                                            request_keywords.append(name_str)
            print('Checking names in tDAR: ' + '; '.join(request_keywords))
            for keyword in request_keywords:
                tdar_api = tdarAPI()
                results = tdar_api.get_site_keyword(keyword)
                if isinstance(results, list):
                    for result in results[:self.max_results]:
                        # assume it is a spurious match
                        match_real = False
                        lw_result = result['label'].lower()
                        lw_keyword = keyword.lower()
                        if lw_result == lw_keyword:
                            # the trinomial and the tDAR result exactly match
                            match_real = True
                        if match_real:
                            print('FOUND ' + result['label'])
                            found_matches += 1
                            # OK! Found a match, first save the linked
                            # entity in the link entity table
                            le_check = False
                            try:
                                le_check = LinkEntity.objects.get(
                                    uri=result['id'])
                            except LinkEntity.DoesNotExist:
                                le_check = False
                            if le_check is False:
                                le = LinkEntity()
                                le.uri = result['id']
                                le.label = result['label']
                                le.alt_label = result['label']
                                le.vocab_uri = self.TDAR_VOCAB
                                le.ent_type = 'type'
                                le.save()
                            # Now save the link annotation
                            la = LinkAnnotation()
                            la.subject = oc_item.manifest.uuid
                            la.subject_type = oc_item.manifest.item_type
                            la.project_uuid = oc_item.manifest.project_uuid
                            la.source_id = 'tdar-api-lookup'
                            la.predicate_uri = self.DC_TERMS_SUBJECT
                            la.object_uri = result['id']
                            la.save()
                        else:
                            print('Almost! ' + result['label'] +
                                  ' is not exactly: ' + keyword)
                if tdar_api.request_error:
                    # failed request: increase the wait, and give up
                    # entirely once it exceeds the configured maximum
                    self.request_error = True
                    print('HTTP request to tDAR failed!')
                    self.error_wait += self.base_wait
                    if self.error_wait > self.max_wait:
                        print('Too many failures, quiting...')
                        sys.exit('Quitting process')
                    else:
                        # sleep some minutes before trying again
                        print('Will try again in ' + str(self.error_wait) + ' seconds...')
                        sleep(self.error_wait)
                else:
                    self.request_error = False
                    if self.error_wait >= self.base_wait:
                        print('HTTP requests resumed OK, will continue.')
                        self.error_wait = 0
    return found_matches