def description(project_id, *args):
    """Look up the description text for a publication by project ID."""
    pub_query = IndexedPublication.search()\
        .filter(Q({'term': {'projectId._exact': project_id}}))\
        .source(includes=['project.value.description'])
    # next() raises StopIteration if the project has no indexed publication.
    desc = next(hit.project.value.description
                for hit in pub_query.execute().hits)
    return {'description': desc}
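# Usage sketch (assumptions: called in a configured Django/Elasticsearch
# context; 'PRJ-1234' is a hypothetical project ID):
#
#     description('PRJ-1234')
#     # -> {'description': '<text of project.value.description>'}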
def test_listing(self, mock_search, mock_pub, mock_leg_pub):
    request = MagicMock()
    request.query_string = 'test_query'
    request.username = '******'
    fm = PublicationsSearchManager(request)
    mock_search().query().sort().extra().execute.return_value = [
        IndexedPublication(projectId='PRJ-XXX'),
        IndexedPublicationLegacy()
    ]
    mock_pub().to_file.return_value = {'type': 'pub'}
    mock_leg_pub().to_file.return_value = {'type': 'leg_pub'}

    res = fm.listing()
    expected_result = {
        'trail': [{'name': '$SEARCH', 'path': '/$SEARCH'}],
        'name': '$SEARCH',
        'path': '/',
        'system': None,
        'type': 'dir',
        'children': [{'type': 'pub'}, {'type': 'leg_pub'}],
        'permissions': 'READ'
    }
    self.assertEqual(res, expected_result)
def test_listing(self, mock_search, mock_pub, mock_leg_pub):
    fm = PublicationsManager(None)
    mock_search().filter().sort().extra().execute.return_value = [
        IndexedPublication(projectId='PRJ-XXX'),
        IndexedPublicationLegacy()
    ]
    mock_pub().to_file.return_value = {'type': 'pub'}
    mock_leg_pub().to_file.return_value = {'type': 'leg_pub'}

    res = fm.listing(type_filters=[])
    expected_result = {
        'trail': [{'name': '$SEARCH', 'path': '/$SEARCH'}],
        'name': '$SEARCH',
        'path': '/',
        'system': None,
        'type': 'dir',
        'children': [{'type': 'pub'}, {'type': 'leg_pub'}],
        'permissions': 'READ'
    }
    self.assertEqual(res, expected_result)
def get(self, request, project_id, revision=None):
    """
    Get a publication. If a revision is not supplied, return the "Original"
    publication. Include the latest revision if it is not the one being queried.
    """
    es_client = new_es_client()
    pub = BaseESPublication(project_id=project_id,
                            revision=revision,
                            using=es_client)
    latest_revision = IndexedPublication.max_revision(project_id=project_id,
                                                      using=es_client)
    latest_pub_dict = None
    if latest_revision > 0 and latest_revision != revision:
        latest_pub = BaseESPublication(project_id=project_id,
                                       revision=latest_revision,
                                       using=es_client)
        if latest_pub is not None and hasattr(latest_pub, 'project'):
            latest_pub_dict = latest_pub.to_dict()

    if pub is not None and hasattr(pub, 'project'):
        pub_dict = pub.to_dict()
        if pub_dict['project']['value']['projectType'] != 'other':
            metrics.info('Data Depot', extra={
                'user': request.user.username,
                'sessionId': getattr(request.session, 'session_key', ''),
                'operation': 'listing',
                'agent': request.META.get('HTTP_USER_AGENT'),
                'ip': get_client_ip(request),
                'info': {
                    'api': 'agave',
                    'systemId': 'designsafe.storage.published',
                    'filePath': project_id,
                    'query': {}
                }
            })
        if latest_pub_dict:
            pub_dict['latestRevision'] = latest_pub_dict
        return JsonResponse(pub_dict)
    else:
        return JsonResponse({'status': 404, 'message': 'Not found'},
                            status=404)
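# Response sketch (reconstructed from the code above): on success this returns
# the publication's to_dict() payload, with 'latestRevision' attached when a
# newer revision exists; otherwise a JSON 404:
#
#     {"status": 404, "message": "Not found"}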
def generate_manifest_other(project_id, version=None):
    """Build a checksum manifest for every file under an 'Other' publication."""
    doc = IndexedPublication.from_id(project_id, revision=version)
    uuid = doc.project.uuid
    if version:
        project_id = '{}v{}'.format(project_id, str(version))
    manifest = []
    archive_path = os.path.join(PUBLICATIONS_MOUNT_ROOT, project_id)
    for path in get_child_paths(archive_path):
        manifest.append({
            'parent_entity': uuid,
            'corral_path': path,
            'checksum': get_sha1_hash(path)
        })
    return manifest
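# Example manifest entry (shape only; the path and hash are illustrative):
#
#     {'parent_entity': '<project uuid>',
#      'corral_path': '<PUBLICATIONS_MOUNT_ROOT>/PRJ-1234v2/data.csv',
#      'checksum': 'da39a3ee5e6b4b0d3255bfef95601890afd80709'}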
def save_publication(publication, status='publishing'):
    """Save publication."""
    publication['projectId'] = publication['project']['value']['projectId']
    publication['created'] = datetime.datetime.now().isoformat()
    publication['status'] = status
    publication['version'] = 2
    publication['licenses'] = publication.pop('license', [])
    publication['license'] = ''

    try:
        pub = IndexedPublication.from_id(publication['projectId'])
        pub.update(**publication)
    except DocumentNotFound:
        pub = IndexedPublication(project_id=publication['projectId'],
                                 **publication)
    # A single save covers both the updated and the newly created document.
    pub.save()
    return pub
def initialize_publication(publication, status='publishing', revision=None,
                           revision_text=None, revision_titles=None):
    """Initialize a publication, or a new revision of one."""
    publication['projectId'] = publication['project']['value']['projectId']
    publication['status'] = status
    publication['version'] = 2
    publication['licenses'] = publication.pop('license', [])
    publication['license'] = ''
    es_client = new_es_client()

    if revision:
        # Revisions keep the original publication's creation date.
        base_pub = IndexedPublication.from_id(publication['projectId'],
                                              revision=None,
                                              using=es_client)
        publication['created'] = base_pub['created']
        publication['revision'] = revision
        publication['revisionDate'] = datetime.datetime.now().isoformat()
        publication['revisionText'] = revision_text
        if revision_titles:
            publication['revisionTitles'] = revision_titles
    elif 'created' not in publication:
        publication['created'] = datetime.datetime.now().isoformat()

    try:
        pub = IndexedPublication.from_id(publication['projectId'],
                                         revision=revision,
                                         using=es_client)
        pub.update(using=es_client, **publication)
    except DocumentNotFound:
        pub = IndexedPublication(project_id=publication['projectId'],
                                 **publication)
    pub.save(using=es_client)
    # Refresh the index so that search works in subsequent pipeline operations.
    IndexedPublication._index.refresh(using=es_client)
    return pub
def listing(offset=0, limit=100, limit_fields=True, *args):
    pub_query = IndexedPublication.search()
    pub_query = pub_query.filter(Q('term', status='published'))
    pub_query = pub_query.extra(from_=offset, size=limit)
    if limit_fields:
        pub_query = pub_query.source(includes=[
            'project.value.title',
            'project.value.pi',
            'project.value.keywords',
            'project.value.projectType',
            'project.value.dataType',
            'created',
            'projectId',
            'users',
            'system'
        ])
    pub_query = pub_query.sort({'created': {'order': 'desc'}})

    res = pub_query.execute()
    hits = [{**h.to_dict(),
             'pi': _get_user_by_username(h, h.project.value.pi)}
            for h in res.hits]
    return {'listing': hits}
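# Usage sketch (hypothetical values; returns published projects, newest first):
#
#     page = listing(offset=0, limit=10)
#     # -> {'listing': [{'projectId': 'PRJ-1234', 'pi': {...},
#     #                  'project': {...}, ...}, ...]}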
def post(self, request, **kwargs):
    """Amend a publication."""
    if request.is_ajax():
        data = json.loads(request.body)
    else:
        data = request.POST

    project_id = data['projectId']
    authors = data.get('authors', None)
    amendments = data.get('amendments', None)
    current_revision = IndexedPublication.max_revision(project_id=project_id)

    (tasks.amend_publication_data.s(project_id, amendments, authors,
                                    current_revision).set(queue='api') |
     tasks.zip_publication_files.si(project_id,
                                    current_revision).set(queue='files')
     ).apply_async()

    return JsonResponse({'success': 'Publication is being amended.'},
                        status=200)
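# Expected request body (keys reconstructed from the reads above; values are
# illustrative):
#
#     {"projectId": "PRJ-1234",
#      "authors": [...],        # optional
#      "amendments": {...}}     # optional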
def search(offset=0, limit=100, query_string='', limit_fields=True, *args):
    query_dict = json.loads(urllib.parse.unquote(query_string))
    type_filters = query_dict['typeFilters']
    has_type_filters = any(map(bool, type_filters.values()))

    def filter_query(project_type):
        return Q('term', **{'project.value.projectType._exact': project_type})

    selected_filters = [key for key in type_filters if bool(type_filters[key])]
    type_query = Q('bool', should=list(map(filter_query, selected_filters)))

    search = IndexedPublication.search()
    if has_type_filters:
        search = search.filter(type_query)

    query_filters = []

    # Query string fields
    author = query_dict['queries']['author']
    title = query_dict['queries']['title']
    keywords = query_dict['queries']['keyword']
    description = query_dict['queries']['description']
    if author:
        query_filters.append(search_utils.author_query(author))
    if title:
        query_filters.append(search_utils.title_query(title))
    if keywords:
        query_filters.append(search_utils.keyword_query(keywords))
    if description:
        query_filters.append(search_utils.description_query(description))

    # Experimental advanced filters
    facility_name = query_dict['advancedFilters']['experimental'][
        'experimentalFacility']
    experiment_type = query_dict['advancedFilters']['experimental'][
        'experimentType']
    if facility_name:
        query_filters.append(
            search_utils.experimental_facility_query(facility_name))
    if experiment_type:
        # Call the query builder; the original appended the bare function.
        query_filters.append(
            search_utils.experiment_type_query(experiment_type))

    # Simulation advanced filters
    simulation_type = query_dict['advancedFilters']['simulation'][
        'simulationType']
    if simulation_type:
        query_filters.append(
            search_utils.simulation_type_query(simulation_type))

    # Field recon advanced filters
    nh_type = query_dict['advancedFilters']['field_recon']['naturalHazardType']
    nh_event = query_dict['advancedFilters']['field_recon'][
        'naturalHazardEvent']
    if nh_type:
        query_filters.append(search_utils.nh_type_query(nh_type))
    if nh_event:
        query_filters.append(search_utils.nh_event_query(nh_event))

    # Other advanced filters
    data_type = query_dict['advancedFilters']['other']['dataType']
    if data_type:
        query_filters.append(search_utils.other_type_query(data_type))

    # Hybrid sim advanced filters. (The original chained assignment
    # "sim_type = data_type = ..." needlessly rebound data_type.)
    sim_type = query_dict['advancedFilters']['hybrid_simulation'][
        'hybridSimulationType']
    if sim_type:
        query_filters.append(search_utils.hybrid_sim_type_query(sim_type))

    search = search.filter('bool', must=query_filters)
    search = search.filter(Q('term', status='published'))
    search = search.extra(from_=offset, size=limit)
    if limit_fields:
        search = search.source(includes=[
            'project.value.title',
            'project.value.pi',
            'project.value.keywords',
            'project.value.projectType',
            'project.value.dataType',
            'created',
            'projectId',
            'users',
            'system'
        ])
    search = search.sort({'created': {'order': 'desc'}})

    res = search.execute()
    hits = [{**h.to_dict(),
             'pi': _get_user_by_username(h, h.project.value.pi)}
            for h in res.hits]
    return {'listing': hits}
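# For reference, search() expects query_string to be a URL-encoded JSON
# document shaped like the following (keys reconstructed from the reads above;
# values are illustrative):
#
#     {"typeFilters": {"experimental": true, "simulation": false},
#      "queries": {"author": "", "title": "", "keyword": "", "description": ""},
#      "advancedFilters": {
#          "experimental": {"experimentalFacility": "", "experimentType": ""},
#          "simulation": {"simulationType": ""},
#          "field_recon": {"naturalHazardType": "", "naturalHazardEvent": ""},
#          "other": {"dataType": ""},
#          "hybrid_simulation": {"hybridSimulationType": ""}}}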
def to_dataset_json(self):
    """
    Serialize project to JSON for Google Dataset Search.
    https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/BMNJPS
    """
    dataset_json = {
        "@context": "http://schema.org",
        "@type": "Dataset",
        "@id": "",
        "identifier": "",
        "logo": "https://www.designsafe-ci.org/static/images/nsf-designsafe-logo.014999b259f6.png",
        "name": self.title,
        "creator": [{
            "name": "",
            "affiliation": "",
            "@id": "",
            "identifier": ""
        }],
        "author": [{
            "name": "",
            "affiliation": "",
            "@id": "",
            "identifier": ""
        }],
        "datePublished": self.created,
        "dateModified": self.to_body_dict()['lastUpdated'],
        "description": self.description,
        "keywords": self.keywords.split(','),
        # schema.org expects a CreativeWork here, not a Dataset.
        "license": {
            "@type": "CreativeWork",
            "text": ""
        },
        "publisher": {
            "@type": "Organization",
            "name": "Designsafe-CI"
        },
        "provider": {
            "@type": "Organization",
            "name": "Designsafe-CI"
        },
        # schema.org expects a DataCatalog here, not an Organization.
        "includedInDataCatalog": {
            "@type": "DataCatalog",
            "name": "Designsafe-CI",
            "url": "https://designsafe-ci.org"
        },
    }

    if self.dois:
        dataset_json["distribution"] = {
            "@type": "DataDownload",
            "name": self.to_body_dict()['value']['projectId'] + "_archive.zip",
            "fileFormat": "application/zip",
            "contentSize": "",
            "@id": "",
            "identifier": ""
        }
        dataset_json['@id'] = self.dois[0]
        dataset_json['identifier'] = self.dois[0]
    else:
        related_ents = self.related_entities()
        logger.debug(related_ents)

    if getattr(self, 'team_order', False):
        authors = sorted(self.team_order, key=lambda x: x['order'])
    else:
        authors = [{'name': username}
                   for username in [self.pi] + self.co_pis]
    dataset_json['creator'] = generate_creators(authors)
    dataset_json['author'] = generate_creators(authors)

    try:
        pub = IndexedPublication.from_id(self.project_id)
        dataset_json['license'] = pub.licenses.works
    except (DocumentNotFound, AttributeError):
        pass
    try:
        pub = IndexedPublicationLegacy.from_id(self.project_id)
        dataset_json['license'] = pub.licenses.works
    except DocumentNotFound:
        pass

    return dataset_json
def walk_experimental(project_id, version=None):
    """
    Walk an experimental project and reconstruct parent/child relationships.

    Params
    ------
    project_id: Project ID to look up (e.g. PRJ-1234)

    Returns
    -------
    list: mapping dicts ordered parents-first, each in the form
        {'uuid': ..., 'container_path': 'path/relative/to/fcroot',
         'fedora_mapping': {...}, 'fileObjs': [...]}
    """
    from urllib import parse
    doc = IndexedPublication.from_id(project_id, revision=version)
    relation_map = []

    project_meta = format_metadata_for_fedora(project_id, version=version)
    if version:
        project_id = '{}v{}'.format(project_id, str(version))
    license = project_meta.get('license', None)
    full_author_list = []
    project_map = {
        'uuid': doc.project.uuid,
        'container_path': project_id,
        'fedora_mapping': {**project_meta, 'generated': [], 'license': None},
        'fileObjs': []
    }

    experiments_list = doc.experimentsList
    for expt in experiments_list:
        # Map each experiment and record it as generated by the project.
        expt_container_path = "{}/{}".format(project_id,
                                             parse.quote(expt.value.title))
        print('experiment ' + expt.value.title)
        exp_doi = expt.doi
        project_map['fedora_mapping']['generated'].append(
            'Experiment: {}'.format(exp_doi))
        experiment_map = {
            'uuid': expt.uuid,
            'container_path': expt_container_path,
            'fedora_mapping': {
                **format_experiment(expt),
                'license': license,
                'wasGeneratedBy': project_id,
                'generated': []
            },
            'fileObjs': expt.fileObjs
        }
        full_author_list += experiment_map['fedora_mapping']['creator']

        reports = filter(lambda report: expt.uuid in report.value.experiments,
                         getattr(doc, 'reportsList', []))
        for report in reports:
            # Map reports belonging to this experiment.
            report_container_path = "{}/{}".format(
                expt_container_path, parse.quote(report.value.title))
            print('\treport ' + report.value.title)
            experiment_map['fedora_mapping']['generated'].append(
                'Report: {}'.format(report.value.title))
            report_map = {
                'uuid': report.uuid,
                'fileObjs': report.fileObjs,
                'container_path': report_container_path,
                'fedora_mapping': {
                    **format_report(report),
                    'wasGeneratedBy': 'Experiment: {}'.format(exp_doi)
                }
            }
            relation_map.append(report_map)

        analysis_list = filter(
            lambda analysis: expt.uuid in analysis.value.experiments,
            getattr(doc, 'analysisList', []))
        for analysis in analysis_list:
            # Map analyses belonging to this experiment.
            analysis_container_path = "{}/{}".format(
                expt_container_path, parse.quote(analysis.value.title))
            print('\tanalysis ' + analysis.value.title)
            experiment_map['fedora_mapping']['generated'].append(
                'Analysis: {}'.format(analysis.value.title))
            analysis_map = {
                'uuid': analysis.uuid,
                'fileObjs': analysis.fileObjs,
                'container_path': analysis_container_path,
                'fedora_mapping': {
                    **format_analysis(analysis),
                    'wasGeneratedBy': 'Experiment: {}'.format(exp_doi)
                }
            }
            relation_map.append(analysis_map)

        model_configs = filter(
            lambda model_config: expt.uuid in model_config.value.experiments,
            getattr(doc, 'modelConfigs', []))
        for mc in model_configs:
            # Map model configurations, then their sensor lists and events.
            configs_container_path = "{}/{}".format(
                expt_container_path, parse.quote(mc.value.title))
            print('\tmodel config ' + mc.value.title)
            experiment_map['fedora_mapping']['generated'].append(
                'Model Configuration: {}'.format(mc.value.title))
            mc_map = {
                'uuid': mc.uuid,
                'fileObjs': mc.fileObjs,
                'container_path': configs_container_path,
                'fedora_mapping': {
                    **format_model_config(mc),
                    'wasGeneratedBy': exp_doi
                }
            }

            sensor_lists = filter(
                lambda sensor_list: (
                    mc.uuid in sensor_list.value.modelConfigs
                    and expt.uuid in sensor_list.associationIds),
                getattr(doc, 'sensorLists', []))
            for sl in sensor_lists:
                sl_container_path = "{}/{}".format(
                    configs_container_path, parse.quote(sl.value.title))
                print('\t\tsensor list ' + sl.value.title)
                experiment_map['fedora_mapping']['generated'].append(
                    'Sensor: {}'.format(sl.value.title))
                sl_map = {
                    'uuid': sl.uuid,
                    'fileObjs': sl.fileObjs,
                    'container_path': sl_container_path,
                    'fedora_mapping': {
                        **format_sensor_info(sl),
                        'wasGeneratedBy': 'Experiment: {}'.format(exp_doi),
                        'wasDerivedFrom':
                            'Model Configuration: {}'.format(mc.value.title),
                        'influenced': []
                    }
                }

                events = filter(
                    lambda event: (sl.uuid in event.value.sensorLists
                                   and expt.uuid in event.associationIds
                                   and mc.uuid in event.associationIds),
                    getattr(doc, 'eventsList', []))
                for event in events:
                    # Map events captured by this sensor list.
                    evt_container_path = "{}/{}".format(
                        sl_container_path, parse.quote(event.value.title))
                    print('\t\t\tevent ' + event.value.title)
                    sl_map['fedora_mapping']['influenced'].append(
                        'Event: {}'.format(event.value.title))
                    experiment_map['fedora_mapping']['generated'].append(
                        'Event: {}'.format(event.value.title))
                    event_map = {
                        'uuid': event.uuid,
                        'fileObjs': event.fileObjs,
                        'container_path': evt_container_path,
                        'fedora_mapping': {
                            **format_event(event),
                            'wasGeneratedBy':
                                'Experiment: {}'.format(exp_doi),
                            'wasDerivedFrom':
                                'Model Configuration: {}'.format(
                                    mc.value.title),
                            'wasInfluencedBy':
                                'Sensor: {}'.format(sl.value.title)
                        }
                    }
                    relation_map.append(event_map)
                relation_map.append(sl_map)
            relation_map.append(mc_map)
        relation_map.append(experiment_map)

    project_map['fedora_mapping']['creator'] = list(set(full_author_list))
    relation_map.append(project_map)
    return relation_map[::-1]
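# Example element of the returned relation map (shape only, reconstructed from
# the mappings built above; the UUID, path, and DOI are illustrative):
#
#     {'uuid': '1234-abcd',
#      'container_path': 'PRJ-1234v2/Experiment%20Title',
#      'fileObjs': [...],
#      'fedora_mapping': {'wasGeneratedBy': 'PRJ-1234v2',
#                         'generated': ['Report: ...', 'Sensor: ...'],
#                         'license': ..., ...}}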
def format_metadata_for_fedora(project_id, version=None):
    """
    Format a publication's metadata so that it can be ingested into Fedora.
    """
    doc = IndexedPublication.from_id(project_id, revision=version)
    pub_meta = doc.project.value

    author_list = []
    try:
        ordered_team = sorted(pub_meta.teamOrder,
                              key=lambda member: member.order)
        author_list = list(
            map(lambda member: "{}, {}".format(member.lname, member.fname),
                ordered_team))
    except AttributeError:
        author_list = [_get_user_by_username(doc, pub_meta.pi)]

    award_numbers = getattr(pub_meta, 'awardNumbers', [])
    contributors = []
    for award in award_numbers:
        contributors.append(award['name'] or None)
        contributors.append(award['number'] or None)

    identifiers = [
        pub_meta.projectId,
        'https://www.designsafe-ci.org/'
        'data/browser/public/designsafe.storage.published/{}'.format(
            pub_meta.projectId),
        doc.project.uuid
    ]
    identifiers += getattr(pub_meta, 'dois', [])
    identifiers += [getattr(doc.project, 'doi', None)]

    project_type = pub_meta.projectType
    if project_type == 'other':
        # Note: no trailing comma here; the original one turned this value
        # into a one-element tuple.
        project_type = getattr(pub_meta, 'dataType', "other")

    fc_meta = {
        'title': pub_meta.title,
        'entity': 'Project',
        'description': pub_meta.description,
        'identifier': identifiers,
        'subject': pub_meta.keywords.split(', '),
        'creator': author_list,
        'issued': doc.project.created.isoformat(),
        'contributor': contributors,
        'type': project_type,
        'publisher': 'Designsafe',
    }

    licenses = getattr(doc, 'licenses', None)
    if licenses:
        fc_meta['license'] = list(licenses.to_dict().values())

    associated_projects = getattr(pub_meta, 'associatedProjects', None)
    if associated_projects:
        references = list(map(lambda assoc: assoc['title'],
                              associated_projects))
        try:
            relation = list(map(lambda assoc: assoc['href'],
                                associated_projects))
        except KeyError:
            relation = []
        fc_meta['references'] = references
        fc_meta['relation'] = relation

    return fc_meta
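# Example return value (shape only; values are illustrative):
#
#     {'title': 'Shake Table Study of ...',
#      'entity': 'Project',
#      'identifier': ['PRJ-1234',
#                     'https://www.designsafe-ci.org/data/browser/public/'
#                     'designsafe.storage.published/PRJ-1234',
#                     '<uuid>', '<doi>'],
#      'creator': ['Doe, Jane', ...],
#      'type': 'experimental',
#      'publisher': 'Designsafe', ...}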
def to_dataset_json(self, **kwargs):
    """
    Serialize project to JSON for Google Dataset Search.
    https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/BMNJPS
    """
    dataset_json = {
        "@context": "http://schema.org",
        "@type": "Dataset",
        "@id": "",
        "identifier": "",
        "logo": "https://www.designsafe-ci.org/static/images/nsf-designsafe-logo.014999b259f6.png",
        "name": self.title,
        "creator": [{
            "name": "",
            "affiliation": "",
            "@id": "",
            "identifier": ""
        }],
        "author": [{
            "name": "",
            "affiliation": "",
            "@id": "",
            "identifier": ""
        }],
        "datePublished": self.created,
        "dateModified": self.to_body_dict()['lastUpdated'],
        "description": self.description,
        "keywords": self.keywords.split(','),
        "license": {
            "@type": "CreativeWork",
            "license": "",
            "url": ""
        },
        "publisher": {
            "@type": "Organization",
            "name": "Designsafe-CI",
            "url": "https://designsafe-ci.org"
        },
        "provider": {
            "@type": "Organization",
            "name": "Designsafe-CI"
        },
        "includedInDataCatalog": {
            "@type": "DataCatalog",
            "name": "Designsafe-CI",
            "url": "https://designsafe-ci.org"
        },
    }

    if getattr(self, 'team_order', False):
        authors = sorted(self.team_order, key=lambda x: x['order'])
    else:
        authors = [{'name': username}
                   for username in [self.pi] + self.co_pis]
    dataset_json['creator'] = generate_creators(authors)
    dataset_json['author'] = generate_creators(authors)

    try:
        pub = IndexedPublication.from_id(self.project_id)
        license_info = generate_licenses(pub)
        dataset_json['license'] = license_info[0]["url"]
    except (DocumentNotFound, AttributeError):
        pass

    if self.dois:
        dataset_json['@id'] = self.dois[0]
        dataset_json['identifier'] = self.dois[0]
        # The distribution entry mirrors the top-level dataset description,
        # with its own blank @id/identifier and placeholder creator/author.
        dataset_json["distribution"] = {
            "@context": "http://schema.org",
            "@type": "Dataset",
            "@id": "",
            "identifier": "",
            "logo": "https://www.designsafe-ci.org/static/images/nsf-designsafe-logo.014999b259f6.png",
            "name": self.title,
            "creator": [{
                "name": "",
                "affiliation": "",
                "@id": "",
                "identifier": ""
            }],
            "author": [{
                "name": "",
                "affiliation": "",
                "@id": "",
                "identifier": ""
            }],
            "datePublished": self.created,
            "dateModified": self.to_body_dict()['lastUpdated'],
            "description": self.description,
            "keywords": self.keywords.split(','),
            "license": {
                "@type": "CreativeWork",
                "license": "",
                "url": ""
            },
            "publisher": {
                "@type": "Organization",
                "name": "Designsafe-CI",
                "url": "https://designsafe-ci.org"
            },
            "provider": {
                "@type": "Organization",
                "name": "Designsafe-CI"
            },
            "includedInDataCatalog": {
                "@type": "DataCatalog",
                "name": "Designsafe-CI",
                "url": "https://designsafe-ci.org"
            },
        }
    else:
        # No DOI on the project itself: attach each related entity that has a
        # DOI as a relatedIdentifier_<n> dataset entry.
        related_ents = self.related_entities()
        for i, ent in enumerate(related_ents):
            if not (hasattr(ent, 'dois') and ent.dois):
                continue
            related_json = {
                "@context": "http://schema.org",
                "@type": "Dataset",
                "@id": ent.dois[0],
                "identifier": ent.dois[0],
                "logo": "https://www.designsafe-ci.org/static/images/nsf-designsafe-logo.014999b259f6.png",
                "name": ent.title,
                "creator": [{
                    "name": "",
                    "affiliation": "",
                    "@id": "",
                    "identifier": ""
                }],
                "author": [{
                    "name": "",
                    "affiliation": "",
                    "@id": "",
                    "identifier": ""
                }],
                "datePublished": ent.created,
                "dateModified": ent.to_body_dict()['lastUpdated'],
                "description": ent.description,
                # Related entities inherit the project's license.
                "license": dataset_json['license'],
                "publisher": {
                    "@type": "Organization",
                    "name": "Designsafe-CI"
                },
                "provider": {
                    "@type": "Organization",
                    "name": "Designsafe-CI"
                },
                "includedInDataCatalog": {
                    "@type": "DataCatalog",
                    "name": "Designsafe-CI",
                    "url": "https://designsafe-ci.org"
                },
            }
            if getattr(ent, 'team_order', False):
                authors = sorted(ent.team_order, key=lambda x: x['order'])
            else:
                # Falls back to the parent project's PI and co-PIs, as in
                # the original.
                authors = [{'name': username}
                           for username in [self.pi] + self.co_pis]
            related_json['creator'] = generate_creators(authors)
            related_json['author'] = generate_creators(authors)
            dataset_json['relatedIdentifier_' + str(i)] = related_json

    return dataset_json
def post(self, request, **kwargs):
    """Publish a project or version a publication."""
    if request.is_ajax():
        data = json.loads(request.body)
    else:
        data = request.POST

    status = data.get('status', 'saved')
    revision = data.get('revision', None)
    revision_text = data.get('revisionText', None)
    revision_titles = data.get('revisionTitles', None)
    revised_authors = data.get('revisionAuthors', None)
    selected_files = data.get('selectedFiles', None)
    project_id = data['publication']['project']['value']['projectId']
    current_revision = None

    # If revision is truthy, increment the revision count and pass it to the
    # pipeline. The first revision of a publication is numbered 2.
    if revision:
        latest_revision = IndexedPublication.max_revision(
            project_id=project_id)
        current_revision = latest_revision + 1 if latest_revision >= 2 else 2

    pub = initialize_publication(data['publication'],
                                 status,
                                 revision=current_revision,
                                 revision_text=revision_text,
                                 revision_titles=revision_titles)

    if status.startswith('publish'):
        (tasks.freeze_publication_meta.s(
            project_id=pub.projectId,
            entity_uuids=data.get('mainEntityUuids'),
            revision=current_revision,
            revised_authors=revised_authors).set(queue='api') |
         group(
             tasks.save_publication.si(
                 project_id=pub.projectId,
                 entity_uuids=data.get('mainEntityUuids'),
                 revision=current_revision,
                 revised_authors=revised_authors).set(queue='files',
                                                      countdown=60),
             tasks.copy_publication_files_to_corral.si(
                 project_id=pub.projectId,
                 revision=current_revision,
                 selected_files=selected_files).set(queue='files',
                                                    countdown=60)) |
         tasks.swap_file_tag_uuids.si(pub.projectId,
                                      revision=current_revision) |
         tasks.set_publish_status.si(
             project_id=pub.projectId,
             entity_uuids=data.get('mainEntityUuids'),
             revision=current_revision) |
         tasks.zip_publication_files.si(pub.projectId,
                                        revision=current_revision) |
         tasks.email_user_publication_request_confirmation.si(
             request.user.username) |
         tasks.check_published_files.si(
             pub.projectId,
             revision=current_revision,
             selected_files=selected_files)).apply_async()

    return JsonResponse({'success': 'Project is publishing.'}, status=200)
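# Expected request body (keys reconstructed from the reads above; values are
# illustrative):
#
#     {"publication": {"project": {"value": {"projectId": "PRJ-1234", ...}}},
#      "status": "publish",       # anything starting with "publish" runs the pipeline
#      "revision": true,          # optional; version an existing publication
#      "revisionText": "...",     # optional
#      "revisionTitles": {...},   # optional
#      "revisionAuthors": [...],  # optional
#      "selectedFiles": [...],    # optional
#      "mainEntityUuids": [...]}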