def publish_resource(project_id, entity_uuid=None):
    """Mark a publication and its saved DOIs as published.

    Looks up the project and, when ``entity_uuid`` is given, the entity.
    Every DOI saved on the project is published through
    ``DataciteManager.publish_doi`` first, followed by every DOI saved on
    the entity. The locally stored publication document for ``project_id``
    is then updated with ``status='published'`` so that it shows up in the
    published listing. When no DOIs are saved nothing is published and the
    returned list is empty.

    :param str project_id: Project Id to publish.
    :param str entity_uuid: Entity uuid to publish.
    :returns: List with one ``publish_doi`` response per DOI, project DOIs
        first and entity DOIs after.
    """
    manager = ProjectsManager(service_account())
    project = manager.get_project_by_id(project_id)
    entity = manager.get_entity_by_uuid(entity_uuid) if entity_uuid else None

    responses = [DataciteManager.publish_doi(doi) for doi in project.dois]
    if entity:
        responses.extend(
            DataciteManager.publish_doi(doi) for doi in entity.dois
        )

    publication = BaseESPublication(project_id=project_id)
    publication.update(status='published')

    for response in responses:
        LOG.info("DOI published: %(doi)s", {"doi": response['data']['id']})
    return responses
def save_to_fedora(self, project_id, revision=None):
    """Mark a publication as published and copy it into the Fedora repository.

    The publication document for ``project_id``/``revision`` is first updated
    with ``status='published'``. Projects of type ``'other'`` and
    ``'experimental'`` are then handed to the dedicated ingest functions in
    ``designsafe.libs.fedora.fedora_operations`` and the function returns.
    For every other project type the published directory tree is walked and
    each file and directory is PUT under the project's Fedora container.

    Any exception is logged and re-raised through ``self.retry`` (``self`` is
    presumably a bound Celery task -- confirm against the decorator, which is
    not part of this block).

    :param str project_id: Project Id whose published files are uploaded.
    :param int revision: Revision number of the publication, if any.
    """
    import requests
    import magic
    from designsafe.libs.elasticsearch.docs.publications import BaseESPublication

    def _fedora_path(full_path, root_dir):
        """Return the URL path of ``full_path`` relative to ``root_dir``.

        Square brackets are replaced by dashes and the result is
        percent-quoted. Files and directories share this helper so a
        directory container is created at the same URL prefix that the
        files inside it are uploaded to.
        """
        relative = full_path.replace(root_dir, '', 1)
        relative = relative.replace('[', '-').replace(']', '-')
        return urllib.parse.quote(relative)

    def _ensure_container(url):
        """Create the container at ``url`` when Fedora reports it missing."""
        res = requests.get(url)
        if res.status_code in (404, 410):
            requests.put(url)

    try:
        es_client = new_es_client()
        pub = BaseESPublication(
            project_id=project_id, revision=revision, using=es_client
        )
        pub.update(status='published', using=es_client)

        project_type = pub.project.value.projectType
        if project_type == 'other':
            from designsafe.libs.fedora.fedora_operations import ingest_project
            ingest_project(project_id, version=revision)
            return
        if project_type == 'experimental':
            from designsafe.libs.fedora.fedora_operations import ingest_project_experimental
            ingest_project_experimental(project_id, version=revision)
            return

        _root = os.path.join('/corral-repl/tacc/NHERI/published', project_id)
        fedora_base = 'http://fedoraweb01.tacc.utexas.edu:8080/fcrepo/rest/publications_01'
        fedora_project_base = ''.join([fedora_base, '/', project_id])
        _ensure_container(fedora_base)
        _ensure_container(fedora_project_base)

        # One detector is enough for the whole walk; it keeps no per-file state.
        mime = magic.Magic(mime=True)
        for root, dirs, files in os.walk(_root):
            for name in files:
                full_path = os.path.join(root, name)
                url = ''.join(
                    [fedora_project_base, _fedora_path(full_path, _root)]
                )
                # Fresh dict per request instead of mutating a shared one.
                headers = {'Content-Type': mime.from_file(full_path)}
                with open(full_path, 'rb') as _file:
                    requests.put(url, data=_file, headers=headers)
            for name in dirs:
                full_path = os.path.join(root, name)
                url = ''.join(
                    [fedora_project_base, _fedora_path(full_path, _root)]
                )
                requests.put(url)
    except Exception as exc:
        logger.error('Proj Id: %s. %s', project_id, exc)
        raise self.retry(exc=exc)
def publish_resource(project_id, entity_uuids=None, publish_dois=False, revision=None):
    """Publish a resource.

    Sets the status of the locally saved publication document for
    ``project_id``/``revision`` to ``"published"`` so it appears in the
    published listing, then refreshes the publications index.

    DOIs are only published on DataCite when ``publish_dois`` is true: first
    the DOIs saved on every entity in ``entity_uuids`` that can be resolved,
    then the DOIs saved on the project. With ``publish_dois`` false DataCite
    keeps the DOIs in "DRAFT" status. A DOI on DataCite can only be deleted
    while it is in "DRAFT" status; once it is "PUBLISHED" or "RESERVED" it
    can't be deleted.

    When ``revision`` is truthy the previous publication document (revision
    ``revision - 1``, or ``0`` when ``revision`` is not greater than 2) gets
    its status set to ``"archived"``.

    :param str project_id: Project Id to publish.
    :param list entity_uuids: list of str Entity uuids to publish.
    :param bool publish_dois: Whether to publish the saved DOIs on DataCite.
    :param int revision: Revision number to publish.
    :returns: List of ``DataciteManager.publish_doi`` responses (empty when
        ``publish_dois`` is false or no DOIs are saved).
    """
    es_client = new_es_client()
    manager = ProjectsManager(service_account())
    project = manager.get_project_by_id(project_id)

    responses = []
    if publish_dois:
        for ent_uuid in entity_uuids or ():
            entity = manager.get_entity_by_uuid(ent_uuid) if ent_uuid else None
            if not entity:
                continue
            responses.extend(
                DataciteManager.publish_doi(doi) for doi in entity.dois
            )
        responses.extend(
            DataciteManager.publish_doi(doi) for doi in project.dois
        )

    publication = BaseESPublication(
        project_id=project_id, revision=revision, using=es_client
    )
    publication.update(status='published', using=es_client)
    IndexedPublication._index.refresh(using=es_client)

    if revision:
        # Revising a publication archives the document it supersedes.
        if revision > 2:
            previous_revision = revision - 1
        else:
            previous_revision = 0
        superseded = BaseESPublication(
            project_id=project_id, revision=previous_revision
        )
        superseded.update(status='archived')

    for response in responses:
        logger.info(
            "DOI published: %(doi)s", {"doi": response['data']['id']}
        )
    return responses
def freeze_project_and_entity_metadata(project_id, entity_uuids=None):
    """Freeze project and entity metadata.

    Given a project id and a list of entity uuids (should be main entities)
    this function retrieves the metadata related to them and stores it into
    Elasticsearch as
    :class:`~designafe.libs.elasticsearch.docs.publications.BaseESPublication`

    When ``entity_uuids`` is given, the publication field that holds entities
    of the first entity's type is emptied and then refilled with the frozen
    metadata of each entity. Uuids that do not resolve to an entity are
    skipped. The first uuid must resolve, because its type decides which
    field is cleared.

    :param str project_id: Project id.
    :param list of entity_uuid strings: Entity uuids.
    :returns: The updated publication document.
    """
    mgr = ProjectsManager(service_account())
    prj = mgr.get_project_by_id(project_id)
    pub_doc = BaseESPublication(project_id=project_id)
    publication = pub_doc.to_dict()

    if entity_uuids:
        # clear any existing entities in publication
        first_entity = mgr.get_entity_by_uuid(entity_uuids[0])
        publication[FIELD_MAP[first_entity.name]] = []

        for ent_uuid in entity_uuids:
            entity = mgr.get_entity_by_uuid(ent_uuid)
            if not entity:
                continue
            # Serialise only after the guard so an unresolved uuid is skipped
            # instead of raising AttributeError on None.
            entity_json = entity.to_body_dict()
            pub_entities_field_name = FIELD_MAP[entity.name]
            publication['authors'] = entity_json['value']['authors'][:]
            entity_json['authors'] = []
            _populate_entities_in_publication(entity, publication)
            _transform_authors(entity_json, publication)
            if entity_json['value']['dois']:
                # The last saved DOI is exposed as the entity's DOI.
                entity_json['doi'] = entity_json['value']['dois'][-1]
            _delete_unused_fields(entity_json)
            publication[pub_entities_field_name].append(entity_json)

    prj_json = prj.to_body_dict()
    _delete_unused_fields(prj_json)

    # Carry the legacy ``awardNumber`` value of the stored publication over
    # to ``awardNumbers``; anything that is not a list becomes an empty list.
    award_number = publication.get('project', {}).get('value', {}).pop(
        'awardNumber', []
    ) or []
    if not isinstance(award_number, list):
        award_number = []
    prj_json['value']['awardNumbers'] = award_number
    prj_json['value'].pop('awardNumber', None)

    if publication.get('project'):
        publication['project'].update(prj_json)
    else:
        publication['project'] = prj_json

    pub_doc.update(**publication)
    return pub_doc
def amend_publication(project_id, amendments=None, authors=None, revision=None):
    """Amend a Publication

    Update amendable fields on the stored publication document. These changes
    do not produce a new version of a publication, but they do allow for
    limited changes to a published project. This function only updates the
    publication document and refreshes the index; it makes no DataCite call.

    For publications whose project type is not ``'other'`` every entity
    listed by ``pub.entities()`` is amended. The entity data comes from
    ``amendments`` when its uuid is present there, otherwise from the
    project's current metadata. Fields named in ``UNAMENDABLE_FIELDS`` are
    left untouched. Project level fields are amended for every project type.

    :param str project_id: Project uuid to amend
    :param dict amendments: Optional mapping of entity uuid to the entity
        dict (with ``name``, ``uuid`` and ``value`` keys) to use instead of
        the project's current metadata. ``None`` means no overrides.
    :param authors: For ``'other'`` projects, the value stored as the
        project's ``teamOrder``. For all other project types it is indexed
        by entity uuid and must contain an entry for every amended entity
        that has an ``authors`` value.
    :param int revision: Revision number to amend
    :returns: The updated publication document.
    """
    # ``uuid in None`` raises TypeError, so treat "no amendments" as empty.
    amendments = amendments or {}

    es_client = new_es_client()
    mgr = ProjectsManager(service_account())
    prj = mgr.get_project_by_id(project_id)
    pub = BaseESPublication(project_id=project_id, revision=revision, using=es_client)

    prj_dict = prj.to_body_dict()
    pub_dict = pub.to_dict()
    _delete_unused_fields(prj_dict)

    if pub.project.value.projectType != 'other':
        for ent_uuid in pub.entities():
            if ent_uuid in amendments:
                entity = amendments[ent_uuid]
            else:
                entity = mgr.get_entity_by_uuid(ent_uuid).to_body_dict()
            _delete_unused_fields(entity)

            # Entities that carry DOIs use the 'entity' rules, the rest the
            # 'subentity' rules; this does not change per key.
            ent_type = 'entity' if 'dois' in entity['value'] else 'subentity'
            unamendable = UNAMENDABLE_FIELDS[ent_type]

            for pub_ent in pub_dict[FIELD_MAP[entity['name']]]:
                if pub_ent['uuid'] != entity['uuid']:
                    continue
                for key, value in entity['value'].items():
                    if key not in unamendable:
                        pub_ent['value'][key] = value
                if 'authors' in entity['value']:
                    pub_ent['value']['authors'] = authors[entity['uuid']]
                    _set_authors(pub_ent, pub_dict)

    # weird key swap for old issues with awardnumber(s)
    award_number = prj.award_number or []
    if not isinstance(award_number, list):
        award_number = []
    prj_dict['value']['awardNumbers'] = award_number
    prj_dict['value'].pop('awardNumber', None)

    for key in prj_dict['value']:
        if key not in UNAMENDABLE_FIELDS['project']:
            pub_dict['project']['value'][key] = prj_dict['value'][key]

    if authors and prj_dict['value']['projectType'] == 'other':
        pub_dict['project']['value']['teamOrder'] = authors

    pub.update(**pub_dict)
    IndexedPublication._index.refresh(using=es_client)
    return pub
def publish_resource(project_id, entity_uuids=None, publish_dois=False):
    """Publish a resource.

    Retrieves a project and/or its entities and sets any saved DOIs as
    published. If no DOIs are saved in the specified project or entities
    nothing is published and an empty list is returned.
    We need to specify the project id because this function also changes the
    status of the locally saved publication to `"published"` that way it shows
    up in the published listing.

    If publish_dois is False Datacite will keep the newly created DOIs in
    "DRAFT" status, they will not be set to "PUBLISHED". A DOI on DataCite
    can only be deleted if it is in "DRAFT" status. Once a DOI is set to
    "PUBLISHED" or "RESERVED" it can't be deleted.

    :param str project_id: Project Id to publish.
    :param list entity_uuids: list of str Entity uuids to publish. ``None``
        or an empty list publishes only the project's DOIs.
    :param bool publish_dois: Whether to publish the saved DOIs on DataCite.
    :returns: List of ``DataciteManager.publish_doi`` responses, entity DOIs
        first and project DOIs after.
    """
    mgr = ProjectsManager(service_account())
    prj = mgr.get_project_by_id(project_id)

    responses = []
    if publish_dois:
        # ``entity_uuids`` defaults to None; iterating it directly would
        # raise TypeError before the project DOIs are published.
        for ent_uuid in entity_uuids or ():
            entity = mgr.get_entity_by_uuid(ent_uuid) if ent_uuid else None
            if not entity:
                continue
            for doi in entity.dois:
                responses.append(DataciteManager.publish_doi(doi))

        for doi in prj.dois:
            responses.append(DataciteManager.publish_doi(doi))

    pub = BaseESPublication(project_id=project_id)
    pub.update(status='published')

    for res in responses:
        LOG.info(
            "DOI published: %(doi)s", {"doi": res['data']['id']}
        )
    return responses
def freeze_project_and_entity_metadata(project_id, entity_uuids=None):
    """Freeze project and entity metadata.

    Retrieves the metadata of the project and of the given entities (these
    should be main entities) and stores it into Elasticsearch as
    :class:`~designafe.libs.elasticsearch.docs.publications.BaseESPublication`

    This covers both a first publication and publishing over an existing
    one. Every entity field already present in the stored publication is
    inspected: entities that carry ``fileObjs`` are remembered, and a field
    is emptied when it holds at least one of the entities in
    ``entity_uuids``. The publication is then rebuilt from ``entity_uuids``
    (the entities getting DOIs); a rebuilt entity gets back the ``fileObjs``
    of the remembered entity with the same uuid.

    :param str project_id: Project id.
    :param list of entity_uuid strings: Entity uuids.
    :returns: The updated publication document.
    """
    manager = ProjectsManager(service_account())
    project = manager.get_project_by_id(project_id)
    pub_doc = BaseESPublication(project_id=project_id)
    publication = pub_doc.to_dict()

    if entity_uuids:
        # Entity fields that exist in the stored publication.
        fields_to_clear = {
            field for field in FIELD_MAP.values() if field in publication
        }

        entities_with_files = []
        for field in fields_to_clear:
            stored_entities = publication[field]
            holds_requested_entity = False
            for stored in stored_entities:
                if 'fileObjs' in stored:
                    entities_with_files.append(stored)
                if stored['uuid'] in entity_uuids:
                    holds_requested_entity = True
            if holds_requested_entity:
                publication[field] = []

        for ent_uuid in entity_uuids:
            entity = manager.get_entity_by_uuid(ent_uuid)
            if not entity:
                continue

            entity_json = entity.to_body_dict()
            target_field = FIELD_MAP[entity.name]

            # Keep the fileObjs recorded in the previous publication.
            for previous in entities_with_files:
                if previous['uuid'] == entity_json['uuid']:
                    entity_json['fileObjs'] = previous['fileObjs']

            publication['authors'] = list(entity_json['value']['authors'])
            entity_json['authors'] = []
            _populate_entities_in_publication(entity, publication)
            _transform_authors(entity_json, publication)

            saved_dois = entity_json['value']['dois']
            if saved_dois:
                entity_json['doi'] = saved_dois[-1]

            _delete_unused_fields(entity_json)
            publication[target_field].append(entity_json)

    prj_json = project.to_body_dict()
    _delete_unused_fields(prj_json)

    # Move the legacy ``awardNumber`` of the stored publication over to
    # ``awardNumbers``; empty or non-list values become an empty list.
    stored_value = publication.get('project', {}).get('value', {})
    award_numbers = stored_value.pop('awardNumber', [])
    if not award_numbers or not isinstance(award_numbers, list):
        award_numbers = []
    prj_json['value']['awardNumbers'] = award_numbers
    prj_json['value'].pop('awardNumber', None)

    if publication.get('project'):
        publication['project'].update(prj_json)
    else:
        publication['project'] = prj_json

    pub_doc.update(**publication)
    return pub_doc
def fix_file_tags(project_id):
    """Rewrite the file tags of a publication to reference published files.

    Loads the publication document for ``project_id`` and visits the
    ``project`` entry plus every entity field named in ``FIELD_MAP`` that the
    document contains. For each visited entity that has ``fileTags``:

    * when every tag has a ``path``, the tag's ``fileUuid`` is set to the
      uuid of the file listed at that path in the published storage system;
    * otherwise the project files (children of the project root for the
      project entry, ``fileObjs`` for other entities) are listed in both the
      project system and the published system, and tags that reference the
      project file's uuid are switched to the published file's uuid.

    Listing errors are logged and the affected file is skipped. The modified
    document is written back with ``pub.update``.

    :param str project_id: Project Id of the publication to fix.
    """
    pub = BaseESPublication(project_id=project_id)
    pub_dict = pub.to_dict()
    entities_to_check = list(set(pub_dict).intersection(FIELD_MAP.values()))
    entities_to_check.append('project')

    def check_complete_tags(tags):
        """Return True when every tag carries a ``path``."""
        return all('path' in tag for tag in tags)

    def swap_uuids(entity, proj_file, pub_file):
        """Point tags at ``pub_file`` wherever they reference ``proj_file``."""
        for tag in entity['value']['fileTags']:
            if tag['fileUuid'] == proj_file.uuid:
                tag['fileUuid'] = pub_file.uuid

    def fix_tags_path(entity):
        """Resolve each tag's published uuid directly from its path."""
        for tag in entity['value']['fileTags']:
            try:
                published = BaseFileResource.listing(
                    service_account(),
                    system="designsafe.storage.published",
                    path="{}{}".format(project_id, tag['path'])
                )
                tag['fileUuid'] = published.uuid
            except Exception as err:
                LOG.info('error: {}'.format(err))

    def fix_tags_no_path(entity):
        """Match tags to published files through the project's own files."""
        if entity['name'] != 'designsafe.project':
            for fobj in entity['fileObjs']:
                try:
                    published = BaseFileResource.listing(
                        service_account(),
                        system="designsafe.storage.published",
                        path="{}{}".format(project_id, fobj['path'])
                    )
                    original = BaseFileResource.listing(
                        service_account(),
                        system="project-{}".format(pub_dict['project']['uuid']),
                        path=fobj['path']
                    )
                    swap_uuids(entity, original, published)
                except Exception as err:
                    LOG.info('error: {}'.format(err))
            return

        project_root = BaseFileResource.listing(
            service_account(),
            system="project-{}".format(entity['uuid']),
            path=""
        )
        for child in project_root.children:
            try:
                published = BaseFileResource.listing(
                    service_account(),
                    system="designsafe.storage.published",
                    path="{}{}".format(project_id, child.path)
                )
                original = BaseFileResource.listing(
                    service_account(),
                    system="project-{}".format(entity['uuid']),
                    path=child.path
                )
                swap_uuids(entity, original, published)
            except Exception as err:
                LOG.info('error: {}'.format(err))

    def fix_entity(entity):
        """Pick the repair strategy for one entity, if it has file tags."""
        if 'value' not in entity or 'fileTags' not in entity['value']:
            return
        if check_complete_tags(entity['value']['fileTags']):
            fix_tags_path(entity)
        else:
            fix_tags_no_path(entity)

    for entname in entities_to_check:
        section = pub_dict[entname]
        if type(section) is list:
            for entity in section:
                fix_entity(entity)
        else:
            fix_entity(section)

    pub.update(**pub_dict)