示例#1
0
def neessearch(offset=0, limit=100, query_string='', limit_fields=True, *args):
    """Search legacy NEES publications.

    Matches ``query_string`` against PI names (nested sub-documents) or the
    full document, returning one page of hits sorted newest-first.
    """
    # Nested query against the PI sub-documents by first/last name.
    pi_query = Q({
        "nested": {
            "path": "pis",
            "ignore_unmapped": True,
            "query": {
                "query_string": {
                    "query": query_string,
                    "fields": ["pis.firstName", "pis.lastName"],
                    "lenient": True
                }
            }
        }
    })
    # Free-text query over the whole document; all terms must match.
    text_query = Q('query_string',
                   query=query_string,
                   default_operator='and')

    search = IndexedPublicationLegacy.search(using=new_es_client())
    search = search.filter(pi_query | text_query)
    search = search.extra(from_=offset, size=limit)
    if limit_fields:
        # Trim the response payload down to the listing fields.
        search = search.source(
            includes=['project', 'pis', 'title', 'startDate', 'path'])
    # Newest first; tolerate documents with no mapped 'created' field.
    search = search.sort(
        {'created': {'order': 'desc', 'unmapped_type': 'long'}})

    result = search.execute()
    return {'listing': [hit.to_dict() for hit in result.hits]}
示例#2
0
def save_to_fedora(self, project_id, revision=None):
    """Push a publication's metadata and files into the Fedora repository.

    Marks the publication as 'published' in Elasticsearch, then:
    - 'other' / 'experimental' projects are handed to dedicated ingest
      routines and the task returns early;
    - all other project types have their published directory tree walked,
      with every directory and file PUT to the Fedora REST endpoint.

    Any failure is logged and the task is retried.

    :param str project_id: Project id to ingest.
    :param int revision: Optional revision number to ingest.
    """
    import requests
    import magic
    from designsafe.libs.elasticsearch.docs.publications import BaseESPublication
    try:
        es_client = new_es_client()
        pub = BaseESPublication(project_id=project_id,
                                revision=revision,
                                using=es_client)
        pub.update(status='published', using=es_client)

        # Type-specific ingest paths handle Fedora themselves; return early.
        if pub.project.value.projectType == 'other':
            from designsafe.libs.fedora.fedora_operations import ingest_project
            ingest_project(project_id, version=revision)
            return
        if pub.project.value.projectType == 'experimental':
            from designsafe.libs.fedora.fedora_operations import ingest_project_experimental
            ingest_project_experimental(project_id, version=revision)
            return

        # NOTE(review): _root carries no 'v{revision}' suffix, unlike the
        # destinations built by the copy/check tasks — confirm how revised
        # publications reach this generic path.
        _root = os.path.join('/corral-repl/tacc/NHERI/published', project_id)
        fedora_base = 'http://fedoraweb01.tacc.utexas.edu:8080/fcrepo/rest/publications_01'
        # Create the base container if missing (404) or tombstoned (410).
        res = requests.get(fedora_base)
        if res.status_code == 404 or res.status_code == 410:
            requests.put(fedora_base)

        # Same for the per-project container.
        fedora_project_base = ''.join([fedora_base, '/', project_id])
        res = requests.get(fedora_project_base)
        if res.status_code == 404 or res.status_code == 410:
            requests.put(fedora_project_base)

        headers = {'Content-Type': 'text/plain'}
        #logger.debug('walking: %s', _root)
        for root, dirs, files in os.walk(_root):
            for name in files:
                # Detect the real MIME type so Fedora stores it correctly.
                mime = magic.Magic(mime=True)
                headers['Content-Type'] = mime.from_file(
                    os.path.join(root, name))
                #files
                full_path = os.path.join(root, name)
                # Path relative to the published root, sanitized: square
                # brackets are replaced before URL-quoting.
                _path = full_path.replace(_root, '', 1)
                _path = _path.replace('[', '-')
                _path = _path.replace(']', '-')
                url = ''.join([fedora_project_base, urllib.parse.quote(_path)])
                #logger.debug('uploading: %s', url)
                with open(os.path.join(root, name), 'rb') as _file:
                    requests.put(url, data=_file, headers=headers)

            for name in dirs:
                #dirs
                full_path = os.path.join(root, name)
                _path = full_path.replace(_root, '', 1)
                # NOTE(review): directory paths are neither URL-quoted nor
                # bracket-sanitized like the file paths above — confirm
                # whether that asymmetry is intentional.
                url = ''.join([fedora_project_base, _path])
                #logger.debug('creating: %s', _path)
                requests.put(url)

    except Exception as exc:
        logger.error('Proj Id: %s. %s', project_id, exc)
        raise self.retry(exc=exc)
示例#3
0
def publish_resource(project_id, entity_uuids=None, publish_dois=False, revision=None):
    """Publish a resource.

    Looks up a project (and optionally its entities) and marks any saved
    DOIs as published on DataCite; projects/entities without saved DOIs fail
    silently. The project id is required because this function also flips
    the locally stored publication's status to ``"published"`` so it appears
    in the published listing.

    When ``publish_dois`` is False, newly created DOIs stay in "DRAFT"
    status on DataCite rather than "PUBLISHED". Only "DRAFT" DOIs can be
    deleted; once "PUBLISHED" or "RESERVED" they are permanent.

    :param str project_id: Project Id to publish.
    :param list entity_uuids: list of str Entity uuids to publish.
    :param int revision: Revision number to publish.
    """
    es_client = new_es_client()

    mgr = ProjectsManager(service_account())
    prj = mgr.get_project_by_id(project_id)
    responses = []

    if publish_dois:
        for ent_uuid in (entity_uuids or []):
            entity = mgr.get_entity_by_uuid(ent_uuid) if ent_uuid else None
            if not entity:
                continue
            responses.extend(
                DataciteManager.publish_doi(doi) for doi in entity.dois)

        responses.extend(
            DataciteManager.publish_doi(doi) for doi in prj.dois)

    pub = BaseESPublication(project_id=project_id, revision=revision, using=es_client)
    pub.update(status='published', using=es_client)
    IndexedPublication._index.refresh(using=es_client)

    if revision:
        # Revising a publication archives the previous document; revision 0
        # denotes the original (pre-revision) publication.
        previous = revision - 1 if revision > 2 else 0
        archived = BaseESPublication(project_id=project_id, revision=previous)
        archived.update(status='archived')

    for res in responses:
        logger.info(
            "DOI published: %(doi)s",
            {"doi": res['data']['id']}
        )
    return responses
示例#4
0
def amend_publication(project_id, amendments=None, authors=None, revision=None):
    """Amend a Publication.

    Update amendable fields on a publication and the corresponding DataCite
    records. These changes do not produce a new version of a publication, but
    they do allow for limited changes to a published project. This is currently
    configured to support "Other" publications only.

    :param str project_id: Project uuid to amend.
    :param dict amendments: Mapping of entity uuid -> amended entity dict;
        entities not present here are re-fetched from the metadata service.
    :param authors: Mapping of entity uuid -> author list (or, for 'other'
        projects, the team-order list applied to the project itself).
    :param int revision: Revision number to amend.
    """
    es_client = new_es_client()
    mgr = ProjectsManager(service_account())
    prj = mgr.get_project_by_id(project_id)
    pub = BaseESPublication(project_id=project_id, revision=revision, using=es_client)

    prj_dict = prj.to_body_dict()
    pub_dict = pub.to_dict()
    _delete_unused_fields(prj_dict)

    # Guard: the default amendments=None previously raised TypeError on
    # `uuid in amendments` for non-'other' projects.
    amendments = amendments or {}

    if pub.project.value.projectType != 'other':
        for uuid in pub.entities():
            if uuid in amendments:
                entity = amendments[uuid]
            else:
                entity = mgr.get_entity_by_uuid(uuid).to_body_dict()
            _delete_unused_fields(entity)

            for pub_ent in pub_dict[FIELD_MAP[entity['name']]]:
                if pub_ent['uuid'] != entity['uuid']:
                    continue
                # Entities carry DOIs; sub-entities do not. This is
                # invariant per entity, so compute it once (not per key).
                ent_type = 'entity' if 'dois' in entity['value'] else 'subentity'
                for key in entity['value']:
                    if key not in UNAMENDABLE_FIELDS[ent_type]:
                        pub_ent['value'][key] = entity['value'][key]
                # Guard authors=None (previously a TypeError on subscript).
                if 'authors' in entity['value'] and authors:
                    pub_ent['value']['authors'] = authors[entity['uuid']]
                    _set_authors(pub_ent, pub_dict)

    # Weird key swap for old issues with awardNumber(s): the scalar
    # 'awardNumber' is dropped in favor of the 'awardNumbers' list.
    award_number = prj.award_number or []
    if not isinstance(award_number, list):
        award_number = []
    prj_dict['value']['awardNumbers'] = award_number
    prj_dict['value'].pop('awardNumber', None)

    for key in prj_dict['value']:
        if key not in UNAMENDABLE_FIELDS['project']:
            pub_dict['project']['value'][key] = prj_dict['value'][key]
    if authors and prj_dict['value']['projectType'] == 'other':
        pub_dict['project']['value']['teamOrder'] = authors

    pub.update(**pub_dict)
    IndexedPublication._index.refresh(using=es_client)
    return pub
示例#5
0
    def get(self, request, project_id, revision=None):
        """
        Fetch a single publication as JSON. When ``revision`` is omitted the
        "Original" publication is returned; if a later revision exists and is
        not the one requested, it is attached under ``latestRevision``.
        """
        es_client = new_es_client()
        pub = BaseESPublication(project_id=project_id,
                                revision=revision,
                                using=es_client)
        latest_revision = IndexedPublication.max_revision(
            project_id=project_id, using=es_client)

        latest_pub_dict = None
        if latest_revision > 0 and latest_revision != revision:
            latest_pub = BaseESPublication(project_id=project_id,
                                           revision=latest_revision,
                                           using=es_client)
            if latest_pub is not None and hasattr(latest_pub, 'project'):
                latest_pub_dict = latest_pub.to_dict()

        # Guard clause: no usable publication -> 404.
        if pub is None or not hasattr(pub, 'project'):
            return JsonResponse({
                'status': 404,
                'message': 'Not found'
            },
                                status=404)

        pub_dict = pub.to_dict()

        # Record a listing metric for every project type except 'other'.
        if pub_dict['project']['value']['projectType'] != 'other':
            metrics.info('Data Depot',
                         extra={
                             'user': request.user.username,
                             'sessionId': getattr(request.session, 'session_key', ''),
                             'operation': 'listing',
                             'agent': request.META.get('HTTP_USER_AGENT'),
                             'ip': get_client_ip(request),
                             'info': {
                                 'api': 'agave',
                                 'systemId': 'designsafe.storage.published',
                                 'filePath': project_id,
                                 'query': {}
                             }
                         })

        if latest_pub_dict:
            pub_dict['latestRevision'] = latest_pub_dict
        return JsonResponse(pub_dict)
示例#6
0
def neeslisting(offset=0, limit=100, limit_fields=True, *args):
    """Return one page of legacy NEES publications, newest first."""
    search = IndexedPublicationLegacy.search(using=new_es_client())
    search = search.extra(from_=offset, size=limit)
    if limit_fields:
        # Trim the payload down to the listing fields.
        search = search.source(
            includes=['project', 'pis', 'title', 'startDate', 'path'])
    # Newest first; tolerate documents with no mapped 'created' field.
    search = search.sort(
        {'created': {'order': 'desc', 'unmapped_type': 'long'}})

    result = search.execute()
    return {'listing': [hit.to_dict() for hit in result.hits]}
示例#7
0
def initilize_publication(publication,
                          status='publishing',
                          revision=None,
                          revision_text=None,
                          revision_titles=None):
    """Initialize a publication document in Elasticsearch.

    Normalizes ``publication`` in place (projectId/status/version, moves the
    legacy scalar 'license' into the 'licenses' field, stamps creation or
    revision metadata), then creates or updates the corresponding
    ``IndexedPublication`` document and refreshes the index.

    :param dict publication: Publication body; mutated in place.
    :param str status: Initial status to store (default 'publishing').
    :param int revision: Revision number, when publishing a revision.
    :param str revision_text: Description of the revision.
    :param list revision_titles: Titles changed in the revision, if any.
    :returns: The saved ``IndexedPublication`` document.
    """
    publication['projectId'] = publication['project']['value']['projectId']
    publication['status'] = status
    publication['version'] = 2
    # Move the legacy scalar 'license' field into the 'licenses' list.
    publication['licenses'] = publication.pop('license', [])
    publication['license'] = ''
    es_client = new_es_client()
    if revision:
        # A revision keeps the original publication's creation date.
        base_pub = IndexedPublication.from_id(publication['projectId'],
                                              revision=None,
                                              using=es_client)
        publication['created'] = base_pub['created']
        publication['revision'] = revision
        publication['revisionDate'] = datetime.datetime.now().isoformat()
        publication['revisionText'] = revision_text
        if revision_titles:
            publication['revisionTitles'] = revision_titles
    elif 'created' not in publication:
        publication['created'] = datetime.datetime.now().isoformat()
    try:
        pub = IndexedPublication.from_id(publication['projectId'],
                                         revision=revision,
                                         using=es_client)
        pub.update(using=es_client, **publication)
    except DocumentNotFound:
        pub = IndexedPublication(project_id=publication['projectId'],
                                 **publication)
    # Single save covers both branches (the new-document path previously
    # saved the same document twice).
    pub.save(using=es_client)

    # Refresh index so that search works in subsequent pipeline operations.
    IndexedPublication._index.refresh(using=es_client)
    return pub
示例#8
0
def listing(offset=0, limit=100, limit_fields=True, *args):
    """Return one page of published projects, newest first, with PI info."""
    search = IndexedPublication.search(using=new_es_client())
    search = search.filter(Q('term', status='published'))
    search = search.extra(from_=offset, size=limit)
    if limit_fields:
        # Only the fields the listing UI renders.
        search = search.source(includes=[
            'project.value.title', 'project.value.pi',
            'project.value.keywords', 'project.value.projectType',
            'project.value.dataType', 'created', 'projectId', 'users',
            'system', 'revision'
        ])
    search = search.sort({'created': {'order': 'desc'}})

    result = search.execute()

    # Attach the resolved PI user record to each hit.
    hits = [
        {**hit.to_dict(),
         'pi': _get_user_by_username(hit, hit.project.value.pi)}
        for hit in result.hits
    ]

    return {'listing': hits}
示例#9
0
def freeze_project_and_entity_metadata(project_id, entity_uuids=None, revision=None, revised_authors=None):
    """Freeze project and entity metadata.

    Given a project id and a list of entity uuids (should be main entities)
    this function retrieves all metadata related to these entities and stores
    it into Elasticsearch as
    :class:`~designafe.libs.elasticsearch.docs.publications.BaseESPublication`

    When publishing for the first time or publishing over an existing
    publication, we clear any existing entities (if any) from the published
    metadata. We use entity_uuids (the entities getting DOIs) to rebuild the
    rest of the publication. These entities usually do not have files
    associated to them (except published reports/documents).

    :param str project_id: Project id.
    :param list entity_uuids: Entity uuid strings.
    :param int revision: Revision number being published, if any.
    :param revised_authors: Authors to preserve from the original
        publication when revising.
    """
    es_client = new_es_client()
    mgr = ProjectsManager(service_account())
    prj = mgr.get_project_by_id(project_id)
    pub_doc = BaseESPublication(project_id=project_id, revision=revision, using=es_client)
    # The un-revisioned (original) publication is only needed to carry
    # values forward when revising.
    original_pub = BaseESPublication(project_id=project_id) if revised_authors else None
    publication = pub_doc.to_dict()

    if entity_uuids:
        # clear any existing sub entities in publication and keep updated fileObjs
        fields_to_clear = []
        entities_with_files = []
        for key in list(FIELD_MAP.keys()):
            if FIELD_MAP[key] in list(publication.keys()):
                fields_to_clear.append(FIELD_MAP[key])
        fields_to_clear = set(fields_to_clear)

        for field in fields_to_clear:
            for ent in publication[field]:
                if 'fileObjs' in ent:
                    # remember file listings so they can be re-attached below
                    entities_with_files.append(ent)
                if ent['uuid'] in entity_uuids:
                    publication[field] = []

        for ent_uuid in entity_uuids:
            entity = None
            entity = mgr.get_entity_by_uuid(ent_uuid)

            if entity:
                entity_dict = entity.to_body_dict()
                pub_entities_field_name = FIELD_MAP[entity.name]

                # re-attach the preserved fileObjs for this entity
                for e in entities_with_files:
                    if e['uuid'] == entity_dict['uuid']:
                        entity_dict['fileObjs'] = e['fileObjs']

                _set_related_entities(entity, publication)
                _delete_unused_fields(entity_dict)

                if entity_dict['value']['dois']:
                    # the most recently added DOI is the published one
                    entity_dict['doi'] = entity_dict['value']['dois'][-1]
                    if revision:
                        _preserve_project_values(original_pub, entity_dict, revised_authors)
                _set_authors(entity_dict, publication)
                publication[pub_entities_field_name].append(entity_dict)

    prj_dict = prj.to_body_dict()
    _delete_unused_fields(prj_dict)
    if revision:
        _preserve_project_values(original_pub, prj_dict, revised_authors)

    # Key swap for old awardNumber issues: the scalar 'awardNumber' is
    # replaced by the 'awardNumbers' list; non-list values are dropped.
    award_number = prj.award_number or []

    if not isinstance(award_number, list):
        award_number = []

    prj_dict['value']['awardNumbers'] = award_number
    prj_dict['value'].pop('awardNumber', None)
    if publication.get('project'):
        publication['project'].update(prj_dict)
    else:
        publication['project'] = prj_dict

    pub_doc.update(using=es_client, **publication)
    IndexedPublication._index.refresh(using=es_client)
    return pub_doc
示例#10
0
def archive(project_id, revision=None):
    """Archive Published Files and Metadata

    When given a project_id, this function will copy and compress all of the
    published files for a project, and it will also include a formatted json
    document of the published metadata.
    Note: This metadata file will only be used until the Fedora system is set
    up again.

    :param str project_id: id of the published project to archive.
    :param int revision: optional revision number of the publication.
    """

    es_client = new_es_client()
    pub = BaseESPublication(project_id=project_id, revision=revision, using=es_client)
    if revision:
        archive_prefix = '{}v{}'.format(pub.projectId, revision)
    else:
        archive_prefix = pub.projectId
    archive_name = '{}_archive.zip'.format(archive_prefix)
    metadata_name = '{}_metadata.json'.format(archive_prefix)
    pub_dir = settings.DESIGNSAFE_PUBLISHED_PATH
    arc_dir = os.path.join(pub_dir, 'archives/')
    archive_path = os.path.join(arc_dir, archive_name)
    metadata_path = os.path.join(arc_dir, metadata_name)

    def set_perms(dir, octal, subdir=None):
        """Chmod ``dir`` (and the whole ``subdir`` tree) to ``octal``."""
        try:
            os.chmod(dir, octal)
            if subdir:
                if not os.path.isdir(subdir):
                    raise Exception('subdirectory does not exist!')
                for root, dirs, files in os.walk(subdir):
                    os.chmod(root, octal)
                    for d in dirs:
                        os.chmod(os.path.join(root, d), octal)
                    for f in files:
                        os.chmod(os.path.join(root, f), octal)
        except Exception:
            logger.exception("Failed to set permissions for {}".format(dir))
            # Fall back to read-only so the tree is never left writable.
            os.chmod(dir, 0o555)

    # compress published files into a zip archive
    def create_archive():
        arc_source = os.path.join(pub_dir, archive_prefix)

        try:
            logger.debug("Creating archive for {}".format(archive_prefix))
            # `with` guarantees the zip is closed even if a write fails
            # (the original leaked the handle on exception). allowZip64
            # supports publications larger than 4 GiB.
            with zipfile.ZipFile(archive_path, mode='w', allowZip64=True) as zf:
                for root, _, files in os.walk(arc_source):
                    for f in files:
                        if f == archive_name:
                            continue  # never include the archive in itself
                        zf.write(os.path.join(root, f),
                                 os.path.join(root.replace(pub_dir, ''), f))
                zf.write(metadata_path, metadata_name)
        except Exception:
            logger.exception("Archive creation failed for {}".format(arc_source))
        finally:
            # Always lock the published tree back down to read-only.
            set_perms(pub_dir, 0o555, arc_source)
            set_perms(arc_dir, 0o555)

    # create formatted metadata for user download
    def create_metadata():
        mgr = ProjectsManager(service_account())
        pub_dict = pub._wrapped.to_dict()
        meta_dict = {}

        # Maps a project type to the publication field that holds its
        # DOI-carrying sub-entities.
        entity_type_map = {
            'experimental': 'experimentsList',
            'simulation': 'simulations',
            'hybrid_simulation': 'hybrid_simulations',
            'field_recon': 'missions', # TODO: this should support 'reports' as well (aka Documents)
        }

        project_uuid = pub_dict['project']['uuid']
        try:
            logger.debug("Creating metadata for {}".format(archive_prefix))
            if pub_dict['project']['value']['projectType'] in entity_type_map:
                ent_type = entity_type_map[pub_dict['project']['value']['projectType']]
                entity_uuids = []
                if ent_type in pub_dict.keys():
                    entity_uuids = [x['uuid'] for x in pub_dict[ent_type]]
                meta_dict = mgr.get_entity_by_uuid(project_uuid).to_datacite_json()
                meta_dict['published_resources'] = []
                meta_dict['url'] = TARGET_BASE.format(project_id=pub_dict['project_id'])
                for uuid in entity_uuids:
                    entity = mgr.get_entity_by_uuid(uuid)
                    ent_json = entity.to_datacite_json()
                    ent_json['doi'] = entity.dois[0]
                    ent_json['url'] = ENTITY_TARGET_BASE.format(
                        project_id=pub_dict['project_id'],
                        entity_uuid=uuid
                    )
                    meta_dict['published_resources'].append(ent_json)
            else:
                # Project types without mapped sub-entities carry the DOI
                # on the project itself.
                project = mgr.get_entity_by_uuid(project_uuid)
                meta_dict = project.to_datacite_json()
                meta_dict['doi'] = project.dois[0]
                meta_dict['url'] = TARGET_BASE.format(project_id=pub_dict['project_id'])

            with open(metadata_path, 'w') as meta_file:
                json.dump(meta_dict, meta_file)
        except Exception:
            # Was a bare `except:`; narrowed so SystemExit/KeyboardInterrupt
            # still propagate.
            logger.exception("Failed to create metadata!")

    try:
        # Open up permissions while writing, then the helpers above lock
        # everything back down.
        set_perms(pub_dir, 0o755, os.path.join(pub_dir, archive_prefix))
        set_perms(arc_dir, 0o755)
        create_metadata()
        create_archive()
    except Exception:
        logger.exception('Failed to archive publication!')
示例#11
0
def fix_file_tags(project_id, revision=None):
    """Repair file-tag UUIDs on a publication.

    After publishing, file tags still reference project-system file UUIDs.
    This re-resolves each tag against the published storage system — by the
    tag's recorded path when available, otherwise by matching project-file
    UUIDs to their published counterparts — then persists the updated
    publication document.

    :param str project_id: Project id of the publication to fix.
    :param int revision: Optional revision number of the publication.
    """
    es_client = new_es_client()
    pub = BaseESPublication(project_id=project_id, revision=revision, using=es_client)
    pub_dict = pub.to_dict()

    # Loop-invariant: the published directory prefix for this publication.
    pub_base = "{}v{}".format(project_id, revision) if revision else project_id

    entities_to_check = list(set(pub_dict.keys()).intersection(list(FIELD_MAP.values())))
    entities_to_check.append('project')

    def check_complete_tags(tags):
        """True when every tag already records its file path."""
        return all('path' in tag for tag in tags)

    def fix_tags_path(entity):
        """Resolve tag file UUIDs directly from each tag's recorded path."""
        for tag in entity['value']['fileTags']:
            try:
                pub_file = BaseFileResource.listing(
                    service_account(),
                    system="designsafe.storage.published",
                    path="{}{}".format(pub_base, tag['path'])
                )
                tag['fileUuid'] = pub_file.uuid
            except Exception as err:
                # Best effort: skip tags whose files cannot be listed.
                logger.info('error: {}'.format(err))
                continue

    def fix_tags_no_path(entity):
        """Resolve tag UUIDs by matching project-file UUIDs to published files."""
        if entity['name'] == 'designsafe.project':
            # Project-level tags: walk the project root's children.
            proj_other = BaseFileResource.listing(
                service_account(),
                system="project-{}".format(entity['uuid']),
                path="")
            for child in proj_other.children:
                try:
                    pub_file = BaseFileResource.listing(
                        service_account(),
                        system="designsafe.storage.published",
                        path="{}{}".format(pub_base, child.path))
                    proj_file = BaseFileResource.listing(
                        service_account(),
                        system="project-{}".format(entity['uuid']),
                        path=child.path)
                    for tag in entity['value']['fileTags']:
                        if tag['fileUuid'] == proj_file.uuid:
                            tag['fileUuid'] = pub_file.uuid
                except Exception as err:
                    logger.info('error: {}'.format(err))
                    continue
        else:
            # Sub-entity tags: walk the entity's associated fileObjs.
            for fobj in entity['fileObjs']:
                try:
                    pub_file = BaseFileResource.listing(
                        service_account(),
                        system="designsafe.storage.published",
                        path="{}{}".format(pub_base, fobj['path']))
                    proj_file = BaseFileResource.listing(
                        service_account(),
                        system="project-{}".format(pub_dict['project']['uuid']),
                        path=fobj['path'])
                    for tag in entity['value']['fileTags']:
                        if tag['fileUuid'] == proj_file.uuid:
                            tag['fileUuid'] = pub_file.uuid
                except Exception as err:
                    logger.info('error: {}'.format(err))
                    continue

    def _has_tags(entity):
        """True when the entity carries a fileTags list."""
        return 'value' in entity and 'fileTags' in entity['value']

    for entname in entities_to_check:
        entities = pub_dict[entname]
        # 'project' is a single dict; entity fields are lists. Use
        # isinstance (the original compared type() == list).
        if not isinstance(entities, list):
            entities = [entities]
        for entity in entities:
            if _has_tags(entity) and check_complete_tags(entity['value']['fileTags']):
                fix_tags_path(entity)
            elif _has_tags(entity):
                fix_tags_no_path(entity)

    pub.update(using=es_client, **pub_dict)
    IndexedPublication._index.refresh(using=es_client)
示例#12
0
def check_published_files(project_id, revision=None, selected_files=None):
    """Verify that a publication's files actually exist on disk.

    Compares the publication's related file paths (or ``selected_files`` for
    type-'other' projects) against the published directory on corral, and
    emails the dev project admins when files are missing or folders are empty.

    :param str project_id: Project id of the publication to check.
    :param int revision: Optional revision number of the publication.
    :param list selected_files: File paths to check; only provided when the
        project type is 'other'.
    """

    # Get the list of files that should be in the publication.
    es_client = new_es_client()
    publication = BaseESPublication(project_id=project_id,
                                    revision=revision,
                                    using=es_client)
    if selected_files:
        # Type 'other': the selection is the authoritative file list.
        filepaths = selected_files
    else:
        filepaths = publication.related_file_paths()

    missing_files = []
    existing_files = []
    empty_folders = []

    # Strip leading forward slashes and drop the trash folder.
    updated_filepaths = [
        file_path.strip('/') for file_path in filepaths
        if (file_path != '.Trash')
    ]

    pub_directory = '/corral-repl/tacc/NHERI/published/{}'.format(project_id)
    if revision:
        pub_directory += 'v{}'.format(revision)

    # Navigate through publication file paths and classify each one.
    for pub_file in updated_filepaths:
        file_to_check = os.path.join(pub_directory, pub_file)
        try:
            if os.path.isfile(file_to_check):
                existing_files.append(pub_file)
            elif os.path.isdir(file_to_check):
                # A directory counts as existing only if it has contents.
                if os.listdir(file_to_check):
                    existing_files.append(pub_file)
                else:
                    empty_folders.append(pub_file)
            else:
                missing_files.append(pub_file)
        except OSError as exc:
            logger.info(exc)

    # Send email if there are files/folders missing/empty.
    if missing_files or empty_folders:
        # Log for potential later queries (lazy % formatting).
        logger.info("check_published_files missing files: %s %s",
                    project_id, missing_files)
        logger.info("check_published_files empty folders: %s %s",
                    project_id, empty_folders)

        # Send email to dev admins.
        prj_admins = settings.DEV_PROJECT_ADMINS_EMAIL
        for admin in prj_admins:
            email_body = """
                <p>Hello,</p>
                <p>
                    The following project has been published with either missing files/folders or empty folders:
                    <br>
                    <b>{prjID} - revision {revision}</b>
                    <br>
                    Path to publication files: {pubFiles}
                </p>
                <p>
                    These are the missing files/folders for this publication:
                    <br>
                    {missingFiles}
                </p>
                <p>
                    These are the empty folders for this publication:
                    <br>
                    {emptyFolders}
                </p>
                This is a programmatically generated message. Do NOT reply to this message.
                """.format(pubFiles=pub_directory,
                           prjID=project_id,
                           missingFiles=missing_files,
                           emptyFolders=empty_folders,
                           revision=revision)

            send_mail(
                "DesignSafe Alert: Published Project has missing files/folders",
                email_body,
                settings.DEFAULT_FROM_EMAIL, [admin],
                html_message=email_body)
示例#13
0
def copy_publication_files_to_corral(self,
                                     project_id,
                                     revision=None,
                                     selected_files=None):
    """
    Takes a project ID and copies project files to a published directory.

    Copies every related file path from the project system on corral into
    the read-only published area, locks permissions down (dirs 0555,
    files 0444), then dispatches Fedora ingest and file indexing as
    follow-up async tasks.

    :param str project_id: Project ID
    :param int revision: The revision number of the publication
    :param list of selected_files strings: Only provided if project type == other.
    """

    es_client = new_es_client()
    publication = BaseESPublication(project_id=project_id,
                                    revision=revision,
                                    using=es_client)

    filepaths = publication.related_file_paths()
    if not len(filepaths) and selected_files:
        # Project is "Other" so we just copy the selected files
        filepaths = [
            file_path.strip('/') for file_path in selected_files
            if (file_path != '.Trash')
        ]

    # De-duplicate and copy in a stable, sorted order.
    filepaths = list(set(filepaths))
    filepaths = sorted(filepaths)
    # NOTE(review): base_path is never used below — confirm before removing.
    base_path = ''.join(['/', publication.projectId])
    # Published root must be writable while creating the destination dir.
    os.chmod('/corral-repl/tacc/NHERI/published', 0o755)
    prefix_dest = '/corral-repl/tacc/NHERI/published/{}'.format(project_id)
    if revision:
        prefix_dest += 'v{}'.format(revision)
    if not os.path.isdir(prefix_dest):
        os.mkdir(prefix_dest)

    prefix_src = '/corral-repl/tacc/NHERI/projects/{}'.format(
        publication.project['uuid'])
    for filepath in filepaths:
        local_src_path = '{}/{}'.format(prefix_src, filepath)
        local_dst_path = '{}/{}'.format(prefix_dest, filepath)
        logger.info('Trying to copy: %s to %s', local_src_path, local_dst_path)
        if os.path.isdir(local_src_path):
            try:
                #os.mkdir(local_dst_path)
                if not os.path.isdir(os.path.dirname(local_dst_path)):
                    os.makedirs(os.path.dirname(local_dst_path))
                shutil.copytree(local_src_path, local_dst_path)
                # Lock the copied tree: dirs read+list, files read-only.
                for root, dirs, files in os.walk(local_dst_path):
                    for d in dirs:
                        os.chmod(os.path.join(root, d), 0o555)
                    for f in files:
                        os.chmod(os.path.join(root, f), 0o444)
                os.chmod(local_dst_path, 0o555)
            except OSError as exc:
                # Best-effort copy: log and continue with remaining paths.
                logger.info(exc)
            except IOError as exc:
                logger.info(exc)
        else:
            try:
                if not os.path.isdir(os.path.dirname(local_dst_path)):
                    os.makedirs(os.path.dirname(local_dst_path))
                # Re-lock any intermediate dirs created above, then copy.
                for root, dirs, files in os.walk(
                        os.path.dirname(local_dst_path)):
                    for d in dirs:
                        os.chmod(os.path.join(root, d), 0o555)
                    for f in files:
                        os.chmod(os.path.join(root, f), 0o444)

                shutil.copy(local_src_path, local_dst_path)
                os.chmod(local_dst_path, 0o444)
            except OSError as exc:
                logger.info(exc)
            except IOError as exc:
                logger.info(exc)

    # Lock the destination and the published root back to read-only.
    os.chmod(prefix_dest, 0o555)
    os.chmod('/corral-repl/tacc/NHERI/published', 0o555)

    # Mirror the publication into Fedora asynchronously.
    save_to_fedora.apply_async(args=[project_id, revision])

    index_path = '/' + project_id
    if revision:
        index_path += 'v{}'.format(revision)
    # Index the newly published files so they appear in listings/search.
    agave_indexer.apply_async(kwargs={
        'username': '******',
        'systemId': 'designsafe.storage.published',
        'filePath': index_path,
        'recurse': True
    },
                              queue='indexing')
示例#14
0
def search(offset=0, limit=100, query_string='', limit_fields=True, *args):
    """Search published projects using filters encoded in a URL-quoted JSON string.

    Parameters
    ----------
    offset : int
        Pagination offset (Elasticsearch ``from_``).
    limit : int
        Maximum number of hits to return (Elasticsearch ``size``).
    query_string : str
        URL-quoted JSON document with ``typeFilters``, ``queries`` and
        ``advancedFilters`` keys (shape produced by the front-end search UI).
    limit_fields : bool
        When True, restrict the returned ``_source`` to the listing fields.
    *args
        Unused; kept for backward compatibility with existing callers.

    Returns
    -------
    dict
        ``{'listing': [...]}`` where each hit is the ES document as a dict
        with a ``pi`` entry resolved via ``_get_user_by_username``.
    """
    query_dict = json.loads(urllib.parse.unquote(query_string))

    type_filters = query_dict['typeFilters']
    has_type_filters = any(map(bool, type_filters.values()))

    # Parameter renamed from `type` to avoid shadowing the builtin.
    def filter_query(project_type):
        return Q('term', **{'project.value.projectType._exact': project_type})

    selected_filters = [key for key in type_filters if type_filters[key]]

    type_query = Q('bool', should=[filter_query(t) for t in selected_filters])
    client = new_es_client()
    # Local renamed from `search` so it no longer shadows this function.
    pub_search = IndexedPublication.search(using=client)
    if has_type_filters:
        pub_search = pub_search.filter(type_query)

    query_filters = []

    # Query string fields
    queries = query_dict['queries']
    if queries['author']:
        query_filters.append(search_utils.author_query(queries['author']))
    if queries['title']:
        query_filters.append(search_utils.title_query(queries['title']))
    if queries['keyword']:
        query_filters.append(search_utils.keyword_query(queries['keyword']))
    if queries['description']:
        query_filters.append(
            search_utils.description_query(queries['description']))

    advanced = query_dict['advancedFilters']

    # Experimental advanced filters
    facility = advanced['experimental']['experimentalFacility']
    experiment_type = advanced['experimental']['experimentType']
    if facility['name']:
        query_filters.append(
            search_utils.experimental_facility_query(facility))
    if experiment_type:
        query_filters.append(
            search_utils.experiment_type_query(experiment_type))

    # Simulation advanced filters
    simulation_type = advanced['simulation']['simulationType']
    if simulation_type:
        query_filters.append(
            search_utils.simulation_type_query(simulation_type))

    # Field recon advanced filters
    nh_type = advanced['field_recon']['naturalHazardType']
    nh_event = advanced['field_recon']['naturalHazardEvent']
    if nh_type:
        query_filters.append(search_utils.nh_type_query(nh_type))
    if nh_event:
        query_filters.append(search_utils.nh_event_query(nh_event))

    # Other advanced filters
    data_type = advanced['other']['dataType']
    if data_type:
        query_filters.append(search_utils.other_type_query(data_type))

    # Hybrid sim advanced filters
    # BUGFIX: the original chained assignment (`sim_type = data_type = ...`)
    # accidentally clobbered `data_type` with the hybrid-sim value.
    sim_type = advanced['hybrid_simulation']['hybridSimulationType']
    if sim_type:
        query_filters.append(search_utils.hybrid_sim_type_query(sim_type))

    pub_search = pub_search.filter('bool', must=query_filters)
    # Only surface documents that have been published.
    pub_search = pub_search.filter(Q('term', status='published'))
    pub_search = pub_search.extra(from_=offset, size=limit)
    if limit_fields:
        pub_search = pub_search.source(includes=[
            'project.value.title', 'project.value.pi',
            'project.value.keywords', 'project.value.projectType',
            'project.value.dataType', 'created', 'projectId', 'users',
            'system', 'revision'
        ])

    pub_search = pub_search.sort({'created': {'order': 'desc'}})
    res = pub_search.execute()
    hits = [
        {**h.to_dict(), 'pi': _get_user_by_username(h, h.project.value.pi)}
        for h in res.hits
    ]

    return {'listing': hits}