Example #1
    def _get_filename(self, obj, **kwargs):
        """Get the expected filename with absolute path"""
        base_dir = kwargs.get('base_dir', None)
        dir_only = kwargs.get('dir_only', False)
        obj_dir = kwargs.get('obj_dir', False)
        path = self._construct_path(obj, **kwargs)

        # for JOB_WORK directory
        if base_dir and dir_only and obj_dir:
            return os.path.abspath(path)
        cache_path = self._get_cache_path(path)
        if dir_only:
            if not os.path.exists(cache_path):
                os.makedirs(cache_path)
            return cache_path
        if self._in_cache(path):
            return cache_path
        elif self._exists(obj, **kwargs):
            if not dir_only:
                self._pull_into_cache(path)
                return cache_path
        raise ObjectNotFound(
            'objectstore.get_filename, no cache_path: {obj}, '
            'kwargs: {kwargs}'.format(obj=obj, kwargs=kwargs))
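
A minimal caller-side sketch (hypothetical, not part of the snippet above): `store` stands in for an object-store instance that provides this method, and `dataset` for a persisted object.

try:
    local_path = store._get_filename(dataset)
except ObjectNotFound:
    # The caller decides how to recover, e.g. re-create the object or surface a "not found" error.
    local_path = None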
Example #2
    def _get_filename(self, obj, **kwargs):
        base_dir = kwargs.get('base_dir', None)
        dir_only = kwargs.get('dir_only', False)
        obj_dir = kwargs.get('obj_dir', False)
        rel_path = self._construct_path(obj, **kwargs)

        # for JOB_WORK directory
        if base_dir and dir_only and obj_dir:
            return os.path.abspath(rel_path)

        cache_path = self._get_cache_path(rel_path)
        # S3 does not recognize directories as files, so we cannot check whether they exist.
        # So, if checking dir only, ensure given dir exists in cache and return
        # the expected cache path.
        # dir_only = kwargs.get('dir_only', False)
        # if dir_only:
        #     if not os.path.exists(cache_path):
        #         os.makedirs(cache_path)
        #     return cache_path
        # Check if the file exists in the cache first
        if self._in_cache(rel_path):
            return cache_path
        # Check if the file exists in persistent storage and, if it does, pull it into cache
        elif self._exists(obj, **kwargs):
            if dir_only:  # Directories do not get pulled into cache
                return cache_path
            else:
                if self._pull_into_cache(rel_path):
                    return cache_path
        # For the case of retrieving a directory only, return the expected path
        # even if it does not exist.
        # if dir_only:
        #     return cache_path
        raise ObjectNotFound(
            'objectstore.get_filename, no cache_path: %s, kwargs: %s' %
            (str(obj), str(kwargs)))
Example #3
 def _update_from_file(self, obj, file_name=None, create=False, **kwargs):
     if create:
         self._create(obj, **kwargs)
     if self._exists(obj, **kwargs):
         rel_path = self._construct_path(obj, **kwargs)
         # Choose whether to use the dataset file itself or an alternate file
         if file_name:
             source_file = os.path.abspath(file_name)
             # Copy into cache
             cache_file = self._get_cache_path(rel_path)
             try:
                 if source_file != cache_file:
                     # FIXME? Should this be a `move`?
                     shutil.copy2(source_file, cache_file)
                 self._fix_permissions(cache_file)
             except OSError:
                 log.exception("Trouble copying source file '%s' to cache '%s'", source_file, cache_file)
         else:
             source_file = self._get_cache_path(rel_path)
         # Update the file on S3
         self._push_to_os(rel_path, source_file)
     else:
         raise ObjectNotFound('objectstore.update_from_file, object does not exist: %s, kwargs: %s'
                              % (str(obj), str(kwargs)))
Example #4
    def check_modifiable(self, trans, ld):
        """
        Check whether the current user has permission to modify the library dataset.

        :param  ld: library dataset
        :type   ld: galaxy.model.LibraryDataset

        :returns:   the original library dataset
        :rtype:     galaxy.model.LibraryDataset

        :raises:    ObjectNotFound, InsufficientPermissionsException
        """
        if ld.deleted:
            raise ObjectNotFound(
                'Library dataset with the id provided is deleted.')
        elif trans.user_is_admin:
            return ld
        if not trans.app.security_agent.can_modify_library_item(
                trans.get_current_user_roles(), ld):
            raise InsufficientPermissionsException(
                'You do not have proper permission to modify this library dataset.'
            )
        else:
            return ld
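
A hypothetical controller-side sketch showing how the two exceptions raised here might be translated into HTTP responses; `update_ld`, `manager`, and `error_response` are illustrative names only.

def update_ld(manager, trans, ld):
    # Hypothetical endpoint body; error_response is an assumed helper.
    try:
        return manager.check_modifiable(trans, ld)
    except ObjectNotFound as e:
        return error_response(404, str(e))
    except InsufficientPermissionsException as e:
        return error_response(403, str(e))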
Example #5
    def search(self, trans, search_term, page, page_size, boosts):
        """
        Perform the search on the given search_term

        :param search_term: unicode encoded string with the search term(s)
        :param page: integer with the number of the page requested
        :param page_size: integer defining the length of one page
        :param boosts: namedtuple containing custom boosts for search fields, see api/repositories.py

        :returns results: dictionary containing the hits themselves and the hits summary
        """
        log.debug('raw search query: #' + str(search_term))
        lower_search_term = search_term.lower()
        allow_query, search_term_without_filters = self._parse_reserved_filters(
            lower_search_term)
        log.debug('term without filters: #' + str(search_term_without_filters))

        whoosh_index_dir = trans.app.config.whoosh_index_dir
        index_exists = whoosh.index.exists_in(whoosh_index_dir)
        if index_exists:
            index = whoosh.index.open_dir(whoosh_index_dir)
            try:
                # Some literature about BM25F:
                # http://trec.nist.gov/pubs/trec13/papers/microsoft-cambridge.web.hard.pdf
                # http://en.wikipedia.org/wiki/Okapi_BM25
                # __Basically__ the higher the number, the bigger the weight.
                repo_weighting = RepoWeighting(
                    field_B={
                        'name_B': boosts.repo_name_boost,
                        'description_B': boosts.repo_description_boost,
                        'long_description_B': boosts.repo_long_description_boost,
                        'homepage_url_B': boosts.repo_homepage_url_boost,
                        'remote_repository_url_B': boosts.repo_remote_repository_url_boost,
                        'repo_owner_username_B': boosts.repo_owner_username_boost,
                        'categories_B': boosts.categories_boost
                    })
                searcher = index.searcher(weighting=repo_weighting)
                parser = MultifieldParser(
                    ['name', 'description', 'long_description', 'homepage_url',
                     'remote_repository_url', 'repo_owner_username',
                     'categories'],
                    schema=schema)

                # If user query has just filters prevent wildcard search.
                if len(search_term_without_filters) < 1:
                    user_query = Every('name')
                    sortedby = 'name'
                else:
                    user_query = parser.parse('*' + search_term_without_filters + '*')
                    sortedby = ''
                try:
                    hits = searcher.search_page(user_query,
                                                page,
                                                pagelen=page_size,
                                                filter=allow_query,
                                                terms=True,
                                                sortedby=sortedby)
                    log.debug('total hits: ' + str(len(hits)))
                    log.debug('scored hits: ' + str(hits.scored_length()))
                except ValueError:
                    raise ObjectNotFound('The requested page does not exist.')
                results = {}
                results['total_results'] = str(len(hits))
                results['page'] = str(page)
                results['page_size'] = str(page_size)
                results['hits'] = []
                for hit in hits:
                    log.debug('matched terms: ' + str(hit.matched_terms()))
                    hit_dict = {}
                    hit_dict['id'] = trans.security.encode_id(hit.get('id'))
                    hit_dict['repo_owner_username'] = hit.get(
                        'repo_owner_username')
                    hit_dict['name'] = hit.get('name')
                    hit_dict['long_description'] = hit.get('long_description')
                    hit_dict['remote_repository_url'] = hit.get(
                        'remote_repository_url')
                    hit_dict['homepage_url'] = hit.get('homepage_url')
                    hit_dict['description'] = hit.get('description')
                    hit_dict['last_updated'] = hit.get('last_updated')
                    hit_dict['full_last_updated'] = hit.get(
                        'full_last_updated')
                    hit_dict['repo_lineage'] = hit.get('repo_lineage')
                    hit_dict['categories'] = hit.get('categories')
                    hit_dict['approved'] = hit.get('approved')
                    hit_dict['times_downloaded'] = hit.get('times_downloaded')
                    results['hits'].append({
                        'repository': hit_dict,
                        'score': hit.score
                    })
                return results
            finally:
                searcher.close()
        else:
            raise exceptions.InternalServerError(
                'The search index file is missing.')
Example #6
    def send(self,
             trans,
             history_id,
             bucket_name,
             authz_id,
             dataset_ids=None,
             overwrite_existing=False):
        """
        Implements the logic of sending dataset(s) from a given history to a given cloud-based storage
        (e.g., Amazon S3).

        :type  trans:               galaxy.webapps.base.webapp.GalaxyWebTransaction
        :param trans:               Galaxy web transaction

        :type  history_id:          string
        :param history_id:          the (encoded) id of history from which the object should be sent.

        :type  bucket_name:         string
        :param bucket_name:         the name of a bucket to which data should be sent (e.g., a bucket
                                    name on AWS S3).

        :type  authz_id:            int
        :param authz_id:            the ID of CloudAuthz to be used for authorizing access to the resource provider.
                                    You may get a list of the defined authorizations via `/api/cloud/authz`. Also,
                                    you can use `/api/cloud/authz/create` to define a new authorization.

        :type  dataset_ids:         set
        :param dataset_ids:         [Optional] The list of (decoded) dataset ID(s) belonging to the given
                                    history which should be sent to the given provider. If not provided,
                                    Galaxy sends all the datasets belonging to the given history.

        :type  overwrite_existing:  boolean
        :param overwrite_existing:  [Optional] If set to "True", and an object with the same name as the
                                    dataset to be sent already exists in the bucket, Galaxy replaces
                                    the existing object with the dataset to be sent. If set to
                                    "False", Galaxy appends the current datetime to the dataset name to
                                    prevent overwriting the existing object.

        :rtype:                     tuple
        :return:                    A tuple of two lists of labels of the objects that were successfully and
                                    unsuccessfully sent to cloud.
        """
        if CloudProviderFactory is None:
            raise Exception(NO_CLOUDBRIDGE_ERROR_MESSAGE)

        if not hasattr(trans.app, 'authnz_manager'):
            err_msg = "The OpenID Connect protocol, a required feature for sending data to cloud, " \
                      "is not enabled on this Galaxy instance."
            log.debug(err_msg)
            raise MessageException(err_msg)

        cloudauthz = trans.app.authnz_manager.try_get_authz_config(
            trans.sa_session, trans.user.id, authz_id)

        history = trans.sa_session.query(
            trans.app.model.History).get(history_id)
        if not history:
            raise ObjectNotFound("History with ID `{}` not found.".format(
                trans.app.security.encode_id(history_id)))

        sent = []
        failed = []
        for hda in history.datasets:
            if hda.deleted or hda.purged or hda.state != "ok" or hda.creating_job.tool_id == SEND_TOOL:
                continue
            if dataset_ids is None or hda.dataset.id in dataset_ids:
                try:
                    object_label = hda.name.replace(" ", "_")
                    args = {
                        # We encode ID here because the tool wrapper expects
                        # an encoded ID and attempts decoding it.
                        "authz_id": trans.security.encode_id(cloudauthz.id),
                        "bucket": bucket_name,
                        "object_label": object_label,
                        "filename": hda,
                        "overwrite_existing": overwrite_existing
                    }
                    incoming = (util.Params(args, sanitize=False)).__dict__
                    d2c = trans.app.toolbox.get_tool(SEND_TOOL,
                                                     SEND_TOOL_VERSION)
                    if not d2c:
                        log.debug(
                            f"Failed to get the `send` tool per user `{trans.user.id}` request."
                        )
                        failed.append(
                            json.dumps({
                                "object": object_label,
                                "error": "Unable to get the `send` tool."
                            }))
                        continue
                    res = d2c.execute(trans, incoming, history=history)
                    job = res[0]
                    sent.append(
                        json.dumps({
                            "object": object_label,
                            "job_id": trans.app.security.encode_id(job.id)
                        }))
                except Exception as e:
                    err_msg = "maybe invalid or unauthorized credentials. {}".format(
                        util.unicodify(e))
                    log.debug(
                        "Failed to send the dataset `{}` per user `{}` request to cloud, {}"
                        .format(object_label, trans.user.id, err_msg))
                    failed.append(
                        json.dumps({
                            "object": object_label,
                            "error": err_msg
                        }))
        return sent, failed
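
A hypothetical caller of `send` could summarize the JSON-encoded status entries it returns, along these lines (`cloud_manager` is an assumed instance name):

sent, failed = cloud_manager.send(trans, history_id, bucket_name, authz_id)
for entry in failed:
    record = json.loads(entry)
    log.warning("could not send %s: %s", record["object"], record["error"])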
Example #7
    def get(self,
            trans,
            history_id,
            bucket_name,
            objects,
            authz_id,
            input_args=None):
        """
        Implements the logic of getting a file from a cloud-based storage (e.g., Amazon S3)
        and persisting it as a Galaxy dataset.

        This manager does NOT require user credentials; instead, it uses a more secure method,
        which leverages CloudAuthz (https://github.com/galaxyproject/cloudauthz) and automatically
        requests temporary credentials to access the defined resources.

        :type  trans:       galaxy.webapps.base.webapp.GalaxyWebTransaction
        :param trans:       Galaxy web transaction

        :type  history_id:  string
        :param history_id:  the (decoded) id of the history to which the object should be received.

        :type  bucket_name: string
        :param bucket_name: the name of a bucket from which data should be fetched (e.g., a bucket name on AWS S3).

        :type  objects:     list of string
        :param objects:     the names of the objects to be fetched.

        :type  authz_id:    int
        :param authz_id:    the ID of CloudAuthz to be used for authorizing access to the resource provider. You may
                            get a list of the defined authorizations by sending a GET request to `/api/cloud/authz`. Also, you can
                            POST to `/api/cloud/authz` to define a new authorization.

        :type  input_args:  dict
        :param input_args:  [Optional] a dictionary of input parameters:
                            dbkey, file_type, space_to_tab, to_posix_lines (see galaxy/webapps/galaxy/api/cloud.py)

        :rtype:             list of galaxy.model.Dataset
        :return:            a list of datasets created for the fetched files.
        """
        if CloudProviderFactory is None:
            raise Exception(NO_CLOUDBRIDGE_ERROR_MESSAGE)

        if input_args is None:
            input_args = {}

        if not hasattr(trans.app, 'authnz_manager'):
            err_msg = "The OpenID Connect protocol, a required feature for getting data from cloud, " \
                      "is not enabled on this Galaxy instance."
            log.debug(err_msg)
            raise MessageException(err_msg)

        cloudauthz = trans.app.authnz_manager.try_get_authz_config(
            trans.sa_session, trans.user.id, authz_id)
        credentials = trans.app.authnz_manager.get_cloud_access_credentials(
            cloudauthz, trans.sa_session, trans.user.id, trans.request)
        connection = self.configure_provider(cloudauthz.provider, credentials)
        try:
            bucket = connection.storage.buckets.get(bucket_name)
            if bucket is None:
                raise RequestParameterInvalidException(
                    f"The bucket `{bucket_name}` not found.")
        except Exception as e:
            raise ItemAccessibilityException(
                "Could not get the bucket `{}`: {}".format(
                    bucket_name, util.unicodify(e)))

        datasets = []
        for obj in objects:
            try:
                key = bucket.objects.get(obj)
            except Exception as e:
                raise MessageException(
                    "The following error occurred while getting the object {}: {}"
                    .format(obj, util.unicodify(e)))
            if key is None:
                log.exception(
                    "Could not get object `{}` for user `{}`. Object may not exist, or the provided credentials are "
                    "invalid or not authorized to read the bucket/object.".
                    format(obj, trans.user.id))
                raise ObjectNotFound(
                    "Could not get the object `{}`. Please check if the object exists, and credentials are valid and "
                    "authorized to read the bucket and object. ".format(obj))

            params = Params(self._get_inputs(obj, key, input_args),
                            sanitize=False)
            incoming = params.__dict__
            history = trans.sa_session.query(
                trans.app.model.History).get(history_id)
            if not history:
                raise ObjectNotFound("History with ID `{}` not found.".format(
                    trans.app.security.encode_id(history_id)))
            output = trans.app.toolbox.get_tool('upload1').handle_input(
                trans, incoming, history=history)

            job_errors = output.get('job_errors', [])
            if job_errors:
                raise ValueError(
                    'The following error occurred while getting the given object(s) from {}: {}'
                    .format(cloudauthz.provider, job_errors))
            else:
                for d in output['out_data']:
                    datasets.append(d[1].dataset)

        return datasets
Example #8
    def search(self, trans, search_term, page, page_size, boosts):
        """
        Perform the search on the given search_term

        :param search_term: unicode encoded string with the search term(s)

        :returns results: dictionary containing the number of hits, the hits themselves, and the matched terms for each hit
        """
        tool_index_dir = os.path.join(trans.app.config.whoosh_index_dir,
                                      'tools')
        index_exists = whoosh.index.exists_in(tool_index_dir)
        if index_exists:
            index = whoosh.index.open_dir(tool_index_dir)
            try:
                # Some literature about BM25F:
                # http://trec.nist.gov/pubs/trec13/papers/microsoft-cambridge.web.hard.pdf
                # http://en.wikipedia.org/wiki/Okapi_BM25
                # __Basically__ the higher the number, the bigger the weight.
                tool_weighting = scoring.BM25F(
                    field_B={
                        'name_B': boosts.tool_name_boost,
                        'description_B': boosts.tool_description_boost,
                        'help_B': boosts.tool_help_boost,
                        'repo_owner_username_B': boosts.tool_repo_owner_username_boost
                    })
                searcher = index.searcher(weighting=tool_weighting)

                parser = MultifieldParser(
                    ['name', 'description', 'help', 'repo_owner_username'],
                    schema=schema)

                user_query = parser.parse('*' + search_term + '*')

                try:
                    hits = searcher.search_page(user_query,
                                                page,
                                                pagelen=page_size,
                                                terms=True)
                except ValueError:
                    raise ObjectNotFound('The requested page does not exist.')

                log.debug('searching tools for: #' + str(search_term))
                log.debug('total hits: ' + str(len(hits)))
                log.debug('scored hits: ' + str(hits.scored_length()))
                results = {}
                results['total_results'] = str(len(hits))
                results['page'] = str(page)
                results['page_size'] = str(page_size)
                results['hits'] = []
                for hit in hits:
                    hit_dict = {}
                    hit_dict['id'] = hit.get('id')
                    hit_dict['repo_owner_username'] = hit.get(
                        'repo_owner_username')
                    hit_dict['repo_name'] = hit.get('repo_name')
                    hit_dict['name'] = hit.get('name')
                    hit_dict['description'] = hit.get('description')
                    matched_terms = {
                        k: unicodify(v)
                        for k, v in hit.matched_terms()
                    }
                    results['hits'].append({
                        'tool': hit_dict,
                        'matched_terms': matched_terms,
                        'score': hit.score
                    })
                return results
            finally:
                searcher.close()
        else:
            raise exceptions.InternalServerError(
                'The search index file is missing.')
Example #9
 def _empty(self, obj, **kwargs):
     if self._exists(obj, **kwargs):
         return self._size(obj, **kwargs) == 0  # empty means the stored object has zero size
     else:
         raise ObjectNotFound('objectstore.empty, object does not exist: %s, kwargs: %s'
                              % (str(obj), str(kwargs)))
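
A small usage sketch (hypothetical names): `_empty` answers "does the stored object have zero size?" and raises ObjectNotFound for objects that were never created.

try:
    if store._empty(dataset):
        log.debug("dataset exists in the object store but has no content yet")
except ObjectNotFound:
    log.debug("dataset has not been created in the object store")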
Example #10
                try:
                    if source_file != cache_file:
                        # FIXME? Should this be a `move`?
                        shutil.copy2(source_file, cache_file)
                    self._fix_permissions(cache_file)
                except OSError as ex:
                    log.error(
                        "Trouble copying source file '%s' to cache '%s': %s" %
                        (source_file, cache_file, ex))
            else:
                source_file = self._get_cache_path(rel_path)
            # Update the file on S3
            self._push_to_os(rel_path, source_file)
        else:
            raise ObjectNotFound(
                'objectstore.update_from_file, object does not exist: %s, kwargs: %s'
                % (str(obj), str(kwargs)))

    def get_object_url(self, obj, **kwargs):
        if self.exists(obj, **kwargs):
            rel_path = self._construct_path(obj, **kwargs)
            try:
                key = Key(self.bucket, rel_path)
                return key.generate_url(expires_in=86400)  # 24hrs
            except S3ResponseError as ex:
                log.warning("Trouble generating URL for dataset '%s': %s" %
                            (rel_path, ex))
        return None

    def get_store_usage_percent(self):
        return 0.0
Example #11
 def _get_tool(self, id, tool_version=None, user=None):
     id = urllib.parse.unquote_plus(id)  # Python 3; the original Python 2 code used urllib.unquote_plus
     tool = self.app.toolbox.get_tool(id, tool_version)
     if not tool or not tool.allow_user_access(user):
         raise ObjectNotFound("Could not find tool with id '%s'" % id)
     return tool
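
A hypothetical API-side usage of `_get_tool` (`tool_id` and `payload` are illustrative names); in Galaxy, ObjectNotFound is typically mapped to a "not found" response by the web layer.

try:
    tool = self._get_tool(tool_id, tool_version=payload.get("tool_version"), user=trans.user)
except ObjectNotFound:
    raise  # let the framework turn this into a "not found" error for the client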
Example #12
    def contents(self,
                 trans,
                 hdca_id,
                 parent_id,
                 instance_type='history',
                 limit=None,
                 offset=None,
                 **kwds):
        """
        GET /api/dataset_collection/{hdca_id}/contents/{parent_id}?limit=100&offset=0

        Shows direct child contents of indicated dataset collection parent id

        :type   hdca_id:    encoded string id
        :param  hdca_id:    HDCA.id
        :type   parent_id:  encoded string id
        :param  parent_id:  parent dataset_collection.id for the dataset contents to be viewed
        :type   limit:      int
        :param  limit:      pagination limit for returned dataset collection elements
        :type   offset:     int
        :param  offset:     pagination offset for returned dataset collection elements
        :rtype:     list
        :returns:   list of dataset collection elements and contents
        """
        svc = self.__service(trans)
        encode_id = trans.app.security.encode_id

        # validate HDCA for current user, will throw error if not permitted
        # TODO: refactor get_dataset_collection_instance
        hdca = svc.get_dataset_collection_instance(trans,
                                                   id=hdca_id,
                                                   check_ownership=True,
                                                   instance_type=instance_type)

        # check to make sure the dsc is part of the validated hdca
        decoded_parent_id = decode_id(self.app, parent_id)
        if parent_id != hdca_id and not hdca.contains_collection(
                decoded_parent_id):
            errmsg = 'Requested dataset collection is not contained within indicated history content'
            raise ObjectNotFound(errmsg)

        # retrieve contents
        contents_qry = svc.get_collection_contents_qry(decoded_parent_id,
                                                       limit=limit,
                                                       offset=offset)

        # dictify and tack on a collection_url for drilling down into nested collections
        def process_element(dsc_element):
            result = dictify_element_reference(dsc_element,
                                               recursive=False,
                                               security=trans.security)
            if result["element_type"] == "dataset_collection":
                result["object"]["contents_url"] = routes.url_for(
                    'contents_dataset_collection',
                    hdca_id=encode_id(hdca.id),
                    parent_id=encode_id(result["object"]["id"]))
            trans.security.encode_all_ids(result, recursive=True)
            return result

        results = contents_qry.with_session(trans.sa_session()).all()
        return [process_element(el) for el in results]
Example #13
    def display_data(self, trans, data, preview=False, filename=None, to_ext=None, **kwd):
        """
        Displays data in the central pane if preview is `True`, else handles download.

        Datatypes should be very careful if overriding this method and this interface
        between datatypes and Galaxy will likely change.

        TODO: Document alternatives to overriding this method (data
        providers?).
        """
        headers = kwd.get("headers", {})
        # Relocate all composite datatype display to a common location.
        composite_extensions = trans.app.datatypes_registry.get_composite_extensions()
        composite_extensions.append('html')  # for archiving composite datatypes
        # Prevent IE8 from sniffing content type since we're explicit about it.  This prevents intentionally text/plain
        # content from being rendered in the browser
        headers['X-Content-Type-Options'] = 'nosniff'
        if isinstance(data, str):
            return smart_str(data), headers
        if filename and filename != "index":
            # For files in extra_files_path
            extra_dir = data.dataset.extra_files_path_name
            file_path = trans.app.object_store.get_filename(data.dataset, extra_dir=extra_dir, alt_name=filename)
            if os.path.exists(file_path):
                if os.path.isdir(file_path):
                    with tempfile.NamedTemporaryFile(mode='w', delete=False, dir=trans.app.config.new_file_path, prefix='gx_html_autocreate_') as tmp_fh:
                        tmp_file_name = tmp_fh.name
                        dir_items = sorted(os.listdir(file_path))
                        base_path, item_name = os.path.split(file_path)
                        tmp_fh.write('<html><head><h3>Directory %s contents: %d items</h3></head>\n' % (escape(item_name), len(dir_items)))
                        tmp_fh.write('<body><p/><table cellpadding="2">\n')
                        for index, fname in enumerate(dir_items):
                            if index % 2 == 0:
                                bgcolor = '#D8D8D8'
                            else:
                                bgcolor = '#FFFFFF'
                            # Can't have an href link here because there is no route
                            # defined for files contained within multiple subdirectory
                            # levels of the primary dataset.  Something like this is
                            # close, but not quite correct:
                            # href = url_for(controller='dataset', action='display',
                            # dataset_id=trans.security.encode_id(data.dataset.id),
                            # preview=preview, filename=fname, to_ext=to_ext)
                            tmp_fh.write(f'<tr bgcolor="{bgcolor}"><td>{escape(fname)}</td></tr>\n')
                        tmp_fh.write('</table></body></html>\n')
                    return self._yield_user_file_content(trans, data, tmp_file_name, headers), headers
                mime = mimetypes.guess_type(file_path)[0]
                if not mime:
                    try:
                        mime = trans.app.datatypes_registry.get_mimetype_by_extension(file_path.split(".")[-1])
                    except Exception:
                        mime = "text/plain"
                self._clean_and_set_mime_type(trans, mime, headers)
                return self._yield_user_file_content(trans, data, file_path, headers), headers
            else:
                raise ObjectNotFound(f"Could not find '{filename}' on the extra files path {file_path}.")
        self._clean_and_set_mime_type(trans, data.get_mime(), headers)

        trans.log_event(f"Display dataset id: {str(data.id)}")
        from galaxy.datatypes import (  # DBTODO REMOVE THIS AT REFACTOR
            binary,
            images,
            text,
        )

        if to_ext or isinstance(
            data.datatype, binary.Binary
        ):  # Saving the file, or binary file
            if data.extension in composite_extensions:
                return self._archive_composite_dataset(trans, data, headers, do_action=kwd.get('do_action', 'zip'))
            else:
                headers['Content-Length'] = str(os.stat(data.file_name).st_size)
                filename = self._download_filename(data, to_ext, hdca=kwd.get("hdca"), element_identifier=kwd.get("element_identifier"), filename_pattern=kwd.get("filename_pattern"))
                headers['content-type'] = "application/octet-stream"  # force octet-stream so Safari doesn't append mime extensions to filename
                headers["Content-Disposition"] = f'attachment; filename="{filename}"'
                return open(data.file_name, 'rb'), headers
        if not os.path.exists(data.file_name):
            raise ObjectNotFound(f"File Not Found ({data.file_name}).")
        max_peek_size = DEFAULT_MAX_PEEK_SIZE  # 1 MB
        if isinstance(data.datatype, text.Html):
            max_peek_size = 10000000  # 10 MB for html
        preview = util.string_as_bool(preview)
        if (
            not preview
            or isinstance(data.datatype, images.Image)
            or os.stat(data.file_name).st_size < max_peek_size
        ):
            return self._yield_user_file_content(trans, data, data.file_name, headers), headers
        else:
            headers["content-type"] = "text/html"
            return trans.fill_template_mako("/dataset/large_file.mako",
                                            truncated_data=open(data.file_name, 'rb').read(max_peek_size),
                                            data=data), headers
Example #14
    def chroms(self, trans, dbkey=None, num=None, chrom=None, low=None):
        """
        Returns a naturally sorted list of chroms/contigs for a given dbkey.
        Use either chrom or low to specify the starting chrom in the return list.
        """
        self.check_and_reload()
        # If there is no dbkey owner, default to current user.
        dbkey_owner, dbkey = decode_dbkey(dbkey)
        if dbkey_owner:
            dbkey_user = trans.sa_session.query(
                trans.app.model.User).filter_by(username=dbkey_owner).first()
        else:
            dbkey_user = trans.user

        #
        # Get/create genome object.
        #
        genome = None
        twobit_file = None

        # Look first in user's custom builds.
        if dbkey_user and 'dbkeys' in dbkey_user.preferences:
            user_keys = loads(dbkey_user.preferences['dbkeys'])
            if dbkey in user_keys:
                dbkey_attributes = user_keys[dbkey]
                dbkey_name = dbkey_attributes['name']

                # If there's a fasta for genome, convert to 2bit for later use.
                if 'fasta' in dbkey_attributes:
                    build_fasta = trans.sa_session.query(
                        trans.app.model.HistoryDatasetAssociation).get(
                            dbkey_attributes['fasta'])
                    len_file = build_fasta.get_converted_dataset(
                        trans, 'len').file_name
                    build_fasta.get_converted_dataset(trans, 'twobit')
                    # HACK: set twobit_file to True rather than a file name because
                    # get_converted_dataset returns null during conversion even though
                    # there will eventually be a twobit file available for genome.
                    twobit_file = True
                # Backwards compatibility: look for len file directly.
                elif 'len' in dbkey_attributes:
                    len_file = trans.sa_session.query(
                        trans.app.model.HistoryDatasetAssociation).get(
                            user_keys[dbkey]['len']).file_name
                if len_file:
                    genome = Genome(dbkey,
                                    dbkey_name,
                                    len_file=len_file,
                                    twobit_file=twobit_file)

        # Look in history and system builds.
        if not genome:
            # Look in history for chromosome len file.
            len_ds = trans.db_dataset_for(dbkey)
            if len_ds:
                genome = Genome(dbkey, dbkey_name, len_file=len_ds.file_name)
            # Look in system builds.
            elif dbkey in self.genomes:
                genome = self.genomes[dbkey]

        if not genome:
            raise ObjectNotFound(f'genome not found for key {dbkey}')

        return genome.to_dict(num=num, chrom=chrom, low=low)
Example #15
    def download(self,
                 trans,
                 history_id,
                 provider,
                 bucket_name,
                 credentials,
                 dataset_ids=None,
                 overwrite_existing=False):
        """
        Implements the logic of downloading dataset(s) from a given history to a given cloud-based storage
        (e.g., Amazon S3).

        :type  trans:               galaxy.web.framework.webapp.GalaxyWebTransaction
        :param trans:               Galaxy web transaction

        :type  history_id:          string
        :param history_id:          the (encoded) id of history from which the object should be downloaded.

        :type  provider:            string
        :param provider:            the name of the cloud-based resource provider. A list of supported providers
                                    is given in the `SUPPORTED_PROVIDERS` variable.

        :type  bucket_name:         string
        :param bucket_name:         the name of a bucket to which data should be downloaded (e.g., a bucket
                                    name on AWS S3).

        :type  credentials:         dict
        :param credentials:         a dictionary containing all the credentials required to authenticate
                                    to the specified provider (e.g., {"secret_key": YOUR_AWS_SECRET_TOKEN,
                                    "access_key": YOUR_AWS_ACCESS_TOKEN}).

        :type  dataset_ids:         set
        :param dataset_ids:         [Optional] The list of (decoded) dataset ID(s) belonging to the given
                                    history which should be downloaded to the given provider. If not provided,
                                    Galaxy downloads all the datasets belonging to the given history.

        :type  overwrite_existing:  boolean
        :param overwrite_existing:  [Optional] If set to "True", and an object with the same name as the
                                    dataset to be downloaded already exists in the bucket, Galaxy replaces
                                    the existing object with the dataset to be downloaded. If set to
                                    "False", Galaxy appends the current datetime to the dataset name to
                                    prevent overwriting the existing object.

        :rtype:                     list
        :return:                    A list of labels for the objects that were uploaded.
        """
        if CloudProviderFactory is None:
            raise Exception(NO_CLOUDBRIDGE_ERROR_MESSAGE)
        connection = self._configure_provider(provider, credentials)

        bucket = connection.storage.buckets.get(bucket_name)
        if bucket is None:
            raise ObjectNotFound(
                "Could not find the specified bucket `{}`.".format(
                    bucket_name))

        history = trans.sa_session.query(
            trans.app.model.History).get(history_id)
        downloaded = []
        for hda in history.datasets:
            if dataset_ids is None or hda.dataset.id in dataset_ids:
                object_label = hda.name
                if overwrite_existing is False and bucket.objects.get(
                        object_label) is not None:
                    object_label += "-" + datetime.datetime.now().strftime(
                        "%y-%m-%d-%H-%M-%S")
                created_obj = bucket.objects.create(object_label)
                created_obj.upload_from_file(hda.dataset.get_file_name())
                downloaded.append(object_label)
        return downloaded
Example #16
    def upload(self,
               trans,
               history_id,
               provider,
               bucket_name,
               objects,
               credentials,
               input_args=None):
        """
        Implements the logic of uploading a file from a cloud-based storage (e.g., Amazon S3)
        and persisting it as a Galaxy dataset.

        :type  trans:       galaxy.web.framework.webapp.GalaxyWebTransaction
        :param trans:       Galaxy web transaction

        :type  history_id:  string
        :param history_id:  the (decoded) id of the history to which the object should be uploaded.

        :type  provider:    string
        :param provider:    the name of the cloud-based resource provider. A list of supported providers is given in
                            the `SUPPORTED_PROVIDERS` variable.

        :type  bucket_name: string
        :param bucket_name: the name of a bucket from which data should be uploaded (e.g., a bucket name on AWS S3).

        :type  objects:     list of string
        :param objects:     the name of objects to be uploaded.

        :type  credentials: dict
        :param credentials: a dictionary containing all the credentials required to authenticate to the
                            specified provider (e.g., {"secret_key": YOUR_AWS_SECRET_TOKEN,
                            "access_key": YOUR_AWS_ACCESS_TOKEN}).

        :type  input_args:  dict
        :param input_args:  [Optional] a dictionary of input parameters:
                            dbkey, file_type, space_to_tab, to_posix_lines (see galaxy/webapps/galaxy/api/cloud.py)

        :rtype:             list of galaxy.model.Dataset
        :return:            a list of datasets created for the uploaded files.
        """
        if CloudProviderFactory is None:
            raise Exception(NO_CLOUDBRIDGE_ERROR_MESSAGE)

        if input_args is None:
            input_args = {}

        connection = self._configure_provider(provider, credentials)
        try:
            bucket = connection.storage.buckets.get(bucket_name)
            if bucket is None:
                raise RequestParameterInvalidException(
                    "The bucket `{}` not found.".format(bucket_name))
        except Exception as e:
            raise ItemAccessibilityException(
                "Could not get the bucket `{}`: {}".format(
                    bucket_name, str(e)))

        datasets = []
        for obj in objects:
            try:
                key = bucket.objects.get(obj)
            except Exception as e:
                raise MessageException(
                    "The following error occurred while getting the object {}: {}"
                    .format(obj, str(e)))
            if key is None:
                raise ObjectNotFound(
                    "Could not get the object `{}`.".format(obj))

            params = Params(self._get_inputs(obj, key, input_args),
                            sanitize=False)
            incoming = params.__dict__
            history = trans.sa_session.query(
                trans.app.model.History).get(history_id)
            if not history:
                raise ObjectNotFound("History with ID `{}` not found.".format(
                    trans.app.security.encode_id(history_id)))
            output = trans.app.toolbox.get_tool('upload1').handle_input(
                trans, incoming, history=history)

            job_errors = output.get('job_errors', [])
            if job_errors:
                raise ValueError(
                    'The following error occurred while uploading the given object(s) from {}: {}'
                    .format(provider, job_errors))
            else:
                for d in output['out_data']:
                    datasets.append(d[1].dataset)

        return datasets
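
A hypothetical follow-up for the datasets returned by `upload`: an API caller could report their encoded ids back to the client (`cloud_manager` is an assumed instance name).

datasets = cloud_manager.upload(trans, history_id, provider, bucket_name, objects, credentials)
encoded_ids = [trans.security.encode_id(d.id) for d in datasets]
log.debug("created datasets: %s", encoded_ids)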