Example #1
def get_status(analysisId):  # noqa: E501
    """Retrieves the status for the specified analysis.

     # noqa: E501

    :param analysisId: The analysis identifier returned by '/analysis'
    :type analysisId: str

    :rtype: InlineResponse200
    """
    try:
        storage = ReactomeStorage()

        status = storage.get_status(analysisId)

        if status is None:
            LOGGER.debug("Unknown identifier passed to get_status: " +
                         analysisId)
            abort(404, "Unknown identifier")
        else:
            # return a Response object to prevent connexion from
            # de-serializing the object into a JSON object
            return Response(response=status,
                            status=200,
                            headers={"content-type": "application/json"})
    except ReactomeStorageException as e:
        LOGGER.error("Failed to connect to redis: " + str(e))
        abort(
            503,
            "Failed to connect to storage system. Please try again in a few minutes."
        )
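A minimal client-side sketch of how the status served by this function could be polled. The HTTP client, the base URL, and the /status/{analysisId} path are assumptions made for illustration only; the terminal-state check relies on the "running" and "failed" states this API sets elsewhere.

import time

import requests  # assumed HTTP client for this sketch

BASE_URL = "https://example.org/api"  # hypothetical deployment URL


def poll_analysis_status(analysis_id, interval=5):
    """Poll the (assumed) status endpoint until the analysis leaves the 'running' state."""
    while True:
        response = requests.get("{}/status/{}".format(BASE_URL, analysis_id))
        response.raise_for_status()
        status = response.json()

        # the backend sets "running" while queued or working and "failed" on
        # errors; anything else is treated here as a terminal state
        if status.get("status") != "running":
            return status

        time.sleep(interval)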
Example #2
    def test_stored_tsv(self):
        with app.app.test_client() as client:
            response = client.post(
                "/upload",
                data={
                    "file":
                    (io.BytesIO(self.test_tsv.encode("UTF-8")), "test.tsv")
                })

            self.assertEqual(200, response.status_code)

            result_obj = json.loads(response.data.decode())

            # make sure the samples are correct
            self.assertEqual("Sample 1:Sample 2:Sample 3",
                             ":".join(result_obj["sample_names"]))
            self.assertEqual(4, result_obj["n_lines"])
            self.assertEqual("CD19:CD20:MITF",
                             ":".join(result_obj["top_identifiers"]))

            # make sure the data was stored correctly
            self.assertIsNotNone(result_obj["data_token"])
            token = result_obj["data_token"]

            # create a new redis instance
            storage = ReactomeStorage()

            self.assertTrue(storage.request_token_exists(token))

            stored_obj = storage.get_request_data(token)

            self.assertEqual("\t" + self.test_tsv, stored_obj.decode("UTF-8"))
Example #3
def get_summary(datasetId):  # noqa: E501
    """Retrieves a summary of the loaded data. This function is only available once. The data is fully loaded.

     # noqa: E501

    :param datasetId: The dataset identifier used to trigger the download
    :type datasetId: str

    :rtype: ExternalData
    """
    try:
        storage = ReactomeStorage()

        if not storage.request_data_summary_exists(datasetId):
            abort(404, "Unknown identifier passed.")

        summary_data = storage.get_request_data_summary(datasetId)

        if summary_data is not None:
            return Response(response=summary_data,
                            status=200,
                            headers={"content-type": "application/json"})

        abort(404, "Unknown identifier passed.")
    except ReactomeStorageException as e:
        LOGGER.error("Failed to connect to redis: " + str(e))
        abort(
            503,
            "Failed to connect to storage system. Please try again in a few minutes."
        )
Example #4
def get_data_loading_status(loadingId):  # noqa: E501
    """Retrieves the status for the dataset loading process.

     # noqa: E501

    :param loadingId: The loading identifier returned by '/data/load'
    :type loadingId: str

    :rtype: DatasetLoadingStatus
    """
    try:
        storage = ReactomeStorage()

        status = storage.get_status(analysis_identifier=loadingId,
                                    data_type="dataset")

        if status is None:
            LOGGER.debug("Unknown identifier passed to get_status: " +
                         loadingId)
            abort(404, "Unknown identifier")
        else:
            # return a Response object to prevent connexion from
            # de-serializing the object into a JSON object
            return Response(response=status,
                            status=200,
                            headers={"content-type": "application/json"})
    except ReactomeStorageException as e:
        LOGGER.error("Failed to connect to redis: " + str(e))
        abort(
            503,
            "Failed to connect to storage system. Please try again in a few minutes."
        )
Example #5
    def _get_storage(self):
        """
        Returns the current connection to the reactome storage
        :return: A ReactomeStorage object
        """
        if not self._storage:
            try:
                self._storage = ReactomeStorage()
            except Exception as e:
                LOGGER.error("Failed to connect to storage service: " + str(e))
                raise Exception("Failed to connect to storage service", e)

        return self._storage
Example #6
def start_analysis(body):  # noqa: E501
    """Performs the specified gene set analysis

     # noqa: E501

    :param body: Specification of analysis to perform
    :type body: dict | bytes

    :rtype: str
    """
    # get the JSON-encoded dict from the request object
    if connexion.request.is_json:
        analysis_dict = connexion.request.get_json(cache=False)
    # de-compress if it's a gzipped string
    elif connexion.request.content_type == "application/gzip":
        LOGGER.debug("Received gzipped analysis request. Decompressing...")

        decompressed_string = zlib.decompress(connexion.request.data)
        analysis_dict = json.loads(decompressed_string)

        # free the memory again
        del decompressed_string
    else:
        LOGGER.debug(
            "Invalid analysis request submitted. Request body does not describe a JSON object."
        )
        abort(
            406,
            "Invalid analysis request submitted. Request body does not describe a JSON object."
        )
        return

    try:
        analysis_request = input_deserializer.create_analysis_input_object(
            analysis_dict)
    except Exception as e:
        LOGGER.debug("Unknown analysis method submitted: " +
                     analysis_dict["methodName"])
        abort(404, "Unknown analysis method selected.")

    # make sure all datasets have unique names
    all_names = [dataset.name for dataset in analysis_request.datasets]

    if len(all_names) != len(set(all_names)):
        LOGGER.debug("Analysis request contains duplicate names")
        abort(406, "Datasets must not have duplicate names")

    # make sure the analysis design is present
    for n_dataset in range(0, len(analysis_request.datasets)):
        if not analysis_request.datasets[n_dataset].design:
            LOGGER.debug("Analysis request misses design")
            abort(
                406,
                "Invalid request. Dataset '{name}' misses the required experimental design."
                .format(name=analysis_request.datasets[n_dataset].name))
        if not analysis_request.datasets[n_dataset].design.comparison:
            LOGGER.debug("Analysis request misses design comparison")
            abort(
                406,
                "Invalid request. Dataset '{name}' misses the required comparison specification."
                .format(name=analysis_request.datasets[n_dataset].name))

    # generate an analysis id
    analysis_id = str(uuid.uuid1())

    try:
        storage = ReactomeStorage()

        # a very basic sanity check to make sure it's unique
        while storage.analysis_exists(analysis_id):
            analysis_id = str(uuid.uuid1())

        # Load request data from storage
        for n_dataset in range(0, len(analysis_dict["datasets"])):
            data = analysis_dict["datasets"][n_dataset]["data"]

            # the 'data' field may hold a storage token instead of the
            # actual data; in that case, resolve it from storage
            if data[0:4] == "rqu_" or len(data) < 20:
                # make sure the request data exists
                if not storage.request_token_exists(data):
                    MISSING_DATA_TOKEN_COUNTER.inc()
                    abort(
                        500, "No data available for storage token '{}'".format(
                            data))

                # load the data
                stored_data = storage.get_request_data(data)

                # update the request object
                analysis_dict["datasets"][n_dataset][
                    "data"] = stored_data.decode("UTF-8")

        # Set the initial status
        encoder = JSONEncoder()

        status = AnalysisStatus(id=analysis_id,
                                status="running",
                                completed=0,
                                description="Queued")
        storage.set_status(analysis_id, encoder.encode(status))

        # Save the request data
        analysis_dict["analysisId"] = analysis_id
        storage.set_analysis_request_data(token=analysis_id,
                                          data=encoder.encode(analysis_dict))

        try:
            # Submit the request to the queue
            queue = ReactomeMQ()
            queue.post_analysis(
                AnalysisRequest(request_id=analysis_id).to_json(),
                analysis_request.method_name)
            LOGGER.debug("Analysis " + analysis_id + " submitted to queue")
            queue.close()

            STARTED_ANALYSIS_COUNTER.inc()

            return analysis_id
        except socket.gaierror as e:
            # update the status
            LOGGER.error("Failed to connect to queuing system: " + str(e))
            status = AnalysisStatus(
                id=analysis_id,
                status="failed",
                completed=0,
                description="Failed to connect to queuing system.")
            storage.set_status(analysis_id, encoder.encode(status))

            abort(
                503,
                "Failed to connect to queuing system. Please try again in a few seconds."
            )
        except ReactomeMQException as e:
            LOGGER.error("Failed to post message to queuing system: " + str(e))
            # update the status
            status = AnalysisStatus(
                id=analysis_id,
                status="failed",
                completed=0,
                description="Failed to connect to queuing system.")
            storage.set_status(analysis_id, encoder.encode(status))

            abort(
                503,
                "The number of analysis requests is currently too high. Please try again in a few minutes."
            )
    except ReactomeStorageException as e:
        LOGGER.error("Failed to connect to redis: " + str(e))
        abort(
            503,
            "Failed to connect to storage system. Please try again in a few minutes."
        )
    except (socket.timeout, socket.gaierror) as e:
        LOGGER.error(
            "Socket timeout connecting to storage or queuing system: " +
            str(e))
        abort(
            503,
            "Failed to connect to downstream system. Please try again in a few minutes."
        )
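For orientation, a request body consistent with the fields start_analysis reads above (methodName, datasets[].name, datasets[].data and datasets[].design.comparison). The method name and the contents of the comparison object are illustrative assumptions, not values taken from the code.

# hypothetical request body; only the keys accessed in start_analysis above are
# grounded in the code, the concrete values are made up for illustration
example_analysis_request = {
    "methodName": "Camera",                # must match a registered analysis method
    "datasets": [
        {
            "name": "Proteomics dataset",  # dataset names must be unique
            "data": "rqu_<token>",         # storage token from the upload endpoint, or the data itself
            "design": {
                "comparison": {            # required comparison specification
                    "group1": "control",
                    "group2": "treatment",
                },
            },
        },
    ],
}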
Example #7
def process_file_upload():
    # test whether the file should be stored or returned
    store_file = request.args.get('store', 'true').lower() == "true"

    # make sure only one file is uploaded
    if len(request.files) != 1:
        abort(400, "Incorrect number of uploaded files. Function requires exactly one file.")

    if "file" not in request.files:
        abort(400, "File must be uploaded as 'file' in the form.")

    # get the uploaded file
    user_file = request.files['file']
    user_filename = user_file.filename

    # initialize the return object
    return_object = {"sample_names": None, "top_identifiers": list(), "n_lines": None}
    return_lines = list()
    n_samples = -1

    # read the file
    try:
        all_lines = [line.decode("UTF-8") for line in user_file.readlines()]
    except Exception as e:
        LOGGER.error("Invalid file {name} uploaded: {error}".format(name = user_filename, error=str(e)))
        abort(400, "Uploaded file is not a text file.")

    # guess the delimiter
    delimiter = None
    if "\t" in all_lines[0]:
        delimiter = "\t"
    elif ";" in all_lines[0]:
        delimiter = ";"
    elif "," in all_lines[0]:
        delimiter = ","

    if not delimiter:
        abort(500, "Failed to detect used delimiter")

    csv_reader = csv.reader(all_lines, delimiter=delimiter)
    header_line = next(csv_reader)
    current_line = 1

    for line in csv_reader:
        current_line += 1

        if n_samples == -1:
            n_samples = len(line)

            # make sure the file was parsed more or less correctly
            if n_samples < 2:
                abort(400, "Failed to parse the file. Only one column detected.")

            # add an empty cell if there is exactly one column less than the number of samples
            if len(header_line) == n_samples - 1:
                header_line = [""] + header_line

            # make sure the header matches
            if len(header_line) != n_samples:
                abort(400, "Different number of column names than entries in row 1: header contains {} fields, "
                           "first line contains {} fields".format(str(len(header_line)), str(n_samples)))

            # save the sample names
            return_object["sample_names"] = header_line[1:]

            # start creating the converted object
            return_lines.append("\t".join(header_line))

        # make sure the number of samples is OK
        if len(line) != n_samples:
            abort(400, "Different number of entries in line {}. File contains {} columns but line {} contains {}"
                  .format(str(current_line), str(n_samples), str(current_line), str(len(line))))

        # save the first few identifiers as samples
        if current_line < 10:
            return_object["top_identifiers"].append(line[0])

        # save the line
        return_lines.append("\t".join(line))

    # save the results
    return_object["n_lines"] = current_line

    # create the complete result string
    result_string = "\n".join(return_lines)

    # add the file if it shouldn't be saved
    if not store_file:
        return_object["data"] = result_string
    else:
        # store the file
        try:
            storage = ReactomeStorage()

            # create an identifier
            token = "rqu_" + str(uuid.uuid1())

            while storage.request_token_exists(token):
                token = "rqu_" + str(uuid.uuid1())

            # save the data - expire after 6 hours
            storage.set_request_data(token=token, data=result_string, expire=60*60*6)

            return_object["data_token"] = token
        except ReactomeStorageException as e:
            LOGGER.error("Failed to store request data: " + str(e))
            abort(500, "Failed to store request data. Please try again later.")

    # return the JSON data
    response_object = make_response(json.dumps(return_object))
    # Using the content-type "text/html" instead of the more
    # appropriate "application/json" to circumvent the lacking
    # support for JSON in GWT (used by Reactome's pathway browser)
    response_object.headers["Content-Type"] = "text/html"

    return response_object
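A sketch of the JSON object this function returns, based on the keys assembled in return_object above; the concrete values are taken from the test in Example #2 and are otherwise illustrative.

# illustrative response for a three-sample, four-line TSV upload with store=true;
# with store=false the converted file is returned inline under "data" instead of "data_token"
example_upload_response = {
    "sample_names": ["Sample 1", "Sample 2", "Sample 3"],
    "top_identifiers": ["CD19", "CD20", "MITF"],
    "n_lines": 4,
    "data_token": "rqu_<uuid>",
}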
Example #8
def get_result(analysisId):  # noqa: E501
    """Retrieves the result for the completed analysis task

     # noqa: E501

    :param analysisId: The analysis identifier returned by '/analysis'
    :type analysisId: str

    :rtype: AnalysisResult
    """
    try:
        # check if an extension was present
        extension = None

        if "." in analysisId:
            extension = analysisId[analysisId.find(".") + 1:]
            analysisId = analysisId[:analysisId.find(".")]

        storage = ReactomeStorage()

        if extension == "xlsx":
            xlsx_file = storage.get_result(analysis_identifier=analysisId,
                                           data_type="report")

            if xlsx_file is not None:
                return Response(response=xlsx_file,
                                status=200,
                                headers={"content-type": "application/xlsx"})
        elif extension == "pdf":
            pdf_file = storage.get_result(analysis_identifier=analysisId,
                                          data_type="pdf_report")

            if pdf_file is not None:
                return Response(response=pdf_file,
                                status=200,
                                headers={"content-type": "application/pdf"})
        elif extension == "r":
            r_file = storage.get_result(analysis_identifier=analysisId,
                                        data_type="r_script")

            if r_file is not None:
                return Response(
                    response=r_file,
                    status=200,
                    headers={
                        "content-type":
                        "text/plain",
                        "content-disposition":
                        "attachment; filename=\"ReactomeGSA_analysis_script.R\""
                    })
        else:
            result = storage.get_result(analysisId)

            if result is not None:
                return Response(response=result,
                                status=200,
                                headers={"content-type": "application/json"})

        # find out why the result doesn't exist
        status = storage.get_status(analysisId)

        if not status:
            LOGGER.debug("Unknown identifier to get_result: " + analysisId)
            abort(404, "Unknown analysis identifier passed.")

        # the identifier is valid, so for some reason the result is not ready (yet)
        abort(406, "Analysis is not complete.")
    except ReactomeStorageException as e:
        LOGGER.error("Failed to connect to redis: " + str(e))
        abort(
            503,
            "Failed to connect to storage system. Please try again in a few minutes."
        )
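A client-side sketch of how the different result formats handled above might be requested by appending a file extension to the analysis identifier. The HTTP client, the base URL, and the /result/{analysisId} path are assumptions for illustration; the extension handling mirrors the xlsx/pdf/r branches of get_result.

import requests  # assumed HTTP client for this sketch

BASE_URL = "https://example.org/api"  # hypothetical deployment URL


def download_result(analysis_id, extension=None):
    """Fetch the JSON result, or a report/script when an extension is given (sketch)."""
    identifier = analysis_id if not extension else "{}.{}".format(analysis_id, extension)
    response = requests.get("{}/result/{}".format(BASE_URL, identifier))
    response.raise_for_status()

    # no extension returns the JSON analysis result; "xlsx", "pdf" and "r"
    # return the corresponding report or script as raw bytes
    return response.json() if not extension else response.content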
Example #9
def load_data(resourceId, parameters):  # noqa: E501
    """Start the retrieval of an external or example dataset.

     # noqa: E501

    :param resourceId: The identifier of the data source to load from
    :type resourceId: str

    :param parameters: The parameters for the selected resource.

    :rtype: str
    """
    try:
        storage = ReactomeStorage()

        # generate an id for the request
        loading_id = str(uuid.uuid1())

        # Set the initial status
        encoder = JSONEncoder()
        status = DatasetLoadingStatus(id=loading_id,
                                      status="running",
                                      completed=0,
                                      description="Queued")
        storage.set_status(loading_id,
                           encoder.encode(status),
                           data_type="dataset")

        # convert the parameters
        request_parameters = list()

        for dict_param in parameters:
            request_parameters.append(
                DatasetRequestParameter(name=dict_param["name"],
                                        value=dict_param["value"]))

        # create the request
        request = DatasetRequest(loading_id=loading_id,
                                 resource_id=resourceId,
                                 parameters=request_parameters)

        try:
            queue = ReactomeMQ(queue_name=DATASET_QUEUE)
            queue.post_analysis(analysis=request.to_json(),
                                method="DatasetLoading")
            LOGGER.debug("Dataset process " + loading_id +
                         " submitted to queue")
            queue.close()

            DATASET_LOADING_COUNTER.inc()

            return loading_id
        except socket.gaierror as e:
            # update the status
            LOGGER.error("Failed to connect to queuing system: " + str(e))
            status = DatasetLoadingStatus(
                id=loading_id,
                status="failed",
                completed=0,
                description="Failed to connect to queuing system.")
            storage.set_status(loading_id,
                               encoder.encode(status),
                               data_type="dataset")

            abort(
                503,
                "Failed to connect to queuing system. Please try again in a few seconds."
            )
        except ReactomeMQException as e:
            LOGGER.error("Failed to post message to queuing system: " + str(e))
            # update the status
            status = DatasetLoadingStatus(
                id=loading_id,
                status="failed",
                completed=0,
                description="Failed to connect to queuing system.")
            storage.set_status(loading_id,
                               encoder.encode(status),
                               data_type="dataset")

            abort(
                503,
                "The number of analysis requests is currently too high. Please try again in a few minutes."
            )
    except ReactomeStorageException as e:
        LOGGER.error("Failed to connect to redis: " + str(e))
        abort(
            503,
            "Failed to connect to storage system. Please try again in a few minutes."
        )
    except (socket.timeout, socket.gaierror) as e:
        LOGGER.error(
            "Socket timeout connecting to storage or queuing system: " +
            str(e))
        abort(
            503,
            "Failed to connect to downstream system. Please try again in a few minutes."
        )
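A sketch of arguments matching what load_data expects above: a resource identifier plus a list of name/value parameter dictionaries. The resource identifier and the parameter names are illustrative assumptions, not values taken from the code.

# hypothetical invocation; only the structure (a resourceId string and a list of
# {"name": ..., "value": ...} dictionaries) is grounded in the code above
loading_id = load_data(
    resourceId="example_resource",
    parameters=[
        {"name": "dataset_id", "value": "E-MTAB-0000"},
        {"name": "data_type", "value": "rnaseq_counts"},
    ],
)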