def test_get_identifier_changes(self):
        """
        All identifiers passed in all_identifiers must be present in the result with
        two values each, although P3 and P4 only occur in one of the two
        fold-change tables.
        """
        identifier_fc_string_1 = "Identifier\tlogFC\tadj.P.Val\nP1\t0.01\t1\nP2\t0.02\t1\nP3\t0.03\t1\n"
        identifier_fc_string_2 = "Identifier\tlogFC\tadj.P.Val\nP1\t0.01\t1\nP2\t0.02\t1\nP4\t0.03\t1\n"

        all_identifiers = {"P1", "P2", "P3", "P4"}
        identifier_fcs = [util.string_to_array(fc_string)
                          for fc_string in (identifier_fc_string_1, identifier_fc_string_2)]

        identifier_changes = result_converter._get_identifier_changes(identifier_fcs, all_identifiers)

        self.assertIsNotNone(identifier_changes)
        self.assertEqual(4, len(identifier_changes))

        for identifier in all_identifiers:
            # assertIn names the missing identifier if the check fails
            self.assertIn(identifier, identifier_changes)
            self.assertEqual(2, len(identifier_changes[identifier]))
    def test_get_pathway_changes(self):
        """
        All pathways passed in all_pathways must be present in the result with two
        values each, although P3 and P5 only occur in one of the two pathway tables.
        """
        pathway_fc_string_1 = "Pathway\tDirection\tFDR\nP1\tUp\t0.02\nP2\tDown\t0.01\nP3\tUp\t0.07\nP4\tDown\t0.09\n"
        pathway_fc_string_2 = "Pathway\tDirection\tFDR\nP1\tDown\t0.02\nP2\tUp\t0.01\nP5\tDown\t0.07\nP4\tUp\t0.09\n"

        pathway_fcs = [util.string_to_array(fc_string)
                       for fc_string in (pathway_fc_string_1, pathway_fc_string_2)]
        all_pathways = {"P1", "P2", "P3", "P4", "P5"}

        pathway_changes = result_converter._get_pathway_changes(pathway_fcs, all_pathways, 0.05)

        self.assertIsNotNone(pathway_changes)
        self.assertEqual(5, len(pathway_changes))

        for pathway in all_pathways:
            # assertIn names the missing pathway if the check fails
            self.assertIn(pathway, pathway_changes)
            self.assertEqual(2, len(pathway_changes[pathway]))
    def test_get_pathway_p_values(self):
        """
        The p-values of two pathway tables are merged into one entry per pathway.

        Expectations checked below:
          * P1 (0.02 / "Up" in both tables) ends up below 0.02
          * P2 (0.01 in both tables, but "Down" vs. "Up") ends up above 0.01
          * P3 (only present in the first table) keeps its p-value of 0.07
        """
        pathway_fc_string_1 = "Pathway\tDirection\tPValue\nP1\tUp\t0.02\nP2\tDown\t0.01\nP3\tUp\t0.07\nP4\tDown\t0.09\n"
        pathway_fc_string_2 = "Pathway\tDirection\tPValue\nP1\tUp\t0.02\nP2\tUp\t0.01\nP5\tDown\t0.07\nP4\tUp\t0.09\n"

        pathway_fcs = [util.string_to_array(fc_string)
                       for fc_string in (pathway_fc_string_1, pathway_fc_string_2)]

        pathway_p = result_converter._get_pathway_p_values(pathway_fcs)

        self.assertIsNotNone(pathway_p)
        self.assertEqual(5, len(pathway_p))

        for pathway in {"P1", "P2", "P3", "P4", "P5"}:
            # assertIn names the missing pathway if the check fails
            self.assertIn(pathway, pathway_p)

        self.assertGreater(0.02, pathway_p["P1"]["p"])
        self.assertLess(0.01, pathway_p["P2"]["p"])
        self.assertEqual(0.07, pathway_p["P3"]["p"])
    def test_ssgsea(self):
        """
        Run the GSVA analyser on the default test request and verify the layout of
        the pathway table as well as the values of two selected pathways.
        """
        json_obj = json.loads(self.test_json)
        request = create_analysis_input_object(json_obj)
        dataset = request.datasets[0]
        dataset.df = util.string_to_array(dataset.data)

        # get the mappings
        mappings = util.map_identifiers({"MITF", "CD19", "MS4A1"})

        gene_set = self._get_gene_set()
        gene_id_colname = dataset.df.dtype.names[0]
        gene_set_mapping = GeneSetMapping.create_mapping(
            gene_set,
            identifier_mapping=mappings,
            identifiers=dataset.df[:][gene_id_colname].tolist())

        analyser = ReactomeGSVARAnalyser()
        result = analyser.analyse_request(request=request,
                                          gene_set_mappings={dataset.name: gene_set_mapping},
                                          identifier_mappings=mappings,
                                          gene_set=gene_set)

        # test the result
        self.assertEqual(1, len(result))
        self.assertIsNotNone(result[0].pathways)
        self.assertIsNotNone(result[0].fold_changes)

        # test the actual result
        reader = csv.DictReader(result[0].pathways.split("\n"), delimiter="\t")
        self.assertEqual(5, len(reader.fieldnames))

        required_fields = ["Pathway", "Name", "Sample.1", "Sample.2", "Sample.3"]
        for required_field in required_fields:
            # assertIn names the missing column if the check fails
            self.assertIn(required_field, reader.fieldnames)

        # test the pathways
        found_pathways = 0
        found_p1 = False
        found_p2 = False

        for pathway in reader:
            found_pathways += 1

            if pathway["Pathway"] == "R-HSA-1280218":
                found_p1 = True
                self.assertEqual("0.0", pathway["Sample.1"].strip())
                self.assertEqual("0.02880908", pathway["Sample.2"].strip())
                self.assertEqual("0.02880908", pathway["Sample.3"].strip())

            if pathway["Pathway"] == "R-HSA-392499":
                found_p2 = True
                self.assertEqual(-0.5, float(pathway["Sample.1"]))
                self.assertEqual(-0.5, float(pathway["Sample.2"]))
                self.assertEqual(-0.5, float(pathway["Sample.3"]))

        self.assertEqual(143, found_pathways)

        # both reference pathways must have been part of the result
        self.assertTrue(found_p1)
        self.assertTrue(found_p2)
示例#5
0
    def testOneLineConversion(self):
        """
        A matrix with a single data row must be converted into a
        0-dimensional array whose first field holds the row name
        :return:
        """
        single_row_matrix = "\\tSample 1\\tSample2\\nGene 1\\t10\\t20\\n"

        converted = util.string_to_array(single_row_matrix)
        self.assertEqual(0, converted.ndim)

        # the first field of the structured array holds the gene name
        name_field = converted.dtype.names[0]
        self.assertEqual("Gene 1", converted[name_field].item())
示例#6
0
    def testConversion(self):
        """
        A matrix with several data rows must be converted into a
        1-dimensional array that keeps the row names in the first field
        :return:
        """
        matrix = "\\tSample 1\\tSample2\\nGene 1\\t10\\t20\\nGene 2\\t10\\t30\\nGene 3\\t10\\t30\\n"

        converted = util.string_to_array(matrix)
        self.assertEqual(1, converted.ndim)

        # only the names of the first two rows are verified
        for row_index, expected_name in enumerate(("Gene 1", "Gene 2")):
            self.assertEqual(expected_name, converted[row_index][converted.dtype.names[0]])
示例#7
0
def _get_identifier_zscores(result: AnalysisResult) -> dict:
    """
    Extract the expression values for every identifier and returns them as a dict with the
    identifier as a key and the expression values across all datasets as value (list). The
    values of every identifier are z-score normalised separately within each dataset.
    Identifiers that are missing in a dataset receive a 0 for each of its value columns.
    :param result: An AnalysisResult object
    :return: A dict with the identifier as key and the z-scores as value
    :raises ConversionException: If a dataset does not contain any fold-change data
    """

    # get the observed proteins
    all_identifiers = set()
    # every entry is a tuple of (fold-change table, index of its "Identifier" column)
    identifier_fcs = []

    for dataset in result.results:
        if not dataset.fold_changes:
            raise ConversionException(
                "Fold-change data missing in dataset '{}'".format(
                    dataset.name))

        identifier_fc = util.string_to_array(dataset.fold_changes)
        identifier_index = identifier_fc.dtype.names.index("Identifier")
        identifier_fcs.append((identifier_fc, identifier_index))

        all_identifiers.update(row[identifier_index] for row in identifier_fc)

    # The z-score is calculated per identifier (= per row) of every dataset
    z_scores_per_identifier = {identifier: [] for identifier in all_identifiers}

    for identifier_fc, identifier_index in identifier_fcs:
        processed_identifiers = set()
        n_samples = len(identifier_fc.dtype.names) - 1

        for identifier_row in identifier_fc:
            # use the looked-up column instead of assuming that the identifier
            # is always stored in the first column
            identifier = identifier_row[identifier_index]
            expression_values = [
                value
                for column_index, value in enumerate(identifier_row.tolist())
                if column_index != identifier_index
            ]
            z_scores = zscore(expression_values)

            z_scores_per_identifier[identifier] += z_scores.tolist()
            processed_identifiers.add(identifier)

        # add the missing values
        for identifier in all_identifiers:
            if identifier not in processed_identifiers:
                z_scores_per_identifier[identifier] += [0] * n_samples

    return z_scores_per_identifier
    def test_analysis(self):
        """
        Run the R analyser on the filtered default test request and check the size
        and the column names of the resulting pathway table.
        """
        json_obj = json.loads(self.test_json)
        request = create_analysis_input_object(json_obj)
        dataset = request.datasets[0]
        dataset.df = util.string_to_array(dataset.data)

        # get the mappings
        mappings = util.map_identifiers({
            "MITF", "CD19", "MS4A1", "SDC1", "CD38", "EGFR", "IL10", "IL6",
            "GRB2", "GAB1", "SHC1"
        })

        # filter the dataset
        dataset.df = ReactomeAnalysisWorker._filter_dataset(
            dataset.df, mappings, dataset.design, 1)

        gene_set = self._get_gene_set()
        gene_id_colname = dataset.df.dtype.names[0]
        gene_set_mapping = GeneSetMapping.create_mapping(
            gene_set,
            identifier_mapping=mappings,
            identifiers=dataset.df[:][gene_id_colname].tolist())

        analyser = ReactomeRAnalyser()
        result = analyser.analyse_request(
            request=request,
            gene_set_mappings={dataset.name: gene_set_mapping},
            identifier_mappings=mappings,
            gene_set=gene_set)

        # test the result
        self.assertEqual(1, len(result))
        self.assertIsNotNone(result[0].pathways)

        result_lines = result[0].pathways.split("\n")
        self.assertEqual(233, len(result_lines))

        reader = csv.DictReader(result_lines, delimiter="\t")
        required_fields = ("Pathway", "Name", "Direction", "FDR", "PValue",
                           "NGenes")
        for field in required_fields:
            # assertIn additionally reports the available columns on failure
            self.assertIn(field, reader.fieldnames, "Missing field " + field)
    def test_pathway_string(self):
        """
        When the "pathways" parameter lists two pathway ids, the GSVA result
        must contain exactly two pathway rows.
        """
        request_json = json.loads(self.test_json)

        # replace the parameters of the default request
        pathway_parameter = {"name": "pathways", "value": "R-HSA-1280218,R-HSA-392499"}
        visualization_parameter = {"name": "create_reactome_visualization", "value": "False"}
        request_json["parameters"] = [pathway_parameter, visualization_parameter]

        request = create_analysis_input_object(request_json)
        first_dataset = request.datasets[0]
        first_dataset.df = util.string_to_array(first_dataset.data)

        # map the identifiers used in the test data
        mappings = util.map_identifiers({"MITF", "CD19", "MS4A1"})

        gene_set = self._get_gene_set()
        id_column = first_dataset.df.dtype.names[0]
        dataset_identifiers = first_dataset.df[:][id_column].tolist()
        gene_set_mapping = GeneSetMapping.create_mapping(gene_set,
                                                         identifier_mapping=mappings,
                                                         identifiers=dataset_identifiers)

        analyser = ReactomeGSVARAnalyser()
        result = analyser.analyse_request(request=request,
                                          gene_set_mappings={first_dataset.name: gene_set_mapping},
                                          identifier_mappings=mappings,
                                          gene_set=gene_set)

        # one result with pathway and fold-change data is expected
        self.assertEqual(1, len(result))
        first_result = result[0]
        self.assertIsNotNone(first_result.pathways)
        self.assertIsNotNone(first_result.fold_changes)

        # check the layout of the pathway table
        reader = csv.DictReader(first_result.pathways.split("\n"), delimiter="\t")
        self.assertEqual(5, len(reader.fieldnames))

        # only the two requested pathways may be reported
        n_entries = sum(1 for _ in reader)
        self.assertEqual(2, n_entries)
def convert_string_data(str_data: str,
                        result_queue: multiprocessing.Queue) -> None:
    """
    Convert a string encoded data matrix into an array and hand the outcome
    over through the passed queue. Intended to be launched in a separate process.
    :param str_data: The data matrix as a tab-delimited string
    :param result_queue: Receives either the converted array or the
                         ConversionException raised during the conversion
    """
    try:
        converted = util.string_to_array(str_data)

        # the first column must hold strings (not the case for NCBI gene ids)
        first_column_type = str(converted.dtype[0])
        if not first_column_type.startswith("<U"):
            column_types = converted.dtype.descr
            first_column_name = column_types[0][0]
            column_types[0] = (first_column_name, '<U15')
            converted = converted.astype(column_types)

        result_queue.put(converted)
    except util.ConversionException as e:
        # Pass the exception on so that the analysis can be marked as failed.
        result_queue.put(e)
示例#11
0
    def test_heartbeat(self):
        """
        The analyser must invoke the registered heartbeat callback while a
        request is being analysed.
        """
        json_obj = json.loads(self.test_json)
        json_obj["parameters"].append({
            "name": "max_missing_values",
            "value": "1"
        })

        # remove the patient since this coefficient cannot be estimated
        json_obj["datasets"][0]["design"].pop("patient")

        request = create_analysis_input_object(json_obj)
        dataset = request.datasets[0]
        dataset.df = util.string_to_array(dataset.data)

        # get the mappings
        mappings = util.map_identifiers({"MITF", "CD19", "MS4A1"})

        # filter the dataset
        dataset.df = ReactomeAnalysisWorker._filter_dataset(
            dataset.df, mappings, dataset.design, 1)

        gene_set = self._get_gene_set()
        gene_id_colname = dataset.df.dtype.names[0]
        gene_set_mapping = GeneSetMapping.create_mapping(
            gene_set,
            identifier_mapping=mappings,
            identifiers=dataset.df[:][gene_id_colname].tolist())

        analyser = ReactomeRAnalyser()
        analyser.set_heartbeat_callback(self.update_heartbeat)
        # subtract one second so that a heartbeat within the same second still counts
        start_time = int(time.time()) - 1

        # the analysis result itself is irrelevant for this test
        analyser.analyse_request(
            request=request,
            gene_set_mappings={dataset.name: gene_set_mapping},
            identifier_mappings=mappings,
            gene_set=gene_set)

        # make sure the heartbeat was updated
        self.assertGreater(self.last_heartbeat, start_time)
示例#12
0
def _get_gsva_pathway_expression(result: AnalysisResult) -> dict:
    """
    Collect the GSVA values of every pathway across all datasets of the result.
    The values of the individual datasets are concatenated in dataset order. A pathway
    that is missing in a dataset receives a 0 for every value column of that dataset.
    :param result: An AnalysisResult object
    :return: A dict with the pathway id as key and the list of expression values as value
    """
    # parse the pathway tables and collect all pathway ids
    observed_pathways = set()
    pathway_tables = []

    for dataset in result.results:
        table = util.string_to_array(dataset.pathways)
        pathway_tables.append(table)
        id_column = table.dtype.names.index("Pathway")
        observed_pathways.update(row[id_column] for row in table)

    # start with an empty value list for every pathway
    expression = {pathway: [] for pathway in observed_pathways}

    for table in pathway_tables:
        seen_pathways = set()
        # the pathway id and the name column do not hold expression values
        n_value_columns = len(table.dtype.names) - 2

        for row in table:
            current_pathway = row[0]
            expression[current_pathway] += row.tolist()[2:]
            seen_pathways.add(current_pathway)

        # fill up the pathways not reported in this dataset
        for missing_pathway in observed_pathways - seen_pathways:
            expression[missing_pathway] += [0] * n_value_columns

    return expression
示例#13
0
    def test_parameter_passing(self):
        """
        Run the R analyser with an additional "max_missing_values" parameter and check
        the parameter handling as well as the pathway and fold-change results.
        """
        json_obj = json.loads(self.test_json)
        json_obj["parameters"].append({
            "name": "max_missing_values",
            "value": "1"
        })

        # remove the patient since this coefficient cannot be estimated
        json_obj["datasets"][0]["design"].pop("patient")

        request = create_analysis_input_object(json_obj)
        dataset = request.datasets[0]
        dataset.df = util.string_to_array(dataset.data)

        self.assertEqual(3, len(request.parameters))
        # default values inserted automatically
        self.assertEqual(6, len(request.parameter_dict))
        self.assertIn("max_missing_values", request.parameter_dict)

        # get the mappings
        mappings = util.map_identifiers({"MITF", "CD19", "MS4A1"})

        # filter the dataset
        dataset.df = ReactomeAnalysisWorker._filter_dataset(
            dataset.df, mappings, dataset.design, 1)

        gene_set = self._get_gene_set()
        gene_id_colname = dataset.df.dtype.names[0]
        gene_set_mapping = GeneSetMapping.create_mapping(
            gene_set,
            identifier_mapping=mappings,
            identifiers=dataset.df[:][gene_id_colname].tolist())

        analyser = ReactomeRAnalyser()
        result = analyser.analyse_request(
            request=request,
            gene_set_mappings={dataset.name: gene_set_mapping},
            identifier_mappings=mappings,
            gene_set=gene_set)

        # test the result
        self.assertEqual(1, len(result))
        self.assertIsNotNone(result[0].pathways)

        result_lines = result[0].pathways.split("\n")
        self.assertEqual(24, len(result_lines))

        reader = csv.DictReader(result_lines, delimiter="\t")
        required_fields = ("Pathway", "Name", "Direction", "FDR", "PValue",
                           "NGenes")
        for field in required_fields:
            self.assertIn(field, reader.fieldnames, "Missing field " + field)

        # pathways expected at selected (1-based) input lines of the table
        expected_pathway_at_line = {
            2: "R-HSA-392499",
            6: "R-HSA-3232118",
            15: "R-HSA-162582",
            24: "R-HSA-6811558",
        }
        # pathways that must be reported as down-regulated, all others as up-regulated
        pathways_down = ("R-HSA-392499", "R-HSA-597592", "R-HSA-2990846",
                         "R-HSA-3108232", "R-HSA-3232118")

        for row in reader:
            expected_pathway = expected_pathway_at_line.get(reader.line_num)
            if expected_pathway is not None:
                self.assertEqual(expected_pathway, row["Pathway"])

            if row["Pathway"] in pathways_down:
                self.assertEqual("Down", row["Direction"])
                self.assertLess(
                    float(row["av_foldchange"]), 0,
                    "Incorrect regulation for " + row["Pathway"])
            else:
                self.assertEqual("Up", row["Direction"])
                self.assertGreater(float(row["av_foldchange"]), 0)

        # test the FC result
        self.assertIsNotNone(result[0].fold_changes)
        fc_lines = result[0].fold_changes.split("\n")
        self.assertEqual(4, len(fc_lines))

        fc_reader = csv.DictReader(fc_lines, delimiter="\t")
        fc_fields = ("logFC", "Identifier")

        for field in fc_fields:
            self.assertIn(field, fc_reader.fieldnames,
                          "Missing FC field " + field)

        mitf_found = False

        for row in fc_reader:
            if row["Identifier"] == "MITF":
                self.assertAlmostEqual(4.53, float(row["logFC"]), delta=0.01)
                mitf_found = True

        self.assertTrue(mitf_found, "Failed to find MITF in FC data")
示例#14
0
def _convert_gsa_result(
    result: AnalysisResult,
    reactome_blueprint: dict,
    min_p: float = 0.05,
    use_p: bool = False,
    excluded_pathways: list = None) -> dict:
    """
    Adds the data of the passed AnalysisResult to a deep copy of the passed reactome_blueprint.
    The passed blueprint itself is not modified.
    :param result: The AnalysisResult to draw the data from
    :param reactome_blueprint: The result retrieved from the REACTOME ORA analysis
    :param min_p: The minimum p-value in order to consider a pathway as significantly regulated.
    :param use_p: If set, p-values instead of fold-changes / pathway direction are used as "expression values"
    :param excluded_pathways: An optional list of excluded pathways. If set, pathways present in this list will be marked
                              as excluded and will not trigger an exception if no expression data is available.
                              Defaults to an empty list.
    :return: The adapted reactome result as a dict.
    :raises ConversionException: If a dataset lacks fold-change data or if expression values, regulation
                                 information or p-values are missing for an entry of the blueprint.
    """
    # create the default per call instead of sharing one mutable default object
    if excluded_pathways is None:
        excluded_pathways = []

    reactome_blueprint = copy.deepcopy(reactome_blueprint)

    # get all observed pathways
    all_pathways = set()
    pathway_fcs = []

    for dataset in result.results:
        pathway_fc = util.string_to_array(dataset.pathways)
        pathway_fcs.append(pathway_fc)
        pathway_column = pathway_fc.dtype.names.index("Pathway")
        all_pathways.update(row[pathway_column] for row in pathway_fc)

    # get the pathway-level changes
    pathway_expr = _get_pathway_changes(pathway_fcs,
                                        all_pathways=all_pathways,
                                        min_p=min_p,
                                        return_p=use_p)
    pathway_p = _get_pathway_p_values(pathway_fcs)

    # get the observed proteins
    all_identifiers = set()
    identifier_fcs = []

    for dataset in result.results:
        if not dataset.fold_changes:
            raise ConversionException(
                "Fold-change data missing in dataset '{}'".format(
                    dataset.name))

        identifier_fc = util.string_to_array(dataset.fold_changes)
        identifier_fcs.append(identifier_fc)

        identifier_index = identifier_fc.dtype.names.index("Identifier")
        all_identifiers.update(row[identifier_index] for row in identifier_fc)

    # get the gene-/protein-level changes
    identifier_expr = _get_identifier_changes(identifier_fcs,
                                              all_identifiers,
                                              return_p=use_p)

    # set the type
    summary = reactome_blueprint["summary"]
    summary["type"] = "GSA_STATISTICS" if use_p else "GSA_REGULATION"
    summary["sampleName"] = "Multi-sample analysis"

    # add the dataset names as column names
    expression_summary = reactome_blueprint["expressionSummary"]
    expression_summary["columnNames"] = [
        dataset.name for dataset in result.results
    ]
    expression_summary["min"] = 0 if use_p else -2
    expression_summary["max"] = 1 if use_p else 2

    # expression values used for excluded pathways (one 0 per dataset)
    missing_expr = [0 for _ in result.results]

    # populate the pathway data
    for pathway in reactome_blueprint["pathways"]:
        pathway_id = pathway["stId"]
        pathway_data = pathway["data"]

        # add the entity-level expression values
        for identifier_level in ("entities", "interactors"):
            for entity in pathway_data[identifier_level]:
                org_id = entity["id"]

                if org_id not in identifier_expr:
                    raise ConversionException(
                        "Missing expression values for " + org_id)

                entity["exp"] = identifier_expr[org_id]

        # update the statistics
        for statistics in pathway_data["statistics"]:
            # set the pathway p-value to 1 if the pathway was excluded
            if _ignore_pathway(pathway, excluded_pathways=excluded_pathways):
                statistics["entitiesPValue"] = 1
                statistics["entitiesFDR"] = 1
                statistics["exp"] = missing_expr
            else:
                if pathway_id not in pathway_expr:
                    raise ConversionException(
                        "Missing pathway regulation information for '" +
                        pathway_id + "'")
                if pathway_id not in pathway_p:
                    raise ConversionException("Missing p-value for pathway '" +
                                              pathway_id + "'")

                statistics["entitiesPValue"] = pathway_p[pathway_id]["p"]
                statistics["entitiesFDR"] = pathway_p[pathway_id]["fdr"]
                statistics["exp"] = pathway_expr[pathway_id]

    # populate the "not found" data
    for not_found_entry in reactome_blueprint["notFound"]:
        identifier = not_found_entry["id"]

        if identifier not in identifier_expr:
            raise ConversionException("Missing expression data for " +
                                      identifier)

        # add the expression data
        not_found_entry["exp"] = identifier_expr[identifier]

    return reactome_blueprint
示例#15
0
    def test_no_design(self):
        """
        A GSVA request whose dataset does not define a design must still be analysed.
        The layout of the pathway table and the values of two selected pathways
        are verified.
        """
        test_json = """
                        {
                  "analysisId": "test_01",
                  "datasets": [
                    {
                      "data": "\\tSample 1\\tSample2\\tSample 3\\nCD19\\t10\\t20\\t2\\nMS4A1\\t10\\t20\\t2\\n\
                      MITF\\t10\\t0\\t0\\n",
                      "name": "First experiment",
                      "type": "rnaseq_counts"
                    }
                  ],
                  "methodName": "ssgsea"
                }
                """
        json_obj = json.loads(test_json)
        request = create_analysis_input_object(json_obj)

        # verify the request before it is accessed for the first time
        self.assertIsNotNone(request)

        dataset = request.datasets[0]
        dataset.df = util.string_to_array(dataset.data)

        # get the mappings
        mappings = util.map_identifiers({"MITF", "CD19", "MS4A1"})

        gene_set = self._get_gene_set()
        gene_id_colname = dataset.df.dtype.names[0]
        gene_set_mapping = GeneSetMapping.create_mapping(
            gene_set,
            identifier_mapping=mappings,
            identifiers=dataset.df[:][gene_id_colname].tolist())

        analyser = ReactomeGSVARAnalyser()
        result = analyser.analyse_request(request=request,
                                          gene_set_mappings={dataset.name: gene_set_mapping},
                                          identifier_mappings=mappings,
                                          gene_set=gene_set)

        # test the result
        self.assertEqual(1, len(result))
        self.assertIsNotNone(result[0].pathways)
        self.assertIsNotNone(result[0].fold_changes)

        # test the actual result
        reader = csv.DictReader(result[0].pathways.split("\n"), delimiter="\t")
        self.assertEqual(5, len(reader.fieldnames))

        required_fields = ["Pathway", "Sample_1", "Sample2", "Sample_3"]
        for required_field in required_fields:
            self.assertIn(required_field, reader.fieldnames, "Missing required field " + required_field)

        # test the pathways
        found_pathways = 0

        for pathway in reader:
            found_pathways += 1

            if pathway["Pathway"] == "R-HSA-1280218":
                self.assertEqual("0.0", pathway["Sample_1"].strip())
                self.assertEqual("0.02880908", pathway["Sample2"].strip())
                self.assertEqual("0.02880908", pathway["Sample_3"].strip())

            if pathway["Pathway"] == "R-HSA-392499":
                self.assertEqual(-0.5, float(pathway["Sample_1"]))
                self.assertEqual(-0.5, float(pathway["Sample2"]))
                self.assertEqual(-0.5, float(pathway["Sample_3"]))

        self.assertEqual(143, found_pathways)