Пример #1
0
    def test_affy_to_pcl(self):
        """Run affy_to_pcl on the job from prepare_ba_job and check its PCL output.

        Verifies the platform accession code and Brainarray package stored in the
        returned job context, that the job is marked successful, that exactly one
        ComputationalResult and one ComputedFile exist, and that the generated
        PCL column mostly agrees with the reference file.
        """
        job = prepare_ba_job()
        # Make sure that a previous test didn't leave a directory around.
        shutil.rmtree("/home/user/data_store/processor_job_" + str(job.id), ignore_errors=True)
        job_context = array_express.affy_to_pcl(job.pk)

        self.assertEqual(job_context["platform_accession_code"], "hugene10st")
        self.assertEqual(job_context["brainarray_package"], "hugene10sthsensgprobe")

        updated_job = ProcessorJob.objects.get(pk=job.pk)
        self.assertTrue(updated_job.success)
        self.assertEqual(ComputationalResult.objects.count(), 1)
        self.assertEqual(ComputedFile.objects.count(), 1)

        # Fetch the single computed file once instead of re-querying per attribute.
        computed_file = ComputedFile.objects.all()[0]
        output_filename = computed_file.absolute_file_path
        # Register the removal immediately so the output file is deleted even
        # when one of the assertions below fails.
        self.addCleanup(os.remove, output_filename)

        self.assertEqual(computed_file.filename, "GSM1426071_CD_colon_active_1.PCL")

        expected_data = pd.read_csv(
            "/home/user/data_store/TEST/PCL/GSM1426071_CD_colon_active_1.PCL", sep="\t"
        )["GSM1426071_CD_colon_active_1.CEL"]
        actual_data = pd.read_csv(output_filename, sep="\t")["GSM1426071_CD_colon_active_1.CEL"]

        assertMostlyAgrees(self, expected_data, actual_data)
Пример #2
0
    def test_convert_simple_pcl_with_header(self):
        """Simple PCL input that starts with a header row, ex:
        > ID_REF, VALUE

        The VALUE column of the job's output file is compared against the
        expected converted file.
        """
        sample_info = {
            "accession_code": "GSM1234847",
            "source_filename": "https://www.ebi.ac.uk/arrayexpress/experiments/E-GEOD-51013/",
            "filename": "GSM1234847_sample_table.txt",
            "absolute_file_path": "/home/user/data_store/raw/TEST/NO_OP/GSM1234847_sample_table.txt",
            "platform_accession_code": "A-AFFY-38",
        }
        job = prepare_job(sample_info)
        final_context = assertRunsSuccessfully(self, job)

        # Both files are tab separated with the identifier in the first column.
        read_options = {"sep": "\t", "index_col": 0}
        expected_values = pd.read_csv(
            "/home/user/data_store/TEST/NO_OP/EXPECTED/gene_converted_GSM1234847-tbl-1.txt",
            **read_options,
        )["VALUE"]
        actual_values = pd.read_csv(final_context["output_file_path"], **read_options)["VALUE"]

        assertMostlyAgrees(self, expected_values, actual_values)
Пример #3
0
    def test_affy_to_pcl_no_brainarray(self):
        """Run affy_to_pcl on the job from prepare_non_ba_job and check its PCL output.

        Verifies that the job is marked successful, that exactly one
        ComputationalResult and one ComputedFile exist, and that the generated
        PCL column mostly agrees with the reference file.
        """
        job = prepare_non_ba_job()
        # Make sure that a previous test didn't leave a directory around.
        shutil.rmtree("/home/user/data_store/processor_job_" + str(job.id), ignore_errors=True)
        array_express.affy_to_pcl(job.pk)

        updated_job = ProcessorJob.objects.get(pk=job.pk)
        self.assertTrue(updated_job.success)
        self.assertEqual(ComputationalResult.objects.count(), 1)
        self.assertEqual(ComputedFile.objects.count(), 1)

        # Fetch the single computed file once instead of re-querying per attribute.
        computed_file = ComputedFile.objects.all()[0]
        output_filename = computed_file.absolute_file_path
        # Register the removal immediately so the output file is deleted even
        # when one of the assertions below fails.
        self.addCleanup(os.remove, output_filename)

        self.assertEqual(computed_file.filename, "GSM45588.PCL")

        expected_data = pd.read_csv("/home/user/data_store/TEST/PCL/GSM45588.PCL", sep="\t")[
            "GSM45588.CEL"
        ]
        actual_data = pd.read_csv(output_filename, sep="\t")["GSM45588.CEL"]

        assertMostlyAgrees(self, expected_data, actual_data)
Пример #4
0
    def test_affy_to_pcl_huex_v1(self):
        """Special Case because there is no CDL for Huex V1

        Runs affy_to_pcl on the job from prepare_huex_v1_job, then verifies that
        the job is marked successful, that exactly one ComputationalResult and
        one ComputedFile exist, and that the generated PCL column mostly agrees
        with the reference file.
        """
        job = prepare_huex_v1_job()
        # Make sure that a previous test didn't leave a directory around.
        shutil.rmtree("/home/user/data_store/processor_job_" + str(job.id), ignore_errors=True)
        array_express.affy_to_pcl(job.pk)

        updated_job = ProcessorJob.objects.get(pk=job.pk)
        self.assertTrue(updated_job.success)
        self.assertEqual(ComputationalResult.objects.count(), 1)
        self.assertEqual(ComputedFile.objects.count(), 1)

        # Fetch the single computed file once instead of re-querying per attribute.
        computed_file = ComputedFile.objects.all()[0]
        output_filename = computed_file.absolute_file_path
        # Register the removal immediately so the output file is deleted even
        # when one of the assertions below fails.
        self.addCleanup(os.remove, output_filename)

        self.assertEqual(computed_file.filename, "GSM1364667_U_110208_7-02-10_S2.PCL")

        expected_data = pd.read_csv(
            "/home/user/data_store/TEST/PCL/GSM1364667_U_110208_7-02-10_S2.PCL", sep="\t"
        )["GSM1364667_U_110208_7-02-10_S2.CEL"]
        actual_data = pd.read_csv(output_filename, sep="\t")["GSM1364667_U_110208_7-02-10_S2.CEL"]

        assertMostlyAgrees(self, expected_data, actual_data)
Пример #5
0
    def test_convert_illumina_no_header(self):
        """Illumina input that has no header row, ex:
        > ILMN_1885639    10.0000   0.7931
        > ILMN_2209417    10.0000   0.2029
        > ILMN_1765401    152.0873  0.0000

        The job output must pass no_op.check_output_quality and its VALUE
        column is compared against the expected converted file.
        """
        sample_info = {
            "accession_code": "GSM1089291",
            "source_filename": "https://github.com/AlexsLemonade/refinebio/files/2255178/GSM1089291-tbl-1.txt",
            "filename": "GSM1089291-tbl-1.txt",
            "absolute_file_path": "/home/user/data_store/raw/TEST/NO_OP/GSM1089291-tbl-1.txt",
            "platform_accession_code": "A-MEXP-1171",
            "manufacturer": "ILLUMINA",
        }
        job = prepare_job(sample_info)
        final_context = assertRunsSuccessfully(self, job)

        output_path = final_context["output_file_path"]
        self.assertTrue(no_op.check_output_quality(output_path))

        # Expected converted rows look like:
        # ENSG00000105675 10
        # ENSG00000085721 152.0873
        # ENSG00000278494 152.0873
        # Column names are passed explicitly to read_csv for both files.
        read_options = {"sep": "\t", "names": ["", "VALUE"], "index_col": 0}
        expected_values = pd.read_csv(
            "/home/user/data_store/TEST/NO_OP/EXPECTED/gene_converted_GSM1089291-tbl-1.txt",
            **read_options,
        )["VALUE"]
        actual_values = pd.read_csv(output_path, **read_options)["VALUE"]

        assertMostlyAgrees(self, expected_values, actual_values)
Пример #6
0
    def test_convert_processed_illumina(self):
        """Illumina input that starts with a header row, ex:
        > Reporter Identifier VALUE   Detection Pval
        > ILMN_1343291    14.943602   0
        > ILMN_1343295    13.528082   0

        The job output must pass no_op.check_output_quality and its VALUE
        column is compared against the expected converted file.
        """
        sample_info = {
            "accession_code": "GSM557500",
            "source_filename": "https://www.ebi.ac.uk/arrayexpress/experiments/E-GEOD-22433/",
            "filename": "GSM557500-tbl-1.txt",
            "absolute_file_path": "/home/user/data_store/raw/TEST/NO_OP/GSM557500-tbl-1.txt",
            "platform_accession_code": "A-MEXP-1171",
            "manufacturer": "ILLUMINA",
        }
        job = prepare_job(sample_info)
        final_context = assertRunsSuccessfully(self, job)

        output_path = final_context["output_file_path"]
        self.assertTrue(no_op.check_output_quality(output_path))

        # Expected converted rows look like:
        # ENSG00000156508 14.943602
        # ENSG00000111640 13.528082
        # Column names are passed explicitly to read_csv for both files.
        read_options = {"sep": "\t", "names": ["", "VALUE"], "index_col": 0}
        expected_values = pd.read_csv(
            "/home/user/data_store/TEST/NO_OP/EXPECTED/gene_converted_GSM557500-tbl-1.txt",
            **read_options,
        )["VALUE"]
        actual_values = pd.read_csv(output_path, **read_options)["VALUE"]

        assertMostlyAgrees(self, expected_values, actual_values)