Пример #1
0
 def test_intersect(self):
     """intersect() should hand back both passed samples of GSE0."""
     fake_isamp = self.gen_fake_isamp()
     gse = Series('GSE0')
     # Create every sample first, then register them as passed on the series.
     expected = [Sample(gsm_name, gse) for gsm_name in ('GSM10', 'GSM20')]
     for gsm in expected:
         gse.add_passed_sample(gsm)
     self.assertEqual(ppr.intersect(gse, fake_isamp), expected)
Пример #2
0
 def test_intersect_with_discrenpacy(self, L):
     """intersect() with a single passed sample should log a discrepancy.

     NOTE(review): ``L`` is presumably a log-capture object injected by a
     decorator outside this block -- confirm against the enclosing class.
     """
     fake_isamp = self.gen_fake_isamp()
     gse = Series('GSE0')
     only_gsm = Sample('GSM10', gse)
     gse.add_passed_sample(only_gsm)
     self.assertEqual(ppr.intersect(gse, fake_isamp), [only_gsm])
     # Exactly one record is expected: (logger name, level, message).
     expected_record = (
         'rsempipeline.utils.pre_pipeline_run',
         'ERROR',
         'Discrepancy for GSE0: 1 GSMs in soft, 2 GSMs in isamp, and only 1 left after intersection.',
     )
     L.check(expected_record)
Пример #3
0
 def test_analyze_one(self, mock_parse, L):
     """Check analyze_one() when the parsed series holds one passed sample.

     The mocked parser returns a GSE0 series containing only GSM10; the call
     is expected to return that sample and to log one discrepancy error.
     """
     isamp = self.gen_fake_isamp()
     gse = Series('GSE0')
     gsm = Sample('GSM10', gse)
     gsm.organism = 'H**o Sapiens'
     gse.add_passed_sample(gsm)
     mock_parse.return_value = gse

     result = ppr.analyze_one(
         'GSE0_family.soft.subset', isamp, ['H**o sapiens'])
     self.assertEqual(result, [gsm])

     # Exactly one record is expected: (logger name, level, message).
     expected_record = (
         'rsempipeline.utils.pre_pipeline_run',
         'ERROR',
         'Discrepancy for GSE0: 1 GSMs in soft, 2 GSMs in isamp, and only 1 left after intersection.',
     )
     L.check(expected_record)
Пример #4
0
    def test_gen_orig_params_per_with_a_single_sra(self):
        """gen_orig_params_per() should yield one entry for a single SRA."""
        # Fixture: one series holding one passed sample with a known outdir.
        gse = Series('GSE123456', 'GSE123456_family.soft.subset')
        gsm = Sample('GSM1', gse)
        gsm.outdir = 'some_outdir/GSE123456/some_species/GSM1'
        gse.add_passed_sample(gsm)

        # Serve the canned YAML instead of reading a real file.
        fake_open = mock.mock_open(read_data=SRA_INFO_YAML_SINGLE_SRA)
        with mock.patch('rsempipeline.utils.download.open', fake_open):
            params = download.gen_orig_params_per(gsm)

        expected_outputs = [
            'some_outdir/GSE123456/some_species/GSM1/SRX685892/SRR1557065/SRR1557065.sra',
            'some_outdir/GSE123456/some_species/GSM1/SRR1557065.sra.download.COMPLETE',
        ]
        self.assertEqual(params, [[None, expected_outputs, gsm]])
Пример #5
0
    def test_gen_all_samples_from_soft_and_isamp(
            self, mock_get_isamp, mock_analyze_one, mock_sanity_check):
        """The samples returned by the mocked analyze_one should come back.

        All three collaborators are mocks injected from outside this block:
        the sanity check is set to pass, the isamp lookup yields the fake
        isamp, and analyze_one yields two passed GSE0 samples.
        """
        mock_sanity_check.return_value = True
        mock_get_isamp.return_value = self.gen_fake_isamp()

        gse = Series('GSE0')
        expected = [Sample(gsm_name, gse) for gsm_name in ('GSM10', 'GSM20')]
        for gsm in expected:
            gsm.organism = 'H**o Sapiens'
            gse.add_passed_sample(gsm)
        mock_analyze_one.return_value = expected

        config = {'INTERESTED_ORGANISMS': ['H**o Sapiens']}
        result = ppr.gen_all_samples_from_soft_and_isamp(
            ['soft1'], 'isamp_file_or_str', config)
        self.assertEqual(result, expected)
Пример #6
0
 def test_add(self, mock_is_info_complete):
     """add() should return index + 1 only when the sample info is complete."""
     gse = Series('GSE123456', 'GSE123456_family.soft.subset')
     gsm = Sample('GSM1', gse)
     # (mocked completeness verdict, index expected back from add(..., 1))
     cases = ((True, 2), (False, 1))
     for info_complete, expected_index in cases:
         mock_is_info_complete.return_value = info_complete
         self.assertEqual(soft_parser.add(gsm, gse, 1), expected_index)
Пример #7
0
    def test_gen_orig_params_per_with_a_single_sra(self):
        """A sample with a single SRA should produce exactly one entry."""
        # Fixture: a series with one passed sample whose outdir is known.
        parent_series = Series('GSE123456', 'GSE123456_family.soft.subset')
        the_sample = Sample('GSM1', parent_series)
        the_sample.outdir = 'some_outdir/GSE123456/some_species/GSM1'
        parent_series.add_passed_sample(the_sample)

        # Replace open() in the download module so the canned YAML is read.
        opener = mock.mock_open(read_data=SRA_INFO_YAML_SINGLE_SRA)
        with mock.patch('rsempipeline.utils.download.open', opener):
            actual = download.gen_orig_params_per(the_sample)

        # Each entry is [input, outputs, other params].
        sra_file = 'some_outdir/GSE123456/some_species/GSM1/SRX685892/SRR1557065/SRR1557065.sra'
        flag_file = 'some_outdir/GSE123456/some_species/GSM1/SRR1557065.sra.download.COMPLETE'
        expected = [[None, [sra_file, flag_file], the_sample]]
        self.assertEqual(actual, expected)
Пример #8
0
    def test_gen_orig_params_per_with_multiple_sras(self):
        """A sample with four SRAs should produce four entries, in order."""
        # Fixture: a series with one passed sample whose outdir is known.
        gse = Series('GSE123456', 'GSE123456_family.soft.subset')
        gsm = Sample('GSM1', gse)
        gsm.outdir = 'some_outdir/GSE123456/some_species/GSM1'
        gse.add_passed_sample(gsm)

        # Serve the canned multi-SRA YAML instead of reading a real file.
        fake_open = mock.mock_open(read_data=SRA_INFO_YAML_MULTIPLE_SRAS)
        with mock.patch('rsempipeline.utils.download.open', fake_open):
            params = download.gen_orig_params_per(gsm)

        # One [sra file, completion flag] pair per expected entry.
        expected_outputs = [
            ['some_outdir/GSE123456/some_species/GSM1/SRX135160/SRR453140/SRR453140.sra',
             'some_outdir/GSE123456/some_species/GSM1/SRR453140.sra.download.COMPLETE'],
            ['some_outdir/GSE123456/some_species/GSM1/SRX135160/SRR453141/SRR453141.sra',
             'some_outdir/GSE123456/some_species/GSM1/SRR453141.sra.download.COMPLETE'],
            ['some_outdir/GSE123456/some_species/GSM1/SRX135160/SRR453142/SRR453142.sra',
             'some_outdir/GSE123456/some_species/GSM1/SRR453142.sra.download.COMPLETE'],
            ['some_outdir/GSE123456/some_species/GSM1/SRX135160/SRR453143/SRR453143.sra',
             'some_outdir/GSE123456/some_species/GSM1/SRR453143.sra.download.COMPLETE'],
        ]
        # in the format of input, outputs, other params
        expected = [[None, outputs, gsm] for outputs in expected_outputs]
        self.assertEqual(params, expected)
Пример #9
0
    def test_gen_orig_params_per_with_multiple_sras(self):
        """Four SRAs in the YAML should map to four parameter entries."""
        # Fixture: a series with one passed sample whose outdir is known.
        parent_series = Series('GSE123456', 'GSE123456_family.soft.subset')
        the_sample = Sample('GSM1', parent_series)
        the_sample.outdir = 'some_outdir/GSE123456/some_species/GSM1'
        parent_series.add_passed_sample(the_sample)

        # Replace open() in the download module so the canned YAML is read.
        opener = mock.mock_open(read_data=SRA_INFO_YAML_MULTIPLE_SRAS)
        with mock.patch('rsempipeline.utils.download.open', opener):
            actual = download.gen_orig_params_per(the_sample)

        def entry(sra_file, flag_file):
            # in the format of input, outputs, other params
            return [None, [sra_file, flag_file], the_sample]

        expected = [
            entry('some_outdir/GSE123456/some_species/GSM1/SRX135160/SRR453140/SRR453140.sra',
                  'some_outdir/GSE123456/some_species/GSM1/SRR453140.sra.download.COMPLETE'),
            entry('some_outdir/GSE123456/some_species/GSM1/SRX135160/SRR453141/SRR453141.sra',
                  'some_outdir/GSE123456/some_species/GSM1/SRR453141.sra.download.COMPLETE'),
            entry('some_outdir/GSE123456/some_species/GSM1/SRX135160/SRR453142/SRR453142.sra',
                  'some_outdir/GSE123456/some_species/GSM1/SRR453142.sra.download.COMPLETE'),
            entry('some_outdir/GSE123456/some_species/GSM1/SRX135160/SRR453143/SRR453143.sra',
                  'some_outdir/GSE123456/some_species/GSM1/SRR453143.sra.download.COMPLETE'),
        ]
        self.assertEqual(actual, expected)
Пример #10
0
def parse(soft_file, interested_organisms):
    """Parse the soft file into a Series populated with its samples.

    The file is read line by line and every line is split on its first
    ``=`` into a label and a value. A ``^SERIES`` line creates the Series,
    each ``^SAMPLE`` line starts a new Sample, and every line read while a
    sample is current is passed to ``update``.

    :param soft_file: path of the soft file to parse
    :param interested_organisms: a list of interested organisms:
                                 ['Homo sapiens', 'Mus musculus']
    :returns: the parsed Series, or None when the file has no ^SERIES line
    :raises ValueError: if the series name in the file doesn't match the one
                        obtained from the filename, or if a line contains
                        no '=' at all
    """
    logger.info("Parsing file: {0} ...".format(soft_file))
    series_name_from_file = get_series_name_from(soft_file)

    # Assume one GSE per soft file
    # index: the index of all passed samples, unpassed samples are not indexed
    index, series, current_sample = 1, None, None
    # NOTE(review): 'rb' yields bytes on Python 3, where the str-based split
    # below would fail -- revisit the mode if this module is ever ported.
    with open(soft_file, 'rb') as inf:
        for line in inf:
            # Split on the first '=' only, so a value that itself contains
            # '=' stays intact instead of breaking the two-name unpacking.
            label, value = [__.strip() for __ in line.split('=', 1)]
            if label == '^SERIES':
                series = Series(value, os.path.abspath(soft_file))
                if series.name != series_name_from_file:
                    msg = ('series contained in the soft file doesn\'t match '
                           'that in the filename: {0} != {1}'.format(
                               series, series_name_from_file))
                    raise ValueError(msg)
            elif label == '^SAMPLE':
                # A new sample begins: hand the previous one (None for the
                # first sample) to add() before replacing it.
                index = add(current_sample, series, index)
                current_sample = Sample(name=value, series=series)

            if current_sample:
                current_sample = update(current_sample, label, value,
                                        interested_organisms)
        if series is not None:
            # add the last sample
            add(current_sample, series, index)

            logger.info("{0}: {1}/{2} samples passed".format(
                series.name, series.num_passed_samples(),
                series.num_samples()))
            logger.info('=' * 30)
            return series
Пример #11
0
def parse(soft_file, interested_organisms):
    """Parse the soft file into a Series populated with its samples.

    Each line is split on its first ``=`` into a label and a value. A
    ``^SERIES`` line creates the Series, a ``^SAMPLE`` line starts a new
    Sample, and every line read while a sample is current is passed to
    ``update``.

    :param soft_file: path of the soft file to parse
    :param interested_organisms: a list of interested organisms:
                                 ['Homo sapiens', 'Mus musculus']
    :returns: the parsed Series, or None when the file has no ^SERIES line
    :raises ValueError: if the series name in the file doesn't match the one
                        obtained from the filename, or if a line contains
                        no '=' at all
    """
    logger.info("Parsing file: {0} ...".format(soft_file))
    series_name_from_file = get_series_name_from(soft_file)

    # Assume one GSE per soft file
    # index: the index of all passed samples, unpassed samples are not indexed
    index, series, current_sample = 1, None, None
    # NOTE(review): 'rb' yields bytes on Python 3, where the str-based split
    # below would fail -- revisit the mode if this module is ever ported.
    with open(soft_file, 'rb') as inf:
        for line in inf:
            # maxsplit=1 keeps any '=' inside the value from producing more
            # than two fields, which would break the unpacking below.
            label, value = [__.strip() for __ in line.split('=', 1)]
            if label == '^SERIES':
                series = Series(value, os.path.abspath(soft_file))
                if series.name != series_name_from_file:
                    msg = ('series contained in the soft file doesn\'t match '
                           'that in the filename: {0} != {1}'.format(
                               series, series_name_from_file))
                    raise ValueError(msg)
            elif label == '^SAMPLE':
                # A new sample begins: hand the previous one (None for the
                # first sample) to add() before replacing it.
                index = add(current_sample, series, index)
                current_sample = Sample(name=value, series=series)

            if current_sample:
                current_sample = update(current_sample, label, value,
                                        interested_organisms)
        if series is not None:
            # add the last sample
            add(current_sample, series, index)

            logger.info("{0}: {1}/{2} samples passed".format(
                series.name, series.num_passed_samples(), series.num_samples()))
            logger.info('=' * 30)
            return series
Пример #12
0
 def test_add_with_current_sample_being_None(self):
     """add() given None as the sample should return the index unchanged."""
     gse = Series('GSE123456', 'GSE123456_family.soft.subset')
     returned_index = soft_parser.add(None, gse, 1)
     self.assertEqual(returned_index, 1)
Пример #13
0
 def setUp(self):
     """Create the GSE123456 series fixture used by the tests."""
     series_name = "GSE123456"
     soft_file = "GSE123456_family.soft.subset"
     self.series = Series(series_name, soft_file)
Пример #14
0
class SeriesTestCase(unittest.TestCase):
    """Tests for Series construction, sample counting and string forms."""

    def setUp(self):
        """Create the empty GSE123456 series shared by the tests."""
        series_name = "GSE123456"
        soft_file = "GSE123456_family.soft.subset"
        self.series = Series(series_name, soft_file)

    def _assert_counts(self, total, passed):
        """Assert the series reports `total` samples, `passed` of them passed."""
        self.assertEqual(self.series.num_samples(), total)
        self.assertEqual(self.series.num_passed_samples(), passed)

    def test___init__(self):
        """A new series keeps its name and soft file and holds no samples."""
        fresh = self.series
        self.assertEqual(fresh.name, "GSE123456")
        self.assertEqual(fresh.passed_samples, [])
        self.assertEqual(fresh.samples, [])
        self.assertEqual(fresh.soft_file, "GSE123456_family.soft.subset")
        self.assertEqual(fresh.num_passed_samples(), 0)
        self.assertEqual(fresh.num_samples(), 0)

    def test_add_sample(self):
        """add_sample() raises the total count but not the passed count."""
        self.series.add_sample(Sample("GSM1", self.series))
        self._assert_counts(total=1, passed=0)

    def test_add_passed_sample(self):
        """add_passed_sample() raises both the total and the passed count."""
        plain_gsm, passed_gsm = [
            Sample(gsm_name, self.series) for gsm_name in ("GSM1", "GSM2")]
        self.series.add_sample(plain_gsm)
        self.series.add_passed_sample(passed_gsm)
        self._assert_counts(total=2, passed=1)

    def test___str__(self):
        """str() shows the name and the passed/total sample counts."""
        expected = "GSE123456 (passed samples: 0/0)"
        self.assertEqual(str(self.series), expected)

    def test___repr__(self):
        """repr() shows the name and the passed/total sample counts."""
        expected = "GSE123456 (passed samples: 0/0)"
        self.assertEqual(repr(self.series), expected)
0
 def test_analyze_one_soft_series_name_not_in_isamp_series_names_list(self, mock_parse, L):
     """analyze_one() should return None when the parsed series is GSE9999."""
     isamp = self.gen_fake_isamp()
     mock_parse.return_value = Series('GSE9999')
     result = ppr.analyze_one('GSE9999_family.soft.subset', isamp, [])
     self.assertIsNone(result)