class TestSnpPileupCWL(PlutoTestCase):
    cwl_file = CWLFile('snp-pileup-wrapper.cwl')
    def test_snp_pileup1(self):
        """
        """
        self.input = {
            "snps_vcf": {
                "path": FACETS_SNPS_VCF,
                "class": "File"
            },
            # the smallest pair of bam files in the test dataset
            "normal_bam": {
                "path": os.path.join(DATA_SETS['Proj_08390_G']['BAM_DIR'], "Sample23.rg.md.abra.printreads.bam"),
                "class": "File"
            },
            "tumor_bam": {
                "path": os.path.join(DATA_SETS['Proj_08390_G']['BAM_DIR'], "Sample24.rg.md.abra.printreads.bam"),
                "class": "File"
            },
            "output_prefix": "Sample24.Sample23"
        }

        output_json, output_dir = self.run_cwl()

        expected_output = {
            'output_file': OFile(name = "Sample24.Sample23.snp_pileup.gz", size = 34851004, hash = "755a8b64f45c819b4e2c481e64bf2fe36d1f5361", dir = output_dir)
            }
        self.maxDiff = None
        self.assertCWLDictEqual(output_json, expected_output)
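All of the test cases in this listing follow the same PlutoTestCase pattern: point cwl_file at a CWLFile, build self.input with CWL-style {"class": "File", "path": ...} entries, call self.run_cwl(), and compare the result against OFile entries with assertCWLDictEqual. A minimal sketch of that skeleton, assuming the usual pluto test helpers (the import path, the echo.cwl tool, and its output name are placeholders, not taken from this listing):

import os
from pluto.tools import PlutoTestCase, CWLFile, OFile  # import path assumed, not shown in this listing

class TestEchoCWL(PlutoTestCase):
    cwl_file = CWLFile('echo.cwl')  # hypothetical CWL, for illustration only

    def test_echo(self):
        # File inputs are passed as CWL-style dicts; plain values are passed as-is
        self.input = {
            "input_file": {"class": "File", "path": "/path/to/input.txt"},  # hypothetical path
            "output_filename": "output.txt",
        }
        output_json, output_dir = self.run_cwl()

        # size/checksum can be pinned when stable, or stripped via related_keys when not
        expected_output = {
            'output_file': OFile(name="output.txt", dir=output_dir)
        }
        strip_related_keys = [('basename', 'output.txt', ['size', 'checksum'])]
        self.assertCWLDictEqual(output_json, expected_output, related_keys=strip_related_keys)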
Example #2
class TestFusionFilter(PlutoTestCase):
    cwl_file = CWLFile('fusion_filter.cwl')

    def test_fusion_filter1(self):
        """
        """
        fusion_file = os.path.join(DATA_SETS['Proj_08390_G']['MAF_DIR'],
                                   "Sample1.Sample2.svs.pass.vep.portal.txt")

        self.input = {
            "fusions_file": {
                "class": "File",
                "path": fusion_file
            },
            "output_filename": "data_fusions.txt",
            "known_fusions_file": {
                "class": "File",
                "path": KNOWN_FUSIONS_FILE
            }
        }

        output_json, output_dir = self.run_cwl()

        expected_output = {
            "output_file":
            OFile(name='data_fusions.txt',
                  size=99,
                  hash='c16f763b248813fcdde76f7486f1ddc4e9856038',
                  dir=output_dir)
        }
        self.assertCWLDictEqual(output_json, expected_output)
Example #3
class TestFusionToSV(PlutoTestCase):
    cwl_file = CWLFile('fusion_to_sv.cwl')

    def test_fusion_to_sv(self):
        """
        Test fusion to sv conversion
        """
        fusion_file = os.path.join(DATA_SETS['Proj_08390_G']['MAF_DIR'], "Sample1.Sample2.svs.pass.vep.portal.txt")

        self.input = {
            "fusion_file": {
                  "class": "File",
                  "path": fusion_file
                },
            "output_filename": "data_SV.txt"

        }        

        output_json, output_dir = self.run_cwl()

        expected_output = {
            'output_file': OFile(
                name='data_SV.txt', size=1103, hash='02fda70b7838931321544f6797de4782adaf1a46', dir=output_dir)
        }

        self.maxDiff = None
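        # NOTE: related_keys entries appear to be matched on basename; this CWL produces
        # data_SV.txt rather than report.html, so the entry below has no effect here and
        # looks like it was carried over from another test.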
        strip_related_keys = [
        ('basename', 'report.html', ['size', 'checksum'])
        ]
        self.assertCWLDictEqual(output_json, expected_output, related_keys = strip_related_keys)
Example #4
class TestMaf2VcfGz(PlutoTestCase):
    cwl_file = CWLFile('maf2vcf_gz_workflow.cwl')

    def test_convert_maf_to_vcf(self):
        """
        Convert a single input maf file into a .vcf.gz with a .tbi
        """
        self.maxDiff = None
        self.input = {
            "maf_file": {
                "class":
                "File",
                "path":
                os.path.join(self.DATA_SETS['Proj_08390_G']['MAF_DIR'],
                             "Sample4.Sample3.muts.maf")
            },
            "ref_fasta": {
                "class": "File",
                "path": self.DATA_SETS['Proj_08390_G']['REF_FASTA']
            },
        }

        output_json, output_dir = self.run_cwl()

        # do not compare size and checksum since they are not consistent for the gzipped outputs
        output_json['output_file'].pop('checksum')
        output_json['output_file'].pop('size')
        output_json['output_file']['secondaryFiles'][0].pop('checksum')
        output_json['output_file']['secondaryFiles'][0].pop('size')

        expected_output = {
            'output_file': {
                'location': 'file://' + os.path.join(output_dir, 'variants.vcf.gz'),
                'basename': 'variants.vcf.gz',
                'class': 'File',
                # 'checksum': 'sha1$dcda65da7665683dcf97ed9c3989fd75c7a839c8',
                # 'size': 14885,
                'secondaryFiles': [{
                    'basename': 'variants.vcf.gz.tbi',
                    'location': 'file://' + os.path.join(output_dir, 'variants.vcf.gz.tbi'),
                    'class': 'File',
                    # 'checksum': 'sha1$c537928ca8bd7f33d19302d42c84ed6370687fca',
                    # 'size': 9320,
                    'path': os.path.join(output_dir, 'variants.vcf.gz.tbi')
                }],
                'path': os.path.join(output_dir, 'variants.vcf.gz')
            }
        }

        self.assertCWLDictEqual(output_json, expected_output)
Example #5
class TestMergeBed(PlutoTestCase):
    cwl_file = CWLFile('mergebed.cwl')

    def test_merge_bed(self):
        lines1 = [
            '1\t118166398\t118166398\n', '5\t35876484\t35876484\n',
            '17\t11998935\t11998935\n', '20\t62321135\t62321135\n'
        ]

        lines2 = [
            '5\t35876484\t35876484\n',  # in common
            '7\t116418998\t116418998\n',
            '7\t151845367\t151845367\n',
            '17\t11998935\t11998935\n',  # in common
        ]
        bed1 = os.path.join(self.tmpdir, "1.bed")
        bed2 = os.path.join(self.tmpdir, "2.bed")
        with open(bed1, "w") as fout:
            for line in lines1:
                fout.write(line)
        with open(bed2, "w") as fout:
            for line in lines2:
                fout.write(line)

        self.input = {
            "bed_files": [{
                "class": "File",
                "path": bed1
            }, {
                "class": "File",
                "path": bed2
            }]
        }

        output_json, output_dir = self.run_cwl()

        expected_output = {
            'output_file': {
                'location': 'file://' + os.path.join(output_dir, 'merged.bed'),
                'basename': 'merged.bed',
                'class': 'File',
                'checksum': 'sha1$638f9f3ef43802b8e372c3cef3848b16f2af1c66',
                'size': 149,
                'path': os.path.join(output_dir, 'merged.bed')
            }
        }
        self.assertCWLDictEqual(output_json, expected_output)

        output_file = os.path.join(output_dir, 'merged.bed')
        with open(output_file) as fin:
            lines = [line for line in fin]

        expected_lines = [
            '1\t118166398\t118166398\n', '5\t35876484\t35876484\n',
            '17\t11998935\t11998935\n', '20\t62321135\t62321135\n',
            '7\t116418998\t116418998\n', '7\t151845367\t151845367\n',
            '17\t11998935\t11998935\n'
        ]
        self.assertEqual(lines, expected_lines)
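The pinned 'checksum' values in these expected dicts use the CWL File convention of 'sha1$' plus the hex digest of the file contents. If a fixture changes, the expected checksum and size can be regenerated with a small helper like the one below (this helper is not part of pluto; it is just a convenience sketch):

import hashlib
import os

def cwl_checksum_and_size(path):
    """Return (checksum, size) for a file in the CWL File format: 'sha1$<hexdigest>'."""
    sha1 = hashlib.sha1()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(8192), b''):
            sha1.update(chunk)
    return 'sha1$' + sha1.hexdigest(), os.path.getsize(path)

# example: regenerate the values pinned in an expected_output entry
# checksum, size = cwl_checksum_and_size('/path/to/merged.bed')  # hypothetical path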
Example #6
class TestExampleWorkflow(PlutoTestCase):
    cwl_file = CWLFile('example_workflow.cwl')

    def test_example_workflow(self):
        """
        Test case for the example workflow
        """
        self.maxDiff = None
        self.input = {
            'value': "ABC",
            "samples": [{
                "sample_id": "1"
            }, {
                "sample_id": "2"
            }]
        }

        output_json, output_dir = self.run_cwl()

        expected_output = {
            "output_file": {
                "location": "file://" + os.path.join(output_dir, "output.concat.tsv"),
                "basename": "output.concat.tsv",
                "class": "File",
                "checksum": "sha1$be6fb2e96f81c63a0b5fc6392a317ba3afbbca19",
                "size": 30,
                "path": os.path.join(output_dir, "output.concat.tsv")
            },
            'env': {
                'basename': 'env.txt',
                # 'checksum': 'sha1$e2f2bf6581461560dc9d4c4c970b5b7b1ba15852',
                'class': 'File',
                'location': "file://" + os.path.join(output_dir, "env.txt"),
                'path': os.path.join(output_dir, 'env.txt')
                # 'size': 456
            }
        }
        output_json['env'].pop('checksum')
        output_json['env'].pop('size')

        self.assertDictEqual(output_json, expected_output)

        output_file = output_json['output_file']['path']
        with open(output_file) as f:
            lines = [l.strip() for l in f]

        expected_lines = [
            'SampleID\tValue',
            '1\tABC',
            '2\tABC',
        ]
        self.assertEqual(lines, expected_lines)
Example #7
class LsCWL(Operator):
    cwl_file = CWLFile('ls.cwl')

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.input = {}

        if 'input_files' in self.args:
            if len(self.args['input_files']) > 0:
                self.input = generate_input(self.args,
                                            list_File_keys=['input_files'])
Example #8
class ExampleWorkflow(Operator):
    cwl_file = CWLFile('example_workflow.cwl')

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.generate_input_data()

    def generate_input_data(self):
        input_args = copy.deepcopy(self.args)
        self.input = {'value': input_args['value'], 'samples': []}
        for sample in input_args['sampleIDs']:
            self.input['samples'].append({'sample_id': sample})
Example #9
class ConsensusMaf(Operator):
    cwl_file = CWLFile('consensus_maf.cwl')

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

        if self.args.get('maf_files_list', False):
            self.args['maf_files'] = self.args.pop('maf_files_list')
            self.input = generate_input(self.args,
                                        array_File_keys=['maf_files'])
        else:
            self.input = generate_input(self.args,
                                        list_File_keys=['maf_files'])
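generate_input is not shown in this listing, but judging from the test inputs elsewhere in these examples, the 'maf_files' entry these Operator classes build ends up as a CWL File array. A hand-built equivalent of that shape, bypassing generate_input entirely (paths are hypothetical):

maf_paths = ["/data/input1.maf", "/data/input2.maf"]  # hypothetical paths

# the same structure the tests above pass directly as self.input['maf_files']
input_data = {
    "maf_files": [{"class": "File", "path": p} for p in maf_paths]
}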
Example #10
class TestAddMSIStatus(PlutoTestCase):
    cwl_file = CWLFile('add_msi_status.cwl')

    def test_add_msi_status(self):
        """
        Test case for adding the MSI Status label to a table
        """
        lines1 = [
            ['Total_Number_of_Sites', 'Number_of_Somatic_Sites', 'MSI_SCORE', 'SAMPLE_ID'],
            ['123', '987', '11', 'Sample1-T'],
            ['456', '654', '2', 'Sample2-T'],
            ['789', '321', '5', 'Sample3-T']
        ]

        tmb_file = self.write_table(self.tmpdir,
                                    filename="msi.tsv",
                                    lines=lines1)
        self.input = {
            'input_filename': {
                "class": "File",
                "path": tmb_file
            },
            'output_filename': 'output.tsv',
            'header': 'MSI_STATUS'
        }

        output_json, output_dir = self.run_cwl()

        expected_output = {
            'output_file': {
                'location': 'file://' + os.path.join(output_dir, 'output.tsv'),
                'basename': 'output.tsv',
                'class': 'File',
                'checksum': 'sha1$ea9eb5b227f25bd03a50fbcec282259e89d176ac',
                'size': 168,
                'path': os.path.join(output_dir, 'output.tsv')
            }
        }
        self.assertCWLDictEqual(output_json, expected_output)

        output_file = expected_output['output_file']['path']

        lines = self.read_table(output_file)

        expected_lines = [
            ['Total_Number_of_Sites', 'Number_of_Somatic_Sites', 'MSI_SCORE', 'SAMPLE_ID', 'MSI_STATUS'],
            ['123', '987', '11', 'Sample1-T', 'Instable'],
            ['456', '654', '2', 'Sample2-T', 'Stable'],
            ['789', '321', '5', 'Sample3-T', 'Indeterminate']
        ]
        self.assertEqual(lines, expected_lines)
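Judging only from these fixture rows (score 2 labeled Stable, 5 Indeterminate, 11 Instable), the MSI_STATUS column appears to be assigned from score thresholds around 3 and 10. A stand-in for that labeling logic, which may not match the CWL tool's exact cutoffs:

def msi_status(score, low=3.0, high=10.0):
    """Label an MSI score; thresholds inferred from the expected rows above, not from the tool itself."""
    score = float(score)
    if score < low:
        return "Stable"
    if score < high:
        return "Indeterminate"
    return "Instable"

assert [msi_status(s) for s in ("11", "2", "5")] == ["Instable", "Stable", "Indeterminate"]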
Example #11
class ConcatTablesDirCWL(Operator):
    cwl_file = CWLFile('concat-tables_dir.cwl')

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

        if self.args.get('input_files_list', False):
            self.args['input_files'] = self.args.pop('input_files_list')
            self.input = generate_input(self.args,
                                        array_File_keys=['input_files'],
                                        bool_keys=['comments'])
        else:
            self.input = generate_input(self.args,
                                        list_File_keys=['input_files'],
                                        bool_keys=['comments'])
Example #12
class TestExampleWorkflow(PlutoTestCase):
    cwl_file = CWLFile('example_workflow.cwl')

    def test_example_workflow(self):
        """
        Test case for the example workflow
        """
        self.maxDiff = None
        self.input = {
            'value': "ABC",
            "samples": [{
                "sample_id": "1"
            }, {
                "sample_id": "2"
            }]
        }

        output_json, output_dir = self.run_cwl()

        expected_output = {
            "output_file":
            OFile(name='output.concat.tsv',
                  hash='d4297dfdad25ac92ffae2ce61c6cfe12c4089c28',
                  size=27,
                  dir=output_dir),
            'env':
            OFile(name='env.txt', dir=output_dir)
        }
        strip_related_keys = [
            ('basename', 'env.txt', ['size', 'checksum']),
        ]
        self.assertCWLDictEqual(output_json,
                                expected_output,
                                related_keys=strip_related_keys)

        output_file = os.path.join(output_dir, "output.concat.tsv")
        with open(output_file) as f:
            lines = [l.strip() for l in f]

        expected_lines = [
            'SampleID\tValue',
            '1\tABC',
            '2\tABC',
        ]
        self.assertEqual(lines, expected_lines)
Example #13
class TestFilterUncalledMutations(PlutoTestCase):
    cwl_file = CWLFile('filterUncalledMutations.cwl')

    def test_1(self):
        maf_file = os.path.join(DATA_SETS["Fillout01"]["OUTPUT_DIR"],
                                "output.maf")

        comments, mutations = self.load_mutations(maf_file, strip=True)
        self.assertEqual(len(mutations), 475)

        self.input = {"input_file": {"class": "File", "path": maf_file}}

        output_json, output_dir = self.run_cwl()

        output_data_mutations_extended = os.path.join(
            output_dir, 'data_mutations_extended.txt')
        output_data_mutations_uncalled = os.path.join(
            output_dir, 'data_mutations_uncalled.txt')

        expected_output = {
            'called_file':
            OFile(name='data_mutations_extended.txt',
                  dir=output_dir,
                  hash='e7430656d9fcbce36fa57eb92460db57742168ae',
                  size=347254),
            'uncalled_file':
            OFile(name='data_mutations_uncalled.txt',
                  dir=output_dir,
                  hash='58129786cc299011202eb078734b3ff513d54081',
                  size=287883),
        }

        self.maxDiff = None

        self.assertCWLDictEqual(output_json, expected_output)

        comments, mutations = self.load_mutations(
            output_data_mutations_extended, strip=True)
        self.assertEqual(len(mutations), 253)

        comments, mutations = self.load_mutations(
            output_data_mutations_uncalled, strip=True)
        self.assertEqual(len(mutations), 222)
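load_mutations here returns the comment lines and the mutation records of a MAF, so the counts asserted above (475 in, 253 called plus 222 uncalled out) are data-row counts excluding the '#' comments and the header. A rough stand-in for that record count, useful for sanity-checking fixtures outside the test harness:

import csv

def count_maf_records(path):
    """Count data rows in a tab-separated MAF, skipping '#' comment lines."""
    with open(path) as f:
        rows = (line for line in f if not line.startswith('#'))
        return sum(1 for _ in csv.DictReader(rows, delimiter='\t'))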
Example #14
class TestReplaceColname(PlutoTestCase):
    cwl_file = CWLFile('replace_colname.cwl')

    def setUp(self):
        # initialize the tmpdir
        super().setUp()
        lines = [["A", "%", "C"], ["1", "2", "3"], ["foo", "bar", "baz"]]
        self.table = self.write_table(self.tmpdir,
                                      filename="table.tsv",
                                      lines=lines)

    def test_change_colnames(self):
        self.maxDiff = None
        self.input = {
            "old_name": "%",
            "new_name": "pcnt",
            "input_file": {
                "class": "File",
                "path": self.table
            }
        }

        output_json, output_dir = self.run_cwl()

        expected_output = {
            'output_file': {
                'location': 'file://' + os.path.join(output_dir, 'output.tsv'),
                'basename': 'output.tsv',
                'class': 'File',
                'checksum': 'sha1$dfe58e029a2c3262dbdf4270b0b5af6bf420589e',
                'size': 27,
                'path': os.path.join(output_dir, 'output.tsv')
            }
        }
        self.assertCWLDictEqual(output_json, expected_output)

        output_file = os.path.join(output_dir, 'output.tsv')

        lines = self.read_table(output_file)

        expected_lines = [['A', 'pcnt', 'C'], ['1', '2', '3'],
                          ['foo', 'bar', 'baz']]
        self.assertEqual(lines, expected_lines)
Example #15
class TestRunFacetsWrapperCWL(PlutoTestCase):
    cwl_file = CWLFile('run-facets-wrapper.cwl')

    def test_run_facets_wrapper(self):
        """
        """
        self.input = {
            "snp_pileup": {
                "path": os.path.join(DATA_SETS['Proj_08390_G']['SNP_PILEUP_DIR'], "Sample24.Sample23.snp_pileup.gz"),
                "class": "File"
            },
            "sample_id": "Sample24"
        }

        output_json, output_dir = self.run_cwl()

        expected_output = {
        'failed': False,
        'arm_level_txt': OFile(name = "Sample24.arm_level.txt", hash = "df37c54ae4969257e436a7a7a595c42ef19ecbb5", size = 1824, dir = output_dir),
        'gene_level_txt': OFile(name = "Sample24.gene_level.txt", hash = "4e916a52458151007486bf536acfff539fdc2ecc", size = 148195, dir = output_dir),
        'hisens_rds': OFile(name = "Sample24_hisens.rds", hash = "6bfd6c7f29c49ec8ef538dd468a3b4626b05bda2", size = 213986, dir = output_dir),
        'hisens_seg': OFile(name = "Sample24_hisens.seg", hash = "652f9c6d0245af49bac6ca67a089af7d4e46801b", size = 1897, dir = output_dir),
        'hisens_png': OFile(name = "Sample24_hisens.png", hash = "6af56798d0d8e3b49c26ab9d0adc855c3c8a5a50", size = 168166, dir = output_dir),
        'purity_rds': OFile(name = "Sample24_purity.rds", hash = "dd8b967f84b191ff76214c6110db8d0e65f6514c", size = 213356, dir = output_dir),
        'purity_png': OFile(name = "Sample24_purity.png", hash = "7db765d900c8a431ab0325098b81eda2cd0780bf", size = 164021, dir = output_dir),
        'purity_seg': OFile(name = "Sample24_purity.seg", hash = "591e6d8b432e1e910fe4fb4b1814508131f960c9", size = 1285, dir = output_dir),
        'qc_txt': OFile(name = "Sample24.qc.txt", hash = "d4a36726a5fcb7b268aae02d97ce4e382e42d9f6", size = 1339, dir = output_dir),
        'output_txt': OFile(name = "Sample24.txt", hash = "4769dc7b8d4b127383e1936c07cdba1e2e09aecb", size = 480, dir = output_dir),
        "stderr_txt": OFile(name = "facets_stderr.txt", dir = output_dir),
        "stdout_txt": OFile(name = "facets_stdout.txt", dir = output_dir),
        }
        strip_related_keys = [
            ('basename', 'facets_stderr.txt', ['size', 'checksum']),
            ('basename', 'facets_stdout.txt', ['size', 'checksum']),
        ]
        self.assertCWLDictEqual(output_json, expected_output, related_keys = strip_related_keys)

        with open(os.path.join(output_dir, 'Sample24_hisens.seg')) as fin:
            self.assertEqual(len(fin.readlines()), 37)
        with open(os.path.join(output_dir, 'Sample24_purity.seg')) as fin:
            self.assertEqual(len(fin.readlines()), 25)
Example #16
class TestPasteCol(PlutoTestCase):
    cwl_file = CWLFile('paste-col.cwl')

    def test_paste_col_1(self):
        """
        """
        # make a dummy file with some lines
        input_lines = ["HEADER1", "foo1", "bar1"]
        input_file = os.path.join(self.tmpdir, "input.txt")
        with open(input_file, "w") as fout:
            for line in input_lines:
                fout.write(line + '\n')

        self.input = {
            "input_file": {
                "class": "File",
                "path": input_file
            },
            "output_filename": "output.txt",
            "header": "HEADER2",
            "value": "foo2"
        }

        output_json, output_dir = self.run_cwl()

        expected_output = {
            'output_file':
            OFile(name="output.txt",
                  size=36,
                  hash="34753fd98b2355d54740f3fdfc6490262c15dd59",
                  dir=output_dir)
        }

        self.assertCWLDictEqual(output_json, expected_output)

        output_file = expected_output['output_file']['path']
        with open(output_file) as fin:
            output_lines = [line.strip() for line in fin]

        expected_lines = ['HEADER1\tHEADER2', 'foo1\tfoo2', 'bar1\tfoo2']
        self.assertEqual(output_lines, expected_lines)
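paste-col.cwl appends a constant-valued column: the 'header' input goes on the first line and 'value' on every following line, judging from the expected lines above. A plain-Python equivalent of that transformation (not the tool's own implementation):

def paste_col(lines, header, value, sep="\t"):
    """Append a constant column to a table given as a list of lines."""
    return [lines[0] + sep + header] + [line + sep + value for line in lines[1:]]

assert paste_col(["HEADER1", "foo1", "bar1"], "HEADER2", "foo2") == [
    "HEADER1\tHEADER2", "foo1\tfoo2", "bar1\tfoo2"
]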
Example #17
class TestRunFacetsLegacyWrapperCWL(PlutoTestCase):
    cwl_file = CWLFile('run-facets-legacy-wrapper.cwl')
    def test_run_facets_wrapper(self):
        """
        """
        self.input = {
            "snp_pileup": {
                "path": os.path.join(DATA_SETS['Proj_08390_G']['SNP_PILEUP_DIR'], "Sample24.Sample23.snp_pileup.gz"),
                "class": "File"
            },
            "sample_id": "Sample24"
        }

        output_json, output_dir = self.run_cwl()

        expected_output = {
            'arm_level_txt': OFile(name = "Sample24.arm_level.txt", hash = "df37c54ae4969257e436a7a7a595c42ef19ecbb5", size = 1824, dir = output_dir),
            'failed': False,
            'gene_level_txt': OFile(name = "Sample24.gene_level.txt", hash = "4e916a52458151007486bf536acfff539fdc2ecc", size = 148195, dir = output_dir),
            'hisens_Rdata': OFile(name = "Sample24_hisens.Rdata", hash = "6cc3fd1fad17111e32c7c88b259a523092539181", size = 214260, dir = output_dir),
            'hisens_seg': OFile(name = "Sample24_hisens.seg", hash = "652f9c6d0245af49bac6ca67a089af7d4e46801b", size = 1897, dir = output_dir),
            'purity_Rdata': OFile(name = "Sample24_purity.Rdata", hash = "ca2c46ceebbb960a02fb960134ba1711983e71c8", size = 213542, dir = output_dir),
            'purity_seg': OFile(name = "Sample24_purity.seg", hash = "591e6d8b432e1e910fe4fb4b1814508131f960c9", size = 1285, dir = output_dir),
            'qc_txt': OFile(name = "Sample24.qc.txt", hash = "d4a36726a5fcb7b268aae02d97ce4e382e42d9f6", size = 1339, dir = output_dir),
            'hisens_cncf_txt': OFile(name = "Sample24_hisens.cncf.txt", hash = "db9131a33889a1cac82e3bd6b3f0e5e182c65105", size = 5238, dir = output_dir),
            'purity_cncf_txt': OFile(name = "Sample24_purity.cncf.txt", hash = "b331530e1e46b5ba1bdcedeb67f2aa82da6ebc5f", size = 3630, dir = output_dir),
            'stderr_txt': OFile(name = "facets_legacy_stderr.txt", dir = output_dir),
            'stdout_txt': OFile(name = 'facets_legacy_stdout.txt', dir = output_dir),
        }
        self.maxDiff = None

        strip_related_keys = [
        ('basename', 'facets_legacy_stdout.txt', ['size', 'checksum']),
        ('basename', 'facets_legacy_stderr.txt', ['size', 'checksum']),
        ]
        self.assertCWLDictEqual(output_json, expected_output, related_keys = strip_related_keys)

        with open(os.path.join(output_dir, 'Sample24_hisens.cncf.txt')) as fin:
            self.assertEqual(len(fin.readlines()), 37)
Example #18
class TestReplace(PlutoTestCase):
    cwl_file = CWLFile('replace.cwl')

    def test_replace1(self):
        """
        Test that strings get replaced
        """
        # make a dummy file with some lines
        input_lines = ["HEADER", "foo", "ILLOGICAL", "baz"]
        input_file = os.path.join(self.tmpdir, "input.txt")
        with open(input_file, "w") as fout:
            for line in input_lines:
                fout.write(line + '\n')

        self.input = {
            "input_file": {
                "class": "File",
                "path": input_file
            },
        }

        output_json, output_dir = self.run_cwl()

        # check the contents of the output file; 'ILLOGICAL' should have been replaced with 'NA'
        output_file = os.path.join(output_dir, 'output.txt')
        with open(output_file) as fin:
            output_lines = [line.strip() for line in fin]

        expected_lines = ["HEADER", "foo", "NA", "baz"]
        self.assertEqual(output_lines, expected_lines)

        expected_output = {
            'output_file':
            OFile(name="output.txt",
                  size=18,
                  hash="62255c8ee13b8ba6e01c7e17262a8ba1f174e5cb",
                  dir=output_dir)
        }
        self.assertCWLDictEqual(output_json, expected_output)
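Judging from the expected lines, replace.cwl rewrites the literal string 'ILLOGICAL' to 'NA'. The equivalent plain-Python transformation of the fixture (again, not the tool's own code):

input_lines = ["HEADER", "foo", "ILLOGICAL", "baz"]
output_lines = [line.replace("ILLOGICAL", "NA") for line in input_lines]
assert output_lines == ["HEADER", "foo", "NA", "baz"]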
Example #19
class TestReduceSigFigs(PlutoTestCase):
    cwl_file = CWLFile('reduce_sig_figs.cwl')

    def test_reduce_sig_figs(self):
        """
        Test that significant figures are reduced correctly
        """
        input_lines = ["seg.mean", "3.141592", "2.718281828"]
        input_file = os.path.join(self.tmpdir, "input.txt")
        with open(input_file, "w") as fout:
            for line in input_lines:
                fout.write(line + '\n')

        self.input = {"input_file": {"class": "File", "path": input_file}}

        output_json, output_dir = self.run_cwl()

        expected_output = {
            'output_file':
            OFile(name="output.txt",
                  size=26,
                  hash="d9f5ec4a9aa27a69ee64edb97eb10d6db65c7ad7",
                  dir=output_dir)
        }

        self.assertCWLDictEqual(output_json, expected_output)

        # check the contents of the file
        output_file = expected_output['output_file']['path']
        with open(output_file) as fin:
            reader = csv.DictReader(fin)
            rows = [row for row in reader]

        self.assertEqual(len(rows), 2)
        self.assertDictEqual(rows[0], OrderedDict([('seg.mean', '3.1416')]))
        self.assertDictEqual(rows[1], OrderedDict([('seg.mean', '2.7183')]))
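The expected rows show 3.141592 becoming 3.1416 and 2.718281828 becoming 2.7183, i.e. the seg.mean values are rounded to four decimal places. A one-line check of that interpretation (the actual CWL tool may implement the rounding differently):

values = ["3.141592", "2.718281828"]
rounded = [f"{float(v):.4f}" for v in values]
assert rounded == ["3.1416", "2.7183"]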
Example #20
class TestConsensusBed(PlutoTestCase):
    cwl_file = CWLFile('consensus_bed.cwl')

    def setUp(self):
        super().setUp()
        self.comments = [
        ['# comment 1'],
        ['# comment 2']
        ]
        self.maf_row1 = OrderedDict([
        ('Hugo_Symbol', 'RTEL1'),
        ('Entrez_Gene_Id', '51750'),
        ('Center', 'mskcc.org'),
        ('NCBI_Build', 'GRCh37'),
        ('Chromosome', '20'),
        ('Start_Position', '62321135'),
        ('End_Position', '62321135')
        ])
        self.maf_row2 = OrderedDict([
        ('Hugo_Symbol', 'FAM46C'),
        ('Entrez_Gene_Id', '54855'),
        ('Center', 'mskcc.org'),
        ('NCBI_Build', 'GRCh37'),
        ('Chromosome', '1'),
        ('Start_Position', '118166398'),
        ('End_Position', '118166398')
        ])
        self.maf_row3 = OrderedDict([
        ('Hugo_Symbol', 'IL7R'),
        ('Entrez_Gene_Id', '3575'),
        ('Center', 'mskcc.org'),
        ('NCBI_Build', 'GRCh37'),
        ('Chromosome', '5'),
        ('Start_Position', '35876484'),
        ('End_Position', '35876484')
        ])
        self.maf_row4 = OrderedDict([
        ('Hugo_Symbol', 'KMT2C'),
        ('Entrez_Gene_Id', '58508'),
        ('Center', 'mskcc.org'),
        ('NCBI_Build', 'GRCh37'),
        ('Chromosome', '7'),
        ('Start_Position', '151845367'),
        ('End_Position', '151845367')
        ])
        self.maf_row5 = OrderedDict([
        ('Hugo_Symbol', 'MET'),
        ('Entrez_Gene_Id', '4233'),
        ('Center', 'mskcc.org'),
        ('NCBI_Build', 'GRCh37'),
        ('Chromosome', '7'),
        ('Start_Position', '116418998'),
        ('End_Position', '116418998')
        ])
        self.maf_row6 = OrderedDict([
        ('Hugo_Symbol', 'MAP2K4'),
        ('Entrez_Gene_Id', '6416'),
        ('Center', 'mskcc.org'),
        ('NCBI_Build', 'GRCh37'),
        ('Chromosome', '17'),
        ('Start_Position', '11998935'),
        ('End_Position', '11998935')
        ])

        maf_rows1 = [ self.maf_row1, self.maf_row2, self.maf_row3, self.maf_row4 ]
        maf_rows2 = [ self.maf_row1, self.maf_row2, self.maf_row5, self.maf_row6 ]
        maf_lines1 = []
        maf_lines2 = []
        for comment in self.comments:
            maf_lines1.append(comment[0] + '\n')
            maf_lines2.append(comment[0] + '\n')
        header = '\t'.join([ k for k in maf_rows1[0].keys() ])
        header += '\n'
        maf_lines1.append(header)
        maf_lines2.append(header)
        for row in maf_rows1:
            values = [ v for v in row.values() ]
            line = '\t'.join(values)
            line += '\n'
            maf_lines1.append(line)
        for row in maf_rows2:
            values = [ v for v in row.values() ]
            line = '\t'.join(values)
            line += '\n'
            maf_lines2.append(line)
        self.maf1 = os.path.join(self.tmpdir, "input1.maf")
        self.maf2 = os.path.join(self.tmpdir, "input2.maf")
        with open(self.maf1, "w") as fout:
            for line in maf_lines1:
                fout.write(line)
        with open(self.maf2, "w") as fout:
            for line in maf_lines2:
                fout.write(line)

    def test_consensus_bed_workflow(self):
        """
        """
        self.maxDiff = None
        self.input = {
            'maf_files': [
                {'class': 'File', 'path': self.maf1},
                {'class': 'File', 'path': self.maf2},
            ]
        }
        output_json, output_dir = self.run_cwl()
        expected_output = {
            'output_file': {
                'location': 'file://' + os.path.join(output_dir,'merged.bed'),
                'basename': 'merged.bed',
                'class': 'File',
                'checksum': 'sha1$f831dc91b70c02f10f69da2dae21de57d580b654',
                'size': 149,
                'path':  os.path.join(output_dir,'merged.bed')
                }
            }
        self.assertCWLDictEqual(output_json, expected_output)

        output_file = output_json['output_file']['path']

        with open(output_file) as f:
            lines = [ l for l in f ]

        expected_lines = [
            '1\t118166398\t118166398\n',
            '5\t35876484\t35876484\n',
            '7\t116418998\t116418998\n',
            '17\t11998935\t11998935\n',
            '20\t62321135\t62321135\n',
            '7\t151845367\t151845367\n',
            '20\t62321135\t62321135\n'
        ]
        self.assertEqual(lines, expected_lines)
Example #21
class TestSamplesFilloutIndexBatch(PlutoTestCase):
    cwl_file = CWLFile('samples_fillout_index_batch_workflow.cwl')

    def setUp(self):
        super().setUp()
        self.maxDiff = None
        # do not use the cache for the samples fillout workflow; it breaks on split_vcf_to_mafs
        self.runner_args['use_cache'] = False

        self.sample1_maf = os.path.join(DATA_SETS['Fillout01']['MAF_DIR'],
                                        'Sample1.FillOutUnitTest01.muts.maf')
        self.sample2_maf = os.path.join(DATA_SETS['Fillout01']['MAF_DIR'],
                                        'Sample2.FillOutUnitTest01.muts.maf')
        self.sample3_maf = os.path.join(DATA_SETS['Fillout01']['MAF_DIR'],
                                        'Sample3.FillOutUnitTest01.muts.maf')
        self.sample4_maf = os.path.join(DATA_SETS['Fillout01']['MAF_DIR'],
                                        'Sample4.FillOutUnitTest01.muts.maf')
        self.sample5_maf = os.path.join(DATA_SETS['Fillout01']['MAF_DIR'],
                                        'Sample5.FillOutUnitTest01.muts.maf')

        self.sample1_bam = os.path.join(DATA_SETS['Fillout01']['BAM_DIR'],
                                        'Sample1.UnitTest01.bam')
        self.sample2_bam = os.path.join(DATA_SETS['Fillout01']['BAM_DIR'],
                                        'Sample2.UnitTest01.bam')
        self.sample3_bam = os.path.join(DATA_SETS['Fillout01']['BAM_DIR'],
                                        'Sample3.UnitTest01.bam')
        self.sample4_bam = os.path.join(DATA_SETS['Fillout01']['BAM_DIR'],
                                        'Sample4.UnitTest01.bam')
        self.sample5_bam = os.path.join(DATA_SETS['Fillout01']['BAM_DIR'],
                                        'Sample5.UnitTest01.bam')

    def test_one_group(self):
        """
        Test case for running the fillout workflow on a number of samples, each with a bam and maf
        """
        sample_group1 = [
            {
                "sample_id": "Sample1",
                "normal_id": "FROZENPOOLEDNORMAL_IMPACT505_V2",
                "sample_type": "research",
                "prefilter": True,
                "maf_file": {
                    "class": "File",
                    "path": self.sample1_maf
                },
                "bam_file": {
                    "class": "File",
                    "path": self.sample1_bam
                }
            },
            {
                "sample_id": "Sample2",
                "normal_id": "FROZENPOOLEDNORMAL_IMPACT505_V2",
                "sample_type": "research",
                "prefilter": True,
                "maf_file": {
                    "class": "File",
                    "path": self.sample2_maf
                },
                "bam_file": {
                    "class": "File",
                    "path": self.sample2_bam
                }
            },
            {
                "sample_id": "Sample3",
                "normal_id": "FROZENPOOLEDNORMAL_IMPACT505_V2",
                "sample_type": "clinical",
                "prefilter": False,
                "maf_file": {
                    "class": "File",
                    "path": self.sample3_maf
                },
                "bam_file": {
                    "class": "File",
                    "path": self.sample3_bam
                }
            },
            {
                "sample_id": "Sample4",
                "normal_id": "FROZENPOOLEDNORMAL_IMPACT505_V2",
                "sample_type": "clinical",
                "prefilter": False,
                "maf_file": {
                    "class": "File",
                    "path": self.sample4_maf
                },
                "bam_file": {
                    "class": "File",
                    "path": self.sample4_bam
                }
            },
            {
                "sample_id": "Sample5",
                "normal_id": "FROZENPOOLEDNORMAL_IMPACT505_V2",
                "sample_type": "research",
                "prefilter": True,
                "maf_file": {
                    "class": "File",
                    "path": self.sample5_maf
                },
                "bam_file": {
                    "class": "File",
                    "path": self.sample5_bam
                }
            },
        ]

        self.input = {
            "sample_groups": [sample_group1],
            "fillout_output_fname": 'output.maf',
            "ref_fasta": {
                "class": "File",
                "path": self.DATA_SETS['Proj_08390_G']['REF_FASTA']
            },
        }

        output_json, output_dir = self.run_cwl()
        output_file = os.path.join(output_dir, 'output.maf')
        filtered_output_path = os.path.join(output_dir, 'output.filtered.maf')
        portal_output_path = os.path.join(output_dir,
                                          'data_mutations_extended.txt')
        uncalled_output_path = os.path.join(output_dir,
                                            'data_mutations_uncalled.txt')

        expected_output = {
            'output_file':
            OFile(name='output.maf', dir=output_dir),
            'filtered_file':
            OFile(name='output.filtered.maf', dir=output_dir),
            'portal_file':
            OFile(name='data_mutations_extended.txt', dir=output_dir),
            'uncalled_file':
            OFile(name='data_mutations_uncalled.txt', dir=output_dir),
        }

        # file contents are inconsistent so strip some keys from the output dict
        strip_related_keys = [
            ('basename', 'output.maf', ['size', 'checksum']),
            ('basename', 'output.filtered.maf', ['size', 'checksum']),
            ('basename', 'data_mutations_extended.txt', ['size', 'checksum']),
            ('basename', 'data_mutations_uncalled.txt', ['size', 'checksum'])
        ]
        self.assertCWLDictEqual(output_json,
                                expected_output,
                                related_keys=strip_related_keys)

        self.assertNumMutationsHash(output_file, 310,
                                    '18fafe6dd335cb62f515e0323e6b74b2')
        self.assertNumMutationsHash(filtered_output_path, 225,
                                    '450b97a2b93ed9421c141837f99240ce')
        self.assertNumMutationsHash(portal_output_path, 159,
                                    '52a95dcfaf0b767fe90f4115e11f3b0e')
        self.assertNumMutationsHash(uncalled_output_path, 66,
                                    '790f7faefb7b7c039fd48a8ede1cfe35')
        self.assertEqualNumMutations(
            [portal_output_path, uncalled_output_path], filtered_output_path)
        self.assertMutFieldContains(
            output_file,
            "Tumor_Sample_Barcode",
            ["Sample1", "Sample2", "Sample3", "Sample4", "Sample5"],
            containsAll=True)

    def test_two_groups(self):
        sample_group1 = [
            {
                "sample_id": "Sample1",
                "normal_id": "FROZENPOOLEDNORMAL_IMPACT505_V2",
                "sample_type": "research",
                "prefilter": True,
                "maf_file": {
                    "class": "File",
                    "path": self.sample1_maf
                },
                "bam_file": {
                    "class": "File",
                    "path": self.sample1_bam
                }
            },
            {
                "sample_id": "Sample2",
                "normal_id": "FROZENPOOLEDNORMAL_IMPACT505_V2",
                "sample_type": "research",
                "prefilter": True,
                "maf_file": {
                    "class": "File",
                    "path": self.sample2_maf
                },
                "bam_file": {
                    "class": "File",
                    "path": self.sample2_bam
                }
            },
            {
                "sample_id": "Sample3",
                "normal_id": "FROZENPOOLEDNORMAL_IMPACT505_V2",
                "sample_type": "clinical",
                "prefilter": False,
                "maf_file": {
                    "class": "File",
                    "path": self.sample3_maf
                },
                "bam_file": {
                    "class": "File",
                    "path": self.sample3_bam
                }
            },
        ]

        sample_group2 = [
            {
                "sample_id": "Sample4",
                "normal_id": "FROZENPOOLEDNORMAL_IMPACT505_V2",
                "sample_type": "clinical",
                "prefilter": False,
                "maf_file": {
                    "class": "File",
                    "path": self.sample4_maf
                },
                "bam_file": {
                    "class": "File",
                    "path": self.sample4_bam
                }
            },
            {
                "sample_id": "Sample5",
                "normal_id": "FROZENPOOLEDNORMAL_IMPACT505_V2",
                "sample_type": "research",
                "prefilter": True,
                "maf_file": {
                    "class": "File",
                    "path": self.sample5_maf
                },
                "bam_file": {
                    "class": "File",
                    "path": self.sample5_bam
                }
            },
        ]

        self.input = {
            "sample_groups": [sample_group1, sample_group2],
            "fillout_output_fname": 'output.maf',
            "ref_fasta": {
                "class": "File",
                "path": self.DATA_SETS['Proj_08390_G']['REF_FASTA']
            },
        }

        output_json, output_dir = self.run_cwl()
        output_file = os.path.join(output_dir, 'output.maf')
        filtered_output_path = os.path.join(output_dir, 'output.filtered.maf')
        portal_output_path = os.path.join(output_dir,
                                          'data_mutations_extended.txt')
        uncalled_output_path = os.path.join(output_dir,
                                            'data_mutations_uncalled.txt')

        expected_output = {
            'output_file':
            OFile(name='output.maf', dir=output_dir),
            'filtered_file':
            OFile(name='output.filtered.maf', dir=output_dir),
            'portal_file':
            OFile(name='data_mutations_extended.txt', dir=output_dir),
            'uncalled_file':
            OFile(name='data_mutations_uncalled.txt', dir=output_dir),
        }

        # file contents are inconsistent so strip some keys from the output dict
        strip_related_keys = [
            ('basename', 'output.maf', ['size', 'checksum']),
            ('basename', 'output.filtered.maf', ['size', 'checksum']),
            ('basename', 'data_mutations_extended.txt', ['size', 'checksum']),
            ('basename', 'data_mutations_uncalled.txt', ['size', 'checksum'])
        ]
        self.assertCWLDictEqual(output_json,
                                expected_output,
                                related_keys=strip_related_keys)

        self.assertNumMutationsHash(output_file, 235,
                                    '4e4c91ef129a853a35b86f7fa6f1268a')
        self.assertNumMutationsHash(filtered_output_path, 150,
                                    '8397a12302977db14e798a1b2e3ba151')
        self.assertNumMutationsHash(portal_output_path, 120,
                                    '9d171233ecd91f3518fee98b5948978d')
        self.assertNumMutationsHash(uncalled_output_path, 30,
                                    'ae90ff0cc0d0d0ab08029553fdccf381')
        self.assertEqualNumMutations(
            [portal_output_path, uncalled_output_path], filtered_output_path)
        self.assertMutFieldContains(
            output_file,
            "Tumor_Sample_Barcode",
            ["Sample1", "Sample2", "Sample3", "Sample4", "Sample5"],
            containsAll=True)

    def test_three_groups(self):
        """
        Three groups, one of which contains a single sample (singleton)
        """
        sample_group1 = [
            {
                "sample_id": "Sample1",
                "normal_id": "FROZENPOOLEDNORMAL_IMPACT505_V2",
                "sample_type": "research",
                "prefilter": True,
                "maf_file": {
                    "class": "File",
                    "path": self.sample1_maf
                },
                "bam_file": {
                    "class": "File",
                    "path": self.sample1_bam
                }
            },
            {
                "sample_id": "Sample2",
                "normal_id": "FROZENPOOLEDNORMAL_IMPACT505_V2",
                "sample_type": "research",
                "prefilter": True,
                "maf_file": {
                    "class": "File",
                    "path": self.sample2_maf
                },
                "bam_file": {
                    "class": "File",
                    "path": self.sample2_bam
                }
            },
        ]

        sample_group2 = [{
            "sample_id": "Sample3",
            "normal_id": "FROZENPOOLEDNORMAL_IMPACT505_V2",
            "sample_type": "clinical",
            "prefilter": False,
            "maf_file": {
                "class": "File",
                "path": self.sample3_maf
            },
            "bam_file": {
                "class": "File",
                "path": self.sample3_bam
            }
        }, {
            "sample_id": "Sample4",
            "normal_id": "FROZENPOOLEDNORMAL_IMPACT505_V2",
            "sample_type": "clinical",
            "prefilter": False,
            "maf_file": {
                "class": "File",
                "path": self.sample4_maf
            },
            "bam_file": {
                "class": "File",
                "path": self.sample4_bam
            }
        }]

        # Singleton sample; no DMP clinical matches
        sample_group3 = [
            {
                "sample_id": "Sample5",
                "normal_id": "FROZENPOOLEDNORMAL_IMPACT505_V2",
                "sample_type": "research",
                "prefilter": True,
                "maf_file": {
                    "class": "File",
                    "path": self.sample5_maf
                },
                "bam_file": {
                    "class": "File",
                    "path": self.sample5_bam
                }
            },
        ]

        self.input = {
            "sample_groups": [sample_group1, sample_group2, sample_group3],
            "fillout_output_fname": 'output.maf',
            "ref_fasta": {
                "class": "File",
                "path": self.DATA_SETS['Proj_08390_G']['REF_FASTA']
            },
        }

        output_json, output_dir = self.run_cwl()
        output_file = os.path.join(output_dir, 'output.maf')
        filtered_output_path = os.path.join(output_dir, 'output.filtered.maf')
        portal_output_path = os.path.join(output_dir,
                                          'data_mutations_extended.txt')
        uncalled_output_path = os.path.join(output_dir,
                                            'data_mutations_uncalled.txt')

        expected_output = {
            'output_file':
            OFile(name='output.maf', dir=output_dir),
            'filtered_file':
            OFile(name='output.filtered.maf', dir=output_dir),
            'portal_file':
            OFile(name='data_mutations_extended.txt', dir=output_dir),
            'uncalled_file':
            OFile(name='data_mutations_uncalled.txt', dir=output_dir),
        }

        # file contents are inconsistent so strip some keys from the output dict
        strip_related_keys = [
            ('basename', 'output.maf', ['size', 'checksum']),
            ('basename', 'output.filtered.maf', ['size', 'checksum']),
            ('basename', 'data_mutations_extended.txt', ['size', 'checksum']),
            ('basename', 'data_mutations_uncalled.txt', ['size', 'checksum'])
        ]
        self.assertCWLDictEqual(output_json,
                                expected_output,
                                related_keys=strip_related_keys)

        self.assertNumMutationsHash(output_file, 188,
                                    '2f4c5e5cb13430f456bbc41a0a93dc41')
        self.assertNumMutationsHash(filtered_output_path, 126,
                                    '3dda4952d2ae396079155b4bc8cc276f')
        self.assertNumMutationsHash(portal_output_path, 108,
                                    '37b87cea1d161efda602bef860eabdba')
        self.assertNumMutationsHash(uncalled_output_path, 18,
                                    'cb601fb73ecf937db024351d69a441f1')
        self.assertEqualNumMutations(
            [portal_output_path, uncalled_output_path], filtered_output_path)
        self.assertMutFieldContains(
            output_file,
            "Tumor_Sample_Barcode",
            ["Sample1", "Sample2", "Sample3", "Sample4", "Sample5"],
            containsAll=True)

    def test_four_groups(self):
        """
        Four groups, two of which contain a single sample (singleton)
        """
        sample_group1 = [
            {
                "sample_id": "Sample1",
                "normal_id": "FROZENPOOLEDNORMAL_IMPACT505_V2",
                "sample_type": "research",
                "prefilter": True,
                "maf_file": {
                    "class": "File",
                    "path": self.sample1_maf
                },
                "bam_file": {
                    "class": "File",
                    "path": self.sample1_bam
                }
            },
            {
                "sample_id": "Sample2",
                "normal_id": "FROZENPOOLEDNORMAL_IMPACT505_V2",
                "sample_type": "research",
                "prefilter": True,
                "maf_file": {
                    "class": "File",
                    "path": self.sample2_maf
                },
                "bam_file": {
                    "class": "File",
                    "path": self.sample2_bam
                }
            },
        ]

        sample_group2 = [
            {
                "sample_id": "Sample3",
                "normal_id": "FROZENPOOLEDNORMAL_IMPACT505_V2",
                "sample_type": "clinical",
                "prefilter": False,
                "maf_file": {
                    "class": "File",
                    "path": self.sample3_maf
                },
                "bam_file": {
                    "class": "File",
                    "path": self.sample3_bam
                }
            },
        ]

        sample_group3 = [
            {
                "sample_id": "Sample5",
                "normal_id": "FROZENPOOLEDNORMAL_IMPACT505_V2",
                "sample_type": "research",
                "prefilter": True,
                "maf_file": {
                    "class": "File",
                    "path": self.sample5_maf
                },
                "bam_file": {
                    "class": "File",
                    "path": self.sample5_bam
                }
            },
        ]

        sample_group4 = [{
            "sample_id": "Sample4",
            "normal_id": "FROZENPOOLEDNORMAL_IMPACT505_V2",
            "sample_type": "clinical",
            "prefilter": False,
            "maf_file": {
                "class": "File",
                "path": self.sample4_maf
            },
            "bam_file": {
                "class": "File",
                "path": self.sample4_bam
            }
        }]

        self.input = {
            "sample_groups": [sample_group1, sample_group2, sample_group3, sample_group4],
            "fillout_output_fname": 'output.maf',
            "ref_fasta": {
                "class": "File",
                "path": self.DATA_SETS['Proj_08390_G']['REF_FASTA']
            },
        }

        output_json, output_dir = self.run_cwl()
        output_file = os.path.join(output_dir, 'output.maf')
        filtered_output_path = os.path.join(output_dir, 'output.filtered.maf')
        portal_output_path = os.path.join(output_dir,
                                          'data_mutations_extended.txt')
        uncalled_output_path = os.path.join(output_dir,
                                            'data_mutations_uncalled.txt')

        expected_output = {
            'output_file':
            OFile(name='output.maf', dir=output_dir),
            'filtered_file':
            OFile(name='output.filtered.maf', dir=output_dir),
            'portal_file':
            OFile(name='data_mutations_extended.txt', dir=output_dir),
            'uncalled_file':
            OFile(name='data_mutations_uncalled.txt', dir=output_dir),
        }

        # file contents are inconsistent so strip some keys from the output dict
        strip_related_keys = [
            ('basename', 'output.maf', ['size', 'checksum']),
            ('basename', 'output.filtered.maf', ['size', 'checksum']),
            ('basename', 'data_mutations_extended.txt', ['size', 'checksum']),
            ('basename', 'data_mutations_uncalled.txt', ['size', 'checksum'])
        ]
        self.assertCWLDictEqual(output_json,
                                expected_output,
                                related_keys=strip_related_keys)

        self.assertNumMutationsHash(
            output_file, 157,
            'ed7dcce977a13f360463e45f5a07154b')  # , _print = True
        self.assertNumMutationsHash(filtered_output_path, 36,
                                    '5ea9c4b66287a100fc90e05619d52364')
        self.assertNumMutationsHash(portal_output_path, 36,
                                    'ed7be9c6b425b526e167bdcf8c954637')
        self.assertNumMutationsHash(uncalled_output_path, 0,
                                    'd751713988987e9331980363e24189ce')
        self.assertEqualNumMutations(
            [portal_output_path, uncalled_output_path], filtered_output_path)
        self.assertMutFieldContains(
            output_file,
            "Tumor_Sample_Barcode",
            ["Sample1", "Sample2", "Sample3", "Sample4", "Sample5"],
            containsAll=True)
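Every sample_groups entry in these fillout tests repeats the same six keys. When building larger inputs, a small helper keeps the groups readable (a convenience sketch only; it is not part of the workflow's API):

def make_sample(sample_id, maf_path, bam_path,
                normal_id="FROZENPOOLEDNORMAL_IMPACT505_V2",
                sample_type="research", prefilter=True):
    """Build one sample_groups entry in the shape the workflow input above expects."""
    return {
        "sample_id": sample_id,
        "normal_id": normal_id,
        "sample_type": sample_type,
        "prefilter": prefilter,
        "maf_file": {"class": "File", "path": maf_path},
        "bam_file": {"class": "File", "path": bam_path},
    }

# e.g. sample_group1 = [make_sample("Sample1", self.sample1_maf, self.sample1_bam), ...]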
Example #22
class EnvContainerCWL(Operator):
    cwl_file = CWLFile('env_container.cwl')

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.input = {}
Example #23
class TestMsiWorkflow(PlutoTestCase):
    cwl_file = CWLFile('msi_workflow.cwl')

    # def setUp(self):
    #     # initialize the tmpdir
    #     super().setUp()

    def test_msi_workflow_demo1(self):
        """
        Test case for running the MSI workflow on a single sample
        """
        normal_bam = os.path.join(self.DATA_SETS['demo']['BAM_DIR'],
                                  "Sample2.bam")
        tumor_bam = os.path.join(self.DATA_SETS['demo']['BAM_DIR'],
                                 "Sample1.bam")

        self.input = {
            "threads":
            "16",
            "microsatellites_file": {
                "class": "File",
                "path": MICROSATELLITES_LIST
            },
            "pairs": [{
                "pair_id": "Sample1.Sample2",
                "tumor_id": "Sample1",
                "normal_id": "Sample2"
            }],
            "normal_bam_files": [{
                "path": normal_bam,
                "class": "File"
            }],
            "tumor_bam_files": [{
                "path": tumor_bam,
                "class": "File"
            }]
        }

        output_json, output_dir = self.run_cwl()

        expected_output = {
            'pairs': [{
                "pair_id": "Sample1.Sample2",
                "tumor_id": "Sample1",
                "normal_id": "Sample2",
                "msi_tsv": OFile(name='Sample1.Sample2.msi.tsv',
                                 hash="92576a9be4d6a36c67b26d16fdc4134b0d1b9cd9",
                                 size=54,
                                 dir=output_dir)
            }]
        }
        self.assertCWLDictEqual(output_json, expected_output)

        output_file = os.path.join(output_dir, 'Sample1.Sample2.msi.tsv')
        lines = self.read_table(output_file)

        expected_lines = [['MSI_SCORE', 'MSI_STATUS', 'SAMPLE_ID'],
                          ['20.90', 'Instable', 'Sample1']]
        self.assertEqual(lines, expected_lines)

    # @unittest.skipIf(ENABLE_LARGE_TESTS!=True, "is a large test")
    def test_msi_workflow1(self):
        """
        Test case for running the MSI workflow on multiple samples
        """
        # data_clinical_file = self.write_table(self.tmpdir, filename = "data_clinical_sample.txt", lines = self.data_clinical_lines)
        normal_bam = os.path.join(self.DATA_SETS['Proj_08390_G']['BAM_DIR'],
                                  "Sample23.rg.md.abra.printreads.bam")
        tumor_bam = os.path.join(self.DATA_SETS['Proj_08390_G']['BAM_DIR'],
                                 "Sample24.rg.md.abra.printreads.bam")
        normal_bam2 = os.path.join(self.DATA_SETS['Proj_08390_G']['BAM_DIR'],
                                   "Sample35.rg.md.abra.printreads.bam")
        tumor_bam2 = os.path.join(self.DATA_SETS['Proj_08390_G']['BAM_DIR'],
                                  "Sample36.rg.md.abra.printreads.bam")

        self.input = {
            "microsatellites_file": {
                "class": "File",
                "path": MICROSATELLITES_LIST
            },
            "pairs": [{
                "pair_id": "Sample1-T.Sample1-N",
                "tumor_id": "Sample1-T",
                "normal_id": "Sample1-N"
            }, {
                "pair_id": "Sample2-T.Sample2-N",
                "tumor_id": "Sample2-T",
                "normal_id": "Sample2-N"
            }],
            "normal_bam_files": [{
                "path": normal_bam,
                "class": "File"
            }, {
                "path": normal_bam2,
                "class": "File"
            }],
            "tumor_bam_files": [{
                "path": tumor_bam,
                "class": "File"
            }, {
                "path": tumor_bam2,
                "class": "File"
            }]
        }

        output_json, output_dir = self.run_cwl()

        expected_output = {
            "pairs": [{
                "pair_id":
                "Sample1-T.Sample1-N",
                "tumor_id":
                "Sample1-T",
                "normal_id":
                "Sample1-N",
                "msi_tsv":
                OFile(name='Sample1-T.Sample1-N.msi.tsv',
                      hash="bc132f6ab9b779d7cba51e7ddfa82af724134f03",
                      size=56,
                      dir=output_dir)
            }, {
                "pair_id":
                "Sample2-T.Sample2-N",
                "tumor_id":
                "Sample2-T",
                "normal_id":
                "Sample2-N",
                "msi_tsv":
                OFile(name='Sample2-T.Sample2-N.msi.tsv',
                      hash="11fcf9459010aa5ea06e62e72155807c9723d45a",
                      size=56,
                      dir=output_dir)
            }]
        }
        self.assertCWLDictEqual(output_json, expected_output)

        output_file = os.path.join(output_dir, 'Sample2-T.Sample2-N.msi.tsv')
        lines = self.read_table(output_file)
        expected_lines = [['MSI_SCORE', 'MSI_STATUS', 'SAMPLE_ID'],
                          ['40.14', 'Instable', 'Sample2-T']]
        self.assertEqual(lines, expected_lines)

        output_file = os.path.join(output_dir, 'Sample1-T.Sample1-N.msi.tsv')
        lines = self.read_table(output_file)
        expected_lines = [['MSI_SCORE', 'MSI_STATUS', 'SAMPLE_ID'],
                          ['21.97', 'Instable', 'Sample1-T']]
        self.assertEqual(lines, expected_lines)

    # @unittest.skipIf(ENABLE_LARGE_TESTS!=True, "is a large test")
    def test_msi_workflow2(self):
        """
        Test case for running the MSI workflow on a single sample pair from the full-sized dataset
        """
        normal_bam = os.path.join(self.DATA_SETS['Proj_08390_G']['BAM_DIR'],
                                  "Sample23.rg.md.abra.printreads.bam")
        tumor_bam = os.path.join(self.DATA_SETS['Proj_08390_G']['BAM_DIR'],
                                 "Sample24.rg.md.abra.printreads.bam")

        self.input = {
            "microsatellites_file": {
                "class": "File",
                "path": MICROSATELLITES_LIST
            },
            "pairs": [{
                "pair_id": "Sample1-T.Sample1-N",
                "tumor_id": "Sample1-T",
                "normal_id": "Sample1-N"
            }],
            "normal_bam_files": [{
                "path": normal_bam,
                "class": "File"
            }],
            "tumor_bam_files": [{
                "path": tumor_bam,
                "class": "File"
            }]
        }

        output_json, output_dir = self.run_cwl()

        expected_output = {
            "pairs": [{
                "pair_id":
                "Sample1-T.Sample1-N",
                "tumor_id":
                "Sample1-T",
                "normal_id":
                "Sample1-N",
                "msi_tsv":
                OFile(name='Sample1-T.Sample1-N.msi.tsv',
                      hash="bc132f6ab9b779d7cba51e7ddfa82af724134f03",
                      size=56,
                      dir=output_dir)
            }]
        }
        self.assertCWLDictEqual(output_json, expected_output)

        output_file = os.path.join(output_dir, 'Sample1-T.Sample1-N.msi.tsv')
        lines = self.read_table(output_file)

        expected_lines = [['MSI_SCORE', 'MSI_STATUS', 'SAMPLE_ID'],
                          ['21.97', 'Instable', 'Sample1-T']]
        self.assertEqual(lines, expected_lines)
Example #24
class TestConcatTablesDir(PlutoTestCase):
    cwl_file = CWLFile('concat-tables_dir.cwl')

    def test_concat_two_tables(self):
        """
        Test that two files are concatenated correctly
        """
        self.skipTest("Assertion fails for output")
        # make a dummy file with some lines
        input_lines1 = ["HEADER1", "foo1", "bar1"]
        input_file1 = os.path.join(self.tmpdir, "input1.txt")
        with open(input_file1, "w") as fout:
            for line in input_lines1:
                fout.write(line + '\n')

        input_lines2 = ["HEADER2", "foo2", "bar2"]
        input_file2 = os.path.join(self.tmpdir, "input2.txt")
        with open(input_file2, "w") as fout:
            for line in input_lines2:
                fout.write(line + '\n')

        self.input = {
            "input_files": [{
                  "class": "File",
                  "path": input_file1
                },
                {
                  "class": "File",
                  "path": input_file2
                }
                ],
            "output_filename": "output.txt"
            }

        output_json, output_dir = self.run_cwl()
        output_path = os.path.join(output_dir, 'output.txt')

        # check the contents of the concatenated file; the two tables should be
        # merged column-wise, with 'NA' filling the cells that have no value
        output_file = output_path
        with open(output_file) as fin:
            output_lines = [ line.strip() for line in fin ]

        expected_lines = ['HEADER1\tHEADER2', 'foo1\tNA', 'bar1\tNA', 'NA\tfoo2', 'NA\tbar2']
        self.assertEqual(output_lines, expected_lines)

        expected_output = {
            'output_file': {
                'location': 'file://' + output_path,
                'basename': 'output.txt',
                'class': 'File',
                'checksum': 'sha1$d92a4e707cb5dad2ec557edfe976680dfffc5f3f',
                'size': 53,
                'path': output_path
                }
            }
        self.assertCWLDictEqual(output_json, expected_output)

    def test_concat_one_tables(self):
        """
        Test that one file is returned correctly from the script
        """
        # make a dummy file with some lines
        input_lines1 = ["HEADER1", "foo1", "bar1"]
        input_file1 = os.path.join(self.tmpdir, "input1.txt")
        with open(input_file1, "w") as fout:
            for line in input_lines1:
                fout.write(line + '\n')

        self.input = {
            "input_files": [{
                  "class": "File",
                  "path": input_file1
                },
                ],
            "output_filename": "output.txt"
            }

        output_json, output_dir = self.run_cwl()
        output_path = os.path.join(output_dir, 'output.txt')

        # check the contents of the concatenated file; should be the same as the input
        output_file = output_path
        with open(output_file) as fin:
            output_lines = [ line.strip() for line in fin ]

        expected_lines = ['HEADER1', 'foo1', 'bar1']
        self.assertEqual(output_lines, expected_lines)

        expected_output = {
            'output_file': {
                'location': 'file://' + output_path,
                'basename': 'output.txt',
                'class': 'File',
                'checksum': 'sha1$2274c54c24a98e8235e34d78b700d04cb95f48dd',
                'size': 21,
                'path': output_path
                }
            }
        self.assertCWLDictEqual(output_json, expected_output)
Example #25
class TestMafFilter(PlutoTestCase):
    cwl_file = CWLFile('maf_filter.cwl')

    def test_filter_a_maf_file(self):
        """
        Test that a filtered maf file comes out as expected
        """
        input_maf = os.path.join(DATA_SETS['Proj_08390_G']['MAF_DIR'],
                                 "Sample1.Sample2.muts.maf")
        self.assertNumMutations(input_maf, 12514)

        self.input = {
            "maf_file": {
                "class": "File",
                "path": input_maf
            },
            "argos_version_string": ARGOS_VERSION_STRING,
            "is_impact": True,
            "analysis_mutations_filename": "Proj_08390_G.muts.maf",
            "cbio_mutation_data_filename": 'data_mutations_extended.txt'
        }

        output_json, output_dir = self.run_cwl()

        expected_output = {
            'analysis_mutations_file':
            OFile(name='Proj_08390_G.muts.maf',
                  size=28079,
                  hash='24421ab8d1a39a71f48eecbb0dd167d5d9f5c529',
                  dir=output_dir),
            'cbio_mutation_data_file':
            OFile(name='data_mutations_extended.txt',
                  size=4534,
                  hash='6131494536ce956d741c820378e7e2ce1c714403',
                  dir=output_dir),
            'rejected_file':
            OFile(name='rejected.muts.maf',
                  size=18627626,
                  hash='a06789623715703c5006db6876ecb58b8498f938',
                  dir=output_dir),
        }
        self.assertCWLDictEqual(output_json, expected_output)

        self.assertNumMutations(
            os.path.join(output_dir, "Proj_08390_G.muts.maf"), 22)

        # validate output mutation file contents
        self.assertCompareMutFiles(
            os.path.join(output_dir, "Proj_08390_G.muts.maf"),
            os.path.join(DATA_SETS['Proj_08390_G']['MAF_FILTER_DIR'],
                         "Sample1", "analyst_file.txt"),
            muts_only=True,
            compare_len=True)

        self.assertCompareMutFiles(
            os.path.join(output_dir, 'data_mutations_extended.txt'),
            os.path.join(DATA_SETS['Proj_08390_G']['MAF_FILTER_DIR'],
                         "Sample1", "portal_file.txt"),
            muts_only=True,
            compare_len=True)

    def test_maf_filter_argos_3_2_0(self):
        """
        Test the maf filter script results when used with argos_version_string 3.2.0
        """
        input_maf = os.path.join(DATA_SETS['Proj_08390_G']['MAF_DIR'],
                                 "Sample1.Sample2.muts.maf")
        self.assertNumMutations(input_maf, 12514)

        self.input = {
            "maf_file": {
                "class": "File",
                "path": input_maf
            },
            "argos_version_string": "3.2.0",
            "is_impact": True,
            "analysis_mutations_filename": "Proj_08390_G.muts.maf",
            "cbio_mutation_data_filename": 'data_mutations_extended.txt'
        }

        output_json, output_dir = self.run_cwl()

        expected_output = {
            'analysis_mutations_file':
            OFile(name='Proj_08390_G.muts.maf',
                  size=28081,
                  hash='fd78842c9410e7e622dee270ec9c0e7628811f18',
                  dir=output_dir),
            'cbio_mutation_data_file':
            OFile(name='data_mutations_extended.txt',
                  size=4536,
                  hash='47e716eabbfda3408b2d9a08b9bb432b2cb8fce8',
                  dir=output_dir),
            'rejected_file':
            OFile(name='rejected.muts.maf',
                  size=18627626,
                  hash='a06789623715703c5006db6876ecb58b8498f938',
                  dir=output_dir)
        }
        self.assertCWLDictEqual(output_json, expected_output)

        self.assertNumMutations(
            expected_output['analysis_mutations_file']['path'], 22)

        # validate output mutation file contents
        self.assertCompareMutFiles(
            expected_output['analysis_mutations_file']['path'],
            os.path.join(DATA_SETS['Proj_08390_G']['MAF_FILTER_DIR'],
                         "Sample1", "analyst_file.txt"),
            muts_only=True,
            compare_len=True)

        self.assertCompareMutFiles(
            expected_output['cbio_mutation_data_file']['path'],
            os.path.join(DATA_SETS['Proj_08390_G']['MAF_FILTER_DIR'],
                         "Sample1", "portal_file.txt"),
            muts_only=True,
            compare_len=True)

    def test_filter_maf_file_impact_false(self):
        """
        Test that a filtered maf file comes out as expected when is_impact is False
        """
        self.maxDiff = None
        input_maf = os.path.join(DATA_SETS['Proj_08390_G']['MAF_DIR'],
                                 "Sample1.Sample2.muts.maf")
        self.assertNumMutations(input_maf, 12514)

        self.input = {
            "maf_file": {
                "class": "File",
                "path": input_maf
            },
            "argos_version_string": ARGOS_VERSION_STRING,
            "is_impact": False,
            "analysis_mutations_filename": "Proj_08390_G.muts.maf",
            "cbio_mutation_data_filename": 'data_mutations_extended.txt'
        }

        output_json, output_dir = self.run_cwl()

        expected_output = {
            'analysis_mutations_file':
            OFile(name='Proj_08390_G.muts.maf',
                  size=24524,
                  hash='9fb9d43c71e546750ddec6aea2313dda28547b3a',
                  dir=output_dir),
            'cbio_mutation_data_file':
            OFile(name='data_mutations_extended.txt',
                  size=3931,
                  hash='15ca06249511c32c32e058c246a757ec8df11d83',
                  dir=output_dir),
            'rejected_file':
            OFile(name='rejected.muts.maf',
                  size=18790398,
                  hash='e7441703699e82cef500d9557bfcbd3464ce8eab',
                  dir=output_dir)
        }
        self.assertCWLDictEqual(output_json, expected_output)
        self.assertNumMutations(
            expected_output['analysis_mutations_file']['path'], 18)
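        # Note: with is_impact set to False the analysis maf retains 18 mutations,
        # compared to 22 in test_filter_a_maf_file above where is_impact is True.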

    def test_large_maf_file(self):
        """
        Test that a very large maf file with many variants gets filtered as expected
        """
        input_maf = os.path.join(DATA_SETS['Proj_08390_G']['MAF_FILTER_DIR'],
                                 "Proj_08390_G", "Proj_08390_G.muts.maf")

        self.input = {
            "maf_file": {
                "class": "File",
                "path": input_maf
            },
            "argos_version_string": "2.x",
            "is_impact": True,
            "analysis_mutations_filename": "Proj_08390_G.muts.maf",
            "cbio_mutation_data_filename": 'data_mutations_extended.txt'
        }

        output_json, output_dir = self.run_cwl()

        expected_output = {
            'analysis_mutations_file':
            OFile(name='Proj_08390_G.muts.maf',
                  size=2386906,
                  hash='4ef341ab4280140f9be15e65a0258a4170ff651d',
                  dir=output_dir),
            'cbio_mutation_data_file':
            OFile(name='data_mutations_extended.txt',
                  size=278458,
                  hash='af36cf815820fdf41f1401578138b5cbd551a217',
                  dir=output_dir),
            'rejected_file':
            OFile(name='rejected.muts.maf',
                  size=1047796463,
                  hash='345953da2c7cb801fa08368260469cf7c153055f',
                  dir=output_dir)
        }
        self.assertCWLDictEqual(output_json, expected_output)

        self.assertNumMutations(
            expected_output['analysis_mutations_file']['path'], 1662)
        self.assertNumMutations(
            expected_output['cbio_mutation_data_file']['path'], 1139)

        # validate output mutation file contents
        self.assertCompareMutFiles(
            expected_output['analysis_mutations_file']['path'],
            os.path.join(DATA_SETS['Proj_08390_G']['MAF_FILTER_DIR'],
                         "Proj_08390_G", "analyst_file.txt"),
            muts_only=True,
            compare_len=True)

        self.assertCompareMutFiles(
            expected_output['cbio_mutation_data_file']['path'],
            os.path.join(DATA_SETS['Proj_08390_G']['MAF_FILTER_DIR'],
                         "Proj_08390_G", "portal_file.txt"),
            muts_only=True,
            compare_len=True)

class TestSamplesFillout(PlutoTestCase):
    cwl_file = CWLFile('samples_fillout_workflow.cwl')

    def test_Nick_testcase(self):
        """
        Test case using Nick's custom-made maf and bam files for fillout testing

        This test case uses the germline filter to exclude some mutations from the output

        Takes about 10 minutes to run
        """
        self.maxDiff = None
        self.runner_args['use_cache'] = False  # do not use the cwltool cache; it causes failures with this workflow
        self.runner_args['debug'] = True
        self.runner_args['js_console'] = True

        sample1_maf = os.path.join(DATA_SETS['Fillout01']['MAF_DIR'],
                                   'Sample1.FillOutUnitTest01.muts.maf')
        sample2_maf = os.path.join(DATA_SETS['Fillout01']['MAF_DIR'],
                                   'Sample2.FillOutUnitTest01.muts.maf')
        sample3_maf = os.path.join(DATA_SETS['Fillout01']['MAF_DIR'],
                                   'Sample3.FillOutUnitTest01.muts.maf')
        sample4_maf = os.path.join(DATA_SETS['Fillout01']['MAF_DIR'],
                                   'Sample4.FillOutUnitTest01.muts.maf')
        sample5_maf = os.path.join(DATA_SETS['Fillout01']['MAF_DIR'],
                                   'Sample5.FillOutUnitTest01.muts.maf')

        sample1_bam = os.path.join(DATA_SETS['Fillout01']['BAM_DIR'],
                                   'Sample1.UnitTest01.bam')
        sample2_bam = os.path.join(DATA_SETS['Fillout01']['BAM_DIR'],
                                   'Sample2.UnitTest01.bam')
        sample3_bam = os.path.join(DATA_SETS['Fillout01']['BAM_DIR'],
                                   'Sample3.UnitTest01.bam')
        sample4_bam = os.path.join(DATA_SETS['Fillout01']['BAM_DIR'],
                                   'Sample4.UnitTest01.bam')
        sample5_bam = os.path.join(DATA_SETS['Fillout01']['BAM_DIR'],
                                   'Sample5.UnitTest01.bam')

        self.input = {
            "samples": [
                {
                    "sample_id": "Sample1",
                    "normal_id": "FROZENPOOLEDNORMAL_IMPACT505_V2",
                    "sample_type": "research",
                    "maf_file": {
                        "class": "File",
                        "path": sample1_maf
                    },
                    "bam_file": {
                        "class": "File",
                        "path": sample1_bam
                    }
                },
                {
                    "sample_id": "Sample2",
                    "normal_id": "FROZENPOOLEDNORMAL_IMPACT505_V2",
                    "sample_type": "research",
                    "maf_file": {
                        "class": "File",
                        "path": sample2_maf
                    },
                    "bam_file": {
                        "class": "File",
                        "path": sample2_bam
                    }
                },
                {
                    "sample_id": "Sample3",
                    "normal_id": "FROZENPOOLEDNORMAL_IMPACT505_V2",
                    "sample_type": "clinical",
                    "maf_file": {
                        "class": "File",
                        "path": sample3_maf
                    },
                    "bam_file": {
                        "class": "File",
                        "path": sample3_bam
                    }
                },
                {
                    "sample_id": "Sample4",
                    "normal_id": "FROZENPOOLEDNORMAL_IMPACT505_V2",
                    "sample_type": "clinical",
                    "maf_file": {
                        "class": "File",
                        "path": sample4_maf
                    },
                    "bam_file": {
                        "class": "File",
                        "path": sample4_bam
                    }
                },
                {
                    "sample_id": "Sample5",
                    "normal_id": "FROZENPOOLEDNORMAL_IMPACT505_V2",
                    "sample_type": "research",
                    "maf_file": {
                        "class": "File",
                        "path": sample5_maf
                    },
                    "bam_file": {
                        "class": "File",
                        "path": sample5_bam
                    }
                },
            ],
            "ref_fasta": {
                "class": "File",
                "path": self.DATA_SETS['Proj_08390_G']['REF_FASTA']
            }
        }
        output_json, output_dir = self.run_cwl()
        output_path = os.path.join(output_dir, 'output.maf')
        filtered_output_path = os.path.join(output_dir, 'output.filtered.maf')
        portal_output_path = os.path.join(output_dir,
                                          'data_mutations_extended.txt')
        uncalled_output_path = os.path.join(output_dir,
                                            'data_mutations_uncalled.txt')

        expected_output = {
            'output_file':
            OFile(name='output.maf', dir=output_dir),
            'filtered_file':
            OFile(name='output.filtered.maf', dir=output_dir),
            'portal_file':
            OFile(name='data_mutations_extended.txt', dir=output_dir),
            'uncalled_file':
            OFile(name='data_mutations_uncalled.txt', dir=output_dir),
        }

        # file contents are inconsistent between runs, so strip some keys from the output dict
        strip_related_keys = [
            ('basename', 'output.maf', ['size', 'checksum']),
            ('basename', 'output.filtered.maf', ['size', 'checksum']),
            ('basename', 'data_mutations_extended.txt', ['size', 'checksum']),
            ('basename', 'data_mutations_uncalled.txt', ['size', 'checksum'])
        ]
        self.assertCWLDictEqual(output_json,
                                expected_output,
                                related_keys=strip_related_keys)
        # the all_effects field is variable and changes the file size and checksum,
        # so check the number of variant outputs instead
        self.assertNumMutationsHash(output_path, 475,
                                    'd041bc641d85761b60c6b7ef8606bab2')
        self.assertNumMutationsHash(filtered_output_path, 230,
                                    'c9cde01507d1b2470057c5d120eaab68')
        self.assertNumMutationsHash(portal_output_path, 163,
                                    '8dd6f3af030a2eca3b5fa0698896361a')
        self.assertNumMutationsHash(uncalled_output_path, 67,
                                    'a474b61268d2a4c25fd27cc2ccbbce96')
        self.assertEqualNumMutations(
            [portal_output_path, uncalled_output_path], filtered_output_path)
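        # Note: by the counts asserted above, the portal (163) and uncalled (67)
        # mutations sum to the filtered total (230); assertEqualNumMutations is
        # assumed to be checking exactly that relationship.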

    def test_Nick_testcase_2(self):
        """
        Test case using Nick's custom-made maf and bam files for fillout testing

        This test case uses the germline filter to exclude some mutations from the output

        This test case uses only research samples

        Takes about 10 minutes to run
        """
        self.maxDiff = None
        self.runner_args['use_cache'] = False  # do not use the cwltool cache; it causes failures with this workflow
        self.runner_args['debug'] = True
        self.runner_args['js_console'] = True

        sample1_maf = os.path.join(DATA_SETS['Fillout01']['MAF_DIR'],
                                   'Sample1.FillOutUnitTest01.muts.maf')
        sample2_maf = os.path.join(DATA_SETS['Fillout01']['MAF_DIR'],
                                   'Sample2.FillOutUnitTest01.muts.maf')
        sample3_maf = os.path.join(DATA_SETS['Fillout01']['MAF_DIR'],
                                   'Sample3.FillOutUnitTest01.muts.maf')
        sample4_maf = os.path.join(DATA_SETS['Fillout01']['MAF_DIR'],
                                   'Sample4.FillOutUnitTest01.muts.maf')
        sample5_maf = os.path.join(DATA_SETS['Fillout01']['MAF_DIR'],
                                   'Sample5.FillOutUnitTest01.muts.maf')

        sample1_bam = os.path.join(DATA_SETS['Fillout01']['BAM_DIR'],
                                   'Sample1.UnitTest01.bam')
        sample2_bam = os.path.join(DATA_SETS['Fillout01']['BAM_DIR'],
                                   'Sample2.UnitTest01.bam')
        sample3_bam = os.path.join(DATA_SETS['Fillout01']['BAM_DIR'],
                                   'Sample3.UnitTest01.bam')
        sample4_bam = os.path.join(DATA_SETS['Fillout01']['BAM_DIR'],
                                   'Sample4.UnitTest01.bam')
        sample5_bam = os.path.join(DATA_SETS['Fillout01']['BAM_DIR'],
                                   'Sample5.UnitTest01.bam')

        self.input = {
            "samples": [
                {
                    "sample_id": "Sample1",
                    "normal_id": "FROZENPOOLEDNORMAL_IMPACT505_V2",
                    "sample_type": "research",
                    "maf_file": {
                        "class": "File",
                        "path": sample1_maf
                    },
                    "bam_file": {
                        "class": "File",
                        "path": sample1_bam
                    }
                },
                {
                    "sample_id": "Sample2",
                    "normal_id": "FROZENPOOLEDNORMAL_IMPACT505_V2",
                    "sample_type": "research",
                    "maf_file": {
                        "class": "File",
                        "path": sample2_maf
                    },
                    "bam_file": {
                        "class": "File",
                        "path": sample2_bam
                    }
                },
                {
                    "sample_id": "Sample3",
                    "normal_id": "FROZENPOOLEDNORMAL_IMPACT505_V2",
                    "sample_type": "research",
                    "maf_file": {
                        "class": "File",
                        "path": sample3_maf
                    },
                    "bam_file": {
                        "class": "File",
                        "path": sample3_bam
                    }
                },
                {
                    "sample_id": "Sample4",
                    "normal_id": "FROZENPOOLEDNORMAL_IMPACT505_V2",
                    "sample_type": "research",
                    "maf_file": {
                        "class": "File",
                        "path": sample4_maf
                    },
                    "bam_file": {
                        "class": "File",
                        "path": sample4_bam
                    }
                },
                {
                    "sample_id": "Sample5",
                    "normal_id": "FROZENPOOLEDNORMAL_IMPACT505_V2",
                    "sample_type": "research",
                    "maf_file": {
                        "class": "File",
                        "path": sample5_maf
                    },
                    "bam_file": {
                        "class": "File",
                        "path": sample5_bam
                    }
                },
            ],
            "ref_fasta": {
                "class": "File",
                "path": self.DATA_SETS['Proj_08390_G']['REF_FASTA']
            }
        }
        output_json, output_dir = self.run_cwl()
        output_path = os.path.join(output_dir, 'output.maf')
        filtered_output_path = os.path.join(output_dir, 'output.filtered.maf')
        portal_output_path = os.path.join(output_dir,
                                          'data_mutations_extended.txt')
        uncalled_output_path = os.path.join(output_dir,
                                            'data_mutations_uncalled.txt')

        expected_output = {
            'output_file':
            OFile(name='output.maf', dir=output_dir),
            'filtered_file':
            OFile(name='output.filtered.maf', dir=output_dir),
            'portal_file':
            OFile(name='data_mutations_extended.txt', dir=output_dir),
            'uncalled_file':
            OFile(name='data_mutations_uncalled.txt', dir=output_dir),
        }

        # file contents are inconsistent between runs, so strip some keys from the output dict
        strip_related_keys = [
            ('basename', 'output.maf', ['size', 'checksum']),
            ('basename', 'output.filtered.maf', ['size', 'checksum']),
            ('basename', 'data_mutations_extended.txt', ['size', 'checksum']),
            ('basename', 'data_mutations_uncalled.txt', ['size', 'checksum'])
        ]
        self.assertCWLDictEqual(output_json,
                                expected_output,
                                related_keys=strip_related_keys)
        # the all_effects field is variable and changes the file size and checksum,
        # so check the number of variant outputs instead
        self.assertNumMutationsHash(output_path, 475,
                                    'd041bc641d85761b60c6b7ef8606bab2')
        self.assertNumMutationsHash(filtered_output_path, 475,
                                    'd041bc641d85761b60c6b7ef8606bab2')
        self.assertNumMutationsHash(portal_output_path, 408,
                                    '63969ef90cb7a4524ab9063b4889bbde')
        self.assertNumMutationsHash(uncalled_output_path, 67,
                                    'a474b61268d2a4c25fd27cc2ccbbce96')
        self.assertEqualNumMutations(
            [portal_output_path, uncalled_output_path], filtered_output_path)
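        # Note: as in the previous test case, the portal (408) and uncalled (67)
        # mutation counts sum to the filtered total (475), which is what
        # assertEqualNumMutations is assumed to verify.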

"""
unit tests for the snp-pileup-wrapper.cwl file
"""
import os
import sys
import json
import unittest
from tempfile import TemporaryDirectory

THIS_DIR = os.path.dirname(os.path.realpath(__file__))
PARENT_DIR = os.path.dirname(THIS_DIR)
sys.path.insert(0, PARENT_DIR)
from pluto.tools import run_command, CWLFile
from pluto.settings import CWL_ARGS
sys.path.pop(0)

cwl_file = CWLFile('concat_with_comments.cwl')

class TestConcatWithCommentsCWL(unittest.TestCase):
    def test_concat_0(self):
        """
        Test concat when no comments are present in the original file
        """
        with TemporaryDirectory() as tmpdir:
            # make a dummy file with some lines
            input_lines = ["HEADER", "foo", "bar", "baz"]
            input_file = os.path.join(tmpdir, "input.txt")
            with open(input_file, "w") as fout:
                for line in input_lines:
                    fout.write(line + '\n')

            input_json = {
Example #28
class TestAddHeader(PlutoTestCase):
    cwl_file = CWLFile('add_header.cwl')

    def test_add_header(self):
        """
        Test case for adding a header to a file
        """
        self.maxDiff = None

        input_file = os.path.join(self.tmpdir, "input.txt")
        with open(input_file, "w") as f:
            f.write("foo")
        header_str = "HEADER"

        self.input = {
            "input_file": {
                "class": "File",
                "path": input_file
            },
            "header_str": header_str,
        }

        output_json, output_dir = self.run_cwl()

        expected_output = {
            'output_file': {
                'location': 'file://' + os.path.join(output_dir, 'output.txt'),
                'basename': 'output.txt',
                'class': 'File',
                'checksum': 'sha1$01838a0977d542fb12680e271393e1d4baaefa8f',
                'size': 10,
                'path': os.path.join(output_dir, 'output.txt')
            }
        }
        self.assertDictEqual(output_json, expected_output)

        output_file = expected_output['output_file']['path']
        with open(output_file) as f:
            lines = [l.strip() for l in f]
        expected_lines = ['HEADER', 'foo']
        self.assertEqual(lines, expected_lines)

    def test_add_header_empty_file(self):
        """
        Test case for adding a header to an empty file; the output should contain only the header
        """
        input_file = os.path.join(self.tmpdir, "input.txt")
        with open(input_file, "w") as f:
            pass

        header_str = "HEADER"

        self.input = {
            "input_file": {
                "class": "File",
                "path": input_file
            },
            "header_str": header_str,
        }

        output_json, output_dir = self.run_cwl()

        expected_output = {
            'output_file': {
                'location': 'file://' + os.path.join(output_dir, 'output.txt'),
                'basename': 'output.txt',
                'class': 'File',
                'checksum': 'sha1$b4cf58442d6321c81db6bab562806e14bf54bf72',
                'size': 7,
                'path': os.path.join(output_dir, 'output.txt')
            }
        }
        self.assertDictEqual(output_json, expected_output)

        output_file = expected_output['output_file']['path']
        with open(output_file) as f:
            lines = [l.strip() for l in f]
        expected_lines = ['HEADER']
        self.assertEqual(lines, expected_lines)
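
# A rough sanity check (an assumption, not part of the original tests): the
# size values asserted in TestAddHeader above are consistent with the output
# being exactly "HEADER\n" plus the input contents, with no extra trailing
# newline added after the non-empty input.
print(len(b"HEADER\nfoo"))  # 10 -> matches test_add_header
print(len(b"HEADER\n"))     # 7  -> matches test_add_header_empty_file
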
class TestConcatWithCommentsCWL(PlutoTestCase):
    cwl_file = CWLFile('concat_with_comments.cwl')

    def test_concat_0(self):
        """
        Test concat when no comments are present in the original file
        """
        # make a dummy file with some lines
        input_lines = ["HEADER", "foo", "bar", "baz"]
        input_file = os.path.join(self.tmpdir, "input.txt")
        with open(input_file, "w") as fout:
            for line in input_lines:
                fout.write(line + '\n')

        self.input = {
            "input_files": [{
                "class": "File",
                "path": input_file
            }],
            "comment_label": "comment_label",
            "comment_value": "comment_value"
        }

        output_json, output_dir = self.run_cwl()

        expected_output = {
            'output_file':
            OFile(name='output.txt',
                  size=49,
                  hash='7cef8f6de47289a55de99de77563beb3fa371deb',
                  dir=output_dir)
        }
        self.assertCWLDictEqual(output_json, expected_output)

        # check the contents of the concatenated file; should be the input lines
        # preceded by the newly added comment line
        self.assertFileLinesEqual(
            expected_output['output_file']['path'],
            ['#comment_label: comment_value', "HEADER", 'foo', 'bar', 'baz'])

    def test_concat1(self):
        """
        Test concat when the original file has a comment line
        """
        # make a dummy file with some lines
        input_lines = ["# comment here", "HEADER", "foo", "bar", "baz"]
        input_file = os.path.join(self.tmpdir, "input.txt")
        with open(input_file, "w") as fout:
            for line in input_lines:
                fout.write(line + '\n')

        self.input = {
            "input_files": [{
                "class": "File",
                "path": input_file
            }],
            "comment_label": "comment_label",
            "comment_value": "comment_value"
        }
        output_json, output_dir = self.run_cwl()

        expected_output = {
            'output_file':
            OFile(name='output.txt',
                  size=64,
                  hash='14ee1247f314dba1e3c28aa8aec9ff7b137a1f41',
                  dir=output_dir)
        }
        self.assertCWLDictEqual(output_json, expected_output)

        # check the contents of the concatenated file; should be the input with the
        # new comment line added after the existing comment
        self.assertFileLinesEqual(expected_output['output_file']['path'], [
            '# comment here', '#comment_label: comment_value', "HEADER", 'foo',
            'bar', 'baz'
        ])

    def test_concat2(self):
        """
        Test concat when multiple files have comments
        """
        # make a dummy file with some lines
        input_lines1 = ["# comment 1 here", "HEADER", "foo1", "bar1"]
        input_file1 = os.path.join(self.tmpdir, "input1.txt")
        with open(input_file1, "w") as fout:
            for line in input_lines1:
                fout.write(line + '\n')

        input_lines2 = ["# comment 2 here", "HEADER", "foo2", "bar2"]
        input_file2 = os.path.join(self.tmpdir, "input2.txt")
        with open(input_file2, "w") as fout:
            for line in input_lines2:
                fout.write(line + '\n')

        self.input = {
            "input_files": [
                {
                    "class": "File",
                    "path": input_file1
                },
                {
                    "class": "File",
                    "path": input_file2
                },
            ],
            "comment_label":
            "comment_label",
            "comment_value":
            "comment_value"
        }
        output_json, output_dir = self.run_cwl()

        expected_output = {
            'output_file':
            OFile(name='output.txt',
                  size=91,
                  hash='5dbce16f9bfef135d6b8288b16350351a33998f3',
                  dir=output_dir)
        }
        self.assertCWLDictEqual(output_json, expected_output)

        self.assertFileLinesEqual(expected_output['output_file']['path'], [
            '# comment 1 here', '# comment 2 here',
            '#comment_label: comment_value', "HEADER", 'foo1', 'bar1', 'foo2',
            'bar2'
        ])
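
# A minimal sketch (not part of the original test suite) of how the size=49
# asserted in test_concat_0 above can be reproduced by hand, assuming the output
# file contains exactly the added comment line followed by the input lines, each
# terminated with a newline; the sha1 is printed for comparison against the
# expected checksum, on the assumption that the output is byte-identical.
import hashlib

_expected_bytes = b"#comment_label: comment_value\nHEADER\nfoo\nbar\nbaz\n"
print(len(_expected_bytes))                       # 49 -> matches the asserted size
print(hashlib.sha1(_expected_bytes).hexdigest())  # compare against the expected hash above
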
Example #30
"""
unit tests for the concat.cwl
"""
import os
import sys
import json
import unittest
from tempfile import TemporaryDirectory

THIS_DIR = os.path.dirname(os.path.realpath(__file__))
PARENT_DIR = os.path.dirname(THIS_DIR)
sys.path.insert(0, PARENT_DIR)
from pluto.tools import run_command, CWLFile
from pluto.settings import CWL_ARGS
sys.path.pop(0)

cwl_file = CWLFile('concat.cwl')


class TestConcat(unittest.TestCase):
    def test_concat_simple_file(self):
        """
        Test that a single file with no header comes out looking as expected
        """
        with TemporaryDirectory() as tmpdir:

            # make a dummy file with some lines
            input_lines = ["foo", "bar", "baz"]
            input_file = os.path.join(tmpdir, "input.txt")
            with open(input_file, "w") as fout:
                for line in input_lines:
                    fout.write(line + '\n')