示例#1
0
 def setUp(self):
     """Create the pipeline fixture shared by the tests.

     Stores the configuration file path and the MEME input FASTA path on
     the test instance, then builds a ``Pipeline`` from the configuration.
     """
     config_path = 'tests/data/application.cfg'
     self.meme_fasta = 'tests/data/expected_out/macsPeak.fasta'
     self.configuration_file = config_path
     self.pipeline = Pipeline(config_path)
示例#2
0
class TestPipeline(unittest.TestCase):
    """Tests for the ``Pipeline`` runners (MEME, FIMO, FASTA shuffler).

    The tests read fixtures from ``tests/data`` and write their results to
    ``tests/data/generated_out``.
    """

    def setUp(self):
        """Build a ``Pipeline`` from the test configuration file."""
        self.configuration_file = 'tests/data/application.cfg'
        self.meme_fasta = 'tests/data/expected_out/macsPeak.fasta'
        self.pipeline = Pipeline(self.configuration_file)

    def test_meme(self):
        """Run MEME on the fixture FASTA and check the reported motifs."""
        out_dir = 'tests/data/generated_out/meme_analysis'
        # Start from a clean output directory so results of an earlier run
        # cannot be picked up by the assertions below.
        if os.path.exists(out_dir):
            shutil.rmtree(out_dir)

        meme_args = self.pipeline.get_meme_default_params
        # Strip the ' -p <cpu count>' option from the default arguments
        # before handing them to the runner.
        strargs = meme_args.replace(' -p {}'.format(get_cpu_count()), '')
        output = self.pipeline.run_meme(fasta_in=self.meme_fasta,
                                        out_dir=out_dir,
                                        strargs=strargs)
        # TODO: Check if meme.txt is same and created.
        # TODO: This check is too stringent, specially if logos are being
        # produced. MEME installation leads to hard coded paths.
        print(output)
        self.assertEqual(output['exitcode'], 0)

        meme_record = read_memefile(os.path.join(out_dir, 'meme.txt'))
        self.assertEqual(meme_record['total_motifs'], 5)

        expected_consensus = ['CAGAACGCTGCTGCCAACCCGACCT', 'AGCAGA', 'CAGTTT']
        # Only the first three motif records are compared.
        for index, consensus in enumerate(expected_consensus):
            motif_record = meme_record['motif_records'][index]
            self.assertEqual(motif_record.consensus, consensus)

    def test_fimo(self):
        """Run FIMO for motif 1 of the expected MEME output."""
        fimo_output = self.pipeline.run_fimo(
            motif_file='tests/data/expected_out/meme_analysis/meme.txt',
            motif_num=1,
            sequence_file=self.meme_fasta,
            out_dir='tests/data/generated_out/fimo_analysis',
            strargs=None)
        self.assertEqual(fimo_output['exitcode'], 0)

    def test_fimotosites(self):
        """Check that FIMO matches agree with the source FASTA sequences."""
        fimo_file = 'tests/data/expected_out/fimo_analysis/fimo.txt'
        # Parse inside a context manager so the FASTA handle is closed;
        # to_dict() consumes the whole parser before the block exits.
        with open(self.meme_fasta) as fasta_handle:
            record_dict = SeqIO.to_dict(SeqIO.parse(fasta_handle, 'fasta'))
        fimo_df = fimo_to_sites(os.path.abspath(fimo_file))
        for _, row in fimo_df.iterrows():
            record_id = row['sequence name']
            fimo_sequence = row['matched sequence']
            start = row['start'] - 1  # 0-based start
            end = row['stop']  # 1-based end
            fasta_sequence = record_dict[record_id].seq
            if row['strand'] == '+':
                matched = str(fasta_sequence)[start:end]
            else:
                # Any other strand value is compared against the reverse
                # complement of the same slice.
                matched = str(fasta_sequence[start:end].reverse_complement())
            self.assertEqual(matched, fimo_sequence)

    def test_fimostd(self):
        """Test fimo to sites with standard sequence names.

        Checks that the zero-based start and one-based end columns are
        consistent with the 'start' and 'stop' columns of every row.
        """
        fimo_file = 'tests/data/fimo.std.txt'
        fimo_df = fimo_to_sites(os.path.abspath(fimo_file))
        for _, row in fimo_df.iterrows():
            start = row['start']
            end = row['stop']  # 1-based end
            self.assertEqual(start - 1, row['motifStartZeroBased'])
            self.assertEqual(end, row['motifEndOneBased'])

    def test_shuffler(self):
        """Smoke test for shuffler.

        Runs the FASTA shuffler and checks that the first line of the
        generated file mentions 'chr1'.
        """
        fasta_in = 'tests/data/expected_out/macsPeak.fasta'
        # NOTE(review): the '.fastsa' extension looks like a typo for
        # '.fasta'; kept unchanged since other tooling may rely on the name.
        fasta_out = 'tests/data/generated_out/macsPeak.shuffled.fastsa'
        output = self.pipeline.run_fasta_shuffler(fasta_in=fasta_in,
                                                  fasta_out=fasta_out)
        print(output)
        with open(fasta_out) as shuffled:
            self.assertIn('chr1', shuffled.readline())