def test_gmap_without_mapping_output():
        '''It tests the gmap output for a read that can not be mapped.

        A single short read is mapped with gmap against the test genome.
        The resulting bam, once dumped to sam, should have the read with
        flag 4 and without reference name ('*'), which is how an unmapped
        read is represented in the SAM format.
        '''
        mappers_dir = join(TEST_DATA_DIR, 'mappers')
        work_dir = NamedTemporaryDir()
        temp_genome = join(work_dir.name, 'genome.fa')
        os.symlink(join(mappers_dir, 'genome.fa'), temp_genome)

        reads_fhand = NamedTemporaryFile()
        reads_fhand.write('>seq\natgtgatagat\n')
        reads_fhand.flush()

        # Only a free path is wanted here: closing the temporary file removes
        # it and the mapper creates the bam in that path.
        out_bam_fhand = NamedTemporaryFile()
        out_bam_fpath = out_bam_fhand.name
        out_bam_fhand.close()

        parameters = {'threads':None, 'kmer':13}
        map_reads_with_gmap(temp_genome, reads_fhand.name, out_bam_fpath,
                            parameters)
        reads_fhand.close()

        temp_sam_fhand = NamedTemporaryFile(suffix='.sam')
        bam2sam(out_bam_fpath, temp_sam_fhand.name, True)
        with open(temp_sam_fhand.name) as sam_fhand:
            result = sam_fhand.read()
        assert 'seq\t4\t*\t0\t0' in result

        # The bam is not a temporary file any more, so it has to be removed
        # by hand, like the working directory.
        temp_sam_fhand.close()
        os.remove(out_bam_fpath)
        work_dir.close()
    def test_bwa_mapping():
        '''It tests the bwa mapper with and without an unmapped reads file.

        The reads are mapped twice against the same reference:
          - without 'unmapped_fhand', seq17 should be found in the sam.
          - with 'unmapped_fhand', seq17 should be written to that file
            and it should not appear in the sam.
        '''
        reference = join(TEST_DATA_DIR, 'blast/arabidopsis_genes')
        work_dir = NamedTemporaryDir()
        reference_fpath = join(work_dir.name, 'arabidopsis_genes')
        os.symlink(reference, reference_fpath)

        reads_fhand = NamedTemporaryFile(suffix='.sfastq')
        reads_fhand.write(SOLEXA)
        reads_fhand.flush()

        # Only a free path is wanted here: closing the temporary file removes
        # it and the mapper creates the bam in that path.
        out_bam_fhand = NamedTemporaryFile()
        out_bam_fpath = out_bam_fhand.name
        out_bam_fhand.close()

        # First run, every read ends up in the bam
        parameters = {'colorspace': False, 'reads_length':'short',
                      'threads':None, 'java_conf':None}
        map_reads_with_bwa(reference_fpath, reads_fhand.name, out_bam_fpath,
                           parameters)
        test_sam_fhand = NamedTemporaryFile(suffix='.sam')
        bam2sam(out_bam_fpath, test_sam_fhand.name)
        with open(test_sam_fhand.name) as sam_fhand:
            result = sam_fhand.read()
        test_sam_fhand.close()
        assert 'seq17' in result

        # Second run, now with a file to collect the unmapped reads
        unmapped_fhand = StringIO.StringIO()
        parameters = {'colorspace': False, 'reads_length':'short',
                      'threads':None, 'java_conf':None,
                      'unmapped_fhand':unmapped_fhand}
        map_reads_with_bwa(reference_fpath, reads_fhand.name, out_bam_fpath,
                           parameters)
        assert 'seq17' in unmapped_fhand.getvalue()
        test_sam_fhand = NamedTemporaryFile(suffix='.sam')
        bam2sam(out_bam_fpath, test_sam_fhand.name)
        with open(test_sam_fhand.name) as sam_fhand:
            result = sam_fhand.read()
        test_sam_fhand.close()
        assert 'seq17' not in result

        # The bam is not a temporary file any more, so it has to be removed
        # by hand, like the working directory.
        reads_fhand.close()
        os.remove(out_bam_fpath)
        work_dir.close()
    def test_gmap_mapper():
        '''It tests the gmap mapper with fasta and sfastq reads.

        The same checks are done for both read files. The sfastq run also
        gets an 'unmapped_fhand' parameter and the sam is expected to hold
        a run of '?' characters too.
        '''
        mappers_dir = join(TEST_DATA_DIR, 'mappers')
        gmap_dir = join(TEST_DATA_DIR, 'mappers', 'gmap')

        # (reads file, extra mapper parameters, extra strings expected in sam)
        cases = (
            ('lb_lib1.pl_sanger.sm_sam1.fa', {}, ()),
            ('lb_lib1.pl_sanger.sm_sam1.sfastq',
             {'unmapped_fhand':StringIO.StringIO()},
             ('?????????????????',)),
        )
        for reads_fname, extra_parameters, extra_expected in cases:
            # every run gets a fresh working dir with a link to the genome
            work_dir = NamedTemporaryDir()
            temp_genome = join(work_dir.name, 'genome.fa')
            os.symlink(join(mappers_dir, 'genome.fa'), temp_genome)

            reads_fpath = join(gmap_dir, reads_fname)
            out_bam_fhand = NamedTemporaryFile(suffix='.bam')
            parameters = {'threads':None, 'kmer':13}
            parameters.update(extra_parameters)
            map_reads_with_gmap(temp_genome, reads_fpath, out_bam_fhand.name,
                                parameters)

            sam_fhand = NamedTemporaryFile(suffix='.sam')
            bam2sam(out_bam_fhand.name, sam_fhand.name, header=True)
            with open(sam_fhand.name) as result_fhand:
                result = result_fhand.read()
            assert exists(out_bam_fhand.name)
            assert '36M2I204M' in result
            assert 'SN:SL2.30ch00' in result
            assert 'seq9_rev_MOD' in result
            for expected in extra_expected:
                assert expected in result

            work_dir.close()
            out_bam_fhand.close()
            sam_fhand.close()
Exemplo n.º 4
0
    def testsam2bam():
        '''It tests the sam2bam function with a round trip.

        The test bam is dumped to sam, that sam is converted back into a
        bam with sam2bam and the new bam is dumped again. Both sams should
        be identical.
        '''
        bampath = os.path.join(TEST_DATA_DIR, 'seq.bam')
        # The handle is kept alive so the temporary file is not removed (and
        # its name possibly reused) while the test is still using the path.
        sam_fhand = NamedTemporaryFile(suffix='.sam')
        sampath = sam_fhand.name
        bam2sam(bampath, sampath, header=True)
        with open(sampath) as fhand:
            oldsam_content = fhand.read()
        # the reference should be in the first line of the header
        assert 'SN:SGN-U572743' in oldsam_content.split('\n', 1)[0]

        newbam = NamedTemporaryFile(suffix='.bam')
        sam2bam(sampath, newbam.name)
        newsam = NamedTemporaryFile(suffix='.sam')
        bam2sam(newbam.name, newsam.name, header=True)
        with open(newsam.name) as fhand:
            newsam_content = fhand.read()

        assert newsam_content == oldsam_content

        newsam.close()
        newbam.close()
        sam_fhand.close()
Exemplo n.º 5
0
    def test_sample_bam():
        '''It tests the sample_bam function.

        The SAM fixture is converted into a bam, sample_bam is asked for a
        sample of size 2 and the sampled bam, dumped to sam with its header,
        should have 6 lines.
        '''
        sam_fhand = NamedTemporaryFile(suffix='.sam')
        sam_fhand.write(SAM)
        sam_fhand.flush()
        bam_fhand = NamedTemporaryFile()
        sam2bam(sam_fhand.name, bam_fhand.name)

        out_bam = NamedTemporaryFile(suffix='.bam')
        sample_bam(bam_fhand, out_bam, 2)
        out_sam = NamedTemporaryFile(suffix='.sam')
        bam2sam(out_bam.name, out_sam.name, header=True)

        with open(out_sam.name) as fhand:
            sam_lines = fhand.read().splitlines()
        # NOTE(review): presumably the header lines plus the 2 sampled reads,
        # confirm against the SAM fixture
        assert len(sam_lines) == 6

        out_sam.close()
        out_bam.close()
        bam_fhand.close()
        sam_fhand.close()
Exemplo n.º 6
0
    def test_sam_creator():
        'It test sam creator'
        alignement = '''ref\taggttttataaaacAAAAaattaagtctacagag-caacta
sample\taggttttataaaacAAA-aattaagtctacagagtcaacta
read\taggttttataaaacAA-Aaattaagtctacagagtcaacta
read\taggttttataaaacA-AAaattaagtctacagagtcaacta
read\taggttttataaaac-AAAaattaagtctacagagtcaacta'''

        fhand = StringIO(alignement)
        out_bam_fhand = NamedTemporaryFile(suffix = '.bam')
        out_ref_fhand  = NamedTemporaryFile(suffix = '.fasta')


        sam_creator(fhand, out_bam_fhand.name, out_ref_fhand.name)

        ref = open(out_ref_fhand.name).read()

        assert  '>ref\naggttttataaaacAAAAaattaagtctacagagcaacta' in  ref
        out_sam_fhand = NamedTemporaryFile(suffix = '.sam')
        bam2sam(out_bam_fhand.name, out_sam_fhand.name)
        bam = open(out_sam_fhand.name).read()

        assert  '16M1D17M1I6M' in bam
        assert  '15M1D18M1I6M' in bam
        assert  '14M1D19M1I6M' in bam

        alignement = '''ref\taggttttataaaac----aattaagtctacagagcaacta
sample\taggttttataaaacAAATaattaagtctacagagcaacta
read\taggttttataaaac****aaAtaa
read1\t ggttttataaaac****aaAtaaTt
read2\t     ttataaaacAAATaattaagtctaca
read3\t        CaaaT****aattaagtctacagagcaac
read4\t          aaT****aattaagtctacagagcaact
read5\t            T****aattaagtctacagagcaacta'''

        fhand = StringIO(alignement)
        out_bam_fhand = NamedTemporaryFile(suffix = '.bam')
        out_ref_fhand  = NamedTemporaryFile(suffix = '.fasta')


        sam_creator(fhand, out_bam_fhand.name, out_ref_fhand.name)
        out_sam_fhand = NamedTemporaryFile(suffix = '.sam')
        bam2sam(out_bam_fhand.name, out_sam_fhand.name)
        bam = open(out_sam_fhand.name).read()
        assert 'AGGTTTTATAAAACAAATA' in bam
        assert '20M' in bam
Exemplo n.º 7
0
def add_header_and_tags_bams(work_dir, output_dir):
    '''It writes a sam with header and tags for every bam found in work_dir.

    Every file in work_dir whose name ends in .bam is dumped to a temporary
    sam. add_header_and_tags_to_sam then writes the processed version to
    output_dir, in a file named like the bam but with the .sam extension.

    Nothing is returned.
    '''
    for bam in os.listdir(work_dir):
        if not bam.endswith('.bam'):
            continue
        # The bam name without its extension. It is used as the output name.
        # NOTE(review): presumably it is also the read group, confirm against
        # add_header_and_tags_to_sam
        prefix = bam.rsplit('.', 1)[0]

        temp_sam = NamedTemporaryFile(prefix='%s.' % prefix, suffix='.sam')
        try:
            bam2sam(os.path.join(work_dir, bam), temp_sam.name)
            # The output is opened only once the bam has been dumped, so a
            # failing bam2sam does not leave an empty sam in output_dir.
            with open(os.path.join(output_dir, prefix + '.sam'), 'w') as sam:
                add_header_and_tags_to_sam(temp_sam, sam)
        finally:
            # the temporary sam is removed even if something went wrong
            temp_sam.close()
Exemplo n.º 8
0
    def test_remove_unmapped_reads():
        '''It tests the remove_unmapped_reads function.

        The SAM fixture is converted into a bam and given to
        remove_unmapped_reads together with an output bam and a file for the
        removed reads. Then it checks that:
          - the removed reads file has the read SGN-E221406 and its
            qualities.
          - the output bam, dumped to sam, still has the reference
            SGN-U572743 and the read SGN-E221403.
        '''
        sam = NamedTemporaryFile(suffix='.sam')
        sam.write(SAM)
        sam.flush()
        bam_fhand = NamedTemporaryFile()
        sam2bam(sam.name, bam_fhand.name)

        out_bam_fhand = NamedTemporaryFile()
        out_removed_reads_fhand = NamedTemporaryFile()
        remove_unmapped_reads(bam_fhand, out_bam_fhand, out_removed_reads_fhand)
        with open(out_removed_reads_fhand.name) as reads_fhand:
            reads = reads_fhand.read()
        assert '@SGN-E221406' in reads
        assert 'FFMMMJJ@@755225889>0.' in reads

        out_sam = NamedTemporaryFile(suffix='.sam')
        bam2sam(out_bam_fhand.name, out_sam.name, header=True)
        with open(out_sam.name) as sam_out_fhand:
            sam_out = sam_out_fhand.read()
        assert 'SGN-U572743' in sam_out
        assert 'SGN-E221403' in sam_out

        out_sam.close()
        out_removed_reads_fhand.close()
        out_bam_fhand.close()
        bam_fhand.close()
        sam.close()
Exemplo n.º 9
0
 def testbam2sam():
     '''It tests the bam2sam function.

     The test bam is dumped to a sam with header and the reference
     SGN-U572743 should be in the first line of that sam.
     '''
     bampath = os.path.join(TEST_DATA_DIR, 'seq.bam')
     # The handle is kept alive so the temporary file is not removed (and
     # its name possibly reused) while the test is still using the path.
     sam_fhand = NamedTemporaryFile(suffix='.sam')
     sampath = sam_fhand.name
     bam2sam(bampath, sampath, header=True)
     with open(sampath) as fhand:
         first_line = fhand.readline()
     assert 'SN:SGN-U572743' in first_line
     sam_fhand.close()
Exemplo n.º 10
0
    def run(self):
        '''It runs the analysis.

        It merges all the input bams into one sorted and indexed bam:
          1. every bam is dumped to sam, the header and tags are added to it
             and the sam is standardized (optionally adding default
             qualities).
          2. all those sams are merged into one sam using the reference.
          3. the merged sam is converted into a bam, which is sorted into the
             next version of the merged bam and then indexed.

        The start and the end of the analysis are logged. If the merge fails
        the merged bam is removed, when found, and the error is raised again.
        '''
        self._log({'analysis_started':True})
        settings = self._project_settings
        tmp_dir = settings['General_settings']['tmpdir']

        inputs = self._get_input_fpaths()
        bam_paths = inputs['bams']
        reference_path = inputs['reference']

        output_dir = self._create_output_dirs()['result']
        merged_bam_path = VersionedPath(os.path.join(output_dir,
                                        BACKBONE_BASENAMES['merged_bam']))
        merged_bam_fpath = merged_bam_path.next_version

        # Do we have to add the default qualities to the sam file?
        add_qualities = settings['Sam_processing']['add_default_qualities']
        # configuration for the java programs
        java_mem = settings['Other_settings']['java_memory']
        picard_path = settings['Other_settings']['picard_path']

        if add_qualities:
            default_sanger_quality = int(
                           settings['Other_settings']['default_sanger_quality'])
        else:
            default_sanger_quality = None

        # Every bam gets a processed sam in this temporary dir
        temp_dir = NamedTemporaryDir()
        for bam_path in bam_paths:
            bam_basename = bam_path.basename
            sam_fpath = os.path.join(temp_dir.name, bam_basename + '.sam')

            # First we need to create the sam with added tags and headers
            temp_sam = NamedTemporaryFile(prefix='%s.' % bam_basename,
                                          suffix='.sam')
            try:
                bam2sam(bam_path.last_version, temp_sam.name)
                with open(sam_fpath, 'w') as sam_fhand:
                    add_header_and_tags_to_sam(temp_sam, sam_fhand)
            finally:
                temp_sam.close()

            # The standardization. The result is written to a temporary file
            # that is not deleted on close because it replaces the sam.
            temp_sam2 = NamedTemporaryFile(prefix='%s.' % bam_basename,
                                           suffix='.sam', delete=False)
            try:
                with open(sam_fpath) as tagged_sam_fhand:
                    standardize_sam(tagged_sam_fhand, temp_sam2,
                                    default_sanger_quality,
                                    add_def_qual=add_qualities,
                                    only_std_char=True)
                temp_sam2.flush()
                shutil.move(temp_sam2.name, sam_fpath)
            finally:
                temp_sam2.close()

        # Once the headers are ready we are going to merge
        sam_fpaths = [os.path.join(temp_dir.name, fname)
                      for fname in os.listdir(temp_dir.name)
                      if fname.endswith('.sam')]

        temp_sam = NamedTemporaryFile(suffix='.sam')
        sams = []
        try:
            for sam_fpath in sam_fpaths:
                sams.append(open(sam_fpath))
            with open(reference_path.last_version) as reference_fhand:
                try:
                    merge_sam(sams, temp_sam, reference_fhand)
                except Exception:
                    if os.path.exists(merged_bam_fpath):
                        os.remove(merged_bam_fpath)
                    raise
        finally:
            # the input sams are closed even when the merge fails
            for sam in sams:
                sam.close()
        # sam2bam reads the merged sam by path, so everything written through
        # the handle should be on disk (no-op if merge_sam already flushed)
        temp_sam.flush()
        # the per bam sams are not needed any more
        temp_dir.close()

        # Convert the sam into a (temporary) bam and, finally, sort it
        temp_bam = NamedTemporaryFile(suffix='.bam')
        try:
            sam2bam(temp_sam.name, temp_bam.name)
            sort_bam_sam(temp_bam.name, merged_bam_fpath,
                         java_conf={'java_memory':java_mem,
                                    'picard_path':picard_path},
                         tmp_dir=tmp_dir)
        finally:
            temp_bam.close()
            temp_sam.close()
        create_bam_index(merged_bam_fpath)

        self._log({'analysis_finished':True})