def test_gmap_without_mapping_output():
    '''It tests that gmap reports a read it cannot place as unmapped.

    A single short read is mapped against the test genome and the resulting
    BAM is dumped to SAM. The read must appear with flag 4 (unmapped in the
    SAM specification), no reference name, position 0 and mapping quality 0.
    '''
    mappers_dir = join(TEST_DATA_DIR, 'mappers')
    work_dir = NamedTemporaryDir()
    # The genome is symlinked into a scratch directory and mapped from there
    # (presumably so that whatever the mapper writes beside the genome does
    # not end up in the test data dir -- confirm).
    temp_genome = join(work_dir.name, 'genome.fa')
    os.symlink(join(mappers_dir, 'genome.fa'), temp_genome)

    reads_fhand = NamedTemporaryFile()
    reads_fhand.write('>seq\natgtgatagat\n')
    reads_fhand.flush()

    # Only a free path is wanted for the output bam: closing the handle
    # removes the temporary file and leaves just its name.
    out_bam_fhand = NamedTemporaryFile()
    out_bam_fpath = out_bam_fhand.name
    out_bam_fhand.close()

    parameters = {'threads':None, 'kmer':13}
    temp_sam_fhand = NamedTemporaryFile(suffix='.sam')
    try:
        map_reads_with_gmap(temp_genome, reads_fhand.name, out_bam_fpath,
                            parameters)
        bam2sam(out_bam_fpath, temp_sam_fhand.name, True)
        with open(temp_sam_fhand.name) as sam_fhand:
            result = sam_fhand.read()
    finally:
        # Release every temporary resource, also when the mapping fails.
        reads_fhand.close()
        temp_sam_fhand.close()
        work_dir.close()
        # The bam path is no longer tied to a temporary file object, so it
        # has to be removed by hand.
        if os.path.exists(out_bam_fpath):
            os.remove(out_bam_fpath)

    assert 'seq\t4\t*\t0\t0' in result
def test_bwa_mapping():
    '''It tests the bwa mapper with and without an unmapped reads output.

    First run: no 'unmapped_fhand' is given and seq17 must show up in the
    SAM dump of the resulting bam.
    Second run: an 'unmapped_fhand' is given; seq17 must be written to it
    and must no longer be present in the SAM dump.
    '''
    reference = join(TEST_DATA_DIR, 'blast/arabidopsis_genes')
    work_dir = NamedTemporaryDir()
    reference_fpath = join(work_dir.name, 'arabidopsis_genes')
    os.symlink(reference, reference_fpath)

    reads_fhand = NamedTemporaryFile(suffix='.sfastq')
    reads_fhand.write(SOLEXA)
    reads_fhand.flush()

    # Only a free path is wanted for the output bam: closing the handle
    # removes the temporary file and leaves just its name.
    out_bam_fhand = NamedTemporaryFile()
    out_bam_fpath = out_bam_fhand.name
    out_bam_fhand.close()

    # Settings shared by both runs. Every call gets its own copy so that
    # one run can not influence the other through the dict.
    base_parameters = {'colorspace': False, 'reads_length':'short',
                       'threads':None, 'java_conf':None}

    # Run 1: without unmapped output
    map_reads_with_bwa(reference_fpath, reads_fhand.name, out_bam_fpath,
                       dict(base_parameters))
    test_sam_fhand = NamedTemporaryFile(suffix='.sam')
    bam2sam(out_bam_fpath, test_sam_fhand.name)
    with open(test_sam_fhand.name) as sam_fhand:
        result = sam_fhand.read()
    assert 'seq17' in result

    # Run 2: with unmapped output
    unmapped_fhand = StringIO.StringIO()
    parameters = dict(base_parameters, unmapped_fhand=unmapped_fhand)
    map_reads_with_bwa(reference_fpath, reads_fhand.name, out_bam_fpath,
                       parameters)
    assert 'seq17' in unmapped_fhand.getvalue()
    test_sam_fhand = NamedTemporaryFile(suffix='.sam')
    bam2sam(out_bam_fpath, test_sam_fhand.name)
    with open(test_sam_fhand.name) as sam_fhand:
        result = sam_fhand.read()
    assert 'seq17' not in result
def test_gmap_mapper():
    '''It checks the gmap mapper with fasta reads and with sfastq reads.

    Both runs map the lb_lib1.pl_sanger.sm_sam1 reads against the test genome
    and look for known strings in the SAM dump (header included) of the bam.
    The sfastq run also passes an 'unmapped_fhand' and expects a run of '?'
    in the SAM dump.
    '''
    mappers_dir = join(TEST_DATA_DIR, 'mappers')
    gmap_dir = join(TEST_DATA_DIR, 'mappers', 'gmap')

    # --- fasta reads ---
    scratch_dir = NamedTemporaryDir()
    genome_fpath = join(scratch_dir.name, 'genome.fa')
    os.symlink(join(mappers_dir, 'genome.fa'), genome_fpath)
    fasta_fpath = join(gmap_dir, 'lb_lib1.pl_sanger.sm_sam1.fa')
    bam_fhand = NamedTemporaryFile(suffix='.bam')
    map_reads_with_gmap(genome_fpath, fasta_fpath, bam_fhand.name,
                        {'threads':None, 'kmer':13})
    dumped_sam = NamedTemporaryFile(suffix='.sam')
    bam2sam(bam_fhand.name, dumped_sam.name, header=True)
    sam_text = open(dumped_sam.name).read()

    assert exists(bam_fhand.name)
    for expected in ('36M2I204M', 'SN:SL2.30ch00', 'seq9_rev_MOD'):
        assert expected in sam_text
    scratch_dir.close()
    bam_fhand.close()
    dumped_sam.close()

    # --- sfastq reads, collecting the unmapped ones ---
    scratch_dir = NamedTemporaryDir()
    genome_fpath = join(scratch_dir.name, 'genome.fa')
    os.symlink(join(mappers_dir, 'genome.fa'), genome_fpath)
    sfastq_fpath = join(gmap_dir, 'lb_lib1.pl_sanger.sm_sam1.sfastq')
    bam_fhand = NamedTemporaryFile(suffix='.bam')
    unmapped_fhand = StringIO.StringIO()
    map_reads_with_gmap(genome_fpath, sfastq_fpath, bam_fhand.name,
                        {'threads':None, 'kmer':13,
                         'unmapped_fhand':unmapped_fhand})
    dumped_sam = NamedTemporaryFile(suffix='.sam')
    bam2sam(bam_fhand.name, dumped_sam.name, header=True)
    sam_text = open(dumped_sam.name).read()

    assert exists(bam_fhand.name)
    for expected in ('36M2I204M', 'SN:SL2.30ch00', 'seq9_rev_MOD',
                     '?????????????????'):
        assert expected in sam_text
    scratch_dir.close()
    bam_fhand.close()
    dumped_sam.close()
def testsam2bam():
    '''It tests the sam2bam function with a bam -> sam -> bam -> sam round trip.

    The test bam is dumped to sam, converted back to bam with sam2bam and
    dumped again; both sam dumps must be identical.
    '''
    bampath = os.path.join(TEST_DATA_DIR, 'seq.bam')

    # The temporary file object is kept referenced for the whole test. Taking
    # just ".name" from an unreferenced NamedTemporaryFile lets the file be
    # deleted right away, and whatever is later written to that path is
    # never cleaned up.
    sam_fhand = NamedTemporaryFile(suffix='.sam')
    sampath = sam_fhand.name
    bam2sam(bampath, sampath, header=True)
    with open(sampath) as fhand:
        # only the first line of the dump is checked
        assert 'SN:SGN-U572743' in fhand.readline()

    newbam = NamedTemporaryFile(suffix='.bam')
    sam2bam(sampath, newbam.name)
    newsam = NamedTemporaryFile(suffix='.sam')
    bam2sam(newbam.name, newsam.name, header=True)

    with open(newsam.name) as fhand:
        newsam_content = fhand.read()
    with open(sampath) as fhand:
        oldsam_content = fhand.read()
    assert newsam_content == oldsam_content
def test_sample_bam():
    '''It tests the sample_bam function.

    The SAM fixture is converted to bam, sample_bam is called on it with 2
    as its third argument and the SAM dump (header included) of the result
    must have six lines.
    '''
    in_sam_fhand = NamedTemporaryFile(suffix='.sam')
    in_sam_fhand.write(SAM)
    in_sam_fhand.flush()

    in_bam_fhand = NamedTemporaryFile()
    sam2bam(in_sam_fhand.name, in_bam_fhand.name)
    in_bam_fhand.flush()

    sampled_bam_fhand = NamedTemporaryFile(suffix='.bam')
    sample_bam(in_bam_fhand, sampled_bam_fhand, 2)

    sampled_sam_fhand = NamedTemporaryFile(suffix='.sam')
    bam2sam(sampled_bam_fhand.name, sampled_sam_fhand.name, header=True)
    sampled_lines = open(sampled_sam_fhand.name).read().splitlines()
    assert len(sampled_lines) == 6
def test_sam_creator():
    '''It tests sam_creator with two small text alignments.

    sam_creator is given a file-like object with the alignment plus the paths
    for the bam and for the reference fasta that it has to write. The first
    case checks the written reference and three CIGAR strings, the second one
    checks a read sequence and a CIGAR string in the SAM dump of the bam.
    '''
    # NOTE(review): in this copy the rows of both alignment fixtures are
    # separated by single spaces. The fixture presumably had one
    # "name<TAB>sequence" row per line, with column padding in front of the
    # reads that start after the first reference base -- confirm against
    # version control before relying on these literals.
    alignement = '''ref\taggttttataaaacAAAAaattaagtctacagag-caacta sample\taggttttataaaacAAA-aattaagtctacagagtcaacta read\taggttttataaaacAA-Aaattaagtctacagagtcaacta read\taggttttataaaacA-AAaattaagtctacagagtcaacta read\taggttttataaaac-AAAaattaagtctacagagtcaacta'''
    fhand = StringIO(alignement)
    out_bam_fhand = NamedTemporaryFile(suffix = '.bam')
    out_ref_fhand = NamedTemporaryFile(suffix = '.fasta')
    sam_creator(fhand, out_bam_fhand.name, out_ref_fhand.name)

    # The reference is expected in the fasta without the '-' gap character
    ref = open(out_ref_fhand.name).read()
    assert '>ref\naggttttataaaacAAAAaattaagtctacagagcaacta' in ref

    # The bam is dumped to sam to look at the alignments; one CIGAR string is
    # expected for each of the three reads
    out_sam_fhand = NamedTemporaryFile(suffix = '.sam')
    bam2sam(out_bam_fhand.name, out_sam_fhand.name)
    bam = open(out_sam_fhand.name).read()
    assert '16M1D17M1I6M' in bam
    assert '15M1D18M1I6M' in bam
    assert '14M1D19M1I6M' in bam

    # Second case: the reference row has a '-' run and some reads have '*'
    alignement = '''ref\taggttttataaaac----aattaagtctacagagcaacta sample\taggttttataaaacAAATaattaagtctacagagcaacta read\taggttttataaaac****aaAtaa read1\t ggttttataaaac****aaAtaaTt read2\t ttataaaacAAATaattaagtctaca read3\t CaaaT****aattaagtctacagagcaac read4\t aaT****aattaagtctacagagcaact read5\t T****aattaagtctacagagcaacta'''
    fhand = StringIO(alignement)
    out_bam_fhand = NamedTemporaryFile(suffix = '.bam')
    out_ref_fhand = NamedTemporaryFile(suffix = '.fasta')
    sam_creator(fhand, out_bam_fhand.name, out_ref_fhand.name)
    out_sam_fhand = NamedTemporaryFile(suffix = '.sam')
    bam2sam(out_bam_fhand.name, out_sam_fhand.name)
    bam = open(out_sam_fhand.name).read()
    assert 'AGGTTTTATAAAACAAATA' in bam
    assert '20M' in bam
def add_header_and_tags_bams(work_dir, output_dir):
    '''It writes a sam with header and tags for every bam found in work_dir.

    Every file in work_dir whose name ends with '.bam' is dumped to a
    temporary sam, which is then passed through add_header_and_tags_to_sam to
    write '<name without extension>.sam' into output_dir. Other files are
    ignored.

    work_dir -- directory that is scanned for bam files (not recursively)
    output_dir -- directory where the resulting sam files are written

    It returns nothing.
    '''
    for bam in os.listdir(work_dir):
        if not bam.endswith('.bam'):
            continue
        # The bam name without its extension (taken to be the read group of
        # the bam) names the output sam and prefixes the temporary one.
        prefix = ".".join(bam.split('.')[:-1])
        sam = open(os.path.join(output_dir, prefix + '.sam'), 'w')
        try:
            temp_sam = NamedTemporaryFile(prefix='%s.' % prefix,
                                          suffix='.sam')
            try:
                bam2sam(os.path.join(work_dir, bam), temp_sam.name)
                add_header_and_tags_to_sam(temp_sam, sam)
            finally:
                # closing it also removes the temporary sam
                temp_sam.close()
        finally:
            # the output is closed even if the conversion fails
            sam.close()
def test_remove_unmapped_reads():
    '''It tests remove_unmapped_reads.

    The SAM fixture is converted to bam and given to remove_unmapped_reads
    together with one output for the bam and another for the removed reads.
    Known strings are then looked for in the removed reads file and in the
    SAM dump (header included) of the output bam.
    '''
    in_sam_fhand = NamedTemporaryFile(suffix='.sam')
    in_sam_fhand.write(SAM)
    in_sam_fhand.flush()
    in_bam_fhand = NamedTemporaryFile()
    sam2bam(in_sam_fhand.name, in_bam_fhand.name)

    kept_bam_fhand = NamedTemporaryFile()
    removed_fhand = NamedTemporaryFile()
    remove_unmapped_reads(in_bam_fhand, kept_bam_fhand, removed_fhand)

    # what was taken out of the bam
    removed_text = open(removed_fhand.name).read()
    for expected in ('@SGN-E221406', 'FFMMMJJ@@755225889>0.'):
        assert expected in removed_text

    # what stays in the bam
    kept_sam_fhand = NamedTemporaryFile(suffix='.sam')
    bam2sam(kept_bam_fhand.name, kept_sam_fhand.name, header=True)
    kept_text = open(kept_sam_fhand.name).read()
    for expected in ('SGN-U572743', 'SGN-E221403'):
        assert expected in kept_text
def testbam2sam():
    '''It tests the bam2sam function.

    The test bam is dumped to sam with its header and the first line of the
    dump must name the SGN-U572743 reference sequence.
    '''
    bampath = os.path.join(TEST_DATA_DIR, 'seq.bam')

    # The temporary file object is kept referenced for the whole test. Taking
    # just ".name" from an unreferenced NamedTemporaryFile lets the file be
    # deleted right away, and whatever is later written to that path is
    # never cleaned up.
    sam_fhand = NamedTemporaryFile(suffix='.sam')
    sampath = sam_fhand.name
    bam2sam(bampath, sampath, header=True)
    with open(sampath) as fhand:
        assert 'SN:SGN-U572743' in fhand.readline()
def run(self):
    '''It runs the analysis.

    Steps:
      1. Every input bam is dumped to sam, gets its header and tags through
         add_header_and_tags_to_sam and is standardized with standardize_sam.
      2. The resulting sams are merged against the reference with merge_sam.
      3. The merged sam is converted to bam, sorted into the next version of
         the merged bam path and indexed.

    If the merge fails the merged bam is removed, should it exist, and the
    error is raised again.
    '''
    self._log({'analysis_started':True})
    settings = self._project_settings
    tmp_dir = settings['General_settings']['tmpdir']

    inputs = self._get_input_fpaths()
    bam_paths = inputs['bams']
    reference_path = inputs['reference']

    output_dir = self._create_output_dirs()['result']
    merged_bam_path = VersionedPath(os.path.join(output_dir,
                                         BACKBONE_BASENAMES['merged_bam']))
    merged_bam_fpath = merged_bam_path.next_version

    # Do we have to add the default qualities to the sam file?
    add_qualities = settings['Sam_processing']['add_default_qualities']
    # memory and picard location for the java programs
    java_mem = settings['Other_settings']['java_memory']
    picard_path = settings['Other_settings']['picard_path']

    if add_qualities:
        default_sanger_quality = \
                     int(settings['Other_settings']['default_sanger_quality'])
    else:
        default_sanger_quality = None

    # The processed sam of every library is collected in this directory
    temp_dir = NamedTemporaryDir()
    for bam_path in bam_paths:
        bam_basename = bam_path.basename
        temp_sam = NamedTemporaryFile(prefix='%s.' % bam_basename,
                                      suffix='.sam')
        sam_fpath = os.path.join(temp_dir.name, bam_basename + '.sam')
        bam2sam(bam_path.last_version, temp_sam.name)

        # First we need to create the sam with added tags and headers
        sam_fhand = open(sam_fpath, 'w')
        add_header_and_tags_to_sam(temp_sam, sam_fhand)
        temp_sam.close()
        sam_fhand.close()

        # The standardization. It is written to a second temporary file that
        # afterwards replaces the tagged sam, hence delete=False: the file
        # has to survive the move.
        temp_sam2 = NamedTemporaryFile(prefix='%s.' % bam_basename,
                                       suffix='.sam', delete=False)
        with open(sam_fpath) as tagged_sam_fhand:
            standardize_sam(tagged_sam_fhand, temp_sam2,
                            default_sanger_quality,
                            add_def_qual=add_qualities,
                            only_std_char=True)
        temp_sam2.flush()
        shutil.move(temp_sam2.name, sam_fpath)
        temp_sam2.close()

    # Once the headers are ready we are going to merge
    sam_fpaths = [os.path.join(temp_dir.name, fname)
                  for fname in os.listdir(temp_dir.name)
                  if fname.endswith('.sam')]
    sams = [open(sam_fpath) for sam_fpath in sam_fpaths]
    temp_sam = NamedTemporaryFile(suffix='.sam')
    reference_fhand = open(reference_path.last_version)
    try:
        merge_sam(sams, temp_sam, reference_fhand)
    except Exception:
        if os.path.exists(merged_bam_fpath):
            os.remove(merged_bam_fpath)
        raise
    finally:
        # the inputs of the merge are closed whether it worked or not
        reference_fhand.close()
        for sam in sams:
            sam.close()

    # The merged sam is read again through its path, so everything that was
    # written through the handle has to be on disk by now.
    temp_sam.flush()

    # Convert sam into a bam,(Temporary)
    temp_bam = NamedTemporaryFile(suffix='.bam')
    sam2bam(temp_sam.name, temp_bam.name)

    # finally we need to order the bam
    sort_bam_sam(temp_bam.name, merged_bam_fpath,
                 java_conf={'java_memory':java_mem,
                            'picard_path':picard_path},
                 tmp_dir=tmp_dir)
    temp_bam.close()
    temp_sam.close()
    create_bam_index(merged_bam_fpath)

    self._log({'analysis_finished':True})