def run(self):
    '''It runs the analysis.

    It reads the project settings, opens the last version of the 'bam'
    input and writes the statistics summary into the 'result' output
    directory. bam_general_stats is called once with the bam, summary and
    (optional) unmapped reads handles; bam_distribs is then called for the
    'coverage' and 'mapq' kinds, rewinding the bam handle before each call.

    Every file handle opened here is closed before returning, even when
    one of the statistics functions raises.
    '''
    self._log({'analysis_started': True})
    settings = self._project_settings
    # The looked-up value is discarded; the call is kept for whatever
    # _create_output_dirs does, and the lookup still fails early when there
    # is no 'result' entry.
    self._create_output_dirs()['result']
    project_name = settings['General_settings']['project_name']
    sample_size = settings['Sam_stats']['sampling_size']
    project_path = settings['General_settings']['project_path']

    bam_fpath = self._get_input_fpaths()['bam'].last_version

    bam_fhand = None
    summary_fhand = None
    unmapped_fhand = None
    try:
        bam_fhand = open(bam_fpath)

        out_dir = os.path.abspath(self._get_output_dirs()['result'])
        summary_fname = os.path.join(out_dir,
                                     BACKBONE_BASENAMES['statistics_file'])
        summary_fhand = open(summary_fname, 'w')

        # The list of unmapped reads is optional: it is only used when the
        # gzipped file is found inside the mappings directory.
        unmapped_fpath = os.path.join(project_path,
                                      BACKBONE_DIRECTORIES['mappings'][0],
                                      BACKBONE_BASENAMES['unmapped_list'])
        if os.path.exists(unmapped_fpath):
            unmapped_fhand = GzipFile(unmapped_fpath)

        # The general statistics
        bam_general_stats(bam_fhand, summary_fhand, unmapped_fhand)

        # The basename does not depend on the kind, so it is built once.
        basename = os.path.join(out_dir, "%s" % (project_name))
        for kind in ('coverage', 'mapq'):
            # The previous consumer may have moved the file position.
            bam_fhand.seek(0)
            bam_distribs(bam_fhand, kind, basename=basename,
                         sample_size=sample_size,
                         summary_fhand=summary_fhand,
                         plot_file_format=PLOT_FILE_FORMAT)
    finally:
        # Closing the summary handle guarantees that the statistics are
        # flushed to disk; the others are released to avoid leaking
        # descriptors when an error interrupts the analysis.
        for fhand in (unmapped_fhand, summary_fhand, bam_fhand):
            if fhand is not None:
                fhand.close()
def test_bam_distribs():
    'It checks the distributions that bam_distribs returns for the SAM fixture'
    # The SAM fixture is written to disk and converted, so that
    # bam_distribs gets a bam file to work with.
    sam_fhand = NamedTemporaryFile(suffix='.sam')
    sam_fhand.write(SAM)
    sam_fhand.flush()
    bam_fhand = NamedTemporaryFile()
    sam2bam(sam_fhand.name, bam_fhand.name)

    # All the assertions look at the same key of the returned mapping.
    group_key = ('platform', '454')

    # coverage: the whole distribution and the summary text are checked
    summary = StringIO()
    coverage = bam_distribs(bam_fhand, 'coverage', summary_fhand=summary)
    assert coverage[group_key]['distrib'] == [2547]
    assert 'average: 0.13' in summary.getvalue()

    # For the remaining cases only the first bin is checked.
    first_bin_cases = (
        ('mapq', {}),
        ('mapq', {'sample_size': 100}),
        ('edit_distance', {}),
    )
    for kind, extra_kwargs in first_bin_cases:
        result = bam_distribs(bam_fhand, kind, **extra_kwargs)
        assert result[group_key]['distrib'][0] == 1