def bamsam_converter(input_fhand, output_fhand, java_conf=None):
    '''Converts between sam and bam using picard's SamFormatConverter.

    input_fhand  -- input file; it is concatenated onto 'INPUT=', so despite
                    its name it has to be a path string, not a file object.
    output_fhand -- output file; same remark, it is appended to 'OUTPUT='.
    java_conf    -- java configuration handed to guess_jar_dir and java_cmd.

    The command is run with raise_on_error=True.
    '''
    # NOTE(review): the jar directory is located through SortSam.jar although
    # the jar that is run is SamFormatConverter.jar; presumably both live in
    # the same picard directory -- confirm.
    jar_dir = guess_jar_dir('SortSam.jar', java_conf)
    converter_jar = os.path.join(jar_dir, 'SamFormatConverter.jar')

    command = java_cmd(java_conf)
    converter_args = ['-jar', converter_jar,
                      'INPUT=' + input_fhand,
                      'OUTPUT=' + output_fhand]
    command.extend(converter_args)

    call(command, raise_on_error=True, add_ext_dir=False)
def realign_bam(bam_fpath, reference_fpath, out_bam_fpath, java_conf=None,
                threads=False, tmp_dir=None):
    '''It realigns the bam using GATK Local realignment around indels.

    Steps:
      1. the reference sam index, the reference picard dict and the bam
         index are created,
      2. GATK RealignerTargetCreator writes the intervals to realign into a
         temporary .intervals file,
      3. GATK IndelRealigner writes the realigned reads into a temporary bam,
      4. that temporary bam is sorted into out_bam_fpath with sort_bam_sam.

    bam_fpath       -- path of the bam to realign.
    reference_fpath -- path of the reference.
    out_bam_fpath   -- path where the sorted, realigned bam is written.
    java_conf       -- java configuration handed to the java helpers.
    threads         -- currently ignored: the parallel mode is hard-coded off
                       (see below), so '-nt' is never added.
    tmp_dir         -- temporary directory, only passed to sort_bam_sam.

    Both GATK commands are run with raise_on_error=True. The temporary files
    are closed even when one of the steps fails.
    '''
    # reference sam index
    create_sam_reference_index(reference_fpath)
    # reference picard dict
    create_picard_dict(reference_fpath, java_conf=java_conf)
    # bam index
    create_bam_index(bam_fpath)

    gatk_path = guess_jar_dir('GenomeAnalysisTK.jar', java_conf)
    gatk_jar = os.path.join(gatk_path, 'GenomeAnalysisTK.jar')

    # According to GATK the multithreaded mode is experimental, so it might
    # be a good idea to do it in just one thread. In version 1.0.4498 this
    # option was removed, so parallel = False.
    parallel = False
    thread_args = []
    if parallel and threads and threads > 1:
        thread_args = ['-nt', str(get_num_threads(threads))]

    # The temporary files are closed explicitly in the finally clauses; if we
    # relied on garbage collection they would outlive a failed GATK/picard
    # run for as long as the traceback keeps this frame alive.
    intervals_fhand = tempfile.NamedTemporaryFile(suffix='.intervals')
    try:
        # the intervals to realign
        cmd = java_cmd(java_conf=java_conf)
        cmd.extend(['-jar', gatk_jar, '-T', 'RealignerTargetCreator',
                    '-I', bam_fpath, '-R', reference_fpath,
                    '-o', intervals_fhand.name])
        cmd.extend(thread_args)
        call(cmd, raise_on_error=True, add_ext_dir=False)

        # the realignment itself
        unsorted_bam = tempfile.NamedTemporaryFile(suffix='.bam')
        try:
            # NOTE(review): java.io.tmpdir is set to the system default and
            # not to tmp_dir -- confirm that this is intended.
            cmd = java_cmd(java_conf=java_conf)
            cmd.extend(['-Djava.io.tmpdir=%s' % tempfile.gettempdir(),
                        '-jar', gatk_jar, '-I', bam_fpath,
                        '-R', reference_fpath, '-T', 'IndelRealigner',
                        '-targetIntervals', intervals_fhand.name,
                        '-o', unsorted_bam.name])
            cmd.extend(thread_args)
            call(cmd, raise_on_error=True, add_ext_dir=False)

            # now we have to sort the realigned bam
            sort_bam_sam(unsorted_bam.name, out_bam_fpath,
                         java_conf=java_conf, tmp_dir=tmp_dir)
        finally:
            unsorted_bam.close()
    finally:
        intervals_fhand.close()
def sort_bam_sam(in_fpath, out_fpath, sort_method='coordinate', java_conf=None,
                 tmp_dir=None, strict_validation=True):
    '''It sorts a sam/bam file using picard's SortSam.

    in_fpath          -- path of the file to sort (passed as INPUT=).
    out_fpath         -- path of the sorted file (passed as OUTPUT=).
    sort_method       -- value given to picard as SORT_ORDER.
    java_conf         -- java configuration handed to guess_jar_dir and
                         java_cmd.
    tmp_dir           -- when given, passed to picard as TMP_DIR.
    strict_validation -- when false, VALIDATION_STRINGENCY=LENIENT is added.

    Raises RuntimeError when picard ends with a non-zero return code. A
    dedicated message is used when the output mentions that the device ran
    out of space.
    '''
    jar_dir = guess_jar_dir('SortSam.jar', java_conf)
    sort_jar = os.path.join(jar_dir, 'SortSam.jar')

    command = java_cmd(java_conf)
    picard_args = ['-jar', sort_jar,
                   'INPUT=' + in_fpath,
                   'OUTPUT=' + out_fpath,
                   'SORT_ORDER=' + sort_method]
    if not strict_validation:
        picard_args.append('VALIDATION_STRINGENCY=LENIENT')
    if tmp_dir:
        picard_args.append('TMP_DIR=%s' % tmp_dir)
    command.extend(picard_args)

    # The errors are handled here, not by call, so the out-of-space case can
    # be reported with its own message.
    stdout, stderr, retcode = call(command, raise_on_error=False,
                                   add_ext_dir=False)
    if not retcode:
        return

    no_space_msg = 'No space left on device'
    if no_space_msg in stdout or no_space_msg in stderr:
        raise RuntimeError('Picard sort consumed all space in device.' +
                           stderr)

    failure_msg = 'Error running picard: %s\n stderr: %s\n stdout: %s' % (
        ' '.join(command), stderr, stdout)
    raise RuntimeError(failure_msg)