def bwa_aln(bam_path, reference_fasta_path, logger):
    """Align every FASTQ beside ``bam_path`` via the ``bwa_aln_*`` helpers.

    FASTQ files are discovered in ``<dirname(bam_path)>/fastq``; the sibling
    ``realn`` directory is created if it does not exist and is handed to the
    helpers. Paired files are processed first (in sorted read-1 order),
    followed by the ``'s'``, ``'o1'`` and ``'o2'`` single-file groups
    (presumably single-end and the two orphan-read groups -- confirm against
    ``fastq_util``).

    Args:
        bam_path: Path of the source BAM. Only its directory is used here;
            the path itself is forwarded unchanged to every helper call.
        reference_fasta_path: Reference FASTA path forwarded to the helpers.
        logger: Logger used for progress messages and forwarded to the helpers.

    Returns:
        A list with one helper return value per processed FASTQ (pair), in
        processing order -- presumably the paths of the BAMs written.
    """
    uuid_dir = os.path.dirname(bam_path)
    fastq_dir = os.path.join(uuid_dir, 'fastq')
    realn_dir = os.path.join(uuid_dir, 'realn')
    os.makedirs(realn_dir, exist_ok=True)

    fastqlist = fastq_util.buildfastqlist(fastq_dir)
    pefastqdict = fastq_util.buildpefastqdict(fastqlist)
    logger.info('pefastqdict=%s' % pefastqdict)
    sefastqlist = fastq_util.buildsefastqlist(fastqlist)
    logger.info('sefastqlist=%s' % sefastqlist)
    o1fastqlist = fastq_util.buildo1fastqlist(fastqlist)
    logger.info('o1fastqlist=%s' % o1fastqlist)
    o2fastqlist = fastq_util.buildo2fastqlist(fastqlist)
    logger.info('o2fastqlist=%s' % o2fastqlist)

    # NOTE(review): bwa() in this module calls bwa_aln_paired/bwa_aln_single
    # with extra uuid, rg_str, thread_count and engine arguments. The helper
    # definitions are not visible here -- confirm which call shape is current.
    bam_path_list = []

    # Each result is kept in its own local. Rebinding ``bam_path`` (as the
    # previous code did) made every call after the first receive the previous
    # output path instead of the caller's source BAM.
    for read1 in sorted(pefastqdict.keys()):
        out_bam_path = bwa_aln_paired(
            bam_path, fastq_dir, read1, pefastqdict[read1], realn_dir,
            reference_fasta_path, logger,
        )
        bam_path_list.append(out_bam_path)

    single_groups = (
        ('s', sefastqlist),
        ('o1', o1fastqlist),
        ('o2', o2fastqlist),
    )
    for group_label, fastq_names in single_groups:
        for fastq_name in fastq_names:
            out_bam_path = bwa_aln_single(
                bam_path, fastq_dir, fastq_name, realn_dir, group_label,
                reference_fasta_path, logger,
            )
            bam_path_list.append(out_bam_path)

    return bam_path_list
def fastqc_validate(uuid, bam_path, thread_count, engine, logger):
    """Run the FastQC and encoding checks on every FASTQ beside ``bam_path``.

    FASTQ files are discovered in ``<dirname(bam_path)>/fastq``. For each file
    the sequence ``do_fastqc`` -> ``fastqc_to_db`` -> ``do_guess_encoding`` ->
    ``guess_enc_db`` is executed. ``fastq_util.get_fastq_length`` is also
    called for every read-1, ``s``, ``o1`` and ``o2`` file (not for read-2
    mates).

    Args:
        uuid: Identifier forwarded to every helper.
        bam_path: Path of the source BAM; only its directory is used.
        thread_count: Forwarded to ``do_fastqc``.
        engine: Forwarded to the helpers (presumably a database engine --
            confirm against the helper definitions).
        logger: Logger used for progress messages and forwarded to the helpers.

    Returns:
        The value returned by the last ``get_fastq_length`` call, or ``None``
        when no FASTQ file was found.
    """
    uuid_dir = os.path.dirname(bam_path)
    fastq_dir = os.path.join(uuid_dir, 'fastq')

    fastqlist = fastq_util.buildfastqlist(fastq_dir)
    # Use the injected logger (was the root ``logging`` module) so this
    # message lands in the same place as the others.
    logger.info('fastqlist=%s' % fastqlist)
    pefastqdict = fastq_util.buildpefastqdict(fastqlist)
    logger.info('pefastqdict=%s' % pefastqdict)
    sefastqlist = fastq_util.buildsefastqlist(fastqlist)
    logger.info('sefastqlist=%s' % sefastqlist)
    o1fastqlist = fastq_util.buildo1fastqlist(fastqlist)
    logger.info('o1fastqlist=%s' % o1fastqlist)
    o2fastqlist = fastq_util.buildo2fastqlist(fastqlist)
    logger.info('o2fastqlist=%s' % o2fastqlist)

    def run_checks(fastq_name):
        """Run the four per-file QC steps on one FASTQ in ``fastq_dir``."""
        fq_path = os.path.join(fastq_dir, fastq_name)
        do_fastqc(uuid, fq_path, thread_count, engine, logger)
        fastqc_to_db(uuid, fq_path, engine, logger)
        do_guess_encoding(uuid, fq_path, engine, logger)
        guess_enc_db(uuid, fq_path, engine, logger)

    # Pre-bind so an empty fastq directory returns None instead of raising
    # UnboundLocalError at the final ``return``.
    fastq_length = None

    for read1 in sorted(pefastqdict.keys()):
        run_checks(read1)
        fastq_length = fastq_util.get_fastq_length(
            uuid, fastq_dir, read1, engine, logger)
        # The mate is checked but its length is not queried.
        run_checks(pefastqdict[read1])

    # The o1/o2 loops previously passed ``seread`` to get_fastq_length (a
    # copy-paste slip); each file's own name is used now.
    for fastq_names in (sefastqlist, o1fastqlist, o2fastqlist):
        for fastq_name in fastq_names:
            run_checks(fastq_name)
            fastq_length = fastq_util.get_fastq_length(
                uuid, fastq_dir, fastq_name, engine, logger)

    return fastq_length
def bwa_mem(uuid, bam_path, reference_fasta_path, readgroup_path_dict, thread_count, engine, logger):
    """Align every FASTQ beside ``bam_path`` via the ``bwa_mem_*`` helpers.

    FASTQ files are discovered in ``<dirname(bam_path)>/fastq``; the sibling
    ``realn`` directory is created if it does not exist and is handed to the
    helpers. Paired files are processed first (in sorted read-1 order),
    followed by the ``"s"``, ``"o1"`` and ``"o2"`` single-file groups. A
    read-group string is looked up for each FASTQ with
    ``bam_util.get_readgroup_str`` before it is aligned.

    Args:
        uuid: Identifier forwarded to the alignment helpers.
        bam_path: Path of the source BAM. Only its directory is used here;
            the path itself is forwarded unchanged to every helper call.
        reference_fasta_path: Reference FASTA path forwarded to the helpers.
        readgroup_path_dict: Passed to ``bam_util.get_readgroup_str`` together
            with the FASTQ name (schema not visible here).
        thread_count: Forwarded to the alignment helpers.
        engine: Forwarded to the alignment helpers.
        logger: Logger used for progress messages and forwarded to the helpers.

    Returns:
        A list with one helper return value per processed FASTQ (pair), in
        processing order -- presumably the paths of the BAMs written.
    """
    uuid_dir = os.path.dirname(bam_path)
    logger.info("uuid_dir=%s" % uuid_dir)
    fastq_dir = os.path.join(uuid_dir, "fastq")
    logger.info("fastq_dir=%s" % fastq_dir)
    realn_dir = os.path.join(uuid_dir, "realn")
    logger.info("realn_dir=%s" % realn_dir)
    os.makedirs(realn_dir, exist_ok=True)

    fastqlist = fastq_util.buildfastqlist(fastq_dir)
    # Use the injected logger (was the root ``logging`` module) so this
    # message lands in the same place as the others.
    logger.info("fastqlist=%s" % fastqlist)
    pefastqdict = fastq_util.buildpefastqdict(fastqlist)
    logger.info("pefastqdict=%s" % pefastqdict)
    sefastqlist = fastq_util.buildsefastqlist(fastqlist)
    logger.info("sefastqlist=%s" % sefastqlist)
    o1fastqlist = fastq_util.buildo1fastqlist(fastqlist)
    logger.info("o1fastqlist=%s" % o1fastqlist)
    o2fastqlist = fastq_util.buildo2fastqlist(fastqlist)
    logger.info("o2fastqlist=%s" % o2fastqlist)

    bam_path_list = []

    # Each result is kept in its own local. Rebinding ``bam_path`` (as the
    # previous code did) made every call after the first receive the previous
    # output path instead of the caller's source BAM.
    for read1 in sorted(pefastqdict.keys()):
        rg_str = bam_util.get_readgroup_str(read1, readgroup_path_dict, logger)
        out_bam_path = bwa_mem_paired(
            uuid,
            bam_path,
            fastq_dir,
            read1,
            pefastqdict[read1],
            realn_dir,
            reference_fasta_path,
            rg_str,
            thread_count,
            engine,
            logger,
        )
        bam_path_list.append(out_bam_path)

    single_groups = (
        ("s", sefastqlist),
        ("o1", o1fastqlist),
        ("o2", o2fastqlist),
    )
    for group_label, fastq_names in single_groups:
        for fastq_name in fastq_names:
            rg_str = bam_util.get_readgroup_str(fastq_name, readgroup_path_dict, logger)
            out_bam_path = bwa_mem_single(
                uuid,
                bam_path,
                fastq_dir,
                fastq_name,
                realn_dir,
                group_label,
                reference_fasta_path,
                rg_str,
                thread_count,
                engine,
                logger,
            )
            bam_path_list.append(out_bam_path)

    return bam_path_list
def bwa(uuid, bam_path, reference_fasta_path, readgroup_path_dict, thread_count, engine, logger):
    """Align every FASTQ beside ``bam_path``, choosing the aligner per file.

    FASTQ files are discovered in ``<dirname(bam_path)>/fastq``; the sibling
    ``realn`` directory is created if it does not exist and is handed to the
    helpers. For each FASTQ the read-group string and then the length
    (``fastq_util.get_fastq_length``) are looked up; files whose length is
    below ``MEM_ALN_CUTOFF`` go to the ``bwa_aln_*`` helper, all others to the
    ``bwa_mem_*`` helper. Paired files are processed first (in sorted read-1
    order), followed by the ``'s'``, ``'o1'`` and ``'o2'`` single-file groups.

    Args:
        uuid: Identifier forwarded to the helpers.
        bam_path: Path of the source BAM. Only its directory is used here;
            the path itself is forwarded unchanged to every helper call.
        reference_fasta_path: Reference FASTA path forwarded to the helpers.
        readgroup_path_dict: Passed to ``bam_util.get_readgroup_str`` together
            with the FASTQ name (schema not visible here).
        thread_count: Forwarded to the alignment helpers.
        engine: Forwarded to the helpers.
        logger: Logger used for progress messages and forwarded to the helpers.

    Returns:
        A list with one helper return value per processed FASTQ (pair), in
        processing order -- presumably the paths of the BAMs written.
    """
    uuid_dir = os.path.dirname(bam_path)
    logger.info('uuid_dir=%s' % uuid_dir)
    fastq_dir = os.path.join(uuid_dir, 'fastq')
    logger.info('fastq_dir=%s' % fastq_dir)
    realn_dir = os.path.join(uuid_dir, 'realn')
    logger.info('realn_dir=%s' % realn_dir)
    os.makedirs(realn_dir, exist_ok=True)

    fastqlist = fastq_util.buildfastqlist(fastq_dir)
    # Use the injected logger (was the root ``logging`` module) so this
    # message lands in the same place as the others.
    logger.info('fastqlist=%s' % fastqlist)
    pefastqdict = fastq_util.buildpefastqdict(fastqlist)
    logger.info('pefastqdict=%s' % pefastqdict)
    sefastqlist = fastq_util.buildsefastqlist(fastqlist)
    logger.info('sefastqlist=%s' % sefastqlist)
    o1fastqlist = fastq_util.buildo1fastqlist(fastqlist)
    logger.info('o1fastqlist=%s' % o1fastqlist)
    o2fastqlist = fastq_util.buildo2fastqlist(fastqlist)
    logger.info('o2fastqlist=%s' % o2fastqlist)

    # NOTE(review): bwa_aln() in this module calls bwa_aln_paired and
    # bwa_aln_single without uuid, rg_str, thread_count and engine. The helper
    # definitions are not visible here -- confirm which call shape is current.
    bam_path_list = []

    # Each result is kept in its own local. Rebinding ``bam_path`` (as the
    # previous code did) made every call after the first receive the previous
    # output path instead of the caller's source BAM.
    for read1 in sorted(pefastqdict.keys()):
        rg_str = bam_util.get_readgroup_str(read1, readgroup_path_dict, logger)
        fastq_length = fastq_util.get_fastq_length(uuid, fastq_dir, read1, engine, logger)
        if fastq_length < MEM_ALN_CUTOFF:
            align_paired = bwa_aln_paired
        else:
            align_paired = bwa_mem_paired
        out_bam_path = align_paired(
            uuid, bam_path, fastq_dir, read1, pefastqdict[read1], realn_dir,
            reference_fasta_path, rg_str, thread_count, engine, logger,
        )
        bam_path_list.append(out_bam_path)

    single_groups = (
        ('s', sefastqlist),
        ('o1', o1fastqlist),
        ('o2', o2fastqlist),
    )
    for group_label, fastq_names in single_groups:
        for fastq_name in fastq_names:
            rg_str = bam_util.get_readgroup_str(fastq_name, readgroup_path_dict, logger)
            fastq_length = fastq_util.get_fastq_length(
                uuid, fastq_dir, fastq_name, engine, logger)
            if fastq_length < MEM_ALN_CUTOFF:
                align_single = bwa_aln_single
            else:
                align_single = bwa_mem_single
            out_bam_path = align_single(
                uuid, bam_path, fastq_dir, fastq_name, realn_dir, group_label,
                reference_fasta_path, rg_str, thread_count, engine, logger,
            )
            bam_path_list.append(out_bam_path)

    return bam_path_list