def end(self):
    if self.mod == 'sjm':
        step = 'merge_report'
        merge_report(
            self.fq_dict,
            self.__STEPS__,
            self.last_step,
            self.sjm_cmd,
            self.sjm_order,
            self.logdir,
            self.__CONDA__,
            self.outdir,
            self.rm_files,
        )
    if self.mod == 'shell':
        os.system('mkdir -p ./shell/')
        for sample in self.shell_dict:
            with open(f'./shell/{sample}.sh', 'w') as f:
                f.write(self.shell_dict[sample])
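
# `generate_sjm` and `merge_report` are imported from elsewhere in the package.
# For orientation, below is a minimal sketch of generate_sjm inferred only from its
# call sites in this file (cmd, job name, conda env, memory m in GB, threads x).
# The job_begin/name/sched_options/cmd/job_end layout follows standard SJM job-file
# syntax, but the exact sched_options string and the activation prefix are
# assumptions, not the package's actual implementation.
def generate_sjm_sketch(cmd, name, conda, m=1, x=1):
    return (
        'job_begin\n'
        f'    name {name}\n'
        f'    sched_options -w n -cwd -V -l vf={m}g,p={x}\n'  # assumed SGE-style resource string
        f'    cmd source activate {conda}; {cmd}\n'
        'job_end\n'
    )
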
def main():
    # init
    assay = __ASSAY__
    steps = __STEPS__
    conda = __CONDA__
    app = 'celescope'

    # parser
    parser = multi_opts(assay)
    parser.add_argument('--starMem', help='starMem', default=30)
    parser.add_argument('--genomeDir', help='genome index dir', required=True)
    parser.add_argument(
        '--gtf_type',
        help='Specify attribute type in GTF annotation, default=exon',
        default='exon')
    parser.add_argument('--thread', help='thread', default=6)
    parser.add_argument('--probe_file', help="probe fasta file")
    args = parser.parse_args()

    # read args
    outdir = args.outdir
    chemistry = args.chemistry
    pattern = args.pattern
    whitelist = args.whitelist
    linker = args.linker
    lowQual = args.lowQual
    lowNum = args.lowNum
    mod = args.mod
    rm_files = args.rm_files

    # parse mapfile
    fq_dict, match_dict = parse_map_col4(args.mapfile, None)

    # link
    link_data(outdir, fq_dict)

    # custom args
    thread = args.thread
    genomeDir = args.genomeDir
    starMem = args.starMem
    gtf_type = args.gtf_type
    probe_file = args.probe_file

    # mk log dir
    logdir = outdir + '/log'
    os.system('mkdir -p %s' % (logdir))

    # script init
    sjm_cmd = 'log_dir %s\n' % (logdir)
    sjm_order = ''
    shell_dict = defaultdict(str)

    # outdir dict
    for sample in fq_dict:
        outdir_dic = {}
        index = 0
        for step in steps:
            step_outdir = f"{outdir}/{sample}/{index:02d}.{step}"
            outdir_dic.update({step: step_outdir})
            index += 1

        # sample
        step = "sample"
        cmd = (
            f'{app} {assay} {step} '
            f'--chemistry {chemistry} '
            f'--sample {sample} --outdir {outdir_dic[step]} --assay {assay} ')
        sjm_cmd += generate_sjm(cmd, f'{step}_{sample}', conda)
        shell_dict[sample] += cmd + '\n'
        last_step = step

        # barcode
        arr = fq_dict[sample]
        step = "barcode"
        cmd = (
            f'{app} {assay} {step} '
            f'--fq1 {arr[0]} --fq2 {arr[1]} --chemistry {chemistry} '
            f'--pattern {pattern} --whitelist {whitelist} --linker {linker} '
            f'--sample {sample} --lowQual {lowQual} --thread {thread} '
            f'--lowNum {lowNum} --outdir {outdir_dic[step]} --assay {assay} '
            f'--probe_file {probe_file} ')
        sjm_cmd += generate_sjm(cmd, f'{step}_{sample}', conda, m=5, x=thread)
        sjm_order += f'order {step}_{sample} after {last_step}_{sample}\n'
        shell_dict[sample] += cmd + '\n'
        last_step = step

        # adapt
        step = "cutadapt"
        fq = f'{outdir_dic["barcode"]}/{sample}_2.fq.gz'
        cmd = (f'{app} {assay} {step} '
               f'--fq {fq} --sample {sample} --outdir '
               f'{outdir_dic[step]} --assay {assay} ')
        sjm_cmd += generate_sjm(cmd, f'{step}_{sample}', conda, m=5, x=1)
        sjm_order += f'order {step}_{sample} after {last_step}_{sample}\n'
        shell_dict[sample] += cmd + '\n'
        last_step = step

        # STAR
        step = 'STAR'
        fq = f'{outdir_dic["cutadapt"]}/{sample}_clean_2.fq.gz'
        cmd = (f'{app} {assay} {step} '
               f'--fq {fq} --sample {sample} '
               f'--genomeDir {genomeDir} --thread {thread} '
               f'--outdir {outdir_dic[step]} --assay {assay} ')
        sjm_cmd += generate_sjm(cmd, f'{step}_{sample}', conda, m=starMem, x=thread)
        sjm_order += f'order {step}_{sample} after {last_step}_{sample}\n'
        shell_dict[sample] += cmd + '\n'
        last_step = step

        # featureCounts
        step = 'featureCounts'
        input = f'{outdir_dic["STAR"]}/{sample}_Aligned.sortedByCoord.out.bam'
        cmd = (
            f'{app} {assay} {step} '
            f'--input {input} --gtf_type {gtf_type} '
            f'--sample {sample} --thread {thread} --outdir {outdir_dic[step]} '
            f'--genomeDir {genomeDir} --assay {assay} ')
        sjm_cmd += generate_sjm(cmd, f'{step}_{sample}', conda, m=8, x=thread)
        sjm_order += f'order {step}_{sample} after {last_step}_{sample}\n'
        shell_dict[sample] += cmd + '\n'
        last_step = step

        # count
        step = 'count_capture_rna'
        bam = f'{outdir_dic["featureCounts"]}/{sample}_name_sorted.bam'
        cmd = (f'{app} {assay} {step} '
               f'--bam {bam} --sample {sample} --cells auto '
               f'--outdir {outdir_dic[step]} --assay {assay} '
               f'--match_dir {match_dict[sample]} '
               f'--genomeDir {genomeDir}')
        sjm_cmd += generate_sjm(cmd, f'{step}_{sample}', conda, m=8, x=thread)
        sjm_order += f'order {step}_{sample} after {last_step}_{sample}\n'
        shell_dict[sample] += cmd + '\n'
        last_step = step

        # analysis
        step = 'analysis'
        matrix_file = f'{outdir_dic["count_capture_rna"]}/{sample}_matrix.tsv.gz'
        cmd = (f'{app} {assay} {step} '
               f'--matrix_file {matrix_file} --sample {sample} '
               f'--outdir {outdir_dic[step]} '
               f'--assay {assay} ')
        sjm_cmd += generate_sjm(cmd, f'{step}_{sample}', conda, m=15, x=1)
        sjm_order += f'order {step}_{sample} after {last_step}_{sample}\n'
        shell_dict[sample] += cmd + '\n'
        last_step = step

    # merged report
    if mod == 'sjm':
        step = 'merge_report'
        merge_report(
            fq_dict,
            steps,
            last_step,
            sjm_cmd,
            sjm_order,
            logdir,
            conda,
            outdir,
            rm_files,
        )
    if mod == 'shell':
        os.system('mkdir -p ./shell/')
        for sample in shell_dict:
            with open(f'./shell/{sample}.sh', 'w') as f:
                f.write(shell_dict[sample])
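
# `link_data(outdir, fq_dict)` is defined elsewhere in the package. A minimal sketch,
# assuming it mirrors the inline ln.sh block used by the argparse-based scripts further
# down (symlink each library's raw fastqs to {sample}_1.fq.gz / {sample}_2.fq.gz under
# outdir/data_give/rawdata). Illustrative only; not the package's actual implementation.
import os

def link_data_sketch(outdir, fq_dict):
    raw_dir = f'{outdir}/data_give/rawdata'
    os.system('mkdir -p %s' % raw_dir)
    with open(f'{raw_dir}/ln.sh', 'w') as fh:
        fh.write(f'cd {raw_dir}\n')
        for sample, arr in fq_dict.items():
            fh.write(f'ln -sf {arr[0]} {sample}_1.fq.gz\n')
            fh.write(f'ln -sf {arr[1]} {sample}_2.fq.gz\n')
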
def main():
    # init
    assay = __ASSAY__
    steps = __STEPS__
    conda = __CONDA__
    app = 'celescope'

    # parser
    parser = multi_opts(assay)
    parser.add_argument('--starMem', help='starMem', default=10)
    parser.add_argument('--thread', help='thread', default=6)
    parser.add_argument('--genomeDir', help='fusion genomeDir', required=True)
    parser.add_argument(
        "--fusion_pos",
        help="first base position of the second gene (0-based), tsv file",
        required=True)
    parser.add_argument("--UMI_min", default=1)
    args = parser.parse_args()

    # read args
    outdir = args.outdir
    chemistry = args.chemistry
    pattern = args.pattern
    whitelist = args.whitelist
    linker = args.linker
    lowQual = args.lowQual
    lowNum = args.lowNum
    mod = args.mod
    rm_files = args.rm_files

    # parse mapfile
    fq_dict, match_dict = parse_map_col4(args.mapfile, None)

    # link
    link_data(outdir, fq_dict)

    # custom args
    thread = args.thread
    genomeDir = args.genomeDir
    starMem = args.starMem
    fusion_pos = args.fusion_pos
    UMI_min = args.UMI_min

    # mk log dir
    logdir = outdir + '/log'
    os.system('mkdir -p %s' % (logdir))

    # script init
    sjm_cmd = 'log_dir %s\n' % (logdir)
    sjm_order = ''
    shell_dict = defaultdict(str)

    # outdir dict
    for sample in fq_dict:
        outdir_dic = {}
        index = 0
        for step in steps:
            step_outdir = f"{outdir}/{sample}/{index:02d}.{step}"
            outdir_dic.update({step: step_outdir})
            index += 1

        # sample
        step = "sample"
        cmd = (
            f'{app} {assay} {step} '
            f'--chemistry {chemistry} '
            f'--sample {sample} --outdir {outdir_dic[step]} --assay {assay} ')
        sjm_cmd += generate_sjm(cmd, f'{step}_{sample}', conda)
        shell_dict[sample] += cmd + '\n'
        last_step = step

        # barcode
        arr = fq_dict[sample]
        step = "barcode"
        cmd = (
            f'{app} {assay} {step} '
            f'--fq1 {arr[0]} --fq2 {arr[1]} --chemistry {chemistry} '
            f'--pattern {pattern} --whitelist {whitelist} --linker {linker} '
            f'--sample {sample} --lowQual {lowQual} --thread {thread} '
            f'--lowNum {lowNum} --outdir {outdir_dic[step]} --assay {assay} ')
        sjm_cmd += generate_sjm(cmd, f'{step}_{sample}', conda, m=5, x=thread)
        sjm_order += f'order {step}_{sample} after {last_step}_{sample}\n'
        shell_dict[sample] += cmd + '\n'
        last_step = step

        # adapt
        step = "cutadapt"
        fq = f'{outdir_dic["barcode"]}/{sample}_2.fq.gz'
        cmd = (f'{app} {assay} {step} '
               f'--fq {fq} --sample {sample} --outdir '
               f'{outdir_dic[step]} --assay {assay} ')
        sjm_cmd += generate_sjm(cmd, f'{step}_{sample}', conda, m=5, x=1)
        sjm_order += f'order {step}_{sample} after {last_step}_{sample}\n'
        shell_dict[sample] += cmd + '\n'
        last_step = step

        # STAR_fusion
        step = 'STAR_fusion'
        input_read = f'{outdir_dic["cutadapt"]}/{sample}_clean_2.fq.gz'
        cmd = (
            f'{app} {assay} {step} '
            f'--outdir {outdir_dic[step]} --assay {assay} --sample {sample} '
            f'--thread {thread} '
            f'--input_read {input_read} '
            f'--genomeDir {genomeDir} ')
        sjm_cmd += generate_sjm(cmd, f'{step}_{sample}', conda, m=starMem, x=thread)
        sjm_order += f'order {step}_{sample} after {last_step}_{sample}\n'
        shell_dict[sample] += cmd + '\n'
        last_step = step

        # count_fusion
        step = 'count_fusion'
        bam = f'{outdir_dic["STAR_fusion"]}/{sample}_Aligned.sortedByCoord.out.bam'
        cmd = (
            f'{app} {assay} {step} '
            f'--outdir {outdir_dic[step]} --assay {assay} --sample {sample} '
            f'--bam {bam} '
            f'--UMI_min {UMI_min} '
            f'--match_dir {match_dict[sample]} '
            f'--fusion_pos {fusion_pos} ')
        sjm_cmd += generate_sjm(cmd, f'{step}_{sample}', conda, m=20, x=thread)
        sjm_order += f'order {step}_{sample} after {last_step}_{sample}\n'
        shell_dict[sample] += cmd + '\n'
        last_step = step

    # merged report
    if mod == 'sjm':
        step = 'merge_report'
        merge_report(
            fq_dict,
            steps,
            last_step,
            sjm_cmd,
            sjm_order,
            logdir,
            conda,
            outdir,
            rm_files,
        )
    if mod == 'shell':
        os.system('mkdir -p ./shell/')
        for sample in shell_dict:
            with open(f'./shell/{sample}.sh', 'w') as f:
                f.write(shell_dict[sample])
def main():
    # init
    assay = __ASSAY__
    steps = __STEPS__
    conda = __CONDA__
    app = 'celescope'

    # parser
    parser = multi_opts(assay)
    parser.add_argument('--thread', help='thread', default=6)
    parser.add_argument(
        "--UMI_min",
        help="cells with SMK_UMI >= UMI_min are considered valid cells",
        default="auto")
    parser.add_argument("--dim", help="SMK tag dimension", default=1)
    parser.add_argument("--SNR_min", help="minimum signal to noise ratio", default="auto")
    parser.add_argument("--SMK_pattern", help="SMK read2 pattern")
    parser.add_argument("--SMK_linker", help="SMK read2 linker fasta path")
    parser.add_argument("--SMK_barcode", help="SMK read2 barcode fasta path")
    args = parser.parse_args()

    # read args
    outdir = args.outdir
    chemistry = args.chemistry
    pattern = args.pattern
    whitelist = args.whitelist
    linker = args.linker
    lowQual = args.lowQual
    lowNum = args.lowNum
    mod = args.mod
    rm_files = args.rm_files

    # parse mapfile
    fq_dict, match_dict = parse_map_col4(args.mapfile, "auto")

    # link
    link_data(outdir, fq_dict)

    # custom args
    thread = args.thread
    UMI_min = args.UMI_min
    dim = args.dim
    SNR_min = args.SNR_min
    SMK_pattern = args.SMK_pattern
    SMK_linker = args.SMK_linker
    SMK_barcode = args.SMK_barcode

    # mk log dir
    logdir = outdir + '/log'
    os.system('mkdir -p %s' % (logdir))

    # script init
    sjm_cmd = 'log_dir %s\n' % (logdir)
    sjm_order = ''
    shell = ''

    # run
    for sample in fq_dict:
        outdir_dic = {}
        index = 0
        for step in steps:
            step_outdir = f"{outdir}/{sample}/{index:02d}.{step}"
            outdir_dic.update({step: step_outdir})
            index += 1

        # sample
        step = "sample"
        cmd = (
            f'{app} {assay} {step} '
            f'--chemistry {chemistry} '
            f'--sample {sample} --outdir {outdir_dic[step]} --assay {assay} ')
        sjm_cmd += generate_sjm(cmd, f'{step}_{sample}', conda)
        shell += cmd + '\n'
        last_step = step

        # barcode
        arr = fq_dict[sample]
        step = "barcode"
        cmd = (
            f'{app} {assay} {step} '
            f'--fq1 {arr[0]} --fq2 {arr[1]} --chemistry {chemistry} '
            f'--pattern {pattern} --whitelist {whitelist} --linker {linker} '
            f'--sample {sample} --lowQual {lowQual} --thread {thread} '
            f'--lowNum {lowNum} --outdir {outdir_dic[step]} --assay {assay} ')
        sjm_cmd += generate_sjm(cmd, f'{step}_{sample}', conda, m=5, x=thread)
        sjm_order += f'order {step}_{sample} after {last_step}_{sample}\n'
        shell += cmd + '\n'
        last_step = step

        # adapt
        step = "cutadapt"
        fq = f'{outdir_dic["barcode"]}/{sample}_2.fq.gz'
        cmd = (f'{app} {assay} {step} '
               f'--fq {fq} --sample {sample} --outdir '
               f'{outdir_dic[step]} --assay {assay} ')
        sjm_cmd += generate_sjm(cmd, f'{step}_{sample}', conda, m=5, x=1)
        sjm_order += f'order {step}_{sample} after {last_step}_{sample}\n'
        shell += cmd + '\n'
        last_step = step

        # mapping_smk
        step = 'mapping_smk'
        SMK_read2 = f'{outdir_dic["cutadapt"]}/{sample}_clean_2.fq.gz'
        cmd = (f'{app} {assay} {step} '
               f'--sample {sample} '
               f'--outdir {outdir_dic[step]} '
               f'--assay {assay} '
               f'--SMK_read2 {SMK_read2} '
               f'--SMK_pattern {SMK_pattern} '
               f'--SMK_barcode {SMK_barcode} '
               f'--SMK_linker {SMK_linker} ')
        sjm_cmd += generate_sjm(cmd, f'{step}_{sample}', conda, m=5, x=1)
        sjm_order += f'order {step}_{sample} after {last_step}_{sample}\n'
        shell += cmd + '\n'
        last_step = step

        # count_smk
        step = 'count_smk'
        read_file = f'{outdir_dic["mapping_smk"]}/{sample}_read_count.tsv'
        cmd = (f'{app} {assay} {step} '
               f'--sample {sample} '
               f'--outdir {outdir_dic[step]} '
               f'--assay {assay} '
               f'--match_dir {match_dict[sample]} '
               f'--read_file {read_file} '
               f'--dim {dim} '
               f'--UMI_min {UMI_min} '
               f'--SNR_min {SNR_min} ')
        sjm_cmd += generate_sjm(cmd, f'{step}_{sample}', conda, m=5, x=1)
        sjm_order += f'order {step}_{sample} after {last_step}_{sample}\n'
        shell += cmd + '\n'
        last_step = step

        # analysis_smk
        step = 'analysis_smk'
        tsne_tag_file = f'{outdir_dic["count_smk"]}/{sample}_tsne_tag.tsv'
        cmd = (f'{app} {assay} {step} '
               f'--sample {sample} '
               f'--outdir {outdir_dic[step]} '
               f'--assay {assay} '
               f'--match_dir {match_dict[sample]} '
               f'--tsne_tag_file {tsne_tag_file} ')
        sjm_cmd += generate_sjm(cmd, f'{step}_{sample}', conda, m=5, x=1)
        sjm_order += f'order {step}_{sample} after {last_step}_{sample}\n'
        shell += cmd + '\n'
        last_step = step

    # merged report
    if mod == 'sjm':
        step = 'merge_report'
        merge_report(
            fq_dict,
            steps,
            last_step,
            sjm_cmd,
            sjm_order,
            logdir,
            conda,
            outdir,
            rm_files,
        )
    if mod == 'shell':
        os.system('mkdir -p ./shell/')
        with open(f'./shell/{sample}.sh', 'w') as f:
            f.write(shell)
def main():
    parser = argparse.ArgumentParser('CeleScope RNA multi-sample')
    parser.add_argument('--mod', help='mod, sjm or shell', choices=['sjm', 'shell'], default='sjm')
    parser.add_argument(
        '--mapfile',
        help='mapfile, 4 columns, "LibName\\tDataDir\\tSampleName\\tCellNum", CellNum is optional',
        required=True)
    parser.add_argument(
        '--chemistry',
        choices=['scopeV2.0.0', 'scopeV2.0.1', 'scopeV2.1.0', 'scopeV2.1.1'],
        help='chemistry version')
    parser.add_argument('--whitelist', help='cellbarcode list')
    parser.add_argument('--linker', help='linker')
    parser.add_argument('--pattern', help='read1 pattern')
    parser.add_argument('--outdir', help='output dir', default="./")
    parser.add_argument(
        '--adapt',
        action='append',
        help='adapter sequence',
        default=['polyT=A{15}', 'p5=AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC'])
    parser.add_argument('--minimum-length', dest='minimum_length', help='minimum_length', default=20)
    parser.add_argument('--nextseq-trim', dest='nextseq_trim', help='nextseq_trim', default=20)
    parser.add_argument('--overlap', help='minimum overlap length, default=5', default=5)
    parser.add_argument('--lowQual', type=int, help='max phred of base as lowQual', default=0)
    parser.add_argument('--lowNum', type=int, help='max number with lowQual allowed', default=2)
    parser.add_argument('--starMem', help='starMem', default=30)
    parser.add_argument('--thread', help='thread', default=6)
    parser.add_argument('--rm_files', action='store_true',
                        help='remove redundant fq.gz and bam after running')
    parser.add_argument("--mut_file", help="mutation file", required=True)
    parser.add_argument("--shift_base", default=2)
    parser.add_argument(
        '--indel_genomeDir',
        help='insertion or deletion STAR indexed genome directory',
        required=True)
    args = vars(parser.parse_args())

    fq_dict, match_dict = parse_map_col4(args['mapfile'], None)

    # link
    raw_dir = args['outdir'] + '/data_give/rawdata'
    os.system('mkdir -p %s' % (raw_dir))
    with open(raw_dir + '/ln.sh', 'w') as fh:
        fh.write('cd %s\n' % (raw_dir))
        for s, arr in fq_dict.items():
            fh.write('ln -sf %s %s\n' % (arr[0], s + '_1.fq.gz'))
            fh.write('ln -sf %s %s\n' % (arr[1], s + '_2.fq.gz'))
    # os.system('sh %s' % (raw_dir + '/ln.sh'))

    logdir = args['outdir'] + '/log'
    os.system('mkdir -p %s' % (logdir))
    sjm_cmd = 'log_dir %s\n' % (logdir)
    sjm_order = ''
    shell = ''
    app = 'celescope'

    chemistry = args['chemistry']
    pattern = args['pattern']
    whitelist = args['whitelist']
    linker = args['linker']
    lowQual = args['lowQual']
    lowNum = args['lowNum']
    starMem = args['starMem']
    thread = args['thread']
    basedir = args['outdir']
    mod = args['mod']
    mut_file = args['mut_file']
    shift_base = args['shift_base']
    indel_genomeDir = args['indel_genomeDir']

    assay = __ASSAY__
    steps = __STEPS__
    conda = __CONDA__

    for sample in fq_dict:
        outdir_dic = {}
        index = 0
        for step in steps:
            outdir = f"{basedir}/{sample}/{index:02d}.{step}"
            outdir_dic.update({step: outdir})
            index += 1

        # sample
        step = "sample"
        cmd = (
            f'{app} {assay} {step} '
            f'--chemistry {chemistry} '
            f'--sample {sample} --outdir {outdir_dic[step]} --assay {assay} ')
        sjm_cmd += generate_sjm(cmd, f'{step}_{sample}', conda)
        shell += cmd + '\n'
        last_step = step

        # barcode
        arr = fq_dict[sample]
        step = "barcode"
        cmd = (
            f'{app} {assay} {step} '
            f'--fq1 {arr[0]} --fq2 {arr[1]} --chemistry {chemistry} '
            f'--pattern {pattern} --whitelist {whitelist} --linker {linker} '
            f'--sample {sample} --lowQual {lowQual} --thread {thread} '
            f'--lowNum {lowNum} --outdir {outdir_dic[step]} --assay {assay} ')
        sjm_cmd += generate_sjm(cmd, f'{step}_{sample}', conda, m=5, x=thread)
        sjm_order += f'order {step}_{sample} after {last_step}_{sample}\n'
        shell += cmd + '\n'
        last_step = step

        # adapt
        step = "cutadapt"
        fq = f'{outdir_dic["barcode"]}/{sample}_2.fq.gz'
        cmd = (f'{app} {assay} {step} '
               f'--fq {fq} --sample {sample} --outdir '
               f'{outdir_dic[step]} --assay {assay} ')
        sjm_cmd += generate_sjm(cmd, f'{step}_{sample}', conda, m=5, x=1)
        sjm_order += f'order {step}_{sample} after {last_step}_{sample}\n'
        shell += cmd + '\n'
        last_step = step

        # mapping_mut
        step = 'mapping_mut'
        fq = f'{outdir_dic["cutadapt"]}/{sample}_clean_2.fq.gz'
        cmd = (f'{app} {assay} {step} '
               f'--fq {fq} '
               f'--sample {sample} '
               f'--outdir {outdir_dic[step]} '
               f'--thread {thread} '
               f'--indel_genomeDir {indel_genomeDir} '
               f'--assay {assay} ')
        sjm_cmd += generate_sjm(cmd, f'{step}_{sample}', conda, m=starMem, x=thread)
        sjm_order += f'order {step}_{sample} after {last_step}_{sample}\n'
        shell += cmd + '\n'
        last_step = step

        # count_mut
        step = 'count_mut'
        bam = f'{outdir_dic["mapping_mut"]}/{sample}_Aligned.sortedByCoord.out.bam'
        cmd = (f'{app} {assay} {step} '
               f'--bam {bam} '
               f'--sample {sample} '
               f'--outdir {outdir_dic[step]} '
               f'--mut_file {mut_file} '
               f'--match_dir {match_dict[sample]} '
               f'--shift_base {shift_base} '
               f'--assay {assay} ')
        sjm_cmd += generate_sjm(cmd, f'{step}_{sample}', conda, m=8, x=1)
        sjm_order += f'order {step}_{sample} after {last_step}_{sample}\n'
        shell += cmd + '\n'
        last_step = step

    # merged report
    if mod == 'sjm':
        step = 'merge_report'
        merge_report(fq_dict, steps, last_step, sjm_cmd, sjm_order, logdir,
                     conda, args['outdir'], args['rm_files'])
    if mod == 'shell':
        os.system('mkdir -p ./shell/')
        with open(f'./shell/{sample}.sh', 'w') as f:
            f.write(shell)
def main():
    parser = argparse.ArgumentParser('CeleScope vdj multi-sample')
    parser.add_argument(
        '--mapfile',
        help='mapfile, 3 columns, "LibName\\tDataDir\\tSampleName"',
        required=True)
    parser.add_argument('--mod', help='mod, sjm or shell', choices=['sjm', 'shell'], default='sjm')
    parser.add_argument(
        '--chemistry',
        choices=['scopeV2.0.0', 'scopeV2.0.1', 'scopeV2.1.0', 'scopeV2.1.1'],
        help='chemistry version')
    parser.add_argument('--whitelist', help='cellbarcode list')
    parser.add_argument('--linker', help='linker')
    parser.add_argument('--pattern', help='read1 pattern')
    parser.add_argument('--outdir', help='output dir', default="./")
    parser.add_argument(
        '--adapt',
        action='append',
        help='adapter sequence',
        default=['polyT=A{15}', 'p5=AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC'])
    parser.add_argument('--minimum-length', dest='minimum_length', help='minimum_length', default=20)
    parser.add_argument('--nextseq-trim', dest='nextseq_trim', help='nextseq_trim', default=20)
    parser.add_argument('--overlap', help='minimum overlap length, default=5', default=5)
    parser.add_argument('--lowQual', type=int, help='max phred of base as lowQual', default=0)
    parser.add_argument('--lowNum', type=int, help='max number with lowQual allowed', default=2)
    parser.add_argument('--thread', help='thread', default=6)
    parser.add_argument("--type", help='TCR or BCR', required=True)
    parser.add_argument("--debug", action='store_true')
    parser.add_argument(
        '--iUMI',
        help='minimum number of UMI of identical receptor type and CDR3',
        default=1)
    parser.add_argument('--rm_files', action='store_true',
                        help='remove redundant fq.gz and bam after running')
    args = vars(parser.parse_args())

    fq_dict, match_dict = parse_map_col4(args['mapfile'], None)

    raw_dir = args['outdir'] + '/data_give/rawdata'
    os.system('mkdir -p %s' % (raw_dir))
    with open(raw_dir + '/ln.sh', 'w') as fh:
        fh.write('cd %s\n' % (raw_dir))
        for s, arr in fq_dict.items():
            fh.write('ln -sf %s %s\n' % (arr[0], s + '_1.fq.gz'))
            fh.write('ln -sf %s %s\n' % (arr[1], s + '_2.fq.gz'))

    logdir = args['outdir'] + '/log'
    os.system('mkdir -p %s' % (logdir))
    sjm_cmd = 'log_dir %s\n' % (logdir)
    sjm_order = ''
    shell = ''
    app = 'celescope'

    mod = args['mod']
    thread = args['thread']
    chemistry = args['chemistry']
    pattern = args['pattern']
    whitelist = args['whitelist']
    linker = args['linker']
    lowQual = args['lowQual']
    lowNum = args['lowNum']
    basedir = args['outdir']
    type = args['type']
    iUMI = args['iUMI']

    assay = __ASSAY__
    steps = __STEPS__
    conda = __CONDA__

    for sample in fq_dict:
        outdir_dic = {}
        index = 0
        for step in steps:
            outdir = f"{basedir}/{sample}/{index:02d}.{step}"
            outdir_dic.update({step: outdir})
            index += 1

        step = "sample"
        cmd = (
            f'{app} {assay} {step} '
            f'--chemistry {chemistry} '
            f'--sample {sample} --outdir {outdir_dic[step]} --assay {assay} ')
        sjm_cmd += generate_sjm(cmd, f'{step}_{sample}', conda)
        shell += cmd + '\n'
        last_step = step

        # barcode
        arr = fq_dict[sample]
        step = "barcode"
        cmd = (
            f'{app} {assay} {step} '
            f'--fq1 {arr[0]} --fq2 {arr[1]} --chemistry {chemistry} '
            f'--pattern {pattern} --whitelist {whitelist} --linker {linker} '
            f'--sample {sample} --lowQual {lowQual} --thread {thread} '
            f'--lowNum {lowNum} --outdir {outdir_dic[step]} --assay {assay} ')
        sjm_cmd += generate_sjm(cmd, f'{step}_{sample}', conda, m=5, x=thread)
        sjm_order += f'order {step}_{sample} after {last_step}_{sample}\n'
        shell += cmd + '\n'
        last_step = step

        # adapt
        step = "cutadapt"
        fq = f'{outdir_dic["barcode"]}/{sample}_2.fq.gz'
        cmd = (f'{app} {assay} {step} '
               f'--fq {fq} --sample {sample} --outdir '
               f'{outdir_dic[step]} --assay {assay} ')
        sjm_cmd += generate_sjm(cmd, f'{step}_{sample}', conda, m=5, x=1)
        sjm_order += f'order {step}_{sample} after {last_step}_{sample}\n'
        shell += cmd + '\n'
        last_step = step

        # mapping_vdj
        step = 'mapping_vdj'
        fq = f'{outdir_dic["cutadapt"]}/{sample}_clean_2.fq.gz'
        cmd = (f'{app} {assay} {step} '
               f'--fq {fq} '
               f'--sample {sample} '
               f'--type {type} '
               f'--thread {thread} '
               f'--outdir {outdir_dic[step]} '
               f'--assay {assay} ')
        sjm_cmd += generate_sjm(cmd, f'{step}_{sample}', conda, m=15, x=thread)
        sjm_order += f'order {step}_{sample} after {last_step}_{sample}\n'
        shell += cmd + '\n'
        last_step = step

        # count_vdj
        step = 'count_vdj'
        UMI_count_filter1_file = f'{outdir_dic["mapping_vdj"]}/{sample}_UMI_count_filtered1.tsv'
        cmd = (f'{app} {assay} {step} '
               f'--sample {sample} '
               f'--type {type} '
               f'--iUMI {iUMI} '
               f'--outdir {outdir_dic[step]} '
               f'--assay {assay} '
               f'--UMI_count_filter1_file {UMI_count_filter1_file} '
               f'--match_dir {match_dict[sample]} ')
        sjm_cmd += generate_sjm(cmd, f'{step}_{sample}', conda, m=8, x=thread)
        sjm_order += f'order {step}_{sample} after {last_step}_{sample}\n'
        shell += cmd + '\n'
        last_step = step

    # merged report
    step = 'merge_report'
    if mod == 'sjm':
        # add type to steps mapping and count
        for i in range(3, len(steps)):
            steps[i] = f'{type}_{steps[i]}'
        merge_report(fq_dict, steps, last_step, sjm_cmd, sjm_order, logdir,
                     conda, args['outdir'], args['rm_files'])
    if mod == 'shell':
        os.system('mkdir -p ./shell/')
        with open(f'./shell/{sample}.sh', 'w') as f:
            f.write(shell)
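
# The `multi_opts(assay)`-based scripts in this file read the same shared options
# (mapfile, mod, chemistry, pattern, whitelist, linker, lowQual, lowNum, outdir,
# rm_files, minimum_length) that the two argparse-based scripts above define inline.
# A minimal sketch of what multi_opts presumably centralizes, assembled from those
# inline definitions; the real helper's help strings, defaults, and mapfile wording
# (3 vs 4 columns) may differ.
import argparse

def multi_opts_sketch(assay):
    parser = argparse.ArgumentParser(f'CeleScope {assay} multi-sample')
    parser.add_argument('--mod', help='mod, sjm or shell', choices=['sjm', 'shell'], default='sjm')
    parser.add_argument(
        '--mapfile',
        help='mapfile, 4 columns, "LibName\\tDataDir\\tSampleName\\tCellNum", CellNum is optional',
        required=True)
    parser.add_argument(
        '--chemistry',
        choices=['scopeV2.0.0', 'scopeV2.0.1', 'scopeV2.1.0', 'scopeV2.1.1'],
        help='chemistry version')
    parser.add_argument('--whitelist', help='cellbarcode list')
    parser.add_argument('--linker', help='linker')
    parser.add_argument('--pattern', help='read1 pattern')
    parser.add_argument('--outdir', help='output dir', default="./")
    parser.add_argument('--minimum-length', dest='minimum_length', help='minimum_length', default=20)
    parser.add_argument('--nextseq-trim', dest='nextseq_trim', help='nextseq_trim', default=20)
    parser.add_argument('--overlap', help='minimum overlap length, default=5', default=5)
    parser.add_argument('--lowQual', type=int, help='max phred of base as lowQual', default=0)
    parser.add_argument('--lowNum', type=int, help='max number with lowQual allowed', default=2)
    parser.add_argument('--rm_files', action='store_true',
                        help='remove redundant fq.gz and bam after running')
    return parser
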
def main():
    # init
    assay = __ASSAY__
    steps = __STEPS__
    conda = __CONDA__
    app = 'celescope'

    # parser
    parser = multi_opts(assay)
    parser.add_argument('--thread', help='thread', default=6)
    parser.add_argument("--fq_pattern", help="tag read2 pattern", required=True)
    parser.add_argument("--linker_fasta", help="linker fasta")
    parser.add_argument("--barcode_fasta", help="barcode fasta", required=True)
    args = parser.parse_args()

    # read args
    outdir = args.outdir
    chemistry = args.chemistry
    pattern = args.pattern
    whitelist = args.whitelist
    linker = args.linker
    lowQual = args.lowQual
    lowNum = args.lowNum
    mod = args.mod
    rm_files = args.rm_files
    minimum_length = args.minimum_length

    # parse mapfile
    fq_dict, match_dict = parse_map_col4(args.mapfile, None)

    # link
    link_data(outdir, fq_dict)

    # custom args
    thread = args.thread
    fq_pattern = args.fq_pattern
    linker_fasta = args.linker_fasta
    barcode_fasta = args.barcode_fasta

    # mk log dir
    logdir = outdir + '/log'
    os.system('mkdir -p %s' % (logdir))

    # script init
    sjm_cmd = 'log_dir %s\n' % (logdir)
    sjm_order = ''
    shell_dict = defaultdict(str)

    # run
    for sample in fq_dict:
        outdir_dic = {}
        index = 0
        for step in steps:
            step_outdir = f"{outdir}/{sample}/{index:02d}.{step}"
            outdir_dic.update({step: step_outdir})
            index += 1

        # sample
        step = "sample"
        cmd = (
            f'{app} {assay} {step} '
            f'--chemistry {chemistry} '
            f'--sample {sample} --outdir {outdir_dic[step]} --assay {assay} ')
        sjm_cmd += generate_sjm(cmd, f'{step}_{sample}', conda)
        shell_dict[sample] += cmd + '\n'
        last_step = step

        # barcode
        arr = fq_dict[sample]
        step = "barcode"
        cmd = (
            f'{app} {assay} {step} '
            f'--fq1 {arr[0]} --fq2 {arr[1]} --chemistry {chemistry} '
            f'--pattern {pattern} --whitelist {whitelist} --linker {linker} '
            f'--sample {sample} --lowQual {lowQual} --thread {thread} '
            f'--lowNum {lowNum} --outdir {outdir_dic[step]} --assay {assay} ')
        sjm_cmd += generate_sjm(cmd, f'{step}_{sample}', conda, m=5, x=thread)
        sjm_order += f'order {step}_{sample} after {last_step}_{sample}\n'
        shell_dict[sample] += cmd + '\n'
        last_step = step

        # adapt
        step = "cutadapt"
        fq = f'{outdir_dic["barcode"]}/{sample}_2.fq.gz'
        cmd = (f'{app} {assay} {step} '
               f'--fq {fq} --sample {sample} --outdir '
               f'{outdir_dic[step]} --assay {assay} '
               f'--minimum_length {minimum_length} ')
        sjm_cmd += generate_sjm(cmd, f'{step}_{sample}', conda, m=5, x=1)
        sjm_order += f'order {step}_{sample} after {last_step}_{sample}\n'
        shell_dict[sample] += cmd + '\n'
        last_step = step

        # mapping_tag
        step = 'mapping_tag'
        fq = f'{outdir_dic["cutadapt"]}/{sample}_clean_2.fq.gz'
        cmd = (f'{app} {assay} {step} '
               f'--sample {sample} '
               f'--outdir {outdir_dic[step]} '
               f'--assay {assay} '
               f'--fq {fq} '
               f'--fq_pattern {fq_pattern} '
               f'--barcode_fasta {barcode_fasta} '
               f'--linker_fasta {linker_fasta} ')
        sjm_cmd += generate_sjm(cmd, f'{step}_{sample}', conda, m=5, x=1)
        sjm_order += f'order {step}_{sample} after {last_step}_{sample}\n'
        shell_dict[sample] += cmd + '\n'
        last_step = step

        step = 'count_cite'
        read_count_file = f'{outdir_dic["mapping_tag"]}/{sample}_read_count.tsv'
        cmd = (f'{app} {assay} {step} '
               f'--sample {sample} '
               f'--outdir {outdir_dic[step]} '
               f'--assay {assay} '
               f'--match_dir {match_dict[sample]} '
               f'--read_count_file {read_count_file} ')
        sjm_cmd += generate_sjm(cmd, f'{step}_{sample}', conda, m=5, x=1)
        sjm_order += f'order {step}_{sample} after {last_step}_{sample}\n'
        shell_dict[sample] += cmd + '\n'
        last_step = step

        step = 'analysis_cite'
        citeseq_mtx = f'{outdir_dic["count_cite"]}/{sample}_citeseq.mtx.gz'
        cmd = (f'{app} {assay} {step} '
               f'--sample {sample} '
               f'--outdir {outdir_dic[step]} '
               f'--assay {assay} '
               f'--match_dir {match_dict[sample]} '
               f'--citeseq_mtx {citeseq_mtx} ')
        sjm_cmd += generate_sjm(cmd, f'{step}_{sample}', conda, m=5, x=1)
        sjm_order += f'order {step}_{sample} after {last_step}_{sample}\n'
        shell_dict[sample] += cmd + '\n'
        last_step = step

    # merged report
    if mod == 'sjm':
        step = 'merge_report'
        merge_report(
            fq_dict,
            steps,
            last_step,
            sjm_cmd,
            sjm_order,
            logdir,
            conda,
            outdir,
            rm_files,
        )
    if mod == 'shell':
        os.system('mkdir -p ./shell/')
        for sample in shell_dict:
            with open(f'./shell/{sample}.sh', 'w') as f:
                f.write(shell_dict[sample])
def main():
    # init
    assay = __ASSAY__
    steps = __STEPS__
    conda = __CONDA__
    app = 'celescope'

    # parser
    parser = multi_opts(assay)
    parser.add_argument('--thread', help='thread', default=6)
    args = parser.parse_args()

    # read args
    outdir = args.outdir
    chemistry = args.chemistry
    pattern = args.pattern
    whitelist = args.whitelist
    linker = args.linker
    lowQual = args.lowQual
    lowNum = args.lowNum
    mod = args.mod
    rm_files = args.rm_files

    # parse mapfile
    fq_dict, match_dict = parse_map_col4(args.mapfile, None)

    # link
    link_data(outdir, fq_dict)

    # custom args
    thread = args.thread

    # mk log dir
    logdir = outdir + '/log'
    os.system('mkdir -p %s' % (logdir))

    # script init
    sjm_cmd = 'log_dir %s\n' % (logdir)
    sjm_order = ''
    shell = ''

    # outdir dict
    for sample in fq_dict:
        outdir_dic = {}
        index = 0
        for step in steps:
            step_outdir = f"{outdir}/{sample}/{index:02d}.{step}"
            outdir_dic.update({step: step_outdir})
            index += 1

        # sample
        step = "sample"
        cmd = (
            f'{app} {assay} {step} '
            f'--chemistry {chemistry} '
            f'--sample {sample} --outdir {outdir_dic[step]} --assay {assay} ')
        sjm_cmd += generate_sjm(cmd, f'{step}_{sample}', conda)
        shell += cmd + '\n'
        last_step = step

        # barcode
        arr = fq_dict[sample]
        step = "barcode"
        cmd = (
            f'{app} {assay} {step} '
            f'--fq1 {arr[0]} --fq2 {arr[1]} --chemistry {chemistry} '
            f'--pattern {pattern} --whitelist {whitelist} --linker {linker} '
            f'--sample {sample} --lowQual {lowQual} --thread {thread} '
            f'--lowNum {lowNum} --outdir {outdir_dic[step]} --assay {assay} ')
        sjm_cmd += generate_sjm(cmd, f'{step}_{sample}', conda, m=5, x=thread)
        sjm_order += f'order {step}_{sample} after {last_step}_{sample}\n'
        shell += cmd + '\n'
        last_step = step

        # adapt
        step = "cutadapt"
        fq = f'{outdir_dic["barcode"]}/{sample}_2.fq.gz'
        cmd = (f'{app} {assay} {step} '
               f'--fq {fq} --sample {sample} --outdir '
               f'{outdir_dic[step]} --assay {assay} ')
        sjm_cmd += generate_sjm(cmd, f'{step}_{sample}', conda, m=5, x=1)
        sjm_order += f'order {step}_{sample} after {last_step}_{sample}\n'
        shell += cmd + '\n'
        last_step = step

        # mapping_hla
        step = 'mapping_hla'
        fq = f'{outdir_dic["cutadapt"]}/{sample}_clean_2.fq.gz'
        cmd = (f'{app} {assay} {step} '
               f'--fq {fq} --sample {sample} '
               f'--thread {thread} '
               f'--match_dir {match_dict[sample]} '
               f'--outdir {outdir_dic[step]} --assay {assay} ')
        sjm_cmd += generate_sjm(cmd, f'{step}_{sample}', conda, m=30, x=thread)
        sjm_order += f'order {step}_{sample} after {last_step}_{sample}\n'
        shell += cmd + '\n'
        last_step = step

    # merged report
    if mod == 'sjm':
        step = 'merge_report'
        merge_report(
            fq_dict,
            steps,
            last_step,
            sjm_cmd,
            sjm_order,
            logdir,
            conda,
            outdir,
            rm_files,
        )
    if mod == 'shell':
        os.system('mkdir -p ./shell/')
        with open(f'./shell/{sample}.sh', 'w') as f:
            f.write(shell)