def main(argv=None):
    """Extract read ends from BAM files, remap them and print the reads table.

    For every BAM file in ``settings.bamfiles``:

    1. ``RILseq.get_unmapped_reads`` writes the read ends to
       ``<dirout>/<lib>_ends_1.fastq`` and ``<dirout>/<lib>_ends_2.fastq``
       (to ``os.devnull`` when ``settings.skip_mapping`` is set).
    2. Each fastq file is handed to ``RILseq.run_bwa``; with
       ``settings.skip_mapping`` the BAM files ``<dirout>/<lib>_ends_1.bam``
       and ``<dirout>/<lib>_ends_2.bam`` are opened instead of being created.
    3. ``RILseq.write_reads_table`` writes the result to standard output.

    Arguments:
    - `argv`: Command line arguments, passed to process_command_line

    Returns 0 on success.
    """
    settings = process_command_line(argv)
    # Read the transcripts if given
    trans_dict = None
    if settings.transcripts:
        trans_dict = RILseq.read_transcripts(settings.transcripts)
    # Optional extra output handle, passed to write_reads_table as
    # write_single: a dedicated file, standard output, or nothing
    outall = None
    if settings.all_reads:
        try:
            outall = open(settings.all_reads, 'w')
        except IOError:
            # Best effort: carry on without the extra output
            outall = None
    elif settings.add_all_reads:
        outall = sys.stdout
    try:
        for bf in RILseq.flat_list(settings.bamfiles):
            bfin = pysam.Samfile(bf)
            # Every BAM handle opened for this library, closed in `finally`
            bam_handles = [bfin]
            try:
                libname = bf.rsplit('.', 1)[0].rsplit('/', 1)[-1]
                fsq1name = "%s/%s_ends_1.fastq" % (settings.dirout, libname)
                fsq2name = "%s/%s_ends_2.fastq" % (settings.dirout, libname)
                if settings.skip_mapping:
                    # The fastq files are not needed, discard what is written
                    fsq1 = open(os.devnull, 'w')
                    fsq2 = fsq1
                else:
                    fsq1 = open(fsq1name, 'w')
                    fsq2 = open(fsq2name, 'w')
                try:
                    single_mapped = RILseq.get_unmapped_reads(
                        bfin, fsq1, fsq2, settings.length, settings.maxG,
                        rev=settings.reverse_complement, all_reads=True,
                        dust_thr=settings.dust_thr)
                finally:
                    # The fastq files are read back by name below, so they
                    # must be flushed and closed before the mapping starts
                    fsq1.close()
                    if fsq2 is not fsq1:
                        fsq2.close()
                reads_in = []
                # Map the fastq files to the genome
                for fqname in (fsq1name, fsq2name):
                    bamheadname = fqname.rsplit('.', 1)[0].rsplit('/', 1)[-1]
                    if settings.skip_mapping:
                        bamname = "%s/%s.bam" % (settings.dirout, bamheadname)
                    else:
                        bamname = RILseq.run_bwa(
                            settings.bwa_exec, fqname, None,
                            settings.dirout, bamheadname,
                            settings.max_mismatches,
                            settings.genome_fasta, settings.params_aln,
                            '', settings.samse_params,
                            settings.samtools_cmd,
                            processors=settings.processors)
                    bamin = pysam.Samfile(bamname)
                    bam_handles.append(bamin)
                    reads_in.append(RILseq.read_bam_file(
                        bamin, bamin.references,
                        settings.allowed_mismatches))
                RILseq.write_reads_table(
                    sys.stdout, reads_in[0], reads_in[1], bfin.references,
                    settings.distance, not settings.keep_circular,
                    trans_dict, write_single=outall,
                    single_mapped=single_mapped,
                    max_NM=settings.allowed_mismatches)
            finally:
                for handle in bam_handles:
                    handle.close()
    finally:
        # Never close standard output, only a file opened by this function
        if outall is not None and outall is not sys.stdout:
            outall.close()
    return 0  # success
def main(argv=None):
    """Map every library with bwa and write per-library outputs.

    Each entry of ``settings.fastq_1`` (paired with the entry of
    ``settings.fastq_2`` at the same index, if there is one) is passed to
    ``RILseq.run_bwa``. When ``settings.create_wig`` is set, the coverage of
    the library is written to ``<dirout>/<name>_coverage.wig``; when
    ``settings.genes_gff`` is readable, the table of counts of all the
    libraries is written to ``<dirout>/<name>_counts.txt`` for every library.
    ``<name>`` is the first-read fastq name up to ``_cutadapt``.

    Arguments:
    - `argv`: Command line arguments, passed to process_command_line

    Returns 0 on success.
    """
    settings = process_command_line(argv)
    if not os.path.exists(settings.dirout):
        os.makedirs(settings.dirout)
    if settings.genes_gff:
        try:
            with open(settings.genes_gff) as gff_in:
                pos_feat_list, all_features = RILseq.read_gtf(
                    gff_in, settings.feature, settings.identifier)
        except IOError:
            # Best effort: without the annotations only the mapping (and
            # the wig files) are produced
            settings.genes_gff = None
    gcounts = {}
    lib_order = []
    fastq_1_list = list(RILseq.flat_list(settings.fastq_1))
    fastq_2_list = list(RILseq.flat_list(settings.fastq_2))
    for i, r1_name in enumerate(fastq_1_list):
        # A library without a matching second-read file is mapped alone
        r2_name = fastq_2_list[i] if i < len(fastq_2_list) else None
        libname = r1_name.rsplit('.', 1)[0].rsplit('/', 1)[-1]
        outhead = '%s_bwa' % libname
        bamname = RILseq.run_bwa(
            settings.bwa_exec, r1_name, r2_name,
            settings.dirout, outhead, settings.allowed_mismatches,
            settings.genome_fasta, settings.params_aln,
            settings.sampe_params, settings.samse_params,
            settings.samtools_cmd)
        samfile = pysam.Samfile(bamname)
        try:
            if settings.genes_gff:
                lib_order.append(libname)
                gcounts[libname] = RILseq.count_features(
                    pos_feat_list, samfile, settings.overlap,
                    rev=settings.reverse_complement)
            if settings.create_wig:
                coverage = RILseq.generate_wig(
                    samfile, rev=settings.reverse_complement,
                    first_pos=False)
                # Open only the wig file of this library: re-opening the
                # files of all the libraries in 'w' mode on every iteration
                # truncates the tracks that were already written
                wig_name = "%s/%s_coverage.wig" % (
                    settings.dirout, r1_name.split("_cutadapt")[0])
                with open(wig_name, 'w') as outwig:
                    RILseq.print_wiggle(
                        coverage,
                        "%s_single_fragments_coverage" % libname,
                        "%s single fragments coverage" % libname,
                        outwig)
        finally:
            samfile.close()
    # Print the table of counts
    if settings.genes_gff:
        header = ['Gene name'] + lib_order
        rows = [[g] + [gcounts[libn][g] for libn in lib_order]
                for g in sorted(all_features)]
        # The same table (all the libraries) is written once per library
        for fastq in fastq_1_list:
            table_name = "%s/%s_counts.txt" % (
                settings.dirout, fastq.split("_cutadapt")[0])
            with open(table_name, 'w') as outtable:
                outt = csv.writer(outtable, delimiter='\t')
                outt.writerow(header)
                outt.writerows(rows)
    return 0  # success
def main(argv=None):
    """Map every library with bwa and write combined outputs.

    Each entry of ``settings.fastq_1`` (paired with the entry of
    ``settings.fastq_2`` at the same index, if there is one) is passed to
    ``RILseq.run_bwa``. The coverage of all the libraries is written to
    ``<dirout>/<outhead>_coverage.wig`` and, when ``settings.genes_gff`` is
    readable, the table of counts is written to
    ``<dirout>/<outhead>_counts.txt``.

    Arguments:
    - `argv`: Command line arguments, passed to process_command_line

    Returns 0 on success.
    """
    settings = process_command_line(argv)
    if not os.path.exists(settings.dirout):
        os.makedirs(settings.dirout)
    wig_name = "%s/%s_coverage.wig" % (settings.dirout, settings.outhead)
    # All the libraries share one wig file, closed when the loop is done
    with open(wig_name, 'w') as outwig:
        if settings.genes_gff:
            try:
                with open(settings.genes_gff) as gff_in:
                    pos_feat_list, all_features = RILseq.read_gtf(
                        gff_in, settings.feature, settings.identifier)
            except IOError:
                # Best effort: without the annotations only the mapping
                # and the wig file are produced
                settings.genes_gff = None
        gcounts = {}
        lib_order = []
        fastq_2_list = list(RILseq.flat_list(settings.fastq_2))
        for i, r1_name in enumerate(RILseq.flat_list(settings.fastq_1)):
            # A library without a matching second-read file is mapped alone
            r2_name = fastq_2_list[i] if i < len(fastq_2_list) else None
            libname = r1_name.rsplit('.', 1)[0].rsplit('/', 1)[-1]
            outhead = '%s_bwa' % libname
            bamname = RILseq.run_bwa(
                settings.bwa_exec, r1_name, r2_name,
                settings.dirout, outhead, settings.allowed_mismatches,
                settings.genome_fasta, settings.params_aln,
                settings.sampe_params, settings.samse_params,
                settings.samtools_cmd, processors=settings.processors)
            samfile = pysam.Samfile(bamname)
            try:
                if settings.genes_gff:
                    lib_order.append(libname)
                    gcounts[libname] = RILseq.count_features(
                        pos_feat_list, samfile, settings.overlap,
                        rev=settings.reverse_complement)
                coverage = RILseq.generate_wig(
                    samfile, rev=settings.reverse_complement,
                    first_pos=False)
                RILseq.print_wiggle(
                    coverage, "%s_single_fragments_coverage" % libname,
                    "%s single fragments coverage" % libname, outwig)
            finally:
                samfile.close()
    # Print the table of counts
    if settings.genes_gff:
        table_name = "%s/%s_counts.txt" % (settings.dirout, settings.outhead)
        with open(table_name, 'w') as outtable:
            outt = csv.writer(outtable, delimiter='\t')
            outt.writerow(['Gene name'] + lib_order)
            for g in sorted(all_features):
                outt.writerow(
                    [g] + [gcounts[libn][g] for libn in lib_order])
    return 0  # success
def main(argv=None):
    """Extract read ends from BAM files, remap them and print the reads table.

    For every BAM file in ``settings.bamfiles``:

    1. ``RILseq.get_unmapped_reads`` writes the read ends to
       ``<dirout>/<lib>_ends_1.fastq`` and ``<dirout>/<lib>_ends_2.fastq``
       (to ``os.devnull`` when ``settings.skip_mapping`` is set).
    2. Each fastq file is handed to ``RILseq.run_bwa``; with
       ``settings.skip_mapping`` the BAM files ``<dirout>/<lib>_ends_1.bam``
       and ``<dirout>/<lib>_ends_2.bam`` are opened instead of being created.
    3. ``RILseq.write_reads_table`` writes the result to standard output.

    Arguments:
    - `argv`: Command line arguments, passed to process_command_line

    Returns 0 on success.
    """
    settings = process_command_line(argv)
    # Read the transcripts if given
    trans_dict = None
    if settings.transcripts:
        trans_dict = RILseq.read_transcripts(settings.transcripts)
    # Optional extra output handle, passed to write_reads_table as
    # write_single: a dedicated file, standard output, or nothing
    outall = None
    if settings.all_reads:
        try:
            outall = open(settings.all_reads, 'w')
        except IOError:
            # Best effort: carry on without the extra output
            outall = None
    elif settings.add_all_reads:
        outall = sys.stdout
    try:
        for bf in RILseq.flat_list(settings.bamfiles):
            bfin = pysam.Samfile(bf)
            # Every BAM handle opened for this library, closed in `finally`
            bam_handles = [bfin]
            try:
                libname = bf.rsplit('.', 1)[0].rsplit('/', 1)[-1]
                fsq1name = "%s/%s_ends_1.fastq" % (settings.dirout, libname)
                fsq2name = "%s/%s_ends_2.fastq" % (settings.dirout, libname)
                if settings.skip_mapping:
                    # The fastq files are not needed, discard what is written
                    fsq1 = open(os.devnull, 'w')
                    fsq2 = fsq1
                else:
                    fsq1 = open(fsq1name, 'w')
                    fsq2 = open(fsq2name, 'w')
                try:
                    single_mapped = RILseq.get_unmapped_reads(
                        bfin, fsq1, fsq2, settings.length, settings.maxG,
                        rev=settings.reverse_complement, all_reads=True,
                        dust_thr=settings.dust_thr)
                finally:
                    # The fastq files are read back by name below, so they
                    # must be flushed and closed before the mapping starts
                    fsq1.close()
                    if fsq2 is not fsq1:
                        fsq2.close()
                reads_in = []
                # Map the fastq files to the genome
                for fqname in (fsq1name, fsq2name):
                    bamheadname = fqname.rsplit('.', 1)[0].rsplit('/', 1)[-1]
                    if settings.skip_mapping:
                        bamname = "%s/%s.bam" % (settings.dirout, bamheadname)
                    else:
                        bamname = RILseq.run_bwa(
                            settings.bwa_exec, fqname, None,
                            settings.dirout, bamheadname,
                            settings.max_mismatches,
                            settings.genome_fasta, settings.params_aln,
                            '', settings.samse_params,
                            settings.samtools_cmd,
                            processors=settings.processors)
                    bamin = pysam.Samfile(bamname)
                    bam_handles.append(bamin)
                    reads_in.append(RILseq.read_bam_file(
                        bamin, bamin.references,
                        settings.allowed_mismatches))
                RILseq.write_reads_table(
                    sys.stdout, reads_in[0], reads_in[1], bfin.references,
                    settings.distance, not settings.keep_circular,
                    trans_dict, write_single=outall,
                    single_mapped=single_mapped,
                    max_NM=settings.allowed_mismatches)
            finally:
                for handle in bam_handles:
                    handle.close()
    finally:
        # Never close standard output, only a file opened by this function
        if outall is not None and outall is not sys.stdout:
            outall.close()
    return 0  # success