def main():
    """Command-line entry point: print filtered BLAST hits as a table.

    Takes the BLAST output (-i), the query FASTA (-j) and the target FASTA
    (-k) from the command line, loads sequence sizes with
    ``get_contig2size`` and hands them, together with the E-value and
    coverage cut-offs, to ``parse_blast``.  A header line followed by one
    tab-delimited line per returned match is written to stdout.

    Exits through ``parser.error`` when an input file is not given or does
    not exist.
    """
    parser = OptionParser(
        usage="usage: %prog [options] [ > out.bed ]",
        version="%prog 1.0",
        description="""Blast has to be run with -m8.""",
        epilog="",
    )
    # the three input files share the same option layout
    for flag, dest, text in (
        ("-i", "infile", "blast output"),
        ("-j", "query", "query fasta"),
        ("-k", "target", "target fasta"),
    ):
        parser.add_option(flag, dest=dest, default="", help=text)
    parser.add_option("-e", dest="evalue", default=1e-05, type=float,
                      help="E-value cut-off [%default]")
    # coverage cut-offs
    for flag, dest, text in (
        ("-q", "qcov", "query coverage [%default]"),
        ("-t", "tcov", "target coverage [%default]"),
    ):
        parser.add_option(flag, dest=dest, default=0, type=float, help=text)
    parser.add_option("-v", dest="verbose", default=False,
                      action="store_true")

    o, fnames = parser.parse_args()
    if o.verbose:
        sys.stderr.write("Options: %s\nArgs: %s\n" % (o, fnames))

    # every input file has to be named and has to exist
    for path in (o.infile, o.query, o.target):
        if not path:
            parser.error("Provide input file!")
        if not os.path.isfile(path):
            parser.error("No such file: %s" % path)

    # sequence sizes of queries and targets
    query_sizes = get_contig2size(o.query)
    target_sizes = get_contig2size(o.target)
    # matches passing the cut-offs
    hits = parse_blast(o.infile, query_sizes, target_sizes,
                       o.evalue, o.qcov, o.tcov, o.verbose)

    header = "#Query\tTarget\tIndentity\tAlignment length\tMismatches\tGaps\tQuery start\tQuery end\tTarget start\tTarget end\tE-value\tScore\tQuery aligned [%]\tTarget aligned [%]\t"
    sys.stdout.write(header + "\n")

    # twelve verbatim columns followed by the two coverages
    row_format = "\t".join(["%s"] * 12 + ["%.1f"] * 2) + "\n"
    for (qlocus, tlocus, identity, algLen, mismatches, gaps,
         qstart, qend, tstart, tend, e, score, qcov, tcov) in hits:
        # coverages are scaled by 100 for the "[%]" columns
        sys.stdout.write(row_format % (
            qlocus, tlocus, identity, algLen, mismatches, gaps,
            qstart, qend, tstart, tend, e, score, qcov * 100, tcov * 100))
def main():
    """Parse options, filter BLAST -m8 hits and write them to stdout.

    The BLAST output (-i), query FASTA (-j) and target FASTA (-k) are
    required; ``parser.error`` ends the program when one of them is not
    given or is not an existing file.  Sequence sizes come from
    ``get_contig2size`` and the matches from ``parse_blast``; each match is
    printed as one tab-delimited line below a header line.
    """
    usage = "usage: %prog [options] [ > out.bed ]"
    desc = """Blast has to be run with -m8."""
    parser = OptionParser(usage=usage, version="%prog 1.0",
                          description=desc, epilog="")
    # input files (no default: unset options stay None)
    parser.add_option("-i", dest="infile", help="blast output")
    parser.add_option("-j", dest="query", help="query fasta")
    parser.add_option("-k", dest="target", help="target fasta")
    # cut-offs
    parser.add_option("-e", dest="evalue", default=1e-05, type=float,
                      help="E-value cut-off [%default]")
    parser.add_option("-q", dest="qcov", default=0, type=float,
                      help="query coverage [%default]")
    parser.add_option("-t", dest="tcov", default=0, type=float,
                      help="target coverage [%default]")
    parser.add_option("-v", dest="verbose", default=False,
                      action="store_true")

    (o, fnames) = parser.parse_args()
    if o.verbose:
        sys.stderr.write("Options: %s\nArgs: %s\n" % (o, fnames))

    # all three files are mandatory
    required_inputs = (o.infile, o.query, o.target)
    for fn in required_inputs:
        if not fn:
            parser.error("Provide input file!")
        if not os.path.isfile(fn):
            parser.error("No such file: %s" % fn)

    # sizes of queries and targets
    q2len = get_contig2size(o.query)
    t2len = get_contig2size(o.target)

    # matches passing the cut-offs
    matches = parse_blast(o.infile, q2len, t2len, o.evalue,
                          o.qcov, o.tcov, o.verbose)

    sys.stdout.write("#Query\tTarget\tIndentity\tAlignment length\tMismatches\tGaps\tQuery start\tQuery end\tTarget start\tTarget end\tE-value\tScore\tQuery aligned [%]\tTarget aligned [%]\t" + "\n")
    for match in matches:
        (qlocus, tlocus, identity, algLen, mismatches, gaps,
         qstart, qend, tstart, tend, e, score, qcov, tcov) = match
        verbatim = (qlocus, tlocus, identity, algLen, mismatches, gaps,
                    qstart, qend, tstart, tend, e, score)
        # one-element tuples keep %s formatting safe for any field type
        leading = "\t".join("%s" % (value,) for value in verbatim)
        # coverages are scaled by 100 for the "[%]" columns
        sys.stdout.write("%s\t%.1f\t%.1f\n"
                         % (leading, qcov * 100, tcov * 100))
def main():
    """Command-line entry point: plot SNPs from one or more VCF files.

    Positional arguments are the VCF files.  The genome FASTA (-f) is
    passed to ``get_contig2size`` and the annotation (-g) to ``load_gff``
    when its name ends with ".gff", otherwise to ``load_gtf``.  Each VCF
    file name is printed to stdout and the file is handed to
    ``snps2plot`` together with the window size, output base, file-name
    splitting flag, plot extension and verbosity.

    Note that verbose reporting is on by default; passing -v turns it off.

    Exits through ``parser.error`` when no VCF file is given, a VCF file
    does not exist, or -g / -f is missing.
    """
    usage = "usage: %prog [options] vcf1 [ vcf2 ... vcfN ]"
    parser = OptionParser(usage=usage, version="%prog 1.0")
    parser.add_option("-g", dest="gtf", help="genome annotation")
    parser.add_option("-f", dest="fasta", help="genome fasta")
    parser.add_option("-o", dest="outbase", default="plots",
                      help="output directory [%default]")
    parser.add_option("-s", dest="splitFn", default=False,
                      action="store_true",
                      help="split fname (sheet name) by dot")
    parser.add_option("-w", dest="window", default=10, type=int,
                      help="window size in kb [%default]")
    parser.add_option("-p", dest="ext", default="png",
                      help="Supported: emf, eps, pdf, png, ps, raw, rgba, svg, svgz [%default]")
    parser.add_option("-v", dest="verbose", default=True,
                      action="store_false")

    (o, args) = parser.parse_args()
    if o.verbose:
        sys.stderr.write("%s\nFiles to process: %s\n"
                         % (str(o), ", ".join(args)))

    # at least one VCF file is needed
    if not args:
        parser.error("At least one input file has to be specified!")
    # every VCF file has to exist; report which one is missing
    for fn in args:
        if not os.path.isfile(fn):
            parser.error("No such file: %s" % fn)
    # both are used unconditionally below, so fail early with a clear
    # message instead of crashing on None
    if not o.gtf:
        parser.error("Provide genome annotation (-g)!")
    if not o.fasta:
        parser.error("Provide genome fasta (-f)!")

    # load genome - in fact only contig sizes are needed
    contig2size = get_contig2size(o.fasta)

    # load annotation; the gene2position part of the result is not used here
    if o.gtf.endswith(".gff"):
        _gene2position, contig2gene = load_gff(o.gtf)
    else:
        _gene2position, contig2gene = load_gtf(o.gtf)

    # process vcf files
    for fn in args:
        sys.stdout.write("%s\n" % fn)
        snps2plot(fn, o.window, contig2gene, contig2size, o.outbase,
                  o.splitFn, o.ext, o.verbose)
def main():
    """Parse the command line and plot SNPs for every VCF file given.

    The genome FASTA (-f) is loaded with ``get_contig2size``; the
    annotation (-g) is loaded with ``load_gff`` when its name ends with
    ".gff" and with ``load_gtf`` otherwise.  For each positional VCF file
    its name is written to stdout and ``snps2plot`` is called with the
    window size, output base, file-name splitting flag, plot extension
    and verbosity.

    Verbose reporting is enabled by default and switched off by -v.

    Exits through ``parser.error`` when no VCF file is given, when a VCF
    file does not exist, or when -g / -f is not provided.
    """
    usage = "usage: %prog [options] vcf1 [ vcf2 ... vcfN ]"
    parser = OptionParser(usage=usage, version="%prog 1.0")
    parser.add_option("-g", dest="gtf", help="genome annotation")
    parser.add_option("-f", dest="fasta", help="genome fasta")
    parser.add_option("-o", dest="outbase", default="plots",
                      help="output directory [%default]")
    parser.add_option("-s", dest="splitFn", default=False,
                      action="store_true",
                      help="split fname (sheet name) by dot")
    parser.add_option("-w", dest="window", default=10, type=int,
                      help="window size in kb [%default]")
    parser.add_option("-p", dest="ext", default="png",
                      help="Supported: emf, eps, pdf, png, ps, raw, rgba, svg, svgz [%default]")
    parser.add_option("-v", dest="verbose", default=True,
                      action="store_false")

    (o, args) = parser.parse_args()
    if o.verbose:
        sys.stderr.write("%s\nFiles to process: %s\n"
                         % (str(o), ", ".join(args)))

    # check if any input file
    if not args:
        parser.error("At least one input file has to be specified!")

    # check if files exist; the offending name is part of the message
    missing = [fn for fn in args if not os.path.isfile(fn)]
    if missing:
        parser.error("No such file: %s" % missing[0])

    # annotation and genome are required by the loaders below; without
    # this check a missing -g crashed on None.endswith()
    if not o.gtf:
        parser.error("Provide genome annotation (-g)!")
    if not o.fasta:
        parser.error("Provide genome fasta (-f)!")

    # load genome - in fact only contig sizes are needed
    contig2size = get_contig2size(o.fasta)

    # load annotation; only contig2gene is used further
    loader = load_gff if o.gtf.endswith(".gff") else load_gtf
    _gene2position, contig2gene = loader(o.gtf)

    # process vcf files
    for fn in args:
        sys.stdout.write("%s\n" % fn)
        snps2plot(fn, o.window, contig2gene, contig2size, o.outbase,
                  o.splitFn, o.ext, o.verbose)
def main():
    """Command-line entry point: summarise best BLAST hits per target.

    Positional arguments are BLAST -m8 output files, one per sample; -k
    names the target FASTA.  For every sample the matches returned by
    ``parse_blast`` are reduced to the highest-scoring match per target,
    and a table with one row per target (sorted by name) and an
    identity / coverage column pair per sample is written to stdout.
    Targets without a match in a sample get 0 in both columns.

    Sample names are the file names cut at the first dot; passing -s
    keeps the full file name instead.

    Exits through ``parser.error`` when no BLAST file or no target FASTA
    is given, or when a named file does not exist.  Raises ``ValueError``
    when a match refers to a target that is absent from the target FASTA.
    """
    usage = "usage: %prog [options] blastout1 [blastout2 ... blastoutN] [ > out ]"
    desc = """Blast has to be run with -m8."""
    epilog = ""
    parser = OptionParser(usage=usage, version="%prog 1.0",
                          description=desc, epilog=epilog)
    parser.add_option("-k", dest="target", default="", help="target fasta")
    parser.add_option("-e", dest="evalue", default=1e-05, type=float,
                      help="E-value cut-off [%default]")
    parser.add_option("-q", dest="qcov", default=0, type=float,
                      help="query coverage [%default]")
    parser.add_option("-t", dest="tcov", default=0, type=float,
                      help="target coverage [%default]")
    parser.add_option("-s", dest="fnsplit", default=True,
                      action="store_false", help="split fnames [%default]")
    parser.add_option("-v", dest="verbose", default=False,
                      action="store_true")

    (o, fnames) = parser.parse_args()
    if o.verbose:
        sys.stderr.write("Options: %s\nArgs: %s\n" % (o, fnames))

    # without any BLAST file the table would have no sample columns
    if not fnames:
        parser.error("Provide input file!")
    # check files
    for fn in fnames + [o.target]:
        if not fn:
            parser.error("Provide input file!")
        if not os.path.isfile(fn):
            parser.error("No such file: %s" % fn)

    # sizes of targets; rows are reported in sorted target order
    t2len = get_contig2size(o.target)
    targets = sorted(t2len.keys())
    # constant-time row lookup instead of list.index() for every match
    target2index = dict((t, i) for i, t in enumerate(targets))

    samples = []
    s2matches = []  # per sample: best match per target row (or None)
    for fn in fnames:
        # sample name: file name, cut at the first dot unless -s was given
        samples.append(fn.split(".")[0] if o.fnsplit else fn)

        smatch = [None] * len(targets)
        # query sizes are not loaded here; an empty dict is passed instead
        q2len = {}
        matches = parse_blast(fn, q2len, t2len, o.evalue, o.qcov, o.tcov,
                              o.verbose)
        for (qlocus, tlocus, identity, algLen, mismatches, gaps,
             qstart, qend, tstart, tend, e, score, qcov, tcov) in matches:
            if tlocus not in target2index:
                raise ValueError("Target %s not found in %s"
                                 % (tlocus, o.target))
            i = target2index[tlocus]
            # keep the first match, or replace it by a better-scoring one
            if smatch[i] is None or score > smatch[i][2]:
                smatch[i] = (qlocus, e, score, identity, tcov)
        s2matches.append(smatch)

    # write header: every sample spans two columns
    header = "Target" + "".join("\t%s\t" % s for s in samples)
    sys.stdout.write(header + "\n")
    sys.stdout.write("\t" + "identity [%]\tcoverage [%]\t" * len(samples)
                     + "\n")

    # write data
    for i, target in enumerate(targets):
        cells = [target]
        for smatch in s2matches:
            if smatch[i] is not None:
                identity, tcov = smatch[i][3], smatch[i][4]
            else:
                identity = tcov = 0
            # coverage is scaled by 100 for the "[%]" column
            cells.append("%6.2f\t%6.2f" % (identity, tcov * 100))
        sys.stdout.write("\t".join(cells) + "\n")
def main():
    """Build a per-target table of the best BLAST hit in each sample.

    Each positional argument is a BLAST -m8 output file treated as one
    sample; -k is the target FASTA whose sequence sizes are loaded with
    ``get_contig2size``.  The matches returned by ``parse_blast`` are
    reduced to the highest-scoring one per target and sample, and the
    identity and target coverage of that match are printed in a table
    with one row per target (sorted by name).  Missing matches are
    reported as 0.

    By default a sample is named by its file name up to the first dot;
    -s switches that off.

    Exits through ``parser.error`` when no BLAST file or no target FASTA
    is given, or when a named file does not exist.  Raises ``ValueError``
    when a match refers to a target that is absent from the target FASTA.
    """
    usage = "usage: %prog [options] blastout1 [blastout2 ... blastoutN] [ > out ]"
    desc = """Blast has to be run with -m8."""
    epilog = ""
    parser = OptionParser(usage=usage, version="%prog 1.0",
                          description=desc, epilog=epilog)
    parser.add_option("-k", dest="target", default="", help="target fasta")
    parser.add_option("-e", dest="evalue", default=1e-05, type=float,
                      help="E-value cut-off [%default]")
    parser.add_option("-q", dest="qcov", default=0, type=float,
                      help="query coverage [%default]")
    parser.add_option("-t", dest="tcov", default=0, type=float,
                      help="target coverage [%default]")
    parser.add_option("-s", dest="fnsplit", default=True,
                      action="store_false", help="split fnames [%default]")
    parser.add_option("-v", dest="verbose", default=False,
                      action="store_true")

    (o, fnames) = parser.parse_args()
    if o.verbose:
        sys.stderr.write("Options: %s\nArgs: %s\n" % (o, fnames))

    # at least one BLAST output is needed, otherwise there is nothing to
    # tabulate
    if not fnames:
        parser.error("Provide input file!")
    # check files
    for fn in fnames + [o.target]:
        if not fn:
            parser.error("Provide input file!")
        if not os.path.isfile(fn):
            parser.error("No such file: %s" % fn)

    # get sizes of targets
    t2len = get_contig2size(o.target)
    targets = sorted(t2len.keys())
    # map target name -> row index once, so that every match is placed
    # without scanning the target list
    row_of = dict((name, row) for row, name in enumerate(targets))

    samples = []
    s2matches = []  # one list per sample, aligned with `targets`
    for fn in fnames:
        # define sample name, split by dot unless disabled
        s = fn
        if o.fnsplit:
            s = fn.split(".")[0]
        samples.append(s)

        # best match per target; None until a match is seen
        smatch = [None for _ in targets]
        # query sizes are not loaded; parse_blast gets an empty dict
        q2len = {}
        matches = parse_blast(fn, q2len, t2len, o.evalue, o.qcov, o.tcov,
                              o.verbose)
        for (qlocus, tlocus, identity, algLen, mismatches, gaps,
             qstart, qend, tstart, tend, e, score, qcov, tcov) in matches:
            if tlocus not in row_of:
                raise ValueError("Target %s not found in %s"
                                 % (tlocus, o.target))
            i = row_of[tlocus]
            best = smatch[i]
            # store the first match or a better-scoring one
            if best is None or score > best[2]:
                smatch[i] = (qlocus, e, score, identity, tcov)
        s2matches.append(smatch)

    # write header: sample names, then the column pair names
    header = "Target"
    for s in samples:
        header += "\t%s\t" % s
    sys.stdout.write(header + "\n")
    sys.stdout.write("\t" + "identity [%]\tcoverage [%]\t" * len(samples)
                     + "\n")

    # write data
    for i, target in enumerate(targets):
        line = target
        for smatch in s2matches:
            if smatch[i] is None:
                identity = tcov = 0
            else:
                _qlocus, _e, _score, identity, tcov = smatch[i]
            # coverage is scaled by 100 for the "[%]" column
            line += "\t%6.2f\t%6.2f" % (identity, tcov * 100)
        sys.stdout.write(line + "\n")
def main():
    """Command-line entry point: collapse BLAST HSPs per query/target pair.

    Reads the BLAST -m8 output (-i), query FASTA (-j) and target FASTA
    (-k).  ``parse_blast`` is called with both coverage cut-offs set to 0
    so that every HSP passing the E-value cut-off is kept; the query and
    target coverages of all HSPs of one query/target pair are then summed
    and the pair is reported on stdout only if the sums reach the -q and
    -t cut-offs.  The summed coverages fill the last two columns; all
    other columns are taken from the last HSP stored for the pair.

    Queries without any reported pair are listed on stderr (together with
    their rejected HSPs, if any) and written to "<query>.nomatch.fa".

    Exits through ``parser.error`` when an input file is not given or
    does not exist.
    """
    usage = "usage: %prog [options] [ 1> matches.table.txt ]"
    desc = """Blast has to be run with -m8."""
    epilog = ""
    parser = OptionParser(usage=usage, version="%prog 1.0",
                          description=desc, epilog=epilog)
    parser.add_option("-i", dest="infile", default="", help="blast output")
    parser.add_option("-j", dest="query", default="", help="query fasta")
    parser.add_option("-k", dest="target", default="", help="target fasta")
    parser.add_option("-e", dest="evalue", default=1e-05, type=float,
                      help="E-value cut-off [%default]")
    parser.add_option("-q", dest="qcov", default=0.3, type=float,
                      help="query coverage [%default]")
    parser.add_option("-t", dest="tcov", default=0, type=float,
                      help="target coverage [%default]")
    parser.add_option("-v", dest="verbose", default=False,
                      action="store_true")

    (o, fnames) = parser.parse_args()
    if o.verbose:
        sys.stderr.write("Options: %s\nArgs: %s\n" % (o, fnames))

    # check files
    for fn in (o.infile, o.query, o.target):
        if not fn:
            parser.error("Provide input file!")
        if not os.path.isfile(fn):
            parser.error("No such file: %s" % fn)

    # get sizes of queries and targets
    q2len = get_contig2size(o.query)
    t2len = get_contig2size(o.target)

    # coverage filtering is postponed until HSPs are collapsed, hence 0, 0
    matches = parse_blast(o.infile, q2len, t2len, o.evalue, 0, 0, o.verbose)

    sys.stdout.write("#Query\tTarget\tIndentity\tAlignment length\tMismatches\tGaps\tQuery start\tQuery end\tTarget start\tTarget end\tE-value\tScore\tQuery aligned [%]\tTarget aligned [%]\t" + "\n")

    # query -> target -> list of HSP tuples.  Identity is stored with each
    # HSP so that the reported value belongs to the reported pair (it used
    # to leak from the last match parsed overall).
    matches_collapsed = {}
    for (qlocus, tlocus, identity, algLen, mismatches, gaps,
         qstart, qend, tstart, tend, e, score, qcov, tcov) in matches:
        hsps = matches_collapsed.setdefault(qlocus, {}).setdefault(tlocus, [])
        hsps.append((identity, algLen, mismatches, gaps, qstart, qend,
                     tstart, tend, e, score, qcov, tcov))

    row_format = "\t".join(["%s"] * 12 + ["%.1f"] * 2) + "\n"
    matched_queries = set()
    for qlocus in sorted(matches_collapsed):
        for tlocus in sorted(matches_collapsed[qlocus]):
            hsps = matches_collapsed[qlocus][tlocus]
            # coverage of the pair is the sum over its HSPs
            qCov = sum(hsp[-2] for hsp in hsps)
            tCov = sum(hsp[-1] for hsp in hsps)
            if qCov < o.qcov or tCov < o.tcov:
                continue
            # remaining columns describe the last HSP of the pair
            (identity, algLen, mismatches, gaps, qstart, qend,
             tstart, tend, e, score) = hsps[-1][:-2]
            sys.stdout.write(row_format % (
                qlocus, tlocus, identity, algLen, mismatches, gaps,
                qstart, qend, tstart, tend, e, score,
                qCov * 100, tCov * 100))
            matched_queries.add(qlocus)

    # report and save queries with no valid match
    sys.stderr.write("Queries without valid matches:\n")
    i = 0
    # context managers make sure both files are closed (and the output
    # flushed) even if parsing fails half-way
    with open(o.query + ".nomatch.fa", "w") as nomatch_out:
        with open(o.query) as query_in:
            for r in SeqIO.parse(query_in, "fasta"):
                if r.id in matched_queries:
                    continue
                i += 1
                line = "%s\t%s\t%s" % (i, r.id, len(r.seq))
                # show HSPs that were found but rejected by the cut-offs
                if r.id in matches_collapsed:
                    line += "\t%s" % str(matches_collapsed[r.id])
                sys.stderr.write(line + "\n")
                # save fasta
                nomatch_out.write(">%s\n%s\n"
                                  % (r.id, _get_formatted_seq(r.seq)))
def main():
    """Report query/target pairs whose summed HSP coverage passes cut-offs.

    Inputs are the BLAST -m8 output (-i), the query FASTA (-j) and the
    target FASTA (-k).  All HSPs passing the E-value cut-off are fetched
    from ``parse_blast`` (its coverage cut-offs are set to 0), grouped by
    query and target, and the per-HSP coverages are summed for each pair.
    Pairs reaching the -q and -t cut-offs are written to stdout: the two
    coverage columns hold the sums, the other columns the values of the
    last HSP stored for the pair.

    Queries with no reported pair are listed on stderr, with their
    rejected HSPs if there are any, and saved to "<query>.nomatch.fa".

    Exits through ``parser.error`` when an input file is not given or
    does not exist.
    """
    usage = "usage: %prog [options] [ 1> matches.table.txt ]"
    desc = """Blast has to be run with -m8."""
    epilog = ""
    parser = OptionParser(usage=usage, version="%prog 1.0",
                          description=desc, epilog=epilog)
    parser.add_option("-i", dest="infile", default="", help="blast output")
    parser.add_option("-j", dest="query", default="", help="query fasta")
    parser.add_option("-k", dest="target", default="", help="target fasta")
    parser.add_option("-e", dest="evalue", default=1e-05, type=float,
                      help="E-value cut-off [%default]")
    parser.add_option("-q", dest="qcov", default=0.3, type=float,
                      help="query coverage [%default]")
    parser.add_option("-t", dest="tcov", default=0, type=float,
                      help="target coverage [%default]")
    parser.add_option("-v", dest="verbose", default=False,
                      action="store_true")

    (o, fnames) = parser.parse_args()
    if o.verbose:
        sys.stderr.write("Options: %s\nArgs: %s\n" % (o, fnames))

    # check files
    for fn in (o.infile, o.query, o.target):
        if not fn:
            parser.error("Provide input file!")
        if not os.path.isfile(fn):
            parser.error("No such file: %s" % fn)

    # get sizes of queries and targets
    q2len = get_contig2size(o.query)
    t2len = get_contig2size(o.target)

    # no coverage filtering here: cut-offs are applied to the summed
    # coverage of each query/target pair further down
    matches = parse_blast(o.infile, q2len, t2len, o.evalue, 0, 0, o.verbose)

    matches_collapsed = {}
    sys.stdout.write("#Query\tTarget\tIndentity\tAlignment length\tMismatches\tGaps\tQuery start\tQuery end\tTarget start\tTarget end\tE-value\tScore\tQuery aligned [%]\tTarget aligned [%]\t" + "\n")
    for (qlocus, tlocus, identity, algLen, mismatches, gaps,
         qstart, qend, tstart, tend, e, score, qcov, tcov) in matches:
        # add qlocus / tlocus to matches on first sight
        if qlocus not in matches_collapsed:
            matches_collapsed[qlocus] = {}
        if tlocus not in matches_collapsed[qlocus]:
            matches_collapsed[qlocus][tlocus] = []
        # identity is kept with the HSP; previously it was not stored and
        # the output reused the identity of the last match parsed overall
        matches_collapsed[qlocus][tlocus].append(
            (identity, algLen, mismatches, gaps, qstart, qend,
             tstart, tend, e, score, qcov, tcov))

    matched_queries = set()
    for qlocus in sorted(matches_collapsed.keys()):
        for tlocus in sorted(matches_collapsed[qlocus].keys()):
            qCov = tCov = 0
            # after the loop the unpacked names hold the last HSP of the
            # pair, which supplies the non-coverage columns
            for (identity, algLen, mismatches, gaps, qstart, qend,
                 tstart, tend, e, score, qcov, tcov) \
                    in matches_collapsed[qlocus][tlocus]:
                qCov += qcov
                tCov += tcov
            if qCov < o.qcov or tCov < o.tcov:
                continue
            row = "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%.1f\t%.1f\n" % (
                qlocus, tlocus, identity, algLen, mismatches, gaps,
                qstart, qend, tstart, tend, e, score,
                qCov * 100, tCov * 100)
            sys.stdout.write(row)
            matched_queries.add(qlocus)

    # get queries with no valid match
    sys.stderr.write("Queries without valid matches:\n")
    i = 0
    # both handles are closed by the with-blocks, so the FASTA output is
    # complete on disk when main() returns
    with open(o.query + ".nomatch.fa", "w") as nomatch_fasta:
        with open(o.query) as query_fasta:
            for r in SeqIO.parse(query_fasta, "fasta"):
                if r.id in matched_queries:
                    continue
                i += 1
                line = "%s\t%s\t%s" % (i, r.id, len(r.seq))
                # append HSPs that exist but did not pass the cut-offs
                if r.id in matches_collapsed:
                    line += "\t%s" % str(matches_collapsed[r.id])
                sys.stderr.write(line + "\n")
                # save fasta
                nomatch_fasta.write(">%s\n%s\n"
                                    % (r.id, _get_formatted_seq(r.seq)))