def bisect(args):
    """
    %prog bisect acc accession.fasta

    determine the version of the accession, based on a fasta file.
    This proceeds by a sequential search from xxxx.1 to the latest record.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text,
    # so its wording is part of the program's output.
    p = OptionParser(bisect.__doc__)
    p.set_email()

    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    acc, fastafile = args
    # Local reference record that every fetched version is compared against.
    arec = get_first_rec(fastafile)

    valid = None
    # Probe acc.1, acc.2, ... acc.99 in order and stop at the first match.
    for i in range(1, 100):
        term = "%s.%d" % (acc, i)
        try:
            query = list(batch_entrez([term], email=opts.email))
        except AssertionError:
            # The handler treats AssertionError from batch_entrez as
            # "this version does not exist", which ends the search.
            logging.debug("no records found for %s. terminating." % term)
            return

        # First tuple element is unused here (and must not shadow builtin id).
        _, term, handle = query[0]
        # next() works on both Python 2 and 3; the .next() method is 2-only.
        brec = next(SeqIO.parse(handle, "fasta"))

        # A truthy return value is taken to mean the two records agree.
        match = print_first_difference(
            arec, brec, ignore_case=True, ignore_N=True, rc=True
        )
        if match:
            valid = term
            break

    if valid:
        # print("") emits one blank line under both print-statement and
        # print-function semantics.
        print("")
        print(green("%s matches the sequence in `%s`" % (valid, fastafile)))
def bisect(args):
    """
    %prog bisect acc accession.fasta

    determine the version of the accession by querying entrez, based on a fasta
    file. This proceeds by a sequential search from xxxx.1 to the latest record.
    """
    # NOTE: the docstring above doubles as the usage text given to
    # OptionParser, so it is runtime output rather than plain documentation.
    p = OptionParser(bisect.__doc__)
    p.set_email()

    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    acc, fastafile = args
    # Reference sequence: the first record of the user-supplied FASTA file.
    arec = get_first_rec(fastafile)

    valid = None
    # Sequential scan over candidate versions acc.1 .. acc.99.
    for i in range(1, 100):
        term = "%s.%d" % (acc, i)
        try:
            query = list(batch_entrez([term], email=opts.email))
        except AssertionError:
            # AssertionError from batch_entrez is handled as "no such
            # version"; there is nothing further to try, so stop.
            logging.debug("no records found for %s. terminating." % term)
            return

        # Discard the first field instead of binding it to the builtin
        # name `id`.
        _, term, handle = query[0]
        # Use the next() builtin: iterators have no .next() method on Python 3.
        brec = next(SeqIO.parse(handle, "fasta"))

        # Truthy result is interpreted as "sequences match".
        match = print_first_difference(
            arec, brec, ignore_case=True, ignore_N=True, rc=True
        )
        if match:
            valid = term
            break

    if valid:
        # Function-call form of print is valid on Python 2 and 3 alike;
        # print("") yields the blank separator line on both.
        print("")
        print(green("%s matches the sequence in `%s`" % (valid, fastafile)))
def asciiplot(x, y, digit=1, width=50, title=None, char="="):
    """
    Print out a horizontal plot using ASCII chars.
    width is the textwidth (height) of the plot.

    x supplies the row labels (each formatted through asciiaxis() with the
    given digit), y supplies the values that set the bar lengths, title is
    an optional heading, and char is the character repeated to draw a bar.
    Everything is written to sys.stderr; nothing is returned.
    """
    ax = np.array(x)
    ay = np.array(y)

    # sys.stderr.write() is used instead of `print >> sys.stderr`, which only
    # works as a Python 2 print statement (on Python 3 it is a TypeError).
    if title:
        sys.stderr.write("{0}\n".format(dark(title)))

    # Scale so that the largest y value spans `width` characters. The result
    # is floored and cast to int because repeating a string by a float fails.
    peak = ay.max()
    if peak:
        az = (ay * width // peak).astype(int)
    else:
        # All-zero data: draw empty bars rather than dividing by zero.
        az = np.zeros(ay.shape, dtype=int)

    tx = [asciiaxis(v, digit=digit) for v in ax]
    # Right-justify labels to a common width (longest label plus one space).
    rjust = max([len(t) for t in tx]) + 1

    for label, value, barlen in zip(tx, ay, az):
        label = label.rjust(rjust)
        # Falsy values (e.g. 0) are shown as an empty string.
        value = value or ""
        bar = green(char * int(barlen))
        sys.stderr.write("{0} |{1} {2}\n".format(label, bar, value))
def asciiplot(x, y, digit=1, width=50, title=None, char="="):
    """
    Print out a horizontal plot using ASCII chars.
    width is the textwidth (height) of the plot.

    x supplies the row labels (each formatted through asciiaxis() with the
    given digit), y supplies the values that set the bar lengths, title is
    an optional heading, and char is the character repeated to draw a bar.
    Everything is written to sys.stderr; nothing is returned.
    """
    ax = np.array(x)
    ay = np.array(y)

    if title:
        print(dark(title), file=sys.stderr)

    # Scale so that the largest y value spans `width` characters. Floor
    # division on a float array still returns floats, and `char * float`
    # raises TypeError, so the bar lengths are cast to int explicitly.
    peak = ay.max()
    if peak:
        az = (ay * width // peak).astype(int)
    else:
        # All-zero data: draw empty bars rather than dividing by zero.
        az = np.zeros(ay.shape, dtype=int)

    tx = [asciiaxis(v, digit=digit) for v in ax]
    # Right-justify labels to a common width (longest label plus one space).
    rjust = max(len(t) for t in tx) + 1

    for label, value, barlen in zip(tx, ay, az):
        label = label.rjust(rjust)
        # Falsy values (e.g. 0) are shown as an empty string.
        value = value or ""
        bar = green(char * int(barlen))
        print("{0} |{1} {2}".format(label, bar, value), file=sys.stderr)
def overlap(args):
    """
    %prog overlap best.contains iid

    Visualize overlaps for a given fragment. Must be run in 4-unitigger. All
    overlaps for iid were retrieved, excluding the ones matching best.contains.
    """
    # NOTE: the docstring above is passed to OptionParser as the usage text.
    # `pickle` is imported locally: it exists on both Python 2 and 3, whereas
    # the module-level `cPickle` name is Python 2-only.
    import pickle

    from jcvi.apps.console import green

    p = OptionParser(overlap.__doc__)
    p.add_option("--maxerr", default=2, type="int", help="Maximum error rate")
    p.add_option("--canvas", default=100, type="int", help="Canvas size")
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    bestcontains, iid = args
    canvas = opts.canvas

    # Cache the set of contained read ids next to the best.contains file.
    bestcontainscache = bestcontains + ".cache"
    if need_update(bestcontains, bestcontainscache):
        exclude = set()
        with open(bestcontains) as fp:
            for row in fp:
                if row[0] == "#":
                    continue
                exclude.add(int(row.split()[0]))
        # The cache is only created after parsing succeeded, so a parse
        # failure cannot leave an empty cache file behind. Pickle streams
        # are bytes, hence the binary mode.
        with open(bestcontainscache, "wb") as fw:
            pickle.dump(exclude, fw)

    with open(bestcontainscache, "rb") as fp:
        exclude = pickle.load(fp)
    logging.debug("A total of {0} reads to exclude".format(len(exclude)))

    cmd = "overlapStore -d ../asm.ovlStore -b {0} -e {0}".format(iid)
    cmd += " -E {0}".format(opts.maxerr)

    frags = []
    for row in popen(cmd):
        r = OverlapLine(row)
        if r.bid in exclude:
            continue
        frags.append(r)

    # Also include to query fragment
    frags.append(OverlapLine("{0} {0} N 0 0 0 0".format(iid)))
    frags.sort(key=lambda x: x.ahang)

    # Determine size of the query fragment
    cmd = "gatekeeper -b {0} -e {0}".format(iid)
    cmd += " -tabular -dumpfragments ../asm.gkpStore"
    fp = popen(cmd)
    next(fp)  # first output line is discarded (presumably a header row)
    size = int(next(fp).split()[-1])

    # Determine size of canvas
    xmin = min(x.ahang for x in frags)
    xmax = max(x.bhang for x in frags)
    xsize = -xmin + size + xmax
    # Bases per canvas column. Floor division keeps the column counts below
    # integral (they are used to repeat strings); the lower bound of 1 avoids
    # a ZeroDivisionError when the whole span is narrower than the canvas.
    ratio = max(xsize // canvas, 1)

    fw = sys.stdout
    for f in frags:
        fsize = -f.ahang + size + f.bhang
        a = (f.ahang - xmin) // ratio
        b = fsize // ratio
        t = "-" * b
        # Arrow head marks the orientation of the fragment.
        if f.orientation == "N":
            t = t[:-1] + ">"
        else:
            t = "<" + t[1:]
        # Zero hangs on both ends identify the query fragment itself.
        if f.ahang == 0 and f.bhang == 0:
            t = green(t)
        c = canvas - a - b
        fw.write(" " * a)
        fw.write(t)
        fw.write(" " * c)
        # write() replaces the Python 2-only `print >> fw` statement.
        fw.write("{0} ({1})\n".format(str(f.bid).rjust(10), f.erate_adj))
from jcvi.formats.base import LineFile, must_open
from jcvi.formats.bed import Bed
from jcvi.formats.sizes import Sizes
from jcvi.formats.blast import Blast
from jcvi.graphics.base import normalize_axes, plt, savefig
from jcvi.graphics.dotplot import dotplot
from jcvi.utils.cbook import gene_name, human_size
from jcvi.utils.natsort import natsorted


# Map orientations to ints
FF = {'+': 1, '-': -1, '?': 1}
RR = {'+': -1, '-': 1, '?': -1}
LB = 18  # Lower bound for golden_array()
UB = 29  # Upper bound for golden_array()
BB = UB - LB + 1  # Span for golden_array()
ACCEPT = green("ACCEPT")
REJECT = red("REJECT")
BINSIZE = 50000


class ContigOrderingLine(object):
    """One whitespace-delimited record of a ContigOrdering file.

    Attributes set by the constructor:
      contig_id         -- first field, kept as a string
      contig_name       -- second field, truncated at the first `sep`
      strand            -- '+' when the third field is '0', otherwise '-'
      orientation_score -- fourth field, kept as a string
    """

    def __init__(self, line, sep="|"):
        tokens = line.split()

        self.contig_id = tokens[0]

        # Only the part of the name before the separator is retained.
        full_name = tokens[1]
        self.contig_name = full_name.split(sep, 1)[0]

        # Third field is a reverse-complement flag restricted to '0' or '1'.
        rc_flag = tokens[2]
        assert rc_flag in ('0', '1')
        if rc_flag == '0':
            self.strand = '+'
        else:
            self.strand = '-'

        self.orientation_score = tokens[3]
def overlap(args):
    """
    %prog overlap best.contains iid

    Visualize overlaps for a given fragment. Must be run in 4-unitigger. All
    overlaps for iid were retrieved, excluding the ones matching best.contains.
    """
    # NOTE: the docstring above is passed to OptionParser as the usage text.
    from jcvi.apps.console import green

    p = OptionParser(overlap.__doc__)
    p.add_option("--maxerr", default=2, type="int", help="Maximum error rate")
    p.add_option("--canvas", default=100, type="int", help="Canvas size")
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    bestcontains, iid = args
    canvas = opts.canvas

    # Cache the set of contained read ids next to the best.contains file.
    bestcontainscache = bestcontains + ".cache"
    if need_update(bestcontains, bestcontainscache):
        exclude = set()
        with open(bestcontains) as fp:
            for row in fp:
                if row[0] == '#':
                    continue
                exclude.add(int(row.split()[0]))
        # Pickle streams are bytes: a text-mode handle makes dump() raise
        # TypeError on Python 3. The cache is also written only after the
        # input parsed cleanly, so no empty cache is left behind on failure.
        with open(bestcontainscache, "wb") as fw:
            dump(exclude, fw)

    with open(bestcontainscache, "rb") as fp:
        exclude = load(fp)
    logging.debug("A total of {0} reads to exclude".format(len(exclude)))

    cmd = "overlapStore -d ../asm.ovlStore -b {0} -e {0}".format(iid)
    cmd += " -E {0}".format(opts.maxerr)

    frags = []
    for row in popen(cmd):
        r = OverlapLine(row)
        if r.bid in exclude:
            continue
        frags.append(r)

    # Also include to query fragment
    frags.append(OverlapLine("{0} {0} N 0 0 0 0".format(iid)))
    frags.sort(key=lambda x: x.ahang)

    # Determine size of the query fragment
    cmd = "gatekeeper -b {0} -e {0}".format(iid)
    cmd += " -tabular -dumpfragments ../asm.gkpStore"
    fp = popen(cmd)
    next(fp)  # first output line is discarded (presumably a header row)
    # next() instead of the leftover Python 2-only fp.next().
    size = int(next(fp).split()[-1])

    # Determine size of canvas
    xmin = min(x.ahang for x in frags)
    xmax = max(x.bhang for x in frags)
    xsize = -xmin + size + xmax
    # Bases per canvas column. True division would produce floats, which
    # cannot be used to repeat strings below; the lower bound of 1 avoids a
    # ZeroDivisionError when the whole span is narrower than the canvas.
    ratio = max(xsize // canvas, 1)

    fw = sys.stdout
    for f in frags:
        fsize = -f.ahang + size + f.bhang
        a = (f.ahang - xmin) // ratio
        b = fsize // ratio
        t = '-' * b
        # Arrow head marks the orientation of the fragment.
        if f.orientation == 'N':
            t = t[:-1] + '>'
        else:
            t = '<' + t[1:]
        # Zero hangs on both ends identify the query fragment itself.
        if f.ahang == 0 and f.bhang == 0:
            t = green(t)
        c = canvas - a - b
        fw.write(' ' * a)
        fw.write(t)
        fw.write(' ' * c)
        print("{0} ({1})".format(str(f.bid).rjust(10), f.erate_adj), file=fw)
from jcvi.formats.bed import Bed
from jcvi.formats.sizes import Sizes
from jcvi.formats.blast import Blast
from jcvi.graphics.base import normalize_axes, plt, savefig
from jcvi.graphics.dotplot import dotplot
from jcvi.utils.cbook import gene_name, human_size
from jcvi.utils.natsort import natsorted


# Map orientations to ints
FF = {'+': 1, '-': -1, '?': 1}
RR = {'+': -1, '-': 1, '?': -1}
LB = 18  # Lower bound for golden_array()
UB = 29  # Upper bound for golden_array()
BB = UB - LB + 1  # Span for golden_array()
ACCEPT = green("ACCEPT")
REJECT = red("REJECT")
BINSIZE = 50000


class ContigOrderingLine(object):
    """Parsed form of a single line from a ContigOrdering file.

    The line is split on whitespace and exposes:
      contig_id         -- field 1 (string)
      contig_name       -- field 2 up to, but excluding, the first `sep`
      strand            -- '+' if field 3 is '0', else '-'
      orientation_score -- field 4 (string, not converted)
    """

    def __init__(self, line, sep="|"):
        columns = line.split()

        self.contig_id = columns[0]

        # Drop everything from the separator onwards in the contig name.
        name_field = columns[1]
        self.contig_name = name_field.split(sep, 1)[0]

        # Field 3 flags reverse-complement and may only be '0' or '1'.
        is_rc = columns[2]
        assert is_rc in ('0', '1')
        if is_rc == '0':
            self.strand = '+'
        else:
            self.strand = '-'

        self.orientation_score = columns[3]