def activate(self, tourfile=None, minsize=10000, backuptour=True):
    """Select contigs in the current partition.

    This is the setup phase of the algorithm, and supports two modes:

    - "de novo": This is useful at the start of a new run where no tours
      are available. We select the strong contigs that have significant
      number of links to other contigs in the partition. We build a
      histogram of link density (# links per bp) and remove the contigs
      that appear to be outliers. The orientations are derived from the
      matrix decomposition of the pairwise strandedness matrix O.

    - "hotstart": This is useful when there was a past run, with a given
      tourfile. In this case, the active contig list and orientations are
      derived from the last tour in the file.
    """
    # A missing tourfile silently downgrades a hotstart into a de novo run.
    if tourfile and (not op.exists(tourfile)):
        logging.debug("Tourfile `{}` not found".format(tourfile))
        tourfile = None
    if tourfile:
        # -- Hotstart: recover the active set and orientations from the
        #    last tour recorded in the file.
        logging.debug("Importing tourfile `{}`".format(tourfile))
        tour, tour_o = iter_last_tour(tourfile, self)
        self.active = set(tour)
        tig_to_idx = self.tig_to_idx
        # Convert contig names to integer indices.
        tour = [tig_to_idx[x] for x in tour]
        # FF presumably maps an orientation symbol to a +/-1 sign —
        # TODO confirm against its definition.
        signs = sorted([(x, FF[o]) for (x, o) in zip(tour, tour_o)])
        _, signs = zip(*signs)  # signs now ordered by contig index
        self.signs = np.array(signs, dtype=int)
        if backuptour:
            backup(tourfile)
        tour = array.array('i', tour)
    else:
        # -- De novo: iteratively prune link-density outliers, then drop
        #    undersized contigs.
        self.report_active()
        while True:
            logdensities = self.calculate_densities()
            lb, ub = outlier_cutoff(logdensities.values())
            logging.debug("Log10(link_densities) ~ [{}, {}]".format(
                lb, ub))
            # Remove low-density contigs, but never large ones
            # (>= 10x minsize), regardless of their density.
            remove = set(x for x, d in logdensities.items()
                         if (d < lb and self.tig_to_size[x] < minsize * 10))
            if remove:
                self.active -= remove
                # Densities change after removal; loop until stable.
                self.report_active()
            else:
                break
        logging.debug("Remove contigs with size < {}".format(minsize))
        self.active = set(x for x in self.active
                          if self.tig_to_size[x] >= minsize)
        tour = range(self.N)  # Use starting (random) order otherwise
        tour = array.array('i', tour)

    # Determine orientations
    self.flip_all(tour)
    self.report_active()
    self.tour = tour

    return tour
def activate(self, tourfile=None, minsize=10000, backuptour=True):
    """Select contigs in the current partition.

    This is the setup phase of the algorithm, and supports two modes:

    - "de novo": This is useful at the start of a new run where no tours
      available. We select the strong contigs that have significant number
      of links to other contigs in the partition. We build a histogram of
      link density (# links per bp) and remove the contigs that appear as
      outliers. The orientations are derived from the matrix decomposition
      of the pairwise strandedness matrix O.

    - "hotstart": This is useful when there was a past run, with a given
      tourfile. In this case, the active contig list and orientations are
      derived from the last tour in the file.
    """
    # If the requested tourfile is absent, fall back to de novo mode.
    if tourfile and (not op.exists(tourfile)):
        logging.debug("Tourfile `{}` not found".format(tourfile))
        tourfile = None
    if tourfile:
        # Hotstart: restore active contigs and orientations from the
        # final tour stored in the file.
        logging.debug("Importing tourfile `{}`".format(tourfile))
        tour, tour_o = iter_last_tour(tourfile, self)
        self.active = set(tour)
        tig_to_idx = self.tig_to_idx
        # Map contig names to integer indices.
        tour = [tig_to_idx[x] for x in tour]
        # NOTE(review): FF looks like an orientation-symbol -> sign
        # lookup — confirm against its definition.
        signs = sorted([(x, FF[o]) for (x, o) in zip(tour, tour_o)])
        _, signs = zip(*signs)  # reorder signs by contig index
        self.signs = np.array(signs, dtype=int)
        if backuptour:
            backup(tourfile)
        tour = array.array('i', tour)
    else:
        # De novo: repeatedly strip link-density outliers, then enforce
        # the minimum contig size.
        self.report_active()
        while True:
            logdensities = self.calculate_densities()
            lb, ub = outlier_cutoff(logdensities.values())
            logging.debug("Log10(link_densities) ~ [{}, {}]"
                          .format(lb, ub))
            # Low-density contigs are removed unless they are large
            # (>= 10x minsize).
            remove = set(x for x, d in logdensities.items()
                         if (d < lb and self.tig_to_size[x] < minsize * 10))
            if remove:
                self.active -= remove
                # Removal shifts the densities; iterate until no more
                # outliers are found.
                self.report_active()
            else:
                break
        logging.debug("Remove contigs with size < {}".format(minsize))
        self.active = set(x for x in self.active
                          if self.tig_to_size[x] >= minsize)
        tour = range(self.N)  # Use starting (random) order otherwise
        tour = array.array('i', tour)

    # Determine orientations
    self.flip_all(tour)
    self.report_active()
    self.tour = tour

    return tour
def write(self):
    """Write the accumulated make rules to `self.makefile`.

    Backs up any existing makefile first, then emits an `all` target
    that depends on every registered target (sorted for deterministic
    output), followed by each rule in insertion order.
    """
    assert self.targets, "No targets specified"
    filename = self.makefile
    if op.exists(filename):
        backup(filename)
    # Context manager guarantees the handle is closed even if a rule's
    # __str__ raises midway through writing (the original leaked `fw`).
    with open(filename, "w") as fw:
        # `print >> fw, x` emitted str(x) + "\n"; fw.write reproduces the
        # exact same bytes while staying valid under Python 3.
        fw.write("all : {0}\n\n".format(" ".join(sorted(self.targets))))
        for d in self:
            fw.write("{0}\n".format(d))
    logging.debug("Makefile written to `{0}`.".format(self.makefile))
def write(self):
    """Write the accumulated make rules to `self.makefile`.

    Backs up any existing makefile first, then emits an `all` target
    that depends on every registered target, followed by each rule in
    insertion order.
    """
    assert self.targets, "No targets specified"
    filename = self.makefile
    if op.exists(filename):
        backup(filename)
    # Context manager guarantees the handle is closed even if a rule's
    # __str__ raises midway through writing (the original leaked `fw`).
    with open(filename, "w") as fw:
        # Sort the targets so the output is deterministic, consistent
        # with the sibling versions of this method.
        # `print >> fw, x` emitted str(x) + "\n"; fw.write reproduces the
        # exact same bytes while staying valid under Python 3.
        fw.write("all : {0}\n\n".format(" ".join(sorted(self.targets))))
        for d in self:
            fw.write("{0}\n".format(d))
    logging.debug("Makefile written to `{0}`.".format(self.makefile))
def write(self):
    """Write the accumulated make rules to `self.makefile`.

    Backs up any existing makefile first, then emits an `all` target
    depending on every registered target (sorted for deterministic
    output), each rule in insertion order, and finally a `clean` target
    that removes all targets.
    """
    assert self.targets, "No targets specified"
    filename = self.makefile
    if op.exists(filename):
        backup(filename)
    # Context manager guarantees the handle is closed even if a rule's
    # __str__ raises midway through writing (the original leaked `fw`).
    with open(filename, "w") as fw:
        print("all : {0}\n".format(" ".join(sorted(self.targets))), file=fw)
        for d in self:
            print(d, file=fw)
        print("clean :\n\trm -rf {0}\n".format(" ".join(self.targets)), file=fw)
    logging.debug("Makefile written to `{0}`.".format(self.makefile))
def start(self, path=sge):
    """Submit this job to SGE via qsub and record the assigned job id.

    Changes into `path` (if given) for the submission, builds the qsub
    command line from the job's attributes, parses the job id out of the
    qsub output, logs a shell-style summary, and restores the original
    working directory.
    """
    if self.is_defunct:
        return

    cwd = os.getcwd()
    if path:
        os.chdir(path)

    # Shell meta-operators need a shell: wrap the command in `sh -c`,
    # quoting with whichever quote character the command does not use.
    # NOTE(review): a command containing both ' and " would still break
    # the quoting — confirm whether that can occur.
    if "|" in self.cmd or "&&" in self.cmd or "||" in self.cmd:
        quote = "\"" if "'" in self.cmd else "'"
        self.cmd = "sh -c {1}{0}{1}".format(self.cmd, quote)

    # qsub command (the project code is specific to jcvi)
    qsub = "qsub -P {0} -cwd".format(PCODE)
    if self.queue != "default":
        qsub += " -l {0}".format(self.queue)
    if self.threaded:
        qsub += " -pe threaded {0}".format(self.threaded)
    if self.infile:
        qsub += " -i {0}".format(self.infile)
    if self.outfile:
        qsub += " -o {0}".format(self.outfile)
    if self.errfile:
        qsub += " -e {0}".format(self.errfile)

    cmd = " ".join((qsub, self.cmd))
    # Run the command and get the job-ID (important).
    output = popen(cmd, debug=False).read()
    # Guard against non-empty output that does not match the pattern:
    # re.search would return None and .group() would raise AttributeError.
    match = re.search(self.pat, output) if output.strip() else None
    self.jobid = match.group("id") if match else "-1"

    # Log a shell-style summary of the submitted command with its
    # redirections; stdout/stderr targets are backed up first so qsub
    # does not clobber results from a previous run.
    msg = "[{0}] {1}".format(self.jobid, self.cmd)
    if self.infile:
        msg += " < {0} ".format(self.infile)
    if self.outfile:
        backup(self.outfile)
        msg += " > {0} ".format(self.outfile)
    if self.errfile:
        backup(self.errfile)
        msg += " 2> {0} ".format(self.errfile)

    logging.debug(msg)
    os.chdir(cwd)
def start(self):
    """Submit the job built by `self.build()` and record its job id.

    Parses the scheduler's job id from the submission output, then logs
    a shell-style summary with any I/O redirections; existing stdout and
    stderr files are backed up so the new run does not clobber them.
    """
    cmd = self.build()
    # Run the command and get the job-ID (important).
    output = popen(cmd, debug=False).read()
    # Guard against non-empty output that does not match the pattern:
    # re.search would return None and .group() would raise AttributeError.
    match = re.search(self.pat, output) if output.strip() else None
    self.jobid = match.group("id") if match else "-1"

    msg = "[{0}] {1}".format(self.jobid, self.cmd)
    if self.infile:
        msg += " < {0} ".format(self.infile)
    if self.outfile:
        backup(self.outfile)
        msg += " > {0} ".format(self.outfile)
    if self.errfile:
        backup(self.errfile)
        msg += " 2> {0} ".format(self.errfile)
    logging.debug(msg)
def check_index(dbfile, supercat=False, go=True):
    """Ensure a GMAP index exists for `dbfile`; return (dbdir, dbname).

    With `supercat=True`, first collapses the FASTA into a pseudo-genome
    (via tGBS-Generate_Pseudo_Genome.pl) and indexes that instead. With
    `go=False`, performs no external commands and only computes and
    returns the (dbdir, dbname) pair.
    """
    if supercat:
        updated = False
        pf = dbfile.rsplit(".", 1)[0]
        supercatfile = pf + ".supercat"
        coordsfile = supercatfile + ".coords"
        if go and need_update(dbfile, supercatfile):
            cmd = "tGBS-Generate_Pseudo_Genome.pl"
            cmd += " -f {0} -o {1}".format(dbfile, supercatfile)
            sh(cmd)
            # Rename .coords file since gmap_build will overwrite it
            coordsbak = backup(coordsfile)
            updated = True
        dbfile = supercatfile + ".fasta"

    dbdir, filename = op.split(dbfile)
    if not dbdir:
        dbdir = "."
    dbname = filename.rsplit(".", 1)[0]
    safile = op.join(dbdir, "{0}/{0}.genomecomp".format(dbname))
    # An extension-less filename would make dbname collide with the file
    # itself; disambiguate with a ".db" suffix.
    if dbname == filename:
        dbname = filename + ".db"

    if not go:
        return dbdir, dbname

    if need_update(dbfile, safile):
        cmd = "gmap_build -D {0} -d {1} {2}".format(dbdir, dbname, filename)
        sh(cmd)
    else:
        # Informational, not an error: the index is already up to date
        # (was logging.error, inconsistent with the file's debug-level
        # status messages).
        logging.debug("`{0}` exists. `gmap_build` already run.".format(safile))

    # Restore the .coords file that gmap_build overwrote.
    if go and supercat and updated:
        sh("mv {0} {1}".format(coordsbak, coordsfile))

    return dbdir, dbname
def __init__(self, filename="makefile"): backup(filename) self.makefile = filename self.targets = []