Exemplo n.º 1
0
    def activate(self, tourfile=None, minsize=10000, backuptour=True):
        """
        Choose the active contigs of this partition and build the initial tour.

        Two start-up modes are supported:

        - "hotstart": used when `tourfile` names an existing file. The active
          contig set and the orientation signs are taken from the last tour
          stored in that file, and the file is handed to `backup()` unless
          `backuptour` is false.

        - "de novo": used when no tourfile is given, or the given one does not
          exist. Contigs whose log link density is below the lower outlier
          cutoff are dropped repeatedly until none are left to drop (contigs
          of size `minsize * 10` or more are exempt); contigs smaller than
          `minsize` are then dropped as well. The tour starts in index order
          and orientations are determined by `flip_all()`.

        The tour is stored in `self.tour` and returned as an `array.array`
        with typecode 'i'.
        """
        if tourfile and not op.exists(tourfile):
            logging.debug("Tourfile `{}` not found".format(tourfile))
            tourfile = None

        if not tourfile:
            # De novo: prune weakly linked contigs until the set is stable.
            self.report_active()
            while True:
                densities = self.calculate_densities()
                lb, ub = outlier_cutoff(densities.values())
                logging.debug(
                    "Log10(link_densities) ~ [{}, {}]".format(lb, ub))
                weak = set()
                for contig, density in densities.items():
                    if density < lb and \
                            self.tig_to_size[contig] < minsize * 10:
                        weak.add(contig)
                if not weak:
                    break
                self.active -= weak
                self.report_active()

            logging.debug("Remove contigs with size < {}".format(minsize))
            self.active = set(contig for contig in self.active
                              if self.tig_to_size[contig] >= minsize)
            # Use starting (random) order otherwise
            tour = array.array('i', range(self.N))

            # Determine orientations
            self.flip_all(tour)
        else:
            # Hotstart: recover contigs and orientations from the last tour.
            logging.debug("Importing tourfile `{}`".format(tourfile))
            names, orientations = iter_last_tour(tourfile, self)
            self.active = set(names)
            tig_to_idx = self.tig_to_idx
            indices = [tig_to_idx[name] for name in names]
            # Signs are stored in contig-index order, not tour order.
            ordered = sorted((idx, FF[o])
                             for idx, o in zip(indices, orientations))
            _, ordered_signs = zip(*ordered)
            self.signs = np.array(ordered_signs, dtype=int)
            if backuptour:
                backup(tourfile)
            tour = array.array('i', indices)

        self.report_active()
        self.tour = tour

        return tour
Exemplo n.º 2
0
Arquivo: hic.py Projeto: xuanblo/jcvi
    def activate(self, tourfile=None, minsize=10000, backuptour=True):
        """
        Set up the contig selection and starting tour for this partition.

        Depending on whether a usable `tourfile` exists, one of two modes
        runs:

        - "de novo" (no tourfile, or the file is missing): contigs with a log
          link density under the lower outlier cutoff are removed round after
          round, sparing any contig whose size is at least `minsize * 10`.
          Afterwards every contig smaller than `minsize` is removed. The tour
          is initialised in index order and `flip_all()` decides the
          orientations.

        - "hotstart" (tourfile exists): the active contigs and the orientation
          signs are read from the last tour in the file. The file is passed to
          `backup()` when `backuptour` is true.

        Returns the tour (an `array.array` of typecode 'i'), which is also
        stored on `self.tour`.
        """
        missing = tourfile and not op.exists(tourfile)
        if missing:
            logging.debug("Tourfile `{}` not found".format(tourfile))
            tourfile = None

        if not tourfile:
            self.report_active()
            while True:
                density_by_tig = self.calculate_densities()
                lb, ub = outlier_cutoff(density_by_tig.values())
                logging.debug("Log10(link_densities) ~ [{}, {}]"
                              .format(lb, ub))
                outliers = set()
                for tig, dens in density_by_tig.items():
                    if dens < lb and self.tig_to_size[tig] < minsize * 10:
                        outliers.add(tig)
                if not outliers:
                    break
                self.active -= outliers
                self.report_active()

            logging.debug("Remove contigs with size < {}".format(minsize))
            self.active = set(tig for tig in self.active
                              if self.tig_to_size[tig] >= minsize)
            # Use starting (random) order otherwise
            tour = array.array('i', range(self.N))

            # Determine orientations
            self.flip_all(tour)
        else:
            logging.debug("Importing tourfile `{}`".format(tourfile))
            tig_names, tig_orients = iter_last_tour(tourfile, self)
            self.active = set(tig_names)
            tig_to_idx = self.tig_to_idx
            idx_tour = [tig_to_idx[tig] for tig in tig_names]
            # Sort by contig index so that signs line up with index order.
            pairs = sorted((i, FF[o]) for i, o in zip(idx_tour, tig_orients))
            _, sign_values = zip(*pairs)
            self.signs = np.array(sign_values, dtype=int)
            if backuptour:
                backup(tourfile)
            tour = array.array('i', idx_tour)

        self.report_active()
        self.tour = tour

        return tour
Exemplo n.º 3
0
 def write(self):
     """
     Write the makefile to `self.makefile`.

     The file starts with an `all` rule that lists every target in sorted
     order, followed by a blank line and then one line per item yielded by
     iterating over `self`. If the destination already exists it is passed
     to `backup()` before being overwritten.

     Raises AssertionError when `self.targets` is empty.
     """
     assert self.targets, "No targets specified"
     filename = self.makefile
     if op.exists(filename):
         backup(filename)
     # The context manager closes the handle even when converting an entry to
     # text raises. Explicit write() calls replace the Python 2-only
     # `print >> fw` statement and produce the same bytes on Python 2 and 3.
     with open(filename, "w") as fw:
         fw.write("all : {0}\n\n".format(" ".join(sorted(self.targets))))
         for d in self:
             fw.write(str(d) + "\n")
     logging.debug("Makefile written to `{0}`.".format(self.makefile))
Exemplo n.º 4
0
 def write(self):
     """
     Write the makefile to `self.makefile`.

     The file starts with an `all` rule that lists the targets in the order
     they are stored in `self.targets`, followed by a blank line and then one
     line per item yielded by iterating over `self`. If the destination
     already exists it is passed to `backup()` before being overwritten.

     Raises AssertionError when `self.targets` is empty.
     """
     assert self.targets, "No targets specified"
     filename = self.makefile
     if op.exists(filename):
         backup(filename)
     # `with` guarantees the handle is closed if writing an entry fails, and
     # write() works on both Python 2 and 3 (unlike `print >> fw`).
     with open(filename, "w") as fw:
         fw.write("all : {0}\n\n".format(" ".join(self.targets)))
         for d in self:
             fw.write(str(d) + "\n")
     logging.debug("Makefile written to `{0}`.".format(self.makefile))
Exemplo n.º 5
0
 def write(self):
     """
     Write the makefile to `self.makefile`.

     Output layout:
     - an `all` rule listing every target in sorted order;
     - one line per item yielded by iterating over `self`;
     - a `clean` rule that runs `rm -rf` on the targets, in the order they
       are stored in `self.targets`.

     If the destination already exists it is passed to `backup()` before
     being overwritten.

     Raises AssertionError when `self.targets` is empty.
     """
     assert self.targets, "No targets specified"
     filename = self.makefile
     if op.exists(filename):
         backup(filename)
     # Use a context manager so the handle is closed even if printing an
     # entry raises part-way through.
     with open(filename, "w") as fw:
         print("all : {0}\n".format(" ".join(sorted(self.targets))), file=fw)
         for d in self:
             print(d, file=fw)
         print("clean :\n\trm -rf {0}\n".format(" ".join(self.targets)),
               file=fw)
     logging.debug("Makefile written to `{0}`.".format(self.makefile))
Exemplo n.º 6
0
    def start(self, path=sge):
        """
        Submit `self.cmd` through `qsub` and record the resulting job id.

        Nothing happens when `self.is_defunct` is set. Otherwise the method
        changes into `path` (when it is truthy), builds the `qsub` command
        line from the queue, thread and stream-redirection attributes, runs
        it, and stores the job id parsed from the output with `self.pat` in
        `self.jobid` ("-1" when qsub printed nothing). Existing output and
        error files are passed to `backup()`, and a summary of the submission
        is logged at debug level.

        The working directory in effect on entry is restored before
        returning, including when submission or job-id parsing raises.

        Note: `self.cmd` is rewritten in place when it has to be wrapped in
        `sh -c`.
        """
        if self.is_defunct:
            return

        cwd = os.getcwd()
        if path:
            os.chdir(path)

        try:
            # Shell commands
            if "|" in self.cmd or "&&" in self.cmd or "||" in self.cmd:
                quote = "\"" if "'" in self.cmd else "'"
                self.cmd = "sh -c {1}{0}{1}".format(self.cmd, quote)

            # qsub command (the project code is specific to jcvi)
            qsub = "qsub -P {0} -cwd".format(PCODE)
            if self.queue != "default":
                qsub += " -l {0}".format(self.queue)
            if self.threaded:
                qsub += " -pe threaded {0}".format(self.threaded)
            if self.infile:
                qsub += " -i {0}".format(self.infile)
            if self.outfile:
                qsub += " -o {0}".format(self.outfile)
            if self.errfile:
                qsub += " -e {0}".format(self.errfile)

            cmd = " ".join((qsub, self.cmd))
            # run the command and get the job-ID (important)
            output = popen(cmd, debug=False).read()

            if output.strip() != "":
                # NOTE(review): re.search() returns None when the output does
                # not match self.pat, which surfaces as an AttributeError.
                self.jobid = re.search(self.pat, output).group("id")
            else:
                self.jobid = "-1"

            msg = "[{0}] {1}".format(self.jobid, self.cmd)
            if self.infile:
                msg += " < {0} ".format(self.infile)
            if self.outfile:
                backup(self.outfile)
                msg += " > {0} ".format(self.outfile)
            if self.errfile:
                backup(self.errfile)
                msg += " 2> {0} ".format(self.errfile)

            logging.debug(msg)
        finally:
            # Always return to the caller's directory, even on failure, so a
            # failed submission does not leave the process stranded in `path`.
            os.chdir(cwd)
Exemplo n.º 7
0
Arquivo: grid.py Projeto: bennyyu/jcvi
    def start(self, path=sge):
        """
        Submit `self.cmd` through `qsub` and record the resulting job id.

        Returns immediately when `self.is_defunct` is set. Otherwise:

        1. change into `path` when it is truthy;
        2. wrap `self.cmd` in `sh -c` if it contains `|`, `&&` or `||`
           (this rewrites `self.cmd` in place);
        3. assemble the `qsub` command line from `PCODE`, `self.queue`,
           `self.threaded` and the in/out/err file attributes;
        4. run it and parse the job id from the output with `self.pat`,
           storing it in `self.jobid` ("-1" when qsub printed nothing);
        5. pass existing output/error files to `backup()` and log a summary
           at debug level.

        The working directory in effect on entry is restored before
        returning, including when any of the steps above raises.
        """
        if self.is_defunct:
            return

        cwd = os.getcwd()
        if path:
            os.chdir(path)

        try:
            # Shell commands
            if "|" in self.cmd or "&&" in self.cmd or "||" in self.cmd:
                quote = "\"" if "'" in self.cmd else "'"
                self.cmd = "sh -c {1}{0}{1}".format(self.cmd, quote)

            # qsub command (the project code is specific to jcvi)
            qsub = "qsub -P {0} -cwd".format(PCODE)
            if self.queue != "default":
                qsub += " -l {0}".format(self.queue)
            if self.threaded:
                qsub += " -pe threaded {0}".format(self.threaded)
            if self.infile:
                qsub += " -i {0}".format(self.infile)
            if self.outfile:
                qsub += " -o {0}".format(self.outfile)
            if self.errfile:
                qsub += " -e {0}".format(self.errfile)

            cmd = " ".join((qsub, self.cmd))
            # run the command and get the job-ID (important)
            output = popen(cmd, debug=False).read()

            if output.strip() != "":
                # NOTE(review): a non-matching output makes re.search()
                # return None and this line raise AttributeError.
                self.jobid = re.search(self.pat, output).group("id")
            else:
                self.jobid = "-1"

            msg = "[{0}] {1}".format(self.jobid, self.cmd)
            if self.infile:
                msg += " < {0} ".format(self.infile)
            if self.outfile:
                backup(self.outfile)
                msg += " > {0} ".format(self.outfile)
            if self.errfile:
                backup(self.errfile)
                msg += " 2> {0} ".format(self.errfile)

            logging.debug(msg)
        finally:
            # Restore the caller's directory on every exit path; previously an
            # exception above left the process inside `path`.
            os.chdir(cwd)
Exemplo n.º 8
0
    def start(self):
        """
        Run the command produced by `self.build()` and record the job id.

        The job id is parsed from the command output with `self.pat` and
        stored in `self.jobid`; "-1" is stored when the output is blank.
        Existing output and error files are passed to `backup()`, and a
        one-line summary of the submission is logged at debug level.
        """
        submit_cmd = self.build()
        # run the command and get the job-ID (important)
        submit_output = popen(submit_cmd, debug=False).read()

        if submit_output.strip() == "":
            self.jobid = "-1"
        else:
            self.jobid = re.search(self.pat, submit_output).group("id")

        # Assemble the log line piece by piece, mirroring shell redirections.
        pieces = ["[{0}] {1}".format(self.jobid, self.cmd)]
        if self.infile:
            pieces.append(" < {0} ".format(self.infile))
        if self.outfile:
            backup(self.outfile)
            pieces.append(" > {0} ".format(self.outfile))
        if self.errfile:
            backup(self.errfile)
            pieces.append(" 2> {0} ".format(self.errfile))

        logging.debug("".join(pieces))
Exemplo n.º 9
0
    def start(self):
        """
        Submit the job built by `self.build()` and remember its id.

        `self.jobid` is set to the "id" group matched by `self.pat` in the
        command output, or to "-1" if the command printed only whitespace.
        The output and error files, when configured, are passed to
        `backup()`. The submission is summarised in a debug log message.
        """
        # run the command and get the job-ID (important)
        response = popen(self.build(), debug=False).read()

        has_output = response.strip() != ""
        self.jobid = re.search(self.pat, response).group("id") \
            if has_output else "-1"

        summary = "[{0}] {1}".format(self.jobid, self.cmd)
        if self.infile:
            summary = summary + " < {0} ".format(self.infile)
        if self.outfile:
            backup(self.outfile)
            summary = summary + " > {0} ".format(self.outfile)
        if self.errfile:
            backup(self.errfile)
            summary = summary + " 2> {0} ".format(self.errfile)

        logging.debug(summary)
Exemplo n.º 10
0
def check_index(dbfile, supercat=False, go=True):
    """
    Make sure a `gmap_build` index exists for `dbfile`.

    Args:
        dbfile: path to the sequence file to index.
        supercat: when true, index `<prefix>.supercat.fasta` instead, where
            `<prefix>` is `dbfile` without its last extension. With `go` set,
            `tGBS-Generate_Pseudo_Genome.pl` is run first if the supercat
            output needs updating.
        go: when false, no external command is run; the function only works
            out and returns the index location.

    Returns:
        A `(dbdir, dbname)` tuple: the directory holding the index and the
        index name. `dbname` is the file name without its last extension, or
        the file name plus ".db" when it has no extension.
    """
    updated = False
    if supercat:
        pf = dbfile.rsplit(".", 1)[0]
        supercatfile = pf + ".supercat"
        coordsfile = supercatfile + ".coords"
        if go and need_update(dbfile, supercatfile):
            cmd = "tGBS-Generate_Pseudo_Genome.pl"
            cmd += " -f {0} -o {1}".format(dbfile, supercatfile)
            sh(cmd)
            # Rename .coords file since gmap_build will overwrite it
            coordsbak = backup(coordsfile)
            updated = True
        dbfile = supercatfile + ".fasta"

    dbdir, filename = op.split(dbfile)
    if not dbdir:
        dbdir = "."
    dbname = filename.rsplit(".", 1)[0]
    if dbname == filename:
        dbname = filename + ".db"
    # The freshness marker must be derived from the final dbname, i.e. the
    # same name handed to `gmap_build -d` below. Computing it before the
    # ".db" suffix is applied pointed it at a path under the input file
    # itself for extension-less inputs.
    # NOTE(review): assumes gmap_build writes <dbdir>/<dbname>/<dbname>.genomecomp.
    safile = op.join(dbdir, "{0}/{0}.genomecomp".format(dbname))

    if not go:
        return dbdir, dbname

    if need_update(dbfile, safile):
        cmd = "gmap_build -D {0} -d {1} {2}".format(dbdir, dbname, filename)
        sh(cmd)
    else:
        logging.error("`{0}` exists. `gmap_build` already run.".format(safile))

    # `updated` is only ever set on the supercat path with `go` true.
    if updated:
        sh("mv {0} {1}".format(coordsbak, coordsfile))

    return dbdir, dbname
Exemplo n.º 11
0
def check_index(dbfile, supercat=False, go=True):
    """
    Locate, and optionally build, the `gmap_build` index for `dbfile`.

    Args:
        dbfile: path to the sequence file to index.
        supercat: when true, the index is built for
            `<prefix>.supercat.fasta`, where `<prefix>` is `dbfile` minus its
            last extension. With `go` set,
            `tGBS-Generate_Pseudo_Genome.pl` is run first if the supercat
            output needs updating.
        go: when false, nothing is executed and only the index location is
            computed.

    Returns:
        `(dbdir, dbname)`: the index directory ("." when `dbfile` has no
        directory part) and the index name (file name without its last
        extension, or file name plus ".db" when there is no extension).
    """
    updated = False
    if supercat:
        pf = dbfile.rsplit(".", 1)[0]
        supercatfile = pf + ".supercat"
        coordsfile = supercatfile + ".coords"
        if go and need_update(dbfile, supercatfile):
            cmd = "tGBS-Generate_Pseudo_Genome.pl"
            cmd += " -f {0} -o {1}".format(dbfile, supercatfile)
            sh(cmd)
            # Rename .coords file since gmap_build will overwrite it
            coordsbak = backup(coordsfile)
            updated = True
        dbfile = supercatfile + ".fasta"

    dbdir, filename = op.split(dbfile)
    if not dbdir:
        dbdir = "."
    dbname = filename.rsplit(".", 1)[0]
    if dbname == filename:
        dbname = filename + ".db"
    # Build the marker path only once dbname is final, so the up-to-date
    # check uses the same name that is passed to `gmap_build -d`. The earlier
    # ordering used the un-suffixed name for extension-less inputs.
    # NOTE(review): assumes gmap_build writes <dbdir>/<dbname>/<dbname>.genomecomp.
    safile = op.join(dbdir, "{0}/{0}.genomecomp".format(dbname))

    if not go:
        return dbdir, dbname

    if need_update(dbfile, safile):
        cmd = "gmap_build -D {0} -d {1} {2}".format(dbdir, dbname, filename)
        sh(cmd)
    else:
        logging.error("`{0}` exists. `gmap_build` already run.".format(safile))

    # Put the backed-up .coords file back; only reachable after the supercat
    # step above actually ran.
    if updated:
        sh("mv {0} {1}".format(coordsbak, coordsfile))

    return dbdir, dbname
Exemplo n.º 12
0
 def __init__(self, filename="makefile"):
     """
     Start an empty makefile description that will be saved as `filename`.

     `filename` is passed to `backup()` first; the target list starts empty.
     """
     backup(filename)
     self.targets = []
     self.makefile = filename
Exemplo n.º 13
0
 def __init__(self, filename="makefile"):
     """
     Prepare a makefile description bound to the path `filename`.

     The path is handed to `backup()` before any state is set up, and no
     targets are registered initially.
     """
     backup(filename)
     self.makefile, self.targets = filename, []