示例#1
0
    def __init__(self,
                 prog_name,
                 root_dir,
                 bas_fofn=None,
                 ccs_fofn=None,
                 fasta_fofn=None,
                 no_log_f=False,
                 tmp_dir=None,
                 make_dirs=True):
        """
        Record project paths, optionally create the project directories,
        and optionally open the log file.

        prog_name --- name of a sub-class (stored as str(prog_name)).
        root_dir --- root directory of the whole project. There will be
                     sub-directories under it, including:
                     tmp/ --- 0/  c0, c1, ..., c9999
                          --- 1/  c10000, c10001, ..., c19999
                          ...
                          each c? folder contains data for a cluster id=c?
                     script/
                          --- 0/  gcon_job_?.sh, gcon jobs in the first iteration
                          --- 1/  gcon_job_?.sh, gcon jobs in the second iteration
                          ...
                     log/
                          --- ICE.log   Log of the ICE algorithm
                          --- 0/  log for jobs in the first iteration
                          ...
                     output/   output files go here.
        bas_fofn --- input.fofn which contains movie.bas|bax.h5 files.
        ccs_fofn --- a fofn contains movie.ccs.h5 files.
        fasta_fofn --- a fofn contains movie.bax.h5.fasta files.
        no_log_f --- DON'T write log to a log file.
        tmp_dir --- Write temporary files to tmp_dir (usually /scratch) for speed
        make_dirs --- only when this is exactly True, call mkdir on the root,
                      tmp, log, script and output directories, in that order.

        The attributes tmp_dir, log_dir, script_dir, out_dir and log_fn, and
        the method add_log, are used here but are not defined in this method.
        """
        self.prog_name = str(prog_name)

        # Every path-like argument goes through real_ppath before being stored.
        self.root_dir = real_ppath(root_dir)
        self._tmp_dir = real_ppath(tmp_dir)
        self.bas_fofn = real_ppath(bas_fofn)
        self.ccs_fofn = real_ppath(ccs_fofn)
        self.fasta_fofn = real_ppath(fasta_fofn)

        # Identity check on purpose: a merely truthy value does not trigger
        # directory creation.
        if make_dirs is True:
            # Each attribute is looked up right before its mkdir call, so the
            # root directory is created before the others are resolved.
            for dir_attr in ("root_dir", "tmp_dir", "log_dir",
                             "script_dir", "out_dir"):
                mkdir(getattr(self, dir_attr))

        self.no_log_f = no_log_f
        if no_log_f:
            # Logging to file is disabled; self.log_f is never assigned.
            return

        # NOTE(review): buffering=0 with text mode 'w' is accepted by
        # Python 2 but rejected by Python 3's open() -- confirm the target
        # interpreter before porting.
        self.log_f = open(self.log_fn, 'w', 0)
        self.add_log(msg="{p} initialized.".format(p=self.prog_name))
示例#2
0
    def run(self):
        """
        Split self.nfl_fa into chunks and touch every expected chunk file
        that the split did not produce.

        Steps:
          1. Log root_dir, nfl_fa and N at debug level.
          2. Call self.validate_inputs(), which must return a 4-tuple:
             (number of reads, reads per split, output directory,
             expected chunk files).
          3. Call splitFasta on real_ppath(self.nfl_fa) with the prefix
             "input.split".
          4. For each expected chunk file missing from splitFasta's return
             value, log it and call touch on it.

        Returns None.
        """
        logging.debug("root_dir: {d}.".format(d=self.root_dir))
        logging.debug("nfl_fa: {f}.".format(f=self.nfl_fa))
        logging.debug("Total number of chunks: N={N}.".format(N=self.N))

        # Validate input files and collect the split parameters.
        validated = self.validate_inputs()
        total_reads, chunk_size, chunk_dir, expected_fas = validated

        logging.info("Total number of reads is {n}.".format(n=total_reads))
        chunk_msg = "containing {n} reads.".format(n=chunk_size)
        logging.info("Splitting nfl_fa into chunks each " + chunk_msg)

        nfl_path = real_ppath(self.nfl_fa)
        produced_fas = splitFasta(input_fasta=nfl_path,
                                  reads_per_split=chunk_size,
                                  out_dir=chunk_dir,
                                  out_prefix="input.split")

        logging.info("Splitted files are: " + "\n".join(produced_fas))

        # Touch every expected file that splitFasta did not report.
        for expected in expected_fas:
            if expected in produced_fas:
                continue
            logging.info("touching {f}".format(f=expected))
            touch(expected)
示例#3
0
    def __init__(self, prog_name, root_dir,
                 bas_fofn=None, ccs_fofn=None, fasta_fofn=None,
                 no_log_f=False, tmp_dir=None, make_dirs=True):
        """
        Store the project locations, create the directory layout on
        request, and open the log file unless logging to file is disabled.

        prog_name --- name of a sub-class (kept as str(prog_name)).
        root_dir --- root directory of the whole project. There will be
                     sub-directories under it, including:
                     tmp/ --- 0/  c0, c1, ..., c9999
                          --- 1/  c10000, c10001, ..., c19999
                          ...
                          each c? folder contains data for a cluster id=c?
                     script/
                          --- 0/  gcon_job_?.sh, gcon jobs in the first iteration
                          --- 1/  gcon_job_?.sh, gcon jobs in the second iteration
                          ...
                     log/
                          --- ICE.log   Log of the ICE algorithm
                          --- 0/  log for jobs in the first iteration
                          ...
                     output/   output files go here.
        bas_fofn --- input.fofn which contains movie.bas|bax.h5 files.
        ccs_fofn --- a fofn contains movie.ccs.h5 files.
        fasta_fofn --- a fofn contains movie.bax.h5.fasta files.
        no_log_f --- DON'T write log to a log file.
        tmp_dir --- Write temporary files to tmp_dir (usually /scratch) for speed
        make_dirs --- directories (root, tmp, log, script, output) are
                      created only when this is exactly True.

        This method reads self.tmp_dir, self.log_dir, self.script_dir,
        self.out_dir and self.log_fn and calls self.add_log; none of them
        is defined inside this method.
        """
        self.prog_name = str(prog_name)

        # All path arguments are passed through real_ppath before storage.
        resolved_root = real_ppath(root_dir)
        self.root_dir = resolved_root
        resolved_tmp = real_ppath(tmp_dir)
        self._tmp_dir = resolved_tmp

        self.bas_fofn = real_ppath(bas_fofn)
        self.ccs_fofn = real_ppath(ccs_fofn)
        self.fasta_fofn = real_ppath(fasta_fofn)

        # `is True` is deliberate: truthy non-bool values skip creation.
        if make_dirs is True:
            # Look each directory up just before creating it, root first.
            for attr_name in ("root_dir", "tmp_dir", "log_dir",
                              "script_dir", "out_dir"):
                target_dir = getattr(self, attr_name)
                mkdir(target_dir)

        self.no_log_f = no_log_f
        write_log_file = not no_log_f
        if write_log_file:
            # NOTE(review): buffering=0 in text mode works on Python 2 only;
            # Python 3's open() rejects it -- confirm the target
            # interpreter before porting.
            self.log_f = open(self.log_fn, 'w', 0)
            init_msg = "{p} initialized.".format(p=self.prog_name)
            self.add_log(msg=init_msg)
示例#4
0
    def run(self):
        """
        Split the fasta at self.nfl_fa and make sure every expected chunk
        file exists afterwards.

        The method logs its settings (root_dir, nfl_fa, N) at debug level,
        takes a 4-tuple from self.validate_inputs() (read count, reads per
        chunk, output directory, expected chunk files), calls splitFasta
        with the prefix "input.split", and then calls touch on each expected
        file that splitFasta's return value does not contain.

        Returns None.
        """
        logging.debug("root_dir: {d}.".format(d=self.root_dir))
        logging.debug("nfl_fa: {f}.".format(f=self.nfl_fa))
        logging.debug("Total number of chunks: N={N}.".format(N=self.N))

        # Validate input files; the result must unpack into four values.
        (n_reads, per_chunk, out_dir, todo_fas) = self.validate_inputs()

        reads_msg = "Total number of reads is {n}.".format(n=n_reads)
        logging.info(reads_msg)
        split_msg = ("Splitting nfl_fa into chunks each " +
                     "containing {n} reads.".format(n=per_chunk))
        logging.info(split_msg)

        split_kwargs = dict(input_fasta=real_ppath(self.nfl_fa),
                            reads_per_split=per_chunk,
                            out_dir=out_dir,
                            out_prefix="input.split")
        done_fas = splitFasta(**split_kwargs)

        logging.info("Splitted files are: " + "\n".join(done_fas))

        # Any expected chunk that was not produced is touched.
        for chunk_fa in todo_fas:
            already_written = chunk_fa in done_fas
            if not already_written:
                logging.info("touching {f}".format(f=chunk_fa))
                touch(chunk_fa)