def __init__(self, prog_name, root_dir, bas_fofn=None, ccs_fofn=None,
             fasta_fofn=None, no_log_f=False, tmp_dir=None, make_dirs=True):
    """Record project paths, optionally create directories and open the log.

    prog_name --- name of a sub-class
    root_dir --- root directory of the whole project. There will be
                 sub-directories under it, including:
        tmp/
             --- 0/  c0, c1, ..., c9999
             --- 1/  c10000, c10001, ..., c19999
             ...
             each c? folder contains data for a cluster id=c?
        script/
             --- 0/  gcon_job_?.sh, gcon jobs in the first iteration
             --- 1/  gcon_job_?.sh, gcon jobs in the second iteration
             ...
        log/
             --- ICE.log   Log of the ICE algorithm
             --- 0/  log for jobs in the first iteration
             ...
        output/   output files go here.
    bas_fofn --- input.fofn which contains movie.bas|bax.h5 files.
    ccs_fofn --- a fofn contains movie.ccs.h5 files.
    fasta_fofn --- a fofn contains movie.bax.h5.fasta files.
    no_log_f --- DON'T write log to a log file.
    tmp_dir --- Write temporary files to tmp_dir (usually /scratch)
                for speed
    make_dirs --- only when this is exactly True, call mkdir on root_dir,
                  tmp_dir, log_dir, script_dir and out_dir.
    """
    self.prog_name = str(prog_name)

    # Every path argument goes through real_ppath. The optional ones default
    # to None -- presumably real_ppath passes None through; confirm.
    self.root_dir = real_ppath(root_dir)
    self._tmp_dir = real_ppath(tmp_dir)
    self.bas_fofn = real_ppath(bas_fofn)
    self.ccs_fofn = real_ppath(ccs_fofn)
    self.fasta_fofn = real_ppath(fasta_fofn)

    # Identity check on purpose: truthy values other than True do not
    # trigger directory creation.
    if make_dirs is True:
        mkdir(self.root_dir)
        mkdir(self.tmp_dir)
        mkdir(self.log_dir)
        mkdir(self.script_dir)
        mkdir(self.out_dir)

    self.no_log_f = no_log_f
    if not no_log_f:
        # Text-mode files cannot be opened unbuffered (buffering=0) on
        # Python 3: open() raises ValueError. Line buffering (1) is accepted
        # by both Python 2 and Python 3 and still flushes at every newline.
        # The handle is deliberately kept open on the instance for later
        # log writes.
        self.log_f = open(self.log_fn, 'w', 1)
        # NOTE(review): the flattened source does not show whether this call
        # was nested under the branch above; it is kept here so that nothing
        # is logged through add_log when no log file was opened -- confirm.
        self.add_log(msg="{p} initialized.".format(p=self.prog_name))
def run(self):
    """Split self.nfl_fa into chunks and touch any expected chunk not made.

    Logs root_dir, nfl_fa and N at debug level, takes the split plan from
    self.validate_inputs(), splits the input with splitFasta into files
    prefixed "input.split", and finally touches every file named in the
    plan that splitFasta did not return. Returns None.
    """
    root_msg = "root_dir: {d}.".format(d=self.root_dir)
    logging.debug(root_msg)
    input_msg = "nfl_fa: {f}.".format(f=self.nfl_fa)
    logging.debug(input_msg)
    chunk_msg = "Total number of chunks: N={N}.".format(N=self.N)
    logging.debug(chunk_msg)

    # validate_inputs() is expected to hand back exactly four values.
    plan = self.validate_inputs()
    total_reads, chunk_size, split_dir, expected_files = plan

    logging.info("Total number of reads is {n}.".format(n=total_reads))
    logging.info(
        "Splitting nfl_fa into chunks each containing {n} reads.".format(
            n=chunk_size))

    produced_files = splitFasta(
        input_fasta=real_ppath(self.nfl_fa),
        reads_per_split=chunk_size,
        out_dir=split_dir,
        out_prefix="input.split")
    logging.info("Splitted files are: " + "\n".join(produced_files))

    # Touch every planned chunk file that the split did not produce.
    for expected in expected_files:
        if expected in produced_files:
            continue
        logging.info("touching {f}".format(f=expected))
        touch(expected)
def run(self):
    """Chunk the nfl_fa FASTA and touch any planned chunk file not produced.

    Steps, in order:
      1. emit debug messages for root_dir, nfl_fa and N;
      2. call self.validate_inputs() to obtain the read count, the reads
         per chunk, the output directory and the list of planned files;
      3. call splitFasta on the input with prefix "input.split";
      4. touch each planned file that splitFasta did not report.
    Nothing is returned.
    """
    logging.debug("root_dir: {d}.".format(d=self.root_dir))
    logging.debug("nfl_fa: {f}.".format(f=self.nfl_fa))
    logging.debug("Total number of chunks: N={N}.".format(N=self.N))

    # Unpack the four-element result of the input validation.
    n_reads, per_chunk, chunk_dir, planned = self.validate_inputs()

    reads_msg = "Total number of reads is {n}.".format(n=n_reads)
    logging.info(reads_msg)
    split_msg = "Splitting nfl_fa into chunks each containing {n} reads."
    logging.info(split_msg.format(n=per_chunk))

    source_fasta = real_ppath(self.nfl_fa)
    written = splitFasta(input_fasta=source_fasta,
                         reads_per_split=per_chunk,
                         out_dir=chunk_dir,
                         out_prefix="input.split")
    logging.info("Splitted files are: " + "\n".join(written))

    # Any planned file missing from the split output is touched.
    for planned_fa in planned:
        if planned_fa in written:
            continue
        logging.info("touching {f}".format(f=planned_fa))
        touch(planned_fa)