def __init__(self, root_dir, fasta_filenames, fastq_filenames, ref_fasta,
             out_pickle, ice_opts, sge_opts, cpus, tmp_dir=None):
    """Set up an IceAllPartials run over splitted non-full-length reads.

    fasta_filenames --- a list of splitted nfl fasta files.
    fastq_filenames --- matching fastq files, or None if unavailable.
    ref_fasta --- (unpolished) consensus isoforms
    out_pickle --- a pickle file with all nfl fasta reads
    root_dir --- ICE root output directory
    tmp_dir --- if not None, write temporary clusters, dazz, las files
                to the given temporary directory
    sge_opts --- params for SGE environment, including
        use_sge : use SGE or not
        max_sge_jobs: maximum number of sub-jobs submitted
        unique_id : unique qsub job id, important that this
                    DOES NOT CONFLICT!
    cpus --- number of CPUs to use per SGE job or per local job.

    Raises IOError if any fastq file in fastq_filenames does not exist.
    """
    self.prog_name = "IceAllPartials"
    IceFiles.__init__(self, prog_name=self.prog_name,
                      root_dir=root_dir, tmp_dir=tmp_dir)

    # _validate_inputs normalizes/realpaths the fasta inputs and the
    # reference; fastq files are only checked for existence below.
    self.fasta_filenames, self.ref_fasta = \
        self._validate_inputs(fasta_filenames=fasta_filenames,
                              ref_fasta=ref_fasta)

    if fastq_filenames is not None:
        for fq in fastq_filenames:
            # Explicit check instead of `assert`: assertions are
            # stripped under `python -O`, which would silently skip
            # input validation.
            if not op.exists(fq):
                raise IOError("Input fastq file %s does not exist." % fq)
    self.fastq_filenames = fastq_filenames  # note: could be None

    self.out_pickle = out_pickle
    self.ice_opts = ice_opts
    self.sge_opts = sge_opts
    # Number of CPUs to use per SGE job or per local job.
    self.cpus = cpus

    self.add_log("Making dir for mapping noFL reads: " + self.nfl_dir)
    mkdir(self.nfl_dir)

    self.add_log("input fasta files are: " +
                 ", ".join(self.fasta_filenames))
    self.add_log("temp pickle files are: " +
                 ", ".join(self.pickle_filenames))
    self.add_log("out pickle file is: " + self.out_pickle)
    self.add_log("temp directory is: " + str(self.tmp_dir))
def __init__(self, root_dir, nfl_fa, bas_fofn, ccs_fofn, ice_opts,
             sge_opts, ipq_opts, fasta_fofn=None, tmp_dir=None):
    """Initialize the IcePolish driver.

    root_dir --- IceFiles.root_dir, usually data/clusterOutDir
    nfl_fa --- non-full-length reads in fasta, e.g., isoseq_nfl.fasta
    bas_fofn --- e.g. input.fofn of bas|bax.h5 files
    ccs_fofn --- e.g. ccs.fofn of ccs files.
    ipq_opts --- IceQuiverHQLQOptions
        qv_trim_5: ignore QV of n bases in the 5' end
        qv_trim_3: ignore QV of n bases in the 3' end
        hq_quiver_min_accuracy: minimum allowed quiver accuracy to
            mark an isoform as high quality
        hq_isoforms_fa|fq: polished, high quality consensus isoforms
            in fasta|q
        lq_isoforms_fa|fq: polished, low quality consensus isoforms
            in fasta|q
    """
    IceFiles.__init__(self, prog_name="IcePolish", root_dir=root_dir,
                      bas_fofn=bas_fofn, ccs_fofn=ccs_fofn,
                      fasta_fofn=fasta_fofn, tmp_dir=tmp_dir)

    self.nfl_fa = realpath(nfl_fa)

    # Option bundles for the ICE, SGE and quiver stages.
    self.ice_opts = ice_opts
    self.sge_opts = sge_opts
    self.ipq_opts = ipq_opts

    self.add_log("ece_penalty: {0}, ece_min_len: {1}".format(
        self.ice_opts.ece_penalty, self.ice_opts.ece_min_len))

    # Sub-stage workers; populated later by the run pipeline.
    self.icep = None   # IceAllPartials
    self.iceq = None   # IceQuiver
    self.icepq = None  # IceQuiverPostprocess
    self._nfl_splitted_fas = None

    self.validate_inputs()
def __init__(self, root_dir, fasta_filenames, ref_fasta, out_pickle,
             sge_opts, ccs_fofn=None, tmp_dir=None):
    """Set up an IceAllPartials run over splitted non-full-length reads.

    fasta_filenames --- a list of splitted nfl fasta files.
    ref_fasta --- (unpolished) consensus isoforms
    out_pickle --- a pickle file with all nfl fasta reads
    ccs_fofn --- should be reads_of_insert.fofn or None
    root_dir --- ICE root output directory
    tmp_dir --- if not None, write temporary clusters, dazz, las files
                to the given temporary directory
    sge_opts --- params for SGE environment, including
        use_sge : use SGE or not
        max_sge_jobs: maximum number of sub-jobs submitted
        unique_id : unique qsub job id, important that this
                    DOES NOT CONFLICT!
        blasr_nproc: blasr -nproc param, number of threads per cpu.
    """
    self.prog_name = "IceAllPartials"
    IceFiles.__init__(self, prog_name=self.prog_name,
                      root_dir=root_dir, tmp_dir=tmp_dir)

    # _validate_inputs normalizes the input paths before they are stored.
    (self.fasta_filenames,
     self.ref_fasta,
     self.ccs_fofn) = self._validate_inputs(fasta_filenames=fasta_filenames,
                                            ref_fasta=ref_fasta,
                                            ccs_fofn=ccs_fofn)

    self.out_pickle = out_pickle
    self.sge_opts = sge_opts

    self.add_log("Making dir for mapping noFL reads: " + self.nfl_dir)
    mkdir(self.nfl_dir)

    # Record the resolved inputs/outputs in the log, one entry per item.
    for message in ("input fasta files are: " +
                    ", ".join(self.fasta_filenames),
                    "temp pickle files are: " +
                    ", ".join(self.pickle_filenames),
                    "out pickle file is: " + self.out_pickle,
                    "temp directory is: " + str(self.tmp_dir)):
        self.add_log(message)
def __init__(self, root_dir, nfl_fa, bas_fofn, ccs_fofn, ice_opts,
             sge_opts, ipq_opts, fasta_fofn=None, tmp_dir=None):
    """Construct the IcePolish stage.

    root_dir --- IceFiles.root_dir, usually data/clusterOutDir
    nfl_fa --- non-full-length reads in fasta, e.g., isoseq_nfl.fasta
    bas_fofn --- e.g. input.fofn of bas|bax.h5 files
    ccs_fofn --- e.g. ccs.fofn of ccs files.
    ipq_opts --- IceQuiverHQLQOptions
        qv_trim_5: ignore QV of n bases in the 5' end
        qv_trim_3: ignore QV of n bases in the 3' end
        hq_quiver_min_accuracy: minimum allowed quiver accuracy to
            mark an isoform as high quality
        hq_isoforms_fa|fq: polished, high quality consensus isoforms
            in fasta|q
        lq_isoforms_fa|fq: polished, low quality consensus isoforms
            in fasta|q
    """
    IceFiles.__init__(self,
                      prog_name="IcePolish",
                      root_dir=root_dir,
                      bas_fofn=bas_fofn,
                      ccs_fofn=ccs_fofn,
                      fasta_fofn=fasta_fofn,
                      tmp_dir=tmp_dir)

    # Resolve the nfl fasta to an absolute, symlink-free path.
    self.nfl_fa = realpath(nfl_fa)
    self.ice_opts = ice_opts
    self.sge_opts = sge_opts
    self.ipq_opts = ipq_opts

    msg = "ece_penalty: {0}, ece_min_len: {1}".format(
        self.ice_opts.ece_penalty, self.ice_opts.ece_min_len)
    self.add_log(msg)

    # Placeholders for the sub-stage objects created during execution.
    self.icep = None    # IceAllPartials.
    self.iceq = None    # IceQuiver
    self.icepq = None   # IceQuiverPostprocess
    self._nfl_splitted_fas = None

    self.validate_inputs()