def makeblastdb(cls):
    """Build the nucleotide BLAST database for the reference FASTA.

    Nothing is done when utl.check_for_files already reports at least one
    "<glv.conf.blastdb>*.nsq" file. Otherwise a makeblastdb command line is
    assembled and handed to utl.try_exec:

    * reference ending in ".gz": the file is decompressed with
      "bgzip -cd" and piped into makeblastdb on stdin ("-in -");
    * any other extension: makeblastdb reads the file directly.

    Reads glv.conf.blastdb, glv.conf.ref_fasta, glv.conf.blastdb_title and
    (only for ".gz" input) glv.conf.parallele_full_thread.

    `cls` is never used in the body.
    """
    # An earlier run leaves *.nsq files behind; their presence means the
    # database does not have to be rebuilt.
    existing_nsq = utl.check_for_files(
        "{}*.nsq".format(glv.conf.blastdb))
    if len(existing_nsq) > 0:
        return

    fasta_path = glv.conf.ref_fasta
    _, fasta_ext = os.path.splitext(fasta_path)

    if fasta_ext == '.gz':
        # Compressed reference: stream the decompressed sequence into
        # makeblastdb instead of writing an uncompressed copy to disk.
        decompress_cmd = "bgzip -cd -@ {} {}".format(
            glv.conf.parallele_full_thread, fasta_path)
        build_cmd = "makeblastdb -in - -title {} -dbtype nucl -out {}".format(
            glv.conf.blastdb_title, glv.conf.blastdb)
        command = "{} | {}".format(decompress_cmd, build_cmd)
    else:
        command = "makeblastdb -in {} -title {} -dbtype nucl -out {}".format(
            fasta_path, glv.conf.blastdb_title, glv.conf.blastdb)

    utl.try_exec(command)
def _copy_ini_file(self):
    """Copy the ini file into the output directory under its own base name.

    Reads:
        self.ini_file_path: path of the ini file to copy.
        self.out_dir: directory that receives the copy.

    The destination path is passed to utl.save_to_tmpfile before the copy
    (presumably to preserve a copy left by an earlier run -- confirm against
    utl), then the file is copied with "cp" through utl.try_exec.
    """
    source_ini = self.ini_file_path
    ini_name = os.path.basename(source_ini)
    destination = "{}/{}".format(self.out_dir, ini_name)

    # The back-up call must come first: the cp below overwrites the
    # destination.
    utl.save_to_tmpfile(destination)

    utl.try_exec("cp {} {}".format(source_ini, destination))
def prepare_ref(self):
    """Stage the user's reference FASTA in the reference dir and index it.

    Steps (every file-producing step is skipped when its output exists):

    1. Check that glv.conf.ref_fasta_user is a file; if not, log the fact
       and terminate with sys.exit(1).
    2. Symlink the user's file into glv.conf.ref_dir with utl.ln_s.
    3. Produce a bgzip-compressed copy in glv.conf.ref_dir: a ".gz" input
       is decompressed and recompressed with bgzip, any other input is
       compressed as-is.
    4. Index the compressed copy with "samtools faidx".
    5. Call self._read_fasta() and Blast.makeblastdb().

    Sets on glv.conf: blastdb_title, ref_fasta_slink_system, blastdb,
    ref_fasta, ref_fasta_fai and ref_fasta_pickle.

    Returns:
        self, so that calls can be chained.
    """
    user_fasta = glv.conf.ref_fasta_user

    # ref_fasta_user: existence confirmation
    if os.path.isfile(user_fasta):
        log.info("{} found.".format(user_fasta))
    else:
        log.info("{} not found. exit.".format(user_fasta))
        sys.exit(1)

    # Only the last extension matters: "genome.fa.gz" -> ("genome.fa", ".gz")
    basename_user = os.path.basename(user_fasta)
    basename_without_ext, ext = os.path.splitext(basename_user)
    is_gz = ext == '.gz'

    # The link keeps the user's file name plus a marker suffix; the BLAST
    # title drops a trailing ".gz" so it names the sequence file itself.
    if is_gz:
        link_suffix = '.org_slink.gz'
        glv.conf.blastdb_title = basename_without_ext
    else:
        link_suffix = '.org_slink'
        glv.conf.blastdb_title = basename_user

    glv.conf.ref_fasta_slink_system = "{}/{}{}".format(
        glv.conf.ref_dir, basename_user, link_suffix)

    glv.conf.blastdb = "{}/{}{}".format(
        glv.conf.ref_dir, glv.conf.blastdb_title, '.blastdb')
    log.info("glv.conf.blastdb={}".format(glv.conf.blastdb))

    # NOTE(review): os.path.isfile follows symlinks, so a dangling link left
    # by an earlier run is reported as missing and utl.ln_s is called on a
    # path that already exists -- confirm utl.ln_s copes with that.
    if os.path.isfile(glv.conf.ref_fasta_slink_system):
        log.info("{} exist.".format(glv.conf.ref_fasta_slink_system))
    else:
        utl.ln_s(user_fasta, glv.conf.ref_fasta_slink_system)

    log.info("ext ({}).".format(ext))

    # NOTE(review): the paths below are interpolated into command strings
    # unquoted; whether that is safe for paths with spaces depends on how
    # utl.try_exec runs the string -- confirm.
    if is_gz:
        # The input may be plain gzip, so it is decompressed and
        # recompressed with bgzip into ref_dir under the user's file name.
        glv.conf.ref_fasta = "{}/{}".format(
            glv.conf.ref_dir, basename_user)
        log.info("ext {}, glv.conf.ref_fasta={}.".format(
            ext, glv.conf.ref_fasta))
        # Both ends of the pipe get the full thread count (the original
        # author left the question "half of thread?" open here).
        cmd1 = 'bgzip -cd -@ {} {} | bgzip -@ {} > {}'.format(
            glv.conf.parallele_full_thread,
            glv.conf.ref_fasta_slink_system,
            glv.conf.parallele_full_thread,
            glv.conf.ref_fasta)
    else:
        # Uncompressed input: compress into ref_dir as "<name>.gz".
        glv.conf.ref_fasta = "{}/{}{}".format(
            glv.conf.ref_dir, basename_user, '.gz')
        cmd1 = 'bgzip -c -@ {} {} > {}'.format(
            glv.conf.parallele_full_thread,
            glv.conf.ref_fasta_slink_system,
            glv.conf.ref_fasta)

    # execute
    if os.path.isfile(glv.conf.ref_fasta):
        log.info("{} exist.".format(glv.conf.ref_fasta))
    else:
        log.info("{} not exist. do cmd={}".format(
            glv.conf.ref_fasta, cmd1))
        utl.try_exec(cmd1)

    # make fai file
    # The template has a single placeholder, so only the FASTA path is
    # passed (a stray glv.conf.log_dir argument used to be silently ignored).
    cmd2 = 'samtools faidx {}'.format(glv.conf.ref_fasta)
    glv.conf.ref_fasta_fai = "{}{}".format(glv.conf.ref_fasta, '.fai')

    if os.path.isfile(glv.conf.ref_fasta_fai):
        log.info("{} exist.".format(glv.conf.ref_fasta_fai))
    else:
        utl.try_exec(cmd2)

    # Pickle path is set before _read_fasta() runs (presumably consumed
    # there -- confirm).
    glv.conf.ref_fasta_pickle = "{}{}".format(
        glv.conf.ref_fasta, '.pickle')
    self._read_fasta()

    # ref to makeblastdb
    Blast.makeblastdb()

    return self