示例#1
0
文件: cmd.py 项目: COL-IU/mgescan
    def nonltr(self):
        """Run the non-LTR program and convert its result to GFF3.

        Two command templates are handed to ``self.run_cmd`` in order:
        ``nonltr/nonltr.py`` (through ``python``) and ``nonltr/toGFF.py``,
        both located under ``self.base_path``. The ``%(name)s`` placeholders
        are passed through unexpanded; presumably ``self.run_cmd`` fills them
        from instance attributes -- TODO confirm.

        Side effects: sets ``self.nonltr_out_path`` and
        ``self.nonltr_gff_path`` and prints a start and a finish message
        (the latter with the elapsed wall-clock seconds).
        """
        # Single-argument print() behaves the same on Python 2 and 3.
        print('nonltr: starting')
        start = time.time()

        # nonltr
        cmd0 = ("python " + self.base_path + "/nonltr/nonltr.py "
                "%(genome_dir)s "
                "%(data_dir)s ")
        if self.mpi_enabled:
            # The MPI option is only appended when it is set.
            cmd0 = cmd0 + " --mpi=%(mpi_enabled)s"
        self.run_cmd(cmd0)

        # gff3
        self.nonltr_out_path = utils.get_abspath(self.data_dir + "/info/full/")
        self.nonltr_gff_path = utils.get_abspath(
            self.data_dir + "/info/nonltr.gff3")
        cmd1 = (self.base_path +
                "/nonltr/toGFF.py %(nonltr_out_path)s %(nonltr_gff_path)s")
        self.run_cmd(cmd1)

        elapsed = int(round(time.time() - start))
        print('nonltr: finishing (elapsed time: {0} secs)'.format(elapsed))
示例#2
0
文件: nonltr.py 项目: COL-IU/mgescan
    def forward_strand(self):
        """Launch one ``self.cmd_hmm`` process per file in ``self.plus_dir``.

        Each file found at the top level of ``self.plus_dir`` is renamed to
        the value returned by ``getid`` for it, then a child process is
        started with ``--dna``, ``--out`` and ``--hmmerv`` options. The
        children are tracked in ``self.processes``; once all of them have
        exited, ``self.post_processing_after_forward_strand()`` is called.
        """
        out_dir = self.plus_out_dir
        # Only the top level is inspected. The default keeps `filenames`
        # defined when os.walk yields nothing (e.g. the directory is missing).
        dirpath, _dirnames, filenames = next(
            os.walk(self.plus_dir), (self.plus_dir, [], []))
        for name in filenames:
            file_path = utils.get_abspath(dirpath + "/" + name)

            # Rename to sequence id
            sid = getid(file_path)
            new_path = utils.get_abspath(dirpath + "/" + sid)
            os.rename(file_path, new_path)

            command = self.cmd_hmm + (" --dna=%s --out=%s --hmmerv=%s" %
                                      (new_path, out_dir, self.hmmerv))
            # NOTE(review): stdout/stderr are piped but never read here; a
            # child that fills the pipe buffer would block -- confirm the
            # children stay quiet or redirect their output.
            self.processes.add(
                Popen(command.split(), stdout=PIPE, stderr=PIPE))

            # Wait until a slot is free. A single check-and-reap (the former
            # `if`) let the pool grow past max_processes while the children
            # were still running.
            while (self.processes and
                   len(self.processes) >= self.max_processes):
                time.sleep(.1)
                self.processes.difference_update(
                    [p for p in self.processes if p.poll() is not None])

        # Drain whatever is still running before post-processing.
        for p in self.processes:
            if p.poll() is None:
                p.wait()

        self.post_processing_after_forward_strand()
示例#3
0
    def nonltr(self):
        """Run the non-LTR step, then export its output as GFF3.

        Builds the command template for ``nonltr/nonltr.py`` (run through
        ``python``) and the one for ``nonltr/toGFF.py`` and passes each to
        ``self.run_cmd``. The ``%(name)s`` placeholders are not expanded
        here; presumably ``self.run_cmd`` substitutes instance attributes
        for them -- TODO confirm.

        Side effects: assigns ``self.nonltr_out_path`` and
        ``self.nonltr_gff_path`` and prints start/finish messages, the last
        one including the elapsed seconds.
        """
        # print() with one argument is valid on both Python 2 and Python 3.
        print('nonltr: starting')
        start = time.time()

        # nonltr
        cmd0 = "python " + self.base_path + "/nonltr/nonltr.py " + \
            "%(genome_dir)s " + \
            "%(data_dir)s "
        if self.mpi_enabled:
            # Appended only when MPI is requested.
            cmd0 += " --mpi=%(mpi_enabled)s"
        self.run_cmd(cmd0)

        # gff3
        self.nonltr_out_path = utils.get_abspath(self.data_dir + "/info/full/")
        self.nonltr_gff_path = utils.get_abspath(self.data_dir +
                                                 "/info/nonltr.gff3")
        cmd1 = (self.base_path +
                "/nonltr/toGFF.py %(nonltr_out_path)s %(nonltr_gff_path)s")
        self.run_cmd(cmd1)

        end = time.time()
        print('nonltr: finishing (elapsed time: {0} secs)'.format(
            int(round(end - start))))
示例#4
0
    def backward_strand(self):
        """Launch one ``self.cmd_hmm`` process per file in ``self.minus_dir``.

        Each file found at the top level of ``self.minus_dir`` is renamed to
        the value returned by ``getid`` for it, then a child process is
        started with ``--dna``, ``--out`` and ``--hmmerv`` options. The
        children are tracked in ``self.processes``; once all of them have
        exited, ``self.post_processing_after_reverse_strand()`` is called.
        """
        out_dir = self.minus_out_dir
        # Only the top level is inspected. The default keeps `filenames`
        # defined when os.walk yields nothing (e.g. the directory is missing).
        dirpath, _dirnames, filenames = next(
            os.walk(self.minus_dir), (self.minus_dir, [], []))
        for name in filenames:
            file_path = utils.get_abspath(dirpath + "/" + name)

            # Rename to sequence id
            sid = getid(file_path)
            new_path = utils.get_abspath(dirpath + "/" + sid)
            os.rename(file_path, new_path)

            command = self.cmd_hmm + (" --dna=%s --out=%s --hmmerv=%s" %
                                      (new_path, out_dir, self.hmmerv))
            # NOTE(review): stdout/stderr are piped but never read here; a
            # child that fills the pipe buffer would block -- confirm the
            # children stay quiet or redirect their output.
            self.processes.add(
                Popen(command.split(), stdout=PIPE, stderr=PIPE))

            # Wait until a slot is free. A single check-and-reap (the former
            # `if`) let the pool grow past max_processes while the children
            # were still running.
            while (self.processes and
                   len(self.processes) >= self.max_processes):
                time.sleep(.1)
                self.processes.difference_update(
                    [p for p in self.processes if p.poll() is not None])

        # Drain whatever is still running before post-processing.
        for p in self.processes:
            if p.poll() is None:
                p.wait()

        self.post_processing_after_reverse_strand()
示例#5
0
 def post_processing(self, out_dir, dir, reverse_yn):
     """Remove four files under ``<out_dir>/out1`` and run post-processing.

     Args:
         out_dir: output directory; passed to the command as ``--out``.
         dir: passed to the command as ``--dna`` (the name shadows the
             builtin but is kept for caller compatibility).
         reverse_yn: passed to the command as ``--rev``.

     The command is ``self.cmd_post_process`` plus those options and is
     executed through ``self.run_cmd``.
     """
     # Every path needs the separator between out_dir and "out1"; three of
     # the four were previously built as "<out_dir>out1/...", which points
     # outside out_dir unless out_dir happens to end with a slash.
     for leftover in ("aaaaa", "bbbbb", "ppppp", "qqqqq"):
         utils.silentremove(
             utils.get_abspath(out_dir + "/out1/" + leftover))
     cmd = self.cmd_post_process + (" --dna=%s --out=%s --rev=%s" %
                                    (dir, out_dir, reverse_yn))
     self.run_cmd(cmd)
示例#6
0
文件: nonltr.py 项目: COL-IU/mgescan
 def post_processing(self, out_dir, dir, reverse_yn):
     """Delete four files in ``<out_dir>/out1`` and run the post-process command.

     Args:
         out_dir: output directory; forwarded as ``--out``.
         dir: forwarded as ``--dna`` (shadows the builtin; the name is kept
             so existing callers keep working).
         reverse_yn: forwarded as ``--rev``.

     ``self.cmd_post_process`` with those options appended is executed via
     ``self.run_cmd``.
     """
     # Build all four paths the same way. Previously only the first one had
     # the "/" between out_dir and "out1", so the other three resolved to
     # "<out_dir>out1/..." and missed the intended files.
     out1_dir = out_dir + "/out1/"
     utils.silentremove(utils.get_abspath(out1_dir + "aaaaa"))
     utils.silentremove(utils.get_abspath(out1_dir + "bbbbb"))
     utils.silentremove(utils.get_abspath(out1_dir + "ppppp"))
     utils.silentremove(utils.get_abspath(out1_dir + "qqqqq"))
     cmd = self.cmd_post_process + (" --dna=%s --out=%s --rev=%s" %
                                    (dir, out_dir, reverse_yn))
     self.run_cmd(cmd)
示例#7
0
文件: cmd.py 项目: MGEScan/mgescan
    def set_inputs(self):
        """Copy the parsed command-line arguments onto the instance.

        Reads ``self.args`` and sets ``data_dir``, ``genome_dir``,
        ``ltr_enabled``, ``nonltr_enabled``, ``mpi_enabled`` and ``debug``.
        The two directories are resolved with ``utils.get_abspath``.

        When ``--mpi`` is set and neither ``ltr`` nor ``nonltr`` is set, the
        MPI value is replaced by half of it, rounded up, as a string.
        NOTE(review): presumably "neither selected" means both programs run
        and share the processes -- confirm against the caller.
        """
        self.data_dir = utils.get_abspath(self.args['--output'])
        self.genome_dir = utils.get_abspath(self.args['<genome_dir>'])
        self.ltr_enabled = self.args['ltr']
        self.nonltr_enabled = self.args['nonltr']
        self.mpi_enabled = self.args['--mpi']
        # Indentation here used to mix tabs and spaces, which Python 3
        # rejects with TabError; the nesting below is what Python 2 parsed.
        if (self.mpi_enabled and not self.ltr_enabled
                and not self.nonltr_enabled):
            self.mpi_enabled = str(
                int(math.ceil(1.0 * int(self.args['--mpi']) / 2)))
        self.debug = self.args['--debug']
示例#8
0
文件: nonltr.py 项目: COL-IU/mgescan
    def toGFF(self):
        """Run the GFF3 conversion command on the non-LTR output.

        If ``self.gff3_enabled`` is set, ``<genome_dir>/info`` is first moved
        to ``self.data_dir``. Then ``self.nonltr_out_path`` and
        ``self.nonltr_gff_path`` are assigned and ``self.cmd_togff`` is
        executed through ``self.run_cmd`` with two ``%(name)s`` placeholders
        appended (presumably expanded by ``run_cmd`` -- TODO confirm).
        """
        if self.gff3_enabled:
            # Assumes "info" is the only directory in genome_dir.
            info_source = self.genome_dir + "/info"
            shutil.move(info_source, self.data_dir)

        # gff3
        full_dir = self.data_dir + "/info/full/"
        gff_file = self.data_dir + "/info/nonltr.gff3"
        self.nonltr_out_path = utils.get_abspath(full_dir)
        self.nonltr_gff_path = utils.get_abspath(gff_file)
        self.run_cmd(
            self.cmd_togff + " %(nonltr_out_path)s %(nonltr_gff_path)s")
示例#9
0
 def set_inputs(self):
     """Populate instance settings from the parsed arguments in ``self.args``.

     Sets ``data_dir`` and ``genome_dir`` (resolved with
     ``utils.get_abspath``), ``ltr_enabled``, ``nonltr_enabled``,
     ``mpi_enabled`` and ``debug``. When ``--mpi`` is set and neither
     ``ltr`` nor ``nonltr`` is, the MPI value becomes half of it, rounded
     up, stored as a string.
     """
     options = self.args
     self.data_dir = utils.get_abspath(options['--output'])
     self.genome_dir = utils.get_abspath(options['<genome_dir>'])
     self.ltr_enabled = options['ltr']
     self.nonltr_enabled = options['nonltr']
     self.mpi_enabled = options['--mpi']

     neither_selected = not (self.ltr_enabled or self.nonltr_enabled)
     if self.mpi_enabled and neither_selected:
         half = math.ceil(1.0 * int(options['--mpi']) / 2)
         self.mpi_enabled = str(int(half))

     self.debug = options['--debug']
示例#10
0
    def toGFF(self):
        """Convert the non-LTR result to GFF3 via ``self.cmd_togff``.

        When ``self.gff3_enabled`` is set, ``<genome_dir>/info`` is moved to
        ``self.data_dir`` beforehand. The method then stores the absolute
        result and GFF3 paths on the instance and passes the command
        template to ``self.run_cmd``.
        """
        move_info_first = self.gff3_enabled
        if move_info_first:
            # Assumes "info" is the only directory in genome_dir.
            shutil.move(self.genome_dir + "/info", self.data_dir)

        # gff3
        self.nonltr_out_path = utils.get_abspath(
            self.data_dir + "/info/full/")
        self.nonltr_gff_path = utils.get_abspath(
            self.data_dir + "/info/nonltr.gff3")
        template = self.cmd_togff + " %(nonltr_out_path)s %(nonltr_gff_path)s"
        self.run_cmd(template)
示例#11
0
文件: cmd.py 项目: COL-IU/mgescan
    def set_defaults(self):
        """Create the output directory and assign default run parameters.

        ``self.data_dir`` is passed to ``utils.create_directory`` when set;
        otherwise the absolute form of ``self.default_output_path`` is used.

        LTR parameters:
        min_dist: minimum distance (bp) between LTRs.
        max_dist: maximum distance (bp) between LTRs.
        min_len_ltr: minimum length (bp) of an LTR.
        max_len_ltr: maximum length (bp) of an LTR.
        ltr_sim_condition: minimum similarity (%) for LTRs in an element.
        cluster_sim_condition: minimum similarity (%) for LTRs in a cluster.
        len_condition: minimum length (bp) for LTRs aligned in local
            alignment.
        """
        requested = self.data_dir
        if requested:
            self.data_dir = utils.create_directory(requested, False)
        else:
            fallback = utils.get_abspath(self.default_output_path)
            self.data_dir = utils.create_directory(fallback)

        # Assigned in the listed order, one attribute per entry.
        defaults = (
            ("hmmerv", 3),
            ("min_dist", 2000),
            ("max_dist", 20000),
            ("min_len_ltr", 130),
            ("max_len_ltr", 2000),
            ("ltr_sim_condition", 70),
            ("cluster_sim_condition", 70),
            ("len_condition", 70),
            ("sw_rm", "No"),  # or Yes
            ("scaffold", ""),  # or directory
        )
        for attribute, value in defaults:
            setattr(self, attribute, value)
示例#12
0
    def set_defaults(self):
        """Prepare ``self.data_dir`` and set the default program parameters.

        If ``self.data_dir`` is empty, the absolute form of
        ``self.default_output_path`` is created and used instead; otherwise
        ``self.data_dir`` itself goes through ``utils.create_directory``.

        Defaults for LTR:
        min_dist: minimum distance (bp) between LTRs.
        max_dist: maximum distance (bp) between LTRs.
        min_len_ltr: minimum length (bp) of an LTR.
        max_len_ltr: maximum length (bp) of an LTR.
        ltr_sim_condition: minimum similarity (%) for LTRs in an element.
        cluster_sim_condition: minimum similarity (%) for LTRs in a cluster.
        len_condition: minimum length (bp) for LTRs aligned in local
            alignment.
        """
        if not self.data_dir:
            default_dir = utils.get_abspath(self.default_output_path)
            self.data_dir = utils.create_directory(default_dir)
        else:
            self.data_dir = utils.create_directory(self.data_dir, False)

        self.hmmerv = 3

        # Distance and length bounds.
        self.min_dist = 2000
        self.max_dist = 20000
        self.min_len_ltr = 130
        self.max_len_ltr = 2000

        # Similarity and alignment-length thresholds.
        self.ltr_sim_condition = 70
        self.cluster_sim_condition = 70
        self.len_condition = 70

        self.sw_rm = "No"  # or Yes
        self.scaffold = ""  # or directory
示例#13
0
 def reverse_complement(self, directory):
     """Call ``reverse_complement_fasta`` for each file in ``self.genome_path``.

     Only files at the top level of ``self.genome_path`` are visited.
     ``directory`` is created through ``utils.create_directory`` and is
     passed as the second argument of every call.

     Args:
         directory: target directory handed to ``reverse_complement_fasta``.
     """
     # First os.walk entry only. The default keeps `filenames` defined when
     # the walk yields nothing (e.g. genome_path is missing), where the
     # former for/break idiom failed with an unbound local.
     dirpath, _dirnames, filenames = next(
         os.walk(self.genome_path), (self.genome_path, [], []))
     utils.create_directory(directory, False)
     for name in filenames:
         file_path = utils.get_abspath(dirpath + "/" + name)
         reverse_complement_fasta(file_path, directory)
示例#14
0
 def reverse_complement(self, directory):
     """Run ``reverse_complement_fasta`` on the files of ``self.genome_path``.

     The listing is limited to the top level of ``self.genome_path``.
     ``directory`` is created via ``utils.create_directory`` before the
     files are processed and is forwarded to each call.

     Args:
         directory: target directory handed to ``reverse_complement_fasta``.
     """
     source_dir = self.genome_path
     # Take just the first os.walk entry; fall back to an empty listing so
     # a missing source directory no longer leaves `filenames` unbound.
     top_level = next(os.walk(source_dir), (source_dir, [], []))
     dirpath, filenames = top_level[0], top_level[2]
     utils.create_directory(directory, False)
     for name in filenames:
         reverse_complement_fasta(
             utils.get_abspath(dirpath + "/" + name), directory)
示例#15
0
 def reverse_complement(self):
     """Call ``reverse_complement_fasta`` for each file in ``self.genome_dir``.

     Only files at the top level of ``self.genome_dir`` are visited.
     ``self.minus_dir`` is created if it does not exist and is passed as
     the second argument of every call.
     """
     # First os.walk entry only. The default keeps `filenames` defined when
     # the walk yields nothing (e.g. genome_dir is missing), where the
     # former for/break idiom failed with an unbound local.
     dirpath, _dirnames, filenames = next(
         os.walk(self.genome_dir), (self.genome_dir, [], []))
     directory = self.minus_dir
     if not os.path.exists(directory):
         os.makedirs(directory)
     for name in filenames:
         file_path = utils.get_abspath(dirpath + "/" + name)
         reverse_complement_fasta(file_path, directory)
示例#16
0
文件: nonltr.py 项目: COL-IU/mgescan
 def reverse_complement(self):
     """Run ``reverse_complement_fasta`` on the files of ``self.genome_dir``.

     The listing is limited to the top level of ``self.genome_dir``. The
     target, ``self.minus_dir``, is created when absent and forwarded to
     each call.
     """
     source_dir = self.genome_dir
     # Take just the first os.walk entry; fall back to an empty listing so
     # a missing source directory no longer leaves `filenames` unbound.
     top_level = next(os.walk(source_dir), (source_dir, [], []))
     dirpath, filenames = top_level[0], top_level[2]

     directory = self.minus_dir
     if not os.path.exists(directory):
         os.makedirs(directory)

     for name in filenames:
         reverse_complement_fasta(
             utils.get_abspath(dirpath + "/" + name), directory)
示例#17
0
    def ltr(self):
        """Run the LTR pipeline: pre-process, find LTRs, export GFF3.

        Three command templates are passed to ``self.run_cmd`` in order:
        ``ltr/pre_process.pl``, ``ltr/find_ltr.pl`` and ``ltr/toGFF.py``,
        all under ``self.base_path``. The ``%(name)s`` placeholders are left
        unexpanded here; presumably ``self.run_cmd`` fills them from
        instance attributes -- TODO confirm.

        Side effects: sets ``self.ltr_out_path`` and ``self.ltr_gff_path``
        and prints start/finish messages with the elapsed seconds.
        """
        # Single-argument print() behaves the same on Python 2 and 3.
        print('ltr: starting')
        start = time.time()

        # scaffold
        # repeatmasker
        # The backslash continuations sit inside the string literal, so the
        # leading whitespace of each continued line is part of the command.
        cmd0 = self.base_path + "/ltr/pre_process.pl \
                -genome=%(genome_dir)s \
                -data=%(data_dir)s \
                -sw_rm=%(sw_rm)s \
                -scaffold=%(scaffold)s"
        self.run_cmd(cmd0)

        # find-ltr
        cmd1 = self.base_path + "/ltr/find_ltr.pl \
                -genome=%(genome_dir)s \
                -data=%(data_dir)s \
                -hmmerv=%(hmmerv)s \
                -min_dist=%(min_dist)s \
                -max_dist=%(max_dist)s \
                -min_len_ltr=%(min_len_ltr)s \
                -max_len_ltr=%(max_len_ltr)s \
                -ltr_sim_condition=%(ltr_sim_condition)s \
                -cluster_sim_condition=%(cluster_sim_condition)s \
                -len_condition=%(len_condition)s"
        if self.mpi_enabled:
            # Appended only when MPI is requested.
            cmd1 = cmd1 + " -mpi=%(mpi_enabled)s"
        self.run_cmd(cmd1)

        # gff3
        self.ltr_out_path = utils.get_abspath(self.data_dir + "/ltr/ltr.out")
        self.ltr_gff_path = utils.get_abspath(self.data_dir + "/ltr/ltr.gff3")
        cmd2 = (self.base_path +
                "/ltr/toGFF.py %(ltr_out_path)s %(ltr_gff_path)s")
        self.run_cmd(cmd2)

        elapsed = int(round(time.time() - start))
        print('ltr: finishing (elapsed time: {0} secs)'.format(elapsed))
示例#18
0
文件: cmd.py 项目: COL-IU/mgescan
    def ltr(self):
        """Run the LTR steps in sequence and convert the result to GFF3.

        ``ltr/pre_process.pl``, ``ltr/find_ltr.pl`` and ``ltr/toGFF.py``
        (all under ``self.base_path``) are each handed to ``self.run_cmd``
        as a command template. The ``%(name)s`` placeholders are not
        expanded here; presumably ``self.run_cmd`` substitutes instance
        attributes for them -- TODO confirm.

        Side effects: assigns ``self.ltr_out_path`` and
        ``self.ltr_gff_path`` and prints start/finish messages, the last one
        including the elapsed seconds.
        """
        # print() with one argument is valid on both Python 2 and Python 3.
        print('ltr: starting')
        start = time.time()

        # scaffold
        # repeatmasker
        # Line continuations are inside the string literals, so the leading
        # whitespace of the continued lines belongs to the command text.
        cmd0 = self.base_path + "/ltr/pre_process.pl \
                        -genome=%(genome_dir)s \
                        -data=%(data_dir)s \
                        -sw_rm=%(sw_rm)s \
                        -scaffold=%(scaffold)s"
        self.run_cmd(cmd0)

        # find-ltr
        cmd1 = self.base_path + "/ltr/find_ltr.pl \
                        -genome=%(genome_dir)s \
                        -data=%(data_dir)s \
                        -hmmerv=%(hmmerv)s \
                        -min_dist=%(min_dist)s \
                        -max_dist=%(max_dist)s \
                        -min_len_ltr=%(min_len_ltr)s \
                        -max_len_ltr=%(max_len_ltr)s \
                        -ltr_sim_condition=%(ltr_sim_condition)s \
                        -cluster_sim_condition=%(cluster_sim_condition)s \
                        -len_condition=%(len_condition)s"
        if self.mpi_enabled:
            # The MPI option is only added when it is set.
            cmd1 += " -mpi=%(mpi_enabled)s"
        self.run_cmd(cmd1)

        # gff3
        self.ltr_out_path = utils.get_abspath(self.data_dir + "/ltr/ltr.out")
        self.ltr_gff_path = utils.get_abspath(self.data_dir + "/ltr/ltr.gff3")
        cmd2 = (self.base_path +
                "/ltr/toGFF.py %(ltr_out_path)s %(ltr_gff_path)s")
        self.run_cmd(cmd2)

        end = time.time()
        print('ltr: finishing (elapsed time: {0} secs)'.format(
            int(round(end - start))))
示例#19
0
 def set_input(self, path):
     """Store the absolute form of ``path`` as ``self.input_file``.

     Args:
         path: path passed to ``utils.get_abspath``.

     Returns:
         The value stored in ``self.input_file``.
     """
     resolved = utils.get_abspath(path)
     self.input_file = resolved
     return self.input_file
示例#20
0
 def set_inputs(self):
     """Resolve the output and genome directories from ``self.args``.

     Sets ``self.data_dir`` from ``--output`` and ``self.genome_dir`` from
     ``<genome_dir>``, both passed through ``utils.get_abspath``.
     """
     options = self.args
     self.data_dir = utils.get_abspath(options['--output'])
     self.genome_dir = utils.get_abspath(options['<genome_dir>'])
示例#21
0
文件: nonltr.py 项目: COL-IU/mgescan
 def set_inputs(self):
     """Set ``data_dir`` and ``genome_dir`` from the parsed arguments.

     Both values come from ``self.args`` (keys ``--output`` and
     ``<genome_dir>``) and are resolved with ``utils.get_abspath``.
     """
     output_arg = self.args['--output']
     self.data_dir = utils.get_abspath(output_arg)
     genome_arg = self.args['<genome_dir>']
     self.genome_dir = utils.get_abspath(genome_arg)
示例#22
0
 def set_output(self, path):
     """Store the absolute form of ``path`` as ``self.result_path``.

     Args:
         path: path passed to ``utils.get_abspath``.

     Returns:
         The value stored in ``self.result_path``.
     """
     absolute = utils.get_abspath(path)
     self.result_path = absolute
     return self.result_path
示例#23
0
 def set_inputs(self):
     """Parse the command line with docopt and store the inputs.

     The usage text is ``self.__doc__`` and the version is ``self.ver``.
     Sets ``self.args``, ``self.nmpi`` (from ``--mpi``) and the absolute
     ``self.genome_path`` / ``self.output_path`` (from ``<input>`` and
     ``<output>``).
     """
     parsed = docopt(self.__doc__, version=self.ver)
     self.args = parsed
     self.nmpi = parsed['--mpi']
     self.genome_path = utils.get_abspath(parsed['<input>'])
     self.output_path = utils.get_abspath(parsed['<output>'])
示例#24
0
 def set_inputs(self):
     """Read the docopt arguments into instance attributes.

     ``docopt`` is given ``self.__doc__`` and ``self.ver``. The result is
     kept in ``self.args``; ``--mpi`` goes to ``self.nmpi``, and
     ``<input>`` / ``<output>`` are resolved with ``utils.get_abspath``
     into ``self.genome_path`` and ``self.output_path``.
     """
     self.args = docopt(self.__doc__, version=self.ver)
     options = self.args
     self.nmpi = options['--mpi']
     input_arg = options['<input>']
     self.genome_path = utils.get_abspath(input_arg)
     output_arg = options['<output>']
     self.output_path = utils.get_abspath(output_arg)
示例#25
0
	def set_datadir(self, path):
		"""Store the absolute form of ``path`` as ``self.datadir``.

		Args:
			path: path passed to ``utils.get_abspath``.

		Returns:
			The value stored in ``self.datadir``.
		"""
		absolute = utils.get_abspath(path)
		self.datadir = absolute
		return self.datadir