Exemplo n.º 1
0
def do_astral(input, output):
    """
    Run the ASTRAL jar on ``input`` and write the result to ``combine.tree``.

    :param input: path handed to the jar's ``-i`` option
    :param output: directory receiving ``combine.tree``; created if missing
    """
    jar_dir = getlocalpath()
    if not os.path.exists(output):
        os.mkdir(output)
    result_name = "combine.tree"
    # The command is assembled as one shell string, exactly as it is executed.
    pieces = [
        "java -jar ", jar_dir, "/astral.5.6.3.jar",
        " -i ", input,
        " -o ", output, "/", result_name,
    ]
    subprocess.call("".join(pieces), shell=True)
Exemplo n.º 2
0
def do_supertree(input, output):
    """
    Run ``spr_supertree`` with ``input`` on stdin and save its stdout.

    :param input: file redirected into the program's standard input
    :param output: directory receiving ``spr_supertree.tree``; created if missing
    """
    bin_dir = getlocalpath()
    if not os.path.exists(output):
        os.mkdir(output)
    result_name = "spr_supertree.tree"
    # Shell redirections (< and >) are why the command runs through a shell.
    pieces = [
        bin_dir, "/spr_supertree",
        " < ", input,
        " > ", output, "/", result_name,
    ]
    subprocess.call("".join(pieces), shell=True)
Exemplo n.º 3
0
def domuscle_file(indata_files, outdata, musclepara):
    """
    Align every entry of a directory with muscle, one run per entry.

    Results are written to a ``temp/alignment<timestamp>`` directory created
    beside ``outdata``; each result is named after the part of the input file
    name that precedes its first dot.

    :param indata_files: directory whose entries are each passed to ``-in``
    :param outdata: path whose parent directory hosts the alignment directory
    :param musclepara: extra muscle options; leading whitespace is stripped
    :return: path of the directory holding the alignments
    """
    extra_opts = musclepara.lstrip()
    bin_dir = getlocalpath()
    parent_dir = os.path.dirname(outdata)
    stamp = str(time.strftime('%Y%m%d%H%M%S'))
    muscle_dir = os.path.join(parent_dir, 'temp/alignment' + stamp)
    entries = os.listdir(indata_files)
    if not os.path.exists(muscle_dir):
        os.makedirs(muscle_dir)
    for entry in entries:
        target = os.path.join(muscle_dir, entry.split('.')[0])
        source = os.path.join(indata_files, entry)
        pieces = [bin_dir, "/muscle -in ", source, " -out ", target, " ", extra_opts]
        subprocess.call("".join(pieces), shell=True)
    logdomuscle.info("Multiple sequence alignment by Muscle was completed.")
    return muscle_dir
Exemplo n.º 4
0
def domafft_file(indata_files, outdata, mafftparas, thread):
    """
    Align every entry of a directory with mafft, running the jobs in a pool.

    One shell command is built per directory entry; the commands are then
    dispatched to ``run_cmd`` through a ``multiprocessing.Pool``. Results are
    written to a ``temp/alignment<timestamp>`` directory created beside
    ``outdata``; each result is named after the part of the input file name
    that precedes its first dot.

    :param indata_files: directory whose entries are each aligned
    :param outdata: path whose parent directory hosts the alignment directory
    :param mafftparas: extra mafft options; leading whitespace is stripped
    :param thread: number of worker processes for the pool
    :return: path of the directory holding the alignments
    """
    mafftparas = mafftparas.lstrip()
    mapath = getlocalpath()
    out_path = os.path.dirname(outdata)
    timeinfo = str(time.strftime('%Y%m%d%H%M%S'))
    mafft_dir = os.path.join(out_path, 'temp/alignment' + timeinfo)
    pro_name = os.listdir(indata_files)
    if not os.path.exists(mafft_dir):
        os.makedirs(mafft_dir)

    all_cmd = [
        mapath + "/mafft " + mafftparas + " "
        + os.path.join(indata_files, name)
        + " > " + os.path.join(mafft_dir, name.split('.')[0])
        for name in pro_name
    ]

    pool = multiprocessing.Pool(processes=thread)
    try:
        pool.map(run_cmd, all_cmd)
    finally:
        # Always release the worker processes, even when a job raises;
        # previously the pool was left open and its workers leaked.
        pool.close()
        pool.join()
    logmafft.info("Multiple sequence alignment by mafft was completed.")
    return mafft_dir
Exemplo n.º 5
0
def dogblocks(indata, gblockpara):
    """
    Run Gblocks on an alignment and return the path of its result file.

    The result path is ``indata`` with the Gblocks file extension appended.
    The extension is read from the ``-e=`` option in ``gblockpara``; when that
    option is absent, Gblocks' documented default ``-gb`` is assumed instead
    of failing on an unbound variable.

    :param indata: alignment file handed to Gblocks
    :param gblockpara: Gblocks options; leading whitespace is stripped
    :return: path of the file Gblocks is expected to write
    """
    gblockparas = gblockpara.lstrip()

    # Work out the extension Gblocks will append to the input file name.
    outdata = '-gb'
    for option in gblockparas.split(" "):
        if '-e=' in option:
            outdata = option.split('=')[1]
            break

    gblockpath = getlocalpath()
    alg_name = os.path.basename(indata)
    out_path = os.path.dirname(indata)
    gblock_data = os.path.join(out_path, alg_name + outdata)

    cmd = gblockpath + "/Gblocks " + indata + " " + gblockparas
    subprocess.call(cmd, shell=True)
    loggblocks.info('Select conserved blocks by Gblocks was completed')
    loggblocks.debug('Gblocks path:{0}'.format(gblock_data))
    return gblock_data
Exemplo n.º 6
0
def domafft_file(indata_files, outdata, mafftparas):
    """
    Align every entry of a directory with mafft, one run after another.

    Results are written to a ``temp/alignment<timestamp>`` directory created
    beside ``outdata``; each result is named after the part of the input file
    name that precedes its first dot.

    :param indata_files: directory whose entries are each aligned
    :param outdata: path whose parent directory hosts the alignment directory
    :param mafftparas: extra mafft options; leading whitespace is stripped
    :return: path of the directory holding the alignments
    """
    extra_opts = mafftparas.lstrip()
    bin_dir = getlocalpath()
    parent_dir = os.path.dirname(outdata)
    stamp = str(time.strftime('%Y%m%d%H%M%S'))
    mafft_dir = os.path.join(parent_dir, 'temp/alignment' + stamp)
    entries = os.listdir(indata_files)
    if not os.path.exists(mafft_dir):
        os.makedirs(mafft_dir)
    for entry in entries:
        target = os.path.join(mafft_dir, entry.split('.')[0])
        source = os.path.join(indata_files, entry)
        # mafft writes to stdout, hence the shell redirection into the target.
        pieces = [bin_dir, "/mafft ", extra_opts, " ", source, " > ", target]
        subprocess.call("".join(pieces), shell=True)
    logmafft.info("Multiple sequence alignment by mafft was completed.")
    return mafft_dir
Exemplo n.º 7
0
def doraxml(inputfile, outputfile, raxmlpara, thread):
    """
    Run RAxML (``raxmlHPC-PTHREADS-AVX``) on an alignment.

    Any ``-T <n>`` pair in ``raxmlpara`` is dropped and replaced by
    ``-T <thread>`` so the thread count always comes from ``thread``.

    :param inputfile: alignment passed to ``-s``
    :param outputfile: directory passed to ``-w``; created if missing
    :param raxmlpara: additional RAxML options
    :param thread: thread count placed after ``-T``
    """
    raxmlparas = raxmlpara.lstrip()

    raxmlparalist = raxmlparas.split(" ")
    tpara = '-T'
    if tpara in raxmlparalist:
        index = raxmlparalist.index(tpara)
        # Delete the flag and its value by position. list.remove() deletes the
        # first *equal* element, which could be an earlier option's value
        # (e.g. "-p 4 -T 4"), and indexing past a trailing "-T" would raise.
        del raxmlparalist[index:index + 2]
        raxmlpararet = ' '.join(raxmlparalist)
    else:
        raxmlpararet = raxmlpara
    threadtostr = str(thread)
    raxmlpath = getlocalpath()
    if not os.path.exists(outputfile):
        os.mkdir(outputfile)
    strs = raxmlpath + "/raxmlHPC-PTHREADS-AVX " + "-T " + threadtostr + " " + raxmlpararet
    cmd = strs + " -s " + inputfile + " -w " + outputfile
    subprocess.call(cmd, shell=True)
    logdoraxml.info("Phylogenetic species tree reconstructed by RAxML was completed")
Exemplo n.º 8
0
def doclustalw_file(indata_files, outdata, clustalwpara):
    """
    Align every entry of a directory with clustalw2 as protein sequences.

    Each entry is aligned separately and written, under its own file name and
    in FASTA output format, to a directory created beside ``outdata``.

    :param indata_files: directory whose entries are each aligned
    :param outdata: path whose parent directory hosts the alignment directory
    :param clustalwpara: extra clustalw2 options, or ``None`` for none
    :return: path of the directory holding the alignments
    """
    seq_type = "-TYPE=PROTEIN"
    clustalwparas = seq_type if clustalwpara is None else seq_type + " " + clustalwpara.lstrip()

    bin_dir = getlocalpath()
    parent_dir = os.path.dirname(outdata)
    clustalw_dir = os.path.join(parent_dir, str(timeformat('temp/hcp_alignment')))
    entries = os.listdir(indata_files)
    if not os.path.exists(clustalw_dir):
        os.makedirs(clustalw_dir)
    for entry in entries:
        source = os.path.join(indata_files, entry)
        out_opt = "-OUTFILE=" + os.path.join(clustalw_dir, entry)
        pieces = [
            bin_dir, "/clustalw2 ", "-INFILE=", source,
            " -OUTPUT=FASTA -ALIGN ", out_opt, " ", clustalwparas,
        ]
        subprocess.call("".join(pieces), shell=True)
    logdoclustalw.info("Multiple sequence alignment by Clustalw2 was completed.")
    return clustalw_dir
Exemplo n.º 9
0
def dotrimal(indata, trimalpara):
    """
    Run trimal twice on an alignment: once for PHYLIP and once for FASTA output.

    Both results are written beside ``indata``: ``trimal.phy`` (PHYLIP) and the
    same path with every ``.phy`` removed (FASTA, kept for FastTree).

    :param indata: alignment file passed to ``-in``
    :param trimalpara: extra trimal options; leading whitespace is stripped
    :return: path of the PHYLIP result
    """
    extra_opts = trimalpara.lstrip()
    bin_dir = getlocalpath()
    trimal_data = os.path.join(os.path.dirname(indata), "trimal.phy")

    def build_command(target, format_flag):
        # Same command layout for both runs; only target and format differ.
        return "".join([
            bin_dir, "/trimal ", " -in ", indata,
            " -out ", target, " ", extra_opts, format_flag,
        ])

    subprocess.call(build_command(trimal_data, " -phylip"), shell=True)

    # support fasttree software
    fasta_target = trimal_data.replace('.phy', '')
    subprocess.call(build_command(fasta_target, " -fasta"), shell=True)

    loggtrimal.info('Select conserved blocks by trimal was completed')
    loggtrimal.debug('trimal path:{0}'.format(trimal_data))
    return trimal_data
Exemplo n.º 10
0
def doclustalw_file(indata_files, outdata, clustalwpara):
    """
    Run clustalw2 in protein mode on each entry of a directory.

    Every entry gets its own clustalw2 call; the FASTA-format result keeps the
    entry's file name and is stored in a directory created beside ``outdata``.

    :param indata_files: directory whose entries are each aligned
    :param outdata: path whose parent directory hosts the alignment directory
    :param clustalwpara: extra clustalw2 options, or ``None`` for none
    :return: path of the directory holding the alignments
    """
    base_opt = "-TYPE=PROTEIN"
    if clustalwpara is not None:
        clustalwparas = base_opt + " " + clustalwpara.lstrip()
    else:
        clustalwparas = base_opt

    tool_dir = getlocalpath()
    target_root = os.path.dirname(outdata)
    sub_dir = str(timeformat('temp/hcp_alignment'))
    clustalw_dir = os.path.join(target_root, sub_dir)
    names = os.listdir(indata_files)
    if not os.path.exists(clustalw_dir):
        os.makedirs(clustalw_dir)
    for name in names:
        in_opt = "-INFILE=" + os.path.join(indata_files, name)
        out_opt = "-OUTFILE=" + os.path.join(clustalw_dir, name)
        command = "".join([
            tool_dir, "/clustalw2 ", in_opt,
            " -OUTPUT=FASTA -ALIGN ", out_opt, " ", clustalwparas,
        ])
        subprocess.call(command, shell=True)
    logdoclustalw.info(
        "Multiple sequence alignment by Clustalw2 was completed.")
    return clustalw_dir
Exemplo n.º 11
0
def docontree(input, output, rule):
    """
    Build a consensus tree with RAxML's ``-J`` option.

    :param input: tree file passed to ``-z``
    :param output: directory passed to ``-w``; created if missing
    :param rule: consensus rule placed after ``-J``
    """
    bin_dir = getlocalpath()

    # RAxML writes into the -w directory, so make sure it exists first.
    if not os.path.exists(output):
        os.mkdir(output)
    pieces = [
        bin_dir, "/raxmlHPC-PTHREADS-AVX ",
        " -J ", rule,
        " -m GTRCAT -z ", input,
        " -w ", output,
        " -n T1",
    ]
    subprocess.call("".join(pieces), shell=True)
Exemplo n.º 12
0
def docontree(input, output, rule):
    """
    Combine trees into a consensus tree by calling RAxML with ``-J``.

    :param input: tree file given to RAxML's ``-z`` option
    :param output: result directory given to ``-w``; created if missing
    :param rule: value placed after the ``-J`` option
    """
    raxml_dir = getlocalpath()

    output_missing = not os.path.exists(output)
    if output_missing:
        os.mkdir(output)

    executable = "".join([raxml_dir, "/raxmlHPC-PTHREADS-AVX "])
    arguments = "".join([" -J ", rule, " -m GTRCAT -z ", input, " -w ", output, " -n T1"])
    subprocess.call(executable + arguments, shell=True)
Exemplo n.º 13
0
def doiqtree(inputfile, outputfile, iqtreepara, thread):
    """
    Run iqtree on an alignment, writing results with the prefix ``iqtree.tree``.

    :param inputfile: alignment passed to ``-s``
    :param outputfile: directory that receives the results; created if missing
    :param iqtreepara: additional iqtree options appended to the command
    :param thread: thread count; ``-nt <thread>`` is added unless it is 1
    """
    iqtreePath = getlocalpath()
    thread_to_str = str(thread)
    out_tree_name = os.path.join(outputfile, "iqtree.tree")
    if not os.path.exists(outputfile):
        os.mkdir(outputfile)
    cmd = iqtreePath + "/iqtree " + "-s " + inputfile + " -pre " + out_tree_name
    # Compare by value: "is" tests object identity and is not reliable for str.
    if thread_to_str != '1':
        # set the threads
        cmd += " -nt " + thread_to_str
    # Explicit separator so the options never fuse with the preceding argument
    # when iqtreepara has no leading space.
    cmd += " " + iqtreepara
    subprocess.call(cmd, shell=True)
    logdoiqtree.info("Phylogenetic species tree reconstructed by iqtree was completed")
Exemplo n.º 14
0
def doFastTree(inputfile, outputfile, FastTreepara, thread):
    """
    Run FastTree (or FastTreeMP) and redirect the tree to ``FastTree.tree``.

    The input is ``inputfile`` with every ``.phy`` removed, i.e. the FASTA
    variant of the alignment. With more than one thread, ``FastTreeMP`` is
    used and ``OMP_NUM_THREADS`` is set in this process's environment.

    :param inputfile: path of the ``.phy`` alignment
    :param outputfile: directory receiving ``FastTree.tree``; created if missing
    :param FastTreepara: additional FastTree options
    :param thread: thread count; 1 selects the single-threaded binary
    """
    # Use FASTA format build tree
    input_fasta = inputfile.replace('.phy', '')
    FastTreePath = getlocalpath()
    thread_to_str = str(thread)
    out_tree_name = os.path.join(outputfile, "FastTree.tree")
    if not os.path.exists(outputfile):
        os.mkdir(outputfile)
    # Compare by value: "is" tests object identity and is not reliable for str.
    if thread_to_str == '1':
        binary = "/FastTree "
    else:
        # set the threads
        os.environ["OMP_NUM_THREADS"] = thread_to_str
        binary = "/FastTreeMP "
    # The space before the input path keeps it from fusing with the last option.
    cmd = FastTreePath + binary + FastTreepara + " " + input_fasta + " >" + out_tree_name
    subprocess.call(cmd, shell=True)
    logdofasttree.info("Phylogenetic species tree reconstructed by FastTree was completed")
Exemplo n.º 15
0
def doiqtree(inputfile, outputfile, iqtreepara, thread):
    """
    Build a tree with iqtree, using ``iqtree.tree`` as the output prefix.

    :param inputfile: alignment passed to ``-s``
    :param outputfile: directory that receives the results; created if missing
    :param iqtreepara: additional iqtree options appended to the command
    :param thread: thread count; ``-nt <thread>`` is added unless it is 1
    """
    iqtreePath = getlocalpath()
    thread_to_str = str(thread)
    out_tree_name = os.path.join(outputfile, "iqtree.tree")
    if not os.path.exists(outputfile):
        os.mkdir(outputfile)
    cmd = iqtreePath + "/iqtree " + "-s " + inputfile + " -pre " + out_tree_name
    # String equality, not identity: "is" on a str literal is unreliable.
    if thread_to_str != '1':
        # set the threads
        cmd += " -nt " + thread_to_str
    # Separate the user options from the preceding argument explicitly; a
    # missing leading space in iqtreepara would otherwise corrupt the command.
    cmd += " " + iqtreepara
    subprocess.call(cmd, shell=True)
    logdoiqtree.info(
        "Phylogenetic species tree reconstructed by iqtree was completed")
Exemplo n.º 16
0
def doFastTree(inputfile, outputfile, FastTreepara, thread):
    """
    Build a tree with FastTree (or FastTreeMP) into ``FastTree.tree``.

    The input is ``inputfile`` with every ``.phy`` removed, i.e. the FASTA
    variant of the alignment. With more than one thread, ``FastTreeMP`` is
    used and ``OMP_NUM_THREADS`` is set in this process's environment.

    :param inputfile: path of the ``.phy`` alignment
    :param outputfile: directory receiving ``FastTree.tree``; created if missing
    :param FastTreepara: additional FastTree options
    :param thread: thread count; 1 selects the single-threaded binary
    """
    # Use FASTA format build tree
    input_fasta = inputfile.replace('.phy', '')
    FastTreePath = getlocalpath()
    thread_to_str = str(thread)
    out_tree_name = os.path.join(outputfile, "FastTree.tree")
    if not os.path.exists(outputfile):
        os.mkdir(outputfile)
    # String equality, not identity: "is" on a str literal is unreliable.
    if thread_to_str == '1':
        binary = "/FastTree "
    else:
        # set the threads
        os.environ["OMP_NUM_THREADS"] = thread_to_str
        binary = "/FastTreeMP "
    cmd = FastTreePath + binary + FastTreepara + " " + input_fasta + " >" + out_tree_name
    subprocess.call(cmd, shell=True)
    logdofasttree.info(
        "Phylogenetic species tree reconstructed by FastTree was completed")
Exemplo n.º 17
0
def doclustalw(indata, outdata, clustalwpara):
    """
    Align one FASTA file with clustalw2 in DNA mode.

    ``indata`` may be a file, or a directory, in which case only the first
    entry returned by ``os.listdir`` is aligned. The result keeps the input's
    file name and is written to a directory created beside ``outdata``.

    :param indata: a FASTA file, or a directory containing one
    :param outdata: path whose parent directory hosts the alignment directory
    :param clustalwpara: extra clustalw2 options, or ``None`` for none
    :return: path of the alignment, or ``None`` when ``indata`` is neither a
        directory nor a file
    """
    logdoclustalw.debug("clustalw input data:{0}".format(indata))
    seq_type = "-TYPE=DNA"
    if clustalwpara is None:
        clustalwparas = seq_type
    else:
        clustalwparas = seq_type + " " + clustalwpara.lstrip()
    clu_path = getlocalpath()
    out_path = os.path.dirname(outdata)
    doclu_subdir = str(timeformat('temp/rna_sequence'))
    clustalw_dir = os.path.join(out_path, doclu_subdir)

    # check indata type is dir or files?
    if os.path.isdir(indata):
        file_name = os.listdir(indata)[0]
        infile = os.path.join(indata, file_name)
    elif os.path.isfile(indata):
        # Use only the file name for the output: joining the full input path
        # would discard clustalw_dir for an absolute path and make clustalw2
        # overwrite its own input.
        file_name = os.path.basename(indata)
        infile = indata
    else:
        return None

    if not os.path.exists(clustalw_dir):
        os.makedirs(clustalw_dir)
    out_alg = os.path.join(clustalw_dir, file_name)
    out_file = "-OUTFILE=" + out_alg
    cmd = clu_path + "/clustalw2 " + "-INFILE=" + infile + " -OUTPUT=FASTA -ALIGN " + out_file + " " + clustalwparas
    subprocess.call(cmd, shell=True)
    logdoclustalw.info(
        "Multiple sequence alignment  by Clustalw2 was completed.")
    return out_alg
Exemplo n.º 18
0
def doclustalw(indata, outdata, clustalwpara):
    """
    Run clustalw2 in DNA mode on a single FASTA file.

    ``indata`` may be a file, or a directory, in which case only the first
    entry returned by ``os.listdir`` is aligned. The result keeps the input's
    file name and is written to a directory created beside ``outdata``.

    :param indata: a FASTA file, or a directory containing one
    :param outdata: path whose parent directory hosts the alignment directory
    :param clustalwpara: extra clustalw2 options, or ``None`` for none
    :return: path of the alignment, or ``None`` when ``indata`` is neither a
        directory nor a file
    """
    logdoclustalw.debug("clustalw input data:{0}".format(indata))
    seq_type = "-TYPE=DNA"
    if clustalwpara is None:
        clustalwparas = seq_type
    else:
        clustalwparas = seq_type + " " + clustalwpara.lstrip()
    clu_path = getlocalpath()
    out_path = os.path.dirname(outdata)
    doclu_subdir = str(timeformat('temp/rna_sequence'))
    clustalw_dir = os.path.join(out_path, doclu_subdir)

    # check indata type is dir or files?
    if os.path.isdir(indata):
        file_name = os.listdir(indata)[0]
        infile = os.path.join(indata, file_name)
    elif os.path.isfile(indata):
        # os.path.join drops clustalw_dir when given an absolute path, which
        # made the output land on top of the input file; keep only the name.
        file_name = os.path.basename(indata)
        infile = indata
    else:
        return None

    if not os.path.exists(clustalw_dir):
        os.makedirs(clustalw_dir)
    out_alg = os.path.join(clustalw_dir, file_name)
    out_file = "-OUTFILE=" + out_alg
    cmd = clu_path + "/clustalw2 " + "-INFILE=" + infile + " -OUTPUT=FASTA -ALIGN " + out_file + " " + clustalwparas
    subprocess.call(cmd, shell=True)
    logdoclustalw.info("Multiple sequence alignment  by Clustalw2 was completed.")
    return out_alg
Exemplo n.º 19
0
def domuscle(indata, outdata, musclepara):
    """
    Align one FASTA file with muscle.

    ``indata`` may be a file, or a directory, in which case only the first
    entry returned by ``os.listdir`` is aligned. The result keeps the input's
    file name and is written to a ``temp/rna_alignment<timestamp>`` directory
    created beside ``outdata``.

    :param indata: a FASTA file, or a directory containing one
    :param outdata: path whose parent directory hosts the alignment directory
    :param musclepara: extra muscle options; leading whitespace is stripped
    :return: path of the alignment, or ``None`` when ``indata`` is neither a
        directory nor a file
    """
    muscleparas = musclepara.lstrip()
    mupath = getlocalpath()
    out_path = os.path.dirname(outdata)
    timeinfo = str(time.strftime('%Y%m%d%H%M%S'))
    muscle_dir = os.path.join(out_path, 'temp/rna_alignment' + timeinfo)

    # check indata type
    if os.path.isdir(indata):
        file_name = os.listdir(indata)[0]
        each_pro = os.path.join(indata, file_name)
    elif os.path.isfile(indata):
        # Use only the file name for the output: joining the full input path
        # would discard muscle_dir for an absolute path and make muscle write
        # over its own input.
        file_name = os.path.basename(indata)
        each_pro = indata
    else:
        return None

    if not os.path.exists(muscle_dir):
        os.makedirs(muscle_dir)
    out_alg = os.path.join(muscle_dir, file_name)
    cmd = mupath + "/muscle -in " + each_pro + " -out " + out_alg + " " + muscleparas
    subprocess.call(cmd, shell=True)
    logdomuscle.debug('muscle result path:{0}'.format(out_alg))
    logdomuscle.info(
        "Multiple sequence alignment by Muscle was completed.")
    return out_alg
Exemplo n.º 20
0
def domafft(indata, outdata, mafftparas):
    """
    Align one FASTA file with mafft.

    ``indata`` may be a file, or a directory, in which case only the first
    entry returned by ``os.listdir`` is aligned. The result keeps the input's
    file name and is written to a ``temp/rna_alignment<timestamp>`` directory
    created beside ``outdata``.

    :param indata: a FASTA file, or a directory containing one
    :param outdata: path whose parent directory hosts the alignment directory
    :param mafftparas: extra mafft options; leading whitespace is stripped
    :return: path of the alignment, or ``None`` when ``indata`` is neither a
        directory nor a file
    """
    mafftparas = mafftparas.lstrip()
    mapath = getlocalpath()
    out_path = os.path.dirname(outdata)
    timeinfo = str(time.strftime('%Y%m%d%H%M%S'))
    mafft_dir = os.path.join(out_path, 'temp/rna_alignment' + timeinfo)

    # check indata type
    if os.path.isdir(indata):
        file_name = os.listdir(indata)[0]
        each_pro = os.path.join(indata, file_name)
    elif os.path.isfile(indata):
        # Use only the file name for the output: joining the full input path
        # would discard mafft_dir for an absolute path, and the ">" redirect
        # would then truncate the input file before mafft reads it.
        file_name = os.path.basename(indata)
        each_pro = indata
    else:
        return None

    if not os.path.exists(mafft_dir):
        os.makedirs(mafft_dir)
    out_alg = os.path.join(mafft_dir, file_name)
    cmd = mapath + "/mafft " + mafftparas + " " + each_pro + " > " + out_alg
    subprocess.call(cmd, shell=True)
    logmafft.debug('mafft result path:{0}'.format(out_alg))
    logmafft.info("Multiple sequence alignment by mafft was completed.")
    return out_alg