Exemplo n.º 1
0
#                                                                               #
# You should have received a copy of the GNU Lesser General Public License      #
# along with PhySpeTree. If not, see <http://www.gnu.org/licenses/>.            #
#                                                                               #
# ###############################################################################
"""
Function to call FastTree to reconstruct a tree.
"""
import os

import subprocess

from physpetool.phylotree.log import getLogging
from physpetool.softwares.path import getlocalpath

# Module-level logger obtained from the project's getLogging helper under the name 'FastTree'.
logdofasttree = getLogging('FastTree')


def doFastTree(inputfile, outputfile, FastTreepara, thread):
    # Use FASTA format build tree
    input_fasta = inputfile.replace('.phy', '')
    FastTreePath = getlocalpath()
    thread_to_str = str(thread)
    out_tree_name = os.path.join(outputfile, "FastTree.tree")
    if not os.path.exists(outputfile):
        os.mkdir(outputfile)
    if thread_to_str is '1':
        cmd = FastTreePath + "/FastTree " + FastTreepara + input_fasta + " >" + out_tree_name
        subprocess.call(cmd, shell=True)
    else:
        # set the threads
Exemplo n.º 2
0
# along with PhySpeTree. If not, see <http://www.gnu.org/licenses/>.            #
#                                                                               #
# ###############################################################################

"""
Module to call muscle to do alignment
"""

import subprocess
import os
import os.path
import time
from physpetool.phylotree.log import getLogging
from physpetool.softwares.path import getlocalpath

# Module-level logger obtained from the project's getLogging helper under the name 'muscle'.
logdomuscle = getLogging('muscle')


# muscle -in process_L1.txt -out process_L1.afa -maxiters 100
def domuscle(indata, outdata, musclepara):
    """
    call muscle software to do align
    :param indata: a directory containing FASTA files, or a single FASTA-format file
    :param outdata: the out is abs path with a file name
    :return: outdata path
    """
    muscleparas = musclepara.lstrip()
    mupath = getlocalpath()
    out_path = os.path.dirname(outdata)
    timeformat = '%Y%m%d%H%M%S'
    timeinfo = str(time.strftime(timeformat))
Exemplo n.º 3
0
This module retrieves highly conserved proteins and downloads them from the KEGG database.


"""
import shutil
import glob
import ftplib
import os
import sqlite3
import time
from physpetool.database.dbpath import getlocaldbpath
from physpetool.phylotree.log import getLogging
from physpetool.tools.keggapi import getprotein

# Module-level logger obtained from the project's getLogging helper under the name 'KEGG INDEX DB'.
logretrieveprotein = getLogging('KEGG INDEX DB')
# File name of the KEGG database; getspecies() joins it onto getlocaldbpath()
# and opens the result with sqlite3.
KEGGDB = "KEGG_DB_3.0.db"


def getspecies(spelist, colname):
    """
    Get species protein index for DB
    :param spelist: a list containing abbreviated species names
    :param colname: a list contain colname of DB
    :return: a list contain protein index can be retrieved and a match ko list (is a ko id list)
    """
    dbpath = getlocaldbpath()
    db = os.path.join(dbpath, KEGGDB)
    relist = []
    match_ko_name = []
    conn = sqlite3.connect(db)
Exemplo n.º 4
0
# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more  #
# details.                                                                      #
#                                                                               #
# You should have received a copy of the GNU Lesser General Public License      #
# along with PhySpeTree. If not, see <http://www.gnu.org/licenses/>.            #
#                                                                               #
# ###############################################################################


"""
Convert FASTA(.fasta) format to PHYLIP(.phy) format

"""
from physpetool.phylotree.log import getLogging

# Module-level logger obtained from the project's getLogging helper under the name 'convert'.
logfasta2phy = getLogging('convert')


# function to read fasta file
def read_fasta(fp):
    """
    read *.fasta file and parse
    :param fp: a fasta format file
    """
    seq_name, seq_pro = None, []
    for line in fp:
        line = line.rstrip()
        if line.startswith(">"):
            if seq_name: yield (seq_name, ''.join(seq_pro))
            seq_name, seq_pro = line.replace(">", ""), []
        else:
Exemplo n.º 5
0
#                                                                               #
# ###############################################################################

"""
Module to call clustalw2 to do alignment

"""

# clustalw2 -INFILE=p1.fasta -TYPE=PROTEIN -OUTPUT=FASTA -ALIGN -OUTFILE=test/clustalw_p2.fasta
import os
import subprocess
from physpetool.phylotree.log import getLogging
from physpetool.softwares.path import getlocalpath
from physpetool.utils.outdirforamt import timeformat

# Module-level logger obtained from the project's getLogging helper.
# NOTE(review): the logger name is spelled 'clutalw2' — possibly a typo for
# 'clustalw2'; confirm before changing, since it alters the emitted logger name.
logdoclustalw = getLogging('clutalw2')


def doclustalw(indata, outdata, clustalwpara):
    """
    Call clustalw software to do align
    :param indata: a directory containing FASTA files, or a single FASTA-format file
    :param outdata: the out is abs path with a file name
    :return: outdata path
    """
    logdoclustalw.debug("clustalw input data:{0}".format(indata))
    type = "-TYPE=DNA"
    if clustalwpara is None:
        clustalwparas = type
    else:
        clustalwparas = type + " " + clustalwpara.lstrip()
Exemplo n.º 6
0
#                                                                               #
# You should have received a copy of the GNU Lesser General Public License      #
# along with PhySpeTree. If not, see <http://www.gnu.org/licenses/>.            #
#                                                                               #
# ###############################################################################

import os
import time
from physpetool.phylotree.log import getLogging
"""

Concatenate the muscle alignment files to one file.

"""

# Module-level logger obtained from the project's getLogging helper under the name 'concatenate'.
logconcat = getLogging('concatenate')


def read_fasta_co(fp):
    """Lazily parse FASTA-style records from an iterable of lines.

    Each line is right-stripped. A line starting with ">" opens a new record
    and is kept verbatim (including the ">") as the record name. Every other
    line has its spaces removed and is appended to the current sequence.
    Lines seen before the first header are discarded.

    :param fp: an iterable of text lines, e.g. an open FASTA file
    :return: a generator of (name, sequence) tuples
    """
    header = None
    chunks = []
    for raw in fp:
        text = raw.rstrip()
        if not text.startswith(">"):
            # Sequence line: drop embedded spaces and collect it.
            chunks.append(text.replace(" ", ""))
            continue
        # Header line: flush the record collected so far, if any.
        if header:
            yield (header, ''.join(chunks))
        header = text
        chunks = []
    # Flush the final record once the input is exhausted.
    if header:
        yield (header, ''.join(chunks))
Exemplo n.º 7
0
# details.                                                                      #
#                                                                               #
# You should have received a copy of the GNU Lesser General Public License      #
# along with PhySpeTree. If not, see <http://www.gnu.org/licenses/>.            #
#                                                                               #
# ###############################################################################
"""
function to call RAxML construct tree
"""

import os
import subprocess
from physpetool.phylotree.log import getLogging
from physpetool.softwares.path import getlocalpath

# Module-level logger obtained from the project's getLogging helper under the name 'RAxML'.
logdoraxml = getLogging('RAxML')


def doraxml(inputfile, outputfile, raxmlpara, thread):
    """
call RAxML method to construct species tree
    :param inputfile: abs path of .phy format files
    :param outputfile: a file contain RAxML result
    """
    raxmlparas = raxmlpara.lstrip()

    raxmlparalist = raxmlparas.split(" ")
    tpara = '-T'
    if tpara in raxmlparalist:
        index = raxmlparalist.index(tpara)
        raxmlparalist.remove(raxmlparalist[index])
Exemplo n.º 8
0
# details.                                                                      #
#                                                                               #
# You should have received a copy of the GNU Lesser General Public License      #
# along with PhySpeTree. If not, see <http://www.gnu.org/licenses/>.            #
#                                                                               #
# ###############################################################################
"""
This module retrieves and downloads SSU rRNA sequences from the SILVA database.
"""

import ftplib
import os
import time
from physpetool.phylotree.log import getLogging

# Module-level logger obtained from the project's getLogging helper under the name 'SSU rRNA DB'.
log_retrieve = getLogging('SSU rRNA DB')


def retrieve16srna(spenamelist, outpath):
    """
retrieve 16s rna from bioinfor.scu.edu.cn
    :param spenamelist: a list contain species names
    :param outpath: output data path
    :return: download file path
    """
    # parse and check the species names list
    # spelist = []
    # for line in spenamelist:
    #     st = line.strip()
    #     spelist.append(st)
    spelist = spenamelist
Exemplo n.º 9
0
#                                                                               #
# You should have received a copy of the GNU Lesser General Public License      #
# along with PhySpeTree. If not, see <http://www.gnu.org/licenses/>.            #
#                                                                               #
# ###############################################################################
"""
Function to call iqtree to reconstruct a tree.
"""
import os

import subprocess

from physpetool.phylotree.log import getLogging
from physpetool.softwares.path import getlocalpath

# Module-level logger obtained from the project's getLogging helper under the name 'iqtree'.
logdoiqtree = getLogging('iqtree')


def doiqtree(inputfile, outputfile, iqtreepara, thread):
    # Use FASTA format build tree
    # input_fasta = inputfile.replace('.phy', '')
    iqtreePath = getlocalpath()
    thread_to_str = str(thread)
    out_tree_name = os.path.join(outputfile, "iqtree.tree")
    if not os.path.exists(outputfile):
        os.mkdir(outputfile)
    if thread_to_str is '1':
        cmd = iqtreePath + "/iqtree " + "-s " + inputfile + " -pre " + out_tree_name + iqtreepara
        subprocess.call(cmd, shell=True)
    else:
        # set the threads
Exemplo n.º 10
0
# along with PhySpeTree. If not, see <http://www.gnu.org/licenses/>.            #
#                                                                               #
# ###############################################################################

"""
Module to call muscle to do alignment
"""
import multiprocessing
import subprocess
import os
import os.path
import time
from physpetool.phylotree.log import getLogging
from physpetool.softwares.path import getlocalpath

# Module-level logger obtained from the project's getLogging helper under the name 'muscle'.
logdomuscle = getLogging('muscle')


# muscle -in process_L1.txt -out process_L1.afa -maxiters 100
def domuscle(indata, outdata, musclepara):
    """
    call muscle software to do align
    :param indata: a directory containing FASTA files, or a single FASTA-format file
    :param outdata: the out is abs path with a file name
    :return: outdata path
    """
    muscleparas = musclepara.lstrip()
    mupath = getlocalpath()
    out_path = os.path.dirname(outdata)
    timeformat = '%Y%m%d%H%M%S'
    timeinfo = str(time.strftime(timeformat))
Exemplo n.º 11
0
#                                                                               #
# You should have received a copy of the GNU Lesser General Public License      #
# along with PhySpeTree. If not, see <http://www.gnu.org/licenses/>.            #
#                                                                               #
# ###############################################################################
"""
Check input file is right.

"""

from physpetool.database.dbpath import getlocaldbpath
from physpetool.phylotree.log import getLogging
import os

# Path returned by getlocaldbpath(), resolved once when this module is imported.
dbpath = getlocaldbpath()
# Module-level logger obtained from the project's getLogging helper under the name 'Checking organisms'.
logchecking = getLogging('Checking organisms')


def check_organism(input, db_list):
    """
    check input organism
    :param input: a list contain species name
    :param db_list: a list file contain organism in corresponding database
    :return: inputlist: match in database mislist: can't match in database
    """
    originaList = []
    for line in input:
        st = line.strip()
        originaList.append(st)
    originaList = removeEmptyStr(originaList)
    inputlist = []
Exemplo n.º 12
0
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS     #
# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more  #
# details.                                                                      #
#                                                                               #
# You should have received a copy of the GNU Lesser General Public License      #
# along with PhySpeTree. If not, see <http://www.gnu.org/licenses/>.            #
#                                                                               #
# ###############################################################################

import os
import subprocess
import re
from physpetool.phylotree.log import getLogging
from physpetool.softwares.path import getlocalpath

# Module-level logger obtained from the project's getLogging helper under the name 'trimal'.
loggtrimal = getLogging('trimal')


# trimal -in concatenate.fasta -out output2 -gt 1 -phylip3.2

def dotrimal(indata, trimalpara):
    """
    run trimal after muscle alignment and concatenation
    :param indata: a fasta file input to do trimal
    :param outdata: append name after
    :return: a file path of trimal result
    """
    # Deal with outdata name
    trimalparas = trimalpara.lstrip()
    trimalpath = getlocalpath()
    out_path = os.path.dirname(indata)
Exemplo n.º 13
0
from physpetool.phylotree.log import getLogging

# Obtain a logger named 'wwww' from the project's getLogging helper and emit one
# message at each of the error, info and debug levels.
# NOTE(review): looks like a manual check of the logging setup — confirm.
testlog = getLogging('wwww')
testlog.error("~~~~~~~~~~~~~~test1.")
testlog.info("~~~~~~~~~~~~~~test2.")
testlog.debug("~~~~~~~~~~~~~~~~~~~~~~~~~test3.")
Exemplo n.º 14
0
# along with PhySpeTree. If not, see <http://www.gnu.org/licenses/>.            #
#                                                                               #
# ###############################################################################
"""
Module to call clustalw2 to do alignment

"""

# clustalw2 -INFILE=p1.fasta -TYPE=PROTEIN -OUTPUT=FASTA -ALIGN -OUTFILE=test/clustalw_p2.fasta
import os
import subprocess
from physpetool.phylotree.log import getLogging
from physpetool.softwares.path import getlocalpath
from physpetool.utils.outdirforamt import timeformat

# Module-level logger obtained from the project's getLogging helper.
# NOTE(review): the logger name is spelled 'clutalw2' — possibly a typo for
# 'clustalw2'; confirm before changing, since it alters the emitted logger name.
logdoclustalw = getLogging('clutalw2')


def doclustalw(indata, outdata, clustalwpara):
    """
    Call clustalw software to do align
    :param indata: a directory containing FASTA files, or a single FASTA-format file
    :param outdata: the out is abs path with a file name
    :return: outdata path
    """
    logdoclustalw.debug("clustalw input data:{0}".format(indata))
    type = "-TYPE=DNA"
    if clustalwpara is None:
        clustalwparas = type
    else:
        clustalwparas = type + " " + clustalwpara.lstrip()
Exemplo n.º 15
0
# ###############################################################################


"""
Check input file is right.

"""

from physpetool.database.dbpath import getlocaldbpath
from physpetool.phylotree.log import getLogging
import os

from physpetool.utils.checkIsNum import is_number

# Path returned by getlocaldbpath(), resolved once when this module is imported;
# check_organism() joins the db_list file name onto it.
dbpath = getlocaldbpath()
# Module-level logger obtained from the project's getLogging helper under the name 'Checking organisms'.
logchecking = getLogging('Checking organisms')


def check_organism(input, db_list):
    """
    check input organism
    :param input: a list contain species name
    :param db_list: a list file contain organism in corresponding database
    :return: inputlist: match in database mislist: can't match in database
    """
    originaList = input

    inputlist = []
    mislist = []
    spelist = os.path.join(dbpath, db_list)
    for line in originaList:
Exemplo n.º 16
0
#                                                                               #
# You should have received a copy of the GNU Lesser General Public License      #
# along with PhySpeTree. If not, see <http://www.gnu.org/licenses/>.            #
#                                                                               #
# ###############################################################################
"""
Function to call FastTree to reconstruct a tree.
"""
import os

import subprocess

from physpetool.phylotree.log import getLogging
from physpetool.softwares.path import getlocalpath

# Module-level logger obtained from the project's getLogging helper under the name 'FastTree'.
logdofasttree = getLogging('FastTree')


def doFastTree(inputfile, outputfile, FastTreepara, thread):
    # Use FASTA format build tree
    input_fasta = inputfile.replace('.phy', '')
    FastTreePath = getlocalpath()
    thread_to_str = str(thread)
    out_tree_name = os.path.join(outputfile, "FastTree.tree")
    if not os.path.exists(outputfile):
        os.mkdir(outputfile)
    if thread_to_str is '1':
        cmd = FastTreePath + "/FastTree " + FastTreepara + " " + input_fasta + " >" + out_tree_name
        subprocess.call(cmd, shell=True)
    else:
        # set the threads
Exemplo n.º 17
0
# along with PhySpeTree. If not, see <http://www.gnu.org/licenses/>.            #
#                                                                               #
# ###############################################################################

"""
Module to call mafft to do alignment
"""

import subprocess
import os
import os.path
import time
from physpetool.phylotree.log import getLogging
from physpetool.softwares.path import getlocalpath

# Module-level logger obtained from the project's getLogging helper under the name 'mafft'.
logmafft = getLogging('mafft')


# mafft --auto p1.fasta > output
def domafft(indata, outdata, mafftparas):
    """
    call mafft software to do align
    :param indata: a directory containing FASTA files, or a single FASTA-format file
    :param outdata: the out is abs path with a file name
    :return: outdata path
    """
    mafftparas = mafftparas.lstrip()
    mapath = getlocalpath()
    out_path = os.path.dirname(outdata)
    timeformat = '%Y%m%d%H%M%S'
    timeinfo = str(time.strftime(timeformat))
Exemplo n.º 18
0
from physpetool.utils.checkinputfile import checkKeggOrganism, checkSilvaOrganism, checkFile, recovery, recovery_silva
import argparse
import os

# Short description string for this command (usage is outside this excerpt).
APP_DESC = "Reconstruct"

# Module-level parameter strings for the external tools.
# NOTE(review): presumably the defaults passed on each tool's command line when
# the user supplies no override — the consuming code is not visible here; confirm.
# RAxML: one variant for protein data (_pro) and one for DNA (_dna).
raxmlpara_pro = "-f a -m PROTGAMMAJTTX  -p 12345 -x 12345 -# 100 -n T1"
raxmlpara_dna = "-f a -m GTRGAMMA  -p 12345 -x 12345 -# 100 -n T1"
# muscle
musclepara = '-maxiters 100'
# Gblocks: protein (_pro) and DNA (_dna) variants.
gblockspara_pro = '-t=p -b5=h -e=-gb1'
gblockspara_dna = '-t=d -b5=h -e=-gb1'
# clustalw: no extra parameters by default.
clustalwpara = None
# trimal
trimalpara = "-gt 1"
# mafft
mafftpara = "--auto"

# Module-level logger obtained via getLogging under the name 'Used time'.
auto_build_log = getLogging('Used time')


def start_args(input):
    """
Argument parse
    :param input: arguments
    """
    autobuild_args = input.add_argument_group("AUTOBUILD OPTIONS")
    advance_args = input.add_argument_group("ADVANCE OPTIONS")
    autobuild_args.add_argument(
        '-i',
        nargs='?',
        dest='spenames',
        type=argparse.FileType('r'),
        help=
Exemplo n.º 19
0
# PhySpeTree is distributed in the hope that it will be useful, but WITHOUT ANY #
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS     #
# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more  #
# details.                                                                      #
#                                                                               #
# You should have received a copy of the GNU Lesser General Public License      #
# along with PhySpeTree. If not, see <http://www.gnu.org/licenses/>.            #
#                                                                               #
# ###############################################################################
"""
Convert FASTA(.fasta) format to PHYLIP(.phy) format

"""
from physpetool.phylotree.log import getLogging

# Module-level logger obtained from the project's getLogging helper under the name 'convert'.
logfasta2phy = getLogging('convert')


# function to read fasta file
def read_fasta(fp):
    """
    read *.fasta file and parse
    :param fp: a fasta format file
    """
    seq_name, seq_pro = None, []
    for line in fp:
        line = line.rstrip()
        if line.startswith(">"):
            if seq_name: yield (seq_name, ''.join(seq_pro))
            seq_name, seq_pro = line.replace(">", ""), []
        else:
Exemplo n.º 20
0
# You should have received a copy of the GNU Lesser General Public License      #
# along with PhySpeTree. If not, see <http://www.gnu.org/licenses/>.            #
#                                                                               #
# ###############################################################################
"""
Module to call mafft to do alignment
"""
import multiprocessing
import subprocess
import os
import os.path
import time
from physpetool.phylotree.log import getLogging
from physpetool.softwares.path import getlocalpath

# Module-level logger obtained from the project's getLogging helper under the name 'mafft'.
logmafft = getLogging('mafft')


# mafft --auto p1.fasta > output
def domafft(indata, outdata, mafftparas):
    """
    call mafft software to do align
    :param indata: a directory containing FASTA files, or a single FASTA-format file
    :param outdata: the out is abs path with a file name
    :return: outdata path
    """
    mafftparas = mafftparas.lstrip()
    mapath = getlocalpath()
    out_path = os.path.dirname(outdata)
    timeformat = '%Y%m%d%H%M%S'
    timeinfo = str(time.strftime(timeformat))
Exemplo n.º 21
0
#                                                                               #
# You should have received a copy of the GNU Lesser General Public License      #
# along with PhySpeTree. If not, see <http://www.gnu.org/licenses/>.            #
#                                                                               #
# ###############################################################################
"""
Gblocks module: do gblocks before aligned sequences.

"""
import os
import subprocess
import re
from physpetool.phylotree.log import getLogging
from physpetool.softwares.path import getlocalpath

# Module-level logger obtained from the project's getLogging helper under the name 'Gblocks'.
loggblocks = getLogging('Gblocks')

# Gblocks protein_alignment.fasta -t=p -e=-gb1 -b4=5 -d=y


def dogblocks(indata, gblockpara):
    """
    run gblocks after muscle alignment and concatenation
    :param indata: a fasta file input after gblock
    :param gblockpara: the gblocks para
    :return: a file path of gblocks result
    """
    # Deal with outdata name
    gblockparas = gblockpara.lstrip()
    gblockparalist = gblockparas.split(" ")
    regex = '-e='
Exemplo n.º 22
0
#                                                                               #
# You should have received a copy of the GNU Lesser General Public License      #
# along with PhySpeTree. If not, see <http://www.gnu.org/licenses/>.            #
#                                                                               #
# ###############################################################################
"""
Function to call iqtree to reconstruct a tree.
"""
import os

import subprocess

from physpetool.phylotree.log import getLogging
from physpetool.softwares.path import getlocalpath

# Module-level logger obtained from the project's getLogging helper under the name 'iqtree'.
logdoiqtree = getLogging('iqtree')


def doiqtree(inputfile, outputfile, iqtreepara, thread):
    # Use FASTA format build tree
    # input_fasta = inputfile.replace('.phy', '')
    iqtreePath = getlocalpath()
    thread_to_str = str(thread)
    out_tree_name = os.path.join(outputfile, "iqtree.tree")
    if not os.path.exists(outputfile):
        os.mkdir(outputfile)
    if thread_to_str is '1':
        cmd = iqtreePath + "/iqtree " + "-s " + inputfile + " -pre " + out_tree_name + iqtreepara
        subprocess.call(cmd, shell=True)
    else:
        # set the threads