def mgs(config, name):
    """Set up per-subgroup MGS directories and collect the MGS shell commands.

    For every subgroup named in the ``group`` option of the ``[param]``
    section, a sub-directory is created under the work directory and the
    matching ``<subgroup>_group.list`` in the material directory is parsed.
    Subgroups that satisfy the sample-size rule get their group list copied
    in and the MGS command lines appended; all others get a
    ``Sample_not_enough.log`` file instead.

    Args:
        config: Path of the pipeline config file; its directory is used as
            the project root.
        name: Name of the step directory created under the project root.

    Returns:
        list: Shell command strings in the order they were appended.
    """
    print(gettime("stat 10.mgs"))
    commands = []
    main_dir = os.path.dirname(config)
    work_dir = '%s/%s' % (main_dir, name)
    material_dir = '%s/material' % main_dir
    if not os.path.isdir(work_dir):
        mkdir(work_dir)

    config_group = ConfigParser()
    config_group.read(config)
    # The pattern must not end in an empty alternative: Python 2 ignores
    # empty matches in re.split, but Python 3.7+ splits on them, which would
    # cut the option value into single characters.
    group = re.split(r'\s+|,\s*|\t+|,\t*', config_group.get('param', 'group'))

    for subgroup_name in group:
        subgroup_filename = subgroup_name
        mkdir("%s/%s" % (work_dir, subgroup_filename))

        sample_num_in_groups, min_sample_num_in_groups, sample_num_total, group_num = parse_group(
            "%s/%s_group.list" % (material_dir, subgroup_name))
        enough_samples = (min_sample_num_in_groups >= 5
                          and sample_num_total >= 20
                          and group_num == 2)
        if enough_samples:
            os.system("cp %s/%s_group.list %s/%s/group.list" % (material_dir, subgroup_name, work_dir, subgroup_filename))
            # NOTE(review): this command set is appended once per qualifying
            # subgroup although every command already loops over all
            # sub-directories -- confirm the repetition is intended.
            # NOTE(review): the gene profile is referenced two levels up but
            # the gene catalog one level up -- confirm both relative paths.
            commands.append("## mgs start")
            commands.append('ls | while read a; do if [ -f "$a/group.list" ];then python %s/full_MGS_llf.py -p ../../06.gene_profile/gene.profile -g $a/group.list -d $a/; fi; done' % (bin_mgs_default_dir))
            commands.append('ls | while read a; do if [ -f "$a/group.list" ];then cd $a;sh work.sh;cd -; fi; done')
            commands.append('ls | while read a; do if [ -f "$a/group.list" ];then python %s/mgs_taxonomy.py -i $a/pathway/ -g ../05.gene_catalog/gene_catalog.fna -o $a/taxonomy/ --group $a/group.list; fi; done' % (bin_mgs_default_dir))
            commands.append('ls | while read a; do if [ -f "$a/group.list" ];then cd $a/taxonomy/;sh mgs_taxonomy.sh;cd -; fi; done')
        else:
            # The context manager closes the log; the original referenced
            # ``log.close`` without calling it.
            with open("%s/%s/Sample_not_enough.log" % (work_dir, subgroup_filename), "w+") as log:
                log.write("min_sample_num_in_groups >= 5 and sample_num_total >= 20 and group_num == 2")
    return commands
示例#2
0
def gene_profile_pre(config, name):
    """Create the gene-profile work directories and return the step commands.

    The step directory and its ``database/`` sub-directory are created
    immediately; the returned commands copy the clean-reads list, build the
    index and calculate gene abundance through ``qsge`` submissions.

    Args:
        config: Path of the pipeline config file; its directory is used as
            the project root.
        name: Name of the step directory created under the project root.

    Returns:
        list: Shell command strings in execution order.
    """
    print(gettime("start 06.gene_profile_pre"))
    work_dir = '%s/%s' % (os.path.dirname(config), name)
    mkdir(work_dir)

    commands = [
        "cp %s/../01.clean_reads/clean_reads.list %s/clean_reads.list" % (work_dir, work_dir),
        "## build index",
    ]

    mkdir("%s/database/" % work_dir)
    commands.extend([
        # These two entries are emitted as shell comments (leading '#').
        "#ln -s %s/../05.gene_catalog/gene_catalog.fna %s/database/" % (work_dir, work_dir),
        "#2bwt-builder %s/database/gene_catalog.fna" % work_dir,
        "python %s/genebuild.py -d %s" % (bin_gene_profile_default_dir, work_dir),
        "/data_center_03/USER/zhongwd/bin/qsge --queue all.q --memery 10G --jobs 1 --prefix BI --lines 1 shell/2bwt_builder.sh",
        "## calculate gene abundance",
        "perl %s/geneabundance.pl %s/clean_reads.list database/gene_catalog.fna %s/../05.gene_catalog/gene_catalog.length %s/"
        % (bin_gene_profile_default_dir, work_dir, work_dir, work_dir),
        "/data_center_03/USER/zhongwd/bin/qsge --queue all.q --memery 10G --jobs 50 --prefix MA --lines 1 shell/match.sh",
        "/data_center_03/USER/zhongwd/bin/qsge --queue all.q --memery 10G --jobs 10 --prefix AB --lines 2 shell/abun.sh",
    ])

    print(gettime("end 06.gene_profile_pre"))
    return commands
示例#3
0
def gene_catalog(config, name):
    """Return the commands that post-process the gene catalog.

    The commands translate the catalog to protein, gzip the FASTA files,
    produce statistics and a length histogram, and split the protein file.
    Nothing is created on disk by this function itself.

    Args:
        config: Path of the pipeline config file; its directory is used as
            the project root.
        name: Name of the step directory under the project root.

    Returns:
        list: Shell command strings in execution order.
    """
    print(gettime("start 05.gene_catalog"))
    work_dir = '%s/%s' % (os.path.dirname(config), name)
    src_dst = (work_dir, work_dir)  # most commands read from and write to work_dir

    commands = [
        command_default + "perl %s/cds2pep.pl %s/gene_catalog.fna %s/gene_catalog.faa"
        % ((tool_default_dir,) + src_dst),
        "gzip -c %s/redundant.gene_catalog.fna > %s/redundant.gene_catalog.fna.gz" % src_dst,
        "gzip -c %s/gene_catalog.fna > %s/gene_catalog.fna.gz" % src_dst,
        "gzip -c %s/gene_catalog.faa > %s/gene_catalog.faa.gz" % src_dst,
        "## info of gene catalog",
        command_default + "perl %s/gene_catalog.stat.pl < %s/gene_catalog.fna > %s/gene_catalog.stat.tsv"
        % ((bin_gene_catalog_default_dir,) + src_dst),
        "perl %s/lengthfasta.pl %s/gene_catalog.fna > %s/gene_catalog.length"
        % ((tool_default_dir,) + src_dst),
        "Rscript %s/../04.gene_predict/gene.histogram.R %s/gene_catalog.length %s/gene_catalog.length.histogram.pdf"
        % ((bin_gene_catalog_default_dir,) + src_dst),
        "convert -density 300 %s/gene_catalog.length.histogram.pdf %s/gene_catalog.length.histogram.png" % src_dst,
        "## split gene catalog",
        "perl %s/cutfasta.pl %s/gene_catalog.faa 10 > %s/gene_catalog.split.list"
        % ((tool_default_dir,) + src_dst),
    ]

    print(gettime("end 05.gene_catalog"))
    return commands
示例#4
0
def gene_predict(config, name):
    """Create the gene-prediction directories and return the step commands.

    The step directory and its ``histogram/`` sub-directory are created
    immediately. The returned commands compute ORF statistics, compress the
    GFF/FASTA outputs and draw per-sample gene-length histograms.

    Args:
        config: Path of the pipeline config file; its directory is used as
            the project root.
        name: Name of the step directory created under the project root.

    Returns:
        list: Shell command strings in execution order.
    """
    print(gettime("start 04.gene_predict"))
    work_dir = '%s/%s' % (os.path.dirname(config), name)

    parser = ConfigParser()
    parser.read(config)
    # The value is not used below; the lookup is kept so a config without
    # [param] ins_list still fails here as before.
    ins_list = parser.get("param", "ins_list")

    mkdir(work_dir)
    commands = [
        "ls gene/*fna | perl %s/stat.pl > orf.stat.tsv" % bin_gene_predict_default_dir,
        "ls gff/*gff | sed 's/.gff//g' | while read a ; do gzip -c $a.gff > $a.gff.gz;done",
        "ls gene/*fna | sed 's/.fna//g' | while read a ; do perl %s/cds2pep.pl $a.fna $a.faa; gzip -c $a.fna > $a.fna.gz; gzip -c $a.faa > $a.faa.gz; done"
        % tool_default_dir,
        "## histogram",
    ]

    mkdir("%s/histogram/" % work_dir)
    commands += [
        "cut -f 1 gene.list | while read a; do /data_center_03/USER/zhongwd/bin/lengthfasta gene/$a.gene.fna > histogram/$a.gene.length; done",
        "cut -f 1 gene.list | while read a; do Rscript %s/gene.histogram.R histogram/$a.gene.length histogram/$a.gene.histogram.pdf; done"
        % bin_gene_predict_default_dir,
        "cut -f 1 gene.list | while read a; do convert -density 300 histogram/$a.gene.histogram.pdf histogram/$a.gene.histogram.png; done",
    ]

    print(gettime("end 04.gene_predict"))
    return commands
示例#5
0
def use_old_version(config, name):
    """Create the "old version" taxon-profile directory and return its commands.

    Args:
        config: Path of the pipeline config file; its directory is used as
            the project root.
        name: Name of the step directory under the project root.

    Returns:
        tuple: ``(work_dir, commands)`` where ``work_dir`` is the created
        ``preprocess_for_taxon_profile/old_version`` directory and
        ``commands`` is the list of shell command strings.
    """
    print(gettime('start create old version step script'))
    main_dir = os.path.dirname(config)
    work_dir = '%s/%s/preprocess_for_taxon_profile/old_version' % (main_dir, name)
    profile_dir = '%s/profile' % work_dir
    mkdir(work_dir, profile_dir)

    commands = [
        '## calculate abundance',
        'cp %s/01.clean_reads/clean_reads.list ./' % main_dir,
        '%s/speciesabundance.pl %s/01.clean_reads/clean_reads.list .' % (bin_dir, main_dir),
        'nohup /data_center_03/USER/zhongwd/bin/qsge --queue all.q --memery 15G --jobs 10 --prefix MA --lines 1 shell/match.sh &',
        'nohup /data_center_03/USER/zhongwd/bin/qsge --queue all.q --memery 15G --jobs 10 --prefix AB --lines 2 shell/abun.sh &',
        '## form species profile',
        'ls alignment/*/*root.abundance >abund.list',
        'python %s/02_taxonomy.py -i abund.list' % bin_dir,
        'rm abund.list',
        'for i in all phylum class order family genus species; do ls alignment/*/*$i.abundance |perl %s/201_profile - >profile/$i.profile; done' % bin_dir,
        'num=1;for i in phylum class order family genus species; do let num=num+1; python %s/201_profile_convert.py -i profile/$i.profile -o profile/otu_table_L$num.txt; done' % bin_dir,
        'ls profile/* | while read a; do cp $a ../../taxon_profile; done',
        '## reads use rate',
        # The use-rate commands are emitted as shell comments (leading '#').
        '#ls alignment/*/*.MATCH.logs >match_logs.list',
        '#python %s/201_use_rate.py -i match_logs.list -o use_rate.stat.tsv -clean %s/00.raw_reads/qc_stat.tsv' % (bin_dir, main_dir),
        '#rm match_logs.list',
    ]
    return work_dir, commands
def use_other_method(config, name):
    """Create the placeholder "other" assembly directory; emits no commands.

    Args:
        config: Path of the pipeline config file; its directory is used as
            the project root.
        name: Name of the step directory under the project root.

    Returns:
        tuple: ``(work_dir, commands)`` with an empty command list.
    """
    print(gettime('start create other step script'))
    project_root = os.path.dirname(config)
    work_dir = '%s/%s/preprocess_for_assembly/other' % (project_root, name)
    mkdir(work_dir)
    print('This method is not complete,please select other method!')
    return work_dir, []
示例#7
0
def use_kraken2_method(config, name):
    """Create the placeholder kraken2 taxon-profile directory; emits no commands.

    Args:
        config: Path of the pipeline config file; its directory is used as
            the project root.
        name: Name of the step directory under the project root.

    Returns:
        tuple: ``(work_dir, commands)`` with an empty command list.
    """
    print(gettime('start create kraken2 step script'))
    project_root = os.path.dirname(config)
    work_dir = '%s/%s/preprocess_for_taxon_profile/kraken2' % (project_root, name)
    mkdir(work_dir)
    print('This method is not complete,please select other method!')
    return work_dir, []
def eggnog(config, name):
    """Create the eggNOG work directories and return the annotation commands.

    The returned commands merge the blat results, pick hits, annotate them
    and count functional categories. When the total sample count is at most
    10, per-sample counting commands are added. Per-subgroup counting and
    plotting commands are added for every subgroup named in the ``group``
    option of the ``[param]`` section.

    Args:
        config: Path of the pipeline config file; its directory is used as
            the project root.
        name: Name of the step directory created under the project root.

    Returns:
        list: Shell command strings in execution order.
    """
    # The original logged "end 08.eggnog" here, which made the start of the
    # step indistinguishable from its end in the log.
    print(gettime("start 08.eggnog"))
    commands = []
    main_dir = os.path.dirname(config)
    work_dir = '%s/%s' % (main_dir, name)
    mkdir(work_dir)
    # -f: on a first run all.m8 does not exist yet and a plain rm would fail.
    commands.append("rm -f %s/blat/all.m8" % work_dir)
    commands.append("cat %s/blat/* > %s/blat/all.m8" % (work_dir, work_dir))
    commands.append(command_default + "python %s/701_pick_blast_m8.py -i %s/blat/all.m8 -o %s/eggnog.m8"
                    % (bin_kegg_default_dir, work_dir, work_dir))
    commands.append(command_default + "perl %s/03_get_annot_info.pl %s/eggnog.m8 /data_center_02/Database/eggNOGv4.0/all.members.txt /data_center_02/Database/eggNOGv4.0/all.description.txt /data_center_02/Database/eggNOGv4.0/all.funccat.txt %s/eggnog.m8.tab"
                    % (bin_eggnog_default_dir, work_dir, work_dir))
    commands.append("perl %s/04_get_count.pl %s/eggnog.m8.tab /data_center_02/Database/eggNOGv4.0/eggnogv4.funccats.txt %s/eggnog.tab"
                    % (bin_eggnog_default_dir, work_dir, work_dir))
    commands.append(command_default + "perl /data_center_07/Project/RY2015K16A01-1/08.eggnog/bin/eggnog.annotation.pl < %s/eggnog.m8.tab > %s/eggnog.anno.tsv"
                    % (work_dir, work_dir))

    # Read the subgroup names.
    config_gene = ConfigParser()
    config_gene.read(config)
    group = re.split(r"\s+|\t|,\s*|,\t+", config_gene.get("param", "group"))
    sample_names = config_gene.get("param", "sample_name")
    # NOTE(review): other steps pass a "<subgroup>_group.list" path to
    # parse_group; here it receives the raw ``sample_name`` option -- confirm
    # that option holds a path parse_group can read.
    sample_num_in_groups, min_sample_num_in_groups, sample_num_total, group_num = parse_group(
        sample_names)

    if sample_num_total <= 10:
        # Few samples: additionally count functional categories per sample.
        mkdir("%s/samples" % work_dir)
        commands.append("cut -f 1 %s/../01.clean_reads/clean_reads.list | while read a ; do cut -f 1 %s/../06.gene_profile/alignment/$a/$a.gene.abundance > %s/samples/$a.gene.list; done"
                        % (work_dir, work_dir, work_dir))
        commands.append("ls %s/samples/*gene.list | sed 's/.gene.list//g'|while read a; do perl %s/04_get_countlist.pl %s/eggnog.m8.tab /data_center_02/Database/eggNOGv4.0/eggnogv4.funccats.txt $a.gene.list $a.eggnog.tab;done"
                        % (work_dir, bin_eggnog_default_dir, work_dir))
        commands.append("ls %s/samples/*.eggnog.tab | sed 's/.eggnog.tab//g' | while read a;do cut -f 3,4 $a.eggnog.tab > $a.eggnog.count.tab; done"
                        % (work_dir))
        commands.append("ls %s/samples/*.eggnog.count.tab | /data_center_03/USER/zhongwd/bin/profile - > %s/eggnog.count.tab"
                        % (work_dir, work_dir))
        commands.append("Rscript /data_center_04/Projects/pichongbingdu/pair_reads/08.eggnog/NOG.R %s/eggnog.count.tab"
                        % work_dir)

    for subgroup_name in group:
        subgroup = '%s/material/%s_group.list' % (main_dir, subgroup_name)
        work_dir_01 = "%s/group/%s/" % (work_dir, subgroup_name)
        mkdir(work_dir_01)
        commands.append(
            "## ----------------------------------%s----------------------" %
            (subgroup_name))
        commands.append("cd %s; perl /data_center_06/Project/pracrice/yehaocheng_20160120/08.eggnog/bin/profile2list.pl %s %s/../06.gene_profile/gene.profile; cd -"
                        % (work_dir_01, subgroup, work_dir))
        commands.append("ls %s/*gene.list | sed 's/.gene.list//g'|while read a; do perl %s/04_get_countlist.pl %s/eggnog.m8.tab /data_center_02/Database/eggNOGv4.0/eggnogv4.funccats.txt $a.gene.list $a.eggnog.tab;done"
                        % (work_dir_01, bin_eggnog_default_dir, work_dir))
        commands.append("ls %s/*.eggnog.tab | sed 's/.eggnog.tab//g' | while read a;do cut -f 3,4 $a.eggnog.tab > $a.eggnog.count.tab; done"
                        % (work_dir_01))
        commands.append("ls %s/*.eggnog.count.tab | /data_center_03/USER/zhongwd/bin/profile - > %s/eggnog.count.tab"
                        % (work_dir_01, work_dir_01))
        commands.append("cd %s;Rscript /data_center_04/Projects/pichongbingdu/pair_reads/08.eggnog/NOG.R eggnog.count.tab;cd -"
                        % (work_dir_01))
        commands.append("convert -density 300 %s/NOG.pdf %s/NOG.png" %
                        (work_dir_01, work_dir_01))
    print(gettime("end 08.eggnog"))
    return commands
示例#9
0
def ardb(config, name):
    """Create the ARDB work directories and return the annotation commands.

    The returned commands build the gene-to-ARDB table and the type/class
    profiles. For every subgroup named in the ``group`` option of the
    ``[param]`` section they then run class and type differential analysis
    and bar plots; subgroups with exactly two groups additionally get the
    "dimond swarm" and "top ardb" plots.

    Args:
        config: Path of the pipeline config file; its directory is used as
            the project root.
        name: Name of the step directory created under the project root.

    Returns:
        list: Shell command strings in execution order.
    """
    print(gettime("start 09.ardb"))
    commands = []
    main_dir = os.path.dirname(config)
    work_dir = '%s/%s' % (main_dir, name)
    mkdir(work_dir)
    commands.append("## blat mapping")
    commands.append("cat blat/* > all.m8")
    commands.append("pick_blast_m8 all.m8 > ardb.m8")
    commands.append("cut -f 2 ardb.m8 | search - /data_center_03/Project/AS/16_ARDB/old/ardbAnno1.0_modify_db07/tabs/ardb.tab | paste ardb.m8 - | cut -f 1,13-  > gene2ardb.tsv")
    commands.append("classprofile -i gene2ardb.tsv -p ../06.gene_profile/gene.profile -f 3 > ardb.type.profile")
    commands.append("classprofile -i gene2ardb.tsv -p ../06.gene_profile/gene.profile -f 4 > ardb.class.profile")
    commands.append("Rscript /data_center_07/Project/RY2015K16A01-1/09.ardb/bin/ardb.barplot.r\n")
    commands.append("(echo -e 'Gene ID\tProtein name\tType\tClass\tDescription'; cat gene2ardb.tsv) > ardb.anno.tsv")

    # Per-subgroup analysis.
    config_gene = ConfigParser()
    config_gene.read(config)
    group = re.split(r"\s+|\t|,\s*|,\t+", config_gene.get("param", "group"))
    for subgroup_name in group:
        subgroup = '%s/material/%s_group.list' % (main_dir, subgroup_name)
        sample_num_in_groups, min_sample_num_in_groups, sample_num_total, group_num = parse_group(subgroup)
        commands.append("## ----------------------------------%s----------------------" % (subgroup_name))

        # Differential analysis output directories.
        work_dir_901 = "%s/group/%s/01.class_diff/" % (work_dir, subgroup_name)
        mkdir(work_dir_901)
        work_dir_902 = "%s/group/%s/02.type_diff/" % (work_dir, subgroup_name)
        mkdir(work_dir_902)

        commands.append("#01 diff class")
        commands.append(command_default + "python %s/t08_diff.py -i %s/ardb.class.profile -g %s -o %s" % (tool_default_dir, work_dir, subgroup, work_dir_901))
        commands.append(command_default + "python %s/t09_diff_boxplot.py -i %s/diff.marker.filter.profile.tsv -p %s/diff.marker.filter.tsv -g %s -o %s/diff_boxplot/"
                        % (tool_default_dir, work_dir_901, work_dir_901, subgroup, work_dir_901))

        commands.append("#02 diff type")
        # The type comparison must read the type profile; the original passed
        # ardb.class.profile here, so 02.type_diff duplicated the class
        # results.
        commands.append(command_default + "python %s/t08_diff.py -i %s/ardb.type.profile -g %s -o %s" % (tool_default_dir, work_dir, subgroup, work_dir_902))
        commands.append(command_default + "python %s/t09_diff_boxplot.py -i %s/diff.marker.filter.profile.tsv -p %s/diff.marker.filter.tsv -g %s -o %s/diff_boxplot/"
                        % (tool_default_dir, work_dir_902, work_dir_902, subgroup, work_dir_902))

        commands.append("#03 function_barplot")
        commands.append(command_default + "Rscript %s/710_level1_barplot.R %s/ardb.class.profile %s/group/%s/ardb.class.pdf Class %s"
                        % (bin_ardb_default_dir, work_dir, work_dir, subgroup_name, subgroup))
        commands.append("convert -density 300 %s/group/%s/ardb.class.pdf %s/group/%s/ardb.class.png" % (work_dir, subgroup_name, work_dir, subgroup_name))
        commands.append(command_default + "Rscript %s/710_level1_barplot.R %s/ardb.type.profile %s/group/%s/ardb.type.pdf Type %s"
                        % (bin_ardb_default_dir, work_dir, work_dir, subgroup_name, subgroup))
        commands.append("convert -density 300 %s/group/%s/ardb.type.pdf %s/group/%s/ardb.type.png" % (work_dir, subgroup_name, work_dir, subgroup_name))

        if group_num == 2:
            # These plots are only generated for two-group comparisons.
            commands.append("#04 dimond swarm")
            commands.append(command_default + "Rscript %s/dimond_swarm.R %s/ardb.type.profile %s %s/group/%s/dimond_swarm.pdf"
                            % (bin_ardb_default_dir, work_dir, subgroup, work_dir, subgroup_name))
            commands.append("convert -density 300 %s/group/%s/dimond_swarm.pdf %s/group/%s/dimond_swarm.png" % (work_dir, subgroup_name, work_dir, subgroup_name))
            commands.append("#05 top ardb")
            commands.append(command_default + "Rscript %s/top_ardb.R %s/ardb.type.profile %s %s/group/%s/top_ardb.pdf"
                            % (bin_ardb_default_dir, work_dir, subgroup, work_dir, subgroup_name))
            commands.append("convert -density 300 %s/group/%s/top_ardb.pdf %s/group/%s/top_ardb.png" % (work_dir, subgroup_name, work_dir, subgroup_name))

    print(gettime("end 009.ardb"))
    return commands
示例#10
0
def ardb_pre(config, name):
    """Create the ARDB work directory and return the blat-mapping commands.

    Args:
        config: Path of the pipeline config file; its directory is used as
            the project root.
        name: Name of the step directory created under the project root.

    Returns:
        list: Shell command strings in execution order.
    """
    print(gettime("start 09.ardb_pre"))
    work_dir = '%s/%s' % (os.path.dirname(config), name)
    mkdir(work_dir)
    commands = [
        "## blat mapping",
        "cp /data_center_03/Project/AS/16_ARDB/db.list ./",
        "perl %s/blatprot.pl db.list %s/../05.gene_catalog/gene_catalog.split.list %s/" % (tool_default_dir, work_dir, work_dir),
        "nohup /data_center_03/USER/zhongwd/bin/qsge --queue all.q --memery 5G --jobs 10 --prefix AR --lines 1 --getmem shell/blat.sh &",
    ]
    print(gettime("end 09.ardb_pre"))
    return commands
def kegg_pre(config, name):
    """Create the KEGG work directory and return the blat-mapping commands.

    Args:
        config: Path of the pipeline config file; its directory is used as
            the project root.
        name: Name of the step directory created under the project root.

    Returns:
        list: Shell command strings in execution order.
    """
    print(gettime("start 07.kegg_pre"))
    work_dir = '%s/%s' % (os.path.dirname(config), name)
    mkdir(work_dir)
    commands = [
        "## blat mapping",
        "perl %s/blatprot.pl /data_center_01/home/NEOLINE/zwd/project/PMO/LiuLin-ascites-stool/07.kegg/db.list %s/../05.gene_catalog/gene_catalog.split.list %s/"
        % (tool_default_dir, work_dir, work_dir),
        "nohup /data_center_03/USER/zhongwd/bin/qsge --queue all.q --memery 6G --jobs 10 --prefix KEGG --lines 1 shell/blat.sh &",
    ]
    print(gettime("end 07.kegg_pre"))
    return commands
def clean_reads(config, name):
    """Create the clean-reads work directory and return the merge commands.

    The returned commands run ``merge.py`` on the sample list in the
    background and collect the first seven columns of the raw-reads QC tables
    into ``qc_stat.tsv``.

    Args:
        config: Path of the pipeline config file; its directory is used as
            the project root.
        name: Name of the step directory created under the project root.

    Returns:
        list: Shell command strings in execution order.
    """
    # The original logged "start/end raw_reads" here, the same text the
    # raw_reads step logs, so the two steps could not be told apart.
    print(gettime("start clean_reads"))
    commands = []
    main_dir = os.path.dirname(config)
    work_dir = '%s/%s' % (main_dir, name)
    mkdir(work_dir)
    commands.append('nohup python %s/merge.py -l %s/material/sample.list -c %s/ &' %
                    (bin_defdir, main_dir, work_dir))
    commands.append('awk -F "\\t" \'{print $1"\\t"$2"\\t"$3"\\t"$4"\\t"$5"\\t"$6"\\t"$7}\' %s/00.raw_reads/qc_*.stat.tsv > %s/qc_stat.tsv' %
                    (main_dir, work_dir))
    print(gettime("end clean_reads"))
    return commands
示例#13
0
def gene_catalog_pre(config, name):
    """Create the gene-catalog work directory and return the build commands.

    The returned commands concatenate the predicted genes into a redundant
    catalog and run ``cd-hit.pl`` on it.

    Args:
        config: Path of the pipeline config file; its directory is used as
            the project root.
        name: Name of the step directory created under the project root.

    Returns:
        list: Shell command strings in execution order.
    """
    print(gettime("start 05.gene_catalog_pre"))
    work_dir = '%s/%s' % (os.path.dirname(config), name)
    mkdir(work_dir)
    commands = [
        "## build gene catalog",
        "cat %s/../04.gene_predict/gene/*.fna > %s/redundant.gene_catalog.fna" % (work_dir, work_dir),
        "perl %s/cd-hit.pl %s/redundant.gene_catalog.fna %s/gene_catalog.fna 20"
        % (bin_gene_catalog_default_dir, work_dir, work_dir),
    ]
    print(gettime("end 05.gene_catalog_pre"))
    return commands
def cazy_pre(config, name):
    """Create the CAZy work directory and return the blat-mapping commands.

    Args:
        config: Path of the pipeline config file; its directory is used as
            the project root.
        name: Name of the step directory created under the project root.

    Returns:
        list: Shell command strings in execution order.
    """
    print(gettime("start 12.cazy_pre"))
    work_dir = '%s/%s' % (os.path.dirname(config), name)
    mkdir(work_dir)
    commands = [
        "## blat mapping",
        "perl %s/blatprot.pl /data_center_09/Project/lixr/00.DATA/CAZY_DB/db.list %s/../05.gene_catalog/gene_catalog.split.list %s"
        % (tools_dir, work_dir, work_dir),
        "nohup /data_center_03/USER/zhongwd/bin/qsge --queue all.q --memery 6G --jobs 10 --prefix CAZY --lines 1 shell/blat.sh &",
    ]
    print(gettime("end 12.cazy_pre"))
    return commands
def eggnog_pre(config, name):
    """Create the eggNOG work directory and return the blat-mapping commands.

    Args:
        config: Path of the pipeline config file; its directory is used as
            the project root.
        name: Name of the step directory created under the project root.

    Returns:
        list: Shell command strings in execution order.
    """
    # The original logged "end 08.eggnog_pre" both here and at the end of the
    # function, so the log never showed when the step started.
    print(gettime("start 08.eggnog_pre"))
    commands = []
    work_dir = '%s/%s' % (os.path.dirname(config), name)
    mkdir(work_dir)
    commands.append("## blat mapping")
    commands.append(
        "perl %s/blatprot.pl /data_center_06/Project/pracrice/yehaocheng_20160120/08.eggnog/db.list %s/../05.gene_catalog/gene_catalog.split.list %s"
        % (tool_default_dir, work_dir, work_dir))
    commands.append("nohup /data_center_03/USER/zhongwd/bin/qsge --queue all.q --memery 6G --jobs 10 --prefix NOG --lines 1 %s/shell/blat.sh &"
                    % work_dir)
    print(gettime("end 08.eggnog_pre"))
    return commands
示例#16
0
def gene_predict_pre(config, name):
    """Create the gene-prediction work directory and return the prediction commands.

    Args:
        config: Path of the pipeline config file; its directory is used as
            the project root.
        name: Name of the step directory created under the project root.

    Returns:
        list: Shell command strings in execution order.
    """
    print(gettime("start 04.gene_predict_pre"))
    work_dir = '%s/%s' % (os.path.dirname(config), name)

    parser = ConfigParser()
    parser.read(config)
    # The value is not used below; the lookup is kept so a config without
    # [param] ins_list still fails here as before.
    ins_list = parser.get("param", "ins_list")

    mkdir(work_dir)
    commands = [
        "## gene_predict",
        "perl %s/GenePredict.pl -s %s/../03.assembly/scaftigs.list -l 100 -d %s"
        % (bin_gene_predict_default_dir, work_dir, work_dir),
        "nohup /data_center_03/USER/zhongwd/bin/qsge --queue all.q --memery 1G --jobs 10 --prefix GP --lines 2 %s/shell/predict.sh &"
        % work_dir,
    ]
    print(gettime("end 04.gene_predict_pre"))
    return commands
示例#17
0
def html(config, sh_file, name):
    """Reset the report directory, stage its templates and return the build commands.

    The directory containing ``sh_file`` is emptied, the result/html/structure
    sub-directories are created and the structure templates are copied in
    immediately. The returned commands run the Python 3 report scripts that
    check and copy the result structure, build the per-group HTML images and
    render the final HTML.

    Args:
        config: Path of the pipeline config file.
        sh_file: Path of the step's shell script; its directory is the report
            work directory.
        name: Unused; accepted so the signature stays unchanged for callers.

    Returns:
        list: Shell command strings in execution order.
    """
    print(gettime("start html"))
    commands = []
    # Fall back to the current directory when sh_file has no directory part.
    # Otherwise work_dir would be "/" and the clean-up below would run
    # "rm -rf //*".
    work_dir = '%s/' % (os.path.dirname(sh_file) or '.')

    # Start from an empty work directory.
    if glob.glob(work_dir + '/*'):
        os.system('rm -rf %s/*' % work_dir)

    # Read the group names.
    config_gene = ConfigParser()
    config_gene.read(config)
    groups = re.split(r"\s+|\t|,\t+|,\s*", config_gene.get("html", "group"))
    group_dir = config_gene.get("param", "group_dir").strip()

    # Prepare the directory layout and configuration files.
    os.system("mkdir -p %s/result/result/" % work_dir)
    os.system("mkdir -p %s/result/html/" % work_dir)
    os.system("cp -r %s/json_structure/html_material/ %s/result/html/" % (bin_html_default_dir, work_dir))
    os.system("mkdir %s/data/" % work_dir)
    os.system("mkdir %s/result_structure/" % work_dir)
    os.system("mkdir %s/html_structure/" % work_dir)
    os.system("mkdir %s/json_structure/" % work_dir)

    os.system("cp %s %s/result_structure/" % (const.result_structure, work_dir))
    os.system("cp %s %s/html_structure/" % (const.html_structure, work_dir))
    os.system("cp %s %s/json_structure/" % (const.json_structure, work_dir))

    commands.append("/data_center_01/home/mas/python3.6/bin/python3 %s/result_structure/check_result_structure.py -g %s -c %s/result_structure/result_structure -o %s/ -so %s/result_structure/result_structure.new"
                    % (bin_html_default_dir, config, work_dir, work_dir, work_dir))
    # Emitted as a shell comment: optional variant that also writes the extra
    # result-structure file.
    commands.append("# 复制标准结果额外的文件夹\n# /data_center_01/home/mas/python3.6/bin/python3 %s/result_structure/check_result_structure.py -g %s -c %s/result_structure/result_structure -o %s/ -so %s/result_structure/result_structure.new -eo %s/result_structure/result_structure.extra"
                    % (bin_html_default_dir, config, work_dir, work_dir, work_dir, work_dir))

    commands.append("/data_center_01/home/mas/python3.6/bin/python3 %s/result_structure/cp_result_structure.py -c %s/result_structure/result_structure.new -so %s/result/result/ -do %s/data/"
                    % (bin_html_default_dir, work_dir, work_dir, work_dir))

    commands.append("/data_center_01/home/mas/python3.6/bin/python3 %s/html_structure/check_html_structure.py -c %s/html_structure/html_structure -p %s -o %s/html_structure/html_config/ -os html_structure -g %s"
                    % (bin_html_default_dir, work_dir, config, work_dir, group_dir))
    for group in groups:
        commands.append("/data_center_01/home/mas/python3.6/bin/python3 %s/html_structure/cp_html_structure.py -c %s/html_structure/html_config/%s_html_structure -o %s/result/html/html_material/images/%s/"
                        % (bin_html_default_dir, work_dir, group, work_dir, group))

    commands.append("/data_center_01/home/mas/python3.6/bin/python3 %s/json_structure/00.getJson.py -p %s -c %s/json_structure/json_structure -g %s -o %s/json_structure/json_structure.json -r %s/result/html/html_material/images/"
                    % (bin_html_default_dir, config, work_dir, group_dir, work_dir, work_dir))
    commands.append("/data_center_01/home/mas/python3.6/bin/python3 %s/json_structure/parse_html.py -j %s/json_structure/json_structure.json -t %s/json_structure/html_templates/ -o %s/result/html/"
                    % (bin_html_default_dir, work_dir, bin_html_default_dir, work_dir))

    print(gettime("end html"))
    return commands
示例#18
0
def cag(config, name):
    """Set up per-subgroup CAG directories and write a ``cag_pre.sh`` for each.

    For every subgroup named in the ``group`` option of the ``[param]``
    section, a sub-directory is created under the work directory. Subgroups
    with fewer than 20 samples in total get a ``Sample_not_enough.log``; the
    others get their group list copied in and a ``cag_pre.sh`` script written
    that runs the CAG, taxonomy and sequencing helper scripts.

    Args:
        config: Path of the pipeline config file; its directory is used as
            the project root.
        name: Name of the step directory created under the project root.

    Returns:
        None. Unlike the other step builders, the commands are written to
        per-subgroup script files rather than returned.
    """
    print(gettime("stat 11.cag"))
    main_dir = os.path.dirname(config)
    work_dir = '%s/%s' % (main_dir, name)
    material_dir = '%s/material' % main_dir
    if not os.path.isdir(work_dir):
        mkdir(work_dir)

    config_group = ConfigParser()
    config_group.read(config)
    # The pattern must not end in an empty alternative: Python 2 ignores
    # empty matches in re.split, but Python 3.7+ splits on them, which would
    # cut the option value into single characters.
    group = re.split(r'\s+|,\s*|\t+|,\t*', config_group.get('param', 'group'))
    for subgroup_name in group:
        subgroup_filename = subgroup_name
        mkdir("%s/%s" % (work_dir, subgroup_filename))

        sample_num_in_groups, min_sample_num_in_groups, sample_num_total, group_num = parse_group(
            "%s/%s_group.list" % (material_dir, subgroup_name))
        if sample_num_total < 20:
            # The context manager closes the log; the original referenced
            # ``log.close`` without calling it.
            with open("%s/%s/Sample_not_enough.log" % (work_dir, subgroup_filename), "w+") as log:
                log.write("The minimum sample size (20) is not met.")
            continue

        os.system(
            "cp %s/%s_group.list %s/%s/group.list" %
            (material_dir, subgroup_name, work_dir, subgroup_filename))
        grp_sh = [
            "python %s/full_CAG.py -p %s/../06.gene_profile/gene.profile -d %s/%s -g %s/%s/group.list"
            % (bin_cag_default_dir, work_dir, work_dir, subgroup_filename,
               work_dir, subgroup_filename),
            "python %s/cag_taxonomy.py -i %s/%s/outfile/cag -g %s/../05.gene_catalog/gene_catalog.fna -o %s/%s/taxonomy/"
            % (bin_cag_default_dir, work_dir, subgroup_filename, work_dir,
               work_dir, subgroup_filename),
            "python %s/cag_exe_sequence.py -d %s/%s" %
            (bin_cag_default_dir, work_dir, subgroup_filename),
            "\n",
        ]

        with open('%s/%s/cag_pre.sh' % (work_dir, subgroup_filename), 'w') as outf:
            outf.write('\n'.join(grp_sh))
    print(gettime("end 11.cag"))
def raw_reads(config, name):
    """Create the raw-reads work directory and return the QC command.

    Args:
        config: Path of the pipeline config file; its directory is used as
            the project root and the path itself is passed to ``QC_main.py``.
        name: Name of the step directory created under the project root.

    Returns:
        list: A single shell command string that runs ``QC_main.py``.
    """
    print(gettime("start raw_reads"))
    main_dir = os.path.dirname(config)
    work_dir = '%s/%s' % (main_dir, name)
    mkdir(work_dir)
    # The Q20/Q30 statistics commands (Q20_Q30_stat*.py) are currently
    # disabled and not emitted.
    qc_command = ('python %s/QC_main.py -b %s/material/batch.list -c %s/material/config.list -p %s'
                  % (bin_default_dir, main_dir, main_dir, config))
    commands = [qc_command]
    print(gettime("end raw_reads"))
    return commands
示例#20
0
def use_snakemake_method(config, name):
    """Prepare the snakemake taxon-profile directory and return its commands.

    The work directory is created, the template ``config.yaml`` and
    ``cluster.yaml`` are loaded, pointed at this project's paths and written
    into the work directory, and the Snakefile is copied in. The returned
    commands run snakemake on the cluster and then build the species
    profiles.

    Args:
        config: Path of the pipeline config file; its directory is used as
            the project root.
        name: Name of the step directory under the project root.

    Returns:
        tuple: ``(work_dir, commands)`` where ``work_dir`` is the created
        ``preprocess_for_taxon_profile/snakemake_method`` directory and
        ``commands`` is the list of shell command strings.
    """
    print(gettime('start create snakemake step script'))
    commands = []
    main_dir = os.path.dirname(config)
    work_dir = '%s/%s/preprocess_for_taxon_profile/snakemake_method' % (main_dir, name)
    mkdir(work_dir)

    # Update config.yaml. safe_load is used because yaml.load without an
    # explicit Loader can construct arbitrary objects and is rejected by
    # PyYAML >= 6.
    with open(const.config_yaml, 'r') as inf:
        data = yaml.safe_load(inf)
    data['clean_reads_dir'] = '%s/01.clean_reads' % main_dir
    data['clean_reads_list'] = '%s/clean_reads.list' % work_dir
    data['outdir'] = '%s/alignment' % work_dir
    with open('%s/config.yaml' % work_dir, 'w') as outf:
        yaml.dump(data, outf, default_flow_style=False)

    # Update cluster.yaml: point every rule's qsub log at this work directory.
    with open(const.cluster_yaml, 'r') as inf:
        data = yaml.safe_load(inf)
    data['__default__']['qsublog'] = '%s/log/' % work_dir
    data['align']['qsublog'] = '%s/log/align/' % work_dir
    data['abund']['qsublog'] = '%s/log/abund/' % work_dir
    data['abund_profile']['qsublog'] = '%s/log/' % work_dir
    with open('%s/cluster.yaml' % work_dir, 'w') as outf:
        yaml.dump(data, outf, default_flow_style=False)

    # Stage the Snakefile and the log/profile directories.
    os.system('cp %s %s/Snakefile' % (const.snakemake, work_dir))
    mkdir('%s/log/align/' % work_dir, '%s/log/abund/' % work_dir, '%s/profile' % work_dir)

    commands.append('cp %s/01.clean_reads/clean_reads.list .' % main_dir)
    commands.append('## calculate abundance')
    commands.append('source activate /data_center_03/USER/huangy/soft/MAIN/anaconda2/envs/gutbio')
    commands.append('snakemake --cluster-config cluster.yaml --cluster \'qsub -o {cluster.qsublog} -e {cluster.qsublog} -l vf={cluster.vf} -q {cluster.queue}\' -j 40 --nolock')
    commands.append('source deactivate')
    commands.append('## form species profile')
    commands.append('ls alignment/*/*root.abundance >abund.list')
    commands.append('python %s/02_taxonomy.py -i abund.list' % bin_dir)
    commands.append('rm abund.list')
    commands.append('for i in all phylum class order family genus species; do ls alignment/*/*$i.abundance |perl %s/201_profile - >profile/$i.profile; done' % bin_dir)
    commands.append('num=1;for i in phylum class order family genus species; do let num=num+1; python %s/201_profile_convert.py -i profile/$i.profile -o profile/otu_table_L$num.txt; done' % bin_dir)
    commands.append('ls profile/* | while read a; do cp $a ../../taxon_profile; done')
    commands.append('## reads use rate')
    # The use-rate commands are emitted as shell comments (leading '#').
    commands.append('#ls alignment/*/*.MATCH.logs >match_logs.list')
    commands.append('#python %s/201_use_rate.py -i match_logs.list -o use_rate.stat.tsv -clean %s/00.raw_reads/qc_stat.tsv' % (bin_dir, main_dir))
    commands.append('#rm match_logs.list')
    return work_dir, commands
def assembly_soapdenove(config, name):
    """Generate the post-assembly shell commands for the SOAPdenovo route.

    The commands pick the best scaftigs per sample, collect their
    statistics, draw length histograms and package the sequences for upload.

    Args:
        config: path of the pipeline config file; only its directory is used.
        name: name of the step directory below the config directory.

    Returns:
        A ``(work_dir, commands)`` tuple where ``commands`` is a list of shell
        command strings (lines starting with ``##`` are section markers).

    Side effects:
        Calls ``mkdir`` for ``<work_dir>/histogram/`` while generating.

    NOTE(review): several commands use relative paths (``assembly/``,
    ``best_scaftigs/``, ``histogram/``) while others are anchored at
    ``work_dir``; presumably the script is run from inside ``work_dir`` --
    confirm against the caller.
    """
    print(gettime("start 03.assembly soapdenove method"))
    project_root = os.path.dirname(config)
    work_dir = '%s/%s/preprocess_for_assembly/soapdenove' % (project_root, name)

    lister = "/data_center_03/USER/zhongwd/bin/list"
    list_file = "%s/list.txt" % work_dir

    commands = [
        "## best contigs",
        'ls assembly/*/*/*scafSeq |while read a; do perl %s/scaftigs.pl $a 500 ${a%%.*}.scaftigs.fna ${a%%.*}.scaftigs.stat; done'
        % bin_dir,
        "%s assembly/*/* >%s" % (lister, list_file),
        "python %s/best_scaftigs_selecter.py -i %s -o %s/best_scaftigs"
        % (bin_dir, list_file, work_dir),
        # The listing is only an intermediate input of the selecter.
        "rm %s" % list_file,
        "%s best_scaftigs/*stat | perl %s/stat.pl >  %s/scaftigs.best.stat.tsv"
        % (lister, bin_dir, work_dir),
        "## histogram",
        r"ls best_scaftigs/*.scaftigs.fna | sed 's#best_scaftigs/\(.*\).fna#\1#g' | while read a; do lengthfasta best_scaftigs/$a.fna >histogram/$a.length; done",
        "ls histogram/*.scaftigs.length |while read a; do Rscript %s/scaftigs_length.R $a ${a%%.*}.histogram.pdf; done"
        % bin_dir,
        "ls histogram/*.pdf |while read a; do convert -density 300 $a ${a%%.*}.png; done",
        "## upload",
        "ls best_scaftigs/*fna |while read a ; do gzip -c $a >${a%%.*}.fna.gz; done",
        "md5sum best_scaftigs/*.gz > best_scaftigs/scaftigs.md5",
        'ls best_scaftigs/*scaftigs.fna | while read a;do b=${a##*/};echo -e "${b%%.*}\\t`pwd $a`/$a";done > ../../scaftigs.list',
    ]

    # The histogram directory must exist before the generated script runs.
    mkdir("%s/histogram/" % work_dir)

    print(gettime("end 03.assembly"))
    return work_dir, commands
def use_megahit_version(config, name):
    """Generate the shell commands that launch a MEGAHIT assembly.

    Args:
        config: path of the pipeline config file; only its directory is used.
        name: name of the step directory below the config directory.

    Returns:
        A ``(work_dir, commands)`` tuple; ``commands`` is a list of shell
        command strings.

    Side effects:
        Calls ``mkdir`` for the megahit working directory.
    """
    print(gettime('start create megahit step script'))
    project_root = os.path.dirname(config)
    work_dir = '%s/%s/preprocess_for_assembly/megahit' % (project_root, name)
    mkdir(work_dir)

    commands = [
        "## assembly",
        "cp %s/01.clean_reads/clean_reads.list %s" % (project_root, work_dir),
        # Reference run:
        # /data_center_11/Project/wenpp/01.wujianrong_20180822/03.assembly/assembly_megahit
        "perl %s/megahit_shell_maker.pl -l clean_reads.list -d %s"
        % (bin_dir, work_dir),
        # "--memery" is spelled the way the original command passes it to qsge.
        "nohup /data_center_03/USER/zhongwd/bin/qsge --queue neo.q --memery 30G --jobs 2 --lines 1 --prefix megahit shell/assembly.sh &",
    ]

    print(gettime("end assembly_pre"))
    return work_dir, commands
def use_soapdenove_method(config, name):
    """Generate the shell commands that launch a SOAPdenovo assembly.

    Args:
        config: path of the pipeline config file. Its directory is the
            project root and its ``[param] ins_list`` option is forwarded to
            ``soapdenovo_shell_maker.pl`` via ``-i``.
        name: name of the step directory below the config directory.

    Returns:
        A ``(work_dir, commands)`` tuple; ``commands`` is a list of shell
        command strings.

    Side effects:
        Calls ``mkdir`` for the soapdenove working directory.
    """
    print(gettime('start create soapdenove step script'))
    project_root = os.path.dirname(config)
    work_dir = '%s/%s/preprocess_for_assembly/soapdenove' % (project_root, name)
    mkdir(work_dir)

    # Pull the insert-size list setting out of the pipeline config.
    settings = ConfigParser()
    settings.read(config)
    ins_list = settings.get("param", "ins_list")

    commands = [
        "## assembly",
        "cp %s/01.clean_reads/clean_reads.list %s/" % (project_root, work_dir),
        "perl %s/soapdenovo_shell_maker.pl -l clean_reads.list -i %s -minkmer 51 -maxkmer 63 -b 4 -d %s/"
        % (bin_dir, ins_list, work_dir),
        # "--memery" is spelled the way the original command passes it to qsge.
        "nohup /data_center_03/USER/zhongwd/bin/qsge --queue big.q:all.q:all.q:all.q --memery 100G:5G:10G:3G --jobs 2 --lines 4 --prefix AS shell/assembly.sh &",
    ]

    print(gettime("end assembly_pre"))
    return work_dir, commands
# Example #24
# 0
def gene_profile(config, name):
    """Build the shell commands for the 06.gene_profile step.

    The generated script merges the per-sample abundance files into
    ``gene.profile``, computes alpha diversity once for the whole data set
    and then, for every grouping named in the config, runs the alpha
    diversity plots, ANOSIM and the differential-gene analysis.

    Args:
        config: path of the pipeline config file. Its directory is the
            project root; its ``[param] group`` option lists the grouping
            names, separated by commas and/or whitespace. Each grouping
            ``G`` is expected at ``<project root>/material/G_group.list``.
        name: name of the step directory below the project root.

    Returns:
        list of str: shell command lines in execution order (lines starting
        with ``##`` are section markers).

    Side effects:
        Calls ``mkdir`` for every output directory while generating.
    """
    print(gettime("start 06.gene_profile"))
    commands = []
    project_dir = os.path.dirname(config)
    work_dir = '%s/%s' % (project_dir, name)

    commands.append("## calculate gene abundance")
    commands.append("ls %s/alignment/*/*abundance |perl %s/profile.pl - > %s/gene.profile"
                    % (work_dir, tool_default_dir, work_dir))
    commands.append("## 01.alpha diversity")
    mkdir("%s/01.alpha_diversity/" % work_dir)
    # ``%`` binds tighter than ``+``: only the literal is formatted, the
    # command_default prefix is prepended untouched.
    commands.append(command_default + "perl %s/shannon.pl %s/gene.profile %s/01.alpha_diversity/gene.alpha.div.tsv"
                    % (tool_default_dir, work_dir, work_dir))

    config_gene = ConfigParser()
    config_gene.read(config)
    raw_groups = config_gene.get("param", "group")
    # Split on any run of commas/whitespace and drop empty fields, so that
    # values such as "A , B" or "A,,B" never yield an empty grouping name
    # (which would resolve to "material/_group.list").
    group = [field for field in re.split(r"[\s,]+", raw_groups) if field]

    mkdir("%s/group" % work_dir)
    for subgroup_name in group:
        subgroup = '%s/material/%s_group.list' % (project_dir, subgroup_name)
        # The returned sample/group counts are not used by this step; the
        # call is kept in case parse_group also validates the list file
        # (TODO confirm).
        parse_group(subgroup)

        commands.append(
            "## ----------------------------------%s----------------------"
            % subgroup_name)

        # --- 01. alpha diversity per grouping ---
        work_dir_601 = "%s/group/%s/01.alpha_diversity" % (work_dir, subgroup_name)
        mkdir(work_dir_601)
        commands.append("##01.alpha diversity")
        commands.append(command_default + "Rscript %s/gene.alpha.div.R %s/01.alpha_diversity/gene.alpha.div.tsv %s %s/gene.alpha.div.pdf"
                        % (bin_gene_profile_default_dir, work_dir, subgroup, work_dir_601))
        commands.append("convert -density 300 %s/gene.alpha.div.pdf %s/gene.alpha.div.png"
                        % (work_dir_601, work_dir_601))
        # Gene-number plot, added 2018-10-25. The ".tvs" extension is used
        # consistently by the producer and the consumer below, so it is
        # left as is.
        commands.append(command_default + "python %s/gene.alpha.div.py -i %s/01.alpha_diversity/gene.alpha.div.tsv -g %s -o %s/gene.num.tvs"
                        % (bin_gene_profile_default_dir, work_dir, subgroup, work_dir_601))
        commands.append(command_default + "Rscript %s/01.alpha_diversity.gene.num.R %s/gene.num.tvs %s/gene.num.pdf"
                        % (bin_gene_profile_default_dir, work_dir_601, work_dir_601))
        commands.append("convert -density 300 %s/gene.num.pdf %s/gene.num.png"
                        % (work_dir_601, work_dir_601))

        # --- 02. anosim ---
        work_dir_602 = "%s/group/%s/02.anosim" % (work_dir, subgroup_name)
        mkdir(work_dir_602)
        commands.append("##02.anosim")
        commands.append(command_default + "python %s/t04_anosim.py -i %s/gene.profile -g %s -o %s"
                        % (tool_default_dir, work_dir, subgroup, work_dir_602))

        # NOTE: a LEfSe step (603_LEfSe.py) on gene.profile used to follow
        # here; it was disabled with the remark
        # "Error: protect(): protection stack overflow".

        # --- 03. differential genes ---
        commands.append("##03.diff")
        work_dir_603 = "%s/group/%s/03.diff" % (work_dir, subgroup_name)
        mkdir(work_dir_603)
        commands.append(command_default + "python %s/t08_diff.py -i %s/gene.profile -g %s -o %s/"
                        % (tool_default_dir, work_dir, subgroup, work_dir_603))
        # Keep columns 1, 7, 8 and 9 and swap the header row for readable
        # column titles.
        commands.append(
            '''awk -F "\\t" '{print $1"\\t"$7"\\t"$8"\\t"$9}' %s/diff.marker.filter.tsv|sed '1a Gene ID\\tP-value\\tQ-value\\tGroup'|sed '1d' > %s/diff.stat.tsv'''
            % (work_dir_603, work_dir_603))
        commands.append("##03.diff/diff_boxplot")
        commands.append(command_default + "python %s/t09_diff_boxplot.py -i %s/diff.marker.filter.profile.tsv -p %s/diff.marker.filter.tsv -g %s -o %s/diff_boxplot/"
                        % (tool_default_dir, work_dir_603, work_dir_603, subgroup, work_dir_603))
        commands.append("## diff_qvalue")
        commands.append("Rscript %s/qvalue.R %s/diff.marker.tsv %s/qvalue.pdf"
                        % (bin_gene_profile_default_dir, work_dir_603, work_dir_603))
        commands.append("convert -density 300 %s/qvalue.pdf %s/qvalue.png"
                        % (work_dir_603, work_dir_603))

        # TODO: the 04.mgs (full_MGS.py / mgs_taxonomy.py) and 05.cag
        # (full_CAG.py / cag_taxonomy.py) sub-steps were planned here,
        # gated on sample counts, but are not generated.
    print(gettime("end 06.gene_profile"))
    return commands
# Example #25
# 0
def taxon(config, name):
    """Run the taxon step by delegating to ``samples`` and then ``group``.

    Both helpers receive the same ``config`` and ``name`` arguments; their
    return values are discarded and this function returns ``None``.
    """
    print(gettime('start create nalysis step script'))
    samples(config, name)
    group(config, name)
# Example #26
# 0
def read_params(args):
    """Parse the launcher's command-line options.

    NOTE: ``args`` is accepted for interface compatibility but is not
    consulted; the options are always taken from ``sys.argv[1:]``, which is
    what ``parse_args()`` reads when called without arguments.

    Returns:
        argparse.Namespace with a ``config_path`` attribute (the value of
        the required ``--config`` option).
    """
    option_spec = dict(
        dest='config_path',
        metavar='FILE',
        type=str,
        required=True,
        help="config file for metagenome pipeline",
    )
    parser = argparse.ArgumentParser(
        description='''The initial run script of metagene ''')
    parser.add_argument('--config', **option_spec)
    return parser.parse_args()


if __name__ == '__main__':
    # Script entry point: read the pipeline config named by --config and
    # announce every configured step that is a known step name.
    print gettime("start")
    step_names_order = const.step_names_order
    params = read_params(sys.argv)
    config_path = params.config_path  # path of the config file
    # print config_path
    config = ConfigParser.ConfigParser()  # parser class used to read/modify the config file
    config.read(config_path)  # load the config file
    # option_value = config.read_config()  # put the config file contents into a dict
    work_dir = config.get('param', 'work_dir')
    # The configured step list may be separated by commas, tabs or spaces.
    step_names = re.split(',\s*|,\t+|\t|\s+',
                          config.get('step', 'step_names_order'))
    # const.step_names_order is a comma-separated string of all known steps.
    step_names_all = step_names_order.split(",")
    steps = []
    for i, name in enumerate(step_names):
        # Names that are not in the known step list are skipped silently.
        if name in step_names_all:
            print gettime("start create %s step script" % name)
def kegg(config, name):
    """Build the shell commands for the 07.kegg functional-annotation step.

    The generated script first runs the whole-data-set analysis (merge the
    blat hits, annotate, KO/pathway tables, KO profile, class and level
    barplots) and then, for every grouping named in the config, the
    group-wise statistics and plots.

    Group-wise sub-steps are only generated when enough samples exist;
    otherwise ``samp_num_enough(directory, message)`` is called for the
    affected output directory (presumably it records the reason there --
    TODO confirm):

    * heatmap / pca / pcoa: ``sample_num_total >= 5``
    * nmds / anosim / adonis / mrpp: ``min_sample_num_in_groups >= 5``
    * flower: ``6 <= group_num < 30``; venn: ``2 <= group_num < 6``
    * ko_wilcoxon diff and ko_lefse: ``min_sample_num_in_groups >= 5``
      (lefse detail pathway additionally needs ``group_num == 2``)
    * ROC: ``sample_num_total >= 50 and min_sample_num_in_groups >= 20``

    Args:
        config: path of the pipeline config file. Its directory is the
            project root; its ``[param] group`` option lists the grouping
            names, separated by commas and/or whitespace. Each grouping
            ``G`` is expected at ``<project root>/material/G_group.list``.
        name: name of the step directory below the project root.

    Returns:
        list of str: shell command lines in execution order (lines starting
        with ``#`` are section markers).

    Side effects:
        Calls ``mkdir`` for the output directories while generating.
    """
    print(gettime("start 07.kegg"))
    commands = []
    work_dir = '%s/%s' % (os.path.dirname(config), name)
    commands.append("## whole kegg analysis")
    # The old merge result is removed first; otherwise the blat/* glob of
    # the following cat would pick up all.m8 itself.
    commands.append("rm %s/blat/all.m8"%work_dir)
    commands.append("cat %s/blat/* > %s/blat/all.m8"%(work_dir,work_dir))
    # ``%`` binds tighter than ``+``: only the literal is formatted, the
    # command_default prefix is prepended untouched.
    commands.append(command_default + "python %s/701_pick_blast_m8.py -i %s/blat/all.m8 -o %s/kegg.m8"%\
                    (bin_kegg_default_dir,work_dir,work_dir))
    commands.append(command_default + "perl %s/prokaryote.annotation.pl < %s/kegg.m8 > %s/kegg.anno.tsv"%\
                    (bin_kegg_default_dir,work_dir,work_dir))
    commands.append(command_default + "cut -f2 %s/kegg.m8|sort|uniq >%s/sort_uniq_m8.list"%\
                    (work_dir,work_dir))
    commands.append(command_default + "python %s/702_blast2ko_v2.py -i %s/kegg.m8 -o %s/gene_catalog.ko --subjectId %s/sort_uniq_m8.list"%\
                    (bin_kegg_default_dir,work_dir,work_dir,work_dir))
    commands.append(command_default + "perl /data_center_02/Database/KEGG/bin/07_keggMap_nodiff.pl -ko %s/gene_catalog.ko -outdir %s/gene_catalog.map"%\
                    (work_dir,work_dir))
    commands.append(command_default + "perl /data_center_02/Database/KEGG/bin/06_pathfind.pl -fg %s/gene_catalog.ko -output %s/gene_catalog.path -cutoff 0.0"%\
                    (work_dir,work_dir))
    # NOTE(review): the second argument is a bare relative name, while the
    # 701 step below reads <work_dir>/gene_catalog.path.class -- confirm
    # where 10_KEGG_class.pl writes its output.
    commands.append(command_default + "perl %s/10_KEGG_class.pl %s/gene_catalog.path gene_catalog.path"%\
                    (bin_kegg_default_dir,work_dir))

    # TODO: whole-data-set "group analysis" / "diff analysis" sections were
    # sketched here but are not generated.

    commands.append("## ko profile")
    commands.append(command_default + "python %s/04_get_profiling_ko.py -i %s/gene_catalog.ko --gene_profile %s/../06.gene_profile/gene.profile -o %s/ko.profile"%\
                    (bin_kegg_default_dir,work_dir,work_dir,work_dir))

    # TODO: "diff ko" and "diff module(pathway)" sections are placeholders.

    # 701: KEGG function statistics (the marker string is emitted verbatim).
    commands.append("## 701 kegg功能统计")
    work_dir_701 = "%s/01.kegg_class/"%work_dir
    mkdir(work_dir_701)
    commands.append(command_default + "Rscript %s/701_KEGG_class.R %s/gene_catalog.path.class %s/701_KEGG_class.pdf"\
                    %(bin_kegg_default_dir,work_dir,work_dir_701))
    commands.append("convert -density 300 %s/701_KEGG_class.pdf %s/701_KEGG_class.png"\
                    %(work_dir_701,work_dir_701))

    # 712: level-1 / level-2 functional barplots for the whole data set.
    commands.append("## 712.function_barplot")
    work_dir_712 = "%s/12.functional_barplot"%work_dir
    mkdir(work_dir_712)
    commands.append(command_default + "python %s/04_get_profiling_level1.py -i %s/gene_catalog.path -k %s/ko.profile -o %s"\
                    %(bin_kegg_default_dir,work_dir,work_dir,work_dir_712))
    commands.append("Rscript %s/710_level1_barplot.R %s/kegg_level1_profile.txt %s/level1_barplot.pdf 1"\
                    %(bin_kegg_default_dir,work_dir_712,work_dir_712))
    commands.append("convert -density 300 %s/level1_barplot.pdf %s/level1_barplot.png"\
                    %(work_dir_712,work_dir_712))
    commands.append("Rscript %s/710_level1_barplot.R %s/kegg_level2_profile.txt %s/level2_barplot.pdf 2"\
                    %(bin_kegg_default_dir,work_dir_712,work_dir_712))
    commands.append("convert -density 300 %s/level2_barplot.pdf %s/level2_barplot.png"\
                    %(work_dir_712,work_dir_712))

    # ---- group-wise analysis ----
    config_gene = ConfigParser()
    config_gene.read(config)
    raw_groups = config_gene.get("param","group")
    # Split on any run of commas/whitespace and drop empty fields, so that
    # values such as "A , B" or "A,,B" never yield an empty grouping name
    # (which would resolve to "material/_group.list").
    group = [field for field in re.split(r"[\s,]+",raw_groups) if field]
    for subgroup_name in group:
        subgroup = '%s/material/%s_group.list' % (os.path.dirname(config), subgroup_name)
        sample_num_in_groups,min_sample_num_in_groups,sample_num_total,group_num=parse_group(subgroup)
        commands.append("## ----------------------------------%s----------------------"%(subgroup_name))
        # heatmap & pca & pcoa
        work_dir_702 = "%s/group/%s/02.heatmap/"%(work_dir,subgroup_name)
        mkdir(work_dir_702)
        work_dir_703 = "%s/group/%s/03.pca/"%(work_dir,subgroup_name)
        mkdir(work_dir_703)
        work_dir_704 = "%s/group/%s/04.pcoa/"%(work_dir,subgroup_name)
        mkdir(work_dir_704)
        if sample_num_total>=5:
            commands.append("##heatmap")
            commands.append(command_default + "python %s/t06_heatmap.py -i %s/ko.profile -g %s -o %s"\
                                %(tool_default_dir,work_dir,subgroup,work_dir_702))
            commands.append("##pca")
            commands.append(command_default + "python %s/t01_pca.py -i %s/ko.profile -g %s -o %s --with_boxplot "\
                                %(tool_default_dir,work_dir,subgroup,work_dir_703))
            commands.append("##pcoa")
            commands.append(command_default + "python %s/t02_pcoa.py -i %s/ko.profile -g %s -o %s"\
                                %(tool_default_dir,work_dir,subgroup,work_dir_704))
        else:
            log = "The minimum sample size (5) is not met."
            samp_num_enough(work_dir_702,log)
            samp_num_enough(work_dir_703,log)
            samp_num_enough(work_dir_704,log)
        # nmds & anosim & adonis & mrpp
        work_dir_705 = "%s/group/%s/05.nmds/"%(work_dir,subgroup_name)
        mkdir(work_dir_705)
        work_dir_706 = "%s/group/%s/06.anosim/"%(work_dir,subgroup_name)
        mkdir(work_dir_706)
        work_dir_706_1 = "%s/group/%s/07.adonis/"%(work_dir,subgroup_name)
        mkdir(work_dir_706_1)
        work_dir_707 = "%s/group/%s/08.mrpp/"%(work_dir,subgroup_name)
        mkdir(work_dir_707)
        if min_sample_num_in_groups>=5:
            commands.append("##nmds")
            commands.append(command_default + "python %s/t03_nmds.py -i %s/ko.profile -g %s -o %s"\
                                %(tool_default_dir,work_dir,subgroup,work_dir_705))
            commands.append("##anosim")
            commands.append(command_default + "python %s/t04_anosim.py -i %s/ko.profile -g %s -o %s"\
                                %(tool_default_dir,work_dir,subgroup,work_dir_706))
            commands.append("##adonis")
            commands.append(command_default + "python %s/t12_adonis_pca.py -i %s/ko.profile -g %s -o %s"\
                                %(tool_default_dir,work_dir,subgroup,work_dir_706_1))
            commands.append(command_default + "python %s/t12_adonis_pcoa.py -i %s/ko.profile -g %s -o %s"\
                                %(tool_default_dir,work_dir,subgroup,work_dir_706_1))
            commands.append("##mrpp")
            commands.append(command_default + "python %s/t05_mrpp.py -i %s/ko.profile -g %s -o %s"\
                            %(tool_default_dir,work_dir,subgroup,work_dir_707))
        else:
            log = "min_sample_num_in_groups >= 5"
            samp_num_enough(work_dir_705,log)
            samp_num_enough(work_dir_706,log)
            samp_num_enough(work_dir_706_1,log)
            samp_num_enough(work_dir_707,log)
        # flower|venn: the plot type depends on the number of groups; with
        # fewer than 2 or with 30 and more groups neither is generated.
        if group_num>=6 and group_num<30:
            work_dir_708_1 = "%s/group/%s/09.flower/"%(work_dir,subgroup_name)
            mkdir(work_dir_708_1)
            commands.append("##flower")
            commands.append(command_default + "perl %s/t07_flower.pl %s/ko.profile %s %s"\
                            %(tool_default_dir,work_dir,subgroup,work_dir_708_1))
        elif group_num>=2 and group_num<6:
            work_dir_708_2 = "%s/group/%s/09.venn/"%(work_dir,subgroup_name)
            mkdir(work_dir_708_2)
            commands.append("##venn")
            commands.append(command_default + "python %s/t07_venn_flower.py -i %s/ko.profile -o %s -g %s --with_group "%\
                        (tool_default_dir,work_dir,work_dir_708_2,subgroup))
        # ko_wilcoxon & ko_lefse
        work_dir_709 = "%s/group/%s/10.ko_wilcoxon/"%(work_dir,subgroup_name)
        mkdir(work_dir_709)
        work_dir_710 = "%s/group/%s/11.ko_lefse"%(work_dir,subgroup_name)
        mkdir(work_dir_710)
        if min_sample_num_in_groups>=5:
            # The "09.x" labels in the emitted markers are kept verbatim
            # although the output directory is 10.ko_wilcoxon.
            commands.append("##09.0 diff")
            commands.append(command_default + "python %s/t08_diff.py -i %s/ko.profile -g %s -o %s"\
                            %(tool_default_dir,work_dir,subgroup,work_dir_709))
            commands.append(command_default + "python %s/ko_description.py -i %s/diff.marker.filter.tsv -o %s/diff.marker.filter.definition.tsv --ko_def /data_center_09/Project/lixr/00.DATA/KEGG_DB/ko_description.tab"%(bin_kegg_default_dir,work_dir_709,work_dir_709))
            commands.append("#09.1 diff boxplot")
            commands.append(command_default + "python %s/t09_diff_boxplot.py -i %s/diff.marker.filter.profile.tsv -p %s/diff.marker.filter.tsv -g %s -o %s/diff_boxplot/"\
                        %(tool_default_dir,work_dir_709,work_dir_709,subgroup,work_dir_709))
            commands.append("#09.2 diff heatmap")
            commands.append(command_default + "python %s/t06_heatmap.py -i %s/diff.marker.filter.profile.tsv -g %s -o %s/heatmap/"\
                                %(tool_default_dir,work_dir_709,subgroup,work_dir_709))
            commands.append("#09.3 diff pathway")
            mkdir("%s/pathway/"%work_dir_709)
            commands.append("Rscript %s/707_compare_pathway.R %s/diff.marker.filter.profile.tsv %s %s/pathway/707_compare.txt"\
                            %(bin_kegg_default_dir,work_dir_709,subgroup,work_dir_709))

            commands.append("#09.4 diff detail pathway")
            work_dir_709_2 = "%s/detail_pathway/"%work_dir_709
            mkdir(work_dir_709_2)
            commands.append("python %s/709_diff_map.py -i %s/diff.marker.filter.tsv -ko %s/gene_catalog.ko -g %s -o %s "\
                            %(bin_kegg_default_dir,work_dir_709,work_dir,subgroup,work_dir_709_2))

            commands.append("#09.5 diff barplot")
            # Collect the pathway rows of every differential KO (header
            # line skipped via sed), de-duplicate them, then drop the
            # intermediate file.
            commands.append("cut -f 1 %s/diff.marker.filter.tsv |sed -n '2,$p' |while read line;do grep \"$line\" %s/gene_catalog.path >>%s/diff_gene_catalog.path;done"\
                            %(work_dir_709,work_dir,work_dir_709))
            commands.append("sort %s/diff_gene_catalog.path |uniq > %s/diff_gene_catalog2.path"%(work_dir_709,work_dir_709))
            commands.append("rm %s/diff_gene_catalog.path"%work_dir_709)
            mkdir("%s/path_barplot/"%work_dir_709)
            commands.append("python %s/712_ko2path_bar.py -i %s/diff_gene_catalog2.path -g %s/diff.marker.filter.tsv -o %s/path_barplot/ -l 2"\
                            %(bin_kegg_default_dir,work_dir_709,work_dir_709,work_dir_709))

            # ko_lefse
            commands.append("## lefse")
            commands.append(command_default + "python %s/../06.gene_profile/603_LEfSe.py -i %s/ko.profile -l /data_center_03/USER/huangy/soft/LEfSe_lzb -g %s -o %s --LDA 2"\
                            %(bin_kegg_default_dir,work_dir,subgroup,work_dir_710))
            commands.append("#lefse heatmap")
            mkdir("%s/heatmap/"%work_dir_710)
            commands.append(command_default + "python %s/t06_heatmap.py -i %s/diff.marker.filter.profile.tsv -g %s -o %s/heatmap/"\
                                %(tool_default_dir,work_dir_710,subgroup,work_dir_710))
            commands.append("#lefse pathway")
            mkdir("%s/pathway/"%work_dir_710)
            commands.append("Rscript %s/707_compare_pathway.R %s/diff.marker.filter.profile.tsv %s %s/pathway/707_compare.txt"\
                            %(bin_kegg_default_dir,work_dir_710,subgroup,work_dir_710))
            commands.append("#lefse barplot")
            # NOTE(review): unlike the wilcoxon variant above, the header
            # line is not skipped here -- confirm this is intended.
            commands.append("cut -f 1 %s/diff.marker.filter.tsv |while read line;do grep \"$line\" %s/gene_catalog.path >>%s/diff_gene_catalog.path;done"\
                            %(work_dir_710,work_dir,work_dir_710))
            commands.append("sort %s/diff_gene_catalog.path |uniq > %s/diff_gene_catalog2.path"%(work_dir_710,work_dir_710))
            commands.append("rm %s/diff_gene_catalog.path"%work_dir_710)
            mkdir("%s/path_barplot/"%work_dir_710)
            commands.append("python %s/712_ko2path_bar.py -i %s/diff_gene_catalog2.path -g %s/diff.marker.filter.tsv -o %s/path_barplot/ -l 2"\
                            %(bin_kegg_default_dir,work_dir_710,work_dir_710,work_dir_710))

            # lefse detail pathway
            work_dir_710_2 = "%s/detail_pathway/"%work_dir_710
            mkdir(work_dir_710_2)
            if group_num==2:
                commands.append("#lefse detail pathway")
                commands.append("python %s/709_diff_map.py -i %s/diff.marker.filter.tsv -ko %s/gene_catalog.ko -g %s -o %s "\
                                %(bin_kegg_default_dir,work_dir_710,work_dir,subgroup,work_dir_710_2))
            else:
                log = "The number of groups must be 2."
                samp_num_enough(work_dir_710_2,log)
        else:
            log = "min_sample_num_in_groups >= 5"
            samp_num_enough(work_dir_709,log)
            samp_num_enough(work_dir_710,log)

        # NOTE: a ko_metastats sub-step (11.ko_metastats, for group_num==2
        # and min_sample_num_in_groups>=5) used to be generated here; it is
        # currently disabled.

        # group-wise functional barplots, built from the whole-data-set
        # level profiles in work_dir_712
        work_dir_712_2 = "%s/group/%s/12.functional_barplot"%(work_dir,subgroup_name)
        mkdir(work_dir_712_2)
        commands.append("##712.function_barplot")
        commands.append("Rscript %s/702_level1_barplot_withgroup.R %s/kegg_level1_profile.txt %s/level1_barplot_withgroup.pdf 1 %s"\
                        %(bin_kegg_default_dir,work_dir_712,work_dir_712_2,subgroup))
        commands.append("convert -density 300 %s/level1_barplot_withgroup.pdf %s/level1_barplot_withgroup.png"\
                        %(work_dir_712_2,work_dir_712_2))
        commands.append("Rscript %s/702_level1_barplot_withgroup.R %s/kegg_level2_profile.txt %s/level2_barplot_withgroup.pdf 2 %s"\
                        %(bin_kegg_default_dir,work_dir_712,work_dir_712_2,subgroup))
        commands.append("convert -density 300 %s/level2_barplot_withgroup.pdf %s/level2_barplot_withgroup.png"\
                        %(work_dir_712_2,work_dir_712_2))
        work_dir_713 = "%s/group/%s/13.functional_clust"%(work_dir,subgroup_name)
        mkdir(work_dir_713)
        commands.append("##713 sample cluster")
        commands.append(command_default + "python %s/t10_sample_clustering.py -i %s/kegg_level1_profile.txt -g %s -o %s/ -t \"KEGG Level1 Abundance in Samples\" "\
                            %(tool_default_dir,work_dir_712,subgroup,work_dir_713))
        # roc: reads the lefse results in work_dir_710 and writes the plot
        # to work_dir_714
        work_dir_714 = "%s/group/%s/14.ROC"%(work_dir,subgroup_name)
        mkdir(work_dir_714)
        commands.append("##714 ROC")
        if sample_num_total >= 50 and min_sample_num_in_groups >=20:
            commands.append("cut -f1 %s/diff.marker.filter.tsv >%s/diff.list;Rscript %s/710_roc.R %s/diff.marker.filter.profile.tsv %s/diff.list %s %s/710_roc.pdf"\
                        %(work_dir_710,work_dir_710,bin_kegg_default_dir,work_dir_710,work_dir_710,subgroup,work_dir_714))
        else:
            log = "sample_num_total >= 50 and min_sample_num_in_groups >=20"
            samp_num_enough(work_dir_714,log)

    print(gettime("end kegg"))
    return commands
def cazy(config, name):
    """Build the shell command list for the CAZy stage (12.cazy).

    The function itself only creates output directories (through ``mkdir``)
    and returns command strings; nothing is executed here.

    Args:
        config: Path to an INI file.  Its directory is used as the project
            root and its ``[param] group`` option lists the subgroups to
            analyse, separated by whitespace and/or commas.  Each entry is
            handed to ``get_name``/``parse_group`` and to the tools' ``-g``
            option (presumably a path to a group list file -- confirm
            against ``parse_group``).
        name: Name of this stage's directory under the project root.

    Returns:
        list of str: shell commands and ``#``-prefixed section markers, in
        the order they are meant to run.
    """
    # Single-argument parenthesised print behaves identically under the
    # Python 2 print statement and the Python 3 print function.
    print(gettime("start 12.cazy"))
    commands = []
    work_dir = "%s/%s" % (os.path.dirname(config), name)
    prefix = const.command_default

    def add_type_profile_steps(steps, subgroup, sub_work_dir):
        """Queue ``python <script> -i type.profile -g <group> -o <dir>`` steps.

        ``steps`` is an iterable of (section marker, sub-directory, script
        file name); each step gets its own freshly created output directory.
        """
        for marker, subdir, script in steps:
            step_dir = "%s/%s/" % (sub_work_dir, subdir)
            mkdir(step_dir)
            commands.append(marker)
            commands.append(
                prefix + "python %s/%s -i %s/type.profile -g %s -o %s" %
                (tools_dir, script, work_dir, subgroup, step_dir))

    # ---- whole-cohort annotation and profiles -------------------------------
    commands.append("## whole cazy analysis")
    # -f: the merged file does not exist on a first run, and a plain `rm`
    # would fail there.  It must still be removed before the `cat` so that a
    # stale all.m8 is not matched by the blat/* glob.
    commands.append("rm -f %s/blat/all.m8" % work_dir)
    commands.append("cat %s/blat/* > %s/blat/all.m8" % (work_dir, work_dir))
    commands.append(
        prefix + "python %s/01.get_anno_info.py -i %s/blat/all.m8 -o %s" %
        (cazy_bin_dir, work_dir, work_dir))

    # (-l value, -c level name) pairs passed to 02.get_profile_and_count.py.
    profile_levels = (("5", "class"), ("6", "type"), ("9", "enzyme"))
    for level_arg, level in profile_levels:
        commands.append(
            prefix +
            "python %s/02.get_profile_and_count.py -a %s/cazy.anno.tsv -p %s/../06.gene_profile/gene.profile -l %s  -c %s -o %s" %
            (cazy_bin_dir, work_dir, work_dir, level_arg, level, work_dir))

    commands.append("## 1212.function_barplot")
    work_dir_12 = "%s/12.functional_barplot" % work_dir
    mkdir(work_dir_12)
    for _, level in profile_levels:
        commands.append(
            prefix +
            "Rscript %s/710_level1_barplot.R %s/%s.profile %s/%s_barplot.pdf %s" %
            (cazy_bin_dir, work_dir, level, work_dir_12, level, level))
        commands.append(
            "convert -density 300 %s/%s_barplot.pdf %s/%s_barplot.png" %
            (work_dir_12, level, work_dir_12, level))

    # ---- per-subgroup analyses ----------------------------------------------
    config_gene = ConfigParser()
    config_gene.read(config)
    # Drop empty tokens: "a, b" or "a,,b" would otherwise produce '' entries
    # that get passed on as subgroup paths.
    group = [
        entry
        for entry in re.split(r"[\s,]+", config_gene.get("param", "group"))
        if entry
    ]

    for subgroup in group:
        _, subgroup_name, _ = get_name(subgroup)
        _, min_sample_num_in_groups, sample_num_total, group_num = parse_group(
            subgroup)
        sub_work_dir = "%s/group/%s" % (work_dir, subgroup_name)
        commands.append(
            "## ----------------------------------%s----------------------------------##"
            % (subgroup_name))

        # Overview plots are gated on the total number of samples.
        if sample_num_total >= 5:
            add_type_profile_steps(
                (
                    ("##heatmap", "02.heatmap", "6_heatmap.py"),
                    ("##pca", "03.pca", "1_pca.py"),
                    ("##pcoa", "04.pcoa", "2_pcoa.py"),
                ),
                subgroup, sub_work_dir)

        # These steps are gated on the smallest group having >= 5 samples.
        if min_sample_num_in_groups >= 5:
            add_type_profile_steps(
                (
                    ("##nmds", "05.nmds", "3_nmds.py"),
                    ("##anosim", "06.anosim", "4_anosim.py"),
                    ("##mrpp", "07.mrpp", "5_mrpp.py"),
                ),
                subgroup, sub_work_dir)

        # Flower plot for 6..29 groups, Venn diagram for 2..5 groups.
        if 6 <= group_num < 30:
            work_dir_1208 = "%s/08.flower/" % sub_work_dir
            mkdir(work_dir_1208)
            commands.append("##flower")
            commands.append(
                prefix + "perl %s/7_flower.pl %s/type.profile %s %s" %
                (tools_dir, work_dir, subgroup, work_dir_1208))
        elif 2 <= group_num < 6:
            work_dir_1208 = "%s/08.venn/" % sub_work_dir
            mkdir(work_dir_1208)
            commands.append("##venn")
            # NOTE(review): -o receives sub_work_dir, not the 08.venn directory
            # created above; presumably the script adds the sub-directory
            # itself -- confirm against 7_venn_flower.py.
            commands.append(
                prefix +
                "python %s/7_venn_flower.py -i %s/type.profile -o %s -g %s --with_group"
                % (tools_dir, work_dir, sub_work_dir, subgroup))

        if min_sample_num_in_groups >= 5:
            work_dir_1209 = "%s/09.ko_wilcoxon/" % sub_work_dir
            mkdir(work_dir_1209)
            commands.append("##diff")
            # Run through `python` like every other script here instead of
            # relying on the file's executable bit and shebang.
            # NOTE(review): -o receives sub_work_dir while the follow-up steps
            # read from 09.ko_wilcoxon/; presumably 8_diff.py writes into that
            # sub-directory -- confirm.
            commands.append(
                prefix + "python %s/8_diff.py -i %s/type.profile -g %s -o %s" %
                (tools_dir, work_dir, subgroup, sub_work_dir))
            commands.append("# diff boxplot")
            commands.append(
                prefix +
                "python %s/9_diff_boxplot.py -i %s/diff.marker.filter.profile.tsv -p %s/diff.marker.filter.tsv -g %s -o %s/diff_boxplot/"
                % (tools_dir, work_dir_1209, work_dir_1209, subgroup,
                   work_dir_1209))
            commands.append("# diff heatmap")
            commands.append(
                prefix +
                "python %s/6_heatmap.py -i %s/diff.marker.filter.profile.tsv -g %s -o %s/heatmap/"
                % (tools_dir, work_dir_1209, subgroup, work_dir_1209))

            work_dir_1210 = "%s/10.lefse/" % sub_work_dir
            mkdir(work_dir_1210)
            commands.append("## lefse")
            commands.append(
                prefix +
                "python %s/603_LEfSe.py -i %s/type.profile -l /data_center_03/USER/huangy/soft/LEfSe_lzb -g %s -o %s --LDA 2"
                % (cazy_bin_dir, work_dir, subgroup, work_dir_1210))
            commands.append("#lefse heatmap")
            commands.append(
                prefix +
                "python %s/6_heatmap.py -i %s/diff.marker.filter.profile.tsv -g %s -o %s/heatmap/"
                % (tools_dir, work_dir_1210, subgroup, work_dir_1210))

        if group_num == 2 and min_sample_num_in_groups >= 5:
            # NOTE: the 11.metastats and per-group 12.functional_barplot steps
            # existed here only as commented-out code and are not emitted.
            work_dir_1213 = "%s/13.functional_clust/" % sub_work_dir
            mkdir(work_dir_1213)
            commands.append("##712 sample cluster")
            commands.append(
                prefix +
                "python %s/10_sample_clustering.py -i %s/type.profile -g %s -o %s -t \"Type Abundance in Samples\""
                % (tools_dir, work_dir, subgroup, work_dir_1213))

    print(gettime("end cazy"))
    return commands