Python STAP.EvaluateSpHMM примеры использования

Язык программирования: Python

Пространство имен/Пакет: rpy2.robjects.packages

Класс/Тип: STAP

Метод/Функция: EvaluateSpHMM

Примеров на hotexamples.com: 3

Python STAP.EvaluateSpHMM - 3 примера найдено. Это лучшие примеры Python кода для rpy2.robjects.packages.STAP.EvaluateSpHMM, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

STAP(30)

EvaluateSpHMM(3)

colorcitos(3)

mdl_func(2)

orderInds(2)

plotCorRes(2)

plot_sequencing_depth_overview(1)

plotInPdf(1)

plot_beta_distribution_binary_class(1)

plot_density_heatmap(1)

plot_distribution(1)

plot_heatmap(1)

Chi_sq_based_bin(1)

pr_curve(1)

my_cego(1)

rcca_fit(1)

roc_curve(1)

runAnnotation(1)

run_fitZIG(1)

rx2(1)

sPLS(1)

simulate(1)

rcca_eval(1)

main(1)

multi_class_func(1)

dpbinom(1)

add_lag(1)

as_matrix(1)

assess_get_performances(1)

assess_read(1)

assess_summarise_performances(1)

combine_tx_txf(1)

corrMat(1)

ejecutable_espectro(1)

join_tables(1)

entropy_based_bin(1)

estacionariedad(1)

evaluate(1)

findPubs(1)

fitcoxnet(1)

fullAnnotationInGRanges(1)

initial_config(1)

woe_based_binning(1)

Пример #1

Показать файл

Файл: MetaBGC-Build.py Проект: shw079/MetaBGC

                # Only non-empty files whose name ends in "txt" are merged into
                # the combined output; everything else in the walk is ignored.
                if re.match(r".*txt$", file) and os.path.getsize(filePath) > 0:
                    with open(filePath) as infile:
                        for line in infile:
                            # Sample name = file base name up to the first ".txt".
                            sampleName = ntpath.basename(filePath).split(".txt")[0]
                            # Append the sample name and the cohort name as two
                            # extra tab-separated columns on every row.
                            outfile.write(line.strip() + "\t" + sampleName + "\t" + args.cohort_name + "\n")

    # Eval spHMMs
    # NOTE(review): `rpackages` is presumably rpy2.robjects.packages (its import
    # is outside this snippet) -- confirm.
    rpackages.importr('base')
    # The disabled lines below would install any missing R packages from CRAN;
    # as written, the three packages imported afterwards must already be
    # installed in the R library.
    #packageNames = ('tidyverse','ggsci','ggpubr')
    #utils = rpackages.importr('utils')
    #utils.chooseCRANmirror(ind=1)
    #packnames_to_install = [x for x in packageNames if not rpackages.isinstalled(x)]
    #if len(packnames_to_install) > 0:
    #    utils.install_packages(StrVector(packnames_to_install))

    rpackages.importr('tidyverse')
    rpackages.importr('ggsci')
    rpackages.importr('ggpubr')

    # Output directory for the spHMMs kept by the evaluation step
    # (created if missing; the third argument is exist_ok=True).
    hp_hmm_directory = os.path.join(build_op_dir, 'HiPer_spHMMs')
    os.makedirs(hp_hmm_directory,0o777,True)
    # The R script is opened by a relative path, i.e. it is resolved against the
    # current working directory of the process.
    with open('EvaluateSpHMMs.R', 'r') as f:
        rStr = f.read()
    # Wrap the R source text in an rpy2 STAP object so the R function
    # EvaluateSpHMM defined in the script can be called as a Python attribute.
    myfunc = STAP(rStr, "EvaluateSpHMM")
    myfunc.EvaluateSpHMM(allHMMResult, allBLASTResult, gene_pos_file, args.prot_family_name, float(args.F1_Thresh), hmm_directory, hp_hmm_directory)

    # Report wall-clock duration as whole minutes plus the remaining seconds.
    timeTaken = time.time() - startTime
    mins = int(timeTaken / 60)
    secs = int(timeTaken) % 60
    print("\nTotal time taken : " + str(mins) + " mins " + str(secs) + " seconds")

Пример #2

Показать файл

def mbgcbuild(prot_alignment, prot_family_name, cohort_name,
              nucl_seq_directory, prot_seq_directory, seq_fmt, pair_fmt, r1_file_suffix,
              r2_file_suffix, tp_genes_nucl, blast_db_directory_map_file, blastn_search_directory, hmm_search_directory, f1_thresh,
              output_directory, cpu):
    """Run the MetaBGC build stage and return the high-performance spHMM directory.

    The stages, in order: translate the true-positive genes, align them with
    the protein family alignment (MUSCLE), generate spHMMs and interval
    positions, preprocess and translate the reads, run the HMMER and BLAST
    searches, then call the R function ``EvaluateSpHMM`` on the combined
    results.

    All outputs are written under ``<output_directory>/build``.

    Args:
        prot_alignment: FASTA file with the protein family alignment.
        prot_family_name: Name of the protein family; forwarded to the helpers
            and to the R evaluation.
        cohort_name: Cohort label appended as the last column of every combined
            BLAST row and forwarded to the HMMER search.
        nucl_seq_directory: Directory with the nucleotide reads.
        prot_seq_directory: Directory with translated reads. When it is
            ``None`` or not an existing directory the reads are translated here.
        seq_fmt, pair_fmt: Forwarded unchanged to ``PreProcessReadsPar``.
        r1_file_suffix, r2_file_suffix: Paired-read file suffixes; ``None`` is
            treated as the empty string.
        tp_genes_nucl: Nucleotide FASTA file with the true-positive genes.
        blast_db_directory_map_file: Optional BLAST DB mapping file; ``None``
            is forwarded as the empty string.
        blastn_search_directory: Directory with BLAST results; defaults to
            ``<build>/blastn_result``.
        hmm_search_directory: Directory with HMMER results; defaults to
            ``<build>/hmm_result``.
        f1_thresh: F1 threshold, converted with ``float`` before being passed
            to the R function.
        output_directory: Parent directory of the ``build`` output directory.
        cpu: Number of worker threads; ``None`` selects 4.

    Returns:
        Path of the ``HiPer_spHMMs`` directory.

    Raises:
        SystemExit: With status 1 when any stage raises an ``Exception``; the
            traceback is printed first so the cause is not lost.
    """
    # Function-scope imports so this block does not depend on what the module
    # header (not visible here) happens to import.
    import sys
    import traceback

    try:
        startTime = time.time()
        CPU_THREADS = 4 if cpu is None else int(cpu)

        # setup paths
        build_op_dir = output_directory + os.sep + "build"
        hmm_directory = os.path.join(build_op_dir, 'spHMMs')
        tp_genes_prot = build_op_dir + os.sep + "TPGenes.faa"
        alnOutput = os.path.join(build_op_dir, "TP_Homolog_Alignment.afa")
        gene_pos_file = os.path.join(build_op_dir, 'Gene_Interval_Pos.txt')
        gene_pos_file_aa = os.path.join(build_op_dir, 'Gene_Interval_Pos_AA.txt')
        if hmm_search_directory is None:
            hmm_search_directory = os.path.join(build_op_dir, 'hmm_result')
        allHMMResult = os.path.join(build_op_dir, "CombinedHmmSearch.txt")
        if blastn_search_directory is None:
            blastn_search_directory = os.path.join(build_op_dir, 'blastn_result')
        allBLASTResult = os.path.join(build_op_dir, "CombinedBLASTSearch.txt")

        # Create OP dirs (this also creates build_op_dir, the parent).
        os.makedirs(hmm_directory, 0o777, True)

        # Translate protein sequence
        runTranSeq(tp_genes_nucl, "1", tp_genes_prot)

        # Join true positives in the sample with the BGC proteins
        tmpFile = os.path.join(build_op_dir, "TP_Homolog.faa")
        joinedSeqs = []
        for seq in SeqIO.parse(tp_genes_prot, "fasta"):
            # Removing _1 added by TranSeq: the last two characters of the id
            # are dropped unconditionally.
            seq.id = seq.id[:-2]
            seq.description = ""
            joinedSeqs.append(seq)
        # Rewrite the translated genes with the cleaned ids before the
        # alignment records are appended to the same list.
        SeqIO.write(joinedSeqs, tp_genes_prot, "fasta")
        joinedSeqs.extend(SeqIO.parse(prot_alignment, "fasta"))
        SeqIO.write(joinedSeqs, tmpFile, "fasta")

        # MUSCLE align TP genes with markers
        runMUSCLE(tmpFile, alnOutput)

        # Gen spHMMs and interval pos
        # Extract spHMM coordinates from MUSCLE alignment
        hmmDict = gensphmmfiles(prot_family_name, alnOutput, tp_genes_prot,
                                hmm_directory, gene_pos_file, gene_pos_file_aa)

        if r1_file_suffix is None:
            r1_file_suffix = ""
        if r2_file_suffix is None:
            r2_file_suffix = ""

        # Preprocess synthetic reads
        nucl_seq_directory = PreProcessReadsPar(nucl_seq_directory,
                                                seq_fmt, pair_fmt,
                                                r1_file_suffix.strip(),
                                                r2_file_suffix.strip(),
                                                build_op_dir,
                                                CPU_THREADS)

        # Check if BLAST DB directory mapping file is provided or not
        if blast_db_directory_map_file is None:
            blast_db_directory_map_file = ""

        # Translate nucleotide seq. os.path.isdir(None) raises TypeError, so a
        # missing directory argument is checked explicitly first.
        if prot_seq_directory is None or not os.path.isdir(prot_seq_directory):
            prot_seq_directory = TranseqReadsDir(build_op_dir, nucl_seq_directory, CPU_THREADS)

        # HMMER Search (skipped when a combined result file already exists)
        if not os.path.exists(allHMMResult):
            os.makedirs(hmm_search_directory, 0o777, True)
            for hmmFileObj in hmmDict.values():
                hmmInterval = str(hmmFileObj.intervalStart) + "_" + str(hmmFileObj.intervalEnd)
                RunHMMDirectoryParallel(prot_seq_directory, hmmFileObj.hmmFile, cohort_name,
                                        prot_family_name, "30_10", hmmInterval,
                                        hmm_search_directory, CPU_THREADS)

            # Concatenate every non-empty *.txt result into one file.
            with open(allHMMResult, 'w') as outfile:
                for subdir, dirs, files in os.walk(hmm_search_directory):
                    for file in files:
                        filePath = os.path.join(subdir, file)
                        if re.match(r".*txt$", file) and os.path.getsize(filePath) > 0:
                            with open(filePath) as infile:
                                outfile.writelines(infile)

        # BLAST Alignment (skipped when a combined result file already exists)
        if not os.path.exists(allBLASTResult):
            # An existing search directory is reused as-is; BLAST only runs
            # when the directory has to be created.
            if not os.path.isdir(blastn_search_directory):
                print("Constructing BLAST Search Dir:" + blastn_search_directory)
                os.makedirs(blastn_search_directory, 0o777, True)
                RunMakeDBandBlastN(nucl_seq_directory, blast_db_directory_map_file,
                                   tp_genes_nucl, "blastn", "-max_target_seqs 10000 -perc_identity 90.0 -outfmt \"6 sseqid slen sstart send qseqid qlen qstart qend pident evalue\" ",
                                   blastn_search_directory, CPU_THREADS)

            with open(allBLASTResult, 'w') as outfile:
                outfile.write("sseqid\tslen\tsstart\tsend\tqseqid\tqlen\tqstart\tqend\tpident\tevalue\tSample\tsampleType\n")
                for subdir, dirs, files in os.walk(blastn_search_directory):
                    for file in files:
                        filePath = os.path.join(subdir, file)
                        if re.match(r".*txt$", file) and os.path.getsize(filePath) > 0:
                            # The sample name depends only on the file, so it
                            # is computed once per file rather than per line.
                            sampleName = os.path.basename(filePath).split(".txt")[0]
                            with open(filePath) as infile:
                                for line in infile:
                                    outfile.write(line.strip() + "\t" + sampleName + "\t" + cohort_name + "\n")

        # Eval spHMMs
        rpackages.importr('base')
        utils = rpackages.importr('utils')
        packageNames = ('tidyverse', 'ggsci', 'ggpubr', 'dplyr', 'ggplot2')
        # NOTE(review): no CRAN mirror is selected before installing; whether
        # install_packages prompts or fails then depends on the R setup.
        packnames_to_install = [x for x in packageNames if not rpackages.isinstalled(x)]
        if packnames_to_install:
            utils.install_packages(StrVector(packnames_to_install))
        for packageName in packageNames:
            rpackages.importr(packageName)

        hp_hmm_directory = os.path.join(build_op_dir, 'HiPer_spHMMs')
        os.makedirs(hp_hmm_directory, 0o777, True)
        # The R script is looked up next to the createhmm module.
        module_dir = os.path.dirname(os.path.abspath(createhmm.__file__))
        print("\nR-script path : " + module_dir)
        r_script = os.path.join(module_dir, 'EvaluateSpHMMs.R')

        with open(r_script, 'r') as f:
            rStr = f.read()
        # Wrap the R source so its EvaluateSpHMM function is callable from Python.
        myfunc = STAP(rStr, "EvaluateSpHMM")
        myfunc.EvaluateSpHMM(allHMMResult, allBLASTResult, gene_pos_file, prot_family_name, float(f1_thresh), hmm_directory, hp_hmm_directory)
        timeTaken = time.time() - startTime
        mins = int(timeTaken / 60)
        secs = int(timeTaken) % 60
        print("\nTotal time taken : " + str(mins) + " mins " + str(secs) + " seconds")
        return hp_hmm_directory
    except Exception:
        # Show the real cause and exit with a failure status. KeyboardInterrupt
        # and SystemExit are deliberately not intercepted here.
        traceback.print_exc()
        print("Metabgc-build has failed. Please check your inputs and contact support on : https://github.com/donia-lab/MetaBGC")
        sys.exit(1)

Пример #3

Показать файл

def mbgcbuild(prot_alignment, prot_family_name, cohort_name,
              nucl_seq_directory, prot_seq_directory, seq_fmt, pair_fmt,
              r1_file_suffix, r2_file_suffix, tp_genes_nucl,
              blastn_search_directory, hmm_search_directory, f1_thresh,
              output_directory, cpu):
    """Run the MetaBGC build stage and return the high-performance spHMM directory.

    The stages, in order: generate spHMMs from the protein alignment,
    translate the true-positive genes and align them with the alignment
    (MUSCLE), extract the gene interval positions, preprocess and translate
    the reads, run the HMMER and BLAST searches, then call the R function
    ``EvaluateSpHMM`` on the combined results.

    Args:
        prot_alignment: FASTA file with the protein family alignment; it is
            copied into the ``spHMMs`` output directory.
        prot_family_name: Name of the protein family; forwarded to the helpers
            and to the R evaluation.
        cohort_name: Cohort label appended as the last column of every combined
            BLAST row and forwarded to the HMMER search.
        nucl_seq_directory: Directory with the nucleotide reads.
        prot_seq_directory: Directory with translated reads. When it is
            ``None`` or not an existing directory the reads are translated here.
        seq_fmt, pair_fmt: Forwarded unchanged to ``PreProcessReadsPar``.
        r1_file_suffix, r2_file_suffix: Paired-read file suffixes; ``None`` is
            treated as the empty string.
        tp_genes_nucl: Nucleotide FASTA file with the true-positive genes.
        blastn_search_directory: Directory with BLAST results; defaults to
            ``<build>/blastn_result``. BLAST only runs when it does not exist.
        hmm_search_directory: Directory for HMMER results; defaults to
            ``<build>/hmm_result``.
        f1_thresh: F1 threshold, converted with ``float`` before being passed
            to the R function.
        output_directory: Parent directory of the ``build`` output directory.
        cpu: Number of worker threads; ``None`` selects 4.

    Returns:
        Path of the ``HiPer_spHMMs`` directory.
    """
    startTime = time.time()
    # Without a default the name would be unbound whenever ``cpu`` is None.
    CPU_THREADS = 4 if cpu is None else int(cpu)

    # setup paths
    build_op_dir = output_directory + os.sep + "build"
    hmm_directory = os.path.join(build_op_dir, 'spHMMs')
    prot_aln_file = os.path.join(hmm_directory,
                                 ntpath.basename(prot_alignment))
    tp_genes_prot = build_op_dir + os.sep + "TPGenes.faa"
    alnOutput = os.path.join(build_op_dir, "tmp.afa")
    gene_pos_file = os.path.join(build_op_dir, 'Gene_Interval_Pos.txt')
    if hmm_search_directory is None:
        hmm_search_directory = os.path.join(build_op_dir, 'hmm_result')
    allHMMResult = hmm_search_directory + os.sep + "CombinedHmmSearch.txt"
    if blastn_search_directory is None:
        blastn_search_directory = os.path.join(build_op_dir, 'blastn_result')
    allBLASTResult = blastn_search_directory + os.sep + "CombinedBLASTSearch.txt"

    # Gen spHMMs and interval pos
    os.makedirs(hmm_directory, 0o777, True)
    copyfile(prot_alignment, prot_aln_file)
    hmmDict = gensphmmfiles(prot_family_name, prot_aln_file, hmm_directory)

    runTranSeq(tp_genes_nucl, "1", tp_genes_prot)
    tmpFile = os.path.join(build_op_dir, "tmp.fa")

    # Join true positives in the sample with the BGC proteins
    joinedSeqs = []
    for seq in SeqIO.parse(tp_genes_prot, "fasta"):
        # Removing _1 added by TranSeq: the last two characters of the id are
        # dropped unconditionally.
        seq.id = seq.id[:-2]
        seq.description = ""
        joinedSeqs.append(seq)
    # Kept as a list because it is reused by gengeneposlist below.
    protAlnSeqs = list(SeqIO.parse(prot_aln_file, "fasta"))
    joinedSeqs.extend(protAlnSeqs)
    SeqIO.write(joinedSeqs, tmpFile, "fasta")

    # MUSCLE align TP genes with markers
    runMUSCLE(tmpFile, alnOutput)
    # Extract spHMM coordinates from MUSCLE alignment
    gengeneposlist(prot_family_name, protAlnSeqs, hmmDict, alnOutput,
                   gene_pos_file)

    if r1_file_suffix is None:
        r1_file_suffix = ""
    if r2_file_suffix is None:
        r2_file_suffix = ""

    # Preprocess synthetic reads
    nucl_seq_directory = PreProcessReadsPar(nucl_seq_directory, seq_fmt,
                                            pair_fmt, r1_file_suffix.strip(),
                                            r2_file_suffix.strip(),
                                            build_op_dir, CPU_THREADS)
    # Translate nucleotide seq. os.path.isdir(None) raises TypeError, so a
    # missing directory argument is checked explicitly first.
    if prot_seq_directory is None or not os.path.isdir(prot_seq_directory):
        prot_seq_directory = TranseqReadsDir(build_op_dir, nucl_seq_directory,
                                             CPU_THREADS)

    # HMMER Search
    os.makedirs(hmm_search_directory, 0o777, True)
    for hmmFileObj in hmmDict.values():
        hmmInterval = (str(hmmFileObj.intervalStart) + "_" +
                       str(hmmFileObj.intervalEnd))
        RunHMMDirectory(prot_seq_directory, hmmFileObj.hmmFile, cohort_name,
                        prot_family_name, "30_10", hmmInterval,
                        hmm_search_directory, CPU_THREADS)

    # The combined file lives inside the directory being walked and matches
    # the *.txt filter itself, so it must be skipped explicitly; otherwise the
    # output could be read back into itself once it has been flushed.
    combinedHMMPath = os.path.abspath(allHMMResult)
    with open(allHMMResult, 'w') as outfile:
        for subdir, dirs, files in os.walk(hmm_search_directory):
            for file in files:
                filePath = os.path.join(subdir, file)
                if os.path.abspath(filePath) == combinedHMMPath:
                    continue
                if re.match(r".*txt$", file) and os.path.getsize(filePath) > 0:
                    with open(filePath) as infile:
                        outfile.writelines(infile)

    # BLAST Alignment: an existing search directory is reused as-is; BLAST
    # only runs when the directory has to be created.
    if not os.path.isdir(blastn_search_directory):
        os.makedirs(blastn_search_directory, 0o777, True)
        RunBLASTNDirectoryPar(nucl_seq_directory, tp_genes_nucl,
                              blastn_search_directory, CPU_THREADS)

    # Same self-inclusion hazard as for the HMMER results above.
    combinedBLASTPath = os.path.abspath(allBLASTResult)
    with open(allBLASTResult, 'w') as outfile:
        outfile.write(
            "sseqid\tslen\tsstart\tsend\tqseqid\tqlen\tqstart\tqend\tpident\tevalue\tSample\tsampleType\n"
        )
        for subdir, dirs, files in os.walk(blastn_search_directory):
            for file in files:
                filePath = os.path.join(subdir, file)
                if os.path.abspath(filePath) == combinedBLASTPath:
                    continue
                if re.match(r".*txt$", file) and os.path.getsize(filePath) > 0:
                    # The sample name depends only on the file, so it is
                    # computed once per file rather than per line.
                    sampleName = ntpath.basename(filePath).split(".txt")[0]
                    with open(filePath) as infile:
                        for line in infile:
                            outfile.write(line.strip() + "\t" + sampleName +
                                          "\t" + cohort_name + "\n")

    # Eval spHMMs
    rpackages.importr('base')
    utils = rpackages.importr('utils')
    packageNames = ('tidyverse', 'ggsci', 'ggpubr', 'dplyr', 'ggplot2')
    # NOTE(review): no CRAN mirror is selected before installing; whether
    # install_packages prompts or fails then depends on the R setup.
    packnames_to_install = [
        x for x in packageNames if not rpackages.isinstalled(x)
    ]
    if packnames_to_install:
        utils.install_packages(StrVector(packnames_to_install))
    for packageName in packageNames:
        rpackages.importr(packageName)

    hp_hmm_directory = os.path.join(build_op_dir, 'HiPer_spHMMs')
    os.makedirs(hp_hmm_directory, 0o777, True)
    # NOTE(review): the script is located relative to sys.path[0]; this
    # presumably assumes the program is launched from the repository root --
    # confirm against the packaging layout.
    r_script = os.path.join(sys.path[0], 'metabgc', 'src', 'EvaluateSpHMMs.R')

    with open(r_script, 'r') as f:
        rStr = f.read()
    # Wrap the R source so its EvaluateSpHMM function is callable from Python.
    myfunc = STAP(rStr, "EvaluateSpHMM")
    myfunc.EvaluateSpHMM(allHMMResult,
                         allBLASTResult, gene_pos_file, prot_family_name,
                         float(f1_thresh), hmm_directory, hp_hmm_directory)
    timeTaken = time.time() - startTime
    mins = int(timeTaken / 60)
    secs = int(timeTaken) % 60
    print("\nTotal time taken : " + str(mins) + " mins " + str(secs) +
          " seconds")
    return hp_hmm_directory