Exemplos de Pangenome em Python, exemplos de ppanggolin.pangenome.Pangenome em Python

Exemplo n.º 1

0

Exibir arquivo

Arquivo: alignOnPang.py Projeto: tauqeer9/PPanGGOLiN

def launch(args):
    """Run the alignment and/or RGP projection requested on the command line.

    ``mkOutdir`` is called first with the output path and the force flag, then the
    file given in ``args.pangenome`` is attached to a new ``Pangenome``. ``align``
    runs when ``args.proteins`` is set and ``projectRGP`` runs when
    ``args.annotation`` is set; both may run in the same call.

    :param args: parsed command-line arguments
    """
    mkOutdir(args.output, args.force)
    pang = Pangenome()
    pang.addFile(args.pangenome)

    if args.proteins is not None:
        # Keyword arguments forwarded to align(), in the order it was called with.
        align_options = {
            "pangenome": pang,
            "proteinFile": args.proteins,
            "output": args.output,
            "tmpdir": args.tmpdir,
            "identity": args.identity,
            "coverage": args.coverage,
            "defrag": args.defrag,
            "cpu": args.cpu,
            "getinfo": args.getinfo,
            "draw_related": args.draw_related,
        }
        align(**align_options)

    if args.annotation is not None:
        projectRGP(pang, args.annotation, args.output, args.tmpdir, args.identity, args.coverage,
                   args.defrag, args.cpu, args.translation_table, pseudo=args.use_pseudo)

Exemplo n.º 2

0

Exibir arquivo

Arquivo: writeFlat.py Projeto: zhaoc1/PPanGGOLiN

def launch(args):
    """Write the flat files selected on the command line for an existing pangenome.

    :param args: parsed command-line arguments; every ``writeFlatFiles`` option is read from it
    """
    mkOutdir(args.output, args.force)
    pang = Pangenome()
    pang.addFile(args.pangenome)
    # Options forwarded unchanged to writeFlatFiles, keyed by its keyword names.
    flat_options = dict(
        cpu=args.cpu,
        soft_core=args.soft_core,
        dup_margin=args.dup_margin,
        csv=args.csv,
        genePA=args.Rtab,
        gexf=args.gexf,
        light_gexf=args.light_gexf,
        projection=args.projection,
        stats=args.stats,
        json=args.json,
        partitions=args.partitions,
        regions=args.regions,
        families_tsv=args.families_tsv,
        all_genes=args.all_genes,
        all_prot_families=args.all_prot_families,
        all_gene_families=args.all_gene_families,
        spots=args.spots,
        borders=args.borders,
        compress=args.compress,
    )
    writeFlatFiles(pang, args.output, **flat_options)

Exemplo n.º 3

0

Exibir arquivo

def launch(args):
    """
        Entry point of the partition step when it is run from the command line.

        mkOutdir is only called when ICL drawing or keeping temporary files was
        requested. The pangenome is written back to its own file afterwards.
    """
    outdir_needed = args.draw_ICL or args.keep_tmp_files
    if outdir_needed:
        mkOutdir(args.output, args.force)
    pang = Pangenome()
    pang.addFile(args.pangenome)
    # Positional arguments of partition(), in call order, after the pangenome itself.
    partition_args = (
        args.tmpdir,
        args.output,
        args.force,
        args.beta,
        args.max_degree_smoothing,
        args.free_dispersion,
        args.chunk_size,
        args.nb_of_partitions,
        args.krange,
        args.ICL_margin,
        args.draw_ICL,
        args.cpu,
        args.seed,
        args.keep_tmp_files,
    )
    partition(pang, *partition_args, show_bar=args.show_prog_bars)
    writePangenome(pang, pang.file, args.force, show_bar=args.show_prog_bars)

Exemplo n.º 4

0

Exibir arquivo

Arquivo: cluster.py Projeto: labgem/PPanGGOLiN

def launch(args):
    """Cluster the genes of a pangenome, or read a given clustering, then save the pangenome.

    :param args: parsed command-line arguments
    """
    pang = Pangenome()
    pang.addFile(args.pangenome)
    if args.clusters is not None:
        # A cluster file was supplied: import it instead of computing the clustering.
        readClustering(pang, args.clusters, args.infer_singletons, args.force,
                       disable_bar=args.disable_prog_bar)
        logging.getLogger().info("Done reading the cluster file")
    else:
        clustering_options = {
            "defrag": not args.no_defrag,
            "code": args.translation_table,
            "coverage": args.coverage,
            "identity": args.identity,
            "mode": args.mode,
            "force": args.force,
            "disable_bar": args.disable_prog_bar,
        }
        clustering(pang, args.tmpdir, args.cpu, **clustering_options)
        logging.getLogger().info("Done with the clustering")
    writePangenome(pang, pang.file, args.force, disable_bar=args.disable_prog_bar)

Exemplo n.º 5

0

Exibir arquivo

def launch(args):
    """Predict the hotspots of a pangenome and write the pangenome back to its file.

    ``mkOutdir`` is only called when a spot graph or hotspot drawings were requested.

    :param args: parsed command-line arguments
    """
    pang = Pangenome()
    pang.addFile(args.pangenome)
    writes_files = args.spot_graph or args.draw_hotspots
    if writes_files:
        mkOutdir(args.output, args.force)
    predictHotspots(
        pang,
        args.output,
        force=args.force,
        cpu=args.cpu,
        spot_graph=args.spot_graph,
        overlapping_match=args.overlapping_match,
        set_size=args.set_size,
        exact_match=args.exact_match_size,
        draw_hotspot=args.draw_hotspots,
        interest=args.interest,
    )
    writePangenome(pang, pang.file, args.force)

Exemplo n.º 6

0

Exibir arquivo

Arquivo: workflow.py Projeto: vinisalazar/PPanGGOLiN

def launch(args):
    """Run the whole workflow: annotation, clustering, graph, partition and outputs.

    Annotations are read from ``args.anno`` when given, otherwise the genomes in
    ``args.fasta`` are annotated. The pangenome is written to the file returned by
    ``mkFilename`` after the annotation step and again after partitioning. Figures
    and flat files are then written to ``args.output``.

    :param args: parsed command-line arguments
    :raises Exception: the annotations carry no gene sequences and neither a
        clustering nor fasta files were provided
    """
    pangenome = Pangenome()
    filename = mkFilename(args.basename, args.output, args.force)
    if args.anno:  # if the annotations are provided, we read from it
        # Sequences are only requested when no clustering was supplied.
        getSeq = args.clusters is None
        readAnnotations(pangenome, args.anno, getSeq)
        writePangenome(pangenome, filename, args.force)
        if args.clusters is not None:
            readClustering(pangenome, args.clusters)
        else:
            if pangenome.status["geneSequences"] == "No":
                if args.fasta is None:
                    raise Exception(
                        "The gff/gbff provided did not have any sequence informations, you did not provide clusters and you did not provide fasta file. Thus, we do not have the information we need to continue the analysis."
                    )
                getGeneSequencesFromFastas(pangenome, args.fasta)
            # we should have the sequences here.
            clustering(pangenome, args.tmpdir, args.cpu)
    elif args.fasta is not None:
        # The pangenome created above is still empty here, so it is reused as is.
        annotatePangenome(pangenome, args.fasta, args.tmpdir, args.cpu)
        writePangenome(pangenome, filename, args.force)
        clustering(pangenome, args.tmpdir, args.cpu)

    computeNeighborsGraph(pangenome)

    partition(pangenome,
              tmpdir=args.tmpdir,
              cpu=args.cpu,
              K=args.nb_of_partitions)
    writePangenome(pangenome, filename, args.force)

    if args.rarefaction:
        makeRarefactionCurve(pangenome, args.output, args.tmpdir, cpu=args.cpu)
    nb_organisms = len(pangenome.organisms)
    if nb_organisms < 5000:
        # nocloud is switched on from 500 organisms upwards.
        drawTilePlot(pangenome, args.output, nocloud=nb_organisms >= 500)
    drawUCurve(pangenome, args.output)

    writeFlatFiles(pangenome,
                   args.output,
                   args.cpu,
                   csv=True,
                   genePA=True,
                   gexf=True,
                   light_gexf=True,
                   projection=True,
                   json=True,
                   stats=True,
                   partitions=True)

    printInfo(filename, content=True)

Exemplo n.º 7

0

Exibir arquivo

Arquivo: writeSequences.py Projeto: labgem/PPanGGOLiN

def launchSequences(args):
    """Check the options, then write the sequence files requested on the command line.

    :param args: parsed command-line arguments
    """
    checkOptions(args)
    mkOutdir(args.output, args.force)
    pang = Pangenome()
    pang.addFile(args.pangenome)
    # Keyword options forwarded unchanged to writeSequenceFiles.
    sequence_options = {
        "fasta": args.fasta,
        "anno": args.anno,
        "soft_core": args.soft_core,
        "regions": args.regions,
        "genes": args.genes,
        "gene_families": args.gene_families,
        "prot_families": args.prot_families,
        "compress": args.compress,
        "disable_bar": args.disable_prog_bar,
    }
    writeSequenceFiles(pang, args.output, **sequence_options)

Exemplo n.º 8

0

Exibir arquivo

def launch(args):
    """Draw the tile plot and/or the U-shaped curve of a pangenome.

    :param args: parsed command-line arguments; ``tile_plot`` and ``ucurve``
        select which figures are drawn
    """
    mkOutdir(args.output, args.force)
    pang = Pangenome()
    pang.addFile(args.pangenome)

    wants_tile_plot = args.tile_plot
    if wants_tile_plot:
        drawTilePlot(pang, args.output, args.nocloud)

    wants_ucurve = args.ucurve
    if wants_ucurve:
        drawUCurve(pang, args.output, soft_core=args.soft_core)

Exemplo n.º 9

0

Exibir arquivo

def launch(args):
    """Write the flat files of a pangenome, passing every option positionally.

    :param args: parsed command-line arguments
    """
    mkOutdir(args.output, args.force)
    pang = Pangenome()
    pang.addFile(args.pangenome)
    # Order matters: these are positional arguments of writeFlatFiles.
    positional_options = (
        args.cpu,
        args.soft_core,
        args.dup_margin,
        args.csv,
        args.Rtab,
        args.gexf,
        args.light_gexf,
        args.projection,
        args.stats,
        args.json,
        args.partitions,
        args.families_tsv,
        args.all_genes,
        args.all_prot_families,
        args.all_gene_families,
        args.compress,
    )
    writeFlatFiles(pang, args.output, *positional_options)

Exemplo n.º 10

0

Exibir arquivo

def launch(args):
    """Predict the RGPs of a pangenome and write the pangenome back to its file.

    :param args: parsed command-line arguments
    """
    pang = Pangenome()
    pang.addFile(args.pangenome)
    rgp_options = {
        "force": args.force,
        "persistent_penalty": args.persistent_penalty,
        "variable_gain": args.variable_gain,
        "min_length": args.min_length,
        "min_score": args.min_score,
        "dup_margin": args.dup_margin,
        "cpu": args.cpu,
    }
    predictRGP(pang, **rgp_options)
    writePangenome(pang, pang.file, args.force)

Exemplo n.º 11

0

Exibir arquivo

Arquivo: writeMSA.py Projeto: tauqeer9/PPanGGOLiN

def launchMSA(args):
    """Write the MSA files of a pangenome with the options given on the command line.

    :param args: parsed command-line arguments
    """
    mkOutdir(args.output, args.force)
    pang = Pangenome()
    pang.addFile(args.pangenome)
    msa_options = dict(
        cpu=args.cpu,
        partition=args.partition,
        tmpdir=args.tmpdir,
        source=args.source,
        force=args.force,
        show_bar=args.show_prog_bars,
    )
    writeMSAFiles(pang, args.output, **msa_options)

Exemplo n.º 12

0

Exibir arquivo

def launch(args):
    """Predict the hotspots of a pangenome and write the pangenome back to its file.

    A warning is logged when one of the deprecated drawing options is set.

    :param args: parsed command-line arguments
    """
    pang = Pangenome()
    pang.addFile(args.pangenome)
    if args.spot_graph:
        mkOutdir(args.output, args.force)

    deprecated_option_used = args.draw_hotspots or args.interest or args.fig_margin or args.priority
    if deprecated_option_used:
        logging.getLogger().warning(
            "Options to draw the spots with the 'ppanggolin spot' subcommand have been deprecated, "
            "and are now dealt with in a dedicated subcommand 'ppanggolin drawspot'.")

    hotspot_options = {
        "force": args.force,
        "cpu": args.cpu,
        "spot_graph": args.spot_graph,
        "overlapping_match": args.overlapping_match,
        "set_size": args.set_size,
        "exact_match": args.exact_match_size,
        "disable_bar": args.disable_prog_bar,
    }
    predictHotspots(pang, args.output, **hotspot_options)
    writePangenome(pang, pang.file, args.force, disable_bar=args.disable_prog_bar)

Exemplo n.º 13

0

Exibir arquivo

def launch(args):
    """Cluster the genes of a pangenome, or read a given clustering, then save the pangenome.

    :param args: parsed command-line arguments
    """
    pang = Pangenome()
    pang.addFile(args.pangenome)
    if args.clusters is not None:
        # A cluster file was supplied: import it instead of computing the clustering.
        readClustering(pang, args.clusters, args.infer_singletons, args.force)
        logging.getLogger().info("Done reading the cluster file")
    else:
        # Positional arguments of clustering(), in call order, after the pangenome.
        clustering_args = (
            args.tmpdir,
            args.cpu,
            args.defrag,
            args.translation_table,
            args.coverage,
            args.identity,
            args.force,
        )
        clustering(pang, *clustering_args)
        logging.getLogger().info("Done with the clustering")
    writePangenome(pang, pang.file, args.force)

Exemplo n.º 14

0

Exibir arquivo

Arquivo: writeSequences.py Projeto: tauqeer9/PPanGGOLiN

def launchSequences(args):
    """Write the sequence files requested on the command line for a pangenome.

    ``checkOptions`` is called after the pangenome file has been attached.

    :param args: parsed command-line arguments
    """
    mkOutdir(args.output, args.force)
    pang = Pangenome()
    pang.addFile(args.pangenome)
    checkOptions(args)
    sequence_options = dict(
        fasta=args.fasta,
        anno=args.anno,
        cpu=args.cpu,
        regions=args.regions,
        genes=args.genes,
        prot_families=args.prot_families,
        gene_families=args.gene_families,
        compress=args.compress,
        show_bar=args.show_prog_bars,
    )
    writeSequenceFiles(pang, args.output, **sequence_options)

Exemplo n.º 15

0

Exibir arquivo

Arquivo: annotate.py Projeto: Alastor-pentious/PPanGGOLiN

def launch(args):
    """Annotate genomes, or read their annotations, and write the resulting pangenome.

    Annotation files take precedence: fasta files are only annotated when no
    annotation was given. When the annotations carry no gene sequences, the
    sequences are taken from the fasta files if any, otherwise two warnings are
    logged.

    :param args: parsed command-line arguments
    """
    filename = mkFilename(args.basename, args.output, args.force)
    pang = Pangenome()
    if args.anno is not None:
        readAnnotations(pang, args.anno, cpu=args.cpu, pseudo=args.use_pseudo,
                        show_bar=args.show_prog_bars)
        sequences_missing = pang.status["geneSequences"] == "No"
        if sequences_missing and args.fasta:
            getGeneSequencesFromFastas(pang, args.fasta)
        elif sequences_missing:
            logging.getLogger().warning(
                "You provided gff files without sequences, and you did not provide fasta sequences. Thus it was not possible to get the gene sequences."
            )
            logging.getLogger().warning(
                "You will be able to proceed with your analysis ONLY if you provide the clustering results in the next step."
            )
    elif args.fasta is not None:
        annotation_options = {
            "tmpdir": args.tmpdir,
            "cpu": args.cpu,
            "translation_table": args.translation_table,
            "kingdom": args.kingdom,
            "norna": args.norna,
            "overlap": args.overlap,
            "show_bar": args.show_prog_bars,
        }
        annotatePangenome(pang, args.fasta, **annotation_options)

    writePangenome(pang, filename, args.force, show_bar=args.show_prog_bars)

Exemplo n.º 16

0

Exibir arquivo

Arquivo: metrics.py Projeto: labgem/PPanGGOLiN

def launch(args):
    """Compute the metrics selected on the command line and write them.

    :param args: parsed command-line arguments
    :raises Exception: none of the metric options is set
    """
    requested = [args.genome_fluidity, args.family_fluidity, args.info_modules, args.all]
    if not any(requested):
        raise Exception("You did not indicate which metric you want to compute.")

    pang = Pangenome()
    pang.addFile(args.pangenome)

    # The same metric selection is passed to both the check and the computation.
    selection = {
        "all": args.all,
        "genome_fluidity": args.genome_fluidity,
        "family_fluidity": args.family_fluidity,
        "info_modules": args.info_modules,
    }

    logging.getLogger().debug("Check if one of the metrics was already compute")
    check_metric(pang, **selection, force=args.force)
    logging.getLogger().info("Metrics computation begin")
    computed = compute_metrics(pang, **selection, disable_bar=args.disable_prog_bar)
    logging.getLogger().info("Metrics computation done")

    write_metrics(pang, computed, no_print_info=args.no_print_info)

Exemplo n.º 17

0

Exibir arquivo

Arquivo: drawing.py Projeto: labgem/PPanGGOLiN

def launch(args):
    """Draw the figures selected on the command line for a pangenome.

    The tile plot and U-shaped curve are drawn when their flags are set; spots
    are drawn whenever ``args.spots`` is not the empty string.

    :param args: parsed command-line arguments
    """
    mkOutdir(args.output, args.force)
    pang = Pangenome()
    pang.addFile(args.pangenome)
    if args.tile_plot:
        drawTilePlot(pang, args.output, args.nocloud, disable_bar=args.disable_prog_bar)
    if args.ucurve:
        drawUCurve(pang, args.output, soft_core=args.soft_core, disable_bar=args.disable_prog_bar)
    if args.spots != '':
        spot_options = dict(pangenome=pang,
                            output=args.output,
                            spot_list=args.spots,
                            disable_bar=args.disable_prog_bar)
        drawSpots(**spot_options)

Exemplo n.º 18

0

Exibir arquivo

def launch(args):
    """Align the given sequences on a pangenome.

    A warning is logged when one of the deprecated figure options is set.

    :param args: parsed command-line arguments
    """
    mkOutdir(args.output, args.force)
    pang = Pangenome()
    pang.addFile(args.pangenome)

    deprecated_option_used = args.interest or args.fig_margin or args.label_priority
    if deprecated_option_used:
        logging.getLogger().warning(
            "Options --interest, --fig_margin and --label_priority are deprecated, "
            "and the actions they defined are now doable directly in the interactive figures "
            "that are drawn")

    align_options = {
        "pangenome": pang,
        "sequenceFile": args.sequences,
        "output": args.output,
        "tmpdir": args.tmpdir,
        "cpu": args.cpu,
        "identity": args.identity,
        "coverage": args.coverage,
        "no_defrag": args.no_defrag,
        "getinfo": args.getinfo,
        "draw_related": args.draw_related,
        "disable_bar": args.disable_prog_bar,
    }
    align(**align_options)

Exemplo n.º 19

0

Exibir arquivo

Arquivo: rarefaction.py Projeto: zhaoc1/PPanGGOLiN

def launch(args):
    """
        Entry point of the rarefaction step when it is run from the command line.

        Every option of makeRarefactionCurve is read from args.
    """
    mkOutdir(args.output, args.force)
    pang = Pangenome()
    pang.addFile(args.pangenome)
    rarefaction_options = {
        "pangenome": pang,
        "output": args.output,
        "tmpdir": args.tmpdir,
        "beta": args.beta,
        "depth": args.depth,
        "minSampling": args.min,
        "maxSampling": args.max,
        "sm_degree": args.max_degree_smoothing,
        "free_dispersion": args.free_dispersion,
        "chunk_size": args.chunk_size,
        "K": args.nb_of_partitions,
        "cpu": args.cpu,
        "seed": args.seed,
        "kestimate": args.reestimate_K,
        "krange": args.krange,
        "soft_core": args.soft_core,
    }
    makeRarefactionCurve(**rarefaction_options)

Exemplo n.º 20

0

Exibir arquivo

def test_cstr():
    """Check the attributes of a freshly constructed Pangenome."""
    pang = Pangenome()
    assert isinstance(pang, Pangenome)

    expected_attributes = ("max_fam_id", "parameters", "status")
    for name in expected_attributes:
        assert hasattr(pang, name)

    assert pang.max_fam_id == 0
    assert pang.parameters == {}

    # Every step of the status dictionary is expected to start at "No".
    status_keys = (
        'genomesAnnotated',
        'geneSequences',
        'genesClustered',
        'defragmented',
        'geneFamilySequences',
        'neighborsGraph',
        'partitionned',
    )
    assert pang.status == dict.fromkeys(status_keys, "No")

Exemplo n.º 21

0

Exibir arquivo

def launch(args):
    """Annotate genomes, or read their annotations, and write the resulting pangenome.

    Annotation files take precedence: fasta files are only annotated when no
    annotation was given. When the annotations carry no gene sequences, the
    sequences are taken from the fasta files if any, otherwise two warnings are
    logged.

    :param args: parsed command-line arguments
    :raises Exception: neither ``args.fasta`` nor ``args.anno`` is set
    """
    inputs = [args.fasta, args.anno]
    if not any(inputs):
        raise Exception("At least one of --fasta or --anno must be given")
    filename = mkFilename(args.basename, args.output, args.force)
    pang = Pangenome()
    if args.anno is not None:
        readAnnotations(pang, args.anno, cpu=args.cpu, pseudo=args.use_pseudo,
                        disable_bar=args.disable_prog_bar)
        sequences_missing = pang.status["geneSequences"] == "No"
        if sequences_missing and args.fasta:
            getGeneSequencesFromFastas(pang, args.fasta)
        elif sequences_missing:
            logging.getLogger().warning("You provided gff files without sequences, and you did not provide "
                                        "fasta sequences. Thus it was not possible to get the gene sequences.")
            logging.getLogger().warning("You will be able to proceed with your analysis ONLY if you provide "
                                        "the clustering results in the next step.")
    elif args.fasta is not None:
        annotation_options = dict(
            tmpdir=args.tmpdir,
            cpu=args.cpu,
            translation_table=args.translation_table,
            kingdom=args.kingdom,
            norna=args.norna,
            overlap=args.overlap,
            contig_filter=args.contig_filter,
            disable_bar=args.disable_prog_bar,
        )
        annotatePangenome(pang, args.fasta, **annotation_options)

    writePangenome(pang, filename, args.force, disable_bar=args.disable_prog_bar)

Exemplo n.º 22

0

Exibir arquivo

def projectRGP(pangenome,
               annotation,
               output,
               tmpdir,
               identity=0.8,
               coverage=0.8,
               defrag=False,
               cpu=1,
               translation_table=11):
    """Predict the RGPs of one annotated genome using an existing pangenome.

    The annotation file is read into a separate single-organism ``Pangenome``,
    its genes are aligned on the gene family sequences of ``pangenome``, and the
    resulting families are used to compute the regions. The regions are written
    with ``writeGffRegions`` or ``writeGbffRegions`` depending on the detected
    file type.

    :param pangenome: pangenome whose ``geneFamilySequences`` status must be
        "inFile", "Loaded" or "Computed"
    :param annotation: annotation file (gff or gbff, as reported by ``detect_filetype``)
    :param output: output location passed to the alignment and writing functions
    :param tmpdir: directory in which the temporary directory is created
    :param identity: forwarded to ``alignSeqToPang``
    :param coverage: forwarded to ``alignSeqToPang``
    :param defrag: forwarded to ``alignSeqToPang``
    :param cpu: forwarded to ``alignSeqToPang``
    :param translation_table: forwarded to ``alignSeqToPang``
    :raises Exception: the pangenome has no gene family sequences, or the gff
        file has no embedded sequences
    """
    if pangenome.status["geneFamilySequences"] not in ["inFile", "Loaded", "Computed"]:
        raise Exception(
            "Cannot use this function as your pangenome does not have gene families representatives associated to it. For now this works only if the clustering is realised by PPanGGOLiN."
        )

    # read given file
    logging.getLogger().info("Retrieving the annotations from the given file")
    # need to create a new 'pangenome' as the annotation reading functions take a pangenome as input.
    singleOrgPang = Pangenome()
    filetype = detect_filetype(annotation)
    if filetype == "gff":
        # if there are no sequences in the gff, this value will change to 'No'
        singleOrgPang.status["geneSequences"] = "Computed"
        read_org_gff(singleOrgPang, 'myGenome', annotation, [], True)
        if singleOrgPang.status["geneSequences"] == "No":
            raise Exception(
                "The given annotation file did not have a FASTA sequence included (expected '##FASTA' pragma followed by a fasta-like file format). This is required for computing the Regions of Genomic Plasticity of your organism"
            )
    elif filetype == "gbff":
        read_org_gbff(singleOrgPang, 'myGenome', annotation, [], True)

    # check and read given pangenome
    checkPangenomeInfo(pangenome,
                       needFamilies=True,
                       needPartitions=True,
                       needAnnotations=True)

    # The TemporaryDirectory object itself is kept because alignSeqToPang receives it.
    # The context managers close the files and remove the directory even when a step
    # raises; previously they were only released on the success path.
    newtmpdir = tempfile.TemporaryDirectory(dir=tmpdir)
    with newtmpdir:
        with tempfile.NamedTemporaryFile(mode="w", dir=newtmpdir.name) as tmpPangFile, \
                tempfile.NamedTemporaryFile(mode="w", dir=newtmpdir.name) as tmpGeneFile:
            writeGeneSequencesFromAnnotations(singleOrgPang, tmpGeneFile)
            writeGeneFamSequences(pangenome, tmpPangFile)

            blastout = alignSeqToPang(tmpPangFile, tmpGeneFile, output, newtmpdir, cpu,
                                      defrag, identity, coverage, True,
                                      translation_table)

    # artificially reconstruct the gene families and their partitions
    linkNewGenomeFamilies(singleOrgPang, pangenome, blastout)

    rgp_parameters = pangenome.parameters["RGP"]
    multigenics = pangenome.get_multigenics(rgp_parameters["dup_margin"])
    genomeMultigenics = linkMultigenicFamilies(singleOrgPang, multigenics)

    logging.getLogger().info("Predicting RGP in your genome")
    # NOTE(review): only the result for the last organism is kept; presumably
    # singleOrgPang holds exactly one organism ('myGenome') -- confirm.
    for org in singleOrgPang.organisms:
        genomeRGP = compute_org_rgp(
            org, pangenome.parameters["RGP"]["persistent_penalty"],
            pangenome.parameters["RGP"]["variable_gain"],
            pangenome.parameters["RGP"]["min_length"],
            pangenome.parameters["RGP"]["min_score"], genomeMultigenics)

    if filetype == "gff":
        # reread the file and insert sequence_feature objects corresponding to the predicted regions
        logging.getLogger().info("Writing the RGP in a gff file...")
        writeGffRegions(annotation, genomeRGP, output)
    elif filetype == "gbff":
        logging.getLogger().info("Writing the RGP in a gbff file...")
        writeGbffRegions(annotation, genomeRGP, output)

Exemplo n.º 23

0

Exibir arquivo

def launch(args):
    """Run the whole workflow and log the time spent in each stage.

    Annotations are read from ``args.anno`` when given, otherwise the genomes in
    ``args.fasta`` are annotated. Clustering, graph construction, partitioning,
    RGP and spot prediction follow; the pangenome is written to the file returned
    by ``mkFilename`` after several stages. Figures and flat files are then
    written to ``args.output``.

    :param args: parsed command-line arguments
    :raises Exception: no input was given, or the annotations carry no gene
        sequences and neither a clustering nor fasta files were provided
    """
    # Without any input, none of the timers read further down would ever be set and
    # the run would end in an UnboundLocalError; fail early with a clear message instead.
    if not args.anno and args.fasta is None:
        raise Exception("At least one of --fasta or --anno must be given")

    pangenome = Pangenome()
    filename = mkFilename(args.basename, args.output, args.force)
    if args.anno:  # if the annotations are provided, we read from it
        # Sequences are only requested when no clustering was supplied.
        getSeq = args.clusters is None
        start_anno = time.time()
        readAnnotations(pangenome, args.anno, cpu=args.cpu, getSeq=getSeq, show_bar=args.show_prog_bars)
        annotime = time.time() - start_anno
        start_writing = time.time()
        writePangenome(pangenome, filename, args.force, show_bar=args.show_prog_bars)
        writing_time = time.time() - start_writing
        if args.clusters is None and pangenome.status["geneSequences"] == "No":
            if args.fasta is None:
                raise Exception("The gff/gbff provided did not have any sequence informations, you did not provide clusters and you did not provide fasta file. Thus, we do not have the information we need to continue the analysis.")
            getGeneSequencesFromFastas(pangenome, args.fasta)
        start_clust = time.time()
        if args.clusters is not None:
            readClustering(pangenome, args.clusters, show_bar=args.show_prog_bars)
        else:  # we should have the sequences here.
            clustering(pangenome, args.tmpdir, args.cpu, defrag=not args.no_defrag, show_bar=args.show_prog_bars)
        clust_time = time.time() - start_clust
    else:  # args.fasta is not None, as checked on entry
        start_anno = time.time()
        annotatePangenome(pangenome, args.fasta, args.tmpdir, args.cpu, show_bar=args.show_prog_bars)
        annotime = time.time() - start_anno
        start_writing = time.time()
        writePangenome(pangenome, filename, args.force, show_bar=args.show_prog_bars)
        writing_time = time.time() - start_writing
        start_clust = time.time()
        clustering(pangenome, args.tmpdir, args.cpu, defrag=not args.no_defrag, show_bar=args.show_prog_bars)
        clust_time = time.time() - start_clust

    writePangenome(pangenome, filename, args.force, show_bar=args.show_prog_bars)
    start_graph = time.time()
    computeNeighborsGraph(pangenome, show_bar=args.show_prog_bars)
    graph_time = time.time() - start_graph

    start_part = time.time()
    partition(pangenome, tmpdir=args.tmpdir, cpu=args.cpu, K=args.nb_of_partitions, show_bar=args.show_prog_bars)
    part_time = time.time() - start_part

    start_writing = time.time()
    writePangenome(pangenome, filename, args.force, show_bar=args.show_prog_bars)
    writing_time = writing_time + time.time() - start_writing

    start_regions = time.time()
    predictRGP(pangenome, show_bar=args.show_prog_bars)
    regions_time = time.time() - start_regions

    start_spots = time.time()
    predictHotspots(pangenome, args.output, interest=args.interest, show_bar=args.show_prog_bars)
    spot_time = time.time() - start_spots

    start_writing = time.time()
    writePangenome(pangenome, filename, args.force, show_bar=args.show_prog_bars)
    writing_time = writing_time + time.time() - start_writing

    if args.rarefaction:
        makeRarefactionCurve(pangenome, args.output, args.tmpdir, cpu=args.cpu, show_bar=args.show_prog_bars)
    nb_organisms = len(pangenome.organisms)
    if 1 < nb_organisms < 5000:
        # nocloud is switched on from 500 organisms upwards.
        drawTilePlot(pangenome, args.output, nocloud=nb_organisms >= 500)
    drawUCurve(pangenome, args.output)

    start_desc = time.time()
    writeFlatFiles(pangenome, args.output, args.cpu, csv=True, genePA=True, gexf=True, light_gexf=True,
                   projection=True, json=True, stats=True, partitions=True, regions=True, spots=True)
    desc_time = time.time() - start_desc

    logger = logging.getLogger()
    logger.info(f"Annotation took : {round(annotime,2)} seconds")
    logger.info(f"Clustering took : {round(clust_time,2)} seconds")
    logger.info(f"Building the graph took : {round(graph_time,2)} seconds")
    logger.info(f"Partitionning the pangenome took : {round(part_time,2)} seconds")
    logger.info(f"Predicting RGP took : {round(regions_time,2)} seconds")
    logger.info(f"Gathering RGP into spots took : {round(spot_time,2)} seconds")
    logger.info(f"Writing the pangenome data in HDF5 took : {round(writing_time,2)} seconds")
    logger.info(f"Writing descriptive files for the pangenome took : {round(desc_time,2)} seconds")
    printInfo(filename, content=True)

Exemplo n.º 24

0

Exibir arquivo

def launch(args):
    """Compute the neighbors graph of a pangenome and write the pangenome back to its file.

    :param args: parsed command-line arguments
    """
    pang = Pangenome()
    pang.addFile(args.pangenome)
    computeNeighborsGraph(
        pang,
        args.remove_high_copy_number,
        args.force,
        show_bar=args.show_prog_bars,
    )
    writePangenome(
        pang,
        pang.file,
        args.force,
        show_bar=args.show_prog_bars,
    )

Exemplo n.º 25

0

Exibir arquivo

def launch(args):
    """Run the complete workflow from the command line and log the time of each stage.

    Stages, in order: option check, annotation (read from ``args.anno`` or computed
    from ``args.fasta``), clustering, neighbors graph, partitioning, then RGP, spot
    and module prediction. The pangenome is written to the file returned by
    ``mkFilename`` after several stages. Unless ``args.only_pangenome`` is set,
    figures and flat files are also written under ``args.output``.

    :param args: parsed command-line arguments
    :raises Exception: the annotations carry no gene sequences and neither a
        clustering nor fasta files were provided
    """
    check_option_workflow(args)
    pangenome = Pangenome()
    filename = mkFilename(args.basename, args.output, args.force)
    writing_time = anno_time = clust_time = mod_time = desc_time = None
    if args.anno:  # annotations were provided, so they are read rather than computed
        began = time.time()
        readAnnotations(pangenome, args.anno, cpu=args.cpu, disable_bar=args.disable_prog_bar)
        anno_time = time.time() - began
        began = time.time()
        writePangenome(pangenome, filename, args.force, disable_bar=args.disable_prog_bar)
        writing_time = time.time() - began
        if args.clusters is None and pangenome.status["geneSequences"] == "No":
            if args.fasta is None:
                raise Exception(
                    "The gff/gbff provided did not have any sequence informations, "
                    "you did not provide clusters and you did not provide fasta file. "
                    "Thus, we do not have the information we need to continue the analysis."
                )
            getGeneSequencesFromFastas(pangenome, args.fasta)
        began = time.time()
        if args.clusters is not None:
            readClustering(pangenome, args.clusters, disable_bar=args.disable_prog_bar)
        else:  # we should have the sequences here.
            clustering(pangenome, args.tmpdir, args.cpu, identity=args.identity, coverage=args.coverage,
                       mode=args.mode, defrag=not args.no_defrag, disable_bar=args.disable_prog_bar)
        clust_time = time.time() - began
    elif args.fasta is not None:
        began = time.time()
        annotatePangenome(pangenome, args.fasta, args.tmpdir, args.cpu,
                          contig_filter=args.contig_filter, disable_bar=args.disable_prog_bar)
        anno_time = time.time() - began
        began = time.time()
        writePangenome(pangenome, filename, args.force, disable_bar=args.disable_prog_bar)
        writing_time = time.time() - began
        began = time.time()
        clustering(pangenome, args.tmpdir, args.cpu, identity=args.identity, coverage=args.coverage,
                   mode=args.mode, defrag=not args.no_defrag, disable_bar=args.disable_prog_bar)
        clust_time = time.time() - began

    writePangenome(pangenome, filename, args.force, disable_bar=args.disable_prog_bar)
    began = time.time()
    computeNeighborsGraph(pangenome, disable_bar=args.disable_prog_bar)
    graph_time = time.time() - began

    began = time.time()
    partition(pangenome, tmpdir=args.tmpdir, cpu=args.cpu, K=args.nb_of_partitions,
              disable_bar=args.disable_prog_bar)
    part_time = time.time() - began

    began = time.time()
    writePangenome(pangenome, filename, args.force, disable_bar=args.disable_prog_bar)
    writing_time = writing_time + time.time() - began

    began = time.time()
    predictRGP(pangenome, disable_bar=args.disable_prog_bar)
    regions_time = time.time() - began

    began = time.time()
    predictHotspots(pangenome, args.output, disable_bar=args.disable_prog_bar)
    spot_time = time.time() - began

    began = time.time()
    predictModules(pangenome=pangenome, cpu=args.cpu, tmpdir=args.tmpdir,
                   disable_bar=args.disable_prog_bar)
    mod_time = time.time() - began

    began = time.time()
    writePangenome(pangenome, filename, args.force, disable_bar=args.disable_prog_bar)
    writing_time = writing_time + time.time() - began

    if not args.only_pangenome:
        began = time.time()
        mkOutdir(args.output + '/spot_figures', force=True)
        drawSpots(pangenome=pangenome, output=args.output + '/spot_figures', spot_list='all',
                  disable_bar=args.disable_prog_bar)
        # Drawing the spots is counted in the spot timer.
        spot_time = spot_time + time.time() - began

        if args.rarefaction:
            makeRarefactionCurve(pangenome, args.output, args.tmpdir, cpu=args.cpu,
                                 disable_bar=args.disable_prog_bar)
        if 1 < len(pangenome.organisms) < 5000:
            # nocloud is switched on from 500 organisms upwards.
            drawTilePlot(pangenome, args.output, nocloud=len(pangenome.organisms) >= 500)
        drawUCurve(pangenome, args.output)

        began = time.time()
        flat_file_flags = dict(csv=True, genePA=True, gexf=True, light_gexf=True, projection=True,
                               json=True, stats=True, partitions=True, regions=True, spots=True,
                               borders=True, spot_modules=True, modules=True)
        writeFlatFiles(pangenome, args.output, args.cpu, **flat_file_flags)
        desc_time = time.time() - began

    logging.getLogger().info(f"Annotation took : {round(anno_time, 2)} seconds")
    logging.getLogger().info(f"Clustering took : {round(clust_time, 2)} seconds")
    logging.getLogger().info(f"Building the graph took : {round(graph_time, 2)} seconds")
    logging.getLogger().info(f"Partitioning the pangenome took : {round(part_time, 2)} seconds")
    logging.getLogger().info(f"Predicting RGP took : {round(regions_time, 2)} seconds")
    logging.getLogger().info(f"Gathering RGP into spots took : {round(spot_time, 2)} seconds")
    logging.getLogger().info(f"Predicting modules took : {round(mod_time, 2)} seconds")
    logging.getLogger().info(f"Writing the pangenome data in HDF5 took : {round(writing_time, 2)} seconds")
    if not args.only_pangenome:
        logging.getLogger().info(
            f"Writing descriptive files for the pangenome took : {round(desc_time, 2)} seconds")
    printInfo(filename, content=True)

Exemplo n.º 26

0

Exibir arquivo

Arquivo: test_Pangenome.py Projeto: zhaoc1/PPanGGOLiN

def o_pang():
    """Build and return a new ``Pangenome`` created with no arguments."""
    fresh_pangenome = Pangenome()
    return fresh_pangenome

Exemplo n.º 27

0

Exibir arquivo

Arquivo: makeGraph.py Projeto: vinisalazar/PPanGGOLiN

def launch(args):
    """Compute the neighbors graph of an existing pangenome and save it back.

    Logs the value of ``getCurrentRAM()`` at debug level, attaches
    ``args.pangenome`` to a new ``Pangenome`` through ``addFile``, runs
    ``computeNeighborsGraph`` and then writes the result to the pangenome's
    own ``file`` attribute.

    :param args: parsed command-line arguments; this function reads
        ``pangenome``, ``remove_high_copy_number`` and ``force``.
    """
    root_logger = logging.getLogger()
    root_logger.debug(f"Ram used at the start : {getCurrentRAM()}")

    pan = Pangenome()
    pan.addFile(args.pangenome)

    computeNeighborsGraph(
        pan,
        args.remove_high_copy_number,
        args.force,
    )
    writePangenome(
        pan,
        pan.file,
        args.force,
    )

Exemplo n.º 28

0

Exibir arquivo

def launch(args):
    """Align the proteins given on the command line against a pangenome.

    Prepares the output directory with ``mkOutdir``, attaches
    ``args.pangenome`` to a new ``Pangenome`` through ``addFile`` and hands
    everything to ``align``.

    :param args: parsed command-line arguments; this function reads
        ``output``, ``force``, ``pangenome``, ``proteins``, ``tmpdir``,
        ``identity``, ``coverage``, ``defrag`` and ``cpu``.
    """
    mkOutdir(args.output, args.force)

    pan = Pangenome()
    pan.addFile(args.pangenome)

    # Arguments are passed positionally, in the same order as before.
    align(
        pan,
        args.proteins,
        args.output,
        args.tmpdir,
        args.identity,
        args.coverage,
        args.defrag,
        args.cpu,
    )

Exemplo n.º 29

0

Exibir arquivo

Arquivo: workflow.py Projeto: labgem/PPanGGOLiN

def launch(args):
    """Run the complete workflow described by the parsed arguments.

    Steps, in order:

    1. validate the options with ``check_option_workflow``;
    2. load genes from the annotation files (``args.anno``) or, when none
       are given, annotate the fasta files (``args.fasta``), saving the
       pangenome right after;
    3. get gene families: read from ``args.clusters`` when provided
       together with annotations, otherwise computed with ``clustering``;
    4. build the neighbors graph, partition, and save again;
    5. optionally make the rarefaction curve, draw the figures, write the
       flat files and print the content of the saved file.

    :param args: parsed command-line arguments.
    :raises Exception: if annotations were read without gene sequences and
        neither clusters nor fasta files were provided.
    """
    check_option_workflow(args)
    pangenome = Pangenome()
    filename = mkFilename(args.basename, args.output, args.force)

    def cluster_genes(target):
        # Both input branches cluster with exactly the same options.
        clustering(target,
                   tmpdir=args.tmpdir,
                   cpu=args.cpu,
                   identity=args.identity,
                   coverage=args.coverage,
                   mode=args.mode,
                   defrag=not args.no_defrag,
                   disable_bar=args.disable_prog_bar)

    if args.anno:
        # Annotations were provided, so genes are read from them.
        readAnnotations(pangenome,
                        args.anno,
                        cpu=args.cpu,
                        disable_bar=args.disable_prog_bar)
        writePangenome(pangenome,
                       filename,
                       args.force,
                       disable_bar=args.disable_prog_bar)

        if args.clusters is not None:
            readClustering(pangenome,
                           args.clusters,
                           disable_bar=args.disable_prog_bar)
        else:
            if pangenome.status["geneSequences"] == "No":
                # Without sequences in the annotations, fasta files are the
                # only remaining source for them.
                if args.fasta is None:
                    raise Exception(
                        "The gff/gbff provided did not have any sequence informations, "
                        "you did not provide clusters and you did not provide fasta file. "
                        "Thus, we do not have the information we need to continue the analysis."
                    )
                getGeneSequencesFromFastas(pangenome, args.fasta)
            # The gene sequences should be available at this point.
            cluster_genes(pangenome)
    elif args.fasta is not None:
        pangenome = Pangenome()
        annotatePangenome(pangenome,
                          args.fasta,
                          args.tmpdir,
                          args.cpu,
                          contig_filter=args.contig_filter,
                          disable_bar=args.disable_prog_bar)
        writePangenome(pangenome,
                       filename,
                       args.force,
                       disable_bar=args.disable_prog_bar)
        cluster_genes(pangenome)

    computeNeighborsGraph(pangenome, disable_bar=args.disable_prog_bar)
    partition(pangenome,
              tmpdir=args.tmpdir,
              cpu=args.cpu,
              K=args.nb_of_partitions,
              disable_bar=args.disable_prog_bar)
    writePangenome(pangenome,
                   filename,
                   args.force,
                   disable_bar=args.disable_prog_bar)

    if args.rarefaction:
        makeRarefactionCurve(pangenome,
                             args.output,
                             args.tmpdir,
                             cpu=args.cpu,
                             disable_bar=args.disable_prog_bar)

    genome_count = len(pangenome.organisms)
    if 1 < genome_count < 5000:
        # nocloud is switched on once there are 500 organisms or more.
        drawTilePlot(pangenome, args.output, nocloud=genome_count >= 500)
    drawUCurve(pangenome, args.output)

    flat_outputs = ("csv", "genePA", "gexf", "light_gexf", "projection",
                    "json", "stats", "partitions")
    writeFlatFiles(pangenome,
                   args.output,
                   args.cpu,
                   **{output_name: True for output_name in flat_outputs})

    printInfo(filename, content=True)