Пример #1
0
def get_contract_parser():
    """Build and return the contract parser for mapping isoforms with GMAP.

    Tool contract inputs:
        idx 0 - HQ isoforms FASTQ (GMAP input)
        idx 1 - GMAP reference set
    Tool contract outputs:
        idx 0 - GMAP output SAM
    Tool contract options:
        Constants.GMAP_NPROC_ID - integer option "gmap_nproc"
    """
    parser = get_base_contract_parser(Constants, default_level="DEBUG")

    # Command-line side: apply the I/O helper first, then the GMAP helper.
    for register in (add_map_isoforms_io_arguments, add_gmap_arguments):
        register(parser.arg_parser.parser)

    # Tool-contract side: inputs are declared in index order.
    contract = parser.tool_contract_parser
    input_specs = (
        (FileTypes.FASTQ, "hq_isoforms_fastq", "FASTQ In",
         "HQ isoforms FASTQ file"),  # input 0
        (FileTypes.DS_GMAP_REF, "gmap_referenceset", "GmapReferenceSet In",
         "Gmap reference set file"),  # input 1
    )
    for spec in input_specs:
        contract.add_input_file_type(*spec)

    sam_labels = dict(name="SAM file",
                      description="Gmap output sam",
                      default_name="gmap_output")
    contract.add_output_file_type(FileTypes.SAM, "gmap_output_sam",
                                  **sam_labels)  # output 0

    nproc_option = dict(option_id=Constants.GMAP_NPROC_ID,
                        option_str="gmap_nproc",
                        default=Constants.GMAP_NPROC_DEFAULT,
                        name="GMAP nproc",
                        description="GMAP nproc")
    contract.add_int(**nproc_option)
    return parser
Пример #2
0
def get_contract_parser():
    """Build and return the contract parser for mapping HQ isoforms with GMAP.

    Tool contract inputs:
        idx 0 - HQ isoforms FASTQ (GMAP input)
        idx 1 - GMAP reference set
    Tool contract outputs:
        idx 0 - GMAP output SAM
    Tool contract options:
        Constants.GMAP_NPROC_ID - integer option "gmap_nproc"
    """
    base_parser = get_base_contract_parser(Constants, default_level="DEBUG")

    # argparse layer
    add_map_isoforms_io_arguments(base_parser.arg_parser.parser)
    add_gmap_arguments(base_parser.arg_parser.parser)

    # tool contract layer
    tc_parser = base_parser.tool_contract_parser

    # input 0
    tc_parser.add_input_file_type(
        FileTypes.FASTQ,
        "hq_isoforms_fastq",
        name="FASTQ In",
        description="HQ isoforms FASTQ file")
    # input 1
    tc_parser.add_input_file_type(
        FileTypes.DS_GMAP_REF,
        "gmap_referenceset",
        name="GmapReferenceSet In",
        description="Gmap reference set file")
    # output 0
    tc_parser.add_output_file_type(
        FileTypes.SAM,
        "gmap_output_sam",
        name="Gmap SAM Mapping HQ Isoforms to Genome",
        description="Gmap output sam mapping HQ isoforms to reference",
        default_name="gmap_output")

    tc_parser.add_int(
        option_id=Constants.GMAP_NPROC_ID,
        option_str="gmap_nproc",
        default=Constants.GMAP_NPROC_DEFAULT,
        name="GMAP nproc",
        description="GMAP nproc")
    return base_parser
Пример #3
0
def get_contract_parser():
    """Build and return the contract parser for the make-abundance task.

    Tool contract inputs:
        idx 0 - group TXT file
        idx 1 - pickle file
    Tool contract outputs:
        idx 0 - read stat TXT file
        idx 1 - abundance TXT file
    """
    parser = get_base_contract_parser(Constants, default_level="DEBUG")

    # Command-line arguments.
    add_make_abundance_io_arguments(parser.arg_parser.parser)

    # Tool contract files, declared in index order.
    contract = parser.tool_contract_parser
    pickle_desc = ("Pickle file containing dicts mapping HQ (LQ) "
                   "sample prefixes to ICE cluster output directories")
    for file_type, file_id, label, desc in (
            (FileTypes.TXT, "group_txt", "TXT In",
             "Group file associating isoforms with reads"),  # input 0
            (FileTypes.PICKLE, "hq_lq_pre_dict", "PICKLE In",
             pickle_desc)):  # input 1
        contract.add_input_file_type(file_type, file_id, label, desc)

    for file_id, desc, base_name in (
            ("read_stat_txt", "Read status of FL and nFL reads",
             "output_mapped_read_stat"),  # output 0
            ("abundance_txt", "Abundance file",
             "output_mapped_abundance")):  # output 1
        contract.add_output_file_type(FileTypes.TXT,
                                      file_id,
                                      name="TXT file",
                                      description=desc,
                                      default_name=base_name)
    return parser
Пример #4
0
def get_contract_parser():
    """Build and return the contract parser that consumes ice_quiver results.

    add_fofn_arguments is applied first (with bas_fofn=True and the tool
    contract parser), then four tool contract inputs are declared in order:
    consensus isoforms ContigSet, cluster pickle, non-full-length mapping
    pickle, and the JSON sentinel from the ice_quiver task. Finally the
    cluster summary/report helper and the HQ/LQ helper (both defined
    elsewhere) are applied to the parser.
    """
    parser = get_base_contract_parser(Constants, default_level="DEBUG")
    contract = parser.tool_contract_parser
    add_fofn_arguments(parser.arg_parser.parser,
                       bas_fofn=True,
                       tool_contract_parser=contract)

    # (file type, file id, display name, description)
    declared_inputs = (
        (FileTypes.DS_CONTIG, "consensus_fa",
         "Consensus isoforms", "ContigSet of consensus isoforms"),
        (FileTypes.PICKLE, "cluster_pickle",
         "Clusters", "Cluster pickle file"),
        (FileTypes.PICKLE, "map_nofl_pickle",
         "Pickle file", "Pickle file for non-full-length read mapping"),
        (FileTypes.JSON, "json_in",
         "JSON file", "Sentinel file from ice_quiver task"),
    )
    for file_type, file_id, label, desc in declared_inputs:
        contract.add_input_file_type(file_type, file_id,
                                     name=label, description=desc)

    for extend in (add_cluster_summary_report_arguments,
                   add_ice_post_quiver_hq_lq_arguments):
        extend(parser)
    return parser
Пример #5
0
def get_contract_parser():
    """Build and return the contract parser for the make-abundance task.

    Tool contract inputs:
        idx 0 - group TXT file
        idx 1 - pickle file
    Tool contract outputs:
        idx 0 - read stat TXT file
        idx 1 - abundance TXT file
    """
    base_parser = get_base_contract_parser(Constants, default_level="DEBUG")

    # argparse layer
    add_make_abundance_io_arguments(base_parser.arg_parser.parser)

    # tool contract layer
    tc_parser = base_parser.tool_contract_parser

    # input 0
    tc_parser.add_input_file_type(
        FileTypes.TXT,
        "group_txt",
        name="TXT In",
        description="Group file associating isoforms with reads")
    # input 1
    tc_parser.add_input_file_type(
        FileTypes.PICKLE,
        "hq_lq_pre_dict",
        name="PICKLE In",
        description=("Pickle file containing dicts mapping HQ (LQ) "
                     "sample prefixes to ICE cluster output directories"))

    # output 0
    tc_parser.add_output_file_type(
        FileTypes.TXT,
        "read_stat_txt",
        name="TXT file",
        description="Read status of FL and nFL reads",
        default_name="output_mapped_read_stat")
    # output 1
    tc_parser.add_output_file_type(
        FileTypes.TXT,
        "abundance_txt",
        name="TXT file",
        description="Abundance file",
        default_name="output_mapped_abundance")
    return base_parser
def get_contract_parser():
    """Build and return the contract parser for post-mapping-to-genome.

    Tool contract inputs:
        idx 0 - FASTQ file of uncollapsed (HQ polished) isoforms
        idx 1 - sorted GMAP SAM file mapping the uncollapsed isoforms to the
                reference genome (produced by map_isoforms_to_genome)
        idx 2 - pickle file of dicts mapping HQ (LQ) sample prefixes to ICE
                cluster output directories (e.g., hq_lq_pre_dict.pickle)
    Tool contract outputs:
        idx 0 - FASTQ file of representative sequences of the collapsed
                filtered isoforms
        idx 1 - GFF file of the collapsed filtered isoforms
        idx 2 - abundance file for the collapsed filtered isoforms
        idx 3 - group file associating collapsed with uncollapsed isoforms
        idx 4 - read stat file with FL and nFL read status
    """
    parser = get_base_contract_parser(Constants, default_level="DEBUG")

    # Command-line side: I/O helper first, then the option helper.
    for register in (add_post_mapping_to_genome_io_arguments,
                     add_post_mapping_to_genome_arguments):
        register(parser.arg_parser.parser)

    # Tool-contract side.
    contract = parser.tool_contract_parser

    pickle_desc = ("Pickle file containing dicts mapping HQ (LQ) "
                   "sample prefixes to ICE cluster output directories")
    input_specs = (
        (FileTypes.FASTQ, "hq_isoforms_fq", "FASTQ In",
         "Input HQ polished isoforms in FASTQ file"),  # input 0
        (FileTypes.SAM, "sorted_gmap_sam", "SAM In",
         "Sorted GMAP SAM file"),  # input 1
        (FileTypes.PICKLE, "hq_lq_pre_dict", "PICKLE In",
         pickle_desc),  # input 2
    )
    for spec in input_specs:
        contract.add_input_file_type(*spec)

    # (file type, file id, display name, description, default file name)
    output_specs = (
        (FileTypes.FASTQ, "collapsed_filtered_isoforms_fq", "FASTQ file",
         "Representative sequences of collapsed filtered isoforms",
         "output_mapped"),  # output 0
        (FileTypes.GFF, "collapsed_filtered_isoforms_gff", "GFF file",
         "Collapsed filtered isoforms gff",
         "output_mapped"),  # output 1
        (FileTypes.TXT, "abundance_txt", "TXT file",
         "Abundance file",
         "output_mapped_abundance"),  # output 2
        (FileTypes.TXT, "groups_txt", "TXT file",
         "Collapsed isoform groups",
         "output_mapped_groups"),  # output 3
        (FileTypes.TXT, "read_stat_txt", "TXT file",
         "Read status of FL and nFL reads",
         "output_mapped_read_stat"),  # output 4
    )
    for file_type, file_id, label, desc, base_name in output_specs:
        contract.add_output_file_type(file_type, file_id,
                                      name=label, description=desc,
                                      default_name=base_name)

    # Tool contract options are added by a helper defined elsewhere.
    add_post_mapping_to_genome_tcp_options(contract)
    return parser
def get_contract_parser():
    """Build and return the contract parser for filtering collapsed isoforms.

    Tool contract inputs:
        idx 0 - group TXT file
        idx 1 - abundance TXT file
        idx 2 - GFF file
        idx 3 - representative FASTQ file
    Tool contract outputs:
        idx 0 - abundance TXT file
        idx 1 - GFF file
        idx 2 - representative FASTQ file
    """
    parser = get_base_contract_parser(Constants, default_level="DEBUG")

    # Command-line side.
    for register in (add_filter_collapsed_isoforms_io_arguments,
                     add_filter_collapsed_isoforms_arguments):
        register(parser.arg_parser.parser)
    # NOTE: per the original author, --max_fuzzy_junction must be added here
    # and must NOT be moved into add_filter_collapsed_isoforms_arguments.
    fuzzy_kwargs = dict(type=int,
                        default=ci.Constants.MAX_FUZZY_JUNCTION_DEFAULT,
                        help=ci.Constants.MAX_FUZZY_JUNCTION_DESC)
    parser.arg_parser.parser.add_argument("--max_fuzzy_junction",
                                          **fuzzy_kwargs)

    # Tool-contract side.
    contract = parser.tool_contract_parser
    input_specs = (
        (FileTypes.TXT, "group_txt", "TXT In",
         "Group file associating isoforms with reads"),  # input 0
        (FileTypes.TXT, "abundance_txt", "TXT In",
         "Abundance file"),  # input 1
        (FileTypes.GFF, "collapsed_isoforms_gff", "GFF In",
         "GFF file"),  # input 2
        (FileTypes.FASTQ, "rep_fastq", "FASTQ In",
         "Representative sequences of collapsed isoforms in FATSQ"),  # input 3
    )
    for spec in input_specs:
        contract.add_input_file_type(*spec)

    # (file type, file id, display name, description, default file name)
    output_specs = (
        (FileTypes.TXT, "abundance_txt", "TXT file",
         "Abundance file",
         "output_mapped_filtered_abundance"),  # output 0
        (FileTypes.GFF, "collapsed_isoforms_gff", "GFF file",
         "Collapsed isoforms in GFF file",
         "output_mapped_filtered"),  # output 1
        (FileTypes.FASTQ, "rep_fastq", "FASTQ file",
         "Representative sequences of collapsed isoforms in FASTQ",
         "output_mapped_filtered"),  # output 2
    )
    for file_type, file_id, label, desc, base_name in output_specs:
        contract.add_output_file_type(file_type, file_id,
                                      name=label, description=desc,
                                      default_name=base_name)

    add_filter_collapsed_isoforms_tcp_options(contract)
    return parser
Пример #8
0
def get_contract_parser():
    """Build and return the contract parser for gathering ICE cluster chunks.

    Input files:
        idx 0 - ChunkTasksPickle of ClusterChunkTask objects
        idx 1 - sentinel txt
    Output files (idx 1-6 are declared by helpers defined elsewhere; their
    order here follows the original author's notes):
        idx 0 - consensus isoforms ContigSet
        idx 1 - summary.json
        idx 2 - cluster_report.csv
        idx 3 - hq_isoforms.contigset.xml
        idx 4 - hq_isoforms.fq
        idx 5 - lq_isoforms.contigset.xml
        idx 6 - lq_isoforms.fq
        idx 7 - hq_lq_prefix_dict.pickle
    Options:
        Constants.SAMPLE_NAME_ID - string option "sample_name"
    """
    parser = get_base_contract_parser(Constants, default_level="DEBUG")

    input_specs = (
        (FileTypes.PICKLE, "cluster_chunks_pickle",
         "Pickle In", "Cluster chunks pickle file"),  # input idx 0
        (FileTypes.TXT, "cluster_sentinel_in",
         "Sentinel In", "Setinel file"),  # input idx 1
    )
    for spec in input_specs:
        parser.add_input_file_type(*spec)

    # output idx 0: unpolished consensus isoforms ContigSet
    consensus_labels = dict(
        name="Unpolished Consensus Isoforms",
        description="Output unpolished consensus isoforms",
        default_name="consensus_isoforms")
    parser.add_output_file_type(FileTypes.DS_CONTIG, "consensus_isoforms",
                                **consensus_labels)

    # output idx 1 (summary.json) and idx 2 (cluster_report.csv)
    add_cluster_summary_report_arguments(parser)

    # output idx 3-6: HQ/LQ isoforms (fa and fq for each)
    add_ice_post_quiver_hq_lq_arguments(parser)

    # output idx 7: hq_lq_prefix_dict.pickle
    prefix_dict_labels = dict(
        name="HQ LQ Sample Prefix Dict",
        description="Pickle mapping HQ (LQ) sample prefixes to ICE dir",
        default_name="hq_lq_prefix_dict")
    parser.add_output_file_type(FileTypes.PICKLE, "hq_lq_prefix_dict",
                                **prefix_dict_labels)

    # User-specified sample name.
    sample_name_option = dict(option_id=Constants.SAMPLE_NAME_ID,
                              option_str="sample_name",
                              default=Constants.SAMPLE_NAME_DEFAULT,
                              name="sample Name",
                              description="Sample Name")
    parser.add_str(**sample_name_option)
    return parser
Пример #9
0
def get_contract_parser():
    """Build and return the contract parser for gathering ICE cluster chunks.

    Input files:
        idx 0 - ChunkTasksPickle of ClusterChunkTask objects
        idx 1 - sentinel txt
    Output files (idx 1-6 are declared by helpers defined elsewhere; their
    order here follows the original author's notes):
        idx 0 - consensus isoforms ContigSet
        idx 1 - summary.json
        idx 2 - cluster_report.csv
        idx 3 - hq_isoforms.contigset.xml
        idx 4 - hq_isoforms.fq
        idx 5 - lq_isoforms.contigset.xml
        idx 6 - lq_isoforms.fq
        idx 7 - hq_lq_prefix_dict.pickle
    Options:
        Constants.SAMPLE_NAME_ID - string option "sample_name"
    """
    contract_parser = get_base_contract_parser(Constants,
                                               default_level="DEBUG")
    declare_input = contract_parser.add_input_file_type
    declare_output = contract_parser.add_output_file_type

    declare_input(FileTypes.PICKLE,
                  "cluster_chunks_pickle",
                  "Pickle In",
                  "Cluster chunks pickle file")  # input idx 0
    declare_input(FileTypes.TXT,
                  "cluster_sentinel_in",
                  "Sentinel In",
                  "Setinel file")  # input idx 1

    # output idx 0: unpolished consensus isoforms ContigSet
    declare_output(FileTypes.DS_CONTIG,
                   "consensus_isoforms",
                   name="Unpolished Consensus Isoforms",
                   description="Output unpolished consensus isoforms",
                   default_name="consensus_isoforms")

    # output idx 1-2 (summary.json, cluster_report.csv), then
    # output idx 3-6 (HQ/LQ isoforms, fa and fq for each)
    for extend in (add_cluster_summary_report_arguments,
                   add_ice_post_quiver_hq_lq_arguments):
        extend(contract_parser)

    # output idx 7: hq_lq_prefix_dict.pickle
    declare_output(
        FileTypes.PICKLE,
        "hq_lq_prefix_dict",
        name="HQ LQ Sample Prefix Dict",
        description="Pickle mapping HQ (LQ) sample prefixes to ICE dir",
        default_name="hq_lq_prefix_dict")

    # User-specified sample name.
    contract_parser.add_str(
        option_id=Constants.SAMPLE_NAME_ID,
        option_str="sample_name",
        default=Constants.SAMPLE_NAME_DEFAULT,
        name="sample Name",
        description="Sample Name")
    return contract_parser
def get_contract_parser():
    """Build and return the contract parser for filtering collapsed isoforms.

    Tool contract inputs:
        idx 0 - group TXT file
        idx 1 - abundance TXT file
        idx 2 - GFF file
        idx 3 - representative FASTQ file
    Tool contract outputs:
        idx 0 - abundance TXT file
        idx 1 - GFF file
        idx 2 - representative FASTQ file
    """
    base_parser = get_base_contract_parser(Constants, default_level="DEBUG")

    # argparse layer
    add_filter_collapsed_isoforms_io_arguments(
        base_parser.arg_parser.parser)
    add_filter_collapsed_isoforms_arguments(
        base_parser.arg_parser.parser)
    # NOTE: per the original author, --max_fuzzy_junction must be added here
    # and must NOT be moved into add_filter_collapsed_isoforms_arguments.
    base_parser.arg_parser.parser.add_argument(
        "--max_fuzzy_junction",
        type=int,
        default=ci.Constants.MAX_FUZZY_JUNCTION_DEFAULT,
        help=ci.Constants.MAX_FUZZY_JUNCTION_DESC)

    # tool contract layer
    tc_parser = base_parser.tool_contract_parser

    # input 0
    tc_parser.add_input_file_type(
        FileTypes.TXT, "group_txt",
        name="TXT In",
        description="Group file associating isoforms with reads")
    # input 1
    tc_parser.add_input_file_type(
        FileTypes.TXT, "abundance_txt",
        name="TXT In",
        description="Abundance file")
    # input 2
    tc_parser.add_input_file_type(
        FileTypes.GFF, "collapsed_isoforms_gff",
        name="GFF In",
        description="GFF file")
    # input 3
    tc_parser.add_input_file_type(
        FileTypes.FASTQ, "rep_fastq",
        name="FASTQ In",
        description="Representative sequences of collapsed isoforms in FATSQ")

    # output 0
    tc_parser.add_output_file_type(
        FileTypes.TXT, "abundance_txt",
        name="TXT file",
        description="Abundance file",
        default_name="output_mapped_filtered_abundance")
    # output 1
    tc_parser.add_output_file_type(
        FileTypes.GFF, "collapsed_isoforms_gff",
        name="GFF file",
        description="Collapsed isoforms in GFF file",
        default_name="output_mapped_filtered")
    # output 2
    tc_parser.add_output_file_type(
        FileTypes.FASTQ, "rep_fastq",
        name="FASTQ file",
        description="Representative sequences of collapsed isoforms in FASTQ",
        default_name="output_mapped_filtered")

    add_filter_collapsed_isoforms_tcp_options(tc_parser)
    return base_parser
Пример #11
0
def get_contract_parser():
    """Build and return the contract parser for the ice_quiver task.

    add_fofn_arguments is applied to the argument parser (bas_fofn=True,
    sharing the tool contract parser). Three tool contract inputs follow:
    consensus isoforms ContigSet, cluster pickle, and non-full-length mapping
    pickle. The single output is a JSON sentinel file.
    """
    parser = get_base_contract_parser(Constants, default_level="DEBUG")
    cli_parser = parser.arg_parser.parser
    contract = parser.tool_contract_parser
    add_fofn_arguments(cli_parser,
                       bas_fofn=True,
                       tool_contract_parser=contract)

    # (file type, file id, display name, description)
    declared_inputs = (
        (FileTypes.DS_CONTIG, "consensus_fa",
         "ContigSet", "Consensus isoforms"),
        (FileTypes.PICKLE, "cluster_pickle",
         "Clusters", "Cluster pickle file"),
        (FileTypes.PICKLE, "map_nofl_pickle",
         "Pickle file", "Pickle file for non-full-length read mapping"),
    )
    for file_type, file_id, label, desc in declared_inputs:
        contract.add_input_file_type(file_type, file_id,
                                     name=label, description=desc)

    # XXX (original author): this file does nothing other than connect this
    # task to ice_quiver_postprocess in pbsmrtpipe.
    contract.add_output_file_type(FileTypes.JSON,
                                  "json_out",
                                  "JSON file",
                                  "JSON sentinel file",
                                  default_name="quiver_out")
    return parser
Пример #12
0
def get_contract_parser():
    """Build and return the contract parser that consumes ice_quiver results.

    add_fofn_arguments is applied first (bas_fofn=True, sharing the tool
    contract parser). Four tool contract inputs are then declared in order:
    consensus isoforms ContigSet, cluster pickle, non-full-length mapping
    pickle, and the JSON sentinel from the ice_quiver task. Finally the
    cluster summary/report helper and the HQ/LQ helper (both defined
    elsewhere) are applied to the parser.
    """
    base_parser = get_base_contract_parser(Constants, default_level="DEBUG")
    tc_parser = base_parser.tool_contract_parser
    add_fofn_arguments(
        base_parser.arg_parser.parser,
        bas_fofn=True,
        tool_contract_parser=tc_parser)

    declare_input = tc_parser.add_input_file_type
    declare_input(
        FileTypes.DS_CONTIG,
        "consensus_fa",
        name="Consensus isoforms",
        description="ContigSet of consensus isoforms")
    declare_input(
        FileTypes.PICKLE,
        "cluster_pickle",
        name="Clusters",
        description="Cluster pickle file")
    declare_input(
        FileTypes.PICKLE,
        "map_nofl_pickle",
        name="Pickle file",
        description="Pickle file for non-full-length read mapping")
    declare_input(
        FileTypes.JSON,
        "json_in",
        name="JSON file",
        description="Sentinel file from ice_quiver task")

    add_cluster_summary_report_arguments(base_parser)
    add_ice_post_quiver_hq_lq_arguments(base_parser)
    return base_parser
Пример #13
0
def get_contract_parser():
    """Build and return the contract parser for the ICE cleanup task.

    Input files:
        idx 0 - ChunkTasksPickle of ClusterChunkTask objects
        idx 1 - ice_polish_cluster_bins sentinel txt
    Output files:
        idx 0 - sentinel txt
    """
    parser = get_base_contract_parser(Constants, default_level="DEBUG")

    input_specs = (
        (FileTypes.PICKLE, "cluster_chunks_pickle",
         "Pickle In", "Cluster chunks pickle file"),  # input idx 0
        (FileTypes.TXT, "ice_polish_cluster_bins_sentinel",
         "Sentinel In", "Setinel file"),  # input idx 1
    )
    for spec in input_specs:
        parser.add_input_file_type(*spec)

    # output idx 0: cleanup sentinel file
    sentinel_labels = dict(name="Sentinel file",
                           description="Output sentinel file",
                           default_name="ice_cleanup_done")
    parser.add_output_file_type(FileTypes.TXT, "ice_cleanup_sentinel",
                                **sentinel_labels)
    return parser
Пример #14
0
def get_contract_parser():
    """Build and return the contract parser for collapsing mapped isoforms.

    Tool contract inputs:
        idx 0 - HQ polished isoforms in a ContigSet
        idx 1 - sorted GMAP SAM file
    Tool contract outputs:
        idx 0 - reads which can represent collapsed isoforms, in a ContigSet
        idx 1 - collapsed isoforms in GFF
        idx 2 - collapsed isoform groups in TXT
    """
    parser = get_base_contract_parser(Constants, default_level="DEBUG")

    # Command-line side: I/O helper first, then the option helper.
    for register in (add_collapse_mapped_isoforms_io_arguments,
                     add_collapse_mapped_isoforms_arguments):
        register(parser.arg_parser.parser)

    # Tool-contract side.
    contract = parser.tool_contract_parser
    input_specs = (
        (FileTypes.DS_CONTIG, "hq_isoforms_ds", "ContigSet In",
         "Input HQ polished isoforms in ContigSet file"),  # input 0
        (FileTypes.SAM, "sorted_gmap_sam", "SAM In",
         "Sorted GMAP SAM file"),  # input 1
    )
    for spec in input_specs:
        contract.add_input_file_type(*spec)

    # (file type, file id, display name, description, default file name)
    output_specs = (
        (FileTypes.DS_CONTIG, "collapsed_isoforms_rep_ds",
         "Collapsed isoforms",
         "Representative reads of collapsed isoforms",
         "output"),  # output 0
        (FileTypes.GFF, "collapsed_isoforms_gff",
         "Collapsed isoforms GFF",
         "Collapsed isoforms gff",
         "output"),  # output 1
        (FileTypes.TXT, "groups_txt",
         "TXT file",
         "Collapsed isoform groups",
         "output_groups"),  # output 2
    )
    for file_type, file_id, label, desc, base_name in output_specs:
        contract.add_output_file_type(file_type, file_id,
                                      name=label, description=desc,
                                      default_name=base_name)

    add_collapse_mapped_isoforms_tcp_options(contract)
    return parser
Пример #15
0
def get_contract_parser():
    """Build and return the contract parser for one ICE clustering chunk.

    Per the original notes: no quiver and no SGE settings are needed.

    Inputs:
        idx 0 - cluster_chunks.pickle (original or spawned)
        idx 1 - CCS ConsensusReadSet
    Outputs:
        idx 0 - cluster_chunk_done.txt, a sentinel file which does nothing
                but connect ICE cluster to the subsequent ice_partial task
    """
    parser = get_base_contract_parser(Constants, default_level="DEBUG")

    input_specs = (
        (FileTypes.PICKLE, "cluster_chunks_pickle",
         "Pickle In", "Cluster chunks pickle file"),  # input 0
        (FileTypes.DS_CCS, "ccs_in",
         "ConsensusReadSet In", "PacBio ConsensusReadSet"),  # input 1
    )
    for spec in input_specs:
        parser.add_input_file_type(*spec)

    done_labels = dict(name="Cluster Done Txt file",
                       description="Cluster Done Txt file.",
                       default_name="cluster_chunks_done")
    parser.add_output_file_type(FileTypes.TXT, "cluster done txt",
                                **done_labels)  # output 0
    return parser
Пример #16
0
def get_contract_parser():
    """Build and return the contract parser for one polish chunk.

    Inputs:
        idx 0 - polish chunk tasks pickle
        idx 1 - sentinel txt
        idx 2 - SubreadSet (*.subreadset.xml)
    Outputs:
        idx 0 - "polish done" txt file
    """
    parser = get_base_contract_parser(Constants, default_level="DEBUG")

    input_specs = (
        (FileTypes.PICKLE, "polish_chunk_pickle",
         "PICKLE", "Polish Chunk Tasks Pickle"),  # input idx 0
        (FileTypes.TXT, "sentinel_in",
         "Sentinel In", "Setinel file"),  # input idx 1
        (FileTypes.DS_SUBREADS, "subreads_in",
         "SubreadSet In", "PacBio SubreadSet"),  # input idx 2
    )
    for spec in input_specs:
        parser.add_input_file_type(*spec)

    # Per the original author: this file does nothing other than connect
    # pbsmrtpipe tasks.
    done_labels = dict(name="Polish Done Txt file",
                       description="Polish Done Txt file.",
                       default_name="polish_chunks_done")
    parser.add_output_file_type(FileTypes.TXT, "polish done txt",
                                **done_labels)
    return parser
Пример #17
0
def get_contract_parser():
    """Build and return the contract parser for the cluster task.

    add_cluster_arguments (defined elsewhere) is applied to the parser, then
    one extra tool contract output, the clusters pickle, is declared.

    Original author's notes on the resolved tool contract (the items other
    than the pickle are presumably declared by add_cluster_arguments;
    confirm against that helper):
        inputs:
            idx 0 - flnc.contigset
            idx 1 - nfl.contigset
            idx 2 - ccs
            idx 3 - subreads
        outputs:
            idx 0 - consensus_isoform.contigset
            idx 1 - output.json
            idx 2 - output.csv
            idx 3 - output.pickle
    """
    parser = get_base_contract_parser(Constants, default_level="DEBUG")
    add_cluster_arguments(parser)

    contract = parser.tool_contract_parser
    pickle_labels = dict(name="Clusters pickle file",
                         description="Python pickle file of clusters",
                         default_name="final_clusters")
    contract.add_output_file_type(FileTypes.PICKLE, "pickle_fn",
                                  **pickle_labels)
    return parser
Пример #18
0
def get_contract_parser():
    """Build and return the tool contract parser for separate_flnc.

    Tool contract inputs:
        idx 0 - FLNC reads (flnc.fa|fq|ds per the original notes; declared
                as a ContigSet)
    Tool contract outputs:
        idx 0 - out.pickle
    """
    parser = get_base_contract_parser(Constants, default_level="DEBUG")

    # Command-line side: I/O helper first, then the option helper.
    for register in (add_separate_flnc_io_arguments,
                     add_separate_flnc_arguments):
        register(parser.arg_parser.parser)

    # Tool-contract side.
    contract = parser.tool_contract_parser
    flnc_labels = dict(name="FASTA or ContigSet file",
                       description="FLNC reads ContigSet")
    contract.add_input_file_type(FileTypes.DS_CONTIG, "flnc_fa",
                                 **flnc_labels)

    bins_labels = dict(default_name="separate_flnc",
                       name="Bins of FLNC Reads",
                       description="output bins in pickle.")
    contract.add_output_file_type(FileTypes.PICKLE, "out_pickle",
                                  **bins_labels)

    add_separate_flnc_tcp_options(contract)
    return parser
Пример #19
0
def get_contract_parser():
    """Build and return the contract parser for one polish chunk.

    Inputs:
        idx 0 - polish chunk tasks pickle
        idx 1 - sentinel txt
        idx 2 - SubreadSet (*.subreadset.xml)
    Outputs:
        idx 0 - "polish done" txt file
    """
    contract_parser = get_base_contract_parser(Constants,
                                               default_level="DEBUG")
    declare_input = contract_parser.add_input_file_type

    declare_input(FileTypes.PICKLE,
                  "polish_chunk_pickle",
                  "PICKLE",
                  "Polish Chunk Tasks Pickle")  # input idx 0
    declare_input(FileTypes.TXT,
                  "sentinel_in",
                  "Sentinel In",
                  "Setinel file")  # input idx 1
    declare_input(FileTypes.DS_SUBREADS,
                  "subreads_in",
                  "SubreadSet In",
                  "PacBio SubreadSet")  # input idx 2

    # Per the original author: this file does nothing other than connect
    # pbsmrtpipe tasks.
    contract_parser.add_output_file_type(
        FileTypes.TXT, "polish done txt",
        name="Polish Done Txt file",
        description="Polish Done Txt file.",
        default_name="polish_chunks_done")
    return contract_parser
Пример #20
0
def get_contract_parser():
    """Build and return the contract parser for one ICE clustering chunk.

    Per the original notes: no quiver and no SGE settings are needed.

    Inputs:
        idx 0 - cluster_chunks.pickle (original or spawned)
        idx 1 - CCS ConsensusReadSet
    Outputs:
        idx 0 - cluster_chunk_done.txt, a sentinel file which does nothing
                but connect ICE cluster to the subsequent ice_partial task
    """
    contract_parser = get_base_contract_parser(Constants,
                                               default_level="DEBUG")
    declare_input = contract_parser.add_input_file_type

    declare_input(FileTypes.PICKLE,
                  "cluster_chunks_pickle",
                  "Pickle In",
                  "Cluster chunks pickle file")  # input 0
    declare_input(FileTypes.DS_CCS,
                  "ccs_in",
                  "ConsensusReadSet In",
                  "PacBio ConsensusReadSet")  # input 1

    contract_parser.add_output_file_type(
        FileTypes.TXT, "cluster done txt",
        name="Cluster Done Txt file",
        description="Cluster Done Txt file.",
        default_name="cluster_chunks_done")  # output 0
    return contract_parser
Пример #21
0
def get_contract_parser():
    """Build and return the contract parser for one ice_partial chunk.

    Inputs (in the order they are declared below):
        idx 0 - partial_chunks.pickle
        idx 1 - sentinel txt file
        idx 2 - CCS ConsensusReadSet
    Outputs:
        idx 0 - partial_chunk_done.txt, a sentinel file which does nothing
                but connect ice_partial to the subsequent task
                combine_ice_partial
    """
    parser = get_base_contract_parser(Constants, default_level="DEBUG")

    input_specs = (
        (FileTypes.PICKLE, "partial_chunks_pickle",
         "Pickle In", "Partial chunks pickle file"),  # input 0
        (FileTypes.TXT, "partial_sentinel_in",
         "Sentinel In", "Setinel file"),  # input 1
        (FileTypes.DS_CCS, "ccs_in",
         "ConsensusReadSet In", "PacBio ConsensusReadSet"),  # input 2
    )
    for spec in input_specs:
        parser.add_input_file_type(*spec)

    done_labels = dict(name="Partial Done Txt file",
                       description="Partial Done Txt file.",
                       default_name="partial_chunks_done")
    parser.add_output_file_type(FileTypes.TXT, "partial done txt",
                                **done_labels)  # output 0
    return parser
Пример #22
0
def get_contract_parser():
    """Build and return the contract parser for one ice_partial chunk.

    Inputs (in the order they are declared below):
        idx 0 - partial_chunks.pickle
        idx 1 - sentinel txt file
        idx 2 - CCS ConsensusReadSet
    Outputs:
        idx 0 - partial_chunk_done.txt, a sentinel file which does nothing
                but connect ice_partial to the subsequent task
                combine_ice_partial
    """
    contract_parser = get_base_contract_parser(Constants,
                                               default_level="DEBUG")
    declare_input = contract_parser.add_input_file_type

    declare_input(FileTypes.PICKLE,
                  "partial_chunks_pickle",
                  "Pickle In",
                  "Partial chunks pickle file")  # input 0
    declare_input(FileTypes.TXT,
                  "partial_sentinel_in",
                  "Sentinel In",
                  "Setinel file")  # input 1
    declare_input(FileTypes.DS_CCS,
                  "ccs_in",
                  "ConsensusReadSet In",
                  "PacBio ConsensusReadSet")  # input 2

    contract_parser.add_output_file_type(
        FileTypes.TXT, "partial done txt",
        name="Partial Done Txt file",
        description="Partial Done Txt file.",
        default_name="partial_chunks_done")  # output 0
    return contract_parser
Пример #23
0
def get_contract_parser():
    """Build and return the contract parser for collapsing mapped isoforms.

    Tool contract inputs:
        idx 0 - HQ polished isoforms in a ContigSet
        idx 1 - sorted GMAP SAM file
    Tool contract outputs:
        idx 0 - reads which can represent collapsed isoforms, in a ContigSet
        idx 1 - collapsed isoforms in GFF
        idx 2 - collapsed isoform groups in TXT
    """
    base_parser = get_base_contract_parser(Constants, default_level="DEBUG")

    # argparse layer
    add_collapse_mapped_isoforms_io_arguments(base_parser.arg_parser.parser)
    add_collapse_mapped_isoforms_arguments(base_parser.arg_parser.parser)

    # tool contract layer
    tc_parser = base_parser.tool_contract_parser

    # input 0
    tc_parser.add_input_file_type(
        FileTypes.DS_CONTIG, "hq_isoforms_ds",
        name="ContigSet In",
        description="Input HQ polished isoforms in ContigSet file")
    # input 1
    tc_parser.add_input_file_type(
        FileTypes.SAM, "sorted_gmap_sam",
        name="SAM In",
        description="Sorted GMAP SAM file")

    # output 0
    tc_parser.add_output_file_type(
        FileTypes.DS_CONTIG, "collapsed_isoforms_rep_ds",
        name="Collapsed isoforms",
        description="Representative reads of collapsed isoforms",
        default_name="output")
    # output 1
    tc_parser.add_output_file_type(
        FileTypes.GFF, "collapsed_isoforms_gff",
        name="Collapsed isoforms GFF",
        description="Collapsed isoforms gff",
        default_name="output")
    # output 2
    tc_parser.add_output_file_type(
        FileTypes.TXT, "groups_txt",
        name="TXT file",
        description="Collapsed isoform groups",
        default_name="output_groups")

    add_collapse_mapped_isoforms_tcp_options(tc_parser)
    return base_parser
Пример #24
0
def get_contract_parser():
    """Build the tool contract parser for separate_flnc.

    Input:
        idx 0 - flnc.fa|fq|ds (registered as a ContigSet file type)
    Output:
        idx 0 - out.pickle
    """
    parser = get_base_contract_parser(Constants, default_level="DEBUG")

    # Command-line side: I/O arguments first, then the tool's own arguments.
    cli_parser = parser.arg_parser.parser
    add_separate_flnc_io_arguments(cli_parser)
    add_separate_flnc_arguments(cli_parser)

    # Tool-contract side: one input, one output, then the shared options.
    contract = parser.tool_contract_parser
    contract.add_input_file_type(
        FileTypes.DS_CONTIG,
        "flnc_fa",
        name="FASTA or ContigSet file",
        description="FLNC reads ContigSet",
    )  # input 0
    contract.add_output_file_type(
        FileTypes.PICKLE,
        "out_pickle",
        name="Binned transcripts pickle file",
        description="output bins in pickle.",
        default_name="separate_flnc",
    )  # output 0
    add_separate_flnc_tcp_options(contract)
    return parser
Пример #25
0
def get_contract_parser():
    """Return the base contract parser with ice_partial one arguments added.

    The base parser is created from ``Constants`` with
    ``default_level="INFO"``.
    """
    parser = get_base_contract_parser(Constants, default_level="INFO")
    # The helper's return value is not used; the parser itself is returned.
    add_ice_partial_one_arguments(parser)
    return parser
Пример #26
0
def get_contract_parser():
    """Return the base contract parser with the subset arguments added.

    The base parser is created from ``Constants`` without overriding any
    other option of ``get_base_contract_parser``.
    """
    parser = get_base_contract_parser(Constants)
    # The helper's return value is not used; the parser itself is returned.
    add_subset_arguments(parser)
    return parser
Пример #27
0
def get_contract_parser():
    """Create the contract parser for this tool and attach subset arguments.

    Only ``Constants`` is passed to ``get_base_contract_parser``; every other
    setting is left to that function.
    """
    contract_parser = get_base_contract_parser(Constants)
    # Any value returned by the helper is ignored.
    add_subset_arguments(contract_parser)
    return contract_parser
Пример #28
0
def get_contract_parser():
    """Create the contract parser and attach the ice_partial one arguments.

    ``get_base_contract_parser`` is called with ``Constants`` and
    ``default_level="INFO"``.
    """
    contract_parser = get_base_contract_parser(Constants,
                                               default_level="INFO")
    # Any value returned by the helper is ignored.
    add_ice_partial_one_arguments(contract_parser)
    return contract_parser
Пример #29
0
def get_contract_parser():
    """Build the tool contract parser for post mapping to genome.

    Input:
        idx 0 - A FASTQ file containing uncollapsed isoforms (hq_polished)
        idx 1 - A SORTED GMAP SAM file containing alignments mapping
                uncollapsed isoforms to reference genomes (produced by
                map_isoforms_to_genome)
        idx 2 - A Pickle file containing dicts mapping HQ (LQ) sample
                prefixes to ICE cluster output directories
                (e.g., hq_lq_pre_dict.pickle)
    Output:
        idx 0 - A FASTQ file containing representative sequences of
                filtered collapsed isoforms
        idx 1 - A GFF file containing collapsed filtered isoforms
        idx 2 - An Abundance file containing abundance info of collapsed
                filtered isoforms
        idx 3 - A Group file which associates collapsed isoforms with
                uncollapsed isoforms
        idx 4 - A ReadStat file containing FL and nFL read status
    """
    parser = get_base_contract_parser(Constants, default_level="DEBUG")

    # Command-line side: I/O arguments first, then the tool's own arguments.
    cli_parser = parser.arg_parser.parser
    add_post_mapping_to_genome_io_arguments(cli_parser)
    add_post_mapping_to_genome_arguments(cli_parser)

    # Rows are (file type, file id, name, description); the row position is
    # the input idx listed in the docstring.
    input_specs = (
        (FileTypes.FASTQ, "hq_isoforms_fq", "FASTQ In",
         "Input HQ polished isoforms in FASTQ file"),
        (FileTypes.SAM, "sorted_gmap_sam", "SAM In",
         "Sorted GMAP SAM file"),
        (FileTypes.PICKLE, "hq_lq_pre_dict", "PICKLE In",
         "Pickle file containing dicts mapping HQ (LQ) "
         "sample prefixes to ICE cluster output directories"),
    )

    # Rows are (file type, file id, name, description, default name); the
    # row position is the output idx listed in the docstring.
    output_specs = (
        (FileTypes.FASTQ, "collapsed_filtered_isoforms_fq",
         "Collapsed Filtered Isoforms",
         "Representative sequences of collapsed filtered isoforms",
         "output_mapped"),
        (FileTypes.GFF, "collapsed_filtered_isoforms_gff",
         "Collapsed Filtered Isoforms",
         "Collapsed filtered isoforms gff",
         "output_mapped"),
        (FileTypes.TXT, "abundance_txt",
         "Isoform Abundance",
         "Abundance file",
         "output_mapped_abundance"),
        (FileTypes.TXT, "groups_txt",
         "Collapsed Isoform Groups",
         "Collapsed isoform groups",
         "output_mapped_groups"),
        (FileTypes.TXT, "read_stat_txt",
         "FL nFL Reads Status",
         "Read status of FL and nFL reads",
         "output_mapped_read_stat"),
    )

    # Tool-contract side: register inputs, then outputs, in table order.
    contract = parser.tool_contract_parser
    for file_type, file_id, label, summary in input_specs:
        contract.add_input_file_type(file_type, file_id, label, summary)
    for file_type, file_id, label, summary, base_name in output_specs:
        contract.add_output_file_type(file_type,
                                      file_id,
                                      name=label,
                                      description=summary,
                                      default_name=base_name)

    # Remaining tool-contract options are registered by the shared helper.
    add_post_mapping_to_genome_tcp_options(contract)
    return parser
Пример #30
0
def get_contract_parser():
    """Return the PbParser for classify with the classify arguments added.

    The base parser is created from ``Constants`` with
    ``default_level="DEBUG"``.
    """
    parser = get_base_contract_parser(Constants, default_level="DEBUG")
    # The helper's return value is not used; the parser itself is returned.
    add_classify_arguments(parser)
    return parser