Example No. 1
def get_parser():
    parser = get_default_argparser_with_base_opts(
        version=__version__,
        description=__doc__,
        default_level="CRITICAL")
    parser.add_argument('file', help="BAM, FASTA, or DataSet XML file")
    parser.add_argument("-c", dest="use_termcolor", action="store_true")
    parser.add_argument("--quick", dest="quick", action="store_true",
                        help="Limits validation to the first 100 records "+
                             "(plus file header); equivalent to "+
                             "--max-records=100")
    parser.add_argument("--max", dest="max_errors", action="store", type=int,
                        help="Exit after MAX_ERRORS have been recorded "+
                             "(DEFAULT: check entire file)")
    parser.add_argument("--max-records", dest="max_records", action="store",
                        type=int,
                        help="Exit after MAX_RECORDS have been inspected "+
                             "(DEFAULT: check entire file)")
    parser.add_argument("--type", dest="file_type", action="store",
                        choices=["BAM", "Fasta"] + dataset.DatasetTypes.ALL,
                        help="Use the specified file type instead of guessing")
    parser.add_argument("--index", dest="validate_index", action="store_true",
                        help="Require index files (.fai or .pbi)")
    parser.add_argument("--strict", dest="strict", action="store_true",
                        help="Turn on additional validation, primarily for "+
                             "DataSet XML")
    parser.add_argument("-x", "--xunit-out", dest="xunit_out", action="store",
                        default=None, help="Xunit test results for Jenkins")
    g1 = parser.add_argument_group('bam', "BAM options")
    g2 = parser.add_argument_group('fasta', "Fasta options")
    bam.get_format_specific_args(g1)
    fasta.get_format_specific_args(g2)
    return parser
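
A parser factory like this is normally consumed by a small main() that parses the command line and dispatches; a minimal sketch under that assumption (the args_runner body is illustrative, not part of the tool above):

import sys

def args_runner(args):
    # Hypothetical runner: 'args' carries 'file', 'quick', 'max_errors', etc.,
    # exactly as declared by get_parser() above; return an integer exit code.
    print(args)
    return 0

def main(argv=None):
    parser = get_parser()           # build the parser from the factory above
    args = parser.parse_args(argv)  # argparse falls back to sys.argv[1:] when argv is None
    return args_runner(args)

if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))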
Example No. 2
def get_parser():
    p = get_default_argparser_with_base_opts(
            version=VERSION,
            description=__doc__,
            default_level="WARN")
    p.add_argument("input_bam",
                   help="Input BAM or DataSet from which reads will be read")
    p.add_argument("output_bam", nargs='?', default=None,
                   help="Output BAM or DataSet to which filtered reads will "
                        "be written")
    p.add_argument("--show-zmws", action="store_true", default=False,
                   help="Print a list of ZMWs and exit")
    p.add_argument("--whitelist", action="store", default=None,
                   help="Comma-separated list of ZMWs, or file containing " +
                        "whitelist of one hole number per line, or " +
                        "BAM/DataSet file from which to extract ZMWs")
    p.add_argument("--blacklist", action="store", default=None,
                   help="Opposite of --whitelist, specifies ZMWs to discard")
    p.add_argument("--percentage", action="store", type=float, default=None,
                   help="If you prefer to recover a percentage of a SMRTcell "
                        "rather than a specific list of reads specify that "
                        "percentage (range 0-100) here")
    p.add_argument("-n", "--count", action="store", type=int, default=None,
                   help="Recover a specific number of ZMWs picked at random")
    p.add_argument("-s", "--seed", action="store", type=int, default=None,
                   help="Random seed for selecting a percentage of reads")
    p.add_argument("--ignore-metadata", action="store_true",
                   help="Discard input DataSet metadata")
    p.add_argument("--anonymize", action="store_true",
                   help="Randomize sequences for privacy")
    return p
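
For orientation, the options above compose as in the following illustrative parse_args calls (file names are placeholders, not test data):

p = get_parser()
p.parse_args(["--show-zmws", "movie.subreadset.xml"])
p.parse_args(["--whitelist", "1123,5678", "movie.subreads.bam", "whitelisted.bam"])
p.parse_args(["--percentage", "10", "--seed", "1234", "movie.subreads.bam", "sampled.bam"])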
Example No. 3
def get_parser():
    p = get_default_argparser_with_base_opts(
        version="0.1",
        description=__doc__)
    p.add_argument("testkit_cfg")
    p.add_argument("-u", "--host", dest="host", action="store",
                   default=os.environ.get("PB_SERVICE_HOST", "http://localhost"))
    p.add_argument("-p", "--port", dest="port", action="store", type=int,
                   default=int(os.environ.get("PB_SERVICE_PORT", "8081")),
                   help="Services port number")
    p.add_argument("-x", "--xunit", dest="xml_out", default="test-output.xml",
                   help="Output XUnit test results")
    p.add_argument("-t", "--timeout", dest="time_out", type=int, default=1800,
                   help="Timeout for blocking after job submission")
    p.add_argument("-s", "--sleep", dest="sleep", type=int, default=2,
                   help="Sleep time after job submission")
    p.add_argument("--ignore-test-failures", dest="ignore_test_failures",
                   action="store_true",
                   help="Only exit with non-zero return code if the job "+
                        "itself failed, regardless of test outcome")
    p.add_argument("--import-only", dest="import_only", action="store_true",
                   help="Import datasets without running pipeline")
    p.add_argument("--only-tests", dest="test_job_id", action="store",
                   type=int, default=None,
                   help="Run tests on an existing smrtlink job")
    return p
Example No. 4
def _get_parser():
    p = get_default_argparser_with_base_opts(
        version="0.1",
        description=__doc__)
    p.add_argument("testkit_cfg")
    p.add_argument("-u", "--host", dest="host", action="store",
                   default="http://localhost")
    p.add_argument("-p", "--port", dest="port", action="store", type=int,
                   default=8081, help="Port number")
    p.add_argument("-x", "--xunit", dest="xml_out", default="test-output.xml",
                   help="Output XUnit test results")
    p.add_argument("-t", "--timeout", dest="time_out", type=int, default=1800,
                   help="Timeout for blocking after job submission")
    p.add_argument("-s", "--sleep", dest="sleep", type=int, default=2,
                   help="Sleep time after job submission")
    p.add_argument("--ignore-test-failures", dest="ignore_test_failures",
                   action="store_true",
                   help="Only exit with non-zero return code if the job "+
                        "itself failed, regardless of test outcome")
    p.add_argument("--import-only", dest="import_only", action="store_true",
                   help="Import datasets without running pipeline")
    p.add_argument("--only-tests", dest="test_job_id", action="store",
                   type=int, default=None,
                   help="Run tests on an existing smrtlink job")
    return p
Example No. 5
def _get_parser():
    p = get_default_argparser_with_base_opts(
        version="0.1",
        description=__doc__)
    p.add_argument("junit_file", nargs="+", type=argparse.FileType('r'))
    p.add_argument("-o", "--output-file", dest="output_file", action="store",
                   default="junit_results_merged.xml")
    return p
Example No. 6
def get_parser():
    """Set up and return argument parser."""
    p = get_default_argparser_with_base_opts(version=__version__,
                                             description=__doc__,
                                             default_level="INFO")
    p.add_argument("in_fn", type=str, help="Input DataSet or DataStore")
    p.add_argument("out_fn", type=str, help="Output DataSet or DataStore")
    return p
Example No. 7
def get_parser():
    p = get_default_argparser_with_base_opts(version=VERSION,
                                             description=__doc__,
                                             default_level="WARN")
    p.add_argument("input_bam",
                   help="Input BAM or DataSet from which reads will be read")
    p.add_argument("output_bam",
                   nargs='?',
                   default=None,
                   help="Output BAM or DataSet to which filtered reads will "
                   "be written")
    p.add_argument("--show-zmws",
                   action="store_true",
                   default=False,
                   help="Print a list of ZMWs and exit")
    p.add_argument("--whitelist",
                   action="store",
                   default=None,
                   help="Comma-separated list of ZMWs, or file containing " +
                   "whitelist of one hole number per line, or " +
                   "BAM/DataSet file from which to extract ZMWs")
    p.add_argument("--blacklist",
                   action="store",
                   default=None,
                   help="Opposite of --whitelist, specifies ZMWs to discard")
    p.add_argument("--percentage",
                   action="store",
                   type=float,
                   default=None,
                   help="If you prefer to recover a percentage of a SMRTcell "
                   "rather than a specific list of reads specify that "
                   "percentage (range 0-100) here")
    p.add_argument("-n",
                   "--count",
                   action="store",
                   type=int,
                   default=None,
                   help="Recover a specific number of ZMWs picked at random")
    p.add_argument("-s",
                   "--seed",
                   action="store",
                   type=int,
                   default=None,
                   help="Random seed for selecting a percentage of reads")
    p.add_argument("--ignore-metadata",
                   action="store_true",
                   help="Discard input DataSet metadata")
    p.add_argument("--relative",
                   action="store_true",
                   help="Make external resource paths relative")
    p.add_argument("--anonymize",
                   action="store_true",
                   help="Randomize sequences for privacy")
    p.add_argument("--barcodes",
                   action="store_true",
                   help="Indicates that the whitelist or blacklist contains " +
                   "barcode indices instead of ZMW numbers")
    return p
Example No. 8
def get_parser():
    p = get_default_argparser_with_base_opts(version=__version__,
                                             description=__doc__)
    p.add_argument("testkit_cfg_fofn",
                   nargs="+",
                   type=argparse.FileType("r"),
                   help="Text file listing testkit.cfg files to run; you " +
                   "may provide more than one of these")
    p.add_argument("-u",
                   "--host",
                   dest="host",
                   action="store",
                   default=Constants.HOST)
    p.add_argument("-p",
                   "--port",
                   dest="port",
                   action="store",
                   default=Constants.PORT,
                   help="Port number")
    p.add_argument("-n",
                   "--nworkers",
                   type=int,
                   default=Constants.NPROC,
                   help="Number of jobs to concurrently run.")
    p.add_argument("-t",
                   "--timeout",
                   dest="time_out",
                   type=int,
                   default=1800,
                   help="Timeout for blocking after job submission")
    p.add_argument("-s",
                   "--sleep",
                   dest="sleep",
                   type=int,
                   default=Constants.SLEEP_TIME,
                   help="Sleep time after job submission")
    p.add_argument("--ignore-test-failures",
                   dest="ignore_test_failures",
                   action="store_true",
                   help="Only exit with non-zero return code if the job " +
                   "itself failed, regardless of test outcome")
    p.add_argument("--import-only",
                   dest="import_only",
                   action="store_true",
                   help="Import datasets without running pipelines")
    p.add_argument("-j",
                   "--junit-xml",
                   dest="junit_out",
                   action="store",
                   default="junit_combined_results.xml",
                   help="JUnit output file for all tests")
    p.add_argument("-x",
                   "--nunit-xml",
                   dest="nunit_out",
                   action="store",
                   default="nunit_combined_results.xml",
                   help="NUnit output file for all tests")
    return p
Example No. 9
def _get_parser():
    p = get_default_argparser_with_base_opts(version=__version__,
                                             description=__doc__,
                                             default_level="INFO")
    p.add_argument("fastq_in", help="Input FASTQ file")
    p.add_argument("summary_csv", help="Input Summary CSV file")
    p.add_argument("subreads_in", help="Input SubreadSet XML")
    p.add_argument("zip_out", help="Output ZIP file")
    return p
Example No. 10
def get_parser():
    """Define Parser. Use the helper methods in validators to validate input"""
    p = get_default_argparser_with_base_opts(__version__, __doc__)
    f = p.add_argument

    f('path_to_file', type=validate_file, help="Path to File")
    f('output_file', help="Path to output TXT file")
    f('--nrecords', type=int, default=10, help="Number of records to write")

    return p
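
The validate_file and validate_dir helpers used as type= callables throughout these examples follow the usual argparse convention: return the (possibly normalized) value on success, or raise argparse.ArgumentTypeError so the parser reports a clean usage error. A rough stand-in, in case those helpers are not importable:

import argparse
import os

def validate_file(path):
    # Stand-in for the validator assumed by the examples; not the original implementation.
    if not os.path.isfile(path):
        raise argparse.ArgumentTypeError("Not a file: {p}".format(p=path))
    return os.path.abspath(path)

def validate_dir(path):
    if not os.path.isdir(path):
        raise argparse.ArgumentTypeError("Not a directory: {p}".format(p=path))
    return os.path.abspath(path)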
Example No. 11
def _get_parser():
    p = get_default_argparser_with_base_opts(version=__version__,
                                             description=__doc__,
                                             default_level="INFO")
    p.add_argument("consensus_in", help="Input consensus FASTQ file")
    p.add_argument("chimeras_in", help="Input chimeras FASTQ file")
    p.add_argument("subreads_in", help="Input SubreadSet XML")
    p.add_argument("consensus_out", help="Output consensus ZIP file")
    p.add_argument("chimeras_out", help="Output chimeras ZIP file")
    return p
Example No. 12
def _get_parser():
    p = get_default_argparser_with_base_opts(
        version=__version__,
        description=__doc__,
        default_level="INFO")
    p.add_argument("ccs_in", help="Input ConsensusReadSet")
    p.add_argument("subreads_in", help="Input SubreadSet")
    p.add_argument("ccs_out", help="Output ConsensusReadSet")
    p.add_argument("--use-run-design-uuid", action="store_true", default=False,
                   help="Use pre-defined UUID generated by Run Design")
    return p
Example No. 13
def _get_parser():
    desc = "Custom PipelineTemplate Registry to write pipeline templates to output directory"
    p = get_default_argparser_with_base_opts(__version__,
                                             desc,
                                             default_level=logging.ERROR)
    p.add_argument('output_dir', help="Path to output directory")
    p.add_argument('--with-xml',
                   action="store_true",
                   default=False,
                   help="Also Write Pipeline Templates as XML")
    return p
Example No. 14
def get_parser():
    desc = "Generate Pipeline documentation from a directory of Resolved Pipeline Templates"
    p = get_default_argparser_with_base_opts(__version__, desc)

    f = p.add_argument

    f("pipeline_dir", type=validate_dir, help="Path to Pipeline Template JSON Dir")
    f('-o', "--output-dir", default="pipeline-docs", help="Path to RST Output Dir")
    f('-t', '--title', default="PacBio Pipelines", help="Title of Pipeline documents")
    f('-d', '--doc-version', default="0.1.0", help="Version of Pipeline documents")
    return p
Example No. 15
def get_parser():

    p = get_default_argparser_with_base_opts(__version__, "Stress Tool")
    f = p.add_argument
    f('--host', default="localhost", help="Host name")
    f('--port', default=8070, type=int, help="Port")
    f('-n', '--nprocesses', default=10, type=int, help="Number of worker processes to launch")
    # FIXME this naming is terrible
    f('-x', default=5, type=int, help="Total number of tasks will be ~ 3 x")
    f('--profile', default="profile.json", help="Path to output profile.json")
    return p
Example No. 16
def _get_parser():
    p = get_default_argparser_with_base_opts(version=__version__,
                                             description=__doc__,
                                             default_level="INFO")
    p.add_argument("output_file", help="Gathered output file")
    p.add_argument("chunked_files", nargs="+", help="Chunked input files")
    p.add_argument("--join-contigs",
                   action="store_true",
                   default=False,
                   help="Merge split contigs")
    p.add_argument("--dataset", help="Dataset XML for populating metadata")
    return p
Example No. 17
def get_parser():
    p = get_default_argparser_with_base_opts(version=__version__,
                                             description=__doc__,
                                             default_level="INFO")
    p.add_argument("modifications", help="Base modification GFF file")
    p.add_argument("alignmentSummary", help="Alignment summary GFF")
    p.add_argument(
        "gff_out",
        help=
        "Coverage summary for regions (bins) spanning the reference with basemod results for each region"
    )
    return p
Example No. 18
def get_parser():
    description = 'Run dataset.py by specifying a command.'
    parser = get_default_argparser_with_base_opts(
        version=__VERSION__,
        description=description,
        default_level="WARNING")
    parser.add_argument("--strict", default=False, action='store_true',
                        help="Turn on strict tests, raise all errors")
    parser.add_argument("--skipCounts", default=False, action='store_true',
                        help="Turn on strict tests, raise all errors")
    subparser_list = get_subparsers()
    parser = add_subparsers(parser, subparser_list)
    return parser
Example No. 19
def get_parser():
    p = get_default_argparser_with_base_opts(__version__, __doc__)
    f = p.add_argument

    f('subreadset_path', type=validate_file, help="Path to SubreadSet XML")
    f('--name', type=str, default="Auto Job Name", help="Job Name")
    f('--host', type=str, default="smrtlink-beta", help="SMRT Link host")
    f('--port', type=int, default=8081, help="SMRT Link port")
    f('-r', '--reference-set-uuid', type=str, default=Constants.DEFAULT_RSET_UUID, help="ReferenceSet UUID")
    f('-p', '--pipeline-id', type=str, default=Constants.DEFAULT_PIPELINE, help="Pipeline Id")
    f('--block', default=False, action="store_true", help="Block and Wait for job to complete")

    return p
Example No. 20
def _get_parser():
    p = get_default_argparser_with_base_opts(version=__version__,
                                             description=__doc__,
                                             default_level="INFO")
    p.add_argument("merged", help="Name of merge datastore file")
    p.add_argument("chunks", nargs="+", help="Chunk datastore outputs")
    p.add_argument("-j",
                   "--nproc",
                   dest="nproc",
                   type=int,
                   default=1,
                   help="Number of processors to use")
    return p
Example No. 21
def _get_parser():
    p = get_default_argparser_with_base_opts(
        version=__version__,
        description=__doc__,
        default_level="INFO")
    p.add_argument(
        "input_reads", help="SubreadSet or ConsensusReadSet use as INPUT for lima")
    p.add_argument("dataset_name", help="Dataset name")
    p.add_argument("output_file", help="Output dataset XML")
    p.add_argument("--biosamples-csv",
                   action="store",
                   default=None,
                   help="Optional CSV file containing Barcode/BioSample list")
    return p
Example No. 22
def get_parser():
    desc = ""
    parser = get_default_argparser_with_base_opts(
        version=__version__, description=__doc__)
    parser.add_argument('filter_summary_csv', type=validate_file,
                        help="Filter CSV file.")
    parser.add_argument(
        '-o', "--output", dest='output_dir', default=os.getcwd(), type=validate_dir,
        help="Output directory for histogram images generated")
    parser.add_argument('-r', '--report', dest='json_report',
                        help='Path of JSON report.')
    parser.add_argument("--dpi", default=60, type=int,
                        help="dots/inch")
    return parser
Example No. 23
def get_parser():
    desc = "Tool to import datasets, convert/import fasta file and run analysis jobs"
    p = get_default_argparser_with_base_opts(__version__, desc)

    sp = p.add_subparsers(help='commands')

    def builder(subparser_id, description, options_func, exe_func):
        subparser_builder(sp, subparser_id, description, options_func,
                          exe_func)

    status_desc = "Get System Status, DataSet and Job Summary"
    builder('status', status_desc, add_base_and_sal_options,
            args_get_sal_summary)

    local_desc = " The file location must be accessible from the host where the Services are running (often on a shared file system)"
    ds_desc = "Import Local DataSet XML." + local_desc
    builder('import-dataset', ds_desc, add_sal_and_xml_dir_options,
            args_runner_import_datasets)

    fasta_desc = "Import Fasta (and convert to ReferenceSet)." + local_desc
    builder("import-fasta", fasta_desc, add_import_fasta_opts,
            args_run_import_fasta)

    run_analysis_desc = "Run Secondary Analysis Pipeline using an analysis.json"
    builder("run-analysis", run_analysis_desc, add_run_analysis_job_opts,
            args_run_analysis_job)

    emit_analysis_json_desc = "Emit an analysis.json Template to stdout that can be run using 'run-analysis'"
    builder("emit-analysis-template", emit_analysis_json_desc,
            add_common_options, args_emit_analysis_template)

    # Get Summary Job by Id
    job_summary_desc = "Get Job Summary by Job Id"
    builder('get-job', job_summary_desc, add_get_job_options,
            args_get_job_summary)

    job_list_summary_desc = "Get Job List Summary"
    builder('get-jobs', job_list_summary_desc, add_get_job_list_options,
            args_get_job_list_summary)

    ds_summary_desc = "Get DataSet Summary by DataSet Id or UUID"
    builder('get-dataset', ds_summary_desc, add_get_dataset_options,
            args_run_dataset_summary)

    ds_list_summary_desc = "Get DataSet List Summary by DataSet Type"
    builder('get-datasets', ds_list_summary_desc, add_get_dataset_list_options,
            args_run_dataset_list_summary)

    return p
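
subparser_builder is an external helper whose implementation is not shown here, but a builder of this shape conventionally registers the sub-command, lets options_func attach its arguments, and records the runner for later dispatch. A hedged sketch of that pattern (an assumption, not the library's actual code):

def subparser_builder(subparsers, subparser_id, description, options_func, exe_func):
    # Register the sub-command, attach its options, and remember which
    # function should run when this sub-command is selected.
    sub = subparsers.add_parser(subparser_id, help=description)
    options_func(sub)
    sub.set_defaults(func=exe_func)
    return sub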
Example No. 24
def get_parser():
    """Util func to create an argparse instance

    Removing explicit usage due to issues with thirdparty argparse (Python < 2.7)

    usage = "usage: %prog --input=inputRgn.Fofn --outputCsv=mySubreadSummary.csv"
    """
    desc = 'Tool for generating a CSV file of the filtered Subreads from a file name of files (FOFN).'
    parser = get_default_argparser_with_base_opts(
        version=__version__, description=__doc__)
    parser.add_argument('region_fofn', type=validate_fofn,
                        help='Input Region FOFN path')
    parser.add_argument('-o', '--output-csv', default=None, dest='output_csv',
                        help='Output File to write summary to')
    return parser
Example No. 25
def get_parser():
    description = 'Run dataset.py by specifying a command.'
    parser = get_default_argparser_with_base_opts(version=__VERSION__,
                                                  description=description,
                                                  default_level="WARNING")
    parser.add_argument("--strict",
                        default=False,
                        action='store_true',
                        help="Turn on strict tests, raise all errors")
    parser.add_argument("--skipCounts",
                        default=False,
                        action='store_true',
                        help="Turn on strict tests, raise all errors")
    subparser_list = get_subparsers()
    parser = add_subparsers(parser, subparser_list)
    return parser
Example No. 26
def _get_parser():
    p = get_default_argparser_with_base_opts(version=__version__,
                                             description=__doc__,
                                             default_level="INFO")
    p.add_argument(
        "dataset",
        help="PacBio dataset XML (AlignmentSet or ConsensusAlignmentSet)")
    p.add_argument("csv_out", help="CSV output file")
    p.add_argument("--load-snr",
                   action="store_true",
                   default=False,
                   help="Include per-read SNRs")
    p.add_argument("--load-numpasses",
                   action="store_true",
                   default=False,
                   help="Include numPasses (CCS only)")
    return p
Example No. 27
def get_parser():
    description = 'Run dataset.py by specifying a command.'
    parser = get_default_argparser_with_base_opts(version=__VERSION__,
                                                  description=description,
                                                  default_level="WARNING")
    parser.add_argument("--strict",
                        default=False,
                        action='store_true',
                        help="Turn on strict tests, raise all errors")
    parser.add_argument("--skipCounts",
                        default=False,
                        action='store_true',
                        help="Skip updating NumRecords and TotalLength counts")
    subparser_list = get_subparsers()
    parser = add_subparsers(parser, subparser_list)
    msg = "Please specify a sub-command, or call with --help for more options"
    parser.set_defaults(func=lambda args: parser.error(msg))
    return parser
Example No. 28
def get_parser():
    p = get_default_argparser_with_base_opts(version=__version__,
                                             description=__doc__)
    p.add_argument("testkit_cfg_fofn",
                   type=validate_testkit_cfg_fofn,
                   help="Text file listing testkit.cfg files to run")
    p.add_argument("-u",
                   "--host",
                   dest="host",
                   action="store",
                   default=Constants.HOST)
    p.add_argument("-p",
                   "--port",
                   dest="port",
                   action="store",
                   default=Constants.PORT,
                   help="Port number")
    p.add_argument("-n",
                   "--nworkers",
                   type=int,
                   default=Constants.NPROC,
                   help="Number of jobs to concurrently run.")
    p.add_argument("-t",
                   "--timeout",
                   dest="time_out",
                   type=int,
                   default=1800,
                   help="Timeout for blocking after job submission")
    p.add_argument("-s",
                   "--sleep",
                   dest="sleep",
                   type=int,
                   default=2,
                   help="Sleep time after job submission")
    p.add_argument("--ignore-test-failures",
                   dest="ignore_test_failures",
                   action="store_true",
                   help="Only exit with non-zero return code if the job " +
                   "itself failed, regardless of test outcome")
    p.add_argument("--import-only",
                   dest="import_only",
                   action="store_true",
                   help="Import datasets without running pipelines")
    return p
Example No. 29
def get_parser():
    """Util func to create an argparse instance

    Removing explicit usage due to issues with thirdparty argparse (Python < 2.7)

    usage = "usage: %prog --input=inputRgn.Fofn --outputCsv=mySubreadSummary.csv"
    """
    desc = 'Tool for generating a CSV file of the filtered Subreads from a file name of files (FOFN).'
    parser = get_default_argparser_with_base_opts(version=__version__,
                                                  description=__doc__)
    parser.add_argument('region_fofn',
                        type=validate_fofn,
                        help='Input Region FOFN path')
    parser.add_argument('-o',
                        '--output-csv',
                        default=None,
                        dest='output_csv',
                        help='Output File to write summary to')
    return parser
Example No. 30
def _get_parser():
    p = get_default_argparser_with_base_opts(
        version="0.1",
        description=__doc__)
    p.add_argument("testkit_cfg")
    p.add_argument("-u", "--host", dest="host", action="store",
                   default="http://localhost")
    p.add_argument("-p", "--port", dest="port", action="store", type=int,
                   default=8081, help="Port number")
    p.add_argument("-x", "--xunit", dest="xml_out", default="test-output.xml",
                   help="Output XUnit test results")
    p.add_argument("-t", "--timeout", dest="time_out", type=int, default=1800,
                   help="Timeout for blocking after job submission")
    p.add_argument("-s", "--sleep", dest="sleep", type=int, default=2,
                   help="Sleep time after job submission")
    p.add_argument("--ignore-test-failures", dest="ignore_test_failures",
                   action="store_true",
                   help="Only exit with non-zero return code if the job "+
                        "itself failed, regardless of test outcome")
    return p
Example No. 31
def get_parser():
    p = get_default_argparser_with_base_opts(
            version=VERSION,
            description=__doc__,
            default_level="WARN")
    p.add_argument("input_bam",
                   help="Input BAM or DataSet from which reads will be read")
    p.add_argument("output_bam", nargs='?', default=None,
                   help="Output BAM or DataSet to which filtered reads will "
                        "be written")
    p.add_argument("--show-zmws", action="store_true", default=False,
                   help="Print a list of ZMWs and exit")
    p.add_argument("--whitelist", action="store", default=None,
                   help="Comma-separated list of ZMWs, or file containing " +
                        "whitelist of one hole number per line, or " +
                        "BAM/DataSet file from which to extract ZMWs")
    p.add_argument("--blacklist", action="store", default=None,
                   help="Opposite of --whitelist, specifies ZMWs to discard")
    p.add_argument("--percentage", action="store", type=float, default=None,
                   help="If you prefer to recover a percentage of a SMRTcell "
                        "rather than a specific list of reads specify that "
                        "percentage (range 0-100) here")
    p.add_argument("-n", "--count", action="store", type=int, default=None,
                   help="Recover a specific number of ZMWs picked at random")
    p.add_argument("-s", "--seed", action="store", type=int, default=None,
                   help="Random seed for selecting a percentage of reads")
    p.add_argument("--ignore-metadata", action="store_true",
                   help="Discard input DataSet metadata")
    p.add_argument("--relative", action="store_true",
                   help="Make external resource paths relative")
    p.add_argument("--anonymize", action="store_true",
                   help="Randomize sequences for privacy")
    p.add_argument("--barcodes", action="store_true",
                   help="Indicates that the whitelist or blacklist contains "+
                        "barcode indices instead of ZMW numbers")
    p.add_argument("--sample-scraps", action="store_true",
                   help="If enabled, --percentage and --count will include "+
                        "hole numbers from scraps BAM files when picking a "+
                        "random sample (default is to sample only ZMWs "+
                        "present in subreads BAM).")
    return p
Example No. 32
def get_parser():
    p = get_default_argparser_with_base_opts(
        version=__version__,
        description=__doc__)
    p.add_argument("testkit_cfg_fofn", type=validate_testkit_cfg_fofn,
                  help="Text file listing testkit.cfg files to run")
    p.add_argument("-u", "--host", dest="host", action="store",
                   default=Constants.HOST)
    p.add_argument("-p", "--port", dest="port", action="store",
                   default=Constants.PORT, help="Port number")
    p.add_argument("-n", "--nworkers", type=int, default=Constants.NPROC,
                   help="Number of jobs to concurrently run.")
    p.add_argument("-t", "--timeout", dest="time_out", type=int, default=1800,
                   help="Timeout for blocking after job submission")
    p.add_argument("-s", "--sleep", dest="sleep", type=int, default=2,
                   help="Sleep time after job submission")
    p.add_argument("--ignore-test-failures", dest="ignore_test_failures",
                   action="store_true",
                   help="Only exit with non-zero return code if the job "+
                        "itself failed, regardless of test outcome")
    return p
Example No. 33
def get_parser():
    """Old Usage in pbpy:

    usage = filter_subread.py --debug filtered_subread_summary.csv --output /path/to/outputDir --report /path/to/outputDir/junk3.json

    filtered_subread_summary.csv
    """
    desc = ""
    parser = get_default_argparser_with_base_opts(
        version=__version__, description=__doc__)
    parser.add_argument("filter_summary_csv",
                        help="Path to Filter Subread Summary CSV file.",
                        type=validate_file)
    parser.add_argument('-o', '--output', dest='output', default=os.getcwd(),
                        type=validate_dir,
                        help='Output directory to write Subread Hist plots to.')
    parser.add_argument('--dpi', type=int, dest='dpi', default=72,
                        help="dpi (dots/inch) for plots that were generated.")
    parser.add_argument('-r', '--report', dest='report', default=None,
                        help="Write the Json report to disk.")
    return parser
Example No. 34
def _get_parser():
    p = get_default_argparser_with_base_opts(
        version=__VERSION__,
        description=__doc__,
        default_level="INFO")
    p.add_argument("mode", choices=["consolidate", "fasta", "fastq"])
    p.add_argument("dataset_file")
    p.add_argument("datastore_out")
    p.add_argument("--min-rq",
                   dest="min_rq",
                   type=float,
                   default=Constants.HIFI_RQ,
                   help="Sets RQ cutoff for splitting output")
    p.add_argument("--min-qv",
                   dest="min_rq",
                   type=lambda arg: phred_qv_as_accuracy(int(arg)),
                   help="Alternative to --min-rq, on Phred scale (0-60)")
    p.add_argument("--no-zip",
                   action="store_true",
                   help="Disable ZIP output")
    return p
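
The phred_qv_as_accuracy conversion assumed by --min-qv above is the standard Phred relation, accuracy = 1 - 10^(-QV/10); a stand-in definition, in case the real helper is not importable:

def phred_qv_as_accuracy(qv):
    # Standard Phred scale: QV 20 -> 0.99, QV 30 -> 0.999, QV 60 -> 0.999999.
    return 1.0 - 10 ** (-qv / 10.0)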
Example No. 35
def get_parser():
    desc = "Generate Pipeline documentation from a directory of Resolved Pipeline Templates"
    p = get_default_argparser_with_base_opts(__version__, desc)

    f = p.add_argument

    f("pipeline_dir",
      type=validate_dir,
      help="Path to Pipeline Template JSON Dir")
    f('-o',
      "--output-dir",
      default="pipeline-docs",
      help="Path to RST Output Dir")
    f('-t',
      '--title',
      default="PacBio Pipelines",
      help="Title of Pipeline documents")
    f('-d',
      '--doc-version',
      default="0.1.0",
      help="Version of Pipeline documents")
    return p
Example No. 36
def get_parser():
    desc = "Tool to import datasets, convert/import fasta file and run analysis jobs"
    p = get_default_argparser_with_base_opts(__version__, desc)

    sp = p.add_subparsers(help='commands')

    def builder(subparser_id, description, options_func, exe_func):
        subparser_builder(sp, subparser_id, description, options_func, exe_func)

    status_desc = "Get System Status, DataSet and Job Summary"
    builder('status', status_desc, add_base_and_sal_options, args_get_sal_summary)

    local_desc = " The file location must be accessible from the host where the Services are running (often on a shared file system)"
    ds_desc = "Import Local DataSet XML." + local_desc
    builder('import-dataset', ds_desc, add_sal_and_xml_dir_options, args_runner_import_datasets)

    fasta_desc = "Import Fasta (and convert to ReferenceSet)." + local_desc
    builder("import-fasta", fasta_desc, add_import_fasta_opts, args_run_import_fasta)

    run_analysis_desc = "Run Secondary Analysis Pipeline using an analysis.json"
    builder("run-analysis", run_analysis_desc, add_run_analysis_job_opts, args_run_analysis_job)

    emit_analysis_json_desc = "Emit an analysis.json Template to stdout that can be run using 'run-analysis'"
    builder("emit-analysis-template", emit_analysis_json_desc, add_common_options, args_emit_analysis_template)

    # Get Summary Job by Id
    job_summary_desc = "Get Job Summary by Job Id"
    builder('get-job', job_summary_desc, add_get_job_options, args_get_job_summary)

    job_list_summary_desc = "Get Job List Summary"
    builder('get-jobs', job_list_summary_desc, add_get_job_list_options, args_get_job_list_summary)

    ds_summary_desc = "Get DataSet Summary by DataSet Id or UUID"
    builder('get-dataset', ds_summary_desc, add_get_dataset_options, args_run_dataset_summary)

    ds_list_summary_desc = "Get DataSet List Summary by DataSet Type"
    builder('get-datasets', ds_list_summary_desc, add_get_dataset_list_options, args_run_dataset_list_summary)

    return p
Example No. 37
def get_parser():
    sample_picker = {'first': sample_first,
                     'random': sample_random,
                     'uniform': sample_uniform}
    p = get_default_argparser_with_base_opts(
        version=__version__,
        description=__doc__,
        default_level="WARN")
    p.add_argument('subreadset', type=SubreadSet,
                   help="Input SubreadSet for an Internal BAM")
    p.add_argument('alignmentset', type=AlignmentSet,
                   help="Input AlignmentSet for the SubreadSet")
    p.add_argument('outprefix', type=str,
                   help="Output prefix for csvs")
    p.add_argument('--nreads', type=int,
                   help="The number of reads to process")
    p.add_argument('--sampler', default='uniform',
                   type=lambda x: sample_picker.get(x),
                   choices=sample_picker.keys(),
                   help="Read sampling mechanism")
    p.add_argument('--search', type=int, default=25,
                   help=('Limit the number of hns to search for a '
                         'local hit'))
    return p
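
One caveat about the --sampler option above: argparse applies the type conversion before it validates choices, so converting the string to a callable while listing the dictionary keys as choices can reject otherwise valid values. A safer sketch (hypothetical, not the original tool) keeps string choices and resolves the callable after parsing:

import argparse

# Placeholder samplers standing in for sample_first/sample_random/sample_uniform.
def sample_first(items, n): return items[:n]
def sample_random(items, n): return items[:n]
def sample_uniform(items, n): return items[:n]

sample_picker = {'first': sample_first,
                 'random': sample_random,
                 'uniform': sample_uniform}

p = argparse.ArgumentParser()
# Keep the raw string here so the choices check compares like with like.
p.add_argument('--sampler', default='uniform', choices=sorted(sample_picker),
               help="Read sampling mechanism")
args = p.parse_args(['--sampler', 'random'])
# Look up the callable only after argparse has validated the choice.
sampler = sample_picker[args.sampler]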
Example No. 38
def _get_parser():
    p = get_default_argparser_with_base_opts(
        version=__version__,
        description=__doc__,
        default_level="INFO")
    p.add_argument(
        "input_reads", help="SubreadSet or ConsensusReadSet use as INPUT for lima")
    p.add_argument(
        "lima_datastore", help="Datastore json generated by lima to demultiplex input_reads.")
    p.add_argument(
        "barcodes", help="BarcodeSet lima used to demultiplex reads")
    p.add_argument("out_json", help="Output datastore json")
    p.add_argument("--isoseq-mode", action="store_true", default=False,
                   help="Iso-Seq mode")
    p.add_argument("--use-barcode-uuids", action="store_true", default=False,
                   help="Apply pre-defined barcoded dataset UUIDs from input_reads")
    p.add_argument("--min-bq-filter", action="store", type=int,
                   default=Constants.BARCODE_QUALITY_GREATER_THAN,
                   help="Minimum barcode quality encoded in dataset filter")
    p.add_argument("-j", "--nproc", dest="nproc", action="store", type=int,
                   default=1, help="Number of processors to use")
    p.add_argument("--outdir", action="store", default=None,
                   help="Output directory for update datasets")
    return p
Example No. 39
def _get_parser():
    p = get_default_argparser_with_base_opts(version=__version__,
                                             description=__doc__)
    p.add_argument("subreads", type=validate_file)
    p.add_argument("datastore", type=_validate_output_file)
    return p
Example No. 40
def get_parser():
    """Define Parser. Use the helper methods in validators to validate input"""
    p = get_default_argparser_with_base_opts(__version__, __doc__)
    p.add_argument('path_to_file', type=validate_file, help="Path to File")
    return p
Example No. 41
def get_base_parser(description, log_level="INFO"):
    from pbcommand.cli import get_default_argparser_with_base_opts
    from pbcoretools import __VERSION__
    return get_default_argparser_with_base_opts(version=__VERSION__,
                                                description=description,
                                                default_level=log_level)
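
A hypothetical caller of the helper above only needs to supply a description (and optionally a log level); the version string and the shared base options come from the factory. The tool-specific arguments below are illustrative:

def get_parser():
    p = get_base_parser("Summarize a DataSet XML file", log_level="WARN")
    p.add_argument("dataset_xml", help="Input DataSet XML")          # illustrative argument
    p.add_argument("-o", "--output", default=None, help="Optional output path")
    return p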
Example No. 42
def get_parser():
    p = get_default_argparser_with_base_opts(version="0.1",
                                             description=__doc__)
    p.add_argument("testkit_cfg")
    p.add_argument(
        "-u",
        "--host",
        dest="host",
        action="store",
        default=os.environ.get("PB_SERVICE_HOST", "localhost"),
        help=
        "Hostname of SMRT Link server.  If this is anything other than 'localhost' you must supply authentication."
    )
    p.add_argument("-p",
                   "--port",
                   dest="port",
                   action="store",
                   type=int,
                   default=int(os.environ.get("PB_SERVICE_PORT", "8081")),
                   help="Services port number")
    p.add_argument("--user",
                   dest="user",
                   action="store",
                   default=os.environ.get("PB_SERVICE_AUTH_USER", None),
                   help="User to authenticate with (if using HTTPS)")
    p.add_argument("--password",
                   dest="password",
                   action="store",
                   default=os.environ.get("PB_SERVICE_AUTH_PASSWORD", None),
                   help="Password to authenticate with (if using HTTPS)")
    p.add_argument("-x",
                   "--xunit",
                   dest="xml_out",
                   default="test-output.xml",
                   help="Output XUnit test results")
    p.add_argument(
        "-n",
        "--nunit",
        dest="nunit_out",
        default="nunit_out.xml",
        help=
        "Optional NUnit output file, used for JIRA/Xray integration; will be written only if the 'xray_tests' field is populated."
    )
    p.add_argument("-t",
                   "--timeout",
                   dest="time_out",
                   type=int,
                   default=1800,
                   help="Timeout for blocking after job submission")
    p.add_argument("-s",
                   "--sleep",
                   dest="sleep",
                   type=int,
                   default=2,
                   help="Sleep time after job submission")
    p.add_argument("--ignore-test-failures",
                   dest="ignore_test_failures",
                   action="store_true",
                   help="Only exit with non-zero return code if the job " +
                   "itself failed, regardless of test outcome")
    p.add_argument("--import-only",
                   dest="import_only",
                   action="store_true",
                   help="Import datasets without running pipeline")
    p.add_argument("--only-tests",
                   dest="test_job_id",
                   action="store",
                   type=int,
                   default=None,
                   help="Run tests on an existing smrtlink job")
    return p
Example No. 43
def get_parser():
    """Define Parser. Use the helper methods in validators to validate input"""
    p = get_default_argparser_with_base_opts(__version__, __doc__)
    p.add_argument('path_to_file', type=validate_file, help="Path to File")
    return p
Example No. 44
def get_parser():
    p = get_default_argparser_with_base_opts(version=VERSION,
                                             description=__doc__,
                                             default_level="WARN")
    p.add_argument("input_bam",
                   help="Input BAM or DataSet from which reads will be read")
    p.add_argument("output_bam",
                   nargs='?',
                   default=None,
                   help="Output BAM or DataSet to which filtered reads will "
                   "be written")
    p.add_argument("--show-zmws",
                   action="store_true",
                   default=False,
                   help="Print a list of ZMWs and exit")
    p.add_argument("--whitelist",
                   action="store",
                   default=None,
                   help="Comma-separated list of ZMWs, or file containing " +
                   "whitelist of one hole number per line, or " +
                   "BAM/DataSet file from which to extract ZMWs")
    p.add_argument("--blacklist",
                   action="store",
                   default=None,
                   help="Opposite of --whitelist, specifies ZMWs to discard")
    p.add_argument(
        "--subreads",
        action="store_true",
        help="If set, the whitelist or blacklist will be assumed to contain " +
        "one subread name per line, or " +
        "a BAM/DataSet file from which to extract subreads")
    p.add_argument("--percentage",
                   action="store",
                   type=float,
                   default=None,
                   help="If you prefer to recover a percentage of a SMRTcell "
                   "rather than a specific list of reads specify that "
                   "percentage (range 0-100) here")
    p.add_argument("-n",
                   "--count",
                   action="store",
                   type=int,
                   default=None,
                   help="Recover a specific number of ZMWs picked at random")
    p.add_argument("-s",
                   "--seed",
                   action="store",
                   type=int,
                   default=None,
                   help="Random seed for selecting a percentage of reads")
    p.add_argument("--ignore-metadata",
                   action="store_true",
                   help="Discard input DataSet metadata")
    p.add_argument("--relative",
                   action="store_true",
                   help="Make external resource paths relative")
    p.add_argument("--anonymize",
                   action="store_true",
                   help="Randomize sequences for privacy")
    p.add_argument("--barcodes",
                   action="store_true",
                   help="Indicates that the whitelist or blacklist contains " +
                   "barcode indices instead of ZMW numbers")
    p.add_argument("--sample-scraps",
                   action="store_true",
                   help="If enabled, --percentage and --count will include " +
                   "hole numbers from scraps BAM files when picking a " +
                   "random sample (default is to sample only ZMWs " +
                   "present in subreads BAM).")
    p.add_argument("--keep-uuid",
                   action="store_true",
                   help="If enabled, the UUID from the input dataset will " +
                   "be used for the output as well.")
    p.add_argument("--min-adapters",
                   action="store",
                   type=int,
                   default=None,
                   help="Minimum number of adapters to filter for")
    return p
Example No. 45
def _get_parser():
    desc = "Custom PipelineTemplate Registry to write pipeline templates to output directory"
    p = get_default_argparser_with_base_opts(__version__, desc, default_level=logging.ERROR)
    p.add_argument('output_dir', help="Path to output directory")
    p.add_argument('--with-xml', action="store_true", default=False, help="Also Write Pipeline Templates as XML")
    return p
Example No. 46
def get_parser():
    p = get_default_argparser_with_base_opts(version=__version__,
                                             description=__doc__,
                                             default_level="INFO")
    p.add_argument("alignment_set", help="BAM or Alignment DataSet")
    p.add_argument("--reference",
                   action="store",
                   required=True,
                   type=validateFile,
                   help="Fasta or Reference DataSet")
    p.add_argument("--gff",
                   action="store",
                   default=None,
                   help="Output GFF file of modified bases")
    p.add_argument("--csv",
                   action="store",
                   default=None,
                   help="Output CSV file out per-nucleotide information")
    p.add_argument(
        "--bigwig",
        action="store",
        default=None,
        help="Output BigWig file encoding IpdRatio for both strands")
    # FIXME use central --nproc option
    p.add_argument('--numWorkers',
                   '-j',
                   dest='numWorkers',
                   default=1,
                   type=int,
                   help='Number of threads to use (-1 uses all logical cpus)')
    # common options
    p.add_argument("--pvalue",
                   type=float,
                   default=Constants.PVALUE_DEFAULT,
                   help="P-value cutoff")
    p.add_argument("--maxLength",
                   type=int,
                   default=Constants.MAX_LENGTH_DEFAULT,
                   help="Maximum number of bases to process per contig")
    p.add_argument(
        "--identify",
        action="store",
        default="m6A,m4C",
        help="Specific modifications to identify (comma-separated " +
        "list).  Currrent options are m6A, m4C, m5C_TET.  Using --control " +
        "overrides this option.")
    _DESC = "In the --identify mode, add --methylFraction to " +\
            "command line to estimate the methylated fraction, along with " +\
            "95%% confidence interval bounds."
    p.add_argument("--methylFraction", action="store_true", help=_DESC)
    p.add_argument(
        '--outfile',
        dest='outfile',
        default=None,
        help=
        'Use this option to generate all possible output files. Argument here is the root filename of the output files.'
    )

    # FIXME: Need to add an extra check for this; it can only be used if
    # --useLDA flag is set.
    p.add_argument('--m5Cgff',
                   dest='m5Cgff',
                   default=None,
                   help='Name of output GFF file containing m5C scores')

    # FIXME: Make sure that this is specified if --useLDA flag is set.
    p.add_argument('--m5Cclassifier',
                   dest='m5Cclassifier',
                   default=None,
                   help='Specify csv file containing a 127 x 2 matrix')

    p.add_argument('--pickle',
                   dest='pickle',
                   default=None,
                   help='Name of output pickle file.')

    p.add_argument('--ms_csv',
                   dest='ms_csv',
                   default=None,
                   help='Multisite detection CSV file.')

    # Calculation options:
    p.add_argument(
        '--control',
        dest='control',
        default=None,
        type=validateNoneOrFile,
        help=
        'AlignmentSet or mapped BAM file containing a control sample. Tool will perform a case-control analysis'
    )

    # Temporary addition to test LDA for Ca5C detection:
    p.add_argument('--useLDA',
                   action="store_true",
                   dest='useLDA',
                   default=False,
                   help='Set this flag to debug LDA for m5C/Ca5C detection')

    # Parameter options:
    defaultParamsPathSpec = _getResourcePathSpec()
    p.add_argument(
        '--paramsPath',
        dest='paramsPath',
        default=defaultParamsPathSpec,
        type=validateNoneOrPathSpec,
        help=
        'List of :-delimited directory paths containing in-silico trained models (default is "%s")'
        % defaultParamsPathSpec)

    # XXX hacky workaround for running tests using obsolete chemistry inputs
    p.add_argument("--useChemistry",
                   dest="useChemistry",
                   default=None,
                   help=argparse.SUPPRESS)

    p.add_argument('--minCoverage',
                   dest='minCoverage',
                   default=3,
                   type=int,
                   help='Minimum coverage required to call a modified base')

    p.add_argument('--maxQueueSize',
                   dest='maxQueueSize',
                   default=20,
                   type=int,
                   help='Max Queue Size')

    p.add_argument('--maxCoverage',
                   dest='maxCoverage',
                   type=int,
                   default=-1,
                   help='Maximum coverage to use at each site')

    p.add_argument('--mapQvThreshold',
                   dest='mapQvThreshold',
                   type=float,
                   default=-1.0)

    p.add_argument('--ipdModel',
                   dest='ipdModel',
                   default=None,
                   type=validateNoneOrFile,
                   help='Alternate synthetic IPD model HDF5 file')

    p.add_argument('--modelIters',
                   dest='modelIters',
                   type=int,
                   default=-1,
                   help='[Internal] Number of GBM model iteration to use')

    p.add_argument('--cap_percentile',
                   dest='cap_percentile',
                   type=float,
                   default=99.0,
                   help='Global IPD percentile to cap IPDs at')

    p.add_argument(
        "--methylMinCov",
        type=int,
        dest='methylMinCov',
        default=10,
        help=
        "Do not try to estimate methylFraction unless coverage is at least this."
    )

    p.add_argument(
        "--identifyMinCov",
        type=int,
        dest='identifyMinCov',
        default=5,
        help=
        "Do not try to identify the modification type unless coverage is at least this."
    )

    p.add_argument(
        "--maxAlignments",
        type=int,
        dest="maxAlignments",
        default=1500,
        help="Maximum number of alignments to use for a given window")

    # Computation management options:

    p.add_argument("-w", "--referenceWindow", "--referenceWindows",
                   "--refContigs",  # backwards compatibility
                   type=str,
                   dest='referenceWindowsAsString',
                   default=None,
                   help="The window (or multiple comma-delimited windows) of the reference to " + \
                   "be processed, in the format refGroup[:refStart-refEnd] " + \
                   "(default: entire reference).")

    def slurpWindowFile(fname):
        return ",".join(map(str.strip, open(fname).readlines()))

    p.add_argument(
        "--refContigIndex",
        type=int,
        dest='refContigIndex',
        default=-1,
        help=
        "For debugging purposes only - rather than enter a reference contig name, simply enter an index"
    )

    p.add_argument(
        "-W",
        "--referenceWindowsFile",
        "--refContigsFile",  # backwards compatibility
        type=slurpWindowFile,
        dest='referenceWindowsAsString',
        default=None,
        help="A file containing reference window designations, one per line")

    p.add_argument(
        "--skipUnrecognizedContigs",
        type=bool,
        default=False,
        help=
        "Whether to skip, or abort, unrecognized contigs in the -w/-W flags")
    # FIXME shouldn't it always do this?
    p.add_argument("--alignmentSetRefWindows",
                   action="store_true",
                   dest="referenceWindowsFromAlignment",
                   help="Use refWindows in dataset")

    # Debugging help options:
    p.add_argument("--profile",
                   action="store_true",
                   dest="doProfiling",
                   default=False,
                   help="Enable Python-level profiling (using cProfile).")

    add_debug_option(p)

    p.add_argument(
        "--seed",
        action="store",
        dest="randomSeed",
        type=int,
        default=None,
        help="Random seed (for development and debugging purposes only)")

    p.add_argument("--referenceStride",
                   action="store",
                   type=int,
                   default=1000,
                   help="Size of reference window in internal " +
                   "parallelization.  For testing purposes only.")

    return p