def add_graph_args(self, parser): parser.add_argument( "-p", "--parse-parallel", default=1, type=numeric_min_checker(minimum=1, message="read parallelism"), help="total paralellism level for reading data from disk") parser.add_argument("-i", "--dataset-dir", type=path_exists_checker(), help="Directory containing ALL of the chunk files") parser.add_argument( "-scale", "--scale", default=1, type=int, help= "Each coverage value is multiplied by this factor before being reported. Default is 1" ) parser.add_argument( "-max", "--max", default=-1, type=int, help= "Combine all positions with a depth >= max into a single bin in the histogram" ) parser.add_argument("-bg", "--bg", default=False, action="store_true", help="Report depth in BedGraph format") parser.add_argument( "-d", "--d", default=False, action="store_true", help= "Report the depth at each genome position with 1-based coordinates" ) parser.add_argument( "-strand", "--strand", default='B', help="Calculate coverage of intervals from a specific strand") parser.add_argument( "-bga", "--bga", default=False, action="store_true", help="Report depth in BedGraph format along with zero-entries") parser.add_argument( "-dz", "--dz", default=False, action="store_true", help= "Report the depth at each genome position with 0-based coordinates" )
def add_graph_args(self, parser):
    super().add_graph_args(parser=parser)
    parser.add_argument("--ceph-cluster-name", type=non_empty_string_checker,
                        default="ceph", help="name for the ceph cluster")
    parser.add_argument("--ceph-user-name", type=non_empty_string_checker,
                        default="client.dcsl1024", help="ceph username")
    parser.add_argument("--ceph-conf-path", type=path_exists_checker(check_dir=False),
                        default="/etc/ceph/ceph.conf",
                        help="path for the ceph configuration")
    parser.add_argument("--ceph-read-chunk-size", default=(2**26),
                        type=numeric_min_checker(128, "must have a reasonably large minimum read size from Ceph"),
                        help="minimum size to read from ceph storage, in bytes")
    parser.add_argument("--ceph-pool-name",
                        help="override the pool name to use (if specified, or if not in the json file)")
class Ceph:
    ceph_attributes = tuple("_".join(("ceph", a))
                            for a in ("cluster_name", "user_name", "pool_name",
                                      "conf_path", "read_chunk_size"))

    full_ceph_attributes = (
        {
            "attribute": "ceph_cluster_name",
            "type": non_empty_string_checker,
            "default": "ceph",
            "help": "name for the ceph cluster",
        },
        {
            "attribute": "ceph_user_name",
            "type": non_empty_string_checker,
            "default": "client.dcsl1024",
            "help": "ceph username",
        },
        {
            "attribute": "ceph_pool_name",
            "type": non_empty_string_checker,
            "default": "dcsl1024",
            "help": "ceph pool name",
        },
        {
            "attribute": "ceph_conf_path",
            "type": path_exists_checker(check_dir=False),
            "default": "/etc/ceph/ceph.conf",
            "help": "path for the ceph configuration",
        },
        {
            "attribute": "ceph_read_chunk_size",
            "type": numeric_min_checker(128, "must have a reasonably large minimum read size from Ceph"),
            "default": (2**26),
            "help": "minimum size to read from ceph storage, in bytes",
        },
    )

    @classmethod
    def add_ceph_args(cls, parser):
        # only register a flag if the parser doesn't already define its dest
        for attr_dict in cls.full_ceph_attributes:
            attr_name = attr_dict["attribute"]
            if parser.get_default(attr_name) is None:
                arg_name = "--{}".format(attr_name.replace("_", "-"))
                parser.add_argument(arg_name, dest=attr_name,
                                    type=attr_dict["type"],
                                    default=attr_dict["default"],
                                    help=attr_dict["help"])

    def add_ceph_attrs(self, args):
        # copy the parsed ceph arguments onto this instance
        for ceph_attr in self.ceph_attributes:
            setattr(self, ceph_attr, getattr(args, ceph_attr))
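
# A minimal usage sketch for the Ceph mixin above: register the ceph flags on a
# plain argparse parser, parse, and copy the values onto an instance. The
# CephPipeline subclass here is hypothetical; real pipeline classes presumably
# mix Ceph in alongside their graph-building logic.
import argparse


class CephPipeline(Ceph):
    pass


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    Ceph.add_ceph_args(parser)  # skips any attr whose dest already has a default
    args = parser.parse_args(["--ceph-pool-name", "mypool"])

    pipeline = CephPipeline()
    pipeline.add_ceph_attrs(args)      # sets the five ceph_* attributes
    print(pipeline.ceph_pool_name)     # mypool
    print(pipeline.ceph_cluster_name)  # "ceph" (the default)
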
def add_common_record_args(parser):
    parser.add_argument("--record-stats", default=False, action="store_true",
                        help="store statistics for this process into the output directory")
    parser.add_argument("--output-directory", default="output",
                        type=parse.path_exists_checker(make_if_empty=True, rm_if_exists=True),
                        help="directory to record output of all sorts into; will be made if it doesn't exist")
def add_args(parser):
    parser.add_argument("--record-stats", default=False, action="store_true",
                        help="store statistics for this process into the output directory")
    parser.add_argument("-o", "--output-directory", default=".",
                        type=parse.path_exists_checker(check_dir=True),
                        help="path in which to store the directory of outputs")
    parser.add_argument("-n", "--number", default=random.randint(0, 2**30), type=int,
                        help="number to assign to this server in the naming system")
    parser.add_argument("--safe-register", default=False, action="store_true",
                        help="error if the name already exists in the name server")
    parser.add_argument("--pyro-ns-port", type=int,
                        help="override default Pyro4 nameserver port")
    parser.add_argument("--pyro-ns-host",
                        help="override default Pyro4 nameserver host")
    parser.add_argument("-i", "--run-sleep-interval", dest="run_sleep_interval", default=2,
                        type=parse.numeric_min_checker(0.5, numeric_type=float,
                                                       message="must wait at least 0.5 seconds"),
                        help="number of seconds to sleep while in the run loop")
    parser.add_argument("-w", "--worker-name", default="",
                        help="if set, use this exact name to register on the nameserver. "
                             "An error will occur if this name is already taken")
    parser.add_argument("--startup-sleep", default=3,
                        type=parse.numeric_min_checker(numeric_type=float, minimum=1,
                                                       message="must wait at least 1 second after worker starts"),
                        help="number of seconds to sleep after the session is initialized")
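
# Hypothetical wiring for add_args above: attach it to a standalone parser and
# parse a sample command line. The values printed fall out of the defaults and
# flags declared in add_args; the host/worker names are made up.
import argparse

if __name__ == "__main__":
    worker_parser = argparse.ArgumentParser(description="worker registration")
    add_args(worker_parser)
    worker_args = worker_parser.parse_args(["-w", "worker-7", "--pyro-ns-host", "10.0.0.5"])
    print(worker_args.worker_name)         # worker-7
    print(worker_args.run_sleep_interval)  # 2 (the default)
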
def add_graph_args(self, parser): parser.add_argument( "-p", "--parse-parallel", default=1, type=numeric_min_checker(minimum=1, message="read parallelism"), help="total paralellism level for reading data from disk") parser.add_argument("-i", "--dataset-dir", type=path_exists_checker(), help="Directory containing ALL of the chunk files") parser.add_argument("-feature", "--feature", default='B', help="Feature name") parser.add_argument("-o", "--output", help="output directory")
def add_graph_args(self, parser): parser.add_argument( "-p", "--parse-parallel", default=1, type=numeric_min_checker(minimum=1, message="read parallelism"), help="total paralellism level for reading data from disk") parser.add_argument("-w", "--write-parallel", default=1, help="number of writers to use", type=numeric_min_checker( minimum=1, message="number of writers min")) parser.add_argument("-d", "--dataset-dir", type=path_exists_checker(), help="Directory containing ALL of the chunk files")
def add_graph_args(self, parser):
    # adds the common args to all graphs
    parser.add_argument("-p", "--parallel-parse", type=int, default=1,
                        help="Parallelism of decompress stage")
    parser.add_argument("-o", "--output-path", default="",
                        help="Output bam file path")
    parser.add_argument("-t", "--threads", type=int,
                        default=multiprocessing.cpu_count() - 1,
                        help="Number of threads to use for compression [{}]".format(
                            multiprocessing.cpu_count() - 1))
    parser.add_argument("-d", "--dataset-dir", type=path_exists_checker(),
                        help="Directory containing ALL of the chunk files")
def make_client_args(cls, parser):
    # TODO assume that for now it is just the local filesystem.
    # Will need to differentiate for other stuff later.
    add_dataset(parser=parser)
    parser.add_argument("-d", "--dataset-dir", type=path_exists_checker(),
                        help="Directory containing ALL of the chunk files")
    parser.add_argument("--overwrite", default=False, action="store_true",
                        help="Overwrite existing metadata file when the pipeline finishes. "
                             "Default: create a new one")
def add_default_module_args(parser):
    cwd = os.getcwd()
    parser.add_argument("--record", default=False, action='store_true',
                        help="record usage of the running process")
    parser.add_argument("--record-directory", default=cwd,
                        type=parse.path_exists_checker(),
                        help="directory to store runtime statistics")
    parser.add_argument("--summary", default=False, action="store_true",
                        help="Add TensorFlow summary info to the graph")
    parser.add_argument("--summary-directory", default=os.path.join(cwd, "traces"),
                        type=parse.path_exists_checker(make_if_empty=True),
                        help="directory to record summary information into")
def add_graph_args(self, parser):
    # adds the common args to all graphs
    parser.add_argument("-p", "--parallel",
                        type=numeric_min_checker(1, "parallel decompression"),
                        default=2, help="parallel decompression")
    parser.add_argument("-e", "--enqueue",
                        type=numeric_min_checker(1, "parallel enqueuing"),
                        default=1, help="parallel enqueuing / reading from Ceph")
    parser.add_argument("-a", "--aligners",
                        type=numeric_min_checker(1, "number of aligners"),
                        default=1, help="number of aligners")
    parser.add_argument("-t", "--aligner-threads",
                        type=numeric_min_checker(1, "threads per aligner"),
                        default=multiprocessing.cpu_count(),
                        help="the number of threads to use per aligner")
    parser.add_argument("-x", "--subchunking",
                        type=numeric_min_checker(1, "subchunking size"),
                        default=5000,
                        help="the size of each subchunk (in number of reads)")
    parser.add_argument("-w", "--writers",
                        type=numeric_min_checker(0, "must have a non-negative number of writers"),
                        default=1, help="the number of writer pipelines")
    parser.add_argument("-c", "--compress-parallel", type=int, default=2,
                        help="compress output in parallel. 0 for uncompressed [2]")
    parser.add_argument("--assemblers", default=1,
                        type=numeric_min_checker(1, "must have at least one assembler node"),
                        help="level of parallelism for assembling records")
    parser.add_argument("--deep-verify", default=False, action='store_true',
                        help="verify record integrity")
    parser.add_argument("--paired", default=False, action='store_true',
                        help="interpret the dataset as an interleaved paired dataset")
    parser.add_argument("-i", "--index-path", type=path_exists_checker(),
                        default="/scratch/stuart/ref_index",
                        help="location of the ref index on all machines. "
                             "Make sure all machines have this path!")
    self.add_max_secondary(parser=parser)
    parser.add_argument("--snap-args", type=str, default="",
                        help="SNAP algorithm-specific args. Pass with enclosing \" \". "
                             "E.g. \"-om 5 -omax 1\". See SNAP documentation for all options.")
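
# The --snap-args flag above arrives as one quoted string; a plausible way to
# hand it to the SNAP aligner is to tokenize it with shlex before passing it
# along. Illustrative only; the repo's actual plumbing for these args may differ.
import shlex

snap_args = "-om 5 -omax 1"
tokens = shlex.split(snap_args)
print(tokens)  # ['-om', '5', '-omax', '1']
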
def add_run_args(self, parser):
    super().add_run_args(parser=parser)
    parser.add_argument("-d", "--dataset-dir", type=path_exists_checker(),
                        help="Directory containing ALL of the chunk files")