def main(BouncerSubclass):
    '''Within a module that defines a subclass of BouncerProcessManager, say FooSubclass,
    you can do this:
        if __name__ == "__main__":
            main(FooSubclass)
    which parses command line arguments, instantiates your subclass, and runs its server.'''

    # Inspect the caller's stack frame to find the filename of the module that
    # invoked main(), so the logger is named after the subclass's module.
    _, filename, _, _, _, _ = inspect.getouterframes(inspect.currentframe())[1]
    logname = os.path.basename(filename)

    cwd = os.getcwd()

    default_config = os.path.join(cwd, "bouncer_config.json")


    parser = argparse.ArgumentParser(description='Bouncer process manager for %s' % logname)
    parser.add_argument("-c", "--config", type=str, default=default_config,
                        help="Default=%(default)s. The config file. See bouncer/bouncer_common.py for config-file format.")
    parser.add_argument("-a", "--addr", type=str, default="127.0.0.1",
                        help="Default=%(default)s. Address where the bouncer listens from")
    parser.add_argument("-p", "--port", type=int, default=3001,
                        help="Default=%(default)d. Port where the bouncer listens from")

    log.add_arguments(parser)
    args = parser.parse_args()
    logger = log.getLogger(args, name=logname)
    logger.info("Command line arguments: %s" % str(args))

    try:
        with open(args.config, "r"):
            pass
    except IOError:
        logger.critical("Error: could not open config file (%s)" % args.config)
        sys.exit(1)

    if not issubclass(BouncerSubclass, BouncerProcessManager):
        raise ValueError("The given class, %s, is not a subclass of BouncerProcessManager" % BouncerSubclass)

    config_filename = args.config
    addr = args.addr
    port = args.port
    try:
        with open(config_filename) as f:
            config = Config(f)
        bpm = BouncerSubclass(config, addr, port, logger)
    except Exception:
        logger.critical("Error while parsing config file. View bouncer/bouncer_common.py for format of config.")
        raise
    bpm.run()
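
A minimal sketch of the usage pattern the docstring describes, assuming a module that already defines or imports BouncerProcessManager and main (the FooSubclass name and its body are illustrative):

    class FooSubclass(BouncerProcessManager):
        """Illustrative subclass; override the process-management hooks as needed."""
        pass

    if __name__ == "__main__":
        main(FooSubclass)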
Example No. 2
def create_parser(model, parser):
    parser = model.add_arguments(parser)
    parser = dataset.add_arguments(parser)
    parser = train.add_training_arguments(parser)
    parser = train.add_ipu_arguments(parser)
    parser = logging.add_arguments(parser)
    return parser
Example No. 3
def create_parser(model, lr_schedule, parser):
    parser = model.add_arguments(parser)
    parser = dataset.add_arguments(parser)
    parser = add_training_arguments(parser)
    parser = lr_schedule.add_arguments(parser)
    parser = add_ipu_arguments(parser)
    parser = logging.add_arguments(parser)
    return parser
Example No. 4
def create_all_options_parser():
    parser = argparse.ArgumentParser(
        description='BERT Pretraining in TensorFlow',
        add_help=False,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser = add_main_arguments(parser)
    parser = dataset.add_arguments(parser)
    parser = add_training_arguments(parser)
    parser = add_ipu_arguments(parser)
    parser = bert_logging.add_arguments(parser)
    parser = add_bert_arguments(parser)
    return parser
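
Examples 2 through 4 all follow the same composition pattern: each module exposes an add_arguments-style function that registers its own option group and returns the parser, so a complete CLI is assembled by chaining calls. A minimal sketch of one such module-level function (the group and option names are illustrative):

    import argparse

    def add_arguments(parser):
        # Each module owns one option group and hands the parser back,
        # so callers can chain: parser = module.add_arguments(parser)
        group = parser.add_argument_group('Illustrative options')
        group.add_argument('--example-flag', action='store_true',
                           help='Hypothetical flag owned by this module.')
        return parser

    parser = argparse.ArgumentParser(description='Composed CLI')
    parser = add_arguments(parser)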
Example No. 5
def add_common_arguments(parser, required=True):
    group = parser.add_argument_group('Common options')

    # Training options
    # fmt:off
    group.add_argument('--micro-batch-size',
                       type=int,
                       help="Set micro-batch-size for training graph")
    group.add_argument(
        '--global-batch-size',
        type=int,
        default=None,
        help="The total batch size at which we want the model to run")
    group.add_argument(
        '--base-learning-rate',
        type=float,
        default=2e-5,
        help="Base learning rate exponent (2**N), where blr = lr / bs")
    group.add_argument('--num-train-steps',
                       type=int,
                       help="Number of training steps.")
    group.add_argument('--loss-scaling',
                       type=float,
                       default=1,
                       help="Loss scaling factor.")
    group.add_argument(
        '--loss-scaling-by-step',
        type=str,
        default=None,
        help=
        "Specify changing loss scaling factors at given training steps, as a dictionary."
    )
    group.add_argument('--steps-per-ckpts',
                       type=int,
                       default=256,
                       help="Steps between checkpoints")
    group.add_argument('--optimizer',
                       type=str,
                       default="momentum",
                       choices=['sgd', 'momentum', 'adamw', 'lamb'],
                       help="Optimizer")
    group.add_argument('--momentum',
                       type=float,
                       default=0.984375,
                       help="Momentum coefficient.")
    group.add_argument('--beta1',
                       type=float,
                       default=0.9,
                       help="lamb/adam beta1 coefficient.")
    group.add_argument('--beta2',
                       type=float,
                       default=0.999,
                       help="lamb/adam beta2 coefficient.")
    group.add_argument('--weight-decay-rate',
                       type=float,
                       default=0.0,
                       help="Weight decay to use during optimisation.")
    group.add_argument('--epsilon',
                       type=float,
                       default=1e-4,
                       help="Optimiser epsilon value.")
    group.add_argument('--lr-schedule',
                       default='natural_exponential',
                       choices=["custom", "natural_exponential", "polynomial"],
                       help="Learning rate schedule function.")
    group.add_argument(
        '--lr-schedule-by-step',
        type=str,
        help="Dictionary of changes in the learning rate at specified steps.")
    group.add_argument(
        '--warmup',
        default=0.1,
        help=
        "Learning rate schedule warm-up period, in epochs (float) or number of steps (integer)."
    )
    group.add_argument('--seed',
                       default=None,
                       help="Seed for randomizing training")
    group.add_argument(
        '--wandb',
        action='store_true',
        help="Enable logging and experiment tracking with Weights & Biases.")
    group.add_argument('--wandb-name',
                       type=str,
                       default=None,
                       help="Override Weights&Biases run name.")
    group.add_argument('--save-path',
                       type=str,
                       default="checkpoints",
                       help='Save checkpoints to this directory.')
    group.add_argument(
        '--init-checkpoint',
        type=str,
        default=None,
        help='Initialise a new training session from this checkpoint.')
    group.add_argument(
        '--restore-dir',
        type=str,
        default=None,
        help='Path to directory containing the checkpoint to restore.')
    group.add_argument('--restore-optimiser-from-checkpoint',
                       default=True,
                       action="store_true")
    group.add_argument('--save-optimiser-to-checkpoint',
                       default=True,
                       action="store_true")
    group.add_argument(
        '--epochs',
        default=None,
        type=float,
        help='Number of epochs we want to let the training last.')

    # BERT options
    group.add_argument(
        '--vocab-size',
        type=int,
        help="""Vocabulary size of `inputs_ids` in `BertModel`.""")
    group.add_argument(
        '--hidden-size',
        type=int,
        help="""Size of the encoder layers and the pooler layer.""")
    group.add_argument(
        '--num-hidden-layers',
        type=int,
        help="""Number of hidden layers in the Transformer encoder.""")
    group.add_argument(
        '--num-attention-heads',
        type=int,
        help=
        """Number of attention heads for each attention layer in the Transformer encoder."""
    )
    group.add_argument(
        '--intermediate-size',
        type=int,
        help=
        """The size of the "intermediate" (i.e., feed-forward) layer in the Transformer encoder."""
    )
    group.add_argument(
        '--hidden-act',
        type=str,
        help=
        """The non-linear activation function (function or string) in the encoder and pooler."""
    )
    group.add_argument(
        '--hidden-dropout-prob',
        type=float,
        help=
        """The dropout probability for all fully connected layers in the embeddings, encoder, and pooler."""
    )
    group.add_argument(
        '--attention-probs-dropout-prob',
        type=float,
        help="""The dropout ratio for the attention probabilities.""")
    group.add_argument(
        '--max-position-embeddings',
        type=int,
        help=
        """The maximum sequence length that this model might ever be used with. Typically set this to something large just in case (e.g., 512 or 1024 or 2048)."""
    )
    group.add_argument(
        '--type-vocab-size',
        type=int,
        help=
        """The vocabulary size of the `token_type_ids` passed into `BertModel`."""
    )
    group.add_argument(
        '--initializer-range',
        type=float,
        help=
        """The stdev of the truncated-normal-initializer for initializing all weight matrices."""
    )
    group.add_argument('--disable-acc',
                       default=False,
                       action='store_true',
                       help='Skip the accuracy calculation.')
    group.add_argument('--use-qkv-bias',
                       default=False,
                       action='store_true',
                       help='Use biases for QKV layer.')
    group.add_argument('--use-qkv-split',
                       default=False,
                       action='store_true',
                       help='Split QKV layer.')

    # Model options
    group.add_argument(
        '--use-attention-projection-bias',
        type=_str_to_bool,
        default=True,
        help="Whether to use bias in linear projection behind attention layer."
    )
    group.add_argument(
        '--use-cls-layer',
        type=_str_to_bool,
        default=True,
        help=
        """Include the CLS layer in pretraining. This layer comes after the encoders but before the projection for the MLM loss."""
    )
    group.add_argument('--use-prediction-bias',
                       type=_str_to_bool,
                       default=True,
                       help="""Whether to use bias in mlm prediction.""")
    group.add_argument(
        '--static-mask',
        action='store_true',
        default=False,
        help=
        "Use if the pretraining dataset was created with the masked tokens always at the beginning of the input tensor."
    )
    group.add_argument("--max-predictions-per-seq",
                       type=int,
                       default=20,
                       help="the number of masked words per sentence.")
    group.add_argument(
        '--duplicate-factor',
        default=5,
        type=int,
        help='The duplication factor used when creating the dataset.')

    # GroupBert architecture options
    group.add_argument('--groupbert',
                       action='store_true',
                       default=False,
                       help="Use GroupBert architecture")
    group.add_argument('--groupbert-ffn-output-groups',
                       type=int,
                       default=4,
                       help="Set number of groups for FFN layer in GroupBert")
    group.add_argument('--groupbert-conv-kernel-size',
                       type=int,
                       default=7,
                       help="Set size of convolution kernel in GroupBert")
    group.add_argument('--groupbert-conv-group-size',
                       type=int,
                       default=16,
                       help="Set size of convolution groups in GroupBert")

    # IPU options
    pipeline_schedule_options = [
        _.name for _ in ipu.ops.pipelining_ops.PipelineSchedule
    ]
    schedulers_available = [_.name for _ in ipu.config.SchedulingAlgorithm]
    recomputation_modes_available = [
        p.name for p in ipu.ops.pipelining_ops.RecomputationMode
    ]

    group.add_argument(
        '--gradient-accumulation-count',
        type=int,
        default=None,
        help=
        "Number of gradients to accumulate in the pipeline. Must also set --shards > 1."
    )
    group.add_argument('--pipeline-schedule',
                       type=str,
                       default="Interleaved",
                       choices=pipeline_schedule_options,
                       help="Pipelining scheduler.")
    group.add_argument(
        '--replicas',
        type=int,
        default=1,
        help=
        "Replicate graph over N workers to increase batch to micro-batch-size*N"
    )
    group.add_argument(
        '--precision',
        type=str,
        default="16",
        choices=["16", "32"],
        help=
        "Precision of op (weights/activations/gradients) data types: 16 or 32.")
    group.add_argument(
        '--batches-per-step',
        type=int,
        default=1,
        help=
        "Maximum number of batches to perform on the device before returning to the host."
    )
    group.add_argument(
        '--available-memory-proportion',
        type=str,
        default="0.23",
        help=
        "Proportion of IPU memory available to matmul operations. A list can be used to specify the value for each IPU."
    )
    group.add_argument(
        '--variable-offloading',
        type=_str_to_bool,
        default=True,
        help="Enable offloading of training variables into remote memory.")
    group.add_argument(
        '--min-remote-tensor-size',
        type=int,
        default=128,
        help=
        "The minimum remote tensor size (bytes) for partial variable offloading"
    )
    group.add_argument(
        '--replicated-tensor-sharding',
        action="store_true",
        default=False,
        help="Enable replicated tensor sharding of optimizer state.")
    group.add_argument(
        '--stochastic-rounding',
        type=_str_to_bool,
        default=True,
        help="Enable stochastic rounding. Set to False when run evaluation.")
    group.add_argument(
        '--no-outlining',
        type=_str_to_bool,
        default=False,
        help=
        "Disable TF outlining optimisations. This will increase memory for a small throughput improvement."
    )
    group.add_argument("--enable_recomputation",
                       default=True,
                       action="store_true",
                       help="Recompute activations during backward pass")
    group.add_argument('--fp-exceptions',
                       default=False,
                       action="store_true",
                       help="Enable floating-point exceptions.")
    group.add_argument(
        '--partials-type',
        type=str,
        default="half",
        choices=["half", "float"],
        help=
        "Floating-point precision of data in matmul and convolution operations.."
    )
    group.add_argument(
        '--max-cross-replica-sum-buffer-size',
        type=int,
        default=10 * 1024 * 1024,
        help=
        "The maximum number of bytes that can be waiting before a cross replica sum op is scheduled."
    )
    group.add_argument(
        '--max-reduce-scatter-buffer-size',
        type=int,
        default=0,
        help=
        "The maximum number of bytes that can be waiting before reduce-scatter op is scheduled."
    )
    group.add_argument(
        '--scheduler',
        type=str,
        default="CLUSTERING",
        choices=schedulers_available,
        help=
        "Forces the compiler to use a specific scheduler when ordering the instructions."
    )
    group.add_argument('--recomputation-mode',
                       type=str,
                       default="RecomputeAndBackpropagateInterleaved",
                       choices=recomputation_modes_available)
    group.add_argument(
        '--increase-optimiser-precision',
        action='store_true',
        default=False,
        help=
        "Perform more of the LAMB optimiser's operations in fp32. This increases precision in the weight update but consumes more memory and reduces throughput."
    )
    group.add_argument(
        '--use-nvlamb',
        action='store_true',
        default=False,
        help="Flag to use the global normalisation for the gradients.")
    group.add_argument(
        '--use-debiasing',
        action='store_true',
        default=False,
        help="Flag to use de-biasing for the LAMB momenta.")
    group.add_argument(
        '--reduction-type',
        type=str,
        choices=['sum', 'mean'],
        default='mean',
        help=
        'The reduction type applied to the pipeline: summation or mean.'
    )
    group.add_argument(
        '--weight-norm-clip',
        type=float,
        default=0.,
        help=
        'Clip the weight norm at this value; 0 means no weight clipping.'
    )
    group.add_argument(
        '--compile-only',
        action="store_true",
        default=False,
        help=
        "Configure Poplar to only compile the graph. This will not acquire any IPUs and thus facilitate profiling without using hardware resources."
    )
    group.add_argument(
        '--matmul-serialize-factor',
        type=int,
        default=6,
        help=
        'Serialization factor of the embeddings lookup and projection. Must be a divisor of vocab_size.'
    )
    group.add_argument('--glue-dropout-prob',
                       type=float,
                       default=0.1,
                       help='GLUE tasks dropout probability.')
    group.add_argument(
        '--pipeline-stages',
        type=str,
        help=
        """Pipeline stages, a list of [emb, pos, hid, mlm, nsp] layers forming the pipeline."""
    )
    group.add_argument('--device-mapping',
                       type=str,
                       help="""Mapping of pipeline stages to IPUs.""")
    group.add_argument('--sync-replicas-independently',
                       action='store_true',
                       default=False,
                       help='Synchronise each replica independently of the others.')
    group.add_argument(
        '--log-all-workers',
        action='store_true',
        help='Allow all the workers to log into the terminal and the files.')

    # Dataset options
    group.add_argument(
        '--train-file',
        type=str,
        required=False,
        help="Path to the wiki/corpus training dataset TFRecord file.")
    group.add_argument("--seq-length",
                       type=int,
                       default=128,
                       help="the max sequence length.")
    group.add_argument('--parallel-io-threads',
                       type=int,
                       default=4,
                       help="Number of cpu threads used to do data prefetch.")
    group.add_argument(
        '--generated-data',
        action="store_true",
        default=False,
        help=
        "Generates synthetic-data on the host and then use it for training.")
    group.add_argument(
        '--synthetic-data',
        action='store_true',
        help="Run the model completely detaching it from the host.")
    group.add_argument('--dataset-repeat',
                       type=int,
                       default=1,
                       help="Number of times dataset to repeat.")

    # Env flag specific arguments
    group.add_argument(
        '--execution-profile',
        action='store_true',
        help=
        'Sets the Poplar engine options to output an execution profile to the profile-dir.'
    )
    group.add_argument(
        '--memory-profile',
        action='store_true',
        help=
        'Sets the Poplar engine options to output a memory profile to the profile-dir.'
    )
    group.add_argument(
        '--profile-dir',
        type=str,
        default='./',
        help='Directory in which the profile will be written.')
    group.add_argument(
        '--progress-bar',
        type=str,
        choices=['auto', 'true', 'false'],
        default='auto',
        help=
        'Whether to show the compilation progress bar. Pass false to disable it.'
    )

    # fmt:on
    # Add logging-specific arguments
    log.add_arguments(parser)

    return parser
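
Several of the options above pass type=_str_to_bool, a converter that is referenced but not defined in this excerpt. A minimal sketch of what such a converter typically looks like (the exact implementation in the source module may differ):

    import argparse

    def _str_to_bool(value):
        # argparse hands us the raw string; accept common spellings.
        if value.lower() in ('true', 't', 'yes', '1'):
            return True
        if value.lower() in ('false', 'f', 'no', '0'):
            return False
        raise argparse.ArgumentTypeError("Expected a boolean, got %r" % value)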
Example No. 6
    default_output_filename = os.path.join(cwd, "legit_trace_%d.txt")

    parser = argparse.ArgumentParser(description="Creates trace files for cross-validation training of Beer Garden. "
        "Outputs several files with filename template given by OUTPUT_FILENAME (default = %s)." % default_output_filename)
    parser.add_argument("-p", "--pages", type=str, default=default_pages_filename,
                    help="Default=%(default)s. The JSON file containing output from get_pages.py")
    parser.add_argument("-d", "--diff", type=float, default=0.0,
                    help="Default=%(default)f. The proportion of URL accesses that should be diffs")
    parser.add_argument("-n", "--num-urls", type=int, default=100,
                    help="Default=%(default)d. The number of URLs to generate")
    parser.add_argument("-r", "--partitions", type=int, default=2,
                    help="Default=%(default)d. The number of ways to partitions to produce for cross validation.")
    parser.add_argument("-o", "--output_filename", type=str, default=default_output_filename,
                    help="Default=%(default)s. The filename template for output files (must contain %%d).")

    log.add_arguments(parser)
    args = parser.parse_args()
    logger = log.getLogger(args)
    logger.info("Command line arguments: %s" % str(args))

    args.output_filename = os.path.realpath(args.output_filename)
    try:
        # Verify that the template contains exactly one %d placeholder
        test_filename = args.output_filename % 1
    except TypeError:
        logger.critical("output filename must contain exactly one %d")
        sys.exit(1)

    try:
        with open(args.pages, "r") as f:
            pass
    except IOError:
        logger.critical("Error: could not open pages file (%s)" % args.pages)
        sys.exit(1)

Example No. 7
def run_client(name, desc, default_concurrent_puzzles, default_timeout):

    desc += " WARNING: this script does not have an off switch. You must forcefully kill it with something like " + \
        "pkill -f 'python.*%s'" % name

    cwd = os.getcwd()

    parser = argparse.ArgumentParser(description=desc)

    parser.add_argument("-s", "--server",  type=str, required=True,
                    help="the domain part of the URL to submit requests to; e.g. 'localhost'")
    parser.add_argument("-u", "--url",  type=str, default=None,
                    help="Default=%(default)s. the rest of the url (not including SERVER); e.g. '/index.php'")
    parser.add_argument("-f", "--url-file",  type=str, default=None,
                    help="Default=%(default)s. filename contains urls, one per line (not including SERVER); e.g. '/index.php'")
    parser.add_argument("-to", "--timeout",  type=float, default=default_timeout,
                    help="Default=%(default)s. Connections timeout after TIMEOUT seconds.")
    parser.add_argument("-r", "--regex",  type=str, required=True,
                    help="regular expression that positively matches the target web-app page, NOT the puzzle page, and NOT 403 pages or anything else; e.g. MediaWiki")
    parser.add_argument("-e", "--rate",  type=float, default=10,
                    help="Default=%(default)s. Number of requests per second")
    parser.add_argument("-d", "--duration",  type=int, default=5,
                    help="Default=%(default)s. Duration of trial in seconds.")
    parser.add_argument("-z", "--concurrent-puzzles",  type=int, default=default_concurrent_puzzles,
                    help="Default=%(default)s. The maximum number of clients allowed to work on puzzles at the same time. If CONCURRENT_PUZZLES <= 0, then CONCURRENT_PUZZLES will be set to infinity.")
    parser.add_argument("-i", "--id",  type=str, default=1,
                    help="Default=%(default)s. An id to identify this process in the logs")
    parser.add_argument("-y", "--history",  type=int, default=20,
                    help="Default=%(default)s. When displaying averages, only use the last HISTORY measurements.")
    parser.add_argument("-p", "--poisson", action="store_true", default=False,
                    help="Set this flag to issue requests as poisson process. Else, issue requests as a non-random process.")
    parser.add_argument("-a", "--trace-filename",  type=str, default= name + ".csv",
                    help = '''Default=%(default)s. Name of output tracefile. The output tracefile is
                        a CSV with one row per event. Each row has 4 fields: (1) status, (2) event,
                        (3) latency, (4) num_bits. For further explanation, see the source.''')

    log.add_arguments(parser)
    args = parser.parse_args()

    logger = log.getLogger(args, name="%s.%s" % (name, args.id))
    logger.info("Command line arguments: %s" % str(args))

    queue = Queue.Queue()

    if args.url_file:
        urls = []
        with open(args.url_file) as f:
            for line in f:
                urls.append(line.strip())
    elif args.url:
        urls = [args.url]
    else:
        logger.error("Missing --url or --url-file arguments")
        sys.exit(1)

    logger.info("urls = %s ...", urls[:10])

    monitor = Monitor.spawn(logger, queue, args.history, args.trace_filename)

    jobs = []

    period = 1.0 / args.rate
    requests = int(args.rate * args.duration) + 1

    # the total amount of time this greenlet should have spent sleeping
    expected_duration = 0.0

    puzzles_being_solved = PuzzlesBeingSolved()

    start_time = time.time()
    num_urls = len(urls)
    for i in range(0, requests):
        url = urls[i % num_urls]
        job = ClientGreenlet.spawn(logger, queue, args.server, url, args.timeout + 10, \
                    args.regex, i + 1, args.concurrent_puzzles, puzzles_being_solved)
        jobs.append(job)
        if args.poisson:
            sleep_time = random.expovariate(1.0 / period)
        else:
            sleep_time = period
        actual_duration = time.time() - start_time
        # If you overslept last time, then reduce your sleep time now in order to catch up
        overslept = max(0, actual_duration - expected_duration)
        requested_sleep_time = max(0, sleep_time - overslept)
        expected_duration += sleep_time
        logger.debug("sleeping for %f sec before next request", requested_sleep_time)
        if requested_sleep_time > 0.0:
            gevent.sleep(requested_sleep_time)

    # if actual_duration significantly longer than duration, then this process is too CPU bound
    # need to slow down the rate
    actual_duration = time.time() - start_time
    if actual_duration > (expected_duration * 1.05):
        logger.error("Actual duration (%f) significantly longer than specified duration (%f). Could not send requests " \
            "fast enough" % (actual_duration, expected_duration))
    else:
        logger.debug("Actual duration (%f) NOT significantly longer than specified duration (%f). Sent requests " \
            "fast enough" % (actual_duration, expected_duration))

    # could not get joinall to work
    for i in range(0, int(args.timeout) + 1):
        # get rid of all jobs that have finished
        jobs = filter(lambda j: not j.ready(), jobs)
        if len(jobs) == 0:
            break
        gevent.sleep(1)

    jobs = filter(lambda j: not j.ready(), jobs)
    if len(jobs) > 0:
        for job in jobs:
            job.kill(block=True, timeout=1)    
    monitor.kill()
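
The pacing loop above compensates for oversleeping so the long-run request rate stays at args.rate even when the process falls behind. A standalone sketch of the same catch-up scheme, using plain time.sleep instead of gevent (the work() callable is a placeholder):

    import time

    def paced_loop(rate, count, work):
        """Invoke work() count times at roughly rate calls/sec, absorbing drift."""
        period = 1.0 / rate
        expected_duration = 0.0  # total time we should have slept so far
        start_time = time.time()
        for _ in range(count):
            work()
            actual_duration = time.time() - start_time
            # If earlier sleeps ran long, shorten this one to catch up.
            overslept = max(0.0, actual_duration - expected_duration)
            time.sleep(max(0.0, period - overslept))
            expected_duration += period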


if __name__ == "__main__":
    cwd = os.getcwd()

    parser = argparse.ArgumentParser(description='Solves Doorman puzzles. Solves puzzle given by stdin. Example usage: curl -s "http://localhost/index.php" | ./puzzle_solver.py | xargs -I REQ curl -s "http://localhostREQ"')

    parser.add_argument("-s", "--sleep-time",  type=float, default=None,
                    help="If specified, overrides the puzzles sleep time (num secs to sleep between bursts)")
    parser.add_argument("-b", "--burst_len",  type=int, default=None,
                    help="If specified, overrides the puzzles burst_len")

    log.add_arguments(parser, "CRITICAL", "CRITICAL")
    args = parser.parse_args()
    logger = log.getLogger(args)
    logger.info("Command line arguments: %s" % str(args))

    solver = PuzzleSolver(logger, sys.stdin.read())
    solver.sleep_time = args.sleep_time or solver.sleep_time
    solver.burst_len = args.burst_len or solver.burst_len
    print solver.solve()