Exemplo n.º 1
0
def recover(num_servers,
            backups_per_server,
            num_partitions,
            object_size,
            num_objects,
            num_overwrites,
            replicas,
            coordinator_args='',
            master_args='',
            backup_args='',
            master_ram=None,
            old_master_ram=None,
            num_removals=0,
            timeout=100,
            log_level='NOTICE',
            log_dir='logs',
            transport='infrc',
            verbose=False,
            debug=False):
    """Run a recovery on the cluster specified in the config module and
    return statistics about the recovery.

    See the config module for more parameters that affect how the recovery
    will be run.  In particular config.hosts and config.old_master_host which
    select which hosts in the cluster recovery will be run on.

    The jsonable metrics report is also written to the file 'metrics' inside
    the location returned by cluster.run.

    @param num_servers: Number of hosts on which to run Masters.
    @type  num_servers: C{int}

    @param backups_per_server: Number of Backups to colocate on the same host
                               with each Master.  If this is 1 the Backup is
                               run in the same process as the Master.  If this
                               is 2 then an additional process is started to
                               run the second Backup.
    @type  backups_per_server: C{int}

    @param num_partitions: Number of partitions to create on the old Master
                           before it is crashed.  This also determines how many
                           Recovery Masters will participate in recovery.
    @type  num_partitions: C{int}

    @param object_size: Size of objects to fill old Master with before crash.
    @type  object_size: C{int}

    @param num_objects: Number of objects to fill old Master with before crash.
    @type  num_objects: C{int}

    @param num_overwrites: Number of writes on same key while filling old Master.
    @type  num_overwrites: C{int}

    @param replicas: Number of times each segment is replicated to different
                     backups in the cluster.
    @type  replicas: C{int}

    @param coordinator_args: Additional command-line arguments to pass to
                             the coordinator.
    @type  coordinator_args: C{str}

    @param master_args: Additional command-line arguments to pass to
                        each server that acts as a master.  Appended after
                        the arguments generated here.
    @type  master_args: C{str}

    @param backup_args: Additional command-line arguments to pass to
                        each server that acts as a backup.
    @type  backup_args: C{str}

    @param master_ram: Megabytes of space allocated in each of the Masters
                       (except the old Master, see old_master_ram).  If left
                       unspecified some sane default will be attempted based
                       on num_objects, object_size and num_overwrites.
    @type  master_ram: C{int}

    @param old_master_ram: Megabytes of space allocated in the old Master that
                           will eventually be crashed.  If left unspecified
                           some sane default will be attempted based on the
                           per-partition log space, num_partitions and
                           num_overwrites.
    @type  old_master_ram: C{int}

    @param num_removals: Number of erases to do after inserts.  Allows
                         recoveries where some of the log contains tombstones.
    @type  num_removals: C{int}

    @param timeout: Seconds to wait before giving up and declaring the recovery
                    to have failed.
    @type  timeout: C{int}

    @param log_level: Log level to use for all servers.
                      DEBUG, NOTICE, WARNING, or ERROR.
    @type  log_level: C{str}

    @param log_dir: Top-level directory in which to write log files.
                    A separate subdirectory will be created in this
                    directory for the log files from this run.
    @type  log_dir: C{str}

    @param transport: A transport name (e.g. infrc, fast+udp, tcp, ...)
    @type  transport: C{str}

    @param verbose: Print information about progress in starting clients
                    and servers.
    @type  verbose: C{bool}

    @param debug: If True, pause after starting all to allow for debugging
                  setup such as attaching gdb.
    @type  debug: C{bool}

    @return: A dict with the keys 'metrics' (result of
             recoverymetrics.parseRecovery), 'report' (jsonable report built
             from those metrics), 'run' (value returned by cluster.run),
             'count' (num_objects), 'size' (object_size) and 'ns'
             (metrics.client.recoveryNs).
    @rtype:  C{dict}
    """
    client_binary = '%s/recovery' % obj_path

    args = {}
    args['num_servers'] = num_servers
    args['backups_per_server'] = backups_per_server
    args['replicas'] = replicas
    args['timeout'] = timeout
    args['log_level'] = log_level
    args['log_dir'] = log_dir
    args['transport'] = transport
    args['verbose'] = verbose
    args['debug'] = debug
    # The coordinator is run on the same host as the old master.
    args['coordinator_host'] = config.old_master_host
    args['coordinator_args'] = coordinator_args
    args['backup_args'] = backup_args
    # Just a guess of about how much capacity a master will have to have
    # to hold all the data from all the partitions
    if master_ram:
        log_space_per_partition = master_ram
    else:
        log_space_per_partition = (
            200 + (1.3 * num_objects / object_size * num_overwrites))
    # Extra segments are needed because the dummy tablets placed on the
    # masters cause log head rollovers. Without compensating for the
    # space wasted by that the recovery masters run out of space.
    # Note: '%d' truncates the (possibly float) estimate to an integer.
    args['master_args'] = '-d -D -t %d' % (log_space_per_partition +
                                           8 * num_partitions)
    if master_args:
        args['master_args'] += ' ' + master_args
    args['client'] = ('%s -f -n %d -r %d -s %d '
                      '-t %d -k %d -l %s -o %d' %
                      (client_binary, num_objects, num_removals, object_size,
                       num_partitions, num_servers, log_level, num_overwrites))
    args['old_master_host'] = config.old_master_host
    args['client_hosts'] = [config.old_master_host]
    if old_master_ram:
        args['old_master_args'] = '-d -t %d' % old_master_ram
    else:
        old_master_ram = log_space_per_partition * num_partitions * num_overwrites
        # NOTE(review): assumes the host entry is a sequence whose first
        # element is the host name -- confirm against the config module.
        if args['coordinator_host'][0] == 'rcmaster' and old_master_ram > 42000:
            print('Warning: pushing the limits of rcmaster; '
                  'limiting RAM to 42000 MB to avoid knocking it over; '
                  'use rcmonster if you need more')
            old_master_ram = 42000
        args['old_master_args'] = '-d -D -t %d' % old_master_ram
    recovery_logs = cluster.run(**args)

    # Collect metrics information.
    stats = {}
    stats['metrics'] = recoverymetrics.parseRecovery(recovery_logs)
    report = recoverymetrics.makeReport(stats['metrics']).jsonable()
    # Use a context manager so the file is closed even if printing fails.
    with open('%s/metrics' % recovery_logs, 'w') as f:
        getDumpstr().print_report(report, file=f)
    stats['run'] = recovery_logs
    stats['count'] = num_objects
    stats['size'] = object_size
    stats['ns'] = stats['metrics'].client.recoveryNs
    stats['report'] = report
    return stats
Exemplo n.º 2
0
def recover(num_servers,
            backup_disks_per_server,
            object_size,
            num_objects,
            num_overwrites,
            replicas,
            coordinator_args='',
            master_args='',
            backup_args='',
            master_ram=None,
            old_master_ram=None,
            num_removals=0,
            timeout=100,
            log_level='NOTICE',
            log_dir='logs',
            transport='basic+infud',
            verbose=False,
            debug=False):
    """Run a recovery on the cluster specified in the config module and
    return statistics about the recovery.

    The coordinator, the old master and the recovery client are all placed
    on the host returned by getOldMasterHost().

    The jsonable metrics report is also written to the file 'metrics' inside
    the location returned by cluster.run.

    @param num_servers: Number of hosts on which to run Masters.
    @type  num_servers: C{int}

    @param backup_disks_per_server: Number of backup disks to use on each
                                    Master. All disks are shared by one Backup
                                    (in the same process as the Master).
    @type  backup_disks_per_server: C{int}

    @param object_size: Size of objects to fill old Master with before crash.
    @type  object_size: C{int}

    @param num_objects: Number of objects to fill old Master with before crash.
    @type  num_objects: C{int}

    @param num_overwrites: Number of writes on same key while filling old Master.
    @type  num_overwrites: C{int}

    @param replicas: Number of times each segment is replicated to different
                     backups in the cluster.
    @type  replicas: C{int}

    @param coordinator_args: Additional command-line arguments to pass to
                             the coordinator.
    @type  coordinator_args: C{str}

    @param master_args: Additional command-line arguments to pass to
                        each server that acts as a master.  Appended after
                        the arguments generated here.
    @type  master_args: C{str}

    @param backup_args: Command-line arguments to pass to each server that
                        acts as a backup.  If empty,
                        '--maxNonVolatileBuffers 1000' is used instead.
    @type  backup_args: C{str}

    @param master_ram: Currently ignored: recovery masters are always started
                       with '-t 5000'.  Kept so existing callers keep working.
    @type  master_ram: C{int}

    @param old_master_ram: Megabytes of space allocated in the old Master that
                           will eventually be crashed.  If left unspecified
                           some sane default will be attempted based on
                           num_objects, num_overwrites and object_size.
    @type  old_master_ram: C{int}

    @param num_removals: Number of erases to do after inserts.  Allows
                         recoveries where some of the log contains tombstones.
    @type  num_removals: C{int}

    @param timeout: Seconds to wait before giving up and declaring the recovery
                    to have failed.
    @type  timeout: C{int}

    @param log_level: Log level to use for all servers.
                      DEBUG, NOTICE, WARNING, or ERROR.
    @type  log_level: C{str}

    @param log_dir: Top-level directory in which to write log files.
                    A separate subdirectory will be created in this
                    directory for the log files from this run.
    @type  log_dir: C{str}

    @param transport: A transport name (e.g. infrc, basic+udp, tcp, ...)
    @type  transport: C{str}

    @param verbose: Print information about progress in starting clients
                    and servers.
    @type  verbose: C{bool}

    @param debug: If True, pause after starting all to allow for debugging
                  setup such as attaching gdb.
    @type  debug: C{bool}

    @return: A dict with the keys 'metrics' (result of
             recoverymetrics.parseRecovery), 'report' (jsonable report built
             from those metrics), 'run' (value returned by cluster.run),
             'count' (num_objects), 'size' (object_size) and 'ns'
             (metrics.client.recoveryNs).
    @rtype:  C{dict}
    """
    client_binary = '%s/recovery' % obj_path

    args = {}
    args['num_servers'] = num_servers
    args['backup_disks_per_server'] = backup_disks_per_server
    args['replicas'] = replicas
    args['timeout'] = timeout
    args['log_level'] = log_level
    args['log_dir'] = log_dir
    args['transport'] = transport
    args['verbose'] = verbose
    args['debug'] = debug
    args['coordinator_host'] = getOldMasterHost()
    args['coordinator_args'] = coordinator_args
    # Caller-supplied backup arguments are used as given; otherwise fall
    # back to the default.  (Appending with '+=' here would raise KeyError
    # because the 'backup_args' key has not been set yet.)
    if backup_args:
        args['backup_args'] = backup_args
    else:
        args['backup_args'] = '--maxNonVolatileBuffers 1000'
    # Allocate enough memory on recovery masters to handle several
    # recovery partitions (most recoveries will only have one recovery
    # partition per master, which is about 500 MB).
    args['master_args'] = '-d -D -t 5000'
    if master_args:
        args['master_args'] += ' ' + master_args
    args['client'] = ('%s -f -n %d -r %d -s %d '
                      '-k %d -l %s -o %d' % (client_binary,
                      num_objects, num_removals, object_size,
                      num_servers, log_level, num_overwrites))
    args['old_master_host'] = getOldMasterHost()
    args['client_hosts'] = [getOldMasterHost()]
    if old_master_ram:
        args['old_master_args'] = '-d -t %d' % old_master_ram
    else:
        # Estimate how much log space the old master will need to hold
        # all of the data.
        old_master_ram = (200 + (1.3 * num_objects / object_size * num_overwrites))
        # NOTE(review): assumes the host entry is a sequence whose first
        # element is the host name -- confirm against getOldMasterHost().
        if args['coordinator_host'][0] == 'rcmaster' and old_master_ram > 42000:
            print('Warning: pushing the limits of rcmaster; '
                  'limiting RAM to 42000 MB to avoid knocking it over; '
                  'use rcmonster if you need more')
            old_master_ram = 42000
        # '%d' truncates the (possibly float) estimate to an integer.
        args['old_master_args'] = '-d -D -t %d' % old_master_ram
    recovery_logs = cluster.run(**args)

    # Collect metrics information.
    stats = {}
    stats['metrics'] = recoverymetrics.parseRecovery(recovery_logs)
    report = recoverymetrics.makeReport(stats['metrics']).jsonable()
    # Use a context manager so the file is closed even if printing fails.
    with open('%s/metrics' % recovery_logs, 'w') as f:
        getDumpstr().print_report(report, file=f)
    stats['run'] = recovery_logs
    stats['count'] = num_objects
    stats['size'] = object_size
    stats['ns'] = stats['metrics'].client.recoveryNs
    stats['report'] = report
    return stats
Exemplo n.º 3
0
def recover(
    num_servers,
    backups_per_server,
    num_partitions,
    object_size,
    num_objects,
    replicas,
    coordinator_args="",
    master_args="",
    backup_args="",
    master_ram=None,
    old_master_ram=None,
    num_removals=0,
    timeout=60,
    log_level="NOTICE",
    log_dir="logs",
    transport="infrc",
    verbose=False,
    debug=False,
):
    """Run a recovery on the cluster specified in the config module and
    return statistics about the recovery.

    See the config module for more parameters that affect how the recovery
    will be run.  In particular config.hosts and config.old_master_host which
    select which hosts in the cluster recovery will be run on.

    The jsonable metrics report is also written to the file 'metrics' inside
    the location returned by cluster.run.

    @param num_servers: Number of hosts on which to run Masters.
    @type  num_servers: C{int}

    @param backups_per_server: Number of Backups to colocate on the same host
                               with each Master.  If this is 1 the Backup is
                               run in the same process as the Master.  If this
                               is 2 then an additional process is started to
                               run the second Backup.
    @type  backups_per_server: C{int}

    @param num_partitions: Number of partitions to create on the old Master
                           before it is crashed.  This also determines how many
                           Recovery Masters will participate in recovery.
    @type  num_partitions: C{int}

    @param object_size: Size of objects to fill old Master with before crash.
    @type  object_size: C{int}

    @param num_objects: Number of objects to fill old Master with before crash.
    @type  num_objects: C{int}

    @param replicas: Number of times each segment is replicated to different
                     backups in the cluster.
    @type  replicas: C{int}

    @param coordinator_args: Additional command-line arguments to pass to
                             the coordinator.
    @type  coordinator_args: C{str}

    @param master_args: Additional command-line arguments to pass to
                        each server that acts as a master.  Appended after
                        the arguments generated here.
    @type  master_args: C{str}

    @param backup_args: Additional command-line arguments to pass to
                        each server that acts as a backup.
    @type  backup_args: C{str}

    @param master_ram: Megabytes of space allocated in each of the Masters
                       (except the old Master, see old_master_ram).  If left
                       unspecified some sane default will be attempted based
                       on num_objects and object_size.
    @type  master_ram: C{int}

    @param old_master_ram: Megabytes of space allocated in the old Master that
                           will eventually be crashed.  If left unspecified
                           the per-partition log space multiplied by
                           num_partitions is used.
    @type  old_master_ram: C{int}

    @param num_removals: Number of erases to do after inserts.  Allows
                         recoveries where some of the log contains tombstones.
    @type  num_removals: C{int}

    @param timeout: Seconds to wait before giving up and declaring the recovery
                    to have failed.
    @type  timeout: C{int}

    @param log_level: Log level to use for all servers.
                      DEBUG, NOTICE, WARNING, or ERROR.
    @type  log_level: C{str}

    @param log_dir: Top-level directory in which to write log files.
                    A separate subdirectory will be created in this
                    directory for the log files from this run.
    @type  log_dir: C{str}

    @param transport: A transport name (e.g. infrc, fast+udp, tcp, ...)
    @type  transport: C{str}

    @param verbose: Print information about progress in starting clients
                    and servers.
    @type  verbose: C{bool}

    @param debug: If True, pause after starting all to allow for debugging
                  setup such as attaching gdb.
    @type  debug: C{bool}

    @return: A dict with the keys "metrics" (result of
             recoverymetrics.parseRecovery), "report" (jsonable report built
             from those metrics), "run" (value returned by cluster.run),
             "count" (num_objects), "size" (object_size) and "ns"
             (metrics.client.recoveryNs).
    @rtype:  C{dict}
    """
    client_binary = "%s/recovery" % obj_path

    args = {}
    args["num_servers"] = num_servers
    args["backups_per_server"] = backups_per_server
    args["replicas"] = replicas
    args["timeout"] = timeout
    args["log_level"] = log_level
    args["log_dir"] = log_dir
    args["transport"] = transport
    args["verbose"] = verbose
    args["debug"] = debug
    args["coordinator_args"] = coordinator_args
    args["backup_args"] = backup_args
    # Just a guess of about how much capacity a master will have to have
    # to hold all the data from all the partitions
    if master_ram:
        log_space_per_partition = master_ram
    else:
        log_space_per_partition = 200 + (1.3 * num_objects / object_size)
    # "%d" truncates the (possibly float) estimate to an integer.
    args["master_args"] = "-t %d" % log_space_per_partition
    if master_args:
        args["master_args"] += " " + master_args
    args["client"] = "%s -f -n %d -r %d -s %d -t %d -k %d" % (
        client_binary,
        num_objects,
        num_removals,
        object_size,
        num_partitions,
        num_servers,
    )
    args["old_master_host"] = config.old_master_host
    if old_master_ram:
        args["old_master_args"] = "-t %d" % old_master_ram
    else:
        # The old master holds every partition, so scale the per-partition
        # estimate by the number of partitions.
        args["old_master_args"] = "-t %d" % (log_space_per_partition * num_partitions)
    recovery_logs = cluster.run(**args)

    # Collect metrics information.
    stats = {}
    stats["metrics"] = recoverymetrics.parseRecovery(recovery_logs)
    report = recoverymetrics.makeReport(stats["metrics"]).jsonable()
    # Use a context manager so the file is closed even if printing fails.
    with open("%s/metrics" % recovery_logs, "w") as f:
        getDumpstr().print_report(report, file=f)
    stats["run"] = recovery_logs
    stats["count"] = num_objects
    stats["size"] = object_size
    stats["ns"] = stats["metrics"].client.recoveryNs
    stats["report"] = report
    return stats
Exemplo n.º 4
0
def recover(num_servers,
            backups_per_server,
            num_partitions,
            object_size,
            num_objects,
            num_overwrites,
            replicas,
            coordinator_args='',
            master_args='',
            backup_args='',
            master_ram=None,
            old_master_ram=None,
            num_removals=0,
            timeout=100,
            log_level='NOTICE',
            log_dir='logs',
            transport='infrc',
            verbose=False,
            debug=False):
    """Run a recovery on the cluster specified in the config module and
    return statistics about the recovery.

    See the config module for more parameters that affect how the recovery
    will be run.  In particular config.hosts and config.old_master_host which
    select which hosts in the cluster recovery will be run on.

    The jsonable metrics report is also written to the file 'metrics' inside
    the location returned by cluster.run.

    @param num_servers: Number of hosts on which to run Masters.
    @type  num_servers: C{int}

    @param backups_per_server: Number of Backups to colocate on the same host
                               with each Master.  If this is 1 the Backup is
                               run in the same process as the Master.  If this
                               is 2 then an additional process is started to
                               run the second Backup.
    @type  backups_per_server: C{int}

    @param num_partitions: Number of partitions to create on the old Master
                           before it is crashed.  This also determines how many
                           Recovery Masters will participate in recovery.
    @type  num_partitions: C{int}

    @param object_size: Size of objects to fill old Master with before crash.
    @type  object_size: C{int}

    @param num_objects: Number of objects to fill old Master with before crash.
    @type  num_objects: C{int}

    @param num_overwrites: Number of writes on same key while filling old Master.
    @type  num_overwrites: C{int}

    @param replicas: Number of times each segment is replicated to different
                     backups in the cluster.
    @type  replicas: C{int}

    @param coordinator_args: Additional command-line arguments to pass to
                             the coordinator.
    @type  coordinator_args: C{str}

    @param master_args: Additional command-line arguments to pass to
                        each server that acts as a master.  Appended after
                        the arguments generated here.
    @type  master_args: C{str}

    @param backup_args: Additional command-line arguments to pass to
                        each server that acts as a backup.
    @type  backup_args: C{str}

    @param master_ram: Megabytes of space allocated in each of the Masters
                       (except the old Master, see old_master_ram).  If left
                       unspecified some sane default will be attempted based
                       on num_objects, object_size and num_overwrites.
    @type  master_ram: C{int}

    @param old_master_ram: Megabytes of space allocated in the old Master that
                           will eventually be crashed.  If left unspecified
                           some sane default will be attempted based on the
                           per-partition log space, num_partitions and
                           num_overwrites.
    @type  old_master_ram: C{int}

    @param num_removals: Number of erases to do after inserts.  Allows
                         recoveries where some of the log contains tombstones.
    @type  num_removals: C{int}

    @param timeout: Seconds to wait before giving up and declaring the recovery
                    to have failed.
    @type  timeout: C{int}

    @param log_level: Log level to use for all servers.
                      DEBUG, NOTICE, WARNING, or ERROR.
    @type  log_level: C{str}

    @param log_dir: Top-level directory in which to write log files.
                    A separate subdirectory will be created in this
                    directory for the log files from this run.
    @type  log_dir: C{str}

    @param transport: A transport name (e.g. infrc, fast+udp, tcp, ...)
    @type  transport: C{str}

    @param verbose: Print information about progress in starting clients
                    and servers.
    @type  verbose: C{bool}

    @param debug: If True, pause after starting all to allow for debugging
                  setup such as attaching gdb.
    @type  debug: C{bool}

    @return: A dict with the keys 'metrics' (result of
             recoverymetrics.parseRecovery), 'report' (jsonable report built
             from those metrics), 'run' (value returned by cluster.run),
             'count' (num_objects), 'size' (object_size) and 'ns'
             (metrics.client.recoveryNs).
    @rtype:  C{dict}
    """
    client_binary = '%s/recovery' % obj_path

    args = {}
    args['num_servers'] = num_servers
    args['backups_per_server'] = backups_per_server
    args['replicas'] = replicas
    args['timeout'] = timeout
    args['log_level'] = log_level
    args['log_dir'] = log_dir
    args['transport'] = transport
    args['verbose'] = verbose
    args['debug'] = debug
    # The coordinator is run on the same host as the old master.
    args['coordinator_host'] = config.old_master_host
    args['coordinator_args'] = coordinator_args
    args['backup_args'] = backup_args
    # Just a guess of about how much capacity a master will have to have
    # to hold all the data from all the partitions
    if master_ram:
        log_space_per_partition = master_ram
    else:
        log_space_per_partition = (200 + (1.3 * num_objects / object_size * num_overwrites))
    # Extra segments are needed because the dummy tablets placed on the
    # masters cause log head rollovers. Without compensating for the
    # space wasted by that the recovery masters run out of space.
    # Note: '%d' truncates the (possibly float) estimate to an integer.
    args['master_args'] = '-d -D -t %d' % (log_space_per_partition +
                                           8 * num_partitions)
    if master_args:
        args['master_args'] += ' ' + master_args
    args['client'] = ('%s -f -n %d -r %d -s %d '
                      '-t %d -k %d -l %s -o %d' % (client_binary,
                      num_objects, num_removals, object_size,
                      num_partitions, num_servers, log_level, num_overwrites))
    args['old_master_host'] = config.old_master_host
    args['client_hosts'] = [config.old_master_host]
    if old_master_ram:
        args['old_master_args'] = '-d -t %d' % old_master_ram
    else:
        old_master_ram = log_space_per_partition * num_partitions * num_overwrites
        # NOTE(review): assumes the host entry is a sequence whose first
        # element is the host name -- confirm against the config module.
        if args['coordinator_host'][0] == 'rcmaster' and old_master_ram > 42000:
            print('Warning: pushing the limits of rcmaster; '
                  'limiting RAM to 42000 MB to avoid knocking it over; '
                  'use rcmonster if you need more')
            old_master_ram = 42000
        args['old_master_args'] = '-d -D -t %d' % old_master_ram
    recovery_logs = cluster.run(**args)

    # Collect metrics information.
    stats = {}
    stats['metrics'] = recoverymetrics.parseRecovery(recovery_logs)
    report = recoverymetrics.makeReport(stats['metrics']).jsonable()
    # Use a context manager so the file is closed even if printing fails.
    with open('%s/metrics' % recovery_logs, 'w') as f:
        getDumpstr().print_report(report, file=f)
    stats['run'] = recovery_logs
    stats['count'] = num_objects
    stats['size'] = object_size
    stats['ns'] = stats['metrics'].client.recoveryNs
    stats['report'] = report
    return stats
Exemplo n.º 5
0
def recover(num_servers,
            backup_disks_per_server,
            object_size,
            num_objects,
            num_overwrites,
            replicas,
            coordinator_args='',
            master_args='',
            backup_args='',
            master_ram=None,
            old_master_ram=None,
            num_removals=0,
            timeout=100,
            log_level='NOTICE',
            log_dir='logs',
            transport='basic+infud',
            verbose=False,
            debug=False):
    """Run a recovery on the cluster specified in the config module and
    return statistics about the recovery.

    The coordinator, the old master and the recovery client are all placed
    on the host returned by getOldMasterHost().

    The jsonable metrics report is also written to the file 'metrics' inside
    the location returned by cluster.run.

    @param num_servers: Number of hosts on which to run Masters.
    @type  num_servers: C{int}

    @param backup_disks_per_server: Number of backup disks to use on each
                                    Master. All disks are shared by one Backup
                                    (in the same process as the Master).
    @type  backup_disks_per_server: C{int}

    @param object_size: Size of objects to fill old Master with before crash.
    @type  object_size: C{int}

    @param num_objects: Number of objects to fill old Master with before crash.
    @type  num_objects: C{int}

    @param num_overwrites: Number of writes on same key while filling old Master.
    @type  num_overwrites: C{int}

    @param replicas: Number of times each segment is replicated to different
                     backups in the cluster.
    @type  replicas: C{int}

    @param coordinator_args: Additional command-line arguments to pass to
                             the coordinator.
    @type  coordinator_args: C{str}

    @param master_args: Additional command-line arguments to pass to
                        each server that acts as a master.  Appended after
                        the arguments generated here.
    @type  master_args: C{str}

    @param backup_args: Command-line arguments to pass to each server that
                        acts as a backup.  If empty,
                        '--maxNonVolatileBuffers 1000' is used instead.
    @type  backup_args: C{str}

    @param master_ram: Currently ignored: recovery masters are always started
                       with '-t 5000'.  Kept so existing callers keep working.
    @type  master_ram: C{int}

    @param old_master_ram: Megabytes of space allocated in the old Master that
                           will eventually be crashed.  If left unspecified
                           some sane default will be attempted based on
                           num_objects, num_overwrites and object_size.
    @type  old_master_ram: C{int}

    @param num_removals: Number of erases to do after inserts.  Allows
                         recoveries where some of the log contains tombstones.
    @type  num_removals: C{int}

    @param timeout: Seconds to wait before giving up and declaring the recovery
                    to have failed.
    @type  timeout: C{int}

    @param log_level: Log level to use for all servers.
                      DEBUG, NOTICE, WARNING, or ERROR.
    @type  log_level: C{str}

    @param log_dir: Top-level directory in which to write log files.
                    A separate subdirectory will be created in this
                    directory for the log files from this run.
    @type  log_dir: C{str}

    @param transport: A transport name (e.g. infrc, basic+udp, tcp, ...)
    @type  transport: C{str}

    @param verbose: Print information about progress in starting clients
                    and servers.
    @type  verbose: C{bool}

    @param debug: If True, pause after starting all to allow for debugging
                  setup such as attaching gdb.
    @type  debug: C{bool}

    @return: A dict with the keys 'metrics' (result of
             recoverymetrics.parseRecovery), 'report' (jsonable report built
             from those metrics), 'run' (value returned by cluster.run),
             'count' (num_objects), 'size' (object_size) and 'ns'
             (metrics.client.recoveryNs).
    @rtype:  C{dict}
    """
    client_binary = '%s/apps/recovery' % obj_path

    args = {}
    args['num_servers'] = num_servers
    args['backup_disks_per_server'] = backup_disks_per_server
    args['replicas'] = replicas
    args['timeout'] = timeout
    args['log_level'] = log_level
    args['log_dir'] = log_dir
    args['transport'] = transport
    args['verbose'] = verbose
    args['debug'] = debug
    args['coordinator_host'] = getOldMasterHost()
    args['coordinator_args'] = coordinator_args
    # Caller-supplied backup arguments are used as given; otherwise fall
    # back to the default.  (Appending with '+=' here would raise KeyError
    # because the 'backup_args' key has not been set yet.)
    if backup_args:
        args['backup_args'] = backup_args
    else:
        args['backup_args'] = '--maxNonVolatileBuffers 1000'
    # Allocate enough memory on recovery masters to handle several
    # recovery partitions (most recoveries will only have one recovery
    # partition per master, which is about 500 MB).
    args['master_args'] = '-d -D -t 5000'
    if master_args:
        args['master_args'] += ' ' + master_args
    args['client'] = ('%s -f -n %d -r %d -s %d '
                      '-k %d -l %s -o %d' % (client_binary,
                      num_objects, num_removals, object_size,
                      num_servers, log_level, num_overwrites))
    args['old_master_host'] = getOldMasterHost()
    args['client_hosts'] = [getOldMasterHost()]
    if old_master_ram:
        args['old_master_args'] = '-d -t %d' % old_master_ram
    else:
        # Estimate how much log space the old master will need to hold
        # all of the data.
        old_master_ram = (200 + (1.3 * num_objects / object_size * num_overwrites))
        # NOTE(review): assumes the host entry is a sequence whose first
        # element is the host name -- confirm against getOldMasterHost().
        if args['coordinator_host'][0] == 'rcmaster' and old_master_ram > 42000:
            print('Warning: pushing the limits of rcmaster; '
                  'limiting RAM to 42000 MB to avoid knocking it over; '
                  'use rcmonster if you need more')
            old_master_ram = 42000
        # '%d' truncates the (possibly float) estimate to an integer.
        args['old_master_args'] = '-d -D -t %d' % old_master_ram
    recovery_logs = cluster.run(**args)

    # Collect metrics information.
    stats = {}
    stats['metrics'] = recoverymetrics.parseRecovery(recovery_logs)
    report = recoverymetrics.makeReport(stats['metrics']).jsonable()
    # Use a context manager so the file is closed even if printing fails.
    with open('%s/metrics' % recovery_logs, 'w') as f:
        getDumpstr().print_report(report, file=f)
    stats['run'] = recovery_logs
    stats['count'] = num_objects
    stats['size'] = object_size
    stats['ns'] = stats['metrics'].client.recoveryNs
    stats['report'] = report
    return stats