Example #1
def upload_logs():
    cluster_ID, log_directory, provider = read_conf_file(
        "cluster.conf", "cluster", ["id", "log_directory", "provider"])

    if provider == "amazon":
        S3_bucket = read_conf_file("amazon.conf", "amazon", "bucket")
        parallel_ssh["-m"]["aws --profile themis s3 sync --exact-timestamps "\
                           "%s s3://%s/cluster_%s/themis_logs" % (
                               log_directory, S3_bucket, cluster_ID)]()
    elif provider == "google":
        bucket = read_conf_file("google.conf", "google", "bucket")

        # gsutil appears to be buggy and can fail randomly so keep trying until
        # you succeed. Try 3 times even if it succeeds to make sure all files
        # get uploaded.
        for i in xrange(3):
            done = False
            while not done:
                try:
                    parallel_ssh["-m"]["gsutil -m rsync -r %s gs://%s/cluster_%s/" \
                                       "themis_logs" % (
                                           log_directory, bucket, cluster_ID)]()
                    done = True
                except ProcessExecutionError as e:
                    pass
    else:
        print >>sys.stderr, "Unknown provider %s" % provider
        return 1

    return 0
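
The gsutil retry in upload_logs() above loops until the command succeeds, with no upper bound. A minimal sketch of the same idea as a reusable helper, assuming plumbum-style callable commands; the helper name and the attempt limit are illustrative and not part of the original scripts:

from plumbum import ProcessExecutionError

def run_with_retries(command, attempts=3):
    # Re-run a pre-bound plumbum command until it succeeds, giving up after
    # `attempts` failures instead of retrying forever as the loop above does.
    last_error = None
    for _ in range(attempts):
        try:
            return command()
        except ProcessExecutionError as error:
            last_error = error
    raise last_error
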
Example #2
def download_logs():
    cluster_ID, log_directory, provider = read_conf_file(
        "cluster.conf", "cluster", ["id", "log_directory", "provider"])
    log_directory = os.path.expanduser(log_directory)

    if provider == "amazon":
        S3_bucket = read_conf_file("amazon.conf", "amazon", "bucket")
        aws = plumbum.local["aws"]
        aws["--profile"]["themis"]["s3"]["sync"]["--exact-timestamps"]\
            ["s3://%s/cluster_%s/themis_logs" % (S3_bucket, cluster_ID)]\
            [log_directory]()
    elif provider == "google":
        bucket = read_conf_file("google.conf", "google", "bucket")

        # gsutil appears to be buggy and can fail randomly so keep trying until
        # you succeed. In fact it appears that even if the command succeeds it
        # might not download all files, so run the command 3 times.
        for i in xrange(3):
            done = False
            gsutil = plumbum.local["gsutil"]
            while not done:
                try:
                    gsutil["-m"]["rsync"]["-r"]\
                        ["gs://%s/cluster_%s/themis_logs" % (bucket, cluster_ID)]\
                        [log_directory]()
                    done = True
                except ProcessExecutionError as e:
                    pass

    else:
        print >>sys.stderr, "Unknown provider %s" % provider
        return 1

    return 0
Example #3
def download_logs():
    cluster_ID, log_directory, provider = read_conf_file(
        "cluster.conf", "cluster", ["id", "log_directory", "provider"])
    log_directory = os.path.expanduser(log_directory)

    if provider == "amazon":
        S3_bucket = read_conf_file("amazon.conf", "amazon", "bucket")
        aws = plumbum.local["aws"]
        aws["--profile"]["themis"]["s3"]["sync"]["--exact-timestamps"]\
            ["s3://%s/cluster_%s/themis_logs" % (S3_bucket, cluster_ID)]\
            [log_directory]()
    elif provider == "google":
        bucket = read_conf_file("google.conf", "google", "bucket")

        done = False
        gsutil = plumbum.local["gsutil"]
        while not done:
            try:
                gsutil["-m"]["rsync"]["-r"]\
                    ["gs://%s/cluster_%s/themis_logs" % (bucket, cluster_ID)]\
                    [log_directory]()
                done = True
            except ProcessExecutionError as e:
                pass

    else:
        print >> sys.stderr, "Unknown provider %s" % provider
        return 1

    return 0
Example #4
def upload_logs():
    cluster_ID, log_directory, provider = read_conf_file(
        "cluster.conf", "cluster", ["id", "log_directory", "provider"])

    if provider == "amazon":
        S3_bucket = read_conf_file("amazon.conf", "amazon", "bucket")
        parallel_ssh["-m"]["aws --profile themis s3 sync --exact-timestamps "\
                           "%s s3://%s/cluster_%s/themis_logs" % (
                               log_directory, S3_bucket, cluster_ID)]()
    elif provider == "google":
        bucket = read_conf_file("google.conf", "google", "bucket")

        done = False
        while not done:
            try:
                parallel_ssh["-m"]["gsutil -m rsync -r %s gs://%s/cluster_%s/" \
                                   "themis_logs" % (
                                       log_directory, bucket, cluster_ID)]()
                done = True
            except ProcessExecutionError as e:
                pass
    else:
        print >> sys.stderr, "Unknown provider %s" % provider
        return 1

    return 0
Example #5
def download_logs():
    cluster_ID, log_directory, provider = read_conf_file(
        "cluster.conf", "cluster", ["id", "log_directory", "provider"])
    log_directory = os.path.expanduser(log_directory)

    if provider == "amazon":
        S3_bucket = read_conf_file("amazon.conf", "amazon", "bucket")
        aws = plumbum.local["aws"]
        aws["--profile"]["themis"]["s3"]["sync"]["--exact-timestamps"]\
            ["s3://%s/cluster_%s/themis_logs" % (S3_bucket, cluster_ID)]\
            [log_directory]()
    elif provider == "google":
        bucket = read_conf_file("google.conf", "google", "bucket")

        # gsutil appears to be buggy and can fail randomly so keep trying until
        # you succeed. In fact it appears that even if the command succeeds it
        # might not download all files, so run the command 3 times.
        for i in xrange(3):
            done = False
            gsutil = plumbum.local["gsutil"]
            while not done:
                try:
                    gsutil["-m"]["rsync"]["-r"]\
                        ["gs://%s/cluster_%s/themis_logs" % (bucket, cluster_ID)]\
                        [log_directory]()
                    done = True
                except ProcessExecutionError as e:
                    pass

    else:
        print >> sys.stderr, "Unknown provider %s" % provider
        return 1

    return 0
Example #6
def upload_logs():
    cluster_ID, log_directory, provider = read_conf_file(
        "cluster.conf", "cluster", ["id", "log_directory", "provider"])

    if provider == "amazon":
        S3_bucket = read_conf_file("amazon.conf", "amazon", "bucket")
        parallel_ssh["-m"]["aws --profile themis s3 sync --exact-timestamps "\
                           "%s s3://%s/cluster_%s/themis_logs" % (
                               log_directory, S3_bucket, cluster_ID)]()
    elif provider == "google":
        bucket = read_conf_file("google.conf", "google", "bucket")

        # gsutil appears to be buggy and can fail randomly so keep trying until
        # you succeed. Try 3 times even if it succeeds to make sure all files
        # get uploaded.
        for i in xrange(3):
            done = False
            while not done:
                try:
                    parallel_ssh["-m"]["gsutil -m rsync -r %s gs://%s/cluster_%s/" \
                                       "themis_logs" % (
                                           log_directory, bucket, cluster_ID)]()
                    done = True
                except ProcessExecutionError as e:
                    pass
    else:
        print >> sys.stderr, "Unknown provider %s" % provider
        return 1

    return 0
def upload_logs():
    cluster_ID, log_directory, provider = read_conf_file(
        "cluster.conf", "cluster", ["id", "log_directory", "provider"])

    if provider == "amazon":
        S3_bucket = read_conf_file("amazon.conf", "amazon", "bucket")
        parallel_ssh["-m"]["aws --profile themis s3 sync --exact-timestamps "\
                           "%s s3://%s/cluster_%s/themis_logs" % (
                               log_directory, S3_bucket, cluster_ID)]()
    elif provider == "google":
        bucket = read_conf_file("google.conf", "google", "bucket")

        done = False
        while not done:
            try:
                parallel_ssh["-m"]["gsutil -m rsync -r %s gs://%s/cluster_%s/" \
                                   "themis_logs" % (
                                       log_directory, bucket, cluster_ID)]()
                done = True
            except ProcessExecutionError as e:
                pass
    else:
        print >>sys.stderr, "Unknown provider %s" % provider
        return 1

    return 0
def download_logs():
    cluster_ID, log_directory, provider = read_conf_file(
        "cluster.conf", "cluster", ["id", "log_directory", "provider"])
    log_directory = os.path.expanduser(log_directory)

    if provider == "amazon":
        S3_bucket = read_conf_file("amazon.conf", "amazon", "bucket")
        aws = plumbum.local["aws"]
        aws["--profile"]["themis"]["s3"]["sync"]["--exact-timestamps"]\
            ["s3://%s/cluster_%s/themis_logs" % (S3_bucket, cluster_ID)]\
            [log_directory]()
    elif provider == "google":
        bucket = read_conf_file("google.conf", "google", "bucket")

        done = False
        gsutil = plumbum.local["gsutil"]
        while not done:
            try:
                gsutil["-m"]["rsync"]["-r"]\
                    ["gs://%s/cluster_%s/themis_logs" % (bucket, cluster_ID)]\
                    [log_directory]()
                done = True
            except ProcessExecutionError as e:
                pass

    else:
        print >>sys.stderr, "Unknown provider %s" % provider
        return 1

    return 0
Example #9
def build_themis_rc(dump_core, port, database):
    port = int(port)

    # Read the cluster config to get master address.
    master, keyfile = read_conf_file(
        "cluster.conf", "cluster", ["master_internal_address", "private_key"])
    keyfile = os.path.join(os.path.expanduser("~"), ".ssh", keyfile)

    if os.path.exists(THEMIS_RC):
        os.unlink(THEMIS_RC)

    # .themisrc is written in yaml so do this manually
    with open(THEMIS_RC, "w") as themisrc_file:
        themisrc_file.write("ssh:\n")
        themisrc_file.write("  key: \"%s\"\n\n" % keyfile)

        if dump_core:
            themisrc_file.write("dump_core: true\n\n")
        else:
            themisrc_file.write("dump_core: false\n\n")

        themisrc_file.write("redis:\n")
        themisrc_file.write("  host: \"%s\"\n" % master)
        themisrc_file.write("  port: %d\n" % port)
        themisrc_file.write("  db: %d\n\n" % database)

    return 0
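
build_themis_rc() writes the YAML by hand; the same file could be produced with PyYAML. A hedged sketch, assuming PyYAML is importable and that ssh.key, dump_core, and the redis block are the only fields .themisrc needs; the function name and its explicit parameters are illustrative, not part of the original script:

import yaml

def write_themis_rc(keyfile, dump_core, master, port, database):
    # Build the same structure as a dict and let yaml.safe_dump handle
    # quoting and indentation instead of writing the file line by line.
    config = {
        "ssh": {"key": keyfile},
        "dump_core": bool(dump_core),
        "redis": {"host": master, "port": int(port), "db": int(database)},
    }
    with open(THEMIS_RC, "w") as themisrc_file:
        yaml.safe_dump(config, themisrc_file, default_flow_style=False)
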
Example #10
def sync_config_files():
    cluster_ID, config_directory, provider = read_conf_file(
        "cluster.conf", "cluster", ["id", "config_directory", "provider"])

    if provider == "amazon":
        S3_bucket = read_conf_file("amazon.conf", "amazon", "bucket")
        aws = local["aws"]

        # First upload local config files
        aws["--profile"]["themis"]["s3"]["sync"]["--exact-timestamps"]\
            [os.path.expanduser(config_directory)]\
            ["s3://%s/cluster_%s/themis_config" % (S3_bucket, cluster_ID)]()

        # Then download config files to all nodes
        parallel_ssh["-m"]["aws --profile themis s3 sync --exact-timestamps "\
                               "s3://%s/cluster_%s/themis_config %s" % (
                S3_bucket, cluster_ID, config_directory)]()
    elif provider == "google":
        bucket = read_conf_file("google.conf", "google", "bucket")
        gsutil = local["gsutil"]

        # First upload local config files
        gsutil["-m"]["cp"]["-r"]\
            [os.path.expanduser(config_directory)]\
            ["gs://%s/cluster_%s" % (bucket, cluster_ID)]()

        # Then download config files to all nodes

        # gsutil appears to be buggy and can fail randomly so keep trying until
        # you succeed. Try 5 times even if it succeeds to make sure all files
        # get synced.
        for i in xrange(5):
            done = False
            while not done:
                try:
                    parallel_ssh["-m"]["gsutil -m rsync -r -c gs://%s/cluster_%s/" \
                                       "themis_config %s" % (
                                           bucket, cluster_ID, config_directory)]()
                    done = True
                except ProcessExecutionError as e:
                    pass

    else:
        print >>sys.stderr, "Unknown provider %s" % provider
        return 1

    return 0
Example #11
def sync_config_files():
    cluster_ID, config_directory, provider = read_conf_file(
        "cluster.conf", "cluster", ["id", "config_directory", "provider"])

    if provider == "amazon":
        S3_bucket = read_conf_file("amazon.conf", "amazon", "bucket")
        aws = local["aws"]

        # First upload local config files
        aws["--profile"]["themis"]["s3"]["sync"]["--exact-timestamps"]\
            [os.path.expanduser(config_directory)]\
            ["s3://%s/cluster_%s/themis_config" % (S3_bucket, cluster_ID)]()

        # Then download config files to all nodes
        parallel_ssh["-m"]["aws --profile themis s3 sync --exact-timestamps "\
                               "s3://%s/cluster_%s/themis_config %s" % (
                S3_bucket, cluster_ID, config_directory)]()
    elif provider == "google":
        bucket = read_conf_file("google.conf", "google", "bucket")
        gsutil = local["gsutil"]

        # First upload local config files
        gsutil["-m"]["cp"]["-r"]\
            [os.path.expanduser(config_directory)]\
            ["gs://%s/cluster_%s" % (bucket, cluster_ID)]()

        # Then download config files to all nodes

        # gsutil appears to be buggy and can fail randomly so keep trying until
        # you succeed. Try 5 times even if it succeeds to make sure all files
        # get synced.
        for i in xrange(5):
            done = False
            while not done:
                try:
                    parallel_ssh["-m"]["gsutil -m rsync -r -c gs://%s/cluster_%s/" \
                                       "themis_config %s" % (
                                           bucket, cluster_ID, config_directory)]()
                    done = True
                except ProcessExecutionError as e:
                    pass

    else:
        print >> sys.stderr, "Unknown provider %s" % provider
        return 1

    return 0
Example #12
def main():
    log_directory = read_conf_file("cluster.conf", "cluster", "log_directory")
    log_directory = os.path.expanduser(log_directory)
    log_directory = os.path.join(log_directory, "networkbench")

    parser = argparse.ArgumentParser(
        description="Harness for network benchmark application")
    parser.add_argument(
        "--config", "-c", help="config file to use for the benchmark "
        "(default: %(default)s)",
        default=os.path.join(BENCHMARK_DIR, "config.yaml"), type=str)
    parser.add_argument(
        "--log_directory", "-l",
        help="directory containing logs for an experiment "
        "(default: %(default)s)",
        default=log_directory)
    parser.add_argument(
        "--profiler", help="path to the binary of a profiling tool to use, for "
        "example valgrind or operf")
    parser.add_argument(
        "--profiler_options", help="options surrounded by quotes to pass to "
        "the profiler", type=str, default="")
    parser.add_argument(
        "--iterations", "-i", help="run the benchmark this many times "
        "(default: %(default)s)", type=int, default=1)
    parser.add_argument(
        "--sleep", "-s", help="sleep this many seconds between iterations "
        "(default: %(default)s)", type=int, default=0)
    parser.add_argument(
        "--per_peer_config", help="use separate config files for each peer, by "
        "appending the peer's IP address to the config file name: .A.B.C.D",
        action="store_true", default=False)
    parser.add_argument(
        "--dump_core_directory", "-d", help="dump core file to this directory "
        "if the benchmark crashes", default=None)
    parser.add_argument(
        "peer_ips", help="comma delimited list of host IPs to use for "
        "benchmarking")
    parser.add_argument(
        "--remote_connections_only", "-r", help="Only send to remote peers, "
        "instead of sending all-to-all, which includes localhost",
        action="store_true", default=False)

    utils.add_interfaces_params(parser)

    args = parser.parse_args()
    binary = os.path.join(BENCHMARK_DIR, "networkbench")
    delete_output = False
    solo_mode = False
    stage_stats = "sender,receiver"

    params = "-REMOTE_CONNECTIONS_ONLY %d" % (args.remote_connections_only)

    run_benchmark_iterations(
        binary, args.log_directory, args.config, args.peer_ips, args.profiler,
        args.profiler_options, args.iterations, args.sleep, delete_output,
        args.per_peer_config, args.dump_core_directory, solo_mode,
        stage_stats, args.interfaces, params)
Example #13
def main():
    parser = argparse.ArgumentParser(
        description="Mount Themis disks")

    disk_mountpoint = read_conf_file(
        "cluster.conf", "cluster", "disk_mountpoint")

    parser.add_argument(
        "--mountpoint", default=disk_mountpoint,
        help="Mount point for disks. Default %(default)s")
    parser.add_argument(
        "--format_disks", action="store_true", help="Format disks with XFS")
    parser.add_argument(
        "--partitions", action="store_true", help="If true, assume that the "
        "devices listed in node.conf are partitions and don't run fdisk.")

    args = parser.parse_args()
    return mount_disks(**vars(args))
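
With the arguments above, a typical invocation looks something like the following; the exact entry-point name is an assumption (the repository's MOUNT_SCRIPT path suggests mount_disks.py), and --mountpoint falls back to the disk_mountpoint value from cluster.conf when omitted:

python mount_disks.py --format_disks --mountpoint /mnt/themis_disks
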
Example #14
def main():
    parser = argparse.ArgumentParser(description="Mount Themis disks")

    disk_mountpoint = read_conf_file("cluster.conf", "cluster",
                                     "disk_mountpoint")

    parser.add_argument("--mountpoint",
                        default=disk_mountpoint,
                        help="Mount point for disks. Default %(default)s")
    parser.add_argument("--format_disks",
                        action="store_true",
                        help="Format disks with XFS")
    parser.add_argument(
        "--partitions",
        action="store_true",
        help="If true, assume that the "
        "devices listed in node.conf are partitions and don't run fdisk.")

    args = parser.parse_args()
    return mount_disks(**vars(args))
def main():
    # Read the cluster config to get cluster ID.
    parser = ConfigParser.SafeConfigParser()
    parser.read(CLUSTER_CONFIG)

    cluster_ID = int(parser.get("cluster", "id"))
    provider = parser.get("cluster", "provider")

    zone = read_conf_file("%s.conf" % provider, provider, "zone")

    # Store master address information
    master = get_cluster_status(provider, cluster_ID, zone)["master"]
    if master is None:
        print >> sys.stderr, "Could not find master hostname"
        return 1

    # Set master hostname in cluster.conf
    parser.set("cluster", "master_internal_address", master[0])
    parser.set("cluster", "master_external_address", master[1])

    with open(CLUSTER_CONFIG, "w") as config_file:
        parser.write(config_file)

    return 0
def main():
    # Read the cluster config to get cluster ID.
    parser = ConfigParser.SafeConfigParser()
    parser.read(CLUSTER_CONFIG)

    cluster_ID = int(parser.get("cluster", "id"))
    provider = parser.get("cluster", "provider")

    zone = read_conf_file("%s.conf" % provider, provider, "zone")

    # Store master address information
    master = get_cluster_status(provider, cluster_ID, zone)["master"]
    if master is None:
        print >>sys.stderr, "Could not find master hostname"
        return 1

    # Set master hostname in cluster.conf
    parser.set("cluster", "master_internal_address", master[0])
    parser.set("cluster", "master_external_address", master[1])

    with open(CLUSTER_CONFIG, "w") as config_file:
        parser.write(config_file)

    return 0
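
Both main() variants above fill in the master addresses next to the fields the other scripts read from the [cluster] section. An illustrative cluster.conf fragment after a successful run (all values are placeholders):

[cluster]
id = 0
provider = amazon
master_internal_address = 10.0.0.5
master_external_address = 203.0.113.10
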
Example #17
def main():
    log_directory = read_conf_file("cluster.conf", "cluster", "log_directory")
    log_directory = os.path.expanduser(log_directory)
    log_directory = os.path.join(log_directory, "storagebench")

    parser = argparse.ArgumentParser(
        description="Harness for storage benchmark application")
    parser.add_argument("--config",
                        "-c",
                        help="config file to use for the benchmark "
                        "(default: %(default)s)",
                        default=os.path.join(BENCHMARK_DIR, "config.yaml"),
                        type=str)
    parser.add_argument("--log_directory",
                        "-l",
                        help="directory containing logs for an experiment "
                        "(default: %(default)s)",
                        default=log_directory)
    parser.add_argument(
        "--profiler",
        help="path to the binary of a profiling tool to use, for "
        "example valgrind or operf")
    parser.add_argument("--profiler_options",
                        help="options surrounded by quotes to pass to "
                        "the profiler",
                        type=str,
                        default="")
    parser.add_argument("--iterations",
                        "-i",
                        help="run the benchmark this many times "
                        "(default: %(default)s)",
                        type=int,
                        default=1)
    parser.add_argument("--sleep",
                        "-s",
                        help="sleep this many seconds between iterations "
                        "(default: %(default)s)",
                        type=int,
                        default=0)
    parser.add_argument("--delete_output",
                        help="delete output files after run completes",
                        action="store_true",
                        default=False)
    parser.add_argument(
        "--per_peer_config",
        help="use separate config files for each peer, by "
        "appending the peer's IP address to the config file name: .A.B.C.D",
        action="store_true",
        default=False)
    parser.add_argument("--dump_core_directory",
                        "-d",
                        help="dump core file to this directory "
                        "if the benchmark crashes",
                        default=None)
    parser.add_argument("--read_only",
                        "-r",
                        help="Only read files, don't write",
                        action="store_true",
                        default=False)
    parser.add_argument("--write_only",
                        "-w",
                        help="Only write (generate) files, don't read",
                        action="store_true",
                        default=False)
    parser.add_argument("peer_ips",
                        help="comma delimited list of host IPs to use for "
                        "benchmarking")

    args = parser.parse_args()
    binary = os.path.join(BENCHMARK_DIR, "storagebench")
    # Run the storage benchmark individually on each machine as if it were its
    # own cluster of size 1.
    solo_mode = True

    if args.read_only and args.write_only:
        sys.exit("Cannot specify both read-only and write-only")

    if args.write_only:
        read = 0
    else:
        read = 1

    if args.read_only:
        write = 0
    else:
        write = 1

    if read == 1 and write == 1:
        stage_stats = "reader,writer"
    elif read == 1 and write == 0:
        stage_stats = "reader"
    elif read == 0 and write == 1:
        stage_stats = "writer"
    else:
        sys.exit("Cannot specify both read-only and write-only")

    # Pass read/write params to Themis
    params = "-READ %d -WRITE %d" % (read, write)
    print params

    run_benchmark_iterations(binary, args.log_directory, args.config,
                             args.peer_ips, args.profiler,
                             args.profiler_options, args.iterations,
                             args.sleep, args.delete_output,
                             args.per_peer_config, args.dump_core_directory,
                             solo_mode, stage_stats, None, params)
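
Putting the flags together, the storage benchmark harness above might be launched as follows; the script name and addresses are illustrative. With --write_only, stage_stats narrows to just the writer stage:

python storagebench.py --iterations 3 --sleep 30 --write_only 10.0.0.1,10.0.0.2
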
Example #18
def mount_disks(format_disks, mountpoint, partitions):
    # Get comma delimited list of devices
    devices = read_conf_file("node.conf", "node", "devices")
    devices = devices.split(",")
    devices = [d for d in devices if len(d) > 0]

    username = read_conf_file("cluster.conf", "cluster", "username")

    # Setup mount point
    sudo[mkdir["-p"][mountpoint]]()
    sudo[chown]["%s:%s" % (username, username)][mountpoint]()

    mkfs_commands = []
    for device in devices:
        # Unmount ALL partitions connected to this device
        num_mounted = (mount | grep["-c"][device])(retcode=(0,1))
        num_mounted = int(num_mounted.strip())

        while num_mounted > 0:
            # Unmount device
            mounted_device =\
                (mount | grep[device] | head["-n1"] | awk["{print $1}"])()
            mounted_device = mounted_device.strip()

            print "Unmounting %s" % mounted_device
            sudo[umount[mounted_device]]()

            num_mounted -= 1

        # Format device
        if format_disks:
            if not partitions:
                print "Creating new partition for %s" % device
                (sudo[fdisk[device]] << "d\nn\np\n1\n\n\nw")()

                # It appears that the fdisk command returns before the partition is
                # usable...
                time.sleep(2)

            print "Creating xfs file system"
            if not partitions:
                # Use partition 1 on the device
                partition = "%s1" % device
            else:
                # The device itself is a partition
                partition = device

            mkfs_commands.append(sudo[mkfsxfs]["-f"][partition] & BG)

    for command in mkfs_commands:
        command.wait()
        if command.returncode != 0:
            print >>sys.stderr, command.stderr
            sys.exit(command.returncode)

    # Now mount all devices
    disk_index = 0
    for device in devices:
        # Setup mount point
        disk_mountpoint = os.path.join(mountpoint, "disk_%d" % disk_index)
        print "Mounting %s at %s" % (device, disk_mountpoint)
        mkdir["-p"][disk_mountpoint]()
        sudo[chown]["%s:%s" % (username, username)][disk_mountpoint]()

        # Mount disk
        if not partitions:
            # Use partition 1 on the device
            partition = "%s1" % device
        else:
            # The device itself is a partition
            partition = device
        sudo[mount["-o"]["noatime,discard"][partition][disk_mountpoint]]()
        sudo[chown]["%s:%s" % (username, username)][disk_mountpoint]()

        disk_index += 1
Example #19
def mount_disks(format_disks, mountpoint, partitions):
    # Get comma delimited list of devices
    devices = read_conf_file("node.conf", "node", "devices")
    devices = devices.split(",")
    devices = [d for d in devices if len(d) > 0]

    username = read_conf_file("cluster.conf", "cluster", "username")

    # Setup mount point
    sudo[mkdir["-p"][mountpoint]]()
    sudo[chown]["%s:%s" % (username, username)][mountpoint]()

    mkfs_commands = []
    for device in devices:
        # Unmount ALL partitions connected to this device
        num_mounted = (mount | grep["-c"][device])(retcode=(0, 1))
        num_mounted = int(num_mounted.strip())

        while num_mounted > 0:
            # Unmount device
            mounted_device =\
                (mount | grep[device] | head["-n1"] | awk["{print $1}"])()
            mounted_device = mounted_device.strip()

            print "Unmounting %s" % mounted_device
            sudo[umount[mounted_device]]()

            num_mounted -= 1

        # Format device
        if format_disks:
            if not partitions:
                print "Creating new partition for %s" % device
                (sudo[fdisk[device]] << "d\nn\np\n1\n\n\nw")()

                # It appears that the fdisk command returns before the partition is
                # usable...
                time.sleep(2)

            print "Creating xfs file system"
            if not partitions:
                # Use partition 1 on the device
                partition = "%s1" % device
            else:
                # The device itself is a partition
                partition = device

            mkfs_commands.append(sudo[mkfsxfs]["-f"][partition] & BG)

    for command in mkfs_commands:
        command.wait()
        if command.returncode != 0:
            print >> sys.stderr, command.stderr
            sys.exit(command.returncode)

    # Now mount all devices
    disk_index = 0
    for device in devices:
        # Setup mount point
        disk_mountpoint = os.path.join(mountpoint, "disk_%d" % disk_index)
        print "Mounting %s at %s" % (device, disk_mountpoint)
        mkdir["-p"][disk_mountpoint]()
        sudo[chown]["%s:%s" % (username, username)][disk_mountpoint]()

        # Mount disk
        if not partitions:
            # Use partition 1 on the device
            partition = "%s1" % device
        else:
            # The device itself is a partition
            partition = device
        sudo[mount["-o"]["noatime,discard"][partition][disk_mountpoint]]()
        sudo[chown]["%s:%s" % (username, username)][disk_mountpoint]()

        disk_index += 1
def run_benchmark(
    binary,
    config,
    batch_directory,
    phase_directory,
    profiler,
    profiler_options,
    peer_list,
    node_list,
    per_peer_config,
    dump_core_directory,
    solo_mode,
    vnstat_interface,
    params,
):

    # Get ssh username.
    username = read_conf_file("cluster.conf", "cluster", "username")

    # Add command line parameters to binary
    binary = "%s -LOG_DIR %s" % (binary, phase_directory)

    if dump_core_directory is not None:
        binary = "cd %s; ulimit -c unlimited; %s" % (dump_core_directory, binary)

    processes = []
    start_time = time.time()
    for index, ip in enumerate(node_list):
        # Now start themis binaries
        if solo_mode:
            # Act as if you are the only peer in the cluster.
            peer_binary = "%s -PEER_LIST %s" % (binary, ip)
        else:
            # Use the entire set of peers and designate yourself as
            # one of them.
            peer_binary = "%s -PEER_LIST %s -MYPEERID %d" % (binary, peer_list, index)

        if per_peer_config:
            # Append the IP address to the config file name
            peer_binary = "%s -CONFIG %s.%s" % (peer_binary, config, ip)
        else:
            peer_binary = "%s -CONFIG %s" % (peer_binary, config)

        # Override config file with specified parameters
        if params:
            peer_binary = "%s %s" % (peer_binary, params)

        if profiler == "operf":
            # Use the batch directory as the operf session dir
            session_dir = os.path.join(batch_directory, "oprofile_data.%s" % ip)
            parallel_ssh(None, "mkdir -p %s" % session_dir, username, node_list, False, True, False)
            peer_binary = "%s %s --session-dir=%s %s" % (profiler, profiler_options, session_dir, peer_binary)
        elif profiler is not None:
            # Some other profiler, just prepend it to the binary
            peer_binary = "%s %s %s" % (profiler, profiler_options, peer_binary)

        # Run the node-local benchmark script.
        vnstat_param_string = ""
        if vnstat_interface is not None:
            vnstat_param_string = "--vnstat_interface %s" % vnstat_interface
        command = "%s %s \"%s/run_benchmark_local.py %s %s '%s'\"" % (
            ssh_command(),
            ip,
            THEMIS_SCRIPTS_DIR,
            vnstat_param_string,
            phase_directory,
            peer_binary,
        )

        processes.append((subprocess.Popen(command, shell=True), ip))

    print "%d tasks launched on %s\n" % (len(processes), time.asctime())

    elapsed_times = []
    completed_ips = []

    num_nodes = len(processes)

    while len(processes) > 0:
        for process, ip in processes:
            process.poll()
            if process.returncode is not None:
                elapsed_time = time.time() - start_time
                process.communicate()
                processes.remove((process, ip))
                elapsed_times.append(elapsed_time)
                completed_ips.append(ip)
                print "Node %s completed in %.2f seconds (%d / %d)" % (ip, elapsed_time, len(elapsed_times), num_nodes)

                break

    stop_time = time.time()

    return (stop_time - start_time, elapsed_times, completed_ips)
def run_benchmark_iterations(
    binary,
    log_directory,
    config,
    peer_ips,
    profiler,
    profiler_options,
    iterations,
    sleep,
    delete_output,
    per_peer_config,
    dump_core_directory,
    solo_mode,
    stage_stats,
    interfaces,
    params="",
):

    # Get ssh username and themis directory
    username, themis_directory = read_conf_file("cluster.conf", "cluster", ["username", "themis_directory"])
    themis_directory = os.path.expanduser(themis_directory)
    # Get cloud provider if applicable.
    provider = read_conf_file("cluster.conf", "cluster", "provider")

    if interfaces is None:
        vnstat_interface = None
    else:
        interface_list = filter(lambda x: len(x) > 0, interfaces.split(","))
        vnstat_interface = interface_list[0]

    if not os.path.exists(config):
        sys.exit("Config file %s does not exist." % config)

    with open(config, "r") as fp:
        app_config = yaml.load(fp)

    # If we're using more than 1 network interface per peer, the peer list is
    # going to look like:
    # Peer1_interface1, Peer1_interface2, Peer2_interface1, Peer2_interface2, ..
    # In this case, we only want to launch the benchmark once per peer, so
    # make sure we only look at the first interface for each peer, and let
    # the application itself deal with the other interfaces.
    num_interfaces = 1
    if "NUM_INTERFACES" in app_config:
        num_interfaces = app_config["NUM_INTERFACES"]

    # Remove trailing comma if any from the IP list. This will be the string we
    # pass into the benchmark binary.
    peer_list = peer_ips.rstrip(",")

    # If we're using multiple interfaces, only launch the benchmark once per
    # node.
    node_list = peer_list.split(",")[::num_interfaces]

    # Look for description files in the same directory as the binary.
    binary_dir = os.path.dirname(binary)
    description_directory = os.path.join(binary_dir, "description")

    if not os.path.exists(description_directory):
        sys.exit("Could not find description directory %s" % (description_directory))

    # Check for the phase name. For simplicity we're going to require that
    # the benchmark have only 1 phase
    description = Description(description_directory)
    phases = description.getPhaseList()
    if len(phases) != 1:
        sys.exit("Benchmark must have exactly one phase. Got %s" % phases)
    phase_name = phases[0]

    data_size_per_node = int(app_config["BENCHMARK_DATA_SIZE_PER_NODE"][phase_name])
    data_size = data_size_per_node * len(node_list)

    total_throughputs = {}
    if stage_stats is not None:
        stage_stats = stage_stats.split(",")
        for stage in stage_stats:
            total_throughputs[stage] = 0.0

    node_benchmark_throughputs = []

    for i in xrange(iterations):
        # Pick a unique batch ID
        batch = 0
        while os.path.exists(os.path.join(log_directory, "batch_%d" % batch)):
            batch += 1
        batch_directory = os.path.join(log_directory, "batch_%d" % batch)

        # Create directories
        phase_directory = os.path.join(batch_directory, phase_name)
        parallel_ssh(None, "mkdir -p %s" % phase_directory, username, node_list, False, True, False)

        # Copy description files and create phase directory.
        if not os.path.exists(batch_directory):
            os.makedirs(batch_directory)
        shutil.copy(os.path.join(description_directory, "stages.json"), batch_directory)
        shutil.copy(os.path.join(description_directory, "structure.json"), batch_directory)
        os.chmod(os.path.join(batch_directory, "stages.json"), 0777)
        os.chmod(os.path.join(batch_directory, "structure.json"), 0777)

        # Copy config file
        shutil.copyfile(config, os.path.join(batch_directory, "config.yaml"))

        print "\nLogging to %s" % (batch_directory)
        print "Running %s with batch ID %d on %d nodes..." % (phase_name, batch, len(node_list))

        (elapsed, elapsed_times, completed_ips) = run_benchmark(
            binary,
            config,
            batch_directory,
            phase_directory,
            profiler,
            profiler_options,
            peer_list,
            node_list,
            per_peer_config,
            dump_core_directory,
            solo_mode,
            vnstat_interface,
            params,
        )

        # Compute overall throughput
        throughput = (data_size / elapsed) / 1000000
        per_node_throughput = (data_size_per_node / elapsed) / 1000000
        print "Completed in %.2f seconds." % elapsed
        print "  Throughput: %.2f MB/s" % throughput
        print "  Per-server: %.2f MB/s" % per_node_throughput

        # Record individual throughputs
        throughputs = [(data_size_per_node / x) / 1000000 for x in elapsed_times]
        node_benchmark_throughputs += throughputs

        # Dump these results to a file in the batch directory
        results_file = open(os.path.join(batch_directory, "results"), "w")
        results_file.write(
            "Runtime: %.2f seconds\nThroughput: %.2f MB/s\nPer-server: "
            "%.2f MB/s\n\n" % (elapsed, throughput, per_node_throughput)
        )
        results_file.write("Node throughputs: %s\n\n" % throughputs)
        for ip, elapsed_time, throughput in zip(completed_ips, elapsed_times, throughputs):
            results_file.write("Node %s completed in %.2f seconds (%.2f MB/s)\n" % (ip, elapsed_time, throughput))
        results_file.write("\n")

        if stage_stats is not None:
            # Compute runtime stat throughputs

            done = False
            while not done:
                # Upload all logs.
                upload_logs()

                # Download logs locally.
                download_logs()

                try:
                    runtime_info = gather_runtime_info(batch_directory, False)
                    done = True
                except ValueError:
                    print "Runtime info script failed. Retrying log upload/downloads."

            stage_info = runtime_info[0]["stages"]
            node_throughputs = {}
            for worker_info in stage_info:
                stats_info = worker_info["stats_info"]
                # We only want to look at the overall stats, which includes all
                # nodes (hostname or worker ID won't be specified)
                if len(stats_info) == 1:
                    stage_name = stats_info["stage"]

                    if stage_name in stage_stats:
                        # This is one of the stages we care about
                        node_throughputs[stage_name] = worker_info["observed_processing_rate_per_node"]
                        total_throughputs[stage_name] += node_throughputs[stage_name]

            # Print throughputs in the correct order.
            for stage_name in stage_stats:
                print "  %s throughput: %.2f MB/s/node" % (stage_name, node_throughputs[stage_name])
                results_file.write("%s throughput: %.2f MB/s\n" % (stage_name, node_throughputs[stage_name]))

        results_file.close()

        if delete_output and "OUTPUT_DISK_LIST" in app_config and phase_name in app_config["OUTPUT_DISK_LIST"]:
            output_disk_list = app_config["OUTPUT_DISK_LIST"][phase_name]
            output_disks = output_disk_list.split(",")
            for disk in output_disks:
                print "Clearing %s" % disk
                parallel_ssh(None, "rm -rf %s" % disk, username, node_list, False, False, False)

        if sleep > 0 and i != iterations - 1:
            print "Sleeping %d seconds" % sleep
            time.sleep(sleep)

    print "\nCompleted %d iterations\n" % iterations
    # Format node throughputs
    node_benchmark_throughput_strings = ["%.2f" % x for x in node_benchmark_throughputs]
    print "  Node throughputs (MB/s):"
    print "    %s" % node_benchmark_throughput_strings
    print "  Average node throughput: %.2f MB/s" % (numpy.mean(node_benchmark_throughputs))
    print "  Standard deviation: %.2f MB/s" % (numpy.std(node_benchmark_throughputs))
    print "  Min node throughput: %.2f MB/s" % (numpy.min(node_benchmark_throughputs))
    print "  Max node throughput: %.2f MB/s\n" % (numpy.max(node_benchmark_throughputs))

    if stage_stats is not None:
        for stage_name in stage_stats:
            print "  Average %s throughput: %.2f MB/s/node" % (stage_name, total_throughputs[stage_name] / iterations)
Example #22
sys.path.append(
    os.path.abspath(os.path.join(SCRIPT_DIR, os.pardir, "job_runner")))

import redis_utils

sys.path.append(os.path.abspath(os.path.join(SCRIPT_DIR, os.pardir,
                                             os.pardir)))

import constants
from common import unitconversion

coordinator_db = None
username = None

disk_mountpoint = read_conf_file("cluster.conf", "cluster", "disk_mountpoint")
username = read_conf_file("cluster.conf", "cluster", "username")
themis_directory = read_conf_file("cluster.conf", "cluster",
                                  "themis_directory")
# Display the master's external address on the status page.
master_address = read_conf_file("cluster.conf", "cluster",
                                "master_external_address")

MOUNT_SCRIPT = os.path.join(themis_directory, "src", "scripts", "themis",
                            "cluster", "mount_disks.py")
UPDATE_SCRIPT = os.path.join(themis_directory, "src", "scripts", "themis",
                             "cluster", "update_repo.py")

generate_command = None
generate_data_size = None
import utils

sys.path.append(os.path.abspath(os.path.join(SCRIPT_DIR, os.pardir, "job_runner")))

import redis_utils

sys.path.append(os.path.abspath(os.path.join(SCRIPT_DIR, os.pardir, os.pardir)))

import constants
from common import unitconversion

coordinator_db = None
username = None

disk_mountpoint = read_conf_file("cluster.conf", "cluster", "disk_mountpoint")
username = read_conf_file("cluster.conf", "cluster", "username")
themis_directory = read_conf_file("cluster.conf", "cluster", "themis_directory")
# Display the master's external address on the status page.
master_address = read_conf_file("cluster.conf", "cluster", "master_external_address")

MOUNT_SCRIPT = os.path.join(themis_directory, "src", "scripts", "themis", "cluster", "mount_disks.py")
UPDATE_SCRIPT = os.path.join(themis_directory, "src", "scripts", "themis", "cluster", "update_repo.py")

generate_command = None
generate_data_size = None


def datetimeformat(value, format="%m-%d-%Y %H:%M:%S"):
    return time.strftime(format, time.localtime(float(value)))
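
datetimeformat() turns an epoch-seconds string into a local-time label, presumably as a template filter for the status page. A quick usage sketch (the timestamp is arbitrary and the exact output depends on the local timezone):

>>> datetimeformat("1500000000")
'07-13-2017 19:40:00'
>>> datetimeformat("1500000000", format="%Y-%m-%d")
'2017-07-13'
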
def main():
    log_directory = read_conf_file("cluster.conf", "cluster", "log_directory")
    log_directory = os.path.expanduser(log_directory)
    log_directory = os.path.join(log_directory, "storagebench")

    parser = argparse.ArgumentParser(
        description="Harness for storage benchmark application")
    parser.add_argument(
        "--config", "-c", help="config file to use for the benchmark "
        "(default: %(default)s)",
        default=os.path.join(BENCHMARK_DIR, "config.yaml"), type=str)
    parser.add_argument(
        "--log_directory", "-l",
        help="directory containing logs for an experiment "
        "(default: %(default)s)",
        default=log_directory)
    parser.add_argument(
        "--profiler", help="path to the binary of a profiling tool to use, for "
        "example valgrind or operf")
    parser.add_argument(
        "--profiler_options", help="options surrounded by quotes to pass to "
        "the profiler", type=str, default="")
    parser.add_argument(
        "--iterations", "-i", help="run the benchmark this many times "
        "(default: %(default)s)", type=int, default=1)
    parser.add_argument(
        "--sleep", "-s", help="sleep this many seconds between iterations "
        "(default: %(default)s)", type=int, default=0)
    parser.add_argument(
        "--delete_output", help="delete output files after run completes",
        action="store_true", default=False)
    parser.add_argument(
        "--per_peer_config", help="use separate config files for each peer, by "
        "appending the peer's IP address to the config file name: .A.B.C.D",
        action="store_true", default=False)
    parser.add_argument(
        "--dump_core_directory", "-d", help="dump core file to this directory "
        "if the benchmark crashes", default=None)
    parser.add_argument(
        "--read_only", "-r", help="Only read files, don't write",
        action="store_true", default=False)
    parser.add_argument(
        "--write_only", "-w", help="Only write (generate) files, don't read",
        action="store_true", default=False)
    parser.add_argument(
        "peer_ips", help="comma delimited list of host IPs to use for "
        "benchmarking")

    args = parser.parse_args()
    binary = os.path.join(BENCHMARK_DIR, "storagebench")
    # Run the storage benchmark individually on each machine as if it were its
    # own cluster of size 1.
    solo_mode = True

    if args.read_only and args.write_only:
        sys.exit("Cannot specify both read-only and write-only")

    if args.write_only:
        read = 0
    else:
        read = 1

    if args.read_only:
        write = 0
    else:
        write = 1

    if read == 1 and write == 1:
        stage_stats = "reader,writer"
    elif read == 1 and write == 0:
        stage_stats = "reader"
    elif read == 0 and write == 1:
        stage_stats = "writer"
    else:
        sys.exit("Cannot specify both read-only and write-only")

    # Pass read/write params to Themis
    params = "-READ %d -WRITE %d" % (read, write)
    print params

    run_benchmark_iterations(
        binary, args.log_directory, args.config, args.peer_ips, args.profiler,
        args.profiler_options, args.iterations, args.sleep, args.delete_output,
        args.per_peer_config, args.dump_core_directory, solo_mode, stage_stats,
        None, params)
Example #25
def run_benchmark_iterations(
    binary, log_directory, config, peer_ips, profiler, profiler_options,
    iterations, sleep, delete_output, per_peer_config, dump_core_directory,
    solo_mode, stage_stats, interfaces, params=""):

    # Get ssh username and themis directory
    username, themis_directory = read_conf_file(
        "cluster.conf", "cluster", ["username", "themis_directory"])
    themis_directory = os.path.expanduser(themis_directory)
    # Get cloud provider if applicable.
    provider = read_conf_file("cluster.conf", "cluster", "provider")

    if interfaces is None:
        vnstat_interface = None
    else:
        interface_list = filter(lambda x: len(x) > 0, interfaces.split(','))
        vnstat_interface = interface_list[0]

    if not os.path.exists(config):
        sys.exit("Config file %s does not exist." % config)

    with open(config, 'r') as fp:
        app_config = yaml.load(fp)

    # If we're using more than 1 network interface per peer, the peer list is
    # going to look like:
    # Peer1_interface1, Peer1_interface2, Peer2_interface1, Peer2_interface2, ..
    # In this case, we only want to launch the benchmark once per peer, so
    # make sure we only look at the first interface for each peer, and let
    # the application itself deal with the other interfaces.
    num_interfaces = 1
    if "NUM_INTERFACES" in app_config:
        num_interfaces = app_config["NUM_INTERFACES"]

    # Remove trailing comma if any from the IP list. This will be the string we
    # pass into the benchmark binary.
    peer_list = peer_ips.rstrip(",")

    # If we're using multiple interfaces, only launch the benchmark once per
    # node.
    node_list = peer_list.split(",")[::num_interfaces]

    # Look for description files in the same directory as the binary.
    binary_dir = os.path.dirname(binary)
    description_directory = os.path.join(binary_dir, "description")

    if not os.path.exists(description_directory):
        sys.exit("Could not find description directory %s" % (
                description_directory))

    # Check for the phase name. For simplicity we're going to require that
    # the benchmark have only 1 phase
    description = Description(description_directory)
    phases = description.getPhaseList()
    if len(phases) != 1:
        sys.exit("Benchmark must have exactly one phase. Got %s" % phases)
    phase_name = phases[0]

    data_size_per_node = int(
        app_config["BENCHMARK_DATA_SIZE_PER_NODE"][phase_name])
    data_size = data_size_per_node * len(node_list)

    total_throughputs = {}
    if stage_stats is not None:
        stage_stats = stage_stats.split(",")
        for stage in stage_stats:
            total_throughputs[stage] = 0.0

    node_benchmark_throughputs = []

    for i in xrange(iterations):
        # Pick a unique batch ID
        batch = 0
        while os.path.exists(
            os.path.join(log_directory, "batch_%d" % batch)):
            batch += 1
        batch_directory = os.path.join(log_directory, "batch_%d" % batch)

        # Create directories
        phase_directory = os.path.join(batch_directory, phase_name)
        parallel_ssh(
            None, "mkdir -p %s" % phase_directory, username, node_list, False,
            True, False)

        # Copy description files and create phase directory.
        if not os.path.exists(batch_directory):
            os.makedirs(batch_directory)
        shutil.copy(
            os.path.join(description_directory, "stages.json"),
            batch_directory)
        shutil.copy(
            os.path.join(description_directory, "structure.json"),
            batch_directory)
        os.chmod(os.path.join(batch_directory, "stages.json"), 0777)
        os.chmod(os.path.join(batch_directory, "structure.json"), 0777)

        # Copy config file
        shutil.copyfile(config, os.path.join(batch_directory, "config.yaml"))

        print "\nLogging to %s" % (batch_directory)
        print "Running %s with batch ID %d on %d nodes..." % (
            phase_name, batch, len(node_list))

        (elapsed, elapsed_times, completed_ips) = run_benchmark(
            binary, config, batch_directory, phase_directory, profiler,
            profiler_options, peer_list, node_list, per_peer_config,
            dump_core_directory, solo_mode, vnstat_interface, params)

        # Compute overall throughput
        throughput = (data_size / elapsed) / 1000000
        per_node_throughput = (data_size_per_node / elapsed) / 1000000
        print "Completed in %.2f seconds." % elapsed
        print "  Throughput: %.2f MB/s" % throughput
        print "  Per-server: %.2f MB/s" % per_node_throughput

        # Record individual throughputs
        throughputs = [(data_size_per_node / x) / 1000000 \
                           for x in elapsed_times]
        node_benchmark_throughputs += throughputs

        # Dump these results to a file in the batch directory
        results_file = open(os.path.join(batch_directory, "results"), "w")
        results_file.write(
            "Runtime: %.2f seconds\nThroughput: %.2f MB/s\nPer-server: " \
                "%.2f MB/s\n\n" % (elapsed, throughput, per_node_throughput))
        results_file.write("Node throughputs: %s\n\n" % throughputs)
        for ip, elapsed_time, throughput in zip(
            completed_ips, elapsed_times, throughputs):
            results_file.write(
                "Node %s completed in %.2f seconds (%.2f MB/s)\n" % (
                    ip, elapsed_time, throughput))
        results_file.write("\n")

        if stage_stats is not None:
            # Compute runtime stat throughputs

            done = False
            while not done:
                # Upload all logs.
                upload_logs()

                # Download logs locally.
                download_logs()

                try:
                    runtime_info = gather_runtime_info(batch_directory, False)
                    done = True
                except ValueError:
                    print "Runtime info script failed. Retrying log upload/downloads."

            stage_info = runtime_info[0]["stages"]
            node_throughputs = {}
            for worker_info in stage_info:
                stats_info = worker_info["stats_info"]
                # We only want to look at the overall stats, which includes all
                # nodes (hostname or worker ID won't be specified)
                if len(stats_info) == 1:
                    stage_name = stats_info["stage"]

                    if stage_name in stage_stats:
                        # This is one of the stages we care about
                        node_throughputs[stage_name] = \
                            worker_info["observed_processing_rate_per_node"]
                        total_throughputs[stage_name] += \
                            node_throughputs[stage_name]

            # Print throughputs in the correct order.
            for stage_name in stage_stats:
                print "  %s throughput: %.2f MB/s/node" % (
                    stage_name, node_throughputs[stage_name])
                results_file.write("%s throughput: %.2f MB/s\n" % (
                        stage_name, node_throughputs[stage_name]))

        results_file.close()

        if delete_output and "OUTPUT_DISK_LIST" in app_config and \
                phase_name in app_config["OUTPUT_DISK_LIST"]:
            output_disk_list = app_config["OUTPUT_DISK_LIST"][phase_name]
            output_disks = output_disk_list.split(",")
            for disk in output_disks:
                print "Clearing %s" % disk
                parallel_ssh(
                    None, "rm -rf %s" % disk, username, node_list, False,
                    False, False)

        if sleep > 0 and i != iterations - 1:
            print "Sleeping %d seconds" % sleep
            time.sleep(sleep)

    print "\nCompleted %d iterations\n" % iterations
    # Format node throughputs
    node_benchmark_throughput_strings = [
        "%.2f" % x for x in node_benchmark_throughputs]
    print "  Node throughputs (MB/s):"
    print "    %s" % node_benchmark_throughput_strings
    print "  Average node throughput: %.2f MB/s" % (
        numpy.mean(node_benchmark_throughputs))
    print "  Standard deviation: %.2f MB/s" % (
        numpy.std(node_benchmark_throughputs))
    print "  Min node throughput: %.2f MB/s" % (
        numpy.min(node_benchmark_throughputs))
    print "  Max node throughput: %.2f MB/s\n" % (
        numpy.max(node_benchmark_throughputs))

    if stage_stats is not None:
        for stage_name in stage_stats:
            print "  Average %s throughput: %.2f MB/s/node" % (
                stage_name, total_throughputs[stage_name] / iterations)
Example #26
def run_benchmark(
    binary, config, batch_directory, phase_directory, profiler,
    profiler_options, peer_list, node_list, per_peer_config,
    dump_core_directory, solo_mode, vnstat_interface, params):

    # Get ssh username.
    username = read_conf_file("cluster.conf", "cluster", "username")

    # Add command line parameters to binary
    binary = "%s -LOG_DIR %s" % (
        binary, phase_directory)

    if dump_core_directory is not None:
        binary = "cd %s; ulimit -c unlimited; %s" % (
            dump_core_directory, binary)

    processes = []
    start_time = time.time()
    for index, ip in enumerate(node_list):
        # Now start themis binaries
        if solo_mode:
            # Act as if you are the only peer in the cluster.
            peer_binary = "%s -PEER_LIST %s" % (binary, ip)
        else:
            # Use the entire set of peers and designate yourself as
            # one of them.
            peer_binary = "%s -PEER_LIST %s -MYPEERID %d" % (
                binary, peer_list, index)

        if per_peer_config:
            # Append the IP address to the config file name
            peer_binary = "%s -CONFIG %s.%s" % (peer_binary, config, ip)
        else:
            peer_binary = "%s -CONFIG %s" % (peer_binary, config)

        # Override config file with specified parameters
        if params:
            peer_binary = "%s %s" % (peer_binary, params)

        if profiler == "operf":
            # Use the batch directory as the operf session dir
            session_dir = os.path.join(batch_directory, "oprofile_data.%s" % ip)
            parallel_ssh(
                None, "mkdir -p %s" % session_dir, username, node_list,
                False, True, False)
            peer_binary = "%s %s --session-dir=%s %s" % (
                profiler, profiler_options, session_dir, peer_binary)
        elif profiler is not None:
            # Some other profiler, just prepend it to the binary
            peer_binary = "%s %s %s" % (
                profiler, profiler_options, peer_binary)

        # Run the node-local benchmark script.
        vnstat_param_string = ""
        if vnstat_interface is not None:
            vnstat_param_string = "--vnstat_interface %s" % vnstat_interface
        command = '%s %s "%s/run_benchmark_local.py %s %s \'%s\'"' % (
            ssh_command(), ip, THEMIS_SCRIPTS_DIR, vnstat_param_string,
            phase_directory, peer_binary)

        processes.append((subprocess.Popen(command, shell=True), ip))

    print "%d tasks launched on %s\n" % (len(processes), time.asctime())

    elapsed_times = []
    completed_ips = []

    num_nodes = len(processes)

    # Poll the per-node benchmark subprocesses until every node has finished.
    while len(processes) > 0:
        for process, ip in processes:
            process.poll()
            if process.returncode is not None:
                elapsed_time = time.time() - start_time
                process.communicate()
                processes.remove((process, ip))
                elapsed_times.append(elapsed_time)
                completed_ips.append(ip)
                print "Node %s completed in %.2f seconds (%d / %d)" % (
                    ip, elapsed_time, len(elapsed_times), num_nodes)

                break

    stop_time = time.time()

    return (stop_time - start_time, elapsed_times, completed_ips)
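
# A hedged usage sketch: run_benchmark returns the phase's wall-clock time
# plus per-node completion times and the IPs in completion order, so a caller
# could flag stragglers like this. All values below are hypothetical.
total_time = 412.8
elapsed_times = [401.2, 409.9, 412.8]
completed_ips = ["10.0.0.1", "10.0.0.2", "10.0.0.3"]

slowest_ip = completed_ips[elapsed_times.index(max(elapsed_times))]
print "Phase finished in %.2f seconds; slowest node was %s" % (
    total_time, slowest_ip)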
Example No. 27
0
def mount_disks(format_disks, mountpoint, partitions):
    # Get comma delimited list of devices
    devices = read_conf_file("node.conf", "node", "devices")
    devices = devices.split(",")
    devices = [d for d in devices if len(d) > 0]

    username = read_conf_file("cluster.conf", "cluster", "username")

    # Setup mount point
    sudo[mkdir["-p"][mountpoint]]()
    sudo[chown]["%s:%s" % (username, username)][mountpoint]()

    mkfs_commands = []
    for device in devices:
        # Unmount ALL partitions connected to this device
        num_mounted = (mount | grep["-c"][device])(retcode=(0, 1))
        num_mounted = int(num_mounted.strip())

        while num_mounted > 0:
            # Unmount device
            mounted_device =\
                (mount | grep[device] | head["-n1"] | awk["{print $1}"])()
            mounted_device = mounted_device.strip()

            print "Unmounting %s" % mounted_device
            sudo[umount[mounted_device]]()

            num_mounted -= 1

        # Format device
        if format_disks:
            if not partitions and "by-id" not in device:
                print "Creating new partition for %s" % device
                (sudo[fdisk[device]] << "d\nn\np\n1\n\n\nw")()

                # It appears that the fdisk command returns before the partition is
                # usable...
                time.sleep(2)

            print "Creating ext4 file system"
            if not partitions and "by-id" not in device:
                # Use partition 1 on the device
                partition = "%s1" % device
            else:
                # The device itself is a partition
                partition = device

            # Persistent devices can use fast formatting
            if "persist" in device:
                extra_opt = "lazy_itable_init=0,lazy_journal_init=0,discard"
                mkfs_commands.append(
                    sudo[mkfsext4]["-F"]["-E"][extra_opt][partition] & BG)
            else:
                mkfs_commands.append(sudo[mkfsext4]["-F"][partition] & BG)

    for command in mkfs_commands:
        command.wait()
        if command.returncode != 0:
            print >> sys.stderr, command.stderr
            sys.exit(command.returncode)

    # Now mount all devices
    disk_index = 0
    persist_disk_index = 0
    for device in devices:
        # Setup mount point
        if "persist" in device:
            disk_basename = "disk_persist_%d" % persist_disk_index
        else:
            disk_basename = "disk_%d" % disk_index
        disk_mountpoint = os.path.join(mountpoint, disk_basename)
        print "Mounting %s at %s" % (device, disk_mountpoint)
        mkdir["-p"][disk_mountpoint]()
        sudo[chown]["%s:%s" % (username, username)][disk_mountpoint]()

        # Mount disk
        if not partitions and "by-id" not in device:
            # Use partition 1 on the device
            partition = "%s1" % device
        else:
            # The device itself is a partition
            partition = device
        sudo[mount["-o"]["discard,defaults,dioread_nolock,noatime"][partition]
             [disk_mountpoint]]()
        sudo[chown]["%s:%s" % (username, username)][disk_mountpoint]()

        if "persist" in device:
            persist_disk_index += 1
        else:
            disk_index += 1
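
# A minimal sketch of the mount-point naming used above: ordinary devices
# become disk_0, disk_1, ... under the mountpoint, while devices whose path
# contains "persist" become disk_persist_0, disk_persist_1, ... The device
# paths and mountpoint below are hypothetical.
import os

mountpoint = "/mnt/disks"
devices = ["/dev/sdb", "/dev/sdc", "/dev/disk/by-id/google-persist-0"]
disk_index = 0
persist_disk_index = 0
for device in devices:
    if "persist" in device:
        disk_basename = "disk_persist_%d" % persist_disk_index
        persist_disk_index += 1
    else:
        disk_basename = "disk_%d" % disk_index
        disk_index += 1
    print "%s -> %s" % (device, os.path.join(mountpoint, disk_basename))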
Example No. 28
0
def parallel_ssh(
        redis_client, command, username, hosts, ignore_bad_hosts, master,
        verbose=False):

    pending_commands = []
    stdout_dict = {}
    stderr_dict = {}

    start_time = time.time()
    try:
        if type(command) == list:
            command = ' '.join(command)

        if command == "":
            print >>sys.stderr, "Cannot run empty command."
            return (1, stdout_dict, stderr_dict)

        if verbose:
            print "Running %s in parallel." % command

        if hosts is None:
            # User did not specify host list override, so ask redis
            hosts = redis_client.smembers("nodes")

            if hosts is None:
                print >>sys.stderr, "Error extracting host list from "\
                    "redis database"
                return (1, stdout_dict, stderr_dict)
        else:
            hosts = set(hosts)

        if master:
            # Also run on the master node.
            master_address = read_conf_file(
                "cluster.conf", "cluster", "master_internal_address")

            if verbose:
                print "Including master %s" % master_address
            hosts.add(master_address)

        temp_dir = "/tmp/run-script-%s-%s-%08x" % (
            username, time.strftime("%Y-%m-%d-%H%M.%S"),
            random.randint(0, (16 ** 8) - 1))

        if os.path.exists(temp_dir):
            print >>sys.stderr, (
                "Temporary directory %s already (and extremely improbably) "
                "exists; aborting" % (temp_dir))
            return (1, stdout_dict, stderr_dict)

        os.makedirs(temp_dir)

        hosts_file = os.path.join(temp_dir, "hosts")

        with open(hosts_file, 'w') as fp:
            fp.write('\n'.join(hosts) + '\n')

        stderr_dir = os.path.join(temp_dir, "stderr")
        stdout_dir = os.path.join(temp_dir, "stdout")

        for dirname in stderr_dir, stdout_dir:
            os.makedirs(dirname, 0755)

        for host in hosts:
            try:
                ssh_client = paramiko.SSHClient()
                ssh_client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
                ssh_client.connect(host, username=username)
                channel = ssh_client.get_transport().open_session()
                channel.get_pty()
                channel.exec_command(command)

                pending_commands.append((host, ssh_client, channel))
                if verbose:
                    print "Launching remote command on %s (%d / %d)" % (
                        host, len(pending_commands), len(hosts))
            except socket.gaierror as error:
                if not ignore_bad_hosts:
                    raise error

        host_failed = False

        for host in hosts:
            stdout_dict[host] = ""
            stderr_dict[host] = ""

        while len(pending_commands) > 0:
            completed_commands = []
            for host, ssh_client, channel in pending_commands:
                # Loop until we find a command that finished.
                if channel.exit_status_ready():
                    num_completed_nodes = len(hosts) - len(pending_commands)
                    # This node just completed.
                    num_completed_nodes += 1
                    elapsed_time = time.time() - start_time
                    if verbose:
                        print "%s completed remote command in %.2f seconds " \
                            "(%d / %d)" % (
                            host, elapsed_time, num_completed_nodes, len(hosts))

                    return_code = channel.recv_exit_status()
                    if return_code != 0:
                        print >>sys.stderr, "%s FAILED:" % (host)
                        host_failed = True

                    # Save stdout and stderr to file and to dicts
                    stdout_file = os.path.join(stdout_dir, host)
                    with open(stdout_file, "w") as fp:
                        while channel.recv_ready():
                            stdout = channel.recv(1024)
                            fp.write(stdout)
                            stdout_dict[host] += stdout

                    stderr_file = os.path.join(stderr_dir, host)
                    with open(stderr_file, "w") as fp:
                        while channel.recv_stderr_ready():
                            stderr = channel.recv_stderr(1024)
                            fp.write(stderr)
                            stderr_dict[host] += stderr
                            if return_code != 0 and verbose:
                                sys.stderr.write(stderr)

                    ssh_client.close()
                    completed_commands.append((host, ssh_client, channel))
            for completed in completed_commands:
                pending_commands.remove(completed)
            time.sleep(1)

        pending_commands = []

        if host_failed:
            return (1, stdout_dict, stderr_dict)
        else:
            return (0, stdout_dict, stderr_dict)
    except KeyboardInterrupt:
        print >>sys.stderr, "\nCaught keyboard interrupt\n"
        return (1, stdout_dict, stderr_dict)
    finally:
        # Cleanly stop any pending commands
        remaining_hosts = len(pending_commands)
        if remaining_hosts > 0:
            for host, ssh_client, channel in pending_commands:
                print >>sys.stderr, (
                    "Killing pending command on host '%s' ..." % (host))

                ssh_client.close()

            elapsed_time = time.time() - start_time
            print "Remaining %d commands terminated at %.2f seconds." % (
                remaining_hosts, elapsed_time)
            pending_commands = []
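
# A hedged usage sketch for parallel_ssh as defined above. The hosts and
# username are hypothetical; passing an explicit host list means the redis
# client is not consulted (hosts=None would read the "nodes" set instead).
return_code, stdout_dict, stderr_dict = parallel_ssh(
    None, "uptime", "themis", ["10.0.0.1", "10.0.0.2"],
    ignore_bad_hosts=False, master=False, verbose=True)

if return_code != 0:
    for host, stderr in stderr_dict.items():
        print >>sys.stderr, "%s failed: %s" % (host, stderr)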