Exemplo n.º 1
0
def data_generation_post():
    """Process a submitted data-generation form, then render the page.

    At most one action is performed, selected by the first non-empty form
    field in this order:

    * ``generate_data`` -- start generate_graysort_inputs.py through
      ``parallel_ssh_cmd`` (launched with ``& BG``) and remember the
      command and the requested size in module globals.
    * ``abort_data_generation`` -- run ``pkill -f`` against the generator
      script and clear the remembered command.
    * ``wipe_input`` / ``wipe_intermediate_output`` / ``wipe_all`` -- run
      clear_disks.sh in ``input`` / ``noinput`` / ``all`` mode, but only if
      the form carries exactly one ``allow_wipe_data`` value equal to
      "True".

    Returns whatever data_generation() returns.
    """
    global coordinator_db, username, parallel_ssh_cmd, generate_command,\
        generate_data_size
    print("Data generation post")

    form = bottle.request.POST

    def sibling_script(script_name):
        # Helper scripts are resolved one directory above SCRIPT_DIR.
        return os.path.join(SCRIPT_DIR, os.pardir, script_name)

    if form.generate_data:
        print("Generating data")
        data_size = int(form.data_size)
        unit = form.unit
        print("Generating %d%s of data..." % (data_size, unit))
        files_per_disk = int(form.files_per_disk)

        generate_data_size = unitconversion.convert(data_size, unit, "B")

        remote_invocation = "%s --no_sudo -g -n%d %d%s gensort" % (
            sibling_script("generate_graysort_inputs.py"),
            files_per_disk, data_size, unit)
        generate_command = parallel_ssh_cmd[remote_invocation] & BG

    elif form.abort_data_generation:
        print("Aborting data generation")

        pkill["-f"]["generate_graysort_inputs.py"]()
        generate_command = None

    else:
        # (form field, clear_disks.sh command template, log message); the
        # first field that is set wins, mirroring an if/elif chain.
        wipe_actions = (
            ("wipe_input", "%s input %s", "Wiping input data"),
            ("wipe_intermediate_output", "%s noinput %s",
             "Wiping intermediate/output data"),
            ("wipe_all", "%s all %s", "Wiping all data"),
        )

        for field_name, command_template, log_message in wipe_actions:
            if not getattr(form, field_name):
                continue

            # Wiping is destructive, so require the explicit confirmation.
            allowed = form.getall('allow_wipe_data')
            if len(allowed) == 1 and allowed[0] == "True":
                print(log_message)
                parallel_ssh_cmd[command_template % (
                    sibling_script("clear_disks.sh"), disk_mountpoint)]()
            break

    # Call the normal /data_generation method.
    return data_generation()
Exemplo n.º 2
0
def data_generation_post():
    """Handle a POST from the data-generation form and re-render the page.

    The first non-empty form field among ``generate_data``,
    ``abort_data_generation``, ``wipe_input``, ``wipe_intermediate_output``
    and ``wipe_all`` decides what happens:

    * generate: build a generate_graysort_inputs.py command line (adding
      ``-s`` when ``skewed_data`` is set and ``-r 2`` when ``replica_data``
      is set), start it through ``parallel_ssh_cmd`` with ``& BG``, and
      store the command and requested size in module globals.
    * abort: ``pkill -f`` the generator script and reset the stored command.
    * wipe_*: run clear_disks.sh in ``input`` / ``noinput`` / ``all`` mode,
      provided the form has exactly one ``allow_wipe_data`` value and it is
      "True".

    Returns the result of data_generation().
    """
    global coordinator_db, username, parallel_ssh_cmd, generate_command,\
        generate_data_size
    print("Data generation post")

    post = bottle.request.POST

    if post.generate_data:
        print("Generating data")
        data_size = int(post.data_size)
        unit = post.unit
        print("Generating %d%s of data..." % (data_size, unit))
        files_per_disk = int(post.files_per_disk)

        generate_data_size = unitconversion.convert(data_size, unit, "B")

        # Optional generator flags; each fragment keeps its trailing space
        # so the pieces concatenate directly in front of "-g".
        flag_fragments = []
        if bool(post.skewed_data):
            flag_fragments.append("-s ")
        if bool(post.replica_data):
            flag_fragments.append("-r 2 ")
        extra_args = "".join(flag_fragments)

        generator_script = os.path.join(
            SCRIPT_DIR, os.pardir, "generate_graysort_inputs.py")
        remote_invocation = "%s --no_sudo %s-g -n%d %d%s gensort" % (
            generator_script, extra_args, files_per_disk, data_size, unit)
        generate_command = parallel_ssh_cmd[remote_invocation] & BG

    elif post.abort_data_generation:
        print("Aborting data generation")

        pkill["-f"]["generate_graysort_inputs.py"]()
        generate_command = None

    else:
        # Ordered (form field, command template, log message) triples; only
        # the first field that is set is acted upon.
        wipe_requests = [
            ("wipe_input", "%s input %s", "Wiping input data"),
            ("wipe_intermediate_output", "%s noinput %s",
             "Wiping intermediate/output data"),
            ("wipe_all", "%s all %s", "Wiping all data"),
        ]

        for field_name, command_template, log_message in wipe_requests:
            if getattr(post, field_name):
                # Destructive action: insist on the explicit confirmation.
                allowed = post.getall('allow_wipe_data')
                if len(allowed) == 1 and allowed[0] == "True":
                    print(log_message)
                    clear_script = os.path.join(
                        SCRIPT_DIR, os.pardir, "clear_disks.sh")
                    parallel_ssh_cmd[command_template % (
                        clear_script, disk_mountpoint)]()
                break

    # Call the normal /data_generation method.
    return data_generation()
def load_constraints(filename):
    """Load a JSON constraints file and normalise I/O-bound stage rates.

    The file is expected to map phase -> {consts.STAGES_KEY: {stage: info}}.
    For every stage whose info contains consts.IO_BOUND_KEY, the
    consts.MAX_RATE_PER_WORKER_KEY entry (a string of the form
    "<number> <unit>", split on the first space) is replaced in place by
    the numeric result of ``convert(number, unit, "Bps")``.

    Returns the parsed (and updated) constraints structure.
    """
    with open(filename, 'r') as constraints_file:
        constraints = json.load(constraints_file)

    rate_key = consts.MAX_RATE_PER_WORKER_KEY

    for phase_info in constraints.values():
        for stage_info in phase_info[consts.STAGES_KEY].values():
            # Only I/O-bound stages have their rate normalised.
            if consts.IO_BOUND_KEY not in stage_info:
                continue

            amount_text, _, unit = stage_info[rate_key].partition(' ')
            stage_info[rate_key] = convert(float(amount_text), unit, "Bps")

    return constraints
Exemplo n.º 4
0
def load_constraints(filename):
    """Read constraints from a JSON file, converting I/O-bound rates.

    Each phase in the file holds a consts.STAGES_KEY mapping of stage name
    to stage info. When a stage's info contains consts.IO_BOUND_KEY, its
    consts.MAX_RATE_PER_WORKER_KEY value -- text of the form
    "<number> <unit>", separated at the first space -- is overwritten with
    the number returned by ``convert(number, unit, "Bps")``.

    Returns the loaded structure with those rates rewritten in place.
    """
    with open(filename, 'r') as source:
        constraints = json.load(source)

    def normalise_rate(stage_info):
        # Split "<number> <unit>" and store the converted numeric rate.
        key = consts.MAX_RATE_PER_WORKER_KEY
        number_text, _, unit = stage_info[key].partition(' ')
        stage_info[key] = convert(float(number_text), unit, "Bps")

    for phase in constraints:
        stages = constraints[phase][consts.STAGES_KEY]
        for stage_name in stages:
            if consts.IO_BOUND_KEY in stages[stage_name]:
                normalise_rate(stages[stage_name])

    return constraints
Exemplo n.º 5
0
    def _make_cell_readable(self, cell_value):
        # Make sure we don't try to convert non-numeric types
        if type(cell_value) not in [int, float]:
            return cell_value

        units = ["B", "KiB", "MiB", "GiB", "TiB"]

        if cell_value > 0:
            # floor(log_2(X)) gives the closest integer power of 2 to
            # X. Dividing it by 10 gives the unit (2^0 = bytes, 2^10 =
            # kibibytes, and so on, so 0 = bytes, 1 = kibibytes, etc.)
            cell_value_dest_unit = units[min(
                int(math.floor(math.log(cell_value, 2) / 10)),
                len(units) - 1)]

            cell_value = "%.2f %s" % (convert(
                cell_value, "B", cell_value_dest_unit), cell_value_dest_unit)
        else:
            cell_value = "0 B"

        return cell_value
    def _make_cell_readable(self, cell_value):
        # Make sure we don't try to convert non-numeric types
        if type(cell_value) not in [int, float]:
            return cell_value

        units = ["B", "KiB", "MiB", "GiB", "TiB"]

        if cell_value > 0:
            # floor(log_2(X)) gives the closest integer power of 2 to
            # X. Dividing it by 10 gives the unit (2^0 = bytes, 2^10 =
            # kibibytes, and so on, so 0 = bytes, 1 = kibibytes, etc.)
            cell_value_dest_unit = units[
                min(int(math.floor(math.log(cell_value, 2) / 10)),
                    len(units) - 1)]

            cell_value = "%.2f %s" % (
                convert(cell_value, "B", cell_value_dest_unit),
                cell_value_dest_unit)
        else:
            cell_value = "0 B"

        return cell_value
Exemplo n.º 7
0
def postprocess_rate_data(data):
    """Build per-epoch display records from raw rate data.

    ``data`` maps ``(job, phase, epoch)`` tuples to dictionaries. A key
    whose phase and epoch are both ``None`` carries a ``"disks"`` mapping;
    it is not emitted itself but becomes the disk information used for the
    entries that follow it in sorted key order. Every other key yields one
    record holding:

    * ``job``, ``phase``, ``epoch``
    * ``intermediate_disks`` -- sum of ``num_intermediate_disks`` over the
      current disk information
    * ``num_nodes`` -- ``len`` of the entry's ``"hosts"``
    * ``input_size``, ``output_size`` (0 when absent from ``"stats"``) and
      ``max_size``, each as a dict of ``convert(size, "B", unit)`` per unit
    * ``total_time`` -- span between the smallest start and largest stop
      timestamp, converted from ``"us"`` to ``"s"`` and ``"min"``; both are
      0 when ``"stats"`` has no ``"timestamps"``

    Returns a dict keyed like ``data`` (minus the disk-only keys).
    """
    display_data = {}

    disk_data = {}

    def none_first(data_key):
        # Order None before any real value in each tuple position. This is
        # the order Python 2 produced implicitly; spelling it out keeps the
        # disk-only (job, None, None) key ahead of that job's epochs and
        # avoids the TypeError Python 3 raises when comparing None with a
        # string or number.
        return tuple((part is not None, part) for part in data_key)

    for data_key in sorted(data, key=none_first):
        (job, phase, epoch) = data_key

        epoch_data = data[data_key]

        if phase is None and epoch is None:
            # Disk-only entry: remember it for the epochs that follow.
            disk_data = epoch_data["disks"]
            continue

        epoch_display_data = {}

        epoch_display_data["job"] = job
        epoch_display_data["phase"] = phase
        epoch_display_data["epoch"] = epoch

        host_data = epoch_data["hosts"]
        stats_data = epoch_data["stats"]

        epoch_display_data["intermediate_disks"] = sum(
            (disk_data[host]["num_intermediate_disks"] for host in disk_data))

        epoch_display_data["num_nodes"] = len(host_data)

        input_size = stats_data["input_size"]
        if "output_size" in stats_data:
            output_size = stats_data["output_size"]
        else:
            output_size = 0
        max_size = max(input_size, output_size)

        for (size_name, size) in [("input_size", input_size),
                                  ("output_size", output_size),
                                  ("max_size", max_size)]:

            epoch_display_data[size_name] = {}

            for unit in ["B", "MB", "MiB", "GB", "Gb", "GiB", "TB"]:
                epoch_display_data[size_name][unit] = convert(size, "B", unit)

        epoch_display_data["total_time"] = {}

        if "timestamps" in stats_data:
            smallest_start_time = stats_data["timestamps"]["start"].min()
            largest_stop_time = stats_data["timestamps"]["stop"].max()

            total_time = largest_stop_time - smallest_start_time

            for unit in ["s", "min"]:
                epoch_display_data["total_time"][unit] = convert(
                    total_time, "us", unit)
        else:
            for unit in ["s", "min"]:
                epoch_display_data["total_time"][unit] = 0

        display_data[data_key] = epoch_display_data

    return display_data
def postprocess_rate_data(data):
    """Turn raw per-epoch rate data into records ready for display.

    ``data`` is keyed by ``(job, phase, epoch)`` tuples. Keys with both
    phase and epoch set to ``None`` only supply a ``"disks"`` mapping, which
    is then used for all subsequent keys in sorted order; they produce no
    output record. For every other key the returned dict contains a record
    with the job/phase/epoch, the summed ``num_intermediate_disks`` of the
    current disk mapping, the number of hosts, the input/output/max sizes
    expressed in several units via ``convert(size, "B", unit)`` (a missing
    ``output_size`` counts as 0), and the total time between the earliest
    start and latest stop timestamp converted from ``"us"`` to ``"s"`` and
    ``"min"`` (0 for both when there are no timestamps).
    """
    display_data = {}

    disk_data = {}

    # Sort with None ahead of real values in every tuple position. Python 2
    # ordered keys this way on its own; making it explicit keeps each job's
    # disk-only (job, None, None) key in front of its epochs and works on
    # Python 3, where None cannot be compared with strings or numbers.
    ordered_keys = sorted(
        data, key=lambda k: tuple((part is not None, part) for part in k))

    for data_key in ordered_keys:
        (job, phase, epoch) = data_key

        epoch_data = data[data_key]

        if phase is None and epoch is None:
            # Disk-only entry: keep it for the epochs processed afterwards.
            disk_data = epoch_data["disks"]
            continue

        epoch_display_data = {}

        epoch_display_data["job"] = job
        epoch_display_data["phase"] = phase
        epoch_display_data["epoch"] = epoch

        host_data = epoch_data["hosts"]
        stats_data = epoch_data["stats"]

        epoch_display_data["intermediate_disks"] = sum(
            (disk_data[host]["num_intermediate_disks"] for host in disk_data))

        epoch_display_data["num_nodes"] = len(host_data)

        input_size = stats_data["input_size"]
        if "output_size" in stats_data:
            output_size = stats_data["output_size"]
        else:
            output_size = 0
        max_size = max(input_size, output_size)

        for (size_name, size) in [("input_size", input_size),
                                  ("output_size", output_size),
                                  ("max_size", max_size)]:

            epoch_display_data[size_name] = {}

            for unit in ["B", "MB", "MiB", "GB", "Gb", "GiB", "TB"]:
                epoch_display_data[size_name][unit] = convert(
                    size, "B", unit)

        epoch_display_data["total_time"] = {}

        if "timestamps" in stats_data:
            smallest_start_time = stats_data["timestamps"]["start"].min()
            largest_stop_time = stats_data["timestamps"]["stop"].max()

            total_time = largest_stop_time - smallest_start_time

            for unit in ["s", "min"]:
                epoch_display_data["total_time"][unit] = convert(
                    total_time, "us", unit)
        else:
            for unit in ["s", "min"]:
                epoch_display_data["total_time"][unit] = 0

        display_data[data_key] = epoch_display_data

    return display_data