Пример #1
0
def compute_capacity(stream: VCFStream, population: Population, repeats: int,
                     ratio: float, predictor_factory: PredictorFactory):
    """Print a per-polymorphism prediction score for the given stream.

    For every polymorphism yielded by ``stream`` a predictor is built from
    the train part of the individuals and evaluated on the target part.  The
    score of one repeat is the sum of the values the predictor assigns to the
    true label of each target individual, divided by the number of target
    individuals.  One tab-separated line is printed per polymorphism: the
    identifier and the mean score, followed by the standard deviation of the
    scores unless ``repeats`` equals 1.

    :param stream: Source of individuals and polymorphisms.
    :param population: Provides the label of every individual.
    :param repeats: Number of train/target partitions evaluated per
        polymorphism.
    :param ratio: Fraction of the individuals used to compute the split
        index handed to ``split``.
    :param predictor_factory: Builds a predictor from genotypes and labels.
    """
    individuals = stream.individuals
    total = len(individuals)
    split_index = int(total * ratio)

    labels = population.get_labels(individuals)

    # The partitions are drawn once up front and reused for every
    # polymorphism, because calling `split` per polymorphism and repeat is
    # too slow.
    if repeats > 1:
        raw_partitions = []
        for _ in range(repeats):
            raw_partitions.append(split(range(total), split_index))
        partitions = [(set(train_part), set(target_part))
                      for train_part, target_part in raw_partitions]
    else:
        # Without repetition all individuals are used both for training and
        # for evaluation.
        partitions = [(set(range(total)), set(range(total)))]

    for polymorphism in stream:
        accuracies: List[float] = []

        for repeat_idx in range(repeats):
            train_ids, target_ids = partitions[repeat_idx]
            genotypes = (process_variant(field)
                         for field in polymorphism.data_fields)

            train = []
            target = []
            records = zip(individuals, genotypes, labels)
            for position, record in enumerate(records):
                if position in train_ids:
                    train.append(record)
                if position in target_ids:
                    target.append(record)

            _train_individuals, train_genotypes, train_labels = zip(*train)
            predictor = predictor_factory.build(train_genotypes, train_labels)

            score = 0.0
            for _target_individual, genotype, label in target:
                predictions = predictor.predict(genotype)
                if label in predictions:
                    score += predictions[label]

            accuracies.append(score / len(target))

        mean_score = mean(accuracies)
        if repeats != 1:
            spread = stdev(accuracies)
            print(f'{polymorphism.get_identifier()}\t{mean_score:.5f}'
                  f'\t{spread:.5f}')
        else:
            print(f'{polymorphism.get_identifier()}\t{mean_score:.5f}')
Пример #2
0
def main():
    """Gather summary statistics of the numeric columns of a CSV file.

    The path of the CSV file is taken from the single command-line argument.
    Columns whose dtype is ``str`` or ``object`` are skipped.  For every
    other column the count, total, mean, standard deviation, minimum,
    maximum and the 25%/50%/75% quantiles of the non-NaN values are
    collected in a dictionary keyed by column label, which is then passed to
    ``describe``.  A column without any non-NaN value keeps the initial
    values (zeros, ``inf`` as minimum and ``-inf`` as maximum).

    Exits with status 1 when the number of arguments is wrong.
    """
    if len(sys.argv) != 2:
        print('\033[91m' + '✘ Error: ' + '\033[0m' +
              'CSV file is missing, please add his path as argument')
        # A usage error must not look like a success to the calling shell.
        sys.exit(1)
    df = utils.dataframe(sys.argv[1])

    dico_numerals = {}
    for label in list(df.columns.values):
        column = df[label]
        if column.dtypes == str or column.dtypes == object:
            continue

        stats = {
            'count': 0,
            'mean': 0,
            'std': 0,
            'min': float('inf'),
            '25%': 0,
            '50%': 0,
            '75%': 0,
            'max': float('-inf'),
            'total': 0
        }
        dico_numerals[label] = stats

        # Only the non-NaN values contribute to any of the statistics, so
        # they are extracted once and walked directly instead of iterating
        # over all rows of the whole frame for every column.
        values = column.dropna()
        for value in values:
            if value > stats['max']:
                stats['max'] = value
            if value < stats['min']:
                stats['min'] = value
            stats['count'] += 1
            stats['total'] += value

        if stats['count'] > 0:
            stats['mean'] = stats['total'] / stats['count']
            stats['25%'] = utils.calc_quantile(values, 0.25)
            stats['50%'] = utils.calc_quantile(values, 0.5)
            stats['75%'] = utils.calc_quantile(values, 0.75)
            stats['std'] = utils.stdev(values)

    describe(dico_numerals)
Пример #3
0
def table_soak_vs_ndr(table, input_data):
    """Generate the table(s) with algorithm: table_soak_vs_ndr
    specified in the specification file.

    SOAK results are collected from the "compare" data set. Results from the
    "reference" data set are attached to them by test name, compared after
    removing the "-soak", "-ndrpdr" and "-mrr" suffixes. Only tests with a
    non-zero mean on both sides are written. The rows are sorted by the
    relative change ("Delta [%]") in descending order, written to
    "<output-file>.csv" and the csv file is then handed over to
    convert_csv_to_pretty_txt to produce "<output-file>.txt".

    :param table: Table to generate.
    :param input_data: Data to process.
    :type table: pandas.Series
    :type input_data: InputData
    """

    logging.info("  Generating the table {0} ...".format(table.get(
        "title", "")))

    # Transform the data
    logging.info("    Creating the data set for the {0} '{1}'.".format(
        table.get("type", ""), table.get("title", "")))
    data = input_data.filter_data(table, continue_on_error=True)

    # Prepare the header of the table
    try:
        header = [
            "Test case",
            "{0} Throughput [Mpps]".format(table["reference"]["title"]),
            "{0} Stdev [Mpps]".format(table["reference"]["title"]),
            "{0} Throughput [Mpps]".format(table["compare"]["title"]),
            "{0} Stdev [Mpps]".format(table["compare"]["title"]), "Delta [%]",
            "Stdev of delta [%]"
        ]
        header_str = ",".join(header) + "\n"
    except (AttributeError, KeyError) as err:
        logging.error(
            "The model is invalid, missing parameter: {0}".format(err))
        return

    # Create a list of available SOAK test results:
    tbl_dict = dict()
    for job, builds in table["compare"]["data"].items():
        for build in builds:
            for tst_name, tst_data in data[job][str(build)].iteritems():
                if tst_data["type"] != "SOAK":
                    continue
                tst_name_mod = tst_name.replace("-soak", "")
                if tbl_dict.get(tst_name_mod, None) is None:
                    groups = re.search(REGEX_NIC, tst_data["parent"])
                    nic = groups.group(0) if groups else ""
                    name = "{0}-{1}".format(
                        nic, "-".join(tst_data["name"].split("-")[:-1]))
                    tbl_dict[tst_name_mod] = {
                        "name": name,
                        "ref-data": list(),
                        "cmp-data": list()
                    }
                try:
                    tbl_dict[tst_name_mod]["cmp-data"].append(
                        tst_data["throughput"]["LOWER"])
                except (KeyError, TypeError):
                    # No usable throughput for this run; the test stays in
                    # the table with whatever results other runs provide.
                    pass

    # Add corresponding NDR test results:
    for job, builds in table["reference"]["data"].items():
        for build in builds:
            for tst_name, tst_data in data[job][str(build)].iteritems():
                tst_name_mod = tst_name.replace("-ndrpdr", "").\
                    replace("-mrr", "")
                # Membership is tested on the dictionary itself, which is a
                # hash lookup regardless of what keys() returns.
                if tst_name_mod not in tbl_dict:
                    continue
                try:
                    if tst_data["type"] in ("NDRPDR", "MRR", "BMRR"):
                        if table["include-tests"] == "MRR":
                            result = tst_data["result"]["receive-rate"].avg
                        elif table["include-tests"] == "PDR":
                            result = tst_data["throughput"]["PDR"]["LOWER"]
                        elif table["include-tests"] == "NDR":
                            result = tst_data["throughput"]["NDR"]["LOWER"]
                        else:
                            result = None
                        if result is not None:
                            tbl_dict[tst_name_mod]["ref-data"].append(
                                result)
                except (KeyError, TypeError):
                    continue

    tbl_lst = list()
    for tst_entry in tbl_dict.values():
        item = [
            tst_entry["name"],
        ]
        data_r = tst_entry["ref-data"]
        if data_r:
            data_r_mean = mean(data_r)
            item.append(round(data_r_mean / 1000000, 2))
            data_r_stdev = stdev(data_r)
            item.append(round(data_r_stdev / 1000000, 2))
        else:
            data_r_mean = None
            data_r_stdev = None
            item.extend([None, None])
        data_c = tst_entry["cmp-data"]
        if data_c:
            data_c_mean = mean(data_c)
            item.append(round(data_c_mean / 1000000, 2))
            data_c_stdev = stdev(data_c)
            item.append(round(data_c_stdev / 1000000, 2))
        else:
            data_c_mean = None
            data_c_stdev = None
            item.extend([None, None])
        # Rows without a (non-zero) mean on either side are left out.
        if data_r_mean and data_c_mean:
            delta, d_stdev = relative_change_stdev(data_r_mean, data_c_mean,
                                                   data_r_stdev, data_c_stdev)
            item.append(round(delta, 2))
            item.append(round(d_stdev, 2))
            tbl_lst.append(item)

    # Sort the table according to the relative change. Each row ends with
    # [..., delta, stdev of delta], so the relative change is the second
    # item from the end, not the last one.
    tbl_lst.sort(key=lambda rel: rel[-2], reverse=True)

    # Generate csv tables:
    csv_file = "{0}.csv".format(table["output-file"])
    with open(csv_file, "w") as file_handler:
        file_handler.write(header_str)
        for test in tbl_lst:
            file_handler.write(",".join([str(item) for item in test]) + "\n")

    convert_csv_to_pretty_txt(csv_file, "{0}.txt".format(table["output-file"]))
Пример #4
0
def table_nics_comparison(table, input_data):
    """Generate the table(s) with algorithm: table_nics_comparison
    specified in the specification file.

    Results are grouped by test name with the test type suffix, the
    thread/core tag and the NIC part unified, and are assigned to the
    reference or to the compared side according to the NIC found in the
    tags of the test. Rows having results on both sides are written to
    "<output-file>.csv", sorted by the relative change in descending order;
    the csv file is then handed over to convert_csv_to_pretty_txt.

    :param table: Table to generate.
    :param input_data: Data to process.
    :type table: pandas.Series
    :type input_data: InputData
    """

    title = table.get("title", "")
    logging.info("  Generating the table {0} ...".format(title))

    # Transform the data
    logging.info("    Creating the data set for the {0} '{1}'.".format(
        table.get("type", ""), title))
    data = input_data.filter_data(table, continue_on_error=True)

    # Prepare the header of the tables
    try:
        if table["include-tests"] == "MRR":
            hdr_param = "Receive Rate"
        else:
            hdr_param = "Throughput"
        ref_title = table["reference"]["title"]
        cmp_title = table["compare"]["title"]
        header = [
            "Test case",
            "{0} {1} [Mpps]".format(ref_title, hdr_param),
            "{0} Stdev [Mpps]".format(ref_title),
            "{0} {1} [Mpps]".format(cmp_title, hdr_param),
            "{0} Stdev [Mpps]".format(cmp_title),
            "Delta [%]"
        ]
        header_str = ",".join(header) + "\n"
    except (AttributeError, KeyError) as err:
        logging.error(
            "The model is invalid, missing parameter: {0}".format(err))
        return

    # Substitutions applied, in this order, to a test name to get the key
    # under which the results of both NICs meet.
    substitutions = (
        ("-ndrpdrdisc", ""), ("-ndrpdr", ""), ("-pdrdisc", ""),
        ("-ndrdisc", ""), ("-pdr", ""), ("-ndr", ""),
        ("1t1c", "1c"), ("2t1c", "1c"),
        ("2t2c", "2c"), ("4t2c", "2c"),
        ("4t4c", "4c"), ("8t4c", "4c"),
    )

    # Prepare data to the table:
    tbl_dict = dict()
    for job, builds in table["data"].items():
        for build in builds:
            for tst_name, tst_data in data[job][str(build)].iteritems():
                tst_name_mod = tst_name
                for old, new in substitutions:
                    tst_name_mod = tst_name_mod.replace(old, new)
                tst_name_mod = re.sub(REGEX_NIC, "", tst_name_mod)
                if tst_name_mod not in tbl_dict:
                    tbl_dict[tst_name_mod] = {
                        "name": "-".join(tst_data["name"].split("-")[:-1]),
                        "ref-data": list(),
                        "cmp-data": list()
                    }
                try:
                    if table["include-tests"] == "MRR":
                        result = tst_data["result"]["receive-rate"].avg
                    elif table["include-tests"] == "PDR":
                        result = tst_data["throughput"]["PDR"]["LOWER"]
                    elif table["include-tests"] == "NDR":
                        result = tst_data["throughput"]["NDR"]["LOWER"]
                    else:
                        result = None

                    if not result:
                        continue
                    if table["reference"]["nic"] in tst_data["tags"]:
                        tbl_dict[tst_name_mod]["ref-data"].append(result)
                    elif table["compare"]["nic"] in tst_data["tags"]:
                        tbl_dict[tst_name_mod]["cmp-data"].append(result)
                except (TypeError, KeyError) as err:
                    # No data in output.xml for this test
                    logging.debug("No data for {0}".format(tst_name))
                    logging.debug(repr(err))

    tbl_lst = list()
    for tst_entry in tbl_dict.values():
        row = [tst_entry["name"]]
        for side in ("ref-data", "cmp-data"):
            samples = tst_entry[side]
            if samples:
                row.append(round(mean(samples) / 1000000, 2))
                row.append(round(stdev(samples) / 1000000, 2))
            else:
                row.extend([None, None])
        # row is now [name, ref mean, ref stdev, cmp mean, cmp stdev]
        ref_mean = row[1]
        cmp_mean = row[3]
        if ref_mean is not None and cmp_mean is not None and ref_mean != 0:
            row.append(int(relative_change(float(ref_mean), float(cmp_mean))))
        # Only rows which got the relative change are complete.
        if len(row) == len(header):
            tbl_lst.append(row)

    # Sort the table according to the relative change
    tbl_lst.sort(key=lambda row: row[-1], reverse=True)

    # Generate csv tables:
    csv_file = "{0}.csv".format(table["output-file"])
    with open(csv_file, "w") as file_handler:
        file_handler.write(header_str)
        for row in tbl_lst:
            file_handler.write(",".join(map(str, row)) + "\n")

    convert_csv_to_pretty_txt(csv_file, "{0}.txt".format(table["output-file"]))
Пример #5
0
def table_performance_comparison(table, input_data):
    """Generate the table(s) with algorithm: table_performance_comparison
    specified in the specification file.

    Results of the "reference" data set define the tests of the table;
    results of the "compare" data set and of the optional "history" data
    sets are attached to them by the normalized test name. Rows having
    results on both the reference and the compared side are written to
    "<output-file>.csv", sorted by the relative change in descending order;
    the csv file is then handed over to convert_csv_to_pretty_txt.

    Every row gets exactly one pair of cells per configured history item
    (None, None if the test has no results there), so a test with missing
    or partial history stays in the table also when more than one history
    item is configured.

    :param table: Table to generate.
    :param input_data: Data to process.
    :type table: pandas.Series
    :type input_data: InputData
    """

    logging.info("  Generating the table {0} ...".format(table.get(
        "title", "")))

    # Transform the data
    logging.info("    Creating the data set for the {0} '{1}'.".format(
        table.get("type", ""), table.get("title", "")))
    data = input_data.filter_data(table, continue_on_error=True)

    # Prepare the header of the tables
    try:
        header = [
            "Test case",
        ]

        if table["include-tests"] == "MRR":
            hdr_param = "Receive Rate"
        else:
            hdr_param = "Throughput"

        history = table.get("history", None)
        if history:
            for hist_item in history:
                header.extend([
                    "{0} {1} [Mpps]".format(hist_item["title"], hdr_param),
                    "{0} Stdev [Mpps]".format(hist_item["title"])
                ])
        header.extend([
            "{0} {1} [Mpps]".format(table["reference"]["title"], hdr_param),
            "{0} Stdev [Mpps]".format(table["reference"]["title"]),
            "{0} {1} [Mpps]".format(table["compare"]["title"], hdr_param),
            "{0} Stdev [Mpps]".format(table["compare"]["title"]), "Delta [%]"
        ])
        header_str = ",".join(header) + "\n"
    except (AttributeError, KeyError) as err:
        logging.error(
            "The model is invalid, missing parameter: {0}".format(err))
        return

    # Present for sure, it was already read while building the header.
    include_tests = table["include-tests"]

    # Prepare data to the table:
    tbl_dict = dict()
    for job, builds in table["reference"]["data"].items():
        for build in builds:
            for tst_name, tst_data in data[job][str(build)].iteritems():
                tst_name_mod = _perf_cmp_normalize_name(tst_name, table)
                if tbl_dict.get(tst_name_mod, None) is None:
                    tbl_dict[tst_name_mod] = {
                        "name": _perf_cmp_display_name(tst_data, table),
                        "ref-data": list(),
                        "cmp-data": list()
                    }
                try:
                    tbl_dict[tst_name_mod]["ref-data"].extend(
                        _perf_cmp_results(include_tests, tst_data))
                except TypeError:
                    pass  # No data in output.xml for this test

    for job, builds in table["compare"]["data"].items():
        for build in builds:
            for tst_name, tst_data in data[job][str(build)].iteritems():
                tst_name_mod = _perf_cmp_normalize_name(tst_name, table)
                try:
                    tbl_dict[tst_name_mod]["cmp-data"].extend(
                        _perf_cmp_results(include_tests, tst_data))
                except KeyError:
                    # The test is not in the reference data set or the
                    # result is missing; nothing to add.
                    pass
                except TypeError:
                    # No data for the compared side; the test is removed
                    # from the table.
                    tbl_dict.pop(tst_name_mod, None)

    if history:
        for hist_item in history:
            for job, builds in hist_item["data"].items():
                for build in builds:
                    for tst_name, tst_data in data[job][str(
                            build)].iteritems():
                        tst_name_mod = _perf_cmp_normalize_name(
                            tst_name, table)
                        if tbl_dict.get(tst_name_mod, None) is None:
                            continue
                        hist_results = tbl_dict[tst_name_mod].\
                            setdefault("history", dict()).\
                            setdefault(hist_item["title"], list())
                        try:
                            hist_results.extend(
                                _perf_cmp_results(include_tests, tst_data))
                        except (TypeError, KeyError):
                            pass

    tbl_lst = list()
    for tst_entry in tbl_dict.values():
        item = [
            tst_entry["name"],
        ]
        if history:
            # Walk the configured history items, not the collected results,
            # to keep the cells aligned with the header columns.
            hist_results = tst_entry.get("history", dict())
            for hist_item in history:
                item.extend(_perf_cmp_mean_stdev(
                    hist_results.get(hist_item["title"], None)))
        item.extend(_perf_cmp_mean_stdev(tst_entry["ref-data"]))
        item.extend(_perf_cmp_mean_stdev(tst_entry["cmp-data"]))
        # item[-4] is the reference mean, item[-2] the compared mean.
        if item[-4] is not None and item[-2] is not None and item[-4] != 0:
            item.append(int(relative_change(float(item[-4]), float(item[-2]))))
        # Only rows which got the relative change are complete.
        if len(item) == len(header):
            tbl_lst.append(item)

    # Sort the table according to the relative change
    tbl_lst.sort(key=lambda rel: rel[-1], reverse=True)

    # Generate csv tables:
    csv_file = "{0}.csv".format(table["output-file"])
    with open(csv_file, "w") as file_handler:
        file_handler.write(header_str)
        for test in tbl_lst:
            file_handler.write(",".join([str(item) for item in test]) + "\n")

    convert_csv_to_pretty_txt(csv_file, "{0}.txt".format(table["output-file"]))


def _perf_cmp_unify_cores(name):
    """Replace the thread/core tags in the name by the number of cores."""
    for old, new in (("1t1c", "1c"), ("2t1c", "1c"),
                     ("2t2c", "2c"), ("4t2c", "2c"),
                     ("4t4c", "4c"), ("8t4c", "4c")):
        name = name.replace(old, new)
    return name


def _perf_cmp_normalize_name(tst_name, table):
    """Return the key under which the results of one test are collected.

    The test type suffix is removed and the thread/core tag is unified; the
    "2n1l-" tag is removed as well for "across topologies" tables.
    """
    for suffix in ("-ndrpdrdisc", "-ndrpdr", "-pdrdisc", "-ndrdisc", "-pdr",
                   "-ndr"):
        tst_name = tst_name.replace(suffix, "")
    tst_name = _perf_cmp_unify_cores(tst_name)
    if "across topologies" in table["title"].lower():
        tst_name = tst_name.replace("2n1l-", "")
    return tst_name


def _perf_cmp_display_name(tst_data, table):
    """Return the name of the test displayed in the first column."""
    groups = re.search(REGEX_NIC, tst_data["parent"])
    nic = groups.group(0) if groups else ""
    name = "{0}-{1}".format(
        nic, "-".join(tst_data["name"].split("-")[:-1]))
    title = table["title"].lower()
    if "across testbeds" in title or "across topologies" in title:
        name = _perf_cmp_unify_cores(name)
    return name


def _perf_cmp_results(include_tests, tst_data):
    """Return the results of one test run relevant for the table.

    :returns: List with one value, or an empty list if the test type does
        not provide the requested kind of result.
    """
    # TODO: Re-work when NDRPDRDISC tests are not used
    if include_tests == "MRR":
        return [tst_data["result"]["receive-rate"].avg]
    if include_tests in ("PDR", "NDR"):
        if tst_data["type"] == include_tests:
            return [tst_data["throughput"]["value"]]
        if tst_data["type"] == "NDRPDR":
            return [tst_data["throughput"][include_tests]["LOWER"]]
    return list()


def _perf_cmp_mean_stdev(samples):
    """Return [mean, stdev] of the samples divided by 1000000 and rounded to
    two places, or [None, None] if there are no samples."""
    if samples:
        return [round(mean(samples) / 1000000, 2),
                round(stdev(samples) / 1000000, 2)]
    return [None, None]
Пример #6
0
def table_performance_comparison_mrr(table, input_data):
    """Generate the table(s) with algorithm: table_performance_comparison_mrr
    specified in the specification file.

    The receive rates of the tests in the "reference" data set are compared
    with the receive rates of the same tests in the "compare" data set.
    Rows having results on both sides are sorted by the relative change in
    descending order and written to three files, one per thread/core tag
    (1t1c, 2t2c, 4t4c); each file is then handed over to
    convert_csv_to_pretty_txt.

    :param table: Table to generate.
    :param input_data: Data to process.
    :type table: pandas.Series
    :type input_data: InputData
    """

    title = table.get("title", "")
    logging.info("  Generating the table {0} ...".format(title))

    # Transform the data
    logging.info("    Creating the data set for the {0} '{1}'.".format(
        table.get("type", ""), title))
    data = input_data.filter_data(table, continue_on_error=True)

    # Prepare the header of the tables
    try:
        ref_title = table["reference"]["title"]
        cmp_title = table["compare"]["title"]
        header = [
            "Test case",
            "{0} Throughput [Mpps]".format(ref_title),
            "{0} stdev [Mpps]".format(ref_title),
            "{0} Throughput [Mpps]".format(cmp_title),
            "{0} stdev [Mpps]".format(cmp_title),
            "Change [%]"
        ]
        header_str = ",".join(header) + "\n"
    except (AttributeError, KeyError) as err:
        logging.error(
            "The model is invalid, missing parameter: {0}".format(err))
        return

    # Prepare data to the table:
    tbl_dict = dict()
    for job, builds in table["reference"]["data"].items():
        for build in builds:
            for tst_name, tst_data in data[job][str(build)].iteritems():
                if tst_name not in tbl_dict:
                    tbl_dict[tst_name] = {
                        "name": "{0}-{1}".format(
                            tst_data["parent"].split("-")[0],
                            "-".join(tst_data["name"].split("-")[1:])),
                        "ref-data": list(),
                        "cmp-data": list()
                    }
                try:
                    rate = tst_data["result"]["receive-rate"].avg
                except TypeError:
                    continue  # No data in output.xml for this test
                tbl_dict[tst_name]["ref-data"].append(rate)

    for job, builds in table["compare"]["data"].items():
        for build in builds:
            for tst_name, tst_data in data[job][str(build)].iteritems():
                try:
                    tbl_dict[tst_name]["cmp-data"].\
                        append(tst_data["result"]["receive-rate"].avg)
                except TypeError:
                    # No data for the compared side; the test is removed
                    # from the table.
                    tbl_dict.pop(tst_name, None)
                except KeyError:
                    pass

    tbl_lst = list()
    for tst_entry in tbl_dict.values():
        row = [tst_entry["name"]]
        for side in ("ref-data", "cmp-data"):
            samples = tst_entry[side]
            if samples:
                row.append(round(mean(samples) / 1000000, 2))
                row.append(round(stdev(samples) / 1000000, 2))
            else:
                row.extend([None, None])
        # row is now [name, ref mean, ref stdev, cmp mean, cmp stdev]
        ref_mean = row[1]
        cmp_mean = row[3]
        if ref_mean is not None and cmp_mean is not None and ref_mean != 0:
            row.append(int(relative_change(float(ref_mean), float(cmp_mean))))
        # Only rows which got the relative change are complete.
        if len(row) == 6:
            tbl_lst.append(row)

    # Sort the table according to the relative change
    tbl_lst.sort(key=lambda row: row[-1], reverse=True)

    # Generate tables:
    # All tests in csv:
    tbl_names = [
        template.format(table["output-file"], table["output-file-ext"])
        for template in ("{0}-1t1c-full{1}",
                         "{0}-2t2c-full{1}",
                         "{0}-4t4c-full{1}")
    ]
    for file_name in tbl_names:
        logging.info("      Writing file: '{0}'".format(file_name))
        cores = file_name.split("-")[-2]
        with open(file_name, "w") as file_handler:
            file_handler.write(header_str)
            for row in tbl_lst:
                if cores not in row[0]:
                    continue
                # The last part of the name is cut off in place, so the
                # shortened name is also what a later file would see.
                row[0] = "-".join(row[0].split("-")[:-1])
                file_handler.write(",".join(map(str, row)) + "\n")

    # All tests in txt:
    tbl_names_txt = [
        template.format(table["output-file"])
        for template in ("{0}-1t1c-full.txt",
                         "{0}-2t2c-full.txt",
                         "{0}-4t4c-full.txt")
    ]

    for csv_name, txt_name in zip(tbl_names, tbl_names_txt):
        logging.info("      Writing file: '{0}'".format(txt_name))
        convert_csv_to_pretty_txt(csv_name, txt_name)
Пример #7
0
def table_performance_comparison(table, input_data):
    """Generate the table(s) with algorithm: table_performance_comparison
    specified in the specification file.

    For every test found in the reference builds, the throughput samples of
    the reference builds, the compare builds and (optionally) every history
    item are reduced to mean and stdev, divided by 1000000 and rounded to two
    decimals. The relative change between the reference and compare mean is
    appended as the last column and the rows are sorted by it in descending
    order.

    Files written (names derived from table["output-file"] and
    table["output-file-ext"]):

    - "<output-file>-<ndr|pdr>-<1t1c|2t2c|4t4c>-full<ext>" with all matching
      tests, plus a ".txt" twin produced by convert_csv_to_pretty_txt,
    - "<output-file>-<ndr|pdr>-1t1c-top<ext>" and "...-bottom<ext>" with at
      most table["nr-of-tests-shown"] rows taken from the beginning / from the
      end of the corresponding 1t1c "full" file.

    If the header cannot be built because a parameter is missing in the
    specification, the error is logged and nothing is generated.

    :param table: Table to generate.
    :param input_data: Data to process.
    :type table: pandas.Series
    :type input_data: InputData
    """

    logging.info("  Generating the table {0} ...".format(table.get(
        "title", "")))

    # Transform the data
    logging.info("    Creating the data set for the {0} '{1}'.".format(
        table.get("type", ""), table.get("title", "")))
    data = input_data.filter_data(table, continue_on_error=True)

    # Prepare the header of the tables: two columns (throughput, stdev) per
    # history item, then for the reference and the compare data, followed by
    # the relative change.
    try:
        history = table.get("history", None)
        titles = [item["title"] for item in history] if history else list()
        titles.extend([table["reference"]["title"], table["compare"]["title"]])
        header = ["Test case", ]
        for title in titles:
            header.extend([
                "{0} Throughput [Mpps]".format(title),
                "{0} Stdev [Mpps]".format(title)
            ])
        header.append("Change [%]")
        header_str = ",".join(header) + "\n"
    except (AttributeError, KeyError) as err:
        logging.error(
            "The model is invalid, missing parameter: {0}".format(err))
        return

    # Prepare data to the table:
    # NOTE(review): `.iteritems()` is kept as it is; it exists on Python 2
    # dicts and on older pandas objects - confirm the container type before
    # porting this function.
    tbl_dict = dict()
    for job, builds in table["reference"]["data"].items():
        for build in builds:
            for tst_name, tst_data in data[job][str(build)].iteritems():
                if tbl_dict.get(tst_name, None) is None:
                    # Displayed name: the first part of the parent name
                    # followed by the test name without its first part.
                    name = "{0}-{1}".format(
                        tst_data["parent"].split("-")[0],
                        "-".join(tst_data["name"].split("-")[1:]))
                    tbl_dict[tst_name] = {
                        "name": name,
                        "ref-data": list(),
                        "cmp-data": list()
                    }
                try:
                    tbl_dict[tst_name]["ref-data"].\
                        append(tst_data["throughput"]["value"])
                except TypeError:
                    pass  # No data in output.xml for this test

    for job, builds in table["compare"]["data"].items():
        for build in builds:
            for tst_name, tst_data in data[job][str(build)].iteritems():
                try:
                    tbl_dict[tst_name]["cmp-data"].\
                        append(tst_data["throughput"]["value"])
                except KeyError:
                    # The test is not in the table (not in the reference
                    # data or already removed) or it has no throughput key.
                    pass
                except TypeError:
                    # The throughput cannot be indexed; the test is removed
                    # from the table.
                    tbl_dict.pop(tst_name, None)

    if history:
        for item in history:
            for job, builds in item["data"].items():
                for build in builds:
                    for tst_name, tst_data in data[job][str(
                            build)].iteritems():
                        tst = tbl_dict.get(tst_name, None)
                        if tst is None:
                            continue
                        # Samples are stored per history title; the columns
                        # are later generated in the order of `history`.
                        samples = tst.setdefault("history", dict()).\
                            setdefault(item["title"], list())
                        try:
                            samples.append(tst_data["throughput"]["value"])
                        except (TypeError, KeyError):
                            pass

    tbl_lst = list()
    for tst in tbl_dict.values():
        item = [tst["name"], ]
        if history:
            # One pair of columns for every history item, looked up by its
            # title, so that the row always lines up with the header even if
            # the test is missing in some (or all) of the history items.
            tst_history = tst.get("history", None) or dict()
            for hist in history:
                item.extend(
                    _mean_stdev_mpps(tst_history.get(hist["title"], None)))
        item.extend(_mean_stdev_mpps(tst["ref-data"]))
        item.extend(_mean_stdev_mpps(tst["cmp-data"]))
        # item[-4] is the reference mean, item[-2] is the compare mean.
        if item[-4] is not None and item[-2] is not None and item[-4] != 0:
            item.append(int(relative_change(float(item[-4]), float(item[-2]))))
        # Rows without the relative change are shorter than the header and
        # are not included in the table.
        if len(item) == len(header):
            tbl_lst.append(item)

    # Sort the table according to the relative change
    tbl_lst.sort(key=lambda rel: rel[-1], reverse=True)

    # Generate tables:
    out_name = table["output-file"]
    out_ext = table["output-file-ext"]
    rates = ("ndr", "pdr")
    variants = [(rate, cores)
                for rate in rates
                for cores in ("1t1c", "2t2c", "4t4c")]

    # All tests in csv:
    csv_names = list()
    for rate, cores in variants:
        csv_name = "{0}-{1}-{2}-full{3}".format(out_name, rate, cores, out_ext)
        csv_names.append(csv_name)
        logging.info("      Writing file: '{0}'".format(csv_name))
        with open(csv_name, "w") as file_handler:
            file_handler.write(header_str)
            for test in tbl_lst:
                # A test belongs to the file if its name contains both the
                # rate (NDR vs PDR) and the cores tokens.
                if rate in test[0] and cores in test[0]:
                    # Strip the last dash-separated part of the test name.
                    # The row is modified in place, so the files written
                    # later see the shortened name.
                    test[0] = "-".join(test[0].split("-")[:-1])
                    file_handler.write(",".join([str(cell)
                                                 for cell in test]) + "\n")

    # All tests in txt:
    for csv_name, (rate, cores) in zip(csv_names, variants):
        txt_name = "{0}-{1}-{2}-full.txt".format(out_name, rate, cores)
        logging.info("      Writing file: '{0}'".format(txt_name))
        convert_csv_to_pretty_txt(csv_name, txt_name)

    # Selected tests in csv:
    for rate in rates:
        input_file = "{0}-{1}-1t1c-full{2}".format(out_name, rate, out_ext)
        with open(input_file, "r") as in_file:
            rows = list(in_file)[1:]  # Skip the header line

        _write_selected_tests(
            "{0}-{1}-1t1c-top{2}".format(out_name, rate, out_ext),
            header_str, rows, table)
        _write_selected_tests(
            "{0}-{1}-1t1c-bottom{2}".format(out_name, rate, out_ext),
            header_str, rows[::-1], table)


def _mean_stdev_mpps(samples):
    """Reduce throughput samples to a [mean, stdev] pair for the table.

    Both values are divided by 1000000 and rounded to two decimal places.

    :param samples: Throughput samples; may be empty or None.
    :type samples: list or None
    :returns: [mean, stdev], or [None, None] if there are no samples.
    :rtype: list
    """
    if not samples:
        return [None, None]
    return [
        round(mean(samples) / 1000000, 2),
        round(stdev(samples) / 1000000, 2)
    ]


def _write_selected_tests(file_name, header_str, rows, table):
    """Write the header and the leading rows to the given file.

    Writing stops when the number of written rows is equal to
    table["nr-of-tests-shown"]. The value is read only when there is a row to
    write.

    :param file_name: Name of the file to write.
    :param header_str: Header line, including the trailing new line.
    :param rows: Lines to write, each including its trailing new line.
    :param table: Table specification providing "nr-of-tests-shown".
    :type file_name: str
    :type header_str: str
    :type rows: list
    :type table: pandas.Series
    """
    logging.info("      Writing file: '{0}'".format(file_name))
    with open(file_name, "w") as out_file:
        out_file.write(header_str)
        for idx, line in enumerate(rows):
            if idx == table["nr-of-tests-shown"]:
                break
            out_file.write(line)
            # Append zero to the remaining group
            if len(group_rev_cnt_list) < no_of_groups:
                group_rev_cnt_list = assign_zero(class_group_list,
                                                 group_rev_cnt_list)

            for row in group_rev_cnt_list:  # group_rev_cnt_list is a 2D-list
                group_rev_no.append(row[0])
                group_rev_count.append(row[1])

            # Output for this part
            if is_null:
                stat = "Not a single group made any revisions this week. Please consider encouraging students to contribute more actively.</p>"
            else:
                avg = mean(group_rev_count)
                sd = stdev(group_rev_count)
                stat = "In this class, the average number of revisions per group is " + str(
                    avg
                ) + ". Following is a brief analysis of weekly performance of the class.</p>"
                print(
                    str(sum(group_rev_count)) + " " + str(avg) + " " + str(sd))

            # This part is to get the Best 3 and Worst 3 groups in a class by comparing their revision counts
            # No need to fetch data from db here, @group_rev_cnt_list contains all group numbers with their revision counts.
            group_rev_cnt_list.sort(
                key=lambda x: x[1])  #order by 2nd column @revision_count AS

            num = 3
            len_group_rev_cnt_list = len(group_rev_cnt_list)
            if len_group_rev_cnt_list < num:
                num = len_group_rev_cnt_list
Пример #9
0
  xs = list()
  for code in votes:
    xs.append(votes[code][dim])
    part.append(votes[code]['particip'])
  plt.scatter(xs, part)
  plt.show()


### Participation-related analysis

# Despite the label, no histogram is drawn here (the plotting calls below are
# commented out); only the mean and stdev of the differences are printed.
print "Histogram of participation diff"
# For every code in jt: 'particip' of tables[1] minus 'particip' of tables[0].
diff = list()
for code in jt:
  diff.append(tables[1][code]['particip'] - tables[0][code]['particip'])
m = utils.mean(diff)
st = utils.stdev(diff)
print "Mean= %f , and stdev= %f " % (m, st)

#plt.hist(diff, bins=range(-30, 30))
#plt.show()

# Threshold for 'particip' used by the predicate below.
pbound = 98
# Predicate: true when the entry's 'particip' is at least pbound.
# NOTE(review): the printed messages say ">" while the test is ">=" - confirm
# which one is intended.
geq = lambda v: v['particip'] >= pbound
# Number of entries passing the predicate; the messages label tables[0] as
# "7O" and tables[1] as "14A".
print "Tables in 7O with >%d pct particip: %d" % (pbound, len(utils.filter_by(tables[0], geq)))
print "Tables in 14A with >%d pct particip: %d" % (pbound, len(utils.filter_by(tables[1], geq)))

# Fixed threshold of 10; the commented-out alternative derives it from the
# mean and stdev computed above (m + 3*st).
#delta = round(m + 3*st, 2)
delta = 10
print "Tables in which there is more than %.2f pct participation difference:" % delta
codes = utils.compare_by(
  tables,