def main(options):
    """Log and print every table of the SQLite database given by ``--database``.

    ``options`` is indexed with ``"--database"`` (database filename) and
    ``"--rows"`` (optional row limit; converted to ``int`` when not ``None``).
    For each table the name and row count are logged and the table is printed
    through ``pyprel.Table``. The program is terminated at the end.
    """
    global program, log
    program = propyte.Program(
        options=options, name=name, version=version, logo=logo)
    # The logger is imported only after the Program object has been created.
    from propyte import log

    database_path = options["--database"]
    rows_option = options["--rows"]
    rows_limit = None if rows_option is None else int(rows_option)

    log.info("\naccess database {0}".format(database_path))
    database = dataset.connect("sqlite:///{0}".format(database_path))

    for table_name in database.tables:
        log.info("access table \"{0}\"".format(table_name))
        row_count = len(database[table_name])
        log.info("number of rows in table \"{0}\": {1}".format(
            table_name, str(row_count)))
        log.info("\ntable {0} printout:\n".format(table_name))

        rendered_rows = pyprel.table_dataset_database_table(
            table=database[table_name], rows_limit=rows_limit)
        print(pyprel.Table(contents=rendered_rows))

    program.terminate()
Exemplo n.º 2
0
def main(options):
    """Print one database table and its metadata table.

    ``options`` is indexed with ``"--database"`` (database filename),
    ``"--table"`` (table to print), ``"--tablemetadata"`` (metadata table to
    print afterwards) and ``"--rows"`` (optional row limit; converted to
    ``int`` when not ``None``). The main table printout is restricted to the
    attributes ``utterance``, ``response`` and ``exchangeReference``.
    The program is terminated at the end.
    """
    global program, log
    program = propyte.Program(
        options=options, name=name, version=version, logo=logo)
    # The logger is imported only after the Program object has been created.
    from propyte import log

    database_path = options["--database"]
    table_name = options["--table"]
    metadata_table_name = options["--tablemetadata"]
    rows_option = options["--rows"]
    rows_limit = None if rows_option is None else int(rows_option)

    log.info("\naccess database {0}".format(database_path))
    database = dataset.connect("sqlite:///{0}".format(database_path))

    log.info("access table \"{0}\"".format(table_name))
    row_count = len(database[table_name])
    log.info("number of rows in table \"{0}\": {1}".format(
        table_name, str(row_count)))
    log.info("\ntable {0} printout:\n".format(table_name))

    exchange_rows = pyprel.table_dataset_database_table(
        table=database[table_name],
        include_attributes=["utterance", "response", "exchangeReference"],
        rows_limit=rows_limit)
    print(pyprel.Table(contents=exchange_rows))

    log.info("database metadata:")

    metadata_rows = pyprel.table_dataset_database_table(
        table=database[metadata_table_name])
    print(pyprel.Table(contents=metadata_rows))

    program.terminate()
Exemplo n.º 3
0
def main(options):
    """Run the pyprel database example.

    ``options`` is indexed with ``"--database"`` (database filename) and
    ``"--table"`` (table name). The database is created when its file does
    not exist yet, two example rows are inserted into the table, and the
    table list, the table's columns, the row with id 1 and finally the whole
    table are printed.
    """
    filename_database = options["--database"]
    name_table = options["--table"]

    print("\npyprel database examples\n")

    # Create the database only when it is missing, and create the file that
    # was actually requested (the previous check was inverted and always
    # used the fixed name "database.db").
    if not os.path.exists(filename_database):
        print("create database {database}".format(database=filename_database))
        create_database(filename=filename_database)

    print("access database {filename}".format(filename=filename_database))
    database = dataset.connect("sqlite:///{filename_database}".format(
        filename_database=filename_database))
    table = database[name_table]

    print("add data to database")
    table.insert(
        dict(name="Legolas Greenleaf",
             age=2000,
             country="Mirkwood",
             uuid4=str(uuid.uuid4())))
    table.insert(
        dict(name="Cody Rapol",
             age=30,
             country="USA",
             activity="DDR",
             uuid4=str(uuid.uuid4())))

    print("""
database tables:\n{tables}
\ntable {table} columns:\n{columns}
\ntable {table} row one:\n{row}
""".format(tables=database.tables,
           table=name_table,
           columns=database[name_table].columns,
           row=list(table.find(id="1"))))

    print("table {table} printout:\n".format(table=name_table))

    print(
        pyprel.Table(contents=pyprel.table_dataset_database_table(
            table=database[name_table])))
Exemplo n.º 4
0
def display_refresh():
    """Redraw the room overview table on the terminal.

    Collects the display names of the joined members and, for the events
    returned by the room, their timestamps, sender names and message bodies.
    Each column is padded with empty strings (members at the bottom, event
    columns at the top) based on roughly half the terminal height, then the
    screen is cleared, the logo is shown and the table is printed.
    """
    member_names = [
        user.get_display_name() for user in scalar.room.get_joined_members()
    ]
    # NOTE(review): this skips the first ten events rather than keeping the
    # last ten -- confirm that is intended.
    shown_events = scalar.room.get_events()[10:]
    timestamps = [
        datetime.datetime.fromtimestamp(event["origin_server_ts"] / 1000)
        for event in shown_events
    ]
    raw_senders = [event["sender"] for event in shown_events]
    # Keep only the part of the sender between "@" and ":".
    sender_names = [raw.split("@")[1].split(":")[0] for raw in raw_senders]
    bodies = [event["content"]["body"] for event in shown_events]

    height_contents = int(0.5 * pyprel.terminal_height() - 0.5) - 3

    def blanks_for(column):
        # One fewer blank than the remaining height; a non-positive count
        # yields no padding at all.
        return [""] * (height_contents - len(column) - 1)

    member_names = member_names + blanks_for(member_names)
    timestamps = blanks_for(timestamps) + timestamps
    sender_names = blanks_for(sender_names) + sender_names
    bodies = blanks_for(bodies) + bodies

    table_contents = [["MEMBERS", "DATETIMES", "SENDERS", "MESSAGES"]]
    table_contents.extend(
        list(row)
        for row in zip(member_names, timestamps, sender_names, bodies))

    terminal_flash_clear()
    logo()
    table = pyprel.Table(contents=table_contents,
                         column_delimiter=" ",
                         row_delimiter=" ",
                         table_width_requested=None)
    print(table)
Exemplo n.º 5
0
def main(options):
    """Compare an expression's word vector with a set of stored word vectors.

    ``options`` is indexed with ``"--expression"`` (the natural language
    expression to examine) and ``"--wordvectormodel"`` (filename passed to
    ``abstraction.load_word_vector_model``). The expression is converted to a
    word vector and compared with each hard-coded stored expression by
    absolute magnitude difference and by angle. The comparison table is
    printed, and a "translation" (the stored expression with the smallest
    angle) is logged when the two criteria roughly agree; otherwise an error
    is logged. The program is terminated at the end.
    """

    global program
    program = propyte.Program(options=options,
                              name=name,
                              version=version,
                              logo=logo)
    # The logger is imported only after the Program object has been created.
    global log
    from propyte import log

    # access options and arguments
    expression = options["--expression"]
    word_vector_model = options["--wordvectormodel"]

    # Define a dictionary of natural language expressions and word vectors.
    stored_expressions = {
        "This is a test.":
        numpy.array([
            -0.3828682, -0.36397889, 0.46676171, 0.32530552, 0.20376287,
            -0.41326976, -0.58228827, 0.05073506, -0.29834735, 0.62523258,
            0.48247468, 0.63565594, 0.61466146, -0.05790123, 0.49383548,
            0.17871667, 0.26640224, -0.05172781, -0.43991241, 0.8027305,
            0.13174312, -0.70332521, -0.56575418, -0.21705133, -0.93002945,
            0.04151381, -0.15113404, 0.06264834, 0.03022593, -0.00822711,
            -0.23755306, -0.9215641, 0.21348992, 0.38396335, 0.3020944,
            -0.08034055, -0.36891997, -0.86551458, -1.02402425, 0.03633916,
            0.34436008, 0.43058148, -0.32728755, 0.50974292, -0.31518513,
            -0.63085675, -0.40564051, 0.30009648, -0.06426927, -0.6588546,
            0.06724164, 0.08611558, -0.13476974, 0.43107161, -0.26038069,
            0.03187743, 0.05931987, 0.28155532, 0.3636784, -0.76867509,
            -0.2253349, -0.77433741, 0.01924273, 0.63751495, 0.03874384,
            0.28651205, 0.14867969, -0.2256701, 0.23747981, 0.12383705,
            0.27097231, -0.06902695, 0.06664967, 0.05863822, -0.06882346,
            0.59539717, 0.08472043, -0.13579898, -0.31311297, -0.68136102,
            0.33296993, 0.26578408, -0.55723149, 0.38583612, -0.18033087,
            -0.50730389, 0.39173275, 0.57567608, -0.42063141, 0.22387385,
            0.473548, 0.41959459, 0.34881225, 0.1939103, -0.54997987,
            0.30737191, -0.6659264, 0.0437102, -0.11230323, -0.13493723
        ],
                    dtype=numpy.float32),
        "All those moments will be lost in time.":
        numpy.array([
            -1.19203818e+00, -2.22961619e-01, 6.69643760e-01, 3.70975524e-01,
            -6.15832031e-01, -4.36573088e-01, -6.77924156e-01, 6.26985192e-01,
            1.36510044e-01, 1.09196387e-01, 7.61598766e-01, 7.17226386e-01,
            -1.08178332e-01, -1.00655735e+00, 7.45964348e-01, 1.64966106e-01,
            5.85332870e-01, -3.83911550e-01, -6.85201228e-01, 1.31213856e+00,
            8.04567218e-01, -1.28810382e+00, -2.52677381e-01, -9.27993536e-01,
            -4.17307138e-01, -4.56952095e-01, -7.27599859e-01, 7.54008472e-01,
            6.67124987e-04, 2.75971144e-01, 2.75658131e-01, -6.79417193e-01,
            -1.73686996e-01, 8.78942013e-01, 4.39480424e-01, -6.37802243e-01,
            -6.99860230e-02, -7.99779966e-02, -7.58146644e-02, 8.09784770e-01,
            -3.71645451e-01, 1.04973994e-01, -1.34749603e+00, 2.96185315e-01,
            5.85593104e-01, -1.40544206e-01, -3.77467513e-01, 3.46597135e-01,
            2.56733745e-01, 4.04421866e-01, 1.57907709e-01, 3.00843865e-01,
            -5.41967154e-01, 5.51929235e-01, -1.69145897e-01, 4.42785203e-01,
            -2.69805342e-02, 1.31654418e+00, 3.19460958e-01, 5.08862257e-01,
            3.44371676e-01, -6.95496798e-01, 4.88163918e-01, 2.55316138e-01,
            5.03436685e-01, 9.24195647e-02, -2.38671958e-01, -8.97032142e-01,
            -3.73697281e-03, 2.99875826e-01, 1.65674359e-01, 2.01489821e-01,
            1.58179402e-02, 1.30668238e-01, -1.56954467e-01, -2.88258016e-01,
            6.76668346e-01, -3.77742261e-01, 2.20978767e-01, -6.34561360e-01,
            8.33457410e-01, -2.13193640e-01, -6.35235757e-02, 1.89480215e-01,
            6.02166615e-02, -6.64785147e-01, 1.07347333e+00, 6.22629285e-01,
            -4.63467717e-01, -1.13483839e-01, 3.43968630e-01, 2.75979757e-01,
            -1.28710240e-01, 1.50670230e+00, -3.10248852e-01, 3.29222828e-01,
            1.64443821e-01, -7.78683364e-01, -9.80837345e-02, -1.07415296e-01
        ],
                    dtype=numpy.float32),
        "All those moments were lost in time.":
        numpy.array([
            -0.94025505, -0.45476836, 0.41891485, 1.06683254, -0.49607083,
            -0.60043317, -0.55656326, 0.05368682, 0.20896676, 0.19261286,
            0.51067233, 0.01298623, -0.67276001, -0.51130211, 0.61433661,
            0.03579944, 0.4515644, -0.19222273, -0.3919456, 0.65209424,
            0.98329031, -0.78390068, -0.0611292, -0.88086104, 0.25153416,
            -0.16051427, -0.33223695, 0.86147106, -0.19569418, -0.21456225,
            0.27583197, -0.65764415, -0.76533222, 0.78306556, 0.84534264,
            -0.26408321, 0.04312199, -0.00636051, 0.1322974, 0.72321951,
            -0.01186696, 0.40505514, -0.87730938, 0.58147532, 0.89738142,
            -0.16748536, -0.38406748, -0.12007161, 0.49123141, 0.48998365,
            0.15616624, 0.52637529, -0.66329396, 0.10376941, -0.33025965,
            0.04188792, 0.30536407, 0.38240519, 0.01627355, 1.23012972,
            0.46352714, -0.74617827, 0.43505573, -0.16246299, 0.34668511,
            -0.02247265, -0.34742412, -0.64483654, -0.2243523, 0.04222834,
            0.42057285, 0.22310457, 0.36833102, -0.05716853, -0.44688487,
            -0.51298815, 0.61859602, -0.21154809, -0.08168469, -0.15004104,
            0.21371906, 0.21713886, 0.21935812, 0.04912762, 0.02854752,
            -0.55747426, 0.70036995, 0.20306921, -0.46556181, -0.10637223,
            0.60909081, 0.55366743, -0.22907487, 1.13089538, 0.34430629,
            0.35133895, 0.085365, -0.58662325, -0.13062993, -0.04200239
        ],
                    dtype=numpy.float32),
        "All those moments are lost in time.":
        numpy.array([
            -0.78943789, -0.30322614, 0.3780162, 0.80896467, -0.42042252,
            -0.64176518, -0.51211309, -0.1537444, -0.04233316, 0.07710438,
            0.66949254, 0.37771451, -0.74869132, -0.55132926, 0.53695548,
            -0.11229508, 0.6673997, -0.34724045, -0.42173663, 0.7451877,
            1.01433206, -0.85418928, -0.31583607, -0.6812892, 0.42722669,
            -0.43322188, -0.35293943, 0.7662127, -0.30090365, -0.13694993,
            -0.04172039, -0.65059775, -0.62617165, 0.71341687, 0.82349646,
            -0.31194365, 0.00356466, -0.32218212, 0.15857732, 0.82880032,
            0.0566355, 0.43106011, -1.01921201, 0.51658779, 0.8068108,
            -0.09396499, -0.37920368, -0.08726061, 0.29975161, 0.25999272,
            0.23571083, 0.24800834, -0.73045135, 0.19150458, -0.19696848,
            -0.11186107, 0.1336731, 0.33246318, 0.22474274, 1.15420532,
            0.39482915, -0.70385826, 0.54841375, -0.03638301, 0.54499787,
            0.02484709, -0.2070619, -0.69282937, -0.21465099, 0.11578664,
            0.22713676, 0.21237181, 0.2007356, 0.14489903, -0.37357002,
            -0.50091666, 0.59818357, -0.36113665, 0.06037673, -0.26377741,
            0.31544513, -0.23714744, -0.01429842, 0.17592101, -0.16280818,
            -0.58340323, 0.63590413, 0.31803992, -0.47035503, -0.17544734,
            0.66008455, 0.77849454, -0.04235193, 1.29202402, 0.12573826,
            0.20377615, -0.08164676, -0.41151166, -0.1280518, 0.02905136
        ],
                    dtype=numpy.float32),
    }

    model_word2vec = abstraction.load_word_vector_model(
        filename=word_vector_model)

    working_expression_NL = expression

    # Convert the expression to a word vector.
    working_expression_WV =\
        abstraction.convert_sentence_string_to_word_vector(
            sentence_string = working_expression_NL,
            model_word2vec  = model_word2vec
        )
    log.info(
        "word vector representation of expression \"{working_expression_NL}\":"
        "\n{working_expression_WV}".format(
            working_expression_NL=working_expression_NL,
            working_expression_WV=working_expression_WV))

    # Define table headings.
    # NOTE(review): the third heading contains the typo "amd" (for "and").
    table_contents = [[
        "working expression natural language",
        "stored expression natural language",
        "absolute magnitude difference between working amd stored expression "
        "word vectors",
        "angle between working and stored expression word vectors"
    ]]

    # Compare the expression word vector representation to existing word
    # vectors.
    # The three lists below are filled in lockstep, one entry per stored
    # expression, so that an index into one is valid for the others.
    magnitude_differences = []
    angles = []
    stored_expressions_NL_list = []
    magnitude_working_expression_WV = datavision.magnitude(
        working_expression_WV)
    for stored_expression_NL in stored_expressions:
        stored_expression_WV = stored_expressions[stored_expression_NL]
        magnitude_stored_expression_WV = datavision.magnitude(
            stored_expression_WV)
        magnitude_difference_working_expression_WV_stored_expression_WV = abs(
            magnitude_working_expression_WV - magnitude_stored_expression_WV)
        angle_working_expression_WV_stored_expression_WV = datavision.angle(
            working_expression_WV, stored_expression_WV)
        # Store comparison results in lists.
        magnitude_differences.append(
            magnitude_difference_working_expression_WV_stored_expression_WV)
        angles.append(angle_working_expression_WV_stored_expression_WV)
        stored_expressions_NL_list.append(stored_expression_NL)
        # Build table.
        table_contents.append([
            str(working_expression_NL),
            str(stored_expression_NL),
            str(magnitude_difference_working_expression_WV_stored_expression_WV
                ),
            str(angle_working_expression_WV_stored_expression_WV)
        ])

    # Record table.
    print(pyprel.Table(contents=table_contents))

    log.info("")

    # Accept the smallest-angle candidate as the translation only when the
    # position of the smallest magnitude difference and the position of the
    # smallest angle are closer together than a quarter of the number of
    # stored expressions. Note that this compares list positions, not the
    # measured values themselves.
    index_minimum_magnitude_differences =\
        magnitude_differences.index(min(magnitude_differences))
    index_minimum_angles = angles.index(min(angles))
    index_minimum_match_width = len(angles) / 4
    if abs(index_minimum_magnitude_differences -
           index_minimum_angles) < index_minimum_match_width:
        log.info("translation: {translation_expression_NL}".format(
            translation_expression_NL =\
                stored_expressions_NL_list[index_minimum_angles]
        ))
    else:
        log.error("unable to translate")

    log.info("")

    program.terminate()
Exemplo n.º 6
0
def most_similar_expression(expression=None,
                            expressions=None,
                            model_word2vec=None,
                            detail=True):
    """Return the candidate expression closest in angle to ``expression``.

    ``expression`` and every entry of ``expressions`` are converted to word
    vectors with ``abstraction.convert_sentence_string_to_word_vector`` using
    ``model_word2vec``. The candidate whose vector has the smallest angle to
    the vector of ``expression`` is returned (the first one on ties).
    Duplicate candidates are considered once.

    When ``detail`` is true, a comparison table (magnitude difference and
    angle per candidate) is printed as well.

    Returns ``None`` when ``expressions`` is ``None`` or empty, since there
    is nothing to compare against.
    """
    # Materialize once so that generators work and emptiness can be checked
    # before any model work is done.
    candidates = [] if expressions is None else list(expressions)
    if not candidates:
        return None

    working_expression_NL = expression

    # Convert the expression to a word vector.
    working_expression_WV = abstraction.convert_sentence_string_to_word_vector(
        sentence_string=working_expression_NL,
        model_word2vec=model_word2vec)

    # Convert every candidate; keying by text drops duplicate candidates.
    stored_expressions = {}
    for candidate in candidates:
        stored_expressions[candidate] = (
            abstraction.convert_sentence_string_to_word_vector(
                sentence_string=candidate,
                model_word2vec=model_word2vec))

    # Magnitudes only feed the printed table, so they are skipped entirely
    # when no table is requested.
    table_contents = []
    magnitude_working_expression_WV = None
    if detail:
        table_contents.append([
            "working expression natural language",
            "stored expression natural language",
            "absolute magnitude difference between working and stored "
            "expression word vectors",
            "angle between working and stored expression word vectors"
        ])
        magnitude_working_expression_WV = datavision.magnitude(
            working_expression_WV)

    # Compare the expression word vector to every candidate word vector.
    angles = []
    stored_expressions_NL_list = []
    for stored_expression_NL, stored_expression_WV in (
            stored_expressions.items()):
        if detail:
            magnitude_difference = abs(
                magnitude_working_expression_WV -
                datavision.magnitude(stored_expression_WV))
        angle = datavision.angle(working_expression_WV, stored_expression_WV)
        angles.append(angle)
        stored_expressions_NL_list.append(stored_expression_NL)
        if detail:
            table_contents.append([
                str(working_expression_NL),
                str(stored_expression_NL),
                str(magnitude_difference),
                str(angle)
            ])

    if detail:
        # Record table.
        print(pyprel.Table(contents=table_contents))

    # The smallest angle selects the most similar candidate.
    index_minimum_angles = angles.index(min(angles))
    return stored_expressions_NL_list[index_minimum_angles]
def main(options):
    """Report and plot the results stored in a grid search map file.

    ``options`` is indexed with ``"--gridsearchfile"``; the file is loaded
    with ``shijian.import_object`` and is read through the keys ``"epoch"``,
    ``"hidden_nodes"``, ``"score_training"`` and ``"score_test"``, each
    indexed per entry.

    The function logs a table of all entries, saves a parallel coordinates
    plot of all entries (``parallel_coordinates.png``), saves a plot of test
    score against epochs per architecture (``hyperparameter_map.eps``), saves
    a parallel coordinates plot of the 15 best-scoring models
    (``15_best_models_parallel_coordinates.png``) and logs a table of the 3
    best-scoring models. The program is terminated at the end.
    """
    global program
    program = propyte.Program(options=options,
                              name=name,
                              version=version,
                              logo=logo)
    # The logger is imported only after the Program object has been created.
    global log
    from propyte import log

    log.info("")

    # access options and arguments
    grid_search_filename = options["--gridsearchfile"]

    # load grid search map
    grid_search_map = shijian.import_object(filename=grid_search_filename)

    number_of_entries = len(grid_search_map["epoch"])
    log.info("number of entries: {number_of_entries}".format(
        number_of_entries=number_of_entries))

    # table

    table_contents = [[
        "epoch", "architecture", "score training", "score testing"
    ]]
    for index in range(0, number_of_entries):
        table_contents.append([
            str(grid_search_map["epoch"][index]),
            str(grid_search_map["hidden_nodes"][index]),
            str(grid_search_map["score_training"][index]),
            str(grid_search_map["score_test"][index])
        ])
    log.info("\ngrid search map:\n")
    log.info(pyprel.Table(contents=table_contents, ))

    # parallel coordinates plot

    datasets = []
    for index in range(0, number_of_entries):
        row = []
        # Pad each architecture with zeros up to five entries so that all
        # rows have the same number of coordinates.
        architecture_padded = grid_search_map["hidden_nodes"][index] + [0] * (
            5 - len(grid_search_map["hidden_nodes"][index]))
        row.append(grid_search_map["epoch"][index])
        row.extend(architecture_padded)
        row.append(grid_search_map["score_training"][index])
        row.append(grid_search_map["score_test"][index])
        datasets.append(row)

    datavision.save_parallel_coordinates_matplotlib(
        datasets[::-1], filename="parallel_coordinates.png")

    # plot

    architectures = shijian.unique_list_elements(
        grid_search_map["hidden_nodes"])

    # Map each architecture (as a string) to its [epoch, test score] pairs.
    architecture_epoch_score = {}
    for architecture in architectures:
        architecture_epoch_score[str(architecture)] = []
        for index in range(0, number_of_entries):
            if grid_search_map["hidden_nodes"][index] == architecture:
                architecture_epoch_score[str(architecture)].append([
                    grid_search_map["epoch"][index],
                    grid_search_map["score_test"][index]
                ])

    figure = matplotlib.pyplot.figure()
    figure.set_size_inches(10, 10)
    axes = figure.add_subplot(1, 1, 1)
    axes.set_xscale("log")
    figure.suptitle("hyperparameter map", fontsize=20)
    matplotlib.pyplot.xlabel("epochs")
    matplotlib.pyplot.ylabel("training test score")

    # items() works on both Python 2 and Python 3; iteritems() does not
    # exist on Python 3 dictionaries.
    for key, value in architecture_epoch_score.items():
        epochs = [element[0] for element in value]
        score_test = [element[1] for element in value]
        matplotlib.pyplot.plot(epochs, score_test, label=key)

    matplotlib.pyplot.legend(loc="center left",
                             bbox_to_anchor=(1, 0.5),
                             fontsize=10)

    matplotlib.pyplot.savefig("hyperparameter_map.eps",
                              bbox_inches="tight",
                              format="eps")

    # find best-scoring models

    # Find the 15 best scores and plot them using parallel coordinates.
    best_models = sorted(zip(grid_search_map["score_test"],
                             grid_search_map["score_training"],
                             grid_search_map["hidden_nodes"]),
                         reverse=True)[:15]
    datasets = []
    for model in best_models:
        row = []
        architecture_padded = model[2] + [0] * (5 - len(model[2]))
        row.extend(architecture_padded)
        row.append(model[1])
        row.append(model[0])
        datasets.append(row)

    datavision.save_parallel_coordinates_matplotlib(
        datasets, filename="15_best_models_parallel_coordinates.png")

    # Find the 3 best scores.
    best_models = sorted(zip(grid_search_map["score_test"],
                             grid_search_map["hidden_nodes"]),
                         reverse=True)[:3]

    # table
    table_contents = [["architecture", "score testing"]]
    for model in best_models:
        table_contents.append([str(model[1]), str(model[0])])
    log.info("\nbest-scoring models:\n")
    log.info(pyprel.Table(contents=table_contents, ))

    log.info("")

    program.terminate()
Exemplo n.º 8
0
def main(options):

    global program
    program = propyte.Program(options=options,
                              name=name,
                              version=version,
                              logo=logo)
    global log
    from propyte import log

    print("")

    filename_ROOT = options["--fileroot"]
    filename_CSV = options["--filecsv"]
    selection = options["--selection"]
    class_label = int(options["--classlabel"])
    name_tree = options["--tree"]
    maximum_number_of_events = None if options["--maxevents"].lower() == "none"\
                                  else int(options["--maxevents"])
    include_headings = options["--headings"].lower() == "true"

    if not os.path.isfile(os.path.expandvars(filename_ROOT)):
        log.error("file {filename} not found".format(filename=filename_ROOT))
        program.terminate()

    if os.path.isfile(os.path.expandvars(filename_CSV)):
        log.warning(
            "CSV file {filename} exists -- *append* data to file".format(
                filename=filename_CSV))
        print("")
        append = True
    else:
        append = False

    file_ROOT = abstraction.open_ROOT_file(filename_ROOT)
    tree = file_ROOT.Get(name_tree)
    number_of_events = tree.GetEntries()

    file_CSV = open(filename_CSV, "a")
    writer = csv.writer(file_CSV, delimiter=",")

    log.info(
        textwrap.dedent("""
        input ROOT file: {filename_ROOT}
        output CSV file: {filename_CSV}
        selection:       {selection}
        class label:     {class_label}
        """.format(filename_ROOT=filename_ROOT,
                   filename_CSV=filename_CSV,
                   selection=selection,
                   class_label=class_label)))

    print("")
    log.info("save variables of events to CSV {filename}".format(
        filename=filename_CSV))
    print("")

    progress = shijian.Progress()
    progress.engage_quick_calculation_mode()
    index_selected = 0
    detail = True
    for index, event in enumerate(tree):
        if select_event(event=event, selection=selection):
            index_selected = index_selected + 1
            if                                           \
                maximum_number_of_events is not None and \
                index_selected > maximum_number_of_events:
                break
            line = [
                #Variable_ttHbb(event = event, name = "Aplan_bjets"),
                Variable_ttHbb(event=event, name="Aplan_jets"),  #
                Variable_ttHbb(event=event, name="Centrality_all"),  #
                #Variable_ttHbb(event = event, name = "ClassifBDTOutput_6jsplit"),
                #Variable_ttHbb(event = event, name = "ClassifBDTOutput_basic"),
                #Variable_ttHbb(event = event, name = "ClassifBDTOutput_withReco_6jsplit"),
                #Variable_ttHbb(event = event, name = "ClassifBDTOutput_withReco_basic"),
                #Variable_ttHbb(event = event, name = "ClassifHPLUS_Semilep_HF_BDT200_Output"),
                Variable_ttHbb(event=event, name="dEtajj_MaxdEta"),  #
                Variable_ttHbb(event=event, name="dRbb_avg"),  #
                #Variable_ttHbb(event = event, name = "dRbb_MaxM"),
                Variable_ttHbb(event=event, name="dRbb_MaxPt"),  #
                #Variable_ttHbb(event = event, name = "dRbb_min"),
                #Variable_ttHbb(event = event, name = "dRbj_Wmass"),
                #Variable_ttHbb(event = event, name = "dRHl_MaxdR"),
                #Variable_ttHbb(event = event, name = "dRHl_MindR"),
                #Variable_ttHbb(event = event, name = "dRjj_min"),
                #Variable_ttHbb(event = event, name = "dRlepbb_MindR"),
                #Variable_ttHbb(event = event, name = "dRlj_MindR"),
                #Variable_ttHbb(event = event, name = "dRuu_MindR"),
                Variable_ttHbb(event=event, name="H1_all"),  #
                #Variable_ttHbb(event = event, name = "H4_all"),
                #Variable_ttHbb(event = event, name = "HhadT_nJets"),
                #Variable_ttHbb(event = event, name = "HiggsbbM"),
                #Variable_ttHbb(event = event, name = "HiggsjjM"),
                #Variable_ttHbb(event = event, name = "HT_all"),
                #Variable_ttHbb(event = event, name = "HT_jets"),
                #Variable_ttHbb(event = event, name = "Mbb_MaxM"),
                #Variable_ttHbb(event = event, name = "Mbb_MaxPt"),
                Variable_ttHbb(event=event, name="Mbb_MindR"),  #
                #Variable_ttHbb(event = event, name = "Mbj_MaxPt"),
                #Variable_ttHbb(event = event, name = "Mbj_MindR"),
                #Variable_ttHbb(event = event, name = "Mbj_Wmass"),
                #Variable_ttHbb(event = event, name = "met_met"),
                #Variable_ttHbb(event = event, name = "met_phi"),
                #Variable_ttHbb(event = event, name = "MHiggs"),
                #Variable_ttHbb(event = event, name = "Mjj_HiggsMass"),
                #Variable_ttHbb(event = event, name = "Mjjj_MaxPt"),
                #Variable_ttHbb(event = event, name = "Mjj_MaxPt"),
                #Variable_ttHbb(event = event, name = "Mjj_MindR"),
                #Variable_ttHbb(event = event, name = "Mjj_MinM"),
                #Variable_ttHbb(event = event, name = "mu"),
                #Variable_ttHbb(event = event, name = "Muu_MindR"),
                #Variable_ttHbb(event = event, name = "NBFricoNN_dil"),
                #Variable_ttHbb(event = event, name = "nBTags"),
                #Variable_ttHbb(event = event, name = "nBTags30"),
                #Variable_ttHbb(event = event, name = "nBTags50"),
                #Variable_ttHbb(event = event, name = "nBTags60"),
                #Variable_ttHbb(event = event, name = "nBTags70"),
                #Variable_ttHbb(event = event, name = "nBTags77"),
                #Variable_ttHbb(event = event, name = "nBTags80"),
                #Variable_ttHbb(event = event, name = "nBTags85"),
                #Variable_ttHbb(event = event, name = "nBTags90"),
                #Variable_ttHbb(event = event, name = "nBTagsFlatBEff_30"),
                #Variable_ttHbb(event = event, name = "nBTagsFlatBEff_40"),
                #Variable_ttHbb(event = event, name = "nBTagsFlatBEff_50"),
                #Variable_ttHbb(event = event, name = "nBTagsFlatBEff_60"),
                #Variable_ttHbb(event = event, name = "nBTagsFlatBEff_70"),
                #Variable_ttHbb(event = event, name = "nBTagsFlatBEff_77"),
                #Variable_ttHbb(event = event, name = "nBTagsFlatBEff_85"),

                #Variable_ttHbb(event = event, name = "nElectrons"),
                #Variable_ttHbb(event = event, name = "nHFJets"),
                Variable_ttHbb(event=event, name="NHiggs_30"),  #
                #Variable_ttHbb(event = event, name = "Njet_pt40"),
                #Variable_ttHbb(event = event, name = "Njet_pt40"),
                #Variable_ttHbb(event = event, name = "nJets"),
                #Variable_ttHbb(event = event, name = "nMuons"),
                #Variable_ttHbb(event = event, name = "nPrimaryVtx"),

                #Variable_ttHbb(event = event, name = "pT_jet3"),
                Variable_ttHbb(event=event, name="pT_jet5"),  #
                #Variable_ttHbb(event = event, name = "pTuu_MindR"),
                #Variable_ttHbb(event = event, name = "semilepMVAreco_b1higgsbhadtop_dR"),
                #Variable_ttHbb(event = event, name = "semilepMVAreco_bbhiggs_dR"),
                #Variable_ttHbb(event = event, name = "semilepMVAreco_BDT_output"),
                #Variable_ttHbb(event = event, name = "semilepMVAreco_BDT_output_6jsplit"),
                #Variable_ttHbb(event = event, name = "semilepMVAreco_BDT_output_truthMatchPattern"),
                #Variable_ttHbb(event = event, name = "semilepMVAreco_BDT_withH_output"),
                #Variable_ttHbb(event = event, name = "semilepMVAreco_BDT_withH_output_6jsplit"),
                #Variable_ttHbb(event = event, name = "semilepMVAreco_BDT_withH_output_truthMatchPattern"),
                #Variable_ttHbb(event = event, name = "semilepMVAreco_hadWb1Higgs_mass"),
                #Variable_ttHbb(event = event, name = "semilepMVAreco_higgsbhadtop_withH_dR"),
                #Variable_ttHbb(event = event, name = "semilepMVAreco_higgsbleptop_mass"),
                #Variable_ttHbb(event = event, name = "semilepMVAreco_higgsbleptop_withH_dR"),
                #Variable_ttHbb(event = event, name = "semilepMVAreco_higgslep_dR"),
                #Variable_ttHbb(event = event, name = "semilepMVAreco_higgsleptop_dR"),
                #Variable_ttHbb(event = event, name = "semilepMVAreco_higgs_mass"),
                #Variable_ttHbb(event = event, name = "semilepMVAreco_higgsq1hadW_mass"),
                #Variable_ttHbb(event = event, name = "semilepMVAreco_higgsttbar_withH_dR"),
                #Variable_ttHbb(event = event, name = "semilepMVAreco_leptophadtop_dR"),
                #Variable_ttHbb(event = event, name = "semilepMVAreco_leptophadtop_withH_dR"),
                #Variable_ttHbb(event = event, name = "semilepMVAreco_Ncombinations"),
                #Variable_ttHbb(event = event, name = "semilepMVAreco_nuApprox_recoBDT"),
                #Variable_ttHbb(event = event, name = "semilepMVAreco_nuApprox_recoBDT_6jsplit"),
                #Variable_ttHbb(event = event, name = "semilepMVAreco_nuApprox_recoBDT_withH"),
                #Variable_ttHbb(event = event, name = "semilepMVAreco_nuApprox_recoBDT_withH_6jsplit"),
                #Variable_ttHbb(event = event, name = "semilepMVAreco_ttH_Ht_withH"),
                #Variable_ttHbb(event = event, name = "ttHF_mva_discriminant"),

                #Variable_ttHbb(event = event, name = "el_d0sig[0]"),
                #Variable_ttHbb(event = event, name = "el_delta_z0_sintheta[0]"),
                #Variable_ttHbb(event = event, name = "el_e[0]"),
                #Variable_ttHbb(event = event, name = "el_eta[0]"),
                #Variable_ttHbb(event = event, name = "el_phi[0]"),
                #Variable_ttHbb(event = event, name = "el_pt[0]"),
                #Variable_ttHbb(event = event, name = "el_topoetcone20[0]"),

                #Variable_ttHbb(event = event, name = "mu_d0sig[0]"),
                #Variable_ttHbb(event = event, name = "mu_delta_z0_sintheta[0]"),
                #Variable_ttHbb(event = event, name = "mu_e[0]"),
                #Variable_ttHbb(event = event, name = "mu_eta[0]"),
                #Variable_ttHbb(event = event, name = "mu_phi[0]"),
                #Variable_ttHbb(event = event, name = "mu_pt[0]"),
                #Variable_ttHbb(event = event, name = "mu_topoetcone20[0]"),

                #Variable_ttHbb(event = event, name = "jet_e[0]"),
                #Variable_ttHbb(event = event, name = "jet_eta[0]"),
                #Variable_ttHbb(event = event, name = "jet_jvt[0]"),
                #Variable_ttHbb(event = event, name = "jet_mv2c10[0]"),
                #Variable_ttHbb(event = event, name = "jet_mv2c20[0]"),
                #Variable_ttHbb(event = event, name = "jet_phi[0]"),
                #Variable_ttHbb(event = event, name = "jet_pt[0]"),
                #Variable_ttHbb(event = event, name = "jet_semilepMVAreco_recoBDT_cand[0]"),
                #Variable_ttHbb(event = event, name = "jet_semilepMVAreco_recoBDT_cand_6jsplit[0]"),
                #Variable_ttHbb(event = event, name = "jet_semilepMVAreco_recoBDT_withH_cand[0]"),
                #Variable_ttHbb(event = event, name = "jet_semilepMVAreco_recoBDT_withH_cand_6jsplit[0]"),

                #Variable_ttHbb(event = event, name = "jet_e[1]"),
                #Variable_ttHbb(event = event, name = "jet_eta[1]"),
                #Variable_ttHbb(event = event, name = "jet_jvt[1]"),
                #Variable_ttHbb(event = event, name = "jet_mv2c10[1]"),
                #Variable_ttHbb(event = event, name = "jet_mv2c20[1]"),
                #Variable_ttHbb(event = event, name = "jet_phi[1]"),
                #Variable_ttHbb(event = event, name = "jet_pt[1]"),
                #Variable_ttHbb(event = event, name = "jet_semilepMVAreco_recoBDT_cand[1]"),
                #Variable_ttHbb(event = event, name = "jet_semilepMVAreco_recoBDT_cand_6jsplit[1]"),
                #Variable_ttHbb(event = event, name = "jet_semilepMVAreco_recoBDT_withH_cand[1]"),
                #Variable_ttHbb(event = event, name = "jet_semilepMVAreco_recoBDT_withH_cand_6jsplit[1]"),

                #Variable_ttHbb(event = event, name = "jet_e[2]"),
                #Variable_ttHbb(event = event, name = "jet_eta[2]"),
                #Variable_ttHbb(event = event, name = "jet_jvt[2]"),
                #Variable_ttHbb(event = event, name = "jet_mv2c10[2]"),
                #Variable_ttHbb(event = event, name = "jet_mv2c20[2]"),
                #Variable_ttHbb(event = event, name = "jet_phi[2]"),
                #Variable_ttHbb(event = event, name = "jet_pt[2]"),
                #Variable_ttHbb(event = event, name = "jet_semilepMVAreco_recoBDT_cand[2]"),
                #Variable_ttHbb(event = event, name = "jet_semilepMVAreco_recoBDT_cand_6jsplit[2]"),
                #Variable_ttHbb(event = event, name = "jet_semilepMVAreco_recoBDT_withH_cand[2]"),
                #Variable_ttHbb(event = event, name = "jet_semilepMVAreco_recoBDT_withH_cand_6jsplit[2]"),

                #Variable_ttHbb(event = event, name = "jet_e[3]"),
                #Variable_ttHbb(event = event, name = "jet_eta[3]"),
                #Variable_ttHbb(event = event, name = "jet_jvt[3]"),
                #Variable_ttHbb(event = event, name = "jet_mv2c10[3]"),
                #Variable_ttHbb(event = event, name = "jet_mv2c20[3]"),
                #Variable_ttHbb(event = event, name = "jet_phi[3]"),
                #Variable_ttHbb(event = event, name = "jet_pt[3]"),
                #Variable_ttHbb(event = event, name = "jet_semilepMVAreco_recoBDT_cand[3]"),
                #Variable_ttHbb(event = event, name = "jet_semilepMVAreco_recoBDT_cand_6jsplit[3]"),
                #Variable_ttHbb(event = event, name = "jet_semilepMVAreco_recoBDT_withH_cand[3]"),
                #Variable_ttHbb(event = event, name = "jet_semilepMVAreco_recoBDT_withH_cand_6jsplit[3]"),

                # large-R jets
                #Variable_ttHbb(event = event, name = "FirstLjetM"),
                #Variable_ttHbb(event = event, name = "FirstLjetPt"),
                #Variable_ttHbb(event = event, name = "HhadT_nLjets"),
                #Variable_ttHbb(event = event, name = "HT_ljets"),
                #Variable_ttHbb(event = event, name = "NBFricoNN_ljets"),
                #Variable_ttHbb(event = event, name = "nBjetOutsideLjet"),
                #Variable_ttHbb(event = event, name = "nJetOutsideLjet"),
                #Variable_ttHbb(event = event, name = "nLjet_m100"),
                #Variable_ttHbb(event = event, name = "nLjet_m50"),
                #Variable_ttHbb(event = event, name = "nLjets"),
                #Variable_ttHbb(event = event, name = "SecondLjetM"),
                #Variable_ttHbb(event = event, name = "SecondLjetPt"),
                #Variable_ttHbb(event = event, name = "ljet_C2[0]"),
                #Variable_ttHbb(event = event, name = "ljet_D2[0]"),
                #Variable_ttHbb(event = event, name = "ljet_e[0]"),
                #Variable_ttHbb(event = event, name = "ljet_eta[0]"),
                #Variable_ttHbb(event = event, name = "ljet_m[0]"),
                #Variable_ttHbb(event = event, name = "ljet_phi[0]"),
                #Variable_ttHbb(event = event, name = "ljet_pt[0]"),
                #Variable_ttHbb(event = event, name = "ljet_sd12[0]"),
                #Variable_ttHbb(event = event, name = "ljet_sd23[0]"),
                #Variable_ttHbb(event = event, name = "ljet_tau21[0]"),
                #Variable_ttHbb(event = event, name = "ljet_tau21_wta[0]"),
                #Variable_ttHbb(event = event, name = "ljet_tau32[0]"),
                #Variable_ttHbb(event = event, name = "ljet_tau32_wta[0]"),

                #rcjet_d12,
                #rcjet_d23,
                #rcjet_e,
                #rcjet_eta,
                #rcjet_phi,
                #rcjet_pt,
                Variable_ttHbb(name="class", value=class_label)
            ]
            if detail:
                log.info("event variable details:")
                log.info(
                    "\nnumber of variables: {number}".format(number=len(line)))
                table_contents = [["variable value", "variable type"]]
                for variable in line:
                    table_contents.append(
                        [str(variable.name()),
                         str(type(variable.value()))])
                print(pyprel.Table(contents=table_contents, ))
                detail = False
            if include_headings and not append:
                headings = [variable.name() for variable in line]
                writer.writerow(headings)
                include_headings = False
            values = [variable.value() for variable in line]
            writer.writerow(values)
        print(progress.add_datum(fraction=index / number_of_events))

    print("")
    log.info(
        "{number_selected} events of {number_total} passed selection".format(
            number_selected=index_selected, number_total=index))

    print("")

    program.terminate()
def main(options):
    """
    Grid-search training epochs and hidden-layer layouts for a two-class
    classifier on SUSY-format data, then tabulate, plot and rank the results.

    For every epoch count in `epochs` and every hidden-layer combination
    drawn from `architecture` (lengths 1 to len(architecture)), a
    classifier is fitted on the training split (train_size = 0.7) and its
    accuracy is measured on both the training split and the test split.

    Arguments:
        options: mapping of command-line options; "--data" (input filename)
                 is read here and the whole mapping is given to
                 propyte.Program.

    Side effects:
        - exports the accumulated results to "grid_search_map.pkl" after
          every trained model (overwriting the previous export),
        - saves the plot "hyperparameter_map.eps",
        - prints progress, a table of all results and a table of the three
          best test scores,
        - calls program.terminate() at the end.
    """

    global program
    program = propyte.Program(options=options,
                              name=name,
                              version=version,
                              logo=logo)
    global log
    from propyte import log

    log.info("")

    # access options and arguments
    input_data_filename = options["--data"]

    # define dataset

    # Load the SUSY dataset (https://archive.ics.uci.edu/ml/datasets/SUSY).
    # The first column is the class label (1 for signal, 0 for background),
    # followed by 18 features (8 low-level features and 10 high-level features):
    #
    # - lepton 1 pT
    # - lepton 1 eta
    # - lepton 1 phi
    # - lepton 2 pT
    # - lepton 2 eta
    # - lepton 2 phi
    # - missing energy magnitude
    # - missing energy phi
    # - MET_rel
    # - axial MET
    # - M_R
    # - M_TR_2
    # - R
    # - MT2
    # - S_R
    # - M_Delta_R
    # - dPhi_r_b
    # - cos(theta_r1)

    data = abstraction.access_SUSY_dataset_format_file(input_data_filename)

    dataset = abstraction.Dataset(data=data)

    # define data

    log.info("split data for cross-validation")
    features_train, features_test, targets_train, targets_test =\
        cross_validation.train_test_split(
            dataset.features(),
            dataset.targets(),
            train_size = 0.7
        )

    # grid search

    import itertools

    epochs = [10, 100, 500, 1000]
    # NOTE(review): this list contains repeated widths, so
    # itertools.product yields identical combinations more than once and
    # those models are trained repeatedly -- confirm that this is intended.
    architecture = [200, 300, 300, 300, 200]

    grid_search_map = {
        "epoch": [],
        "hidden_nodes": [],
        "score_training": [],
        "score_test": [],
    }

    # define progress
    # itertools.product(architecture, repeat=k) yields len(architecture)**k
    # tuples, so the total number of models is known without enumerating.
    layer_counts = range(1, len(architecture) + 1)
    count_total = len(epochs) * sum(
        len(architecture) ** nodes_count for nodes_count in layer_counts)
    count = 0
    progress = shijian.Progress()
    progress.engage_quick_calculation_mode()

    for epoch in epochs:
        for nodes_count in layer_counts:
            combinations = itertools.product(architecture, repeat=nodes_count)
            for combination in combinations:
                hidden_nodes = list(combination)

                # define model

                log.info("define classification model")
                classifier = abstraction.Classification(
                    number_of_classes=2,
                    hidden_nodes=hidden_nodes,
                    epochs=epoch)

                # train model

                log.info("fit model to dataset features and targets")
                classifier._model.fit(features_train, targets_train)
                #classifier.save()

                # predict and cross-validate training

                log.info("test trained model on training dataset")
                score_training = metrics.accuracy_score(
                    classifier._model.predict(features_train), targets_train)
                score_test = metrics.accuracy_score(
                    classifier._model.predict(features_test), targets_test)
                log.info("\ntraining-testing instance complete:")
                log.info("epoch:          {epoch}".format(epoch=epoch))
                log.info("architecture:   {architecture}".format(
                    architecture=hidden_nodes))
                log.info("score training: {score_training}".format(
                    score_training=100 * score_training))
                log.info("score test:     {score_test}".format(
                    score_test=100 * score_test))
                pyprel.print_line()
                grid_search_map["epoch"].append(epoch)
                grid_search_map["hidden_nodes"].append(hidden_nodes)
                grid_search_map["score_training"].append(score_training)
                grid_search_map["score_test"].append(score_test)

                # save current grid search map so that partial results
                # survive an interrupted run
                shijian.export_object(grid_search_map,
                                      filename="grid_search_map.pkl",
                                      overwrite=True)

                # count is the number of completed models; float() keeps
                # the fraction from truncating to 0 under Python 2 integer
                # division, and the fraction reaches exactly 1 at the end.
                count += 1
                print(progress.add_datum(
                    fraction=float(count) / count_total))

    number_of_entries = len(grid_search_map["epoch"])

    # table

    table_contents = [[
        "epoch", "architecture", "score training", "score testing"
    ]]
    for index in range(0, number_of_entries):
        table_contents.append([
            str(grid_search_map["epoch"][index]),
            str(grid_search_map["hidden_nodes"][index]),
            str(grid_search_map["score_training"][index]),
            str(grid_search_map["score_test"][index])
        ])
    print("\ngrid search map:\n")
    print(pyprel.Table(contents=table_contents, ))

    # plot

    architectures = shijian.unique_list_elements(
        grid_search_map["hidden_nodes"])

    # map str(architecture) -> list of [epoch, test score] pairs
    architecture_epoch_score = {}
    for architecture in architectures:
        architecture_epoch_score[str(architecture)] = []
        for index in range(0, number_of_entries):
            if grid_search_map["hidden_nodes"][index] == architecture:
                architecture_epoch_score[str(architecture)].append([
                    grid_search_map["epoch"][index],
                    grid_search_map["score_test"][index]
                ])

    figure = matplotlib.pyplot.figure()
    figure.set_size_inches(10, 10)
    axes = figure.add_subplot(1, 1, 1)
    axes.set_xscale("log")
    figure.suptitle("hyperparameter map", fontsize=20)
    matplotlib.pyplot.xlabel("epochs")
    matplotlib.pyplot.ylabel("training test score")

    # one curve per architecture; .items() works on Python 2 and 3
    for key, value in architecture_epoch_score.items():
        curve_epochs = [element[0] for element in value]
        curve_scores = [element[1] for element in value]
        matplotlib.pyplot.plot(curve_epochs, curve_scores, label=key)

    matplotlib.pyplot.legend(loc="center right")

    matplotlib.pyplot.savefig("hyperparameter_map.eps",
                              bbox_inches="tight",
                              format="eps")

    # find best-scoring models

    # Find the 3 best scores.
    best_models = sorted(zip(grid_search_map["score_test"],
                             grid_search_map["hidden_nodes"]),
                         reverse=True)[:3]

    # table
    table_contents = [["architecture", "score testing"]]
    for model in best_models:
        table_contents.append([str(model[1]), str(model[0])])
    print("\nbest-scoring models:\n")
    print(pyprel.Table(contents=table_contents, ))

    log.info("")

    program.terminate()
Exemplo n.º 10
0
def main():
    """
    Poll nvidia-smi at a fixed interval and report GPU measurements.

    Options are parsed with docopt from the module docstring:

    - "--version": print __version__ and exit.
    - "--graphpower" / "--graphtemperature": draw a terminal graph of the
      last 20 power-draw or temperature readings (power takes precedence
      when both are set).
    - "--table": print each general reading as a pyprel table.
    - "--CSV_logging" ("true"/other): append each general reading to the
      file given by "--filepath_CSV"; when CSV logging is off, a
      pipe-delimited line is printed instead.
    - "--interval": seconds to sleep between readings.

    The loop runs until interrupted with Ctrl-C (KeyboardInterrupt).
    """

    global options
    options = docopt.docopt(__doc__)
    if options["--version"]:
        print(__version__)
        sys.exit(0)

    graph_power       =       options["--graphpower"]
    graph_temperature =       options["--graphtemperature"]
    table             =       options["--table"]
    CSV_logging       =       options["--CSV_logging"].lower() == "true"
    filepath_CSV      =       options["--filepath_CSV"]
    interval          = float(options["--interval"])

    if CSV_logging: log.info("logging to CSV " + filepath_CSV)

    command_general     = "nvidia-smi "                     \
                              "--query-gpu="                \
                                  "name,"                   \
                                  "temperature.gpu,"        \
                                  "power.draw,"             \
                                  "memory.used,"            \
                                  "memory.total,"           \
                                  "utilization.gpu "        \
                              "--format="                   \
                                  "csv,"                    \
                                  "noheader"

    command_power       = "nvidia-smi "                     \
                              "--query-gpu=power.draw "     \
                              "--format=csv,noheader"

    command_temperature = "nvidia-smi "                     \
                              "--query-gpu=temperature.gpu "\
                              "--format=csv,noheader"

    # CSV header names are kept exactly as previously written so that rows
    # appended to existing files stay aligned with their header.
    # NOTE(review): the last three names do not describe their content
    # (they hold memory.used, memory.total and utilization.gpu respectively)
    # -- consider renaming them together with any consumers of the file.
    columns_CSV = [
        "datetime",
        "temperature_C",
        "power_draw_W",
        "utilization_MiB",
        "memory_used_MiB",
        "memory_total_percentage"
    ]

    measurements = []
    try:
        while True:
            if not graph_power and not graph_temperature:
                timestamp          = datetime.datetime.utcnow()
                timestamp_string   = timestamp.strftime("%Y-%m-%dT%H%M%SZ")
                result             = subprocess.check_output(command_general.split(' ')).decode('utf-8')
                # Fields arrive in the order requested by --query-gpu:
                # name, temperature.gpu, power.draw, memory.used,
                # memory.total, utilization.gpu
                data               = [datum.strip() for datum in result.split(",")]
                temperature        = str(data[1])
                temperature_string = temperature + " °C"
                power_draw         = str(data[2])
                memory_used        = str(data[3])
                memory_total       = str(data[4])
                utilization        = str(data[5])
                if table:
                    print(pyprel.Table(
                    contents = [[
                                   timestamp_string,
                                   temperature_string,
                                   memory_used,
                                   memory_total,
                                   utilization,
                                   power_draw
                               ]]
                    ))
                if CSV_logging:
                    # Build the one-row frame directly; DataFrame.append was
                    # removed in pandas 2.0. The slices drop the unit
                    # suffixes " W", " MiB" and " %".
                    df = pd.DataFrame(
                        [{
                            "datetime"               : timestamp,
                            "temperature_C"          : temperature,
                            "power_draw_W"           : power_draw[:-2],
                            "utilization_MiB"        : memory_used[:-4],
                            "memory_used_MiB"        : memory_total[:-4],
                            "memory_total_percentage": utilization[:-2]
                        }],
                        columns = columns_CSV
                    )
                    log.info(timestamp_string + " log to CSV " + filepath_CSV)
                    # write the header only when the file is being created
                    df.to_csv(filepath_CSV, header=not os.path.isfile(filepath_CSV), index=False, mode="a")
                else:
                    temperature_string = temperature_string.rjust(5)
                    power_draw         = power_draw.rjust(8)
                    memory_used        = memory_used.rjust(8)
                    memory_total       = memory_total.rjust(8)
                    utilization        = utilization.rjust(5)
                    print(
                        "|{timestamp_string}|{temperature_string}|{power_draw}"\
                        "|{memory_used}|{memory_total}|{utilization}|".format(
                        timestamp_string   = timestamp_string,
                        temperature_string = temperature_string,
                        power_draw         = power_draw,
                        memory_used        = memory_used,
                        memory_total       = memory_total,
                        utilization        = utilization
                    ))
                time.sleep(interval)
            else:
                # at least one of the graph options is set
                if graph_power:
                    result = subprocess.check_output(command_power.split(' ')).decode('utf-8')
                    # drop the trailing unit, e.g. "35.20 W" -> "35.20"
                    result = result.strip().strip(" W")
                else:
                    result = subprocess.check_output(command_temperature.split(' ')).decode('utf-8')
                measurements.append(float(result.strip()))
                # keep a sliding window of the 20 most recent readings
                measurements = measurements[-20:]
                y = measurements
                x = range(0, len(y))
                plot = datavision.TTYFigure()
                tmp = plot.plot(x, y, marker="_o")
                print(tmp)
                time.sleep(interval)
                # ANSI escape sequence: clear the terminal before redrawing
                print(chr(27) + "[2J")
    except KeyboardInterrupt:
        print("")
Exemplo n.º 11
0
def main(options):
    """
    Summarise and plot the features of a two-class CSV dataset.

    The CSV file must contain a "class" column with values 0 and 1; in the
    histogram comparisons class 0 is labelled "ttbb" and class 1 "ttH".

    Arguments:
        options: mapping of command-line options. Read here:
            "--infile"               CSV filename,
            "--histogramcomparisons" "true" to save per-feature histograms,
            "--scattermatrix"        "true" to save a scatter matrix,
            "--eventimages"          "true" to save per-event images,
            "--numberofeventimages"  number of events per class to render,
            "--directoryplots"       output directory for histograms and
                                     the scatter matrix.

    Side effects:
        prints a table of minimum, maximum and mean per feature and class,
        optionally writes image files (event images go to the directory
        "event_images"), and calls program.terminate() at the end.
    """

    global program
    program = propyte.Program(
        options = options,
        name    = name,
        version = version,
        logo    = logo
    )
    global log
    from propyte import log

    print("")

    filename_CSV               = options["--infile"]
    make_histogram_comparisons = options["--histogramcomparisons"].lower() == "true"
    make_scatter_matrix        = options["--scattermatrix"].lower() == "true"
    make_event_images          = options["--eventimages"].lower() == "true"
    number_of_event_images     = int(options["--numberofeventimages"])
    directoryname_plots        = options["--directoryplots"]

    if not os.path.isfile(os.path.expandvars(filename_CSV)):
        log.error("file {filename} not found".format(
            filename = filename_CSV
        ))
        program.terminate()
        # program.terminate() presumably ends the process; the return makes
        # sure the missing file is never read if it does not.
        return

    log.info("read CSV from {filename}".format(filename = filename_CSV))
    data = pd.read_csv(filename_CSV)

    # every column is summarised, including the "class" column itself
    feature_names = list(data.columns)

    data_class_0 = data.loc[data["class"] == 0]
    data_class_1 = data.loc[data["class"] == 1]

    print("")
    log.info("basic feature characteristics")
    print("")

    table_contents = [[
        "feature",
        "minimum value in class 0",
        "minimum value in class 1",
        "maximum value in class 0",
        "maximum value in class 1",
        "mean value in class 0",
        "mean value in class 1"
    ]]

    for feature_name in feature_names:

        values_class_0 = list(data_class_0[feature_name])
        values_class_1 = list(data_class_1[feature_name])

        table_contents.append([
            feature_name,
            min(values_class_0),
            min(values_class_1),
            max(values_class_0),
            max(values_class_1),
            sum(values_class_0)/len(values_class_0),
            sum(values_class_1)/len(values_class_1)
        ])

    print(
        pyprel.Table(
            contents = table_contents
        )
    )

    if make_histogram_comparisons:

        for feature_name in feature_names:

            filename = shijian.propose_filename(
                filename = feature_name + "_ttbb_ttH.png"
            )
            log.info("save histogram {filename}".format(filename = filename))
            datavision.save_histogram_comparison_matplotlib(
                values_1      = list(data_class_0[feature_name]),
                values_2      = list(data_class_1[feature_name]),
                label_1       = "ttbb",
                label_2       = "ttH",
                label_ratio_x = "",
                label_y       = "",
                title         = feature_name,
                filename      = filename,
                directory     = directoryname_plots
            )

    if make_scatter_matrix:

        filename = "scatter_matrix_ttbb_ttH.jpg"
        log.info("save scatter matrix {filename}".format(filename = filename))
        # pd.scatter_matrix was removed from pandas; the function lives in
        # pandas.plotting.
        scatter_matrix = pd.plotting.scatter_matrix(
            data,
            figsize  = [15, 15],
            marker   = ".",
            s        = 0.2,
            diagonal = "kde"
        )
        # enlarge and rotate the axis labels and hide the ticks so that the
        # dense grid of subplots stays legible
        for ax in scatter_matrix.ravel():
            ax.set_xlabel(
                ax.get_xlabel(),
                fontsize = 15,
                rotation = 90
            )
            ax.set_ylabel(
                ax.get_ylabel(),
                fontsize = 15,
                rotation = 0,
                labelpad = 60
            )
            ax.get_xaxis().set_ticks([])
            ax.get_yaxis().set_ticks([])
        if not os.path.exists(directoryname_plots):
            os.makedirs(directoryname_plots)
        plt.savefig(
            directoryname_plots + "/" + filename,
            dpi = 700
        )
        # release the large figure so that later plotting does not draw
        # into one of its subplots
        plt.close()

    if make_event_images:

        directoryname = "event_images"

        if not os.path.exists(directoryname):
            os.makedirs(directoryname)

        for class_label in [0, 1]:

            data_class = data.loc[data["class"] == class_label]

            for index, row in data_class[0:number_of_event_images].iterrows():
                # Series.as_matrix() was removed from pandas; .values gives
                # the same NumPy array.
                image = datavision.NumPy_array_pad_square_shape(
                    array     = row.values,
                    pad_value = -4
                )
                # one fresh figure per event, closed after saving, so that
                # images do not pile up on a shared axes
                plt.figure()
                plt.imshow(
                    image,
                    cmap          = "Greys",
                    interpolation = "nearest"
                )
                filename = "event_image_class_" + str(class_label) + "_index_" + str(index) + ".png"
                log.info("save event image {filename}".format(filename = filename))
                plt.savefig(
                    directoryname + "/" + filename,
                    dpi = 200
                )
                plt.close()

    print("")

    program.terminate()
Exemplo n.º 12
0
def main():
    """
    Interactive demonstration of pyprel printing utilities.

    Walks through dictionary printouts, a pre-drawn logo, a rendered banner,
    a segment display and a series of tables. Before each section the
    function waits for the user via get_input("Press Enter to continue.").
    """

    print("\nexample: printout of dictionary")
    get_input("Press Enter to continue.")

    information = {
        "sample information": {
            "ID": 169888,
            "name": "ttH",
            "number of events": 124883,
            "cross section": 0.055519,
            "k factor": 1.0201,
            "generator": "pythia8",
            "variables": {
                "trk_n": 147,
                "zappo_n": 9001
            }
        }
    }

    pyprel.print_line()
    pyprel.print_dictionary(dictionary = information)
    pyprel.print_line()
    print(pyprel.dictionary_string(dictionary = information))
    pyprel.print_line()

    print("\nexample: printout of existing logo")
    get_input("Press Enter to continue.")

    # Raw strings keep the ASCII art verbatim: its backslashes are literal
    # characters, not escape sequences (sequences such as "\|" or "\_" in a
    # normal string literal are invalid escapes and trigger warnings).
    text = "\n".join((
        r"   ____      _            _____ _                ",
        r"  / ___|___ | | ___  _ __|  ___| | _____      __ ",
        r" | |   / _ \| |/ _ \| '__| |_  | |/ _ \ \ /\ / / ",
        r" | |__| (_) | | (_) | |  |  _| | | (_) \ V  V /  ",
        r"  \____\___/|_|\___/|_|  |_|   |_|\___/ \_/\_/   ",
    ))

    pyprel.print_center(text = text)

    print("\nexample: rendering and printout of logo")
    get_input("Press Enter to continue.")

    name = "aria"
    logo = pyprel.render_banner(
        text = name.upper()
    )
    pyprel.print_line()
    print(pyprel.center_string(text = logo))
    pyprel.print_line()

    print("\nexample: rendering and printout segment display")
    get_input("Press Enter to continue.")

    print(pyprel.render_segment_display(text = "0123456789"))

    print("\nexample: printout of tables")
    get_input("Press Enter to continue.")

    # two-column table rendered with a range of layout options
    table_contents = [
        ["heading 1", "heading 2"],
        ["some text", "some more text"],
        ["lots and lots and lots and lots and lots of text", "some more text"]
    ]
    print(
        pyprel.Table(
            contents = table_contents,
            column_width = 25
        )
    )
    print(
        pyprel.Table(
            contents = table_contents,
            table_width_requested = 30
        )
    )
    print(
        pyprel.Table(
            contents = table_contents,
            table_width_requested = 30,
            hard_wrapping = True
        )
    )
    print(
        pyprel.Table(
            contents = table_contents
        )
    )
    # print_center takes text, so the table is converted to a string first
    pyprel.print_center(
        text = str(pyprel.Table(
            contents = table_contents,
            table_width_requested = 30
        ))
    )
    print(
        pyprel.Table(
            contents = table_contents,
            column_width = 25,
            column_delimiter = "||"
        )
    )
    print(
        pyprel.Table(
            contents = table_contents,
            column_width = 25,
            row_delimiter = "~"
        )
    )

    # three-column table with default layout
    table_contents = [
        [
            "heading 1",
            "heading 2",
            "heading 3"
        ],
        [
            "some text",
            "some more text",
            "even more text"
        ],
        [
            "lots and lots and lots and lots and lots of text",
            "some more text",
            "some more text"
        ]
    ]
    print(
        pyprel.Table(
            contents = table_contents
        )
    )

    # four-column table with default layout
    table_contents = [
        [
            "heading 1",
            "heading 2",
            "heading 3",
            "heading 4"
        ],
        [
            "some text",
            "some more text",
            "even more text",
            "yeah more text"
        ],
        [
            "lots and lots and lots and lots and lots of text",
            "some more text",
            "some more text",
            "some more text"
        ]
    ]
    print(
        pyprel.Table(
            contents = table_contents
        )
    )
Exemplo n.º 13
0
def main():
    """
    Demonstrate several ways of composing pyprel tables.

    In order: a table converted from Markdown, a table written out as nested
    lists, a table assembled from NumPy data, and two delimiter-free tables
    used to align labelled values. Sections are separated with
    pyprel.print_line().
    """

    pyprel.print_line()

    print("\nconvert Markdown table to pyprel table\n")

    markdown_source = """
|**variable 1**|**variable 2**|
|--------------|--------------|
|1             |0.23545       |
|2             |0.63523       |
|3             |0.55231       |
|4             |0.89563       |
|5             |0.55345       |
"""

    converted_rows = pyprel.table_Markdown_to_table_pyprel(
        table = markdown_source
    )
    print(pyprel.Table(contents = converted_rows))

    pyprel.print_line()

    print("\ncompose and print table\n")

    # heading row followed by one row per number/letter pair
    literal_rows = [["number", "letter"], [1, "a"], [2, "b"]]
    print(pyprel.Table(contents = literal_rows))

    pyprel.print_line()

    print("\ncompose and print a table using list comprehensions and zip\n")

    # ten evenly spaced points on [0, pi], each paired with its sine
    abscissae = numpy.linspace(0, numpy.pi, 10)
    sine_rows = [[x, numpy.sin(x)] for x in abscissae]
    print(pyprel.Table(contents = [["x", "y"]] + sine_rows))

    pyprel.print_line()

    print("\ncompose aligned printouts of data using tables\n")

    # the same labels are shown first with short and then with long values
    labels = ("msg:", "signature:", "data:")
    short_values = (
        "1536155474294",
        "0C118313F6D19",
        "1536155474294",
    )
    long_values = (
        "15361554742941536155474294153615547429415361554742941536155474294",
        "0C118313F6D190C118313F6D190C118313F6D190C118313F6D190C118313F6D19",
        "15361554742941536155474294153615547429415361554742941536155474294",
    )

    for values in (short_values, long_values):
        aligned_rows = [
            [label, value] for label, value in zip(labels, values)
        ]
        # empty delimiters leave only the column alignment visible
        print(pyprel.Table(
            contents              = aligned_rows,
            column_delimiter      = "",
            row_delimiter         = "",
            table_width_requested = 40
        ))

    pyprel.print_line()
def main(options):
    """
    Grid-search classifier architectures and epoch counts on ttbb versus ttH
    event data.

    The two ROOT files named in the options are loaded, optionally plotted
    against each other, preprocessed, labelled (0 for ttbb, 1 for ttH) and
    split into training and test samples. One classifier is trained for every
    (epochs, hidden nodes) configuration. Scores are logged, saved to
    "grid_search_map.pkl" after every configuration, tabulated, plotted to
    "hyperparameter_map.eps", and the three best test scores are printed.

    options: mapping of command line options; the keys read here are
    "--datattH", "--datattbb" and "--plot".
    """

    global program
    program = propyte.Program(options=options,
                              name=name,
                              version=version,
                              logo=logo)
    global log
    from propyte import log

    log.info("")

    # access options and arguments
    ROOT_filename_ttH = options["--datattH"]
    ROOT_filename_ttbb = options["--datattbb"]
    engage_plotting = string_to_bool(options["--plot"])

    log.info("ttH data file: {filename}".format(filename=ROOT_filename_ttH))
    log.info("ttbb data file: {filename}".format(filename=ROOT_filename_ttbb))

    # Access data for event classes ttbb and ttH.

    data_ttbb = abstraction.load_HEP_data(ROOT_filename=ROOT_filename_ttbb,
                                          tree_name="nominal",
                                          maximum_number_of_events=None)

    data_ttH = abstraction.load_HEP_data(ROOT_filename=ROOT_filename_ttH,
                                         tree_name="nominal",
                                         maximum_number_of_events=None)

    if engage_plotting is True:

        # Plot the loaded datasets: one normalized ttbb-versus-ttH histogram
        # comparison per variable found in the ttbb data.

        for variable_name in data_ttbb.variables():
            log.info(
                "plot ttbb versus ttH comparison of {variable_name}".format(
                    variable_name=variable_name))
            datavision.save_histogram_comparison_matplotlib(
                values_1=data_ttbb.values(name=variable_name),
                values_2=data_ttH.values(name=variable_name),
                label_1=variable_name + "_ttbb",
                label_2=variable_name + "_ttH",
                normalize=True,
                label_ratio_x="frequency",
                label_y="",
                title=variable_name + "_ttbb_ttH",
                filename=variable_name + "_ttbb_ttH.png")

    # upcoming: consider data ordering

    # Preprocess all data (to be updated).

    data_ttbb.preprocess_all()
    data_ttH.preprocess_all()

    # Add class labels to the data sets, 0 for ttbb and 1 for ttH.

    for data, class_label in ((data_ttbb, 0), (data_ttH, 1)):
        for index in data.indices():
            data.variable(index=index, name="class", value=class_label)

    # Convert the data sets to a simple list format. Every event contributes
    # two consecutive rows: its feature values followed by a one-element row
    # holding its class label. All ttbb events come first, then all ttH events.
    # NOTE(review): presumably this alternating layout is what
    # abstraction.Dataset expects -- confirm against its implementation.
    feature_names = [
        "el_1_pt",
        "el_1_eta",
        "el_1_phi",
        "jet_1_pt",
        "jet_1_eta",
        "jet_1_phi",
        "jet_1_e",
        "jet_2_pt",
        "jet_2_eta",
        "jet_2_phi",
        "jet_2_e",
        "met",
        "met_phi",
        "nJets",
        "Centrality_all",
        # "Mbb_MindR" was disabled in the original listing and stays out.
    ]
    _data = []
    for data in (data_ttbb, data_ttH):
        for index in data.indices():
            _data.append([
                data.variable(index=index, name=feature_name)
                for feature_name in feature_names
            ])
            # The label is read without an explicit index, exactly as before.
            # NOTE(review): presumably variable() falls back to the index
            # used by the preceding calls -- confirm.
            _data.append([data.variable(name="class")])
    dataset = abstraction.Dataset(data=_data)

    log.info("")

    # define data

    # NOTE(review): `cross_validation` is presumably sklearn.cross_validation,
    # which newer scikit-learn releases replace with sklearn.model_selection
    # -- confirm against the file's imports before upgrading.
    log.info("split data for cross-validation")
    features_train, features_test, targets_train, targets_test =\
        cross_validation.train_test_split(
            dataset.features(),
            dataset.targets(),
            train_size = 0.7
        )
    # grid search

    import itertools

    epochs = [100, 100000]
    architecture = [200, 300, 300, 200]

    grid_search_map = {
        "epoch": [],
        "hidden_nodes": [],
        "score_training": [],
        "score_test": [],
    }

    # Enumerate the whole grid once: for every epoch count, every ordered
    # selection (with repetition) of 1 to len(architecture) layer sizes. The
    # list gives the progress total directly and fixes the training order.
    configurations = [
        (epoch, list(combination))
        for epoch in epochs
        for nodes_count in range(1, len(architecture) + 1)
        for combination in itertools.product(architecture, repeat=nodes_count)
    ]
    count_total = len(configurations)
    progress = shijian.Progress()
    progress.engage_quick_calculation_mode()

    for count, (epoch, hidden_nodes) in enumerate(configurations, 1):

        # define model

        log.info("define classification model")
        classifier = abstraction.Classification(
            number_of_classes=2,
            hidden_nodes=hidden_nodes,
            epochs=epoch)

        # train model

        log.info("fit model to dataset features and targets")
        classifier._model.fit(features_train, targets_train)
        #classifier.save()

        # predict and cross-validate training

        log.info("test trained model on training dataset")
        score_training = metrics.accuracy_score(
            classifier._model.predict(features_train), targets_train)
        score_test = metrics.accuracy_score(
            classifier._model.predict(features_test), targets_test)
        log.info("\ntraining-testing instance complete:")
        log.info("epoch:          {epoch}".format(epoch=epoch))
        log.info("architecture:   {architecture}".format(
            architecture=hidden_nodes))
        log.info("score training: {score_training}".format(
            score_training=100 * score_training))
        log.info("score test:     {score_test}".format(
            score_test=100 * score_test))
        pyprel.print_line()
        grid_search_map["epoch"].append(epoch)
        grid_search_map["hidden_nodes"].append(hidden_nodes)
        grid_search_map["score_training"].append(score_training)
        grid_search_map["score_test"].append(score_test)

        # Save the current grid search map after every configuration so that
        # partial results survive an interrupted run.
        shijian.export_object(grid_search_map,
                              filename="grid_search_map.pkl",
                              overwrite=True)

        # `count` is the number of completed configurations, so the fraction
        # reaches exactly 1 on the last one; float() keeps the division exact
        # under Python 2 integer semantics as well.
        print(progress.add_datum(fraction=float(count) / count_total))

    # table

    table_contents = [[
        "epoch", "architecture", "score training", "score testing"
    ]]
    for row in zip(grid_search_map["epoch"],
                   grid_search_map["hidden_nodes"],
                   grid_search_map["score_training"],
                   grid_search_map["score_test"]):
        table_contents.append([str(element) for element in row])
    print("\ngrid search map:\n")
    print(pyprel.Table(contents=table_contents))

    # plot

    architectures = shijian.unique_list_elements(
        grid_search_map["hidden_nodes"])

    # Map each distinct architecture (keyed by its string form) to its
    # [epoch, test score] pairs in the order they were recorded.
    architecture_epoch_score = {}
    for unique_nodes in architectures:
        architecture_epoch_score[str(unique_nodes)] = [
            [recorded_epoch, recorded_score]
            for recorded_epoch, recorded_nodes, recorded_score in zip(
                grid_search_map["epoch"],
                grid_search_map["hidden_nodes"],
                grid_search_map["score_test"])
            if recorded_nodes == unique_nodes
        ]

    figure = matplotlib.pyplot.figure()
    figure.set_size_inches(10, 10)
    axes = figure.add_subplot(1, 1, 1)
    axes.set_xscale("log")
    figure.suptitle("hyperparameter map", fontsize=20)
    matplotlib.pyplot.xlabel("epochs")
    matplotlib.pyplot.ylabel("training test score")

    # One curve per architecture: test score as a function of epochs.
    for key, value in architecture_epoch_score.items():
        epochs_plotted = [element[0] for element in value]
        scores_plotted = [element[1] for element in value]
        matplotlib.pyplot.plot(epochs_plotted, scores_plotted, label=key)

    matplotlib.pyplot.legend(loc="center right")

    matplotlib.pyplot.savefig("hyperparameter_map.eps",
                              bbox_inches="tight",
                              format="eps")

    # find best-scoring models

    # Find the 3 best scores (ties are ordered by the hidden node lists).
    best_models = sorted(zip(grid_search_map["score_test"],
                             grid_search_map["hidden_nodes"]),
                         reverse=True)[:3]

    # table
    table_contents = [["architecture", "score testing"]]
    for model in best_models:
        table_contents.append([str(model[1]), str(model[0])])
    print("\nbest-scoring models:\n")
    print(pyprel.Table(contents=table_contents))

    log.info("")

    program.terminate()
Exemplo n.º 15
0
def main(options):
    """
    Print the "exchanges" table of a database and optionally export it as a
    simple utterance/response training text file.

    The database metadata, table names, and the columns and row count of the
    "exchanges" table are logged. The table entries are then printed as a
    table. If an output file is specified, one "utterance => response" line
    per printed entry is written to it.

    options: mapping of command line options; the keys read here are
    "--database" (database filename), "--tableLimit" (optional maximum number
    of entries to print, converted with int) and "--outputFile" (optional
    filename for the training representation).
    """

    global program
    program = propyte.Program(options=options,
                              name=name,
                              version=version,
                              logo=logo)
    global log
    from propyte import log

    # access options and arguments
    database_filename = options["--database"]
    table_limit = options["--tableLimit"]
    if table_limit is not None:
        table_limit = int(table_limit)
    output_filename = options["--outputFile"]
    if output_filename is not None:
        output_filename = str(output_filename)

    # Access database.
    database = abstraction.access_database(filename=database_filename)
    log.info("\ndatabase metadata:")
    abstraction.log_database_metadata(filename=database_filename)
    log.info("")
    # Print the tables in the database.
    log.info("tables in database: {tables}".format(tables=database.tables))
    # Access the exchanges table.
    table_name = "exchanges"
    log.info("access table \"{table_name}\"".format(table_name=table_name))
    # Print the columns of the table.
    log.info("columns in table \"{table_name}\": {columns}".format(
        table_name=table_name, columns=database[table_name].columns))
    # Print the number of rows of the table.
    log.info(
        "number of rows in table \"{table_name}\": {number_of_rows}".format(
            table_name=table_name,
            number_of_rows=str(len(database[table_name]))))
    # Print the table entries:
    log.info("entries of table {table_name}:\n".format(table_name=table_name))
    # Define table headings; each heading is also the key read from an entry.
    table_headings = [
        "id", "utterance", "response", "utteranceTimeUNIX", "responseTimeUNIX",
        "utteranceReference", "responseReference", "exchangeReference"
    ]
    table_contents = [table_headings]
    # Lines of the simple training representation, joined when written.
    training_lines = []
    # Fill table data.
    for count_entries, entry in enumerate(database[table_name].all(), 1):
        table_contents.append(
            [str(entry[heading]) for heading in table_headings])
        # simple training representation
        if output_filename is not None:
            training_lines.append(
                str(entry["utterance"]) + " => " + str(entry["response"]))
        # The limit is checked after the entry has been recorded, so at least
        # one entry is kept whenever the table is not empty.
        if table_limit is not None and count_entries >= table_limit:
            break
    # Record table.
    print(pyprel.Table(contents=table_contents))
    # Record to file, if specified.
    if output_filename is not None:
        log.info(
            "save simple training representation to file {filename}".format(
                filename=output_filename))
        # The context manager closes the file even if the write fails.
        with open(output_filename, "w") as output_file:
            output_file.write("\n".join(training_lines))

    program.terminate()