Example #1
def main(options):

    global program
    program = propyte.Program(options=options,
                              name=name,
                              version=version,
                              logo=logo)
    global log
    from propyte import log

    log.info("")

    input_data_filename = options["--data"]

    log.info(
        "input data file: {filename}".format(filename=input_data_filename))

    log.debug("start to print log messages at various levels")

    log.debug("message at level DEBUG")
    log.info("message at level INFO")
    log.warning("message at level WARNING")
    log.error("message at level ERROR")
    log.critical("message at level CRITICAL")

    log.debug("stop printing log messages at various levels")

    function_1()

    log.info("")

    program.terminate()
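All of these examples receive a docopt-style options dictionary whose keys (for example "--data") mirror command-line option names. A minimal sketch of a matching entry point, with a hypothetical usage docstring, might look like this:

"""
example

Usage:
    example.py [--data=FILENAME]

Options:
    --data=FILENAME    input data file [default: data.txt]
"""
import docopt

if __name__ == "__main__":
    # Parse command-line options from the usage docstring above.
    options = docopt.docopt(__doc__)
    main(options)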
Example #2
def main(options):

    global program
    program = propyte.Program(options=options,
                              name=name,
                              version=version,
                              logo=logo)
    global log
    from propyte import log

    filename_database = options["--database"]
    rows_limit = options["--rows"]
    if rows_limit is not None:
        rows_limit = int(rows_limit)

    log.info("\naccess database {filename}".format(filename=filename_database))
    database = dataset.connect("sqlite:///{filename_database}".format(
        filename_database=filename_database))

    for name_table in database.tables:

        log.info("access table \"{name_table}\"".format(name_table=name_table))
        table = database[name_table]
        log.info("number of rows in table \"{name_table}\": {number_of_rows}".
                 format(name_table=name_table, number_of_rows=str(len(table))))
        log.info(
            "\ntable {name_table} printout:\n".format(name_table=name_table))

        print(
            pyprel.Table(contents=pyprel.table_dataset_database_table(
                table=database[name_table], rows_limit=rows_limit)))

    program.terminate()
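The dataset library used above exposes SQLite tables as dictionary-like objects. A minimal sketch of iterating a database directly (the filename is illustrative):

import dataset

# Connect to a SQLite file (hypothetical filename) and walk its tables.
database = dataset.connect("sqlite:///exchanges.db")
for name_table in database.tables:
    table = database[name_table]
    print(name_table, len(table), table.columns)  # name, row count, columns
    for row in table.all():  # each row is a dictionary keyed by column name
        print(row)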
Example #3
def main(options):

    global program
    program = propyte.Program(options=options,
                              name=name,
                              version=version,
                              logo=logo)
    global log
    from propyte import log

    day_run_time = options["--dayruntime"]

    propyte.start_messaging_Telegram()
    propyte.start_receiving_messages_Telegram()

    # Within the daily run-time range, check for messages every few seconds
    # and send a canned reply whenever a recognized greeting is received.

    while True:

        if shijian.in_daily_time_range(time_range=day_run_time):

            text = str(propyte.get_text_last_message_received_Telegram())
            if "sup" in text:
                propyte.send_message_Telegram(recipient="@wbreadenmadden",
                                              text="what up dog")
            if "how r u" in text:
                propyte.send_message_Telegram(recipient="@wbreadenmadden",
                                              text="nae bad fam")

        time.sleep(5)
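shijian.in_daily_time_range is not shown in this snippet; a plausible standard-library sketch of such a check, assuming the range is given as a pair of "HH:MM" strings, is:

import datetime

def in_daily_time_range(time_range=("09:00", "17:00")):
    # Return True if the current local time falls inside the daily range.
    # The ("HH:MM", "HH:MM") format is an assumption for illustration.
    start = datetime.datetime.strptime(time_range[0], "%H:%M").time()
    stop = datetime.datetime.strptime(time_range[1], "%H:%M").time()
    now = datetime.datetime.now().time()
    return start <= now <= stop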
Example #4
def main(options):

    global program
    program = propyte.Program(options=options,
                              name=name,
                              version=version,
                              logo=logo)
    global log
    from propyte import log

    expression = options["--expression"]
    word_vector_model = options["--wordvectormodel"]

    model_word2vec = abstraction.load_word_vector_model(
        filename=word_vector_model)

    sentences = [
        "What are you dirty hooers doing on my planet?", "What time is it?",
        "What can you do?", "Change the color from red to black.",
        "All those moments will be lost in time.",
        "All of those moments will be lost in time.",
        "All of those moments are to be lost in time."
    ]

    result = most_similar_expression(expression=expression,
                                     expressions=sentences,
                                     model_word2vec=model_word2vec)

    pyprel.print_line()
    log.info(
        "input expression:        {expression}".format(expression=expression))
    log.info("most similar expression: {expression}".format(expression=result))
    pyprel.print_line()

    program.terminate()
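most_similar_expression is defined elsewhere in the script; a sketch of the likely approach is to convert each expression to a word vector with the project's abstraction helper and return the candidate with the highest cosine similarity to the input (this is an illustration, not the project's exact implementation):

import numpy
import abstraction  # the project's package, assumed importable

def most_similar_expression(expression=None, expressions=None, model_word2vec=None):
    # Vectorize the input once, then rank candidates by cosine similarity.
    vector_input = abstraction.convert_sentence_string_to_word_vector(
        sentence_string=expression, model_word2vec=model_word2vec)
    def cosine_similarity(a, b):
        return numpy.dot(a, b) / (numpy.linalg.norm(a) * numpy.linalg.norm(b))
    return max(expressions, key=lambda candidate: cosine_similarity(
        vector_input, abstraction.convert_sentence_string_to_word_vector(
            sentence_string=candidate, model_word2vec=model_word2vec)))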
Example #5
def main(options):

    global program
    program = propyte.Program(options=options,
                              name=name,
                              version=version,
                              logo=logo)
    global log
    from propyte import log

    # access options and arguments
    database = options["--database"]

    model_word2vec = abstraction.model_word2vec_Brown_Corpus()

    # Access database.
    database = abstraction.access_database(filename=database)
    log.info("database metadata:")
    abstraction.log_database_metadata(filename=database)
    # Print the tables in the database.
    log.info("tables in database: {tables}".format(tables=database.tables))
    # Access the exchanges table.
    table_name = "exchanges"
    log.info("access table \"{table_name}\"".format(table_name=table_name))
    # Print the columns of the table.
    log.info("columns in table \"{table_name}\": {columns}".format(
        table_name=table_name, columns=database[table_name].columns))
    # Print the number of rows of the table.
    log.info(
        "number of rows in table \"{table_name}\": {number_of_rows}".format(
            table_name=table_name,
            number_of_rows=str(len(database[table_name]))))
    log.info(
        "create word vector representations of each utterance and response " +
        "of all exchanges")
    # Create a vector representation of each utterance and response of all
    # exchanges.
    for entry in database[table_name].all():
        utterance = entry["utterance"]
        utterance_word_vector =\
            abstraction.convert_sentence_string_to_word_vector(
                sentence_string = utterance,
                model_word2vec  = model_word2vec
            )
        log.info("word vector representation of utterance \"{utterance}\":"
                 "\n{utterance_word_vector}".format(
                     utterance=utterance,
                     utterance_word_vector=utterance_word_vector))
        response = entry["response"]
        response_word_vector =\
            abstraction.convert_sentence_string_to_word_vector(
                sentence_string = response,
                model_word2vec  = model_word2vec
            )
        log.info("word vector representation of response \"{response}\":"
                 "\n{response_word_vector}".format(
                     response=response,
                     response_word_vector=response_word_vector))

    program.terminate()
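A common way to build the sentence vectors that convert_sentence_string_to_word_vector returns is to average the model's vectors over the words of the sentence; a simplified sketch (the naive tokenization, and the direct model indexing that newer gensim versions expose as model.wv, are assumptions):

import numpy

def sentence_string_to_word_vector(sentence_string, model_word2vec):
    # Average the vectors of all in-vocabulary words; return None if no
    # word of the sentence is in the model's vocabulary.
    words = sentence_string.lower().split()
    vectors = [model_word2vec[word] for word in words if word in model_word2vec]
    if not vectors:
        return None
    return numpy.mean(vectors, axis=0)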
Example #6
def main(options):

    global program
    program = propyte.Program(options=options,
                              name=name,
                              version=version,
                              logo=logo)
    global log
    from propyte import log

    # access options and arguments
    database_filename = options["--database"]

    log.info("")

    database = abstraction.access_database(filename=database_filename)

    for table in database.tables:
        log.info("\ntable: {table}/n".format(table=table))
        for entry in database[table].all():
            pyprel.print_line()
            for column in database[table].columns:
                log.info("\n{column}: {content}".format(column=column,
                                                        content=str(
                                                            entry[column])))
        pyprel.print_line()

    log.info("")

    program.terminate()
Example #7
def main(options):

    global program
    program = propyte.Program(options=options,
                              name=name,
                              version=version,
                              logo=logo)
    global log
    from propyte import log

    interval = float(options["--interval"])

    clock_restart = shijian.Clock(name="restart")

    log.info("\nrestart interval: {interval} s".format(interval=interval))

    while True:
        log.info("\ncurrent run time: {run_time}".format(
            run_time=clock_restart.time()))
        log.info("restart yet? (i.e. current run time >= interval): {restart}".
                 format(restart=clock_restart.time() >= interval))
        if clock_restart.time() >= interval:
            print(
                pyprel.center_string(text=pyprel.render_banner(
                    text="restart")))
            propyte.restart()
        time.sleep(1)
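shijian.Clock appears to act as a simple stopwatch here; a minimal sketch of the behavior this loop relies on (only the name attribute and a time() method returning elapsed seconds are assumed):

import time

class Clock(object):
    # Minimal stopwatch sketch: time() returns seconds since creation.
    def __init__(self, name=None):
        self.name = name
        self._start_time = time.time()
    def time(self):
        return time.time() - self._start_time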
Example #8
def main(options):

    global program
    program = propyte.Program(options=options,
                              name=name,
                              version=version,
                              logo=logo)
    global log
    from propyte import log

    # access options and arguments
    database = options["--database"]
    word_vector_model = options["--wordvectormodel"]

    log.info("")

    log.info("load word vector model {model}".format(model=word_vector_model))
    model_word2vec = abstraction.load_word_vector_model(
        filename=word_vector_model)
    log.info("add exchange word vectors to database {database}".format(
        database=database))
    abstraction.add_exchange_word_vectors_to_database(
        filename=database, model_word2vec=model_word2vec)

    log.info("")

    program.terminate()
Example #9
def main(options):

    global program
    program = propyte.Program(
        options = options,
        name    = name,
        version = version,
        logo    = logo
        )
    global log
    from propyte import log

    log.info("")

    # access options and arguments
    hypermap_filename = options["--hypermapfile"]

    # load grid search map
    hypermap = shijian.import_object(filename = hypermap_filename)

    abstraction.analyze_hypermap(
        hypermap = hypermap
    )

    log.info("")

    program.terminate()
Example #10
def main(options):

    global program
    program = propyte.Program(options=options,
                              name=name,
                              version=version,
                              logo=logo)
    global log
    from propyte import log

    print("")

    filename_ROOT = options["--fileroot"]
    name_tree = options["--tree"]

    if not os.path.isfile(os.path.expandvars(filename_ROOT)):
        log.error("file {filename} not found".format(filename=filename_ROOT))
        program.terminate()

    file_ROOT = abstraction.open_ROOT_file(filename_ROOT)
    tree = file_ROOT.Get(name_tree)

    number_entries = tree.GetEntries()
    names_variables = [
        variable.GetName() for variable in tree.GetListOfBranches()
    ]
    names_variables = shijian.natural_sort(names_variables)
    names_objects = [key.GetName() for key in file_ROOT.GetListOfKeys()]
    names_objects = shijian.natural_sort(names_objects)

    log.info(
        textwrap.dedent("""
        input ROOT file:   {filename_ROOT}
        number of entries: {number_entries}
        """.format(filename_ROOT=filename_ROOT,
                   number_entries=number_entries)))

    log.info("variables:")
    print("")
    for name_variable in names_variables:
        log.info("    " + name_variable)

    print("")
    log.info("objects:")
    print("")
    for name_object in names_objects:
        log.info("    " + name_object)

    #print("")
    #log.info("tree printout:")
    #print("")
    #tree.Print()
    #print("")

    program.terminate()
Example #11
def main(options):

    global program
    program = propyte.Program(options=options,
                              name=name,
                              version=version,
                              logo=logo)
    global log
    from propyte import log

    application = QtGui.QApplication(sys.argv)
    interface = Interface(options)
    sys.exit(application.exec_())
Example #12
def main(options):

    global program
    program = propyte.Program(options=options,
                              name=name,
                              version=version,
                              logo=logo)
    global log
    from propyte import log

    log.info("")

    # access options and arguments
    ROOT_filename = options["--data"]

    log.info("load classification model")

    classifier = abstraction.Classification(
        load_from_directory=
        "abstraction_classifier_ttH_ttbb_300000_50_200_400_50_300"
        #load_from_directory = "abstraction_classifier_ttH_ttbb_300000_50_150_250_300_400"
        #load_from_directory = "abstraction_classifier"
    )

    # Access data.
    data = abstraction.load_HEP_data(ROOT_filename=ROOT_filename,
                                     tree_name="nominal",
                                     maximum_number_of_events=5000)
    # Add class labels (assumes the filename contains either "ttH" or "ttbb";
    # class_value is undefined otherwise).
    if "ttH" in ROOT_filename:
        class_value = 1
    elif "ttbb" in ROOT_filename:
        class_value = 0
    for index in data.indices():
        data.variable(index=index, name="class", value=class_value)
    # Preprocess all data.
    data.preprocess_all()
    # Convert the datavision dataset to an abstraction dataset.
    dataset = abstraction.convert_HEP_datasets_from_datavision_datasets_to_abstraction_datasets(
        datasets=data)
    # Classify data and add the results to the datavision dataset.
    results = list(classifier._model.predict(dataset.features()))
    for count, index in enumerate(data.indices()):
        data.variable(index=index, name="abstraction1", value=results[count])

    log.info(data.table())

    log.info("")

    program.terminate()
Example #13
def main(options):

    utterance = options["--utterance"]

    global program
    program = propyte.Program(options=options,
                              name=name,
                              version=version,
                              logo=logo)
    global log
    from propyte import log

    log.warning("input utterance: {utterance}".format(utterance=utterance))
    response = abstraction.generate_response(utterance=utterance)
    log.warning("proposed response: {response}".format(response=response))

    program.terminate()
Example #14
def main(options):

    global program
    program = propyte.Program(options=options,
                              name=name,
                              version=version,
                              logo=logo)
    global log
    from propyte import log

    print("")

    filename_CSV_input = options["--infile"]
    filename_CSV_output = options["--outfile"]

    if not os.path.isfile(os.path.expandvars(filename_CSV_input)):
        log.error(
            "file {filename} not found".format(filename=filename_CSV_input))
        program.terminate()

    log.info("read CSV from {filename}".format(filename=filename_CSV_input))
    data = pd.read_csv(filename_CSV_input)

    scaler = sklearn.preprocessing.MinMaxScaler(feature_range=(-1, 1))

    number_of_columns = data.shape[1]
    indices_of_feature_columns = range(0, number_of_columns - 1)

    # scale feature columns
    log.info("scale features")
    data[indices_of_feature_columns] = scaler.fit_transform(
        data[indices_of_feature_columns])

    log.info(
        "save scaled CSV to {filename}".format(filename=filename_CSV_output))
    data.to_csv(
        filename_CSV_output,
        index=False,
        #header = False
    )

    print("")

    program.terminate()
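MinMaxScaler maps each feature column linearly onto the requested range; with feature_range=(-1, 1) the transform is x' = 2 * (x - min) / (max - min) - 1. A small self-contained check:

import numpy
import sklearn.preprocessing

scaler = sklearn.preprocessing.MinMaxScaler(feature_range=(-1, 1))
column = numpy.array([[0.0], [5.0], [10.0]])
print(scaler.fit_transform(column))  # [[-1.], [0.], [1.]]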
Example #15
def main(options):

    filename_log = options["--logfile"]

    global program
    program = propyte.Program(options=options,
                              name=name,
                              version=version,
                              logo=logo,
                              filename_log=filename_log)
    global log
    from propyte import log

    log.debug("message at level DEBUG")
    log.info("message at level INFO")
    log.warning("message at level WARNING")
    log.error("message at level ERROR")
    log.critical("message at level CRITICAL")

    program.terminate()
Example #16
def main(options):

    global program
    program = propyte.Program(options=options,
                              name=name,
                              version=version,
                              logo=logo)
    global log
    from propyte import log

    filename_database = options["--database"]
    name_table = options["--table"]
    name_table_metadata = options["--tablemetadata"]
    rows_limit = options["--rows"]
    if rows_limit is not None:
        rows_limit = int(rows_limit)

    log.info("\naccess database {filename}".format(filename=filename_database))
    database = dataset.connect("sqlite:///{filename_database}".format(
        filename_database=filename_database))
    log.info("access table \"{name_table}\"".format(name_table=name_table))
    table = database[name_table]
    log.info(
        "number of rows in table \"{name_table}\": {number_of_rows}".format(
            name_table=name_table, number_of_rows=str(len(table))))
    log.info("\ntable {name_table} printout:\n".format(name_table=name_table))

    print(
        pyprel.Table(contents=pyprel.table_dataset_database_table(
            table=database[name_table],
            include_attributes=["utterance", "response", "exchangeReference"],
            rows_limit=rows_limit)))

    log.info("database metadata:")

    print(
        pyprel.Table(contents=pyprel.table_dataset_database_table(
            table=database[name_table_metadata])))

    program.terminate()
Example #17
def main(options):

    global program
    program = propyte.Program(options=options,
                              name=name,
                              version=version,
                              logo=logo)
    global log
    from propyte import log

    # access options and arguments
    expression = options["--expression"]
    word_vector_model = options["--wordvectormodel"]

    model_word2vec = abstraction.load_word_vector_model(
        filename=word_vector_model)

    # Convert the expression to a word vector.
    expression_word_vector =\
        abstraction.convert_sentence_string_to_word_vector(
            sentence_string = expression,
            model_word2vec  = model_word2vec
        )
    log.info("word vector representation of expression \"{expression}\":"
             "\n{expression_word_vector}".format(
                 expression=expression,
                 expression_word_vector=expression_word_vector))

    log.info("")

    log.info(
        "word vector representation of expression \"{expression}\" as NumPy "
        "array:\n{expression_NumPy_array}".format(
            expression=expression,
            expression_NumPy_array=numpy.array_repr(expression_word_vector)))

    program.terminate()
Example #18
def main(options):

    global program
    program = propyte.Program(options=options,
                              name=name,
                              version=version,
                              logo=logo)
    global log
    from propyte import log

    # access options and arguments
    subreddits = options["--subreddits"].split(",")
    number_of_utterances = options["--numberOfUtterances"]
    database = options["--database"]

    log.info("access exchanges")
    exchanges_Reddit = abstraction.access_exchanges_Reddit(
        subreddits=subreddits, number_of_utterances=number_of_utterances)
    log.info("save exchanges to database (only those not saved previously)")
    abstraction.save_exchanges_to_database(exchanges=exchanges_Reddit,
                                           filename=database)
    abstraction.save_database_metadata(filename=database)

    program.terminate()
Example #19
def main(options):

    global program
    program = propyte.Program(options=options,
                              name=name,
                              version=version,
                              logo=logo)
    global log
    from propyte import log

    # access options and arguments
    expression = options["--expression"]
    word_vector_model = options["--wordvectormodel"]

    # Define a dictionary of natural language expressions and word vectors.
    stored_expressions = {
        "This is a test.":
        numpy.array([
            -0.3828682, -0.36397889, 0.46676171, 0.32530552, 0.20376287,
            -0.41326976, -0.58228827, 0.05073506, -0.29834735, 0.62523258,
            0.48247468, 0.63565594, 0.61466146, -0.05790123, 0.49383548,
            0.17871667, 0.26640224, -0.05172781, -0.43991241, 0.8027305,
            0.13174312, -0.70332521, -0.56575418, -0.21705133, -0.93002945,
            0.04151381, -0.15113404, 0.06264834, 0.03022593, -0.00822711,
            -0.23755306, -0.9215641, 0.21348992, 0.38396335, 0.3020944,
            -0.08034055, -0.36891997, -0.86551458, -1.02402425, 0.03633916,
            0.34436008, 0.43058148, -0.32728755, 0.50974292, -0.31518513,
            -0.63085675, -0.40564051, 0.30009648, -0.06426927, -0.6588546,
            0.06724164, 0.08611558, -0.13476974, 0.43107161, -0.26038069,
            0.03187743, 0.05931987, 0.28155532, 0.3636784, -0.76867509,
            -0.2253349, -0.77433741, 0.01924273, 0.63751495, 0.03874384,
            0.28651205, 0.14867969, -0.2256701, 0.23747981, 0.12383705,
            0.27097231, -0.06902695, 0.06664967, 0.05863822, -0.06882346,
            0.59539717, 0.08472043, -0.13579898, -0.31311297, -0.68136102,
            0.33296993, 0.26578408, -0.55723149, 0.38583612, -0.18033087,
            -0.50730389, 0.39173275, 0.57567608, -0.42063141, 0.22387385,
            0.473548, 0.41959459, 0.34881225, 0.1939103, -0.54997987,
            0.30737191, -0.6659264, 0.0437102, -0.11230323, -0.13493723
        ],
                    dtype=numpy.float32),
        "All those moments will be lost in time.":
        numpy.array([
            -1.19203818e+00, -2.22961619e-01, 6.69643760e-01, 3.70975524e-01,
            -6.15832031e-01, -4.36573088e-01, -6.77924156e-01, 6.26985192e-01,
            1.36510044e-01, 1.09196387e-01, 7.61598766e-01, 7.17226386e-01,
            -1.08178332e-01, -1.00655735e+00, 7.45964348e-01, 1.64966106e-01,
            5.85332870e-01, -3.83911550e-01, -6.85201228e-01, 1.31213856e+00,
            8.04567218e-01, -1.28810382e+00, -2.52677381e-01, -9.27993536e-01,
            -4.17307138e-01, -4.56952095e-01, -7.27599859e-01, 7.54008472e-01,
            6.67124987e-04, 2.75971144e-01, 2.75658131e-01, -6.79417193e-01,
            -1.73686996e-01, 8.78942013e-01, 4.39480424e-01, -6.37802243e-01,
            -6.99860230e-02, -7.99779966e-02, -7.58146644e-02, 8.09784770e-01,
            -3.71645451e-01, 1.04973994e-01, -1.34749603e+00, 2.96185315e-01,
            5.85593104e-01, -1.40544206e-01, -3.77467513e-01, 3.46597135e-01,
            2.56733745e-01, 4.04421866e-01, 1.57907709e-01, 3.00843865e-01,
            -5.41967154e-01, 5.51929235e-01, -1.69145897e-01, 4.42785203e-01,
            -2.69805342e-02, 1.31654418e+00, 3.19460958e-01, 5.08862257e-01,
            3.44371676e-01, -6.95496798e-01, 4.88163918e-01, 2.55316138e-01,
            5.03436685e-01, 9.24195647e-02, -2.38671958e-01, -8.97032142e-01,
            -3.73697281e-03, 2.99875826e-01, 1.65674359e-01, 2.01489821e-01,
            1.58179402e-02, 1.30668238e-01, -1.56954467e-01, -2.88258016e-01,
            6.76668346e-01, -3.77742261e-01, 2.20978767e-01, -6.34561360e-01,
            8.33457410e-01, -2.13193640e-01, -6.35235757e-02, 1.89480215e-01,
            6.02166615e-02, -6.64785147e-01, 1.07347333e+00, 6.22629285e-01,
            -4.63467717e-01, -1.13483839e-01, 3.43968630e-01, 2.75979757e-01,
            -1.28710240e-01, 1.50670230e+00, -3.10248852e-01, 3.29222828e-01,
            1.64443821e-01, -7.78683364e-01, -9.80837345e-02, -1.07415296e-01
        ],
                    dtype=numpy.float32),
        "All those moments were lost in time.":
        numpy.array([
            -0.94025505, -0.45476836, 0.41891485, 1.06683254, -0.49607083,
            -0.60043317, -0.55656326, 0.05368682, 0.20896676, 0.19261286,
            0.51067233, 0.01298623, -0.67276001, -0.51130211, 0.61433661,
            0.03579944, 0.4515644, -0.19222273, -0.3919456, 0.65209424,
            0.98329031, -0.78390068, -0.0611292, -0.88086104, 0.25153416,
            -0.16051427, -0.33223695, 0.86147106, -0.19569418, -0.21456225,
            0.27583197, -0.65764415, -0.76533222, 0.78306556, 0.84534264,
            -0.26408321, 0.04312199, -0.00636051, 0.1322974, 0.72321951,
            -0.01186696, 0.40505514, -0.87730938, 0.58147532, 0.89738142,
            -0.16748536, -0.38406748, -0.12007161, 0.49123141, 0.48998365,
            0.15616624, 0.52637529, -0.66329396, 0.10376941, -0.33025965,
            0.04188792, 0.30536407, 0.38240519, 0.01627355, 1.23012972,
            0.46352714, -0.74617827, 0.43505573, -0.16246299, 0.34668511,
            -0.02247265, -0.34742412, -0.64483654, -0.2243523, 0.04222834,
            0.42057285, 0.22310457, 0.36833102, -0.05716853, -0.44688487,
            -0.51298815, 0.61859602, -0.21154809, -0.08168469, -0.15004104,
            0.21371906, 0.21713886, 0.21935812, 0.04912762, 0.02854752,
            -0.55747426, 0.70036995, 0.20306921, -0.46556181, -0.10637223,
            0.60909081, 0.55366743, -0.22907487, 1.13089538, 0.34430629,
            0.35133895, 0.085365, -0.58662325, -0.13062993, -0.04200239
        ],
                    dtype=numpy.float32),
        "All those moments are lost in time.":
        numpy.array([
            -0.78943789, -0.30322614, 0.3780162, 0.80896467, -0.42042252,
            -0.64176518, -0.51211309, -0.1537444, -0.04233316, 0.07710438,
            0.66949254, 0.37771451, -0.74869132, -0.55132926, 0.53695548,
            -0.11229508, 0.6673997, -0.34724045, -0.42173663, 0.7451877,
            1.01433206, -0.85418928, -0.31583607, -0.6812892, 0.42722669,
            -0.43322188, -0.35293943, 0.7662127, -0.30090365, -0.13694993,
            -0.04172039, -0.65059775, -0.62617165, 0.71341687, 0.82349646,
            -0.31194365, 0.00356466, -0.32218212, 0.15857732, 0.82880032,
            0.0566355, 0.43106011, -1.01921201, 0.51658779, 0.8068108,
            -0.09396499, -0.37920368, -0.08726061, 0.29975161, 0.25999272,
            0.23571083, 0.24800834, -0.73045135, 0.19150458, -0.19696848,
            -0.11186107, 0.1336731, 0.33246318, 0.22474274, 1.15420532,
            0.39482915, -0.70385826, 0.54841375, -0.03638301, 0.54499787,
            0.02484709, -0.2070619, -0.69282937, -0.21465099, 0.11578664,
            0.22713676, 0.21237181, 0.2007356, 0.14489903, -0.37357002,
            -0.50091666, 0.59818357, -0.36113665, 0.06037673, -0.26377741,
            0.31544513, -0.23714744, -0.01429842, 0.17592101, -0.16280818,
            -0.58340323, 0.63590413, 0.31803992, -0.47035503, -0.17544734,
            0.66008455, 0.77849454, -0.04235193, 1.29202402, 0.12573826,
            0.20377615, -0.08164676, -0.41151166, -0.1280518, 0.02905136
        ],
                    dtype=numpy.float32),
    }

    model_word2vec = abstraction.load_word_vector_model(
        filename=word_vector_model)

    working_expression_NL = expression

    # Convert the expression to a word vector.
    working_expression_WV =\
        abstraction.convert_sentence_string_to_word_vector(
            sentence_string = working_expression_NL,
            model_word2vec  = model_word2vec
        )
    log.info(
        "word vector representation of expression \"{working_expression_NL}\":"
        "\n{working_expression_WV}".format(
            working_expression_NL=working_expression_NL,
            working_expression_WV=working_expression_WV))

    # Define table headings.
    table_contents = [[
        "working expression natural language",
        "stored expression natural language",
        "absolute magnitude difference between working amd stored expression "
        "word vectors",
        "angle between working and stored expression word vectors"
    ]]

    # Compare the expression word vector representation to existing word
    # vectors.
    magnitude_differences = []
    angles = []
    stored_expressions_NL_list = []
    magnitude_working_expression_WV = datavision.magnitude(
        working_expression_WV)
    for stored_expression_NL in stored_expressions:
        stored_expression_WV = stored_expressions[stored_expression_NL]
        magnitude_stored_expression_WV = datavision.magnitude(
            stored_expression_WV)
        magnitude_difference_working_expression_WV_stored_expression_WV = abs(
            magnitude_working_expression_WV - magnitude_stored_expression_WV)
        angle_working_expression_WV_stored_expression_WV = datavision.angle(
            working_expression_WV, stored_expression_WV)
        # Store comparison results in lists.
        magnitude_differences.append(
            magnitude_difference_working_expression_WV_stored_expression_WV)
        angles.append(angle_working_expression_WV_stored_expression_WV)
        stored_expressions_NL_list.append(stored_expression_NL)
        # Build table.
        table_contents.append([
            str(working_expression_NL),
            str(stored_expression_NL),
            str(magnitude_difference_working_expression_WV_stored_expression_WV
                ),
            str(angle_working_expression_WV_stored_expression_WV)
        ])

    # Record table.
    print(pyprel.Table(contents=table_contents))

    log.info("")

    index_minimum_magnitude_differences =\
        magnitude_differences.index(min(magnitude_differences))
    index_minimum_angles = angles.index(min(angles))
    index_minimum_match_width = len(angles) / 4
    if abs(index_minimum_magnitude_differences -
           index_minimum_angles) < index_minimum_match_width:
        log.info("translation: {translation_expression_NL}".format(
            translation_expression_NL =\
                stored_expressions_NL_list[index_minimum_angles]
        ))
    else:
        log.error("unable to translate")

    log.info("")

    program.terminate()
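datavision.magnitude and datavision.angle are the two comparison primitives used above; equivalent NumPy computations, assuming angle means the usual arccosine of the cosine similarity in radians, are:

import numpy

def magnitude(vector):
    # Euclidean norm of the vector.
    return numpy.linalg.norm(vector)

def angle(vector_1, vector_2):
    # Angle in radians between two vectors, with the cosine clipped to
    # [-1, 1] to guard against floating-point drift.
    cosine = numpy.dot(vector_1, vector_2) / (
        numpy.linalg.norm(vector_1) * numpy.linalg.norm(vector_2))
    return numpy.arccos(numpy.clip(cosine, -1.0, 1.0))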
Example #20
def main(options):

    global program
    program = propyte.Program(options=options,
                              name=name,
                              version=version,
                              logo=logo)
    global log
    from propyte import log

    # access options and arguments
    database = options["--inputdatabase"]
    database_out = options["--outputdatabase"]

    # Access database.
    database = abstraction.access_database(filename=database)
    log.info("database metadata:")
    abstraction.log_database_metadata(filename=database)
    # Print the tables in the database.
    log.info("tables in database: {tables}".format(tables=database.tables))
    # Access the exchanges table.
    tablename = "exchanges"
    log.info("access table \"{tablename}\"".format(tablename=tablename))
    # Print the columns of the table.
    log.info("columns in table \"{tablename}\": {columns}".format(
        tablename=tablename, columns=database[tablename].columns))
    # Print the number of rows of the table.
    log.info(
        "number of rows in table \"{tablename}\": {number_of_rows}".format(
            tablename=tablename, number_of_rows=str(len(database[tablename]))))
    # Build a list of unique exchanges.
    exchanges = []
    for entry in database[tablename].all():
        # Create a new exchange object from the existing exchange data, compare
        # its utterance against the utterances already kept in the new list of
        # exchanges, and append it only if the utterance is not already present.
        exchange = abstraction.Exchange(
            utterance=entry["utterance"],
            response=entry["response"],
            utterance_time_UNIX=entry["utteranceTimeUNIX"],
            response_time_UNIX=entry["responseTimeUNIX"],
            utterance_reference=entry["utteranceReference"],
            response_reference=entry["responseReference"],
            exchange_reference=entry["exchangeReference"])
        # Check new exchange against exchanges in new list.
        append_flag = True
        for exchange_in_new_list in exchanges:
            if exchange.utterance == exchange_in_new_list.utterance:
                append_flag = False
        if append_flag is True:
            log.debug("keep exchange \"{utterance}\"".format(
                utterance=exchange.utterance))
            exchanges.append(exchange)
        else:
            log.debug("skip exchange \"{utterance}\"".format(
                utterance=exchange.utterance))
    # Save the exchanges to the new database.
    log.info("save exchanges to database (only those not saved previously)")
    abstraction.save_exchanges_to_database(exchanges=exchanges,
                                           filename=database_out)
    # Save metadata to the new database.
    abstraction.save_database_metadata(filename=database_out)

    program.terminate()
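The deduplication loop above rescans the new list for every database entry, which is quadratic in the number of exchanges; tracking the utterances already kept in a set gives the same result in linear time. A sketch of the inner logic:

# Linear-time deduplication sketch: remember utterances already kept.
exchanges = []
utterances_seen = set()
for entry in database[tablename].all():
    if entry["utterance"] not in utterances_seen:
        utterances_seen.add(entry["utterance"])
        exchanges.append(entry)  # or build an abstraction.Exchange as above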
Example #21
def main(options):

    global program
    program = propyte.Program(options=options,
                              name=name,
                              version=version,
                              logo=logo)
    global log
    from propyte import log

    log.info("")

    # access options and arguments
    grid_search_filename = options["--gridsearchfile"]

    # load grid search map
    grid_search_map = shijian.import_object(filename=grid_search_filename)

    number_of_entries = len(grid_search_map["epoch"])
    log.info("number of entries: {number_of_entries}".format(
        number_of_entries=number_of_entries))

    # table

    table_contents = [[
        "epoch", "architecture", "score training", "score testing"
    ]]
    for index in range(0, number_of_entries):
        table_contents.append([
            str(grid_search_map["epoch"][index]),
            str(grid_search_map["hidden_nodes"][index]),
            str(grid_search_map["score_training"][index]),
            str(grid_search_map["score_test"][index])
        ])
    log.info("\ngrid search map:\n")
    log.info(pyprel.Table(contents=table_contents))

    # parallel coordinates plot

    number_of_entries = len(grid_search_map["epoch"])
    datasets = []
    for index in range(0, number_of_entries):
        row = []
        architecture_padded = grid_search_map["hidden_nodes"][index] + [0] * (
            5 - len(grid_search_map["hidden_nodes"][index]))
        row.append(grid_search_map["epoch"][index])
        row.extend(architecture_padded)
        row.append(grid_search_map["score_training"][index])
        row.append(grid_search_map["score_test"][index])
        datasets.append(row)

    datavision.save_parallel_coordinates_matplotlib(
        datasets[::-1], filename="parallel_coordinates.png")

    # plot

    architectures = shijian.unique_list_elements(
        grid_search_map["hidden_nodes"])

    architecture_epoch_score = {}
    for architecture in architectures:
        architecture_epoch_score[str(architecture)] = []
        for index in range(0, number_of_entries):
            if grid_search_map["hidden_nodes"][index] == architecture:
                architecture_epoch_score[str(architecture)].append([
                    grid_search_map["epoch"][index],
                    grid_search_map["score_test"][index]
                ])

    figure = matplotlib.pyplot.figure()
    figure.set_size_inches(10, 10)
    axes = figure.add_subplot(1, 1, 1)
    axes.set_xscale("log")
    figure.suptitle("hyperparameter map", fontsize=20)
    matplotlib.pyplot.xlabel("epochs")
    matplotlib.pyplot.ylabel("training test score")

    for key, value in architecture_epoch_score.items():
        epochs = [element[0] for element in value]
        score_test = [element[1] for element in value]
        matplotlib.pyplot.plot(epochs, score_test, label=key)

    matplotlib.pyplot.legend(loc="center left",
                             bbox_to_anchor=(1, 0.5),
                             fontsize=10)

    matplotlib.pyplot.savefig("hyperparameter_map.eps",
                              bbox_inches="tight",
                              format="eps")

    # find best-scoring models

    # Find the 15 best scores and plot them using parallel coordinates.
    best_models = sorted(zip(grid_search_map["score_test"],
                             grid_search_map["score_training"],
                             grid_search_map["hidden_nodes"]),
                         reverse=True)[:15]
    datasets = []
    for model in best_models:
        row = []
        architecture_padded = model[2] + [0] * (5 - len(model[2]))
        row.extend(architecture_padded)
        row.append(model[1])
        row.append(model[0])
        datasets.append(row)

    datavision.save_parallel_coordinates_matplotlib(
        datasets, filename="15_best_models_parallel_coordinates.png")

    # Find the 3 best scores.
    best_models = sorted(zip(grid_search_map["score_test"],
                             grid_search_map["hidden_nodes"]),
                         reverse=True)[:3]

    # table
    table_contents = [["architecture", "score testing"]]
    for model in best_models:
        table_contents.append([str(model[1]), str(model[0])])
    log.info("\nbest-scoring models:\n")
    log.info(pyprel.Table(contents=table_contents))

    log.info("")

    program.terminate()
Example #22
def main(options):

    global program
    program = propyte.Program(options=options,
                              name=name,
                              version=version,
                              logo=logo)
    global log
    from propyte import log

    log.info("")

    # define dataset

    dataset = abstraction.Dataset(
        data =\
            [
            [5.1, 3.5, 1.4, 0.2], [0],
            [4.9, 3.0, 1.4, 0.2], [0],
            [4.7, 3.2, 1.3, 0.2], [0],
            [4.6, 3.1, 1.5, 0.2], [0],
            [5.0, 3.6, 1.4, 0.2], [0],
            [5.4, 3.9, 1.7, 0.4], [0],
            [4.6, 3.4, 1.4, 0.3], [0],
            [5.0, 3.4, 1.5, 0.2], [0],
            [4.4, 2.9, 1.4, 0.2], [0],
            [4.9, 3.1, 1.5, 0.1], [0],
            [5.4, 3.7, 1.5, 0.2], [0],
            [4.8, 3.4, 1.6, 0.2], [0],
            [4.8, 3.0, 1.4, 0.1], [0],
            [4.3, 3.0, 1.1, 0.1], [0],
            [5.8, 4.0, 1.2, 0.2], [0],
            [5.7, 4.4, 1.5, 0.4], [0],
            [5.4, 3.9, 1.3, 0.4], [0],
            [5.1, 3.5, 1.4, 0.3], [0],
            [5.7, 3.8, 1.7, 0.3], [0],
            [5.1, 3.8, 1.5, 0.3], [0],
            [5.4, 3.4, 1.7, 0.2], [0],
            [5.1, 3.7, 1.5, 0.4], [0],
            [4.6, 3.6, 1.0, 0.2], [0],
            [5.1, 3.3, 1.7, 0.5], [0],
            [4.8, 3.4, 1.9, 0.2], [0],
            [5.0, 3.0, 1.6, 0.2], [0],
            [5.0, 3.4, 1.6, 0.4], [0],
            [5.2, 3.5, 1.5, 0.2], [0],
            [5.2, 3.4, 1.4, 0.2], [0],
            [4.7, 3.2, 1.6, 0.2], [0],
            [4.8, 3.1, 1.6, 0.2], [0],
            [5.4, 3.4, 1.5, 0.4], [0],
            [5.2, 4.1, 1.5, 0.1], [0],
            [5.5, 4.2, 1.4, 0.2], [0],
            [4.9, 3.1, 1.5, 0.1], [0],
            [5.0, 3.2, 1.2, 0.2], [0],
            [5.5, 3.5, 1.3, 0.2], [0],
            [4.9, 3.1, 1.5, 0.1], [0],
            [4.4, 3.0, 1.3, 0.2], [0],
            [5.1, 3.4, 1.5, 0.2], [0],
            [5.0, 3.5, 1.3, 0.3], [0],
            [4.5, 2.3, 1.3, 0.3], [0],
            [4.4, 3.2, 1.3, 0.2], [0],
            [5.0, 3.5, 1.6, 0.6], [0],
            [5.1, 3.8, 1.9, 0.4], [0],
            [4.8, 3.0, 1.4, 0.3], [0],
            [5.1, 3.8, 1.6, 0.2], [0],
            [4.6, 3.2, 1.4, 0.2], [0],
            [5.3, 3.7, 1.5, 0.2], [0],
            [5.0, 3.3, 1.4, 0.2], [0],
            [7.0, 3.2, 4.7, 1.4], [1],
            [6.4, 3.2, 4.5, 1.5], [1],
            [6.9, 3.1, 4.9, 1.5], [1],
            [5.5, 2.3, 4.0, 1.3], [1],
            [6.5, 2.8, 4.6, 1.5], [1],
            [5.7, 2.8, 4.5, 1.3], [1],
            [6.3, 3.3, 4.7, 1.6], [1],
            [4.9, 2.4, 3.3, 1.0], [1],
            [6.6, 2.9, 4.6, 1.3], [1],
            [5.2, 2.7, 3.9, 1.4], [1],
            [5.0, 2.0, 3.5, 1.0], [1],
            [5.9, 3.0, 4.2, 1.5], [1],
            [6.0, 2.2, 4.0, 1.0], [1],
            [6.1, 2.9, 4.7, 1.4], [1],
            [5.6, 2.9, 3.6, 1.3], [1],
            [6.7, 3.1, 4.4, 1.4], [1],
            [5.6, 3.0, 4.5, 1.5], [1],
            [5.8, 2.7, 4.1, 1.0], [1],
            [6.2, 2.2, 4.5, 1.5], [1],
            [5.6, 2.5, 3.9, 1.1], [1],
            [5.9, 3.2, 4.8, 1.8], [1],
            [6.1, 2.8, 4.0, 1.3], [1],
            [6.3, 2.5, 4.9, 1.5], [1],
            [6.1, 2.8, 4.7, 1.2], [1],
            [6.4, 2.9, 4.3, 1.3], [1],
            [6.6, 3.0, 4.4, 1.4], [1],
            [6.8, 2.8, 4.8, 1.4], [1],
            [6.7, 3.0, 5.0, 1.7], [1],
            [6.0, 2.9, 4.5, 1.5], [1],
            [5.7, 2.6, 3.5, 1.0], [1],
            [5.5, 2.4, 3.8, 1.1], [1],
            [5.5, 2.4, 3.7, 1.0], [1],
            [5.8, 2.7, 3.9, 1.2], [1],
            [6.0, 2.7, 5.1, 1.6], [1],
            [5.4, 3.0, 4.5, 1.5], [1],
            [6.0, 3.4, 4.5, 1.6], [1],
            [6.7, 3.1, 4.7, 1.5], [1],
            [6.3, 2.3, 4.4, 1.3], [1],
            [5.6, 3.0, 4.1, 1.3], [1],
            [5.5, 2.5, 4.0, 1.3], [1],
            [5.5, 2.6, 4.4, 1.2], [1],
            [6.1, 3.0, 4.6, 1.4], [1],
            [5.8, 2.6, 4.0, 1.2], [1],
            [5.0, 2.3, 3.3, 1.0], [1],
            [5.6, 2.7, 4.2, 1.3], [1],
            [5.7, 3.0, 4.2, 1.2], [1],
            [5.7, 2.9, 4.2, 1.3], [1],
            [6.2, 2.9, 4.3, 1.3], [1],
            [5.1, 2.5, 3.0, 1.1], [1],
            [5.7, 2.8, 4.1, 1.3], [1],
            [6.3, 3.3, 6.0, 2.5], [2],
            [5.8, 2.7, 5.1, 1.9], [2],
            [7.1, 3.0, 5.9, 2.1], [2],
            [6.3, 2.9, 5.6, 1.8], [2],
            [6.5, 3.0, 5.8, 2.2], [2],
            [7.6, 3.0, 6.6, 2.1], [2],
            [4.9, 2.5, 4.5, 1.7], [2],
            [7.3, 2.9, 6.3, 1.8], [2],
            [6.7, 2.5, 5.8, 1.8], [2],
            [7.2, 3.6, 6.1, 2.5], [2],
            [6.5, 3.2, 5.1, 2.0], [2],
            [6.4, 2.7, 5.3, 1.9], [2],
            [6.8, 3.0, 5.5, 2.1], [2],
            [5.7, 2.5, 5.0, 2.0], [2],
            [5.8, 2.8, 5.1, 2.4], [2],
            [6.4, 3.2, 5.3, 2.3], [2],
            [6.5, 3.0, 5.5, 1.8], [2],
            [7.7, 3.8, 6.7, 2.2], [2],
            [7.7, 2.6, 6.9, 2.3], [2],
            [6.0, 2.2, 5.0, 1.5], [2],
            [6.9, 3.2, 5.7, 2.3], [2],
            [5.6, 2.8, 4.9, 2.0], [2],
            [7.7, 2.8, 6.7, 2.0], [2],
            [6.3, 2.7, 4.9, 1.8], [2],
            [6.7, 3.3, 5.7, 2.1], [2],
            [7.2, 3.2, 6.0, 1.8], [2],
            [6.2, 2.8, 4.8, 1.8], [2],
            [6.1, 3.0, 4.9, 1.8], [2],
            [6.4, 2.8, 5.6, 2.1], [2],
            [7.2, 3.0, 5.8, 1.6], [2],
            [7.4, 2.8, 6.1, 1.9], [2],
            [7.9, 3.8, 6.4, 2.0], [2],
            [6.4, 2.8, 5.6, 2.2], [2],
            [6.3, 2.8, 5.1, 1.5], [2],
            [6.1, 2.6, 5.6, 1.4], [2],
            [7.7, 3.0, 6.1, 2.3], [2],
            [6.3, 3.4, 5.6, 2.4], [2],
            [6.4, 3.1, 5.5, 1.8], [2],
            [6.0, 3.0, 4.8, 1.8], [2],
            [6.9, 3.1, 5.4, 2.1], [2],
            [6.7, 3.1, 5.6, 2.4], [2],
            [6.9, 3.1, 5.1, 2.3], [2],
            [5.8, 2.7, 5.1, 1.9], [2],
            [6.8, 3.2, 5.9, 2.3], [2],
            [6.7, 3.3, 5.7, 2.5], [2],
            [6.7, 3.0, 5.2, 2.3], [2],
            [6.3, 2.5, 5.0, 1.9], [2],
            [6.5, 3.0, 5.2, 2.0], [2],
            [6.2, 3.4, 5.4, 2.3], [2],
            [5.9, 3.0, 5.1, 1.8], [2]
            ]
        )

    # define data

    log.info("split data for cross-validation")
    features_train, features_test, targets_train, targets_test =\
        cross_validation.train_test_split(
            dataset.features(),
            dataset.targets(),
            train_size = 0.7
        )
    log.info("define classification model")

    # define model

    classifier = abstraction.Classification(number_of_classes=3,
                                            hidden_nodes=[10, 20, 10],
                                            epochs=500)

    # train model

    log.info("fit classification model to dataset features and targets")
    classifier._model.fit(features_train, targets_train)
    #classifier.save()

    # predict and cross-validate training

    log.info("test trained classification model on training dataset")
    score = metrics.accuracy_score(classifier._model.predict(features_train),
                                   targets_train)
    log.info("prediction accuracy on training dataset: {percentage}".format(
        percentage=100 * score))
    log.info("accuracy of classifier on test dataset:")
    score = metrics.accuracy_score(classifier._model.predict(features_test),
                                   targets_test)
    log.info("prediction accuracy on test dataset: {percentage}".format(
        percentage=100 * score))

    log.info("")

    program.terminate()
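Note that the sklearn.cross_validation module used here was removed in scikit-learn 0.20; in current versions the same split is provided by sklearn.model_selection:

from sklearn.model_selection import train_test_split

features_train, features_test, targets_train, targets_test = train_test_split(
    dataset.features(), dataset.targets(), train_size=0.7)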
Example #23
def main(options):

    global program
    program = propyte.Program(options=options,
                              name=name,
                              version=version,
                              logo=logo)
    global log
    from propyte import log

    # access options and arguments
    input_image_filename = options["--inputimage"]
    output_pixels_filename = options["--outputpixels"]
    input_pixels_filename = options["--inputpixels"]
    output_image_filename = options["--outputimage"]
    output_image_width = int(options["--outputimagewidth"])
    mode_convert_image_to_pixels = bool(options["--convertimagetopixels"])
    mode_validate_pixels = bool(options["--validatepixels"])
    mode_convert_pixels_to_image = bool(options["--convertPixelsToImage"])

    log.info("")

    if mode_convert_image_to_pixels is True:
        log.info("convert image to pixels")
        # Access the input image.
        log.info("access input image")
        input_image = PIL.Image.open(input_image_filename)
        log.info("input image mode: {image_mode}".format(
            image_mode=input_image.mode))
        log.info("input image size: {image_size}".format(
            image_size=input_image.size))
        pixels = list(input_image.getdata())
        pixels_text = str(pixels)
        # Create and save the output pixels.
        output_pixels_file = open(output_pixels_filename, "w")
        output_pixels_file.truncate()
        log.info("save output pixels {output_pixels_filename}".format(
            output_pixels_filename=output_pixels_filename))
        output_pixels_file.write(pixels_text)
        output_pixels_file.close()
    elif mode_validate_pixels is True:
        log.info("validate pixels")
        # Access input pixels.
        log.info("access input pixels")
        with open(input_pixels_filename) as input_pixels_file:
            text = input_pixels_file.read()
        parts = text[2:-2].split("), (")
        log.info("validate pixels")
        for n, part in enumerate(parts):
            if not re.match(r"^\d+, \d+, \d+, \d+$", part):
                print("tuple {tuple_index} malformed: {tuple}".format(
                    tuple_index=n, tuple=part))
    elif mode_convert_pixels_to_image is True:
        log.info("convert pixels to image")
        # Access input pixels.
        log.info("access input pixels")
        input_pixels_file = open(input_pixels_filename)
        pixels = input_pixels_file.read()
        pixels = ast.literal_eval(pixels)
        # Determine the image height from the number of full image widths that
        # fit in the available pixel data.
        log.info("determine output image dimensions")
        image_mode = "RGBA"
        image_width = output_image_width  # e.g. 2379
        image_height = int(len(pixels) / image_width)  # e.g. 2196
        image_size = (image_width, image_height)
        print("output image mode: {image_mode}".format(image_mode=image_mode))
        print("output image size: {image_size}".format(image_size=image_size))
        # Create and save the output image.
        log.info("create output image")
        output_image_file = PIL.Image.new(image_mode, image_size)
        output_image_file.putdata(pixels)
        log.info("save output image {output_image_filename}".format(
            output_image_filename=output_image_filename))
        output_image_file.save(output_image_filename)
    else:
        log.info("no operation selected\n")
        print(__doc__)

    log.info("")

    program.terminate()
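For reference, a minimal self-contained version of the pixel round trip with Pillow (the 2x2 red image is illustrative):

import PIL.Image

# Flatten a small RGBA image to a pixel list and rebuild it.
image = PIL.Image.new("RGBA", (2, 2), (255, 0, 0, 255))
pixels = list(image.getdata())           # [(255, 0, 0, 255), ...]
rebuilt = PIL.Image.new("RGBA", (2, 2))  # Image.new takes (mode, size)
rebuilt.putdata(pixels)
assert list(rebuilt.getdata()) == pixels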
Example #24
def main(options):

    global program
    program = propyte.Program(
        options = options,
        name    = name,
        version = version,
        logo    = logo
    )
    global log
    from propyte import log

    print("")

    filename_CSV               = options["--infile"]
    make_histogram_comparisons = options["--histogramcomparisons"].lower() == "true"
    make_scatter_matrix        = options["--scattermatrix"].lower() == "true"
    make_event_images          = options["--eventimages"].lower() == "true"
    number_of_event_images     = int(options["--numberofeventimages"])
    directoryname_plots        = options["--directoryplots"]

    if not os.path.isfile(os.path.expandvars(filename_CSV)):
        log.error("file {filename} not found".format(
            filename = filename_CSV
        ))
        program.terminate()

    log.info("read CSV from {filename}".format(filename = filename_CSV))
    data = pd.read_csv(filename_CSV)

    number_of_columns          = data.shape[1]
    indices_of_feature_columns = range(0, number_of_columns - 1)

    feature_names = list(data.columns)

    data_class_0 = data.loc[data["class"] == 0]
    data_class_1 = data.loc[data["class"] == 1]

    print("")
    log.info("basic feature characteristics")
    print("")

    table_contents = [[
        "feature",
        "minimum value in class 0",
        "minimum value in class 1",
        "maximum value in class 0",
        "maximum value in class 1",
        "mean value in class 0",
        "mean value in class 1"
    ]]

    for feature_name in feature_names:

        values_class_0 = list(data_class_0[feature_name])
        values_class_1 = list(data_class_1[feature_name])

        table_contents.append([
            feature_name,
            min(values_class_0),
            min(values_class_1),
            max(values_class_0),
            max(values_class_1),
            sum(values_class_0)/len(values_class_0),
            sum(values_class_1)/len(values_class_1)
        ])

    print(
        pyprel.Table(
            contents = table_contents
        )
    )

    if make_histogram_comparisons:

        for feature_name in feature_names:

            filename = shijian.propose_filename(
                filename = feature_name + "_ttbb_ttH.png"
            )
            log.info("save histogram {filename}".format(filename = filename))
            datavision.save_histogram_comparison_matplotlib(
                values_1      = list(data_class_0[feature_name]),
                values_2      = list(data_class_1[feature_name]),
                label_1       = "ttbb",
                label_2       = "ttH",
                label_ratio_x = "",
                label_y       = "",
                title         = feature_name,
                filename      = filename,
                directory     = directoryname_plots
            )

    if make_scatter_matrix:

        filename = "scatter_matrix_ttbb_ttH.jpg"
        log.info("save scatter matrix {filename}".format(filename = filename))
        scatter_matrix = pd.scatter_matrix(
            data,
            figsize  = [15, 15],
            marker   = ".",
            s        = 0.2,
            diagonal = "kde"
        )
        for ax in scatter_matrix.ravel():
            ax.set_xlabel(
                ax.get_xlabel(),
                fontsize = 15,
                rotation = 90
            )
            ax.set_ylabel(
                ax.get_ylabel(),
                fontsize = 15,
                rotation = 0,
                labelpad = 60
            )
            ax.get_xaxis().set_ticks([])
            ax.get_yaxis().set_ticks([])
        if not os.path.exists(directoryname_plots):
            os.makedirs(directoryname_plots)
        plt.savefig(
            directoryname_plots + "/" + filename,
            dpi = 700
        )

    if make_event_images:

        directoryname = "event_images"

        if not os.path.exists(directoryname):
            os.makedirs(directoryname)

        for class_label in [0, 1]:

            data_class = data.loc[data["class"] == class_label]

            for index, row in data_class[0:number_of_event_images].iterrows():
                image = datavision.NumPy_array_pad_square_shape(
                    array     = row.as_matrix(),
                    pad_value = -4
                )
                plt.imshow(
                    image,
                    cmap          = "Greys",
                    interpolation = "nearest"
                )
                filename = "event_image_class_" + str(class_label) + "_index_" + str(index) + ".png"
                log.info("save event image {filename}".format(filename = filename))
                plt.savefig(
                    directoryname + "/" + filename,
                    dpi = 200
                )

    print("")

    program.terminate()
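pd.scatter_matrix was removed in pandas 1.0; in current pandas the same plot is available from pandas.plotting:

from pandas.plotting import scatter_matrix

# Same call as above, routed through the pandas.plotting module.
scatter_matrix(data, figsize=[15, 15], marker=".", s=0.2, diagonal="kde")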
Example #25
def main(options):

    global program
    program = propyte.Program(options=options,
                              name=name,
                              version=version,
                              logo=logo)
    global log
    from propyte import log

    # access options and arguments
    input_image_filename = options["--inputImage"]
    output_image_filename = options["--outputImage"]
    recursive_zoom = bool(options["--recursiveZoom"])
    GPU_mode = bool(options["--gpu"])

    log.info("")

    if GPU_mode is True:
        log.info("engage GPU mode")
        caffe.set_mode_gpu()
        # If multiple devices exist, select GPU.
        caffe.set_device(0)
    else:
        log.info("engage CPU mode")

    # Load GoogLeNet model trained on ImageNet dataset.
    log.info("load model")
    username = os.getenv("USER")
    model_path = "/home/{username}/caffe/models/bvlc_googlenet/".format(
        username=username)
    net_fn = model_path + "deploy.prototxt"
    param_fn = model_path + "bvlc_googlenet.caffemodel"

    # Patch the model to enable computation of gradients.
    model = caffe.io.caffe_pb2.NetParameter()
    text_format.Merge(open(net_fn).read(), model)
    model.force_backward = True
    open("tmp.prototxt", "w").write(str(model))

    net = caffe.Classifier("tmp.prototxt",
                           param_fn,
                           mean=np.float32([104.0, 116.0, 122.0]),
                           channel_swap=(2, 1, 0))

    ## Save an image of the network.
    #net.name = "network"
    #caffe.draw.draw_net_to_file(
    #    net,
    #    "LR"
    #)

    log.info("access {filename}".format(filename=input_image_filename, ))
    input_image = np.float32(PIL.Image.open(input_image_filename))

    log.info("generate")

    if recursive_zoom is not True:
        output_image = deepdream(net, input_image, end="inception_4c/output")
        output_image = np.uint8(output_image)
        log.info("save {filename}".format(filename=output_image_filename, ))
        PIL.Image.fromarray(output_image, "RGB").save(output_image_filename,
                                                      "PNG")
    else:
        os.makedirs("frames")
        frame = input_image
        frame_i = 0

        h, w = frame.shape[:2]
        s = 0.05  # scale coefficient
        for i in range(100):
            frame = deepdream(net, frame, end="inception_4c/output")
            output_filename = "frames/{index}.jpg".format(index=frame_i)
            PIL.Image.fromarray(np.uint8(frame)).save(output_filename)
            frame = nd.affine_transform(frame, [1 - s, 1 - s, 1],
                                        [h * s / 2, w * s / 2, 0],
                                        order=1)
            frame_i += 1

    log.info("")

    program.terminate()
Example #26
def main(options):

    global program
    program = propyte.Program(options=options,
                              name=name,
                              version=version,
                              logo=logo)
    global log
    from propyte import log

    print("")

    filename_ROOT = options["--fileroot"]
    filename_CSV = options["--filecsv"]
    selection = options["--selection"]
    class_label = int(options["--classlabel"])
    name_tree = options["--tree"]
    maximum_number_of_events = (None if options["--maxevents"].lower() == "none"
                                else int(options["--maxevents"]))
    include_headings = options["--headings"].lower() == "true"

    if not os.path.isfile(os.path.expandvars(filename_ROOT)):
        log.error("file {filename} not found".format(filename=filename_ROOT))
        program.terminate()

    if os.path.isfile(os.path.expandvars(filename_CSV)):
        log.warning(
            "CSV file {filename} exists -- *append* data to file".format(
                filename=filename_CSV))
        print("")
        append = True
    else:
        append = False

    file_ROOT = abstraction.open_ROOT_file(filename_ROOT)
    tree = file_ROOT.Get(name_tree)
    number_of_events = tree.GetEntries()

    file_CSV = open(filename_CSV, "a")
    writer = csv.writer(file_CSV, delimiter=",")

    log.info(
        textwrap.dedent("""
        input ROOT file: {filename_ROOT}
        output CSV file: {filename_CSV}
        selection:       {selection}
        class label:     {class_label}
        """.format(filename_ROOT=filename_ROOT,
                   filename_CSV=filename_CSV,
                   selection=selection,
                   class_label=class_label)))

    print("")
    log.info("save variables of events to CSV {filename}".format(
        filename=filename_CSV))
    print("")

    progress = shijian.Progress()
    progress.engage_quick_calculation_mode()
    index_selected = 0
    detail = True
    for index, event in enumerate(tree):
        if select_event(event=event, selection=selection):
            index_selected = index_selected + 1
            if (maximum_number_of_events is not None and
                    index_selected > maximum_number_of_events):
                break
            line = [
                #Variable_ttHbb(event = event, name = "Aplan_bjets"),
                Variable_ttHbb(event=event, name="Aplan_jets"),  #
                Variable_ttHbb(event=event, name="Centrality_all"),  #
                #Variable_ttHbb(event = event, name = "ClassifBDTOutput_6jsplit"),
                #Variable_ttHbb(event = event, name = "ClassifBDTOutput_basic"),
                #Variable_ttHbb(event = event, name = "ClassifBDTOutput_withReco_6jsplit"),
                #Variable_ttHbb(event = event, name = "ClassifBDTOutput_withReco_basic"),
                #Variable_ttHbb(event = event, name = "ClassifHPLUS_Semilep_HF_BDT200_Output"),
                Variable_ttHbb(event=event, name="dEtajj_MaxdEta"),  #
                Variable_ttHbb(event=event, name="dRbb_avg"),  #
                #Variable_ttHbb(event = event, name = "dRbb_MaxM"),
                Variable_ttHbb(event=event, name="dRbb_MaxPt"),  #
                #Variable_ttHbb(event = event, name = "dRbb_min"),
                #Variable_ttHbb(event = event, name = "dRbj_Wmass"),
                #Variable_ttHbb(event = event, name = "dRHl_MaxdR"),
                #Variable_ttHbb(event = event, name = "dRHl_MindR"),
                #Variable_ttHbb(event = event, name = "dRjj_min"),
                #Variable_ttHbb(event = event, name = "dRlepbb_MindR"),
                #Variable_ttHbb(event = event, name = "dRlj_MindR"),
                #Variable_ttHbb(event = event, name = "dRuu_MindR"),
                Variable_ttHbb(event=event, name="H1_all"),  #
                #Variable_ttHbb(event = event, name = "H4_all"),
                #Variable_ttHbb(event = event, name = "HhadT_nJets"),
                #Variable_ttHbb(event = event, name = "HiggsbbM"),
                #Variable_ttHbb(event = event, name = "HiggsjjM"),
                #Variable_ttHbb(event = event, name = "HT_all"),
                #Variable_ttHbb(event = event, name = "HT_jets"),
                #Variable_ttHbb(event = event, name = "Mbb_MaxM"),
                #Variable_ttHbb(event = event, name = "Mbb_MaxPt"),
                Variable_ttHbb(event=event, name="Mbb_MindR"),  #
                #Variable_ttHbb(event = event, name = "Mbj_MaxPt"),
                #Variable_ttHbb(event = event, name = "Mbj_MindR"),
                #Variable_ttHbb(event = event, name = "Mbj_Wmass"),
                #Variable_ttHbb(event = event, name = "met_met"),
                #Variable_ttHbb(event = event, name = "met_phi"),
                #Variable_ttHbb(event = event, name = "MHiggs"),
                #Variable_ttHbb(event = event, name = "Mjj_HiggsMass"),
                #Variable_ttHbb(event = event, name = "Mjjj_MaxPt"),
                #Variable_ttHbb(event = event, name = "Mjj_MaxPt"),
                #Variable_ttHbb(event = event, name = "Mjj_MindR"),
                #Variable_ttHbb(event = event, name = "Mjj_MinM"),
                #Variable_ttHbb(event = event, name = "mu"),
                #Variable_ttHbb(event = event, name = "Muu_MindR"),
                #Variable_ttHbb(event = event, name = "NBFricoNN_dil"),
                #Variable_ttHbb(event = event, name = "nBTags"),
                #Variable_ttHbb(event = event, name = "nBTags30"),
                #Variable_ttHbb(event = event, name = "nBTags50"),
                #Variable_ttHbb(event = event, name = "nBTags60"),
                #Variable_ttHbb(event = event, name = "nBTags70"),
                #Variable_ttHbb(event = event, name = "nBTags77"),
                #Variable_ttHbb(event = event, name = "nBTags80"),
                #Variable_ttHbb(event = event, name = "nBTags85"),
                #Variable_ttHbb(event = event, name = "nBTags90"),
                #Variable_ttHbb(event = event, name = "nBTagsFlatBEff_30"),
                #Variable_ttHbb(event = event, name = "nBTagsFlatBEff_40"),
                #Variable_ttHbb(event = event, name = "nBTagsFlatBEff_50"),
                #Variable_ttHbb(event = event, name = "nBTagsFlatBEff_60"),
                #Variable_ttHbb(event = event, name = "nBTagsFlatBEff_70"),
                #Variable_ttHbb(event = event, name = "nBTagsFlatBEff_77"),
                #Variable_ttHbb(event = event, name = "nBTagsFlatBEff_85"),

                #Variable_ttHbb(event = event, name = "nElectrons"),
                #Variable_ttHbb(event = event, name = "nHFJets"),
                Variable_ttHbb(event=event, name="NHiggs_30"),  #
                #Variable_ttHbb(event = event, name = "Njet_pt40"),
                #Variable_ttHbb(event = event, name = "Njet_pt40"),
                #Variable_ttHbb(event = event, name = "nJets"),
                #Variable_ttHbb(event = event, name = "nMuons"),
                #Variable_ttHbb(event = event, name = "nPrimaryVtx"),

                #Variable_ttHbb(event = event, name = "pT_jet3"),
                Variable_ttHbb(event=event, name="pT_jet5"),  #
                #Variable_ttHbb(event = event, name = "pTuu_MindR"),
                #Variable_ttHbb(event = event, name = "semilepMVAreco_b1higgsbhadtop_dR"),
                #Variable_ttHbb(event = event, name = "semilepMVAreco_bbhiggs_dR"),
                #Variable_ttHbb(event = event, name = "semilepMVAreco_BDT_output"),
                #Variable_ttHbb(event = event, name = "semilepMVAreco_BDT_output_6jsplit"),
                #Variable_ttHbb(event = event, name = "semilepMVAreco_BDT_output_truthMatchPattern"),
                #Variable_ttHbb(event = event, name = "semilepMVAreco_BDT_withH_output"),
                #Variable_ttHbb(event = event, name = "semilepMVAreco_BDT_withH_output_6jsplit"),
                #Variable_ttHbb(event = event, name = "semilepMVAreco_BDT_withH_output_truthMatchPattern"),
                #Variable_ttHbb(event = event, name = "semilepMVAreco_hadWb1Higgs_mass"),
                #Variable_ttHbb(event = event, name = "semilepMVAreco_higgsbhadtop_withH_dR"),
                #Variable_ttHbb(event = event, name = "semilepMVAreco_higgsbleptop_mass"),
                #Variable_ttHbb(event = event, name = "semilepMVAreco_higgsbleptop_withH_dR"),
                #Variable_ttHbb(event = event, name = "semilepMVAreco_higgslep_dR"),
                #Variable_ttHbb(event = event, name = "semilepMVAreco_higgsleptop_dR"),
                #Variable_ttHbb(event = event, name = "semilepMVAreco_higgs_mass"),
                #Variable_ttHbb(event = event, name = "semilepMVAreco_higgsq1hadW_mass"),
                #Variable_ttHbb(event = event, name = "semilepMVAreco_higgsttbar_withH_dR"),
                #Variable_ttHbb(event = event, name = "semilepMVAreco_leptophadtop_dR"),
                #Variable_ttHbb(event = event, name = "semilepMVAreco_leptophadtop_withH_dR"),
                #Variable_ttHbb(event = event, name = "semilepMVAreco_Ncombinations"),
                #Variable_ttHbb(event = event, name = "semilepMVAreco_nuApprox_recoBDT"),
                #Variable_ttHbb(event = event, name = "semilepMVAreco_nuApprox_recoBDT_6jsplit"),
                #Variable_ttHbb(event = event, name = "semilepMVAreco_nuApprox_recoBDT_withH"),
                #Variable_ttHbb(event = event, name = "semilepMVAreco_nuApprox_recoBDT_withH_6jsplit"),
                #Variable_ttHbb(event = event, name = "semilepMVAreco_ttH_Ht_withH"),
                #Variable_ttHbb(event = event, name = "ttHF_mva_discriminant"),

                #Variable_ttHbb(event = event, name = "el_d0sig[0]"),
                #Variable_ttHbb(event = event, name = "el_delta_z0_sintheta[0]"),
                #Variable_ttHbb(event = event, name = "el_e[0]"),
                #Variable_ttHbb(event = event, name = "el_eta[0]"),
                #Variable_ttHbb(event = event, name = "el_phi[0]"),
                #Variable_ttHbb(event = event, name = "el_pt[0]"),
                #Variable_ttHbb(event = event, name = "el_topoetcone20[0]"),

                #Variable_ttHbb(event = event, name = "mu_d0sig[0]"),
                #Variable_ttHbb(event = event, name = "mu_delta_z0_sintheta[0]"),
                #Variable_ttHbb(event = event, name = "mu_e[0]"),
                #Variable_ttHbb(event = event, name = "mu_eta[0]"),
                #Variable_ttHbb(event = event, name = "mu_phi[0]"),
                #Variable_ttHbb(event = event, name = "mu_pt[0]"),
                #Variable_ttHbb(event = event, name = "mu_topoetcone20[0]"),

                #Variable_ttHbb(event = event, name = "jet_e[0]"),
                #Variable_ttHbb(event = event, name = "jet_eta[0]"),
                #Variable_ttHbb(event = event, name = "jet_jvt[0]"),
                #Variable_ttHbb(event = event, name = "jet_mv2c10[0]"),
                #Variable_ttHbb(event = event, name = "jet_mv2c20[0]"),
                #Variable_ttHbb(event = event, name = "jet_phi[0]"),
                #Variable_ttHbb(event = event, name = "jet_pt[0]"),
                #Variable_ttHbb(event = event, name = "jet_semilepMVAreco_recoBDT_cand[0]"),
                #Variable_ttHbb(event = event, name = "jet_semilepMVAreco_recoBDT_cand_6jsplit[0]"),
                #Variable_ttHbb(event = event, name = "jet_semilepMVAreco_recoBDT_withH_cand[0]"),
                #Variable_ttHbb(event = event, name = "jet_semilepMVAreco_recoBDT_withH_cand_6jsplit[0]"),

                #Variable_ttHbb(event = event, name = "jet_e[1]"),
                #Variable_ttHbb(event = event, name = "jet_eta[1]"),
                #Variable_ttHbb(event = event, name = "jet_jvt[1]"),
                #Variable_ttHbb(event = event, name = "jet_mv2c10[1]"),
                #Variable_ttHbb(event = event, name = "jet_mv2c20[1]"),
                #Variable_ttHbb(event = event, name = "jet_phi[1]"),
                #Variable_ttHbb(event = event, name = "jet_pt[1]"),
                #Variable_ttHbb(event = event, name = "jet_semilepMVAreco_recoBDT_cand[1]"),
                #Variable_ttHbb(event = event, name = "jet_semilepMVAreco_recoBDT_cand_6jsplit[1]"),
                #Variable_ttHbb(event = event, name = "jet_semilepMVAreco_recoBDT_withH_cand[1]"),
                #Variable_ttHbb(event = event, name = "jet_semilepMVAreco_recoBDT_withH_cand_6jsplit[1]"),

                #Variable_ttHbb(event = event, name = "jet_e[2]"),
                #Variable_ttHbb(event = event, name = "jet_eta[2]"),
                #Variable_ttHbb(event = event, name = "jet_jvt[2]"),
                #Variable_ttHbb(event = event, name = "jet_mv2c10[2]"),
                #Variable_ttHbb(event = event, name = "jet_mv2c20[2]"),
                #Variable_ttHbb(event = event, name = "jet_phi[2]"),
                #Variable_ttHbb(event = event, name = "jet_pt[2]"),
                #Variable_ttHbb(event = event, name = "jet_semilepMVAreco_recoBDT_cand[2]"),
                #Variable_ttHbb(event = event, name = "jet_semilepMVAreco_recoBDT_cand_6jsplit[2]"),
                #Variable_ttHbb(event = event, name = "jet_semilepMVAreco_recoBDT_withH_cand[2]"),
                #Variable_ttHbb(event = event, name = "jet_semilepMVAreco_recoBDT_withH_cand_6jsplit[2]"),

                #Variable_ttHbb(event = event, name = "jet_e[3]"),
                #Variable_ttHbb(event = event, name = "jet_eta[3]"),
                #Variable_ttHbb(event = event, name = "jet_jvt[3]"),
                #Variable_ttHbb(event = event, name = "jet_mv2c10[3]"),
                #Variable_ttHbb(event = event, name = "jet_mv2c20[3]"),
                #Variable_ttHbb(event = event, name = "jet_phi[3]"),
                #Variable_ttHbb(event = event, name = "jet_pt[3]"),
                #Variable_ttHbb(event = event, name = "jet_semilepMVAreco_recoBDT_cand[3]"),
                #Variable_ttHbb(event = event, name = "jet_semilepMVAreco_recoBDT_cand_6jsplit[3]"),
                #Variable_ttHbb(event = event, name = "jet_semilepMVAreco_recoBDT_withH_cand[3]"),
                #Variable_ttHbb(event = event, name = "jet_semilepMVAreco_recoBDT_withH_cand_6jsplit[3]"),

                # large-R jets
                #Variable_ttHbb(event = event, name = "FirstLjetM"),
                #Variable_ttHbb(event = event, name = "FirstLjetPt"),
                #Variable_ttHbb(event = event, name = "HhadT_nLjets"),
                #Variable_ttHbb(event = event, name = "HT_ljets"),
                #Variable_ttHbb(event = event, name = "NBFricoNN_ljets"),
                #Variable_ttHbb(event = event, name = "nBjetOutsideLjet"),
                #Variable_ttHbb(event = event, name = "nJetOutsideLjet"),
                #Variable_ttHbb(event = event, name = "nLjet_m100"),
                #Variable_ttHbb(event = event, name = "nLjet_m50"),
                #Variable_ttHbb(event = event, name = "nLjets"),
                #Variable_ttHbb(event = event, name = "SecondLjetM"),
                #Variable_ttHbb(event = event, name = "SecondLjetPt"),
                #Variable_ttHbb(event = event, name = "ljet_C2[0]"),
                #Variable_ttHbb(event = event, name = "ljet_D2[0]"),
                #Variable_ttHbb(event = event, name = "ljet_e[0]"),
                #Variable_ttHbb(event = event, name = "ljet_eta[0]"),
                #Variable_ttHbb(event = event, name = "ljet_m[0]"),
                #Variable_ttHbb(event = event, name = "ljet_phi[0]"),
                #Variable_ttHbb(event = event, name = "ljet_pt[0]"),
                #Variable_ttHbb(event = event, name = "ljet_sd12[0]"),
                #Variable_ttHbb(event = event, name = "ljet_sd23[0]"),
                #Variable_ttHbb(event = event, name = "ljet_tau21[0]"),
                #Variable_ttHbb(event = event, name = "ljet_tau21_wta[0]"),
                #Variable_ttHbb(event = event, name = "ljet_tau32[0]"),
                #Variable_ttHbb(event = event, name = "ljet_tau32_wta[0]"),

                #rcjet_d12,
                #rcjet_d23,
                #rcjet_e,
                #rcjet_eta,
                #rcjet_phi,
                #rcjet_pt,
                Variable_ttHbb(name="class", value=class_label)
            ]
            if detail:
                log.info("event variable details:")
                log.info(
                    "\nnumber of variables: {number}".format(number=len(line)))
                table_contents = [["variable value", "variable type"]]
                for variable in line:
                    table_contents.append(
                        [str(variable.name()),
                         str(type(variable.value()))])
                print(pyprel.Table(contents=table_contents, ))
                detail = False
            if include_headings and not append:
                headings = [variable.name() for variable in line]
                writer.writerow(headings)
                include_headings = False
            values = [variable.value() for variable in line]
            writer.writerow(values)
        print(progress.add_datum(fraction=float(index + 1) / number_of_events))

    print("")
    log.info(
        "{number_selected} events of {number_total} passed selection".format(
            number_selected=index_selected, number_total=index + 1))

    file_CSV.close()

    print("")

    program.terminate()
Example #27
def main(options):

    global program
    program = propyte.Program(options=options,
                              name=name,
                              version=version,
                              logo=logo)
    global log
    from propyte import log

    filename_database = options["--inputdatabase"]
    filename_database_out = options["--outputdatabase"]
    name_table = options["--table"]
    name_table_metadata = options["--tablemetadata"]

    log.info("\naccess database {filename}".format(filename=filename_database))
    database = dataset.connect("sqlite:///{filename_database}".format(
        filename_database=filename_database))
    log.info("access table \"{name_table}\"".format(name_table=name_table))
    table = database[name_table]
    log.info(
        "number of rows in table \"{name_table}\": {number_of_rows}".format(
            name_table=name_table, number_of_rows=str(len(table))))

    # Fix database with data version 2015-01-06T172242Z.
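    # In this data version some reference fields were apparently stored as
    # tuples or as stringified tuples such as "(u'reference',)"; each is
    # unwrapped below to a plain string.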

    # Build a list of unique exchanges.
    exchanges = []
    for entry in table:

        utterance = entry["utterance"]
        response = entry["response"]
        utterance_time_UNIX = entry["utteranceTimeUNIX"]
        response_time_UNIX = entry["responseTimeUNIX"]
        utterance_reference = entry["utteranceReference"]
        response_reference = entry["responseReference"]
        exchange_reference = entry["exchangeReference"]

        if type(utterance_reference) is tuple:
            log.debug("\nchange utterance reference")
            log.debug("from:\n{utterance_reference}".format(
                utterance_reference=utterance_reference))
            utterance_reference = utterance_reference[0]
            log.debug("to:\n{utterance_reference}".format(
                utterance_reference=utterance_reference))
        if type(response_reference) is tuple:
            log.debug("\nchange response reference")
            log.debug("from:\n{response_reference}".format(
                response_reference=response_reference))
            response_reference = response_reference[0]
            log.debug("to:\n{response_reference}".format(
                response_reference=response_reference))
        if exchange_reference[0] == "(":
            log.debug("\nchange exchange reference")
            log.debug("from:\n{exchange_reference}".format(
                exchange_reference=exchange_reference))
            exchange_reference = ast.literal_eval(exchange_reference)
            exchange_reference = unicode(str(exchange_reference[0]), "utf-8")
            log.debug("to:\n{exchange_reference}".format(
                exchange_reference=exchange_reference))

        # Create a new exchange object using the fixed entries and append it to
        # the list of modified exchanges.
        exchange = abstraction.Exchange(
            utterance=utterance,
            response=response,
            utterance_time_UNIX=utterance_time_UNIX,
            response_time_UNIX=response_time_UNIX,
            utterance_reference=utterance_reference,
            response_reference=response_reference,
            exchange_reference=exchange_reference)
        exchange.printout()
        exchanges.append(exchange)
    # Save the exchanges to the new database.
    log.info("save exchanges to database")
    abstraction.save_exchanges_to_database(exchanges=exchanges,
                                           filename=filename_database_out)
    # Save metadata to the new database.
    abstraction.save_database_metadata(filename=filename_database_out)

    program.terminate()
def main(options):

    global program
    program = propyte.Program(options=options,
                              name=name,
                              version=version,
                              logo=logo)
    global log
    from propyte import log

    log.info("")

    # access options and arguments
    input_data_filename = options["--data"]

    # define dataset

    # Load the SUSY dataset (https://archive.ics.uci.edu/ml/datasets/SUSY).
    # The first column is the class label (1 for signal, 0 for background),
    # followed by 18 features (8 low-level features and 10 high-level features):
    #
    # - lepton 1 pT
    # - lepton 1 eta
    # - lepton 1 phi
    # - lepton 2 pT
    # - lepton 2 eta
    # - lepton 2 phi
    # - missing energy magnitude
    # - missing energy phi
    # - MET_rel
    # - axial MET
    # - M_R
    # - M_TR_2
    # - R
    # - MT2
    # - S_R
    # - M_Delta_R
    # - dPhi_r_b
    # - cos(theta_r1)
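    #
    # A minimal way to read this format without abstraction, assuming a plain
    # CSV file with the class label in the first column (hypothetical sketch):
    #
    #     import csv
    #     with open(input_data_filename) as f:
    #         rows = [[float(value) for value in row] for row in csv.reader(f)]
    #     targets = [row[0] for row in rows]
    #     features = [row[1:] for row in rows]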

    data = abstraction.access_SUSY_dataset_format_file(input_data_filename)

    dataset = abstraction.Dataset(data=data)

    # define data

    log.info("split data for cross-validation")
    features_train, features_test, targets_train, targets_test =\
        cross_validation.train_test_split(
            dataset.features(),
            dataset.targets(),
            train_size = 0.7
        )

    # grid search

    import itertools

    epochs = [10, 100, 500, 1000]
    architecture = [200, 300, 300, 300, 200]

    grid_search_map = {}
    grid_search_map["epoch"] = []
    grid_search_map["hidden_nodes"] = []
    grid_search_map["score_training"] = []
    grid_search_map["score_test"] = []

    # define progress
    count_total = 0
    for epoch in epochs:
        for nodes_count in xrange(1, len(architecture) + 1):
            combinations = itertools.product(architecture, repeat=nodes_count)
            for combination in combinations:
                count_total += 1
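    # Equivalently, the total has a closed form:
    # count_total = len(epochs) * sum(len(architecture) ** k
    #                                 for k in range(1, len(architecture) + 1))
    # which here is 4 * (5 + 25 + 125 + 625 + 3125) = 15620 combinations.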
    count = 0
    progress = shijian.Progress()
    progress.engage_quick_calculation_mode()

    for epoch in epochs:
        for nodes_count in xrange(1, len(architecture) + 1):
            combinations = itertools.product(architecture, repeat=nodes_count)
            for combination in combinations:
                hidden_nodes = list(combination)

                # define model

                log.info("define classification model")
                classifier = abstraction.Classification(
                    number_of_classes=2,
                    hidden_nodes=hidden_nodes,
                    epochs=epoch)

                # train model

                log.info("fit model to dataset features and targets")
                classifier._model.fit(features_train, targets_train)
                #classifier.save()

                # predict and cross-validate training

                log.info("test trained model on training dataset")
                score_training = metrics.accuracy_score(
                    classifier._model.predict(features_train), targets_train)
                score_test = metrics.accuracy_score(
                    classifier._model.predict(features_test), targets_test)
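                # sklearn's accuracy_score expects (y_true, y_pred); the
                # swapped argument order here is harmless because plain
                # accuracy is symmetric in its two arguments.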
                log.info("\ntraining-testing instance complete:")
                log.info("epoch:          {epoch}".format(epoch=epoch))
                log.info("architecture:   {architecture}".format(
                    architecture=hidden_nodes))
                log.info("score training: {score_training}".format(
                    score_training=100 * score_training))
                log.info("score test:     {score_test}".format(score_test=100 *
                                                               score_test))
                pyprel.print_line()
                grid_search_map["epoch"].append(epoch)
                grid_search_map["hidden_nodes"].append(hidden_nodes)
                grid_search_map["score_training"].append(score_training)
                grid_search_map["score_test"].append(score_test)

                # save current grid search map
                shijian.export_object(grid_search_map,
                                      filename="grid_search_map.pkl",
                                      overwrite=True)

                count += 1
                print(progress.add_datum(fraction=float(count) / count_total))

    number_of_entries = len(grid_search_map["epoch"])

    # table

    table_contents = [[
        "epoch", "architecture", "score training", "score testing"
    ]]
    for index in range(0, number_of_entries):
        table_contents.append([
            str(grid_search_map["epoch"][index]),
            str(grid_search_map["hidden_nodes"][index]),
            str(grid_search_map["score_training"][index]),
            str(grid_search_map["score_test"][index])
        ])
    print("\ngrid search map:\n")
    print(pyprel.Table(contents=table_contents, ))

    # plot

    architectures = shijian.unique_list_elements(
        grid_search_map["hidden_nodes"])

    architecture_epoch_score = {}
    for architecture in architectures:
        architecture_epoch_score[str(architecture)] = []
        for index in range(0, number_of_entries):
            if grid_search_map["hidden_nodes"][index] == architecture:
                architecture_epoch_score[str(architecture)].append([
                    grid_search_map["epoch"][index],
                    grid_search_map["score_test"][index]
                ])

    figure = matplotlib.pyplot.figure()
    figure.set_size_inches(10, 10)
    axes = figure.add_subplot(1, 1, 1)
    axes.set_xscale("log")
    figure.suptitle("hyperparameter map", fontsize=20)
    matplotlib.pyplot.xlabel("epochs")
    matplotlib.pyplot.ylabel("training test score")

    for key, value in architecture_epoch_score.iteritems():
        epochs = [element[0] for element in value]
        score_test = [element[1] for element in value]
        matplotlib.pyplot.plot(epochs, score_test, label=key)

    matplotlib.pyplot.legend(loc="center right")

    matplotlib.pyplot.savefig("hyperparameter_map.eps",
                              bbox_inches="tight",
                              format="eps")

    # find best-scoring models

    # Find the 3 best scores.
    best_models = sorted(zip(grid_search_map["score_test"],
                             grid_search_map["hidden_nodes"]),
                         reverse=True)[:3]
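    # Sorting the zipped (score, architecture) pairs in reverse orders them by
    # test score, highest first; heapq.nlargest(3, ...) would avoid the full
    # sort for large grids.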

    # table
    table_contents = [["architecture", "score testing"]]
    for model in best_models:
        table_contents.append([str(model[1]), str(model[0])])
    print("\nbest-scoring models:\n")
    print(pyprel.Table(contents=table_contents, ))

    log.info("")

    program.terminate()
def main(options):

    global program
    program = propyte.Program(options=options,
                              name=name,
                              version=version,
                              logo=logo)
    global log
    from propyte import log

    log.info("")

    # access options and arguments
    ROOT_filename_ttH = options["--datattH"]
    ROOT_filename_ttbb = options["--datattbb"]
    engage_plotting = shijian.string_to_bool(options["--plot"])
    engage_correlations_analysis = shijian.string_to_bool(
        options["--analyzecorrelations"])

    log.info("ttH data file: {filename}".format(filename=ROOT_filename_ttH))
    log.info("ttbb data file: {filename}".format(filename=ROOT_filename_ttbb))

    # Access data for event classes ttbb and ttH.

    data_ttbb = abstraction.load_HEP_data(ROOT_filename=ROOT_filename_ttbb,
                                          tree_name="nominal",
                                          maximum_number_of_events=None)

    data_ttH = abstraction.load_HEP_data(ROOT_filename=ROOT_filename_ttH,
                                         tree_name="nominal",
                                         maximum_number_of_events=None)

    log.info("\nnumber of ttbb and ttH events: {number_of_events}\n".format(
        number_of_events=len(data_ttbb.indices()) + len(data_ttH.indices())))

    # Plot comparisons of variables of the two datasets.

    if engage_plotting is True:

        for variable_name in data_ttbb.variables():
            log.info(
                "plot ttbb versus ttH comparison of {variable_name}".format(
                    variable_name=variable_name))
            datavision.save_histogram_comparison_matplotlib(
                values_1=data_ttbb.values(name=variable_name),
                values_2=data_ttH.values(name=variable_name),
                label_1=variable_name + "_ttbb",
                label_2=variable_name + "_ttH",
                normalize=True,
                label_ratio_x="frequency",
                label_y="",
                title=variable_name + "_ttbb_ttH",
                filename=variable_name + "_ttbb_ttH.png",
                directory="variables_comparisons")

    # Analyse variable correlations.

    if engage_correlations_analysis is True:

        variables_names = data_ttH.variables()
        variables_values = []
        for variable_name in variables_names:
            variables_values.append(data_ttH.values(name=variable_name))
        datavision.analyze_correlations(variables=variables_values,
                                        variables_names=variables_names,
                                        table_order_variable="p_value")

    # Add class labels to the data sets, 0 for ttbb and 1 for ttH.

    for index in data_ttbb.indices():
        data_ttbb.variable(index=index, name="class", value=0)

    for index in data_ttH.indices():
        data_ttH.variable(index=index, name="class", value=1)

    # With classes now defined, combine the datasets before preprocessing them.

    data_ttbb.add(dataset=data_ttH)

    # Preprocess all data: standardize the dataset by centering its variables to
    # mean and scaling its variables to unit variance.

    data_ttbb.preprocess_all()
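    # Standardization rescales each variable x to (x - mean(x)) / std(x). A
    # minimal equivalent with scikit-learn, assuming plain numpy feature
    # arrays rather than the datavision dataset object, would be:
    #
    #     from sklearn.preprocessing import StandardScaler
    #     features = StandardScaler().fit_transform(features)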

    # Convert the data sets to a simple list format with the first column
    # containing the class label.
    dataset = abstraction.convert_HEP_datasets_from_datavision_datasets_to_abstraction_datasets(
        datasets=[data_ttbb])

    log.info("")

    # define data

    log.info("split data for cross-validation")
    features_train, features_test, targets_train, targets_test =\
        sklearn.cross_validation.train_test_split(
            dataset.features(),
            dataset.targets(),
            train_size = 0.7
        )
    log.info("define classification model")

    # define model

    #architecture = [10000, 10000, 1000, 10000, 1000]
    architecture = [10000, 10000, 10000, 10000, 1000]
    #architecture = [50, 150, 250, 300, 400]
    classifier = abstraction.Classification(number_of_classes=2,
                                            hidden_nodes=architecture,
                                            epochs=30)

    # train model

    log.info("fit classification model to dataset features and targets")
    classifier._model.fit(
        features_train,
        targets_train,
        #logdir = "log"
    )

    # predict and cross-validate training

    log.info("test trained classification model on training dataset")
    score = sklearn.metrics.accuracy_score(
        classifier._model.predict(features_train), targets_train)
    log.info("prediction accuracy on training dataset: {percentage}".format(
        percentage=100 * score))
    log.info("accuracy of classifier on test dataset:")
    score = sklearn.metrics.accuracy_score(
        classifier._model.predict(features_test), targets_test)
    log.info("prediction accuracy on test dataset: {percentage}".format(
        percentage=100 * score))

    classifier.save(
        directory="abstraction_classifier_ttH_ttbb_{architecture}".format(
            architecture=str(architecture).replace(" ", "_")))

    log.info("")

    program.terminate()
Example #30
def main(options):

    global program
    program = propyte.Program(options=options,
                              name=name,
                              version=version,
                              logo=logo)
    global log
    from propyte import log

    # access options and arguments
    input_image_filename = options["--inputimage"]
    output_pixels_filename = options["--outputpixels"]
    input_pixels_filename = options["--inputpixels"]
    output_image_filename = options["--outputimage"]
    output_image_width = int(options["--outputimagewidth"])
    mode_convert_image_to_pixels = bool(options["--convertimagetopixels"])
    mode_convert_pixels_to_NL_format = bool(
        options["--convertpixelstoNLformat"])
    mode_validate_NL_pixels = bool(options["--validateNLpixels"])
    mode_validate_pixels = bool(options["--validatepixels"])
    mode_convert_pixels_to_image = bool(options["--convertpixelstoimage"])
    mode_convert_NL_pixels_to_image = bool(options["--convertNLpixelstoimage"])

    log.info("")

    if mode_convert_image_to_pixels is True:
        log.info("convert image to pixels")
        # Access the input image.
        log.info("access input image")
        input_image = PIL.Image.open(input_image_filename)
        log.info(
            "input image mode: {image_mode}".format(image_mode=input_image.mode))
        log.info(
            "input image size: {image_size}".format(image_size=input_image.size))
        pixels = list(input_image.getdata())
        pixels_text = str(pixels)
        # Create and save the output pixels.
        output_pixels_file = open(output_pixels_filename, "w")
        output_pixels_file.truncate()
        log.info("save output pixels {output_pixels_filename}".format(
            output_pixels_filename=output_pixels_filename))
        output_pixels_file.write(pixels_text)
        output_pixels_file.close()

    elif mode_convert_pixels_to_NL_format is True:
        log.info("convert pixels to pixels with newlines")
        # Access input pixels.
        log.info("access input pixels")
        with open(input_pixels_filename) as input_pixels_file:
            text = input_pixels_file.read()
        # Add newlines.
        pixels_text = text.replace("), (", "),\n(")
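        # Inserting a newline between consecutive tuples yields one pixel per
        # line, which makes the file easier to inspect and validate.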
        # Save the pixels.
        output_pixels_file = open(output_pixels_filename, "w")
        output_pixels_file.truncate()
        log.info("save output pixels {output_pixels_filename}".format(
            output_pixels_filename=output_pixels_filename))
        output_pixels_file.write(pixels_text)
        output_pixels_file.close()

    elif mode_validate_NL_pixels:
        log.info("validate pixels with newlines")
        # Access input pixels.
        log.info("access input pixels")
        with open(input_pixels_filename) as input_pixels_file:
            text = input_pixels_file.read()
        parts = text.split("\n")
        log.info("replace invalidate pixels")
        for n, part in enumerate(parts):
            # Create a temporary part for regex examination.
            tmp_part = part.strip("),").strip("(")
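            # The pattern accepts exactly four comma-separated integers, i.e.
            # an RGBA pixel such as "12, 34, 56, 255"; anything else is
            # replaced with opaque black.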
            if not re.match(r"^\d+, \d+, \d+, \d+$", tmp_part):
                log.info(
                    "tuple {tuple_index} malformed: {tuple} -- replacing with (0, 0, 0, 255)"
                    .format(tuple_index=n, tuple=tmp_part))
                parts[n] = "(0, 0, 0, 255)"
        # Remove trailing commas.
        parts = [part.strip(",") for part in parts]
        pixels_text = str(parts)
        pixels_text = pixels_text.replace(")', '(", ")',\n'(")
        # Save the pixels.
        output_pixels_file = open(output_pixels_filename, "w")
        output_pixels_file.truncate()
        log.info("save output pixels {output_pixels_filename}".format(
            output_pixels_filename=output_pixels_filename))
        output_pixels_file.write(pixels_text)
        output_pixels_file.close()

    elif mode_validate_pixels is True:
        log.info("validate pixels")
        # Access input pixels.
        log.info("access input pixels")
        with open(input_pixels_filename) as input_pixels_file:
            text = input_pixels_file.read()
        parts = text[2:-2].split("), (")
        log.info("validate pixels")
        for n, part in enumerate(parts):
            if not re.match(r"^\d+, \d+, \d+, \d+$", part):
                print("tuple {tuple_index} malformed: {tuple}".format(
                    tuple_index=n, tuple=part))

    elif mode_convert_pixels_to_image is True:
        log.info("convert pixels to image")
        # Access input pixels.
        log.info("access input pixels")
        input_pixels_file = open(input_pixels_filename)
        pixels = input_pixels_file.read()
        pixels = ast.literal_eval(pixels)
        pixels = pixels[:10000]
        # Determine the image height by determining the maximum number of image
        # widths that are possible with the available pixel data.
        log.info("determine output image dimensions")
        image_mode = "RGBA"
        image_width = output_image_width  # e.g. 2379
        image_height = int(len(pixels) / image_width)  # e.g. 2196
        image_size = (image_width, image_height)
        print(type(image_size))
        print("output image mode: {image_mode}".format(image_mode=image_mode))
        print("output image size: {image_size}".format(image_size=image_size))

        pixels = [
            pixel.replace("))", ")").replace("((", "(").replace("),)", "), ")
            for pixel in pixels
        ]

        # Convert list of pixel strings to list of pixel tuples.
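        # re.sub(r'\b0+\B', '', ...) strips leading zeros from each number
        # (e.g. "007" -> "7") so that ast.literal_eval does not reject
        # leading-zero integer literals.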
        pixels = [
            ast.literal_eval(re.sub(r'\b0+\B', '', pixel)) for pixel in pixels
        ]

        # Use only the number of pixels to make the image of the defined
        # dimensions.
        number_of_pixels = image_width * image_height
        pixels = pixels[:number_of_pixels]
        # Create and save the output image.
        log.info("create output image")
        #output_image_file = PIL.Image.new(image_mode, image_size)
        output_image_file = PIL.Image.new("RGBA", (2379, 2069))

        for pixel in pixels:
            if not isinstance(pixel, tuple):
                print("not a tuple: {pixel}".format(pixel=pixel))

        output_image_file.putdata(pixels)
        log.info("save output image {output_image_filename}".format(
            output_image_filename=output_image_filename))
        output_image_file.save(output_image_filename)

    elif mode_convert_NL_pixels_to_image is True:
        log.info("convert pixels to image")
        # Access input pixels.
        log.info("access input pixels")
        input_pixels_file = open(input_pixels_filename)
        pixels = input_pixels_file.read()
        pixels = ast.literal_eval(pixels)
        #pixels = pixels[:10000]
        # Determine the image height by determining the maximum number of image
        # widths that are possible with the available pixel data.
        log.info("determine output image dimensions")
        image_mode = "RGBA"
        image_width = output_image_width  # e.g. 2379
        image_height = int(len(pixels) / image_width)  # e.g. 2196
        image_size = (image_width, image_height)
        print(type(image_size))
        print("output image mode: {image_mode}".format(image_mode=image_mode))
        print("output image size: {image_size}".format(image_size=image_size))

        pixels = [
            pixel.replace("))", ")").replace("((", "(").replace("),)", "), ")
            for pixel in pixels
        ]

        # Convert list of pixel strings to list of pixel tuples.
        pixels = [
            ast.literal_eval(re.sub(r'\b0+\B', '', pixel)) for pixel in pixels
        ]

        # Use only the number of pixels to make the image of the defined
        # dimensions.
        number_of_pixels = image_width * image_height
        pixels = pixels[:number_of_pixels]

        # Create and save the output image.
        log.info("create output image")
        #output_image_file = PIL.Image.new(image_mode, image_size)
        output_image_file = PIL.Image.new("RGBA", (2379, 2069))

        log.info("number of pixels: {number_of_pixels}".format(
            number_of_pixels=len(pixels)))

        #for pixel in pixels:
        #    if str(type(pixel)) is not "<type 'tuple'>":
        #        print("not tuple: {pixel}".format(pixel = pixel))

        pixels = pixels[:4922151]
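        # The truncation above keeps 4922151 = 2379 * 2069 pixels, the exact
        # pixel count of the hard-coded output image dimensions.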

        output_image_file.putdata(pixels)
        log.info("save output image {output_image_filename}".format(
            output_image_filename=output_image_filename))
        output_image_file.save(output_image_filename)

    else:
        log.info("no operation selected\n")
        print(__doc__)

    log.info("")

    program.terminate()