Пример #1
0
def classify(**args):
    """
    Build an ensemble of MLPs sharing one input, average their outputs,
    train the combined model and report test performance.

    :param args: keyword arguments passed from cli parser
    :returns: balanced accuracy score on the test set
    """
    # suppress verbose output when running repeated experiments
    allow_print = args['repetitions'] == 1
    # resolve classification target and a printable description of it
    cls_target, cls_str = set_classification_targets(args['cls_choice'])
    dataset = prepare_dataset(args['dataset_choice'], cls_target, args['batch_size'])

    print('\n\tTask: Classify «{}» using «{}»\n'.format(cls_str, dataset['data_str']))
    print_dataset_info(dataset)

    # all sub-models share a single input layer
    shared_input = Input(shape=(7810, ))
    sub_models = [build_model(idx, dataset['num_classes'], inputs=shared_input)
                  for idx in range(args['num_models'])]

    # ensemble: average the output of every sub-model
    averaged = Average()([m.outputs[0] for m in sub_models])
    model = Model(shared_input, outputs=averaged, name='multiple')
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    if allow_print:
        model.summary()
        print('')
        plot_model(model, to_file='img/multiple_mlp.png')

    model.fit(dataset['train_data'],
              steps_per_epoch=dataset['train_steps'],
              epochs=args['epochs'],
              verbose=1,
              class_weight=dataset['class_weights'])

    # evaluation model
    print('Evaluate ...')
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    model.evaluate(dataset['eval_data'], steps=dataset['test_steps'], verbose=1)

    # test-set predictions for classification report / confusion matrix
    print('Test ...')
    predictions = model.predict(dataset['test_data'], steps=dataset['test_steps'])

    if allow_print:
        diagnose_output(dataset['test_labels'], predictions.argmax(axis=1),
                        dataset['classes_trans'])

    return balanced_accuracy_score(dataset['test_labels'], predictions.argmax(axis=1))
Пример #2
0
def classify(**args):
    """
    Fit a decision tree classifier on the chosen dataset and report
    test-set performance.

    :param args: keyword arguments passed from cli parser
    :returns: balanced accuracy score on the test set
    """
    # suppress verbose output when running repeated experiments
    allow_print = args['repetitions'] == 1
    # resolve classification target and a printable description of it
    cls_target, cls_str = set_classification_targets(args['cls_choice'])
    d = prepare_dataset(args['dataset_choice'],
                        cls_target,
                        args['batch_size'],
                        train_shuffle_repeat=False,
                        categorical_labels=False)

    print('\n\tTask: Classify «{}» using «{}» with DecisionTreeClassifier\n'.
          format(cls_str, d['data_str']))
    print_dataset_info(d)

    model = DecisionTreeClassifier(class_weight='balanced')

    # drain the (possibly large) train generator into memory, then fit.
    # Careful with RAM
    train_data = []
    for batch in tqdm(d['train_data'], total=d['train_steps'], desc='prep_train'):
        train_data.extend(batch[0])
    model.fit(train_data, d['train_labels'])
    del train_data

    # same for the test generator, then predict for diagnosis
    test_data = []
    for batch in tqdm(d['test_data'], total=d['test_steps'], desc='prep_test'):
        test_data.extend(batch[0])
    pred = model.predict(test_data)
    del test_data

    if allow_print:
        # visualise decision tree, from datacamp.com/community/tutorials/decision-tree-classification-python
        dot_data = StringIO()
        export_graphviz(model,
                        out_file=dot_data,
                        filled=True,
                        rounded=True,
                        special_characters=True)
        graph = pydotplus.graph_from_dot_data(dot_data.getvalue())
        graph.write_pdf('img/decision_tree.pdf')

        diagnose_output(d['test_labels'], pred, d['classes_trans'])

    return balanced_accuracy_score(d['test_labels'], pred)
Пример #3
0
def classify(**args):
    """
    Train the baseline MLP on the chosen dataset and report test performance.

    :param args: keyword arguments passed from cli parser
    :returns: balanced accuracy score on the test set
    """
    # suppress verbose output when running repeated experiments
    allow_print = args['repetitions'] == 1
    # resolve classification target and a printable description of it
    cls_target, cls_str = set_classification_targets(args['cls_choice'])
    dataset = prepare_dataset(args['dataset_choice'], cls_target,
                              args['batch_size'], args['norm_choice'])

    print('\n\tTask: Classify «{}» using «{}»\n'.format(cls_str, dataset['data_str']))
    print_dataset_info(dataset)

    model = build_model(0, dataset['num_classes'], name='baseline_mlp', new_input=True)
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    if allow_print:
        model.summary()
        print('')

    # callback to log data for TensorBoard
    # tb_callback = TensorBoard(log_dir='./results', histogram_freq=0, write_graph=True, write_images=True)

    # train, then evaluate on the held-out evaluation split
    model.fit(dataset['train_data'],
              steps_per_epoch=dataset['train_steps'],
              epochs=args['epochs'],
              # callbacks=[tb_callback],
              verbose=1,
              class_weight=dataset['class_weights'])

    model.evaluate(dataset['eval_data'], steps=dataset['test_steps'], verbose=1)

    # test-set predictions for classification report / confusion matrix
    predictions = model.predict(dataset['test_data'], steps=dataset['test_steps'])

    if allow_print:
        diagnose_output(dataset['test_labels'], predictions.argmax(axis=1),
                        dataset['classes_trans'])

    return balanced_accuracy_score(dataset['test_labels'], predictions.argmax(axis=1))
Пример #4
0
def classify(**args):
    """
    Train the baseline MLP on datasets with an increasing share of mixed-in
    samples (0% to 100% in 5% steps), repeating each mixture several times,
    and save the balanced accuracy matrix to disk.

    :param args: keyword arguments passed from cli parser
    """
    with open('config/datasets.yaml') as cnf:
        dataset_configs = yaml.safe_load(cnf)
        try:
            repo_path = dataset_configs['repo_path']
        except KeyError as e:
            print(f'Missing dataset config key: {e}')
            sys.exit(1)

    repetitions = args['repetitions']
    # determine classification targets and parameters to construct datasets properly
    cls_target, cls_str = set_classification_targets(args['cls_choice'])

    # list of 5% increments ranging from 0% to 100%
    mixture_range = np.arange(0, 1.01, .05)
    # rows: mixture percentages, columns: repetitions
    results = np.zeros((len(mixture_range), repetitions))

    for i, cut in enumerate(mixture_range):
        print(f'cut: {cut}')
        d = prepare_mixture_dataset(
            cls_target,
            args['batch_size'],
            mixture_pct=cut,
            normalisation=args['norm_choice'])

        # perform #repetitions per 5% dataset mixture
        for j in range(repetitions):
            model = build_model(0, d['num_classes'], name='baseline_mlp', new_input=True)
            model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

            # train and evaluate
            model.fit(
                d['train_data'],
                steps_per_epoch=d['train_steps'],
                epochs=args['epochs'],
                verbose=0,
                class_weight=d['class_weights'])
            # NOTE(review): d['test_data'] is *called* here — presumably a
            # generator factory; verify against prepare_mixture_dataset
            pred = model.predict(d['test_data'](), steps=d['test_steps'])
            results[i, j] = balanced_accuracy_score(d['test_labels'], pred.argmax(axis=1))
    print(results)
    # bugfix: `results` was previously passed as a third argument to join()
    # instead of to np.save(), and the file name was missing its f-string
    # prefix so {cls_target} was written literally
    np.save(join(repo_path, f'data/synthetic_influence_target_{cls_target}'), results)
Пример #5
0
def classify(**args):
    """
    Train the 64shot MLP, compute per-shot on-target prediction scores and
    produce accuracy heatmaps from them.

    :param args: keyword arguments passed from cli parser
    :returns: balanced accuracy score on the test set
    """
    # suppress verbose output when running repeated experiments
    allow_print = args['repetitions'] == 1
    # resolve classification target and a printable description of it
    cls_target, cls_str = set_classification_targets(args['cls_choice'])
    d = prepare_dataset(args['dataset_choice'],
                        cls_target,
                        args['batch_size'],
                        args['norm_choice'],
                        mp_heatmap=True)

    print('\n\tTask: Classify «{}» using «{}»\n'.format(cls_str, d['data_str']))
    print_dataset_info(d)

    model = build_model(0, d['num_classes'], name='64shot_mlp', new_input=True)
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    if allow_print:
        model.summary()
        print('')

    # train, then evaluate on the held-out evaluation split
    model.fit(
        x=d['train_data'],
        steps_per_epoch=d['train_steps'],
        epochs=args['epochs'],
        verbose=1,
        class_weight=d['class_weights'])

    model.evaluate(d['eval_data'], steps=d['test_steps'], verbose=1)

    # test-set predictions for diagnosis
    pred = model.predict(d['test_data'], steps=d['test_steps'], verbose=1)
    # instead of argmax, reduce list to only on-target predictions to see how accurate the model judged each shot
    target_preds = [pred[idx][label] for idx, label in enumerate(d['test_labels'])]
    pred = pred.argmax(axis=1)

    compute_accuracy_heatmaps(d, target_preds, cls_target, args['epochs'])

    return balanced_accuracy_score(d['test_labels'], pred)
Пример #6
0
def classify(**args):
    """
    Train the baseline MLP with a fixed batch size of 64 and print a full
    diagnosis of its test-set predictions.

    :param args: keyword arguments passed from cli parser
    """
    batch_size = 64
    # resolve classification target and a printable description of it
    cls_target, cls_str = set_classification_targets(args['cls_choice'])
    d = prepare_dataset(args['dataset_choice'], cls_target, batch_size)

    print('\n\tTask: Classify «{}» using «{}»\n'.format(cls_str, d['data_str']))
    print_dataset_info(d)

    model = build_model(0, d['num_classes'], name='baseline_mlp', new_input=True)
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    # train, then evaluate on the held-out evaluation split
    model.fit(d['train_data'],
              steps_per_epoch=d['train_steps'],
              epochs=args['epochs'],
              verbose=1,
              class_weight=d['class_weights'])
    print('Evaluate ...')
    model.evaluate(d['eval_data'], steps=d['test_steps'], verbose=1)

    # test-set predictions for classification report / confusion matrix
    print('Test ...')
    pred = model.predict(d['test_data'], steps=d['test_steps'])

    diagnose_output(d['test_labels'], pred.argmax(axis=1), d['classes_trans'])
Пример #7
0
def classify(**args):
    """
    Transfer-learning experiment: pre-train the baseline MLP on dataset 0,
    then freeze all but the last layer, replace the classification head and
    retrain it (for twice the epochs) on the HH12 dataset.

    :param args: keyword arguments passed from cli parser
    :returns: balanced accuracy score on the HH12 test set
    """
    # only allow print-outs if execution has no repetitions
    allow_print = args['repetitions'] == 1
    # determine classification targets and parameters to construct datasets properly
    cls_target, cls_str = set_classification_targets(args['cls_choice'])
    # dataset 0 is the source domain used for pre-training
    d = prepare_dataset(0, cls_target, args['batch_size'], args['norm_choice'])

    print('\n\tTask: Classify «{}» using «{}»'.format(cls_str, d['data_str']))
    print_dataset_info(d)

    model = build_model(0,
                        d['num_classes'],
                        name='baseline_mlp',
                        new_input=True)
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    # train and evaluate - pre-transfer
    model.fit(d['train_data'],
              steps_per_epoch=d['train_steps'],
              epochs=args['epochs'],
              verbose=1,
              class_weight=d['class_weights'])
    print('Evaluate ...')
    model.evaluate(d['eval_data'], steps=d['test_steps'], verbose=1)

    # release the source dataset before loading the target one
    del d
    d = prepare_dataset(
        1,  # HH12
        cls_target,
        args['batch_size'],
        args['norm_choice'])
    print_dataset_info(d)

    # make layers untrainable and remove classification layer, then train new last layer on handheld data
    for l in model.layers[:-1]:
        l.trainable = False

    if allow_print:
        plot_model(model, to_file='img/transfer_mlp_pre.png')

    # new softmax head attached to the penultimate layer; sized for the
    # target dataset's class count, which may differ from the source's
    new_layer = Dense(d['num_classes'],
                      activation='softmax',
                      name='dense_transfer')(model.layers[-2].output)
    model = Model(inputs=model.inputs,
                  outputs=new_layer,
                  name='transfer_model')
    # recompile so the frozen layers and the new head take effect
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    if allow_print:
        model.summary()
        print('')
        plot_model(model, to_file='img/transfer_mlp_post.png')

    # train and evaluate - post-transfer (twice the pre-transfer epochs)
    model.fit(d['train_data'],
              steps_per_epoch=d['train_steps'],
              epochs=args['epochs'] * 2,
              verbose=1,
              class_weight=d['class_weights'])
    print('Evaluate ...')
    model.evaluate(d['eval_data'], steps=d['test_steps'], verbose=1)

    # predict on testset and calculate classification report and confusion matrix for diagnosis
    print('Test ...')
    pred = model.predict(d['test_data'], steps=d['test_steps'])

    # heatmap is written to file instead of shown interactively
    diagnose_output(
        d['test_labels'],
        pred.argmax(axis=1),
        d['classes_trans'],
        show=False,
        file_name=
        f'heatmap_transfer_{datetime.now().hour}_{datetime.now().minute}')

    return balanced_accuracy_score(d['test_labels'], pred.argmax(axis=1))
Пример #8
0
def classify(**args):
    """
    Train an MLP on a synthetic dataset, then build a concatenation model on
    top of its last pre-classification layer, fine-tune that model on
    handheld data and report its test performance.

    :param args: keyword arguments passed from cli parser
    :returns: balanced accuracy score on the handheld test set
    """
    # only allow print-outs if execution has no repetitions
    allow_print = args['repetitions'] == 1
    # determine classification targets and parameters to construct datasets properly
    cls_target, cls_str = set_classification_targets(args['cls_choice'])
    d = prepare_dataset(
        0,  # any synthetic
        cls_target,
        args['batch_size'])

    print('\n\tTask: Classify «{}» using «{}»\n'.format(
        cls_str, d['data_str']))
    print_dataset_info(d)

    model = build_model(1, d['num_classes'], name='concat_mlp', new_input=True)
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    if allow_print:
        model.summary()
        plot_model(model, to_file='img/concat_mlp.png')

    # train and evaluate on the synthetic data
    model.fit(d['train_data'],
              steps_per_epoch=d['train_steps'],
              epochs=args['epochs'],
              verbose=1,
              class_weight=d['class_weights'])
    model.evaluate(d['eval_data'], steps=d['test_steps'], verbose=1)

    del d

    # load handheld dataset for evaluation
    d = prepare_dataset(
        2,  # any handheld
        cls_target,
        args['batch_size'])
    print_dataset_info(d)

    # build model for handheld data, concatenates the output of the last pre-classification layer of the synthetic network
    concat_model = build_model_concat(2, d['num_classes'], concat_model=model)
    concat_model.compile(optimizer='adam',
                         loss='categorical_crossentropy',
                         metrics=['accuracy'])
    if allow_print:
        concat_model.summary()
        plot_model(concat_model, to_file='img/concat_mlp.png')

    concat_model.fit(d['train_data'],
                     steps_per_epoch=d['train_steps'],
                     epochs=args['epochs'],
                     verbose=1,
                     class_weight=d['class_weights'])

    # predict on test set and calculate classification report and confusion matrix for diagnosis
    # bugfix: evaluate the freshly trained concat_model — previously the
    # untouched synthetic `model` was used here, so the fine-tuned network
    # was never actually tested
    pred = concat_model.predict(d['test_data'], steps=d['test_steps'])

    if allow_print:
        diagnose_output(d['test_labels'], pred.argmax(axis=1),
                        d['classes_trans'])

    return balanced_accuracy_score(d['test_labels'], pred.argmax(axis=1))
Пример #9
0
        activation='relu',
        kernel_regularizer=reg(
            0.0001)  #params.Choice(f'dense_reg_lb_last', reg_lambas))
    )(net)
    net = Dense(num_classes, activation='softmax')(net)

    model = Model(inputs=inputs, outputs=net, name=model_name)
    model.compile(optimizer=Adam(learning_rate=0.001),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model


batch_size = 64
# determine classification targets and parameters to construct datasets properly
num_classes, cls_target, cls_str = set_classification_targets(0)
train_data, test_data, train_labels, test_labels, epoch_steps, _ = prepare_dataset(
    2, cls_target, num_classes, batch_size)

# list of "class" names used for confusion matrices and validity testing. Not always classes, also subgroups or minerals
# idiom: identity comprehension replaced with list(range(...))
class_names = list(range(num_classes))
class_weights = class_weight.compute_class_weight('balanced', class_names,
                                                  train_labels)

# random hyper-parameter search over the model-builder defined above
# NOTE(review): `directory` is a hard-coded absolute path — parameterize
# before running on another machine
tuner = RandomSearch(
    build_model,
    objective='val_accuracy',
    max_trials=150,
    executions_per_trial=3,
    directory='/home/ben/Dropbox/uni/3_semester/ml/libs-pewpew/results',
    project_name='tuned_mlp')
Пример #10
0
def classify(**args):
    """
    Grid-search over amount/drop threshold combinations: train the baseline
    MLP for every cell and print the balanced accuracy per combination.

    :param args: keyword arguments passed from cli parser
    """

    # resolve classification target and a printable description of it
    cls_target, cls_str = set_classification_targets(args['cls_choice'])

    thresholds = [.1, .4, .9]
    overall_res = []
    for amount_t in thresholds:
        print('Amount t: ', amount_t, '\n')
        res_per_amount = []
        for drop_t in thresholds:
            print('Drop t: ', drop_t, '\n')
            d = prepare_dataset(args['dataset_choice'], cls_target,
                                args['batch_size'], args['norm_choice'],
                                amount_t, drop_t)

            # skip combinations where thresholding removed too much data
            if len(d['train_labels']) == 0 or len(
                    np.unique(d['train_labels'])) != 12:
                print('not enough data')
                res_per_amount.append(0.)
                continue

            model = build_model(0,
                                d['num_classes'],
                                name='baseline_mlp',
                                new_input=True)
            model.compile(optimizer='adam',
                          loss='categorical_crossentropy',
                          metrics=['accuracy'])

            # callback to log data for TensorBoard
            # tb_callback = TensorBoard(log_dir='./results', histogram_freq=0, write_graph=True, write_images=True)

            # train and evaluate
            model.fit(
                d['train_data'],
                steps_per_epoch=d['train_steps'],
                epochs=args['epochs'],
                # callbacks=[tb_callback],
                verbose=1,
                class_weight=d['class_weights'])

            model.evaluate(d['eval_data'], steps=d['test_steps'], verbose=1)

            # predict on testset and calculate classification report and confusion matrix for diagnosis
            pred = model.predict(d['test_data'], steps=d['test_steps'])

            accuracy = balanced_accuracy_score(d['test_labels'],
                                               pred.argmax(axis=1))
            print('Accuracy: ', accuracy)
            res = [accuracy]  #, precision_score(
            # test_labels, pred, average='samples'), recall_score(test_labels, pred, average='samples')]
            res_per_amount.append(res)

        overall_res.append(res_per_amount)

    # print a threshold-pair / score table, mirroring the loop order above
    for i, row in enumerate(overall_res):
        for j, cell in enumerate(row):
            print(f'{thresholds[i]:3}\t{thresholds[j]:3}\t{cell}')
        print(overall_res)