示例#1
0
def trasfer_learn():
    """Train a new 5-way classification head on top of a frozen InceptionV3.

    The ImageNet-pretrained InceptionV3 (without its top) is used as a fixed
    feature extractor; only the pooling/dense head added here is trained.
    After training, the loss/metric values on both generators are printed and
    the full model plus its weights are written to ``FINETUNE_FULL_MODEL`` and
    ``FINETUNE_WEIGHTS``.

    Relies on module-level names not defined in this function: ``epochs``,
    ``TEST_IMAGE_WIDTH``, ``TEST_IMAGE_HEIGHT``, ``get_train_generator`` and
    ``get_test_generator``.
    """
    image_input = Input(shape=(TEST_IMAGE_WIDTH, TEST_IMAGE_HEIGHT, 3))
    base_model = applications.InceptionV3(
        include_top=False, weights='imagenet', input_tensor=image_input)

    # Classification head: global pooling -> 512-unit ReLU -> dropout -> softmax.
    head = GlobalAveragePooling2D()(base_model.output)
    head = Dense(512, activation='relu')(head)
    head = Dropout(0.5)(head)
    predictions = Dense(5, activation='softmax')(head)

    train_generator = get_train_generator()
    validation_generator = get_test_generator()

    model = Model(inputs=base_model.input, outputs=predictions)

    # Freeze every pretrained layer so only the new head receives updates.
    for pretrained_layer in base_model.layers:
        pretrained_layer.trainable = False

    model.compile(
        Adam(lr=.0001),
        loss='categorical_crossentropy',
        metrics=['accuracy'])
    print(model.summary())

    fit_options = dict(
        steps_per_epoch=150,
        epochs=epochs,
        validation_data=validation_generator,
        validation_steps=44,
    )
    model.fit_generator(train_generator, **fit_options)

    # Report [loss, accuracy] on the training data first, then on validation.
    for eval_generator in (train_generator, validation_generator):
        print(model.evaluate_generator(eval_generator))

    model.save(FINETUNE_FULL_MODEL)
    model.save_weights(FINETUNE_WEIGHTS)
示例#2
0
def evaluate_model(model: keras.Model, datagen, x_valid, y_valid):
    """
    Print evaluation metrics and the ROC AUC of ``model`` on a validation set.

    The metrics come from ``model.evaluate_generator`` fed by
    ``datagen.flow`` (batch size 8). The AUC is computed separately from
    predictions on ``x_valid`` after it has been cast to float32 and passed
    through ``datagen.standardize``.

    :param model: model to evaluate; its ``name`` is printed
    :param datagen: object providing ``flow`` and ``standardize``
    :param x_valid: validation inputs (an array supporting ``astype``)
    :param y_valid: validation targets
    :return: whatever ``model.evaluate_generator`` returned
    """
    print('evaluating model', model.name)
    started_at = time.time()
    print('metrics:')
    # Fixed list of names; presumably mirrors the order of the compiled
    # metrics -- verify against the model's compile() call.
    labels = ['loss', 'acc', 'sens', 'spec', 'fp', 'tn']
    print('labels:', labels)

    validation_batches = datagen.flow(x_valid, y_valid, batch_size=8)
    values = model.evaluate_generator(validation_batches)
    print('values:', values)

    # predict() does not go through the generator, so standardize explicitly.
    standardized_inputs = datagen.standardize(x_valid.astype(np.float32))
    y_pred = model.predict(standardized_inputs)
    auc = sklearn.metrics.roc_auc_score(y_valid, y_pred)
    print('auc score:', auc)

    finished_at = time.time()
    print(f'seconds to evaluate: {finished_at - started_at}')
    return values
示例#3
0
    def get_accs_times(self,
                       A,
                       X,
                       y,
                       num_graph_classes,
                       splits=None,
                       batch_size=50):
        """Train and evaluate a fresh two-layer GCN classifier per split.

        Every adjacency matrix is densified, given self loops, symmetrically
        normalised (via ``self._add_self_loops`` / ``self._sym_normalise_A``)
        and converted back to CSR. For each ``(train_idx, val_idx)`` pair a
        new model is built, trained for 200 epochs and evaluated on the
        held-out part.

        Args:
            A: iterable of sparse adjacency matrices, one per graph.
            X: per-graph node feature arrays; ``X[0].shape`` fixes the
                model's feature input shape.
            y: label array; only ``shape[0]`` of its train/test parts is read
                here (presumably one-hot, given the categorical
                cross-entropy loss -- confirm with the caller).
            num_graph_classes: width of the final softmax layer.
            splits: iterable of ``(train_idx, val_idx)`` pairs. Required
                despite the ``None`` default.
            batch_size: batch size for the training and evaluation generators.

        Returns:
            ``(accuracies, times)``: per-split test accuracy and per-split
            training wall-clock time in seconds.

        Raises:
            TypeError: if ``splits`` is ``None``.
        """
        # Fail early and clearly instead of deep inside iter(None) after the
        # (potentially expensive) adjacency preprocessing has already run.
        if splits is None:
            raise TypeError(
                "splits must be an iterable of (train_idx, val_idx) pairs, "
                "got None")

        A = [
            csr_matrix(
                self._sym_normalise_A(
                    self._add_self_loops(csr_matrix.todense(adj))))
            for adj in A
        ]

        accuracies = []
        times = []
        for train_idx, val_idx in splits:
            A_test, A_train, X_test, X_train, y_test, y_train \
                = self.split_test_train(A, X, y, train_idx, val_idx)

            A_in = Input((A[0].shape[0], A[0].shape[1]), name='A_in')
            X_in = Input(X[0].shape, name='X_in')

            x1 = MyGCN(100, activation='relu')([A_in, X_in])
            x2 = MyGCN(64, activation='relu')([A_in, x1])
            # Graph-level readout: mean over axis 1 or plain flattening.
            if self.with_mean:
                x3 = Lambda(lambda x: K.mean(x, axis=1))(x2)
            else:
                x3 = Flatten()(x2)
            x4 = Dense(num_graph_classes, activation='softmax')(x3)

            model = Model(inputs=[A_in, X_in], outputs=x4)

            model.compile(Adam(),
                          loss='categorical_crossentropy',
                          metrics=['acc'])
            generator = PiNet().batch_generator([A_train, X_train], y_train,
                                                batch_size)
            start = time.time()
            model.fit_generator(generator,
                                ceil(y_train.shape[0] / batch_size),
                                200,
                                verbose=0)
            train_time = time.time() - start

            # Use a whole number of steps, consistent with training; a
            # fractional step count only worked by accident of the loop test.
            stats = model.evaluate_generator(
                PiNet().batch_generator([A_test, X_test], y_test, batch_size),
                ceil(y_test.shape[0] / batch_size))

            for metric, val in zip(model.metrics_names, stats):
                print(metric + ": ", val)

            accuracies.append(stats[1])
            times.append(train_time)

        return accuracies, times
示例#4
0
def kfold_cross_validation(model_id: str,
                           model: Model,
                           dataset_inputs: np.ndarray = None,
                           dataset_labels: np.ndarray = None,
                           train_generator: DirectoryIterator = None,
                           test_generator: DirectoryIterator = None,
                           batch_size: int = 32,
                           epochs: int = 100,
                           k_folds: int = 5) -> (float, float, float, float):
    """
    Retrain the model ``k_folds`` times from identical initial weights and summarise the accuracies.

    The initial weights are saved once to ``../misc/cv/<model_id>.h5`` and reloaded before every
    iteration. Note that, despite the name, the data is not partitioned into k disjoint folds:
    in array mode every iteration draws a new random 80/20 train/validation split, and in
    generator mode the same two generators are reused each time.

    Array mode is used when both dataset_inputs and dataset_labels are given; the reported score
    is then the accuracy on test_generator if one is supplied, otherwise the last epoch's
    ``val_acc``. Generator mode is used otherwise and reports the last epoch's ``val_acc`` with
    test_generator as validation data.

    :param model_id: str identifying the model; used in log lines and the weights file name
    :param model: compiled Keras model whose current weights are the starting point of every run
    :param dataset_inputs: NumPy array with the input samples of the whole training set
    :param dataset_labels: NumPy array with the labels of the whole training set
    :param train_generator: DirectoryIterator over the training set
    :param test_generator: DirectoryIterator over the test set (mandatory with train_generator)
    :param batch_size: batch size for fitting and for deriving generator step counts (default: 32)
    :param epochs: maximum number of epochs per run (default: 100)
    :param k_folds: number of training runs (default: 5)

    :return: (min accuracy, average accuracy, accuracy standard deviation, max accuracy)
    """

    has_arrays = dataset_inputs is not None and dataset_labels is not None
    has_train_generator = train_generator is not None
    has_test_generator = test_generator is not None

    assert has_arrays or has_train_generator, \
        "dataset_inputs and dataset_labels must be provided if generator is not in use"
    assert not has_train_generator or has_test_generator, \
        "test_generator must be provided if generator is in use"

    weights_path = "../misc/cv/{model_id}.h5".format(model_id=model_id)
    fold_scores = []
    print("Saving weights...")
    model.save_weights(weights_path)

    for iteration in range(1, k_folds + 1):
        print("{m} | Iteration {i}/{k}".format(m=model_id,
                                               i=iteration,
                                               k=k_folds))

        # Every run starts from the same saved initial weights.
        print("Loading weights...")
        model.load_weights(weights_path)

        # Stop early once validation accuracy stalls, but never before a
        # quarter of the epoch budget has elapsed.
        stopper = CustomEarlyStopping(monitor="val_acc",
                                      patience=5,
                                      min_epochs=int(epochs / 4))
        score = 0.

        print("Training model...")
        if has_arrays:
            # Fresh random split: 80% for training, 20% for validation.
            train_inputs, val_inputs, train_labels, val_labels = train_test_split(
                dataset_inputs, dataset_labels, test_size=0.2)

            fit_log = model.fit(x=train_inputs,
                                y=train_labels,
                                validation_data=(val_inputs, val_labels),
                                batch_size=batch_size,
                                epochs=epochs,
                                verbose=0,
                                callbacks=[stopper])

            if has_test_generator:
                # Prefer the external test set over the internal split.
                test_metrics = model.evaluate_generator(
                    generator=test_generator,
                    steps=ceil(test_generator.n / batch_size))
                score = test_metrics[1]
            else:
                score = fit_log.history["val_acc"][-1]

        elif has_train_generator and has_test_generator:
            fit_log = model.fit_generator(
                generator=train_generator,
                steps_per_epoch=ceil(train_generator.n / batch_size),
                validation_data=test_generator,
                validation_steps=ceil(test_generator.n / batch_size),
                callbacks=[stopper],
                epochs=epochs,
                verbose=0)
            score = fit_log.history["val_acc"][-1]

        fold_scores.append(score)
        print("Training ended/stopped at epoch {e} with accuracy {a:.3f}.".
              format(e=stopper.stopped_epoch, a=score))

    scores = np.asarray(fold_scores)
    return tuple(
        summarise(scores) for summarise in (np.min, np.mean, np.std, np.max))
示例#5
0
def _binarize_by_max(values, nan_to_zero=False):
    """Divide ``values`` by their maximum and threshold the result at 0.5.

    Entries >= 0.5 become 1 and entries < 0.5 become 0. NaN entries are left
    as NaN unless ``nan_to_zero`` is set, in which case they are zeroed
    before thresholding.
    """
    scaled = np.divide(values, max(values))
    if nan_to_zero:
        scaled[np.isnan(scaled)] = 0
    scaled[scaled >= 0.5] = 1
    scaled[scaled < 0.5] = 0
    return scaled


def infer_attributes_gat(Gnx, savepred=True, plot=False):
    """Train a GAT node classifier on binarised structural features of ``Gnx``.

    The node attribute ``'data'`` is used as the class label. The enabled
    structural features are each scaled by their maximum and thresholded at
    0.5. A two-layer GAT is trained with early stopping, the best checkpoint
    (by ``val_weighted_acc``) is reloaded, test metrics are printed and
    predictions are produced for every node.

    Args:
        Gnx: networkx graph; ``in_degree``/``out_degree`` are read from it
            and it is wrapped in a ``StellarDiGraph``.
        savepred: when true, write the prediction table to a timestamped
            ``.xlsx`` file under ``.temp/output``.
        plot: when true, plot the training history and a 2-D view of the
            embeddings taken from ``model.layers[3]``.

    Returns:
        DataFrame with columns ``"Predicted"`` and ``"True"``.
    """
    # Define node data
    feature_names = [
        "in_degree",
        "out_degree",
        # "in_degree_centrality",
        # "out_degree_centrality",
        # "closeness_centrality",
        # "betweenness_centrality",
        "clustering_coefficient",
        # "square_clustering",
        "core_number",
        # "pagerank",
        # "constraint",
        # "effective_size"
    ]

    # (feature name, callable yielding (node, value) pairs, zero NaNs first?)
    # The callables are lazy so that only enabled features are computed.
    feature_sources = [
        ("in_degree", lambda: Gnx.in_degree, False),
        ("out_degree", lambda: Gnx.out_degree, False),
        ("in_degree_centrality",
         lambda: nx.algorithms.in_degree_centrality(Gnx).items(), False),
        ("out_degree_centrality",
         lambda: nx.algorithms.out_degree_centrality(Gnx).items(), False),
        ("closeness_centrality",
         lambda: nx.algorithms.closeness_centrality(Gnx).items(), False),
        ("betweenness_centrality",
         lambda: nx.algorithms.betweenness_centrality(Gnx).items(), False),
        ("clustering_coefficient",
         lambda: nx.algorithms.clustering(Gnx).items(), False),
        ("square_clustering",
         lambda: nx.algorithms.square_clustering(Gnx).items(), False),
        ("core_number",
         lambda: nx.algorithms.core_number(Gnx).items(), False),
        ("pagerank", lambda: nx.algorithms.pagerank(Gnx).items(), False),
        ("constraint", lambda: nx.algorithms.constraint(Gnx).items(), True),
        ("effective_size",
         lambda: nx.algorithms.effective_size(Gnx).items(), True),
    ]

    node_type = list(nx.get_node_attributes(Gnx, 'data').values())
    d = {"node_type": node_type}
    for name, node_value_pairs, nan_to_zero in feature_sources:
        if name in feature_names:
            raw_values = [value for _, value in node_value_pairs()]
            d[name] = _binarize_by_max(raw_values, nan_to_zero)

    # NOTE(review): `nodes` is not defined in this function; presumably a
    # module-level list of node ids in the same order as Gnx iterates --
    # confirm it exists, otherwise this line raises NameError.
    node_data = pd.DataFrame(data=d, index=nodes)
    node_data = shuffle(node_data)

    # Split the data: 80% of the nodes for training, then 15% of the nodes
    # for validation out of the remainder; what is left is the test set.
    train_data, test_data = model_selection.train_test_split(
        node_data, train_size=int(0.80 * len(Gnx)))
    val_data, test_data = model_selection.train_test_split(
        test_data, train_size=int(0.15 * len(Gnx)))

    # Convert the labels to one-hot numeric arrays
    target_encoding = feature_extraction.DictVectorizer(sparse=False)

    train_targets = target_encoding.fit_transform(
        train_data[["node_type"]].to_dict('records'))
    val_targets = target_encoding.transform(
        val_data[["node_type"]].to_dict('records'))
    test_targets = target_encoding.transform(
        test_data[["node_type"]].to_dict('records'))

    node_features = node_data[feature_names]

    # Create the GAT model in Keras
    G = sg.StellarDiGraph(Gnx, node_features=node_features)
    print(G.info())

    generator = FullBatchNodeGenerator(G)

    train_gen = generator.flow(train_data.index, train_targets)

    gat = GAT(
        layer_sizes=[8, train_targets.shape[1]],
        attn_heads=8,
        generator=generator,
        bias=True,
        in_dropout=0.5,
        attn_dropout=0.5,
        activations=["elu", "softmax"],
        normalize=None,
    )

    # Expose the input and output tensors of the GAT model for node
    # prediction, via the GAT.node_model() method:
    x_inp, predictions = gat.node_model()

    # Train the model
    model = Model(inputs=x_inp, outputs=predictions)
    model.compile(
        optimizer=optimizers.Adam(lr=0.005),
        loss=losses.categorical_crossentropy,
        weighted_metrics=["acc"],
    )

    val_gen = generator.flow(val_data.index, val_targets)

    for directory in (".temp/logs", ".temp/output"):
        os.makedirs(directory, exist_ok=True)

    # patience is the number of epochs to wait before early stopping in case
    # of no further improvement
    es_callback = EarlyStopping(monitor="val_weighted_acc", patience=100)

    mc_callback = ModelCheckpoint(
        ".temp/logs/best_model.h5",
        monitor="val_weighted_acc",
        save_best_only=True,
        save_weights_only=True,
    )

    history = model.fit_generator(
        train_gen,
        epochs=2000,
        validation_data=val_gen,
        verbose=2,
        # this should be False, since shuffling data means shuffling the
        # whole graph
        shuffle=False,
        callbacks=[es_callback, mc_callback],
    )

    # Reload the saved weights
    model.load_weights(".temp/logs/best_model.h5")

    # Evaluate the best model on the test set
    test_gen = generator.flow(test_data.index, test_targets)

    test_metrics = model.evaluate_generator(test_gen)
    print("\nTest Set Metrics:")
    for name, val in zip(model.metrics_names, test_metrics):
        print("\t{}: {:0.4f}".format(name, val))

    # Make predictions with the model
    all_nodes = node_data.index
    all_gen = generator.flow(all_nodes)
    all_predictions = model.predict_generator(all_gen)

    node_predictions = target_encoding.inverse_transform(all_predictions)

    results = pd.DataFrame(node_predictions, index=G.nodes()).idxmax(axis=1)
    df = pd.DataFrame({"Predicted": results, "True": node_data['node_type']})
    # head must be *called*; printing the bound method dumped its repr.
    print(df.head())

    if savepred:
        df.to_excel(".temp/output/output" +
                    str(datetime.datetime.now()).replace(':', '-') + ".xlsx")

    if plot:
        # Node embeddings
        emb_layer = model.layers[3]
        print("Embedding layer: {}, output shape {}".format(
            emb_layer.name, emb_layer.output_shape))
        embedding_model = Model(inputs=x_inp, outputs=emb_layer.output)
        emb = embedding_model.predict_generator(all_gen)

        X = emb
        y = np.argmax(target_encoding.transform(
            node_data.reindex(G.nodes())[["node_type"]].to_dict('records')),
                      axis=1)

        if X.shape[1] > 2:
            transform = TSNE  # PCA
            trans = transform(n_components=2)
            emb_transformed = pd.DataFrame(trans.fit_transform(X),
                                           index=list(G.nodes()))
            transform_name = transform.__name__
        else:
            # Already at most two columns: plot the embedding as is.
            emb_transformed = pd.DataFrame(X, index=list(G.nodes()))
            emb_transformed = emb_transformed.rename(columns={'0': 0, '1': 1})
            transform_name = "Raw"
        # The label column is needed for colouring in both branches.
        emb_transformed['label'] = y

        def plot_emb(transform_name, emb_transformed):
            """Scatter the two embedding columns, coloured by class label."""
            fig, ax = plt.subplots(figsize=(7, 7))
            ax.scatter(emb_transformed[0],
                       emb_transformed[1],
                       c=emb_transformed['label'].astype("category"),
                       cmap="jet",
                       alpha=0.7)
            ax.set(aspect="equal", xlabel="$X_1$", ylabel="$X_2$")
            plt.title(
                '{} visualization of GAT embeddings for the fighter graph'.
                format(transform_name))

        def remove_prefix(text, prefix):
            """Return ``text`` without a leading ``prefix`` (if present)."""
            return text[len(prefix):] if text.startswith(prefix) else text

        def plot_history(history):
            """Plot train vs. validation curves for every recorded metric."""
            metrics = sorted(
                {remove_prefix(m, "val_") for m in history.history})
            for m in metrics:
                # summarize history for metric m
                plt.figure()
                plt.plot(history.history[m])
                plt.plot(history.history['val_' + m])
                plt.title(m)
                plt.ylabel(m)
                plt.xlabel('epoch')
                plt.legend(['train', 'validation'], loc='best')

        plot_history(history)
        plot_emb(transform_name, emb_transformed)
        plt.show()

    return df
示例#6
0
def model_mnist():
    """Train a small CNN on MNIST with augmentation and report its accuracy.

    The network is three bias-free 3x3 'valid' convolutions (32, 64, 128
    filters) with ReLU, max-pooling after the first two, followed by a
    softmax dense layer. Training uses randomly rotated/shifted images for 5
    epochs; the model is then evaluated on the plain test set and on an
    augmented version of it, and saved to ``./model/model.hdf5``.

    Returns:
        ``(plain_test_accuracy, augmented_test_accuracy)``.
    """
    import os  # local import: only needed to create the output directory

    (X_train, Y_train), (X_test, Y_test) = mnist.load_data()  # 28*28 images

    # Add the channel axis and scale the pixel values to [0, 1].
    X_train = X_train.astype('float32').reshape(-1, 28, 28, 1)
    X_test = X_test.astype('float32').reshape(-1, 28, 28, 1)
    X_train /= 255
    X_test /= 255
    print('Train:{},Test:{}'.format(len(X_train), len(X_test)))

    nb_classes = 10

    Y_train = np_utils.to_categorical(Y_train, nb_classes)
    Y_test = np_utils.to_categorical(Y_test, nb_classes)
    print('data success')

    input_tensor = Input((28, 28, 1))
    # 28*28 -> 26*26 after the first 'valid' 3x3 convolution
    temp = Conv2D(filters=32,
                  kernel_size=(3, 3),
                  padding='valid',
                  use_bias=False)(input_tensor)
    temp = Activation('relu')(temp)
    # 26*26 -> 13*13
    temp = MaxPooling2D(pool_size=(2, 2))(temp)
    # 13*13 -> 11*11
    temp = Conv2D(filters=64,
                  kernel_size=(3, 3),
                  padding='valid',
                  use_bias=False)(temp)
    temp = Activation('relu')(temp)
    # 11*11 -> 5*5
    temp = MaxPooling2D(pool_size=(2, 2))(temp)
    # 5*5 -> 3*3
    temp = Conv2D(filters=128,
                  kernel_size=(3, 3),
                  padding='valid',
                  use_bias=False)(temp)
    temp = Activation('relu')(temp)
    temp = Flatten()(temp)

    temp = Dense(nb_classes)(temp)
    output = Activation('softmax')(temp)

    # `inputs=` is the current keyword; the original mixed the legacy
    # `input=` with `outputs=`, which only works through a deprecation shim.
    model = Model(inputs=input_tensor, outputs=output)

    model.summary()

    sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(loss='categorical_crossentropy',
                  optimizer=sgd,
                  metrics=['accuracy'])

    datagen = ImageDataGenerator(rotation_range=20,
                                 width_shift_range=0.1,
                                 height_shift_range=0.1)

    batch_size = 32
    model.fit_generator(datagen.flow(X_train, Y_train, batch_size=batch_size),
                        steps_per_epoch=len(X_train) // batch_size,
                        epochs=5,
                        validation_data=(X_test, Y_test))

    # Score on the untouched test set ...
    score_org = model.evaluate(X_test, Y_test, verbose=0)
    # ... and on randomly augmented test images.
    score_aug = model.evaluate_generator(
        datagen.flow(X_test, Y_test, batch_size=32))

    print('测试集 score(val_loss): %.4f' % score_org[0])
    # Report the plain test-set accuracy next to the plain test-set loss;
    # the original printed the augmented accuracy under this label.
    print('测试集 accuracy: %.4f' % score_org[1])
    print('增强测试集 accuracy: %.4f' % score_aug[1])

    # model.save does not create missing parent directories.
    os.makedirs('./model', exist_ok=True)
    model.save('./model/model.hdf5')
    return score_org[1], score_aug[1]
示例#7
0
def main(args):
    """Train one segmentation model per fold in two stages.

    Every entry of ``args.data_path`` counts as one fold (``fold_<i>``). For
    each fold the model is first trained with binary cross-entropy, resuming
    from the newest ``weights/<model>/fold_<i>_<epoch>_best.h5`` checkpoint
    if one exists. The newest checkpoint is then loaded again, the network is
    re-wired to output the *input* of its last layer (presumably dropping the
    final activation so the Lovasz loss sees raw scores -- confirm), and it
    is fine-tuned with ``losses.c_lovasz_loss``. Finally the validation
    loss/accuracy of the fine-tuned model is printed.

    Reads from ``args``: ``data_path``, ``batch_size``, ``target_size``,
    ``seed``, ``model``, ``epochs``, ``learning_rate``, ``log_dir``, ``v``.
    """

    def newest_checkpoint_epoch(path_template):
        # Highest epoch in [1, args.epochs] with a checkpoint on disk, or 0.
        for epoch in range(args.epochs, 0, -1):
            if os.path.exists(path_template.format(epoch=epoch)):
                return epoch
        return 0

    def restore_or_build(path_template, epoch):
        # Load the checkpoint of `epoch`, or build a fresh model for epoch 0.
        if epoch > 0:
            print("Resuming from epoch {}".format(epoch))
            return load_model(path_template.format(epoch=epoch),
                              custom_objects={'c_iou': metrics.c_iou})
        return make_model(args.model,
                          (args.target_size, args.target_size, 3), 1)

    def build_callbacks(path_template, monitored_metric):
        # TensorBoard logging, best-only checkpoints, and early stopping on
        # the given metric (higher is better).
        return [
            keras.callbacks.TensorBoard(args.log_dir),
            keras.callbacks.ModelCheckpoint(path_template,
                                            save_best_only=True,
                                            save_weights_only=False),
            keras.callbacks.EarlyStopping(monitor=monitored_metric,
                                          patience=20,
                                          verbose=0,
                                          mode='max'),
        ]

    fold_count = len(os.listdir(args.data_path))
    print("Found {} number of folds".format(fold_count))

    for fold_index in range(fold_count):
        print("================================")
        print("Starting fold {}".format(fold_index))

        # Dataset and generators for this fold
        fold_dir = "{}/fold_{}".format(args.data_path, fold_index)
        dataset = TGSDataset(data_path=fold_dir, batch_size=args.batch_size)
        generator_options = dict(
            input_size=(args.target_size, args.target_size),
            mask_size=(101, 101),
            seed=args.seed,
        )
        train_data_generator = dataset.get_train_data_generator(
            **generator_options)
        val_data_generator = dataset.get_val_data_generator(
            **generator_options)

        # '{epoch}' stays a placeholder so Keras and the lookup can fill it.
        best_model_file = 'weights/{}/fold_{}_{epoch}_best.h5'.format(
            args.model, fold_index, epoch='{epoch}')

        # ---- Stage 1: binary cross-entropy ----
        resume_from_epoch = newest_checkpoint_epoch(best_model_file)
        model = restore_or_build(best_model_file, resume_from_epoch)

        model.compile(loss=binary_crossentropy,
                      optimizer=adam(lr=args.learning_rate),
                      metrics=[binary_accuracy, metrics.c_iou])

        callbacks = build_callbacks(best_model_file, 'c_iou')

        train_step_size = dataset.train_step_size
        val_step_size = dataset.val_step_size

        model.fit_generator(train_data_generator,
                            steps_per_epoch=train_step_size,
                            callbacks=callbacks,
                            epochs=args.epochs,
                            verbose=args.v,
                            workers=4,
                            initial_epoch=resume_from_epoch,
                            validation_data=val_data_generator,
                            validation_steps=val_step_size)

        # ---- Stage 2: fine-tune with Lovasz loss ----
        # Start again from the newest checkpoint written so far.
        resume_from_epoch = newest_checkpoint_epoch(best_model_file)
        model_with_lovasz = restore_or_build(best_model_file,
                                             resume_from_epoch)

        callbacks = build_callbacks(best_model_file, 'c_iou_zero')

        train_data_generator = dataset.get_train_data_generator(
            **generator_options)
        val_data_generator = dataset.get_val_data_generator(
            **generator_options)

        # Re-wire: same input, but the output is what feeds the last layer.
        first_layer = model_with_lovasz.layers[0]
        last_layer = model_with_lovasz.layers[-1]
        model_with_lovasz = Model(first_layer.input, last_layer.input)

        model_with_lovasz.compile(
            loss=losses.c_lovasz_loss,
            optimizer=adam(lr=args.learning_rate),
            metrics=[binary_accuracy, metrics.c_iou_zero])
        print("Fine tuning with lovasz loss")
        model_with_lovasz.fit_generator(train_data_generator,
                                        steps_per_epoch=train_step_size,
                                        callbacks=callbacks,
                                        epochs=args.epochs,
                                        verbose=args.v,
                                        workers=4,
                                        initial_epoch=resume_from_epoch,
                                        validation_data=val_data_generator,
                                        validation_steps=val_step_size)

        # Evaluate the fine-tuned model on the validation data set.
        score = model_with_lovasz.evaluate_generator(val_data_generator,
                                                     val_step_size)
        print('Test loss:', score[0])
        print('Test accuracy:', score[1])
    def train(
        self,
        layer_size,
        num_samples,
        train_size=0.7,
        batch_size: int = 200,
        num_epochs: int = 20,
        learning_rate=5e-3,
        dropout=0.0,
        use_bias=True,
    ):
        """
        Build, train and evaluate a HinSAGE link-regression model on ``self.g``.

        The edges of the graph are split into a training and a test set; the
        ``"score"`` attribute of each edge is the regression target. The
        model is fitted on the training edges (validating on the test edges)
        and the test metrics are printed at the end. ``self.g`` is replaced
        by a StellarGraph wrapping the original graph.

        Args:
            layer_size: a list of number of hidden nodes in each layer
            num_samples: number of neighbours to sample at each layer; must
                have the same length as layer_size
            train_size: passed to train_test_split as the training share of
                the edges
            batch_size: size of mini batch
            num_epochs: number of epochs to train the model
            learning_rate: initial learning rate
            dropout: dropout probability in the range [0, 1)
            use_bias: tells whether to use a bias terms in HinSAGE model

        Returns:
            None. Metrics are only printed.

        Note:
            The edge embedding method is read from a name ``args`` that is
            not a parameter of this method (presumably a module-level
            argparse namespace -- confirm).
        """

        def split_edge_records(edge_records):
            # (source, target, attrs) records -> ([(source, target)], [score])
            pairs = [(record[0], record[1]) for record in edge_records]
            scores = [record[2]["score"] for record in edge_records]
            return pairs, scores

        # Training and test edges
        all_edges = list(self.g.edges(data=True))
        edges_train, edges_test = model_selection.train_test_split(
            all_edges, train_size=train_size)

        edgelist_train, labels_train = split_edge_records(edges_train)
        edgelist_test, labels_test = split_edge_records(edges_test)

        # The samplers need a StellarGraph; node features are taken from the
        # "feature" node attribute.
        self.g = sg.StellarGraph(self.g, node_features="feature")

        # Link generators sample the neighbourhood subgraphs of each edge's
        # head nodes and stream them to the model in mini batches.
        generator = HinSAGELinkGenerator(self.g, batch_size, num_samples)
        train_gen = generator.flow(edgelist_train, labels_train)
        test_gen = generator.flow(edgelist_test, labels_test)

        # HinSAGE needs one sample count per layer.
        assert len(layer_size) == len(
            num_samples
        ), "layer_size and num_samples must be of the same length! Stopping."
        hinsage = HinSAGE(layer_sizes=layer_size,
                          generator=train_gen,
                          bias=use_bias,
                          dropout=dropout)

        # Input and output tensors of the HinSAGE stack
        x_inp, x_out = hinsage.default_model()

        # Final estimator: a link regression layer on the node embeddings
        regression_layer = link_regression(
            edge_embedding_method=args.edge_embedding_method)
        score_prediction = regression_layer(x_out)

        # Keras model for training
        model = Model(inputs=x_inp, outputs=score_prediction)
        model.compile(
            optimizer=optimizers.Adam(lr=learning_rate),
            loss=losses.mean_squared_error,
            metrics=[root_mean_square_error, metrics.mae],
        )

        # Train model
        print("Training the model for {} epochs with initial learning rate {}".
              format(num_epochs, learning_rate))
        worker_count = multiprocessing.cpu_count() // 2
        model.fit_generator(
            train_gen,
            validation_data=test_gen,
            epochs=num_epochs,
            verbose=2,
            shuffle=True,
            use_multiprocessing=True,
            workers=worker_count,
        )

        # Evaluate and print metrics
        test_metrics = model.evaluate_generator(test_gen)

        print("Test Evaluation:")
        for metric_name, metric_value in zip(model.metrics_names,
                                             test_metrics):
            print("\t{}: {:0.4f}".format(metric_name, metric_value))
示例#9
0
def three_streams_rgb():
    """Train, save and score the three-stream model on an ImageNet subset.

    Builds directory iterators over the hard-coded ImageNet train/validation
    folders (restricted to the first 35 class folders that also appear in
    ``classes_values``), trains the network returned by ``three_streams()``,
    saves it under ``save_dir`` and prints the validation loss and accuracy.
    """
    dataset_train = '/home/cic/datasets/ImageNet/train/'
    dataset_test = '/home/cic/datasets/ImageNet/validation/'
    save_dir = '/home/nsallent/output/saved_models/'
    model_name = 'three_streams_rgb'

    input_size = 224

    # Keep only the class folders that are also listed in classes_values.
    classes_train = [
        folder for folder in listdir(dataset_train) if folder in classes_values
    ]
    classes_test = [
        folder for folder in listdir(dataset_test) if folder in classes_values
    ]

    def color_transformation(image):
        """Project the image with ``rgb2pca`` and rescale each slice to 0-255."""
        image = np.array(image)
        pca_image = rgb2pca(image)
        # Each slice along the first axis gets its own min-max scaling.
        for i in range(pca_image.shape[0]):
            scaler = MinMaxScaler((0, 255))
            pca_image[i, :, :] = scaler.fit_transform(pca_image[i, :, :])
        return Image.fromarray(pca_image.astype('uint8'))

    train_datagen = ImageDataGenerator(
        rescale=1. / 255,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        preprocessing_function=color_transformation)

    test_datagen = ImageDataGenerator(
        rescale=1. / 255, preprocessing_function=color_transformation)

    train_generator = train_datagen.flow_from_directory(
        dataset_train,
        target_size=(input_size, input_size),
        classes=classes_train[:35])

    validation_generator = test_datagen.flow_from_directory(
        dataset_test,
        target_size=(input_size, input_size),
        classes=classes_test[:35])

    output, im_input, input_shape = three_streams()

    model = Model(inputs=im_input, outputs=output)

    # summary() prints the table itself and returns None, so it is not
    # wrapped in print().
    model.summary()

    opt = optimizers.SGD(lr=0.01, decay=0.0005, momentum=0.9)
    # An accuracy metric is required: without any metric the evaluation
    # below returns a bare loss value and scores[0] / scores[1] would fail.
    model.compile(optimizer=opt,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    model.fit_generator(train_generator,
                        steps_per_epoch=20,
                        epochs=50,
                        validation_steps=800,
                        validation_data=validation_generator)

    # Save model and weights
    if not isdir(save_dir):
        makedirs(save_dir)
    model_path = join(save_dir, model_name)
    model.save(model_path)
    print('Saved trained model at %s ' % model_path)

    # Score trained model: scores is [loss, accuracy].
    scores = model.evaluate_generator(validation_generator)
    print('Test loss:', scores[0])
    print('Test accuracy:', scores[1])
示例#10
0
文件: model.py 项目: np-core/druid
class AchillesModel:
    """Wrapper around a Keras residual-CNN + RNN classifier.

    Bundles building, compiling, training, saving/loading, evaluating and
    predicting with a single Keras model kept in ``self.model``.
    """

    def __init__(self, data_file=None, log_dir=""):
        """Remember the data file and log directory; no model exists yet.

        :param data_file: file handed to ``AchillesDataset.get_signal_generator``
            by ``train`` and ``predict_generator``.
        :param log_dir: stored on the instance; not read by any method of
            this class.
        """

        self.data_file = data_file
        self.log_dir = log_dir
        self.model = None

    def build(
        self,
        window_size=400,
        activation="softmax",
        bidirectional=True,
        nb_channels=256,
        rnn_units=200,
        _nb_classes=2,
        nb_residual_block=1,
        nb_rnn=1,
        dropout=0.0,
        gru=False,
        gpus=1,
        summary=True,
    ):
        """Assemble the network and store it in ``self.model``.

        The network is: input of shape ``(1, window_size, 1)`` ->
        ``nb_residual_block`` residual blocks (at least one is always built)
        -> reshape to ``(window_size, nb_channels)`` -> ``nb_rnn`` recurrent
        layers (or a ``Flatten`` when ``nb_rnn`` is 0) -> dense output layer.

        :param window_size: length of the input window.
        :param activation: activation of the final dense layer.
        :param bidirectional: wrap each recurrent layer in ``Bidirectional``.
        :param nb_channels: number of filters in every convolution.
        :param rnn_units: units per recurrent layer.
        :param _nb_classes: size of the dense output layer.
        :param nb_residual_block: number of stacked residual blocks.
        :param nb_rnn: number of stacked recurrent layers.
        :param dropout: ``dropout`` argument passed to each recurrent layer.
        :param gru: use GRU layers instead of LSTM layers.
        :param gpus: only values <= 1 are supported.
        :param summary: print the Keras model summary after building.
        :return: the built (uncompiled) Keras model.
        :raises ValueError: if ``gpus`` is greater than 1; ``self.model`` has
            already been assigned at that point.
        """

        # Kernel size and strides are only used for single convolutional layers (1D, or 2D)

        # Default for residual block or Conv2D:
        shape = (1, window_size, 1)

        # Input data shape for residual block (Conv2D)
        inputs = layers.Input(shape=shape)

        ######################
        # Residual Block CNN #
        ######################

        # The first block always exists and receives the input shape; the
        # remaining nb_residual_block - 1 blocks are stacked on top of it.
        x = self.residual_block(inputs, nb_channels, input_shape=shape)
        for _ in range(nb_residual_block - 1):
            x = self.residual_block(x, nb_channels)

        # Reshape the output layer of residual blocks from 4D to 3D
        x = layers.Reshape((1 * window_size, nb_channels))(x)

        ######################
        # Bidirectional RNN  #
        ######################

        # GRU does not appear to be as good as LSTM! (original author's note)
        rnn_layer = layers.GRU if gru else layers.LSTM

        # "recurrent_dropout": rc_dropout  <-- this hits performance massively even if set to 0
        dropout_params = {"dropout": dropout}

        if nb_rnn > 0:
            # All but the last recurrent layer must return full sequences so
            # that another recurrent layer can be stacked on top.
            for _ in range(nb_rnn - 1):
                stacked = rnn_layer(
                    rnn_units, return_sequences=True, **dropout_params
                )
                if bidirectional:
                    stacked = layers.Bidirectional(stacked)
                x = stacked(x)

            # The last recurrent layer returns only its final output, which
            # feeds the dense layer.
            last = rnn_layer(rnn_units, **dropout_params)
            if bidirectional:
                last = layers.Bidirectional(last)
            x = last(x)
        else:
            # If no RNN layers, flatten shape for Dense
            x = layers.Flatten()(x)

        outputs = layers.Dense(_nb_classes, activation=activation)(x)

        self.model = Model(inputs=inputs, outputs=outputs)

        if summary:
            self.model.summary()

        if gpus <= 1:
            print("Built model for training on 1 GPU.")
            return self.model
        else:
            print(f"Building model for distributed training on {gpus} GPUs.")
            raise ValueError(
                'Current version of Keras does not support multi GPU models.'
            )

    def save(self, run_id, file):
        """Save the model to ``os.path.join(run_id, file)``."""

        self.model.save(os.path.join(run_id, file))

    def compile(self, optimizer="adam", loss="binary_crossentropy"):
        """Compile ``self.model`` with an accuracy metric and return it.

        :param optimizer: optimizer passed to Keras ``compile``.
        :param loss: loss passed to Keras ``compile``.
        :return: the compiled model.
        """

        self.model.compile(optimizer=optimizer, loss=loss, metrics=["accuracy"])

        return self.model

    def train(
        self,
        batch_size=15,
        epochs=10,
        workers=2,
        run_id="run_1",
        outdir="run_1",
        verbose=True,
        gpu=None,
    ):
        """Fit the model on the training data and write logs to ``outdir``.

        Writes ``<run_id>.epochs.log`` (CSV log), a
        ``<run_id>.checkpoint.val_loss.h5`` checkpoint and the pickled
        training history ``<run_id>.model.history`` into ``outdir``.

        :param batch_size: batch size of both data generators.
        :param epochs: number of training epochs.
        :param workers: ``workers`` argument passed to ``fit``.
        :param run_id: prefix of all output file names.
        :param outdir: output directory, created if missing; a falsy value
            selects the current working directory.
        :param verbose: ``verbose`` argument passed to ``fit``.
        :param gpu: value written to ``CUDA_VISIBLE_DEVICES``; ``None``
            leaves the environment untouched.
        """

        # Compare against None so that GPU index 0 is honoured as well.
        if gpu is not None:
            print(f"CUDA_VISIBLE_DEVICES environment variable set to {gpu}")
            os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu)

        # Estimated memory for dimensions and
        # batch size of model, before adjustment:

        # memory = self.estimate_memory_usage(batch_size=batch_size)
        # print("Estimated GPU memory for AchillesModel model: {} GB".format(memory))

        # Reads data from HDF5 data file:
        dataset = AchillesDataset()

        # Get training and validation data generators
        training_generator = dataset.get_signal_generator(
            self.data_file, data_type="training", batch_size=batch_size, shuffle=True
        )
        validation_generator = dataset.get_signal_generator(
            self.data_file, data_type="validation", batch_size=batch_size, shuffle=True
        )

        # Make log directory:
        if outdir:
            os.makedirs(outdir, exist_ok=True)
        else:
            outdir = os.getcwd()

        # Callbacks
        csv = CSVLogger(os.path.join(outdir, run_id + ".epochs.log"))
        chk = ModelCheckpoint(
            os.path.join(outdir, run_id + ".checkpoint.val_loss.h5"),
            monitor="val_loss",
            verbose=0,
            save_best_only=False,
            save_weights_only=False,
            mode="auto",
            # An integer save_freq counts batches; "epoch" checkpoints once
            # per epoch instead of rewriting the file after every batch.
            save_freq="epoch",
        )

        print(
            f"Running on batch size {batch_size} for {epochs} epochs "
            f"with {workers} worker processes --> run ID: {run_id}"
        )

        # TODO: Enable NCPU

        history = self.model.fit(
            training_generator,
            use_multiprocessing=False,
            workers=workers,
            epochs=epochs,
            validation_data=validation_generator,
            callbacks=[csv, chk],
            verbose=verbose,
        )

        with open(
            os.path.join(outdir, "{}.model.history".format(run_id)), "wb"
        ) as history_out:
            pickle.dump(history.history, history_out)

    def adjust_batch_size(self, batch_size):
        """Placeholder for adapting the batch size to the available GPU memory.

        The intent is a conservative estimate of available GPU memory so that
        training does not raise out-of-memory errors, particularly inside
        Nextflow workflows where the data dimensions may differ between runs
        or across a grid search.

        Not implemented yet: the memory estimate is computed but no
        adjustment is made and nothing is returned.
        """

        # TODO
        mem = self.estimate_memory_usage(batch_size)

    def load_model(self, model_file, summary=True):
        """Load a saved model file into ``self.model``.

        :param model_file: path handed to the module-level ``load_model``.
        :param summary: print the Keras model summary after loading.
        """

        # Read model stats

        self.model = load_model(model_file)
        if summary:
            self.model.summary()

    def evaluate(self, eval_generator, workers=2):
        """Evaluate the model on ``eval_generator``.

        :param eval_generator: generator yielding evaluation batches.
        :param workers: ``workers`` argument passed to Keras.
        :return: ``(loss, acc)`` as returned by ``evaluate_generator``.
        """

        loss, acc = self.model.evaluate_generator(
            eval_generator, workers=workers, verbose=True,
            use_multiprocessing=False
        )

        return loss, acc

    @timeit(micro=True)
    def predict(
        self, signal_tensor: np.array = None, batch_size=10,
        null_pass: np.shape = None
    ):
        """Run ``self.model.predict`` on a signal tensor.

        :param signal_tensor: input passed to the model.
        :param batch_size: batch size used for prediction.
        :param null_pass: if given, ``signal_tensor`` is replaced by an
            all-zero array of this shape (warm-up pass to allocate memory).
        :return: the model predictions.
        """

        # Read Fast5 and extract windows from signal array:

        if null_pass:
            # Warmup pass to allocate memory
            signal_tensor = np.zeros(shape=null_pass)

        # Select random or beginning consecutive windows
        return self.model.predict(x=signal_tensor, batch_size=batch_size)

    def predict_generator(self, data_type="data", batch_size=1000):
        """Predict over an unshuffled, label-free generator built from ``self.data_file``.

        :param data_type: ``data_type`` passed to ``get_signal_generator``.
        :param batch_size: batch size of the generator.
        :return: the model predictions.
        """

        # Reads data from HDF5 data file:
        dataset = AchillesDataset()

        # Unshuffled and without labels, so predictions follow generator order.
        prediction_generator = dataset.get_signal_generator(
            self.data_file,
            data_type=data_type,
            batch_size=batch_size,
            shuffle=False,
            no_labels=True,
        )

        return self.model.predict_generator(prediction_generator)

    @staticmethod
    def residual_block(
        y,
        nb_channels,
        input_shape=None,
        _strides=(1, 1),
        _project_shortcut=True,
        _leaky=False,
    ):
        """Apply one residual block to tensor ``y`` and return the result.

        Adapted from https://gist.github.com/mjdietzx/5319e42637ed7ef095d430cb5c5e8c64
        with one more convolution and the kernel sizes described in Chiron.
        ``_project_shortcut`` defaults to True so that the shortcut is
        projected before being summed with the convolution stack, see Chiron.

        :param y: input tensor.
        :param nb_channels: number of filters in every convolution.
        :param input_shape: if truthy, forwarded as ``input_shape`` to the
            first convolution.
        :param _strides: strides of the first convolution and of the
            shortcut projection.
        :param _project_shortcut: project the shortcut with a 1x1 convolution
            plus batch normalisation (also done whenever ``_strides`` is not
            ``(1, 1)``).
        :param _leaky: use ``LeakyReLU`` instead of ReLU activations.
        :return: output tensor of the block.
        """

        def _activate(tensor):
            # Activation shared by all three places it is used in the block.
            if _leaky:
                return layers.LeakyReLU()(tensor)
            return layers.Activation("relu")(tensor)

        shortcut = y

        # Stack 1
        first_conv_kwargs = {
            "kernel_size": (1, 1),
            "strides": _strides,
            "padding": "same",
        }
        if input_shape:
            first_conv_kwargs["input_shape"] = input_shape
        y = layers.Conv2D(nb_channels, **first_conv_kwargs)(y)
        y = layers.BatchNormalization()(y)
        y = _activate(y)

        # Stack 2
        y = layers.Conv2D(
            nb_channels, kernel_size=(1, 3), strides=(1, 1), padding="same"
        )(y)
        y = layers.BatchNormalization()(y)
        y = _activate(y)

        # Stack 3
        y = layers.Conv2D(
            nb_channels, kernel_size=(1, 1), strides=(1, 1), padding="same"
        )(y)
        y = layers.BatchNormalization()(y)

        # ... with shortcut for concatenation before ReLU
        # nb: identity shortcuts used directly when the input
        # and output are of the same dimensions
        if _project_shortcut or _strides != (1, 1):
            shortcut = layers.Conv2D(
                nb_channels, kernel_size=(1, 1), strides=_strides, padding="same"
            )(shortcut)
            shortcut = layers.BatchNormalization()(shortcut)

        y = layers.add([shortcut, y])

        return _activate(y)

    def estimate_memory_usage(self, batch_size):
        """Roughly estimate the memory needed by the model, in GB.

        Follows https://stackoverflow.com/a/46216013: 4 bytes times the batch
        size times (number of layer output elements + number of parameters).

        :param batch_size: batch size to estimate for.
        :return: estimated memory in GB, rounded to three decimals.
        """

        def _shape_size(shape):
            # Product of the known (non-None) dimensions of one shape tuple.
            size = 1
            for dim in shape:
                if dim is None:
                    continue
                size *= dim
            return size

        def _param_count(weights):
            # De-duplicate by identity rather than with set(): this does not
            # require the weight objects to be hashable.
            unique = {id(w): w for w in weights}.values()
            return np.sum([K.count_params(w) for w in unique])

        shapes_mem_count = 0
        for layer in self.model.layers:
            out_shape = layer.output_shape
            # A layer may report a list of shapes (one per output) instead of
            # a single shape tuple; count every output in that case.
            if out_shape and isinstance(out_shape[0], (list, tuple)):
                shapes_mem_count += sum(_shape_size(s) for s in out_shape)
            else:
                shapes_mem_count += _shape_size(out_shape)

        trainable_count = _param_count(self.model.trainable_weights)
        non_trainable_count = _param_count(self.model.non_trainable_weights)

        total_memory = (
            4.0
            * batch_size
            * (shapes_mem_count + trainable_count + non_trainable_count)
        )
        gbytes = np.round(total_memory / (1024.0 ** 3), 3)

        return gbytes
示例#11
0
def testModel(m:k.Model, genTst:DirectoryIterator) -> np.ndarray:
	"""Evaluate model ``m`` on the batches of ``genTst`` and return the metrics.

	Runs single-worker, without multiprocessing; the value returned by
	``evaluate_generator`` is passed through unchanged.
	"""
	return m.evaluate_generator(
		genTst,
		use_multiprocessing=False,
		workers=1,
	)
# np.save(open('models/bottleneck_features_validation.npy', 'wb'), bottleneck_features_validation)
#

filepath = "model/finetuning-weights-improvement-{epoch:02d}-{val_acc:.3f}.h5"
# After each epoch, save the model whenever the monitored validation
# accuracy ('val_acc', mode 'max') improves.
checkpoint = ModelCheckpoint(filepath,
                             monitor='val_acc',
                             verbose=2,
                             save_best_only=True,
                             mode='max')
callbacks_list = [checkpoint]

model.fit_generator(train_generator,
                    steps_per_epoch=nb_train_samples // batch_size,
                    epochs=epochs,
                    validation_data=validation_generator,
                    validation_steps=nb_validation_samples // batch_size,
                    callbacks=callbacks_list)

# `steps` counts batches, not samples: use the same number of validation
# batches as the validation pass during training.
model.evaluate_generator(validation_generator,
                         steps=nb_validation_samples // batch_size)
# Save the model together with its weights
# model.save('first_model.h5')
# Save the model as JSON
# model_json = model.to_json()
# with open("model.json", "w") as json_file:
#     json_file.write(model_json)

# Load the model by reading the JSON file
# json_string = open('model.json').read()
# model = model_from_json(json_string)
示例#13
0
class Cnn8(object):
    """MobileNetV2-based image classifier with a transfer-train and a fine-tune phase.

    ``prepare`` builds the data generators, the model and the callbacks;
    ``transfer_train`` trains the new top layers on the frozen base model;
    ``fine_tune`` unfreezes part of the base model and continues training.
    Accumulated timings and test metrics are seeded from ``params`` so that
    they can be carried over between runs.
    """

    def __init__(self, pid, params):
        """Read all settings from ``params``.

        :param pid: identifier of this classifier; used in the model
            directory, the log directory and log messages.
        :param params: dict with the keys ``image_width``, ``image_height``,
            ``train_dir``, ``validation_dir``, ``test_dir``, ``batch_size``,
            ``learning_rate``, ``epochs``, ``workers``, ``fine_tune_from``,
            and the sub-dicts ``transfer_train_params`` and
            ``fine_tune_params`` (each with ``train_time``, ``accuracy`` and
            ``loss``).
        """
        self.img_width, self.img_height = params['image_width'], params[
            'image_height']
        self.id = pid
        print("Using cnn8 classifier with id ", str(self.id))
        self.train_dir = params['train_dir']
        self.validation_dir = params['validation_dir']
        self.test_dir = params['test_dir']

        # str() so that a non-string id does not break the concatenation.
        self.cnn_dir = paths.ROOT_DIR + '/model/' + str(self.id) + '/'

        self.batch_size = params['batch_size']
        self.learning_rate = params['learning_rate']
        self.epochs = params['epochs']
        self.workers = params['workers']

        self.fine_tune_from = params['fine_tune_from']

        self.train_generator, self.validation_generator, self.test_generator = None, None, None
        self.transfer_train_time = params['transfer_train_params'][
            'train_time']
        self.tt_acc = params['transfer_train_params']['accuracy']
        self.tt_loss = params['transfer_train_params']['loss']

        self.fine_tune_time = params['fine_tune_params']['train_time']
        self.ft_acc = params['fine_tune_params']['accuracy']
        self.ft_loss = params['fine_tune_params']['loss']

        # Per-epoch curves; start empty so fine_tune() can extend them even
        # when transfer_train() has not been run on this instance.
        self.acc, self.val_acc = [], []
        self.loss, self.val_loss = [], []

        # TODO test params

    def prepare(self):
        """Create the model directory, data generators, model and callbacks.

        Also saves the class indices of the training generator to
        ``class_indices.npy`` and loads previously saved weights from
        ``model-weights.h5`` when that file can be read.
        """
        print("Setting up CNN v8..")
        if not os.path.exists(self.cnn_dir):
            os.makedirs(self.cnn_dir)

        ImageFile.LOAD_TRUNCATED_IMAGES = True

        # Rescale all images by 1./255 and apply image augmentation
        train_datagen = ImageDataGenerator(shear_range=0.2,
                                           zoom_range=0.2,
                                           rotation_range=20,
                                           width_shift_range=0.2,
                                           height_shift_range=0.2,
                                           rescale=1. / 255)

        # Validation and test images are only rescaled, never augmented.
        validation_datagen = ImageDataGenerator(rescale=1. / 255)

        test_datagen = ImageDataGenerator(rescale=1. / 255)

        self.train_generator = train_datagen.flow_from_directory(
            self.train_dir,
            target_size=(self.img_width, self.img_height),
            batch_size=self.batch_size,
            class_mode='categorical')

        self.validation_generator = validation_datagen.flow_from_directory(
            self.validation_dir,
            target_size=(self.img_width, self.img_height),
            batch_size=self.batch_size,
            class_mode='categorical')

        self.test_generator = test_datagen.flow_from_directory(
            self.test_dir,
            target_size=(self.img_width, self.img_height),
            batch_size=self.batch_size,
            class_mode='categorical')

        print("Generators are ready, saving class indices")
        np.save((self.cnn_dir + "class_indices.npy"),
                self.train_generator.class_indices)
        self.IMG_SHAPE = (self.img_width, self.img_height, 3)
        self.num_classes = len(self.train_generator.class_indices)

        print("Creating transfer train model")

        # Create the base model from the pre-trained model MobileNet V2 on imagenet data
        self.base_model = MobileNetV2(input_shape=self.IMG_SHAPE,
                                      include_top=False,
                                      weights='imagenet')
        #  its already trained, we just use the features
        self.base_model.trainable = False

        # Classification head placed on top of the base model's features.
        top_model = Sequential()
        top_model.add(GlobalAveragePooling2D())
        top_model.add(
            Dense(128, activation='relu', kernel_regularizer=l2(0.01)))
        top_model.add(Dropout(0.5))
        top_model.add(Dense(64, activation='relu',
                            kernel_regularizer=l2(0.01)))
        top_model.add(Dropout(0.5))
        top_model.add(Dense(64, activation='relu',
                            kernel_regularizer=l2(0.01)))
        top_model.add(Dropout(0.5))
        top_model.add(
            Dense(self.num_classes,
                  activation='softmax',
                  kernel_regularizer=l2(0.01)))
        # `inputs` / `outputs` are the Keras 2 keyword names; the singular
        # `input` / `output` spellings are the legacy form.
        self.model = Model(inputs=self.base_model.input,
                           outputs=top_model(self.base_model.output))
        self.model.compile(optimizer=RMSprop(lr=self.learning_rate),
                           loss='categorical_crossentropy',
                           metrics=['accuracy'])
        print("Model created for id %s" % self.id)
        self.steps_per_epoch = self.train_generator.n // self.batch_size
        self.validation_steps = self.validation_generator.n // self.batch_size
        self.test_steps = self.test_generator.n // self.batch_size

        print("Initializing callbacks")
        self.callbacks = [
            EarlyStopping(monitor='val_loss',
                          min_delta=0,
                          patience=3,
                          verbose=0,
                          mode='auto'),
            ModelCheckpoint(filepath=(self.cnn_dir + "model-weights.h5"),
                            verbose=1,
                            save_best_only=True),
            History(),
            TensorBoard(log_dir="logs/{}".format(self.id))
        ]
        # Best effort: resume from earlier weights when they exist, otherwise
        # start from the freshly built model.
        try:
            self.model.load_weights((self.cnn_dir + "model-weights.h5"))
            print("Found weights for %s, loading them and continue training" %
                  self.id)
        except OSError:
            pass
        print("Cnn is ready for train")

    def transfer_train(self):
        """Train the top layers on the frozen base model and score on the test set.

        Saves the model to ``model.h5``, stores the training curves on the
        instance and writes the ``transfer_train`` plot.

        :return: dict with the accumulated ``train_time`` and the test
            ``accuracy`` and ``loss``.
        """
        start_time = time.time()
        print("Transfer training started")
        self.history = self.model.fit_generator(
            self.train_generator,
            steps_per_epoch=self.steps_per_epoch,
            epochs=self.epochs,
            workers=self.workers,
            callbacks=self.callbacks,
            validation_data=self.validation_generator,
            validation_steps=self.validation_steps)
        self.tt_loss, self.tt_acc = self.model.evaluate_generator(
            self.test_generator, self.test_steps)
        self.transfer_train_time += time.time() - start_time
        print("Transfer train ended at: %d sec" % self.transfer_train_time)

        self.model.save(self.cnn_dir + 'model.h5')
        # Copy the curves so that extending them later does not modify the
        # lists owned by the History object.
        self.acc = list(self.history.history['accuracy'])
        self.val_acc = list(self.history.history['val_accuracy'])

        self.loss = list(self.history.history['loss'])
        self.val_loss = list(self.history.history['val_loss'])
        self.create_plot('transfer_train', self.acc, self.val_acc, self.loss,
                         self.val_loss)
        return {
            'train_time': self.transfer_train_time,
            'accuracy': self.tt_acc,
            'loss': self.tt_loss
        }

    def create_plot(self, name, acc, val_acc, loss, val_loss):
        """Plot accuracy and loss curves and save them as ``<name>.png`` in the model directory.

        :param name: file name (without extension) of the saved figure.
        :param acc: training accuracy per epoch.
        :param val_acc: validation accuracy per epoch.
        :param loss: training loss per epoch.
        :param val_loss: validation loss per epoch.
        """
        plt.figure(figsize=(8, 8))
        plt.subplot(2, 1, 1)
        plt.plot(acc, label='Training Accuracy')
        plt.plot(val_acc, label='Validation Accuracy')
        plt.legend(loc='lower right')
        plt.ylabel('Accuracy')
        plt.ylim([min(plt.ylim()), 1])
        plt.title('Training and Validation Accuracy')

        plt.subplot(2, 1, 2)
        plt.plot(loss, label='Training Loss')
        plt.plot(val_loss, label='Validation Loss')
        plt.legend(loc='upper right')
        plt.ylabel('Cross Entropy')
        plt.ylim([0, max(plt.ylim())])
        plt.title('Training and Validation Loss')
        plt.savefig(self.cnn_dir + name + ".png")
        plt.close()

    def fine_tune(self):
        """Unfreeze the base model from ``fine_tune_from`` on and continue training.

        Recompiles the model with a fixed learning rate of 2e-5, trains,
        scores on the test set, saves the model to ``model.h5`` and writes
        the ``fine_tune`` plot with the curves of both phases.

        :return: dict with the accumulated ``train_time`` and the test
            ``accuracy`` and ``loss``.
        """
        start_time = time.time()
        print("Fine tune started")
        self.base_model.trainable = True

        # Let's take a look to see how many layers are in the base model
        print("Unfreezing %d layer from %d: " %
              (self.fine_tune_from, len(self.base_model.layers)))

        # Freeze all the layers before the `fine_tune_from` layer
        for layer in self.base_model.layers[:self.fine_tune_from]:
            layer.trainable = False

        # Recompile model so that the changed trainable flags take effect
        self.model.compile(loss='categorical_crossentropy',
                           optimizer=RMSprop(lr=2e-5),
                           metrics=['accuracy'])

        history_fine = self.model.fit_generator(
            self.train_generator,
            steps_per_epoch=self.steps_per_epoch,
            epochs=self.epochs,
            callbacks=self.callbacks,
            # Same worker count as transfer_train instead of a fixed value.
            workers=self.workers,
            validation_data=self.validation_generator,
            validation_steps=self.validation_steps)
        self.ft_loss, self.ft_acc = self.model.evaluate_generator(
            self.test_generator, self.test_steps)
        self.model.save(self.cnn_dir + 'model.h5')
        self.fine_tune_time += time.time() - start_time
        print("Fine tuning model ended %d" % self.fine_tune_time)

        # Append the fine-tuning curves to those of the transfer phase,
        # building new lists rather than extending shared ones in place.
        self.acc = self.acc + history_fine.history['accuracy']
        self.val_acc = self.val_acc + history_fine.history['val_accuracy']

        self.loss = self.loss + history_fine.history['loss']
        self.val_loss = self.val_loss + history_fine.history['val_loss']
        self.create_plot('fine_tune', self.acc, self.val_acc, self.loss,
                         self.val_loss)
        return {
            'train_time': self.fine_tune_time,
            'accuracy': self.ft_acc,
            'loss': self.ft_loss
        }

    def test(self):
        """Start of the test phase.

        NOTE(review): only the timer start and the log message are present
        here; ``start_time`` is never read. Confirm whether the remainder of
        this method is missing.
        """
        start_time = time.time()
        print("Test started")
示例#14
0
class RetrainedClassificationModel(TrainableModel):
    """Per-pixel mask classifier built on top of a pre-trained VGG16 base."""

    def __init__(self, name, img_width=224, img_height=224):
        """Store the image size and build the initial model.

        :param name: model name forwarded to ``TrainableModel``.
        :param img_width: width images are resized to.
        :param img_height: height images are resized to.
        """
        super().__init__(name)
        self.img_width = img_width
        self.img_height = img_height
        self.model = None
        self.reset()

    def reset(self):
        """(Re)build ``self.model`` from ImageNet-pretrained VGG16 weights.

        The output of the layer before the final VGG16 layer is reshaped to
        ``(img_width, img_height, 2)``, reduced to one sigmoid channel by a
        1x1 convolution and flattened.
        """
        base = keras.applications.VGG16(weights="imagenet",
                                        include_top=False,
                                        input_shape=(self.img_width,
                                                     self.img_height, 3))

        # Adding custom Layers
        # Use the second-to-last layer directly. Popping the last entry of
        # `base.layers` and then reading `layers[-1]` selects the same layer
        # only when `layers` is the model's live list rather than a copy.
        x = base.layers[-2].output
        x = Reshape((self.img_width, self.img_height, 2))(x)
        x = Conv2D(1, 1, activation='sigmoid')(x)
        predictions = Flatten()(x)

        # Creating the final model (`inputs` / `outputs` are the Keras 2
        # keyword names; `input` / `output` are the legacy spellings).
        self.model = Model(inputs=base.input, outputs=predictions)

    def train(self,
              batch_size: int,
              l2_regularization: float = 0,
              dropout_drop_porb: float = 0,
              n_epoch: int = 3,
              reduced_size=None,
              remove_nan=True):
        """Compile and fit the model, then save it to ``tcm.hd5``.

        Training images go through ``_input_converter`` (resize, noise,
        VGG16 preprocessing); validation images are only resized and
        preprocessed.

        :param batch_size: batch size passed to ``getABSDDataMask``.
        :param l2_regularization: L2 factor assigned to every layer that has
            a ``kernel_regularizer`` attribute.
        :param dropout_drop_porb: rate assigned to every ``Dropout`` layer.
        :param n_epoch: number of training epochs.
        :param reduced_size: forwarded to ``getABSDDataMask``.
        :param remove_nan: forwarded to ``getABSDDataMask``.
        :return: the Keras ``History`` object returned by ``fit_generator``.
        """

        def label_converter(x):
            # Labels are only resized to the model's image size.
            return cv2.resize(x, (self.img_width, self.img_height))

        def image_converter_eval(x):
            # Validation images: resize plus VGG16 preprocessing, no noise.
            return keras.applications.vgg16.preprocess_input(
                label_converter(x))

        image_converter = self._input_converter

        # The data source is queried twice because the training and the
        # validation split use different image converters.
        training, _, _ = getABSDDataMask(batch_size,
                                         label_converter=label_converter,
                                         image_converter=image_converter,
                                         reduced_size=reduced_size,
                                         remove_nan=remove_nan)
        _, dev, _ = getABSDDataMask(batch_size,
                                    label_converter=label_converter,
                                    image_converter=image_converter_eval,
                                    reduced_size=reduced_size,
                                    remove_nan=remove_nan)

        callbacks = [
            TensorBoard(write_images=True),
            ModelCheckpoint('tcm.{epoch:02d}.hdf5',
                            monitor='val_f1',
                            save_best_only=True,
                            mode='max'),
            EarlyStopping(monitor='val_loss', patience=10)
        ]
        # NOTE(review): these attributes are assigned on already-built
        # layers; confirm that Keras actually applies a regularizer set this
        # way to the training loss.
        for layer in self.model.layers:
            if hasattr(layer, 'kernel_regularizer'):
                layer.kernel_regularizer = l2(l2_regularization)
            if isinstance(layer, Dropout):
                layer.rate = dropout_drop_porb
        self.model.compile(loss="binary_crossentropy",
                           optimizer='adagrad',
                           metrics=[precision, recall, f1])

        hst = self.model.fit_generator(training,
                                       validation_data=dev,
                                       callbacks=callbacks,
                                       epochs=n_epoch)
        self.model.save("tcm.hd5")
        return hst

    def eval(self, batch_size: int, reduced_size=None, remove_nan=True):
        """Evaluate the model on the dev split.

        :param batch_size: batch size passed to ``getABSDDataMask``.
        :param reduced_size: forwarded to ``getABSDDataMask``.
        :param remove_nan: forwarded to ``getABSDDataMask``.
        :return: the result of ``evaluate_generator`` on the dev split.
        """

        def label_converter(x):
            return cv2.resize(x, (self.img_width, self.img_height))

        def image_converter(x):
            return keras.applications.vgg16.preprocess_input(
                label_converter(x))

        # Only the dev split is needed here.
        _, dev, _ = getABSDDataMask(batch_size,
                                    label_converter=label_converter,
                                    image_converter=image_converter,
                                    reduced_size=reduced_size,
                                    remove_nan=remove_nan)
        return self.model.evaluate_generator(dev, verbose=1)

    def _input_converter(self, x):
        """Resize ``x``, add Gaussian noise (std 5), clip to 0-255 and apply VGG16 preprocessing."""
        resized = cv2.resize(x, (self.img_width, self.img_height))
        noise = np.random.normal(0, 5, size=resized.shape)
        img = np.clip(resized + noise, 0, 255)
        return keras.applications.vgg16.preprocess_input(img)
示例#15
0
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Augmenting generator for the training images.
datagen_train = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    validation_split=0.2,
)

#datagen_train = ImageDataGenerator()
# From here on the name refers to the directory iterator, not the generator.
datagen_train = datagen_train.flow_from_directory(
    '../train',
    target_size=(124, 124),
    batch_size=32,
    shuffle=True,
)

# Test images are read without any augmentation.
datagen_test = ImageDataGenerator()
datagen_test = datagen_test.flow_from_directory(
    '../test',
    target_size=(124, 124),
    batch_size=32,
    shuffle=True,
)

# Train, then score on 10 batches of the test iterator.
hist = model.fit_generator(
    datagen_train,
    steps_per_epoch=158,
    epochs=250,
    callbacks=[tb],
)
loss, acc = model.evaluate_generator(datagen_test, steps=10, verbose=0)
print('loss = ', loss, 'acc = ', acc)

model.save('./vggtransfer_VGG.h5')
示例#16
0
def main():
    """Train, evaluate and log an image classifier configured on the command line.

    Steps, in order:

    1. Parse the command-line options.
    2. List the image files under ``path`` (one sub-directory per class) and
       either split them into train/test sets or load a separate test set
       from ``--testset``.
    3. Build the requested ``keras.applications`` model, or load one from
       ``--loadfrom``, and compile it with the chosen loss and optimizer.
    4. Train with batch generators, writing per-epoch metrics to a ``.pfl``
       CSV file.
    5. Evaluate on the train and test sets and append one row to the test log.
    6. If ``--output`` is given, save ``<output>.h5`` and a pickle
       ``<output>.bin`` holding ``(model name, fitted LabelBinarizer)``.
    7. Try to raise a desktop notification through ``notify-send``.
    """
    from argparse import ArgumentParser
    import csv
    import datetime as dt
    import os
    import pickle
    import subprocess
    from random import shuffle, seed

    # available architectures
    models_list = [
        'vgg16',
        'vgg19',
        'inceptionv3',
        'resnet50',
        'custom',
        'xception',
        'inceptionresnet',
        'mobilenet',
        'densenet121',
        'densenet169',
        'densenet201',
        'nasnet',
        'mobilenetv2'
    ]

    # available optimizers
    optimizers_list = ['sgd', 'adam']

    losses_list = [
        'categorical_crossentropy',
        'sparse_categorical_crossentropy',
        'binary_crossentropy',
        'mean_squared_error',
        'mean_absolute_error',
        'mean_absolute_percentage_error',
        'mean_squared_logarithmic_error',
        'squared_hinge',
        'hinge',
        'categorical_hinge',
        'logcosh',
        'kullback_leibler_divergence',
        'poisson',
        'cosine_proximity'
    ]

    # print these names for loss functions
    losses_dict = {
        'mean_squared_error': 'MSE',
        'mean_absolute_error': 'MAE',
        'mean_absolute_percentage_error': 'MAPE',
        'mean_squared_logarithmic_error': 'MSLE',
        'squared_hinge': 'Squared Hinge',
        'hinge': 'Hinge',
        'categorical_hinge': 'Categorical Hinge',
        'logcosh': 'Log-Cosh',
        'categorical_crossentropy': 'Categorial Cross-entropy',
        'sparse_categorical_crossentropy': 'Sparse Categorical Cross-entropy',
        'binary_crossentropy': 'Binary Cross-entropy',
        'kullback_leibler_divergence': 'Kullback-Leibler Divergence',
        'poisson': 'Poisson',
        'cosine_proximity': 'Cosine Proximity'
    }

    parser = ArgumentParser()
    parser.add_argument('model', help='which model to use',
                        type=str, choices=models_list)
    parser.add_argument('path', help='path to data', type=str)
    parser.add_argument('--loadfrom', help='load previous model', type=str)
    parser.add_argument(
        '-e', '--epochs', help='epochs to train for', type=int, default=30)
    parser.add_argument(
        '-b', '--batch', help='training batch size', type=int, default=8)
    parser.add_argument('-o', '--optimizer', help='optimizer to use',
                        type=str, choices=optimizers_list, default='sgd')
    parser.add_argument(
        '-s', '--split', help='test split size', default=0.2, type=float)
    parser.add_argument('-t', '--testset', help='path to test data', type=str)
    parser.add_argument('--loss', help='loss function to use', type=str,
                        choices=losses_list, default='categorical_crossentropy')
    parser.add_argument('--nogpu', help='disable GPU',
                        action='store_true', dest='no_gpu')
    parser.add_argument('--usemp', help='enable multiprocessing for sequences',
                        action='store_true', dest='use_mp')
    parser.add_argument(
        '--pretrained', help='load pre-trained weights', action='store_true')
    parser.add_argument('--output', help='output file', type=str)
    parser.add_argument('--extsum', help='print extended summary',
                        action='store_true', dest='print_extsum')
    parser.add_argument('--sum', help='print summary',
                        action='store_true', dest='print_sum')
    parser.add_argument('--json', help='save model as JSON file', type=str)
    parser.add_argument('--log', help='test log filename',
                        type=str, default='tests_log.log')
    parser.add_argument(
        '--shape', help='input shape in (height:width:depth) format', type=str)
    parser.add_argument(
        '--dropout', help='dropout probability (default=0)', type=float)
    parser.add_argument('--pfldir', help='put .pfl files here',
                        type=str, default='.', dest='pfldir')
    parser.add_argument('--decay', help='weight decay',
                        default=0.005, type=float, dest='weight_decay')
    parser.add_argument('-K', help='apply kernel regularization',
                        action='store_true', dest='regularize_kernel')
    parser.add_argument('-B', help='apply bias regularization',
                        action='store_true', dest='regularize_bias')
    parser.add_argument('-A', help='apply activity regularization',
                        action='store_true', dest='regularize_activity')
    parser.add_argument(
        '-a', '--augment', help='apply perform data augmentation', action='store_true')
    parser.add_argument(
        '--seed', help='random seed for train-test split', type=int, default=7)
    args = parser.parse_args()

    # Hide the GPUs before anything from Keras is imported, so the setting is
    # already in place whenever the backend looks for devices.
    if args.no_gpu:
        os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
        os.environ['CUDA_VISIBLE_DEVICES'] = ''

    # Heavy imports are deferred until the arguments are known to be valid.
    import numpy as np
    import keras.applications as apps
    from keras import Model
    from keras.callbacks import Callback
    from keras.layers import Dropout
    from keras.models import load_model
    from keras.regularizers import l2
    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import LabelBinarizer
    from extsum import extended_summary
    from generator import AugmentedBatchGenerator, BatchGenerator
    from optimizers import getOptimizer

    # --output is a file prefix ('<output>.h5', '<output>.bin'), so only its
    # parent directory has to exist. A bare file name has no parent to create.
    output_dir = os.path.dirname(args.output) if args.output else ''
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    pfldir = os.path.abspath(args.pfldir)
    os.makedirs(pfldir, exist_ok=True)

    class PerformanceHistory(Callback):
        """Keras callback that writes one CSV row of metrics per epoch."""

        def __init__(self, output_file: str):
            super().__init__()
            # newline='' leaves line-ending handling to the csv module.
            self.output_file = open(output_file, 'wt', newline='')
            self.csv_file = csv.writer(self.output_file)
            self.csv_file.writerow(
                ['ACCURACY', 'LOSS', 'VALIDATION ACCURACY', 'VALIDATION LOSS'])

        def on_epoch_end(self, epoch, logs=None):
            logs = logs or {}
            self.csv_file.writerow([logs.get('accuracy'), logs.get(
                'loss'), logs.get('val_accuracy'), logs.get('val_loss')])
            # Flush every epoch so an interrupted run keeps its history.
            self.output_file.flush()

        def on_train_end(self, logs=None):
            self.output_file.close()

        def __del__(self):
            # Fallback when training never reached on_train_end; also safe if
            # __init__ failed before the file was opened.
            handle = getattr(self, 'output_file', None)
            if handle is not None:
                handle.close()

    def list_labelled_files(root):
        """Return (paths, labels) for the files in each class sub-directory of root."""
        paths, labels = [], []
        for class_name in os.listdir(root):
            class_dir = root + '/' + class_name
            if not os.path.isdir(class_dir):
                # A stray file next to the class folders carries no label.
                continue
            for file_name in os.listdir(class_dir):
                paths.append('/'.join([root, class_name, file_name]))
                labels.append(class_name)
        return paths, labels

    image_dim = 224 if args.model in ['vgg16', 'vgg19', 'custom'] else 299
    input_shape = (image_dim, image_dim, 3)
    if args.shape is not None:
        # An empty component (e.g. '224:224:') is read as 1.
        input_shape = [(int(y) if y != '' else 1)
                       for y in args.shape.split(':')]

    # preprocessing functions dictionary
    input_preprocessing = {
        'vgg16': apps.vgg16.preprocess_input,
        'vgg19': apps.vgg19.preprocess_input,
        'inceptionv3': apps.inception_v3.preprocess_input,
        'resnet50': apps.resnet50.preprocess_input,
        'custom': apps.vgg16.preprocess_input,
        'xception': apps.xception.preprocess_input,
        'inceptionresnet': apps.inception_resnet_v2.preprocess_input,
        'mobilenet': apps.mobilenet.preprocess_input,
        'densenet121': apps.densenet.preprocess_input,
        'densenet169': apps.densenet.preprocess_input,
        'densenet201': apps.densenet.preprocess_input,
        'nasnet': apps.nasnet.preprocess_input,
        'mobilenetv2': apps.mobilenet_v2.preprocess_input
    }

    models_dict = {
        'xception': apps.Xception,
        'vgg16': apps.VGG16,
        'vgg19': apps.VGG19,
        'resnet50': apps.ResNet50,
        'inceptionv3': apps.InceptionV3,
        'inceptionresnet': apps.InceptionResNetV2,
        'mobilenet': apps.MobileNet,
        'mobilenetv2': apps.MobileNetV2,
        'densenet121': apps.DenseNet121,
        'densenet169': apps.DenseNet169,
        'densenet201': apps.DenseNet201,
        'nasnet': apps.NASNetLarge
    }

    # 'custom' is accepted by the parser but has no builder above; report that
    # clearly, and before any data is loaded, instead of a bare KeyError later.
    if args.model not in models_dict:
        parser.error(f'no model builder is registered for "{args.model}"')

    # load default specified dataset
    X_raw, y_raw = list_labelled_files(os.path.abspath(args.path))

    lb = LabelBinarizer()
    lb.fit(y_raw)
    seed(args.seed)

    if args.testset:
        # if a test set was specified, load it
        print(f'Using data at {args.testset} as test set.')

        # shuffle training data
        training_shuffled = list(zip(X_raw, y_raw))
        shuffle(training_shuffled)
        X_data, y_data = zip(*training_shuffled)

        X_data, y_data = np.asarray(X_data), lb.transform(y_data)
        X_train, y_train = X_data, y_data

        X_raw, y_raw = list_labelled_files(os.path.abspath(args.testset))

        # shuffle test
        test_shuffled = list(zip(X_raw, y_raw))
        shuffle(test_shuffled)
        X_raw, y_raw = zip(*test_shuffled)

        X_test, y_test = np.asarray(X_raw), lb.transform(y_raw)
    else:
        # otherwise split default dataset
        print(f'Using {args.split} of the provided dataset as test set.')

        X_data, y_data = np.asarray(X_raw), lb.transform(y_raw)
        X_train, X_test, y_train, y_test = train_test_split(
            X_data, y_data, test_size=args.split, random_state=args.seed)

    del X_raw
    del y_raw

    # NOTE(review): LabelBinarizer is documented to produce a single column for
    # two-class data, which would make n_classes 1 here - confirm that case.
    n_classes = y_data.shape[1]

    # load vanilla model with specified parameters
    model = models_dict[args.model](
        classes=n_classes, input_shape=input_shape, weights='imagenet' if args.pretrained else None)

    if args.dropout is not None:
        print('Adding weight decay')
        # insert dropout layer and regularization
        preds = model.layers[-1]
        dp = Dropout(args.dropout)(model.layers[-2].output)
        preds = preds(dp)
        model = Model(inputs=model.inputs, outputs=preds)

        # NOTE(review): the -K/-B/-A flags are only honoured when --dropout is
        # given, and these attributes are assigned on already-built layers -
        # confirm that the regularizers actually contribute to the loss.
        for layer in model.layers:
            if args.regularize_kernel:
                layer.kernel_regularizer = l2(args.weight_decay)
            if args.regularize_bias:
                layer.bias_regularizer = l2(args.weight_decay)
            if args.regularize_activity:
                layer.activity_regularizer = l2(args.weight_decay)

    if args.loadfrom:
        print('Loading', args.loadfrom)
        model = load_model(os.path.abspath(args.loadfrom))

    # One optimizer, one compile: whichever model is in use (freshly built or
    # loaded) is trained with the loss and optimizer chosen on the command line.
    opt = getOptimizer(args.optimizer)
    model.compile(loss=args.loss, optimizer=opt, metrics=['accuracy'])

    # iteratively rename performance file
    performance_file = os.path.join(
        pfldir, f'{args.model}_b{args.batch}_e{args.epochs}.pfl')
    fnum = 1
    while os.path.isfile(performance_file):
        performance_file = os.path.join(
            pfldir, f'{args.model}_b{args.batch}_e{args.epochs}_{fnum}.pfl')
        fnum += 1

    if args.print_extsum:
        extended_summary(model)
    elif args.print_sum:
        model.summary()

    perf_log = PerformanceHistory(performance_file)
    # print test parameters to screen
    print('\n{:<20}{}'.format('Model', args.model))
    print('{:<20}{}'.format('Input shape', input_shape))
    print('{:<20}{}'.format('Epochs', args.epochs))
    print('{:<20}{}'.format('Batch size', args.batch))
    print('{:<20}{}'.format('Optimizer', type(opt).__name__))
    print('{:<20}{}'.format('Optimizer params',
                            paramDictFormat(opt.get_config())))
    print('{:<20}{}'.format('Loss', args.loss))
    print('{:<20}{}'.format('Multiprocessing', 'On' if args.use_mp else 'Off'))
    print('{:<20}{}'.format('Performance log', performance_file))
    print('{:<20}{}'.format('Test log', args.log))
    print('{:<20}{}'.format('Dataset', args.path))
    reg = []
    if args.regularize_kernel:
        reg.append('kernel')
    if args.regularize_activity:
        reg.append('activity')
    if args.regularize_bias:
        reg.append('bias')
    print('{:<20}{}\n'.format('Regularization',
                              'None' if not reg else ', '.join(reg)))

    # create training batch generator
    preprocess = input_preprocessing[args.model]
    if args.augment:
        print('Data augmentation enabled.')
        train_gen = AugmentedBatchGenerator(
            X_train, y_train, args.batch, shape=input_shape, ops=[preprocess], pad=False)
    else:
        print('Data augmentation disabled.')
        train_gen = BatchGenerator(
            X_train, y_train, args.batch, shape=input_shape, ops=[preprocess], pad=False)
    # create testing batch generator
    test_gen = BatchGenerator(
        X_test, y_test, args.batch, shape=input_shape, ops=[preprocess], pad=False)

    # train model
    train_start = dt.datetime.now()
    model.fit_generator(train_gen, epochs=args.epochs, use_multiprocessing=args.use_mp,
                        validation_data=test_gen, callbacks=[perf_log])
    train_end = dt.datetime.now()

    # evaluate final model on train set
    train_score = model.evaluate_generator(train_gen)
    print('Train loss:', train_score[0])
    print('Train accuracy:', train_score[1])

    # evaluate final model on test set
    test_score = model.evaluate_generator(test_gen)
    print('Test loss:', test_score[0])
    print('Test accuracy:', test_score[1])

    # update tests log with current test data
    date_format = '{:%Y-%m-%d %H:%M}'
    log_format = '{:<20}{:<20}{:<20}{:<10}{:<10}{:<15}{:<15.5}{:<15.5}{:<15.5}{:<15.5}{:<30}{:<30}{:<70}{:<15}{:<15}{:<15}{:<15.5}{:<15.5}\n'
    header_format = '{:<20}{:<20}{:<20}{:<10}{:<10}{:<15}{:<15}{:<15}{:<15}{:<15}{:<30}{:<30}{:<70}{:<15}{:<15}{:<15}{:<15}{:<15}\n'
    with open(args.log, 'a+t') as test_log:
        # Only a brand-new (empty) log gets the header row.
        if test_log.tell() <= 0:
            test_log.write(header_format.format(
                'BEGIN', 'END', 'ARCHITECTURE', 'BATCH', 'EPOCHS', 'OPTIMIZER', 'TRAIN LOSS', 'TRAIN ACC', 'TEST LOSS', 'TEST ACC', 'DATA FOLDER', 'LOSS FUNCTION', 'OPTIMIZER PARAMS',
                'KERNEL REG', 'BIAS REG', 'ACTIV. REG', 'DECAY', 'DROPOUT'))
        start_str = date_format.format(train_start)
        end_str = date_format.format(train_end)
        # Last path component, ignoring a single trailing slash.
        data_folder = args.path.split('/')[-1 if args.path[-1] != '/' else -2]

        test_log.write(log_format.format(start_str, end_str, args.model.upper(), args.batch, args.epochs, args.optimizer.upper(), train_score[0], train_score[1],
                                         test_score[0], test_score[1], data_folder, losses_dict[args.loss], paramDictFormat(
                                             opt.get_config()),
                                         'YES' if args.regularize_kernel else 'NO',
                                         'YES' if args.regularize_bias else 'NO',
                                         'YES' if args.regularize_activity else 'NO',
                                         args.weight_decay,
                                         args.dropout if args.dropout else 0.0))

    # save the model and class file if an output filename was specified
    if args.output is not None:
        print(f'Saving model as {args.output}.h5')
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)
        model.save(f'{args.output}.h5')
        with open(f'{args.output}.bin', 'wb') as fout:
            pickle.dump((args.model, lb), fout)

    # The desktop notification is a convenience only: a machine without
    # notify-send must not turn a finished run into a failure.
    try:
        subprocess.run(['notify-send', 'Entrenamiento completado',
                        f'Se ha completado el entrenamiento del modelo {args.model}.'], check=False)
    except OSError:
        print('Desktop notification skipped: notify-send could not be run.')
示例#17
0
class Cnn7(object):
    def __init__(self, pid, core_params, classification_params, in_params,
                 out_params):
        """Store the run configuration and build the untrained top classifier.

        :param pid: identifier of this classifier instance, kept as ``self.id``.
        :param core_params: mapping providing ``train_dir`` and ``test_dir``.
        :param classification_params: mapping providing ``image_width`` and
            ``image_height``.
        :param in_params: mapping providing ``epochs``, ``batch_size``,
            ``augmentation``, ``num_classes``, ``frozen_layers``,
            ``learning_rate``, ``top_model_weights`` and ``model``.
        :param out_params: object kept as ``self.out_params`` for results.
        """
        self.img_width = classification_params['image_width']
        self.img_height = classification_params['image_height']

        self.id = pid
        print("Using cnn7 classifier with id ", str(self.id))

        # Dataset locations.
        self.train_dir = core_params['train_dir']
        self.test_dir = core_params['test_dir']

        # Training hyper-parameters.
        self.epochs = in_params['epochs']
        self.batch_size = in_params['batch_size']
        self.augmentation = in_params['augmentation']
        self.num_classes = in_params['num_classes']
        self.frozen_layers = in_params['frozen_layers']
        self.learning_rate = in_params['learning_rate']

        # Classifier head: flattens a (4, 4, 512) feature map, then two
        # ReLU dense layers with light dropout and an L2-regularized softmax.
        self.top_model = Sequential([
            Flatten(input_shape=(4, 4, 512)),
            Dense(512, activation='relu'),
            Dropout(0.1),
            Dense(256, activation='relu'),
            Dropout(0.1),
            Dense(self.num_classes,
                  activation='softmax',
                  kernel_regularizer=regularizers.l2(0.001)),
        ])

        # File locations for the head weights and the full model.
        self.top_model_weights_path = in_params['top_model_weights']
        self.model_path = in_params['model']

        # Populated later, outside the constructor.
        self.model = None
        self.history = None
        self.out_params = out_params

    def fine_tune(self):
        """Fine-tune VGG19 stacked with the pre-trained top model.

        Loads an ImageNet VGG19 without its classifier, attaches
        ``self.top_model`` (with weights read from
        ``self.top_model_weights_path``), freezes the first
        ``self.frozen_layers`` layers and trains with SGD on images from
        ``self.train_dir``, validating on ``self.test_dir``. Training can be
        stopped with Ctrl-C; the model is then saved and scored as usual.

        Side effects: sets ``self.base_model``, ``self.model``,
        ``self.checkpointer`` and ``self.history``, saves the model to
        ``self.model_path`` and stores the validation ``loss`` and
        ``accuracy`` in ``self.out_params``.
        """
        # Channels-last Keras inputs are (height, width, channels), the same
        # order as the target_size passed to the generators below.
        self.base_model = applications.VGG19(weights='imagenet',
                                             include_top=False,
                                             input_shape=(self.img_height,
                                                          self.img_width, 3))
        print('Base model loaded.')

        self.top_model.load_weights(self.top_model_weights_path)
        print("Top model weights loaded")

        self.model = Model(inputs=self.base_model.input,
                           outputs=self.top_model(self.base_model.output))

        frozen = self.model.layers[:self.frozen_layers]
        for layer in frozen:
            layer.trainable = False
        print("%d layer unfreezed, %d freezed" %
              (len(self.model.layers) - len(frozen), len(frozen)))

        print("Compiling model")
        self.model.compile(optimizer=optimizers.SGD(lr=self.learning_rate,
                                                    momentum=0.5),
                           loss='categorical_crossentropy',
                           metrics=['accuracy'])

        # Training images are augmented; both sets are rescaled to [0, 1].
        train_datagen = ImageDataGenerator(
            shear_range=0.2,
            zoom_range=0.2,
            horizontal_flip=True,
            rescale=1. / 255)

        test_datagen = ImageDataGenerator(rescale=1. / 255)

        train_generator = train_datagen.flow_from_directory(
            self.train_dir,
            target_size=(self.img_height, self.img_width),
            batch_size=self.batch_size,
            class_mode='categorical')

        nb_train_samples = len(train_generator.filenames)

        validation_generator = test_datagen.flow_from_directory(
            self.test_dir,
            target_size=(self.img_height, self.img_width),
            batch_size=self.batch_size,
            class_mode='categorical')

        nb_validation_samples = len(validation_generator.filenames)

        # Whole batches per pass; never 0, so a dataset smaller than one batch
        # still runs a step.
        train_steps = max(1, nb_train_samples // self.batch_size)
        validation_steps = max(1, nb_validation_samples // self.batch_size)

        # NOTE(review): the checkpoint is written to the path the top-model
        # weights were just loaded from - confirm overwriting it is intended.
        self.checkpointer = ModelCheckpoint(
            filepath=self.top_model_weights_path,
            verbose=1,
            save_best_only=True)
        # Kept on self so the per-epoch history survives an interrupted fit.
        self.history = History()
        callbacks = [
            EarlyStopping(monitor='val_loss',
                          min_delta=0,
                          patience=5,
                          verbose=0,
                          mode='auto'), self.history, self.checkpointer
        ]
        try:
            # The values are batch counts, so they go in as steps.
            self.history = self.model.fit_generator(
                train_generator,
                steps_per_epoch=train_steps,
                epochs=self.epochs,
                callbacks=callbacks,
                validation_data=validation_generator,
                validation_steps=validation_steps)
        except KeyboardInterrupt:
            print("Training stopped..")

        self.model.save(self.model_path)

        score = self.model.evaluate_generator(validation_generator,
                                              validation_steps)
        self.out_params['loss'] = score[0]
        self.out_params['accuracy'] = score[1]
        print("Training model ended, model saved")

    def evaluate(self):
        """Plot and export the training history collected in ``self.history``.

        Draws accuracy and loss curves (training vs. validation) into figure 2,
        saves the figure as a PNG under ``paths.STAT_DIR``, records the plot
        path and the raw history in ``self.out_params`` and dumps the history
        to a JSON file.

        Requires ``self.history`` to hold a history with the keys ``acc``,
        ``val_acc``, ``loss`` and ``val_loss``.
        """
        print("Evaluation fine tuned model")
        plt.figure(2)
        # Upper panel: accuracy per epoch.
        plt.subplot(211)
        plt.plot(self.history.history['acc'])
        plt.plot(self.history.history['val_acc'])
        plt.title('model accuracy')
        plt.ylabel('accuracy')
        plt.xlabel('epoch')
        plt.legend(['train', 'test'], loc='upper left')

        # Lower panel: loss per epoch.
        plt.subplot(212)
        plt.plot(self.history.history['loss'])
        plt.plot(self.history.history['val_loss'])
        plt.title('model loss')
        plt.ylabel('loss')
        plt.xlabel('epoch')
        plt.legend(['train', 'test'], loc='upper left')

        # The current timestamp is part of the file name, so repeated runs do
        # not overwrite each other's plots.
        stat_plot_dir = paths.STAT_DIR + "/fine_" + str(self.id) + "_" + str(
            time.time()) + ".png"
        plt.savefig(stat_plot_dir)
        self.out_params['plot'] = stat_plot_dir
        self.out_params['history'] = self.history.history

        # NOTE(review): unlike the PNG path above, no "/" is inserted after
        # paths.STAT_DIR here - confirm whether STAT_DIR ends with a separator.
        with open(
                paths.STAT_DIR + str(self.id) + "_" + str(time.time()) +
                '.json', 'w') as outfile:
            json.dump(self.history.history, outfile, indent=4)