Example #1
def predict(datagen: SoundDatagen, model_name: str) -> np.ndarray:
    """ Predicts results on test, using the best model. """
    clips_per_sample = datagen.get_clips_per_sample()

    print("predicting results")
    model = keras.models.load_model(get_best_model_path(model_name))

    y_test = model.predict_generator(datagen, verbose=1)
    print("y_test.shape after predict", y_test.shape)

    pos = 0
    y_merged = []

    # collapse clip-level predictions back into one vector per original sample
    for count in clips_per_sample:
        if count != 0:
            y_merged.append(
                merge_predictions(y_test[pos:pos + count], "max", 0))
        else:
            # samples that produced no clips get an all-zero prediction vector
            y_merged.append(np.zeros_like(y_merged[0]))

        pos += count

    y_test = np.array(y_merged)
    print("y_test.shape after merge", y_test.shape)
    return y_test
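The merge_predictions helper used above is not shown in this listing. A minimal sketch of what it might look like, assuming it simply reduces the per-clip predictions along the given axis with the named operation ("max" here, with averaging as an obvious alternative); this is an illustration, not the original implementation:

import numpy as np

# Hypothetical sketch of merge_predictions: collapse several per-clip
# prediction vectors into a single per-sample vector.
def merge_predictions(y: np.ndarray, mode: str, axis: int) -> np.ndarray:
    if mode == "max":
        return np.max(y, axis=axis)
    if mode == "avg":
        return np.mean(y, axis=axis)
    raise ValueError("unsupported merge mode: %s" % mode)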
Example #2
def train_model(train_datagen: SoundDatagen, val_datagen: SoundDatagen,
                name: str) -> None:
    """ Creates model, trains it and saves it to the file. """
    shape = train_datagen.shape()
    x = inp = keras.layers.Input(shape=shape[1:])

    # six identical convolutional blocks: Conv2D -> BatchNorm -> ReLU -> MaxPool
    for _ in range(6):
        x = keras.layers.Convolution2D(32, (4, 10), padding="same")(x)
        x = keras.layers.BatchNormalization()(x)
        x = keras.layers.Activation("relu")(x)
        x = keras.layers.MaxPool2D()(x)

    x = keras.layers.Flatten()(x)
    x = keras.layers.Dense(64)(x)
    x = keras.layers.BatchNormalization()(x)
    x = keras.layers.Activation("relu")(x)

    out = keras.layers.Dense(NUM_CLASSES, activation="softmax")(x)

    model = keras.models.Model(inputs=inp, outputs=out)
    model.summary()

    map3 = Map3Metric(val_datagen, name)
    model.compile(loss="categorical_crossentropy", optimizer="adam",
                  metrics=["accuracy"])

    model.fit_generator(train_datagen, epochs=NUM_EPOCHS,
                        verbose=1, shuffle=False,
                        # use_multiprocessing=False,
                        use_multiprocessing=True, workers=12,
                        validation_data=val_datagen, callbacks=[map3])

    print("best MAP@3 value: %.04f at epoch %d" % (map3.best_map3, map3.best_epoch))
Example #3
    assert len(train_files) == 9473

    train_labels = train_df["label"]
    test_files = np.array(find_files("../data/audio_test/"))
    test_idx = [os.path.basename(f) for f in test_files]

    build_caches(np.concatenate([train_files, test_files]))
    fit_labels(train_labels)

    if USE_HYPEROPT:
        x_train, x_val, y_train, y_val = train_test_split(train_files,
                                                          train_labels,
                                                          test_size=TEST_SIZE,
                                                          shuffle=False)

        train_datagen = SoundDatagen(x_train, y_train)
        val_datagen = SoundDatagen(x_val, y_val)
        '''
        cnn_dropout_coeff   = float(params["cnn_dropout_coeff"])
        cnn_kern_width      = int(params["cnn_kern_width"])
        cnn_dim_decay       = int(params["cnn_dim_decay"])
        cnn_depth_growth    = int(params["cnn_depth_growth"])
        conv2d_depth        = int(params["conv2d_depth"])
        conv2d_len          = int(params["conv2d_len"])
        pooling             = params["pooling"]
        '''

        hyperopt_space = {
            "num_cnn_layers":
            hp.quniform("num_cnn_layers", 4, 7, 1),
            "cnn_depth":
Example #4
    train_labels = train_df["label"]
    test_files = np.array(find_files("../data/audio_test/"))
    test_idx = [os.path.basename(f) for f in test_files]

    build_caches(np.concatenate([train_files, test_files]))
    fit_labels(train_labels)

    # single train/validation split when k-fold cross-validation is disabled
    if not ENABLE_KFOLD:
        x_train, x_val, y_train, y_val = train_test_split(train_files,
                                                          train_labels,
                                                          test_size=TEST_SIZE,
                                                          shuffle=False)

        if not PREDICT_ONLY:
            train_model(SoundDatagen(x_train, y_train),
                        SoundDatagen(x_val, y_val), "nofolds")

        pred = predict(SoundDatagen(test_files, None), "nofolds")
        pred = encode_predictions(pred)
    else:
        # k-fold cross-validation: collect per-fold test predictions here
        kf = KFold(n_splits=KFOLDS, shuffle=False)
        pred = np.zeros((len(test_idx), KFOLDS, NUM_CLASSES))

        for k, (train_idx, val_idx) in enumerate(kf.split(train_files)):
            print("fold %d ==============================================" % k)
            name = "fold_%d" % k

            x_train, y_train = train_files[train_idx], train_labels[train_idx]
            x_val, y_val = train_files[val_idx], train_labels[val_idx]
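Example #4 is cut off inside the fold loop. Judging by the shape of pred and the single-split branch above, each fold would typically be trained and predicted the same way, with the per-fold class probabilities averaged afterwards. The continuation below is a hedged sketch along those lines, not the original code:

            # hypothetical continuation of the fold loop
            if not PREDICT_ONLY:
                train_model(SoundDatagen(x_train, y_train),
                            SoundDatagen(x_val, y_val), name)

            # store this fold's test predictions in its slice of pred
            pred[:, k, :] = predict(SoundDatagen(test_files, None), name)

        # average class probabilities across folds, then encode to labels
        pred = encode_predictions(pred.mean(axis=1))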