Example No. 1
def predict():
    # load the model architecture from YAML
    with open("../sensing_data/models/dnn_tf_1_1.yaml", 'r') as yaml_file:
        loaded_model_yaml = yaml_file.read()
    dnn_pred = model_from_yaml(loaded_model_yaml)
    # load weights into new model
    dnn_pred.load_weights("../sensing_data/models/dnn_tf_1_1.h5")
    print("Loaded model from disk")

    dnn_pred.compile(loss='categorical_crossentropy',
                     optimizer='Adam',
                     metrics=['accuracy'])

    dnn_pred.summary()

    X, y, shape = data.load_prediction(ratio=1,
                                       normalize=False,
                                       osm_roads=False,
                                       split_struct=False,
                                       army_gt=False)

    # Normalizer scales each sample (row) to unit norm
    normalizer = preprocessing.Normalizer().fit(X)
    X = normalizer.transform(X)

    y_pred = dnn_pred.predict(X)
    y_pred = [np.argmax(pred) for pred in tqdm(y_pred)]

    # ground-truth labels are 1-based; shift to 0-based to match the argmax output
    kappa = cohen_kappa_score(y - 1, y_pred)
    print(f'Kappa: {kappa}')
    print(classification_report(y - 1, y_pred))

    # fold the flat per-pixel predictions back into the raster grid
    y_pred = np.array(y_pred)
    yr = y_pred.reshape(shape)

    viz.createGeotiff(OUT_RASTER, yr, REF_FILE, gdal.GDT_Byte)
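These snippets share a module-level preamble that the gallery omits. The sketch below is a plausible reconstruction, not the authors' file: the third-party imports are implied by the calls in the examples (model_from_yaml implies a pre-2.6 TensorFlow), while data, viz, fselector, str_2_bool, and the path constants are project-local assumptions.

import argparse
import time
from datetime import timedelta

import numpy as np
import tensorflow as tf
import xgboost as xgb
from joblib import dump
from osgeo import gdal
from sklearn import preprocessing, svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (classification_report, cohen_kappa_score,
                             confusion_matrix)
from sklearn.model_selection import train_test_split
from sklearn.utils import class_weight
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import (Conv1D, Dense, Dropout, Flatten,
                                     MaxPooling1D)
from tensorflow.keras.models import Sequential, model_from_yaml
from tqdm import tqdm

import data       # project-local dataset loaders (assumed)
import fselector  # project-local feature-selection wrapper (assumed)
import viz        # project-local raster helpers (assumed)

DS_FOLDER = "../sensing_data/"                          # assumed
REF_FILE = DS_FOLDER + "clipped_sentinel2_B08.vrt"      # assumed, cf. Example No. 3
OUT_RASTER = DS_FOLDER + "results/classification.tiff"  # assumed
OUT_PROBA_RASTER = DS_FOLDER + "results/proba_"         # assumed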
Example No. 2
def test_pred():
    start = time.time()
    X, y, shape = data.load_prediction(ratio=1,
                                       normalize=False,
                                       osm_roads=False,
                                       split_struct=False)

    normalizer = preprocessing.Normalizer().fit(X)
    X = normalizer.transform(X)
Example No. 3
def main(argv):
    real_start = time.time()

    split_struct = False
    osm_roads = False

    # train_size = int(100_000)
    train_size = int(19_386_625 * 0.2)
    X_train, y_train, X_test, y_test, _, _, _ = data.load(
        train_size, normalize=True, osm_roads=osm_roads, split_struct=split_struct)

    start = time.time()
    # Build an SVM classifier
    sv = svm.SVC(C=6.685338321430641, gamma=6.507029881541734)

    print("Fitting data...")
    sv.fit(X_train, y_train)

    end = time.time()
    elapsed = end - start
    print("Training time: " + str(timedelta(seconds=elapsed)))

    yt_pred = sv.predict(X_train)
    kappa = cohen_kappa_score(y_train, yt_pred)
    print(f'Train Kappa: {kappa}')
    print(classification_report(y_train, yt_pred))

    y_pred = sv.predict(X_test)
    kappa = cohen_kappa_score(y_test, y_pred)
    print(f'Kappa: {kappa}')
    print(classification_report(y_test, y_pred))
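    # NOTE: early return for quick evaluation; the export and full-scene prediction below never run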
    return 0

    dump(sv, '../sensing_data/models/svm_static_group3.joblib')
    print("Saved model to disk")
    # Testing trash
    X, y, shape = data.load_prediction(
        ratio=1, normalize=True, osm_roads=osm_roads, split_struct=split_struct)

    start_pred = time.time()
    y_pred = sv.predict(X)
    print("Predict time: " + str(timedelta(seconds=time.time()-start_pred)))

    kappa = cohen_kappa_score(y, y_pred)
    print(f'Kappa: {kappa}')
    print(classification_report(y, y_pred))

    yr = y_pred.reshape(shape)

    viz.createGeotiff(OUT_RASTER, yr, DS_FOLDER +
                      "clipped_sentinel2_B08.vrt", gdal.GDT_Byte)

    end = time.time()
    elapsed = end - real_start
    print("Total run time: " + str(timedelta(seconds=elapsed)))
Example No. 4
def model(dfs):
    start = time.time()
    train_size = int(19386625 * 0.2)

    split_struct = True
    osm_roads = False

    X_train, y_train, X_test, y_test, _, _, normalizer = data.load(
        train_size,
        normalize=True,
        osm_roads=osm_roads,
        split_struct=split_struct)

    X_train = np.expand_dims(X_train, axis=2)
    X_test = np.expand_dims(X_test, axis=2)

    input_shape = X_train.shape[1]
    logits = 5

    # shift the 1-based labels to 0-based for one-hot encoding
    y_train = y_train - 1
    y_test = y_test - 1

    # class_weights = class_weight.compute_class_weight('balanced',
    #                                                    np.unique(y_train),
    #                                                    y_train)

    y_train_onehot = tf.keras.utils.to_categorical(y_train, num_classes=logits)

    n_timesteps, n_features = X_train.shape[1], X_train.shape[2]
    n_outputs = y_train_onehot.shape[1]

    # 1D CNN: three conv layers (the first two followed by max-pooling), then a dense head
    model_cnn = Sequential()
    model_cnn.add(
        Conv1D(filters=64,
               kernel_size=3,
               activation='relu',
               input_shape=(n_timesteps, n_features)))
    model_cnn.add(MaxPooling1D(pool_size=2))

    model_cnn.add(Conv1D(filters=128, kernel_size=3, activation='relu'))
    model_cnn.add(MaxPooling1D(pool_size=2))

    model_cnn.add(Conv1D(filters=128, kernel_size=3, activation='relu'))
    model_cnn.add(Flatten())
    model_cnn.add(Dense(128, activation='relu'))
    model_cnn.add(Dropout(0.5))
    model_cnn.add(Dense(64, activation='relu'))
    model_cnn.add(Dense(n_outputs, activation='softmax'))

    model_cnn.compile(loss='categorical_crossentropy',
                      optimizer='rmsprop',
                      metrics=['mae', 'acc'])
    model_cnn.summary()

    es = EarlyStopping(monitor='val_loss',
                       min_delta=0.0001,
                       patience=5,
                       verbose=0,
                       mode='auto')

    model_cnn.fit(X_train,
                  y_train_onehot,
                  epochs=100,
                  validation_split=0.2,
                  callbacks=[es])

    yt_pred_onehot = model_cnn.predict(X_train)
    yt_pred = [np.argmax(pred) for pred in yt_pred_onehot]

    kappa = cohen_kappa_score(y_train, yt_pred)
    print(f'Train Kappa: {kappa}')
    print(classification_report(y_train, yt_pred))

    y_pred_onehot = model_cnn.predict(X_test)
    y_pred = [np.argmax(pred) for pred in y_pred_onehot]

    kappa = cohen_kappa_score(y_test, y_pred)
    print(f'Validation Kappa: {kappa}')
    print(classification_report(y_test, y_pred))

    # Testing trash
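    # pass the normalizer fitted in data.load so the full scene gets identical scaling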
    X, y, shape = data.load_prediction(ratio=1,
                                       normalize=normalizer,
                                       osm_roads=osm_roads,
                                       split_struct=split_struct,
                                       army_gt=False)
    print(X.shape, y.shape)

    y_pred = model_cnn.predict(X)
    y_pred = [np.argmax(pred) for pred in y_pred]

    kappa = cohen_kappa_score(y - 1, y_pred)
    print(f'Kappa: {kappa}')
    print(classification_report(y - 1, y_pred))

    y_pred = np.array(y_pred)
    yr = y_pred.reshape(shape)

    viz.createGeotiff(OUT_RASTER, yr, REF_FILE, gdal.GDT_Byte)

    end = time.time()
    elapsed = end - start
    print("Run time: " + str(timedelta(seconds=elapsed)))
Example No. 5
def main(argv):
    real_start = time.time()
    train_size = int(19386625 * 0.2)

    split_struct = True
    osm_roads = False

    X_train, y_train, X_test, y_test, _, _, _ = data.load(
        train_size,
        normalize=False,
        osm_roads=osm_roads,
        split_struct=split_struct)

    start = time.time()
    # Build a forest and compute the feature importances
    forest = RandomForestClassifier(n_estimators=500,
                                    min_samples_leaf=4,
                                    min_samples_split=2,
                                    max_depth=130,
                                    class_weight='balanced',
                                    n_jobs=-1,
                                    verbose=1)
    print("Fitting data...")
    forest.fit(X_train, y_train)

    end = time.time()
    elapsed = end - start
    print("Training time: " + str(timedelta(seconds=elapsed)))

    yt_pred = forest.predict(X_train)
    kappa = cohen_kappa_score(y_train, yt_pred)
    print(f'Train Kappa: {kappa}')
    print(classification_report(y_train, yt_pred))

    y_pred = forest.predict(X_test)
    kappa = cohen_kappa_score(y_test, y_pred)
    print(f'Validation Kappa: {kappa}')
    print(classification_report(y_test, y_pred))
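    # NOTE: early return; model saving and full-scene prediction below are unreachable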
    return 0

    dump(forest, '../sensing_data/models/forest_tsfull_group2.joblib')
    print("Saved model to disk")

    X, y, shape = data.load_prediction(ratio=1,
                                       normalize=None,
                                       osm_roads=osm_roads,
                                       split_struct=split_struct,
                                       army_gt=False)

    start_pred = time.time()
    y_pred_classes = forest.predict(X)

    # y_pred_proba = forest.predict_proba(X)
    # y_pred_classes = np.array(
    #     [np.argmax(yi, axis=-1) + 1 for yi in tqdm(y_pred_proba)])
    print("Predict time: " + str(timedelta(seconds=time.time() - start_pred)))

    kappa = cohen_kappa_score(y, y_pred_classes)
    print(f'Kappa: {kappa}')
    print(classification_report(y, y_pred_classes))

    yr = y_pred_classes.reshape(shape)

    viz.createGeotiff(OUT_RASTER, yr, REF_FILE, gdal.GDT_Byte)

    print("Creating uncertainty matrix...")
    start_matrix = time.time()

    # y_pred_proba_reshaped = y_pred_proba.reshape((shape[0], shape[1], 3))

    # viz.createGeotiff(OUT_PROBA_RASTER + "estrutura.tiff",
    #                   y_pred_proba_reshaped[:, :, 0], REF_FILE, gdal.GDT_Float32)
    # # viz.createGeotiff(OUT_PROBA_RASTER + "estrada.tiff",
    # #                   y_pred_proba_reshaped[:, :, 1], REF_FILE, gdal.GDT_Float32)
    # viz.createGeotiff(OUT_PROBA_RASTER + "restante.tiff",
    #                   y_pred_proba_reshaped[:, :, 1], REF_FILE, gdal.GDT_Float32)
    # viz.createGeotiff(OUT_PROBA_RASTER + "agua.tiff",
    #                   y_pred_proba_reshaped[:, :, 2], REF_FILE, gdal.GDT_Float32)

    end = time.time()
    elapsed = end - start_matrix
    print("Matrix creation time: " + str(timedelta(seconds=elapsed)))

    end = time.time()
    elapsed = end - real_start
    print("Total run time: " + str(timedelta(seconds=elapsed)))
Example No. 6
def main(argv):
    parser = argparse.ArgumentParser(description='Trains an XGBoost model.')
    parser.add_argument("--roads",
                        type=str_2_bool,
                        nargs='?',
                        const=True,
                        default=False,
                        help="Activate OSM roads")
    parser.add_argument("--fselect",
                        type=str_2_bool,
                        nargs='?',
                        const=True,
                        default=False,
                        help="Activate feature selection roads")

    args = parser.parse_args()

    road_flag = args.roads
    selector_flag = args.fselect

    if road_flag:
        print("Using roads...")

    obj = 'binary:hinge' if args.roads else 'multi:softmax'

    real_start = time.time()
    train_size = int(19386625 * 0.2)
    X, y, X_test, y_test = data.load(train_size,
                                     normalize=False,
                                     balance=False,
                                     osm_roads=road_flag)

    start = time.time()

    forest = xgb.XGBClassifier(
        colsample_bytree=0.7553707061597048,
        gamma=5,
        gpu_id=0,
        learning_rate=0.2049732654267658,
        max_depth=8,
        min_child_weight=1,
        max_delta_step=9.075685204314162,
        n_estimators=1500,
        n_jobs=4,
        objective=obj,  # binary:hinge if binary classification
        predictor='gpu_predictor',
        tree_method='gpu_hist')

    if selector_flag:
        print("Feature importances running...")
        selector = fselector.Fselector(forest, mode="elastic", thold=0.25)
        transformer = selector.select(X, y)

        print("Transforming data...")
        X = transformer.transform(X)
        X_test = transformer.transform(X_test)

    print("Fitting data...")
    forest.fit(X, y)

    end = time.time()
    elapsed = end - start
    print("Training time: " + str(timedelta(seconds=elapsed)))

    y_pred = forest.predict(X_test)

    kappa = cohen_kappa_score(y_test, y_pred)
    print(f'Kappa: {kappa}')
    print(classification_report(y_test, y_pred))
    print(confusion_matrix(y_test, y_pred))

    dump(forest, '../sensing_data/models/boosted.joblib')
    print("Saved model to disk")

    # Testing trash
    X, y, shape = data.load_prediction(ratio=1,
                                       normalize=False,
                                       osm_roads=road_flag)

    start_pred = time.time()
    # predict in two batches (halves of X)
    X_h = X[:len(X) // 2]
    X_h1 = X[len(X) // 2:]

    # switch the trained booster to CPU prediction for full-scene inference
    forest.get_booster().set_param('predictor', 'cpu_predictor')

    print("Predict 0%...")
    y_pred = forest.predict_proba(X_h)
    print("Predict 50%...")
    y_pred2 = forest.predict_proba(X_h1)
    print("Predict 100%...")

    y_pred_proba = np.concatenate((y_pred, y_pred2))
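    # argmax gives 0-based class indices; +1 restores the 1-based labels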
    y_pred_classes = np.array(
        [np.argmax(yi, axis=-1) + 1 for yi in tqdm(y_pred_proba)])
    print("Predict time: " + str(timedelta(seconds=time.time() - start_pred)))

    kappa = cohen_kappa_score(y, y_pred_classes)
    print(f'Kappa: {kappa}')
    print(classification_report(y, y_pred_classes))

    y_pred_classes_reshaped = y_pred_classes.reshape(shape)

    viz.createGeotiff(OUT_RASTER, y_pred_classes_reshaped, REF_FILE,
                      gdal.GDT_Byte)

    print("Creating uncertainty matrix...")
    start_matrix = time.time()

    y_pred_proba_reshaped = y_pred_proba.reshape((shape[0], shape[1], 3))

    viz.createGeotiff(OUT_PROBA_RASTER + "estrutura.tiff",
                      y_pred_proba_reshaped[:, :,
                                            0], REF_FILE, gdal.GDT_Float32)
    # viz.createGeotiff(OUT_PROBA_RASTER + "estrada.tiff",
    #                   y_pred_proba_reshaped[:, :, 1], REF_FILE, gdal.GDT_Float32)
    viz.createGeotiff(OUT_PROBA_RASTER + "restante.tiff",
                      y_pred_proba_reshaped[:, :,
                                            1], REF_FILE, gdal.GDT_Float32)
    viz.createGeotiff(OUT_PROBA_RASTER + "agua.tiff",
                      y_pred_proba_reshaped[:, :,
                                            2], REF_FILE, gdal.GDT_Float32)

    end = time.time()
    elapsed = end - start_matrix
    print("Matrix creation time: " + str(timedelta(seconds=elapsed)))

    end = time.time()
    elapsed = end - real_start
    print("Total run time: " + str(timedelta(seconds=elapsed)))
Example No. 7
def main(argv):
    parser = argparse.ArgumentParser(description='Trains an XGBoost model.')
    parser.add_argument("--roads",
                        type=str_2_bool,
                        nargs='?',
                        const=True,
                        default=False,
                        help="Activate OSM roads")
    parser.add_argument("--fselect",
                        type=str_2_bool,
                        nargs='?',
                        const=True,
                        default=False,
                        help="Activate feature selection")

    args = parser.parse_args()

    road_flag = args.roads
    selector_flag = args.fselect

    if road_flag:
        print("Using roads...")

    if selector_flag:
        print("Using feature selection...")

    # NOTE: obj is assigned but never used; the objective= argument below is commented out
    obj = 'binary:hinge'

    real_start = time.time()

    split_struct = False
    osm_roads = False

    train_size = int(19386625 * 0.2)
    # train_size = int(1607*1015*0.2)

    X_train, y_train, X_test, y_test, _, _, _ = data.load(
        train_size,
        map_classes=False,
        normalize=False,
        osm_roads=osm_roads,
        split_struct=split_struct,
        gt_raster='cos_new_gt_2015t.tiff')

    start = time.time()

    #XGB_binary_building = {'colsample_bytree': 0.7343021353976351, 'gamma': 0, 'learning_rate': 0.16313076998849083, 'max_delta_step': 8.62355770678575, 'max_depth': 8, 'min_child_weight': 3, 'n_estimators': 1500, 'predictor': 'cpu_predictor', 'tree_method': 'hist'}

    forest = xgb.XGBClassifier(
        colsample_bytree=0.7343021353976351,
        gamma=0,
        gpu_id=0,
        learning_rate=0.16313076998849083,
        max_depth=8,
        min_child_weight=3,
        max_delta_step=8.62355770678575,
        n_estimators=1500,
        n_jobs=-1,
        #objective=obj,  # binary:hinge if binary classification
        predictor='cpu_predictor',
        tree_method='hist')

    if selector_flag:
        print("Feature importances running...")
        # svm can't handle the full training data
        x_train_feature, _, y_train_feature, _ = train_test_split(
            X_test, y_test, test_size=0, train_size=100_000)

        selector = fselector.Fselector(forest, mode="importances", thold=0.80)
        transformer = selector.select(x_train_feature, y_train_feature)

        features = transformer.get_support()
        # feature_names = data.get_features()
        # feature_names = feature_names[features]
        print(features)
        print("Transforming data...")
        print("Before: ", X_train.shape)
        X_train = transformer.transform(X_train)
        X_test = transformer.transform(X_test)
        print("After: ", X_train.shape)

    print("Fitting data...")
    forest.fit(X_train, y_train)

    end = time.time()
    elapsed = end - start
    print("Training time: " + str(timedelta(seconds=elapsed)))

    yt_pred = forest.predict(X_train)

    yt_pred[yt_pred > 0.5] = 1
    yt_pred[yt_pred <= 0.5] = 0

    kappa = cohen_kappa_score(y_train, yt_pred)
    print(f'Train Kappa: {kappa}')
    print(classification_report(y_train, yt_pred))

    y_pred = forest.predict(X_test)

    y_pred[y_pred > 0.5] = 1
    y_pred[y_pred <= 0.5] = 0

    kappa = cohen_kappa_score(y_test, y_pred)
    print(f'Validation Kappa: {kappa}')
    print(classification_report(y_test, y_pred))

    dump(forest, '../sensing_data/models/boosted_test_group1.joblib')
    print("Saved model to disk")

    # Testing trash
    X, y, shape = data.load_prediction(ratio=1,
                                       map_classes=False,
                                       normalize=False,
                                       osm_roads=osm_roads,
                                       split_struct=split_struct,
                                       gt_raster='cos_new_gt_2015t.tif')

    start_pred = time.time()
    # predict in two batches (halves of X)
    X_h = X[:len(X) // 2]
    X_h1 = X[len(X) // 2:]

    forest.get_booster().set_param('predictor', 'cpu_predictor')

    print("Predict 0%...")
    y_pred = forest.predict(X_h)
    print("Predict 50%...")
    y_pred2 = forest.predict(X_h1)
    print("Predict 100%...")

    y_pred_classes = np.concatenate((y_pred, y_pred2))
    y_pred_classes[y_pred_classes > 0.5] = 1
    y_pred_classes[y_pred_classes <= 0.5] = 0

    print("Predict time: " + str(timedelta(seconds=time.time() - start_pred)))

    kappa = cohen_kappa_score(y, y_pred_classes)
    print(f'Kappa: {kappa}')
    print(classification_report(y, y_pred_classes))

    y_pred_classes_reshaped = y_pred_classes.reshape(shape)

    viz.createGeotiff(OUT_RASTER, y_pred_classes_reshaped, REF_FILE,
                      gdal.GDT_Byte)

    # NOTE: early return; the probability-raster code below is unreachable
    # (y_pred_proba is never computed here, and channel index 4 exceeds the 4-channel reshape)
    return

    print("Creating uncertainty matrix...")
    start_matrix = time.time()

    y_pred_proba_reshaped = y_pred_proba.reshape((shape[0], shape[1], 4))

    viz.createGeotiff(OUT_PROBA_RASTER + "estrutura_urbana.tiff",
                      y_pred_proba_reshaped[:, :,
                                            0], REF_FILE, gdal.GDT_Float32)
    viz.createGeotiff(OUT_PROBA_RASTER + "estrada.tiff",
                      y_pred_proba_reshaped[:, :,
                                            1], REF_FILE, gdal.GDT_Float32)
    # viz.createGeotiff(OUT_PROBA_RASTER + "outras.tiff",
    #                     y_pred_proba_reshaped[:, :, 2], REF_FILE, gdal.GDT_Float32)
    viz.createGeotiff(OUT_PROBA_RASTER + "natural.tiff",
                      y_pred_proba_reshaped[:, :,
                                            3], REF_FILE, gdal.GDT_Float32)
    viz.createGeotiff(OUT_PROBA_RASTER + "agua.tiff",
                      y_pred_proba_reshaped[:, :,
                                            4], REF_FILE, gdal.GDT_Float32)

    end = time.time()
    elapsed = end - start_matrix
    print("Matrix creation time: " + str(timedelta(seconds=elapsed)))

    end = time.time()
    elapsed = end - real_start
    print("Total run time: " + str(timedelta(seconds=elapsed)))