def cross_validataion_cnn(t_span, height_span, image_size, downsample_size,
                          cnn_model, initial_weights, augment,
                          test_ratio=0.2, limit=10000):
    """K-fold cross validation for cnn-like models.

    For each lead time ``t`` in ``t_span``, loads and normalizes the data,
    trains ``cnn_model`` K times (K = 1/test_ratio) from the same
    ``initial_weights``, and collects out-of-fold predictions.  Predictions
    are averaged across the lead times seen so far.

    Returns:
        (avg_Y_pred, Y): averaged out-of-fold predictions and the targets
        of the last ``t`` processed, both in normalized units (÷ NORM_Y).
    """
    K = int(1 / test_ratio)
    Y_pred_collection = None
    for t in t_span:
        X, Y = load_training_data(t=t, height_span=height_span,
                                  image_size=image_size,
                                  downsample_size=downsample_size,
                                  limit=limit)
        # train in normalized units; rmse reports convert back below
        X = X / NORM_X
        Y = Y / NORM_Y
        k_fold = KFold(K)
        Y_pred = np.zeros((len(Y), 1))
        for k, (train, test) in enumerate(k_fold.split(X, Y)):
            # restart every fold from identical weights so folds do not
            # leak training progress into each other
            reset_weights(cnn_model, initial_weights)
            train_X, train_Y = preprocessing_data(X[train], Y[train])
            test_X, test_Y = X[test], Y[test]
            if augment:
                train_X, train_Y = augment_training_data(
                    train_X, train_Y, image_size, mode='image')
            early_stop = EarlyStopping(monitor='loss', patience=0)
            cnn_model.fit(train_X, train_Y, batch_size=32, epochs=200,
                          verbose=1, validation_data=(test_X, test_Y),
                          callbacks=[early_stop])
            Y_pred[test] = cnn_model.predict(test_X).reshape(-1, 1)
            print("cv {} rmse: {}".format(
                k, rmse(Y_pred[test] * NORM_Y, Y[test] * NORM_Y)))
        if Y_pred_collection is None:
            Y_pred_collection = Y_pred
        else:
            Y_pred_collection = np.concatenate(
                (Y_pred_collection, Y_pred), axis=1)
        avg_Y_pred = np.mean(Y_pred_collection, axis=1)
        # BUGFIX: flatten both arrays before rmse so an (n,) vs (n,1)
        # shape mismatch cannot silently broadcast to (n,n); also report
        # in physical units (× NORM_Y) to match the per-fold prints above
        print("t:{} h:{} rmse:{}".format(
            t, height_span,
            rmse(Y.ravel() * NORM_Y, Y_pred.ravel() * NORM_Y)))
        print("avg rmse:{}".format(
            rmse(Y.ravel() * NORM_Y, avg_Y_pred.ravel() * NORM_Y)))
    return avg_Y_pred, Y
def train_full_cnn_model(t_span, height_span, image_size, downsample_size,
                         cnn_model, initial_weights, learner_storage_path):
    """Fit ``cnn_model`` on the full dataset for every lead time and
    persist one trained model per ``t`` under ``learner_storage_path``.

    NOTE(review): unlike the cross-validation path, the data here is NOT
    divided by NORM_X/NORM_Y before fitting — confirm this is intended.
    """
    for t in t_span:
        print("training t{} h{}...".format(t, height_span))
        features, targets = load_training_data(t=t,
                                               height_span=height_span,
                                               image_size=image_size,
                                               downsample_size=downsample_size,
                                               limit=10000)
        # start each lead time's model from the same initial weights
        reset_weights(cnn_model, initial_weights)
        cnn_model.fit(features, targets, batch_size=256, epochs=10, verbose=1)
        model_path = "{}/t{}h{}size{}.krs".format(
            learner_storage_path, t, height_span, image_size)
        cnn_model.save(model_path)
def cross_validataion_rnn(t_span, height_span, image_size, downsample_size,
                          rnn_model, initial_weights,
                          test_ratio=0.2, limit=10000):
    """K-fold cross validation for rnn models.

    Builds a (limit, time_length, channels*image_size*image_size) sequence
    tensor with one flattened frame per lead time in sorted ``t_span``,
    then scores ``rnn_model`` out-of-fold with K = 1/test_ratio folds.

    Returns:
        (Y_pred, Y): out-of-fold predictions of shape (limit, 1) and the
        targets (as produced by ``preprocessing_data``).
    """
    K = int(1 / test_ratio)
    time_length = len(t_span)
    channels = len(height_span)
    Xs = np.zeros((limit, time_length, channels * image_size * image_size))
    # sorted so sequence position idx corresponds to increasing lead time
    t_span.sort()
    for idx, t in enumerate(t_span):
        X, Y = load_training_data(t=t, height_span=height_span,
                                  image_size=image_size,
                                  downsample_size=downsample_size,
                                  limit=limit)
        X = X.reshape((limit, 1, -1))
        for i in range(limit):
            Xs[i, idx] = X[i]
    # BUGFIX: preprocess the assembled sequence tensor Xs, not the raw X
    # of the last timestep — the original called preprocessing_data(X, Y),
    # which overwrote Xs and discarded every earlier timestep just built.
    Xs, Y = preprocessing_data(Xs, Y)
    k_fold = KFold(K)
    Y_pred = np.zeros((limit, 1))
    for k, (train, test) in enumerate(k_fold.split(Xs, Y)):
        # identical starting weights for every fold
        reset_weights(rnn_model, initial_weights)
        rnn_model.fit(Xs[train], Y[train], batch_size=32, epochs=50,
                      verbose=1, validation_data=(Xs[test], Y[test]))
        Y_pred[test] = rnn_model.predict(Xs[test]).reshape(-1, 1)
        # NOTE(review): rmse here is in whatever units preprocessing_data
        # yields — no NORM_Y scaling, unlike the cnn/convlstm variants.
        print("cv {} rmse: {}".format(k, rmse(Y_pred[test], Y[test])))
    print("overall rmse: {}".format(rmse(Y, Y_pred)))
    return Y_pred, Y
def cross_validataion_convlstm(t_span, height_span, image_size,
                               downsample_size, convlstm_model,
                               test_ratio=0.2, limit=10000):
    """K-fold cross validation for convlstm (rnn+cnn) models.

    Stacks one (image_size, image_size, channels) frame per lead time in
    sorted ``t_span`` into a 5-D sequence tensor, normalizes by NORM_X /
    NORM_Y, and scores the model out-of-fold with K = 1/test_ratio folds.

    Returns:
        (Y_pred, Y): out-of-fold predictions and targets, both in
        normalized units.
    """
    K = int(1 / test_ratio)
    k_fold = KFold(K)
    time_length = len(t_span)
    channels = len(height_span)
    Xs = np.zeros((limit, time_length, image_size, image_size, channels))
    t_span.sort()
    for step, t in enumerate(t_span):
        X, Y = load_training_data(t=t, height_span=height_span,
                                  image_size=image_size,
                                  downsample_size=downsample_size,
                                  limit=limit)
        # place every sample's frame for this lead time at sequence
        # position `step` (vectorized form of the original per-row copy)
        Xs[:, step] = X.reshape((limit, image_size, image_size, channels))
    Xs = Xs / NORM_X
    Y = Y / NORM_Y
    Y_pred = np.zeros((limit, 1))
    for k, (train, test) in enumerate(k_fold.split(Xs, Y)):
        # NOTE(review): the original deliberately left reset_weights
        # commented out here, so later folds continue training from the
        # weights reached in earlier folds — confirm this is intended.
        convlstm_model.fit(Xs[train], Y[train], batch_size=64, epochs=200,
                           verbose=1,
                           validation_data=(Xs[test], Y[test]))
        Y_pred[test] = convlstm_model.predict(Xs[test]).reshape(-1, 1)
        print("cv {} rmse: {}".format(
            k, NORM_Y * rmse(Y_pred[test], Y[test])))
    print("overall rmse: {}".format(NORM_Y * rmse(Y, Y_pred)))
    return Y_pred, Y
def time_sensitive_validataion_cnn(t_span, height_span, image_size,
                                   downsample_size, cnn_models,
                                   initial_weights, augment, holdout=0.1):
    """Time-ordered validation for an ensemble of cnn models.

    Uses the first (1 - holdout) fraction of the records for training and
    the final ``holdout`` fraction for validation, averages predictions
    over all models in ``cnn_models``, then averages those ensemble
    predictions across the lead times seen so far.

    Returns:
        (avg_Y_pred, Y[test]): cross-lead-time averaged predictions and
        the holdout targets of the last ``t`` processed.
    """
    collected = None
    for t in t_span:
        X, Y = load_training_data(t=t, height_span=height_span,
                                  image_size=image_size,
                                  downsample_size=downsample_size,
                                  limit=10000)
        num_of_recs = len(Y)
        split_at = int(num_of_recs * (1 - holdout))
        train = range(split_at)
        test = range(split_at, num_of_recs)
        # preserved exactly: only the literal value False skips
        # augmentation (any other value, truthy or not, augments)
        if augment is not False:
            train_X, train_Y = augment_training_data(
                X[train], Y[train], image_size, mode='image')
        else:
            train_X, train_Y = X[train], Y[train]
        ensemble_pred = None
        for model_idx, cnn_model in enumerate(cnn_models):
            # each ensemble member restarts from the shared weights
            reset_weights(cnn_model, initial_weights)
            cnn_model.fit(train_X, train_Y, batch_size=256, epochs=5,
                          verbose=1, validation_data=(X[test], Y[test]))
            single_pred = cnn_model.predict(X[test]).reshape(-1, 1)
            print("model {} rmse: {}".format(
                model_idx, rmse(Y[test], single_pred)))
            if ensemble_pred is None:
                ensemble_pred = single_pred
            else:
                ensemble_pred = np.concatenate(
                    (ensemble_pred, single_pred), axis=1)
        ensemble_pred = np.mean(ensemble_pred, axis=1).reshape(-1, 1)
        if collected is None:
            collected = ensemble_pred
        else:
            collected = np.concatenate((collected, ensemble_pred), axis=1)
        print(collected.shape)
        avg_Y_pred = np.mean(collected, axis=1)
        print("t:{} h:{} rmse:{}".format(
            t, height_span, rmse(Y[test], ensemble_pred)))
        print("avg rmse:{}".format(rmse(Y[test], avg_Y_pred)))
    return avg_Y_pred, Y[test]