# NOTE(review): this chunk appears to be two concatenated script fragments.
# `data_fn` and `batch_size` are used before any visible definition, and the
# `params` dict below does not show the 'Tm'/'Tw' keys that are indexed
# later -- some content was likely lost in extraction. Confirm against the
# original file before running.

# training hyperparameters
epochs = 10
mag_T0 = 64   # presumably magnetometer history length -- TODO confirm
sw_T0 = 240   # presumably solar-wind history length -- TODO confirm
train_test_split = .11
train_val_split = .15

# load in the data created by "create_dataset.py"
data = np.load(data_fn)
X = data['X']
y = data['y'][:, None]          # labels as an (N, 1) column
strength = data['strength']
SW = data['SW']

# create train, val and test sets
# non-random (ordered) test split first, then a random validation split
train, test = utils.split_data([X, SW, y, strength], train_test_split, random=False)
train, val = utils.split_data(train, train_val_split, random=True)
X_train, SW_train, y_train, strength_train = train
X_val, SW_val, y_val, strength_val = val
X_test, SW_test, y_test, strength_test = test
print("X train shape:", X_train.shape, "proportion of substorms: ", np.mean(y_train))
print("X val shape:", X_val.shape, "proportion of substorms: ", np.mean(y_val))
print("X test shape:", X_test.shape, "proportion of substorms: ", np.mean(y_test))

# model / training configuration
params = {
    'batch_size': batch_size,
    'epochs': epochs,
    'sw_fl_strides': 3,
    'sw_fl_kernel_size': 15,
    'sw_type': 'residual'
}

# second fragment: regression dataset with pre-made train/test splits;
# keeps only magnetometer components from column 2 onward
data_fn = "../data/regression_data13000.npz"
train_val_split = .15
data = np.load(data_fn)
X = data['mag_data_train'][:, :, :, 2:]
y = data['y_train'][:, None]
SW = data['sw_data_train']
X_test = data['mag_data_test'][:, :, :, 2:]
y_test = data['y_test'][:, None]
SW_test = data['sw_data_test']
train, val = utils.split_data([X, SW, y], train_val_split, random=True)
# free the pre-split copies
del data
del X
del y
X_train, SW_train, y_train = train
X_val, SW_val, y_val = val
train_data = [X_train, SW_train]
train_targets = y_train
val_data = [X_val, SW_val]
val_targets = y_val
# last Tm magnetometer steps and last Tw solar-wind steps for the test set
test_data = [X_test[:, -params['Tm']:], SW_test[:, -params['Tw']:]]
test_targets = y_test
print("X train shape:", X_train.shape)
import linear_models

# Baseline experiment: logistic regression on the all-stations dataset.
plt.style.use('ggplot')

params = []  # passed through to the linear model trainer
path = r"D:\substorm-detection\data\all_stations_data_128.npz"

data = np.load(path)
X = data['X']
y = data['y'][:, None]

train_test_split = .1
train_val_split = .15

# create train, val and test sets:
# ordered (non-random) test split first, then a random validation split,
# both in RNN format
train, test = utils.split_data([X, y], train_test_split, random=False, rnn_format=True)
train, val = utils.split_data(train, train_val_split, random=True, rnn_format=True)
X_train, y_train = train
X_val, y_val = val
X_test, y_test = test

# flatten / reshape the inputs for the linear model
X_train, X_val, X_test = utils.linear_format_x([X_train, X_val, X_test])

# train and report
score, history = linear_models.train_logistic_regression(
    X_train, y_train, X_val, y_val, params)
print(score)
print(history)
def load_data_and_model(self, data_fn, params, train_model, train_val_split, model_file):
    """Load the substorm dataset, build the shuffled test set, and either
    train a new CNN (saving it to ``model_file``) or load a saved one.

    Parameters
    ----------
    data_fn : str
        Path to the .npz archive holding the pre-split dataset.
    params : dict
        Hyperparameters; must contain 'Tm' (magnetometer history length)
        and 'Tw' (solar-wind history length), plus whatever
        ``models.train_cnn`` expects.
    train_model : bool
        True -> train and save a model; False -> load it from ``model_file``.
    train_val_split : float
        Fraction of the training data held out for validation.
    model_file : str
        Path of the Keras model file (.h5).
    """
    data = np.load(data_fn)
    mag_data_train = data['mag_data_train']  # MLT, MLAT, N, E, Z
    mag_data_test = data['mag_data_test']  # MLT, MLAT, N, E, Z
    sw_data_test = data['sw_data_test']
    y_test = data['y_test']
    sme_data_test = data['sme_data_test']
    ss_interval_index_test = data['ss_interval_index_test'].astype(int)
    ss_location_test = data['ss_location_test']
    ss_dates_test = pd.to_datetime(data['ss_dates_test'])
    self.stations = np.array(data['stations'])
    self.station_locations = data['station_locations']
    # number of "history" time steps preceding the prediction interval
    self.t0 = mag_data_train.shape[2]

    # strength target: minimum of SME column 1 (SML) over the 20 steps
    # following each substorm interval index, sign-flipped
    sml_windows = self.t0 + ss_interval_index_test[:, None] + np.arange(20)[None, :]
    sml_test = -1 * np.min(
        sme_data_test[np.arange(sme_data_test.shape[0])[:, None], sml_windows, 1],
        axis=1)

    if train_model:
        # BUG FIX: pull the training arrays out of `data` *before* it is
        # freed. The previous version ran `del data` / `del mag_data_train`
        # first and then read them inside this branch, raising NameError
        # whenever train_model was True.
        sw_data_train = data['sw_data_train']
        y_train = data['y_train']
        sme_data_train = data['sme_data_train']
        ss_interval_index_train = data['ss_interval_index_train'].astype(int)
        ss_location_train = data['ss_location_train']
        ss_dates_train = data['ss_dates_train']
    del data  # free the (large) npz archive as early as possible

    # shuffle the test examples once, up front
    shuff_idx = np.arange(mag_data_test.shape[0])
    np.random.shuffle(shuff_idx)
    self.mag_data = mag_data_test[shuff_idx]
    self.sw_data = sw_data_test[shuff_idx]
    self.y = y_test[shuff_idx]
    # SME, SML, SMU, SML_MLAT, SMU_MLAT, SML_MLT, SMU_MLT, SME_NUMSTATIONS, SMR_NUMSTATIONS
    self.sme_data = sme_data_test[shuff_idx]
    self.ss_interval_index = ss_interval_index_test[shuff_idx]
    self.ss_locations = ss_location_test[shuff_idx]
    self.ss_dates = ss_dates_test[shuff_idx]
    self.sml = sml_test[shuff_idx]
    # network inputs: last Tm steps of magnetometer components (cols 2:)
    # and last Tw steps of solar wind
    self.test_data = [
        self.mag_data[:, :, self.t0 - params['Tm']:self.t0, 2:],
        self.sw_data[:, -params['Tw']:]
    ]
    self.test_targets = [self.y, self.sml]

    if train_model:
        # create train and val sets
        train, val = utils.split_data([
            mag_data_train, sw_data_train, y_train, sme_data_train,
            ss_interval_index_train, ss_location_train, ss_dates_train
        ], train_val_split, random=True)
        # free the pre-split copies; the names are rebound from the split below
        del mag_data_train
        del y_train
        del sme_data_train
        del sw_data_train
        del ss_interval_index_train
        del ss_location_train
        del ss_dates_train
        (mag_data_train, sw_data_train, y_train, sme_data_train,
         ss_interval_index_train, ss_location_train, ss_dates_train) = train
        (mag_data_val, sw_data_val, y_val, sme_data_val,
         ss_interval_index_val, ss_location_val, ss_dates_val) = val

        train_data = [
            mag_data_train[:, :, self.t0 - params['Tm']:self.t0, 2:],
            sw_data_train[:, -params['Tw']:]
        ]
        # targets: substorm occurrence + sign-flipped SML (column 1)
        train_targets = [y_train, -1 * sme_data_train[:, 1]]
        val_data = [
            mag_data_val[:, :, self.t0 - params['Tm']:self.t0, 2:],
            sw_data_val[:, -params['Tw']:]
        ]
        val_targets = [y_val, -1 * sme_data_val[:, 1]]

        hist, self.model = models.train_cnn(train_data, train_targets,
                                            val_data, val_targets, params)
        self.model.summary()
        keras.models.save_model(self.model, model_file)

        # training curves: classification accuracy (top), strength MAE (bottom)
        plt.figure()
        plt.subplot(211)
        plt.plot(hist.history['val_time_output_acc'])
        plt.plot(hist.history['time_output_acc'])
        plt.subplot(212)
        plt.plot(hist.history['val_strength_output_mean_absolute_error'])
        plt.plot(hist.history['strength_output_mean_absolute_error'])

        print("mag data train shape:", mag_data_train.shape,
              "proportion of substorms: ", np.mean(y_train))
        print("mag data val shape:", mag_data_val.shape,
              "proportion of substorms: ", np.mean(y_val))
    else:
        del mag_data_train  # only needed for t0 when not training
        self.model = keras.models.load_model(
            model_file,
            custom_objects={'true_positive': utils.true_positive,
                            'false_positive': utils.false_positive})
        self.model.summary()
    print("mag data test shape:", self.mag_data.shape,
          "proportion of substorms: ", np.mean(self.y))
# # plt.plot(n_storms) # plt.show() model_file = '../CNN/saved models/StrengthNet.h5' data_fn = "../data/1classes_data64_withsw.npz" data = np.load(data_fn) X = data['X'] y = data['y'][:, None] sw_data = data['SW'] strength = data['strength'] train_test_split = .11 train, test = utils.split_data([X, y, sw_data, strength], train_test_split, random=False) X_train, y_train, sw_data_train, strength_train = train X_test, y_test, sw_data_test, strength_test = test model: keras.models.Model = keras.models.load_model(model_file, custom_objects={ 'true_positive': utils.true_positive, 'false_positive': utils.false_positive }) pred = model.predict([X_test, sw_data_test[:, -240:]]) pos_mask = y_test[:, 0] == 1 logits = np.log(pred[0][pos_mask].max(axis=1) /
# configuration
data_fn = "../data/2classes_data128_withsw_small.npz"
train_test_split = .11
train_val_split = .15
model_file = "saved models/model.h5"
batch_size = 32

# load in the data created by "create_dataset.py"
data = np.load(data_fn)
X = data['X']
y = data['y'][:, None]
strength = data['strength']
SW = data['SW']
ind = data['interval_index']

# create train, val and test sets: ordered (non-random) test split first,
# then a random validation split (batch_size is forwarded to utils.split_data)
train, test = utils.split_data([X, SW, y, strength, ind],
                               train_test_split, random=False)
del data, X, y, strength, SW, ind  # free the pre-split copies
train, val = utils.split_data(train, train_val_split, random=True,
                              batch_size=batch_size)
X_train, SW_train, y_train, strength_train, ind_train = train
X_val, SW_val, y_val, strength_val, ind_val = val
X_test, SW_test, y_test, strength_test, ind_test = test

# present the test set in a random order
idx = np.random.permutation(X_test.shape[0])
test_data = [X_test[idx], SW_test[idx]]
test_targets = [y_test[idx], strength_test[idx]]