def mergeNGAdata(nametrain='/Users/aklimasewski/Documents/data/cybertrainyeti10_residfeb.csv',
                 nametest='/Users/aklimasewski/Documents/data/cybertestyeti10_residfeb.csv',
                 filenamenga='/Users/aklimasewski/Documents/data/NGA_mag2_9.csv',
                 n=13):
    '''
    Read in the NGA data file, train test split, and merge with the cybershake data

    Parameters
    ----------
    nametrain: path for cybershake training data csv
    nametest: path for cybershake testing data csv
    filenamenga: path for NGA data csv
    n: number of model input features

    Returns
    -------
    train_data1: numpy array of training features
    test_data1: numpy array of testing features
    train_targets1: numpy array of training targets
    test_targets1: numpy array of testing targets
    feature_names: numpy array of feature names
    '''
    from sklearn.model_selection import train_test_split

    # read in the cybershake training and testing data and add azimuth features
    train_data1, test_data1, train_targets1, test_targets1, feature_names = readindata(
        nametrain=nametrain, nametest=nametest, n=n)
    train_data1, test_data1, feature_names = add_az(train_data1, test_data1, feature_names)

    # read in the NGA data and add azimuth features
    nga_data1, nga_targets1, feature_names = readindataNGA(filenamenga, n)
    nga_data1, feature_names = add_azNGA(filenamenga, nga_data1, feature_names)

    # split the NGA records and append them to the cybershake train and test sets
    ngatrain, ngatest, ngatrain_targets, ngatest_targets = train_test_split(
        nga_data1, nga_targets1, test_size=0.2, random_state=1)

    train_data1 = np.concatenate([train_data1, ngatrain], axis=0)
    test_data1 = np.concatenate([test_data1, ngatest], axis=0)
    train_targets1 = np.concatenate([train_targets1, ngatrain_targets], axis=0)
    test_targets1 = np.concatenate([test_targets1, ngatest_targets], axis=0)

    return train_data1, test_data1, train_targets1, test_targets1, feature_names
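# Example usage sketch (commented out, a minimal illustration only): merge the
# cybershake and NGA sets with the default csv paths and check that the merged
# arrays line up. The shape check is an assumption about how the result is used.
# train_data1, test_data1, train_targets1, test_targets1, feature_names = mergeNGAdata(n=13)
# print(train_data1.shape, test_data1.shape, len(feature_names))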
# read in the NGA data and add azimuth features
filenamenga = '/Users/aklimasewski/Documents/data/NGA_mag2_9.csv'
nga_data1, nga_targets1, feature_names = readindataNGA(filenamenga, n)
nga_data1, feature_names = add_azNGA(filenamenga, nga_data1, feature_names)
# nga_data1, feature_names = add_locfeatNGA(filenamenga, nga_data1, feature_names)

# read in the cybershake training and testing data and add azimuth features
train_data1, test_data1, train_targets1, test_targets1, feature_names = readindata(
    nametrain='/Users/aklimasewski/Documents/data/cybertrainyeti10_residfeb.csv',
    nametest='/Users/aklimasewski/Documents/data/cybertestyeti10_residfeb.csv',
    n=n)
train_data1, test_data1, feature_names = add_az(train_data1, test_data1, feature_names)

x_train, y_train, x_nga, y_nga, x_range, x_train_raw, x_nga_raw = transform_data(
    transform_method, train_data1, nga_data1, train_targets1, nga_targets1,
    feature_names, folder_pathNGA)

# load the trained model and compute prediction residuals for the NGA and training sets
loadedmodel = keras.models.load_model(folder_path + 'model/')
pre_nga = loadedmodel.predict(x_nga)
resid_nga = np.asarray(nga_targets1) - pre_nga
pre_train = loadedmodel.predict(x_train)
resid_train = np.asarray(train_targets1) - pre_train

period = [10, 7.5, 5, 4, 3, 2, 1, 0.5, 0.2, 0.1]
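# Follow-on sketch (commented out): summarize the per-period residual spread for
# the training and NGA sets. The plot_resid call mirrors how that function is
# used elsewhere in this module, with the NGA residuals in the test slot; treat
# the exact arguments as an assumption rather than the established workflow.
# print('train residual std per period:', np.std(resid_train, axis=0))
# print('NGA residual std per period:', np.std(resid_nga, axis=0))
# plot_resid(resid_train, resid_nga, folder_pathNGA)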
def ANN_2step(folder_pathmod1, folder_pathmod2, epochs1=50, epochs2=50,
              numlayers1=1, numlayers2=1, units1=[20], units2=[20]):
    '''
    Two ANNs: the first is the base ANN; the second ANN uses the first model's
    residuals as targets and the cell location features as inputs

    Parameters
    ----------
    folder_pathmod1: path for saving png files for the first ANN
    folder_pathmod2: path for saving png files for the second ANN
    epochs1: number of training epochs for the first ANN
    epochs2: number of training epochs for the second ANN
    numlayers1: integer number of hidden layers for the first ANN
    numlayers2: integer number of hidden layers for the second ANN
    units1: array of number of units per hidden layer for the first ANN
    units2: array of number of units per hidden layer for the second ANN

    Returns
    -------
    None. Creates two ANNs and saves model files and figures
    '''
    from sklearn.preprocessing import PowerTransformer

    if not os.path.exists(folder_pathmod1):
        os.makedirs(folder_pathmod1)

    # read in training, testing, and cell data
    train_data1, test_data1, train_targets1, test_targets1, feature_names = readindata(
        nametrain='/Users/aklimasewski/Documents/data/cybertrainyeti10_residfeb.csv',
        nametest='/Users/aklimasewski/Documents/data/cybertestyeti10_residfeb.csv',
        n=n)
    train_data1, test_data1, feature_names = add_az(train_data1, test_data1, feature_names)

    cells = pd.read_csv(folder_path + 'gridpointslatlon_train.csv', header=0, index_col=0)
    cells_test = pd.read_csv(folder_path + 'gridpointslatlon_test.csv', header=0, index_col=0)

    # first ANN: standard features, using the module-level transform_method
    x_train, y_train, x_test, y_test, x_range, x_train_raw, x_test_raw = transform_data(
        transform_method, train_data1, test_data1, train_targets1, test_targets1,
        feature_names, folder_pathmod1)
    resid, resid_test, pre_train, pre_test = create_ANN(
        x_train, y_train, x_test, y_test, feature_names, numlayers1, units1,
        epochs1, transform_method, folder_pathmod1)
    period = [10, 7.5, 5, 4, 3, 2, 1, 0.5, 0.2, 0.1]
    plot_resid(resid, resid_test, folder_pathmod1)

    # second ANN: first-model residuals as targets, cell locations as features
    if not os.path.exists(folder_pathmod2):
        os.makedirs(folder_pathmod2)

    train_targets1 = resid
    test_targets1 = resid_test
    train_data1 = np.asarray(cells)
    test_data1 = np.asarray(cells_test)
    # use a separate local name so the global transform_method used above is not shadowed
    transform_method2 = PowerTransformer()
    feature_names = np.asarray(['eventlat', 'eventlon', 'midlat', 'midlon', 'sitelat', 'sitelon'])

    x_train, y_train, x_test, y_test, x_range, x_train_raw, x_test_raw = transform_data(
        transform_method2, train_data1, test_data1, train_targets1, test_targets1,
        feature_names, folder_pathmod2)
    resid, resid_test, pre_train, pre_test = create_ANN(
        x_train, y_train, x_test, y_test, feature_names, numlayers2, units2,
        epochs2, transform_method2, folder_pathmod2)
    period = [10, 7.5, 5, 4, 3, 2, 1, 0.5, 0.2, 0.1]
    plot_resid(resid, resid_test, folder_pathmod2)
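# Example usage sketch (commented out). The two output folder names are
# hypothetical placeholders; the globals folder_path, n, and transform_method
# are assumed to be defined as elsewhere in this script.
# ANN_2step(folder_path + 'ANN2step_base/', folder_path + 'ANN2step_cells/',
#           epochs1=50, epochs2=50, numlayers1=1, numlayers2=1,
#           units1=[20], units2=[20])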
def ANN_gridpoints(folder_pathmod, epochs=50, numlayers=1, units=[20]):
    '''
    ANN with cell locations as additional features

    Parameters
    ----------
    folder_pathmod: path for saving png files
    epochs: number of training epochs
    numlayers: integer number of hidden layers
    units: array of number of units per hidden layer

    Returns
    -------
    None. Creates an ANN and saves model files and figures
    '''
    cells = pd.read_csv(folder_path + 'gridpointslatlon_train.csv', header=0, index_col=0)
    cells_test = pd.read_csv(folder_path + 'gridpointslatlon_test.csv', header=0, index_col=0)

    if not os.path.exists(folder_pathmod):
        os.makedirs(folder_pathmod)

    transform_method = 'Norm'  # transformer object or text
    n = 13

    train_data1, test_data1, train_targets1, test_targets1, feature_names = readindata(
        nametrain='/Users/aklimasewski/Documents/data/cybertrainyeti10_residfeb.csv',
        nametest='/Users/aklimasewski/Documents/data/cybertestyeti10_residfeb.csv',
        n=n)
    train_data1, test_data1, feature_names = add_az(train_data1, test_data1, feature_names)

    # add the cell location features as extra columns
    train_data1 = np.concatenate([train_data1, cells], axis=1)
    test_data1 = np.concatenate([test_data1, cells_test], axis=1)
    feature_names = np.concatenate(
        [feature_names, ['eventlat', 'eventlon', 'midlat', 'midlon', 'sitelat', 'sitelon']],
        axis=0)

    x_train, y_train, x_test, y_test, x_range, x_train_raw, x_test_raw = transform_data(
        transform_method, train_data1, test_data1, train_targets1, test_targets1,
        feature_names, folder_pathmod)
    resid, resid_test, pre_train, pre_test = create_ANN(
        x_train, y_train, x_test, y_test, feature_names, numlayers, units,
        epochs, transform_method, folder_pathmod)
    plot_resid(resid, resid_test, folder_pathmod)
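# Example usage sketch (commented out); the output folder name is a hypothetical
# placeholder and folder_path is assumed to be defined at module level.
# ANN_gridpoints(folder_path + 'ANN_gridpoints/', epochs=50, numlayers=1, units=[20])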
def mergeNGAdata_cells(nametrain='/Users/aklimasewski/Documents/data/cybertrainyeti10_residfeb.csv',
                       nametest='/Users/aklimasewski/Documents/data/cybertestyeti10_residfeb.csv',
                       filenamenga='/Users/aklimasewski/Documents/data/NGA_mag2_9.csv',
                       n=13):
    '''
    Read in the NGA data file, add cell location features, train test split,
    and merge with the cybershake data

    Parameters
    ----------
    nametrain: path for cybershake training data csv
    nametest: path for cybershake testing data csv
    filenamenga: path for NGA data csv
    n: number of model input features

    Returns
    -------
    train_data1: numpy array of training features
    test_data1: numpy array of testing features
    train_targets1: numpy array of training targets
    test_targets1: numpy array of testing targets
    feature_names: numpy array of feature names
    '''
    from sklearn.model_selection import train_test_split

    # cell location features for each record
    cells = pd.read_csv(folder_path + 'gridpointslatlon_train.csv', header=0, index_col=0)
    cells_test = pd.read_csv(folder_path + 'gridpointslatlon_test.csv', header=0, index_col=0)
    cells_nga = pd.read_csv(folder_path + 'gridpointslatlon_nga.csv', header=0, index_col=0)

    # read in the cybershake training and testing data and add azimuth features
    train_data1, test_data1, train_targets1, test_targets1, feature_names = readindata(
        nametrain=nametrain, nametest=nametest, n=n)
    train_data1, test_data1, feature_names = add_az(train_data1, test_data1, feature_names)

    # read in the NGA data, add azimuth features, and append the cell locations as columns
    nga_data1, nga_targets1, feature_names = readindataNGA(filenamenga, n)
    nga_data1, feature_names = add_azNGA(filenamenga, nga_data1, feature_names)
    nga_data1 = np.concatenate([nga_data1, cells_nga], axis=1)

    ngatrain, ngatest, ngatrain_targets, ngatest_targets = train_test_split(
        nga_data1, nga_targets1, test_size=0.2, random_state=1)

    feature_names = np.concatenate(
        [feature_names, ['eventlat', 'eventlon', 'midlat', 'midlon', 'sitelat', 'sitelon']],
        axis=0)

    # add the cell features to the cybershake records, then append the NGA split
    train_data1 = np.concatenate([train_data1, cells], axis=1)
    test_data1 = np.concatenate([test_data1, cells_test], axis=1)
    train_data1 = np.concatenate([train_data1, ngatrain], axis=0)
    test_data1 = np.concatenate([test_data1, ngatest], axis=0)
    train_targets1 = np.concatenate([train_targets1, ngatrain_targets], axis=0)
    test_targets1 = np.concatenate([test_targets1, ngatest_targets], axis=0)

    return train_data1, test_data1, train_targets1, test_targets1, feature_names
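# Example usage sketch (commented out); assumes the default csv paths and the
# gridpointslatlon_*.csv files under folder_path exist. The printed feature
# check is an illustration, not part of the established workflow.
# train_data1, test_data1, train_targets1, test_targets1, feature_names = mergeNGAdata_cells(n=13)
# print(train_data1.shape, feature_names[-6:])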