def fit_model(X_train, y_train):
    """Build, train, and persist the base Keras neural network.

    Parameters
    ----------
    X_train : array-like
        Training inputs; passed to ``model.fit`` wrapped in a single-element
        list, so the model expects one named input (``admiss_data``).
    y_train : array-like
        Training targets.

    Returns
    -------
    The trained Keras ``Model`` (also saved to ``base_nn.h5``).

    NOTE(review): this function reads several names that are NOT defined
    here and must exist at module level: ``Model``, ``admiss_data``,
    ``main_output``, ``my_callbacks``, ``n_epochs``, ``n_batch_size``,
    ``X_val``, ``y_val`` and ``class_weight`` — confirm against the rest
    of the file. The local ``class_weight`` dict below is commented out,
    so the ``class_weight=`` argument relies on a global of that name.
    """
    #define model
    model = Model(inputs=[admiss_data], outputs=main_output)
    # print(model.summary())
    # adam = optimizers.Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
    # model.compile(optimizer=adam, loss='binary_crossentropy', metrics=['accuracy'])
    # class_weight = { 0: 1.,
    #                  1: cw,
    #                  # 1: 20.
    #                  }
    histories = my_callbacks.Histories()
    #model fit
    # NOTE(review): the model is never compiled here (compile call above is
    # commented out) — presumably compilation happens elsewhere; verify.
    model.fit([X_train], y_train, epochs=n_epochs, batch_size=n_batch_size,
              validation_data=([[X_val], y_val]), class_weight=class_weight,
              callbacks=[histories])
    model.save('base_nn.h5')
    return model
# ---------------------------------------------------------------------------
# Train-on-synthetic script: build a CNN classifier, fit it with early
# stopping, checkpoint the best weights by validation loss, and save the
# final model under 'TSTR_<date>/train/<folder>/'.
#
# NOTE(review): relies on names defined elsewhere in the module: Model_,
# one_d, my_callbacks, ModelCheckpoint, EarlyStopping, date, folder, and the
# data splits X_train/y_train, X_val/y_val, X_test/y_test — confirm before
# reusing this snippet standalone.
# ---------------------------------------------------------------------------
batch_size = 512
epochs = 500
num_classes = 9
# Model_ wraps the network architectures; 100 is presumably the input
# sequence length — TODO confirm against Model_'s constructor.
m = Model_(batch_size, 100, num_classes)
# one_d selects the 1-D conv architecture; otherwise the 2-D variant.
if one_d == True:
    model = m.cnn()
else:
    model = m.cnn2()
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
## callbacks
history = my_callbacks.Histories()
rocauc = my_callbacks.ROC_AUC(X_train, y_train, X_test, y_test)
inception = my_callbacks.Inception(X_test, num_classes)
# Keep only the snapshot with the lowest validation loss.
checkpoint = ModelCheckpoint('TSTR_'+ date +'/train/'+ folder +'/weights.best.trainonsynthetic.hdf5',
                             monitor='val_loss', verbose=1, save_best_only=True, mode='min')
earlyStopping = EarlyStopping(monitor='val_loss',min_delta = 0.00000001 , patience=10, verbose=1, mode='min') #0.00000001 patience 0
model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size,
          validation_data = (X_val, y_val),
          callbacks = [history, checkpoint, earlyStopping, rocauc, inception ])
model.save('TSTR_'+ date +'/train/'+ folder +'/trainonsynthetic_model.h5')
def main(result_dict={}, N_CLASSES=2):
    """TRTS experiment (Train on Real, Test on Synthetic) for Catalina data.

    Trains a CNN on the real dataset (or reloads cached metrics if a model
    was already trained), then evaluates on both the real and the
    GAN-generated test sets. Metrics and plots are written under
    'TRTS_<date>/…' and summarized into ``result_dict[str(N_CLASSES)]``.

    NOTE(review): mutable default ``result_dict={}`` is shared across calls
    that omit it — callers appear to pass it explicitly. Relies on module
    globals: date, in_TSTR_FOLDER, Model_, my_callbacks, ModelCheckpoint,
    EarlyStopping, load_model, shuffle, to_categorical, pickle, np, os,
    plt — confirm against the full file.
    """
    folder = 'catalina_amp_irregular_' + str(N_CLASSES) + 'classes'
    dataset_real = 'catalina_north' + str(N_CLASSES) + 'classes'  #'catalina_random_sample_augmented_90k_' + str(N_CLASSES) + 'classes'
    result_dict[str(N_CLASSES)] = {'training': {}, 'testing': {}}

    def read_data(file):
        """Load a pickled (train, val, test) triple of regular light curves.

        Each split is reshaped to (samples, timesteps, 1, 1); labels are
        remapped to contiguous integers and one-hot encoded.
        """
        with open(file, 'rb') as f:
            data = pickle.load(f)
        X_train = np.asarray(data[0]['generated_magnitude'])
        #print(X_train.shape)
        X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1, 1)
        #print(X_train.shape)
        y_train = np.asarray(data[0]['class'])
        X_train, y_train = shuffle(X_train, y_train, random_state=42)
        y_train = change_classes(y_train)
        y_train = to_categorical(y_train)
        X_val = np.asarray(data[1]['generated_magnitude'])
        X_val = X_val.reshape(X_val.shape[0], X_val.shape[1], 1, 1)
        y_val = np.asarray(data[1]['class'])
        y_val = change_classes(y_val)
        y_val = to_categorical(y_val)
        X_val, y_val = shuffle(X_val, y_val, random_state=42)
        X_test = np.asarray(data[2]['generated_magnitude'])
        X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1, 1)
        y_test = np.asarray(data[2]['class'])
        y_test = change_classes(y_test)
        y_test = to_categorical(y_test)
        X_test, y_test = shuffle(X_test, y_test, random_state=42)
        return X_train, y_train, X_val, y_val, X_test, y_test

    def read_data_original_irr(file):
        """Load irregularly-sampled REAL light curves (magnitude + time).

        Stacks magnitude and time as two channels, giving arrays of shape
        (samples, timesteps, 1, 2).
        """
        with open(file, 'rb') as f:
            data = pickle.load(f)
        print(data[0].keys())
        mgt = np.asarray(data[0]['original_magnitude_random'])
        t = np.asarray(data[0]['time_random'])
        X_train = np.stack((mgt, t), axis=-1)
        #print(X_train.shape)
        #print(X_train.T.shape)
        X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1, X_train.shape[2])
        #print(X_train.shape)
        y_train = np.asarray(data[0]['class'])
        #print(np.unique(y_train))
        X_train, y_train = shuffle(X_train, y_train, random_state=42)
        y_train = change_classes(y_train)
        y_train = to_categorical(y_train)
        mgt = np.asarray(data[1]['original_magnitude_random'])
        t = np.asarray(data[1]['time_random'])
        X_val = np.stack((mgt, t), axis=-1)
        X_val = X_val.reshape(X_val.shape[0], X_val.shape[1], 1, X_val.shape[2])
        y_val = np.asarray(data[1]['class'])
        y_val = change_classes(y_val)
        y_val = to_categorical(y_val)
        X_val, y_val = shuffle(X_val, y_val, random_state=42)
        mgt = np.asarray(data[2]['original_magnitude_random'])
        t = np.asarray(data[2]['time_random'])
        X_test = np.stack((mgt, t), axis=-1)
        X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1, X_test.shape[2])
        y_test = np.asarray(data[2]['class'])
        y_test = change_classes(y_test)
        y_test = to_categorical(y_test)
        X_test, y_test = shuffle(X_test, y_test, random_state=42)
        return X_train, y_train, X_val, y_val, X_test, y_test

    def read_data_generated_irr(file):
        """Load irregularly-sampled GAN-GENERATED light curves.

        Same layout as read_data_original_irr but reads the
        'generated_magnitude'/'time' keys.
        """
        with open(file, 'rb') as f:
            data = pickle.load(f)
        print(data[0].keys())
        mgt = np.asarray(data[0]['generated_magnitude'])
        t = np.asarray(data[0]['time'])
        X_train = np.stack((mgt, t), axis=-1)
        #print(X_train.shape)
        #print(X_train.T.shape)
        X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1, X_train.shape[2])
        #print(X_train.shape)
        y_train = np.asarray(data[0]['class'])
        #print(np.unique(y_train))
        X_train, y_train = shuffle(X_train, y_train, random_state=42)
        # for i in y_train:
        #     if i != None:
        #         print(i)
        y_train = change_classes(y_train)
        y_train = to_categorical(y_train)
        mgt = np.asarray(data[1]['generated_magnitude'])
        t = np.asarray(data[1]['time'])
        X_val = np.stack((mgt, t), axis=-1)
        X_val = X_val.reshape(X_val.shape[0], X_val.shape[1], 1, X_val.shape[2])
        y_val = np.asarray(data[1]['class'])
        y_val = change_classes(y_val)
        y_val = to_categorical(y_val)
        X_val, y_val = shuffle(X_val, y_val, random_state=42)
        mgt = np.asarray(data[2]['generated_magnitude'])
        t = np.asarray(data[2]['time'])
        X_test = np.stack((mgt, t), axis=-1)
        X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1, X_test.shape[2])
        y_test = np.asarray(data[2]['class'])
        y_test = change_classes(y_test)
        y_test = to_categorical(y_test)
        X_test, y_test = shuffle(X_test, y_test, random_state=42)
        return X_train, y_train, X_val, y_val, X_test, y_test

    def change_classes(targets):
        """Remap arbitrary label values to contiguous integers 0..K-1."""
        #print(targets)
        target_keys = np.unique(targets)
        #print(target_keys)
        target_keys_idxs = np.argsort(np.unique(targets))
        targets = target_keys_idxs[np.searchsorted(target_keys, targets, sorter=target_keys_idxs)]
        return targets

    def open_data(file):
        """Load a single (non-split) pickled dataset and one-hot its labels."""
        with open(file, 'rb') as f:
            data = pickle.load(f)
        print(len(data['generated_magnitude']))
        X = np.asarray(data['generated_magnitude'])
        X = X.reshape(X.shape[0], X.shape[1], 1, 1)
        y = np.asarray(data['class'])
        X, y = shuffle(X, y, random_state=42)
        y = change_classes(y)
        y = to_categorical(y)
        return X, y

    def evaluation(X_test, y_test, n_classes):
        """Compute an Inception-Score-style metric over 10 prediction splits.

        Uses the enclosing-scope ``model``. Returns (score, mean, std),
        each a single-element list (lists kept for historical .npy format).
        """
        y_pred_prob = model.predict_proba(X_test)
        n = 10
        probs = np.array_split(y_pred_prob, n)
        score = []
        mean = []
        std = []
        Y = []
        # Marginal class distribution per split.
        for prob in probs:
            ys = np.zeros(n_classes)  #[0, 0
            for class_i in range(n_classes):
                for j in prob:
                    ys[class_i] = ys[class_i] + j[class_i]
            ys[:] = [x/len(prob) for x in ys]
            Y.append(np.asarray(ys))
        ep = 1e-12  # avoids log(0)
        tmp = []
        # KL(p(y|x) || p(y)) averaged, exponentiated — Inception Score.
        for s in range(n):
            kl = (probs[s] * np.log((probs[s] + ep)/Y[s])).sum(axis=1)
            E = np.mean(kl)
            IS = np.exp(E)
            #pdb.set_trace()
            tmp.append(IS)
        score.append(tmp)
        mean.append(np.mean(tmp))
        std.append(np.std(tmp))
        print('Inception Score:\nMean score : ', mean[-1])
        print('Std : ', std[-1])
        return score, mean, std

    def check_dir(directory):
        """Create directory (and parents) if it does not exist."""
        if not os.path.exists(directory):
            os.makedirs(directory)

    # Output directory scaffolding.
    check_dir('TRTS_'+ date)
    check_dir('TRTS_'+ date +'/train/')
    check_dir('TRTS_'+ date +'/train/')  # NOTE(review): duplicated call, harmless
    check_dir('TRTS_'+ date +'/train/'+ folder)
    check_dir('TRTS_'+ date +'/test/')
    check_dir('TRTS_'+ date +'/test/'+ folder)
    # If a trained model already exists, just reload and report cached metrics.
    if os.path.isfile('TRTS_'+ date +'/train/'+ folder +'/train_model.h5'):
        print('\nTrain metrics:')
        mean = np.load('TRTS_'+ date +'/train/'+ folder +'/train_is_mean.npy')
        std = np.load('TRTS_'+ date +'/train/'+ folder +'/train_is_std.npy')
        print('Training metrics:')
        print('Inception Score:\nMean score : ', mean[-1])
        print('Std : ', std[-1])
        acc = np.load('TRTS_'+ date +'/train/'+ folder +'/train_history_acc.npy')
        val_acc = np.load('TRTS_'+ date +'/train/'+ folder +'/train_history_val_acc.npy')
        loss = np.load('TRTS_'+ date +'/train/'+ folder +'/train_history_loss.npy')
        val_loss = np.load('TRTS_'+ date +'/train/'+ folder +'/train_history_val_loss.npy')
        print('ACC : ', np.mean(acc))
        print('VAL_ACC : ', np.mean(val_acc))
        print('LOSS : ', np.mean(loss))
        print('VAL_LOSS : ', np.mean(val_loss))
        print('\nTest metrics:')
        score = np.load('TRTS_'+ date +'/train/'+ folder +'/test_score.npy')
        print('Test loss:', score[0])
        print('Test accuracy:', score[1])
    else:
        irr = True
        one_d = False
        ## Train on real
        #dataset_real = 'catalina_random_sample_augmented_' + str(N_CLASSES) + 'classes'
        #dataset_real = 'catalina_north' + str(N_CLASSES) + 'classes'
        if irr == True:
            X_train, y_train, X_val, y_val, X_test, y_test = read_data_original_irr('TSTR_data/'+ in_TSTR_FOLDER + dataset_real +'.pkl')  #datasets_original/REAL/'+ dataset_real +'.pkl')
        else:
            X_train, y_train, X_val, y_val, X_test, y_test = read_data('TSTR_data/'+in_TSTR_FOLDER+ dataset_real +'.pkl')  #datasets_original/REAL/'+ dataset_real +'.pkl')
        print('')
        print ('Training new model')
        print('')
        batch_size = 512
        epochs = 200
        m = Model_(batch_size, 100, N_CLASSES)
        if one_d == True:
            model = m.cnn()
        else:
            model = m.cnn2()
        model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
        ## callbacks
        history = my_callbacks.Histories()
        inception = my_callbacks.Inception(X_test, N_CLASSES)
        # Checkpoint best weights by validation accuracy; reload them below.
        checkpoint = ModelCheckpoint('TRTS_'+ date +'/train/'+ folder +'/weights.best.train.hdf5',
                                     monitor='val_acc', verbose=1, save_best_only=True, mode='max')
        earlyStopping = EarlyStopping(monitor='val_acc',min_delta = 0.00000001 , patience=10, verbose=1, mode='max') #0.00000001 patience 0
        model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size,
                  validation_data = (X_val, y_val),
                  callbacks = [history, checkpoint, earlyStopping, inception ])
        # Restore the best checkpoint for evaluation.
        model = load_model('TRTS_'+ date +'/train/'+ folder +'/weights.best.train.hdf5')
        #Create dictionary, then save into two different documments.
        ## Loss
        history_dictionary_loss = history.loss
        np.save('TRTS_'+ date +'/train/'+ folder +'/train_history_loss.npy', history_dictionary_loss)
        ## Val Loss
        history_dictionary_val_loss = history.val_loss
        np.save('TRTS_'+ date +'/train/'+ folder +'/train_history_val_loss.npy', history_dictionary_val_loss)
        ## Acc
        history_dictionary_acc = history.acc
        np.save('TRTS_'+ date +'/train/'+ folder +'/train_history_acc.npy', history_dictionary_acc)
        ## Val Acc
        history_dictionary_val_acc = history.val_acc
        np.save('TRTS_'+ date +'/train/'+ folder +'/train_history_val_acc.npy', history_dictionary_val_acc)
        ## IS
        scores_dict = inception.score
        np.save('TRTS_'+ date +'/train/'+ folder +'/train_is.npy', scores_dict)
        mean_scores_dict = inception.mean
        np.save('TRTS_'+ date +'/train/'+ folder +'/train_is_mean.npy', mean_scores_dict)
        std_scores_dict = inception.std
        np.save('TRTS_'+ date +'/train/'+ folder +'/train_is_std.npy', std_scores_dict)
        ### plot loss and validation_loss v/s epochs
        plt.figure(1)
        plt.yscale("log")
        plt.plot(history.loss)
        plt.plot(history.val_loss)
        plt.title('model loss')
        plt.ylabel('loss')
        plt.xlabel('epoch')
        plt.legend(['train', 'val'], loc='upper right')
        plt.savefig('TRTS_'+ date +'/train/'+ folder +'/train_loss.png')
        ### plot acc and validation acc v/s epochs
        plt.figure(2)
        plt.yscale("log")
        plt.plot(history.acc)
        plt.plot(history.val_acc)
        plt.title('model acc')
        plt.ylabel('Acc')
        plt.xlabel('epoch')
        plt.legend(['train', 'val'], loc='upper right')
        plt.savefig('TRTS_'+ date +'/train/'+ folder +'/train_acc.png')
        print('Training metrics:')
        print('Inception Score:\nMean score : ', mean_scores_dict[-1])
        print('Std : ', std_scores_dict[-1])
        print('ACC : ', history_dictionary_acc[-1])
        print('VAL_ACC : ', history_dictionary_val_acc[-1])
        print('LOSS : ', history_dictionary_loss[-1])
        print('VAL_LOSS : ', history_dictionary_val_loss[-1])
        # Test on real, then thest on synthetic
        # Test on real
        print('\nTest metrics:')
        print('\nTest on real:')
        dataset_syn = 'catalina_amp_irregular_' + str(N_CLASSES) + 'classes_generated'
        if irr == True:
            X_train2, y_train2, X_val2, y_val2, X_test2, y_test2 = read_data_generated_irr('TSTR_data/generated/'+ folder +'/' + dataset_syn + '.pkl')
        else:
            X_train2, y_train2, X_val2, y_val2, X_test2, y_test2 = read_data('TSTR_data/generated/'+ folder + '/' + dataset_syn + '.pkl')
        sc, me, st = evaluation(X_test, y_test, N_CLASSES)
        np.save('TRTS_'+ date +'/test/'+ folder +'/test_onreal_is.npy', sc)
        np.save('TRTS_'+ date +'/test/'+ folder +'/test_onreal_is_mean.npy', me)
        np.save('TRTS_'+ date +'/test/'+ folder +'/test_onreal_is_std.npy', st)
        score_real = model.evaluate(X_test, y_test, verbose=1)
        print('Test loss:', score_real[0])
        print('Test accuracy:', score_real[1])
        np.save('TRTS_'+ date +'/test/'+ folder +'/test_onreal_score.npy', score_real)
        # Test on synthetic
        print('\nTest on synthetic:')
        sc, me, st = evaluation(X_test2, y_test2, N_CLASSES)
        np.save('TRTS_'+ date +'/test/'+ folder +'/test_onsyn_is.npy', sc)
        np.save('TRTS_'+ date +'/test/'+ folder +'/test_onsyn_is_mean.npy', me)
        np.save('TRTS_'+ date +'/test/'+ folder +'/test_onsyn_is_std.npy', st)
        score_syn = model.evaluate(X_test2, y_test2, verbose=1)
        print('Test loss:', score_syn[0])
        print('Test accuracy:', score_syn[1])
        np.save('TRTS_'+ date +'/test/'+ folder +'/test_onsyn_score.npy', score_syn)
        result_dict[str(N_CLASSES)]['training'] = {
            'IS mean': mean_scores_dict[-1],
            'IS std': std_scores_dict[-1],
            'acc': history_dictionary_acc[-1],
            'val_acc': history_dictionary_val_acc[-1],
            'loss': history_dictionary_loss[-1],
            'val_loss': history_dictionary_val_loss[-1]
        }
        # NOTE(review): key 'test_onsyn_score' actually stores the synthetic
        # test ACCURACY — name likely intended as 'test_onsyn_acc'.
        result_dict[str(N_CLASSES)]['testing'] = {
            'test_onreal_loss': score_real[0],
            'test_onreal_acc': score_real[1],
            'test_onsyn_loss': score_syn[0],
            'test_onsyn_score': score_syn[1]
        }
def main(result_dict={}, catalina_n_classes=1):
    """Train a CNN on REAL irregular Catalina light curves and test on real.

    Trains with early stopping, restores the best checkpoint, records
    train/val/test metrics into ``result_dict[catalina_n_classes]`` and
    returns the confusion matrix on the real test set.

    NOTE(review): mutable default ``result_dict={}`` is shared across calls
    that omit it. Relies on module globals: BASE_REAL_NAME, ORIGNAL_MAG_KEY,
    ORIGINAL_TIME_KEY, DROP_OUT_RATE, BN_CONDITION, EARLY_STOP_ON,
    EARLY_STOP_ON_COD, PATIENCE, date, Model_, my_callbacks, keras,
    sklearn — confirm against the full file.
    """
    real_data_folder = os.path.join('datasets_original', 'REAL', '%iclasses_100_100' % catalina_n_classes)
    dataset_real_pkl = '%s%iclasses.pkl' % (BASE_REAL_NAME, catalina_n_classes)
    # syn_data_name = os.path.join('%s%s%.2f' % (BASE_GEN_DATA_FOLDER_NAME, gan_version, catalina_n_classes))
    # NOTE(review): the key is the raw int, not a string, despite the name.
    catalina_n_classes_str = catalina_n_classes  #str(catalina_n_classes)
    result_dict[catalina_n_classes_str] = {'training': {}, 'testing': {}}
    #result_dict = {'training': {}, 'testing': {}}
    print("\nREAL Training set to load %s\n" % dataset_real_pkl)
    # print("SYN Training set to load %s" % syn_data_name)

    def read_data_original_irr(file):
        """Load irregular REAL light curves; two channels (magnitude, time)."""
        with open(file, 'rb') as f:
            data = pickle.load(f)
        print(data[0].keys())
        mgt = np.asarray(data[0][ORIGNAL_MAG_KEY])
        t = np.asarray(data[0][ORIGINAL_TIME_KEY])
        X_train = np.stack((mgt, t), axis=-1)
        X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1, X_train.shape[2])
        y_train = np.asarray(data[0]['class'])
        X_train, y_train = shuffle(X_train, y_train, random_state=42)
        y_train = change_classes(y_train)
        y_train = to_categorical(y_train)
        mgt = np.asarray(data[1][ORIGNAL_MAG_KEY])
        t = np.asarray(data[1][ORIGINAL_TIME_KEY])
        X_val = np.stack((mgt, t), axis=-1)
        X_val = X_val.reshape(X_val.shape[0], X_val.shape[1], 1, X_val.shape[2])
        y_val = np.asarray(data[1]['class'])
        y_val = change_classes(y_val)
        y_val = to_categorical(y_val)
        X_val, y_val = shuffle(X_val, y_val, random_state=42)
        mgt = np.asarray(data[2][ORIGNAL_MAG_KEY])
        t = np.asarray(data[2][ORIGINAL_TIME_KEY])
        X_test = np.stack((mgt, t), axis=-1)
        X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1, X_test.shape[2])
        y_test = np.asarray(data[2]['class'])
        y_test = change_classes(y_test)
        y_test = to_categorical(y_test)
        X_test, y_test = shuffle(X_test, y_test, random_state=42)
        return X_train, y_train, X_val, y_val, X_test, y_test

    def read_data_generated_irr(file):
        """Load irregular GAN-GENERATED light curves; same layout as above."""
        with open(file, 'rb') as f:
            data = pickle.load(f)
        print(data[0].keys())
        mgt = np.asarray(data[0]['generated_magnitude'])
        t = np.asarray(data[0]['time'])
        X_train = np.stack((mgt, t), axis=-1)
        X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1, X_train.shape[2])
        # print(X_train.shape)
        y_train = np.asarray(data[0]['class'])
        print(np.unique(y_train))
        X_train, y_train = shuffle(X_train, y_train, random_state=42)
        # for i in y_train:
        #     if i != None:
        #         print(i)
        y_train = change_classes(y_train)
        y_train = to_categorical(y_train)
        mgt = np.asarray(data[1]['generated_magnitude'])
        t = np.asarray(data[1]['time'])
        X_val = np.stack((mgt, t), axis=-1)
        X_val = X_val.reshape(X_val.shape[0], X_val.shape[1], 1, X_val.shape[2])
        y_val = np.asarray(data[1]['class'])
        y_val = change_classes(y_val)
        y_val = to_categorical(y_val)
        X_val, y_val = shuffle(X_val, y_val, random_state=42)
        mgt = np.asarray(data[2]['generated_magnitude'])
        t = np.asarray(data[2]['time'])
        X_test = np.stack((mgt, t), axis=-1)
        X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1, X_test.shape[2])
        y_test = np.asarray(data[2]['class'])
        y_test = change_classes(y_test)
        y_test = to_categorical(y_test)
        X_test, y_test = shuffle(X_test, y_test, random_state=42)
        return X_train, y_train, X_val, y_val, X_test, y_test

    def change_classes(targets):
        """Remap arbitrary label values to contiguous integers 0..K-1."""
        # print(targets)
        target_keys = np.unique(targets)
        # print(target_keys)
        target_keys_idxs = np.argsort(np.unique(targets))
        targets = target_keys_idxs[np.searchsorted(target_keys, targets, sorter=target_keys_idxs)]
        return targets

    def open_data(file):
        """Load a single (non-split) pickled dataset and one-hot its labels."""
        with open(file, 'rb') as f:
            data = pickle.load(f)
        print(len(data['generated_magnitude']))
        X = np.asarray(data['generated_magnitude'])
        X = X.reshape(X.shape[0], X.shape[1], 1, 1)
        y = np.asarray(data['class'])
        X, y = shuffle(X, y, random_state=42)
        y = change_classes(y)
        y = to_categorical(y)
        return X, y

    def check_dir(directory):
        """Create directory (and parents) if it does not exist."""
        if not os.path.exists(directory):
            os.makedirs(directory)

    # Output directory scaffolding.
    check_dir('TSTR_' + date)
    check_dir('TSTR_' + date + '/train/')
    check_dir('TSTR_' + date + '/train/')  # NOTE(review): duplicated call, harmless
    check_dir('TSTR_' + date + '/train/' + real_data_folder)
    check_dir('TSTR_' + date + '/test/')
    check_dir('TSTR_' + date + '/test/' + real_data_folder)
    # if else
    # irr = True
    # dataset_syn_pkl = syn_data_name + '_generated.pkl'
    # one_d = False
    ## Train on real
    # X_train_syn, y_train_syn, X_val_syn, y_val_syn, X_test_syn, y_test_syn = read_data_generated_irr(
    #     os.path.join('TSTR_data', 'generated', syn_data_name, dataset_syn_pkl))
    X_train_real, y_train_real, X_val_real, y_val_real, X_test_real, y_test_real = read_data_original_irr(
        os.path.join('TSTR_data', real_data_folder, dataset_real_pkl))
    print('')
    print('Training new model')
    print('')
    batch_size = 512
    epochs = 10000  # effectively unbounded; EarlyStopping terminates training
    num_classes = catalina_n_classes
    m = Model_(batch_size, 100, num_classes, drop_rate=DROP_OUT_RATE)
    # Architecture with/without batch normalization, per global config.
    if BN_CONDITION == 'batch_norm_':
        model = m.cnn2_batch()
    else:
        model = m.cnn2()
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    ## callbacks
    history = my_callbacks.Histories()
    checkpoint = ModelCheckpoint('TSTR_' + date + '/train/' + real_data_folder + '/weights.best.trainonreal.hdf5',
                                 monitor=EARLY_STOP_ON, verbose=1, save_best_only=True, mode=EARLY_STOP_ON_COD)
    earlyStopping = EarlyStopping(monitor=EARLY_STOP_ON, min_delta=0.00000001, patience=PATIENCE, verbose=1, mode=EARLY_STOP_ON_COD)
    model.fit(
        X_train_real, y_train_real,
        epochs=epochs, batch_size=batch_size,
        validation_data=(X_val_real, y_val_real),
        callbacks=[
            history,
            checkpoint,
            earlyStopping
            # ,
            # rocauc,
            # inception
        ])
    # Restore the best checkpoint for evaluation.
    model = load_model('TSTR_' + date + '/train/' + real_data_folder + '/weights.best.trainonreal.hdf5')
    print('Training metrics:')
    score_train = model.evaluate(X_train_real, y_train_real, verbose=1)
    score_val = model.evaluate(X_val_real, y_val_real, verbose=1)
    print('ACC : ', score_train[1])
    print('VAL_ACC : ', score_val[1])
    print('LOSS : ', score_train[0])
    print('VAL_LOSS : ', score_val[0])
    # fine tunning
    # K.set_value(model.optimizer.lr, 0.00005)
    #
    # checkpoint = ModelCheckpoint('TSTR_' + date + '/train/' + syn_data_name + '/weights.best.trainfinetune.hdf5',
    #                              monitor='val_acc', verbose=1, save_best_only=True, mode='max')
    # earlyStopping = EarlyStopping(monitor='val_acc', min_delta=0.00000001, patience=PATIENCE_FINE, verbose=1, mode='max')
    # model.fit(X_train_real, y_train_real, epochs=epochs, batch_size=batch_size, validation_data=(X_val_real, y_val_real),
    #           callbacks=[history,
    #                      checkpoint,
    #                      earlyStopping
    #                      # ,
    #                      # rocauc,
    #                      # inception
    #                      ])
    #
    # model = load_model('TSTR_' + date + '/train/' + syn_data_name + '/weights.best.trainfinetune.hdf5')
    ## Test on real
    # score_val = model.evaluate(X_val_real, y_val_real, verbose=1)
    #
    # print('fine tune VAL_ACC : ', score_val[1])
    # print('fine tune VAL_LOSS : ', score_val[0])
    print('\nTest metrics:')
    print('\nTest on real:')
    score_test = model.evaluate(X_test_real, y_test_real, verbose=1)
    print('Test loss:', score_test[0])
    print('Test accuracy:', score_test[1])
    result_dict[catalina_n_classes_str]['testing'] = {
        'test loss on real': score_test[0],
        'Test accuracy on real': score_test[1]
        # , 'auc roc on real': roc
    }
    ## Test on syn
    print('\nTest on synthetic:')
    # score = model.evaluate(X_test_syn, y_test_syn, verbose=1)
    # print('Test loss:', score[0])
    # print('Test accuracy:', score[1])
    result_dict[catalina_n_classes_str]['training'] = {
        'VAL_ACC': score_val[1],
        'TRAIN_ACC': score_train[1],
        'TRAIN_LOSS': score_train[0],
        'VAL_LOSS': score_val[0]
    }
    # result_dict[catalina_n_classes_str]['testing']['test loss on syn'] = score[0]
    # result_dict[catalina_n_classes_str]['testing']['Test accuracy on syn'] = score[1]
    # Confusion matrix on the real test set; sanity-check its accuracy
    # against model.evaluate's accuracy.
    y_predict_prob_test = model.predict(X_test_real)
    y_predict_classes_test = y_predict_prob_test.argmax(axis=-1)
    confusion_matrix = sklearn.metrics.confusion_matrix(
        y_test_real.argmax(axis=-1), y_predict_classes_test)
    print(
        'Accuracy Test conf %.4f, accuracy eval %.4f' %
        (np.trace(confusion_matrix) / np.sum(confusion_matrix), score_test[1]))
    # Free TF graph/session memory between successive experiment runs.
    keras.backend.clear_session()
    del model
    return confusion_matrix
def main(result_dict={}, PERCENTAGE_OF_SAMPLES_TO_KEEP_FOR_DISBALANCE=1.0, v=''):
    """TRTS experiment under class disbalance: train on real, test on both.

    Trains a 3-class CNN on the real dataset (optionally the augmented
    variant, per AUGMENTED_OR_NOT_EXTRA_STR), evaluates on the real test
    set and on the GAN-generated test set, and stores the metrics in
    ``result_dict[str(PERCENTAGE_OF_SAMPLES_TO_KEEP_FOR_DISBALANCE)]``.

    NOTE(review): mutable default ``result_dict={}`` is shared across calls
    that omit it. Relies on module globals: BASE_REAL_NAME,
    AUGMENTED_OR_NOT_EXTRA_STR, DROP_OUT_RATE, BN_CONDITION, PATIENCE,
    date, Model_, my_callbacks, keras — confirm against the full file.
    """
    folder = '%s%s%.2f' % (BASE_REAL_NAME, v, PERCENTAGE_OF_SAMPLES_TO_KEEP_FOR_DISBALANCE)
    # Choose the input folder / dataset name depending on whether the
    # augmented variant of the real data is requested.
    if AUGMENTED_OR_NOT_EXTRA_STR == '':
        in_TSTR_FOLDER = 'datasets_original/REAL/'
        dataset_real = '%s%s%s%.2f' % (
            BASE_REAL_NAME, AUGMENTED_OR_NOT_EXTRA_STR, '',
            PERCENTAGE_OF_SAMPLES_TO_KEEP_FOR_DISBALANCE)
    else:
        in_TSTR_FOLDER = 'augmented/'
        dataset_real = '%s%s%s%.2f' % (
            BASE_REAL_NAME, AUGMENTED_OR_NOT_EXTRA_STR, v,
            PERCENTAGE_OF_SAMPLES_TO_KEEP_FOR_DISBALANCE)
    #folder = dataset_real
    # folder = 'starlight_amp_noisy_irregular_all_%s%.2f' % (v, PERCENTAGE_OF_SAMPLES_TO_KEEP_FOR_DISBALANCE)
    # dataset_real = 'starlight_noisy_irregular_all_%s%.2f' % (v, PERCENTAGE_OF_SAMPLES_TO_KEEP_FOR_DISBALANCE)
    # same_set
    # folder = 'starlight_noisy_irregular_all_same_set_%s%.2f' % (v, PERCENTAGE_OF_SAMPLES_TO_KEEP_FOR_DISBALANCE)
    # dataset_real = 'starlight_noisy_irregular_all_same_set_%.2f' % PERCENTAGE_OF_SAMPLES_TO_KEEP_FOR_DISBALANCE
    # for augmented
    # dataset_real = 'starlight_random_sample_augmented_%s%.2f' % (v, PERCENTAGE_OF_SAMPLES_TO_KEEP_FOR_DISBALANCE)
    PERCENTAGE_OF_SAMPLES_TO_KEEP_FOR_DISBALANCE_KEY = str(PERCENTAGE_OF_SAMPLES_TO_KEEP_FOR_DISBALANCE)
    result_dict[PERCENTAGE_OF_SAMPLES_TO_KEEP_FOR_DISBALANCE_KEY] = {'training': {}, 'testing': {}}

    def read_data(file):
        """Load a pickled (train, val, test) triple of regular light curves.

        Each split is reshaped to (samples, timesteps, 1, 1); labels are
        remapped to contiguous integers and one-hot encoded.
        """
        with open(file, 'rb') as f:
            data = pickle.load(f)
        X_train = np.asarray(data[0]['generated_magnitude'])
        # print(X_train.shape)
        X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1, 1)
        # print(X_train.shape)
        y_train = np.asarray(data[0]['class'])
        X_train, y_train = shuffle(X_train, y_train, random_state=42)
        y_train = change_classes(y_train)
        y_train = to_categorical(y_train)
        X_val = np.asarray(data[1]['generated_magnitude'])
        X_val = X_val.reshape(X_val.shape[0], X_val.shape[1], 1, 1)
        y_val = np.asarray(data[1]['class'])
        y_val = change_classes(y_val)
        y_val = to_categorical(y_val)
        X_val, y_val = shuffle(X_val, y_val, random_state=42)
        X_test = np.asarray(data[2]['generated_magnitude'])
        X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1, 1)
        y_test = np.asarray(data[2]['class'])
        y_test = change_classes(y_test)
        y_test = to_categorical(y_test)
        X_test, y_test = shuffle(X_test, y_test, random_state=42)
        return X_train, y_train, X_val, y_val, X_test, y_test

    def read_data_original_irr(file):
        """Load irregular REAL light curves; two channels (magnitude, time)."""
        with open(file, 'rb') as f:
            data = pickle.load(f)
        print(data[0].keys())
        mgt = np.asarray(data[0]['original_magnitude'])
        t = np.asarray(data[0]['time'])
        X_train = np.stack((mgt, t), axis=-1)
        # print(X_train.shape)
        # print(X_train.T.shape)
        X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1, X_train.shape[2])
        # print(X_train.shape)
        y_train = np.asarray(data[0]['class'])
        # print(np.unique(y_train))
        X_train, y_train = shuffle(X_train, y_train, random_state=42)
        y_train = change_classes(y_train)
        y_train = to_categorical(y_train)
        mgt = np.asarray(data[1]['original_magnitude'])
        t = np.asarray(data[1]['time'])
        X_val = np.stack((mgt, t), axis=-1)
        X_val = X_val.reshape(X_val.shape[0], X_val.shape[1], 1, X_val.shape[2])
        y_val = np.asarray(data[1]['class'])
        y_val = change_classes(y_val)
        y_val = to_categorical(y_val)
        X_val, y_val = shuffle(X_val, y_val, random_state=42)
        mgt = np.asarray(data[2]['original_magnitude'])
        t = np.asarray(data[2]['time'])
        X_test = np.stack((mgt, t), axis=-1)
        X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1, X_test.shape[2])
        y_test = np.asarray(data[2]['class'])
        y_test = change_classes(y_test)
        y_test = to_categorical(y_test)
        X_test, y_test = shuffle(X_test, y_test, random_state=42)
        return X_train, y_train, X_val, y_val, X_test, y_test

    def read_data_generated_irr(file):
        """Load irregular GAN-GENERATED light curves; same layout as above."""
        with open(file, 'rb') as f:
            data = pickle.load(f)
        print(data[0].keys())
        mgt = np.asarray(data[0]['generated_magnitude'])
        t = np.asarray(data[0]['time'])
        X_train = np.stack((mgt, t), axis=-1)
        # print(X_train.shape)
        # print(X_train.T.shape)
        X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1, X_train.shape[2])
        # print(X_train.shape)
        y_train = np.asarray(data[0]['class'])
        # print(np.unique(y_train))
        X_train, y_train = shuffle(X_train, y_train, random_state=42)
        # for i in y_train:
        #     if i != None:
        #         print(i)
        y_train = change_classes(y_train)
        y_train = to_categorical(y_train)
        mgt = np.asarray(data[1]['generated_magnitude'])
        t = np.asarray(data[1]['time'])
        X_val = np.stack((mgt, t), axis=-1)
        X_val = X_val.reshape(X_val.shape[0], X_val.shape[1], 1, X_val.shape[2])
        y_val = np.asarray(data[1]['class'])
        y_val = change_classes(y_val)
        y_val = to_categorical(y_val)
        X_val, y_val = shuffle(X_val, y_val, random_state=42)
        mgt = np.asarray(data[2]['generated_magnitude'])
        t = np.asarray(data[2]['time'])
        X_test = np.stack((mgt, t), axis=-1)
        X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1, X_test.shape[2])
        y_test = np.asarray(data[2]['class'])
        y_test = change_classes(y_test)
        y_test = to_categorical(y_test)
        X_test, y_test = shuffle(X_test, y_test, random_state=42)
        return X_train, y_train, X_val, y_val, X_test, y_test

    def change_classes(targets):
        """Remap arbitrary label values to contiguous integers 0..K-1."""
        # print(targets)
        target_keys = np.unique(targets)
        # print(target_keys)
        target_keys_idxs = np.argsort(np.unique(targets))
        targets = target_keys_idxs[np.searchsorted(target_keys, targets, sorter=target_keys_idxs)]
        return targets

    def open_data(file):
        """Load a single (non-split) pickled dataset and one-hot its labels."""
        with open(file, 'rb') as f:
            data = pickle.load(f)
        print(len(data['generated_magnitude']))
        X = np.asarray(data['generated_magnitude'])
        X = X.reshape(X.shape[0], X.shape[1], 1, 1)
        y = np.asarray(data['class'])
        X, y = shuffle(X, y, random_state=42)
        y = change_classes(y)
        y = to_categorical(y)
        return X, y

    def evaluation(X_test, y_test, n_classes):
        """Compute an Inception-Score-style metric over 10 prediction splits.

        Uses the enclosing-scope ``model``. Returns (score, mean, std),
        each a single-element list (lists kept for historical .npy format).
        """
        y_pred_prob = model.predict_proba(X_test)
        n = 10
        probs = np.array_split(y_pred_prob, n)
        score = []
        mean = []
        std = []
        Y = []
        # Marginal class distribution per split.
        for prob in probs:
            ys = np.zeros(n_classes)  # [0, 0
            for class_i in range(n_classes):
                for j in prob:
                    ys[class_i] = ys[class_i] + j[class_i]
            ys[:] = [x / len(prob) for x in ys]
            Y.append(np.asarray(ys))
        ep = 1e-12  # avoids log(0)
        tmp = []
        # KL(p(y|x) || p(y)) averaged, exponentiated — Inception Score.
        for s in range(n):
            kl = (probs[s] * np.log((probs[s] + ep) / Y[s])).sum(axis=1)
            E = np.mean(kl)
            IS = np.exp(E)
            # pdb.set_trace()
            tmp.append(IS)
        score.append(tmp)
        mean.append(np.mean(tmp))
        std.append(np.std(tmp))
        print('Inception Score:\nMean score : ', mean[-1])
        print('Std : ', std[-1])
        return score, mean, std

    def check_dir(directory):
        """Create directory (and parents) if it does not exist."""
        if not os.path.exists(directory):
            os.makedirs(directory)

    # Output directory scaffolding.
    check_dir('TRTS_' + date)
    check_dir('TRTS_' + date + '/train/')
    check_dir('TRTS_' + date + '/train/')  # NOTE(review): duplicated call, harmless
    check_dir('TRTS_' + date + '/train/' + folder)
    check_dir('TRTS_' + date + '/test/')
    check_dir('TRTS_' + date + '/test/' + folder)
    # if os.path.isfile('TRTS_' + date + '/train/' + folder + '/train_model.h5'):
    #     os.remove('TRTS_' + date + '/train/' + folder + '/train_model.h5')
    #     shutil.rmtree('TRTS_' + date + '/test/' + folder)
    # else:
    irr = True
    one_d = False
    ## Train on real
    # dataset_real = 'catalina_random_full_north_9classes'
    if irr == True:
        X_train, y_train, X_val, y_val, X_test, y_test = read_data_original_irr(
            'TSTR_data/' + in_TSTR_FOLDER + dataset_real + '.pkl')  # datasets_original/REAL/' + dataset_real + '.pkl')
    else:
        X_train, y_train, X_val, y_val, X_test, y_test = read_data(
            'TSTR_data/' + in_TSTR_FOLDER + dataset_real + '.pkl')
    print('')
    print('Training new model')
    print('')
    batch_size = 512
    epochs = 200
    num_classes = 3
    m = Model_(batch_size, 100, num_classes, drop_rate=DROP_OUT_RATE)
    # if one_d == True:
    #     model = m.cnn()
    # else:
    #     model = m.cnn2()
    # Architecture with/without batch normalization, per global config.
    if BN_CONDITION == 'batch_norm_':
        model = m.cnn2_batch()
    else:
        model = m.cnn2()
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    ## callbacks
    history = my_callbacks.Histories()
    # rocauc = my_callbacks.ROC_AUC(X_train, y_train, X_test, y_test)
    # inception = my_callbacks.Inception(X_test, num_classes)
    checkpoint = ModelCheckpoint('TRTS_' + date + '/train/' + folder + '/weights.best.train.hdf5',
                                 monitor='val_acc', verbose=1, save_best_only=True, mode='max')
    earlyStopping = EarlyStopping(monitor='val_acc', min_delta=0.00000001, patience=PATIENCE, verbose=1, mode='max')
    model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size,
              validation_data=(X_val, y_val),
              callbacks=[history,
                         checkpoint,
                         earlyStopping
                         # ,
                         # rocauc,
                         # inception
                         ])
    # Restore the best checkpoint, then delete the file to save disk.
    model = load_model('TRTS_' + date + '/train/' + folder + '/weights.best.train.hdf5')
    os.remove('TRTS_' + date + '/train/' + folder + '/weights.best.train.hdf5')
    # Create dictionary, then save into two different documments.
    ## Loss
    history_dictionary_loss = history.loss
    np.save('TRTS_' + date + '/train/' + folder + '/train_history_loss.npy', history_dictionary_loss)
    ## Val Loss
    history_dictionary_val_loss = history.val_loss
    np.save('TRTS_' + date + '/train/' + folder + '/train_history_val_loss.npy', history_dictionary_val_loss)
    ## Acc
    history_dictionary_acc = history.acc
    np.save('TRTS_' + date + '/train/' + folder + '/train_history_acc.npy', history_dictionary_acc)
    ## Val Acc
    history_dictionary_val_acc = history.val_acc
    np.save('TRTS_' + date + '/train/' + folder + '/train_history_val_acc.npy', history_dictionary_val_acc)
    ## AUC ROC
    # roc_auc_dictionary = rocauc.roc_auc
    # np.save('TRTS_' + date + '/train/' + folder + '/train_rocauc_dict.npy', roc_auc_dictionary)
    ## IS
    # scores_dict = inception.score
    # np.save('TRTS_' + date + '/train/' + folder + '/train_is.npy', scores_dict)
    # mean_scores_dict = inception.mean
    # np.save('TRTS_' + date + '/train/' + folder + '/train_is_mean.npy', mean_scores_dict)
    # std_scores_dict = inception.std
    # np.save('TRTS_' + date + '/train/' + folder + '/train_is_std.npy', std_scores_dict)
    ### plot loss and validation_loss v/s epochs
    plt.figure(1)
    plt.yscale("log")
    plt.plot(history.loss)
    plt.plot(history.val_loss)
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'val'], loc='upper right')
    plt.savefig('TRTS_' + date + '/train/' + folder + '/train_loss.png')
    ### plot acc and validation acc v/s epochs
    plt.figure(2)
    plt.yscale("log")
    plt.plot(history.acc)
    plt.plot(history.val_acc)
    plt.title('model acc')
    plt.ylabel('Acc')
    plt.xlabel('epoch')
    plt.legend(['train', 'val'], loc='upper right')
    plt.savefig('TRTS_' + date + '/train/' + folder + '/train_acc.png')
    print('Training metrics:')
    # print('Inception Score:\nMean score : ', mean_scores_dict[-1])
    # print('Std : ', std_scores_dict[-1])
    # model = load_model('TRTS_' + date + '/train/' + folder + '/weights.best.train.hdf5')
    score_train = model.evaluate(X_train, y_train, verbose=1)
    score_val = model.evaluate(X_val, y_val, verbose=1)
    print('ACC : ', score_train[1])
    print('VAL_ACC : ', score_val[1])
    print('LOSS : ', score_train[0])
    print('VAL_LOSS : ', score_val[0])
    ## Test on synthetic
    print('\nTest metrics:')
    print('\nTest on real:')
    dataset_syn = folder + '_generated'
    # sc, me, st = evaluation(X_test, y_test, num_classes)
    # np.save('TRTS_' + date + '/test/' + folder + '/test_onreal_is.npy', sc)
    # np.save('TRTS_' + date + '/test/' + folder + '/test_onreal_is_mean.npy', me)
    # np.save('TRTS_' + date + '/test/' + folder + '/test_onreal_is_std.npy', st)
    score = model.evaluate(X_test, y_test, verbose=1)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])
    np.save('TRTS_' + date + '/test/' + folder + '/test_onreal_score.npy', score)
    # y_pred = model.predict(X_test)
    # roc = roc_auc_score(y_test, y_pred)
    # print('auc roc', roc)
    # np.save('TRTS_' + date + '/test/' + folder + '/test_onreal_rocauc.npy', roc)
    result_dict[PERCENTAGE_OF_SAMPLES_TO_KEEP_FOR_DISBALANCE_KEY]['testing'] = {
        'test loss on real': score[0],
        'Test accuracy on real': score[1]
        # , 'auc roc on real': roc
    }
    print('\nTest on synthetic:')
    if irr == True:
        X_train2, y_train2, X_val2, y_val2, X_test2, y_test2 = read_data_generated_irr(
            'TSTR_data/generated/' + folder + '/' + dataset_syn + '.pkl')
    else:
        X_train2, y_train2, X_val2, y_val2, X_test2, y_test2 = read_data(
            'TSTR_data/generated/' + folder + '/' + dataset_syn + '.pkl')
    # sc, me, st = evaluation(X_test2, y_test2, num_classes)
    # np.save('TRTS_' + date + '/test/' + folder + '/test_onsyn_is.npy', sc)
    # np.save('TRTS_' + date + '/test/' + folder + '/test_onsyn_is_mean.npy', me)
    # np.save('TRTS_' + date + '/test/' + folder + '/test_onsyn_is_std.npy', st)
    # NOTE(review): `score` is reused here — the real-test score above is
    # overwritten by the synthetic-test score.
    score = model.evaluate(X_test2, y_test2, verbose=1)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])
    # np.save('TRTS_' + date + '/test/' + folder + '/test_onsyn_score.npy', score)
    # y_pred = model.predict(X_test2)
    # roc = roc_auc_score(y_test2, y_pred)
    # print('auc roc', roc)
    result_dict[PERCENTAGE_OF_SAMPLES_TO_KEEP_FOR_DISBALANCE_KEY]['training'] = {
        # 'IS Mean': mean_scores_dict[-1],
        # 'IS Std': std_scores_dict[-1],
        'ACC': np.mean(history_dictionary_acc),
        'VAL_ACC': score_val[1],
        'TRAIN_ACC': score_train[1],
        'TRAIN_LOSS': score_train[0],
        'VAL_LOSS': score_val[0]
    }
    result_dict[PERCENTAGE_OF_SAMPLES_TO_KEEP_FOR_DISBALANCE_KEY]['testing']['test loss on syn'] = score[0]
    result_dict[PERCENTAGE_OF_SAMPLES_TO_KEEP_FOR_DISBALANCE_KEY]['testing']['Test accuracy on syn'] = score[1]
    # result_dict[PERCENTAGE_OF_SAMPLES_TO_KEEP_FOR_DISBALANCE_KEY]['testing']['auc roc on syn'] = roc
    # np.save('TRTS_' + date + '/test/' + folder + '/test_onsyn_rocauc.npy', roc)
    # Free TF graph/session memory between successive experiment runs.
    keras.backend.clear_session()
    del model
def main(result_dict=None, PERCENTAGE_OF_SAMPLES_TO_KEEP_FOR_DISBALANCE=1.0, v=''):
    """TSTR experiment: train on synthetic light curves, fine-tune/test on real ones.

    Trains a CNN on generated (synthetic) irregularly-sampled light curves while
    validating on real data, restores the best checkpoint, fine-tunes on the real
    training split, and records train/val/test metrics.

    Args:
        result_dict: dict accumulating results, keyed by the stringified imbalance
            percentage. Defaults to a fresh dict (the original used a shared
            mutable default, which leaks state across calls).
        PERCENTAGE_OF_SAMPLES_TO_KEEP_FOR_DISBALANCE: fraction encoded into the
            dataset file names (e.g. '...1.00.pkl').
        v: optional version tag inserted into the synthetic dataset name.

    Returns:
        The populated result_dict (the original returned None; callers that
        ignore the return value are unaffected).

    Side effects: creates 'TSTR_<date>/...' output directories, trains and saves
    Keras checkpoints, prints metrics, clears the Keras session. Relies on
    module-level globals: date, BASE_REAL_NAME, DROP_OUT_RATE, BN_CONDITION,
    PATIENCE, PATIENCE_FINE, Model_, my_callbacks, ModelCheckpoint,
    EarlyStopping, load_model, keras, np, pickle, shuffle, to_categorical, os.
    """
    if result_dict is None:
        result_dict = {}

    real_data_folder = os.path.join('datasets_original', 'REAL')
    dataset_real_pkl = '%s%.2f.pkl' % (
        BASE_REAL_NAME, PERCENTAGE_OF_SAMPLES_TO_KEEP_FOR_DISBALANCE)
    syn_data_name = os.path.join(
        '%s%s%.2f' % (BASE_REAL_NAME, v,
                      PERCENTAGE_OF_SAMPLES_TO_KEEP_FOR_DISBALANCE))
    PERCENTAGE_OF_SAMPLES_TO_KEEP_FOR_DISBALANCE_KEY = str(
        PERCENTAGE_OF_SAMPLES_TO_KEEP_FOR_DISBALANCE)
    result_dict[PERCENTAGE_OF_SAMPLES_TO_KEEP_FOR_DISBALANCE_KEY] = {
        'training': {},
        'testing': {}
    }
    print("REAL Training set to load %s" % dataset_real_pkl)
    print("SYN Training set to load %s" % syn_data_name)

    def change_classes(targets):
        # Remap arbitrary class labels to contiguous integer ids 0..K-1
        # (order-independent: based on the sorted unique labels).
        target_keys = np.unique(targets)
        target_keys_idxs = np.argsort(np.unique(targets))
        return target_keys_idxs[np.searchsorted(target_keys, targets,
                                                sorter=target_keys_idxs)]

    def _load_irr_split(split, magnitude_key):
        # One pickled split -> (X, y): stack magnitude and time as trailing
        # channels, reshape to (N, length, 1, channels), one-hot the labels and
        # shuffle consistently. Label encoding is elementwise per row, so
        # encoding before or after shuffle(random_state=42) gives identical
        # results (the original code mixed both orders).
        mgt = np.asarray(split[magnitude_key])
        t = np.asarray(split['time'])
        X = np.stack((mgt, t), axis=-1)
        X = X.reshape(X.shape[0], X.shape[1], 1, X.shape[2])
        y = to_categorical(change_classes(np.asarray(split['class'])))
        X, y = shuffle(X, y, random_state=42)
        return X, y

    def _read_irr(file, magnitude_key):
        # Load the 3 (train, val, test) splits of an irregularly sampled
        # dataset pickle and return them flattened as 6 arrays.
        with open(file, 'rb') as f:
            data = pickle.load(f)
        print(data[0].keys())
        X_train, y_train = _load_irr_split(data[0], magnitude_key)
        X_val, y_val = _load_irr_split(data[1], magnitude_key)
        X_test, y_test = _load_irr_split(data[2], magnitude_key)
        return X_train, y_train, X_val, y_val, X_test, y_test

    def read_data_original_irr(file):
        # Real light curves: magnitudes stored under 'original_magnitude'.
        return _read_irr(file, 'original_magnitude')

    def read_data_generated_irr(file):
        # Synthetic light curves: magnitudes stored under 'generated_magnitude'.
        return _read_irr(file, 'generated_magnitude')

    def check_dir(directory):
        # Create the directory if it does not exist yet.
        if not os.path.exists(directory):
            os.makedirs(directory)

    # Output tree (the original created 'TSTR_<date>/train/' twice; deduped).
    for directory in ('TSTR_' + date,
                      'TSTR_' + date + '/train/',
                      'TSTR_' + date + '/train/' + syn_data_name,
                      'TSTR_' + date + '/test/',
                      'TSTR_' + date + '/test/' + syn_data_name):
        check_dir(directory)

    dataset_syn_pkl = syn_data_name + '_generated.pkl'

    ## Train on synthetic
    (X_train_syn, y_train_syn, X_val_syn, y_val_syn,
     X_test_syn, y_test_syn) = read_data_generated_irr(
        os.path.join('TSTR_data', 'generated', syn_data_name, dataset_syn_pkl))
    (X_train_real, y_train_real, X_val_real, y_val_real,
     X_test_real, y_test_real) = read_data_original_irr(
        os.path.join('TSTR_data', real_data_folder, dataset_real_pkl))

    print('')
    print('Training new model')
    print('')

    batch_size = 512
    epochs = 10000  # effectively "until early stopping"
    num_classes = 3
    m = Model_(batch_size, 100, num_classes, drop_rate=DROP_OUT_RATE)
    model = m.cnn2_batch() if BN_CONDITION == 'batch_norm_' else m.cnn2()
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    ## callbacks
    history = my_callbacks.Histories()
    best_syn_weights = ('TSTR_' + date + '/train/' + syn_data_name +
                        '/weights.best.trainonsynthetic.hdf5')
    checkpoint = ModelCheckpoint(best_syn_weights,
                                 monitor='val_acc',
                                 verbose=1,
                                 save_best_only=True,
                                 mode='max')
    earlyStopping = EarlyStopping(monitor='val_acc',
                                  min_delta=0.00000001,
                                  patience=PATIENCE,
                                  verbose=1,
                                  mode='max')
    # TSTR protocol: fit on synthetic data, validate on REAL data.
    model.fit(X_train_syn, y_train_syn,
              epochs=epochs,
              batch_size=batch_size,
              validation_data=(X_val_real, y_val_real),
              callbacks=[history, checkpoint, earlyStopping])
    # Restore the checkpoint with the best validation accuracy.
    model = load_model(best_syn_weights)

    print('Training metrics:')
    score_train = model.evaluate(X_train_syn, y_train_syn, verbose=1)
    score_val = model.evaluate(X_val_real, y_val_real, verbose=1)
    print('ACC : ', score_train[1])
    print('VAL_ACC : ', score_val[1])
    print('LOSS : ', score_train[0])
    print('VAL_LOSS : ', score_val[0])

    # Fine tuning on the real training split.
    best_ft_weights = ('TSTR_' + date + '/train/' + syn_data_name +
                       '/weights.best.trainfinetune.hdf5')
    checkpoint = ModelCheckpoint(best_ft_weights,
                                 monitor='val_acc',
                                 verbose=1,
                                 save_best_only=True,
                                 mode='max')
    earlyStopping = EarlyStopping(monitor='val_acc',
                                  min_delta=0.00000001,
                                  patience=PATIENCE_FINE,
                                  verbose=1,
                                  mode='max')
    model.fit(X_train_real, y_train_real,
              epochs=epochs,
              batch_size=batch_size,
              validation_data=(X_val_real, y_val_real),
              callbacks=[history, checkpoint, earlyStopping])
    model = load_model(best_ft_weights)

    ## Test on real
    score_val = model.evaluate(X_val_real, y_val_real, verbose=1)
    print('fine tune VAL_ACC : ', score_val[1])
    print('fine tune VAL_LOSS : ', score_val[0])
    print('\nTest metrics:')
    print('\nTest on real:')
    score = model.evaluate(X_test_real, y_test_real, verbose=1)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])
    result_dict[PERCENTAGE_OF_SAMPLES_TO_KEEP_FOR_DISBALANCE_KEY]['testing'] = {
        'test loss on real': score[0],
        'Test accuracy on real': score[1]
    }

    ## Test on syn
    print('\nTest on synthetic:')
    score = model.evaluate(X_test_syn, y_test_syn, verbose=1)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])
    # NOTE(review): VAL_* here are post-fine-tune scores while TRAIN_* are the
    # pre-fine-tune synthetic-training scores — kept as in the original;
    # confirm this asymmetry is intended.
    result_dict[PERCENTAGE_OF_SAMPLES_TO_KEEP_FOR_DISBALANCE_KEY]['training'] = {
        'VAL_ACC': score_val[1],
        'TRAIN_ACC': score_train[1],
        'TRAIN_LOSS': score_train[0],
        'VAL_LOSS': score_val[0]
    }
    result_dict[PERCENTAGE_OF_SAMPLES_TO_KEEP_FOR_DISBALANCE_KEY]['testing'][
        'test loss on syn'] = score[0]
    result_dict[PERCENTAGE_OF_SAMPLES_TO_KEEP_FOR_DISBALANCE_KEY]['testing'][
        'Test accuracy on syn'] = score[1]

    keras.backend.clear_session()
    del model
    return result_dict
def main(result_dict=None, percentage_of_samples_to_keep_for_imbalance=1.0, v=''):
    """TSTR experiment: train on synthetic data, fine-tune and evaluate on real.

    Trains a CNN on generated light curves (validating on real data), records
    the TSTR score, lowers the learning rate and fine-tunes on real training
    data, then records final real/validation metrics.

    Args:
        result_dict: dict accumulating results, keyed by the stringified
            imbalance percentage. Defaults to a fresh dict (the original used a
            shared mutable default, which leaks state across calls).
        percentage_of_samples_to_keep_for_imbalance: fraction encoded into the
            dataset file names.
        v: optional version tag inserted into the synthetic dataset name.

    Returns:
        The populated result_dict (the original returned None; callers that
        ignore the return value are unaffected).

    Side effects: creates the weight folder, trains and saves Keras
    checkpoints, prints metrics, clears the Keras session. Relies on
    module-level globals: date, BASE_REAL_NAME, RESULTS_NAME, DROP_OUT_RATE,
    BN_CONDITION, PATIENCE, PATIENCE_FINE, LR_VAL_MULT, EARLY_STOP_ON,
    EARLY_STOP_ON_COD, Model_, my_callbacks, check_dir,
    read_data_irregular_sampling, ModelCheckpoint, EarlyStopping, load_model,
    keras, K, os.
    """
    if result_dict is None:
        result_dict = {}

    real_data_folder = os.path.join('datasets_original', 'REAL')
    dataset_real_pkl = '%s%.2f.pkl' % (
        BASE_REAL_NAME, percentage_of_samples_to_keep_for_imbalance)
    syn_data_name = os.path.join(
        '%s%s%.2f' % (BASE_REAL_NAME, v,
                      percentage_of_samples_to_keep_for_imbalance))
    result_key = str(percentage_of_samples_to_keep_for_imbalance)
    result_dict[result_key] = {'training': {}, 'testing': {}}
    print("\nREAL Training set to load %s" % dataset_real_pkl)
    print("SYN Training set to load %s" % syn_data_name)

    dataset_syn_pkl = syn_data_name + '_generated.pkl'

    # load syn and real data
    (x_train_syn, y_train_syn, x_val_syn, y_val_syn,
     x_test_syn, y_test_syn) = read_data_irregular_sampling(
        os.path.join('TSTR_data', 'generated', syn_data_name, dataset_syn_pkl),
        magnitude_key='generated_magnitude', time_key='time')
    # NOTE(review): the REAL dataset is also read with
    # magnitude_key='generated_magnitude' — looks suspicious (real pickles
    # elsewhere in this file use 'original_magnitude'); kept as-is, confirm the
    # real pickle actually stores its magnitudes under this key.
    (x_train_real, y_train_real, x_val_real, y_val_real,
     x_test_real, y_test_real) = read_data_irregular_sampling(
        os.path.join('TSTR_data', real_data_folder, dataset_real_pkl),
        magnitude_key='generated_magnitude', time_key='time')

    ## Train on synthetic
    print('\nTraining new model\n')
    batch_size = 512
    epochs = 10000  # effectively "until early stopping"
    num_classes = 3

    # choose model
    m = Model_(batch_size, 100, num_classes, drop_rate=DROP_OUT_RATE)
    model = m.cnn2_batch() if BN_CONDITION == 'batch_norm_' else m.cnn2()
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    ## callbacks
    history = my_callbacks.Histories()
    weight_folder = os.path.join('TSTR_' + date, 'train', RESULTS_NAME,
                                 syn_data_name)
    check_dir(weight_folder)
    best_syn_weights = os.path.join(weight_folder,
                                    'weights.best.trainonsynthetic.hdf5')
    checkpoint = ModelCheckpoint(best_syn_weights,
                                 monitor=EARLY_STOP_ON,
                                 verbose=1,
                                 save_best_only=True,
                                 mode=EARLY_STOP_ON_COD)
    earlyStopping = EarlyStopping(monitor=EARLY_STOP_ON,
                                  min_delta=0.00000001,
                                  patience=PATIENCE,
                                  verbose=1,
                                  mode=EARLY_STOP_ON_COD)
    # TSTR protocol: fit on synthetic data, validate on REAL data.
    model.fit(x_train_syn, y_train_syn,
              epochs=epochs,
              batch_size=batch_size,
              validation_data=(x_val_real, y_val_real),
              callbacks=[history, checkpoint, earlyStopping])
    model = load_model(best_syn_weights)

    print('Syn Training metrics:')
    score_train = model.evaluate(x_train_syn, y_train_syn, verbose=1)
    score_val = model.evaluate(x_val_real, y_val_real, verbose=1)
    score_tstr = model.evaluate(x_test_real, y_test_real, verbose=1)
    print('ACC : ', score_train[1])
    print('VAL_ACC : ', score_val[1])
    print('LOSS : ', score_train[0])
    print('VAL_LOSS : ', score_val[0])
    print('TSTR loss: %f ;-; accuracy: %f' % (score_tstr[0], score_tstr[1]))
    result_dict[result_key]['testing'] = {
        'tstr loss': score_tstr[0],
        'tstr accuracy': score_tstr[1]
    }

    # fine tunning: shrink the current learning rate before training on real.
    K.set_value(model.optimizer.lr, K.eval(model.optimizer.lr) * LR_VAL_MULT)
    best_ft_weights = os.path.join(weight_folder,
                                   'weights.best.trainfinetune.hdf5')
    checkpoint = ModelCheckpoint(best_ft_weights,
                                 monitor=EARLY_STOP_ON,
                                 verbose=1,
                                 save_best_only=True,
                                 mode=EARLY_STOP_ON_COD)
    earlyStopping = EarlyStopping(monitor=EARLY_STOP_ON,
                                  min_delta=0.00000001,
                                  patience=PATIENCE_FINE,
                                  verbose=1,
                                  mode=EARLY_STOP_ON_COD)
    model.fit(x_train_real, y_train_real,
              epochs=epochs,
              batch_size=batch_size,
              validation_data=(x_val_real, y_val_real),
              callbacks=[history, checkpoint, earlyStopping])
    model = load_model(best_ft_weights)

    ## Test on real
    score_val = model.evaluate(x_val_real, y_val_real, verbose=1)
    print('fine tune VAL_ACC : ', score_val[1])
    print('fine tune VAL_LOSS : ', score_val[0])
    print('\nTest metrics:')
    print('\nTest on real:')
    score = model.evaluate(x_test_real, y_test_real, verbose=1)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])
    result_dict[result_key]['testing']['test loss on real'] = score[0]
    result_dict[result_key]['testing']['Test accuracy on real'] = score[1]
    # NOTE(review): VAL_* are post-fine-tune scores while TRAIN_* are the
    # pre-fine-tune synthetic-training scores — kept as in the original.
    result_dict[result_key]['training'] = {
        'VAL_ACC': score_val[1],
        'TRAIN_ACC': score_train[1],
        'TRAIN_LOSS': score_train[0],
        'VAL_LOSS': score_val[0]
    }

    keras.backend.clear_session()
    del model
    return result_dict