def DELETE(self):
    """
    Delete models for multiple instances

    ::

        DELETE /_instances

    DELETE data:

    ::

        [
            "{region}/{namespace}/{instanceId}",
            ...
        ]

    Returns:

    ::

        {"result": "success"}
    """
    try:
        instances = json.loads(web.data())
    except ValueError:
        raise InvalidRequestResponse({"result": "Invalid request"})

    if not instances:
        raise InvalidRequestResponse(
            {"result": "Missing instances in DELETE request"})

    deleted = []
    for server in instances:
        if server.count("/") == 4:
            # Strip the dimension name, keeping "{region}/{namespace}/{instanceId}"
            (lhs, _, identifier) = server.rpartition("/")
            (regionAndNamespace, _, _) = lhs.rpartition("/")
            serverSansDimension = regionAndNamespace + "/" + identifier
        else:
            serverSansDimension = server

        with web.ctx.connFactory() as conn:
            modelIds = repository.listMetricIDsForInstance(conn,
                                                           serverSansDimension)
        if modelIds:
            for modelId in modelIds:
                ModelHandler.deleteModel(modelId)
            deleted.append(server)

    if instances == deleted:
        self.addStandardHeaders()
        return encodeJson({"result": "success"})

    raise web.notfound("Not able to delete %s" %
                       encodeJson(list(set(instances) - set(deleted))))
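# A minimal client-side sketch of exercising the DELETE /_instances endpoint
# above with the `requests` library. The base URL and the instance path are
# hypothetical placeholders, not values taken from this codebase.
import json

import requests

def delete_instances(base_url, instances):
    """Ask the API to delete the models for the given instance paths."""
    response = requests.request(
        "DELETE",
        base_url + "/_instances",
        data=json.dumps(instances))  # body: ["{region}/{namespace}/{instanceId}", ...]
    response.raise_for_status()
    return response.json()  # expected: {"result": "success"}

# Example (hypothetical values):
# delete_instances("http://localhost:8080",
#                  ["us-west-2/AWS/EC2/i-0123456789abcdef0"])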
def pickle_models():
    """ Generate and save pickled models for several training-set sizes. """
    for count in (5000, 10000, 20000, 30000):
        ModelHandler.train_save_new_model(count=count, verbose=True)
def main(args):
    print(args)
    settings = Settings.Settings(args)

    # We already did these for ResNet50: fold indices 5, 2, 7, 3.
    settings.TestDataset_Fold_Index = int(args.FOLD_I)  # can be 0 to K-1
    settings.TestDataset_K_Folds = int(args.KFOLDS)
    assert settings.TestDataset_Fold_Index < settings.TestDataset_K_Folds
    kfold_txt = "KFold_" + str(settings.TestDataset_Fold_Index) + "z" + str(
        settings.TestDataset_K_Folds)
    print(kfold_txt)

    settings.model_backend = args.model_backend
    settings.train_epochs = int(args.train_epochs)
    settings.train_batch = int(args.train_batch)

    # Rough training times per fold: ResNet101 ~5-6 hours (might be a bit less),
    # ResNet50 ~3-4 hours.
    model_txt = "cleanManual_" + str(settings.train_epochs) \
                + "ep_ImagenetWgenetW_" + str(settings.model_backend) + "-" \
                + str(settings.train_batch) \
                + "batch_Augmentation1to1_ClassWeights1to3_TestVal"
    print(model_txt)

    dataset = Dataset.Dataset(settings)
    evaluator = Evaluator.Evaluator(settings)

    show = False
    save = True

    model = ModelHandler.ModelHandler(settings, dataset)

    if not os.path.exists("plots/"):
        os.makedirs("plots/")

    model.model.train(show=show, save=save)

    # K-fold cross-validation: save the weights for this fold.
    model.model.save(
        "/scratch/ruzicka/python_projects_large/ChangeDetectionProject_files/weightsModel2_"
        + model_txt + "_[" + kfold_txt + "].h5")

    SAVE_ALL_FOLDER = model_txt + "PLOTS/"
    SAVE_ALL_PLOTS = SAVE_ALL_FOLDER + "plot"
    if not os.path.exists(SAVE_ALL_FOLDER):
        os.makedirs(SAVE_ALL_FOLDER)

    evaluator.unified_test_report([model.model.model], dataset.test,
                                  validation_set=dataset.val,
                                  postprocessor=model.model.dataPreprocesser,
                                  name=SAVE_ALL_PLOTS,
                                  optionally_save_missclassified=True)
def main():
    """ Main function. """
    # start_tpot_optimazation(count=10000)
    clf = PredictionModelSymmetricXGBoost()
    clf = ModelHandler.train_model(clf=clf, count=10000, verbose=True)
    start_20_way_one_shot(clf=clf, count=400)
def addExerciseToday(self):
    try:
        # Find the exerciseId to place in userExercise (table rows are 1-indexed).
        index = self.ui.exercise_table.currentIndex().row() + 1
        caloriesBurnt = float(self.ui.add_calories_2.text())
        today = datetime.today().strftime('%d/%m/%Y')
        userexercise = Table('userExercise', ModelHandler.return_meta(),
                             autoload=True, autoload_with=ModelHandler.engine)
        insert = userexercise.insert().values(exerciseId=index,
                                              username=self._user,
                                              caloriesBurnt=caloriesBurnt,
                                              activityDate=today)
        conn = ModelHandler.return_connection()
        conn.execute(insert)
        display_message('Your exercise for today has been added',
                        'You have added an exercise successfully', False)
    except Exception as e:
        display_message('Your exercise has not been added',
                        'Adding an exercise has failed', False)
        print(e)
def addFoodToday(self):
    '''
    TODO:
        1. Clean this up.
        2. Investigate why the foreign-key relationships to foodId and username
           in the parent tables are not being respected.
        3. This line can probably be simplified, because only one foodId is
           returned at any moment: foodId=foodIdResult[0].foodId
    :return:
    '''
    try:
        # Find the foodId to place in userMeal (table rows are 1-indexed).
        index = self.ui.food_database.currentIndex().row() + 1
        caloriesEatenPerFood = float(self.ui.calories_output.text())
        today = datetime.today().strftime('%d/%m/%Y')
        usermeal = Table('userMeal', ModelHandler.return_meta(),
                         autoload=True, autoload_with=ModelHandler.engine)
        insert = usermeal.insert().values(
            foodId=index,
            username=self._user,
            caloriesEatenPerFood=caloriesEatenPerFood,
            dateIntake=today)
        conn = ModelHandler.return_connection()
        conn.execute(insert)
        display_message('Your meal has been added',
                        'You have added a meal successfully', False)
    except Exception as e:
        display_message('Your meal has not been added',
                        'Adding a meal has failed', False)
        print(e)
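# The two handlers above rely on ModelHandler.engine, ModelHandler.return_meta()
# and ModelHandler.return_connection(). A minimal sketch of what such a module
# might look like, assuming a plain SQLAlchemy setup; the database URL is a
# hypothetical placeholder and the real ModelHandler may differ:
from sqlalchemy import MetaData, create_engine

engine = create_engine("sqlite:///calories.db")  # assumed URL
_meta = MetaData()

def return_meta():
    """Return the shared MetaData used to reflect tables like userMeal."""
    return _meta

def return_connection():
    """Return a new connection on the shared engine."""
    return engine.connect()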
def POST(self, region, namespace, instanceId=None):
    """
    Monitor a set of default metrics for a specific instance

    ::

        POST /_instances/{region}/{namespace}/{instanceId}

    Returns:

    ::

        {"result": "success"}

    OR

    Monitor a set of default metrics for multiple specific instances

    ::

        POST /_instances/{region}/{namespace}

    POST data:

    ::

        [
            {instanceId},
            ...
        ]

    Returns:

    ::

        {"result": "success"}

    Note: We return a 200 OK even when attempting to POST to an instance in the
    wrong namespace or the wrong region; this saves the overhead of asking AWS
    whether we're dealing with a valid instance in the given namespace or
    region on every POST request. We expect the CLI user to know the correct
    instance ID.
    """
    if instanceId is None:
        try:
            dimension = None
            instances = json.loads(web.data())
        except ValueError:
            raise InvalidRequestResponse({"result": "Invalid request"})
    else:
        (dimension, _, identifier) = instanceId.rpartition("/")
        instances = [identifier]

    # Check for invalid region or namespace
    cwAdapter = datasource.createDatasourceAdapter("cloudwatch")

    supportedRegions = set(region for region, _desc
                           in cwAdapter.describeRegions())
    if region not in supportedRegions:
        raise InvalidRequestResponse(
            {"result": "Not supported. Region '%s' was not found." % region})

    supportedNamespaces = set()
    for resourceInfo in cwAdapter.describeSupportedMetrics().values():
        for metricInfo in resourceInfo.values():
            supportedNamespaces.add(metricInfo["namespace"])
    if namespace not in supportedNamespaces:
        raise InvalidRequestResponse(
            {"result": "Not supported. Namespace '%s' was not found."
                       % namespace})

    try:
        # Attempt to validate the instances list using validictory
        validate(instances, _INSTANCES_MODEL_CREATION_SCHEMA)
    except ValidationError as e:
        raise InvalidRequestResponse(
            {"result": "InvalidArgumentsError: " + str(e)})

    for instanceId in instances:
        server = "/".join([region, namespace, instanceId])
        with web.ctx.connFactory() as conn:
            numMetrics = repository.getMetricCountForServer(conn, server)
        if numMetrics == 0:
            # No metrics exist for this instance id yet; create the defaults.
            try:
                resourceType = cloudwatch.NAMESPACE_TO_RESOURCE_TYPE[namespace]
            except KeyError:
                raise InvalidRequestResponse({"result": "Not supported."})

            modelSpecs = cwAdapter.getDefaultModelSpecs(
                resourceType, region, instanceId, dimension)

            for modelSpec in modelSpecs:
                ModelHandler.createModel(modelSpec)

    self.addStandardHeaders()
    return encodeJson({"result": "success"})
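# The POST handler above validates its payload against
# _INSTANCES_MODEL_CREATION_SCHEMA with validictory. The actual schema lives
# elsewhere in this codebase; one plausible shape for "a non-empty JSON array
# of instance-id strings" would be (an assumption, not the verified original):
_INSTANCES_MODEL_CREATION_SCHEMA_SKETCH = {
    "type": "array",
    "minItems": 1,
    "items": {"type": "string"},
}

# validate(["i-0123456789abcdef0"], _INSTANCES_MODEL_CREATION_SCHEMA_SKETCH)  # passes
# validate("not-a-list", _INSTANCES_MODEL_CREATION_SCHEMA_SKETCH)  # raises ValidationError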
def main(args):
    print(args)
    settings = Settings.Settings(args)
    settings.TestDataset_Fold_Index = 0
    settings.TestDataset_K_Folds = 5
    settings.model_backend = args.model_backend
    settings.train_batch = args.train_batch
    settings.train_epochs = args.train_epochs

    dataset = Dataset.Dataset(settings)
    evaluator = Evaluator.Evaluator(settings)

    show = False
    save = True

    model_h = ModelHandler.ModelHandler(settings, dataset)
    model_h.model.load(args.one_model_path)
    model = model_h.model.model

    # Data prep:
    test_set_processed = dataset.dataPreprocesser.apply_on_a_set_nondestructively(
        dataset.test)
    train_set_processed = dataset.dataPreprocesser.apply_on_a_set_nondestructively(
        dataset.train)
    test_L, test_R, test_V = test_set_processed
    train_L, train_R, train_V = train_set_processed

    if test_L.shape[3] > 3:
        # keep 3 channels only - rgb
        test_L = test_L[:, :, :, 1:4]
        test_R = test_R[:, :, :, 1:4]
        train_L = train_L[:, :, :, 1:4]
        train_R = train_R[:, :, :, 1:4]

    import random
    import keras.backend as K
    import matplotlib.pyplot as plt
    import numpy as np
    from timeit import default_timer as timer  # used by the timing blocks below

    T = 5
    batch_size = 16  # as it was when training

    train_data_indices = list(range(0, len(train_L)))

    f = K.function(
        [model.layers[0].input, model.layers[1].input, K.learning_phase()],
        [model.layers[-1].output])
    print("f", f)

    # For each sample:
    samples_N = 10
    # < T, samples_N, 256, 256 >
    predictions_for_sample = np.zeros((T, samples_N) + (256, 256,))

    for sample_id in range(samples_N):
        # Like this it's probably slow ...
        sample = [test_L[sample_id], test_R[sample_id]]  # (2, 256, 256, 3)
        sample = np.asarray(sample)

        for MC_iteration in range(T):
            selected_indices = random.sample(train_data_indices, batch_size - 1)
            print("train_L[selected_indices] :: ",
                  train_L[selected_indices].shape)  # (15, 256, 256, 3)
            print("sample :: ", sample.shape)  # (2, 256, 256, 3)
            train_sample = [
                np.append(train_L[selected_indices], [sample[0]], 0),
                np.append(train_R[selected_indices], [sample[1]], 0)
            ]
            train_sample = np.asarray(train_sample)

            print("MonteCarloBatchNormalization")
            print("T", T)
            print("batch_size", batch_size)
            print("sample.shape", sample.shape)
            print("train_sample.shape", train_sample.shape)

            # All in the training regime - local BN statistics get changed in
            # each iteration.
            predictions = f((np.asarray(train_sample[0], dtype=np.float32),
                             np.asarray(train_sample[1], dtype=np.float32),
                             1))[0]
            print("predictions.shape", predictions.shape)  # (16, 256, 256, 2)

            sample_predicted = predictions[batch_size - 1]  # the last one, (256, 256, 2)
            sample_predicted = sample_predicted[:, :, 1]
            print("sample_predicted.shape", sample_predicted.shape)  # (256, 256)

            predictions_for_sample[MC_iteration, sample_id, :, :] = sample_predicted

    # print("are they equal? 0-1", np.array_equal(predictions_for_sample[0], predictions_for_sample[1]))
    # print("are they equal? 1-2", np.array_equal(predictions_for_sample[1], predictions_for_sample[2]))
    # print("are they equal? 2-3", np.array_equal(predictions_for_sample[2], predictions_for_sample[3]))

    predictions_for_sample = np.asarray(predictions_for_sample)  # [T, samples_N, 256, 256]
    print("predictions_for_sample ::", predictions_for_sample.shape)

    predictions_for_sample_By_Images = np.swapaxes(predictions_for_sample, 0, 1)  # [samples_N, T, 256, 256]
    print("predictions_for_sample_By_Images ::",
          predictions_for_sample_By_Images.shape)

    resolution = len(predictions_for_sample[0][0])  # 256
    predictions_N = len(predictions_for_sample[0])
    print("predictions_N:", predictions_N)

    for prediction_i in range(predictions_N):
        predictions = predictions_for_sample_By_Images[prediction_i]  # T x 256x256

        # Move values from [0, 1] into (0, 1) so that log() is defined everywhere.
        a_problematic_zone = np.finfo(float).eps
        helper_offset = np.ones_like(predictions)
        predictions = predictions * (1.0 - 2 * a_problematic_zone) \
                      + helper_offset * a_problematic_zone

        def entropy_across_predictions(pixel_predictions):
            T = len(pixel_predictions)
            p_sum = np.sum(pixel_predictions, axis=0)
            pk0 = p_sum / T
            pk1 = 1 - p_sum / T
            entropy0 = -pk0 * np.log(pk0)
            entropy1 = -pk1 * np.log(pk1)  # in theory this term can be ignored in the two-class case
            return entropy0 + entropy1

        def ent_img_sumDiv(pixel_predictions):
            return np.sum(pixel_predictions, axis=0) / len(pixel_predictions)

        def ent_img_log(pk):
            return -pk * np.log(pk)

        startTMP = timer()
        # Trying to write it faster!
        ent_img_pk0 = np.apply_along_axis(arr=predictions, axis=0,
                                          func1d=ent_img_sumDiv)
        ent_img_pk1 = np.ones_like(ent_img_pk0) - ent_img_pk0
        ent_img_ent0 = np.apply_along_axis(arr=ent_img_pk0, axis=0,
                                           func1d=ent_img_log)
        ent_img_ent1 = np.apply_along_axis(arr=ent_img_pk1, axis=0,
                                           func1d=ent_img_log)
        entropy_image = ent_img_ent0 + ent_img_ent1
        sum_ent = np.sum(entropy_image.flatten())
        endTMP = timer()
        timeTMP = endTMP - startTMP
        print("Entropy faster " + str(timeTMP) + "s (" + str(timeTMP / 60.0) + "min)")

        """ Timing comparison against the per-pixel version:
        Entropy faster 0.28297295499942265s (0.004716215916657044min)
        Entropy before 0.481015188008314s (0.008016919800138567min)

        startTMP = timer()
        entropy_image = np.apply_along_axis(arr=predictions, axis=0,
                                            func1d=entropy_across_predictions)
        sum_ent = np.sum(entropy_image.flatten())
        endTMP = timer()
        timeTMP = endTMP - startTMP
        print("Entropy before " + str(timeTMP) + "s (" + str(timeTMP / 60.0) + "min)")
        """

        def BALD_diff(pixel_predictions):
            # Bayesian Active Learning by Disagreement = BALD,
            # https://arxiv.org/abs/1112.5745
            # (values were already moved away from exact 0.0 / 1.0 above,
            # so log() is safe)
            accum = 0
            for val in pixel_predictions:
                accum0 = -val * np.log(val)
                accum1 = -(1 - val) * np.log(1 - val)
                accum += accum0 + accum1
            return accum

        startTMP = timer()
        bald_diff_image = np.apply_along_axis(arr=predictions, axis=0,
                                              func1d=BALD_diff)
        endTMP = timer()
        timeTMP = endTMP - startTMP
        print("Bald orig " + str(timeTMP) + "s (" + str(timeTMP / 60.0) + "min)")

        bald_image = -1 * (entropy_image - bald_diff_image)
        sum_bald = np.sum(bald_image.flatten())

        variance_image = np.var(predictions, axis=0)
        sum_var = np.sum(variance_image.flatten())

        do_viz = True
        if do_viz:
            fig = plt.figure(figsize=(10, 8))
            for i in range(T):
                img = predictions[i]
                ax = fig.add_subplot(1, T + 3, i + 1)
                plt.imshow(img, cmap='gray', vmin=0.0, vmax=1.0)
                ax.title.set_text('Model ' + str(i))

            ax = fig.add_subplot(1, T + 3, T + 1)
            plt.imshow(entropy_image, cmap='gray', vmin=0.0, vmax=1.0)
            ax.title.set_text('Entropy (' + str(np.round(sum_ent, 3)) + ')')

            ax = fig.add_subplot(1, T + 3, T + 2)
            plt.imshow(bald_image, cmap='gray')
            ax.title.set_text('BALD (' + str(np.round(sum_bald, 3)) + ')')

            ax = fig.add_subplot(1, T + 3, T + 3)
            plt.imshow(variance_image, cmap='gray', vmin=0.0, vmax=1.0)
            ax.title.set_text('Variance (' + str(np.round(sum_var, 3)) + ')')

            plt.show()

    # MCBN (sample, T, train_data, batch_size)
    # predictions_for_sample = []
    # for i in T:
    #     batch of train data <- random from train_data of size batch_size
    #     update_layer_statistics (= eval with training mode on)
    #     prediction = model.predict(sample)
    #     predictions.append(prediction)
    # return predictions

    # ----------------------------------------------------------
    # Predict data:
    print("about to predict data with", test_L.shape)
    predicted = model.predict(x=[test_L, test_R], batch_size=4)
    predicted = predicted[:, :, :, 1]
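# A self-contained toy check of the three uncertainty measures computed above
# (predictive entropy, BALD, per-pixel variance), run on random data instead
# of real MC predictions. Note this sketch uses the standard normalized BALD
# form, H(mean prediction) minus the mean per-model entropy; the loop in
# main() above keeps the un-normalized sum over T and flips the sign.
import numpy as np

def uncertainty_maps(predictions):
    """predictions: (T, H, W) array of per-pixel foreground probabilities."""
    eps = np.finfo(float).eps
    predictions = predictions * (1.0 - 2 * eps) + eps  # keep log() finite

    pk1 = predictions.mean(axis=0)  # mean foreground probability per pixel
    pk0 = 1.0 - pk1
    entropy = -pk0 * np.log(pk0) - pk1 * np.log(pk1)  # entropy of the mean

    # Mean of the per-model entropies:
    per_model = -predictions * np.log(predictions) \
                - (1 - predictions) * np.log(1 - predictions)
    expected_entropy = per_model.mean(axis=0)

    bald = entropy - expected_entropy  # mutual information (BALD)
    variance = np.var(predictions, axis=0)
    return entropy, bald, variance

entropy, bald, variance = uncertainty_maps(np.random.rand(5, 8, 8))
print(entropy.shape, bald.shape, variance.shape)  # (8, 8) each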
def main(args):
    print(args)

    threshold_fineness = 0.05  # TODO: move this out as a parameter eventually

    exclusions_by_idxs = []
    selected_model_files = []

    settings = Settings.Settings(args)

    if args.input_file != "":
        print("")
        with open(args.input_file) as fp:
            line_file = fp.readline()
            line_exclusions = fp.readline()
            cnt = 1
            while line_file:
                print("|" + line_exclusions.strip() + "|" + line_file.strip() + "|")
                selected_model_files.append(line_file.strip())
                if line_exclusions.strip() != "":
                    exclusions_by_idxs.append(
                        list(map(int, line_exclusions.strip().split(" "))))
                else:
                    exclusions_by_idxs.append([])
                line_file = fp.readline()
                line_exclusions = fp.readline()
                cnt += 1
    else:
        selected_model_files = glob.glob(args.models_path_star)
        selected_model_files.sort()

    corresponding_fold_indices = []
    corresponding_K_of_folds = []
    print("Selected", len(selected_model_files), "models:")
    for p in selected_model_files:
        print(p)
        print("")
        f = p.split("/")[-1]
        # We need to get the fold index from the file name (keep it intact)!
        assert "[KFold_" in f
        indicator = f.split("[KFold_")[-1]
        limits = indicator.split("z")
        fold_idx = int(limits[0])
        K_of_folds = int(limits[1].split("]")[0])
        # print(fold_idx, "from", K_of_folds, "=", f)
        corresponding_fold_indices.append(fold_idx)
        corresponding_K_of_folds.append(K_of_folds)
        if args.input_file == "":
            exclusions_by_idxs.append([])

    print("exclusions_by_idxs", exclusions_by_idxs)
    print("We got these indices of folds", corresponding_fold_indices)
    print("And these K values for k-fold cross-validation", corresponding_K_of_folds)

    # TEST MODELS ONE BY ONE
    statistics_over_models = []
    for model_idx in range(len(selected_model_files)):
        model_path = selected_model_files[model_idx]
        settings.TestDataset_Fold_Index = corresponding_fold_indices[model_idx]
        settings.TestDataset_K_Folds = corresponding_K_of_folds[model_idx]
        assert settings.TestDataset_Fold_Index < settings.TestDataset_K_Folds
        print(model_path)

        dataset = Dataset.Dataset(settings)
        evaluator = Evaluator.Evaluator(settings)

        show = False
        save = True

        settings.model_backend = args.model_backend
        settings.train_epochs = int(args.train_epochs)
        settings.train_batch = int(args.train_batch)

        model = ModelHandler.ModelHandler(settings, dataset)
        # K-fold cross-validation: load the weights saved for this fold.
        model.model.load(model_path)

        folder_name = model_path.split("/")[-1][0:-3]
        model.model.save_plot_path = "evaluation_plots/" + folder_name + "/"

        import os
        if not os.path.exists("evaluation_plots/"):
            os.makedirs("evaluation_plots/")
        if not os.path.exists(model.model.save_plot_path):
            os.makedirs(model.model.save_plot_path)

        file = open("evaluation_plots/inprogress.txt", "w")
        file.write("Started \n")
        file.close()

        # =====================================================================
        # SimulateUnbalancedDataset = True
        SimulateUnbalancedDataset = False
        if SimulateUnbalancedDataset:
            [lefts_paths_in_trainAndTest_already, rights_paths,
             labels_paths] = dataset.paths
            # dataset.train_paths < if we need to be more specific
            print("len(lefts_paths_in_trainAndTest)",
                  len(lefts_paths_in_trainAndTest_already))

            from ActiveLearning.LargeDatasetHandler_AL import get_unbalanced_dataset
            WholeDataset = get_unbalanced_dataset()
            all_left_paths = WholeDataset.paths[0]
            print("We had ", len(lefts_paths_in_trainAndTest_already),
                  " in train+test.")
            print("From ", len(all_left_paths),
                  "all possible pairs in our dataset...")

            allowed_indices = []
            for key_idx in all_left_paths:
                path = all_left_paths[key_idx]
                if path not in lefts_paths_in_trainAndTest_already:
                    allowed_indices.append(key_idx)
            print("... we have", len(allowed_indices),
                  "allowed indices to play with! (which were not in the "
                  "original train+test)")
            # ~81K possibilities!

            unbalanced_ratio = 10.0
            unbalanced_ratio = 80.0
            in_test_set_already_N = len(dataset.test[0])
            likely_N_of_changes = in_test_set_already_N / 2.0
            wanted_N_of_nonchanges = int(likely_N_of_changes * unbalanced_ratio)
            print("Sample", wanted_N_of_nonchanges, " new non-changes...")

            del dataset.train

            import h5py

            def save_images_to_h5_DEFAULT_DATA_FORMAT(lefts, rights, labels,
                                                      hdf5_path):
                SUBSET = len(lefts)
                hdf5_file = h5py.File(hdf5_path, mode='w')
                hdf5_file.create_dataset("lefts", data=lefts)
                hdf5_file.create_dataset("rights", data=rights)
                hdf5_file.create_dataset("labels", data=labels)
                hdf5_file.close()
                print("Saved", SUBSET, "images successfully to:", hdf5_path)
                return hdf5_path

            def save_images_to_h5(lefts, rights, labels, hdf5_path):
                SIZE = lefts[0].shape
                SUBSET = len(lefts)
                hdf5_file = h5py.File(hdf5_path, mode='w')
                hdf5_file.create_dataset("lefts", data=lefts, dtype="float32")
                hdf5_file.create_dataset("rights", data=rights, dtype="float32")
                hdf5_file.create_dataset("labels", data=labels, dtype="float32")
                hdf5_file.close()
                print("Saved", SUBSET, "images successfully to:", hdf5_path)
                return hdf5_path

            def load_images_from_h5(hdf5_path):
                hdf5_file = h5py.File(hdf5_path, "r")
                lefts = hdf5_file['lefts'][:]
                rights = hdf5_file['rights'][:]
                labels = hdf5_file['labels'][:]
                hdf5_file.close()
                return lefts, rights, labels

            # path_additional_set = "/scratch/ruzicka/python_projects_large/ChangeDetectionProject_files/datasets/INBALANCED_ADDITIONAL_LEFTS_DATASET_FOR_TESTS8560"  # rename to 8560
            # path_additional_set = "/scratch/ruzicka/python_projects_large/ChangeDetectionProject_files/datasets/INBALANCED_ADDITIONAL_LEFTS_DATASET_FOR_TESTS50"
            path_additional_set = "/scratch/ruzicka/python_projects_large/ChangeDetectionProject_files/datasets/INBALANCED_ADDITIONAL_LEFTS_DATASET_FOR_TESTS800"
            PER_BATCH = 1000  # that's really small and thus slow ...
            batches_to_load = 9  # goes from 0 to 8

            # SAVE ONCE, THEN REUSE THOSE BATCHES:
            """
            assert False  # Do you really want to recalculate these? (cca 20 min)
            selected_indices = sample(allowed_indices, wanted_N_of_nonchanges)
            batch_i = 0
            for batch in WholeDataset.generator_for_all_images(
                    PER_BATCH, mode='datalabels',
                    custom_indices_to_sample_from=selected_indices):
                selected_indices, [additional_L, additional_R], additional_V = batch
                save_images_to_h5_DEFAULT_DATA_FORMAT(
                    additional_L, additional_R, additional_V,
                    path_additional_set + "_" + str(batch_i) + "_" + str(PER_BATCH) + ".h5")
                batch_i += 1
                del additional_L
                del additional_R
                del additional_V
                if batch_i >= batches_to_load:
                    break  # just 1 batch
            """

            additional_predicted = []
            additional_gts = []

            for i in range(batches_to_load):
                print("loading batch ", i)
                additional_L, additional_R, additional_V = load_images_from_h5(
                    path_additional_set + "_" + str(i) + "_" + str(PER_BATCH) + ".h5")
                additional_set = additional_L, additional_R, additional_V
                # memory goes up to 18G~21G / 31G
                additional_set_processed = dataset.dataPreprocesser.apply_on_a_set_nondestructively(
                    additional_set, be_destructive=True)
                add_L, add_R, add_V = additional_set_processed
                if add_L.shape[3] > 3:
                    # keep 3 channels only - rgb
                    add_L = add_L[:, :, :, 1:4]
                    add_R = add_R[:, :, :, 1:4]

                print("about to predict batch", i, "with", add_L.shape)
                additional_predicted_batch = model.model.model.predict(
                    x=[add_L, add_R], batch_size=4)
                # Wait - do we actually create a problem here by moving the
                # BatchNorm statistics in the model? Afterwards the model does
                # not seem to predict the same way; we therefore reload the
                # model again below.
                additional_predicted_batch = additional_predicted_batch[:, :, :, 1]
                additional_gts_batch = add_V
                print("... predicted", len(additional_predicted_batch))

                del add_L
                del add_R

                additional_predicted.extend(additional_predicted_batch)
                additional_gts.extend(additional_gts_batch)
                print("Successfully predicted", len(additional_predicted), "so far!")

                file = open("evaluation_plots/inprogress.txt", "a")
                file.write("Successfully predicted "
                           + str(len(additional_predicted)) + " so far!\n")
                file.close()

                del additional_predicted_batch
                del additional_gts_batch
                del additional_set_processed
                del additional_set

                import keras
                keras.backend.clear_session()  # clear GPU memory

            # RESET the model... something has changed in it even if we only
            # predict (the model's stochasticity ...).
            model_path = selected_model_files[model_idx]
            dataset = Dataset.Dataset(settings)  # probably slow again ...
            evaluator = Evaluator.Evaluator(settings)
            model = ModelHandler.ModelHandler(settings, dataset)
            model.model.load(model_path)
            folder_name = model_path.split("/")[-1][0:-3]
            model.model.save_plot_path = "evaluation_plots/" + folder_name + "/"

            # PS: different behaviour with the set extended by unbalanced
            # samples is still possible, because of the way we establish the
            # chosen THR value (as the one which maximizes the F1 score).
            # However, the human-legible outputs should be good - those were
            # done manually on the whole recall plot curve.

            additional_predicted = np.asarray(additional_predicted)
            additional_gts = np.asarray(additional_gts)
            print("We have additional predictions:", len(additional_predicted),
                  additional_predicted.shape, "and additional gts:",
                  len(additional_gts), additional_gts.shape)
            optional_additional_predAndGts = [additional_predicted, additional_gts]
        # =====================================================================

        if not SimulateUnbalancedDataset:
            optional_additional_predAndGts = []

        SAVE_ALL_PLOTS = model.model.save_plot_path + "plot"
        statistics = evaluator.unified_test_report(
            [model.model.model], dataset.test,
            validation_set=dataset.val,
            postprocessor=model.model.dataPreprocesser,
            name=SAVE_ALL_PLOTS,
            optionally_save_missclassified=True,
            optional_manual_exclusions=exclusions_by_idxs[model_idx],
            optional_additional_predAndGts=optional_additional_predAndGts)

        if SimulateUnbalancedDataset:
            # HACK: only relevant if we get here ...
            statistics, pixels_best_thr, tiles_best_thr, ToReturn_predicted, \
                ToReturn_gts = statistics
            print("the threshold has been selected on the VAL data as:",
                  pixels_best_thr, tiles_best_thr)
            print("now we have")
            print("ToReturn_predicted=", ToReturn_predicted.shape)
            print("additional_predicted=", additional_predicted.shape)
            print("ToReturn_gts=", ToReturn_gts.shape)
            print("additional_gts=", additional_gts.shape)

            predicted_total = np.append(additional_predicted, ToReturn_predicted, 0)
            gts_total = np.append(additional_gts, ToReturn_gts, 0)

            path_large_files_backup_sol = "/scratch/ruzicka/python_projects_large/ChangeDetectionProject_files/main_eval_mem_issues/"
            import os
            if not os.path.exists(path_large_files_backup_sol):
                os.makedirs(path_large_files_backup_sol)
            if not os.path.exists(path_large_files_backup_sol + folder_name + "/"):
                os.makedirs(path_large_files_backup_sol + folder_name + "/")

            np.save(path_large_files_backup_sol + folder_name + "/" + "BatchI-"
                    + str(model_idx) + "_predicted_total.npy", predicted_total)
            np.save(path_large_files_backup_sol + folder_name + "/" + "BatchI-"
                    + str(model_idx) + "_gts_total.npy", gts_total)
            np.save(path_large_files_backup_sol + folder_name + "/" + "BatchI-"
                    + str(model_idx) + "_statistics_total.npy",
                    np.asarray(statistics))

            print("predicted_total=", predicted_total.shape)
            print("gts_total=", gts_total.shape)

            del additional_predicted
            del additional_gts
            del ToReturn_predicted
            del ToReturn_gts
            del predicted_total
            del gts_total

        # model.model.test(evaluator, show=show, save=save)
        # statistics = model.model.test(evaluator, show=show, save=save,
        #                               threshold_fineness=threshold_fineness)

        statistics_over_models.append(statistics)
        mask_stats, tiles_stats = statistics
        print("statistics = ", statistics)
        print("mask_stats = ", mask_stats)
        print("tiles_stats = ", tiles_stats)

        del model
        del dataset
        del evaluator
        del optional_additional_predAndGts
        import keras
        keras.backend.clear_session()

    """ debug
    statistics_over_models = [((0.1, 0.609693808104319, 0.782560176106013, 0.9765439612843166, 0.7100948985424048),
                               (0.1, 0.8504672897196262, 0.978494623655914, 0.9158878504672897, 0.91)),
                              ((0.2, 0.603389186392769, 0.861152404951038, 0.9746090897889895, 0.7240157353295362),
                               (0.2, 0.7570093457943925, 1.0, 0.8785046728971962, 0.8617021276595744))]
    """

    add_text = args.model_backend
    statistics_over_models = np.asarray(statistics_over_models)

    if not os.path.exists("evaluation_plots/"):
        os.makedirs("evaluation_plots/")
    np.save("evaluation_plots/statistics_over_models_" + add_text + ".npy",
            statistics_over_models)
    # statistics_over_models = np.load("evaluation_plots/resnet101_kfolds_0to8.npy")

    # Process overall statistics -> boxplots!
    print("Overall statistics::: (", len(statistics_over_models), ")")
    print(statistics_over_models)

    # Each model has statistics = (mask_stats, tiles_stats), where:
    # tiles_stats = tiles_best_thr, tiles_selected_recall, tiles_selected_precision,
    #               tiles_selected_accuracy, tiles_selected_f1
    # mask_stats = pixels_best_thr, pixels_selected_recall, pixels_selected_precision,
    #              pixels_selected_accuracy, pixels_selected_f1, pixels_auc
    thresholds = []
    tiles_recalls = []
    tiles_precisions = []
    tiles_accuracies = []
    tiles_f1s = []
    mask_recalls = []
    mask_precisions = []
    mask_accuracies = []
    mask_f1s = []
    mask_AUCs = []

    for stats in statistics_over_models:
        mask_stats, tiles_stats = stats
        thresholds.append(mask_stats[0])
        tiles_recalls.append(tiles_stats[1])
        mask_recalls.append(mask_stats[1])
        tiles_precisions.append(tiles_stats[2])
        mask_precisions.append(mask_stats[2])
        tiles_accuracies.append(tiles_stats[3])
        mask_accuracies.append(mask_stats[3])
        tiles_f1s.append(tiles_stats[4])
        mask_f1s.append(mask_stats[4])
        mask_AUCs.append(mask_stats[5])

    # REPORT
    report_text = ""
    report_text += "Tiles evaluation:\n"
    report_text += "mean tiles_recalls = " + str(100.0 * np.mean(tiles_recalls)) \
                   + " +- " + str(100.0 * np.std(tiles_recalls)) + " std \n"
    report_text += "mean tiles_precisions = " + str(100.0 * np.mean(tiles_precisions)) \
                   + " +- " + str(100.0 * np.std(tiles_precisions)) + " std \n"
    report_text += "mean tiles_accuracies = " + str(100.0 * np.mean(tiles_accuracies)) \
                   + " +- " + str(100.0 * np.std(tiles_accuracies)) + " std \n"
    report_text += "mean tiles_f1s = " + str(100.0 * np.mean(tiles_f1s)) \
                   + " +- " + str(100.0 * np.std(tiles_f1s)) + " std \n"
    report_text += "\n"
    report_text += "Mask evaluation:\n"
    report_text += "mean mask_recalls = " + str(100.0 * np.mean(mask_recalls)) \
                   + " +- " + str(100.0 * np.std(mask_recalls)) + " std \n"
    report_text += "mean mask_precisions = " + str(100.0 * np.mean(mask_precisions)) \
                   + " +- " + str(100.0 * np.std(mask_precisions)) + " std \n"
    report_text += "mean mask_accuracies = " + str(100.0 * np.mean(mask_accuracies)) \
                   + " +- " + str(100.0 * np.std(mask_accuracies)) + " std \n"
    report_text += "mean mask_f1s = " + str(100.0 * np.mean(mask_f1s)) \
                   + " +- " + str(100.0 * np.std(mask_f1s)) + " std \n"
    report_text += "mean mask_AUCs = " + str(100.0 * np.mean(mask_AUCs)) \
                   + " +- " + str(100.0 * np.std(mask_AUCs)) + " std \n"

    file = open("evaluation_plots/report_boxplotStats_" + add_text + ".txt", "w")
    file.write(report_text)
    file.close()

    xs = ["recall", "precision", "accuracy", "f1"]
    data = [tiles_recalls, tiles_precisions, tiles_accuracies, tiles_f1s]

    import matplotlib.pyplot as plt
    import seaborn as sns
    sns.set(style="whitegrid")

    """
    ax = sns.boxplot(x=xs, y=data)
    ax.set_title('Stats per tiles')
    ax.set_ylim(0.0, 1.0)
    plt.show()
    """

    fig1, ax1 = plt.subplots()
    ax1.set_title('KFoldCrossval statistics (per tiles) - ' + settings.model_backend)
    ax1.boxplot(data, labels=xs)
    ax1.set_ylim(0.0, 1.0)
    # plt.show()
    plt.savefig("evaluation_plots/boxplot_tiles_stats_" + add_text + ".png")
    plt.savefig("evaluation_plots/boxplot_tiles_stats_" + add_text + ".pdf")

    fig2, ax2 = plt.subplots()
    ax2.set_title('KFoldCrossval statistics (per masks) - ' + settings.model_backend)
    xs_pixels = ["recall", "precision", "accuracy", "f1", "AUC"]
    data = [mask_recalls, mask_precisions, mask_accuracies, mask_f1s, mask_AUCs]
    ax2.boxplot(data, labels=xs_pixels)
    ax2.set_ylim(0.0, 1.0)
    # plt.show()
    plt.savefig("evaluation_plots/boxplot_masks_stats_" + add_text + ".png")
    plt.savefig("evaluation_plots/boxplot_masks_stats_" + add_text + ".pdf")

    print("Just as additional info, these were the chosen thresholds across models:",
          thresholds)
def session():
    session = ModelHandler.make_session()  # set up the session
    yield session                          # hand the session to the test
    session.rollback()                     # teardown: roll back any uncommitted changes to the db
    session.close()                        # close the session
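# Presumably the generator above is registered as a pytest fixture (the
# @pytest.fixture decorator is not shown here). A minimal sketch of a test
# consuming it; the User model and its fields are hypothetical:
def test_add_user(session):
    user = User(username="alice")  # hypothetical model
    session.add(user)
    session.flush()  # write without committing, so the teardown rollback undoes it
    assert session.query(User).filter_by(username="alice").first() is not None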
def main(args):
    print(args)
    settings = Settings.Settings(args)

    # We already did these for ResNet50: fold indices 5, 2, 7, 3.
    settings.TestDataset_Fold_Index = int(args.FOLD_I)  # can be 0 to K-1
    settings.TestDataset_K_Folds = int(args.KFOLDS)
    assert settings.TestDataset_Fold_Index < settings.TestDataset_K_Folds
    kfold_txt = "KFold_" + str(settings.TestDataset_Fold_Index) + "z" + str(
        settings.TestDataset_K_Folds)
    print(kfold_txt)

    # Rough training times per fold: ResNet101 ~5-6 hours (might be a bit less),
    # ResNet50 ~3-4 hours.
    model_txt = "cleanManual_" + str(args.train_epochs) + "ep_ImagenetWgenetW_" \
                + str(args.model_backend) + "-" + str(args.train_batch) \
                + "batch_Augmentation1to1_ClassWeights1to3_TestVal"
    print(model_txt)

    dataset = Dataset.Dataset(settings)
    evaluator = Evaluator.Evaluator(settings)

    show = False
    save = True

    settings.model_backend = args.model_backend
    settings.train_epochs = int(args.train_epochs)
    settings.train_batch = int(args.train_batch)

    model = ModelHandler.ModelHandler(settings, dataset)
    model.model.train(show=show, save=save)

    # Model 2 ...
    # TODO note: change settings.run_name to have saved plots, and write down:
    # - model backbone (resnet34?)
    # - initial weights (imagenet?)
    # - augmentation used?
    # - epoch number
    # - class weights changed?
    # - ... any other special cool thing ...

    # K-fold cross-validation: save the weights for this fold.
    model.model.save(
        "/scratch/ruzicka/python_projects_large/ChangeDetectionProject_files/weightsModel2_"
        + model_txt + "_[" + kfold_txt + "].h5")

    # Next: train ResNet50 on the same dataset without the whole STRIP2
    # (to have some large test images).

    # Previously used checkpoints (kept for reference):
    # model.model.load("/scratch/ruzicka/python_projects_large/ChangeDetectionProject_files/weightsModel2_cleanManual_100ep_ImagenetWgenetW_seresnext50-8batch_Augmentation1to1_ClassWeights1to3.h5")
    # model.model.load("/scratch/ruzicka/python_projects_large/ChangeDetectionProject_files/weightsModel2_cleanManual-noStrip2_100ep_ImagenetWgenetW_resnet50-16batch_Augmentation1to1_ClassWeights1to3.h5")
    # model.model.load("/scratch/ruzicka/python_projects_large/ChangeDetectionProject_files/weightsModel2_cleanManual_100ep_ImagenetWgenetW_resnet101-8batch_Augmentation1to1_ClassWeights1to3.h5")

    # SENet154 (crashed, ~10 hrs of training) + ImageNet weights + data aug 1:1 + class weights 1:3:
    # model.model.load("/scratch/ruzicka/python_projects_large/ChangeDetectionProject_files/weightsModel2_cleanManual_XYZep_ImagenetW_senet154-4batch_Augmentation1to1_ClassWeights1to3_early_stop_save_26mar-7am(cca10hrs).h5")

    # SEResNet34 + ImageNet weights + data aug 1:1 + class weights 1:3:
    # model.model.load("/scratch/ruzicka/python_projects_large/ChangeDetectionProject_files/weightsModel2_cleanManual_100ep_ImagenetWgenetW_seresnet34_Augmentation1to1_ClassWeights1to3.h5")

    # ResNet50 (batch 16) + ImageNet weights + data aug 1:1 + class weights 1:3:
    # model.model.load("/scratch/ruzicka/python_projects_large/ChangeDetectionProject_files/weightsModel2_cleanManual_100ep_ImagenetWgenetW_resnet50-16batch_Augmentation1to1_ClassWeights1to3.h5")

    # ResNet34 + ImageNet weights + data aug 1:1 + class weights 1:3:
    # model.model.load("/scratch/ruzicka/python_projects_large/ChangeDetectionProject_files/weightsModel2_cleanManual_100ep_ImagenetW_Resnet34_Augmentation1to1_ClassWeights1to3.h5")

    # ResNet34 + ImageNet weights + no data aug + class weights 1:3:
    # model.model.load("/scratch/ruzicka/python_projects_large/ChangeDetectionProject_files/weightsModel2_cleanManual_100ep_ImagenetBase.h5")
    # model.model.load("/scratch/ruzicka/python_projects_large/ChangeDetectionProject_files/weightsModel2_cleanManual_54ep_ImagenetBase_best_so_far_for_eastly_stops.h5")  # early stop at epoch 54

    # ResNet34 + custom DSM weights + no data aug + class weights 1:3:
    # model.model.load("/scratch/ruzicka/python_projects_large/ChangeDetectionProject_files/weightsModel2_cleanManual_100ep_CustomDSMBase.h5")
    # model.model.load("/scratch/ruzicka/python_projects_large/ChangeDetectionProject_files/weightsModel2_cleanManual_49ep_CustomDSMBase_best_so_far_for_eastly_stops.h5")

    # model.model.load("/scratch/ruzicka/python_projects_large/ChangeDetectionProject_files/weightsModel2_cleanManual_25ep_ImagenetFrozenEnc.h5")  # 26,428,523 > 5,139,429 trainable params - faster?
    # model.model.load("/scratch/ruzicka/python_projects_large/ChangeDetectionProject_files/weightsModel2_cleanManual_.h5")  # ...

    SAVE_ALL_FOLDER = model_txt + "PLOTS/"
    SAVE_ALL_PLOTS = SAVE_ALL_FOLDER + "plot"
    if not os.path.exists(SAVE_ALL_FOLDER):
        os.makedirs(SAVE_ALL_FOLDER)

    evaluator.unified_test_report([model.model.model], dataset.test,
                                  validation_set=dataset.val,
                                  postprocessor=model.model.dataPreprocesser,
                                  name=SAVE_ALL_PLOTS,
                                  optionally_save_missclassified=True)
import time

import DataBaseHandler
import ModelHandler
from sklearn.linear_model import LinearRegression

# Fit an initial model on all stored systolic blood-pressure samples.
model = LinearRegression()
sys_samples = DataBaseHandler.get_samples(type="systolicBloodPressure")
x_train, y_train = ModelHandler.get_samples_to_nparray(sys_samples)
ModelHandler.train_model(model, x_train, y_train)

# Then poll the API once an hour and fine-tune on any new samples.
while True:
    new_systolic_samples = ModelHandler.get_new_systolic_samples_from_API()
    if new_systolic_samples is not None:
        x_tune, y_tune = ModelHandler.get_samples_to_nparray(new_systolic_samples)
        ModelHandler.tune_model(model, x_tune, y_tune)
        ModelHandler.save_as_onnx(model, "MyModel.onnx")
        ModelHandler.save_model_as_pickle(model, "MyModel.pkl")
    time.sleep(60 * 60)  # one hour
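# A hedged sketch of consuming the two artifacts saved above. Loading the
# pickle and running the ONNX model use only standard pickle / onnxruntime
# APIs; the feature shape (a single scalar input) is an assumption about this
# particular model, not something stated in the script.
import pickle

import numpy as np
import onnxruntime as ort

with open("MyModel.pkl", "rb") as fh:
    sk_model = pickle.load(fh)
print(sk_model.predict(np.array([[120.0]])))  # scikit-learn path

sess = ort.InferenceSession("MyModel.onnx")
input_name = sess.get_inputs()[0].name
print(sess.run(None, {input_name: np.array([[120.0]], dtype=np.float32)}))  # ONNX path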
def main(args):
    import keras.backend as K
    print(args)
    settings = Settings.Settings(args)
    settings.TestDataset_Fold_Index = 0
    settings.TestDataset_K_Folds = 5
    settings.model_backend = args.model_backend
    settings.train_batch = args.train_batch
    settings.train_epochs = args.train_epochs

    dataset = Dataset.Dataset(settings)
    evaluator = Evaluator.Evaluator(settings)

    show = False
    save = True

    model_h = ModelHandler.ModelHandler(settings, dataset)
    model_h.model.load(args.one_model_path)
    model = model_h.model.model

    """
    # One could also reload all the weights manually ...
    # (careful: there is a model inside a model!)
    weights_list = []
    for i, layer in enumerate(model.layers[3:]):
        weights_list.append(layer.get_weights())

    for i, layer in enumerate(model.layers[3:]):
        weights = weights_list[i]
        name = layer.name
        print(name, len(weights), len(layer.weights))
        # restore by:
        if "_bn" in name:
            layer.set_weights(weights)  # Batch normalization weights are: [gamma, beta, mean, std]
    """

    # Data prep:
    test_set_processed = dataset.dataPreprocesser.apply_on_a_set_nondestructively(dataset.test)
    train_set_processed = dataset.dataPreprocesser.apply_on_a_set_nondestructively(dataset.train)
    test_L, test_R, test_V = test_set_processed
    train_L, train_R, train_V = train_set_processed

    if test_L.shape[3] > 3:
        # keep 3 channels only - rgb
        test_L = test_L[:, :, :, 1:4]
        test_R = test_R[:, :, :, 1:4]
        train_L = train_L[:, :, :, 1:4]
        train_R = train_R[:, :, :, 1:4]

    train_V = train_V.reshape(train_V.shape + (1,))
    from keras.utils import to_categorical
    train_V = to_categorical(train_V)

    import random
    import matplotlib.pyplot as plt
    import numpy as np

    T = 5
    batch_size = 16  # as it was when training

    train_data_indices = list(range(0, len(train_L)))

    f = K.function([model.layers[0].input, model.layers[1].input, K.learning_phase()],
                   [model.layers[-1].output])
    print("f", f)

    # For each sample:
    samples_N = 32
    # < T, samples_N, 256, 256 >
    predictions_for_sample = np.zeros((T, samples_N) + (256, 256,))

    sample = [test_L[0:samples_N], test_R[0:samples_N]]  # (2, samples_N, 256, 256, 3)
    sample = np.asarray(sample)

    for MC_iteration in range(T):
        selected_indices = random.sample(train_data_indices, batch_size * 4)

        print("train_L[selected_indices] :: ",
              train_L[selected_indices].shape)  # (64, 256, 256, 3)
        print("sample :: ", sample.shape)  # (2, samples_N, 256, 256, 3)

        train_sample = [train_L[selected_indices], train_R[selected_indices]]
        train_sample = np.asarray(train_sample)
        train_sample_labels = np.asarray(train_V[selected_indices])

        print("MonteCarloBatchNormalization")
        print("T", T)
        print("batch_size", batch_size)
        print("sample.shape", sample.shape)
        print("train_sample.shape", train_sample.shape)

        """
        # Complete revert? Arguably not necessary.
        model_h = ModelHandler.ModelHandler(settings, dataset)  # < this will be slow
        model_h.model.load(args.one_model_path)
        model = model_h.model.model
        # model.load_weights(args.one_model_path)  # revert at each MC_iteration start
        """

        # Freeze everything besides the BN layers:
        for i, layer in enumerate(model.layers[2].layers):
            name = layer.name
            if "bn" not in name:
                # freeze layer which is not BN:
                layer.trainable = False
            # print(name, layer.trainable)
        for i, layer in enumerate(model.layers):
            name = layer.name
            if "bn" not in name:
                # freeze layer which is not BN:
                layer.trainable = False
            # else: layer.stateful = True ?
            # print(name, layer.trainable)

        """
        # Re-compile (after changing the trainable parameters)!
        # Without this, Keras shouts a warning, but it seems alright.
        from keras.optimizers import Adam
        from loss_weighted_crossentropy import weighted_categorical_crossentropy
        loss = "categorical_crossentropy"
        weights = [1, 3]
        loss = weighted_categorical_crossentropy(weights)
        metric = "categorical_accuracy"
        model.compile(optimizer=Adam(lr=0.00001), loss=loss, metrics=[metric, 'mse'])
        """

        model.fit(x=[train_sample[0], train_sample[1]], y=train_sample_labels,
                  batch_size=16, epochs=25, verbose=2)

        """
        # Revert weights? (another way instead of loading from the .h5 file)
        weights_list = []
        for i, layer in enumerate(model.layers[3:]):
            weights_list.append(layer.get_weights())

        model.load_weights(args.one_model_path)  # revert

        for i, layer in enumerate(model.layers[3:]):
            weights = weights_list[i]
            name = layer.name
            print(name, len(weights), len(layer.weights))
            if "_bn" in name:
                layer.set_weights(weights)  # Batch normalization weights are: [gamma, beta, mean, std]
        """

        # model.predict would be nice, to be able to batch easily .... however ...
        predictions = model.predict(x=[sample[0], sample[1]], batch_size=16, verbose=2)
        # Q: can the f(...) call below be replaced with the predict above?
        # It's not behaving the same. (We don't want to make a new K.function
        # every time either...)
        # f = K.function([model.layers[0].input, model.layers[1].input, K.learning_phase()],
        #                [model.layers[-1].output])
        predictions = f((np.asarray(sample[0], dtype=np.float32),
                         np.asarray(sample[1], dtype=np.float32), 1))[0]
        # Here the BNs use an exponentially weighted (running) average of the
        # per-layer statistics seen during training (roughly, the latest
        # average value). PS: a second prediction here gives the same result.

        print("predictions.shape", predictions.shape)  # (samples_N, 256, 256, 2)

        sample_predicted = predictions[:, :, :, 1]
        print("sample_predicted.shape", sample_predicted.shape)  # (samples_N, 256, 256)

        predictions_for_sample[MC_iteration, :, :, :] = sample_predicted

    # print("are they equal? 0-1", np.array_equal(predictions_for_sample[0], predictions_for_sample[1]))
    # print("are they equal? 1-2", np.array_equal(predictions_for_sample[1], predictions_for_sample[2]))
    # print("are they equal? 2-3", np.array_equal(predictions_for_sample[2], predictions_for_sample[3]))

    predictions_for_sample = np.asarray(predictions_for_sample)  # [T, samples_N, 256, 256]
    print("predictions_for_sample ::", predictions_for_sample.shape)

    predictions_for_sample_By_Images = np.swapaxes(predictions_for_sample, 0, 1)  # [samples_N, T, 256, 256]
    print("predictions_for_sample_By_Images ::", predictions_for_sample_By_Images.shape)

    resolution = len(predictions_for_sample[0][0])  # 256
    predictions_N = len(predictions_for_sample[0])
    print("predictions_N:", predictions_N)

    for prediction_i in range(predictions_N):
        predictions = predictions_for_sample_By_Images[prediction_i]  # T x 256x256

        variance_image = np.var(predictions, axis=0)
        sum_var = np.sum(variance_image.flatten())

        do_viz = True
        if do_viz:
            fig = plt.figure(figsize=(10, 8))
            for i in range(T):
                img = predictions[i]
                ax = fig.add_subplot(1, T + 1, i + 1)
                plt.imshow(img, cmap='gray')
                ax.title.set_text('Model ' + str(i))

            ax = fig.add_subplot(1, T + 1, T + 1)
            plt.imshow(variance_image, cmap='gray')
            ax.title.set_text('Variance Viz (' + str(sum_var) + ')')

            plt.show()

    # MCBN (sample, T, train_data, batch_size)
    # predictions_for_sample = []
    # for i in T:
    #     batch of train data <- random from train_data of size batch_size
    #     update_layer_statistics (= eval with training mode on)
    #     prediction = model.predict(sample)
    #     predictions.append(prediction)
    # return predictions
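# A runnable sketch of the MCBN pseudocode in the comment above, assuming a
# generic single-input Keras model `model` with BatchNorm layers (the
# two-branch siamese inputs used in this project would need the same
# two-input K.function as in main()):
import random

import numpy as np
import keras.backend as K

def mcbn_predict(model, sample, train_data, T=5, batch_size=16):
    """Collect T stochastic predictions for `sample` by re-estimating the BN
    statistics on a random training batch each time (learning_phase=1)."""
    f = K.function([model.layers[0].input, K.learning_phase()],
                   [model.layers[-1].output])
    indices = list(range(len(train_data)))
    predictions = []
    for _ in range(T):
        batch_idx = random.sample(indices, batch_size - 1)
        # Append the query sample so it shares the batch's BN statistics:
        batch = np.append(train_data[batch_idx], [sample], 0)
        out = f((batch.astype(np.float32), 1))[0]
        predictions.append(out[-1])  # the query sample is the last element
    return np.asarray(predictions)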