import os
import time
import datetime
import pickle

import numpy as np
from sklearn import svm

from deepLearning.src.data.mat_data import get_h5mean_data, poisson_noise_loader
# Assumed module path for calculate_dprime; it is defined elsewhere in this repo.
from deepLearning.src.models.optimal_observer import calculate_dprime


def get_svm_accuracy(path_mat, test_data, test_labels, num_samples=10000, lock=None,
                     signal_no_signal=False, random_seed=42, **kwargs):
    start = time.time()
    # Guard the HDF5 read when running in parallel processes.
    if lock is not None:
        lock.acquire()
    meanData, meanDataLabels, dataMetric = get_h5mean_data(path_mat, **kwargs)
    if lock is not None:
        lock.release()
    train_data, train_labels = poisson_noise_loader(
        meanData, size=num_samples, numpyData=True, seed=random_seed * 42,
        signal_no_signal=signal_no_signal)
    # Flatten images to feature vectors for the linear SVM.
    train_data = train_data.reshape(train_data.shape[0], -1)
    test_data = test_data.reshape(test_data.shape[0], -1)
    svc = svm.SVC(kernel='linear', max_iter=1000, random_state=random_seed)
    num_train = len(train_data)
    x_train, y_train = train_data, train_labels
    x_test, y_test = test_data, test_labels
    svc.fit(x_train, y_train)
    preds = svc.predict(x_test)
    acc = np.mean(preds == y_test)
    # save predictions, labels
    id_name = os.path.basename(path_mat).split('.')[0]
    out_path = os.path.dirname(path_mat)
    with open(os.path.join(out_path, f"{id_name}_svm_pred_labels.p"), 'wb') as f:
        pickle.dump(np.stack([preds, y_test], axis=1), f)
    # score dprime (collapse all signal classes to 1 vs. no-signal 0).
    # np.int was removed in NumPy 1.24; plain int is equivalent here.
    dp_preds = (preds > 0).astype(int)
    dp_y_test = (y_test > 0).astype(int)
    dprime = calculate_dprime(np.stack([dp_preds, dp_y_test], axis=1))
    print(f'Accuracy is {acc}, Dprime is {dprime}, train samples is {num_train}, '
          f'took {str(datetime.timedelta(seconds=time.time() - start))}.')
    return acc, dprime, float(dataMetric[1])
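# For reference, a minimal sketch of the standard signal-detection d' that
# calculate_dprime presumably implements (an assumption about this repo's
# implementation; it may clip the rates differently): d' = Z(hit rate) -
# Z(false-alarm rate), with Z the inverse normal CDF. Rates are nudged away
# from 0 and 1 to keep Z finite.
from scipy.stats import norm


def dprime_sketch(pred_label_pairs):
    # pred_label_pairs: (N, 2) array of binary [prediction, label] rows.
    preds, labels = pred_label_pairs[:, 0], pred_label_pairs[:, 1]
    hits = np.mean(preds[labels == 1] == 1)
    false_alarms = np.mean(preds[labels == 0] == 1)
    eps = 1e-6  # avoid infinite Z at rates of exactly 0 or 1
    hits = np.clip(hits, eps, 1 - eps)
    false_alarms = np.clip(false_alarms, eps, 1 - eps)
    return norm.ppf(hits) - norm.ppf(false_alarms)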
import os
from glob import glob

import numpy as np
from imageio import imsave
from matplotlib import pyplot as plt
from PIL import Image

from deepLearning.src.data.mat_data import get_h5mean_data


def show(arr):
    plt.imshow(arr, cmap='gray')
    plt.show()


def upscale(arr, factor=4):
    # scipy.misc.imresize was removed in SciPy 1.3. This PIL-based replacement
    # mirrors its behavior: rescale to uint8, then bilinear-resize by `factor`.
    img = Image.fromarray((arr / arr.max() * 255).astype(np.uint8))
    return np.array(img.resize((img.width * factor, img.height * factor),
                               Image.BILINEAR))


folderp = r'C:\Users\Fabian\Documents\data\rsync\redo_experiments\redo_automaton\matlab_contrasts'
sub_folders = [f.path for f in os.scandir(folderp) if f.is_dir()]
for sub in sub_folders:
    fp = glob(f"{sub}\\*0_019952623150*.h5")[0]
    data = get_h5mean_data(fp)
    signal = data[0][1]  # mean image of the signal class
    signal = upscale(signal, 4)
    out_path = os.path.dirname(fp)
    imsave(os.path.join(out_path, 'mean_signal.png'), signal)
    print("nice")
# Fragment: shift_path and the helpers get_block_size, get_outpath, and
# create_txt are defined earlier in this script.
all_h5 = glob(f'{shift_path}\\**\\**.h5', recursive=True)
unique_h5 = []
used_h5 = []
for h5 in all_h5:
    f = os.path.basename(h5)
    if f not in used_h5:
        used_h5.append(f)
        unique_h5.append(h5)
print('nice')
for h5 in all_h5:
    shuffled_pixels = get_block_size(h5)
    try:
        data = get_h5mean_data(h5, shuffled_pixels=shuffled_pixels)
    except Exception:
        # Skip files that cannot be read with this block size.
        continue
    signal = data[0][1]
    out_path = get_outpath(h5)
    fname = os.path.basename(h5)[:-3]
    f_folder = os.path.join(out_path, fname)
    os.makedirs(f_folder, exist_ok=True)
    imsave(os.path.join(f_folder, f'{fname}.png'), signal)
    imsave(os.path.join(f_folder, f'{fname}_nosignal.png'), data[0][0])
    signal_p = np.random.poisson(signal)
    imsave(os.path.join(f_folder, f'{fname}_poisson.png'), signal_p)
    signal_p = np.random.poisson(signal) + signal
    imsave(os.path.join(f_folder, f'{fname}_mixed1.png'), signal_p)
    create_txt(f_folder, fname, signal)
from glob import glob
import os

from deepLearning.src.data.mat_data import get_h5mean_data

pathMatDir = '/share/wandell/data/reith/matlabData/shift_contrast100/'
matFiles = glob(f'{pathMatDir}*.h5')
matFiles.sort()
for f in matFiles:
    meanData, meanDataLabels, dataContrast, dataShift = get_h5mean_data(
        f, includeContrast=True, includeShift=True)
    with open(os.path.join(pathMatDir, "shiftVals.txt"), 'a') as txt:
        txt.write(str(dataShift[1]) + '\n')
import torch

from deepLearning.src.models.GrayResNet_skip_connections import GrayResnet18, GrayResnet101
from deepLearning.src.data.mat_data import get_h5mean_data, poisson_noise_loader

weights_path = '/share/wandell/data/reith/experiment_freq_1_log_contrasts30_resnet18/resNet_weights_5_samplesPerClass_freq_1_contrast_oo_0_181393069391.torch'
h5_path = '/share/wandell/data/reith/experiment_freq_1_log_contrasts30_resnet18/5_samplesPerClass_freq_1_contrast_oo_0_181393069391.h5'
Net = GrayResnet18(2)
Net.load_state_dict(torch.load(weights_path))
Net.cuda()
Net.eval()
meanData, meanDataLabels, dataContrast = get_h5mean_data(h5_path, includeContrast=True)
testDataFull, testLabelsFull = poisson_noise_loader(torch.tensor(meanData), size=64,
                                                    numpyData=False)
dim_in = testDataFull.shape[-1]
testDataFull = testDataFull.view(-1, 1, dim_in, dim_in).cuda().float()
# Normalize to zero mean and unit standard deviation, as during training.
testDataFull -= testDataFull.mean()
testDataFull /= testDataFull.std()
out = Net(testDataFull)

print("Resnet101:")
weights_path = '/share/wandell/data/reith/experiment_freq_1_log_contrasts30_resnet101/resNet_weights_5_samplesPerClass_freq_1_contrast_oo_0_181393069391.torch'
h5_path = '/share/wandell/data/reith/experiment_freq_1_log_contrasts30_resnet101/5_samplesPerClass_freq_1_contrast_oo_0_181393069391.h5'
Net = GrayResnet101(2)
Net.load_state_dict(torch.load(weights_path))
Net.cuda()
Net.eval()
out = Net(testDataFull)
# for n, p in Net.named_parameters():
#     print(n)
print("nice!")
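# The script above stops at the raw network outputs. A minimal sketch of turning
# them into predictions and accuracy, assuming the Gray ResNets end in a
# log-softmax (they are trained with nn.NLLLoss elsewhere in this repo) and
# that testLabelsFull is a torch tensor (numpyData=False above):
with torch.no_grad():
    preds = out.argmax(dim=1).cpu()
    acc = (preds == testLabelsFull.cpu().long()).float().mean().item()
print(f"Accuracy on the 64 Poisson test samples: {acc * 100:.2f}%")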
import numpy as np

from deepLearning.src.data.mat_data import get_h5mean_data, poisson_noise_loader
# Assumed module path; get_optimal_observer_acc is defined elsewhere in this repo.
from deepLearning.src.models.optimal_observer import get_optimal_observer_acc

# relevant variables
test_interval = 2
batchSize = 128
numSamplesEpoch = 10000
pathMat = "/black/localhome/reith/Desktop/projects/WLDiscriminationNetwork/deepLearning/data/experiment_shift_contrasts/5_samplesPerClass_freq_1_contrast_0_10_shift_1_00_pi_per_300000.h5"
meanData, meanDataLabels = get_h5mean_data(pathMat)
testData, testLabels = poisson_noise_loader(meanData, size=1000, numpyData=True)
# Normalization statistics: the standard deviation comes from the noisy test
# data (a good enough stand-in), the mean from the noiseless mean images.
mean = meanData.mean()
std = testData.std()
accOptimal = get_optimal_observer_acc(testData, testLabels, meanData)
print(f"Optimal observer accuracy on all data is {accOptimal*100:.2f}%")
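# For intuition, a minimal sketch of the Poisson ideal observer that
# get_optimal_observer_acc presumably implements (an assumption about this
# repo's implementation): each noisy image x is assigned to the mean template
# mu that maximizes the Poisson log-likelihood sum(x * log(mu) - mu); the
# log(x!) term is constant across templates and can be dropped.
def ideal_observer_acc_sketch(test_data, test_labels, mean_templates):
    flat = test_data.reshape(len(test_data), -1).astype(np.float64)
    mus = mean_templates.reshape(len(mean_templates), -1).astype(np.float64)
    log_mus = np.log(np.clip(mus, 1e-12, None))  # guard against zero-mean pixels
    # Log-likelihood of every image under every template: shape (N, num_templates).
    loglik = flat @ log_mus.T - mus.sum(axis=1)
    preds = loglik.argmax(axis=1)
    return np.mean(preds == test_labels)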
# This function assumes the module-level imports of the surrounding file
# (time, os, sys, csv, pickle, datetime, numpy as np, torch, torch.nn as nn,
# torch.optim as optim, multiprocessing as mp) plus the repo's own helpers
# (Logger, CsvWriter, get_h5mean_data, poisson_noise_loader, shuffle_pixels_func,
# shuffle_1d, get_optimal_observer_acc_parallel, calculate_dprime,
# calculate_discriminability_index, GrayResnet18, GrayResnet101,
# PoissonNoiseLoaderClass, train_poisson, test, score_svm).
def autoTrain_Resnet_optimalObserver(pathMat, device=None, lock=None, train_nn=True,
                                     include_shift=False, deeper_pls=False, oo=True,
                                     svm=True, NetClass=None, NetClass_param=None,
                                     include_angle=False, training_csv=True,
                                     num_epochs=30, initial_lr=0.001, lr_deviation=0.1,
                                     lr_epoch_reps=3, them_cones=False,
                                     separate_rgb=False, meanData_rounding=None,
                                     shuffled_pixels=0, shuffle_scope=-1,
                                     test_eval=True, random_seed_nn=True,
                                     train_set_size=-1, test_size=5000,
                                     shuffle_portion=-1, ca_rule=-1,
                                     force_balance=False,
                                     same_test_data_shuff_pixels=True,
                                     class_balance='class_based', random_seed=42):
    # class_balance can be 'signal_based' (all signal cases summed up are equal
    # to all non-signal cases) or 'class_based' (all signal classes + non-signal
    # have equal sample size for train and test set).
    if class_balance == 'class_based':
        signal_no_signal = False
    else:
        signal_no_signal = True
    shuffled_pixels_backup = 0
    startTime = time.time()
    print(device, pathMat)
    if device is not None:
        os.environ["CUDA_VISIBLE_DEVICES"] = str(device)
    test_interval = 1
    batchSize = 32
    numSamplesEpoch = 10000
    outPath = os.path.dirname(pathMat)
    fileName = os.path.basename(pathMat).split('.')[0]
    sys.stdout = Logger(f"{os.path.join(outPath, fileName)}_log.txt")
    # We want to add the same seeded Poisson noise regardless of pixel shuffling.
    # We therefore first load the unshuffled meanData template, add the seeded
    # Poisson noise, and then shuffle test data and meanData with the same
    # seeded mask further below.
    if same_test_data_shuff_pixels and (shuffled_pixels != 0):
        shuffled_pixels_backup = shuffled_pixels
        shuffled_pixels = 0
    if include_shift:
        meanData, meanDataLabels, dataContrast, dataShift = get_h5mean_data(
            pathMat, includeContrast=True, includeShift=True, them_cones=them_cones,
            separate_rgb=separate_rgb, meanData_rounding=meanData_rounding,
            shuffled_pixels=shuffled_pixels, shuffle_scope=shuffle_scope,
            shuffle_portion=shuffle_portion, ca_rule=ca_rule)
    elif include_angle:
        meanData, meanDataLabels, dataContrast, dataAngle = get_h5mean_data(
            pathMat, includeContrast=True, includeAngle=True, them_cones=them_cones,
            separate_rgb=separate_rgb, meanData_rounding=meanData_rounding,
            shuffled_pixels=shuffled_pixels, shuffle_scope=shuffle_scope,
            shuffle_portion=shuffle_portion, ca_rule=ca_rule)
    else:
        meanData, meanDataLabels, dataContrast = get_h5mean_data(
            pathMat, includeContrast=True, them_cones=them_cones,
            separate_rgb=separate_rgb, meanData_rounding=meanData_rounding,
            shuffled_pixels=shuffled_pixels, shuffle_scope=shuffle_scope,
            shuffle_portion=shuffle_portion, ca_rule=ca_rule)
    if training_csv:
        header = ['accuracy', 'dprime', 'epoch', 'contrast']
        default_vals = {'contrast': max(dataContrast)}
        if include_shift:
            header.append('shift')
            default_vals['shift'] = dataShift[1]
        if include_angle:
            header.append('angle')
            default_vals['angle'] = dataAngle[1]
        TrainWrt = CsvWriter(os.path.join(outPath, 'train_results.csv'),
                             header=header, default_vals=default_vals, lock=lock)
        TestWrt = CsvWriter(os.path.join(outPath, 'test_results.csv'),
                            header=header, default_vals=default_vals, lock=lock)
        train_test_log = [TrainWrt, TestWrt]
    else:
        train_test_log = None
    if same_test_data_shuff_pixels and shuffled_pixels_backup != 0:
        testDataFull, testLabelsFull = poisson_noise_loader(
            meanData, size=test_size, numpyData=True, seed=random_seed,
            force_balance=force_balance, signal_no_signal=signal_no_signal)
        # Also shuffle the mean data. As the shuffle mask is seeded, we simply
        # call the shuffle function again.
        if shuffled_pixels_backup > 0:
            testDataFull = shuffle_pixels_func(testDataFull, shuffled_pixels_backup,
                                               shuffle_scope, shuffle_portion)
            meanData = shuffle_pixels_func(meanData, shuffled_pixels_backup,
                                           shuffle_scope, shuffle_portion)
        else:
            testDataFull = shuffle_1d(testDataFull, dimension=shuffled_pixels_backup)
            meanData = shuffle_1d(meanData, dimension=shuffled_pixels_backup)
        shuffled_pixels = shuffled_pixels_backup
    else:
        testDataFull, testLabelsFull = poisson_noise_loader(
            meanData, size=test_size, numpyData=True, seed=random_seed,
            force_balance=force_balance, signal_no_signal=signal_no_signal)
    # normalization values
    mean_norm = meanData.mean()
    std_norm = testDataFull.std()
    min_norm = testDataFull.min()
    max_norm = testDataFull.max()
    id_name = os.path.basename(pathMat).split('.')[0]
    accOptimal, optimalOPredictionLabel = get_optimal_observer_acc_parallel(
        testDataFull, testLabelsFull, meanData, returnPredictionLabel=True)
    with open(os.path.join(outPath, f"{id_name}_oo_pred_label.p"), 'wb') as f:
        pickle.dump(optimalOPredictionLabel, f)
    with open(os.path.join(outPath, f"{id_name}_contrast_labels.p"), 'wb') as f:
        pickle.dump(dataContrast, f)
    if oo:
        if len(meanData) > 2:
            # collapse all signal classes to 1 for the d' calculation
            optimalOPredictionLabel[optimalOPredictionLabel > 0] = 1
            accOptimal = np.mean(
                optimalOPredictionLabel[:, 0] == optimalOPredictionLabel[:, 1])
            d1 = -1
            print(f"Theoretical d index is {d1}")
            d2 = calculate_dprime(optimalOPredictionLabel)
            print(f"Optimal observer d index is {d2}, acc is {accOptimal}.")
        else:
            d1 = calculate_discriminability_index(meanData)
            print(f"Theoretical d index is {d1}")
            d2 = calculate_dprime(optimalOPredictionLabel)
            print(f"Optimal observer d index is {d2}")
        print(f"Optimal observer accuracy on all data is {accOptimal*100:.2f}%")
    else:
        d1 = -1
        d2 = -1
        accOptimal = -1
    testData = testDataFull[:500]
    testLabels = testLabelsFull[:500]
    dimIn = testData[0].shape[1]
    dimOut = len(meanData)
    if svm:
        include_contrast_svm = not (include_shift or include_angle)
        if include_contrast_svm:
            metric_svm = 'contrast'
        elif include_angle:
            metric_svm = 'angle'
        elif include_shift:
            metric_svm = 'shift'
        svm_process = mp.Process(
            target=score_svm, args=[pathMat, lock, testDataFull, testLabelsFull],
            kwargs={'them_cones': them_cones,
                    'includeContrast': include_contrast_svm,
                    'separate_rgb': separate_rgb, 'metric': metric_svm,
                    'meanData_rounding': meanData_rounding,
                    'shuffled_pixels': shuffled_pixels,
                    'includeAngle': include_angle, 'includeShift': include_shift,
                    'signal_no_signal': signal_no_signal,
                    'random_seed': random_seed})
        svm_process.start()
    if train_nn:
        if random_seed_nn:
            torch.random.manual_seed(random_seed)
        if NetClass is None:
            if deeper_pls:
                Net = GrayResnet101(dimOut)
            else:
                Net = GrayResnet18(dimOut)
        else:
            if NetClass_param is None:
                Net = NetClass(dimOut, min_norm, max_norm, mean_norm, std_norm)
            else:
                Net = NetClass(dimOut, min_norm, max_norm, mean_norm, std_norm,
                               freeze_until=NetClass_param)
        Net.cuda()
        print(Net)
        # Net.load_state_dict(torch.load('trained_RobustNet_denoised.torch'))
        criterion = nn.NLLLoss()
        bestTestAcc = 0
        # Train the network, dropping the learning rate by lr_deviation every
        # num_epochs/lr_epoch_reps epochs.
        learning_rate = initial_lr
        # np.long was removed in NumPy 1.24; np.int64 is the equivalent dtype.
        testLabels = torch.from_numpy(testLabels.astype(np.int64))
        testData = torch.from_numpy(testData).type(torch.float32)
        testData -= mean_norm
        testData /= std_norm
        PoissonDataObject = PoissonNoiseLoaderClass(
            meanData, batchSize, train_set_size=train_set_size,
            data_seed=random_seed, use_data_seed=True,
            signal_no_signal=signal_no_signal)
        for i in range(lr_epoch_reps):
            print(f"Training for {num_epochs/lr_epoch_reps} epochs with a "
                  f"learning rate of {learning_rate}..")
            optimizer = optim.Adam(Net.parameters(), lr=learning_rate)
            Net, testAcc = train_poisson(
                round(num_epochs / lr_epoch_reps), numSamplesEpoch, batchSize,
                meanData, testData, testLabels, Net, test_interval, optimizer,
                criterion, dimIn, mean_norm, std_norm, train_test_log, test_eval,
                PoissonDataObject)
            print(f"Test accuracy is {testAcc*100:.2f} percent")
            learning_rate = learning_rate * lr_deviation
        # bestTestAcc = max(bestTestAcc, bestTestAccStep)
        # torch.save(Net.state_dict(), os.path.join(outPath, f"resNet_weights_{fileName}.torch"))
        # print("saved resNet weights to", f"resNet_weights_{fileName}.torch")
        testLabelsFull = torch.from_numpy(testLabelsFull.astype(np.int64))
        testDataFull = torch.from_numpy(testDataFull).type(torch.float32)
        testDataFull -= mean_norm
        testDataFull /= std_norm
        testAcc, nnPredictionLabels = test(batchSize, testDataFull, testLabelsFull,
                                           Net, dimIn,
                                           includePredictionLabels=True,
                                           test_eval=test_eval)
        if len(meanData) == 2 or optimalOPredictionLabel.max() <= 1:
            nnPredictionLabels_dprime = np.copy(nnPredictionLabels)
            nnPredictionLabels_dprime[nnPredictionLabels_dprime > 0] = 1
            nn_dprime = calculate_dprime(nnPredictionLabels_dprime)
        else:
            nn_dprime = -1
        with open(os.path.join(outPath, f"{id_name}_nn_pred_labels.p"), 'wb') as f:
            pickle.dump(nnPredictionLabels, f)
    else:
        testAcc = 0.5
        nn_dprime = -1
    print(f"ResNet accuracy is {testAcc*100:.2f}%")
    print(f"ResNet dprime is {nn_dprime}")
    print(f"Optimal observer accuracy is {accOptimal*100:.2f}%")
    print(f"Optimal observer d index is {d2}")
    print(f"Theoretical d index is {d1}")
    if train_nn or oo:
        if lock is not None:
            lock.acquire()
        resultCSV = os.path.join(outPath, "results.csv")
        file_exists = os.path.isfile(resultCSV)
        with open(resultCSV, 'a') as csvfile:
            if not include_shift and not include_angle:
                headers = ['ResNet_accuracy', 'optimal_observer_accuracy',
                           'theoretical_d_index', 'optimal_observer_d_index',
                           'contrast', 'nn_dprime']
                writer = csv.DictWriter(csvfile, delimiter=';',
                                        lineterminator='\n', fieldnames=headers)
                if not file_exists:
                    writer.writeheader()  # file doesn't exist yet, write a header
                writer.writerow({'ResNet_accuracy': testAcc,
                                 'optimal_observer_accuracy': accOptimal,
                                 'theoretical_d_index': d1,
                                 'optimal_observer_d_index': d2,
                                 'contrast': max(dataContrast).astype(np.float64),
                                 'nn_dprime': nn_dprime})
            elif include_shift:
                headers = ['ResNet_accuracy', 'optimal_observer_accuracy',
                           'theoretical_d_index', 'optimal_observer_d_index',
                           'contrast', 'shift', 'nn_dprime']
                writer = csv.DictWriter(csvfile, delimiter=';',
                                        lineterminator='\n', fieldnames=headers)
                if not file_exists:
                    writer.writeheader()  # file doesn't exist yet, write a header
                writer.writerow({'ResNet_accuracy': testAcc,
                                 'optimal_observer_accuracy': accOptimal,
                                 'theoretical_d_index': d1,
                                 'optimal_observer_d_index': d2,
                                 'contrast': max(dataContrast).astype(np.float64),
                                 'shift': dataShift[1].astype(np.float64),
                                 'nn_dprime': nn_dprime})
            elif include_angle:
                headers = ['ResNet_accuracy', 'optimal_observer_accuracy',
                           'theoretical_d_index', 'optimal_observer_d_index',
                           'contrast', 'angle', 'nn_dprime']
                writer = csv.DictWriter(csvfile, delimiter=';',
                                        lineterminator='\n', fieldnames=headers)
                if not file_exists:
                    writer.writeheader()  # file doesn't exist yet, write a header
                writer.writerow({'ResNet_accuracy': testAcc,
                                 'optimal_observer_accuracy': accOptimal,
                                 'theoretical_d_index': d1,
                                 'optimal_observer_d_index': d2,
                                 'contrast': max(dataContrast).astype(np.float64),
                                 'angle': dataAngle[1].astype(np.float64),
                                 'nn_dprime': nn_dprime})
        print(f'Wrote results to {resultCSV}')
        if lock is not None:
            lock.release()
    endTime = time.time()
    print(f"done! It took {str(datetime.timedelta(seconds=endTime-startTime))} "
          f"hours:min:seconds")
    sys.stdout = sys.stdout.revert()
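# A minimal usage sketch. The .h5 path is hypothetical; any dataset file
# produced by this repo's MATLAB pipeline would work, and all keyword values
# shown are the function's own defaults:
if __name__ == '__main__':
    autoTrain_Resnet_optimalObserver(
        '/share/wandell/data/reith/example_experiment/1_samplesPerClass_freq_1.h5',
        device=0, train_nn=True, oo=True, svm=False, num_epochs=30,
        initial_lr=0.001, lr_epoch_reps=3, test_size=5000, random_seed=42)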
if __name__ == '__main__':
    print("starting out..")
    windows_db = True
    if windows_db:
        path_mat = r'C:\Users\Fabian\Documents\data\windows2rsync\windows_data\multiple_locations_hc\harmonic_frequency_of_1_loc_1_signalGridSize_4\1_samplesPerClass_freq_1_contrast_0_798104925988_loc_1_signalGrid_4.h5'
    else:
        path_mat = '/share/wandell/data/reith/2_class_MTF_freq_experiment/frequency_1/5_samplesPerClass_freq_1_contrast_oo_0_000414616956.h5'
    meanData, meanDataLabels, dataMetric = get_h5mean_data(path_mat,
                                                           includeContrast=True)
    # np.int was removed in NumPy 1.24; plain int is equivalent here.
    sample_numbers = np.logspace(np.log10(500), np.log10(50000), num=15).astype(int)
    test_data, test_labels = poisson_noise_loader(meanData, size=100, numpyData=True)
    # for num in sample_numbers:
    get_svm_accuracy(path_mat, test_data, test_labels, num_samples=200,
                     includeContrast=True)