def __getitem__(self, idx):
    """Return the clean/noisy image pair for the sample at position ``idx``.

    The image at ``self.im_paths[idx]`` is loaded as grayscale, passed
    through ``self.transform`` when one is configured, and a noisy copy is
    produced with ``add_noise``.

    Returns:
        dict: ``{'data': noisy image, 'truth': clean image}``.
    """
    # TODO: allow slices instead of forcing just 1 idx at a time
    # TODO: suppress warning following line outputs
    clean = load_img(self.im_paths[idx], to_grayscale=True)
    if self.transform:
        clean = self.transform(clean)
    return {'data': add_noise(clean), 'truth': clean}
def trainOneEpoch(inputCoor, inputGraph, inputLabel, para, sess, trainOperaion, weight_dict, learningRate):
    """Train for one epoch over every data chunk and return averaged metrics.

    Args:
        inputCoor: sequence of per-chunk coordinate arrays.
        inputGraph: sequence of per-chunk sparse graphs (must support
            ``tocsr()``).
        inputLabel: sequence of per-chunk label arrays.
        para: parameter object; reads ``outputClassN``, ``batchSize``,
            ``weighting_scheme``, ``keep_prob_1`` and ``keep_prob_2``.
        sess: session used to run the training step.
        trainOperaion: dict of placeholders / ops keyed by name.
        weight_dict: class weights used by the ``'weighted'`` scheme.
        learningRate: learning rate fed for this epoch.

    Returns:
        tuple: ``(mean total loss, mean accuracy, mean regularisation loss)``
        averaged first over batches of a chunk, then over chunks.

    Raises:
        ValueError: if ``para.weighting_scheme`` is neither ``'uniform'``
            nor ``'weighted'``.
    """
    # Fail fast: previously an unknown scheme only printed a message and the
    # run then died with a NameError on the undefined ``batchWeight``.
    scheme = para.weighting_scheme
    if scheme not in ('uniform', 'weighted'):
        raise ValueError('please enter the valid weighting scheme')

    batchSize = para.batchSize
    classes = list(range(para.outputClassN))
    fetches = [trainOperaion['train'], trainOperaion['loss_total'],
               trainOperaion['acc'], trainOperaion['loss_reg']]

    dataChunkLoss = []
    dataChunkAcc = []
    dataChunkRegLoss = []
    for i in range(len(inputCoor)):
        xTrain_1, graphTrain_1, labelTrain_1 = inputCoor[i], inputGraph[i], inputLabel[i]
        graphTrain_1 = graphTrain_1.tocsr()
        labelBinarize = label_binarize(labelTrain_1, classes=classes)
        xTrain, graphTrain, labelTrain = shuffle(xTrain_1, graphTrain_1, labelBinarize)

        batch_loss = []
        batch_acc = []
        batch_reg = []
        # Floor division keeps the batch count an int on Python 2 and 3;
        # a trailing partial batch is dropped.
        for batchID in range(len(labelBinarize) // batchSize):
            start = batchID * batchSize
            end = start + batchSize
            batchCoor, batchGraph, batchLabel = get_mini_batch(xTrain, graphTrain, labelTrain, start, end)
            batchGraph = batchGraph.todense()
            batchCoor = add_noise(batchCoor, sigma=0.008, clip=0.02)
            if scheme == 'uniform':
                batchWeight = uniform_weight(batchLabel)
            else:
                batchWeight = weights_calculation(batchLabel, weight_dict)

            feed_dict = {trainOperaion['inputPC']: batchCoor,
                         trainOperaion['inputGraph']: batchGraph,
                         trainOperaion['outputLabel']: batchLabel,
                         trainOperaion['lr']: learningRate,
                         trainOperaion['weights']: batchWeight,
                         trainOperaion['keep_prob_1']: para.keep_prob_1,
                         trainOperaion['keep_prob_2']: para.keep_prob_2}

            _, loss_train, acc_train, loss_reg_train = sess.run(fetches, feed_dict=feed_dict)
            batch_loss.append(loss_train)
            batch_acc.append(acc_train)
            batch_reg.append(loss_reg_train)

        dataChunkLoss.append(np.mean(batch_loss))
        dataChunkAcc.append(np.mean(batch_acc))
        dataChunkRegLoss.append(np.mean(batch_reg))

    train_average_loss = np.mean(dataChunkLoss)
    train_average_acc = np.mean(dataChunkAcc)
    loss_reg_average = np.mean(dataChunkRegLoss)
    return train_average_loss, train_average_acc, loss_reg_average
def main():
    """Run the sample image-processing walkthrough and show the figures.

    Steps: display the RGB channels of a sample image, zoom into a window,
    separate the stain channels, then denoise, deblur and detect cell
    boundaries on the second separated channel. All figures are shown by
    the final ``plt.show()``.
    """
    # Load the sample image and show its colour channels.
    sample_path = os.path.join('samples', 'IMG_6566.JPG')
    img = load_image(sample_path)
    show_custom_channels(img, color_space='rgb', title='Input image')

    # Zoom into a small window so individual colour triplets are visible.
    zoomed = zoom_in(img, 850, 950, height=500, width=500)
    show_custom_channels(zoomed, color_space='rgb', title='Zoomed-in window')

    # Separate the H&E stain channels of the zoomed window.
    channel_lst, cmap_lst = show_custom_channels(
        zoomed,
        color_space='hed',
        title='Immunohistochemical staining colors separation')

    # Work on a copy of the eosin channel (index 1) with its colour map.
    sel_chn = np.copy(channel_lst[1])
    sel_cmap = cmap_lst[1]

    # Denoising demo: add noise, then apply the simple denoiser.
    noised = add_noise(sel_chn)
    denoised = simple_denoise(noised, kernel_size=3)
    denoise_titles = [
        'Original image', 'With Gaussian noise', 'Denoised with Median filter'
    ]
    show_with_cmap([sel_chn, noised, denoised], [sel_cmap] * 3, denoise_titles)

    # Deblurring demo: blur, add noise, then apply the simple deblurrer.
    blurred = blur(sel_chn, block_size=3)
    blurred_noised = add_noise(blurred, sigma=3)
    deblurred = simple_deblur(blurred_noised)
    deblur_titles = [
        'Original image', 'With blurring and noise',
        'Deblurred with Wiener filter'
    ]
    show_with_cmap([sel_chn, blurred_noised, deblurred], [sel_cmap] * 3,
                   deblur_titles)

    # Detect cell boundaries and overlay them on the image.
    detect_cell_boundary(sel_chn, sel_cmap)
    plt.show()
def pretrain(**kwargs):
    """Pre-train a (denoising) autoencoder with MSE reconstruction loss.

    Keyword Args:
        model: module whose forward pass returns a mapping with a ``'rec'``
            entry (the reconstruction). It must already live on the device
            it should train on.
        dataloader: iterable of ``(x, _)`` batches; ``x`` is 4-D
            ``(batch, c, h, w)`` and is flattened per sample before being
            fed to the model.
        epochs: number of epochs to run.
        pth: path the best ``state_dict`` is saved to.
        png: path of the preview image (overwritten every epoch).
        denoising: when truthy, the input is corrupted with ``add_noise``.

    The model state is saved whenever the mean epoch loss improves.
    """
    model = kwargs['model']
    dataloader = kwargs['dataloader']
    epochs = kwargs['epochs']
    pth_file = kwargs['pth']
    png_file = kwargs['png']
    denoising = kwargs['denoising']

    optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3, weight_decay=1e-5)
    criterion = nn.MSELoss()
    # Follow the model's device instead of a hard-coded ``.cuda()`` so the
    # routine also works on CPU or on a non-default GPU.
    device = next(model.parameters()).device

    best_loss = 1e10
    model.train()
    for epoch in range(1, epochs + 1):
        train_loss = 0.0
        last_batch = None
        for x, _ in dataloader:
            _, c, h, w = x.shape
            x = x.view((x.shape[0], -1))
            noisy_x = add_noise(x) if denoising else x
            noisy_x = noisy_x.to(device)
            x = x.to(device)
            # ===================forward=====================
            output = model(noisy_x)['rec']
            loss = criterion(output, x)
            train_loss += loss.item()
            # ===================backward====================
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            last_batch = (x, noisy_x, output, (c, h, w))

        # ===================visualize====================
        # Preview of the first sample of the last batch: clean input, noisy
        # input and reconstruction concatenated along dim 1.
        if last_batch is not None:
            x, noisy_x, output, (c, h, w) = last_batch
            panels = [t[0].view(c, h, w) for t in (x, noisy_x, output)]
            final = torch.cat(panels, dim=1).detach().cpu().numpy()
            final = np.transpose(final, (2, 1, 0))
            final = np.clip(final * 255.0, 0, 255).astype(np.uint8)
            cv2.imwrite(png_file, final)

        # ===================log========================
        train_loss /= len(dataloader)
        logger.info('epoch [{}/{}], MSE_loss:{:.4f}'.format(
            epoch, epochs, train_loss))
        if best_loss > train_loss:
            best_loss = train_loss
            torch.save(model.state_dict(), pth_file)
def check_capacity(self, noise=1):
    """Count how many stored states are reproduced by the update rule.

    Args:
        noise: when truthy, each state is first perturbed with
            ``add_noise(..., noise_frac=0.05)`` and the update rule runs for
            ``self.max_iter`` iterations; when falsy the unperturbed state
            goes through a single iteration.

    Returns:
        int: number of states whose resulting pattern equals the state.
    """
    def recalled(state):
        # The second element of the update rule's result is the new pattern.
        if noise:
            probe = add_noise(state, noise_frac=0.05)
            _, pattern, _ = self.update_rule(probe, self.max_iter, verbose=False)
        else:
            _, pattern, _ = self.update_rule(state, 1, verbose=False)
        return np.array_equal(pattern, state)

    matches = 0
    for state in self.states:
        matches += recalled(state)
    return matches
def __getitem__(self, index):
    """Return a ``(noisy, clean)`` pair cropped at random from a random image.

    ``index`` is ignored: every call opens a randomly chosen image
    ``<data_root>/<1..800>.png``, takes a random ``opt.crop_size`` square
    crop, applies ``self.transform`` and adds noise at ``opt.noise_level``.

    Returns:
        tuple: ``(noise, label)``, the noisy and the clean transformed crop.
    """
    img_index = random.randint(1, 800)
    img = Image.open(self.data_root + "/" + str(img_index) + '.png')
    # PIL reports size as (width, height). The previous code unpacked it as
    # (height, width), so on non-square images the crop offsets were drawn
    # from the wrong ranges and the crop could run past the image border.
    img_W, img_H = img.size
    H_start = random.randint(0, img_H - opt.crop_size)
    W_start = random.randint(0, img_W - opt.crop_size)
    # crop() takes (left, upper, right, lower).
    crop_box = (W_start, H_start, W_start + opt.crop_size, H_start + opt.crop_size)
    img_crop = img.crop(crop_box)
    label = self.transform(img_crop)
    noise = add_noise(label, opt.noise_level)
    return noise, label
def __getitem__(self, idx):
    '''
    Build the clean and noised id sequences for the sentence at ``idx``.

    Return (dict):
        ids: LongTensor, clean ids padded to max_len (+1 per enabled
            sos / eos marker)
        sentence_len: int, number of clean ids before padding
        mask: FloatTensor, 1 for real positions and 0 for padding
        noise_ids: LongTensor, ids of the noised sentence padded to
            self.max_len
        noise_sentence_len: int, number of noised ids before padding
        noise_mask: FloatTensor, mask matching noise_ids
    '''
    def to_long(values):
        return torch.from_numpy(np.array(values)).long()

    def to_float(values):
        return torch.from_numpy(np.array(values)).float()

    sentence = self.corpus[idx]

    # Clean sequence: optional sos/eos markers each extend the padded length.
    ids = self.vocab.sentence2ids(sentence, sos=self.sos, eos=self.eos)
    sentence_len = len(ids)
    max_len = self.max_len
    if self.sos:
        max_len += 1
    if self.eos:
        max_len += 1
    ids = self._pad_ids(ids, max_len)
    mask = [int(pos < sentence_len) for pos in range(max_len)]

    # Noised sequence: converted without sos/eos and padded to self.max_len.
    noise_sentence = add_noise(sentence, self.drop_prob, self.k)
    noise_ids = self.vocab.sentence2ids(noise_sentence)
    noise_sentence_len = len(noise_ids)
    noise_ids = self._pad_ids(noise_ids, self.max_len)
    noise_mask = [int(pos < noise_sentence_len) for pos in range(self.max_len)]

    return {
        "ids": to_long(ids),
        "sentence_len": sentence_len,
        "mask": to_float(mask),
        "noise_ids": to_long(noise_ids),
        "noise_sentence_len": noise_sentence_len,
        "noise_mask": to_float(noise_mask),
    }
def __preproc__(self, file):
    """Parse a mesh from an open text file and return sampled points.

    The first line is skipped, the second holds three space-separated
    integers (vertex count, face count and a third value that is not used),
    followed by one line per vertex and one line per face. The first integer
    of every face line is dropped.

    Points are sampled from the mesh and normalised with
    ``utils.cent_norm``. Unless ``self.valid`` is truthy, noise is added and
    the cloud is rotated about z by a random angle.

    Returns:
        numpy.ndarray: the sampled points as ``float32``.
    """
    def tokens():
        return file.readline().strip().split(' ')

    file.readline()  # header line, ignored
    n_verts, n_faces, _ = [int(tok) for tok in tokens()]

    verts = []
    for _ in range(n_verts):
        verts.append([float(tok) for tok in tokens()])

    faces = []
    for _ in range(n_faces):
        # The leading integer of a face line is discarded.
        faces.append([int(tok) for tok in tokens()][1:])

    points = utils.sample_points(np.array(verts), faces)
    points = utils.cent_norm(points)
    if not self.valid:
        theta = random.random() * 360
        points = utils.rotation_z(utils.add_noise(points), theta)
    return np.array(points, dtype="float32")
def __getitem__(self, idx):
    """Return clean/noisy magnitude spectrograms and waveforms for one file.

    The file name is ``self.wav[idx][0]`` relative to ``self.val_dir``. The
    noise signal is chosen by ``self.noise`` (a recording under ``noise/``,
    or generated Gaussian noise for ``'white'``) and mixed in with
    ``utils.add_noise`` at ``self.snr``.

    Returns:
        dict: ``clean_mag`` / ``noise_mag`` (transposed STFT magnitudes),
        ``noise_audio`` and ``clean_audio`` (waveforms).

    Raises:
        ValueError: if ``self.noise`` is not a known noise name (previously
            this surfaced as a NameError on ``sub_noise``).
    """
    # Recorded noise types and the file each one is read from.
    noise_files = {
        'babble': 'noise/babble_test.wav',
        'factory1': 'noise/factory1_test.wav',
        'engine': 'noise/engine_test.wav',
        'ops': 'noise/ops.wav',
        'bucc': 'noise/bucc.wav',
        'dishes': 'noise/dishes.wav',
        'bike': 'noise/bike.wav',
        'tap': 'noise/tap.wav',
    }

    # train file will have audio, type noise, SNR
    file_name = self.wav[idx][0]
    file_path = os.path.join(self.val_dir, file_name)
    # ``sr`` is passed by keyword: recent librosa releases no longer accept
    # it positionally.
    audio, _ = librosa.load(file_path, sr=self.fs)
    clean_spect = librosa.stft(audio, n_fft=self.n_fft, hop_length=self.hop_size)

    if self.noise in noise_files:
        sub_noise, _ = librosa.load(noise_files[self.noise], sr=self.fs)
    elif self.noise == 'white':
        sub_noise = np.random.normal(0, 1, audio.shape)
    else:
        raise ValueError('unknown noise type: {!r}'.format(self.noise))

    noise_audio = utils.add_noise(audio, sub_noise, self.snr)
    noise_spect = librosa.stft(noise_audio, n_fft=self.n_fft, hop_length=self.hop_size)

    # Only the magnitudes are returned; the phases are discarded.
    magC, _ = librosa.magphase(clean_spect)
    magN, _ = librosa.magphase(noise_spect)
    sample = {'clean_mag': np.transpose(magC),
              'noise_mag': np.transpose(magN),
              'noise_audio': noise_audio,
              'clean_audio': audio}
    return sample
def data_stats(self, sample_size):
    """Load, or compute and cache, per-row mean/std of the a-priori SNR (dB).

    If ``<noise_dir>stats.npz`` exists, ``self.mu`` and ``self.sigma`` are
    read from it. Otherwise ``sample_size`` random speech/noise pairs are
    mixed at a random SNR level, their instantaneous a-priori SNR is
    computed, and the statistics are stored on ``self`` and saved to that
    file (approach taken from DeepXi).

    Raises:
        ValueError: if the stacked samples are not 2-D.
    """
    stats_path = self.noise_dir + 'stats.npz'
    if os.path.exists(stats_path):
        with np.load(stats_path) as cached:
            self.mu = cached['mu_hat']
            self.sigma = cached['sigma_hat']
        return

    print('Start saving stats')
    collected = []
    for _ in range(sample_size):
        snr = random.choice(self.snr_level)
        speech_file = random.choice(self.speech_wav_files)
        speech_src, _ = librosa.load(speech_file, sr=self.sr)
        noise_file = random.choice(self.noise_wav_files)
        noise_src, _ = librosa.load(noise_file, sr=self.sr)

        # Random noise segment of the same length as the speech.
        n_speech = len(speech_src)
        start_idx = random.randint(0, len(noise_src) - n_speech)
        noise_src = noise_src[start_idx:start_idx + n_speech]

        # Only the scale factor for the requested SNR is needed here.
        _, alpha = utils.add_noise(speech_src, noise_src, snr)
        noise_src = noise_src * alpha

        # STFT magnitudes of speech and scaled noise.
        _, sample_speech_mag, _ = utils.analysis(
            speech_src, self.frame_len, self.frame_shift, self.n_fft)
        _, sample_noise_mag, _ = utils.analysis(
            noise_src, self.frame_len, self.frame_shift, self.n_fft)

        # Instantaneous a-priori SNR (dB) between the two spectra.
        snr_db = utils.prior_snr(sample_speech_mag, sample_noise_mag)
        collected.append(np.squeeze(snr_db))

    samples = np.hstack(collected)
    if len(samples.shape) != 2:
        raise ValueError('Incorrect shape for sample.')

    mu_hat = np.mean(samples, axis=1)
    sigma_hat = np.std(samples, axis=1)
    self.mu, self.sigma = mu_hat, sigma_hat
    np.savez(stats_path, mu_hat=mu_hat, sigma_hat=sigma_hat)
    print('Sample statistics saved.')
def env_get(self, l):
    """
    Return the most recent (obs, rews, dones, infos) of vectorized env ``l``.

    On step 0 the env is reset and the result is (ob, None, all-True dones,
    {}). Later, a cached result is returned if present; otherwise
    ``step_wait`` is called, noise is applied to the observation, and the
    result is cached.
    """
    state = self.I
    if state.step_count == 0:
        # Zeroth step with a new venv: reset instead of stepping.
        ob = state.venvs[l].reset()
        first = (ob, None, np.ones(state.lump_stride, bool), {})
        state.env_results[l] = first
        return first

    cached = state.env_results[l]
    if cached is not None:
        return cached

    stepped = state.venvs[l].step_wait()
    noisy_obs = add_noise(stepped[0], noise_p=self.noise_p, noise_type=self.noise_type)
    fresh = (noisy_obs, *stepped[1:])
    state.env_results[l] = fresh
    return fresh
def pretrain(**kwargs):
    """Pre-train a denoising autoencoder and checkpoint after every epoch.

    Keyword Args:
        data: dataset wrapped in a ``DataLoader`` (batch size 128, shuffled).
        model: network to train; its output is reshaped to
            ``(batch, 28 * 28)`` before the MSE loss against the clean input.
        num_epochs: last epoch (exclusive) to train to.
        savepath: forwarded to ``save_checkpoint``.
        checkpoint: dict with ``"epoch"`` (epoch to resume from) and
            ``"best"`` (best loss so far); ``"best"`` is updated in place.

    The loss that is logged and compared is that of the last batch of each
    epoch.
    """
    data = kwargs["data"]
    model = kwargs["model"]
    num_epochs = kwargs["num_epochs"]
    savepath = kwargs["savepath"]
    checkpoint = kwargs["checkpoint"]
    start_epoch = checkpoint["epoch"]

    # Optimise the model that is actually trained below; this used to read
    # parameters from an unrelated global ``autoencoder``.
    parameters = list(model.parameters())
    optimizer = torch.optim.Adam(parameters, lr=1e-3, weight_decay=1e-5)
    criterion = nn.MSELoss()
    train_loader = DataLoader(dataset=data, batch_size=128, shuffle=True)

    for epoch in range(start_epoch, num_epochs):
        # ``batch`` rather than ``data`` so the dataset name is not shadowed.
        for batch in train_loader:
            img = batch.float()
            noisy_img = add_noise(img)
            noisy_img = noisy_img.to(device)
            img = img.to(device)
            # ===================forward=====================
            output = model(noisy_img)
            output = output.squeeze(1)
            output = output.view(output.size(0), 28 * 28)
            loss = criterion(output, img)
            # ===================backward====================
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        # ===================log========================
        state = loss.item()
        print(
            "epoch [{}/{}], MSE_loss:{:.4f}".format(
                epoch + 1, num_epochs, state
            )
        )
        is_best = False
        if state < checkpoint["best"]:
            checkpoint["best"] = state
            is_best = True
        # Store the best loss seen so far (not merely the latest one) so a
        # resumed run compares against the true best.
        save_checkpoint(
            {"state_dict": model.state_dict(), "best": checkpoint["best"], "epoch": epoch},
            savepath,
            is_best,
        )
def evaluate_model(data, sess, model, global_step, num_eval_examples):
    """Computes the cost associated with the model and plots sample outputs.

    Args:
      data: pointer to the MNIST data
      sess: Session object.
      model: Instance of DAE; the model to evaluate.
      global_step: Global step of the model checkpoint.
      num_eval_examples: Number of examples to run the evaluation on.
        NOTE(review): currently unused. The evaluation has always been
        limited to a single validation batch because the computed batch
        count was overwritten with 1. Confirm whether full evaluation is
        wanted before changing ``num_eval_batches``.
    """
    # Evaluation is limited to one batch (see the note in the docstring).
    num_eval_batches = 1

    # Initialise the loss.
    sum_losses = 0.
    x_reconstructed_images = []
    for _ in range(num_eval_batches):
        # Read batch.
        batch = data.validation.next_batch(model.config.batch_size)[0]
        # Create a noisy version of the batch.
        noisy_batch = utils.add_noise(batch)
        # Prepare the dictionary to feed the data to the graph.
        feed_dict = {"images:0": batch,
                     "noisy_images:0": noisy_batch,
                     "phase_train:0": False}
        # Fetch loss and reconstructions in a single run rather than two
        # separate forward passes over the same batch.
        loss, x_reconstructed_images = sess.run(
            [model.total_loss, model.reconstructed_images], feed_dict=feed_dict)
        sum_losses += np.sum(loss)

    # Plot up to 10 reconstructions; small batches no longer raise IndexError.
    for idx in range(min(10, len(x_reconstructed_images))):
        image = np.reshape(x_reconstructed_images[idx], [28, 28])
        MRF.plot_image(image, 'image', "samples/image" + str(idx))

    sum_losses = sum_losses / num_eval_batches
    print("Step:", '%06d' % (global_step), ",cost=", "{:.9f}".format(sum_losses))
def get_train_data(self):
    """Return the next (speech, noise, noisy) block, refilling buffers first.

    While the noisy buffer holds fewer than ``block_size * frame_len``
    samples, the next speech file replaces the speech buffer, the noise
    buffer is extended with random noise files until it is at least as long,
    and a new noisy buffer is mixed at a random SNR. After the block is
    taken, every buffer advances by ``block_shift * frame_len`` samples.

    Returns:
        tuple: ``(speech_block, noise_block, noisy_block)``, each with a
        new leading axis of length 1.
    """
    block_len = self.block_size * self.frame_len
    hop_len = self.block_shift * self.frame_len

    while len(self.noisy_buffer) < block_len:
        new_speech, _ = librosa.load(self.speech_wav_files[self.wav_idx], sr=self.sr)
        self.speech_buffer = utils.normalize(new_speech)

        # Top up the noise buffer until it covers the whole utterance.
        while len(self.noise_buffer) < len(self.speech_buffer):
            noise_path = random.choice(self.noise_wav_files)
            new_noise, _ = librosa.load(noise_path, sr=self.sr)
            new_noise = utils.normalize(new_noise)
            self.noise_buffer = np.concatenate((self.noise_buffer, new_noise))

        snr = random.choice(self.snr_level)
        matching_noise = self.noise_buffer[:len(self.speech_buffer)]
        self.noisy_buffer, _ = utils.add_noise(
            self.speech_buffer, matching_noise, snr, normalization=False)
        self.wav_idx += 1

    # Take the current block from the head of each buffer ...
    speech_block = self.speech_buffer[:block_len]
    noise_block = self.noise_buffer[:block_len]
    noisy_block = self.noisy_buffer[:block_len]

    # ... then slide every buffer forward by one block shift.
    self.speech_buffer = self.speech_buffer[hop_len:]
    self.noise_buffer = self.noise_buffer[hop_len:]
    self.noisy_buffer = self.noisy_buffer[hop_len:]

    # Add a leading axis of length 1 to each block.
    return (speech_block[np.newaxis, :],
            noise_block[np.newaxis, :],
            noisy_block[np.newaxis, :])
def build_test_pairs(self):
    """Create one noisy test file per speech file, unless they already exist.

    Nothing is done when ``self.noisy_wav_files`` already has as many
    entries as ``self.speech_wav_files``. Otherwise each speech file is
    mixed with a random segment of a random noise file at a random SNR from
    ``self.snr_level``, written as 16-bit PCM to ``self.noisy_dir``, and the
    new path is appended to ``self.noisy_wav_files``.
    """
    if len(self.noisy_wav_files) == len(self.speech_wav_files):
        return

    for speech_file in self.speech_wav_files:
        speech_src, _ = sf.read(speech_file)
        speech_len = len(speech_src)

        noise_file = random.choice(self.noise_wav_files)
        noise_src, _ = sf.read(noise_file)
        # Re-draw until the noise covers the whole utterance. This used to
        # compare against len(speech_file), the length of the *file name*,
        # so short noise files slipped through and randint() below failed.
        while len(noise_src) < speech_len:
            noise_file = random.choice(self.noise_wav_files)
            noise_src, _ = sf.read(noise_file)

        snr = random.choice(self.snr_level)
        # Noise type is the text between the last '/' and the first '_'.
        noise_type = noise_file[noise_file.rfind('/') + 1:noise_file.find('_')]
        noisy_file = self.noisy_dir + os.path.basename(speech_file)[:-4] \
            + '_' + noise_type + '_' + str(snr) + 'dB.wav'

        start_idx = random.randint(0, len(noise_src) - speech_len)
        noise_seg = noise_src[start_idx:start_idx + speech_len]
        noisy_src, _ = utils.add_noise(speech_src, noise_seg, snr)
        sf.write(noisy_file, noisy_src, samplerate=self.sr, subtype='PCM_16')
        self.noisy_wav_files.append(noisy_file)
# Evaluate the models trained on clean data against spectra with added noise.
spectra = np.array(file['spectrum'])
phi, theta, lp = (np.array(file[key]) for key in ('phi', 'theta', 'lp'))

# One target column per quantity: phi, theta, lp.
target = np.concatenate(
    [column.reshape(-1, 1) for column in (phi, theta, lp)], axis=1)
noise_estimation = utils.get_std(spectra.copy(), target.copy())

noise_accuracy = []
# One split seed per fold / trained model.
seed = [628, 693, 847, 621, 861, 409, 74, 306, 884, 777]
for i in range(k):
    # The RNG is re-seeded on every fold, so each fold sees the same noise draw.
    np.random.seed(42)
    spectra_noise = utils.add_noise(spectra.copy(), noise_estimation.copy())
    x_train, y_train, x_test, y_test = split_data_for_noise_test(
        spectra_noise.copy(), target.copy(), seed[i])
    x_train, x_test, _, _ = utils.preprocessing(x_train, x_test)
    y_train, y_test, _, _ = utils.preprocessing(y_train, y_test)

    y_pred = model_fcnn[i].predict(x_test)
    y_pred, y_test = utils.postprocessing(y_pred, y_test, y_min, y_max)
    fold_frame = utils.create_df(y_test, y_pred)
    noise_accuracy.append(utils.metric(fold_frame))
    if i == 0:
        plot_corr_model(y_test, y_pred, 'corr_noise.png')
print('model trained on clean data and tested on noisy data')
def main(args):
    """Train an encoder/generator autoencoder on image patches.

    Reads its configuration from ``args`` (seed, data paths, patch and batch
    sizes, augmentation flags, network sizes, ``lr``, ``niter``, ``sigma``,
    ``nprint``, ``save_best``, ``outf``). Each iteration trains on one
    training batch and, when ``args.image_valid`` is set, evaluates on one
    validation batch. Every ``args.nprint`` iterations it logs losses and
    saves sample images, the loss curve and an encoder checkpoint under
    ``args.outf``.
    """
    utils.seedme(args.seed)
    cudnn.benchmark = True
    device = torch.device(
        'cuda' if torch.cuda.is_available() and not args.nocuda else 'cpu')
    # Create the output directory without going through a shell.
    if args.outf and not os.path.isdir(args.outf):
        os.makedirs(args.outf)

    dataloader_train = utils.get_patchloader(args.image_train,
                                             resize=args.resize_train,
                                             patch_size=args.patch_size,
                                             batch_size=args.batch_size_train,
                                             fliplr=args.fliplr,
                                             flipud=args.flipud,
                                             rot90=args.rot90,
                                             smooth=args.smooth)
    if args.image_valid:
        dataloader_valid = utils.get_patchloader(
            args.image_valid,
            resize=args.resize_valid,
            patch_size=args.patch_size,
            batch_size=args.batch_size_valid,
            fliplr=args.fliplr,
            flipud=args.flipud,
            rot90=args.rot90,
            smooth=args.smooth)

    netG = models.DCGAN_G(image_size=args.patch_size, nc=args.nc,
                          nz=args.ncode, ngf=args.ngf).to(device)
    netE = models.Encoder(patch_size=args.patch_size, nc=args.nc,
                          ncode=args.ncode, ndf=args.ndf).to(device)
    # Single-argument print() calls behave the same on Python 2 and 3.
    print(netG)
    print(netE)

    optimizer = optim.Adam(list(netG.parameters()) + list(netE.parameters()),
                           lr=args.lr, amsgrad=True)
    loss_func = nn.MSELoss()

    losses = []
    losses_valid = []
    best_loss = 1e16
    for i in range(args.niter):
        optimizer.zero_grad()
        x = next(dataloader_train).to(device)
        if args.sigma:
            x = utils.add_noise(x, args.sigma)
        y = netG(netE(x))
        loss = loss_func(y, x)
        loss.backward()
        optimizer.step()

        if args.image_valid:
            with torch.no_grad():
                netG.eval()
                netE.eval()
                x_ = next(dataloader_valid).to(device)
                if args.sigma:
                    # Noise the *validation* batch. This used to pass the
                    # training batch ``x``, so validation silently ran on
                    # training data whenever sigma was set.
                    x_ = utils.add_noise(x_, args.sigma)
                y_ = netG(netE(x_))
                loss_valid = loss_func(y_, x_)
                netG.train()
                netE.train()
            losses_valid.append(loss_valid.item())

        # Track the best loss (validation when available, else training);
        # an improvement must exceed 1e-3 to count.
        _loss = loss_valid.item() if args.image_valid else loss.item()
        if _loss + 1e-3 < best_loss:
            best_loss = _loss
            print("[{}/{}] best loss: {}".format(i + 1, args.niter, best_loss))
            if args.save_best:
                torch.save(netE.state_dict(),
                           '{}/netD_best.pth'.format(args.outf))

        losses.append(loss.item())
        if (i + 1) % args.nprint == 0:
            if args.image_valid:
                print('[{}/{}] train: {}, test: {}, best: {}'.format(
                    i + 1, args.niter, loss.item(), loss_valid.item(),
                    best_loss))
            else:
                print('[{}/{}] train: {}, best: {}'.format(
                    i + 1, args.niter, loss.item(), best_loss))
            logger.vutils.save_image(torch.cat([x, y], dim=0),
                                     '{}/train_{}.png'.format(args.outf, i + 1),
                                     normalize=True)
            fig, ax = plt.subplots()
            # Median-filtered loss curve; the filter edges are trimmed.
            ax.semilogy(scipy.signal.medfilt(losses, 11)[5:-5], label='train')
            if args.image_valid:
                logger.vutils.save_image(torch.cat([x_, y_], dim=0),
                                         '{}/test_{}.png'.format(args.outf, i + 1),
                                         normalize=True, nrow=32)
                ax.semilogy(scipy.signal.medfilt(losses_valid, 11)[5:-5],
                            label='valid')
            fig.legend()
            fig.savefig('{}/loss.png'.format(args.outf))
            plt.close(fig)
            torch.save(netE.state_dict(),
                       '{}/netD_iter_{}.pth'.format(args.outf, i + 1))
def trainOneEpoch(inputCoor, inputGraph, inputLabel, para, sess, trainOperaion, weight_dict, learningRate):
    """Train for one epoch (weighted or uniform gradient descent).

    Args:
        inputCoor: input coordinates per chunk, (B, N, 3).
        inputGraph: input graph per chunk, (B, N*N), sparse.
        inputLabel: labels per chunk, (B, 1).
        para: global parameters; reads ``batchSize``, ``weighting_scheme``,
            ``clusterNumberL1``, ``nearestNeighborL1``, ``pointNumber``,
            ``keep_prob_1``, ``keep_prob_2`` and, when present,
            ``outputClassN`` (defaults to 40 classes otherwise).
        sess: session used to run the training step.
        trainOperaion: placeholder / op dictionary.
        weight_dict: weighting scheme used for weighted gradient descent.
        learningRate: learning rate for the current epoch.

    Returns:
        tuple: average loss, accuracy and regularization loss over the
        training set.

    Raises:
        ValueError: if ``para.weighting_scheme`` is neither ``'uniform'``
            nor ``'weighted'``.
    """
    # Fail fast: previously an unknown scheme only printed a message and the
    # run then died with a NameError on the undefined ``batchWeight``.
    scheme = para.weighting_scheme
    if scheme not in ('uniform', 'weighted'):
        raise ValueError('please enter a valid weighting scheme')

    batchSize = para.batchSize
    # Class count was hard-coded to 40; honour para.outputClassN when set.
    classes = list(range(getattr(para, 'outputClassN', 40)))
    fetches = [trainOperaion['train'], trainOperaion['loss_total'],
               trainOperaion['acc'], trainOperaion['loss_reg']]

    dataChunkLoss = []
    dataChunkAcc = []
    dataChunkRegLoss = []
    for i in range(len(inputLabel)):
        xTrain_1, graphTrain_1, labelTrain_1 = inputCoor[i], inputGraph[i], inputLabel[i]
        graphTrain_1 = graphTrain_1.tocsr()
        labelBinarize = label_binarize(labelTrain_1, classes=classes)
        xTrain, graphTrain, labelTrain = shuffle(xTrain_1, graphTrain_1, labelBinarize)

        batch_loss = []
        batch_acc = []
        batch_reg = []
        # Floor division keeps the batch count an int on Python 2 and 3;
        # a trailing partial batch is dropped.
        for batchID in range(len(labelBinarize) // batchSize):
            start = batchID * batchSize
            end = start + batchSize
            batchCoor, batchGraph, batchLabel = get_mini_batch(xTrain, graphTrain, labelTrain, start, end)
            batchGraph = batchGraph.todense()
            batchCoor = add_noise(batchCoor, sigma=0.008, clip=0.02)
            if scheme == 'uniform':
                batchWeight = uniform_weight(batchLabel)
            else:
                batchWeight = weights_calculation(batchLabel, weight_dict)

            # Sample centroids and build the graph over them for the
            # second-level inputs.
            batchIndexL1, centroid_coordinates = farthest_sampling_new(
                batchCoor, M=para.clusterNumberL1, k=para.nearestNeighborL1,
                batch_size=batchSize, nodes_n=para.pointNumber)
            batchMiddleGraph = middle_graph_generation(
                centroid_coordinates, batch_size=batchSize, M=para.clusterNumberL1)

            feed_dict = {trainOperaion['inputPC']: batchCoor,
                         trainOperaion['inputGraph']: batchGraph,
                         trainOperaion['outputLabel']: batchLabel,
                         trainOperaion['lr']: learningRate,
                         trainOperaion['weights']: batchWeight,
                         trainOperaion['keep_prob_1']: para.keep_prob_1,
                         trainOperaion['keep_prob_2']: para.keep_prob_2,
                         trainOperaion['batch_index_l1']: batchIndexL1,
                         trainOperaion['l2Graph']: batchMiddleGraph,
                         trainOperaion['batch_size']: para.batchSize}

            _, loss_train, acc_train, loss_reg_train = sess.run(fetches, feed_dict=feed_dict)
            batch_loss.append(loss_train)
            batch_acc.append(acc_train)
            batch_reg.append(loss_reg_train)

        dataChunkLoss.append(np.mean(batch_loss))
        dataChunkAcc.append(np.mean(batch_acc))
        dataChunkRegLoss.append(np.mean(batch_reg))

    train_average_loss = np.mean(dataChunkLoss)
    train_average_acc = np.mean(dataChunkAcc)
    loss_reg_average = np.mean(dataChunkRegLoss)
    return train_average_loss, train_average_acc, loss_reg_average
# Denoise one image with the trained network and save the denoised and
# noisy versions under ./output/sigma<noise_level>/.
transform1 = T.ToTensor()
transform2 = T.ToPILImage()
with torch.no_grad():
    net = DPDNN()
    net = nn.DataParallel(net)
    net.load_state_dict(torch.load(opt.load_model_path))
    img = Image.open(label_img)
    # img.show()
    label = np.array(img).astype(np.float32)  # label:0~255
    # PIL reports size as (width, height). The previous code unpacked it as
    # (height, width), which scrambled non-square images when the tensors
    # were resized below.
    img_W, img_H = img.size
    img = transform1(img)
    img_noise = add_noise(img, opt.noise_level).resize_(1, 1, img_H, img_W)
    output = net(img_noise)
    output = output.cpu()
    output = output.resize_(img_H, img_W)
    output = torch.clamp(output, min=0, max=1)
    output = transform2(output)
    # output.show()
    # To save the output (denoised) image, the output folder must already exist.
    output.save('./output/sigma%d/%d.png' % (opt.noise_level, i))
    img_noise = transform2(img_noise.resize_(img_H, img_W))
    # img_noise.show()
    img_noise.save('./output/sigma%d/%d_noise.png' % (opt.noise_level, i))
    output = np.array(output)  # output:0~255
def sgd(all_input_params):
    """Grid-search noisy mini-batch SGD hyper-parameters per epsilon.

    Args:
        all_input_params: tuple ``(X_without_bias, y, amount_in_interval,
            random_state)``: predictors and targets as numpy arrays, the
            sample counts used for the learning curve, and the seed for
            this process.

    Returns:
        dict: ``results[epsilon][n]`` holds ``'parameters'`` (the selected
        ``(batch_size, weight_decay)``) and ``'error_rate'`` (its
        cross-validated error).
    """
    started = time.time()
    X_without_bias, y, amount_in_interval, random_state = all_input_params

    # Random projection to 50 components, as in the (second) paper.
    projector = random_projection.GaussianRandomProjection(n_components=50)
    X_without_bias = projector.fit_transform(X_without_bias)

    # Put a constant bias column in front of the projected features.
    records, attributes = np.shape(X_without_bias)
    X = np.ones((records, attributes + 1))
    X[:, 1:] = X_without_bias

    # multiprocessing does not vary the seed by itself, so seed explicitly
    # and shuffle so each process uses different data.
    np.random.seed(random_state)
    X, y = shuffle(X, y)

    num_dimensions = len(X[0])
    num_in_batch = [1, 2, 5, 10, 50, 75, 100, 150, 200, 250, 300, 400, 500,
                    1000, 2000]
    epochs = 1
    k_splits = 5
    # Step size decays as 1/sqrt(step); the schedule restarts every epoch.
    learning_rates = [1 / np.sqrt(step + 1)
                      for _ in range(epochs)
                      for step in range(amount_in_interval[-1])]
    # inf makes the noise go to zero, i.e. no noise.
    epsilons = [0.1, 1, 10, float('Inf')]
    weight_decays = [3.0, 2.0, 1.5, 1, 0.5, 10**(-1), 10**(-2), 10**(-5),
                     10**(-20)]

    optimal_results = {}
    kf = KFold(n_splits=k_splits)
    for epsilon in epsilons:
        per_epsilon = optimal_results.setdefault(epsilon, {})
        for n in amount_in_interval:
            per_n = per_epsilon.setdefault(n, {})
            # Parallel lists: one entry per (weight_decay, batch_size) tried.
            tried_batch, tried_decay, tried_error = [], [], []
            for weight_decay in weight_decays:
                for batch_size in num_in_batch:
                    accuracy_total = 0
                    for train_index, validation_index in kf.split(X[:n]):
                        X_train, y_train = X[train_index], y[train_index]
                        X_validation = X[validation_index]
                        y_validation = y[validation_index]
                        weights = np.array([0.0] * num_dimensions)
                        t = 0
                        for _ in range(epochs):
                            # Reshuffle so mini-batches differ between epochs.
                            X_train, y_train = shuffle(X_train, y_train)
                            for j in range(0, len(y_train), batch_size):
                                X_batch = X_train[j:j + batch_size]
                                y_batch = y_train[j:j + batch_size]
                                # NOTE(review): this is the scalar sum of the
                                # weights, not the weight vector; confirm
                                # that is the intended L2 term.
                                l2_derivative = sum(weights)
                                # Noise for all dimensions.
                                noise = utils.add_noise(num_dimensions, epsilon)
                                learning_rate = learning_rates[t]
                                # Take a step towards the optimum.
                                weights -= learning_rate * (
                                    weight_decay * l2_derivative
                                    + utils.loss_derivative(X_batch, y_batch, weights) / batch_size
                                    + noise / batch_size)
                                t += 1

                        # Score the trained weights on the held-out fold.
                        num_correct = 0
                        for sample, truth in zip(X_validation, y_validation):
                            if truth == utils.sigmoid_prediction(sample, weights):
                                num_correct += 1
                        accuracy_total += num_correct / len(y_validation)

                    accuracy_total /= k_splits
                    tried_error.append(1 - accuracy_total)
                    tried_batch.append(batch_size)
                    tried_decay.append(weight_decay)

            # Pick the best configuration from the cross-validation.
            optimal_index = utils.get_min_index(tried_error, tried_batch)
            per_n['parameters'] = (tried_batch[optimal_index],
                                   tried_decay[optimal_index])
            per_n['error_rate'] = tried_error[optimal_index]
        print('tuning for epsilon: {} done, time from start {}'.format(
            epsilon, time.time() - started), flush=True)
    return optimal_results
def sgd(all_input_params):
    """Grid-search noisy mini-batch SGD hyper-parameters per epsilon.

    Args:
        all_input_params: tuple ``(X_without_bias, y, amount_in_interval,
            random_state)``: predictors and targets as numpy arrays, the
            sample counts used for the learning curve, and the seed for
            this process.

    Returns:
        dict: ``results[epsilon][n]`` holds ``'parameters'`` (the selected
        ``(batch_size, weight_decay)``) and ``'error_rate'`` (its
        cross-validated error).
    """
    started = time.time()
    X_without_bias, y, amount_in_interval, random_state = all_input_params

    # Random projection to 50 components, as in the (second) paper.
    projector = random_projection.GaussianRandomProjection(n_components=50)
    X_without_bias = projector.fit_transform(X_without_bias)

    # Put a constant bias column in front of the projected features.
    records, attributes = np.shape(X_without_bias)
    X = np.ones((records, attributes + 1))
    X[:, 1:] = X_without_bias

    # multiprocessing does not vary the seed by itself, so seed explicitly
    # and shuffle so each process uses different data.
    np.random.seed(random_state)
    X, y = shuffle(X, y)

    num_dimensions = len(X[0])
    num_in_batch = [1, 2, 5, 10, 50, 75, 100, 150, 200, 250, 300, 350, 400]
    epochs = 1
    k_splits = 5
    # Step size decays as 1/sqrt(step); the schedule restarts every epoch.
    learning_rates = [1 / np.sqrt(step + 1)
                      for _ in range(epochs)
                      for step in range(amount_in_interval[-1])]
    # inf makes the noise go to zero, i.e. no noise.
    epsilons = [0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 10,
                float('Inf')]
    weight_decays = [1, 0.5, 10**(-1), 10**(-2), 10**(-5), 10**(-8),
                     10**(-11), 10**(-15)]

    optimal_results = {}
    kf = KFold(n_splits=k_splits)
    for epsilon in epsilons:
        per_epsilon = optimal_results.setdefault(epsilon, {})
        for n in amount_in_interval:
            per_n = per_epsilon.setdefault(n, {})
            # Parallel lists: one entry per (weight_decay, batch_size) tried.
            tried_batch, tried_decay, tried_error = [], [], []
            for weight_decay in weight_decays:
                for batch_size in num_in_batch:
                    accuracy_total = 0
                    for train_index, validation_index in kf.split(X[:n]):
                        X_train, y_train = X[train_index], y[train_index]
                        X_validation = X[validation_index]
                        y_validation = y[validation_index]
                        weights = np.array([0.0] * num_dimensions)
                        t = 0
                        for _ in range(epochs):
                            # Reshuffle so mini-batches differ between epochs.
                            X_train, y_train = shuffle(X_train, y_train)
                            for j in range(0, len(y_train), batch_size):
                                X_batch = X_train[j:j + batch_size]
                                y_batch = y_train[j:j + batch_size]
                                # NOTE(review): this is the scalar sum of the
                                # weights, not the weight vector; confirm
                                # that is the intended L2 term.
                                l2_derivative = sum(weights)
                                # Noise for all dimensions.
                                noise = utils.add_noise(num_dimensions, epsilon)
                                learning_rate = learning_rates[t]
                                # Take a step towards the optimum.
                                weights -= learning_rate * (
                                    weight_decay * l2_derivative
                                    + utils.loss_derivative(X_batch, y_batch, weights) / batch_size
                                    + noise / batch_size)
                                t += 1

                        # Score the trained weights on the held-out fold.
                        num_correct = 0
                        for sample, truth in zip(X_validation, y_validation):
                            if truth == utils.sigmoid_prediction(sample, weights):
                                num_correct += 1
                        accuracy_total += num_correct / len(y_validation)

                    accuracy_total /= k_splits
                    tried_error.append(1 - accuracy_total)
                    tried_batch.append(batch_size)
                    tried_decay.append(weight_decay)

            # Pick the best configuration from the cross-validation.
            optimal_index = utils.get_min_index(tried_error, tried_batch)
            per_n['parameters'] = (tried_batch[optimal_index],
                                   tried_decay[optimal_index])
            per_n['error_rate'] = tried_error[optimal_index]
        print('tuning for epsilon: {} done, time from start {}'.format(
            epsilon, time.time() - started), flush=True)
    return optimal_results
def deeper(layer, activation_fn=nn.ReLU(), bnorm=True, prefix='', filters=16):
    r"""
    Function-preserving "deeper" operator: stack a new layer on top of
    ``layer``. Implemented after the Net2Net paper.

    For a dense layer the new weight matrix is the identity and the bias is
    zero. For a convolutional layer the new kernel holds a single 1 at the
    centre tap of the matching input channel (then passed through
    ``add_noise``) and a zero bias. This only preserves the function for the
    ReLU activation, because ReLU is idempotent.

    :param layer: ``nn.Linear`` or ``nn.Conv2d`` on top of which the new layer
        is added.
    :param activation_fn: Kept for interface compatibility. The current
        implementation does not insert an activation module.
    :param bnorm: If True, a batch normalisation layer is inserted between
        ``layer`` and the new layer.
    :param prefix: String prepended to the module names inside the returned
        container (``_conv``, ``_bnorm``, ``_conv_new``).
    :param filters: Number of input and output channels of the new
        convolutional layer. Not used for dense layers.
    :return: ``th.nn.Sequential`` holding ``layer``, the optional batch-norm
        layer and the new layer, in that order.
    :raises RuntimeError: If ``layer`` is neither ``nn.Linear`` nor
        ``nn.Conv2d``.
    """
    # Single-argument parenthesised form prints the same text on Python 2
    # and Python 3.
    print('Net2Net Deeper...')

    if not isinstance(layer, (nn.Linear, nn.Conv2d)):
        raise RuntimeError("{} Module not supported".format(
            layer.__class__.__name__))

    new_bn_layer = None
    if isinstance(layer, nn.Linear):
        # New dense layer maps the previous output onto itself: identity
        # weights and zero bias.
        new_layer = th.nn.Linear(layer.out_features, layer.out_features)
        new_layer.weight.data = th.eye(layer.out_features)
        new_layer.bias.data = th.zeros(layer.out_features)
        if bnorm:
            new_num_features = layer.out_features
            new_bn_layer = nn.BatchNorm1d(num_features=new_num_features)
    else:
        new_kernel_shape = layer.kernel_size
        new_num_channels = filters
        # Index of the centre tap along each kernel dimension.
        center = tuple((size - 1) // 2 for size in new_kernel_shape)

        # Padding equal to the centre offset keeps the spatial size for
        # odd kernels, so the centre-tap kernel below acts as an identity.
        # For 3x3 kernels this is the previously hard-coded padding of 1.
        new_layer = th.nn.Conv2d(new_num_channels,
                                 new_num_channels,
                                 kernel_size=layer.kernel_size,
                                 padding=center)

        # Output channel i copies input channel i through the centre tap.
        new_layer_weight = th.zeros((new_num_channels, new_num_channels) +
                                    new_kernel_shape)
        for i in range(new_num_channels):
            new_layer_weight[(i, i) + center] = 1
        new_layer_bias = th.zeros(new_num_channels)

        # Set new weight and bias for the new convolutional layer.
        new_layer.weight.data = add_noise(new_layer_weight.cuda(),
                                          layer.weight.data)
        new_layer.bias.data = new_layer_bias

        if bnorm:
            new_num_features = layer.out_channels
            new_bn_layer = nn.BatchNorm2d(num_features=new_num_features)

    if bnorm:
        # Set noise as the initial value of all four BN parameter tensors.
        new_bn_layer.weight.data = add_noise(
            th.ones(new_num_features).cuda(), th.Tensor([0, 1]))
        new_bn_layer.bias.data = add_noise(
            th.zeros(new_num_features).cuda(), th.Tensor([0, 1]))
        new_bn_layer.running_mean.data = add_noise(
            th.zeros(new_num_features).cuda(), th.Tensor([0, 1]))
        new_bn_layer.running_var.data = add_noise(
            th.ones(new_num_features).cuda(), th.Tensor([0, 1]))

    seq_container = th.nn.Sequential().cuda()
    seq_container.add_module(prefix + '_conv', layer)
    if bnorm:
        seq_container.add_module(prefix + '_bnorm', new_bn_layer)
    seq_container.add_module(prefix + '_conv_new', new_layer)
    return seq_container
def read_ply(self, file_name):
    """Load a point cloud, preprocess it and draw samples from it.

    The vertex coordinates are read from ``file_name`` (a PLY file) and
    cached next to it as ``<root>.npy``; later calls load the cache
    instead. The cloud is optionally perturbed with ``utils.add_noise``,
    rotated by ``self.rotation_angle`` / ``self.rotation_axis`` and stored
    in ``self.data``. Samples and their indices are stored in
    ``self.samples`` / ``self.sample_indices``, and a KD-tree over the
    data in ``self.tree``.

    When ``self.filter_bad_samples`` is set, the sample indices are
    re-drawn so that every sample passes ``self.is_good_sample``; the
    accepted indices are cached under ``temp/``.

    Args:
        file_name: Path of the PLY file to read.

    Returns:
        The transformed point cloud (``self.data``).

    Raises:
        AssertionError: If filtering leaves fewer than the requested
            number of samples.
    """
    num_samples = self.num_samples // len(self.files_list)
    if self.file_index == len(self.files_list) - 1:
        # The last file also takes the remainder of the integer division.
        num_samples = num_samples + (
            self.num_samples - (num_samples * len(self.files_list)))

    root, _ = os.path.splitext(file_name)
    cache_path = root + ".npy"
    if not os.path.isfile(cache_path):
        ply = PlyData.read(file_name)
        vertex = ply['vertex']
        # Build a concrete (N, 3) array. A bare zip() is a lazy iterator on
        # Python 3 and cannot be saved or reused as point data.
        points = np.column_stack(
            [np.ravel(vertex[axis]) for axis in ('x', 'y', 'z')])
        np.save(cache_path, points)
    else:
        points = np.load(cache_path)

    if self.add_noise:
        self.data = utils.add_noise(points,
                                    prob=self.noise_prob,
                                    factor=self.noise_factor)
    else:
        self.data = np.asarray(points)

    pc_diameter = utils.get_pc_diameter(self.data)
    self.l = self.relL * pc_diameter

    rot = utils.angle_axis_to_rotation(self.rotation_angle,
                                       self.rotation_axis)
    self.data = utils.transform_pc(self.data, rot)

    # TODO: better sampling
    # Single-argument print keeps the Python 2 output text and also runs
    # on Python 3.
    print('sampling file:  {}'.format(file_name))
    self.samples, self.sample_indices = Sampler.sample(
        self.data, -1, num_samples,
        file_name=file_name,
        sampling_algorithm=self.sampling_algorithm)
    self.samples = self.samples[0:num_samples]
    self.sample_indices = self.sample_indices[0:num_samples]

    self.tree = spatial.KDTree(self.data)

    # TODO: integrate with num_samples for consistency
    if self.filter_bad_samples:
        temp_file_samples = ('temp/' + os.path.basename(file_name) + '_' +
                             str(num_samples) + '_filter' +
                             str(self.filter_threshold) + '.npy')
        print('samples file:  {}'.format(temp_file_samples))
        if os.path.isfile(temp_file_samples):
            self.sample_indices = np.load(temp_file_samples)
            self.samples = self.data[self.sample_indices]
        else:
            # Draw twice as many candidates so enough survive the filter.
            self.samples, self.sample_indices = Sampler.sample(
                self.data, -1, num_samples * 2,
                sampling_algorithm=self.sampling_algorithm)
            self.samples = self.samples[0:num_samples * 2]
            self.sample_indices = self.sample_indices[0:num_samples * 2]

            sample_indices_temp = []
            for idx in self.sample_indices:
                if self.is_good_sample(self.data[idx],
                                       self.filter_threshold):
                    sample_indices_temp.append(idx)
                    if len(sample_indices_temp) >= num_samples:
                        break
            assert (len(sample_indices_temp) >= num_samples)

            self.sample_indices = np.asarray(
                sample_indices_temp[0:num_samples])
            self.samples = self.data[self.sample_indices]

            # The cache directory may not exist yet on a fresh checkout.
            temp_dir = os.path.dirname(temp_file_samples)
            if not os.path.isdir(temp_dir):
                os.makedirs(temp_dir)
            np.save(temp_file_samples, self.sample_indices)

    # NOTE(review): this configures the root logger on every call --
    # confirm it should not live in module or application setup instead.
    logging.basicConfig(filename='example.log', level=logging.DEBUG)
    return self.data
def sgd(all_input_params):
    """Train noisy mini-batch SGD models and collect test metrics.

    The features are first reduced to 50 dimensions with a Gaussian random
    projection and a constant bias column is prepended. For every
    ``epsilon`` and every training-set size ``n`` in
    ``amount_in_interval`` a fresh weight vector is trained for one epoch,
    with ``utils.add_noise(num_dimensions, epsilon)`` added to each
    gradient step. Only for the largest size (the last entry of
    ``amount_in_interval``) the per-step objective, mean gradient and
    number of points seen are recorded as well.

    Args:
        all_input_params: 7-tuple
            ``(X_train, y_train, X_test, y_test, amount_in_interval,
            random_state, parameters)``. ``X_*`` are predictors without a
            bias column, ``y_*`` the targets, ``random_state`` seeds NumPy
            (so each worker process differs) and
            ``parameters[epsilon][n]['parameters']`` is read as
            ``(batch_size, weight_decay)``.

    Returns:
        ``(results, objective_info)`` where ``results[epsilon][n]`` holds
        ``'error_rate'``, ``'noise_and_weights_magnitude'`` and
        ``'weights'``, and ``objective_info[epsilon]`` holds the
        ``'objective'``, ``'gradient'`` and ``'num_points'`` lists.
    """
    (X_train_without_bias, y_train, X_test_without_bias, y_test,
     amount_in_interval, random_state, parameters) = all_input_params

    def add_bias_column(features):
        # Prepend a column of ones so the first weight acts as the bias.
        records, attributes = np.shape(features)
        with_bias = np.ones((records, attributes + 1))
        with_bias[:, 1:] = features
        return with_bias

    # Random projection as done in the (second) reference paper.
    transformer = random_projection.GaussianRandomProjection(n_components=50)
    transformer.fit(X_train_without_bias)
    X_train = add_bias_column(transformer.transform(X_train_without_bias))
    X_test = add_bias_column(transformer.transform(X_test_without_bias))

    # Multiprocessing does not reseed per process, so the caller passes a
    # distinct seed for each worker.
    np.random.seed(random_state)
    # Shuffle so different data is used in each process.
    X_train, y_train = shuffle(X_train, y_train)

    num_dimensions = len(X_train[0])
    epochs = 1
    # inf makes the noise go to zero -- equal to having no noise.
    epsilons = [
        0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 10,
        float('Inf')
    ]
    learning_rates = [
        1 / np.sqrt(t + 1) for i in range(epochs)
        for t in range(amount_in_interval[-1])
    ]

    results = {}
    objective_info = {}
    for epsilon in epsilons:
        results.setdefault(epsilon, {})
        for n in amount_in_interval:
            results[epsilon].setdefault(n, {})

            weights = np.zeros(num_dimensions)
            # Defined up front so the magnitude report below never reads an
            # unbound or stale value when no batch is processed.
            noise = np.zeros(num_dimensions)
            batch_size = parameters[epsilon][n]['parameters'][0]
            weight_decay = parameters[epsilon][n]['parameters'][1]

            # Convergence statistics are only gathered for the model that
            # is trained on all available data.
            track_objective = not (n != amount_in_interval[-1])
            if track_objective:
                print(
                    'n != amount_in_interval[-1] = {}, n {}, amount_in_interval[-1] {}'
                    .format(n != amount_in_interval[-1], n,
                            amount_in_interval[-1]))
                if epsilon not in objective_info:
                    objective_info[epsilon] = {
                        'objective': [],
                        'gradient': [],
                        'num_points': []
                    }
                history = objective_info[epsilon]

            t = 0
            for i in range(epochs):
                if track_objective:
                    # Keep counting points across epochs.
                    if history['num_points']:
                        points_from_last_epoch = history['num_points'][-1]
                    else:
                        points_from_last_epoch = 0

                # Shuffle so the mini-batches differ in each epoch.
                X_train_in_use, y_train_in_use = shuffle(
                    X_train[:int(n)], y_train[:int(n)])
                if track_objective:
                    print(len(y_train))

                for j in range(0, len(y_train_in_use), batch_size):
                    X_batch = X_train_in_use[j:j + batch_size]
                    y_batch = y_train_in_use[j:j + batch_size]

                    # Regularisation term.
                    # NOTE(review): sum(weights) is a scalar, so the same
                    # value is added to every coordinate -- confirm this is
                    # intended rather than a per-coordinate L2 gradient.
                    l2_derivative = sum(weights)
                    # Noise for all dimensions.
                    noise = utils.add_noise(num_dimensions, epsilon)

                    if track_objective:
                        objective = utils.get_objective(
                            X_batch, y_batch, weights, batch_size)

                    objective_derivative = (
                        weight_decay * l2_derivative +
                        utils.loss_derivative(X_batch, y_batch, weights) /
                        batch_size + noise / batch_size)

                    # Take a step towards the optimum.
                    weights -= learning_rates[t] * (objective_derivative)

                    if track_objective:
                        history['objective'].append(np.mean(objective))
                        history['gradient'].append(
                            np.mean(objective_derivative))
                        # Count the points actually consumed, so a final
                        # partial batch is not over-counted.
                        history['num_points'].append(
                            j + len(y_batch) + points_from_last_epoch)
                    t += 1

            if track_objective:
                print('num_points',
                      objective_info[epsilon]['num_points'],
                      flush=True)

            # Predict with the trained weights, using logistic regression.
            num_correct = sum(
                1 for features, label in zip(X_test, y_test)
                if label == utils.sigmoid_prediction(features, weights))
            accuracy = num_correct / len(y_test)
            results[epsilon][n]['error_rate'] = 1 - accuracy

            # Magnitude of the last noise draw, to compare against the
            # weights. With epsilon == inf no noise is added, so the weight
            # magnitude is reported instead.
            if epsilon == float('Inf'):
                results[epsilon][n]['noise_and_weights_magnitude'] = sum(
                    abs(weights))
            else:
                results[epsilon][n]['noise_and_weights_magnitude'] = sum(
                    abs(noise))

            # Final weight magnitude for each noise level.
            results[epsilon][n]['weights'] = sum(abs(weights))

    return (results, objective_info)
# GAN training loop (excerpt): one pass over train_iter per epoch.
for epoch in range(0, n_epochs):
    # Put both networks into training mode.
    G.train()
    D.train()
    _batch = 0  # 1-based batch counter within the epoch
    # NOTE(review): the scheduler is stepped at the start of the epoch,
    # before any optimizer step -- confirm this ordering is intended.
    scheduler_lr.step()
    for X, _ in train_iter:
        _batch += 1
        real_x = X.to(DEVICE)
        # One latent vector of shape (nz, 1, 1) per real sample.
        z = T.randn(real_x.size(0), nz, 1, 1, device=DEVICE)
        fake_x = G(z)
        # instance noise trick
        if instance_noise_trick:
            real_x = add_noise(real_x, initial_noise_strength, anneal_epoch, epoch)
            fake_x = add_noise(fake_x, initial_noise_strength, anneal_epoch, epoch)
        # Discriminator update: the fake batch is detached so this backward
        # pass does not propagate into G.
        fake_score = D(fake_x.detach())
        real_score = D(real_x)
        D.zero_grad()
        # Real samples are scored against ones, fake samples against zeros.
        lss_D = criterion(real_score, T.ones_like(real_score)) + \
                criterion(fake_score, T.zeros_like(fake_score))
        lss_D.backward()
        opt_D.step()
        # Re-score both batches with the updated discriminator; fake_x is
        # not detached here (presumably for a generator update that lies
        # past this excerpt -- TODO confirm).
        fake_score = D(fake_x)
        real_score = D(real_x)
def trainOneEpoch(inputCoor, inputGraph, inputLabel, para, sess, trainOperaion,
                  weight_dict, learningRate):
    """Train the model for one epoch over all data chunks.

    Each chunk is shuffled, split into full mini-batches of
    ``para.batchSize`` samples (a trailing partial batch is dropped) and
    fed through ``trainOperaion['train']``. Two weighting options are
    supported: uniform or class-weighted gradient descent.

    Args:
        inputCoor: Per-chunk input coordinates (B, N, 3).
        inputGraph: Per-chunk input graphs (B, N*N), convertible with
            ``tocsr()``.
        inputLabel: Per-chunk labels (B, 1).
        para: Global parameters (batch size, weighting scheme, keep
            probabilities, cluster settings, point number).
        sess: Session used to run the training operations.
        trainOperaion: Dictionary of placeholders and operations.
        weight_dict: Weighting table for the ``'weighted'`` scheme.
        learningRate: Learning rate for the current epoch.

    Returns:
        Tuple ``(average loss, average accuracy, average regularisation
        loss)`` over the training set.

    Raises:
        ValueError: If ``para.weighting_scheme`` is neither ``'uniform'``
            nor ``'weighted'``.
    """
    dataChunkLoss = []
    dataChunkAcc = []
    dataChunkRegLoss = []
    batchSize = para.batchSize

    for i in range(len(inputLabel)):
        xTrain_1, graphTrain_1, labelTrain_1 = (inputCoor[i], inputGraph[i],
                                                inputLabel[i])
        graphTrain_1 = graphTrain_1.tocsr()
        labelBinarize = label_binarize(labelTrain_1, classes=list(range(40)))
        xTrain, graphTrain, labelTrain = shuffle(xTrain_1, graphTrain_1,
                                                 labelBinarize)

        batch_loss = []
        batch_acc = []
        batch_reg = []
        # Floor division: range() needs an int on Python 3, and this
        # matches the Python 2 result of `/` on ints.
        for batchID in range(len(labelBinarize) // batchSize):
            start = batchID * batchSize
            end = start + batchSize
            batchCoor, batchGraph, batchLabel = get_mini_batch(
                xTrain, graphTrain, labelTrain, start, end)
            batchGraph = batchGraph.todense()
            batchCoor = add_noise(batchCoor, sigma=0.008, clip=0.02)

            if para.weighting_scheme == 'uniform':
                batchWeight = uniform_weight(batchLabel)
            elif para.weighting_scheme == 'weighted':
                batchWeight = weights_calculation(batchLabel, weight_dict)
            else:
                # Fail with a clear error. Continuing would hit an
                # undefined batchWeight further down.
                raise ValueError('please enter a valid weighting scheme')

            batchIndexL1, centroid_coordinates = farthest_sampling_new(
                batchCoor,
                M=para.clusterNumberL1,
                k=para.nearestNeighborL1,
                batch_size=batchSize,
                nodes_n=para.pointNumber)
            batchMiddleGraph = middle_graph_generation(
                centroid_coordinates,
                batch_size=batchSize,
                M=para.clusterNumberL1)

            feed_dict = {
                trainOperaion['inputPC']: batchCoor,
                trainOperaion['inputGraph']: batchGraph,
                trainOperaion['outputLabel']: batchLabel,
                trainOperaion['lr']: learningRate,
                trainOperaion['weights']: batchWeight,
                trainOperaion['keep_prob_1']: para.keep_prob_1,
                trainOperaion['keep_prob_2']: para.keep_prob_2,
                trainOperaion['batch_index_l1']: batchIndexL1,
                trainOperaion['l2Graph']: batchMiddleGraph,
                trainOperaion['batch_size']: para.batchSize
            }

            opt, loss_train, acc_train, loss_reg_train = sess.run(
                [
                    trainOperaion['train'], trainOperaion['loss_total'],
                    trainOperaion['acc'], trainOperaion['loss_reg']
                ],
                feed_dict=feed_dict)

            batch_loss.append(loss_train)
            batch_acc.append(acc_train)
            batch_reg.append(loss_reg_train)

        dataChunkLoss.append(np.mean(batch_loss))
        dataChunkAcc.append(np.mean(batch_acc))
        dataChunkRegLoss.append(np.mean(batch_reg))

    train_average_loss = np.mean(dataChunkLoss)
    train_average_acc = np.mean(dataChunkAcc)
    loss_reg_average = np.mean(dataChunkRegLoss)
    return train_average_loss, train_average_acc, loss_reg_average
# 2x2 grid: original image and label on the top row, their transformed
# versions (img_tf / label_tf) on the bottom row.
plt.subplot(221)
plt.title('image')
io.imshow(img)
plt.subplot(222)
plt.title('label')
io.imshow(label)
plt.subplot(223)
plt.title('affine')
io.imshow(img_tf)
plt.subplot(224)
plt.title('affine')
io.imshow(label_tf)
# Disabled debug view (`if 0` never runs): first training image next to
# its noisy version, with the matching label shown twice below.
if 0:
    Y_train = np.squeeze(Y_train)
    noisy_imgs = add_noise(X_train)
    ix = 0
    plt.figure(figsize=(8, 8))
    plt.subplot(221)
    plt.title('image')
    io.imshow(X_train[ix])
    plt.subplot(222)
    plt.title('noisy')
    io.imshow(noisy_imgs[ix])
    plt.subplot(223)
    io.imshow(Y_train[ix])
    plt.subplot(224)
    io.imshow(Y_train[ix])
# Disabled debug step (`if 0` never runs): flipped copies of the data.
if 0:
    hrz_flp, vrt_flp = flip_images(X_train, Y_train)
def sgd(all_input_params):
    """Train noisy mini-batch SGD models for several projection dimensions.

    For every entry ``d`` of the dimension list the features are reduced
    with a Gaussian random projection to ``d`` components (``'all'`` keeps
    the original features) and a constant bias column is prepended. For
    every ``epsilon`` and every training-set size ``n`` in
    ``amount_in_interval`` a fresh weight vector is trained for one epoch
    with a fixed batch size of 5 and weight decay of 0.0001, adding
    ``utils.add_noise(num_dimensions, epsilon)`` to each gradient step.
    Only for the largest size (the last entry of ``amount_in_interval``)
    the per-step objective, gradient norm and number of points seen are
    recorded as well.

    Args:
        all_input_params: 7-tuple
            ``(X_train, y_train, X_test, y_test, amount_in_interval,
            random_state, parameters)``. ``X_*`` are predictors without a
            bias column, ``y_*`` the targets and ``random_state`` seeds
            NumPy (so each worker process differs). ``parameters`` is
            unpacked but not used.

    Returns:
        ``(results, objective_info)`` where ``results[d][epsilon][n]``
        holds ``'error_rate'``, ``'noise_and_weights_magnitude'`` and
        ``'weights'``, and ``objective_info[d][epsilon]`` holds the
        ``'objective'``, ``'gradient'`` and ``'num_points'`` lists.
    """
    (X_train_without_bias_start, y_train_start, X_test_without_bias_start,
     y_test_start, amount_in_interval, random_state,
     parameters) = all_input_params

    def add_bias_column(features):
        # Prepend a column of ones so the first weight acts as the bias.
        records, attributes = np.shape(features)
        with_bias = np.ones((records, attributes + 1))
        with_bias[:, 1:] = features
        return with_bias

    # Multiprocessing does not reseed per process, so the caller passes a
    # distinct seed for each worker.
    np.random.seed(random_state)

    epochs = 1
    # inf makes the noise go to zero -- equal to having no noise.
    epsilons = [0.1, 1, 10, float('Inf')]
    learning_rates = [
        1 / np.sqrt(t + 1) for i in range(epochs)
        for t in range(amount_in_interval[-1])
    ]

    results = {}
    objective_info = {}
    dimensions = [15, 50, 100, 200, 400, 'all']

    for d in dimensions:
        results[d] = {}
        objective_info[d] = {}

        if d != 'all':
            # Random projection as done in the (second) reference paper.
            transformer = random_projection.GaussianRandomProjection(
                n_components=d)
            transformer.fit(X_train_without_bias_start)
            X_train = add_bias_column(
                transformer.transform(X_train_without_bias_start))
            X_test = add_bias_column(
                transformer.transform(X_test_without_bias_start))
        else:
            X_train = add_bias_column(X_train_without_bias_start)
            X_test = add_bias_column(X_test_without_bias_start)

        # The test targets are never transformed. Bind them under the name
        # the evaluation uses (the original read an undefined `y_test`).
        y_test = y_test_start

        # Shuffle so different data is used in each process.
        X_train, y_train = shuffle(X_train, y_train_start)
        num_dimensions = len(X_train[0])

        for epsilon in epsilons:
            # Membership must be checked on this dimension's sub-dictionary,
            # not on the top-level dictionary keyed by dimension.
            results[d].setdefault(epsilon, {})
            for n in amount_in_interval:
                results[d][epsilon].setdefault(n, {})

                weights = np.zeros(num_dimensions)
                # Defined up front so the magnitude report below never
                # reads an unbound or stale value when no batch runs.
                noise = np.zeros(num_dimensions)
                batch_size = 5
                weight_decay = 0.0001

                # Convergence statistics are only gathered for the model
                # that is trained on all available data.
                track_objective = not (n != amount_in_interval[-1])
                if track_objective:
                    if epsilon not in objective_info[d]:
                        objective_info[d][epsilon] = {
                            'objective': [],
                            'gradient': [],
                            'num_points': []
                        }
                    history = objective_info[d][epsilon]

                t = 0
                for i in range(epochs):
                    if track_objective:
                        # Keep counting points across epochs.
                        if history['num_points']:
                            points_from_last_epoch = history['num_points'][-1]
                        else:
                            points_from_last_epoch = 0

                    # Shuffle so the mini-batches differ in each epoch.
                    X_train_in_use, y_train_in_use = shuffle(
                        X_train[:int(n)], y_train[:int(n)])

                    for j in range(0, len(y_train_in_use), batch_size):
                        X_batch = X_train_in_use[j:j + batch_size]
                        y_batch = y_train_in_use[j:j + batch_size]

                        # Regularisation term.
                        # NOTE(review): sum(weights) is a scalar, so the
                        # same value is added to every coordinate --
                        # confirm this is intended rather than a
                        # per-coordinate L2 gradient.
                        l2_derivative = sum(weights)
                        # Noise for all dimensions.
                        noise = utils.add_noise(num_dimensions, epsilon)

                        if track_objective:
                            objective = utils.get_objective(
                                X_batch, y_batch, weights, batch_size,
                                weight_decay)

                        objective_derivative = (
                            weight_decay * l2_derivative +
                            utils.loss_derivative(X_batch, y_batch, weights) /
                            batch_size + noise / batch_size)

                        # Take a step towards the optimum.
                        weights -= learning_rates[t] * (objective_derivative)

                        if track_objective:
                            history['objective'].append(objective)
                            # NOTE(review): this noise-free gradient norm
                            # is evaluated after the weight update, with
                            # the pre-update regularisation term --
                            # confirm that mix is intended.
                            history['gradient'].append(
                                np.linalg.norm(
                                    weight_decay * l2_derivative +
                                    utils.loss_derivative(
                                        X_batch, y_batch, weights) /
                                    batch_size,
                                    ord=2))
                            # Count the points actually consumed, so a
                            # final partial batch is not over-counted.
                            history['num_points'].append(
                                j + len(y_batch) + points_from_last_epoch)
                        t += 1

                # Predict with the trained weights, using logistic
                # regression.
                num_correct = sum(
                    1 for features, label in zip(X_test, y_test)
                    if label == utils.sigmoid_prediction(features, weights))
                accuracy = num_correct / len(y_test)
                results[d][epsilon][n]['error_rate'] = 1 - accuracy

                # Magnitude of the last noise draw, to compare against the
                # weights. With epsilon == inf no noise is added, so the
                # weight magnitude is reported instead.
                if epsilon == float('Inf'):
                    results[d][epsilon][n][
                        'noise_and_weights_magnitude'] = sum(abs(weights))
                else:
                    results[d][epsilon][n][
                        'noise_and_weights_magnitude'] = sum(abs(noise))

                # Final weight magnitude for each noise level.
                results[d][epsilon][n]['weights'] = sum(abs(weights))

        print('dimension {}'.format(d), flush=True)

    return (results, objective_info)
def main(unused_argv):
    """Train the denoising autoencoder on MNIST, then evaluate it.

    Builds the ``model.DAE`` graph, runs ``args.number_of_steps`` training
    steps on noisy MNIST batches (saving a checkpoint and printing the
    loss every 50 steps), then corrupts the test images, plots the first
    ten corrupted and reconstructed images via ``plot_image`` and prints
    the test loss.

    Args:
        unused_argv: Ignored. Command-line options are read through
            ``parse_arguments``.
    """
    # Parse arguments.
    parser = argparse.ArgumentParser()
    args = parse_arguments(parser)

    # Model configuration.
    model_config = configuration.ModelConfig()
    training_config = configuration.TrainingConfig()

    # Create training directory.
    train_dir = args.train_dir
    if not tf.gfile.IsDirectory(train_dir):
        tf.logging.info("Creating training directory: %s", train_dir)
        tf.gfile.MakeDirs(train_dir)

    # Load MNIST data.
    mnist = input_data.read_data_sets('MNIST')

    # Build the TensorFlow graph.
    g = tf.Graph()
    with g.as_default():
        # Build the model.
        the_model = model.DAE(model_config)
        the_model.build()

        # Set up the learning rate.
        learning_rate = tf.constant(training_config.learning_rate)

        # Set up the training ops.
        train_op = tf.contrib.layers.optimize_loss(
            loss=the_model.total_loss,
            global_step=the_model.global_step,
            learning_rate=learning_rate,
            optimizer=training_config.optimizer)

        # Set up the Saver for saving and restoring model checkpoints.
        saver = tf.train.Saver()

        # Run training.
        print("Training")
        with tf.Session() as sess:
            print("Initializing parameters")
            sess.run(tf.global_variables_initializer())

            # Inclusive upper bound so exactly number_of_steps steps run
            # (range(1, N) stopped one step short).
            for step in range(1, args.number_of_steps + 1):
                # Read batch.
                batch = mnist.train.next_batch(model_config.batch_size)[0]
                # Create a noisy version of the batch.
                noisy_batch = utils.add_noise(batch)

                # Prepare the dictionary to feed the data to the graph.
                feed_dict = {
                    "images:0": batch,
                    "noisy_images:0": noisy_batch,
                    "phase_train:0": True
                }

                # Run training.
                _, loss = sess.run([train_op, the_model.total_loss],
                                   feed_dict=feed_dict)

                if step % 50 == 0:
                    # Save checkpoint.
                    saver.save(sess, train_dir + '/model.ckpt')
                    # Print loss.
                    print("Step:", '%06d' % (step), "cost=",
                          "{:.9f}".format(loss))

            print('Finished training ...')
            print('Start testing ...')

            # Load the test images and create a noisy version of them.
            testing_data = mnist.test.images
            corrupted_testing = utils.add_noise(testing_data)

            # Plot the first ten corrupted input images.
            for count, img in enumerate(corrupted_testing[:10], start=1):
                name = 'ori_img' + str(count)
                path = 'img/' + name
                plot_image(img.reshape((28, 28)), name, path)

            # Prepare the dictionary to feed the data to the graph.
            feed_dict = {
                "images:0": testing_data,
                "noisy_images:0": corrupted_testing,
                "phase_train:0": False
            }

            # Compute the reconstruction and the loss.
            reconstruc, loss = sess.run(
                [the_model.reconstructed_images, the_model.total_loss],
                feed_dict=feed_dict)

            # Plot the first ten denoised images.
            for count, img in enumerate(reconstruc[:10], start=1):
                name = 'de_img' + str(count)
                path = 'img/' + name
                plot_image(img.reshape((28, 28)), name, path)

            print(loss)
            print("Testing loss= ", loss)
def wider(layer1, layer2, new_width, bnorm=None):
    r"""
    Widens the layers in the network. Implemented according to the NetMorph
    widening operation.

    The next adjacent layer also has to be widened, because the output width
    of the first layer becomes the input width of the second. If either layer
    is neither ``nn.Conv2d`` nor ``nn.Linear``, all three arguments are
    returned unchanged.

    :param layer1: The layer to be widened (``nn.Conv2d`` or ``nn.Linear``).
    :param layer2: The next adjacent layer, whose input side is widened.
    :param new_width: Width of the new layer (output channels/features of the
        first layer and input channels/features of the next layer).
    :param bnorm: BN layer to be widened if provided.
    :return: Tuple ``(layer1, layer2, bnorm)`` of widened layers.
    :raises AssertionError: If ``new_width`` is not larger than the current
        output width of ``layer1``.
    """
    # Single-argument parenthesised form prints the same text on Python 2
    # and Python 3.
    print('NetMorph Widening... ')

    supported = (nn.Conv2d, nn.Linear)
    if isinstance(layer1, supported) and isinstance(layer2, supported):
        teacher_w1 = layer1.weight.data
        teacher_b1 = layer1.bias.data
        teacher_w2 = layer2.weight.data
        teacher_b2 = layer2.bias.data

        old_width = teacher_w1.size(0)
        assert new_width > old_width, "New size should be larger"
        n_new_units = new_width - old_width

        # Widening output channels/features of the first layer: randomly
        # pick existing units of the teacher layer and append copies of
        # their weights and biases. Noise is added to the result below.
        rand_ids = th.randint(low=0, high=old_width, size=(n_new_units,))
        rand_ids = rand_ids.long().to(teacher_w1.device)
        student_w1 = th.cat((teacher_w1, teacher_w1[rand_ids]), dim=0)
        student_b1 = th.cat((teacher_b1, teacher_b1[rand_ids]))

        if isinstance(layer1, nn.Conv2d):
            # Reuse the geometry of the original layer instead of assuming
            # a 3x3 / stride 1 / padding 1 convolution.
            new_current_layer = nn.Conv2d(out_channels=new_width,
                                          in_channels=layer1.in_channels,
                                          kernel_size=layer1.kernel_size,
                                          stride=layer1.stride,
                                          padding=layer1.padding)
        else:
            # nn.Linear has no out_channels / kernel_size; the widened
            # layer keeps its inputs and grows its outputs.
            new_current_layer = nn.Linear(in_features=layer1.in_features,
                                          out_features=new_width)
        new_current_layer.weight.data = add_noise(student_w1, teacher_w1)
        new_current_layer.bias.data = add_noise(student_b1, teacher_b1)
        layer1 = new_current_layer

        # Widening input channels/features of the second layer: keep the
        # teacher weights and append noise-only weights for the additional
        # inputs. The bias is unchanged.
        # Each unit of layer1 feeds `inputs_per_unit` inputs of layer2:
        # 1 for conv->conv and linear->linear, and the flattened spatial
        # size for conv->linear (assumes a channel-major flatten, so the
        # new channels land at the end -- TODO confirm).
        inputs_per_unit = teacher_w2.size(1) // old_width
        extra_shape = ((teacher_w2.size(0), n_new_units * inputs_per_unit) +
                       tuple(teacher_w2.shape[2:]))
        new_weight = th.zeros(extra_shape).cuda()
        noise = add_noise(new_weight, teacher_w2)
        student_w2 = th.cat((teacher_w2, noise), dim=1)

        if isinstance(layer2, nn.Conv2d):
            new_next_layer = nn.Conv2d(out_channels=layer2.out_channels,
                                       in_channels=new_width,
                                       kernel_size=layer2.kernel_size,
                                       stride=layer2.stride,
                                       padding=layer2.padding)
        else:
            new_next_layer = nn.Linear(in_features=student_w2.size(1),
                                       out_features=layer2.out_features)
        new_next_layer.weight.data = student_w2
        new_next_layer.bias.data = teacher_b2
        layer2 = new_next_layer

        # Widening the batch normalisation layer if provided: only the
        # additional features receive noise, for all four tensors (weight,
        # bias, running mean, running variance).
        if bnorm is not None:
            n_add = new_width - bnorm.num_features

            # Current parameter values.
            bn_weights = bnorm.weight.data
            bn_bias = bnorm.bias.data
            bn_running_mean = bnorm.running_mean.data
            bn_running_var = bnorm.running_var.data

            # Noise for the additional features.
            weight_noise = add_noise(th.ones(n_add).cuda(),
                                     th.Tensor([0, 1]))
            bias_noise = add_noise(th.zeros(n_add).cuda(),
                                   th.Tensor([0, 1]))
            running_mean_noise = add_noise(th.zeros(n_add).cuda(),
                                           th.Tensor([0, 1]))
            running_var_noise = add_noise(th.ones(n_add).cuda(),
                                          th.Tensor([0, 1]))

            # Same BN class as the input, so a BatchNorm1d after a dense
            # layer is not silently replaced by a BatchNorm2d.
            new_bn_layer = bnorm.__class__(
                num_features=bnorm.num_features + n_add)
            new_bn_layer.weight.data = th.cat((bn_weights, weight_noise))
            new_bn_layer.bias.data = th.cat((bn_bias, bias_noise))
            new_bn_layer.running_mean.data = th.cat(
                (bn_running_mean, running_mean_noise))
            new_bn_layer.running_var.data = th.cat(
                (bn_running_var, running_var_noise))
            bnorm = new_bn_layer

    return layer1, layer2, bnorm
def main(args):
    """Run one experiment end to end: load data, train, evaluate, save.

    Steps, in order:

    1. Open a TensorBoard ``SummaryWriter`` and load ``spectrum`` / ``y``.
    2. Split into train / validation / test sets and standardise the
       targets with the training-set mean and standard deviation.
    3. Build the model selected by ``args.model_type`` together with its
       optimizer and ``MultiStepLR`` scheduler.
    4. Optionally load a pre-trained denoiser (``args.denoise != " "``).
    5. Train for ``args.epochs`` epochs, validating every 10th epoch and
       checkpointing every 25th epoch for the ``bAttnVGG`` model.
    6. Evaluate on the test loader, appending inputs, labels and residuals
       to CSV files under ``../residuals/`` and printing summary statistics.
    7. Save the model weights when ``args.savemodel`` is set.

    NOTE(review): the original text of this function had lost its line
    breaks and indentation.  The nesting used here (in particular: the
    scheduler step / train-loss logging once per epoch, residual CSVs
    written for every test batch while inputs and labels are written for
    the first three only) is a reconstruction -- confirm against history.

    Args:
        args: Namespace of run options.  Attributes read here include
            ``model_type``, ``denoise``, ``denoise_latent``, ``noise_level``,
            ``channelwise``, ``benchmark``, ``convolutions``, ``kernel_size``,
            ``hiddenlayer``, ``maxpool``, ``dropout``, ``norm_att``, ``l1``,
            ``SGD``, ``learningrate``, ``l2``, ``epochs``,
            ``lr_decay_milestones``, ``lr_decay_factor``, ``batch_size``,
            ``restore_checkpoint``, ``savemodel`` and ``name``.

    Raises:
        ValueError: If ``args.denoise`` is set but names no known denoiser.
    """
    # Tag shared by the TensorBoard run, the checkpoint and the residual CSVs.
    run_tag = (args.model_type + "-" + args.denoise +
               str(args.denoise_latent) + str(args.noise_level))
    checkpoint_path = "../savedmodels/checkpoint" + run_tag

    # Setup tensorboard stuff
    writer = SummaryWriter("../tensorboard_data/" + run_tag +
                           str(datetime.datetime.now()))
    params_to_tb(writer, args)

    ## Load Data
    size = 2
    spectrum, y = load_data(args.channelwise)
    if args.benchmark:
        # Keep only rows whose first label column lies in [4000, 6000].
        spectrum = spectrum[y[:, 0] <= 6000]
        y = y[y[:, 0] <= 6000]
        spectrum = spectrum[y[:, 0] >= 4000]
        y = y[y[:, 0] >= 4000]
        size = 4
    spectrum, y = interpolate(spectrum, y, number_of_inters=size)
    torch.manual_seed(0)
    print(spectrum.shape)
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # These model types return a distribution (``.mean`` / ``.stddev``).
    Bayesian = args.model_type in ('bayes', 'bAttnVGG', 'bAttn1d')

    X_train, X_test, y_train, y_test = train_test_split(
        spectrum.data.numpy(), y.data.numpy(), random_state=55, test_size=0.1)
    X_train, X_val, y_train, y_val = train_test_split(
        X_train, y_train, random_state=55, test_size=0.1)
    X_train = torch.from_numpy(X_train).float()
    y_train = torch.from_numpy(y_train).float()
    X_val = torch.from_numpy(X_val).float()
    y_val = torch.from_numpy(y_val).float()
    X_test = torch.from_numpy(X_test).float()
    y_test = torch.from_numpy(y_test).float()

    # Standardise every target split with the training statistics only.
    print("Normalizing")
    train_means = torch.mean(y_train, dim=0)
    train_std = torch.std(y_train, dim=0)
    y_train = (y_train - train_means) / train_std
    y_val = (y_val - train_means) / train_std
    y_test = (y_test - train_means) / train_std
    print(train_std)
    print(train_means)
    print(spectrum.shape)
    print(y.shape)

    if args.model_type == 'conv1d':
        model = conv1D(in_size=spectrum.shape[-1],
                       out_size=4,
                       input_channels=spectrum.shape[1],
                       convolutions=args.convolutions,
                       kernel_size=args.kernel_size,
                       hiddenlayer=args.hiddenlayer,
                       maxpool=args.maxpool,
                       dropout=args.dropout)
    elif args.model_type == 'resnet':
        print("resnet")
        model = ResidualNetworkD1(in_size=spectrum.shape[-1],
                                  out_size=4,
                                  input_channels=spectrum.shape[2],
                                  convolutions=args.convolutions,
                                  kernel_size=args.kernel_size,
                                  hiddenlayer=args.hiddenlayer,
                                  maxpool=args.maxpool,
                                  dropout=args.dropout)
    elif args.model_type == 'conv2d':
        print("resnet2d")
        model = ResidualNetworkD2(in_size=8 * 4096,
                                  out_size=4,
                                  convolutions=args.convolutions,
                                  kernel_size=args.kernel_size,
                                  hiddenlayer=args.hiddenlayer,
                                  maxpool=args.maxpool,
                                  dropout=args.dropout)
    elif args.model_type == 'bayes':
        print('Bayesian')
        model = BayesianResidualNetworkD1(in_size=spectrum.shape[-1],
                                          out_size=4,
                                          input_channels=spectrum.shape[2],
                                          convolutions=args.convolutions,
                                          kernel_size=args.kernel_size,
                                          hiddenlayer=args.hiddenlayer,
                                          maxpool=args.maxpool,
                                          dropout=args.dropout)
    elif args.model_type == 'attention':
        print("spatialAttetion")
        model = SpatialAttentionNetwork(4)
    elif args.model_type == 'AttnVGG':
        print("AttnVGG")
        model = AttnVGG_after(im_size=4096,
                              num_classes=4,
                              attention=True,
                              normalize_attn=True)
    elif args.model_type == 'bAttnVGG':
        print("bAttnVGG")
        model = bAttnVGG_after(im_size=4096,
                               num_classes=4,
                               attention=True,
                               normalize_attn=args.norm_att)
    elif args.model_type == 'bAttn1d':
        print("batt1d")
        model = bAttnVGG_1d(im_size=4096,
                            num_classes=4,
                            attention=True,
                            normalize_attn=True)
    else:
        model = conv2D(in_size=8 * 4096,
                       out_size=4,
                       convolutions=args.convolutions,
                       kernel_size=args.kernel_size,
                       hiddenlayer=args.hiddenlayer,
                       maxpool=args.maxpool,
                       dropout=args.dropout)
    model.to(device)

    # NOTE(review): ``criterion`` is never used below; the loops call
    # ``loss_func`` / ``nll_loss``, which are not defined in this function.
    # Confirm whether ``args.l1`` is meant to select the training loss.
    if args.l1:
        criterion = nn.L1Loss()
    else:
        criterion = nn.MSELoss()

    if args.SGD:
        # NOTE(review): despite the flag name this selects AdamW.
        optimizer = optim.AdamW(model.parameters(), lr=args.learningrate)
    else:
        optimizer = optim.Adam(model.parameters(),
                               lr=args.learningrate,
                               weight_decay=args.l2)
    scheduler = optim.lr_scheduler.MultiStepLR(
        optimizer,
        milestones=[(args.epochs - args.lr_decay_milestones)],
        gamma=args.lr_decay_factor)

    if args.model_type == 'attention':
        lr = 3e-4
        # Fix: the per-group optimizer used to be constructed and thrown
        # away, so these learning rates / weight decays never took effect.
        # It must be bound before the scheduler below is built on it.
        optimizer = optim.Adam([{
            'params': model.networks.parameters(),
            'lr': lr,
            'weight_decay': 10e-5
        }, {
            'params': model.finals.parameters(),
            'lr': lr,
            'weight_decay': 10e-5
        }, {
            'params': model.stn.parameters(),
            'lr': lr * 10e-2,
            'weight_decay': 10e-5
        }])
        scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
                                                   milestones=[325, 420],
                                                   gamma=args.lr_decay_factor)

    dataset = torch.utils.data.TensorDataset(X_train, y_train)
    dataset_val = torch.utils.data.TensorDataset(X_val, y_val)
    dataset_test = torch.utils.data.TensorDataset(X_test, y_test)
    BATCH_SIZE = args.batch_size
    trainloader = torch.utils.data.DataLoader(dataset,
                                              batch_size=BATCH_SIZE,
                                              shuffle=True,
                                              num_workers=0,
                                              pin_memory=True)
    valloader = torch.utils.data.DataLoader(dataset_val,
                                            batch_size=BATCH_SIZE,
                                            shuffle=True,
                                            num_workers=0,
                                            pin_memory=True)
    testloader = torch.utils.data.DataLoader(dataset_test,
                                             batch_size=BATCH_SIZE,
                                             shuffle=True,
                                             num_workers=0,
                                             pin_memory=True)

    # A single space is the "no denoiser" sentinel for ``args.denoise``.
    use_denoiser = args.denoise != " "
    if use_denoiser:
        latent_size = args.denoise_latent**2
        if args.denoise == 'VAE1D':
            denoiser = ConvVAE1D(dataset[0][0].squeeze(0).shape, latent_size)
        elif args.denoise == 'DAE':
            denoiser = ConvDAE(dataset[0][0].shape, latent_size)
        elif args.denoise == 'DAE1d':
            print("DAE1d")
            denoiser = DAE1d(dataset[0][0].squeeze(0).shape, latent_size)
        elif args.denoise == 'VAE2D':
            denoiser = ConvVAE(dataset[0][0].shape, latent_size)
        elif args.denoise == 'AFVAE':
            denoiser = AFVAE(dataset[0][0].shape, latent_size)
        else:
            # Previously this fell through to a NameError on ``denoiser``.
            raise ValueError(
                "unknown denoiser type: {!r}".format(args.denoise))
        denoiser.load_state_dict(
            torch.load("../savedmodels/" + args.denoise +
                       str(args.denoise_latent) + str(args.noise_level) +
                       ".pth",
                       map_location=torch.device(device)))
        denoiser.to(device)
        denoiser.eval()

        # Quick reconstruction sanity check on the first 15 spectra.
        test_spectrum_clean = spectrum[0:15].to(device)
        test_spectrum = spectrum[0:15].to(device)
        denoised, _ = denoiser.reconstruct(test_spectrum.to(device))
        print(
            f'MSE_recon: {torch.sum((denoised.cpu()-test_spectrum_clean.cpu())**2)}'
        )
        print(
            f'MSE_noise: {torch.sum((test_spectrum.cpu()-test_spectrum_clean.cpu())**2)}'
        )
        del test_spectrum_clean
        del test_spectrum
        del denoised
    print("setup Complete")

    TB_counter = 0
    epochs = args.epochs
    start_epoch = 0
    if args.restore_checkpoint:
        # map_location keeps a GPU-saved checkpoint loadable on a CPU host,
        # matching how the denoiser weights are loaded above.
        checkpoint = torch.load(checkpoint_path,
                                map_location=torch.device(device))
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        start_epoch = checkpoint['epoch']
        loss = checkpoint['loss']
        scheduler.load_state_dict(checkpoint['scheduler'])

    for epoch in range(start_epoch, epochs):
        train_loss = 0
        train_counter = 0
        model.train()
        for mini_batch_x, mini_batch_y in trainloader:
            mini_batch_x = add_noise(mini_batch_x, args.noise_level)
            # If denoise run a denoising step
            if use_denoiser:
                mini_batch_x, _ = denoiser.reconstruct(mini_batch_x.to(device))
            optimizer.zero_grad()
            #### Forward Pass
            y_pred = model(mini_batch_x.to(device))
            #### Compute Loss
            if Bayesian:
                loss = nll_loss(y_pred, mini_batch_y.to(device))
            else:
                loss = loss_func(y_pred, mini_batch_y.to(device))
            #### Backward pass
            loss.backward()
            optimizer.step()
            train_loss += loss.cpu().data.numpy()
            train_counter += 1
        scheduler.step()
        writer.add_scalar("train_loss",
                          train_loss / train_counter,
                          global_step=TB_counter)
        TB_counter += 1

        if epoch % 10 == 0:
            val_loss = 0
            val_counter = 0
            with torch.set_grad_enabled(False):
                model.eval()
                for val_batch_x, val_batch_y in valloader:
                    val_batch_x = add_noise(val_batch_x, args.noise_level)
                    if use_denoiser:
                        val_batch_x, _ = denoiser.reconstruct(
                            val_batch_x.to(device))
                    if Bayesian:
                        # just take the mean of the estimates
                        y_pred_test = model(val_batch_x.to(device)).mean
                    else:
                        y_pred_test = model(val_batch_x.to(device))
                    val_loss += loss_func(y_pred_test.squeeze(),
                                          val_batch_y.to(device))
                    val_counter += 1
            val_loss = val_loss.cpu().data.numpy() / val_counter
            writer.add_scalar("validation_loss",
                              val_loss,
                              global_step=TB_counter)
            # ``loss`` here is the loss of the last training batch.
            print('Epoch {}: train_loss: {} Val loss: {}'.format(
                epoch, loss, val_loss))

        if epoch % 25 == 0 and args.model_type == 'bAttnVGG':
            torch.save(
                {
                    'epoch': epoch,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'loss': loss,
                    'scheduler': scheduler.state_dict()
                }, checkpoint_path)

    # ---- Test pass 1: residuals in original (un-normalised) units ----
    model.eval()
    residual_batches = []
    with torch.set_grad_enabled(False):
        final_val_loss = 0
        for i, (val_batch_x, val_batch_y) in enumerate(testloader):
            val_batch_x = add_noise(val_batch_x, args.noise_level)
            # If denoise run a denoising step
            if use_denoiser:
                val_batch_x, _ = denoiser.reconstruct(val_batch_x.to(device))
            if Bayesian:
                # One forward pass so mean and stddev describe the same
                # predictive distribution (it used to be evaluated twice).
                prediction = model(val_batch_x.to(device))
                y_pred_test = prediction.mean
                y_pred_test_std = prediction.stddev
            else:
                y_pred_test = model(val_batch_x.to(device))
            final_val_loss += loss_func(y_pred_test.squeeze(),
                                        val_batch_y.to(device)).cpu()

            # Undo the target standardisation before computing residuals.
            y_pred = (y_pred_test.detach().cpu() * train_std) + train_means
            y_true = (val_batch_y.detach().cpu() * train_std) + train_means
            batch_residuals = (y_pred - y_true).detach()
            residual_batches.append(batch_residuals)

            if i < 3:
                # Inputs are large, so only the first three batches are kept.
                with open('../residuals/data-' + run_tag + '.csv',
                          'a') as data:
                    np.savetxt(data,
                               val_batch_x.view(val_batch_x.shape[0],
                                                -1).cpu().data.numpy(),
                               delimiter=",")
                with open('../residuals/labels-' + run_tag + '.csv',
                          'a') as data:
                    np.savetxt(data,
                               y_true.view(y_true.shape[0], -1).data.numpy(),
                               delimiter=",")
            with open('../residuals/residuals-' + run_tag + '.csv',
                      'a') as res:
                np.savetxt(res, batch_residuals.numpy(), delimiter=",")
            if Bayesian:
                with open('../residuals/residuals-std-' + run_tag + '.csv',
                          'a') as res:
                    np.savetxt(
                        res,
                        (y_pred_test_std.detach().cpu() * train_std).numpy(),
                        delimiter=",")
    residuals = torch.cat(residual_batches, dim=0)

    if args.model_type in ('bAttnVGG', 'AttnVGG', 'bAttn1d'):
        model.visual_att(testloader, device, args)

    # ---- Test pass 2: mean loss over the test loader ----
    final_test_loss = 0
    final_counter = 0
    with torch.set_grad_enabled(False):
        for val_batch_x, val_batch_y in testloader:
            val_batch_x = add_noise(val_batch_x, args.noise_level)
            if use_denoiser:
                val_batch_x, _ = denoiser.reconstruct(val_batch_x.to(device))
            if Bayesian:
                # just take the mean of the estimates
                y_pred_test = model(val_batch_x.to(device)).mean
            else:
                y_pred_test = model(val_batch_x.to(device))
            final_test_loss += loss_func(
                y_pred_test.squeeze(),
                val_batch_y.to(device)).cpu().data.numpy()
            final_counter += 1
    final_test_loss = final_test_loss / final_counter

    print("final validation loss: {}".format(final_val_loss))
    print("final std of residuals from validation set: {}".format(
        torch.std(residuals, dim=0).cpu().data.numpy()))
    print("final mean squared error: {}".format(
        torch.mean(residuals**2, dim=0).cpu().data.numpy()))
    print("final RMSE error: {}".format(
        torch.sqrt(torch.mean(residuals**2, dim=0)).cpu().data.numpy()))
    print("final MAE error: {}".format(
        torch.mean(torch.abs(residuals), dim=0).cpu().data.numpy()))
    if Bayesian:
        # Uses the predicted stddev of the last test batch only.
        print("final unnormed mean std from model: {}".format(
            torch.mean(y_pred_test_std.cpu() * train_std,
                       dim=0).cpu().data.numpy()))
    print("STARNET RMSE ")
    print("[51.2, 0.081, 0.040] ")
    print("STARNET MAE ")
    print("[31.2, 0.053, 0.025] ")
    print("final test loss: {}".format(final_test_loss))
    test_sun(model, train_means, train_std, device)
    print("Saving Residuals")
    if args.savemodel:
        torch.save(model.state_dict(), "../savedmodels/" + args.name)
def __init__(self, shape, sess, variance_coef, data_info):
    """Set up the denoising autoencoder and add it to the session's graph.

    NOTE(review): the original text had lost its indentation; placing the
    network definition inside the "AE_Variables" variable scope is a
    reconstruction -- confirm against history.

    Args:
      shape: list of ints specifying num input, hidden1 units,...hidden_n units, num outputs
      sess: tensorflow session object to use
      variance_coef: multiplicative factor for the variance of noise wrt the variance of data
      data_info: key information about the dataset; this method reads its
                 max_val, mean_pose, train_shape, eval_shape and data_sigma
    """
    self.__shape = shape  # [input_dim,hidden1_dim,...,hidden_n_dim,output_dim]
    self.__variables = {}
    self.__sess = sess

    self.num_hidden_layers = np.size(shape) - 2
    self.batch_size = FLAGS.batch_size
    self.sequence_length = FLAGS.chunk_length
    self.scaling_factor = 1

    # maximal value and mean pose in the dataset
    # (used for scaling it to interval [-1,1] and back)
    self.max_val = data_info.max_val
    self.mean_pose = data_info.mean_pose

    # ---------------- Add the DATASETS to the GRAPH ----------------

    # 1 - TRAIN
    self._train_data_initializer = tf.placeholder(
        dtype=tf.float32, shape=data_info.train_shape)
    self._train_data = tf.Variable(self._train_data_initializer,
                                   trainable=False,
                                   collections=[],
                                   name='Train_data')
    total_train_epochs = (
        FLAGS.training_epochs +
        FLAGS.pretraining_epochs * FLAGS.num_hidden_layers)
    train_slices = tf.train.slice_input_producer(
        [self._train_data], num_epochs=total_train_epochs)
    self._train_batch = tf.train.shuffle_batch(train_slices,
                                               batch_size=FLAGS.batch_size,
                                               capacity=5000,
                                               min_after_dequeue=1000,
                                               name='Train_batch')

    # 2 - VALIDATE, can be used as TEST
    # When optimizing this holds the validation dataset,
    # when testing it holds the test dataset.
    self._valid_data_initializer = tf.placeholder(
        dtype=tf.float32, shape=data_info.eval_shape)
    self._valid_data = tf.Variable(self._valid_data_initializer,
                                   trainable=False,
                                   collections=[],
                                   name='Valid_data')
    valid_slices = tf.train.slice_input_producer(
        [self._valid_data], num_epochs=FLAGS.training_epochs)
    self._valid_batch = tf.train.shuffle_batch(valid_slices,
                                               batch_size=FLAGS.batch_size,
                                               capacity=5000,
                                               min_after_dequeue=1000,
                                               name='Valid_batch')

    if FLAGS.weight_decay is not None:
        print('\nWe apply weight decay')

    # ---------------- Specify tensorflow setup ----------------
    with sess.graph.as_default():
        with tf.variable_scope("AE_Variables"):
            # One weight matrix and bias per layer (hidden layers + output).
            for layer_idx in range(self.num_hidden_layers + 1):
                self._create_variables(layer_idx, FLAGS.weight_decay)

            # ---- 1 - Network for TRAINing ----
            # Feed noisy data and reconstruct the clean batch,
            # as in a Denoising AutoEncoder.
            self._input_ = add_noise(self._train_batch, variance_coef,
                                     data_info.data_sigma)
            self._target_ = self._train_batch

            # Output and loss for the training data
            train_outputs = self.construct_graph(self._input_, FLAGS.dropout)
            self._output, _, _ = train_outputs
            self._reconstruction_loss = loss_reconstruction(
                self._output, self._target_, self.max_val)
            # Joins whatever is already in the 'losses' collection
            # (e.g. weight-decay terms).
            tf.add_to_collection('losses', self._reconstruction_loss)
            self._loss = tf.add_n(tf.get_collection('losses'),
                                  name='total_loss')

            # ---- 2 - Network for TESTing ----
            self._valid_input_ = self._valid_batch
            self._valid_target_ = self._valid_batch

            # Output with keep probability 1, i.e. no dropout
            valid_outputs = self.construct_graph(self._valid_input_, 1)
            self._valid_output, self._encode, self._decode = valid_outputs

            self._valid_loss = loss_reconstruction(self._valid_output,
                                                   self._valid_target_,
                                                   self.max_val)
def train(epoch):
    """Train the model for one epoch and log the per-batch average losses.

    Uses module-level state: ``model``, ``optimizer``, ``train_loader``,
    ``device``, ``args``, ``writer``, ``train_loss_list`` and the helpers
    ``add_noise``, ``loss_function`` and ``topological_loss``.

    The loss is chosen per batch:

    * ``args.mode == 1``: topological loss on the reconstruction only;
    * otherwise, ``args.topo == True``: ``loss_function`` returning the
      total, its SE / KLD / topological parts and four ``b*`` terms;
    * otherwise: ``loss_function`` returning the total, SE and KLD.

    Args:
        epoch: Epoch number, used in progress output and as the
            TensorBoard step.

    Returns:
        The training loss averaged over the batches of this epoch.
    """
    model.train()
    train_loss = 0.
    SE, KLD = 0., 0.
    topo = 0.
    b01, b0, b1, b2 = 0., 0., 0., 0.
    # Strict ``== True`` is kept so non-bool flag values behave as before.
    use_topo = args.topo == True

    for batch_idx, data in enumerate(train_loader):
        noisy_data = add_noise(data, device)
        data = data.to(device)
        noisy_data = noisy_data.to(device)
        optimizer.zero_grad()
        recon_batch, mu, logvar = model(noisy_data)

        if args.mode == 1:
            loss, l01, l0, l1, l2 = topological_loss(recon_batch)
            train_loss += loss.item()
            b01 += l01.item()
            b0 += l0.item()
            b1 += l1.item()
            b2 += l2.item()
        elif use_topo:
            loss, l_SE, l_KLD, l_topo, l01, l0, l1, l2 = loss_function(
                recon_batch, data, mu, logvar)
            train_loss += loss.item()
            SE += l_SE.item()
            KLD += l_KLD.item()
            topo += l_topo.item()
            b01 += l01.item()
            b0 += l0.item()
            b1 += l1.item()
            b2 += l2.item()
        else:
            loss, l_SE, l_KLD = loss_function(recon_batch, data, mu, logvar)
            train_loss += loss.item()
            SE += l_SE.item()
            KLD += l_KLD.item()

        loss.backward()
        optimizer.step()

        if batch_idx % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader),
                loss.item() / len(noisy_data)))

    n_batches = len(train_loader)
    train_loss /= n_batches
    train_loss_list.append(train_loss)
    print('====> Epoch: {} Average loss: {:.4f}'.format(epoch, train_loss))

    # Turn every accumulated sum into a per-batch average *before* logging.
    # 'Topo' used to be written to "loss/each_loss" while still a sum, so it
    # was on a different scale from the averaged 'Train' / 'Rec' / 'KL'.
    if use_topo:
        b01 /= n_batches
        b0 /= n_batches
        b1 /= n_batches
        b2 /= n_batches
        topo /= n_batches

    if args.mode == 0:
        SE /= n_batches
        KLD /= n_batches
        # One write covers all four curves; the former second call only
        # repeated 'Train' / 'Rec' / 'KL' at the same step.
        writer.add_scalars("loss/each_loss", {
            'Train': train_loss,
            'Rec': SE,
            'KL': KLD,
            'Topo': topo
        }, epoch)

    if use_topo:
        writer.add_scalars("loss/topological_loss", {
            'topo': topo,
            'b01': b01,
            'b0': b0,
            'b1': b1,
            'b2': b2
        }, epoch)
    return train_loss
def wider(layer1, layer2, new_width, bnorm=None):
    """Widen ``layer1`` to ``new_width`` output channels (Net2Net style).

    Extra output filters of ``layer1`` are noisy copies of randomly chosen
    existing filters.  The matching input channels of ``layer2`` are copies
    of the chosen channel divided by its replication count, and the
    original channel is rescaled the same way.  When ``layer2`` is an
    ``nn.Linear``, its weight is temporarily viewed as a 4-D conv weight
    and flattened back at the end.

    Only ``nn.Conv2d`` followed by ``nn.Conv2d`` or ``nn.Linear`` is
    handled; any other combination is returned unchanged.

    NOTE(review): the original text had lost its indentation, so returning
    the inputs unchanged for unsupported layer types is a reconstruction.

    Args:
        layer1: Layer to widen; must have ``weight`` and ``bias``.
        layer2: Layer consuming ``layer1``'s output; must have ``weight``
            and ``bias``.
        new_width: Target number of output channels for ``layer1``.
        bnorm: Optional batch-norm layer between the two; updated in place.

    Returns:
        Tuple ``(layer1, layer2, bnorm)`` holding the new first and second
        layers and the (mutated) batch-norm layer.

    Raises:
        AssertionError: If the layer shapes are incompatible or
            ``new_width`` does not exceed the current width.
        ValueError: From ``random.sample`` when more new channels are
            requested than ``layer1`` currently has.
    """
    print('Net2Net Widening... ')
    w1 = layer1.weight.data
    w2 = layer2.weight.data
    b1 = layer1.bias.data
    b2 = layer2.bias.data

    if not (isinstance(layer1, nn.Conv2d)
            and isinstance(layer2, (nn.Conv2d, nn.Linear))):
        return layer1, layer2, bnorm

    next_is_linear = isinstance(layer2, nn.Linear)
    if next_is_linear:
        # Convert Linear layers to Conv if linear layer follows target layer
        assert w2.size(1) % w1.size(0) == 0, 'Linear units need to be multiple'
        # Side length of the square feature map feeding the linear layer.
        # (A leftover debugging ``exit()`` here used to terminate the
        # process on this path.)
        spatial_side = int(np.sqrt(w2.size(1) // w1.size(0)))
        w2 = w2.view(w2.size(0), w2.size(1) // spatial_side**2, spatial_side,
                     spatial_side)
    else:
        assert w1.size(0) == w2.size(1), "Module weights are not compatible"
    assert new_width > w1.size(0), "New size should be larger"

    old_width = w1.size(0)
    nw1 = w1.clone()
    nb1 = b1.clone()
    nw2 = w2.clone()

    if bnorm is not None:
        # resize_ leaves the new tail uninitialised; every new slot is
        # filled in the loop below.
        nrunning_mean = bnorm.running_mean.clone().resize_(new_width)
        nrunning_var = bnorm.running_var.clone().resize_(new_width)
        if bnorm.affine:
            nweight = bnorm.weight.data.clone().resize_(new_width)
            nbias = bnorm.bias.data.clone().resize_(new_width)

    # Keep the source layer's geometry instead of assuming 3x3 / stride 1 /
    # padding 1, so the widened layer sees the same spatial mapping.
    new_current_layer = nn.Conv2d(out_channels=new_width,
                                  in_channels=layer1.in_channels,
                                  kernel_size=layer1.kernel_size,
                                  stride=layer1.stride,
                                  padding=layer1.padding)

    # Existing filters to replicate, sampled without replacement.
    teacher_ids = random.sample(range(old_width), new_width - old_width)
    replication_factor = np.bincount(teacher_ids)

    for i, teacher_index in enumerate(teacher_ids):
        new_weight = add_noise(w1.select(0, teacher_index), nw1).unsqueeze(0)
        nw1 = th.cat((nw1, new_weight), dim=0)
        nb1 = th.cat((nb1, b1[teacher_index].unsqueeze(0)))
        if bnorm is not None:
            nrunning_mean[old_width + i] = bnorm.running_mean[teacher_index]
            nrunning_var[old_width + i] = bnorm.running_var[teacher_index]
            if bnorm.affine:
                nweight[old_width + i] = bnorm.weight.data[teacher_index]
                nbias[old_width + i] = bnorm.bias.data[teacher_index]

    new_current_layer.weight.data = nw1
    new_current_layer.bias.data = nb1
    layer1 = new_current_layer

    # Copy the weights from input channel of next layer and append it after
    # dividing the selected filter by replication factor.
    for teacher_index in teacher_ids:
        factor = int(replication_factor[teacher_index]) + 1
        assert factor > 1, 'Error in Net2Wider'
        # Calculate new weight according to replication factor
        new_weight = w2.select(1, teacher_index) * (1. / factor)
        # Append the new weight increasing its input channel
        nw2 = th.cat((nw2, new_weight.unsqueeze(1)), dim=1)
        # Assign the calculated new weight to replicated filter
        nw2[:, teacher_index, :, :] = new_weight

    if next_is_linear:
        # in_features follows the widened weight's real shape rather than
        # layer1's conv kernel size.
        flat_features = new_width * spatial_side**2
        new_next_layer = nn.Linear(in_features=flat_features,
                                   out_features=layer2.out_features)
        # Convert the 4D tensor back to 2D for the linear layer, i.e. undo
        # the earlier view as a convolutional weight.
        new_next_layer.weight.data = nw2.view(layer2.weight.size(0),
                                              flat_features)
    else:
        new_next_layer = nn.Conv2d(out_channels=layer2.out_channels,
                                   in_channels=new_width,
                                   kernel_size=layer2.kernel_size,
                                   stride=layer2.stride,
                                   padding=layer2.padding)
        new_next_layer.weight.data = nw2
    # Set the bias for new next layer as previous bias for next layer
    new_next_layer.bias.data = b2
    layer2 = new_next_layer

    if bnorm is not None:
        bnorm.num_features = new_width
        bnorm.running_var = nrunning_var
        bnorm.running_mean = nrunning_mean
        if bnorm.affine:
            bnorm.weight.data = nweight
            bnorm.bias.data = nbias
    return layer1, layer2, bnorm