예제 #1
0
파일: bsdset.py 프로젝트: dbisk/bmcn
 def __getitem__(self, idx):
     """Return one sample as a dict holding a noisy image and its source.

     The image at ``self.im_paths[idx]`` is loaded through ``load_img`` with
     ``to_grayscale=True``, passed through ``self.transform`` when that
     attribute is truthy, and then handed to ``add_noise``.

     Returns:
         ``{'data': <noisy image>, 'truth': <image before noise>}``.
     """
     # TODO: allow slices instead of forcing just 1 idx at a time
     # TODO: suppress warning following line outputs
     clean = load_img(self.im_paths[idx], to_grayscale=True)
     transform = self.transform
     clean = transform(clean) if transform else clean
     return {'data': add_noise(clean), 'truth': clean}
def trainOneEpoch(inputCoor, inputGraph, inputLabel, para, sess, trainOperaion, weight_dict, learningRate):
    """Train for one epoch over all data chunks and return averaged metrics.

    Args:
        inputCoor: indexable collection of per-chunk coordinate data.
        inputGraph: indexable collection of per-chunk graphs; each element
            must provide ``tocsr()``.
        inputLabel: indexable collection of per-chunk labels.
        para: parameter object; this function reads ``outputClassN``,
            ``batchSize``, ``weighting_scheme``, ``keep_prob_1`` and
            ``keep_prob_2``.
        sess: session object whose ``run`` executes the training step.
        trainOperaion: dict holding the handles used here ('inputPC',
            'inputGraph', 'outputLabel', 'lr', 'weights', 'keep_prob_1',
            'keep_prob_2', 'train', 'loss_total', 'acc', 'loss_reg').
        weight_dict: forwarded to ``weights_calculation`` when the
            weighting scheme is 'weighted'.
        learningRate: value fed to ``trainOperaion['lr']``.

    Returns:
        Tuple ``(loss, accuracy, regularization_loss)``; each entry is the
        mean over chunks of that chunk's mean over batches.

    Raises:
        ValueError: if ``para.weighting_scheme`` is neither 'uniform' nor
            'weighted'.
    """
    # Resolve the weighting function once and fail fast on a bad scheme.
    # Previously an invalid scheme only printed a message and the loop then
    # crashed on an unbound ``batchWeight``.
    scheme = para.weighting_scheme
    if scheme == 'uniform':
        computeWeight = uniform_weight
    elif scheme == 'weighted':
        def computeWeight(labels):
            return weights_calculation(labels, weight_dict)
    else:
        raise ValueError(
            "invalid weighting scheme {!r}; expected 'uniform' or 'weighted'".format(scheme))

    batchSize = para.batchSize
    classes = list(range(para.outputClassN))

    dataChunkLoss = []
    dataChunkAcc = []
    dataChunkRegLoss = []
    for i in range(len(inputCoor)):
        graphChunk = inputGraph[i].tocsr()
        labelBinarize = label_binarize(inputLabel[i], classes=classes)
        xTrain, graphTrain, labelTrain = shuffle(inputCoor[i], graphChunk, labelBinarize)

        batch_loss = []
        batch_acc = []
        batch_reg = []
        # Floor division keeps the batch count an int on Python 3 as well;
        # a trailing partial batch is dropped, as before.
        for batchID in range(len(labelBinarize) // batchSize):
            start = batchID * batchSize
            end = start + batchSize
            batchCoor, batchGraph, batchLabel = get_mini_batch(xTrain, graphTrain, labelTrain, start, end)
            batchGraph = batchGraph.todense()

            batchCoor = add_noise(batchCoor, sigma=0.008, clip=0.02)
            batchWeight = computeWeight(batchLabel)

            feed_dict = {trainOperaion['inputPC']: batchCoor, trainOperaion['inputGraph']: batchGraph,
                         trainOperaion['outputLabel']: batchLabel, trainOperaion['lr']: learningRate,
                         trainOperaion['weights']: batchWeight,
                         trainOperaion['keep_prob_1']: para.keep_prob_1, trainOperaion['keep_prob_2']: para.keep_prob_2}

            # The first fetched value is the train op's result, which is not needed.
            _, loss_train, acc_train, loss_reg_train = sess.run(
                [trainOperaion['train'], trainOperaion['loss_total'], trainOperaion['acc'], trainOperaion['loss_reg']],
                feed_dict=feed_dict)

            batch_loss.append(loss_train)
            batch_acc.append(acc_train)
            batch_reg.append(loss_reg_train)

        dataChunkLoss.append(np.mean(batch_loss))
        dataChunkAcc.append(np.mean(batch_acc))
        dataChunkRegLoss.append(np.mean(batch_reg))

    train_average_loss = np.mean(dataChunkLoss)
    train_average_acc = np.mean(dataChunkAcc)
    loss_reg_average = np.mean(dataChunkRegLoss)
    return train_average_loss, train_average_acc, loss_reg_average
예제 #3
0
def main():
    """Run the image-processing demo from start to finish.

    Loads a sample image and shows its channels, zooms into a window,
    separates the 'hed' color-space channels of that window, then runs the
    denoising, deblurring and cell-boundary steps on the second returned
    channel before displaying all figures.
    """
    # Load the sample image and show its RGB channels.
    sample_path = os.path.join('samples', 'IMG_6566.JPG')
    img = load_image(sample_path)
    show_custom_channels(img, color_space='rgb', title='Input image')

    # Crop a window so individual color values are easier to inspect.
    zoomed = zoom_in(img, 850, 950, height=500, width=500)
    show_custom_channels(zoomed, color_space='rgb', title='Zoomed-in window')

    # Split the zoomed window into its stain channels.
    stain_title = 'Immunohistochemical staining colors separation'
    channel_lst, cmap_lst = show_custom_channels(
        zoomed, color_space='hed', title=stain_title)

    # Work on a copy of channel 1 (the eosin channel, per the original
    # author's note) together with its matching colormap.
    sel_chn = np.copy(channel_lst[1])
    sel_cmap = cmap_lst[1]

    # Denoising task: corrupt the channel, then clean it up again.
    noised = add_noise(sel_chn)
    denoised = simple_denoise(noised, kernel_size=3)
    denoise_titles = [
        'Original image', 'With Gaussian noise', 'Denoised with Median filter'
    ]
    show_with_cmap([sel_chn, noised, denoised], [sel_cmap] * 3, denoise_titles)

    # Deblurring task: blur, add noise, then restore.
    blurred = blur(sel_chn, block_size=3)
    blurred_noised = add_noise(blurred, sigma=3)
    deblurred = simple_deblur(blurred_noised)
    deblur_titles = [
        'Original image', 'With blurring and noise',
        'Deblurred with Wiener filter'
    ]
    show_with_cmap([sel_chn, blurred_noised, deblurred], [sel_cmap] * 3,
                   deblur_titles)

    # Detect cell boundaries, then display every figure.
    detect_cell_boundary(sel_chn, sel_cmap)
    plt.show()
예제 #4
0
def pretrain(**kwargs):
    """Pretrain an autoencoder-style model with an MSE reconstruction loss.

    Keyword Args:
        model: module whose forward pass returns a mapping with key 'rec'.
        dataloader: iterable of ``(x, _)`` batches; ``x`` is unpacked as a
            4-d tensor ``(batch, c, h, w)``.
        epochs: number of epochs to run.
        pth: path where the best ``state_dict`` is saved.
        png: path of the preview image rewritten after every batch.
        denoising: when truthy, the model input is ``add_noise(x)``.

    Side effects:
        Writes the preview image each batch, logs the mean loss each epoch,
        and saves the weights whenever the epoch loss improves.
    """
    model, dataloader, epochs, pth_file, png_file, denoising = [
        kwargs[key]
        for key in ('model', 'dataloader', 'epochs', 'pth', 'png', 'denoising')
    ]

    optimizer = torch.optim.Adam(params=model.parameters(),
                                 lr=1e-3,
                                 weight_decay=1e-5)
    criterion = nn.MSELoss()

    best_loss = 1e10
    model.train()
    for epoch in range(1, epochs + 1):
        running_loss = 0.0
        for batch, _ in dataloader:
            _, c, h, w = batch.shape
            flat = batch.view((batch.shape[0], -1))
            corrupted = add_noise(flat) if denoising else flat
            corrupted = corrupted.cuda()
            flat = flat.cuda()

            # Forward pass: reconstruct the flattened input.
            recon = model(corrupted)['rec']
            loss = criterion(recon, flat)
            running_loss += loss.item()

            # Backward pass and parameter update.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Preview: first sample's target, model input and reconstruction,
            # concatenated along dim 1 and written as an 8-bit image.
            panels = [t[0].view(c, h, w) for t in (flat, corrupted, recon)]
            preview = torch.cat(panels, dim=1).detach().cpu().numpy()
            preview = np.transpose(preview, (2, 1, 0))
            preview = np.clip(preview * 255.0, 0, 255).astype(np.uint8)
            cv2.imwrite(png_file, preview)

        # Epoch summary: mean batch loss, checkpoint on improvement.
        train_loss = running_loss / len(dataloader)
        logger.info('epoch [{}/{}], MSE_loss:{:.4f}'.format(
            epoch, epochs, train_loss))
        if best_loss > train_loss:
            best_loss = train_loss
            torch.save(model.state_dict(), pth_file)
예제 #5
0
 def check_capacity(self, noise=1):
     """Count the stored states that the update rule maps back to themselves.

     Args:
         noise: when truthy, each state is first perturbed with
             ``add_noise(state, noise_frac=0.05)`` and the update rule runs
             for ``self.max_iter`` iterations; when falsy, the update rule
             is applied to the unmodified state with an iteration argument
             of 1.

     Returns:
         Number of states in ``self.states`` for which the pattern returned
         by ``self.update_rule`` equals the original state.
     """
     recovered = 0
     for state in self.states:
         if not noise:
             _, result, _ = self.update_rule(state, 1, verbose=False)
         else:
             probe = add_noise(state, noise_frac=0.05)
             _, result, _ = self.update_rule(probe,
                                             self.max_iter,
                                             verbose=False)
         recovered += np.array_equal(result, state)
     return recovered
예제 #6
0
    def __getitem__(self, index):
        """Return a ``(noisy, clean)`` pair from a random crop of a random image.

        ``index`` is not used: every call picks an image number in
        ``[1, 800]`` at random, opens ``<data_root>/<number>.png`` and crops
        a random ``opt.crop_size`` square from it.

        Returns:
            Tuple ``(noise, label)`` where ``label`` is the transformed crop
            and ``noise`` is ``add_noise(label, opt.noise_level)``.
        """
        img_index = random.randint(1, 800)
        img = Image.open(self.data_root + "/" + str(img_index)+'.png')
        # PIL's Image.size is (width, height). The previous code read index 0
        # as the height, so for non-square images the crop offsets were drawn
        # from the wrong axis and the box could fall outside the image.
        img_W, img_H = img.size
        H_start = random.randint(0, img_H - opt.crop_size)
        W_start = random.randint(0, img_W - opt.crop_size)
        # Crop box order is (left, upper, right, lower).
        crop_box = (W_start, H_start, W_start + opt.crop_size, H_start + opt.crop_size)
        img_crop = img.crop(crop_box)

        label = self.transform(img_crop)
        noise = add_noise(label, opt.noise_level)

        return noise, label
예제 #7
0
파일: data.py 프로젝트: yuekai146/unmt
    def __getitem__(self, idx):
        '''
        Build the clean and noised tensors for the sentence at ``idx``.

        Return: dict with keys
            ids: LongTensor of the clean sentence ids after padding
            sentence_len: int, number of clean ids before padding
            mask: FloatTensor, 1.0 at positions < sentence_len, else 0.0

            noise_ids: LongTensor of the noised sentence ids after padding
            noise_sentence_len: int, number of noised ids before padding
            noise_mask: FloatTensor, 1.0 at positions < noise_sentence_len

        The clean sequence is padded to ``self.max_len`` plus one slot for
        each of ``self.sos`` / ``self.eos`` that is truthy; the noised
        sequence is padded to ``self.max_len``.
        '''
        def to_long(values):
            return torch.from_numpy(np.array(values)).long()

        def to_float(values):
            return torch.from_numpy(np.array(values)).float()

        sentence = self.corpus[idx]

        # Clean sentence.
        clean_ids = self.vocab.sentence2ids(sentence, sos=self.sos, eos=self.eos)
        sentence_len = len(clean_ids)
        padded_len = self.max_len
        if self.sos:
            padded_len += 1
        if self.eos:
            padded_len += 1
        clean_ids = self._pad_ids(clean_ids, padded_len)
        clean_mask = [int(pos < sentence_len) for pos in range(padded_len)]

        # Noised sentence.
        noise_sentence = add_noise(sentence, self.drop_prob, self.k)
        noised_ids = self.vocab.sentence2ids(noise_sentence)
        noise_sentence_len = len(noised_ids)
        noised_ids = self._pad_ids(noised_ids, self.max_len)
        noised_mask = [
            int(pos < noise_sentence_len) for pos in range(self.max_len)
        ]

        return {
            "ids": to_long(clean_ids),
            "sentence_len": sentence_len,
            "mask": to_float(clean_mask),
            "noise_ids": to_long(noised_ids),
            "noise_sentence_len": noise_sentence_len,
            "noise_mask": to_float(noised_mask),
        }
예제 #8
0
    def __preproc__(self, file):
        """Parse a mesh from an open text stream and return sampled points.

        The first line is skipped. The second line must hold three
        space-separated integers: vertex count, face count and a third value
        that is not used. That many vertex lines (floats) and face lines
        (ints, first entry dropped) follow. Points are sampled from the mesh
        and passed through ``utils.cent_norm``; when ``self.valid`` is falsy
        they are additionally noised and rotated about z by a random angle.

        Returns:
            ``float32`` NumPy array built from the sampled points.
        """
        def next_fields():
            return file.readline().strip().split(' ')

        file.readline()  # first line carries no data that is used here
        n_verts, n_faces, _unused = (int(tok) for tok in next_fields())

        verts = []
        for _ in range(n_verts):
            verts.append([float(tok) for tok in next_fields()])

        faces = []
        for _ in range(n_faces):
            # Convert the whole line, then drop its first entry.
            faces.append([int(tok) for tok in next_fields()][1:])

        points = utils.sample_points(np.array(verts), faces)
        points = utils.cent_norm(points)
        if not self.valid:
            theta = random.random() * 360
            noisy_points = utils.add_noise(points)
            points = utils.rotation_z(noisy_points, theta)

        return np.array(points, dtype="float32")
예제 #9
0
    def __getitem__(self, idx):
        """Return clean/noisy audio and their STFT magnitudes for one file.

        The file name is ``self.wav[idx][0]`` under ``self.val_dir``. The
        noise source is chosen by ``self.noise``: 'white' draws standard
        normal samples shaped like the audio, any other supported name loads
        the matching file under ``noise/``. Audio and noise are mixed by
        ``utils.add_noise`` at ``self.snr``.

        Returns:
            dict with keys 'clean_mag' and 'noise_mag' (transposed STFT
            magnitudes), 'noise_audio' and 'clean_audio'.

        Raises:
            ValueError: if ``self.noise`` is not a supported noise type.
        """
        # train file will have audio, type noise, SNR
        noise_files = {
            'babble': 'noise/babble_test.wav',
            'factory1': 'noise/factory1_test.wav',
            'engine': 'noise/engine_test.wav',
            'ops': 'noise/ops.wav',
            'bucc': 'noise/bucc.wav',
            'dishes': 'noise/dishes.wav',
            'bike': 'noise/bike.wav',
            'tap': 'noise/tap.wav',
        }

        file_name = self.wav[idx][0]
        file_path = os.path.join(self.val_dir, file_name)
        # Pass the sample rate by keyword: newer librosa releases no longer
        # accept it positionally.
        audio, _ = librosa.load(file_path, sr=self.fs)

        clean_spect = librosa.stft(audio, n_fft=self.n_fft, hop_length=self.hop_size)

        if self.noise == 'white':
            sub_noise = np.random.normal(0, 1, audio.shape)
        elif self.noise in noise_files:
            sub_noise, _ = librosa.load(noise_files[self.noise], sr=self.fs)
        else:
            # Previously an unknown type fell through every branch and the
            # mix below failed on an unbound ``sub_noise``.
            raise ValueError(
                "unsupported noise type {!r}; expected 'white' or one of {}".format(
                    self.noise, sorted(noise_files)))

        noise_audio = utils.add_noise(audio, sub_noise, self.snr)

        noise_spect = librosa.stft(noise_audio, n_fft=self.n_fft, hop_length=self.hop_size)

        # Only the magnitudes are returned; the phases are discarded.
        magC, _ = librosa.magphase(clean_spect)
        magN, _ = librosa.magphase(noise_spect)

        magClean = np.transpose(magC)
        magNoise = np.transpose(magN)

        # TODO: make this a function later on

        sample = {'clean_mag': magClean, 'noise_mag': magNoise, 'noise_audio' :noise_audio,'clean_audio': audio }
        return sample
예제 #10
0
 def data_stats(self, sample_size):
     """Load or compute the per-row mean and std of sampled a-priori SNRs.

     If ``<noise_dir>stats.npz`` exists, ``self.mu`` and ``self.sigma`` are
     read from it. Otherwise ``sample_size`` random speech/noise pairs are
     mixed, their prior SNR (dB) is computed, and the mean and standard
     deviation along axis 1 of the stacked result are stored on ``self``
     and saved to that file. (The original docstring describes this as the
     per-frequency-bin statistics, from DeepXi.)

     Raises:
         ValueError: if the stacked samples are not 2-dimensional.
     """
     stats_path = self.noise_dir + 'stats.npz'

     # Fast path: reuse statistics computed earlier.
     if os.path.exists(stats_path):
         with np.load(stats_path) as cached:
             self.mu = cached['mu_hat']
             self.sigma = cached['sigma_hat']
         return

     print('Start saving stats')
     collected = []
     for _ in range(sample_size):
         snr = random.choice(self.snr_level)
         speech_src, _ = librosa.load(random.choice(self.speech_wav_files),
                                      sr=self.sr)
         noise_full, _ = librosa.load(random.choice(self.noise_wav_files),
                                      sr=self.sr)

         # Cut a random noise segment as long as the speech signal.
         n_speech = len(speech_src)
         offset = random.randint(0, len(noise_full) - n_speech)
         noise_seg = noise_full[offset:offset + n_speech]

         # Only the scale factor for the requested SNR is needed here.
         _, alpha = utils.add_noise(speech_src, noise_seg, snr)
         scaled_noise = noise_seg * alpha

         # Magnitude spectra of speech and scaled noise.
         _, speech_mag, _ = utils.analysis(
             speech_src, self.frame_len, self.frame_shift, self.n_fft)
         _, noise_mag, _ = utils.analysis(
             scaled_noise, self.frame_len, self.frame_shift, self.n_fft)

         # Instantaneous a-priori SNR (dB) between the two spectra.
         snr_db = utils.prior_snr(speech_mag, noise_mag)
         collected.append(np.squeeze(snr_db))

     stacked = np.hstack(collected)
     if stacked.ndim != 2:
         raise ValueError('Incorrect shape for sample.')

     mu_hat = np.mean(stacked, axis=1)
     sigma_hat = np.std(stacked, axis=1)
     self.mu, self.sigma = mu_hat, sigma_hat
     np.savez(stats_path, mu_hat=mu_hat, sigma_hat=sigma_hat)
     print('Sample statistics saved.')
예제 #11
0
    def env_get(self, l):
        """
        Return the latest (obs, rews, dones, infos) tuple for environment ``l``.

        On step 0 the environment is reset and a synthetic result is stored
        and returned: the reset observation, ``None`` for rewards, an
        all-True bool array of length ``lump_stride`` and an empty dict.
        Afterwards a cached result is returned when one exists; otherwise
        ``step_wait`` is called, noise is added to the observation, and the
        new tuple is cached before being returned.
        """
        state = self.I

        # Zeroth step with a new venv: reset instead of stepping.
        if state.step_count == 0:
            first_ob = state.venvs[l].reset()
            result = (first_ob, None, np.ones(state.lump_stride, bool), {})
            state.env_results[l] = result
            return result

        cached = state.env_results[l]
        if cached is not None:
            return cached

        stepped = state.venvs[l].step_wait()
        noisy_obs = add_noise(stepped[0], noise_p=self.noise_p, noise_type=self.noise_type)
        result = (noisy_obs, *stepped[1:])
        state.env_results[l] = result
        return result
예제 #12
0
def pretrain(**kwargs):
    """Pretrain a denoising autoencoder and checkpoint after every epoch.

    Keyword Args:
        data: dataset handed to ``DataLoader`` (batch size 128, shuffled).
        model: module to train; its output is reshaped to
            ``(batch, 28 * 28)`` before the MSE loss is computed.
        num_epochs: epoch index at which training stops (exclusive).
        savepath: forwarded to ``save_checkpoint``.
        checkpoint: dict with at least 'epoch' (first epoch to run) and
            'best' (best loss so far); 'best' is updated in place.

    Notes:
        The loss logged and compared per epoch is that of the LAST batch of
        the epoch, not an epoch average. ``device`` is read from the
        enclosing module.
    """
    data = kwargs["data"]
    model = kwargs["model"]
    num_epochs = kwargs["num_epochs"]
    savepath = kwargs["savepath"]
    checkpoint = kwargs["checkpoint"]
    start_epoch = checkpoint["epoch"]

    # Optimize the model that is actually run and saved below. The previous
    # code took the parameters from a module-level ``autoencoder`` instead of
    # the ``model`` argument.
    parameters = list(model.parameters())
    optimizer = torch.optim.Adam(parameters, lr=1e-3, weight_decay=1e-5)
    train_loader = DataLoader(dataset=data, batch_size=128, shuffle=True)
    criterion = nn.MSELoss()

    for epoch in range(start_epoch, num_epochs):
        for batch in train_loader:
            img = batch.float()
            noisy_img = add_noise(img)
            noisy_img = noisy_img.to(device)
            img = img.to(device)
            # ===================forward=====================
            output = model(noisy_img)
            output = output.squeeze(1)
            output = output.view(output.size(0), 28 * 28)
            loss = criterion(output, img)
            # ===================backward====================
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        # ===================log========================
        state = loss.item()
        print(
            "epoch [{}/{}], MSE_loss:{:.4f}".format(
                epoch + 1, num_epochs, state
            )
        )
        is_best = state < checkpoint["best"]
        if is_best:
            checkpoint["best"] = state

        # Persist the best loss seen so far. Previously the current epoch's
        # loss was written under "best", which lost the true best value.
        save_checkpoint(
            {
                "state_dict": model.state_dict(),
                "best": checkpoint["best"],
                "epoch": epoch,
            },
            savepath,
            is_best,
        )
예제 #13
0
def evaluate_model(data, sess, model, global_step, num_eval_examples):
    """Compute and print the mean validation loss, then plot reconstructions.

    Args:
        data: dataset object; batches are drawn from
            ``data.validation.next_batch``.
        sess: Session object.
        model: Instance of DAE; the model to evaluate.
        global_step: Global step of the model checkpoint (used for printing).
        num_eval_examples: Number of examples to run the evaluation on.

    Side effects:
        Plots the first ten reconstructed images of the last evaluated batch
        to ``samples/image<i>`` and prints the mean loss over the batches.
    """
    batch_size = model.config.batch_size

    # Number of batches needed to cover the requested examples. The previous
    # code overwrote this value with 1, so only a single batch was ever
    # evaluated. At least one batch is run so that ``feed_dict`` is defined
    # for the reconstruction step below.
    num_eval_batches = max(
        1, int(math.ceil(num_eval_examples / float(batch_size))))

    # Accumulate the loss over all batches.
    sum_losses = 0.
    for _ in range(num_eval_batches):
        # Read batch.
        batch = data.validation.next_batch(batch_size)[0]

        # Create a noisy version of the batch.
        noisy_batch = utils.add_noise(batch)

        # Prepare the dictionary to feed the data to the graph.
        feed_dict = {"images:0": batch, "noisy_images:0": noisy_batch, "phase_train:0": False}

        # Evaluate the loss.
        loss = sess.run([model.total_loss], feed_dict=feed_dict)
        sum_losses += np.sum(loss)

    # Reconstruct the last batch and plot its first ten images as 28x28.
    x_reconstructed_images = sess.run(model.reconstructed_images, feed_dict=feed_dict)
    for i in range(10):
        image = np.reshape(x_reconstructed_images[i], [28, 28])
        MRF.plot_image(image, 'image', "samples/image" + str(i))

    sum_losses = sum_losses / num_eval_batches

    print("Step:", '%06d' % (global_step), ",cost=", "{:.9f}".format(sum_losses))
예제 #14
0
    def get_train_data(self):
        """Return the next (speech, noise, noisy) training block.

        While the noisy buffer holds fewer than ``block_size * frame_len``
        samples, the next speech file is loaded and normalized, the noise
        buffer is extended with random noise files until it is at least as
        long as the speech, and the two are mixed at a randomly chosen SNR.
        The leading block is then cut from each buffer and every buffer is
        advanced by ``block_shift * frame_len`` samples.

        Returns:
            Tuple ``(speech_block, noise_block, noisy_block)``, each with a
            new leading axis of length 1.
        """
        block_len = self.block_size * self.frame_len
        hop_len = self.block_shift * self.frame_len

        # Refill the buffers until a full block is available.
        while len(self.noisy_buffer) < block_len:
            speech_path = self.speech_wav_files[self.wav_idx]
            new_speech, _ = librosa.load(speech_path, sr=self.sr)
            self.speech_buffer = utils.normalize(new_speech)
            n_speech = len(self.speech_buffer)

            while len(self.noise_buffer) < n_speech:
                noise_path = random.choice(self.noise_wav_files)
                new_noise, _ = librosa.load(noise_path, sr=self.sr)
                self.noise_buffer = np.concatenate(
                    (self.noise_buffer, utils.normalize(new_noise)))

            snr = random.choice(self.snr_level)
            self.noisy_buffer, _ = utils.add_noise(
                self.speech_buffer,
                self.noise_buffer[:n_speech],
                snr,
                normalization=False)
            self.wav_idx += 1

        # Take the leading block from each buffer ...
        blocks = (self.speech_buffer[:block_len],
                  self.noise_buffer[:block_len],
                  self.noisy_buffer[:block_len])

        # ... and advance every buffer by one hop.
        self.speech_buffer = self.speech_buffer[hop_len:]
        self.noise_buffer = self.noise_buffer[hop_len:]
        self.noisy_buffer = self.noisy_buffer[hop_len:]

        # Prepend a length-1 axis to each block.
        speech_block, noise_block, noisy_block = (
            block[np.newaxis, :] for block in blocks)
        return speech_block, noise_block, noisy_block
예제 #15
0
 def build_test_pairs(self):
     """Create one noisy test file per speech file, unless they already exist.

     Returns immediately when ``self.noisy_wav_files`` already has as many
     entries as ``self.speech_wav_files``. Otherwise, for each speech file a
     random noise file at least as long as the speech is chosen, a random
     segment of it is mixed in at a random SNR from ``self.snr_level``, and
     the result is written as 16-bit PCM to
     ``<noisy_dir><speech name>_<noise type>_<snr>dB.wav`` and appended to
     ``self.noisy_wav_files``.
     """
     if len(self.noisy_wav_files) == len(self.speech_wav_files):
         return
     for speech_file in self.speech_wav_files:
         speech_src, _ = sf.read(speech_file)
         speech_len = len(speech_src)

         noise_file = random.choice(self.noise_wav_files)
         noise_src, _ = sf.read(noise_file)
         # Redraw until the noise is at least as long as the speech SIGNAL.
         # The previous code compared against len(speech_file), the length
         # of the path string, so short noise files slipped through and the
         # randint below could receive a negative upper bound.
         # NOTE(review): this loops forever if no noise file is long enough.
         while len(noise_src) < speech_len:
             noise_file = random.choice(self.noise_wav_files)
             noise_src, _ = sf.read(noise_file)

         snr = random.choice(self.snr_level)
         # Noise type is the part of the file NAME before its first '_'.
         # Searching the base name only keeps an underscore in a directory
         # name from truncating the result.
         noise_name = os.path.basename(noise_file)
         noise_type = noise_name[:noise_name.find('_')]
         noisy_file = self.noisy_dir + os.path.basename(speech_file)[:-4] \
                      + '_' + noise_type + '_' + str(snr) + 'dB.wav'

         start_idx = random.randint(0, len(noise_src) - speech_len)
         noise_seg = noise_src[start_idx:start_idx + speech_len]
         noisy_src, _ = utils.add_noise(speech_src, noise_seg, snr)
         sf.write(noisy_file,
                  noisy_src,
                  samplerate=self.sr,
                  subtype='PCM_16')
         self.noisy_wav_files.append(noisy_file)
예제 #16
0
# Evaluate models on noise-corrupted spectra.
# `file`, `k`, `model_fcnn`, `y_min` and `y_max` are defined outside this excerpt.

# Pull the stored arrays out of `file`.
spectra = np.array(file['spectrum'])
phi = np.array(file['phi'])
theta = np.array(file['theta'])
lp = np.array(file['lp'])

# Flatten each target quantity to a single column and join them side by
# side, giving the columns (phi, theta, lp).
target = np.concatenate([phi.reshape(-1, 1),
                         theta.reshape(-1, 1),
                         lp.reshape(-1, 1)], axis=1)

# Noise level estimate computed from copies of the data; it is handed to
# utils.add_noise below.
noise_estimation = utils.get_std(spectra.copy(), target.copy())
noise_accuracy = []

# One split seed per iteration; indexed by the loop counter, so `k` must not
# exceed the length of this list.
seed = [628, 693, 847, 621, 861, 409, 74, 306, 884, 777]
for i in range(k):
    # NOTE(review): the global NumPy RNG is reset to the same constant on
    # every iteration, so any NumPy randomness inside utils.add_noise starts
    # from the same state each time — confirm this is intended.
    np.random.seed(42)
    spectra_noise = utils.add_noise(spectra.copy(), noise_estimation.copy())
    x_train, y_train, x_test, y_test = split_data_for_noise_test(spectra_noise.copy(), target.copy(), seed[i])

    # Only the first two return values of each preprocessing call are used.
    x_train, x_test, _, _ = utils.preprocessing(x_train, x_test)
    y_train, y_test, _, _ = utils.preprocessing(y_train, y_test)

    # Predict with the i-th model; no training happens in this loop.
    y_pred = model_fcnn[i].predict(x_test)

    y_pred, y_test = utils.postprocessing(y_pred, y_test, y_min, y_max)

    noise_accuracy.append(utils.metric(utils.create_df(y_test, y_pred)))

    # Plot the prediction/target correlation for the first iteration only.
    if i == 0:
        plot_corr_model(y_test, y_pred, 'corr_noise.png')

print('model trained on clean data and tested on noisy data')
예제 #17
0
def main(args):
    """Train an encoder/generator pair to reconstruct image patches.

    Each iteration draws a training batch, optionally adds noise
    (``args.sigma``), reconstructs it with ``netG(netE(x))`` and takes one
    Adam step on the MSE between reconstruction and (possibly noised) input.
    When ``args.image_valid`` is set, a validation batch is evaluated the
    same way without gradients and its loss drives best-model selection.

    Args:
        args: parsed options. Reads ``seed``, ``nocuda``, ``outf``,
            ``image_train``, ``image_valid``, ``resize_train``,
            ``resize_valid``, ``patch_size``, ``batch_size_train``,
            ``batch_size_valid``, ``fliplr``, ``flipud``, ``rot90``,
            ``smooth``, ``nc``, ``ncode``, ``ngf``, ``ndf``, ``lr``,
            ``niter``, ``sigma``, ``save_best`` and ``nprint``.

    Side effects:
        Creates ``args.outf`` and writes sample images, the loss plot and
        encoder checkpoints into it; prints progress to stdout.
    """
    utils.seedme(args.seed)
    cudnn.benchmark = True
    device = torch.device(
        'cuda' if torch.cuda.is_available() and not args.nocuda else 'cpu')

    # Create the output directory directly instead of shelling out to
    # `mkdir -p` with an unquoted, user-supplied path.
    if not os.path.isdir(args.outf):
        os.makedirs(args.outf)

    dataloader_train = utils.get_patchloader(args.image_train,
                                             resize=args.resize_train,
                                             patch_size=args.patch_size,
                                             batch_size=args.batch_size_train,
                                             fliplr=args.fliplr,
                                             flipud=args.flipud,
                                             rot90=args.rot90,
                                             smooth=args.smooth)
    if args.image_valid:
        dataloader_valid = utils.get_patchloader(
            args.image_valid,
            resize=args.resize_valid,
            patch_size=args.patch_size,
            batch_size=args.batch_size_valid,
            fliplr=args.fliplr,
            flipud=args.flipud,
            rot90=args.rot90,
            smooth=args.smooth)

    netG = models.DCGAN_G(image_size=args.patch_size,
                          nc=args.nc,
                          nz=args.ncode,
                          ngf=args.ngf).to(device)
    netE = models.Encoder(patch_size=args.patch_size,
                          nc=args.nc,
                          ncode=args.ncode,
                          ndf=args.ndf).to(device)

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(netG)
    print(netE)

    optimizer = optim.Adam(list(netG.parameters()) + list(netE.parameters()),
                           lr=args.lr,
                           amsgrad=True)
    loss_func = nn.MSELoss()

    losses = []
    losses_valid = []
    best_loss = 1e16
    for i in range(args.niter):
        optimizer.zero_grad()
        x = next(dataloader_train).to(device)
        if args.sigma:
            x = utils.add_noise(x, args.sigma)
        y = netG(netE(x))
        loss = loss_func(y, x)
        loss.backward()
        optimizer.step()

        if args.image_valid:
            with torch.no_grad():
                netG.eval()
                netE.eval()
                x_ = next(dataloader_valid).to(device)
                if args.sigma:
                    # Noise the VALIDATION batch. The previous code passed
                    # the training batch `x` here, so with sigma set the
                    # "validation" loss was computed on training data.
                    x_ = utils.add_noise(x_, args.sigma)
                y_ = netG(netE(x_))
                loss_valid = loss_func(y_, x_)
                netG.train()
                netE.train()
                losses_valid.append(loss_valid.item())

        # Model selection uses the validation loss when available. A new
        # best must beat the old one by more than 1e-3.
        _loss = loss_valid.item() if args.image_valid else loss.item()
        if _loss + 1e-3 < best_loss:
            best_loss = _loss
            print("[{}/{}] best loss: {}".format(i + 1, args.niter, best_loss))
            if args.save_best:
                # NOTE: the file is named netD_* but holds the encoder state.
                torch.save(netE.state_dict(),
                           '{}/netD_best.pth'.format(args.outf))

        losses.append(loss.item())
        if (i + 1) % args.nprint == 0:
            if args.image_valid:
                print('[{}/{}] train: {}, test: {}, best: {}'.format(
                    i + 1, args.niter, loss.item(), loss_valid.item(),
                    best_loss))
            else:
                print('[{}/{}] train: {}, best: {}'.format(
                    i + 1, args.niter, loss.item(), best_loss))
            logger.vutils.save_image(torch.cat([x, y], dim=0),
                                     '{}/train_{}.png'.format(
                                         args.outf, i + 1),
                                     normalize=True)
            fig, ax = plt.subplots()
            # Median-filter the curve (window 11) and drop the 5 edge
            # samples on each side before plotting.
            ax.semilogy(scipy.signal.medfilt(losses, 11)[5:-5], label='train')
            if args.image_valid:
                logger.vutils.save_image(torch.cat([x_, y_], dim=0),
                                         '{}/test_{}.png'.format(
                                             args.outf, i + 1),
                                         normalize=True,
                                         nrow=32)
                ax.semilogy(scipy.signal.medfilt(losses_valid, 11)[5:-5],
                            label='valid')
            fig.legend()
            fig.savefig('{}/loss.png'.format(args.outf))
            plt.close(fig)
            torch.save(netE.state_dict(),
                       '{}/netD_iter_{}.pth'.format(args.outf, i + 1))
def trainOneEpoch(inputCoor, inputGraph, inputLabel, para, sess, trainOperaion,
                  weight_dict, learningRate):
    """Train for one epoch using either uniform or weighted gradient descent.

    Args:
        inputCoor: per-chunk input coordinates (described by the original
            author as (B, N, 3)).
        inputGraph: per-chunk input graphs (described as (B, N*N)); each
            element must provide ``tocsr()``.
        inputLabel: per-chunk labels (described as (B, 1)).
        para: global parameters; this function reads ``batchSize``,
            ``weighting_scheme``, ``clusterNumberL1``, ``nearestNeighborL1``,
            ``pointNumber``, ``keep_prob_1``, ``keep_prob_2`` and, when
            present, ``outputClassN``.
        sess: session whose ``run`` executes the training step.
        trainOperaion: dict of placeholders and ops.
        weight_dict: weights used by the 'weighted' scheme.
        learningRate: learning rate for the current epoch.

    Returns:
        Tuple ``(loss, accuracy, regularization_loss)`` averaged over the
        training set (mean over chunks of per-chunk batch means).

    Raises:
        ValueError: if ``para.weighting_scheme`` is neither 'uniform' nor
            'weighted'.
    """
    # Resolve the weighting function once and fail fast on a bad scheme.
    # Previously an invalid scheme only printed a message and the loop then
    # crashed on an unbound ``batchWeight``.
    scheme = para.weighting_scheme
    if scheme == 'uniform':
        computeWeight = uniform_weight
    elif scheme == 'weighted':
        def computeWeight(labels):
            return weights_calculation(labels, weight_dict)
    else:
        raise ValueError(
            "invalid weighting scheme {!r}; expected 'uniform' or 'weighted'".format(scheme))

    batchSize = para.batchSize
    # Defaults to the original hard-coded 40 classes when ``para`` does not
    # define ``outputClassN``.
    classes = list(range(getattr(para, 'outputClassN', 40)))

    dataChunkLoss = []
    dataChunkAcc = []
    dataChunkRegLoss = []
    for i in range(len(inputLabel)):
        graphChunk = inputGraph[i].tocsr()
        labelBinarize = label_binarize(inputLabel[i], classes=classes)
        xTrain, graphTrain, labelTrain = shuffle(inputCoor[i], graphChunk, labelBinarize)

        batch_loss = []
        batch_acc = []
        batch_reg = []
        # Floor division keeps the batch count an int on Python 3 as well;
        # a trailing partial batch is dropped, as before.
        for batchID in range(len(labelBinarize) // batchSize):
            start = batchID * batchSize
            end = start + batchSize
            batchCoor, batchGraph, batchLabel = get_mini_batch(xTrain, graphTrain, labelTrain, start, end)
            batchGraph = batchGraph.todense()
            batchCoor = add_noise(batchCoor, sigma=0.008, clip=0.02)
            batchWeight = computeWeight(batchLabel)

            # Sample centroids and build the graph between them for this batch.
            batchIndexL1, centroid_coordinates = farthest_sampling_new(batchCoor, M=para.clusterNumberL1,
                                                                       k=para.nearestNeighborL1, batch_size=batchSize,
                                                                       nodes_n=para.pointNumber)
            batchMiddleGraph = middle_graph_generation(centroid_coordinates, batch_size=batchSize,
                                                       M=para.clusterNumberL1)

            feed_dict = {trainOperaion['inputPC']: batchCoor, trainOperaion['inputGraph']: batchGraph,
                         trainOperaion['outputLabel']: batchLabel, trainOperaion['lr']: learningRate,
                         trainOperaion['weights']: batchWeight,
                         trainOperaion['keep_prob_1']: para.keep_prob_1, trainOperaion['keep_prob_2']: para.keep_prob_2,
                         trainOperaion['batch_index_l1']: batchIndexL1,
                         trainOperaion['l2Graph']: batchMiddleGraph, trainOperaion['batch_size']: para.batchSize}

            # The first fetched value is the train op's result, which is not needed.
            _, loss_train, acc_train, loss_reg_train = sess.run(
                [trainOperaion['train'], trainOperaion['loss_total'], trainOperaion['acc'], trainOperaion['loss_reg']],
                feed_dict=feed_dict)

            batch_loss.append(loss_train)
            batch_acc.append(acc_train)
            batch_reg.append(loss_reg_train)

        dataChunkLoss.append(np.mean(batch_loss))
        dataChunkAcc.append(np.mean(batch_acc))
        dataChunkRegLoss.append(np.mean(batch_reg))

    train_average_loss = np.mean(dataChunkLoss)
    train_average_acc = np.mean(dataChunkAcc)
    loss_reg_average = np.mean(dataChunkRegLoss)
    return train_average_loss, train_average_acc, loss_reg_average
예제 #19
0
# Denoise one image with a trained DPDNN and save the noisy input and the
# output. `opt`, `label_img` and `i` are defined outside this excerpt.
transform1 = T.ToTensor()
transform2 = T.ToPILImage()

with torch.no_grad():
    net = DPDNN()
    net = nn.DataParallel(net)
    net.load_state_dict(torch.load(opt.load_model_path))

    img = Image.open(label_img)
    # img.show()
    label = np.array(img).astype(np.float32)  # label:0~255
    # PIL's Image.size is (width, height). The previous code read index 0 as
    # the height, so the resize_ calls below used (width, height) as
    # (rows, cols) and scrambled non-square images.
    img_W, img_H = img.size
    img = transform1(img)

    # Add noise and reshape to a single-image, single-channel batch.
    img_noise = add_noise(img, opt.noise_level).resize_(1, 1, img_H, img_W)

    output = net(img_noise)
    output = output.cpu()
    output = output.resize_(img_H, img_W)
    # Keep values in [0, 1] before converting back to a PIL image.
    output = torch.clamp(output, min=0, max=1)
    output = transform2(output)

    # output.show()
    # To save the output(denoised) image, you must create a new folder. Here is my path.
    output.save('./output/sigma%d/%d.png' % (opt.noise_level, i))

    img_noise = transform2(img_noise.resize_(img_H, img_W))
    # img_noise.show()
    img_noise.save('./output/sigma%d/%d_noise.png' % (opt.noise_level, i))
    output = np.array(output)  # output:0~255
예제 #20
0
def sgd(all_input_params):
    """Grid-search batch size and weight decay for noisy mini-batch SGD.

    ``all_input_params`` is the tuple ``(X_without_bias, y,
    amount_in_interval, random_state)``. The predictors are randomly
    projected to 50 components, a leading bias column of ones is added, the
    numpy RNG is seeded with ``random_state`` and the data is shuffled.

    For every noise level ``epsilon`` and every training-set size ``n`` in
    ``amount_in_interval``, each (weight_decay, batch_size) pair is scored by
    5-fold cross-validation on the first ``n`` rows. The entry chosen by
    ``utils.get_min_index`` is stored.

    Returns a nested dict ``optimal_results[epsilon][n]`` with the keys
    ``'parameters'`` (``(batch_size, weight_decay)``) and ``'error_rate'``.
    """
    started = time.time()
    X_without_bias, y, amount_in_interval, random_state = all_input_params

    # Random projection down to 50 components (done before seeding, as in
    # the original ordering).
    projector = random_projection.GaussianRandomProjection(n_components = 50)
    X_without_bias = projector.fit_transform(X_without_bias)

    # Prepend a column of ones that acts as the bias term.
    n_rows, n_cols = np.shape(X_without_bias)
    X = np.ones((n_rows, n_cols + 1))
    X[:,1:] = X_without_bias

    # Each worker process gets its own seed, then its own data ordering.
    np.random.seed(random_state)
    X, y = shuffle(X, y)

    num_dimensions = len(X[0])
    batch_size_grid = [1, 2, 5, 10, 50, 75, 100, 150, 200, 250, 300,  400, 500, 1000, 2000]
    epochs = 1
    k_splits = 5
    # Step size 1/sqrt(t + 1), restarted for every epoch.
    rates_one_epoch = [1/np.sqrt(step + 1) for step in range(amount_in_interval[-1])]
    learning_rates = rates_one_epoch * epochs
    # The original author notes that an infinite epsilon means no noise.
    epsilons =  [ 0.1,  1, 10, float('Inf')]
    weight_decays = [3.0, 2.0, 1.5,  1, 0.5, 10**(-1), 10**(-2), 10**(-5), 10**(-20)]

    optimal_results = {}
    folds = KFold(n_splits=k_splits)
    for epsilon in epsilons:
        per_epsilon = optimal_results.setdefault(epsilon, {})
        for n in amount_in_interval:
            per_n = per_epsilon.setdefault(n, {})
            # Scores of every grid point for this (epsilon, n) combination.
            grid = {'batch_size':[], 'weight_decay':[], 'error_rate':[]}
            for weight_decay in weight_decays:
                for batch_size in batch_size_grid:
                    accuracy_total = 0
                    for train_index, validation_index in folds.split(X[:n]):
                        X_train, y_train = X[train_index], y[train_index]
                        X_validation = X[validation_index]
                        y_validation = y[validation_index]
                        weights = np.zeros(num_dimensions)
                        step = 0
                        for _ in range(epochs):
                            # Reshuffle so every epoch sees different mini-batches.
                            X_train, y_train = shuffle(X_train, y_train)

                            for lo in range(0, len(y_train), batch_size):
                                hi = lo + batch_size
                                X_batch = X_train[lo:hi]
                                y_batch = y_train[lo:hi]

                                # NOTE(review): this is the scalar sum of the
                                # weights, not the per-coordinate gradient of
                                # an L2 penalty -- confirm it is intended.
                                l2_derivative = sum(weights)

                                # Noise for all dimensions.
                                noise = utils.add_noise(num_dimensions, epsilon)

                                learning_rate = learning_rates[step]

                                gradient = (weight_decay * l2_derivative
                                            + utils.loss_derivative(X_batch, y_batch, weights) / batch_size
                                            + noise / batch_size)
                                weights -= learning_rate * gradient

                                step += 1

                        # Score the fold with the trained weights.
                        num_correct = sum(
                            1 for k in range(len(y_validation))
                            if y_validation[k] == utils.sigmoid_prediction(X_validation[k], weights))
                        accuracy_total += num_correct/len(y_validation)

                    accuracy_total /= k_splits
                    grid['error_rate'].append(1 - accuracy_total)
                    grid['batch_size'].append(batch_size)
                    grid['weight_decay'].append(weight_decay)

            # Pick the winning grid point from the cross-validation scores.
            best = utils.get_min_index(grid['error_rate'], grid['batch_size'])
            per_n['parameters'] = (grid['batch_size'][best], grid['weight_decay'][best])
            per_n['error_rate'] = grid['error_rate'][best]

        print('tuning for epsilon: {} done, time from start {}'.format(epsilon, time.time() - started), flush = True)

    return optimal_results
예제 #21
0
def sgd(all_input_params):
    """Tune batch size and weight decay of noisy SGD by cross-validation.

    The argument is the tuple ``(X_without_bias, y, amount_in_interval,
    random_state)``. Predictors are projected to 50 components with a
    Gaussian random projection and get a leading bias column of ones. The
    numpy RNG is then seeded with ``random_state`` and the data shuffled.

    For each ``epsilon`` and each sample count ``n`` in
    ``amount_in_interval``, the full (weight_decay, batch_size) grid is
    scored with 5-fold cross-validation on ``X[:n]``.
    ``utils.get_min_index`` selects the entry that is reported.

    Returns ``optimal_results[epsilon][n]``, a dict holding ``'parameters'``
    as ``(batch_size, weight_decay)`` and the matching ``'error_rate'``.
    """
    t_start = time.time()
    X_without_bias, y, amount_in_interval, random_state = all_input_params

    # Random projection to 50 components; runs before the RNG is seeded.
    reducer = random_projection.GaussianRandomProjection(n_components=50)
    X_without_bias = reducer.fit_transform(X_without_bias)

    # Column 0 is the bias term, the remaining columns are the projection.
    sample_count, feature_count = np.shape(X_without_bias)
    X = np.ones((sample_count, feature_count + 1))
    X[:, 1:] = X_without_bias

    # Seed per process so that parallel workers do not share a data order.
    np.random.seed(random_state)
    X, y = shuffle(X, y)

    num_dimensions = len(X[0])
    candidate_batch_sizes = [
        1, 2, 5, 10, 50, 75, 100, 150, 200, 250, 300, 350, 400
    ]
    epochs = 1
    k_splits = 5
    # 1/sqrt(t + 1) schedule, repeated once per epoch.
    single_epoch_rates = [
        1 / np.sqrt(t + 1) for t in range(amount_in_interval[-1])
    ]
    learning_rates = single_epoch_rates * epochs
    # The original author notes that an infinite epsilon means no noise.
    epsilons = [
        0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 10,
        float('Inf')
    ]
    weight_decays = [
        1, 0.5, 10**(-1), 10**(-2), 10**(-5), 10**(-8), 10**(-11), 10**(-15)
    ]

    optimal_results = {}
    splitter = KFold(n_splits=k_splits)
    for epsilon in epsilons:
        results_for_epsilon = optimal_results.setdefault(epsilon, {})
        for n in amount_in_interval:
            results_for_n = results_for_epsilon.setdefault(n, {})
            # Fresh score table for this (epsilon, n) pair.
            scores = {'batch_size': [], 'weight_decay': [], 'error_rate': []}
            for weight_decay in weight_decays:
                for batch_size in candidate_batch_sizes:
                    summed_accuracy = 0
                    for train_index, validation_index in splitter.split(X[:n]):
                        X_train, y_train = X[train_index], y[train_index]
                        X_validation = X[validation_index]
                        y_validation = y[validation_index]
                        weights = np.zeros(num_dimensions)
                        t = 0
                        for _ in range(epochs):
                            # New mini-batch composition every epoch.
                            X_train, y_train = shuffle(X_train, y_train)

                            for begin in range(0, len(y_train), batch_size):
                                stop = begin + batch_size
                                X_batch = X_train[begin:stop]
                                y_batch = y_train[begin:stop]

                                # NOTE(review): scalar sum of the weights is
                                # used as the regulariser gradient; an L2
                                # penalty would give a per-coordinate term --
                                # confirm this is intended.
                                l2_derivative = sum(weights)

                                noise = utils.add_noise(
                                    num_dimensions, epsilon)

                                learning_rate = learning_rates[t]

                                data_term = utils.loss_derivative(
                                    X_batch, y_batch, weights) / batch_size
                                step_direction = (
                                    weight_decay * l2_derivative + data_term
                                    + noise / batch_size)
                                weights -= learning_rate * step_direction

                                t += 1

                        # Count validation samples the trained model gets right.
                        num_correct = sum(
                            1 for row in range(len(y_validation))
                            if y_validation[row] == utils.sigmoid_prediction(
                                X_validation[row], weights))
                        summed_accuracy += num_correct / len(y_validation)

                    summed_accuracy /= k_splits
                    scores['error_rate'].append(1 - summed_accuracy)
                    scores['batch_size'].append(batch_size)
                    scores['weight_decay'].append(weight_decay)

            # Select the reported grid point from the cross-validation scores.
            chosen = utils.get_min_index(scores['error_rate'],
                                         scores['batch_size'])
            results_for_n['parameters'] = (scores['batch_size'][chosen],
                                           scores['weight_decay'][chosen])
            results_for_n['error_rate'] = scores['error_rate'][chosen]

        print('tuning for epsilon: {} done, time from start {}'.format(
            epsilon,
            time.time() - t_start),
              flush=True)

    return optimal_results
예제 #22
0
def deeper(layer, activation_fn=nn.ReLU(), bnorm=True, prefix='', filters=16):
    r""" Function preserving deeper operator adding a new layer on top of the
    given layer.

    Implemented based on Net2Net paper. If a new dense layer is being added, its
    weight matrix will be set to identity matrix. For convolutional layer, the
    center element of a input channel (in increasing sequence) is set to 1 and
    other to 0 (before ``add_noise`` is applied). This approach only works for
    the Relu activation function as it is idempotent.

    :param layer: Layer on top of which new layers will be added. Must be a
     ``nn.Linear`` or ``nn.Conv2d``.
    :param activation_fn: Activation function to be used between the two layers.
     Default Relu. Currently unused: no activation module is inserted.
    :param bnorm: Add a batch normalisation layer between two
     convolutional/dense layers if True.
    :param prefix: Name prefix for the modules registered in the returned
     container (``<prefix>_conv``, ``<prefix>_bnorm``, ``<prefix>_conv_new``).
    :param filters: Number of input and output channels of the new
     convolutional layer (ignored for dense layers).

    :return: ``nn.Sequential`` holding the given layer, the optional batch
     normalisation layer and the new layer, in that order.
    :raises RuntimeError: If ``layer`` is neither ``nn.Linear`` nor
     ``nn.Conv2d``.
    """

    print('Net2Net Deeper...')
    if not isinstance(layer, (nn.Linear, nn.Conv2d)):
        raise RuntimeError("{} Module not supported".format(
            layer.__class__.__name__))

    if isinstance(layer, nn.Linear):
        # New dense layer whose input and output sizes both equal the output
        # size of the layer it is stacked on; identity weights and zero bias
        # make it pass its input through unchanged.
        new_num_features = layer.out_features
        new_layer = th.nn.Linear(new_num_features, new_num_features)
        new_layer.weight.data = th.eye(new_num_features)
        new_layer.bias.data = th.zeros(new_num_features)

        if bnorm:
            new_bn_layer = nn.BatchNorm1d(num_features=new_num_features)
    else:
        new_kernel_shape = layer.kernel_size
        new_num_channels = filters
        # NOTE(review): the new conv uses `filters` channels while the batch
        # norm below uses layer.out_channels; they presumably must match --
        # verify against callers.

        # Index of the centre tap of the kernel. It doubles as the "same"
        # padding for odd kernel sizes, so the spatial size is preserved for
        # any odd kernel and not only for 3x3 (for which this equals 1).
        center = tuple((k - 1) // 2 for k in new_kernel_shape)
        new_layer = th.nn.Conv2d(new_num_channels,
                                 new_num_channels,
                                 kernel_size=new_kernel_shape,
                                 padding=center)

        # Identity kernel: output channel i copies input channel i through
        # the centre tap; every other coefficient is zero.
        new_layer_weight = th.zeros((new_num_channels, new_num_channels) +
                                    new_kernel_shape)
        for i in range(new_num_channels):
            new_layer_weight[(i, i) + center] = 1

        # Set new weight and bias for new convolutional layer
        new_layer.weight.data = add_noise(new_layer_weight.cuda(),
                                          layer.weight.data)
        new_layer.bias.data = th.zeros(new_num_channels)

        if bnorm:
            new_num_features = layer.out_channels
            new_bn_layer = nn.BatchNorm2d(num_features=new_num_features)

    if bnorm:
        # Batch-norm parameters and running statistics start from their
        # neutral values (scale/variance one, shift/mean zero) passed through
        # add_noise.
        new_bn_layer.weight.data = add_noise(
            th.ones(new_num_features).cuda(), th.Tensor([0, 1]))
        new_bn_layer.bias.data = add_noise(
            th.zeros(new_num_features).cuda(), th.Tensor([0, 1]))
        new_bn_layer.running_mean.data = add_noise(
            th.zeros(new_num_features).cuda(), th.Tensor([0, 1]))
        new_bn_layer.running_var.data = add_noise(
            th.ones(new_num_features).cuda(), th.Tensor([0, 1]))

    # NOTE(review): .cuda() is applied to the still-empty container, so it
    # does not move the modules added afterwards -- confirm that the caller
    # moves the whole network to the GPU.
    seq_container = th.nn.Sequential().cuda()
    seq_container.add_module(prefix + '_conv', layer)
    if bnorm:
        seq_container.add_module(prefix + '_bnorm', new_bn_layer)
    # if activation_fn is not None:
    #     seq_container.add_module(prefix + '_nonlin', nn.ReLU())
    seq_container.add_module(prefix + '_conv_new', new_layer)

    return seq_container
예제 #23
0
 def read_ply(self, file_name):
     """Load a point cloud, transform it and draw sample points from it.

     The vertex coordinates are read from ``file_name`` (a PLY file) and
     cached next to it as ``<root>.npy``; later calls load the cache instead.
     The cloud is optionally perturbed with ``utils.add_noise``, rotated by
     ``self.rotation_angle`` around ``self.rotation_axis`` and sampled with
     ``Sampler.sample``. When ``self.filter_bad_samples`` is set, the samples
     are replaced by ones accepted by ``self.is_good_sample``; the accepted
     indices are cached under ``temp/``.

     Sets ``self.data``, ``self.l``, ``self.samples``,
     ``self.sample_indices`` and ``self.tree``.

     :param file_name: Path of the PLY file to read.
     :return: The transformed point cloud (``self.data``).
     """
     # Split the requested sample budget evenly over the files; the last
     # file additionally receives the remainder.
     num_samples = self.num_samples // len(self.files_list)
     if self.file_index == len(self.files_list) - 1:
         num_samples = num_samples + (self.num_samples - (num_samples * len(self.files_list)))

     root, ext = os.path.splitext(file_name)
     if not os.path.isfile(root + ".npy"):
         ply = PlyData.read(file_name)
         vertex = ply['vertex']
         # Build an (N, 3) array explicitly. A bare zip() is a lazy iterator
         # on Python 3, which np.save cannot store as a coordinate array, and
         # it made the first run differ in type from the cached runs.
         points = np.column_stack(
             [np.ravel(vertex[axis]) for axis in ('x', 'y', 'z')])
         np.save(root + ".npy", points)
     else:
         points = np.load(root + ".npy")

     if self.add_noise:
         self.data = utils.add_noise(points, prob=self.noise_prob, factor=self.noise_factor)
     else:
         self.data = np.asarray(points)

     #if self.data.shape[0] > 2e5:
     #        self.data, _ = Sampler.sample(self.data, -1, 2e5, sampling_algorithm=self.sampling_algorithm)

     # The working length scale is relative to the diameter of the cloud.
     pc_diameter = utils.get_pc_diameter(self.data)
     self.l = self.relL*pc_diameter

     rot = utils.angle_axis_to_rotation(self.rotation_angle, self.rotation_axis)
     self.data = utils.transform_pc(self.data, rot)
     #plotutils.show_pc(self.data)
     #mlab.show()

     #TODO: better sampling
     # Formatted so the output is the same on Python 2 and Python 3.
     print("sampling file:  %s" % (file_name,))
     self.samples, self.sample_indices = Sampler.sample(self.data, -1, num_samples, file_name=file_name, sampling_algorithm=self.sampling_algorithm)
     self.samples = self.samples[0:num_samples]
     self.sample_indices = self.sample_indices[0:num_samples]

     self.tree = spatial.KDTree(self.data)

     #TODO:Intergrate with num_samples for consistency
     # NOTE(review): when filtering is enabled the samples drawn above are
     # discarded; that first call is kept in case Sampler.sample has side
     # effects tied to file_name -- confirm before removing it.
     if self.filter_bad_samples:
         temp_file_samples = 'temp/' + os.path.basename(file_name) + '_' + str(num_samples) + '_filter' + str(self.filter_threshold) + '.npy'
         print("samples file:  %s" % (temp_file_samples,))
         if os.path.isfile(temp_file_samples):
             self.sample_indices = np.load(temp_file_samples)
             self.samples = self.data[self.sample_indices]
         else:
             # Oversample by a factor of two, then keep the first
             # num_samples candidates that pass the quality filter.
             self.samples, self.sample_indices = Sampler.sample(self.data, -1, num_samples*2, sampling_algorithm=self.sampling_algorithm)
             self.samples = self.samples[0:num_samples*2]
             self.sample_indices = self.sample_indices[0:num_samples*2]
             sample_indices_temp = []
             for idx in self.sample_indices:
                 if self.is_good_sample(self.data[idx], self.filter_threshold):
                     sample_indices_temp.append(idx)
                     if len(sample_indices_temp) >= num_samples:
                         break
             assert (len(sample_indices_temp) >= num_samples), \
                 'not enough samples passed the filter'
             self.sample_indices = np.asarray(sample_indices_temp[0:num_samples])
             self.samples = self.data[self.sample_indices]
             # np.save does not create missing directories.
             if not os.path.isdir('temp'):
                 os.makedirs('temp')
             np.save(temp_file_samples, self.sample_indices)
             #plotutils.show_pc(self.samples)
             #mlab.show()

     logging.basicConfig(filename='example.log',level=logging.DEBUG)
     return self.data
예제 #24
0
def sgd(all_input_params):
    """Train noisy mini-batch SGD with pre-tuned parameters and test it.

    ``all_input_params`` is the tuple ``(X_train_without_bias, y_train,
    X_test_without_bias, y_test, amount_in_interval, random_state,
    parameters)``. ``parameters[epsilon][n]['parameters']`` supplies
    ``(batch_size, weight_decay)`` for every noise level and sample count.

    Train and test predictors are projected to 50 components (projection
    fitted on the training data) and get a leading bias column of ones. For
    each ``epsilon`` and each ``n`` in ``amount_in_interval`` a model is
    trained for one epoch on the first ``int(n)`` shuffled training rows and
    evaluated on the whole test set. For the last entry of
    ``amount_in_interval`` the per-batch objective, mean gradient and number
    of processed points are recorded as well.

    Returns ``(results, objective_info)``: ``results[epsilon][n]`` holds
    ``'error_rate'``, ``'noise_and_weights_magnitude'`` and ``'weights'``;
    ``objective_info[epsilon]`` holds the lists ``'objective'``,
    ``'gradient'`` and ``'num_points'``.
    """
    X_train_without_bias, y_train, X_test_without_bias, y_test, amount_in_interval, random_state, parameters = all_input_params
    # X are the predictors, come as np array
    # y are the targets, come as np array
    # amount_in_interval is the number of samples used to generate the learning curve

    # do the random projection as they do in the paper -- second paper
    # (fitted on the training data only, then applied to both sets)
    transformer = random_projection.GaussianRandomProjection(n_components=50)
    transformer.fit(X_train_without_bias)
    X_train_without_bias = transformer.transform(X_train_without_bias)
    X_test_without_bias = transformer.transform(X_test_without_bias)

    # we add the bias term in front -- done for the gradient descent
    records, attributes = np.shape(X_train_without_bias)
    X_train = np.ones((records, attributes + 1))
    X_train[:, 1:] = X_train_without_bias

    records, attributes = np.shape(X_test_without_bias)
    X_test = np.ones((records, attributes + 1))
    X_test[:, 1:] = X_test_without_bias

    # multiprocessing does not give each process a different seed, so a
    # random number is passed in to seed each process differently
    np.random.seed(random_state)

    # shuffle so different data will be used in each process
    X_train, y_train = shuffle(X_train, y_train)

    num_dimensions = len(X_train[0])
    epochs = 1
    epsilons = [
        0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 10,
        float('Inf')
    ]  # inf makes the noise go to zero -- equal to having no noise
    # step size 1/sqrt(t + 1), restarted for every epoch
    learning_rates = [
        1 / np.sqrt(t + 1) for i in range(epochs)
        for t in range(amount_in_interval[-1])
    ]
    results = {}
    objective_info = {}
    for epsilon in epsilons:
        if epsilon not in results:
            results[epsilon] = {}
        for n in amount_in_interval:
            if n not in results[epsilon]:
                results[epsilon][n] = {}
                #results[epsilon][n]['noise'] = []

            weights = np.array([0.0 for i in range(num_dimensions)])
            # the tuned tuple is ordered (batch_size, weight_decay)
            #learning_rate = parameters[epsilon][n]['parameters'][0]
            batch_size = parameters[epsilon][n]['parameters'][0]
            weight_decay = parameters[epsilon][n]['parameters'][1]

            # this branch exists only so that extra properties can be
            # investigated for the last model, which is trained on all
            # available data
            if n != amount_in_interval[-1]:
                t = 0
                for i in range(epochs):
                    # shuffle the data so the minibatch takes different data in each epoch
                    X_train_in_use, y_train_in_use = shuffle(
                        X_train[:int(n)], y_train[:int(n)])
                    for j in range(0, len(y_train_in_use), batch_size):
                        X_batch = X_train_in_use[j:j + batch_size]
                        y_batch = y_train_in_use[j:j + batch_size]

                        # regularisation term
                        # NOTE(review): this is the scalar sum of the weights,
                        # not the per-coordinate gradient of an L2 penalty --
                        # confirm it is intended
                        l2_derivative = sum(weights)

                        # get the noise for all dimensions
                        noise = utils.add_noise(num_dimensions, epsilon)

                        # get the objective derivative value -- look at convergence
                        objective_derivative = weight_decay * l2_derivative + utils.loss_derivative(
                            X_batch, y_batch,
                            weights) / batch_size + noise / batch_size

                        # take a step towards the optimum
                        weights -= learning_rates[t] * (objective_derivative)

                        # keep all the noise added so we can investigate its distribution
                        #results[epsilon][n]['noise'] += noise.tolist()
                        t += 1
            else:
                print(
                    'n != amount_in_interval[-1] = {}, n {}, amount_in_interval[-1] {}'
                    .format(n != amount_in_interval[-1], n,
                            amount_in_interval[-1]))
                # we want to investigate how the objective changes through
                # the iterations, only for the models which are trained on
                # all the data
                if epsilon not in objective_info:
                    objective_info[epsilon] = {}
                    objective_info[epsilon]['objective'] = []
                    objective_info[epsilon]['gradient'] = []
                    objective_info[epsilon]['num_points'] = []
                t = 0
                for i in range(epochs):
                    # continue the running point count from the previous epoch
                    if objective_info[epsilon]['num_points']:
                        points_from_last_epoch = objective_info[epsilon][
                            'num_points'][-1]
                    else:
                        points_from_last_epoch = 0

                    # shuffle the data so the minibatch takes different data in each epoch
                    X_train_in_use, y_train_in_use = shuffle(
                        X_train[:int(n)], y_train[:int(n)])
                    print(len(y_train))
                    for j in range(0, len(y_train_in_use), batch_size):
                        X_batch = X_train_in_use[j:j + batch_size]
                        y_batch = y_train_in_use[j:j + batch_size]

                        # regularisation term (same scalar sum as in the
                        # branch above)
                        l2_derivative = sum(weights)

                        # get the noise for all dimensions
                        noise = utils.add_noise(num_dimensions, epsilon)

                        # get the objective value (computed before the update)
                        objective = utils.get_objective(
                            X_batch, y_batch, weights, batch_size)

                        # get the objective derivative value -- look at convergence
                        objective_derivative = weight_decay * l2_derivative + utils.loss_derivative(
                            X_batch, y_batch,
                            weights) / batch_size + noise / batch_size

                        # take a step towards the optimum
                        weights -= learning_rates[t] * (objective_derivative)

                        objective_info[epsilon]['objective'].append(
                            np.mean(objective))
                        objective_info[epsilon]['gradient'].append(
                            np.mean(objective_derivative))

                        objective_info[epsilon]['num_points'].append(
                            j + batch_size + points_from_last_epoch
                        )  # if we go to the next epoch we keep on counting
                        #results[epsilon][n]['noise'] += noise.tolist()
                        t += 1

                    print('num_points',
                          objective_info[epsilon]['num_points'],
                          flush=True)

            # now we predict with the trained weights, using logistic regression
            num_correct = 0
            avg_error = 0
            for i in range(len(y_test)):
                if y_test[i] == utils.sigmoid_prediction(X_test[i], weights):
                    num_correct += 1
            # despite its name this holds the fraction of correct predictions
            avg_error = num_correct / len(y_test)

            results[epsilon][n]['error_rate'] = 1 - avg_error

            # take the noise of the last iteration and find its magnitude
            # this is done to compare it to the weights to see how it influences
            # the decision process -- when epsilon is inf no noise is added and
            # the magnitude of the weights is stored instead
            # (`noise` is whatever the last training batch above left behind)
            if epsilon == float('Inf'):
                results[epsilon][n]['noise_and_weights_magnitude'] = sum(
                    abs(weights))
            else:
                results[epsilon][n]['noise_and_weights_magnitude'] = sum(
                    abs(noise))

            # investigate how the noise affects the weights by looking at
            # the final weights after each noise level

            results[epsilon][n]['weights'] = sum(abs(weights))

    return (results, objective_info)
예제 #25
0
# GAN training loop: for every batch the discriminator D is updated on real
# images and on images produced by the generator G from random latents.
# NOTE(review): this excerpt ends right after the scores are recomputed; the
# generator update is not part of the visible code.
for epoch in range(0, n_epochs):
    G.train()
    D.train()
    _batch = 0
    # NOTE(review): the scheduler is stepped at the start of the epoch, before
    # any optimizer step -- confirm this ordering is intended.
    scheduler_lr.step()
    for X, _ in train_iter:
        _batch += 1

        real_x = X.to(DEVICE)
        # One latent vector of shape (nz, 1, 1) per real sample in the batch.
        z = T.randn(real_x.size(0), nz, 1, 1, device=DEVICE)
        fake_x = G(z)

        # instance noise trick: the same add_noise call is applied to the
        # real and the generated batch
        if instance_noise_trick:
            real_x = add_noise(real_x, initial_noise_strength, anneal_epoch,
                               epoch)
            fake_x = add_noise(fake_x, initial_noise_strength, anneal_epoch,
                               epoch)

        # detach() keeps the discriminator loss from back-propagating into G
        fake_score = D(fake_x.detach())
        real_score = D(real_x)

        # Discriminator step: real samples are labelled one, fakes zero.
        D.zero_grad()
        lss_D = criterion(real_score, T.ones_like(real_score)) + \
                criterion(fake_score, T.zeros_like(fake_score))
        lss_D.backward()
        opt_D.step()

        # Scores from the updated discriminator; fake_x is not detached here,
        # presumably so that a following generator update can back-propagate
        # through it.
        fake_score = D(fake_x)
        real_score = D(real_x)
def trainOneEpoch(inputCoor, inputGraph, inputLabel, para, sess, trainOperaion,
                  weight_dict, learningRate):
    """Train the model for one epoch over all data chunks.

    Each chunk is shuffled and consumed in full mini-batches of
    ``para.batchSize``; trailing samples that do not fill a batch are
    dropped. Batches are weighted either uniformly or by ``weight_dict``,
    depending on ``para.weighting_scheme``.

    Shapes below follow the original author's notes:

    :param inputCoor: Input coordinates per chunk, (B, N, 3).
    :param inputGraph: Input graphs per chunk, (B, N*N); each entry must
        provide ``tocsr()``.
    :param inputLabel: Labels per chunk, (B, 1).
    :param para: Global parameters (batchSize, weighting_scheme,
        clusterNumberL1, nearestNeighborL1, pointNumber, keep_prob_1,
        keep_prob_2).
    :param sess: Session used to run the training operations.
    :param trainOperaion: Dictionary of placeholders and operations.
    :param weight_dict: Weighting used by the 'weighted' scheme.
    :param learningRate: Learning rate for the current epoch.
    :return: Average loss, accuracy and regularisation loss over the chunks.
    :raises ValueError: If ``para.weighting_scheme`` is neither 'uniform'
        nor 'weighted'.
    """
    dataChunkLoss = []
    dataChunkAcc = []
    dataChunkRegLoss = []
    for i in range(len(inputLabel)):
        xTrain_1, graphTrain_1, labelTrain_1 = inputCoor[i], inputGraph[
            i], inputLabel[i]

        graphTrain_1 = graphTrain_1.tocsr()
        # NOTE(review): the number of classes is fixed at 40 here.
        labelBinarize = label_binarize(labelTrain_1,
                                       classes=[j for j in range(40)])
        xTrain, graphTrain, labelTrain = shuffle(xTrain_1, graphTrain_1,
                                                 labelBinarize)

        batch_loss = []
        batch_acc = []
        batch_reg = []
        batchSize = para.batchSize
        # Floor division keeps the batch count an int on Python 3 as well;
        # true division would hand range() a float and raise TypeError.
        for batchID in range(len(labelBinarize) // batchSize):
            start = batchID * batchSize
            end = start + batchSize
            batchCoor, batchGraph, batchLabel = get_mini_batch(
                xTrain, graphTrain, labelTrain, start, end)
            batchGraph = batchGraph.todense()
            # Perturb the coordinates of every training batch.
            batchCoor = add_noise(batchCoor, sigma=0.008, clip=0.02)
            if para.weighting_scheme == 'uniform':
                batchWeight = uniform_weight(batchLabel)
            elif para.weighting_scheme == 'weighted':
                batchWeight = weights_calculation(batchLabel, weight_dict)
            else:
                # Previously this only printed and then failed further down
                # with a NameError on batchWeight; fail with a clear error.
                raise ValueError('please enter a valid weighting scheme')

            # Centroids and their neighbourhoods for the first layer, and
            # the graph between the centroids.
            batchIndexL1, centroid_coordinates = farthest_sampling_new(
                batchCoor,
                M=para.clusterNumberL1,
                k=para.nearestNeighborL1,
                batch_size=batchSize,
                nodes_n=para.pointNumber)
            batchMiddleGraph = middle_graph_generation(centroid_coordinates,
                                                       batch_size=batchSize,
                                                       M=para.clusterNumberL1)

            feed_dict = {
                trainOperaion['inputPC']: batchCoor,
                trainOperaion['inputGraph']: batchGraph,
                trainOperaion['outputLabel']: batchLabel,
                trainOperaion['lr']: learningRate,
                trainOperaion['weights']: batchWeight,
                trainOperaion['keep_prob_1']: para.keep_prob_1,
                trainOperaion['keep_prob_2']: para.keep_prob_2,
                trainOperaion['batch_index_l1']: batchIndexL1,
                trainOperaion['l2Graph']: batchMiddleGraph,
                trainOperaion['batch_size']: para.batchSize
            }

            # The first fetch runs the training step; its value is unused.
            _, loss_train, acc_train, loss_reg_train = sess.run(
                [
                    trainOperaion['train'], trainOperaion['loss_total'],
                    trainOperaion['acc'], trainOperaion['loss_reg']
                ],
                feed_dict=feed_dict)

            batch_loss.append(loss_train)
            batch_acc.append(acc_train)
            batch_reg.append(loss_reg_train)

        # Per-chunk means; the epoch result averages these chunk means.
        dataChunkLoss.append(np.mean(batch_loss))
        dataChunkAcc.append(np.mean(batch_acc))
        dataChunkRegLoss.append(np.mean(batch_reg))

    train_average_loss = np.mean(dataChunkLoss)
    train_average_acc = np.mean(dataChunkAcc)
    loss_reg_average = np.mean(dataChunkRegLoss)
    return train_average_loss, train_average_acc, loss_reg_average
예제 #27
0
    plt.subplot(221)
    plt.title('image')
    io.imshow(img)
    plt.subplot(222)
    plt.title('label')
    io.imshow(label)
    plt.subplot(223)
    plt.title('affine')
    io.imshow(img_tf)
    plt.subplot(224)
    plt.title('affine')
    io.imshow(label_tf)

# Disabled debug view (change the condition to enable it): shows the first
# training image, its noisy version and its label in a 2x2 figure.
if 0:
    Y_train = np.squeeze(Y_train)
    noisy_imgs = add_noise(X_train)
    ix = 0  # index of the sample to display
    plt.figure(figsize=(8, 8))
    plt.subplot(221)
    plt.title('image')
    io.imshow(X_train[ix])
    plt.subplot(222)
    plt.title('noisy')
    io.imshow(noisy_imgs[ix])
    # NOTE(review): both bottom panels show the same label image and have no
    # title -- confirm this is intended.
    plt.subplot(223)
    io.imshow(Y_train[ix])
    plt.subplot(224)
    io.imshow(Y_train[ix])

# Disabled debug step: calls flip_images on the training images and labels;
# the names suggest horizontally and vertically flipped results.
if 0:
    hrz_flp, vrt_flp = flip_images(X_train, Y_train)
예제 #28
0
def _prepend_bias_column(features):
    """Return a new float array equal to ``features`` with a leading column of ones."""
    records, attributes = np.shape(features)
    with_bias = np.ones((records, attributes + 1))
    with_bias[:, 1:] = features
    return with_bias


def sgd(all_input_params):
    """Train noisy mini-batch SGD models and collect learning-curve statistics.

    For every projection dimension, every epsilon and every training-set size
    in ``amount_in_interval`` a fresh weight vector is trained from zero and
    evaluated on the test set.

    ``all_input_params`` is a 7-tuple (packed so the function can be mapped
    over worker processes):

    * ``X_train_without_bias_start`` / ``y_train_start`` -- training
      predictors and targets (numpy arrays, predictors without bias column).
    * ``X_test_without_bias_start`` / ``y_test_start`` -- test predictors and
      targets.
    * ``amount_in_interval`` -- training-set sizes for the learning curve; the
      last entry is the run that is tracked batch by batch.
    * ``random_state`` -- seed passed to ``np.random.seed``.
    * ``parameters`` -- currently unused (batch size and weight decay are
      fixed constants below).

    Returns ``(results, objective_info)``:

    * ``results[d][epsilon][n]`` holds ``'error_rate'``,
      ``'noise_and_weights_magnitude'`` and ``'weights'``.
    * ``objective_info[d][epsilon]`` holds the per-batch ``'objective'``,
      ``'gradient'`` (norm) and ``'num_points'`` lists of the run trained on
      ``amount_in_interval[-1]`` samples.
    """
    (X_train_without_bias_start, y_train_start, X_test_without_bias_start,
     y_test_start, amount_in_interval, random_state, parameters) = all_input_params

    # multiprocessing does not give each worker a different seed, so every
    # worker is seeded explicitly
    np.random.seed(random_state)
    epochs = 1
    epsilons = [0.1, 1, 10, float('Inf')]  # inf makes the noise go to zero -- equal to having no noise
    learning_rates = [1/np.sqrt(t + 1) for i in range(epochs) for t in range(amount_in_interval[-1])]
    batch_size = 5  # parameters[epsilon][n]['parameters'][0]
    weight_decay = 0.0001  # parameters[epsilon][n]['parameters'][1]
    results = {}
    objective_info = {}
    dimensions = [15, 50, 100, 200, 400, 'all']

    for d in dimensions:
        results[d] = {}
        objective_info[d] = {}
        if d != 'all':
            # random projection down to d components
            transformer = random_projection.GaussianRandomProjection(n_components=d)
            transformer.fit(X_train_without_bias_start)
            X_train_without_bias = transformer.transform(X_train_without_bias_start)
            X_test_without_bias = transformer.transform(X_test_without_bias_start)
        else:
            X_train_without_bias = X_train_without_bias_start
            X_test_without_bias = X_test_without_bias_start

        # the bias term goes in front -- needed for the gradient descent
        X_train = _prepend_bias_column(X_train_without_bias)
        X_test = _prepend_bias_column(X_test_without_bias)

        # shuffle so different data will be used in each process
        X_train, y_train = shuffle(X_train, y_train_start)

        num_dimensions = len(X_train[0])

        for epsilon in epsilons:
            # membership has to be checked on results[d]; checking the outer
            # dict (keyed by dimension) never matched an epsilon
            if epsilon not in results[d]:
                results[d][epsilon] = {}
            for n in amount_in_interval:
                if n not in results[d][epsilon]:
                    results[d][epsilon][n] = {}

                weights = np.zeros(num_dimensions)

                # only the model trained on all available data is tracked
                # batch by batch (objective, gradient norm, points seen)
                track = n == amount_in_interval[-1]
                if track and epsilon not in objective_info[d]:
                    objective_info[d][epsilon] = {}
                    objective_info[d][epsilon]['objective'] = []
                    objective_info[d][epsilon]['gradient'] = []
                    objective_info[d][epsilon]['num_points'] = []

                t = 0
                for i in range(epochs):
                    if track:
                        history = objective_info[d][epsilon]
                        # keep counting points across epochs
                        if history['num_points']:
                            points_from_last_epoch = history['num_points'][-1]
                        else:
                            points_from_last_epoch = 0

                    # shuffle the data so the minibatch takes different data in each epoch
                    X_train_in_use, y_train_in_use = shuffle(X_train[:int(n)], y_train[:int(n)])
                    for j in range(0, len(y_train_in_use), batch_size):
                        X_batch = X_train_in_use[j:j+batch_size]
                        y_batch = y_train_in_use[j:j+batch_size]

                        # regularisation term
                        # NOTE(review): this is the scalar sum of the weights,
                        # not the weight vector one would expect as the
                        # gradient of an L2 penalty -- kept as-is, confirm.
                        l2_derivative = sum(weights)

                        # get the noise for all dimensions
                        noise = utils.add_noise(num_dimensions, epsilon)

                        if track:
                            # objective value before the step
                            objective = utils.get_objective(X_batch, y_batch, weights, batch_size, weight_decay)

                        # noisy gradient of the objective
                        objective_derivative = weight_decay * l2_derivative + utils.loss_derivative(X_batch, y_batch, weights) / batch_size + noise / batch_size

                        # take a step towards the optimum (in place)
                        weights -= learning_rates[t] * objective_derivative

                        if track:
                            history['objective'].append(objective)
                            # NOTE(review): evaluated with the already updated
                            # weights but the pre-update l2 term, as before.
                            history['gradient'].append(np.linalg.norm(weight_decay * l2_derivative + utils.loss_derivative(X_batch, y_batch, weights) / batch_size, ord=2))
                            history['num_points'].append(j + batch_size + points_from_last_epoch)
                        t += 1

                # predict with the trained weights and score against the test
                # targets that were passed in (previously an undefined
                # ``y_test`` name was used here)
                num_correct = sum(
                    1 for i in range(len(y_test_start))
                    if y_test_start[i] == utils.sigmoid_prediction(X_test[i], weights))
                accuracy = num_correct / len(y_test_start)

                results[d][epsilon][n]['error_rate'] = 1 - accuracy

                # magnitude of the last noise draw, to compare against the
                # weights; with epsilon == inf no noise is added, so the
                # weight magnitude is stored instead
                if epsilon == float('Inf'):
                    results[d][epsilon][n]['noise_and_weights_magnitude'] = sum(abs(weights))
                else:
                    results[d][epsilon][n]['noise_and_weights_magnitude'] = sum(abs(noise))

                # final weight magnitude for every noise level
                results[d][epsilon][n]['weights'] = sum(abs(weights))
        print('dimension {}'.format(d), flush = True)

    return (results, objective_info)
예제 #29
0
def _plot_first_images(images, prefix, how_many=10):
    """Plot the first ``how_many`` images as 28x28 pictures named ``<prefix><k>`` under ``img/``."""
    for count, img in enumerate(images[:how_many], start=1):
        name = prefix + str(count)
        path = 'img/' + name
        plot_image(img.reshape((28, 28)), name, path)


def main(unused_argv):
    """Train ``model.DAE`` on noisy MNIST batches, then evaluate on the test set.

    Steps: parse the command line, create the training directory if needed,
    build the graph and training op, run ``args.number_of_steps`` training
    steps (checkpointing and printing the loss every 50 steps), and finally
    feed a noisy copy of the test images through the model, plotting the
    first ten noisy inputs and the first ten reconstructions.

    :param unused_argv: ignored.
    """
    # Parse arguments.
    parser = argparse.ArgumentParser()
    args = parse_arguments(parser)

    # Model configuration.
    model_config = configuration.ModelConfig()
    training_config = configuration.TrainingConfig()

    # Create training directory.
    train_dir = args.train_dir
    if not tf.gfile.IsDirectory(train_dir):
        tf.logging.info("Creating training directory: %s", train_dir)
        tf.gfile.MakeDirs(train_dir)

    # Load MNIST data.
    mnist = input_data.read_data_sets('MNIST')

    # Build the TensorFlow graph.
    g = tf.Graph()

    with g.as_default():

        # Build the model.
        the_model = model.DAE(model_config)
        the_model.build()

        # Set up the learning rate.
        learning_rate = tf.constant(training_config.learning_rate)

        # Set up the training ops.
        train_op = tf.contrib.layers.optimize_loss(
            loss=the_model.total_loss,
            global_step=the_model.global_step,
            learning_rate=learning_rate,
            optimizer=training_config.optimizer)

        # Set up the Saver for saving and restoring model checkpoints.
        saver = tf.train.Saver()

        # Run training.
        print("Training")

        with tf.Session() as sess:

            print("Initializing parameters")
            sess.run(tf.global_variables_initializer())

            # range() excludes its stop value; the + 1 makes the loop run the
            # requested number of steps instead of one fewer.
            for step in range(1, args.number_of_steps + 1):

                # Read batch.
                batch = mnist.train.next_batch(model_config.batch_size)[0]

                # Create a noisy version of the batch.
                noisy_batch = utils.add_noise(batch)

                # Prepare the dictionary to feed the data to the graph.
                feed_dict = {
                    "images:0": batch,
                    "noisy_images:0": noisy_batch,
                    "phase_train:0": True
                }

                # Run training
                _, loss = sess.run([train_op, the_model.total_loss],
                                   feed_dict=feed_dict)

                if step % 50 == 0:
                    # Save checkpoint.
                    saver.save(sess, train_dir + '/model.ckpt')

                    # Print Loss.
                    print("Step:", '%06d' % (step), "cost=",
                          "{:.9f}".format(loss))

            print('Finished training ...')

            print('Start testing ...')

            # Load the test images and create a noisy version of them.
            testing_data = mnist.test.images
            corrupted_testing = utils.add_noise(testing_data)

            # Plot the first noisy inputs.
            _plot_first_images(corrupted_testing, 'ori_img')

            # Prepare the dictionary to feed the data to the graph.
            feed_dict = {
                "images:0": testing_data,
                "noisy_images:0": corrupted_testing,
                "phase_train:0": False
            }

            # Compute the reconstructions and the loss
            reconstruc, loss = sess.run(
                [the_model.reconstructed_images, the_model.total_loss],
                feed_dict=feed_dict)

            # Plot the first reconstructed (denoised) images.
            _plot_first_images(reconstruc, 'de_img')

            print("Testing loss= ", loss)
예제 #30
0
def wider(layer1, layer2, new_width, bnorm=None):
    r""" Widens the layers in the network.

    Implemented according to NetMorph Widening operation. The next adjacent
    layer in the network also needs to be be widened due to increase in the
    width of previous layer.

    Both layers must have a bias, and are assumed to be ungrouped
    (``groups=1``). The new layers keep the kernel size, stride, padding and
    dilation of the layers they replace. When ``layer1`` is a convolution and
    ``layer2`` is linear, every added channel contributes as many input
    features to ``layer2`` as each existing channel does.

    If either layer is not a ``Conv2d``/``Linear`` the two layers are returned
    unchanged (the BN layer is still widened when given).

    :param layer1: The layer to be widened
    :param layer2: The next adjacent layer to be widened
    :param new_width: Width of the new layer (output channels/features of first
    layer and input channels/features of next layer.
    :param bnorm: BN layer to be widened if provided.
    :return: widened layers
    :raises ValueError: if the input width of ``layer2`` is not a multiple of
    the output width of ``layer1``.
    """

    # Single parenthesised argument: same output under Python 2 and 3.
    print('NetMorph Widening... ')
    if (isinstance(layer1, (nn.Conv2d, nn.Linear))
            and isinstance(layer2, (nn.Conv2d, nn.Linear))):

        teacher_w1 = layer1.weight.data
        teacher_b1 = layer1.bias.data
        teacher_w2 = layer2.weight.data
        teacher_b2 = layer2.bias.data

        old_width = teacher_w1.size(0)
        assert new_width > old_width, "New size should be larger"
        n_new = new_width - old_width

        # Widening output channels/features of first layer
        # Randomly select weights from the first teacher layer and the
        # corresponding biases and append them to the student layer. Noise is
        # added to the newly created student layer below.
        rand_ids = th.randint(low=0, high=old_width, size=(n_new,))
        # older torch versions return a float tensor here; indexing needs a
        # long tensor on the same device as the weights
        rand_ids = rand_ids.long().to(teacher_w1.device)

        student_w1 = th.cat(
            (teacher_w1, teacher_w1.index_select(0, rand_ids)), dim=0)
        student_b1 = th.cat(
            (teacher_b1, teacher_b1.index_select(0, rand_ids)))

        if isinstance(layer1, nn.Conv2d):
            new_current_layer = nn.Conv2d(
                out_channels=new_width, in_channels=layer1.in_channels,
                kernel_size=layer1.kernel_size, stride=layer1.stride,
                padding=layer1.padding, dilation=layer1.dilation)
        else:
            new_current_layer = nn.Linear(
                in_features=layer1.in_features, out_features=new_width)

        new_current_layer.weight.data = add_noise(student_w1, teacher_w1)
        new_current_layer.bias.data = add_noise(student_b1, teacher_b1)

        # Widening input channels/features of second layer. Copy the weights
        # from teacher layer and only add noise to additional filter
        # channels/features in student layer. The student layer will have same
        # bias as teacher.
        if teacher_w2.shape[1] % old_width != 0:
            raise ValueError(
                "Input width of layer2 (%d) is not a multiple of the output "
                "width of layer1 (%d)" % (teacher_w2.shape[1], old_width))
        # 1 for conv->conv and linear->linear; the flattened spatial size for
        # conv->linear
        inputs_per_unit = teacher_w2.shape[1] // old_width

        # only the *additional* input slices are created, so that the
        # concatenation ends up new_width wide instead of twice the old width
        extra_shape = list(teacher_w2.shape)
        extra_shape[1] = n_new * inputs_per_unit
        noise = add_noise(teacher_w2.new_zeros(extra_shape), teacher_w2)

        student_w2 = th.cat((teacher_w2, noise), dim=1)

        if isinstance(layer2, nn.Conv2d):
            new_next_layer = nn.Conv2d(
                out_channels=layer2.out_channels,
                in_channels=student_w2.shape[1],
                kernel_size=layer2.kernel_size, stride=layer2.stride,
                padding=layer2.padding, dilation=layer2.dilation)
        else:
            new_next_layer = nn.Linear(
                in_features=student_w2.shape[1],
                out_features=layer2.out_features)

        new_next_layer.weight.data = student_w2
        new_next_layer.bias.data = teacher_b2

        layer1 = new_current_layer
        layer2 = new_next_layer

    # Widening batch normalisation layer if provided. Only add noise to
    # additional features for all 4 parameters in the layer i.e. mean, variance,
    # weight and bias.
    if bnorm is not None:
        n_add = new_width - bnorm.num_features

        # get current parameter values
        bn_weights = bnorm.weight.data
        bn_bias = bnorm.bias.data
        bn_running_mean = bnorm.running_mean.data
        bn_running_var = bnorm.running_var.data

        # set noise for all parameter values; the base values are created on
        # the same device/dtype as the existing BN parameters
        weight_noise = add_noise(bn_weights.new_ones(n_add),
                                 th.Tensor([0, 1]))
        bias_noise = add_noise(bn_bias.new_zeros(n_add), th.Tensor([0, 1]))
        running_mean_noise = add_noise(bn_running_mean.new_zeros(n_add),
                                       th.Tensor([0, 1]))
        running_var_noise = add_noise(bn_running_var.new_ones(n_add),
                                      th.Tensor([0, 1]))

        # append noise to current parameter values to widen
        new_bn_weights = th.cat((bn_weights, weight_noise))
        new_bn_bias = th.cat((bn_bias, bias_noise))
        new_bn_running_mean = th.cat((bn_running_mean, running_mean_noise))
        new_bn_running_var = th.cat((bn_running_var, running_var_noise))

        # assign new parameter values for a new BN layer of the same class
        # and with the same eps/momentum as the one being replaced
        new_bn_layer = type(bnorm)(num_features=bnorm.num_features + n_add,
                                   eps=bnorm.eps, momentum=bnorm.momentum)
        new_bn_layer.weight.data = new_bn_weights
        new_bn_layer.bias.data = new_bn_bias
        new_bn_layer.running_mean.data = new_bn_running_mean
        new_bn_layer.running_var.data = new_bn_running_var

        bnorm = new_bn_layer

    return layer1, layer2, bnorm
예제 #31
0
def main(args):
    # Setup tensorboard stuff
    writer = SummaryWriter("../tensorboard_data/" + args.model_type + "-" +
                           args.denoise + str(args.denoise_latent) +
                           str(args.noise_level) +
                           str(datetime.datetime.now()))
    params_to_tb(writer, args)

    ## Load Data
    size = 2
    spectrum, y = load_data(args.channelwise)
    if args.benchmark:
        spectrum = spectrum[y[:, 0] <= 6000]
        y = y[y[:, 0] <= 6000]
        spectrum = spectrum[y[:, 0] >= 4000]
        y = y[y[:, 0] >= 4000]
        size = 4

    spectrum, y = interpolate(spectrum, y, number_of_inters=size)

    torch.manual_seed(0)
    #spectrum = add_noise(spectrum, args.noise_level)
    print(spectrum.shape)

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    if args.model_type == 'bayes' or args.model_type == 'bAttnVGG' or args.model_type == 'bAttn1d':
        Bayesian = True
    else:
        Bayesian = False

    X_train, X_test, y_train, y_test = train_test_split(spectrum.data.numpy(),
                                                        y.data.numpy(),
                                                        random_state=55,
                                                        test_size=0.1)

    X_train, X_val, y_train, y_val = train_test_split(X_train,
                                                      y_train,
                                                      random_state=55,
                                                      test_size=0.1)

    X_train = torch.from_numpy(X_train).float()
    y_train = torch.from_numpy(y_train).float()
    X_val = torch.from_numpy(X_val).float()
    y_val = torch.from_numpy(y_val).float()
    X_test = torch.from_numpy(X_test).float()
    y_test = torch.from_numpy(y_test).float()

    print("Normalizing")
    train_means = torch.mean(y_train, dim=0)
    train_std = torch.std(y_train, dim=0)
    y_train = (y_train - train_means) / train_std

    y_val = (y_val - train_means) / train_std
    y_test = (y_test - train_means) / train_std
    print(train_std)
    print(train_means)

    print(spectrum.shape)
    print(y.shape)

    if args.model_type == 'conv1d':
        model = conv1D(in_size=spectrum.shape[-1],
                       out_size=4,
                       input_channels=spectrum.shape[1],
                       convolutions=args.convolutions,
                       kernel_size=args.kernel_size,
                       hiddenlayer=args.hiddenlayer,
                       maxpool=args.maxpool,
                       dropout=args.dropout)
    elif args.model_type == 'resnet':
        print("resnet")
        model = ResidualNetworkD1(in_size=spectrum.shape[-1],
                                  out_size=4,
                                  input_channels=spectrum.shape[2],
                                  convolutions=args.convolutions,
                                  kernel_size=args.kernel_size,
                                  hiddenlayer=args.hiddenlayer,
                                  maxpool=args.maxpool,
                                  dropout=args.dropout)
    elif args.model_type == 'conv2d':
        print("resnet2d")
        model = ResidualNetworkD2(in_size=8 * 4096,
                                  out_size=4,
                                  convolutions=args.convolutions,
                                  kernel_size=args.kernel_size,
                                  hiddenlayer=args.hiddenlayer,
                                  maxpool=args.maxpool,
                                  dropout=args.dropout)
    elif args.model_type == 'bayes':
        print('Bayesian')
        model = BayesianResidualNetworkD1(in_size=spectrum.shape[-1],
                                          out_size=4,
                                          input_channels=spectrum.shape[2],
                                          convolutions=args.convolutions,
                                          kernel_size=args.kernel_size,
                                          hiddenlayer=args.hiddenlayer,
                                          maxpool=args.maxpool,
                                          dropout=args.dropout)
    elif args.model_type == 'attention':
        print("spatialAttetion")
        model = SpatialAttentionNetwork(4)
    elif args.model_type == 'AttnVGG':
        print("AttnVGG")
        model = AttnVGG_after(im_size=4096,
                              num_classes=4,
                              attention=True,
                              normalize_attn=True)
    elif args.model_type == 'bAttnVGG':
        print("bAttnVGG")
        model = bAttnVGG_after(im_size=4096,
                               num_classes=4,
                               attention=True,
                               normalize_attn=args.norm_att)
    elif args.model_type == 'bAttn1d':
        print("batt1d")
        model = bAttnVGG_1d(im_size=4096,
                            num_classes=4,
                            attention=True,
                            normalize_attn=True)
    else:
        model = conv2D(in_size=8 * 4096,
                       out_size=4,
                       convolutions=args.convolutions,
                       kernel_size=args.kernel_size,
                       hiddenlayer=args.hiddenlayer,
                       maxpool=args.maxpool,
                       dropout=args.dropout)

    model.to(device)

    if (args.l1):
        criterion = nn.L1Loss()
    else:
        criterion = nn.MSELoss()

    if (args.SGD):
        optimizer = optim.AdamW(model.parameters(), lr=args.learningrate)
    else:
        optimizer = optim.Adam(model.parameters(),
                               lr=args.learningrate,
                               weight_decay=args.l2)

    scheduler = optim.lr_scheduler.MultiStepLR(
        optimizer,
        milestones=[(args.epochs - args.lr_decay_milestones)],
        gamma=args.lr_decay_factor)
    if (args.model_type == 'attention'):
        lr = 3e-4
        optim.Adam([{
            'params': model.networks.parameters(),
            'lr': lr,
            'weight_decay': 10e-5
        }, {
            'params': model.finals.parameters(),
            'lr': lr,
            'weight_decay': 10e-5
        }, {
            'params': model.stn.parameters(),
            'lr': lr * 10e-2,
            'weight_decay': 10e-5
        }])
        scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
                                                   milestones=[325, 420],
                                                   gamma=args.lr_decay_factor)

    dataset = torch.utils.data.TensorDataset(X_train, y_train)
    dataset_val = torch.utils.data.TensorDataset(X_val, y_val)
    dataset_test = torch.utils.data.TensorDataset(X_test, y_test)

    BATCH_SIZE = args.batch_size

    trainloader = torch.utils.data.DataLoader(dataset,
                                              batch_size=BATCH_SIZE,
                                              shuffle=True,
                                              num_workers=0,
                                              pin_memory=True)
    valloader = torch.utils.data.DataLoader(dataset_val,
                                            batch_size=BATCH_SIZE,
                                            shuffle=True,
                                            num_workers=0,
                                            pin_memory=True)
    testloader = torch.utils.data.DataLoader(dataset_test,
                                             batch_size=BATCH_SIZE,
                                             shuffle=True,
                                             num_workers=0,
                                             pin_memory=True)

    if (args.denoise != " "):
        if (args.denoise == 'VAE1D'):
            denoiser = ConvVAE1D(dataset[0][0].squeeze(0).shape,
                                 args.denoise_latent**2)
        elif (args.denoise == 'DAE'):
            denoiser = ConvDAE(dataset[0][0].shape, args.denoise_latent**2)
        elif (args.denoise == 'DAE1d'):
            print("DAE1d")
            denoiser = DAE1d(dataset[0][0].squeeze(0).shape,
                             args.denoise_latent**2)
        elif (args.denoise == 'VAE2D'):
            denoiser = ConvVAE(dataset[0][0].shape, args.denoise_latent**2)
        elif (args.denoise == 'AFVAE'):
            denoiser = AFVAE(dataset[0][0].shape, args.denoise_latent**2)

        denoiser.load_state_dict(
            torch.load("../savedmodels/" + args.denoise +
                       str(args.denoise_latent) + str(args.noise_level) +
                       ".pth",
                       map_location=torch.device(device)))
        denoiser.to(device)
        denoiser.eval()
        test_spectrum_clean = spectrum[0:15].to(device)
        test_spectrum = spectrum[0:15].to(device)
        denoised, _ = denoiser.reconstruct(test_spectrum.to(device))
        print(
            f'MSE_recon: {torch.sum((denoised.cpu()-test_spectrum_clean.cpu())**2)}'
        )
        print(
            f'MSE_noise: {torch.sum((test_spectrum.cpu()-test_spectrum_clean.cpu())**2)}'
        )
        del test_spectrum_clean
        del test_spectrum
        del denoised

    print("setup Complete")
    TB_counter = 0
    epochs = args.epochs
    start_epoch = 0
    if args.restore_checkpoint:
        checkpoint = torch.load("../savedmodels/checkpoint" + args.model_type +
                                "-" + args.denoise + str(args.denoise_latent) +
                                str(args.noise_level))
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        start_epoch = checkpoint['epoch']
        loss = checkpoint['loss']
        scheduler.load_state_dict(checkpoint['scheduler'])

    for epoch in range(start_epoch, epochs):
        train_loss = 0
        train_counter = 0
        model.train()
        for i, (mini_batch_x, mini_batch_y) in enumerate(trainloader):

            mini_batch_x = add_noise(mini_batch_x, args.noise_level)
            # If denoise run a denoising step
            if (args.denoise != " "):
                mini_batch_x, _ = denoiser.reconstruct(mini_batch_x.to(device))

            optimizer.zero_grad()
            #### Forward Pass
            y_pred = model(mini_batch_x.to(device))

            #### Compute Loss
            if Bayesian:
                loss = nll_loss(y_pred, mini_batch_y.to(device))
                #print(loss.item())
                #print(y_pred.mean)
                #print(y_pred.stddev)
            else:
                loss = loss_func(y_pred, mini_batch_y.to(device))
            #loss = loss_func(y_pred.squeeze(), mini_batch_y.to(device))
            #### Backward pass
            loss.backward()
            optimizer.step()
            train_loss += loss.cpu().data.numpy()
            train_counter += 1

        scheduler.step()
        writer.add_scalar("train_loss",
                          train_loss / train_counter,
                          global_step=TB_counter)
        TB_counter += 1
        if ((epoch) % 10) == 0:
            val_loss = 0
            val_counter = 0
            with torch.set_grad_enabled(False):
                model.eval()
                for i, (val_batch_x, val_batch_y) in enumerate(valloader):
                    val_batch_x = add_noise(val_batch_x, args.noise_level)
                    if (args.denoise != " "):
                        val_batch_x, _ = denoiser.reconstruct(
                            val_batch_x.to(device))

                    if Bayesian:
                        # just take the mean of the estimates
                        y_pred_test = model(val_batch_x.to(device)).mean
                    else:
                        y_pred_test = model(val_batch_x.to(device))
                    val_loss += loss_func(y_pred_test.squeeze(),
                                          val_batch_y.to(device))
                    val_counter += 1

            val_loss = (val_loss).cpu().data.numpy() / val_counter
            writer.add_scalar("validation_loss",
                              val_loss,
                              global_step=TB_counter)

        if ((epoch) % 10) == 0:
            print('Epoch {}: train_loss: {} Val loss: {}'.format(
                epoch, loss, val_loss))

        if ((epoch % 25) == 0 and args.model_type == 'bAttnVGG'):
            torch.save(
                {
                    'epoch': epoch,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'loss': loss,
                    'scheduler': scheduler.state_dict()
                }, "../savedmodels/checkpoint" + args.model_type + "-" +
                args.denoise + str(args.denoise_latent) +
                str(args.noise_level))

    model.eval()
    old_batch = None
    old_label = None
    with torch.set_grad_enabled(False):
        final_val_loss = 0
        for i, (val_batch_x, val_batch_y) in enumerate(testloader):
            val_batch_x = add_noise(val_batch_x, args.noise_level)
            # If denoise run a denoising step
            if (args.denoise != " "):
                with torch.set_grad_enabled(False):
                    val_batch_x, _ = denoiser.reconstruct(
                        val_batch_x.to(device))
            if Bayesian:
                # just take the mean of the estimates
                y_pred_test = model(val_batch_x.to(device)).mean
                y_pred_test_std = model(val_batch_x.to(device)).stddev
            else:
                y_pred_test = model(val_batch_x.to(device))

            final_val_loss += loss_func(y_pred_test.squeeze(),
                                        val_batch_y.to(device)).cpu()

            y_pred = (y_pred_test.detach().cpu() * train_std) + train_means
            y = (val_batch_y.detach().cpu() * train_std) + train_means

            if i == 0:
                residuals = (y_pred - y).cpu().detach()
                if Bayesian:
                    residuals_stds = y_pred_test_std
            else:
                residuals = torch.cat([residuals, (y_pred - y).detach()],
                                      dim=0)
                if Bayesian:
                    residuals_stds = torch.cat(
                        [residuals_stds,
                         y_pred_test_std.detach()], dim=0)

            if i < 3:
                with open(
                        '../residuals/data-' + args.model_type + "-" +
                        args.denoise + str(args.denoise_latent) +
                        str(args.noise_level) + '.csv', 'a') as data:
                    np.savetxt(data,
                               val_batch_x.view(val_batch_x.shape[0],
                                                -1).cpu().data.numpy(),
                               delimiter=",")
                with open(
                        '../residuals/labels-' + args.model_type + "-" +
                        args.denoise + str(args.denoise_latent) +
                        str(args.noise_level) + '.csv', 'a') as data:
                    np.savetxt(data,
                               y.view(y.shape[0], -1).data.numpy(),
                               delimiter=",")
                with open(
                        '../residuals/residuals-' + args.model_type + "-" +
                        args.denoise + str(args.denoise_latent) +
                        str(args.noise_level) + '.csv', 'a') as res:
                    np.savetxt(res, (y_pred - y).detach(), delimiter=",")
                if Bayesian:
                    with open(
                            '../residuals/residuals-std-' + args.model_type +
                            "-" + args.denoise + str(args.denoise_latent) +
                            str(args.noise_level) + '.csv', 'a') as res:
                        np.savetxt(
                            res, (y_pred_test_std.detach().cpu() * train_std),
                            delimiter=",")

    if args.model_type == 'bAttnVGG' or args.model_type == 'AttnVGG' or args.model_type == 'bAttn1d':
        model.visual_att(testloader, device, args)
    final_val_loss = final_val_loss
    final_test_loss = 0
    final_counter = 0
    with torch.set_grad_enabled(False):
        for i, (val_batch_x, val_batch_y) in enumerate(testloader):
            val_batch_x = add_noise(val_batch_x, args.noise_level)
            if (args.denoise != " "):
                val_batch_x, _ = denoiser.reconstruct(val_batch_x.to(device))
            if Bayesian:
                # just take the mean of the estimates
                y_pred_test = model(val_batch_x.to(device)).mean
            else:
                y_pred_test = model(val_batch_x.to(device))

            final_test_loss += loss_func(
                y_pred_test.squeeze(),
                val_batch_y.to(device)).cpu().data.numpy()
            final_counter += 1
    final_test_loss = final_test_loss / final_counter

    print("final validation loss: {}".format(final_val_loss))
    print("final std of residuals from validation set: {}".format(
        torch.std(residuals, dim=0).cpu().data.numpy()))
    print("final mean squared error: {}".format(
        torch.mean(residuals**2, dim=0).cpu().data.numpy()))
    print("final RMSE error: {}".format(
        torch.sqrt(torch.mean(residuals**2, dim=0)).cpu().data.numpy()))
    print("final MAE error: {}".format(
        torch.mean(torch.abs(residuals), dim=0).cpu().data.numpy()))
    if Bayesian:
        print("final unnormed mean std from model: {}".format(
            torch.mean(y_pred_test_std.cpu() * train_std,
                       dim=0).cpu().data.numpy()))

    print("STARNET RMSE ")
    print("[51.2, 0.081, 0.040] ")
    print("STARNET MAE ")
    print("[31.2, 0.053, 0.025] ")

    print("final test loss: {}".format(final_test_loss))
    test_sun(model, train_means, train_std, device)
    print("Saving Residuals")
    if args.savemodel:
        torch.save(model.state_dict(), "../savedmodels/" + args.name)
예제 #32
0
    def __init__(self, shape, sess, variance_coef, data_info):
        """Build the denoising autoencoder (DAE): data queues, variables and losses.

        The constructor wires up two input pipelines (train and validation),
        creates the per-layer variables, and defines a training network that
        reconstructs clean batches from noise-corrupted ones plus a validation
        network that is fed the clean batches directly.

        Args:
          shape:          list of ints specifying
                          num input, hidden1 units,...hidden_n units, num outputs
          sess:           tensorflow session object to use
          variance_coef:  multiplicative factor for the variance of noise wrt
                          the variance of data (forwarded to add_noise)
          data_info:      key information about the dataset; this method reads
                          max_val, mean_pose, train_shape, eval_shape and
                          data_sigma from it
        """

        self.__shape = shape  # [input_dim,hidden1_dim,...,hidden_n_dim,output_dim]
        self.__variables = {}
        self.__sess = sess

        # Everything except the input and output entries of `shape` is hidden.
        self.num_hidden_layers = np.size(shape) - 2

        self.batch_size = FLAGS.batch_size
        self.sequence_length = FLAGS.chunk_length

        self.scaling_factor = 1

        # maximal value and mean pose in the dataset (used for scaling it to interval [-1,1] and back)
        self.max_val = data_info.max_val
        self.mean_pose = data_info.mean_pose

        #################### Add the DATASETS to the GRAPH ###############
        # NOTE(review): these ops are created before `sess.graph.as_default()`
        # is entered below, i.e. in whatever graph is the default at call time
        # -- confirm that this is the same graph as `sess.graph`.

        #### 1 - TRAIN ###
        # The data is held in a non-trainable variable that is initialised
        # from a placeholder; `collections=[]` keeps it out of the standard
        # variable collections.
        self._train_data_initializer = tf.placeholder(
            dtype=tf.float32, shape=data_info.train_shape)
        self._train_data = tf.Variable(self._train_data_initializer,
                                       trainable=False,
                                       collections=[],
                                       name='Train_data')
        # NOTE(review): this uses FLAGS.num_hidden_layers rather than
        # self.num_hidden_layers computed above -- confirm the two agree.
        train_epochs = FLAGS.training_epochs + FLAGS.pretraining_epochs * FLAGS.num_hidden_layers
        train_frames = tf.train.slice_input_producer([self._train_data],
                                                     num_epochs=train_epochs)
        self._train_batch = tf.train.shuffle_batch(train_frames,
                                                   batch_size=FLAGS.batch_size,
                                                   capacity=5000,
                                                   min_after_dequeue=1000,
                                                   name='Train_batch')

        #### 2 - VALIDATE, can be used as TEST ###
        # When optimizing - this dataset serves as the validation dataset,
        # when testing - this dataset holds the test dataset
        self._valid_data_initializer = tf.placeholder(
            dtype=tf.float32, shape=data_info.eval_shape)
        self._valid_data = tf.Variable(self._valid_data_initializer,
                                       trainable=False,
                                       collections=[],
                                       name='Valid_data')
        valid_frames = tf.train.slice_input_producer(
            [self._valid_data], num_epochs=FLAGS.training_epochs)
        self._valid_batch = tf.train.shuffle_batch(valid_frames,
                                                   batch_size=FLAGS.batch_size,
                                                   capacity=5000,
                                                   min_after_dequeue=1000,
                                                   name='Valid_batch')

        if FLAGS.weight_decay is not None:
            print('\nWe apply weight decay')

        ### Specify tensorflow setup  ###
        with sess.graph.as_default():

            ##############        SETUP VARIABLES       ######################

            with tf.variable_scope("AE_Variables"):

                for i in range(self.num_hidden_layers + 1):  # go over layers

                    # create variables for matrices and biases for each layer
                    self._create_variables(i, FLAGS.weight_decay)

                ##############        DEFINE THE NETWORK     ##################
                ''' 1 - Setup network for TRAINing '''
                # Input noisy data and reconstruct the original one
                # as in Denoising AutoEncoder
                self._input_ = add_noise(self._train_batch, variance_coef,
                                         data_info.data_sigma)
                self._target_ = self._train_batch

                # Define output and loss for the training data
                self._output, _, _ = self.construct_graph(
                    self._input_, FLAGS.dropout)
                self._reconstruction_loss = loss_reconstruction(
                    self._output, self._target_, self.max_val)
                # The total loss is the sum of everything in the 'losses'
                # collection (presumably weight-decay terms registered by
                # _create_variables -- confirm) plus the reconstruction loss.
                tf.add_to_collection(
                    'losses',
                    self._reconstruction_loss)  # joins the other collected losses
                self._loss = tf.add_n(tf.get_collection('losses'),
                                      name='total_loss')
                ''' 2 - Setup network for TESTing '''
                # Validation input is the clean batch: no noise is added here.
                self._valid_input_ = self._valid_batch
                self._valid_target_ = self._valid_batch

                # Define output (no dropout)
                # presumably the second argument is a keep probability, so 1
                # disables dropout -- verify against construct_graph
                self._valid_output, self._encode, self._decode = \
                    self.construct_graph(self._valid_input_, 1)

                # Define loss
                self._valid_loss = loss_reconstruction(self._valid_output,
                                                       self._valid_target_,
                                                       self.max_val)
예제 #33
0
def train(epoch):
    """Train the model for one epoch on noise-corrupted inputs and log losses.

    Relies on module-level state defined elsewhere in the file: ``model``,
    ``optimizer``, ``train_loader``, ``device``, ``args``, ``writer`` and
    ``train_loss_list`` (the epoch average is appended to the latter).

    Which loss is optimised depends on ``args``:

    * ``args.mode == 1``: ``topological_loss(recon_batch)`` only.
    * ``args.topo == True``: ``loss_function`` returning the squared-error,
      KL and topological terms.
    * otherwise: ``loss_function`` returning the squared-error and KL terms.

    Args:
        epoch: Epoch index; used in the progress messages and as the step
            passed to ``writer.add_scalars``.

    Returns:
        The sum of the per-batch losses divided by the number of batches.
    """
    model.train()
    train_loss = 0.
    SE, KLD = 0., 0.
    topo = 0.
    b01, b0, b1, b2 = 0., 0., 0., 0.
    for batch_idx, data in enumerate(train_loader):
        # The model sees the corrupted batch; the clean batch is the target.
        noisy_data = add_noise(data, device)
        data = data.to(device)
        noisy_data = noisy_data.to(device)
        optimizer.zero_grad()
        recon_batch, mu, logvar = model(noisy_data)
        if args.mode == 1:
            loss, l01, l0, l1, l2 = topological_loss(recon_batch)
            train_loss += loss.item()
            b01 += l01.item()
            b0 += l0.item()
            b1 += l1.item()
            b2 += l2.item()
        elif args.topo == True:
            loss, l_SE, l_KLD, l_topo, l01, l0, l1, l2 = loss_function(
                recon_batch, data, mu, logvar)
            train_loss += loss.item()
            SE += l_SE.item()
            KLD += l_KLD.item()
            topo += l_topo.item()
            b01 += l01.item()
            b0 += l0.item()
            b1 += l1.item()
            b2 += l2.item()
        else:
            loss, l_SE, l_KLD = loss_function(recon_batch, data, mu, logvar)
            train_loss += loss.item()
            SE += l_SE.item()
            KLD += l_KLD.item()

        loss.backward()
        optimizer.step()
        if batch_idx % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader),
                loss.item() / len(noisy_data)))

    num_batches = len(train_loader)
    train_loss /= num_batches

    train_loss_list.append(train_loss)
    print('====> Epoch: {} Average loss: {:.4f}'.format(epoch, train_loss))
    if args.mode == 0:
        SE /= num_batches
        KLD /= num_batches
        # Average the topological term *before* logging so that 'Topo' is on
        # the same per-batch scale as 'Train', 'Rec' and 'KL'. It stays 0 when
        # the topological loss is disabled, exactly as before.
        topo /= num_batches
        # Logged once per epoch; a second identical call would only duplicate
        # the 'Train'/'Rec'/'KL' points at the same step.
        writer.add_scalars("loss/each_loss", {
            'Train': train_loss,
            'Rec': SE,
            'KL': KLD,
            'Topo': topo
        }, epoch)

        if args.topo == True:
            b01 /= num_batches
            b0 /= num_batches
            b1 /= num_batches
            b2 /= num_batches
            writer.add_scalars("loss/topological_loss", {
                'topo': topo,
                'b01': b01,
                'b0': b0,
                'b1': b1,
                'b2': b2
            }, epoch)

    return train_loss
예제 #34
0
def wider(layer1, layer2, new_width, bnorm=None):
    """Widen ``layer1`` to ``new_width`` output channels (Net2Net widening).

    Extra output filters of ``layer1`` are copies of randomly chosen existing
    filters, each passed through ``add_noise``. The matching input slices of
    ``layer2`` are copied as well and divided by the replication count.

    Only a ``nn.Conv2d`` followed by a ``nn.Conv2d`` or ``nn.Linear`` is
    handled. Both layers must have a bias.

    Args:
        layer1: Convolution to widen.
        layer2: Layer consuming ``layer1``'s output. For a ``nn.Linear`` its
            ``in_features`` must be ``layer1.out_channels * k * k`` for some
            integer ``k``.
        new_width: Desired number of output channels of ``layer1``; must be
            larger than the current number. Teachers are drawn with
            ``random.sample`` (without replacement), so ``new_width`` may not
            exceed twice the current width or ``ValueError`` is raised.
        bnorm: Optional batch-norm module sitting between the two layers; its
            running statistics (and affine parameters, if any) are extended
            in place.

    Returns:
        ``(new_layer1, new_layer2, bnorm)``, or ``None`` when the layer types
        are not supported.

    Raises:
        AssertionError: If the layer shapes are incompatible or ``new_width``
            is not larger than the current width.
    """
    print('Net2Net Widening... ')
    w1 = layer1.weight.data
    w2 = layer2.weight.data
    b1 = layer1.bias.data
    b2 = layer2.bias.data

    if not (isinstance(layer1, nn.Conv2d)
            and isinstance(layer2, (nn.Conv2d, nn.Linear))):
        # Unsupported combination: nothing is widened.
        return None

    next_is_linear = isinstance(layer2, nn.Linear)
    old_width = w1.size(0)

    if next_is_linear:
        # View the Linear weight as (out_features, old_width, k, k) so that
        # it can be widened along dim 1 exactly like a convolution weight.
        assert w2.size(1) % old_width == 0, 'Linear units need to be multiple'
        kernel_size = int(np.sqrt(w2.size(1) // old_width))
        w2 = w2.view(w2.size(0),
                     w2.size(1) // kernel_size**2, kernel_size,
                     kernel_size)
    else:
        assert old_width == w2.size(1), "Module weights are not compatible"

    assert new_width > old_width, "New size should be larger"

    nw1 = w1.clone()
    nb1 = b1.clone()
    nw2 = w2.clone()

    if bnorm is not None:
        nrunning_mean = bnorm.running_mean.clone().resize_(new_width)
        nrunning_var = bnorm.running_var.clone().resize_(new_width)
        if bnorm.affine:
            nweight = bnorm.weight.data.clone().resize_(new_width)
            nbias = bnorm.bias.data.clone().resize_(new_width)

    # Reuse layer1's own geometry: the copied weights keep the original
    # kernel shape, so the module attributes have to match them.
    new_current_layer = nn.Conv2d(in_channels=layer1.in_channels,
                                  out_channels=new_width,
                                  kernel_size=layer1.kernel_size,
                                  stride=layer1.stride,
                                  padding=layer1.padding,
                                  dilation=layer1.dilation)

    # Teacher filter for every new output channel, and how many times each
    # teacher has been replicated.
    rand_ids = th.tensor(
        random.sample(range(old_width), new_width - old_width))
    replication_factor = np.bincount(rand_ids.numpy())

    for i in range(rand_ids.numel()):
        teacher_index = int(rand_ids[i].item())
        new_weight = w1.select(0, teacher_index)
        new_weight = add_noise(new_weight, nw1)
        new_weight = new_weight.unsqueeze(0)
        nw1 = th.cat((nw1, new_weight), dim=0)

        new_bias = b1[teacher_index].unsqueeze(0)
        nb1 = th.cat((nb1, new_bias))

        if bnorm is not None:
            nrunning_mean[old_width + i] = bnorm.running_mean[teacher_index]
            nrunning_var[old_width + i] = bnorm.running_var[teacher_index]
            if bnorm.affine:
                nweight[old_width + i] = bnorm.weight.data[teacher_index]
                nbias[old_width + i] = bnorm.bias.data[teacher_index]

    new_current_layer.weight.data = nw1
    new_current_layer.bias.data = nb1

    # Copy the weights from input channel of next layer and append it after
    # dividing the selected filter by replication factor.
    for i in range(rand_ids.numel()):
        teacher_index = int(rand_ids[i].item())
        factor = replication_factor[teacher_index] + 1
        assert factor > 1, 'Error in Net2Wider'
        # Calculate new weight according to replication factor
        new_weight = w2.select(1, teacher_index) * (1. / factor)
        # Append the new weight increasing its input channel
        nw2 = th.cat((nw2, new_weight.unsqueeze(1)), dim=1)
        # Assign the calculated new weight to replicated filter
        nw2[:, teacher_index, :, :] = new_weight

    if next_is_linear:
        new_next_layer = nn.Linear(in_features=new_width * kernel_size**2,
                                   out_features=layer2.out_features)
        # Convert the 4D tensor to 2D tensor for linear layer i.e. reverse
        # the earlier effect when linear layer was converted to
        # convolutional layer.
        new_next_layer.weight.data = nw2.view(layer2.weight.size(0),
                                              new_width * kernel_size**2)
    else:
        new_next_layer = nn.Conv2d(in_channels=new_width,
                                   out_channels=layer2.out_channels,
                                   kernel_size=layer2.kernel_size,
                                   stride=layer2.stride,
                                   padding=layer2.padding,
                                   dilation=layer2.dilation)
        new_next_layer.weight.data = nw2

    # Set the bias for new next layer as previous bias for next layer
    new_next_layer.bias.data = b2

    if bnorm is not None:
        bnorm.num_features = new_width
        bnorm.running_var = nrunning_var
        bnorm.running_mean = nrunning_mean
        if bnorm.affine:
            bnorm.weight.data = nweight
            bnorm.bias.data = nbias

    return new_current_layer, new_next_layer, bnorm