class OwnDataset(Dataset):
    """HDF5-backed dataset of voxel samples, optionally expanded into 24 cube rotations.

    NOTE(review): a newer, version-aware ``OwnDataset`` is defined later in this
    file and shadows this definition at import time.
    """

    def __init__(self, indices, path, rotations=True):
        # Path of the HDF5 file holding '<idx>/data' and '<idx>/label' groups.
        self.hdf5file = path
        self.rot = Rotations()
        self.rotations = rotations
        # With rotations enabled every sample index appears 24 times,
        # once per rotation variant.
        self.indices = np.tile(indices, 24) if rotations else indices
        self.length = len(indices)

    def __getitem__(self, index):
        sample_id = self.indices[index]
        with h5py.File(self.hdf5file, 'r') as file:
            raw = file[str(sample_id) + '/data'][()][0]
            if self.rotations:
                # index // length picks which of the 24 rotations to apply.
                data = self.rot.rotation(data=raw,
                                         k=np.floor_divide(index, self.length)).copy()
            else:
                data = raw
            label = file[str(sample_id) + '/label'][()]
        return data, label

    def __len__(self):
        return self.length * 24 if self.rotations else self.length
class OwnDataset(Dataset):
    """HDF5-backed dataset of voxel samples and scalar labels.

    With ``rotations=True`` each underlying sample is served 24 times, once
    per cube rotation. ``version=1`` recomputes labels that an older pipeline
    stored using the natural logarithm instead of log10.
    """

    def __init__(self, indices, path, rotations=True, version=2):
        # Path of the HDF5 file holding '<idx>/data' and '<idx>/label' groups.
        self.hdf5file = path
        self.rot = Rotations()
        self.rotations = rotations
        # With rotations enabled every sample index appears 24 times,
        # once per rotation variant.
        self.indices = np.tile(indices, 24) if rotations else indices
        self.length = len(indices)
        self.version = version

    def __getitem__(self, index):
        sample_id = self.indices[index]
        with h5py.File(self.hdf5file, 'r') as file:
            raw = file[str(sample_id) + '/data'][()][0]
            if self.rotations:
                # index // length picks which of the 24 rotations to apply.
                data = self.rot.rotation(data=raw,
                                         k=np.floor_divide(index, self.length)).copy()
            else:
                data = raw
            label = file[str(sample_id) + '/label'][()]
        # Old (version 1) files stored -ln(x); convert to -log10(x).
        if self.version == 1:
            label = -np.log10(np.exp(-label))
        return data, label

    def __len__(self):
        return self.length * 24 if self.rotations else self.length
def testing(self, epoch, test_dataloader, dataset, ensemble=False, remember=10, rotation=True):
    """Evaluate ``self.model`` on a test dataloader and report MSE/RMSE.

    Args:
        epoch: Current epoch number (used for logging and for the ensemble
            warm-up at epoch 0).
        test_dataloader: DataLoader yielding ``(data, target)`` batches.
        dataset: The underlying dataset (only ``__len__`` is used, for logging).
        ensemble: If True, combine the current output with remembered past
            outputs via ``self.calcPred``.
        remember: How many past predictions ``calcPred`` may use.
        rotation: If True, average the model output over all 24 cube
            rotations of each sample (assumes batch_size == 1 — TODO confirm).

    Returns:
        Tuple ``(mean MSE, mean RMSE)`` over all test batches.
    """
    self.model.eval()
    mse_list = []
    rmse_list = []
    batchnum = 0
    batch_losses = []  # NOTE(review): stores raw model outputs, not losses — name kept for compatibility
    criterion = torch.nn.MSELoss()
    if rotation:
        rot = Rotations()
    for batch_id, (data, target) in enumerate(test_dataloader):
        target = target.view(-1, 1)
        data = data.float().cuda()
        target = target.float()
        if rotation:
            # Average predictions over all 24 rotations of the (single) sample.
            outs = []
            datatemp = data.cpu().numpy()[0]
            for j in range(24):
                newdat = rot.rotation(datatemp, j).copy()
                newdat = torch.from_numpy(newdat).float().cuda().view((1, 16, 24, 24, 24))
                outs.append(self.model(newdat))
            out = torch.tensor([[torch.mean(torch.tensor(outs))]])
        else:
            out = self.model(data)
        if ensemble:
            pred = out
            if epoch == 0:
                # No history yet at epoch 0 — use the raw prediction.
                prediction = pred
            else:
                prediction = self.calcPred(pred, remember=remember, batchnum=batchnum)
            batch_losses.append(out)
        else:
            # BUG FIX: this assignment previously ran unconditionally after the
            # ensemble branch, clobbering the calcPred-combined prediction and
            # making the ensemble logic dead code.
            prediction = out
        batchnum += 1
        loss = criterion(prediction, target)
        mse_list.append(loss.data.item())
        rmse_list.append(np.sqrt(loss.data.item()))
        # Print is adapted from https://github.com/pytorch/examples/blob/master/mnist_hogwild/train.py
        print('Test Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f} MSE-Mean: {:.6f} RMSE-Mean: {:.6f}'.format(
            epoch, batch_id * len(data), dataset.__len__(),
            100. * (batch_id * len(data)) / dataset.__len__(),
            loss.data.item(), np.mean(mse_list), np.mean(rmse_list)))
    if ensemble:
        self.epochLosses.append(batch_losses)
    print('Test Epoch: {} MSE (loss): {:.4f}, RMSE: {:.4f} Dataset length {}'.format(
        epoch, np.mean(mse_list), np.mean(rmse_list), dataset.__len__()))
    return np.mean(mse_list), np.mean(rmse_list)
def benchmark(self, n_datapoints, datapath, rotations=True, model=None, ensemble=False, version=2):
    """Benchmark the best model(s) on the first ``n_datapoints`` samples of an HDF5 file.

    Args:
        n_datapoints: Number of samples (HDF5 groups '0'..'n-1') to evaluate.
        datapath: Path to the HDF5 file with '<idx>/data' and '<idx>/label'.
        rotations: If True, average predictions over all 24 cube rotations
            per sample; otherwise evaluate each sample once via a DataLoader.
        model: Directory containing saved model weights; if None, the already
            loaded ``self.bestModel`` is used.
        ensemble: If True, average predictions of every 'bestModel*' file
            found in ``model``.
        version: Label format version; 1 means labels were stored with the
            natural log and must be converted to log10.

    Returns:
        Tuple ``(squared errors, labels, predictions)``.
    """
    if ensemble:
        # Load every saved best model found in the given directory.
        best_models = []
        for fname in os.listdir(model):
            if 'bestModel' in fname:
                self.model = torch.load(model + fname)
                self.model.eval()
                best_models.append(self.model)
    if rotations:
        rot = Rotations()
        datafile = datapath
        labels = []
        outs = []
        if model is not None and not ensemble:
            self.bestModel.load_state_dict(torch.load(model + 'bestModel.pt'))
            self.bestModel.eval()
        for i in range(n_datapoints):
            outs1 = []
            target1 = []
            for j in range(24):
                with h5py.File(datafile, 'r') as file:
                    data = rot.rotation(data=file[str(i) + '/data'][()][0], k=j)
                    if version == 1:
                        # Old (version 1) files stored -ln(x); convert to -log10(x).
                        label = -np.log10(np.exp(-(file[str(i) + '/label'][()])))
                    else:
                        label = file[str(i) + '/label'][()]
                data = torch.from_numpy(data.reshape(1, 16, 24, 24, 24).copy()).float().cuda()
                if ensemble:
                    outall = [m(data) for m in best_models]
                    out = torch.mean(torch.tensor(outall))
                else:
                    out = self.bestModel(data)
                outs1.append(out.cpu().data.numpy())
                target1.append(label)
            # Rotation-averaged prediction and label for sample i.
            labels.append(np.mean(target1))
            outs.append(np.mean(outs1))
        # BUG FIX: error loop was hard-coded to range(290); use n_datapoints.
        error = [(outs[i] - labels[i]) ** 2 for i in range(n_datapoints)]
        print("testmean: ", np.mean(error))
        return error, labels, outs
    else:
        kwargs = {'num_workers': 4}
        indices = np.arange(n_datapoints)
        # BUG FIX: pass `version` through so version-1 labels are recalculated
        # in this branch too (previously only the rotation branch did this).
        test_set = OwnDataset(indices, datapath, rotations=False, version=version)
        test_dataloader = DataLoader(dataset=test_set, batch_size=1, shuffle=False, **kwargs)
        if model is not None and not ensemble:
            self.bestModel.load_state_dict(torch.load(model + 'bestModel.pt'))
            self.bestModel.eval()
        outs1 = []
        target1 = []
        for batch_id, (data, target) in enumerate(test_dataloader):
            target = target.view(-1, 1)
            target1.append(target.cpu().data.numpy())
            data = data.float().cuda()
            if ensemble:
                outall = [m(data) for m in best_models]
                out = torch.mean(torch.tensor(outall))
            else:
                out = self.bestModel(data)
            outs1.append(out.cpu().data.numpy())
        # BUG FIX: error loop was hard-coded to range(290); use n_datapoints.
        error = [(outs1[i] - target1[i]) ** 2 for i in range(n_datapoints)]
        print(outs1, target1)
        print("testmean: ", np.mean(error))
        return error, target1, outs1