def cumprod(x, axis: int = 0, exclusive: bool = False):
    """Return the cumulative product of ``x`` along ``axis``.

    Args:
        x: Input tensor.
        axis: Dimension along which the product is accumulated.
        exclusive: When true, the value at each position is the product of
            the elements strictly before it, so the result starts with ones.

    Returns:
        A tensor with the same shape as ``x``.
    """
    if not exclusive:
        return torch.cumprod(x, axis)

    # Move the target axis to the end so the shift can be written with `...`.
    moved = torch.swapaxes(x, axis, -1)
    leading_ones = torch.ones_like(moved[..., -1:])
    # Prepend ones and drop the last element: a one-step shift to the right.
    shifted = torch.cat((leading_ones, moved[..., :-1]), -1)
    products = torch.cumprod(shifted, -1)
    # Put the axis back where the caller expects it.
    return torch.swapaxes(products, axis, -1)
def forward(self, x):
    """Return a coefficient-weighted, normalised sum of traces per matrix.

    For every matrix in the batch, for ``i`` in ``range(self.rows)`` and
    ``j`` in ``range(self.cols)``, this computes the trace of
    ``ones @ (matrix_power(x, i + 2) ** (j + 1))`` (the ``**`` is
    element-wise), divides it by ``numel ** (i + j + 2)`` where ``numel`` is
    the number of entries of one matrix, multiplies by ``self.coef`` and sums
    over ``i`` and ``j``.

    Args:
        x: A single square matrix (2-D) or a batch of them (3-D).

    Returns:
        A 1-D tensor with one value per matrix in the batch.

    Raises:
        AssertionError: If ``x`` has more than three dimensions.
    """
    # Force all data to be batched if it isn't already.
    if len(x.shape) == 2:
        x = x.view(-1, *x.shape)
    # But we don't know how to deal with batches-{of-batches}+.
    elif len(x.shape) > 3:
        # Raised explicitly (instead of `assert 0`) so the check survives
        # `python -O`; the exception type is unchanged for callers.
        raise AssertionError(
            "expected a 2-D matrix or a 3-D batch of matrices, got "
            f"{len(x.shape)} dimensions")

    # Matrix powers x^2 .. x^(rows + 1): exactly the ones indexed below.
    a_series = [torch.matrix_power(x, i + 1) for i in range(1, self.rows + 1)]
    # stack() puts the series index first; swap so the batch is dim 0.
    a_series = torch.swapaxes(torch.stack(a_series), 0, 1)

    # Full NxN matrix of ones, created directly on x's device.
    ones = torch.ones(x.shape[-2:], dtype=x.dtype, device=x.device)

    # `**` binds tighter than `@`: each matrix power is raised element-wise
    # to (j + 1) and then left-multiplied by the ones matrix.
    powers = [ones @ (a_series[:, i] ** (j + 1))
              for i in range(self.rows)
              for j in range(self.cols)]
    powers = torch.swapaxes(torch.stack(powers), 0, 1)

    # torch.trace only works on 2-D tensors, so take diagonal + sum instead.
    traces = torch.diagonal(powers, dim1=-2, dim2=-1).sum(-1)
    traces = traces.view(-1, self.rows, self.cols)

    # Entry [i, j] holds the exponent i + j + 2 used for normalisation.
    norm_pow_mat = torch.stack(
        [torch.arange(0, self.cols) + i + 2 for i in range(self.rows)]
    ).to(traces.device)

    # Number of elements in an individual matrix.
    numel = powers.shape[-1] * powers.shape[-2]
    # NOTE(review): this is an integer power; it may overflow int64 for
    # large matrices combined with large rows/cols - confirm expected sizes.
    norm = torch.full(traces.shape, numel).to(traces.device) ** norm_pow_mat
    return (self.coef * traces / norm).sum(dim=[-1, -2])
def _torch_cdf_distance(tensor_a, tensor_b): """ Torch implementation of _cdf_distance for Wasserstein distance input: tensor_a, tensor_b output: cdf_loss which the computed distance between the tensors. #Note: this function yields an difference of \approx 10^-9 Updated for batch support | 29/03/2022 Updated for multivariate time series support | 29/03/2022 Expects tensor_a and tensor_b to be of shape: (batch_size, segment_length, n_features), Example: a single batch of 10 time series with lengths of 12 should have shape=(1, 12, 10) """ batch_size = tensor_a.shape[0] assert tensor_a.shape == tensor_b.shape, 'tensor_a and tensor_b have different shape' #It is necessary to reshape the tensors to match the dimensions of Scipy. tensor_a = torch.reshape(torch.swapaxes( tensor_a, -1, -2), (batch_size, tensor_a.shape[2], tensor_a.shape[1])) tensor_b = torch.reshape(torch.swapaxes( tensor_b, -1, -2), (batch_size, tensor_b.shape[2], tensor_b.shape[1])) # Creater sorters: sorter_a = torch.argsort(tensor_a, dim=-1) sorter_b = torch.argsort(tensor_a, dim=-1) # We append both tensors and sort them all_values = torch.cat((tensor_a, tensor_b), dim=-1) all_values, idx = torch.sort(all_values, dim=-1) # Calculate the n-th discrete difference along the given axis (equivalent to np.diff()) deltas = all_values[:, :, 1:] - all_values[:, :, :-1] sorted_a, idx = torch.sort(tensor_a, dim=-1) sorted_b, idx = torch.sort(tensor_b, dim=-1) # Get the respective positions of the values of u and v among the values of # both distributions. 
a_cdf_index = torch.searchsorted(sorted_a.flatten(start_dim=2), all_values[:, :, :-1], right=True) # TODO: torch.searchsorted() expects contiguousarrays, passing non-contiguousarrays slows performance due to data copy | fix doesn't seem trivial b_cdf_index = torch.searchsorted(sorted_b.flatten(start_dim=2), all_values[:, :, :-1], right=True) #Compute the cdf a_cdf = a_cdf_index / tensor_a.shape[-1] b_cdf = b_cdf_index / tensor_b.shape[-1] #And the distance between them cdf_distance = torch.sum(torch.mul(torch.abs((a_cdf - b_cdf)), deltas), dim=-1) cdf_loss = cdf_distance.mean() return cdf_loss
def forward(self, x):
    """Run ``layer1``, flatten its output per sample, then run ``layer2``.

    ``layer1`` must return a pair whose first item is a 3-D tensor; its first
    two axes are swapped before flattening (presumably from
    (sequence, batch, hidden) to batch-first - confirm against ``layer1``).

    Returns:
        The ``layer2`` output viewed as ``(batch, -1)``.
    """
    features, _ = self.layer1(x)
    dim0, dim1, dim2 = features.size()

    # Bring axis 1 to the front and merge the other two axes into one row.
    swapped = torch.swapaxes(features, 0, 1).contiguous()
    flattened = swapped.view(dim1, dim2 * dim0)

    return self.layer2(flattened).view(dim1, -1)
def tensor_indexing_ops(self):
    """Invoke a catalogue of tensor indexing/slicing/joining operators.

    Small random tensors are created and passed through each operator once;
    the individual results are only used as arguments to the final ``len``
    call.

    NOTE(review): plain Python ``len`` accepts exactly one argument, so this
    presumably only runs under tracing/scripting or another harness that
    records the operator calls - confirm with the caller.
    """
    x = torch.randn(2, 4)
    y = torch.randn(4, 4)
    # Integer index tensor used by the gather/scatter/take family.
    t = torch.tensor([[0, 0], [1, 0]])
    # Boolean mask for masked_select.
    mask = x.ge(0.5)
    # Split indices for the dsplit/hsplit/vsplit calls.
    i = [0, 1]
    return len(
        torch.cat((x, x, x), 0),
        torch.concat((x, x, x), 0),
        torch.conj(x),
        torch.chunk(x, 2),
        torch.dsplit(torch.randn(2, 2, 4), i),
        torch.column_stack((x, x)),
        torch.dstack((x, x)),
        torch.gather(x, 0, t),
        torch.hsplit(x, i),
        torch.hstack((x, x)),
        torch.index_select(x, 0, torch.tensor([0, 1])),
        x.index(t),
        torch.masked_select(x, mask),
        torch.movedim(x, 1, 0),
        torch.moveaxis(x, 1, 0),
        torch.narrow(x, 0, 0, 2),
        torch.nonzero(x),
        torch.permute(x, (0, 1)),
        torch.reshape(x, (-1, )),
        torch.row_stack((x, x)),
        torch.select(x, 0, 0),
        torch.scatter(x, 0, t, x),
        x.scatter(0, t, x.clone()),
        torch.diagonal_scatter(y, torch.ones(4)),
        torch.select_scatter(y, torch.ones(4), 0, 0),
        torch.slice_scatter(x, x),
        torch.scatter_add(x, 0, t, x),
        # The next two calls modify x in place, so every operator listed
        # after them sees the updated values; keep the order as is.
        x.scatter_(0, t, y),
        x.scatter_add_(0, t, y),
        # torch.scatter_reduce(x, 0, t, reduce="sum"),
        torch.split(x, 1),
        torch.squeeze(x, 0),
        torch.stack([x, x]),
        torch.swapaxes(x, 0, 1),
        torch.swapdims(x, 0, 1),
        torch.t(x),
        torch.take(x, t),
        torch.take_along_dim(x, torch.argmax(x)),
        torch.tensor_split(x, 1),
        torch.tensor_split(x, [0, 1]),
        torch.tile(x, (2, 2)),
        torch.transpose(x, 0, 1),
        torch.unbind(x),
        torch.unsqueeze(x, -1),
        torch.vsplit(x, i),
        torch.vstack((x, x)),
        torch.where(x),
        torch.where(t > 0, t, 0),
        torch.where(t > 0, t, t),
    )
def get_LeNet(model_path, save_dir):
    """Export LeNet parameters and selected activations as ``.mtx`` files.

    Loads the checkpoint at ``model_path`` into a fresh ``LeNet``, obtains a
    sequence of activations from the module-level ``eval`` function, and
    writes every parameter plus the activations at indices 0, 2 and >= 5 via
    ``save_tensor_as_mtx``.

    Args:
        model_path: Path of the state dict passed to ``torch.load``.
        save_dir: Output location. File names are appended by plain string
            concatenation, so it should end with a path separator.

    NOTE(review): ``eval`` must be a project function shadowing the builtin
    (the builtin rejects a module argument) - confirm.
    """
    # Create the output directory when it does not exist yet. This replaces
    # a bare `except:` around os.stat that also swallowed unrelated errors
    # (including KeyboardInterrupt).
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    model = LeNet()
    model.load_state_dict(torch.load(model_path))
    acts = eval(model)

    for name, param in model.named_parameters():
        name = name.replace(".", "_")
        # Conv weights are flattened to 2-D: one row per output channel.
        if "conv" in name and "weight" in name:
            param = param.reshape(param.shape[0], -1)
        save_tensor_as_mtx(param.detach(), save_dir + name + ".mtx")

    # acts: x, xc1, xcp1, xc2, xcp2, xf0, xf1, xf2, xf3
    # Activations at index 0 and 2 are unfolded into 5x5 patches (one patch
    # per row); they differ only in the padding used.
    unfold_padding = {0: 2, 2: 0}
    for i, act in enumerate(acts):
        if i in unfold_padding:
            unfold = torch.nn.Unfold(kernel_size=(5, 5), dilation=1,
                                     padding=unfold_padding[i], stride=1)
            act = torch.swapaxes(unfold(act), 1, 2)
            act = act.reshape(-1, act.shape[-1])
        elif i < 5:
            # Remaining activations below index 5 are not exported.
            continue
        print(i, act.shape)
        # x, xcp1, xf0, xf1, xf2, xf3
        save_tensor_as_mtx(act.detach(), save_dir + "act_" + str(i) + ".mtx")
def forward(self, *signals: Signal) -> Tuple[Signal]:
    """
    Mix the given modulation signals into ``self.n_output`` output signals.

    One mixing value is read through ``self.p`` for every name in
    ``self.torchparameters``; together they form a
    (batch_size, n_output, n_input) matrix that is multiplied with the
    stacked input signals.
    """
    # Collect the mix parameters, then arrange them as
    # batch_size x n_output x n_input.
    mix = torch.stack([self.p(name) for name in self.torchparameters], dim=1)
    mix = mix.view(self.batch_size, self.n_input, self.n_output)
    mix = torch.swapaxes(mix, 1, 2)

    # There must be exactly one input signal per mix column.
    assert len(signals) == mix.shape[2]

    stacked = torch.stack(signals, dim=1)
    mixed = torch.matmul(mix, stacked)

    # Split into one tensor per output and re-tag each one as a Signal.
    outputs = []
    for chunk in torch.chunk(mixed, self.n_output, dim=1):
        outputs.append(chunk.squeeze(1).as_subclass(Signal))
    return tuple(outputs)
def to_input_tensor_char(self, sents: List[List[str]], device: torch.device) -> torch.Tensor:
    """ Convert a list of sentences (words) into a padded character-index tensor.

    @param sents (List[List[str]]): list of sentences (words)
    @param device: device on which to load the tensor, i.e. CPU or GPU

    @returns sents_var: tensor of (max_sentence_length, batch_size, max_word_length)
    """
    # Words -> character indices, then pad words and sentences.
    # NOTE(review): the padding value is looked up with self['<pad>'];
    # confirm this is the index pad_sents_char expects.
    padded = pad_sents_char(self.words2charindices(sents), self['<pad>'])

    batch_first = torch.tensor(padded, dtype=torch.long, device=device)
    # Swap the first two axes so the sentence-length axis comes first.
    return torch.swapaxes(batch_first, 0, 1)
def forward(self, x):
    ''' Return the list of logits (``logits_list(g)`` in the paper).

    The input's first two axes are swapped, projected by ``self.linear`` and
    run through the GRU. For every pair of forward/backward output points the
    selected GRU slices are concatenated and fed to the matching entry of
    ``self.list_linear``.
    '''
    batch_size = x.shape[0]
    gru_inputs = self.linear(torch.swapaxes(x, 0, 1))
    outputs, _ = self._compute_gru(self.gru, gru_inputs, batch_size)

    logit_list = []
    point_pairs = zip(self.output_points_fw, self.output_points_bw)
    for index, (t_fw, t_bw) in enumerate(point_pairs):
        pieces = []
        # The first `hidden_units` channels are read at the forward point,
        # the remaining channels at the backward point.
        if t_fw is not None:
            pieces.append(outputs[t_fw, :, :self.hidden_units])
        if t_bw is not None:
            pieces.append(outputs[t_bw, :, self.hidden_units:])
        features = torch.cat(pieces, dim=1).to(self.device)
        logit_list.append(self.list_linear[index](features))
    return logit_list
def dataset_stats():
    """Compute the per-channel mean and standard deviation of the train images.

    Image names are derived from the files under ``msrc_directory/train``
    whose path contains ``'GT'``; the matching ``.bmp`` files are read from
    ``msrc_directory``. Images whose shape is not (213, 320, 3) have their
    first two axes swapped so all images can be stacked.

    Returns:
        ``(imgs_mean, imgs_std)``: tensors reduced over the image, height and
        width axes, i.e. one value per channel.
    """
    # Strip directory, extension and the last three characters of the stem
    # (presumably a '_GT' suffix - confirm against the dataset layout).
    img_list_train_val = [x.split('.')[-2].split('/')[-1][:-3]
                          for x in glob.glob(msrc_directory + '/train/*')
                          if 'GT' in x]
    dataset_name = ['%s/%s.bmp' % (msrc_directory, x) for x in img_list_train_val]

    expected_shape = torch.Size([213, 320, 3])
    images = []
    for img_name in dataset_name:
        img = torch.as_tensor(np.array(plt.imread(img_name)))
        # Applied to every image, including the first one, which the
        # previous version added without checking its orientation.
        if img.shape != expected_shape:
            img = torch.swapaxes(img, 0, 1)
        images.append(img)

    # Stack once at the end instead of re-concatenating (and re-copying) the
    # growing dataset tensor on every iteration.
    dataset = torch.stack(images) if images else torch.as_tensor([])

    print(f'Calculating Mean and Std of the Dataset ...')
    dataset = dataset.to(DEVICE).float()
    imgs_mean = torch.mean(dataset, dim=(0, 1, 2))
    imgs_std = torch.std(dataset, dim=(0, 1, 2))
    print(f'imgs mean: {imgs_mean}\nimgs std: {imgs_std}')
    return imgs_mean, imgs_std
def cloud_cast_wrapper(model):
    """Train ``model`` on the CloudCast training split, testing every epoch.

    Reads its settings from the module-level ``args`` (``number_of_workers``,
    ``epochs``, ``reverse_scheduled_sampling``, ``patch_size``). After each
    epoch the summed training loss is printed, the model is saved and
    ``test`` is run.

    Args:
        model: Object exposing ``train(ims, real_input_flag)`` (returning a
            loss with ``.item()``) and ``save(epoch)``.
    """
    from data.cloudcast import CloudCast
    import torch
    from tqdm import tqdm

    trainFolder = CloudCast(
        is_train=True,
        root="data/",
        n_frames_input=20,
        n_frames_output=1,
        batchsize=8,
    )
    trainLoader = torch.utils.data.DataLoader(
        trainFolder,
        batch_size=8,
        num_workers=args.number_of_workers,
        shuffle=False)

    # device may need to change
    # "cuda" is the device type torch accepts; the previous "gpu:0" string
    # raised a RuntimeError as soon as a GPU was available.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    t = tqdm(trainLoader, leave=False, total=2)
    for epoch in range(0, int(args.epochs)):
        train_loss = 0
        for i, (idx, targetVar, inputVar, _, _) in enumerate(t):
            inputs = inputVar.to(device)
            # Swap axes 2 and 4 (presumably to channel-last for
            # reshape_patch - confirm).
            inputs = torch.swapaxes(inputs, 2, 4)
            # NOTE(review): real_input_flag is only assigned when reverse
            # scheduled sampling is enabled; any other setting reaches
            # model.train() with the name undefined - confirm intended flow.
            if args.reverse_scheduled_sampling == 1:
                real_input_flag = reserve_schedule_sampling_exp(i)
            ims = preprocess.reshape_patch(inputs, args.patch_size)
            loss = model.train(ims, real_input_flag)
            train_loss += loss.item()
        print(train_loss)
        # need to add comet
        # runs and generates the validation at each epoch
        model.save(epoch)
        test(model, args, epoch)
def encodes(self, x):
    """Load an image/segmentation pair and resample both to ``self.new_sp``.

    Args:
        x: ``(im_path, segm_path)``; the image is read with SimpleITK and the
            segmentation mesh with meshio.

    Returns:
        ``(image, mask)``: the trilinearly resampled image and the
        nearest-neighbour resampled mask (as a long tensor), both squeezed.
    """
    im_path, segm_path = x

    # The name of the segmentation's parent folder is parsed as an integer;
    # folders numbered 50455..50463 set the `ras_adj` flag for seg2mask.
    folder = Path(segm_path).parent.name
    ras_adj = int(folder) in range(50455, 50464)

    mr = sitk.ReadImage(im_path, sitk.sitkFloat32)
    segm = meshio.read(segm_path)
    mask_arr = seg2mask(mr, segm, ras_adj)

    # Target size per axis so that spacing becomes self.new_sp everywhere.
    new_sz = []
    for osz, ospc in zip(mr.GetSize(), mr.GetSpacing()):
        new_sz.append(int(round(osz * ospc / self.new_sp)))

    # Swap the first and last axes of the array returned by SimpleITK.
    im = torch.swapaxes(torch.tensor(sitk.GetArrayFromImage(mr)), 0, 2)
    mk = torch.tensor(mask_arr).float()

    # F.interpolate needs 5-D input for volumes; add leading singleton axes.
    while im.ndim < 5:
        im, mk = im.unsqueeze(0), mk.unsqueeze(0)

    resized_im = F.interpolate(im, size=new_sz, mode='trilinear',
                               align_corners=False).squeeze()
    resized_mk = F.interpolate(mk, size=new_sz, mode='nearest').squeeze().long()
    return resized_im, resized_mk
def test(model, configs, itr):
    """Evaluate ``model`` on the CloudCast test split and print the metrics.

    For every predicted frame the function accumulates MSE, SSIM, PSNR and
    LPIPS, writes ground-truth/predicted frames of the first
    ``configs.num_save_samples`` batches as PNG files under
    ``configs.gen_frm_dir/<itr>/<batch_id>/``, and finally prints the
    averaged metrics. Nothing is returned.

    Args:
        model: Object exposing ``test(test_dat, real_input_flag)``.
        configs: Settings object (lengths, image size, patch size, ...).
        itr: Identifier used as the name of the result directory.
    """
    from data.cloudcast import CloudCast
    import torch
    import lpips
    from skimage.metrics import structural_similarity
    from core.utils import preprocess, metrics
    import cv2
    from tqdm import tqdm

    loss_fn_alex = lpips.LPIPS(net='alex')
    # "cuda" is the device type torch accepts; the previous "gpu:0" string
    # raised a RuntimeError as soon as a GPU was available.
    # NOTE(review): the NumPy-based metric code below is applied to tensors
    # living on `device`; on CUDA they likely need `.cpu()` first - confirm.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    res_path = os.path.join(configs.gen_frm_dir, str(itr))
    os.mkdir(res_path)
    avg_mse = 0
    batch_id = 0
    img_mse, ssim, psnr = [], [], []
    lp = []

    testFolder = CloudCast(
        is_train=False,
        root="data/",
        n_frames_input=20,
        n_frames_output=1,
        batchsize=8,
    )
    # number of workers will need to be changed
    testLoader = torch.utils.data.DataLoader(
        testFolder,
        batch_size=8,
        num_workers=configs.number_of_workers,
        shuffle=False)
    t_test = tqdm(testLoader, leave=False, total=2)

    # One accumulator slot per predicted frame.
    for i in range(configs.total_length - configs.input_length):
        img_mse.append(0)
        ssim.append(0)
        psnr.append(0)
        lp.append(0)

    # reverse schedule sampling
    if configs.reverse_scheduled_sampling == 1:
        mask_input = 1
    else:
        mask_input = configs.input_length

    real_input_flag = np.zeros(
        (configs.batch_size,
         configs.total_length - mask_input - 1,
         configs.img_width // configs.patch_size,
         configs.img_width // configs.patch_size,
         configs.patch_size**2 * configs.img_channel))

    if configs.reverse_scheduled_sampling == 1:
        real_input_flag[:, :configs.input_length - 1, :, :] = 1.0

    for i, (idx, targetVar, inputVar, _, _) in enumerate(t_test):
        batch_id = batch_id + 1
        inputs = inputVar.to(device)
        test_ims = torch.swapaxes(inputs, 2, 4)
        test_dat = preprocess.reshape_patch(test_ims, configs.patch_size)
        img_gen = model.test(test_dat, real_input_flag)

        img_gen = preprocess.reshape_patch_back(img_gen, configs.patch_size)
        output_length = configs.total_length - configs.input_length
        img_gen_length = img_gen.shape[1]
        img_out = img_gen[:, -output_length:]

        # MSE per frame
        for i in range(output_length):
            x = test_ims[:, i + configs.input_length, :, :, :]
            gx = img_out[:, i, :, :, :]
            # Clamp predictions to [0, 1] before scoring.
            gx = np.maximum(gx, 0)
            gx = np.minimum(gx, 1)
            mse = np.square(x - gx).sum()
            img_mse[i] += mse
            avg_mse += mse

            # cal lpips
            # LPIPS gets channel-first 3-channel images; single-channel
            # frames are replicated across the three channels.
            img_x = np.zeros(
                [configs.batch_size, 3, configs.img_width, configs.img_width])
            if configs.img_channel == 3:
                img_x[:, 0, :, :] = x[:, :, :, 0]
                img_x[:, 1, :, :] = x[:, :, :, 1]
                img_x[:, 2, :, :] = x[:, :, :, 2]
            else:
                img_x[:, 0, :, :] = x[:, :, :, 0]
                img_x[:, 1, :, :] = x[:, :, :, 0]
                img_x[:, 2, :, :] = x[:, :, :, 0]
            img_x = torch.FloatTensor(img_x)

            img_gx = np.zeros(
                [configs.batch_size, 3, configs.img_width, configs.img_width])
            if configs.img_channel == 3:
                img_gx[:, 0, :, :] = gx[:, :, :, 0]
                img_gx[:, 1, :, :] = gx[:, :, :, 1]
                img_gx[:, 2, :, :] = gx[:, :, :, 2]
            else:
                img_gx[:, 0, :, :] = gx[:, :, :, 0]
                img_gx[:, 1, :, :] = gx[:, :, :, 0]
                img_gx[:, 2, :, :] = gx[:, :, :, 0]
            img_gx = torch.FloatTensor(img_gx)

            lp_loss = loss_fn_alex(img_x, img_gx)
            lp[i] += torch.mean(lp_loss).item()

            real_frm = np.uint8(x * 255)
            pred_frm = np.uint8(gx * 255)

            psnr[i] += metrics.batch_psnr(pred_frm, real_frm)
            for b in range(configs.batch_size):
                # original method (compare_ssim) is deprecated
                score, _ = structural_similarity(pred_frm[b],
                                                 real_frm[b],
                                                 full=True,
                                                 multichannel=True)
                ssim[i] += score

        # save prediction examples
        if batch_id <= configs.num_save_samples:
            path = os.path.join(res_path, str(batch_id))
            os.mkdir(path)
            for i in range(configs.total_length):
                name = 'gt' + str(i + 1) + '.png'
                file_name = os.path.join(path, name)
                img_gt = np.uint8(test_ims[0, i, :, :, :] * 255)
                cv2.imwrite(file_name, img_gt)
            for i in range(img_gen_length):
                name = 'pd' + str(i + 1 + configs.input_length) + '.png'
                file_name = os.path.join(path, name)
                img_pd = img_gen[0, i, :, :, :]
                img_pd = np.maximum(img_pd, 0)
                img_pd = np.minimum(img_pd, 1)
                img_pd = np.uint8(img_pd * 255)
                cv2.imwrite(file_name, img_pd)

    avg_mse = avg_mse / (batch_id * configs.batch_size)
    print('mse per seq: ' + str(avg_mse))
    for i in range(configs.total_length - configs.input_length):
        print(img_mse[i] / (batch_id * configs.batch_size))

    ssim = np.asarray(ssim, dtype=np.float32) / (configs.batch_size * batch_id)
    print('ssim per frame: ' + str(np.mean(ssim)))
    for i in range(configs.total_length - configs.input_length):
        print(ssim[i])

    psnr = np.asarray(psnr, dtype=np.float32) / batch_id
    print('psnr per frame: ' + str(np.mean(psnr)))
    for i in range(configs.total_length - configs.input_length):
        print(psnr[i])

    lp = np.asarray(lp, dtype=np.float32) / batch_id
    print('lpips per frame: ' + str(np.mean(lp)))
    for i in range(configs.total_length - configs.input_length):
        print(lp[i])
def reset_input_axis(feature_map, input_size):
    """Swap axis 0 of ``feature_map`` with the first axis of length ``input_size``.

    Raises:
        ValueError: If no axis of ``feature_map`` has length ``input_size``.
    """
    axis_lengths = feature_map.shape
    # `.index` returns the first matching axis and raises ValueError if none.
    matching_axis = axis_lengths.index(input_size)
    swapped = torch.swapaxes(feature_map, 0, matching_axis)
    return swapped
def do_iteration(self, x_real, emb_org, train=True, cd=False, emb=False):
    """Run one generator iteration and return the individual loss values.

    Args:
        x_real: Input batch passed to the generator ``self.G``.
        emb_org: Embeddings that belong to ``x_real``.
        train: When true, back-propagate the combined loss and step
            ``self.g_optimizer``.
        cd: When true, add the code (content) reconstruction L1 loss.
        emb: When true, add the cross-embedding reconstruction loss computed
            with ``self.C``.

    Returns:
        dict with the float entries ``'G/loss_id'``, ``'G/loss_id_psnt'``,
        ``'G/loss_cd'`` and ``'G/loss_emb'``.
    """
    self.G = self.G.train()

    # Identity mapping loss: pick the source/target embeddings given to G.
    if self.use_zero_src_tgt:
        src_emb, tgt_emb = torch.zeros_like(emb_org), torch.zeros_like(emb_org)
    elif self.use_true_rand_src_tgt:
        # Two independent random draws.
        src_emb, tgt_emb = torch.rand_like(emb_org), torch.rand_like(emb_org)
    elif self.use_rand_src_tgt:
        # One random draw shared by source and target.
        src_emb = tgt_emb = torch.rand_like(emb_org)
    else:
        src_emb = tgt_emb = emb_org
    x_identic, x_identic_psnt, code_real = self.G(x_real, src_emb, tgt_emb)

    x_real = x_real.unsqueeze(1)
    g_loss_id = F.mse_loss(x_real, x_identic)
    g_loss_id_psnt = F.mse_loss(x_real, x_identic_psnt)

    # Code semantic loss.
    if cd:
        code_reconst = self.G(x_identic_psnt, emb_org, None)
        g_loss_cd = F.l1_loss(code_real, code_reconst)
    else:
        g_loss_cd = torch.zeros(1).to(self.device)

    # Cross embedding reconstruction loss: convert each half of the batch
    # towards the other half's embeddings and compare what self.C recovers.
    if emb:
        half = x_real.size(0) // 2
        x_a = x_real[:half, 0]
        x_b = x_real[-half:, 0]
        if self.batch_size != 2:
            emb_a, emb_b = emb_org[:half], emb_org[-half:]
        else:
            emb_a, emb_b = emb_org.unsqueeze(1)

        _, x_identic_ab, _ = self.G(x_a, emb_a, emb_b)
        _, x_identic_ba, _ = self.G(x_b, emb_b, emb_a)
        x_identic_ab = x_identic_ab.squeeze(1)
        x_identic_ba = x_identic_ba.squeeze(1)

        _, emb_ab = self.C(torch.swapaxes(x_identic_ab, 1, 2),
                           preprocessing=False,
                           is_eval=True)
        _, emb_ba = self.C(torch.swapaxes(x_identic_ba, 1, 2),
                           preprocessing=False,
                           is_eval=True)
        g_loss_emb = (F.mse_loss(emb_b, emb_ab) + F.mse_loss(emb_a, emb_ba)) / 2
    else:
        g_loss_emb = torch.zeros(1).to(self.device)

    # Backward and optimize.
    if train:
        g_loss = (0.5 * g_loss_id + g_loss_id_psnt
                  + self.lambda_cd * g_loss_cd
                  + self.lambda_emb * g_loss_emb)
        self.reset_grad()
        g_loss.backward()
        self.g_optimizer.step()

    # Logging.
    return {
        'G/loss_id': g_loss_id.item(),
        'G/loss_id_psnt': g_loss_id_psnt.item(),
        'G/loss_cd': g_loss_cd.item(),
        'G/loss_emb': g_loss_emb.item(),
    }
def forward(self, query, key, value, attn_mask=None):
    """
    Calculate the masked attention output for the provided data, computing
    all attention heads in parallel.

    In the shape definitions below, N is the batch size, S is the source
    sequence length, T is the target sequence length, and E is the embedding
    dimension.

    Inputs:
    - query: Input data to be used as the query, of shape (N, S, E)
    - key: Input data to be used as the key, of shape (N, T, E)
    - value: Input data to be used as the value, of shape (N, T, E)
    - attn_mask: Array of shape (T, S) where mask[i,j] == 0 indicates that
      key/value position i must not contribute to the output at query
      position j. Anything broadcastable to (N, H, T, S) is accepted.

    Returns:
    - output: Tensor of shape (N, S, E) giving the weighted combination of
      data in value according to the attention weights calculated using key
      and query.

    Dropout with probability ``self.dropout`` is applied to the per-head
    outputs, and only while the module is in training mode (objects without
    a ``training`` attribute are treated as training).
    """
    N, S, D = query.shape
    N, T, D = value.shape

    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    H = self.num_heads
    head_dim = D // H

    # Project the inputs, split the embedding into heads and move the head
    # axis next to the batch axis: (N, L, E) -> (N, H, L, E/H).
    k = torch.reshape(self.key(key), (N, T, H, head_dim)).transpose(1, 2)
    v = torch.reshape(self.value(value), (N, T, H, head_dim)).transpose(1, 2)
    q = torch.reshape(self.query(query), (N, S, H, head_dim)).transpose(1, 2)

    # Alignment scores, laid out as (N, H, T, S) so that a (T, S) mask
    # broadcasts directly against them.
    scores = torch.matmul(q, k.transpose(2, 3)).transpose(2, 3)

    if attn_mask is not None:
        # as_tensor accepts tensors and arrays without an extra copy and
        # places the mask on the same device as the scores.
        blocked = torch.as_tensor(attn_mask == 0, device=scores.device)
        scores = scores.masked_fill(blocked, float('-inf'))

    # Scale, then normalise over the key axis (T).
    scores = scores / math.sqrt(head_dim)
    weights = F.softmax(scores, dim=2)  # (N, H, T, S)

    # Weighted sum of the values as a batched matmul; this avoids
    # materialising an (N, H, E/H, T, S) intermediate.
    context = torch.matmul(weights.transpose(2, 3), v)  # (N, H, S, E/H)

    # Dropout must be disabled in eval mode; F.dropout defaults to
    # training=True, so pass the module state explicitly.
    context = F.dropout(context,
                        p=self.dropout,
                        training=getattr(self, 'training', True))

    # Concatenate the heads and project to the final output.
    merged = torch.reshape(context.transpose(1, 2), (N, S, D))
    output = self.proj(merged)
    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    return output
w_opt = (torch.inverse(x_bias_t.mm(x_train_bias)).mm( x_train_bias.T)).mm(y_train_n) best_w0, best_w1 = w_opt.T[0] # Weights from analitical solution padding = 1 w0 = torch.linspace(best_w0 - padding, best_w0 + padding, 100) w1 = torch.linspace(best_w1 - padding, best_w1 + padding, 100) # create w0 and w1 grid w0_grid, w1_grid = torch.meshgrid(w0, w1) # calculate J w = torch.dstack((w0_grid, w1_grid)) # 100x100x2 x_train = x_train_bias # 50x2 => 100x50x2 y_train = y_train_n # 50x1 => 100x50x100 y_pred = torch.matmul(x_train, torch.swapaxes(w, 1, 2)) # 50x2 * 100x2x100 = 100x100x50 J = ((y_pred - y_train)**2).mean(1) external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css'] app = dash.Dash(__name__, external_stylesheets=external_stylesheets) df = pd.read_csv('https://plotly.github.io/datasets/country_indicators.csv') available_indicators = df['Indicator Name'].unique() app.layout = html.Div([ html.Div(id='hidden-div', style={'display': 'none'}), html.Div(dcc.Markdown(''' ##### Batch_size: '''),
def load_and_run_eval(model_path, train_data_file_path, train_label_file_path,
                      eval_data_file_path, eval_label_file_path,
                      model_channels, model_classes, device):
    """Load a saved binary classifier and score it on the evaluation set.

    The training set is first run through the model to obtain the average
    summed spike count per class; evaluation samples are then assigned the
    class whose average is closest to their own spike count.

    Returns:
        ``(e_loss, eval_acc, eval_kappa)``: L1 loss against the per-class
        average counts, accuracy, and the kappa value derived from it.
    """
    # Create Model and Load Model
    model = BinaryEEGClassifierLIF(channels=model_channels).to(device)
    model.load_state_dict(torch.load(model_path))

    # load training set (one batch holding every sample)
    training_data = CustomDataset(train_label_file_path, train_data_file_path)
    training_generator = DataLoader(training_data,
                                    batch_size=len(training_data))

    # Generate average spike frequencies:
    # sum the spike counts per class, then divide by the sample counts.
    spike_frequencies = [0] * model_classes
    sample_amount = [0] * model_classes
    for data, labels in training_generator:
        # Reorder the axes from (a, b, c) to (c, a, b) before the model call.
        data = data[:, :, :].float().swapaxes(0, 2).swapaxes(1, 2).to(device)
        labels = labels.long().to(device)
        outputs = model(data)
        outputs = torch.squeeze(outputs[0].sum(dim=0))  # batch size, spikes
        for sample_idx, label in enumerate(labels):
            c_label = int(label.item())
            spike_frequencies[c_label] += outputs[sample_idx].item()
            sample_amount[c_label] += 1
    spike_frequencies = (np.array(spike_frequencies) /
                         np.array(sample_amount))

    # set model to eval mode
    model.eval()

    # load eval files
    evaluate_data = CustomDataset(eval_label_file_path, eval_data_file_path)
    evaluation_generator = DataLoader(evaluate_data,
                                      batch_size=len(evaluate_data))
    criterion = torch.nn.L1Loss()

    # for eval data
    with torch.set_grad_enabled(False):
        for data, labels in evaluation_generator:
            # transform data
            data = data[:, :, :].float().swapaxes(0, 2).swapaxes(1, 2).to(device)
            labels = labels.long()

            # Convert class labels to target spike frequencies
            s_labels = torch.tensor([spike_frequencies[i] for i in labels])
            s_labels = s_labels.to(device)

            # generate output
            outputs = model(data)
            outputs = torch.squeeze(outputs[0].sum(dim=0))  # batch size, spikes
            e_loss = criterion(outputs, s_labels)

            # transform to labels via average spike frequency
            distances = np.absolute(np.array([
                x.cpu().clone().detach().numpy() - spike_frequencies
                for x in outputs
            ]))
            out_labels = np.argmin(distances, axis=1)
            diff_l = [
                0 if out_labels[i] == labels[i] else 1
                for i in range(len(labels))
            ]
            eval_c_1 = sum(labels)

    # generate statistics
    mae_acc = sum(diff_l) / len(diff_l)
    eval_acc = 1 - mae_acc
    chance_acc = eval_c_1 / len(labels)
    eval_kappa = (eval_acc - chance_acc) / (1 - chance_acc)

    # generate eval acc and print\log
    print(
        f'Eval Loss: {e_loss:.6f} \t Eval Acc: {eval_acc} \t Eval C1: {eval_c_1} \t Evak kappa: {eval_kappa}'
    )

    # return statistics
    return e_loss, eval_acc, eval_kappa
def run_binary_classification(batch_size, shuffle, workers, max_epochs,
                              train_data_file_path, train_label_file_path,
                              val_data_file_path, val_label_file_path,
                              eval_data_file_path, eval_label_file_path,
                              model_channels, model_classes,
                              model_learning_rate, model_weight_decay,
                              save_model, model_name, device):
    """Train, validate and evaluate a binary spiking EEG classifier.

    Each epoch trains against fixed target spike counts (25 / 150), then
    measures the average summed spike count per class on the training set and
    uses those averages as targets and class prototypes for validation. After
    the last epoch the model is scored on the evaluation set and saved as
    ``{model_name}_last.pth``; the model with the lowest validation loss is
    saved as ``{model_name}.pth`` when ``save_model`` is true.

    Returns:
        ``(statistics, e_loss, eval_acc, eval_kappa, best_val_epoch)`` where
        ``statistics`` is ``[epochs, train_loss, train_acc, val_loss,
        val_acc]`` (one list per entry).
    """
    # set parameters
    params = {
        'batch_size': batch_size,
        'shuffle': shuffle,
        'num_workers': workers
    }

    # create custom datasets
    training_data = CustomDataset(train_label_file_path, train_data_file_path)
    validate_data = CustomDataset(val_label_file_path, val_data_file_path)
    evaluate_data = CustomDataset(eval_label_file_path, eval_data_file_path)

    # prepare data loaders
    training_generator = DataLoader(training_data, **params)
    validation_generator = DataLoader(validate_data, **params)
    evaluation_generator = DataLoader(evaluate_data,
                                      batch_size=len(evaluate_data))

    # create model, optimizer and loss function, prepare best epoch and min
    # valid loss as well as statistics
    model = BinaryEEGClassifierLIF(channels=model_channels).to(device)
    optimizer = optim.Adam(model.parameters(),
                           lr=model_learning_rate,
                           weight_decay=model_weight_decay)
    criterion = torch.nn.L1Loss()
    min_valid_loss = np.inf
    best_val_epoch = -1
    epoch_statistics = []
    train_loss_statistics = []
    train_acc_statistics = []
    validation_loss_statistics = []
    validation_acc_statistics = []

    # Begin of training for each epoch
    for epoch in range(max_epochs):
        # make training target 25 spikes for not my class, 150 spikes for
        # my class
        spike_frequencies = [25, 150]

        # loss and acc
        train_loss = 0.0
        train_mae_acc = 0.0

        # for each batch
        for data, labels in training_generator:
            # transform data: reorder the axes from (a, b, c) to (c, a, b)
            data = data[:, :, :].float()
            data = torch.swapaxes(data, 0, 2)
            data = torch.swapaxes(data, 1, 2)
            data = data.to(device)
            labels = labels.long()

            # Convert class labels to target spike frequencies
            s_labels = [spike_frequencies[i] for i in labels]
            s_labels = torch.tensor(s_labels)
            s_labels = s_labels.to(device)

            # generate outputs
            optimizer.zero_grad()
            outputs = model(data)

            # convert spike trains to sum of spikes
            outputs = outputs[0].sum(dim=0)  # batch size, spikes
            outputs = torch.squeeze(outputs)

            # compute loss, backward pass, optimizer step
            loss = criterion(outputs, s_labels)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

            # convert spike trains to closest label for acc prediction
            distances = np.array([
                x.cpu().clone().detach().numpy() - spike_frequencies
                for x in outputs
            ])
            distances = np.absolute(distances)
            out_labels = np.argmin(distances, axis=1)
            diff_l = [
                0 if out_labels[i] == labels[i] else 1
                for i in range(len(labels))
            ]
            train_mae_acc += sum(diff_l)

        # Divide the error count by the real number of samples. The previous
        # `len(loader) * batch_size` over-counted whenever the last batch
        # was partial, which inflated the reported accuracy.
        train_mae_acc = 1 - (train_mae_acc / len(training_data))

        # reset spike frequencies after training to get actual average spike
        # frequencies
        spike_frequencies = [0 for x in range(model_classes)]
        sample_amount = [0 for x in range(model_classes)]
        with torch.set_grad_enabled(False):
            for data, labels in training_generator:
                data = data[:, :, :].float()
                data = torch.swapaxes(data, 0, 2)
                data = torch.swapaxes(data, 1, 2)
                data = data.to(device)
                labels = labels.long()
                labels = labels.to(device)
                outputs = model(data)
                outputs = outputs[0].sum(dim=0)  # batch size, spikes
                outputs = torch.squeeze(outputs)
                for i, x in enumerate(labels):
                    c_label = int(x.item())
                    spikes = outputs[i]
                    spike_frequencies[c_label] += spikes.item()
                    sample_amount[c_label] += 1
        spike_frequencies = (np.array(spike_frequencies) /
                             np.array(sample_amount))

        # validation phase
        val_loss = 0.0
        val_mae_acc = 0.0

        # same as training steps but without optimizer step
        with torch.set_grad_enabled(False):
            for data, labels in validation_generator:
                # transform data
                data = data[:, :, :].float()
                data = torch.swapaxes(data, 0, 2)
                data = torch.swapaxes(data, 1, 2)
                data = data.to(device)
                labels = labels.long()

                # Convert class labels to target spike frequencies
                s_labels = [spike_frequencies[i] for i in labels]
                s_labels = torch.tensor(s_labels)
                s_labels = s_labels.to(device)
                outputs = model(data)
                outputs = outputs[0].sum(dim=0)  # batch size, spikes
                outputs = torch.squeeze(outputs)
                v_loss = criterion(outputs, s_labels)
                val_loss += v_loss.item()

                # convert spike trains to closest label for acc prediction
                distances = np.array([
                    x.cpu().clone().detach().numpy() - spike_frequencies
                    for x in outputs
                ])
                distances = np.absolute(distances)
                out_labels = np.argmin(distances, axis=1)
                diff_l = [
                    0 if out_labels[i] == labels[i] else 1
                    for i in range(len(labels))
                ]
                val_mae_acc += sum(diff_l)

        # val acc, again relative to the real number of samples
        val_mae_acc = 1 - (val_mae_acc / len(validate_data))

        # log info
        logging.info(
            f'Epoch {epoch + 1} \t\t Training Loss: {train_loss / len(training_generator)} \t Training Acc: {train_mae_acc} \t\t Validation Loss: {val_loss / len(validation_generator)} \t Validation Acc: {val_mae_acc}'
        )

        # append statistics
        epoch_statistics.append(epoch)
        train_loss_statistics.append(train_loss / len(training_generator))
        train_acc_statistics.append(train_mae_acc)
        validation_loss_statistics.append(val_loss /
                                          len(validation_generator))
        validation_acc_statistics.append(val_mae_acc)

        # check if val loss is reduced
        if min_valid_loss > val_loss / len(validation_generator):
            logging.info(
                f'Validation Loss Decreased({min_valid_loss:.6f}--->{val_loss / len(validation_generator):.6f}'
            )
            min_valid_loss = val_loss / len(validation_generator)
            # save model
            if save_model:
                torch.save(model.state_dict(), f'{model_name}.pth')
            best_val_epoch = epoch

    # Begin of Eval
    model.eval()

    # Load Eval set (a single batch holding every sample)
    with torch.set_grad_enabled(False):
        for data, labels in evaluation_generator:
            # transform data
            data = data[:, :, :].float()
            data = torch.swapaxes(data, 0, 2)
            data = torch.swapaxes(data, 1, 2)
            data = data.to(device)
            labels = labels.long()

            # Convert class labels to target spike frequencies
            s_labels = [spike_frequencies[i] for i in labels]
            s_labels = torch.tensor(s_labels)
            s_labels = s_labels.to(device)
            outputs = model(data)
            outputs = outputs[0].sum(dim=0)  # batch size, spikes
            outputs = torch.squeeze(outputs)
            e_loss = criterion(outputs, s_labels)

            # convert spike trains to closest label for acc prediction
            distances = np.array([
                x.cpu().clone().detach().numpy() - spike_frequencies
                for x in outputs
            ])
            distances = np.absolute(distances)
            out_labels = np.argmin(distances, axis=1)
            diff_l = [
                0 if out_labels[i] == labels[i] else 1
                for i in range(len(labels))
            ]
            eval_c_1 = sum(labels)

    mae_acc = sum(diff_l) / len(diff_l)
    eval_acc = 1 - mae_acc
    chance_acc = eval_c_1 / len(labels)
    eval_kappa = (eval_acc - chance_acc) / (1 - chance_acc)

    # generate eval acc and print\log
    print(
        f'Eval Loss: {e_loss:.6f} \t Eval Acc: {eval_acc} \t Eval C1: {eval_c_1} \t Evak kappa: {eval_kappa}'
    )

    # save last model
    torch.save(model.state_dict(), f'{model_name}_last.pth')

    # combine and return statistics
    statistics = [
        epoch_statistics, train_loss_statistics, train_acc_statistics,
        validation_loss_statistics, validation_acc_statistics
    ]
    return statistics, e_loss, eval_acc, eval_kappa, best_val_epoch
def forward(self, xu):
    """Return log-normalized transition scores for the input ``xu``.

    The input is standardized and passed through ``self.layers``. The
    network output is tiled ``self.nb_states`` times along the leading
    axis of a three-axis tiling pattern, and axes 0 and 1 are then
    swapped. Finally the log-sum-exp over the last axis is subtracted, so
    that ``exp(result)`` sums to one along the last axis.
    """
    standardized = self.standardize(xu)
    scores = self.layers.forward(standardized)

    # Replicate the scores once per state, then move the copies to axis 1.
    reps = (self.nb_states, 1, 1)
    logtrans = scores.tile(reps).swapaxes(0, 1)

    # Normalize in log space over the last axis.
    log_norm = logtrans.logsumexp(dim=-1, keepdim=True)
    return logtrans - log_norm
def forward(self, xu):
    """Return log-normalized transition scores from basis features of ``xu``.

    The standardized input is converted with ``np_float``, expanded by
    ``self.basis.fit_transform``, converted back with ``to_float`` and moved
    to ``self.device``. A linear map (``self.weight`` contracted over the
    feature axis, plus ``self.bias``) produces the scores, which are tiled
    ``self.nb_states`` times, have axes 0 and 1 swapped, and are normalized
    by subtracting the log-sum-exp over the last axis.
    """
    standardized = self.standardize(xu)

    # Feature expansion happens outside torch; bring the result back to the
    # module's device afterwards.
    expanded = self.basis.fit_transform(np_float(standardized))
    features = to_float(expanded).to(self.device)

    # Linear read-out: contract the feature axis d against each row of weight.
    scores = torch.einsum('...d,kd->...k', features, self.weight) + self.bias

    # Replicate the scores once per state, then move the copies to axis 1.
    reps = (self.nb_states, 1, 1)
    logtrans = scores.tile(reps).swapaxes(0, 1)

    # Normalize in log space over the last axis.
    log_norm = logtrans.logsumexp(dim=-1, keepdim=True)
    return logtrans - log_norm
def main(base_path, base_model_name, class_amount, model_channels,
         model_classes, device):
    """Score the per-class binary classifiers as a one-vs-rest ensemble.

    For every class ``c`` in ``1..class_amount`` two checkpoints are loaded
    (``{base_model_name}_class{c}_model.pth`` and
    ``{base_model_name}_class{c}_model_last.pth``) together with the array
    ``normalized_eval_class{c}.npy``. Both models are run on that array and
    the summed first model output per sample is used as the score for class
    ``c``. A sample's predicted label is the class with the highest score.

    Args:
        base_path: Location of the checkpoints and ``.npy`` files. The
            ``.npy`` paths are built by plain string concatenation, so a
            directory must end with a path separator.
        base_model_name: Common prefix of the checkpoint file names.
        class_amount: Number of classes, i.e. number of binary models.
        model_channels: Forwarded to ``BinaryEEGClassifierLIF(channels=...)``.
        model_classes: Unused; kept so existing callers keep working.
        device: Device on which the models and data are placed.

    Returns:
        Tuple ``(best_acc, best_kappa, last_acc, last_kappa)`` of floats.
        Kappa uses a chance level of ``1 / class_amount``.

    Raises:
        ValueError: If ``class_amount`` is smaller than 2, or the number of
            predictions does not match the number of labels.
    """
    if class_amount < 2:
        raise ValueError(
            'class_amount must be at least 2 to compute kappa, '
            f'got {class_amount}')

    eval_label_file = f'{base_path}raw_eval_labels.npy'
    eval_labels = torch.from_numpy(np.load(eval_label_file)).to(device)
    # Stored labels are 1-based; shift them so they line up with the argmax
    # indices, and flatten so a column vector cannot broadcast by accident.
    eval_labels = (eval_labels - 1).reshape(-1)

    best_scores = []
    last_scores = []
    for class_number in range(1, class_amount + 1):
        best_model_path = os.path.join(
            base_path, f'{base_model_name}_class{class_number}_model.pth')
        last_model_path = os.path.join(
            base_path, f'{base_model_name}_class{class_number}_model_last.pth')
        best_model = _load_binary_classifier(
            best_model_path, model_channels, device)
        last_model = _load_binary_classifier(
            last_model_path, model_channels, device)

        data = torch.from_numpy(
            np.load(f'{base_path}normalized_eval_class{class_number}.npy')
        ).to(device).float()
        # Reorder axes (a, b, c) -> (c, a, b) before feeding the model.
        data = torch.swapaxes(data, 0, 2)
        data = torch.swapaxes(data, 1, 2)

        best_scores.append(_class_scores(best_model, data))
        last_scores.append(_class_scores(last_model, data))

    best_acc, best_kappa = _accuracy_and_kappa(
        best_scores, eval_labels, class_amount)
    last_acc, last_kappa = _accuracy_and_kappa(
        last_scores, eval_labels, class_amount)
    return best_acc, best_kappa, last_acc, last_kappa


def _load_binary_classifier(checkpoint_path, channels, device):
    """Build a ``BinaryEEGClassifierLIF`` in eval mode from a checkpoint."""
    model = BinaryEEGClassifierLIF(channels=channels).to(device)
    # map_location lets checkpoints saved on another device load here.
    model.load_state_dict(torch.load(checkpoint_path, map_location=device))
    model.eval()
    return model


def _class_scores(model, data):
    """Return one score per sample: the model's first output summed over dim 0."""
    with torch.no_grad():
        outputs = model(data)
        summed = outputs[0].sum(dim=0)
    # reshape(-1) keeps a single-sample result 1-D instead of 0-D.
    return torch.squeeze(summed).reshape(-1)


def _accuracy_and_kappa(per_class_scores, labels, class_amount):
    """Return ``(accuracy, kappa)`` for arg-max predictions over class scores."""
    scores = torch.stack(per_class_scores, dim=1)  # (samples, classes)
    predicted = torch.argmax(scores, dim=1)
    if predicted.shape[0] != labels.shape[0]:
        raise ValueError(
            f'got {predicted.shape[0]} predictions but '
            f'{labels.shape[0]} labels')
    accuracy = int((predicted == labels).sum()) / predicted.shape[0]
    chance = 1.0 / class_amount
    kappa = (accuracy - chance) / (1 - chance)
    return accuracy, kappa
def meshgrid(*xs, indexing='ij'):
    """Build coordinate grids from 1-D tensors, NumPy-style.

    Args:
        *xs: Tensors accepted by ``torch.meshgrid``.
        indexing: ``'ij'`` (default) for matrix indexing, where output axis
            ``k`` follows input ``k``. ``'xy'`` gives Cartesian indexing,
            which is the ``'ij'`` result with the first two axes swapped
            (the same convention as ``numpy.meshgrid``). Any other value is
            treated as ``'ij'``.

    Returns:
        A sequence with one grid per input tensor.
    """
    # Request 'ij' explicitly: relying on torch's default emits a
    # deprecation warning on current PyTorch releases.
    grids = torch.meshgrid(*xs, indexing='ij')
    if indexing == 'xy':
        # With fewer than two inputs there is no second axis to swap, and
        # the 'xy' and 'ij' layouts coincide.
        return tuple(
            torch.swapaxes(g, 1, 0) if g.dim() >= 2 else g for g in grids)
    return grids