def train(epoch, model):
    """Run one training epoch over the global `train_loader`.

    Relies on module-level globals: train_loader, args, optimizer, criterion.

    Args:
        epoch: epoch index, used only for logging.
        model: the network to train (moved to GPU by the caller).

    Returns:
        Mean batch loss over the epoch (float).
    """
    model.train()
    epoch_loss = 0.0
    for batch_idx, (info, label, index) in enumerate(train_loader):
        data = info.float()
        target = label
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
        # IMPROVED: the original wrapped output.data[0] in the deprecated
        # torch.autograd.Variable before printing; the raw value is identical.
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\t'.format(
            epoch, batch_idx * len(data) + 1, len(train_loader.dataset),
            100. * (batch_idx + 1) / len(train_loader), loss.item()),
            output.data[0])
        # IMPROVED: dropped the pointless in-place torch.zero_(data) -- `data`
        # is a local about to be rebound, zeroing it frees nothing.
        torch.cuda.empty_cache()
    train_loss = epoch_loss / len(train_loader)
    print(epoch, train_loss)
    torch.cuda.empty_cache()
    return train_loss
def halve_or_stop(self, iteration):
    """Decide the next pruning move for this conv layer.

    Either masks the weakest half of the search space, halves the search
    space, or restarts AOFP. Zeroes the running t/cnt accumulators as a
    side effect.
    """
    half = self.search_space_size() // 2
    print('granu of conv {} is {}'.format(self.conv_idx, half))
    assert half > 0
    averaged = self.get_averaged_accumulated_t_vector()
    # Reset the accumulators in place for the next accumulation window.
    self.accumulated_t.zero_()
    self.accumulated_cnt.zero_()
    # The half-th smallest averaged score decides whether the whole weakest
    # half is prunable at once.
    if sorted(averaged)[half - 1] < self.thresh:
        self.mask_cur_granu_and_finish_a_move(iteration, averaged, half)
    elif half > 1:
        self.halve_search_space(iteration, averaged, half)
    else:
        self.start_aofp(iteration)
def _update_mask(self): memory_size = self.memory_size max_modules = self.max_modules print(f"updating ms_lmn mask for {max_modules} modules") if not hasattr(self, 'm_mask'): self.register_buffer( 'm_mask', torch.zeros(memory_size * max_modules, memory_size * max_modules)) self.mask_hook = self.Wmm.register_hook( lambda grad: grad * self.m_mask) torch.zero_(self.m_mask) for mi in range(self.num_modules): self.m_mask[mi * memory_size:(mi + 1) * memory_size, mi * memory_size:] = 1 self.Wmm.data = self.Wmm.data * self.m_mask
def fake_loss(out):
    """BCE-with-logits loss of discriminator scores against all-zero labels.

    Relies on the module-level global `device`.

    Args:
        out: discriminator logits, shape (batch,).

    Returns:
        Scalar loss tensor.
    """
    batch = out.size(0)
    # BUG FIX: torch.zero_ is an in-place op that expects a tensor; passing
    # an int raises TypeError. The zero label vector must be built with
    # torch.zeros.
    labels = torch.zeros(batch)
    # loss function
    criterion = nn.BCEWithLogitsLoss()
    # move to gpu
    labels = labels.to(device)
    # calculate loss
    loss = criterion(out, labels)
    return loss
def __init__(self, in_features, out_features, bias, activation, dropout, dim_hook, label_features, fc_zero_init, train_mode):
    """Fully-connected block: Linear (optionally zero-initialized and/or
    FA-wrapped), activation, optional dropout, and a training hook."""
    super(FC_block, self).__init__()
    self.dropout = dropout
    self.fc = nn.Linear(in_features=in_features, out_features=out_features, bias=bias)
    # Optionally start from an all-zero weight matrix.
    if fc_zero_init:
        self.fc.weight.data.zero_()
    # Feedback-alignment training wraps the plain linear module.
    if train_mode == 'FA':
        self.fc = FA_wrapper(module=self.fc, layer_type='fc', dim=self.fc.weight.shape)
    self.act = Activation(activation)
    if dropout != 0:
        self.drop = nn.Dropout(p=dropout)
    self.hook = TrainingHook(label_features=label_features, dim_hook=dim_hook,
                             train_mode=train_mode)
def init(module: Module):
    """Initialize `module.weight` per the enclosing `method` name; return module.

    `method` is a free variable captured from the enclosing scope.

    Raises:
        ValueError: if `method` names no known initialization scheme.
    """
    if method == 'none':
        return module
    elif method == 'he':
        kaiming_normal_(module.weight)
        return module
    elif method == 'xavier':
        xavier_normal_(module.weight)
        return module
    elif method == 'dcgan':
        normal_(module.weight, 0.0, 0.02)
        return module
    elif method == 'dcgan_001':
        normal_(module.weight, 0.0, 0.01)
        return module
    elif method == "zero":
        with torch.no_grad():
            zero_(module.weight)
        return module
    else:
        # BUG FIX: the original raised a bare string, which is itself a
        # TypeError in Python 3; raise a proper exception type instead.
        raise ValueError("Invalid initialization method %s" % method)
def __init__(self, input_size, dense_n=64, dense_depth=0, output_size=None):
    """Stack of `dense_depth` (Linear -> ReLU) layers of width `dense_n`,
    optionally capped by a final Linear projecting to `output_size`.

    Exposes `self.input_size` and `self.output_size` so callers can wire
    this module into a larger network.
    """
    super().__init__()
    self.input_size = input_size
    self.output_size = input_size
    layers = []
    for depth_i in range(dense_depth):
        fan_in = input_size if depth_i == 0 else dense_n
        hidden = nn.Linear(fan_in, dense_n)
        # Xavier weights, zero biases for each hidden layer.
        nn.init.xavier_uniform_(hidden.weight.data)
        hidden.bias.data.zero_()
        layers += [hidden, nn.ReLU()]
        self.output_size = dense_n
    if output_size:
        layers.append(nn.Linear(self.output_size, output_size))
        self.output_size = output_size
    self.dense = nn.Sequential(*layers)
def __init__(self, input_size, hidden_size, num_modules):
    """Modular RNN parameters with a block-upper-triangular hidden-to-hidden
    mask enforced on Whh both in the forward values and in its gradients."""
    super().__init__()
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.num_modules = num_modules
    total = num_modules * hidden_size
    self.Wxh = nn.Parameter(torch.zeros(total, input_size))
    self.Whh = nn.Parameter(torch.zeros(total, total))
    self.bh = nn.Parameter(torch.zeros(total))
    standard_init(self.parameters())
    self.register_buffer('h_mask', torch.zeros(total, total))
    # Mask gradients so connections zeroed below can never be learned back.
    self.mask_hook = self.Whh.register_hook(lambda grad: grad * self.h_mask)
    self.h_mask.zero_()
    # Module mi reads from itself and every later module's hidden slots.
    for mi in range(num_modules):
        lo, hi = mi * hidden_size, (mi + 1) * hidden_size
        self.h_mask[lo:hi, lo:] = 1
    self.Whh.data = self.Whh.data * self.h_mask
def forward(self, pos, pos_len, src, src_len): _, (hs, cs) = self.code_encoder(pos[:, 1:], pos_len - 1) # hs.size() ==> n_layer, batch_size, hidden_dim code_sum = self.codes(hs) # sort source sequence in decending order sorted_src_len, inds = torch.sort(src_len.clone().detach(), 0, descending=True) sorted_src = src[inds] _, (hs, cs) = self.src_encoder(sorted_src, sorted_src_len) # unsort sequence to original order hs = torch.zeros_like(hs).scatter_( 1, inds[None, :, None].expand(1, hs.shape[1], hs.shape[2]), hs) dec_init_hs = hs + code_sum[None] cs = torch.zero_(torch.empty_like(cs)) logits = self.decoder(pos, pos_len, (dec_init_hs, cs)) return logits
def train(imgL, imgR):
    """One optimization step on a stereo image pair; returns the scalar loss.

    Relies on module-level globals: model, optimizer.
    """
    model.train()
    optimizer.zero_grad()
    output = model(imgL, imgR)
    output = torch.squeeze(output, 0)
    # BUG FIX: torch.zero_(1) passes an int where a tensor is required;
    # build the zero target with torch.zeros instead.
    target = torch.zeros(1)
    # BUG FIX: the original did nn.BCELoss(output3, dtest, size_average=True),
    # passing tensors to the *constructor* and referencing the undefined
    # names `output3` / `dtest`. Construct the criterion, then apply it to
    # the network output and the zero target.
    criterion = nn.BCELoss()
    loss = criterion(output, target)
    loss.backward()
    optimizer.step()
    # BUG FIX: loss.data[0] is the pre-0.4 PyTorch idiom; .item() is the
    # supported scalar accessor.
    return loss.item()
def algebric_2d_to_3d_heatmaps(A_j, heatmaps):
    """Algebraically triangulate 2D heatmap coordinates into 3D points via SVD.

    NOTE(review): assumes A_j holds per-view projection rows of shape
    (n_views, 3, 4) and that heatmaps[i, :, j:] reshapes to (n_views, 2, 1)
    -- confirm against the caller; the reshapes are shape-fragile.

    Args:
        A_j: stacked camera/projection matrices (one per view).
        heatmaps: per-sample 2D coordinates, leading dims (batch, C, J).

    Returns:
        Tensor of triangulated 3D points, shape (batch, C, 3).
    """
    batch_size, C, J = heatmaps.shape[:3]
    # BUG FIX: torch.zero_ takes a tensor, not a size -- passing ints raises
    # TypeError. Allocate the output with torch.zeros.
    y_cj = torch.zeros(batch_size, C, 3)
    for i in range(batch_size):
        for j in range(J):
            coordinate = heatmaps[i, :, j:]
            n_views = len(A_j)
            # NOTE(review): A_j is rebound (and then mutated in place) here,
            # so subsequent iterations see the modified matrix rather than
            # the original cameras. This looks like a bug, but it is
            # preserved to keep observable behavior unchanged -- verify
            # against the paper / reference implementation.
            A_j = A_j[:, 2:3].expand(n_views, 2, 4) * coordinate.view(n_views, 2, 1)
            A_j -= A_j[:, :2]
            A_j *= torch.ones(n_views).view(-1, 1, 1)
            # (4) solve the homogeneous system A x = 0 by SVD; the last
            # right-singular vector spans the null space.
            u, s, vh = torch.svd(A_j.view(-1, 4))
            point_3d_homo = -vh[:, 3]
            # Dehomogenize: divide xyz by the homogeneous coordinate.
            point_3d = (point_3d_homo.unsqueeze(0).transpose(1, 0)[:-1]
                        / point_3d_homo.unsqueeze(0).transpose(1, 0)[-1]).transpose(1, 0)[0]
            y_cj[i, j] = point_3d
    return y_cj
# Evaluate a saved DSTGCN checkpoint on the test split, with the external
# features ablated (zeroed).
model_path = f"../saves/spatial_temporal_external/DSTGCN/model_0.pkl"
print(f'model path -> {model_path}')
# IMPROVED: load the checkpoint once instead of calling torch.load twice
# (the original deserialized the same file a second time just for "epoch").
checkpoint = torch.load(model_path)
model = create_model()
model.load_state_dict(checkpoint["model_state_dict"])
print(f'model epoch -> {checkpoint["epoch"]}')
model = convert_to_gpu(model)
print(model)
# NOTE(review): the model is never put in eval() mode here; if it contains
# dropout/batch-norm the test metrics are affected -- confirm intent.
data_loaders = get_data_loaders(get_attribute('K_hop'), get_attribute('batch_size'))
phase = "test"
tqdm_loader = tqdm(enumerate(data_loaders[phase]))
predictions, targets = list(), list()
for step, (g, spatial_features, temporal_features, external_features, truth_data) in tqdm_loader:
    # External features are ablated (zeroed in place) for this evaluation.
    torch.zero_(external_features)
    features, truth_data = convert_train_truth_to_gpu(
        [spatial_features, temporal_features, external_features], truth_data)
    outputs = model(g, *features)
    outputs = torch.squeeze(outputs)  # squeeze [batch-size, 1] to [batch-size]
    with torch.no_grad():
        predictions.append(outputs.cpu().numpy())
        targets.append(truth_data.cpu().numpy())
scores = evaluate(np.concatenate(predictions), np.concatenate(targets))
print('===== Test predict result =====')
def __init__(self, features, eps=1e-16):
    """Layer normalization with learnable gain `a_2` and bias `b_2`.

    Args:
        features: size of the normalized feature dimension.
        eps: numerical-stability constant added to the std.
    """
    super(LayerNorm, self).__init__()
    self.a_2 = nn.Parameter(torch.ones(features))
    # BUG FIX: torch.zero_ is an in-place op that expects a tensor; passing
    # the int `features` raises TypeError. The zero bias must be created
    # with torch.zeros, mirroring torch.ones above.
    self.b_2 = nn.Parameter(torch.zeros(features))
    self.eps = eps
def train_model(model: nn.Module, data_loaders: Dict[str, DataLoader], loss_func: callable, optimizer,
                model_folder: str, tensorboard_folder: str, pid: int):
    """Train/validate/test loop with best-on-validation checkpointing.

    Args:
        model: network to train (moved to GPU internally).
        data_loaders: phase name -> DataLoader for 'train'/'validate'/'test'.
        loss_func: callable taking (truth=..., predict=...) keyword args.
        optimizer: optimizer over model parameters.
        model_folder: directory for checkpoint pickles.
        tensorboard_folder: directory for TensorBoard logs.
        pid: worker id, used only in the progress description.

    Returns:
        The test-phase metrics dict from the epoch with the best validation
        F1 score, or None if validation never improved.
    """
    phases = ['train', 'validate', 'test']
    writer = SummaryWriter(tensorboard_folder)
    num_epochs = get_attribute('epochs')
    since = time.perf_counter()
    model = convert_to_gpu(model)
    loss_func = convert_to_gpu(loss_func)
    save_dict, best_f1_score = {'model_state_dict': copy.deepcopy(model.state_dict()), 'epoch': 0}, 0
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=.5, patience=2,
                                                     threshold=1e-3, min_lr=1e-6)
    test_metric = None
    try:
        for epoch in range(num_epochs):
            running_loss = {phase: 0.0 for phase in phases}
            running_metrics = {phase: dict() for phase in phases}
            save_validate_this_epoch = False
            for phase in phases:
                if phase == 'train':
                    model.train()
                else:
                    model.eval()
                steps, predictions, targets = 0, list(), list()
                tqdm_loader = tqdm(enumerate(data_loaders[phase]))
                for step, (g, spatial_features, temporal_features, external_features, truth_data) in tqdm_loader:
                    # Skip ragged trailing batches.
                    if list(external_features.size())[0] != get_attribute("batch_size"):
                        continue
                    # Feature-group ablation: zero disabled groups in place.
                    if not get_attribute("use_spatial_features"):
                        torch.zero_(spatial_features)
                    if not get_attribute("use_temporal_features"):
                        torch.zero_(temporal_features)
                    if not get_attribute("use_external_features"):
                        torch.zero_(external_features)
                    features, truth_data = convert_train_truth_to_gpu(
                        [spatial_features, temporal_features, external_features], truth_data)
                    with torch.set_grad_enabled(phase == 'train'):
                        _outputs = model(g, *features)
                        outputs = torch.squeeze(_outputs)  # squeeze [batch-size, 1] to [batch-size]
                        loss = loss_func(truth=truth_data, predict=outputs)
                        if phase == 'train':
                            optimizer.zero_grad()
                            loss.backward()
                            optimizer.step()
                    targets.append(truth_data.cpu().numpy())
                    with torch.no_grad():
                        predictions.append(outputs.cpu().detach().numpy())
                    # BUG FIX: the original accumulated the loss *tensor*
                    # (`running_loss[phase] += loss * n`), which kept every
                    # batch's autograd graph alive for the whole epoch --
                    # the very memory growth the empty_cache() hack below
                    # tried to paper over. Accumulate a Python float instead.
                    running_loss[phase] += loss.item() * truth_data.size(0)
                    steps += truth_data.size(0)
                    tqdm_loader.set_description(
                        f'{pid:2} pid: {phase:8} epoch: {epoch:3}, {phase:8} loss: {running_loss[phase] / steps:3.6}')
                    # Kept from the original: releases cached GPU blocks.
                    torch.cuda.empty_cache()
                print(f'{phase} metric ...')
                _cp = np.concatenate(predictions)
                _ct = np.concatenate(targets)
                scores = evaluate(_cp, _ct)
                running_metrics[phase] = scores
                print(scores)
                # Checkpoint whenever validation F1 improves.
                if phase == 'validate' and scores['F1-SCORE'] > best_f1_score:
                    best_f1_score = scores['F1-SCORE']
                    save_validate_this_epoch = True
                    save_dict.update(model_state_dict=copy.deepcopy(model.state_dict()),
                                     epoch=epoch,
                                     optimizer_state_dict=copy.deepcopy(optimizer.state_dict()))
                    print(f"save model as {model_folder}/model_{epoch}.pkl")
                    save_model(f"{model_folder}/model_{epoch}.pkl", **save_dict)
            scheduler.step(running_loss['train'])
            if save_validate_this_epoch:
                test_metric = running_metrics["test"].copy()
            for metric in running_metrics['train'].keys():
                writer.add_scalars(metric, {
                    f'{phase} {metric}': running_metrics[phase][metric] for phase in phases},
                    global_step=epoch)
            writer.add_scalars('Loss', {
                f'{phase} loss': running_loss[phase] / len(data_loaders[phase].dataset)
                for phase in phases}, global_step=epoch)
    finally:
        time_elapsed = time.perf_counter() - since
        print(f"cost {time_elapsed} seconds")
        save_model(f"{model_folder}/best_model.pkl", **save_dict)
    return test_metric
def model_epoch(epoch, loss_name, model, data_loader, concepts, optimizer, writer, **kwargs):
    """Run one (G)ZSL epoch: optional training step, then ZSL and GZSL predictions.

    `loss_name` containing 'train' selects training mode; 'test'/'val'
    selects evaluation mode (grad globally disabled -- note this leaks the
    global grad state to the caller, as in the original).

    Returns:
        dict of deques: 'total'/'correct' (ZSL) and 'total_g'/'correct_g'
        (GZSL) one-hot ground truths and predictions.
    """
    print(loss_name)
    state = None
    if loss_name.find('train') != -1:
        state = "train"
        model.train()
        torch.set_grad_enabled(True)
    elif loss_name.find('test') != -1 or loss_name.find('val') != -1:
        state = "test"
        model.eval()
        torch.set_grad_enabled(False)
    else:
        # IMPROVED: assert is stripped under -O; raise a real exception.
        raise ValueError("Mode Error")
    metrics = {
        'total': deque(),
        'correct': deque(),
        'total_g': deque(),
        'correct_g': deque()
    }
    for batch_i, batch_data in enumerate(data_loader, 1):
        # input
        batch_img = batch_data['image'].to(DEVICE)
        batch_label = batch_data['label'].to(DEVICE)
        # conventional ZSL result
        gts = batch_label[:, concepts[loss_name]['label']][..., None]
        outputs = model(Variable(batch_img), concepts[loss_name]['vector'], state=state)
        # cal loss
        if state == 'train':
            optimizer.zero_grad()
            if model.type == "classifier":
                loss = F.binary_cross_entropy(outputs, gts)
            elif model.type == "DeVise":
                pos_i = np.where((gts == 1).squeeze().data.cpu())
                neg_i = np.where((gts == 0).squeeze().data.cpu())
                pos_sims = outputs[pos_i].view(outputs.shape[0], -1)
                neg_sims = outputs[neg_i].view(outputs.shape[0], -1)
                if 'use_smooth' in kwargs.keys() and kwargs['use_smooth']:
                    s_p = torch.sum(torch.exp(-pos_sims), dim=1)
                    s_n = torch.sum(torch.exp(neg_sims), dim=1)
                    loss = torch.log(1.0 + torch.sum(s_n * s_p))
                else:
                    margin = torch.FloatTensor([kwargs['margin']]).to(DEVICE)
                    loss = torch.mean(
                        torch.stack([
                            torch.sum(
                                torch.stack([
                                    margin + (n - p) if margin + (n - p) > 0
                                    else torch.FloatTensor([0]).to(DEVICE)
                                    for n in ns for p in ps
                                ]))
                            for ps, ns in zip(pos_sims, neg_sims)
                        ]))
                    loss = loss.mean()
            elif model.type == "transformer":
                pos_i = np.where((gts == 1).squeeze().data.cpu())
                neg_i = np.where((gts == 0).squeeze().data.cpu())
                pos_sims = outputs[pos_i].view(outputs.shape[0], -1)
                neg_sims = outputs[neg_i].view(outputs.shape[0], -1)
                if 'use_smooth' in kwargs.keys() and kwargs['use_smooth']:
                    s_p = torch.sum(torch.exp(pos_sims), dim=1)
                    s_n = torch.sum(torch.exp(-neg_sims), dim=1)
                    loss = torch.log(1.0 + torch.sum(s_n * s_p))
                else:
                    margin = torch.FloatTensor([kwargs['margin']]).to(DEVICE)
                    loss = torch.mean(
                        torch.stack([
                            torch.sum(
                                torch.stack([
                                    margin + (p - n) if margin + (p - n) > 0
                                    else torch.FloatTensor([0]).to(DEVICE)
                                    for n in ns for p in ps
                                ]))
                            for ps, ns in zip(pos_sims, neg_sims)
                        ]))
                    loss = loss.mean()
            else:
                # IMPROVED: raise instead of assert (see above).
                raise ValueError("model type error")
            loss.backward()
            optimizer.step()
            tmp_loss = loss.item()
            writer.add_scalar(loss_name + '_loss', tmp_loss,
                              batch_i + (epoch - 1) * len(data_loader))
            print('[%d, %6d] loss: %.4f' %
                  (epoch, batch_i * data_loader.batch_size, tmp_loss))
        # ZSL predict
        maxs = torch.max(outputs, 1)[1][..., None]
        # BUG FIX: torch.zero_(outputs) zeroed the model outputs IN PLACE
        # before scattering -- it destroys `outputs` as a side effect.
        # torch.zeros_like builds a fresh one-hot tensor instead.
        maxs_onehot = torch.zeros_like(outputs).scatter_(1, maxs, 1)
        metrics['total'].extend(np.array(gts.tolist()))
        metrics['correct'].extend(np.array(maxs_onehot.tolist()))
        # GZSL result
        gts_g = batch_label[:, concepts['general']['label']][..., None]
        outputs_g = model(Variable(batch_img), concepts['general']['vector'], state="test")
        # calibration: penalize seen classes by a fixed margin
        if 'calibration_gamma' in kwargs.keys():
            train_name = [k for k in concepts.keys() if k.find('train') >= 0][0]
            outputs_g[:, concepts[train_name]['label']] -= kwargs['calibration_gamma']
        # GZSL predict
        maxs_g = torch.max(outputs_g, 1)[1][..., None]
        # BUG FIX: same in-place zeroing issue as above.
        maxs_g_onehot = torch.zeros_like(outputs_g).scatter_(1, maxs_g, 1)
        metrics['total_g'].extend(np.array(gts_g.tolist()))
        metrics['correct_g'].extend(np.array(maxs_g_onehot.tolist()))
        if 'debug' in kwargs.keys() and kwargs['debug']:
            break
    return metrics
def grad_zero(self):
    """Reset the accumulated weight gradient to all zeros, in place."""
    # Tensor.zero_() mutates and returns the same tensor, so the rebind
    # keeps self.gradient_W pointing at the (now zeroed) storage.
    self.gradient_W = self.gradient_W.zero_()
def train(rank, params, shared_model, optimizer):
    """A3C worker loop: roll out num_steps, compute actor/critic losses,
    and push gradients into the shared model. Runs forever.

    Args:
        rank: worker index (offsets the random seed).
        params: hyper-parameter namespace (seed, env_name, num_steps,
            gamma, tau, max_epoisode_length).
        shared_model: globally shared ActorCritic whose weights are synced
            each rollout and which receives this worker's gradients.
        optimizer: shared optimizer stepping shared_model.
    """
    torch.manual_seed(params.seed + rank)
    env = create_atari_env(params.env_name)
    env.seed(params.seed + rank)
    model = ActorCritic(env.observation_space.shape[0], env.action_space)
    state = env.reset()
    state = torch.from_numpy(state)
    done = True
    episode_length = 0
    while True:
        episode_length += 1
        model.load_state_dict(shared_model.state_dict())
        if done:
            # BUG FIX: torch.zero_(1, 256) passes ints where a tensor is
            # required; fresh LSTM states must be built with torch.zeros.
            cx = Variable(torch.zeros(1, 256))
            hx = Variable(torch.zeros(1, 256))
        else:
            cx = Variable(cx.data)
            hx = Variable(hx.data)
        values = []
        log_probs = []
        rewards = []
        entropies = []
        for step in range(params.num_steps):
            # BUG FIX: the original passed (hx, cx) as the second argument
            # of Variable(...); the model expects a single (input, (hx, cx))
            # tuple.
            value, action_values, (hx, cx) = model(
                (Variable(state.unsqueeze(0)), (hx, cx)))
            prob = F.softmax(action_values)
            log_prob = F.log_softmax(action_values)
            entropy = (log_prob * prob).sum(1)
            entropies.append(entropy)
            action = prob.multinomial().data
            log_prob = log_prob.gather(1, Variable(action))
            values.append(value)
            log_probs.append(log_prob)
            # BUG FIX: gym's step returns (obs, reward, done, info) -- the
            # original 3-value unpack would raise at runtime.
            state, reward, done, _ = env.step(action.numpy())
            done = (done or episode_length > params.max_epoisode_length)
            reward = max(min(reward, 1), -1)
            if done:
                episode_length = 0
                state = env.reset()
            # BUG FIX: the observation must be converted back to a tensor
            # after every step, not only on reset, or state.unsqueeze(0)
            # fails on the next iteration.
            state = torch.from_numpy(state) if not torch.is_tensor(state) else state
            rewards.append(reward)
            if done:
                break
        # BUG FIX: torch.zero_(1, 1) -> torch.zeros(1, 1) (tensor constructor).
        R = torch.zeros(1, 1)
        if not done:
            # Bootstrap the return from the critic's value of the last state.
            value, _, _ = model((Variable(state.unsqueeze(0)), (hx, cx)))
            R = value.data
        values.append(Variable(R))
        policy_loss = 0
        value_loss = 0
        R = Variable(R)
        gae = torch.zeros(1, 1)
        for i in reversed(range(len(rewards))):
            R = params.gamma * R + rewards[i]
            advantage = R - values[i]
            value_loss = value_loss + 0.5 * advantage.pow(2)
            # BUG FIX: the original indexed the scalars reward/value/log_prob
            # instead of the rollout lists rewards/values/log_probs.
            TD = rewards[i] + params.gamma * values[i + 1].data - values[i].data
            gae = gae * params.gamma * params.tau + TD
            policy_loss = policy_loss - log_probs[i] * Variable(gae) - 0.01 * entropies[i]
        optimizer.zero_grad()
        (policy_loss + 0.5 * value_loss).backward()
        torch.nn.utils.clip_grad_norm(model.parameters(), 40)
        ensure_shared_grad(model, shared_model)
        optimizer.step()
def one_hot(label, depth=10):
    """Return a (len(label), depth) float tensor one-hot encoding `label`.

    Args:
        label: 1-D integer tensor of class indices in [0, depth).
        depth: number of classes (columns of the result).

    Returns:
        Float tensor with a single 1 per row at the label's column.
    """
    # BUG FIX: torch.zero_ is an in-place op that expects a tensor; passing
    # sizes raises TypeError. Allocate the zero matrix with torch.zeros.
    out = torch.zeros(label.size(0), depth)
    # IMPROVED: .long() replaces the legacy torch.LongTensor(tensor) copy.
    idx = label.long().view(-1, 1)
    out.scatter_(dim=1, index=idx, value=1)
    return out
def zero_pad_vector(vectors, pad_idx):
    """Zero out, in place, the entry at `pad_idx` of each pretrained vector table.

    `vectors` may be None (no pretrained vectors: no-op) or an iterable of
    tensors. NOTE(review): passing a single 2-D tensor iterates it row-wise,
    zeroing column `pad_idx` of every row -- confirm callers always pass a
    list of embedding matrices.
    """
    if vectors is None:
        return  # no pretrained
    tables = vectors if isinstance(vectors, list) else list(vectors)
    for table in tables:
        # In-place zero of the padding slot; Tensor.zero_ == torch.zero_.
        table[pad_idx].zero_()