def mlp_inference():
    model = MLP()
    model.load_state_dict(torch.load(config.inference_model_path))
    model.eval()

    dataset = FeatureDataset()
    dataloader = DataLoader(dataset, batch_size=1, shuffle=False, num_workers=4)

    counter = 0
    index = 0
    with torch.no_grad():
        for data in dataloader:
            index += 1
            inputs = data['features']
            labels = data['action']
            outputs = model(inputs)
            # probability_distribution = torch.nn.functional.softmax(outputs, dim=1)
            prediction = np.argmax(outputs.detach().numpy())
            if labels.detach().numpy()[0] != prediction:
                counter += 1
                print(index)
                print('prediction of MLP model is {}'.format(prediction))
                print('label is {}'.format(labels.detach().numpy()[0]))
                print('----')
    print(counter)
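# Side note on the prediction step above: np.argmax on the raw output array
# flattens it, which is only safe because batch_size=1. A batch-safe sketch
# of the same step using torch.argmax, on dummy logits (shapes illustrative):
import torch

logits = torch.randn(4, 3)                    # (batch_size, n_classes)
labels = torch.tensor([0, 2, 1, 2])
predictions = torch.argmax(logits, dim=1)     # (batch_size,)
mismatches = (predictions != labels).sum().item()
print(f'{mismatches} of {labels.numel()} samples misclassified')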
def mpl(root, path_train, path_test):
    data_set_train = dataset_MLP(root + path_train, train=True)
    data_set_test = dataset_MLP(root + path_test, train=False)
    trainloader = DataLoader(data_set_train, batch_size=1000, shuffle=True)
    testloader = DataLoader(data_set_test, batch_size=1000)

    model = MLP()
    criterion = t.nn.CrossEntropyLoss()
    lr = 0.01
    optimizer = t.optim.SGD(model.parameters(), lr, momentum=0.4)

    for epoch in range(240):
        for _, (data, label) in enumerate(trainloader):
            model.train()
            optimizer.zero_grad()
            score = model(data)
            loss = criterion(score, label)
            loss.backward()
            optimizer.step()
        # loss is already a scalar; item() extracts a Python float for printing
        print("Epoch:%d loss:%f" % (epoch, loss.item()))

    res = []
    for _, data in enumerate(testloader):
        model.eval()
        predict = model(data)
        predict = predict.detach().numpy().tolist()
        res += predict
    res = np.array(res)
    ans = np.argmax(res, axis=1)
    data_set_test.save_res(ans, "./images/res_MLP.csv")
def load_model(save_path):
    # counterpart of: torch.save(to_save, save_path)
    model = MLP(len(vocab), HIDDEN_SIZE, num_classes, device=device)
    checkpoint = torch.load(save_path + '/best_model.pt')
    model.load_state_dict(checkpoint['model_state_dict'])
    epoch = checkpoint['epoch']
    # move the model to the GPU if one is available
    model.to(device)
    # eval() is needed to disable dropout at inference time
    model.eval()
    return model
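# Save-side counterpart sketched from the checkpoint keys that load_model()
# reads above ('model_state_dict' and 'epoch'); the exact contents of the
# original `to_save` dict are an assumption.
def save_model(model, epoch, save_path):
    to_save = {
        'model_state_dict': model.state_dict(),
        'epoch': epoch,
    }
    torch.save(to_save, save_path + '/best_model.pt')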
class Agent():
    def __init__(self, test=False):
        # device
        if torch.cuda.is_available():
            self.device = torch.device('cuda')
        else:
            self.device = torch.device('cpu')
        self.model = MLP(state_dim=4, action_num=2, hidden_dim=256).to(self.device)
        if test:
            self.load('./pg_best.cpt')
        # discounted reward
        self.gamma = 0.99
        # optimizer
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=3e-3)
        # saved rewards and actions
        self.memory = Memory()
        self.tensorboard = TensorboardLogger('./')

    def save(self, save_path):
        print('save model to', save_path)
        torch.save(self.model.state_dict(), save_path)

    def load(self, load_path):
        print('load model from', load_path)
        self.model.load_state_dict(torch.load(load_path))

    def act(self, x, test=False):
        if not test:
            # cast the numpy state to a float tensor with a batch dimension
            x = torch.from_numpy(x).unsqueeze(0).float().to(self.device)
            # stochastic sample
            action_prob = self.model(x)
            dist = torch.distributions.Categorical(action_prob)
            action = dist.sample()
            # memory log_prob
            self.memory.logprobs.append(dist.log_prob(action))
            return action.item()
        else:
            self.model.eval()
            x = torch.from_numpy(x).unsqueeze(0).float().to(self.device)
            with torch.no_grad():
                action_prob = self.model(x)
            # a = np.argmax(action_prob.cpu().numpy())
            dist = torch.distributions.Categorical(action_prob)
            action = dist.sample()
            return action.item()

    def collect_data(self, state, action, reward):
        self.memory.actions.append(action)
        self.memory.rewards.append(torch.tensor(reward))
        self.memory.states.append(state)

    def clear_data(self):
        self.memory.clear_memory()

    def update(self):
        R = 0
        advantage_function = []
        for t in reversed(range(0, len(self.memory.rewards))):
            R = R * self.gamma + self.memory.rewards[t]
            advantage_function.insert(0, R)
        # turn the rewards into a tensor and standardize
        advantage_function = torch.Tensor(advantage_function).to(self.device)
        advantage_function = (advantage_function - advantage_function.mean()) / \
            (advantage_function.std() + np.finfo(np.float32).eps)
        policy_loss = []
        for log_prob, reward in zip(self.memory.logprobs, advantage_function):
            policy_loss.append(-log_prob * reward)
        # update network weights
        self.optimizer.zero_grad()
        loss = torch.cat(policy_loss).sum()
        loss.backward()
        self.optimizer.step()
        # log
        self.tensorboard.scalar_summary("loss", loss.item())
        self.tensorboard.update()
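# A minimal sketch of the episode loop this Agent is designed for
# (REINFORCE-style policy gradient). `env` is assumed to be a Gym-like
# CartPole environment (matching state_dim=4, action_num=2 above); only the
# Agent methods defined above are used.
import gym

env = gym.make('CartPole-v0')
agent = Agent()
for episode in range(1000):
    state = env.reset()
    done = False
    while not done:
        action = agent.act(state)
        next_state, reward, done, _ = env.step(action)
        agent.collect_data(state, action, reward)
        state = next_state
    agent.update()      # one policy-gradient step on the finished episode
    agent.clear_data()  # on-policy memory must be discarded after the update
agent.save('./pg_best.cpt')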
class DQN:
    def __init__(self, n_states, n_actions, gamma=0.99, epsilon_start=0.9,
                 epsilon_end=0.05, epsilon_decay=200, memory_capacity=10000,
                 policy_lr=0.01, batch_size=128, device="cpu"):
        self.n_actions = n_actions  # total number of actions
        self.device = device  # cpu or gpu
        self.gamma = gamma  # reward discount factor
        # parameters of the epsilon-greedy policy
        self.actions_count = 0  # step counter used for the epsilon decay
        self.epsilon = 0
        self.epsilon_start = epsilon_start
        self.epsilon_end = epsilon_end
        self.epsilon_decay = epsilon_decay
        self.batch_size = batch_size
        self.policy_net = MLP(n_states, n_actions).to(self.device)
        self.target_net = MLP(n_states, n_actions).to(self.device)
        # target_net starts as an exact copy of policy_net
        self.target_net.load_state_dict(self.policy_net.state_dict())
        self.target_net.eval()  # disable BatchNorm and Dropout
        # note the difference between parameters() and state_dict():
        # the former yields the tensors with requires_grad=True
        self.optimizer = optim.Adam(self.policy_net.parameters(), lr=policy_lr)
        self.loss = 0
        self.memory = ReplayBuffer(memory_capacity)

    def choose_action(self, state, train=True):
        '''Select an action.'''
        if train:
            self.epsilon = self.epsilon_end + (self.epsilon_start - self.epsilon_end) * \
                math.exp(-1. * self.actions_count / self.epsilon_decay)
            self.actions_count += 1
            if random.random() > self.epsilon:
                with torch.no_grad():
                    # convert the state (originally float64) to a tensor;
                    # torch.tensor([state]) is equivalent to torch.tensor(state).unsqueeze(0)
                    state = torch.tensor([state], device=self.device, dtype=torch.float32)
                    q_value = self.policy_net(state)  # e.g. tensor([[-0.0798, -0.0079]])
                    # tensor.max(1) returns each row's maximum and its index,
                    # so tensor.max(1)[1] is the index of the best action
                    action = q_value.max(1)[1].item()
            else:
                action = random.randrange(self.n_actions)
            return action
        else:
            with torch.no_grad():  # no gradients needed for pure inference
                state = torch.tensor([state], device='cpu', dtype=torch.float32)
                q_value = self.target_net(state)
                action = q_value.max(1)[1].item()
            return action

    def update(self):
        if len(self.memory) < self.batch_size:
            return
        # sample a random batch of transitions from memory
        state_batch, action_batch, reward_batch, next_state_batch, done_batch = \
            self.memory.sample(self.batch_size)
        # convert everything to tensors, e.g.
        # tensor([[-4.5543e-02, -2.3910e-01, 1.8344e-02, 2.3158e-01], ...])
        state_batch = torch.tensor(state_batch, device=self.device, dtype=torch.float)
        action_batch = torch.tensor(action_batch, device=self.device).unsqueeze(1)  # e.g. tensor([[1], ..., [0]])
        reward_batch = torch.tensor(reward_batch, device=self.device, dtype=torch.float)  # tensor([1., 1., ..., 1.])
        next_state_batch = torch.tensor(next_state_batch, device=self.device, dtype=torch.float)
        done_batch = torch.tensor(np.float32(done_batch), device=self.device)  # bool -> float tensor, shape (batch,)
        # Q(s_t, a) for the actions that were actually taken.
        # torch.gather: for a = torch.tensor([[1, 2], [3, 4]]),
        # a.gather(1, torch.tensor([[0], [1]])) == torch.tensor([[1], [3]])
        q_values = self.policy_net(state_batch).gather(dim=1, index=action_batch)
        # V(s_{t+1}) of all next states, taken from target_net, e.g. tensor([0.0060, -0.0171, ...])
        next_state_values = self.target_net(next_state_batch).max(1)[0].detach()
        # expected Q values; for a terminal transition done=1, so the target
        # reduces to the reward alone. (The original code used done_batch[0],
        # which applied only the first sample's done flag to the whole batch;
        # masking with the full done_batch fixes that.)
        expected_q_values = reward_batch + self.gamma * next_state_values * (1 - done_batch)
        # self.loss = F.smooth_l1_loss(q_values, expected_q_values.unsqueeze(1))  # Huber loss
        self.loss = nn.MSELoss()(q_values, expected_q_values.unsqueeze(1))  # mean-squared-error loss
        # optimize the model
        self.optimizer.zero_grad()  # clear the old gradients from the last step
        self.loss.backward()  # backpropagate the loss to all trainable parameters
        for param in self.policy_net.parameters():  # clip to avoid exploding gradients
            param.grad.data.clamp_(-1, 1)
        self.optimizer.step()  # update the model

    def save_model(self, path):
        torch.save(self.target_net.state_dict(), path)

    def load_model(self, path):
        self.target_net.load_state_dict(torch.load(path))
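# A minimal sketch of the training loop around DQN. The environment is again
# assumed Gym-like, and ReplayBuffer is assumed to expose a
# push(state, action, reward, next_state, done) method matching the 5-tuple
# its sample() returns above; both are assumptions, not part of the class.
import gym

env = gym.make('CartPole-v0')
agent = DQN(n_states=4, n_actions=2, device="cpu")
for episode in range(400):
    state = env.reset()
    done = False
    while not done:
        action = agent.choose_action(state, train=True)
        next_state, reward, done, _ = env.step(action)
        agent.memory.push(state, action, reward, next_state, done)
        agent.update()  # one gradient step per environment step
        state = next_state
    if episode % 10 == 0:
        # periodically sync the target network with the policy network
        agent.target_net.load_state_dict(agent.policy_net.state_dict())
agent.save_model('./dqn.pt')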
def main():
    # check cuda
    device = f'cuda:{args.gpu}' if torch.cuda.is_available() and args.gpu >= 0 else 'cpu'

    # load data
    dataset = DglNodePropPredDataset(name=args.dataset)
    evaluator = Evaluator(name=args.dataset)

    split_idx = dataset.get_idx_split()
    g, labels = dataset[0]  # graph: DGLGraph object, labels: torch tensor of shape (num_nodes, num_tasks)

    if args.dataset == 'ogbn-arxiv':
        g = dgl.to_bidirected(g, copy_ndata=True)
        feat = g.ndata['feat']
        feat = (feat - feat.mean(0)) / feat.std(0)
        g.ndata['feat'] = feat

    g = g.to(device)
    feats = g.ndata['feat']
    labels = labels.to(device)

    # load masks for train / validation / test
    train_idx = split_idx["train"].to(device)
    valid_idx = split_idx["valid"].to(device)
    test_idx = split_idx["test"].to(device)

    n_features = feats.size()[-1]
    n_classes = dataset.num_classes

    # load model
    if args.model == 'mlp':
        model = MLP(n_features, args.hid_dim, n_classes, args.num_layers, args.dropout)
    elif args.model == 'linear':
        model = MLPLinear(n_features, n_classes)
    else:
        raise NotImplementedError(f'Model {args.model} is not supported.')

    model = model.to(device)
    print(f'Model parameters: {sum(p.numel() for p in model.parameters())}')

    if args.pretrain:
        print('---------- Before ----------')
        model.load_state_dict(torch.load(f'base/{args.dataset}-{args.model}.pt'))
        model.eval()

        y_soft = model(feats).exp()
        y_pred = y_soft.argmax(dim=-1, keepdim=True)
        valid_acc = evaluate(y_pred, labels, valid_idx, evaluator)
        test_acc = evaluate(y_pred, labels, test_idx, evaluator)
        print(f'Valid acc: {valid_acc:.4f} | Test acc: {test_acc:.4f}')

        print('---------- Correct & Smoothing ----------')
        cs = CorrectAndSmooth(num_correction_layers=args.num_correction_layers,
                              correction_alpha=args.correction_alpha,
                              correction_adj=args.correction_adj,
                              num_smoothing_layers=args.num_smoothing_layers,
                              smoothing_alpha=args.smoothing_alpha,
                              smoothing_adj=args.smoothing_adj,
                              autoscale=args.autoscale,
                              scale=args.scale)

        mask_idx = torch.cat([train_idx, valid_idx])
        y_soft = cs.correct(g, y_soft, labels[mask_idx], mask_idx)
        y_soft = cs.smooth(g, y_soft, labels[mask_idx], mask_idx)
        y_pred = y_soft.argmax(dim=-1, keepdim=True)
        valid_acc = evaluate(y_pred, labels, valid_idx, evaluator)
        test_acc = evaluate(y_pred, labels, test_idx, evaluator)
        print(f'Valid acc: {valid_acc:.4f} | Test acc: {test_acc:.4f}')
    else:
        opt = optim.Adam(model.parameters(), lr=args.lr)

        best_acc = 0
        best_model = copy.deepcopy(model)

        # training
        print('---------- Training ----------')
        for i in range(args.epochs):
            model.train()
            opt.zero_grad()

            logits = model(feats)
            train_loss = F.nll_loss(logits[train_idx], labels.squeeze(1)[train_idx])
            train_loss.backward()
            opt.step()

            model.eval()
            with torch.no_grad():
                logits = model(feats)
                y_pred = logits.argmax(dim=-1, keepdim=True)

                train_acc = evaluate(y_pred, labels, train_idx, evaluator)
                valid_acc = evaluate(y_pred, labels, valid_idx, evaluator)
                print(f'Epoch {i} | Train loss: {train_loss.item():.4f} | Train acc: {train_acc:.4f} | Valid acc {valid_acc:.4f}')

                if valid_acc > best_acc:
                    best_acc = valid_acc
                    best_model = copy.deepcopy(model)

        # testing & saving model
        print('---------- Testing ----------')
        best_model.eval()
        logits = best_model(feats)
        y_pred = logits.argmax(dim=-1, keepdim=True)
        test_acc = evaluate(y_pred, labels, test_idx, evaluator)
        print(f'Test acc: {test_acc:.4f}')

        if not os.path.exists('base'):
            os.makedirs('base')
        torch.save(best_model.state_dict(), f'base/{args.dataset}-{args.model}.pt')
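# A plausible definition of the evaluate() helper used in main(), written
# against the standard ogb Evaluator interface (eval() takes a dict with
# 'y_true' and 'y_pred' and returns a dict containing 'acc' for the ogbn
# datasets); treat it as a sketch rather than the author's exact code.
def evaluate(y_pred, labels, idx, evaluator):
    return evaluator.eval({
        'y_true': labels[idx],
        'y_pred': y_pred[idx],
    })['acc']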
        return sp, self.discrete_freq  # tail of F02SP.get_sp(), truncated above


if __name__ == '__main__':
    args = docopt(__doc__)
    print("Command line args:\n", args)
    numlayer = int(args['-l'])
    numunit = int(args['-u'])
    model_path = args['-m']
    gpuid = int(args['-g'])

    dtype = torch.float
    device = torch.device("cuda:" + str(gpuid) if gpuid >= 0 else "cpu")

    FFTSIZE = 1024
    FS = 16000  # [Hz]

    model = MLP(in_dim=FFTSIZE // 2 + 1, out_dim=FFTSIZE // 2 + 1,
                numlayer=numlayer, numunit=numunit)
    model.load_state_dict(torch.load(model_path))
    model = model.to(device)
    model.eval()

    f02sp = F02SP(FFTSIZE, FS)
    f0 = 0.1 * np.arange(200, 5000 + 1)  # input F0: 20.0-500.0 [Hz] in 0.1 Hz steps
    sp, discrete_freq = f02sp.get_sp(f0)
    input_sequence = discrete_freq / f0[:, np.newaxis]
    input_sequence = torch.from_numpy(input_sequence).to(dtype).to(device)
    pred_sp = model(input_sequence)
    pred_sp = pred_sp.cpu().data.numpy()
class NonLocalTrainer(object):
    def __init__(self, args, trainLoader, testLoader):
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.out_path = args.out
        self.sigma = args.sigma
        self.beta = args.beta
        self.nClass = args.nClass
        self.model = MLP().to(self.device)
        self.optim = torch.optim.Adam(self.model.parameters(), lr=args.lr,
                                      weight_decay=args.weight_decay)
        self.criterion = nn.MSELoss()
        self.trainLoader = trainLoader
        self.testLoader = testLoader
        self.run_datetime = datetime.datetime.now()

        if not os.path.exists(self.out_path):
            os.makedirs(self.out_path)
        self.logger = Logger(self.out_path)
        with open(os.path.join(self.out_path, "para.json"), "w") as f:
            json.dump(args.__dict__, f)

        self.epoch = 0
        self.iteration = 0
        self.test_step = 0
        self.max_epoch = args.epochs
        self.val_interval = args.interval
        self.res = 0
        self.best_error = 1e7
        self.best_res_epoch = 0
        self.noiseMean = torch.zeros(args.batch_size, args.featureNums, 17, 17)
        self.noiseStd = torch.div(torch.ones(args.batch_size, args.featureNums, 17, 17), 1e3)

    def validate_one_epoch(self):
        self.model.eval()
        self.test_step += 1
        tsthreas = [0.1, 1, 10]
        tp = [0] * len(tsthreas)  # true positives
        tn = [0] * len(tsthreas)  # true negatives
        fp = [0] * len(tsthreas)  # false positives
        fn = [0] * len(tsthreas)  # false negatives
        ts = [0] * len(tsthreas)
        totalRegressionLoss = []
        total_error = 0
        total_count = 0
        p_error = 0
        p_count = 0
        largeGapCount = 0
        largeGap = 0
        for batch_idx, (data, target, _, _, _, _) in tqdm.tqdm(
                enumerate(self.testLoader), total=len(self.testLoader),
                desc='Valid :', ncols=80, leave=False):
            gt_micaps = target.numpy()
            data, target = data.to(device=self.device), target.to(device=self.device)
            with torch.no_grad():
                predictValues = self.model(data)
                regressionLoss = self.criterion(predictValues, target)
                predictNumpy = predictValues.cpu().numpy()
                totalRegressionLoss.append(regressionLoss.item())
                # totalClassificationLoss.append(classificationLoss.item())
                # predicted = torch.argmax(preds, dim=1)
                # correct += (predicted == logits).sum().item()
                gapValues = np.abs(predictNumpy - gt_micaps)
                total_error += np.sum(gapValues)
                total_count += gt_micaps.shape[0]
                p_error += np.sum((gt_micaps > 0.01) * gapValues)
                p_count += np.sum(gt_micaps > 0.01)
                largeGap += np.sum((gapValues > 5) * gapValues)
                largeGapCount += np.sum(gapValues > 5)
                for i, threas in enumerate(tsthreas):
                    tp[i] += np.sum((gt_micaps >= threas) * (predictNumpy >= threas))
                    tn[i] += np.sum((gt_micaps < threas) * (predictNumpy < threas))
                    fp[i] += np.sum((gt_micaps < threas) * (predictNumpy >= threas))
                    fn[i] += np.sum((gt_micaps >= threas) * (predictNumpy < threas))
        for i, _ in enumerate(tsthreas):
            ts[i] += round(tp[i] / (tp[i] + fp[i] + fn[i]), 5)
        totalAverageError = round(total_error / total_count, 5)
        pAverageError = round(p_error / p_count, 5)
        totalLoss = np.sum(totalRegressionLoss)
        largeGapRatio = round(largeGapCount / total_count, 5)
        largeGapMae = round(largeGap / largeGapCount, 5)
        info = {"test_regression_loss": totalLoss,
                "ts_score": ts,
                "aver_gap": totalAverageError,
                "aver_p_gap": pAverageError,
                "large_gap_ratio": largeGapRatio,
                "large_gap_mae": largeGapMae}
        print("========================== Epoch {} Test Result Show ==========================".format(self.epoch + 1))
        print(info)
        # for tag, value in info.items():
        #     self.logger.scalar_summary(tag, value, self.test_step)
        # if totalAverageError < self.best_error:
        #     self.best_error = totalAverageError
        #     self.best_res_epoch = self.epoch
        #     info["epoch"] = self.epoch
        #     info["modelParam"] = self.model.state_dict()
        #     info["optimParam"] = self.optim.state_dict()
        #     torch.save(info, os.path.join(self.out_path, str(self.epoch) + "_checkpoints.pth"))

    def train_one_epoch(self):
        self.model.train()
        for batch_idx, (data, target, _, _, _, _) in tqdm.tqdm(
                enumerate(self.trainLoader), total=len(self.trainLoader),
                desc='Train epoch=%d' % self.epoch, ncols=80, leave=False):
            iter_idx = batch_idx + self.epoch * len(self.trainLoader)
            # if (self.iteration != 0) and (iter_idx - 1) != self.iteration:
            #     continue
            self.iteration = iter_idx
            assert self.model.training
            self.optim.zero_grad()
            data = data.to(device=self.device)
            target = target.to(device=self.device)
            predictValues = self.model(data)
            regressionLoss = self.criterion(predictValues, target)
            regressionLoss.backward()
            # for named, param in self.model.named_parameters():
            #     print("Name : ", named)
            #     print(param.grad.data.sum())
            self.optim.step()
            regressionLossCpu = regressionLoss.item()
            self.logger.scalar_summary("train_regression_loss", regressionLossCpu,
                                       self.iteration + 1)
        for tag, value in self.model.named_parameters():
            self.logger.histo_summary(tag, value.data.cpu().numpy(), self.epoch + 1)
            self.logger.histo_summary(tag + '/grad', value.grad.data.cpu().numpy(), self.epoch + 1)

    def run(self):
        for epoch in range(self.max_epoch):
            self.epoch = epoch
            self.train_one_epoch()
            if (self.epoch + 1) % self.val_interval == 0:
                self.validate_one_epoch()
# Bootstrapping
if args.bootstrapping:
    # we fix the bootstrap sample size to the size of the dataset
    n_el = np.floor(1.0 * X.size(0)).astype(np.uint32)
    idx_subsample = np.random.choice(n_el, size=n_el, replace=True)
    X_sub = X[idx_subsample]
    Y_sub = Y[idx_subsample]
    dataset = RegressionDataset(X_sub, Y_sub)
else:
    dataset = RegressionDataset(X, Y)

net = MLP(args.dropout_rate)

# Create a prior
prior = MLP(args.dropout_rate)
prior.eval()

criterion = nn.MSELoss()
optimizer = optim.Adam(net.parameters(), lr=args.lr, weight_decay=args.wd)

# Update of the network parameters
train_loader = DataLoader(dataset, batch_size=args.batch_size, shuffle=False)

step = 0  # number of batches seen
net.train()
for epoch in tqdm(np.arange(args.n_epochs), disable=not args.verbose):
    # experiment.log_current_epoch(epoch)
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.cpu(), target.cpu()
        optimizer.zero_grad()
print('\n--------------------------------------')
print('Run #{} Task #{} --> Train Classifier'.format(run, task))
print('--------------------------------------\n')

# ---------------
# Iteration Loop
for it in range(args.disc_iters):
    model = retrieve_replay_update(args, model, opt, data, target, buffer,
                                   task, tr_loader, rehearse=task > 0)

buffer.add_reservoir(data.cpu(), target.cpu(), None, task)

# ------------------------ eval ------------------------ #
model = model.eval()
eval_loaders = [('valid', val_loader), ('test', test_loader)]

for mode, loader_ in eval_loaders:
    for task_t, te_loader in enumerate(loader_):
        if task_t > task:
            break
        LOG_temp = get_temp_logger(None, ['cls_loss', 'acc'])

        # iterate over samples from task
        for i, (data, target) in enumerate(te_loader):
            if args.unit_test and i > 10:
                break
            if args.cuda:
                data, target = data.to(args.device), target.to(args.device)
            logits = model(data)
class Trainer():
    def __init__(self, config_path):
        config = configparser.ConfigParser()
        config.read(config_path)
        self.n_epoch = config.getint("general", "n_epoch")
        self.batch_size = config.getint("general", "batch_size")
        self.train_bert = config.getboolean("general", "train_bert")
        self.lr = config.getfloat("general", "lr")
        self.cut_frac = config.getfloat("general", "cut_frac")
        self.log_dir = Path(config.get("general", "log_dir"))
        if not self.log_dir.exists():
            self.log_dir.mkdir(parents=True)
        self.model_save_freq = config.getint("general", "model_save_freq")
        self.device = "cuda" if torch.cuda.is_available() else "cpu"

        # bert_config_path = config.get("bert", "config_path")
        # bert_tokenizer_path = config.get("bert", "tokenizer_path")
        # bert_model_path = config.get("bert", "model_path")
        self.bert_tokenizer = LongformerTokenizer.from_pretrained(
            'allenai/longformer-base-4096')
        # self.bert_tokenizer = BertTokenizer.from_pretrained(bert_tokenizer_path)
        tkzer_save_dir = self.log_dir / "tokenizer"
        if not tkzer_save_dir.exists():
            tkzer_save_dir.mkdir()
        self.bert_tokenizer.save_pretrained(tkzer_save_dir)
        self.bert_model = LongformerModel.from_pretrained(
            'allenai/longformer-base-4096')
        self.bert_config = self.bert_model.config
        # self.bert_config = BertConfig.from_pretrained(bert_config_path)
        # self.bert_model = BertModel.from_pretrained(bert_model_path, config=self.bert_config)
        self.max_seq_length = self.bert_config.max_position_embeddings - 2
        # self.max_seq_length = self.bert_config.max_position_embeddings
        self.bert_model.to(self.device)
        if self.train_bert:
            self.bert_model.train()
        else:
            self.bert_model.eval()

        train_conll_path = config.get("data", "train_path")
        print("train path", train_conll_path)
        assert Path(train_conll_path).exists()
        dev_conll_path = config.get("data", "dev_path")
        print("dev path", dev_conll_path)
        assert Path(dev_conll_path).exists()
        dev1_conll_path = Path(dev_conll_path) / "1"
        print("dev1 path", dev1_conll_path)
        assert dev1_conll_path.exists()
        dev2_conll_path = Path(dev_conll_path) / "2"
        print("dev2 path", dev2_conll_path)
        assert dev2_conll_path.exists()
        self.train_dataset = ConllDataset(train_conll_path)
        # self.dev_dataset = ConllDataset(dev_conll_path)
        self.dev1_dataset = ConllDataset(dev1_conll_path)
        self.dev2_dataset = ConllDataset(dev2_conll_path)
        if self.batch_size == -1:
            self.batch_size = len(self.train_dataset)
        self.scaler = torch.cuda.amp.GradScaler()

        tb_cmt = f"lr_{self.lr}_cut-frac_{self.cut_frac}"
        self.writer = SummaryWriter(log_dir=self.log_dir, comment=tb_cmt)

    def transforms(self, example, label_list):
        feature = convert_single_example(example, label_list,
                                         self.max_seq_length, self.bert_tokenizer)
        label_ids = feature.label_ids
        label_map = feature.label_map
        gold_labels = [-1] * self.max_seq_length
        # Get "Element" or "Main" token indices
        for i, lid in enumerate(label_ids):
            if lid == label_map['B-Element']:
                gold_labels[i] = 0
            elif lid == label_map['B-Main']:
                gold_labels[i] = 1
            elif lid in (label_map['I-Element'], label_map['I-Main']):
                gold_labels[i] = 2
            elif lid == label_map['X']:
                gold_labels[i] = 3
        # flush data to the bert model
        input_ids = torch.tensor(feature.input_ids).unsqueeze(0).to(self.device)
        if self.train_bert:
            model_output = self.bert_model(input_ids)
        else:
            with torch.no_grad():
                model_output = self.bert_model(input_ids)
        # lstm (ignore padding parts)
        model_fv = model_output[0]
        input_ids = torch.tensor(feature.input_ids)
        label_ids = torch.tensor(feature.label_ids)
        gold_labels = torch.tensor(gold_labels)
        return model_fv, input_ids, label_ids, gold_labels
    @staticmethod
    def extract_tokens(fv, gold_labels):
        ents, golds = [], []
        ents_mask = [-1] * len(gold_labels)
        ent, gold, ent_id = [], None, 0
        ent_flag = False
        for i, gt in enumerate(gold_labels):
            if gt == 2:  # in case of "I-xxx"
                ent.append(fv[i, :])
                ents_mask[i] = ent_id
                ent_end = i
            elif gt == 3 and ent_flag:  # in case of "X"
                ent.append(fv[i, :])
                ents_mask[i] = ent_id
                ent_end = i
            elif ent:  # the current entity ends here; flush it
                ents.append(ent)
                golds.append(gold)
                ent = []
                ent_id += 1
                ent_flag = False
            if gt in (0, 1):  # in case of "B-xxx"
                ent.append(fv[i, :])
                gold = gt
                ents_mask[i] = ent_id
                ent_start = i
                ent_flag = True
        else:  # for-else: flush a trailing entity after the loop finishes
            if ent:
                ents.append(ent)
                golds.append(gold)
        return ents, golds, ents_mask

    def eval(self, dataset):
        tp, fp, tn, fn = 0, 0, 0, 0
        with torch.no_grad():
            for data in tqdm(dataset):
                # flush to Bert
                fname, example = data
                try:
                    fvs, input_ids, label_ids, gold_labels = self.transforms(
                        example, dataset.label_list)
                except RuntimeError:
                    print(f"{fname} cannot be put in memory!")
                    continue
                # extract Element/Main tokens
                ents, ent_golds, _ = self.extract_tokens(fvs.squeeze(0), gold_labels)
                for i, ent in enumerate(ents):
                    # convert to torch.tensor
                    inputs = torch.empty(
                        [len(ent), self.bert_config.hidden_size]).to(self.device)
                    for j, token in enumerate(ent):
                        inputs[j, :] = token
                    target = ent_golds[i]
                    inputs = torch.mean(inputs, dim=0, keepdim=True)
                    # classification
                    outputs = self.mlp(inputs)
                    if target == 1:
                        if outputs < 0.5:
                            fn += 1
                        else:
                            tp += 1
                    else:
                        if outputs < 0.5:
                            tn += 1
                        else:
                            fp += 1
        return Score(tp, fp, tn, fn).calc_score()

    def train(self):
        # MLP
        self.mlp = MLP(self.bert_config.hidden_size)
        self.mlp.to(self.device)
        self.mlp.train()
        # learning parameter settings
        params = list(self.mlp.parameters())
        if self.train_bert:
            params += list(self.bert_model.parameters())
        # loss
        self.criterion = BCEWithLogitsLoss()
        # optimizer
        self.optimizer = AdamW(params, lr=self.lr)
        num_train_steps = int(self.n_epoch * len(self.train_dataset) / self.batch_size)
        num_warmup_steps = int(self.cut_frac * num_train_steps)
        self.scheduler = get_linear_schedule_with_warmup(
            self.optimizer, num_warmup_steps, num_train_steps)
        try:
            best_dev1_f1, best_dev2_f1 = 0, 0
            # best_dev_f1 = 0
            itr = 1
            for epoch in range(1, self.n_epoch + 1):
                print("Epoch : {}".format(epoch))
                print("training...")
                for i in tqdm(range(0, len(self.train_dataset), self.batch_size)):
                    # fvs, ents, batch_samples, inputs, outputs = None, None, None, None, None
                    itr += i
                    # create batch samples
                    if (i + self.batch_size) < len(self.train_dataset):
                        end_i = i + self.batch_size
                    else:
                        end_i = len(self.train_dataset)
                    batch_samples, batch_golds = [], []
                    for j in range(i, end_i):
                        # flush to Bert
                        fname, example = self.train_dataset[j]
                        fvs, input_ids, label_ids, gold_labels = self.transforms(
                            example, self.train_dataset.label_list)
                        # extract Element/Main tokens
                        ents, ent_golds, _ = self.extract_tokens(
                            fvs.squeeze(0), gold_labels)
                        for e in ents:
                            ent = torch.empty(
                                [len(e), self.bert_config.hidden_size]).to(self.device)
                            for k, t in enumerate(e):
                                ent[k, :] = t
                            batch_samples.append(torch.mean(ent, dim=0))
                        batch_golds.extend(ent_golds)
                    # convert to torch.tensor
                    inputs = torch.empty(
                        [len(batch_samples), self.bert_config.hidden_size]).to(self.device)
                    for j, t in enumerate(batch_samples):
                        inputs[j, :] = t
                    targets = torch.tensor(batch_golds, dtype=torch.float).unsqueeze(1)
                    self.optimizer.zero_grad()
                    with torch.cuda.amp.autocast():
                        outputs = self.mlp(inputs)
                        loss = self.criterion(outputs, targets.to(self.device))
                        # loss = loss / 100
                    self.scaler.scale(loss).backward()
                    self.scaler.step(self.optimizer)
                    self.scaler.update()
                    self.scheduler.step()
                    del fvs, ents, batch_samples, inputs, outputs
                    torch.cuda.empty_cache()
                    # write to SummaryWriter
                    self.writer.add_scalar("loss", loss.item(), itr)
                    self.writer.add_scalar("lr", self.optimizer.param_groups[0]["lr"], itr)

                # write to SummaryWriter
                if self.train_bert:
                    self.bert_model.eval()
                self.mlp.eval()
                # import pdb; pdb.set_trace()
                print("train data evaluation...")
                tr_acc, tr_rec, _, tr_prec, tr_f1 = self.eval(self.train_dataset)
                print(f"acc: {tr_acc}, rec: {tr_rec}, prec: {tr_prec}, f1: {tr_f1}")
                self.writer.add_scalar("train/acc", tr_acc, epoch)
                self.writer.add_scalar("train/rec", tr_rec, epoch)
                self.writer.add_scalar("train/prec", tr_prec, epoch)
                self.writer.add_scalar("train/f1", tr_f1, epoch)
                # print("dev data evaluation...")
                # dev_acc, dev_rec, _, dev_prec, dev_f1 = self.eval(self.dev_dataset)
                # print(f"acc: {dev_acc}, rec: {dev_rec}, prec: {dev_prec}, f1: {dev_f1}")
                # self.writer.add_scalar("dev/acc", dev_acc, epoch)
                # self.writer.add_scalar("dev/rec", dev_rec, epoch)
                # self.writer.add_scalar("dev/prec", dev_prec, epoch)
                # self.writer.add_scalar("dev/f1", dev_f1, epoch)
                # self.writer.flush()
                print("dev1 data evaluation...")
                dev1_acc, dev1_rec, _, dev1_prec, dev1_f1 = self.eval(self.dev1_dataset)
                print(f"acc: {dev1_acc}, rec: {dev1_rec}, prec: {dev1_prec}, f1: {dev1_f1}")
                self.writer.add_scalar("dev1/acc", dev1_acc, epoch)
                self.writer.add_scalar("dev1/rec", dev1_rec, epoch)
                self.writer.add_scalar("dev1/prec", dev1_prec, epoch)
                self.writer.add_scalar("dev1/f1", dev1_f1, epoch)
                self.writer.flush()
                print("dev2 data evaluation...")
                dev2_acc, dev2_rec, _, dev2_prec, dev2_f1 = self.eval(self.dev2_dataset)
                print(f"acc: {dev2_acc}, rec: {dev2_rec}, prec: {dev2_prec}, f1: {dev2_f1}")
                self.writer.add_scalar("dev2/acc", dev2_acc, epoch)
                self.writer.add_scalar("dev2/rec", dev2_rec, epoch)
                self.writer.add_scalar("dev2/prec", dev2_prec, epoch)
                self.writer.add_scalar("dev2/f1", dev2_f1, epoch)
                self.writer.flush()

                if self.train_bert:
                    self.bert_model.train()
                self.mlp.train()

                if epoch % self.model_save_freq == 0:
                    curr_log_dir = self.log_dir / f"epoch_{epoch}"
                    if not curr_log_dir.exists():
                        curr_log_dir.mkdir()
                    if self.train_bert:
                        self.bert_model.save_pretrained(curr_log_dir)
                    torch.save(self.mlp.state_dict(), curr_log_dir / "mlp.model")

                # if best_dev_f1 <= dev_f1:
                #     best_dev_f1 = dev_f1
                #     best_dev_epoch = epoch
                #     if self.train_bert:
                #         best_dev_model = copy.deepcopy(self.bert_model)
                #     best_dev_mlp = copy.deepcopy(self.mlp.state_dict())
                if best_dev1_f1 <= dev1_f1:
                    best_dev1_f1 = dev1_f1
                    best_dev1_epoch = epoch
                    if self.train_bert:
                        best_dev1_model = copy.deepcopy(self.bert_model).cpu()
                    best_dev1_mlp = copy.deepcopy(self.mlp).cpu().state_dict()
                if best_dev2_f1 <= dev2_f1:
                    best_dev2_f1 = dev2_f1
                    best_dev2_epoch = epoch
                    if self.train_bert:
                        best_dev2_model = copy.deepcopy(self.bert_model).cpu()
                    best_dev2_mlp = copy.deepcopy(self.mlp).cpu().state_dict()
        except KeyboardInterrupt:
            # del fvs, ents, batch_samples, inputs, outputs
            # print(f"Best epoch was #{best_dev_epoch}!\nSave params...")
            # save_dev_dir = Path(self.log_dir) / "best"
            # if not save_dev_dir.exists():
            #     save_dev_dir.mkdir()
            # if self.train_bert:
            #     best_dev_model.save_pretrained(save_dev_dir)
            # torch.save(best_dev_mlp, save_dev_dir / "mlp.model")
            # print("Training was successfully finished!")
            print(f"Best epoch was dev1: #{best_dev1_epoch}, dev2: #{best_dev2_epoch}!\nSave params...")
            save_dev1_dir = Path(self.log_dir) / "dev1_best"
            if not save_dev1_dir.exists():
                save_dev1_dir.mkdir()
            save_dev2_dir = Path(self.log_dir) / "dev2_best"
            if not save_dev2_dir.exists():
                save_dev2_dir.mkdir()
            if self.train_bert:
                best_dev1_model.save_pretrained(save_dev1_dir)
                best_dev2_model.save_pretrained(save_dev2_dir)
            torch.save(best_dev1_mlp, save_dev1_dir / "mlp.model")
            torch.save(best_dev2_mlp, save_dev2_dir / "mlp.model")
            print("Training was successfully finished!")
            raise KeyboardInterrupt
        else:
            # print(f"Best epoch was #{best_dev_epoch}!\nSave params...")
            # save_dev_dir = Path(self.log_dir) / "best"
            # if not save_dev_dir.exists():
            #     save_dev_dir.mkdir()
            # if self.train_bert:
            #     best_dev_model.save_pretrained(save_dev_dir)
            # torch.save(best_dev_mlp, save_dev_dir / "mlp.model")
            # print("Training was successfully finished!")
            print(f"Best epoch was dev1: #{best_dev1_epoch}, dev2: #{best_dev2_epoch}!\nSave params...")
            save_dev1_dir = Path(self.log_dir) / "dev1_best"
            if not save_dev1_dir.exists():
                save_dev1_dir.mkdir()
            save_dev2_dir = Path(self.log_dir) / "dev2_best"
            if not save_dev2_dir.exists():
                save_dev2_dir.mkdir()
            if self.train_bert:
                best_dev1_model.save_pretrained(save_dev1_dir)
                best_dev2_model.save_pretrained(save_dev2_dir)
            torch.save(best_dev1_mlp, save_dev1_dir / "mlp.model")
            torch.save(best_dev2_mlp, save_dev2_dir / "mlp.model")
            print("Training was successfully finished!")
            sys.exit()
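# Score is used above (and in Test.eval further below) as
# Score(tp, fp, tn, fn).calc_score(), with the result unpacked as
# (acc, rec, _, prec, f1). A minimal sketch consistent with that call
# pattern; the third (ignored) value is assumed to be specificity.
class Score:
    def __init__(self, tp, fp, tn, fn):
        self.tp, self.fp, self.tn, self.fn = tp, fp, tn, fn

    def calc_score(self):
        eps = 1e-10  # guard against division by zero
        acc = (self.tp + self.tn) / (self.tp + self.fp + self.tn + self.fn + eps)
        rec = self.tp / (self.tp + self.fn + eps)
        spec = self.tn / (self.tn + self.fp + eps)
        prec = self.tp / (self.tp + self.fp + eps)
        f1 = 2 * prec * rec / (prec + rec + eps)
        return acc, rec, spec, prec, f1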
class Solver(object):
    def __init__(self, config, train_loader, val_loader):
        self.use_cuda = torch.cuda.is_available()
        self.device = torch.device('cuda' if self.use_cuda else 'cpu')

        self.train_loader = train_loader
        self.val_loader = val_loader

        self.episodes_per_epoch = config.episodes_per_epoch
        self.N_way_train = config.N_way_train
        self.N_shot_train = config.N_shot_train
        self.N_query_train = config.N_query_train
        self.M_aug_train = config.M_aug_train
        self.N_way_val = config.N_way_val
        self.N_shot_val = config.N_shot_val
        self.N_query_val = config.N_query_val
        self.M_aug_val = config.M_aug_val
        self.matching_fn = config.matching_fn

        self.nz = config.nz
        self.num_epochs = config.num_epochs
        self.resume_iter = config.resume_iter
        self.lr = config.lr
        self.num_steps_decay = config.num_steps_decay
        self.beta1 = config.beta1
        self.beta2 = config.beta2
        self.weight_decay = config.weight_decay

        self.exp_name = config.name
        os.makedirs(config.ckp_dir, exist_ok=True)
        self.ckp_dir = os.path.join(config.ckp_dir, self.exp_name)
        os.makedirs(self.ckp_dir, exist_ok=True)
        self.log_interval = config.log_interval
        self.ckp_interval = config.ckp_interval
        self.use_wandb = config.use_wandb

        self.build_model()

    def build_model(self):
        self.cnn = Convnet().to(self.device)
        self.g = Hallucinator(self.nz).to(self.device)
        self.mlp = MLP().to(self.device)
        self.optimizer = torch.optim.AdamW(
            list(self.cnn.parameters()) + list(self.g.parameters()) +
            list(self.mlp.parameters()),
            lr=self.lr, betas=[self.beta1, self.beta2],
            weight_decay=self.weight_decay)
        if self.matching_fn == 'parametric':
            self.parametric = nn.Sequential(
                nn.Linear(800, 400), nn.ReLU(), nn.Dropout(),
                nn.Linear(400, 1)).to(self.device)
            self.optimizer = torch.optim.AdamW(
                list(self.cnn.parameters()) + list(self.g.parameters()) +
                list(self.mlp.parameters()) + list(self.parametric.parameters()),
                lr=self.lr, betas=[self.beta1, self.beta2],
                weight_decay=self.weight_decay)
        self.scheduler = StepLR(self.optimizer, step_size=self.num_steps_decay, gamma=0.9)

    def save_checkpoint(self, step):
        state = {
            'cnn': self.cnn.state_dict(),
            'g': self.g.state_dict(),
            'mlp': self.mlp.state_dict(),
            'optimizer': self.optimizer.state_dict()
        }
        if self.matching_fn == 'parametric':
            state['parametric'] = self.parametric.state_dict()
        new_checkpoint_path = os.path.join(self.ckp_dir, '{}-dhm.pth'.format(step + 1))
        torch.save(state, new_checkpoint_path)
        print('model saved to %s' % new_checkpoint_path)

    def load_checkpoint(self, resume_iter):
        print('Loading the trained models from step {}...'.format(resume_iter))
        new_checkpoint_path = os.path.join(self.ckp_dir, '{}-dhm.pth'.format(resume_iter))
        state = torch.load(new_checkpoint_path)
        self.cnn.load_state_dict(state['cnn'])
        self.g.load_state_dict(state['g'])
        self.mlp.load_state_dict(state['mlp'])
        self.optimizer.load_state_dict(state['optimizer'])
        if self.matching_fn == 'parametric':
            self.parametric.load_state_dict(state['parametric'])
        print('model loaded from %s' % new_checkpoint_path)

    def train(self):
        criterion = nn.CrossEntropyLoss()
        best_mean = 0
        iteration = 0

        # fix the validation sampling so that evaluations are comparable
        self.sample_idx_val = []
        self.noise_val = []
        for i in range(self.episodes_per_epoch):
            self.sample_idx_val.append(torch.tensor([
                torch.randint(self.N_shot_val * i, self.N_shot_val * (i + 1),
                              (self.M_aug_val, )).numpy()
                for i in range(self.N_way_val)
            ]).reshape(-1))
            self.noise_val.append(torch.randn(
                (self.N_way_val * self.M_aug_val, self.nz), device=self.device))

        if self.resume_iter:
            print("resuming step %d ..." % self.resume_iter)
            iteration = self.resume_iter
            self.load_checkpoint(self.resume_iter)
            loss, mean, std = self.eval()
            if mean > best_mean:
                best_mean = mean

        episodic_acc = []
        for ep in range(self.num_epochs):
            self.cnn.train()
            self.g.train()
            self.mlp.train()
            for batch_idx, (data, target) in enumerate(self.train_loader):
                data = data.to(self.device)
                self.optimizer.zero_grad()

                support_input = data[:self.N_way_train * self.N_shot_train, :, :, :]
                query_input = data[self.N_way_train * self.N_shot_train:, :, :, :]

                label_encoder = {
                    target[i * self.N_shot_train]: i
                    for i in range(self.N_way_train)
                }
                query_label = torch.cuda.LongTensor([
                    label_encoder[class_name]
                    for class_name in target[self.N_way_train * self.N_shot_train:]
                ])

                support = self.cnn(support_input)
                queries = self.cnn(query_input)

                sample_idx = torch.tensor([
                    torch.randint(self.N_shot_train * i, self.N_shot_train * (i + 1),
                                  (self.M_aug_train, )).numpy()
                    for i in range(self.N_way_train)
                ]).reshape(-1)
                sample = support[sample_idx]
                noise = torch.randn((self.N_way_train * self.M_aug_train, self.nz),
                                    device=self.device)
                support_g = self.g(sample, noise).reshape(self.N_way_train,
                                                          self.M_aug_train, -1)
                support = support.reshape(self.N_way_train, self.N_shot_train, -1)
                support_aug = torch.cat([support, support_g], dim=1)
                support_aug = support_aug.reshape(
                    self.N_way_train * (self.N_shot_train + self.M_aug_train), -1)

                prototypes = self.mlp(support_aug)
                prototypes = prototypes.reshape(
                    self.N_way_train, self.N_shot_train + self.M_aug_train, -1).mean(dim=1)
                queries = self.mlp(queries)

                if self.matching_fn == 'parametric':
                    distances = pairwise_distances(queries, prototypes,
                                                   self.matching_fn, self.parametric)
                else:
                    distances = pairwise_distances(queries, prototypes, self.matching_fn)

                loss = criterion(-distances, query_label)
                loss.backward()
                self.optimizer.step()

                y_pred = (-distances).softmax(dim=1).max(1, keepdim=True)[1]
                episodic_acc.append(
                    1. * y_pred.eq(query_label.view_as(y_pred)).sum().item() / len(query_label))

                if (iteration + 1) % self.log_interval == 0:
                    episodic_acc = np.array(episodic_acc)
                    mean = episodic_acc.mean()
                    std = episodic_acc.std()
                    print('Epoch: {:3d} [{:d}/{:d}]\tIteration: {:5d}\tLoss: {:.6f}\tAccuracy: {:.2f} +- {:.2f} %'
                          .format(ep, (batch_idx + 1), len(self.train_loader),
                                  iteration + 1, loss.item(), mean * 100,
                                  1.96 * std / (self.log_interval)**(1 / 2) * 100))
                    if self.use_wandb:
                        import wandb
                        wandb.log({
                            "loss": loss.item(),
                            "acc_mean": mean * 100,
                            "acc_ci": 1.96 * std / (self.log_interval)**(1 / 2) * 100,
                            'lr': self.optimizer.param_groups[0]['lr']
                        }, step=iteration + 1)
                    episodic_acc = []

                if (iteration + 1) % self.ckp_interval == 0:
                    loss, mean, std = self.eval()
                    if mean > best_mean:
                        best_mean = mean
                        self.save_checkpoint(iteration)
                        if self.use_wandb:
                            wandb.run.summary["best_accuracy"] = best_mean * 100
                    if self.use_wandb:
                        import wandb
                        wandb.log({
                            "val_loss": loss,
                            "val_acc_mean": mean * 100,
                            "val_acc_ci": 1.96 * std / (600)**(1 / 2) * 100
                        }, step=iteration + 1, commit=False)

                iteration += 1
            self.scheduler.step()
        self.save_checkpoint(iteration)

    def eval(self):
        criterion = nn.CrossEntropyLoss()
        self.cnn.eval()
        self.g.eval()
        self.mlp.eval()
        episodic_acc = []
        loss = []
        with torch.no_grad():
            for b_idx, (data, target) in enumerate(self.val_loader):
                data = data.to(self.device)
                support_input = data[:self.N_way_val * self.N_shot_val, :, :, :]
                query_input = data[self.N_way_val * self.N_shot_val:, :, :, :]

                label_encoder = {
                    target[i * self.N_shot_val]: i
                    for i in range(self.N_way_val)
                }
                query_label = torch.cuda.LongTensor([
                    label_encoder[class_name]
                    for class_name in target[self.N_way_val * self.N_shot_val:]
                ])

                support = self.cnn(support_input)
                queries = self.cnn(query_input)

                sample_idx = self.sample_idx_val[b_idx]
                sample = support[sample_idx]
                noise = self.noise_val[b_idx]
                support_g = self.g(sample, noise).reshape(self.N_way_val,
                                                          self.M_aug_val, -1)
                support = support.reshape(self.N_way_val, self.N_shot_val, -1)
                support_aug = torch.cat([support, support_g], dim=1)
                support_aug = support_aug.reshape(
                    self.N_way_val * (self.N_shot_val + self.M_aug_val), -1)

                prototypes = self.mlp(support_aug)
                prototypes = prototypes.reshape(
                    self.N_way_val, self.N_shot_val + self.M_aug_val, -1).mean(dim=1)
                queries = self.mlp(queries)

                if self.matching_fn == 'parametric':
                    distances = pairwise_distances(queries, prototypes,
                                                   self.matching_fn, self.parametric)
                else:
                    distances = pairwise_distances(queries, prototypes, self.matching_fn)

                loss.append(criterion(-distances, query_label).item())
                y_pred = (-distances).softmax(dim=1).max(1, keepdim=True)[1]
                episodic_acc.append(
                    1. * y_pred.eq(query_label.view_as(y_pred)).sum().item() / len(query_label))

        loss = np.array(loss)
        episodic_acc = np.array(episodic_acc)
        loss = loss.mean()
        mean = episodic_acc.mean()
        std = episodic_acc.std()
        print('\nLoss: {:.6f}\tAccuracy: {:.2f} +- {:.2f} %\n'.format(
            loss, mean * 100, 1.96 * std / (600)**(1 / 2) * 100))
        return loss, mean, std
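# A minimal sketch of the pairwise_distances() helper assumed by Solver above.
# Only 'l2', 'cosine', and 'parametric' matching functions are shown, and the
# signature is inferred from the call sites; the author's version may differ.
import torch

def pairwise_distances(queries, prototypes, matching_fn, parametric=None):
    n, m = queries.size(0), prototypes.size(0)
    if matching_fn == 'l2':
        return (queries.unsqueeze(1) - prototypes.unsqueeze(0)).pow(2).sum(dim=2)
    elif matching_fn == 'cosine':
        q = queries / (queries.norm(dim=1, keepdim=True) + 1e-8)
        p = prototypes / (prototypes.norm(dim=1, keepdim=True) + 1e-8)
        return 1 - q @ p.t()
    elif matching_fn == 'parametric':
        # concatenate every (query, prototype) pair and score it with the small
        # parametric network (800 -> 400 -> 1 in build_model above, which
        # implies 400-dim embeddings)
        pairs = torch.cat([
            queries.unsqueeze(1).expand(n, m, -1),
            prototypes.unsqueeze(0).expand(n, m, -1)
        ], dim=2).reshape(n * m, -1)
        return parametric(pairs).reshape(n, m)
    raise ValueError(f'Unsupported matching function: {matching_fn}')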
class Test():
    def __init__(self, config_path):
        config = configparser.ConfigParser()
        config.read(config_path)
        self.save_dir = Path(config.get("general", "save_dir"))
        if not self.save_dir.exists():
            self.save_dir.mkdir(parents=True)
        self.clf_th = config.getfloat("general", "clf_th")

        self.mlp_model_path = config.get("model", "mlp")
        assert Path(self.mlp_model_path).exists()

        self.device = "cuda" if torch.cuda.is_available() else "cpu"

        bert_config_path = config.get("bert", "config_path")
        assert Path(bert_config_path).exists()
        self.bert_config = LongformerConfig.from_json_file(bert_config_path)
        self.max_seq_length = self.bert_config.max_position_embeddings - 2
        self.bert_tokenizer = LongformerTokenizer.from_pretrained(
            'allenai/longformer-base-4096')
        # bert_tokenizer_path = config.get("bert", "tokenizer_path")
        # assert Path(bert_config_path).exists()
        # self.bert_tokenizer = LongformerTokenizer.from_pretrained(bert_tokenizer_path)
        bert_model_path = config.get("bert", "model_path")
        assert Path(bert_model_path).exists()
        self.bert_model = LongformerModel.from_pretrained(
            bert_model_path, config=self.bert_config)
        self.bert_model.to(self.device)
        self.bert_model.eval()

        gold_dir = Path(config.get("data", "gold_dir"))
        assert Path(gold_dir).exists()
        self.gold_dataset = ConllDataset(gold_dir)
        target_dir = Path(config.get("data", "target_dir"))
        assert Path(target_dir).exists()
        self.target_dataset = ConllDataset(target_dir)

    def transforms(self, example, label_list, is_gold):
        feature = convert_single_example(example, label_list,
                                         self.max_seq_length, self.bert_tokenizer)
        label_ids = feature.label_ids
        label_map = feature.label_map
        gold_labels = [-1] * self.max_seq_length
        if is_gold:
            # Get "Element" or "Main" token indices
            for i, lid in enumerate(label_ids):
                if lid == label_map['B-Element']:
                    gold_labels[i] = 0
                elif lid == label_map['B-Main']:
                    gold_labels[i] = 1
                elif lid in (label_map['I-Element'], label_map['I-Main']):
                    gold_labels[i] = 2
                elif lid == label_map['X']:
                    gold_labels[i] = 3
        else:
            # Get "Element" token indices only
            for i, lid in enumerate(label_ids):
                if lid == label_map['B-Element']:
                    gold_labels[i] = 0
                elif lid == label_map['I-Element']:
                    gold_labels[i] = 2
                elif lid == label_map['X']:
                    gold_labels[i] = 3
        # flush data to the bert model
        input_ids = torch.tensor(feature.input_ids).unsqueeze(0).to(self.device)
        with torch.no_grad():
            bert_output = self.bert_model(input_ids)
        # lstm (ignore padding parts)
        bert_fv = bert_output[0]
        input_ids = torch.tensor(feature.input_ids)
        label_ids = torch.tensor(feature.label_ids)
        return bert_fv, input_ids, label_ids, label_map, gold_labels

    def load_model(self):
        # MLP
        self.mlp = MLP(self.bert_config.hidden_size)
        self.mlp.load_state_dict(torch.load(self.mlp_model_path))
        self.mlp.to(self.device)
        self.mlp.eval()

    def eval(self):
        self.load_model()
        correct_save_dir = self.save_dir / "correct"
        if not correct_save_dir.exists():
            correct_save_dir.mkdir(parents=True)
        incorrect_save_dir = self.save_dir / "incorrect"
        if not incorrect_save_dir.exists():
            incorrect_save_dir.mkdir(parents=True)
        tp, fp, tn, fn = 0, 0, 0, 0
        with torch.no_grad():
            for gold_data, target_data in tqdm(
                    zip(self.gold_dataset, self.target_dataset)):
                # flush to Bert
                gold_fname, gold_example = gold_data
                target_fname, target_example = target_data
                if not gold_fname == target_fname:
                    import pdb
                    pdb.set_trace()
                assert gold_fname == target_fname
                _, _, _, _, gold_labels = self.transforms(
                    gold_example, self.gold_dataset.label_list, is_gold=True)
                fvs, input_ids, label_ids, label_map, pred_labels = self.transforms(
                    target_example, self.target_dataset.label_list, is_gold=False)
                # extract Element/Main tokens
                is_correct = True
                _, ent_gold_labels, golds_mask = Trainer.extract_tokens(
                    fvs.squeeze(0), gold_labels)
                golds = {}
                if len(ent_gold_labels) >= 1:
                    i = 0
                    while True:
                        try:
                            ent_start = golds_mask.index(i)
                        except ValueError:
                            break
                        for n, j in enumerate(golds_mask[ent_start:]):
                            if j != i:
                                ent_end = ent_start + n - 1
                                break
                        golds[(ent_start, ent_end)] = ent_gold_labels[i]
                        i += 1
                ents, ent_pred_labels, preds_mask = Trainer.extract_tokens(
                    fvs.squeeze(0), pred_labels)
                preds = {}
                if len(ent_pred_labels) >= 1:
                    i = 0
                    while True:
                        try:
                            ent_start = preds_mask.index(i)
                        except ValueError:
                            break
                        for n, j in enumerate(preds_mask[ent_start:]):
                            if j != i:
                                ent_end = ent_start + n - 1
                                break
                        preds[(ent_start, ent_end)] = ent_pred_labels[i]
                        i += 1
                for gold_span, gold_label in golds.items():
                    if gold_span not in preds.keys():
                        if gold_label == 1:
                            fn += 1
                            is_correct = False
                ents_pred = [0] * len(ents)
                for i, pred in enumerate(preds):
                    # convert to torch.tensor
                    inputs = torch.empty(
                        [len(ents[i]), self.bert_config.hidden_size]).to(self.device)
                    for j, token in enumerate(ents[i]):
                        inputs[j, :] = token
                    inputs = torch.mean(inputs, dim=0, keepdim=True)
                    outputs = self.mlp(inputs)
                    if pred in golds.keys():
                        target = golds[pred]
                        if target == 1:
                            if outputs < self.clf_th:
                                fn += 1
                                is_correct = False
                            else:
                                tp += 1
                        else:
                            if outputs < self.clf_th:
                                tn += 1
                            else:
                                fp += 1
                                is_correct = False
                    else:
                        if outputs < self.clf_th:
                            pass
                        else:
                            fp += 1
                            is_correct = False
                    outputs_ = outputs.to('cpu').detach().numpy().copy()
                    if np.all(outputs_ > self.clf_th):
                        ents_pred[i] = 1
                if is_correct:
                    save_dir = correct_save_dir
                else:
                    save_dir = incorrect_save_dir
                save_path = save_dir / (target_fname + ".conll")
                lines = []
                elem_cnt = -1
                for i in range(len(target_example.text)):
                    text = target_example.text[i]
                    label = target_example.label[i]
                    start = target_example.start[i]
                    end = target_example.end[i]
                    if label == "B-Element":
                        elem_cnt += 1
                        if ents_pred[elem_cnt] == 1:
                            lines.append(f"B-Main\t{start}\t{end}\t{text}")
                        elif ents_pred[elem_cnt] == 0:
                            lines.append(f"{label}\t{start}\t{end}\t{text}")
                    elif label == "I-Element":
                        if ents_pred[elem_cnt] == 1:
                            lines.append(f"I-Main\t{start}\t{end}\t{text}")
                        elif ents_pred[elem_cnt] == 0:
                            lines.append(f"{label}\t{start}\t{end}\t{text}")
                    else:
                        lines.append(f"{label}\t{start}\t{end}\t{text}")
                with save_path.open("w") as f:
                    f.write("\n".join(lines))
        return Score(tp, fp, tn, fn).calc_score()
def test(args):
    # set up the multiprocessing instance
    torch.multiprocessing.set_sharing_strategy('file_system')

    # set up the data_loader instances
    if args.arch == "MLP":
        test_data_loader = EdgeDataLoader(mode="test", data_path=args.data,
                                          batch_size=1, shuffle=True,
                                          num_workers=4, batch_type="large_batch")
    elif args.arch == "DeepSetMLP":
        test_data_loader = SubGraphDataLoader(mode="test", data_path=args.data,
                                              batch_size=1, shuffle=True,
                                              num_workers=4, batch_type="large_batch")
    elif args.arch == "DeepAPGMLP":
        test_data_loader = AnchorParentDataLoader(mode="test", data_path=args.data,
                                                  batch_size=1, shuffle=True,
                                                  num_workers=4, batch_type="large_batch")

    # set up the device
    device = torch.device(f'cuda:{args.device}' if torch.cuda.is_available() else 'cpu')

    # load the model
    if args.arch == "MLP":
        model = MLP(vocab_size=29654, embed_dim=250, first_hidden=1000,
                    second_hidden=500, activation=nn.LeakyReLU())
        # model = MLP(vocab_size=431416, embed_dim=250, first_hidden=1000, second_hidden=500, activation=nn.LeakyReLU())
    elif args.arch == "DeepSetMLP":
        model = DeepSetMLP(vocab_size=29654, embed_dim=250, first_hidden=1500,
                           second_hidden=1000, activation=nn.LeakyReLU())
        # model = DeepSetMLP(vocab_size=431416, embed_dim=250, first_hidden=1500, second_hidden=1000, activation=nn.LeakyReLU())
    elif args.arch == "DeepAPGMLP":
        model = DeepAPGMLP(vocab_size=29654, embed_dim=250, first_hidden=2000,
                           second_hidden=1000, activation=nn.LeakyReLU())
    checkpoint = torch.load(args.resume)
    state_dict = checkpoint['state_dict']
    model.load_state_dict(state_dict)
    model = model.to(device)
    model.eval()

    # get function handles of loss and metrics
    loss_fn = bce_loss
    metric_fn = [macro_averaged_rank, batched_topk_hit_1, batched_topk_hit_3,
                 batched_topk_hit_5, batched_scaled_MRR]

    # start evaluation on test data
    total_loss = 0.0
    total_metrics = torch.zeros(len(metric_fn))
    with torch.no_grad():
        for batched_examples in tqdm(test_data_loader):
            energy_scores = []
            all_labels = []
            if len(batched_examples) == 3:
                batched_parents, batched_children, batched_labels = batched_examples
                for parents, children, labels in zip(batched_parents,
                                                     batched_children,
                                                     batched_labels):
                    parents, children = parents.to(device), children.to(device)
                    prediction = model(parents, children).to(device)
                    loss = loss_fn(prediction, labels.to(device))
                    total_loss += loss.item()
                    energy_scores.extend(prediction.squeeze_().tolist())
                    all_labels.extend(labels.tolist())
            elif len(batched_examples) == 4:
                batched_parents, batched_siblings, batched_children, batched_labels = batched_examples
                for parents, siblings, children, labels in zip(
                        batched_parents, batched_siblings,
                        batched_children, batched_labels):
                    parents, siblings, children = (parents.to(device),
                                                   siblings.to(device),
                                                   children.to(device))
                    prediction = model(parents, siblings, children).to(device)
                    loss = loss_fn(prediction, labels.to(device))
                    total_loss += loss.item()
                    energy_scores.extend(prediction.squeeze_().tolist())
                    all_labels.extend(labels.tolist())
            elif len(batched_examples) == 5:
                batched_parents, batched_siblings, batched_grand_parents, \
                    batched_children, batched_labels = batched_examples
                for parents, siblings, grand_parents, children, labels in zip(
                        batched_parents, batched_siblings, batched_grand_parents,
                        batched_children, batched_labels):
                    parents, siblings, grand_parents, children = (
                        parents.to(device), siblings.to(device),
                        grand_parents.to(device), children.to(device))
                    prediction = model(parents, siblings, grand_parents, children).to(device)
                    loss = loss_fn(prediction, labels.to(device))
                    total_loss += loss.item()
                    energy_scores.extend(prediction.squeeze_().tolist())
                    all_labels.extend(labels.tolist())

            energy_scores = torch.tensor(energy_scores).unsqueeze_(1)
            all_labels = torch.tensor(all_labels)
            # compute the metrics on the test set
            for i, metric in enumerate(metric_fn):
                total_metrics[i] += metric(energy_scores, all_labels)

    n_samples = test_data_loader.n_samples
    print(f"Test loss: {total_loss / n_samples}")
    for i in range(len(metric_fn)):
        print(f"{metric_fn[i].__name__} : {total_metrics[i].item() / n_samples}")
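# bce_loss is referenced above but not defined in this snippet. A plausible
# stand-in, assuming the model outputs probabilities in [0, 1]; if it emits
# raw scores, F.binary_cross_entropy_with_logits would be the safer choice.
import torch.nn.functional as F

def bce_loss(output, target):
    return F.binary_cross_entropy(output.view(-1), target.float().view(-1))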
class Trainer:
    def __init__(self,
                 M1_dim: Tuple[int, int],
                 M2_dim: Tuple[int, int],
                 hidden_layers: List[int],
                 log_dir: str,
                 learning_rate: float = 1e-3,
                 batch_size: int = 32,
                 buffer_size: int = 1000,
                 n_steps: int = int(1e6),
                 val_every=1e4,
                 loss: str = "mse",
                 optimizer: str = "adam",
                 activation: str = "ReLU",
                 layer="affine",
                 x_min=-100,
                 x_max=100,
                 **kwargs):
        self.M1_dim = M1_dim
        self.M2_dim = M2_dim
        self.x_min = x_min
        self.x_max = x_max
        self.out_dim = (M1_dim[0], M2_dim[1])
        if layer.lower() in ("prod", "product"):
            self.mlp = ProdMLP(M1_dim=M1_dim, M2_dim=M2_dim,
                               hiddens=hidden_layers, activation=activation)
        else:
            self.mlp = MLP(M1_dim=M1_dim, M2_dim=M2_dim,
                           hiddens=hidden_layers, activation=activation)
        self.lr = learning_rate
        if optimizer.lower() == "adam":
            self.optimizer = optim.Adam(self.mlp.parameters(), lr=self.lr,
                                        betas=(0.9, 0.999), eps=1e-08,
                                        weight_decay=0, amsgrad=False)
        else:
            print("using SGD instead of Adam")
            # note: the original passed MLP.parameters (the unbound class
            # attribute); the optimizer needs the instance's parameters
            self.optimizer = optim.SGD(self.mlp.parameters(), lr=self.lr)
        if loss == "mse":
            self.loss = nn.MSELoss()
        elif loss == "huber":
            self.loss = nn.SmoothL1Loss()
        else:
            print(f"{loss} not supported, using MSE")
            self.loss = nn.MSELoss()
        self.val_loss = nn.L1Loss()
        self.batch_size = batch_size
        self.buffer_size = buffer_size
        self.buffer = deque(maxlen=self.buffer_size)
        self.n_steps = n_steps
        self.val_every = val_every
        self.writer = SummaryWriter(log_dir)

    # fills the buffer with randomly created examples to train on
    def _fill_buffer(self):
        for _ in range(self.buffer_size):
            self.buffer.append(self._make_data())

    def _make_data(self):
        M1 = torch.rand(self.M1_dim) * random.randint(self.x_min, self.x_max)
        M2 = torch.rand(self.M2_dim) * random.randint(self.x_min, self.x_max)
        x = torch.cat((M1.reshape(-1), M2.reshape(-1)), dim=0)
        y = (M1 @ M2).reshape(-1)
        return x, y

    def _train_batch(self):
        X, y = zip(*random.sample(self.buffer, self.batch_size))
        X = torch.stack(X, dim=0)
        y = torch.stack(y, dim=0)
        y_hat = self.mlp(X)
        loss = self.loss(y_hat, y)
        self.optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(self.mlp.parameters(), 1)
        self.optimizer.step()
        return loss.item()

    def _validate(self):
        X = []
        y = []
        for _ in range(self.batch_size):
            x_sample, y_sample = self._make_data()
            X.append(x_sample)
            y.append(y_sample)
        X = torch.stack(X, dim=0)
        y = torch.stack(y, dim=0)
        with torch.no_grad():
            self.mlp.eval()
            y_hat = self.mlp(X)
            loss = self.val_loss(y_hat, y)
            self.mlp.train()
        mean_val = torch.mean(torch.abs(y))
        M1 = X[0][:np.prod(self.M1_dim)].reshape(self.M1_dim)
        M2 = X[0][np.prod(self.M1_dim):].reshape(self.M2_dim)
        M_out = y[0].reshape(self.out_dim)
        M_fitted = y_hat[0].reshape(self.out_dim)
        print(f"average loss per matrix element is {loss}")
        print("-" * 40)
        print("-" * 40)
        print(f"Matrix 1 is {M1}")
        print("-" * 40)
        print(f"Matrix 2 is {M2}")
        print("-" * 40)
        print(f"predicted output is {M_fitted}")
        print("-" * 40)
        print(f"reference is {M_out}")
        print("-" * 40)
        print(f"diff is {M_out - M_fitted}")
        return loss, mean_val

    def train(self):
        self._fill_buffer()
        self.mlp.train()
        for step in trange(1, self.n_steps + 1):
            loss = self._train_batch()
            self.writer.add_scalar("Train/avg_loss", loss, step)
            self.buffer.append(self._make_data())
            if step % self.val_every == 0:
                val_loss, mean_val = self._validate()
                self.writer.add_scalar("Validate/avg_error", val_loss,
                                       step // self.val_every)
                self.writer.add_scalar("Validate/percent_off",
                                       (val_loss / mean_val) * 100,
                                       step // self.val_every)
                print(f"average loss per matrix element is {val_loss}")
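# A minimal usage sketch for the Trainer above: learn to multiply 2x3 by 3x2
# matrices. The hidden-layer sizes and log_dir are illustrative values only.
trainer = Trainer(M1_dim=(2, 3), M2_dim=(3, 2),
                  hidden_layers=[128, 128],
                  log_dir="./runs/matmul_mlp",
                  batch_size=32, n_steps=int(1e5), val_every=1e4)
trainer.train()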
def test(n, run_number):
    df_4_40 = pd.read_csv('./test_{}/merged_config_test_4_40.csv'.format(run_number))
    df_4_60 = pd.read_csv('./test_{}/merged_config_test_4_60.csv'.format(run_number))
    df_4_80 = pd.read_csv('./test_{}/merged_config_test_4_80.csv'.format(run_number))
    df_4_100 = pd.read_csv('./test_{}/merged_config_test_4_100.csv'.format(run_number))
    df_8_40 = pd.read_csv('./test_{}/merged_config_test_8_40.csv'.format(run_number))
    df_8_60 = pd.read_csv('./test_{}/merged_config_test_8_60.csv'.format(run_number))
    df_8_80 = pd.read_csv('./test_{}/merged_config_test_8_80.csv'.format(run_number))
    df_8_100 = pd.read_csv('./test_{}/merged_config_test_8_100.csv'.format(run_number))
    best_config = pd.read_csv('./test_{}/best_config_file.csv'.format(run_number))

    df_keys = {0: df_4_40, 1: df_4_60, 2: df_4_80, 3: df_4_100,
               4: df_8_40, 5: df_8_60, 6: df_8_80, 7: df_8_100}
    min_rows = 0
    min_rows = get_min_rows(df_keys, min_rows)

    if n == 1:
        model = MLP(15, 16, 8)
    else:
        model = MLP(7, 16, 8)
    model.load_state_dict(torch.load('checkpoint/MLP_model_19_train.pwf',
                                     map_location='cpu'))
    model.eval()

    data_point = list(df_8_100.iloc[0, [1, 2, 3, 5, 6, 7, 8]].values)
    if n == 1:
        one_hot_y = [0, 0, 0, 0, 0, 0, 0, 0]
        data_point = torch.Tensor(data_point + one_hot_y)
    else:
        data_point = torch.Tensor(data_point)

    with open("parameters.txt", "w") as f:
        f.write("Parameters \n")
        for i, param in enumerate(list(model.parameters())):
            if i % 2 == 0:
                weight = "weight for {} layer: ".format(i / 2 + 1) + str(param) + "\n"
                f.write(weight)
            else:
                bias = "bias for {} layer: ".format(int(i / 2) + 1) + str(param) + "\n"
                f.write(bias)

    cycles = df_8_100.iloc[0, 4]
    cycles_complete = df_8_100.iloc[0, 4]
    best_cycles = df_keys[best_config.iloc[0, -1]].iloc[0, 4]
    predicted = model.forward(data_point.reshape(1, -1))
    predicted = np.argmax(predicted.detach().cpu().numpy(), axis=-1)
    cycles_array = [int(cycles)]
    cores = [8]
    llc = [100]
    x_pos = [0]
    for i in range(1, min_rows):
        data_point = list(df_keys[predicted[0]].iloc[i, [1, 2, 3, 5, 6, 7, 8]].values)
        if n == 1:
            one_hot_y = oneHotEncoding(predicted)[0]
            data_point = torch.Tensor(data_point + one_hot_y)
        else:
            data_point = torch.Tensor(data_point)
        x_pos.append(cycles)
        cycles_array.append(int(df_keys[predicted[0]].iloc[i, 4]))
        cores.append(cores_llc_dict[predicted[0]]['cores'])
        llc.append(cores_llc_dict[predicted[0]]['llc'])
        cycles = cycles + df_keys[predicted[0]].iloc[i, 4]
        predicted = model.forward(data_point.reshape(1, -1))
        predicted = np.argmax(predicted.detach().cpu().numpy(), axis=-1)
        cycles_complete = cycles_complete + df_8_100.iloc[i, 4]
        best_cycles = best_cycles + df_keys[best_config.iloc[i, -1]].iloc[i, 4]

    print('About to plot the graphs for run_number: {}'.format(run_number))
    font = {'family': 'serif',
            'color': 'darkred',
            'weight': 'normal',
            'size': 32}
    widths = [cycle * 10**-8 * 0.8 for cycle in cycles_array]
    x_pos_reduced = [x * 10**-8 for x in x_pos]
    plot_test_results(cores, font, run_number, widths, x_pos_reduced, 'Cores')
    plot_test_results(llc, font, run_number, widths, x_pos_reduced, 'LLC')

    print('run number:', run_number)
    print('cycles calculated:', cycles)
    print('cycles for complete configuration:', cycles_complete)
    print('best configuration cycles:', best_cycles)
    print('complete cycle percentage', cycles / cycles_complete * 100)
    print('best cycle percentage', cycles / best_cycles * 100)
    print('\n')