Example #1
def mlp_inference():
    model = MLP()
    model.load_state_dict(torch.load(config.inference_model_path))
    model.eval()

    dataset = FeatureDataset()
    dataloader = DataLoader(dataset,
                            batch_size=1,
                            shuffle=False,
                            num_workers=4)

    counter = 0
    index = 0
    with torch.no_grad():
        for data in dataloader:
            index += 1
            inputs = data['features']
            labels = data['action']
            outputs = model(inputs)
            # probability_distribution = torch.nn.functional.softmax(outputs)
            prediction = np.argmax(outputs.detach().numpy())
            # print('prediction of MLP model is {}'.format(prediction))
            # print('label is {}'.format(labels.detach().numpy()[0]))
            # print('----')
            if labels.detach().numpy()[0] != prediction:
                counter += 1
                print(index)
                print('prediction of MLP model is {}'.format(prediction))
                print('label is {}'.format(labels.detach().numpy()[0]))
                print('----')
    print(counter)  # total number of misclassified samples
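
This snippet assumes an `MLP` network, a `FeatureDataset` that yields dicts with 'features' and 'action' keys, and a `config` module holding `inference_model_path`, none of which are shown. A minimal sketch of what such definitions could look like (the layer sizes, feature dimension, and in-memory data are illustrative assumptions):

import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset

class MLP(nn.Module):
    """Hypothetical two-layer classifier matching the usage above."""
    def __init__(self, in_dim=32, hidden_dim=64, n_actions=4):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(in_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, n_actions),
        )

    def forward(self, x):
        return self.net(x)

class FeatureDataset(Dataset):
    """Hypothetical dataset returning a dict with 'features' and 'action'."""
    def __init__(self, features=None, actions=None):
        # in practice these would be loaded from disk
        self.features = features if features is not None else np.zeros((10, 32), dtype=np.float32)
        self.actions = actions if actions is not None else np.zeros(10, dtype=np.int64)

    def __len__(self):
        return len(self.actions)

    def __getitem__(self, idx):
        return {'features': torch.from_numpy(self.features[idx]),
                'action': torch.tensor(self.actions[idx])}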
Example #2
def mpl(root, path_train, path_test):
    data_set_train = dataset_MLP(root + path_train, train=True)
    data_set_test = dataset_MLP(root + path_test, train=False)

    trainloader = DataLoader(data_set_train, batch_size=1000, shuffle=True)
    testloader = DataLoader(data_set_test, batch_size=1000)

    model = MLP()

    criterion = t.nn.CrossEntropyLoss()
    lr = 0.01
    optimizer = t.optim.SGD(model.parameters(), lr, momentum=0.4)

    for epoch in range(240):
        for _, (data, label) in enumerate(trainloader):
            model.train()
            optimizer.zero_grad()
            score = model(data)
            loss = criterion(score, label)
            loss.backward()
            optimizer.step()
        print("Epoch:%d loss:%f" % (epoch, loss.mean()))

    res = []
    model.eval()
    with t.no_grad():  # no gradients are needed at inference time
        for data in testloader:
            predict = model(data)
            res += predict.detach().numpy().tolist()
    res = np.array(res)

    ans = np.argmax(res, axis=1)
    data_set_test.save_res(ans, "./images/res_MLP.csv")
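
`dataset_MLP` is not shown; judging from the calls above, its test split yields feature tensors only and exposes a `save_res` helper that writes the predicted class ids to a CSV file. A hedged, standalone sketch of such a helper (the column names are assumptions):

import numpy as np
import pandas as pd

def save_res(predictions, path):
    """Hypothetical helper: write predicted class ids to a CSV file."""
    df = pd.DataFrame({"id": np.arange(len(predictions)), "label": predictions})
    df.to_csv(path, index=False)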
Example #3
def load_model(save_path):
    # torch.save(to_save, save_path)
    model = MLP(len(vocab), HIDDEN_SIZE, num_classes, device=device)

    checkpoint = torch.load(save_path + '/best_model.pt')
    model.load_state_dict(checkpoint['model_state_dict'])
    epoch = checkpoint['epoch']

    # move the model to the GPU if one is available
    model.to(device)

    # need this for dropout
    model.eval()
    return model
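
The checkpoint loaded here is expected to contain at least the keys 'model_state_dict' and 'epoch'. A minimal sketch of the matching save-side code (the optimizer entry is an assumption):

import torch

def save_model(model, optimizer, epoch, save_path):
    # assumed counterpart to load_model(): store the weights plus training progress
    torch.save({
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),  # optional, assumed
        'epoch': epoch,
    }, save_path + '/best_model.pt')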
Example #4
class Agent():
    def __init__(self, test=False):
        # device
        if torch.cuda.is_available():
            self.device = torch.device('cuda')
        else:
            self.device = torch.device('cpu')
        
        self.model = MLP(state_dim=4,action_num=2,hidden_dim=256).to(self.device)  
        if test:
            self.load('./pg_best.cpt')        
        # discounted reward
        self.gamma = 0.99 
        # optimizer
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=3e-3)
        # saved rewards and actions
        self.memory = Memory()
        self.tensorboard = TensorboardLogger('./')
    def save(self, save_path):
        print('save model to', save_path)
        torch.save(self.model.state_dict(), save_path)
    def load(self, load_path):
        print('load model from', load_path)
        self.model.load_state_dict(torch.load(load_path))
    def act(self,x,test=False):
        if not test:
            # boring type casting
            x = ((torch.from_numpy(x)).unsqueeze(0)).float().to(self.device)
            # stochastic sample
            action_prob = self.model(x)
            dist = torch.distributions.Categorical(action_prob)
            action = dist.sample()
            # memory log_prob
            self.memory.logprobs.append(dist.log_prob(action))
            return action.item()    
        else:
            self.model.eval()
            x = ((torch.from_numpy(x)).unsqueeze(0)).float().to(self.device)
            with torch.no_grad():
                action_prob = self.model(x)
                # a = np.argmax(action_prob.cpu().numpy())
                dist = torch.distributions.Categorical(action_prob)
                action = dist.sample()
                return action.item()
    def collect_data(self, state, action, reward):
        self.memory.actions.append(action)
        self.memory.rewards.append(torch.tensor(reward))
        self.memory.states.append(state)
    def clear_data(self):
        self.memory.clear_memory()

    def update(self):
        R = 0
        advantage_function = []        
        for t in reversed(range(0, len(self.memory.rewards))):
            R = R * self.gamma + self.memory.rewards[t]
            advantage_function.insert(0, R)

        # turn rewards to pytorch tensor and standardize
        advantage_function = torch.Tensor(advantage_function).to(self.device)
        advantage_function = (advantage_function - advantage_function.mean()) / (advantage_function.std() + np.finfo(np.float32).eps)

        policy_loss = []
        for log_prob, reward in zip(self.memory.logprobs, advantage_function):
            policy_loss.append(-log_prob * reward)
        # Update network weights
        self.optimizer.zero_grad()
        loss = torch.cat(policy_loss).sum()
        loss.backward()
        self.optimizer.step() 
        # boring log
        self.tensorboard.scalar_summary("loss", loss.item())
        self.tensorboard.update()
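
The `Memory` object used by `Agent` is not included; from the attribute accesses above it only needs four lists and a `clear_memory()` reset. A minimal sketch:

class Memory:
    """Hypothetical rollout storage matching the attribute accesses in Agent."""
    def __init__(self):
        self.actions = []
        self.states = []
        self.logprobs = []
        self.rewards = []

    def clear_memory(self):
        self.actions.clear()
        self.states.clear()
        self.logprobs.clear()
        self.rewards.clear()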
Example #5

class DQN:
    def __init__(self,
                 n_states,
                 n_actions,
                 gamma=0.99,
                 epsilon_start=0.9,
                 epsilon_end=0.05,
                 epsilon_decay=200,
                 memory_capacity=10000,
                 policy_lr=0.01,
                 batch_size=128,
                 device="cpu"):

        self.n_actions = n_actions  # total number of actions
        self.device = device  # device: cpu, gpu, etc.
        self.gamma = gamma  # discount factor for rewards
        # parameters of the epsilon-greedy policy
        self.actions_count = 0  # step counter used for the epsilon decay
        self.epsilon = 0
        self.epsilon_start = epsilon_start
        self.epsilon_end = epsilon_end
        self.epsilon_decay = epsilon_decay
        self.batch_size = batch_size
        self.policy_net = MLP(n_states, n_actions).to(self.device)
        self.target_net = MLP(n_states, n_actions).to(self.device)
        # initialize target_net with an exact copy of policy_net's parameters
        self.target_net.load_state_dict(self.policy_net.state_dict())
        self.target_net.eval()  # disable BatchNormalization and Dropout
        # note the difference between parameters() and state_dict(): the former has requires_grad=True
        self.optimizer = optim.Adam(self.policy_net.parameters(), lr=policy_lr)
        self.loss = 0
        self.memory = ReplayBuffer(memory_capacity)

    def choose_action(self, state, train=True):
        '''Select an action.
        '''
        if train:
            self.epsilon = self.epsilon_end + (self.epsilon_start - self.epsilon_end) * \
                math.exp(-1. * self.actions_count / self.epsilon_decay)
            self.actions_count += 1
            if random.random() > self.epsilon:
                with torch.no_grad():
                    # convert to a tensor before feeding it to the network; the state entries are originally float64
                    # note that state=torch.tensor(state).unsqueeze(0) is equivalent to state=torch.tensor([state])
                    state = torch.tensor([state],
                                         device=self.device,
                                         dtype=torch.float32)
                    # e.g. tensor([[-0.0798, -0.0079]])
                    q_value = self.policy_net(state)
                    # tensor.max(1) returns, for each row, the maximum value and its index,
                    # e.g. torch.return_types.max(values=tensor([10.3587]), indices=tensor([0]))
                    # so tensor.max(1)[1] is the index of the maximum, i.e. the action
                    action = q_value.max(1)[1].item()
            else:
                action = random.randrange(self.n_actions)
            return action
        else:
            with torch.no_grad():  # no gradients are recorded
                # convert to a tensor before feeding it to the network; the state entries are originally float64
                # note that state=torch.tensor(state).unsqueeze(0) is equivalent to state=torch.tensor([state])
                state = torch.tensor(
                    [state], device=self.device, dtype=torch.float32
                )  # keep the state on the same device as target_net
                q_value = self.target_net(state)
                # tensor.max(1) returns, for each row, the maximum value and its index,
                # so tensor.max(1)[1] is the index of the maximum, i.e. the action
                action = q_value.max(1)[1].item()
            return action

    def update(self):

        if len(self.memory) < self.batch_size:
            return
        # randomly sample a batch of transitions from memory
        state_batch, action_batch, reward_batch, next_state_batch, done_batch = self.memory.sample(
            self.batch_size)
        '''convert to tensors,
        e.g. tensor([[-4.5543e-02, -2.3910e-01,  1.8344e-02,  2.3158e-01],...,[-1.8615e-02, -2.3921e-01, -1.1791e-02,  2.3400e-01]])'''
        state_batch = torch.tensor(state_batch,
                                   device=self.device,
                                   dtype=torch.float)
        action_batch = torch.tensor(action_batch,
                                    device=self.device).unsqueeze(
                                        1)  # e.g. tensor([[1],...,[0]])
        reward_batch = torch.tensor(
            reward_batch, device=self.device,
            dtype=torch.float)  # tensor([1., 1.,...,1])
        next_state_batch = torch.tensor(next_state_batch,
                                        device=self.device,
                                        dtype=torch.float)
        done_batch = torch.tensor(np.float32(done_batch),
                                  device=self.device)  # convert the bool flags to float and then to a tensor
        '''compute Q(s_t, a) for the current (s_t, a) pairs'''
        '''torch.gather: for a=torch.Tensor([[1,2],[3,4]]), a.gather(1, torch.Tensor([[0],[1]])) = torch.Tensor([[1],[3]])'''
        q_values = self.policy_net(state_batch).gather(
            dim=1, index=action_batch)  # equivalent to calling self.forward
        # compute V(s_{t+1}) for all next states, i.e. the largest Q-value predicted by target_net
        next_state_values = self.target_net(next_state_batch).max(
            1)[0].detach()  # e.g. tensor([ 0.0060, -0.0171,...,])
        # compute expected_q_value
        # for terminal transitions done_batch is 1, so the bootstrap term is masked out and expected_q_value equals the reward
        expected_q_values = reward_batch + self.gamma * \
            next_state_values * (1 - done_batch)
        # self.loss = F.smooth_l1_loss(q_values,expected_q_values.unsqueeze(1)) # Huber loss
        self.loss = nn.MSELoss()(q_values,
                                 expected_q_values.unsqueeze(1))  # mean squared error loss
        # optimize the model
        self.optimizer.zero_grad()  # clear all old gradients from the last step
        # loss.backward() uses backpropagation to compute the gradient of the loss w.r.t. every parameter that requires gradients
        self.loss.backward()
        for param in self.policy_net.parameters():  # clip to prevent exploding gradients
            param.grad.data.clamp_(-1, 1)

        self.optimizer.step()  # update the model

    def save_model(self, path):
        torch.save(self.target_net.state_dict(), path)

    def load_model(self, path):
        self.target_net.load_state_dict(torch.load(path))
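
The `ReplayBuffer` is not shown either; `update()` only requires `len()` and a `sample(batch_size)` that returns five per-field batches. A minimal sketch (the `push` signature is an assumption):

import random
from collections import deque

class ReplayBuffer:
    """Hypothetical FIFO experience replay matching DQN.update()."""
    def __init__(self, capacity):
        self.buffer = deque(maxlen=capacity)

    def push(self, state, action, reward, next_state, done):
        self.buffer.append((state, action, reward, next_state, done))

    def sample(self, batch_size):
        batch = random.sample(self.buffer, batch_size)
        # transpose the list of transitions into per-field tuples
        state, action, reward, next_state, done = zip(*batch)
        return state, action, reward, next_state, done

    def __len__(self):
        return len(self.buffer)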
Example #6
def main():
    # check cuda
    device = f'cuda:{args.gpu}' if torch.cuda.is_available() and args.gpu >= 0 else 'cpu'
    # load data
    dataset = DglNodePropPredDataset(name=args.dataset)
    evaluator = Evaluator(name=args.dataset)

    split_idx = dataset.get_idx_split()
    g, labels = dataset[0] # graph: DGLGraph object, label: torch tensor of shape (num_nodes, num_tasks)
    
    if args.dataset == 'ogbn-arxiv':
        g = dgl.to_bidirected(g, copy_ndata=True)
        
        feat = g.ndata['feat']
        feat = (feat - feat.mean(0)) / feat.std(0)
        g.ndata['feat'] = feat

    g = g.to(device)
    feats = g.ndata['feat']
    labels = labels.to(device)

    # load masks for train / validation / test
    train_idx = split_idx["train"].to(device)
    valid_idx = split_idx["valid"].to(device)
    test_idx = split_idx["test"].to(device)

    n_features = feats.size()[-1]
    n_classes = dataset.num_classes
    
    # load model
    if args.model == 'mlp':
        model = MLP(n_features, args.hid_dim, n_classes, args.num_layers, args.dropout)
    elif args.model == 'linear':
        model = MLPLinear(n_features, n_classes)
    else:
        raise NotImplementedError(f'Model {args.model} is not supported.')

    model = model.to(device)
    print(f'Model parameters: {sum(p.numel() for p in model.parameters())}')

    if args.pretrain:
        print('---------- Before ----------')
        model.load_state_dict(torch.load(f'base/{args.dataset}-{args.model}.pt'))
        model.eval()

        y_soft = model(feats).exp()

        y_pred = y_soft.argmax(dim=-1, keepdim=True)
        valid_acc = evaluate(y_pred, labels, valid_idx, evaluator)
        test_acc = evaluate(y_pred, labels, test_idx, evaluator)
        print(f'Valid acc: {valid_acc:.4f} | Test acc: {test_acc:.4f}')

        print('---------- Correct & Smoothing ----------')
        cs = CorrectAndSmooth(num_correction_layers=args.num_correction_layers,
                              correction_alpha=args.correction_alpha,
                              correction_adj=args.correction_adj,
                              num_smoothing_layers=args.num_smoothing_layers,
                              smoothing_alpha=args.smoothing_alpha,
                              smoothing_adj=args.smoothing_adj,
                              autoscale=args.autoscale,
                              scale=args.scale)
        
        mask_idx = torch.cat([train_idx, valid_idx])
        y_soft = cs.correct(g, y_soft, labels[mask_idx], mask_idx)
        y_soft = cs.smooth(g, y_soft, labels[mask_idx], mask_idx)
        y_pred = y_soft.argmax(dim=-1, keepdim=True)
        valid_acc = evaluate(y_pred, labels, valid_idx, evaluator)
        test_acc = evaluate(y_pred, labels, test_idx, evaluator)
        print(f'Valid acc: {valid_acc:.4f} | Test acc: {test_acc:.4f}')
    else:
        opt = optim.Adam(model.parameters(), lr=args.lr)

        best_acc = 0
        best_model = copy.deepcopy(model)

        # training
        print('---------- Training ----------')
        for i in range(args.epochs):

            model.train()
            opt.zero_grad()

            logits = model(feats)
            
            train_loss = F.nll_loss(logits[train_idx], labels.squeeze(1)[train_idx])
            train_loss.backward()

            opt.step()
            
            model.eval()
            with torch.no_grad():
                logits = model(feats)
                
                y_pred = logits.argmax(dim=-1, keepdim=True)

                train_acc = evaluate(y_pred, labels, train_idx, evaluator)
                valid_acc = evaluate(y_pred, labels, valid_idx, evaluator)

                print(f'Epoch {i} | Train loss: {train_loss.item():.4f} | Train acc: {train_acc:.4f} | Valid acc {valid_acc:.4f}')

                if valid_acc > best_acc:
                    best_acc = valid_acc
                    best_model = copy.deepcopy(model)
        
        # testing & saving model
        print('---------- Testing ----------')
        best_model.eval()
        
        logits = best_model(feats)
        
        y_pred = logits.argmax(dim=-1, keepdim=True)
        test_acc = evaluate(y_pred, labels, test_idx, evaluator)
        print(f'Test acc: {test_acc:.4f}')

        if not os.path.exists('base'):
            os.makedirs('base')

        torch.save(best_model.state_dict(), f'base/{args.dataset}-{args.model}.pt')
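
The `evaluate` helper is not defined in this snippet. Given how it is called, it plausibly wraps the OGB node-property `Evaluator`; a hedged sketch along those lines:

def evaluate(y_pred, labels, idx, evaluator):
    # assumed wrapper around the OGB node-property evaluator (expects (N, 1) tensors)
    return evaluator.eval({
        'y_true': labels[idx],
        'y_pred': y_pred[idx],
    })['acc']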
Example #7
		return sp, self.discrete_freq


if __name__ == '__main__':
	args = docopt(__doc__)
	print("Command line args:\n", args)
	numlayer = int(args['-l'])
	numunit = int(args['-u'])
	model_path = args['-m']
	gpuid = int(args['-g'])
	
	dtype = torch.float
	device = torch.device("cuda:"+str(gpuid) if gpuid>=0 else "cpu")
	
	FFTSIZE = 1024
	FS = 16000 # [Hz]
	
	model = MLP(in_dim=FFTSIZE//2+1, out_dim=FFTSIZE//2+1, numlayer=numlayer, numunit=numunit)
	model.load_state_dict(torch.load(model_path))
	model = model.to(device)
	model.eval()
	
	f02sp = F02SP(FFTSIZE,FS)
	f0 = 0.1 * np.arange(200,5000+1) # input f0 grid: 20.0 to 500.0 [Hz], in 0.1 Hz steps
	sp, discrete_freq = f02sp.get_sp(f0)
	input_sequence = discrete_freq / f0[:,np.newaxis]
	input_sequence = torch.from_numpy(input_sequence).to(dtype).to(device)
	
	pred_sp = model(input_sequence)
	pred_sp = pred_sp.cpu().data.numpy()
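
The `MLP(in_dim, out_dim, numlayer, numunit)` constructor used here is not shown; a minimal sketch of a network with that signature (the ReLU activations and plain linear output are assumptions):

import torch.nn as nn

class MLP(nn.Module):
    """Hypothetical fully connected stack with configurable depth and width."""
    def __init__(self, in_dim, out_dim, numlayer=3, numunit=512):
        super().__init__()
        layers, dim = [], in_dim
        for _ in range(numlayer):
            layers += [nn.Linear(dim, numunit), nn.ReLU()]
            dim = numunit
        layers.append(nn.Linear(dim, out_dim))
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        return self.net(x)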
Example #8
class NonLocalTrainer(object):
    def __init__(self, args,
                 trainLoader, testLoader):

        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

        self.out_path = args.out
        self.sigma = args.sigma
        self.beta = args.beta
        self.nClass = args.nClass

        self.model = MLP().to(self.device)
        self.optim = torch.optim.Adam(self.model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
        self.criterion = nn.MSELoss()

        self.trainLoader = trainLoader
        self.testLoader = testLoader

        self.run_datetime = datetime.datetime.now()

        if not os.path.exists(self.out_path):
            os.makedirs(self.out_path)

        self.logger = Logger(self.out_path)

        with open(os.path.join(self.out_path, "para.json"), "w") as f:
            json.dump(args.__dict__, f)

        self.epoch = 0
        self.iteration = 0
        self.test_step = 0
        self.max_epoch = args.epochs
        self.val_interval = args.interval
        self.res = 0
        self.best_error = 1e7
        self.best_res_epoch = 0

        self.noiseMean = torch.zeros(args.batch_size, args.featureNums, 17, 17)
        self.noiseStd = torch.div(torch.ones(args.batch_size, args.featureNums, 17, 17), 1e3)

    def validate_one_epoch(self):
        self.model.eval()
        self.test_step += 1

        tsthreas = [0.1, 1, 10]

        tp = [0] * len(tsthreas)  # true positive
        tn = [0] * len(tsthreas)  # true negative
        fp = [0] * len(tsthreas)  # false positive
        fn = [0] * len(tsthreas)  # false negative
        ts = [0] * len(tsthreas)

        totalRegressionLoss = []
        total_error = 0
        total_count = 0
        p_error = 0
        p_count = 0

        largeGapCount = 0
        largeGap = 0

        for batch_idx, (data, target, _, _, _, _) in tqdm.tqdm(
                enumerate(self.testLoader), total=len(self.testLoader),
                desc='Valid :', ncols=80,
                leave=False):
            gt_micaps = target.numpy()
            data, target = data.to(device=self.device), target.to(device=self.device)

            with torch.no_grad():

                predictValues = self.model(data)

                regressionLoss = self.criterion(predictValues, target)

                predictNumpy = predictValues.cpu().numpy()
                totalRegressionLoss.append(regressionLoss.item())
                # totalClassificationLoss.append(classificationLoss.item())

                # predicted = torch.argmax(preds, dim=1)
                # correct += (predicted == logits).sum().item()

                gapValues = np.abs(predictNumpy - gt_micaps)
                total_error += np.sum(gapValues)
                total_count += gt_micaps.shape[0]
                p_error += np.sum((gt_micaps > 0.01) * gapValues)
                p_count += np.sum(gt_micaps > 0.01)

                largeGap += np.sum((gapValues > 5) * gapValues)
                largeGapCount += np.sum(gapValues > 5)

                for i, threas in enumerate(tsthreas):
                    tp[i] += np.sum((gt_micaps >= threas) * (predictNumpy >= threas))
                    tn[i] += np.sum((gt_micaps < threas) * (predictNumpy < threas))
                    fp[i] += np.sum((gt_micaps < threas) * (predictNumpy >= threas))
                    fn[i] += np.sum((gt_micaps >= threas) * (predictNumpy < threas))

        for i, _ in enumerate(tsthreas):
            ts[i] += round(tp[i] / (tp[i] + fp[i] + fn[i]), 5)

        totalAverageError = round(total_error / total_count, 5)
        pAverageError = round(p_error / p_count, 5)
        totalLoss = np.sum(totalRegressionLoss)
        largeGapRatio = round(largeGapCount / total_count, 5)
        largeGapMae = round(largeGap / largeGapCount, 5)

        info = {"test_regression_loss": totalLoss,
                "ts_score": ts,
                "aver_gap": totalAverageError,
                "aver_p_gap": pAverageError,
                "large_gap_ratio": largeGapRatio,
                "large_gap_mae": largeGapMae
                }
        print("========================== Epoch {} Test Result Show ==========================".format(self.epoch + 1))

        print(info)

        # for tag, value in info.items():
        #     self.logger.scalar_summary(tag, value, self.test_step)

        # if totalAverageError < self.best_error:
        #     self.best_error = totalAverageError
        #     self.best_res_epoch = self.epoch
        #     info["epoch"] = self.epoch
        #     info["modelParam"] = self.model.state_dict()
        #     info["optimParam"] = self.optim.state_dict()
        #     torch.save(info, os.path.join(self.out_path, str(self.epoch) + "_checkpoints.pth"))

    def train_one_epoch(self):
        self.model.train()

        for batch_idx, (data, target, _, _, _, _) in tqdm.tqdm(
                enumerate(self.trainLoader), total=len(self.trainLoader),
                desc='Train epoch=%d' % self.epoch, ncols=80, leave=False):
            iter_idx = batch_idx + self.epoch * len(self.trainLoader)
            # if (self.iteration != 0) and (iter_idx - 1) != self.iteration:
            #     continue
            self.iteration = iter_idx

            assert self.model.training
            self.optim.zero_grad()

            data = data.to(device=self.device)
            target = target.to(device=self.device)

            predictValues = self.model(data)

            regressionLoss = self.criterion(predictValues, target)

            regressionLoss.backward()
            # for named,param in self.model.named_parameters():
            #     print("Name : " ,named)
            #     print(param.grad.data.sum())
            self.optim.step()

            regressionLossCpu = regressionLoss.item()
            self.logger.scalar_summary("train_regression_loss", regressionLossCpu, self.iteration + 1)

        for tag, value in self.model.named_parameters():
            self.logger.histo_summary(tag, value.data.cpu().numpy(), self.epoch + 1)
            self.logger.histo_summary(tag + '/grad', value.grad.data.cpu().numpy(), self.epoch + 1)

    def run(self):
        for epoch in range(self.max_epoch):
            self.epoch = epoch
            self.train_one_epoch()
            if (self.epoch + 1) % self.val_interval == 0:
                self.validate_one_epoch()
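
The `Logger` used for `scalar_summary` and `histo_summary` is a custom class that is not part of this snippet. A minimal sketch of a thin TensorBoard wrapper with those two methods (an assumption about how it is implemented):

from torch.utils.tensorboard import SummaryWriter

class Logger:
    """Hypothetical thin wrapper around SummaryWriter matching the calls above."""
    def __init__(self, log_dir):
        self.writer = SummaryWriter(log_dir)

    def scalar_summary(self, tag, value, step=None):
        self.writer.add_scalar(tag, value, step)

    def histo_summary(self, tag, values, step=None):
        self.writer.add_histogram(tag, values, step)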
Example #9
# Bootstrapping
if args.bootstrapping:
    # Bootstrapping
    n_el = np.floor(1.0 * X.size(0)).astype(
        np.uint32)  # We fix the size of the bootstrap
    idx_subsample = np.random.choice(n_el, size=n_el, replace=True)
    X_sub = X[idx_subsample]
    Y_sub = Y[idx_subsample]
    dataset = RegressionDataset(X_sub, Y_sub)
else:
    dataset = RegressionDataset(X, Y)

net = MLP(args.dropout_rate)
# Create a prior
prior = MLP(args.dropout_rate)
prior.eval()
criterion = nn.MSELoss()
optimizer = optim.Adam(net.parameters(), lr=args.lr, weight_decay=args.wd)

# Update of the network parameters
train_loader = DataLoader(dataset, batch_size=args.batch_size, shuffle=False)

step = 0  # Number of batches seen
net.train()
for epoch in tqdm(np.arange(args.n_epochs), disable=not args.verbose):
    # experiment.log_current_epoch(epoch)

    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.cpu(), target.cpu()

        optimizer.zero_grad()
Example #10

                print('\n--------------------------------------')
                print('Run #{} Task #{} --> Train Classifier'.format(
                    run, task))
                print('--------------------------------------\n')

            #---------------
            # Iteration Loop
            for it in range(args.disc_iters):
                model = retrieve_replay_update(args,
                                    model, opt, data, target, buffer, task, tr_loader,rehearse=task>0)

            buffer.add_reservoir(data.cpu(), target.cpu(), None, task)

        # ------------------------ eval ------------------------ #
        model = model.eval()
        eval_loaders = [('valid', val_loader), ('test', test_loader)]

        for mode, loader_ in eval_loaders:
            for task_t, te_loader in enumerate(loader_):
                if task_t > task: break
                LOG_temp = get_temp_logger(None, ['cls_loss', 'acc'])

                # iterate over samples from task
                for i, (data, target) in enumerate(te_loader):
                    if args.unit_test and i > 10: break

                    if args.cuda:
                        data, target = data.to(args.device), target.to(args.device)

                    logits = model(data)
Example #11
class Trainer():
    def __init__(self, config_path):
        config = configparser.ConfigParser()
        config.read(config_path)

        self.n_epoch = config.getint("general", "n_epoch")
        self.batch_size = config.getint("general", "batch_size")
        self.train_bert = config.getboolean("general", "train_bert")
        self.lr = config.getfloat("general", "lr")
        self.cut_frac = config.getfloat("general", "cut_frac")
        self.log_dir = Path(config.get("general", "log_dir"))
        if not self.log_dir.exists():
            self.log_dir.mkdir(parents=True)
        self.model_save_freq = config.getint("general", "model_save_freq")

        self.device = "cuda" if torch.cuda.is_available() else "cpu"

        # bert_config_path = config.get("bert", "config_path")
        # bert_tokenizer_path = config.get("bert", "tokenizer_path")
        # bert_model_path = config.get("bert", "model_path")

        self.bert_tokenizer = LongformerTokenizer.from_pretrained(
            'allenai/longformer-base-4096')
        # self.bert_tokenizer = BertTokenizer.from_pretrained(bert_tokenizer_path)
        tkzer_save_dir = self.log_dir / "tokenizer"
        if not tkzer_save_dir.exists():
            tkzer_save_dir.mkdir()
        self.bert_tokenizer.save_pretrained(tkzer_save_dir)
        self.bert_model = LongformerModel.from_pretrained(
            'allenai/longformer-base-4096')
        self.bert_config = self.bert_model.config
        # self.bert_config = BertConfig.from_pretrained(bert_config_path)
        # self.bert_model = BertModel.from_pretrained(bert_model_path, config=self.bert_config)
        self.max_seq_length = self.bert_config.max_position_embeddings - 2
        # self.max_seq_length = self.bert_config.max_position_embeddings
        self.bert_model.to(self.device)

        if self.train_bert:
            self.bert_model.train()
        else:
            self.bert_model.eval()

        train_conll_path = config.get("data", "train_path")
        print("train path", train_conll_path)
        assert Path(train_conll_path).exists()
        dev_conll_path = config.get("data", "dev_path")
        print("dev path", dev_conll_path)
        assert Path(dev_conll_path).exists()
        dev1_conll_path = Path(dev_conll_path) / "1"
        print("dev1 path", dev1_conll_path)
        assert dev1_conll_path.exists()
        dev2_conll_path = Path(dev_conll_path) / "2"
        print("dev2 path", dev2_conll_path)
        assert dev2_conll_path.exists()
        self.train_dataset = ConllDataset(train_conll_path)
        # self.dev_dataset = ConllDataset(dev_conll_path)
        self.dev1_dataset = ConllDataset(dev1_conll_path)
        self.dev2_dataset = ConllDataset(dev2_conll_path)
        if self.batch_size == -1:
            self.batch_size = len(self.train_dataset)

        self.scaler = torch.cuda.amp.GradScaler()
        tb_cmt = f"lr_{self.lr}_cut-frac_{self.cut_frac}"
        self.writer = SummaryWriter(log_dir=self.log_dir, comment=tb_cmt)

    def transforms(self, example, label_list):
        feature = convert_single_example(example, label_list,
                                         self.max_seq_length,
                                         self.bert_tokenizer)
        label_ids = feature.label_ids
        label_map = feature.label_map
        gold_labels = [-1] * self.max_seq_length
        # Get "Element" or "Main" token indices
        for i, lid in enumerate(label_ids):
            if lid == label_map['B-Element']:
                gold_labels[i] = 0
            elif lid == label_map['B-Main']:
                gold_labels[i] = 1
            elif lid in (label_map['I-Element'], label_map['I-Main']):
                gold_labels[i] = 2
            elif lid == label_map['X']:
                gold_labels[i] = 3
        # flush data to bert model
        input_ids = torch.tensor(feature.input_ids).unsqueeze(0).to(
            self.device)
        if self.train_bert:
            model_output = self.bert_model(input_ids)
        else:
            with torch.no_grad():
                model_output = self.bert_model(input_ids)

        # lstm (ignore padding parts)
        model_fv = model_output[0]
        input_ids = torch.tensor(feature.input_ids)
        label_ids = torch.tensor(feature.label_ids)
        gold_labels = torch.tensor(gold_labels)
        return model_fv, input_ids, label_ids, gold_labels

    @staticmethod
    def extract_tokens(fv, gold_labels):
        ents, golds = [], []
        ents_mask = [-1] * len(gold_labels)
        ent, gold, ent_id = [], None, 0
        ent_flag = False
        for i, gt in enumerate(gold_labels):
            if gt == 2:  # in case of "I-xxx"
                ent.append(fv[i, :])
                ents_mask[i] = ent_id
                ent_end = i
            elif gt == 3 and ent_flag:  # in case of "X"
                ent.append(fv[i, :])
                ents_mask[i] = ent_id
                ent_end = i
            elif ent:
                ents.append(ent)
                golds.append(gold)
                ent = []
                ent_id += 1
                ent_flag = False
            if gt in (0, 1):  # in case of "B-xxx"
                ent.append(fv[i, :])
                gold = gt
                ents_mask[i] = ent_id
                ent_start = i
                ent_flag = True
        else:
            if ent:
                ents.append(ent)
                golds.append(gold)
        return ents, golds, ents_mask

    def eval(self, dataset):
        tp, fp, tn, fn = 0, 0, 0, 0
        with torch.no_grad():
            for data in tqdm(dataset):
                # flush to Bert
                fname, example = data

                try:
                    fvs, input_ids, label_ids, gold_labels = self.transforms(
                        example, dataset.label_list)
                except RuntimeError:
                    print(f"{fname} cannot put in memory!")
                    continue

                # extract Element/Main tokens
                ents, ent_golds, _ = self.extract_tokens(
                    fvs.squeeze(0), gold_labels)

                for i, ent in enumerate(ents):
                    # convert to torch.tensor
                    inputs = torch.empty(
                        [len(ent),
                         self.bert_config.hidden_size]).to(self.device)
                    for j, token in enumerate(ent):
                        inputs[j, :] = token
                    target = ent_golds[i]
                    inputs = torch.mean(inputs, dim=0, keepdim=True)

                    # classification
                    outputs = self.mlp(inputs)
                    if target == 1:
                        if outputs < 0.5:
                            fn += 1
                        else:
                            tp += 1
                    else:
                        if outputs < 0.5:
                            tn += 1
                        else:
                            fp += 1

        return Score(tp, fp, tn, fn).calc_score()

    def train(self):
        # MLP
        self.mlp = MLP(self.bert_config.hidden_size)
        self.mlp.to(self.device)
        self.mlp.train()
        # learning parameter settings
        params = list(self.mlp.parameters())
        if self.train_bert:
            params += list(self.bert_model.parameters())
        # loss
        self.criterion = BCEWithLogitsLoss()
        # optimizer
        self.optimizer = AdamW(params, lr=self.lr)
        num_train_steps = int(self.n_epoch * len(self.train_dataset) /
                              self.batch_size)
        num_warmup_steps = int(self.cut_frac * num_train_steps)
        self.scheduler = get_linear_schedule_with_warmup(
            self.optimizer, num_warmup_steps, num_train_steps)

        try:
            best_dev1_f1, best_dev2_f1 = 0, 0
            # best_dev_f1 = 0
            itr = 1
            for epoch in range(1, self.n_epoch + 1):
                print("Epoch : {}".format(epoch))
                print("training...")
                for i in tqdm(
                        range(0, len(self.train_dataset), self.batch_size)):
                    # fvs, ents, batch_samples, inputs, outputs = None, None, None, None, None
                    itr += i
                    # create batch samples
                    if (i + self.batch_size) < len(self.train_dataset):
                        end_i = (i + self.batch_size)
                    else:
                        end_i = len(self.train_dataset)

                    batch_samples, batch_golds = [], []

                    for j in range(i, end_i):
                        # flush to Bert
                        fname, example = self.train_dataset[j]

                        fvs, input_ids, label_ids, gold_labels = self.transforms(
                            example, self.train_dataset.label_list)

                        # extract Element/Main tokens
                        ents, ent_golds, _ = self.extract_tokens(
                            fvs.squeeze(0), gold_labels)
                        for e in ents:
                            ent = torch.empty(
                                [len(e),
                                 self.bert_config.hidden_size]).to(self.device)
                            for k, t in enumerate(e):
                                ent[k, :] = t
                            batch_samples.append(torch.mean(ent, dim=0))
                        batch_golds.extend(ent_golds)

                    # convert to torch.tensor
                    inputs = torch.empty(
                        [len(batch_samples),
                         self.bert_config.hidden_size]).to(self.device)
                    for j, t in enumerate(batch_samples):
                        inputs[j, :] = t
                    targets = torch.tensor(batch_golds,
                                           dtype=torch.float).unsqueeze(1)

                    self.optimizer.zero_grad()
                    with torch.cuda.amp.autocast():
                        outputs = self.mlp(inputs)
                        loss = self.criterion(outputs, targets.to(self.device))
                        # loss = loss / 100
                    self.scaler.scale(loss).backward()
                    self.scaler.step(self.optimizer)
                    self.scaler.update()
                    self.scheduler.step()

                    del fvs, ents, batch_samples, inputs, outputs
                    torch.cuda.empty_cache()

                    # write to SummaryWriter
                    self.writer.add_scalar("loss", loss.item(), itr)
                    self.writer.add_scalar(
                        "lr", self.optimizer.param_groups[0]["lr"], itr)

                # write to SummaryWriter
                if self.train_bert:
                    self.bert_model.eval()
                self.mlp.eval()
                # import pdb; pdb.set_trace()

                print("train data evaluation...")
                tr_acc, tr_rec, _, tr_prec, tr_f1 = self.eval(
                    self.train_dataset)
                print(
                    f"acc: {tr_acc}, rec: {tr_rec}, prec: {tr_prec}, f1: {tr_f1}"
                )
                self.writer.add_scalar("train/acc", tr_acc, epoch)
                self.writer.add_scalar("train/rec", tr_rec, epoch)
                self.writer.add_scalar("train/prec", tr_prec, epoch)
                self.writer.add_scalar("train/f1", tr_f1, epoch)
                # print("dev data evaluation...")
                # dev_acc, dev_rec, _, dev_prec, dev_f1 = self.eval(self.dev_dataset)
                # print(f"acc: {dev_acc}, rec: {dev_rec}, prec: {dev_prec}, f1: {dev_f1}")
                # self.writer.add_scalar("dev/acc", dev_acc, epoch)
                # self.writer.add_scalar("dev/rec", dev_rec, epoch)
                # self.writer.add_scalar("dev/prec", dev_prec, epoch)
                # self.writer.add_scalar("dev/f1", dev_f1, epoch)
                # self.writer.flush()
                print("dev1 data evaluation...")
                dev1_acc, dev1_rec, _, dev1_prec, dev1_f1 = self.eval(
                    self.dev1_dataset)
                print(
                    f"acc: {dev1_acc}, rec: {dev1_rec}, prec: {dev1_prec}, f1: {dev1_f1}"
                )
                self.writer.add_scalar("dev1/acc", dev1_acc, epoch)
                self.writer.add_scalar("dev1/rec", dev1_rec, epoch)
                self.writer.add_scalar("dev1/prec", dev1_prec, epoch)
                self.writer.add_scalar("dev1/f1", dev1_f1, epoch)
                self.writer.flush()
                print("dev2 data evaluation...")
                dev2_acc, dev2_rec, _, dev2_prec, dev2_f1 = self.eval(
                    self.dev2_dataset)
                print(
                    f"acc: {dev2_acc}, rec: {dev2_rec}, prec: {dev2_prec}, f1: {dev2_f1}"
                )
                self.writer.add_scalar("dev2/acc", dev2_acc, epoch)
                self.writer.add_scalar("dev2/rec", dev2_rec, epoch)
                self.writer.add_scalar("dev2/prec", dev2_prec, epoch)
                self.writer.add_scalar("dev2/f1", dev2_f1, epoch)
                self.writer.flush()
                if self.train_bert:
                    self.bert_model.train()
                self.mlp.train()

                if epoch % self.model_save_freq == 0:
                    curr_log_dir = self.log_dir / f"epoch_{epoch}"
                    if not curr_log_dir.exists():
                        curr_log_dir.mkdir()
                    if self.train_bert:
                        self.bert_model.save_pretrained(curr_log_dir)
                    torch.save(self.mlp.state_dict(),
                               curr_log_dir / "mlp.model")

                # if best_dev_f1 <= dev_f1:
                #     best_dev_f1 = dev_f1
                #     best_dev_epoch = epoch
                #     if self.train_bert:
                #         best_dev_model = copy.deepcopy(self.bert_model)
                #     best_dev_mlp = copy.deepcopy(self.mlp.state_dict())
                if best_dev1_f1 <= dev1_f1:
                    best_dev1_f1 = dev1_f1
                    best_dev1_epoch = epoch
                    if self.train_bert:
                        best_dev1_model = copy.deepcopy(self.bert_model).cpu()
                    best_dev1_mlp = copy.deepcopy(self.mlp).cpu().state_dict()
                if best_dev2_f1 <= dev2_f1:
                    best_dev2_f1 = dev2_f1
                    best_dev2_epoch = epoch
                    if self.train_bert:
                        best_dev2_model = copy.deepcopy(self.bert_model).cpu()
                    best_dev2_mlp = copy.deepcopy(self.mlp).cpu().state_dict()

        except KeyboardInterrupt:
            # del fvs, ents, batch_samples, inputs, outputs
            # print(f"Best epoch was #{best_dev_epoch}!\nSave params...")
            # save_dev_dir = Path(self.log_dir) / "best"
            # if not save_dev_dir.exists():
            #     save_dev_dir.mkdir()
            # if self.train_bert:
            #     best_dev_model.save_pretrained(save_dev_dir)
            # torch.save(best_dev_mlp, save_dev_dir / "mlp.model")
            # print("Training was successfully finished!")
            print(
                f"Best epoch was dev1: #{best_dev1_epoch}, dev2: #{best_dev2_epoch}!\nSave params..."
            )
            save_dev1_dir = Path(self.log_dir) / "dev1_best"
            if not save_dev1_dir.exists():
                save_dev1_dir.mkdir()
            save_dev2_dir = Path(self.log_dir) / "dev2_best"
            if not save_dev2_dir.exists():
                save_dev2_dir.mkdir()
            if self.train_bert:
                best_dev1_model.save_pretrained(save_dev1_dir)
                best_dev2_model.save_pretrained(save_dev2_dir)
            torch.save(best_dev1_mlp, save_dev1_dir / "mlp.model")
            torch.save(best_dev2_mlp, save_dev2_dir / "mlp.model")
            print("Training was successfully finished!")
            raise KeyboardInterrupt
        else:
            # print(f"Best epoch was #{best_dev_epoch}!\nSave params...")
            # save_dev_dir = Path(self.log_dir) / "best"
            # if not save_dev_dir.exists():
            #     save_dev_dir.mkdir()
            # if self.train_bert:
            #     best_dev_model.save_pretrained(save_dev_dir)
            # torch.save(best_dev_mlp, save_dev_dir / "mlp.model")
            # print("Training was successfully finished!")
            print(
                f"Best epoch was dev1: #{best_dev1_epoch}, dev2: #{best_dev2_epoch}!\nSave params..."
            )
            save_dev1_dir = Path(self.log_dir) / "dev1_best"
            if not save_dev1_dir.exists():
                save_dev1_dir.mkdir()
            save_dev2_dir = Path(self.log_dir) / "dev2_best"
            if not save_dev2_dir.exists():
                save_dev2_dir.mkdir()
            if self.train_bert:
                best_dev1_model.save_pretrained(save_dev1_dir)
                best_dev2_model.save_pretrained(save_dev2_dir)
            torch.save(best_dev1_mlp, save_dev1_dir / "mlp.model")
            torch.save(best_dev2_mlp, save_dev2_dir / "mlp.model")
            print("Training was successfully finished!")
            sys.exit()
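
`Score.calc_score()` is unpacked into five values in `eval()`, with the third one discarded. A plausible sketch, assuming the five values are accuracy, recall, specificity, precision, and F1:

class Score:
    """Hypothetical binary-classification metrics from a confusion matrix."""
    def __init__(self, tp, fp, tn, fn):
        self.tp, self.fp, self.tn, self.fn = tp, fp, tn, fn

    def calc_score(self):
        eps = 1e-12  # guard against division by zero
        acc = (self.tp + self.tn) / max(self.tp + self.fp + self.tn + self.fn, 1)
        rec = self.tp / (self.tp + self.fn + eps)
        spec = self.tn / (self.tn + self.fp + eps)   # assumed third value
        prec = self.tp / (self.tp + self.fp + eps)
        f1 = 2 * prec * rec / (prec + rec + eps)
        return acc, rec, spec, prec, f1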
Example #12
class Solver(object):
    def __init__(self, config, train_loader, val_loader):
        self.use_cuda = torch.cuda.is_available()
        self.device = torch.device('cuda' if self.use_cuda else 'cpu')
        self.train_loader = train_loader
        self.val_loader = val_loader
        self.episodes_per_epoch = config.episodes_per_epoch
        self.N_way_train = config.N_way_train
        self.N_shot_train = config.N_shot_train
        self.N_query_train = config.N_query_train
        self.M_aug_train = config.M_aug_train
        self.N_way_val = config.N_way_val
        self.N_shot_val = config.N_shot_val
        self.N_query_val = config.N_query_val
        self.M_aug_val = config.M_aug_val
        self.matching_fn = config.matching_fn
        self.nz = config.nz

        self.num_epochs = config.num_epochs
        self.resume_iter = config.resume_iter
        self.lr = config.lr
        self.num_steps_decay = config.num_steps_decay
        self.beta1 = config.beta1
        self.beta2 = config.beta2
        self.weight_decay = config.weight_decay
        self.exp_name = config.name
        os.makedirs(config.ckp_dir, exist_ok=True)
        self.ckp_dir = os.path.join(config.ckp_dir, self.exp_name)
        os.makedirs(self.ckp_dir, exist_ok=True)
        self.log_interval = config.log_interval
        self.ckp_interval = config.ckp_interval

        self.use_wandb = config.use_wandb

        self.build_model()

    def build_model(self):
        self.cnn = Convnet().to(self.device)
        self.g = Hallucinator(self.nz).to(self.device)
        self.mlp = MLP().to(self.device)
        self.optimizer = torch.optim.AdamW(list(self.cnn.parameters()) +
                                           list(self.g.parameters()) +
                                           list(self.mlp.parameters()),
                                           lr=self.lr,
                                           betas=[self.beta1, self.beta2],
                                           weight_decay=self.weight_decay)

        if self.matching_fn == 'parametric':
            self.parametric = nn.Sequential(nn.Linear(800, 400), nn.ReLU(),
                                            nn.Dropout(),
                                            nn.Linear(400, 1)).to(self.device)
            self.optimizer = torch.optim.AdamW(
                list(self.cnn.parameters()) + list(self.g.parameters()) +
                list(self.mlp.parameters()) +
                list(self.parametric.parameters()),
                lr=self.lr,
                betas=[self.beta1, self.beta2],
                weight_decay=self.weight_decay)

        self.scheduler = StepLR(self.optimizer,
                                step_size=self.num_steps_decay,
                                gamma=0.9)

    def save_checkpoint(self, step):
        state = {
            'cnn': self.cnn.state_dict(),
            'g': self.g.state_dict(),
            'mlp': self.mlp.state_dict(),
            'optimizer': self.optimizer.state_dict()
        }

        if self.matching_fn == 'parametric':
            state['parametric'] = self.parametric.state_dict()

        new_checkpoint_path = os.path.join(self.ckp_dir,
                                           '{}-dhm.pth'.format(step + 1))
        torch.save(state, new_checkpoint_path)
        print('model saved to %s' % new_checkpoint_path)

    def load_checkpoint(self, resume_iter):
        print('Loading the trained models from step {}...'.format(resume_iter))
        new_checkpoint_path = os.path.join(self.ckp_dir,
                                           '{}-dhm.pth'.format(resume_iter))
        state = torch.load(new_checkpoint_path)
        self.cnn.load_state_dict(state['cnn'])
        self.g.load_state_dict(state['g'])
        self.mlp.load_state_dict(state['mlp'])
        self.optimizer.load_state_dict(state['optimizer'])
        if self.matching_fn == 'parametric':
            self.parametric.load_state_dict(state['parametric'])
        print('model loaded from %s' % new_checkpoint_path)

    def train(self):
        criterion = nn.CrossEntropyLoss()

        best_mean = 0
        iteration = 0
        self.sample_idx_val = []
        self.noise_val = []
        for i in range(self.episodes_per_epoch):
            self.sample_idx_val.append(
                torch.tensor([
                    torch.randint(self.N_shot_val * i,
                                  self.N_shot_val * (i + 1),
                                  (self.M_aug_val, )).numpy()
                    for i in range(self.N_way_val)
                ]).reshape(-1))
            self.noise_val.append(
                torch.randn((self.N_way_val * self.M_aug_val, self.nz),
                            device=self.device))

        if self.resume_iter:
            print("resuming step %d ..." % self.resume_iter)
            iteration = self.resume_iter
            self.load_checkpoint(self.resume_iter)
            loss, mean, std = self.eval()
            if mean > best_mean:
                best_mean = mean

        episodic_acc = []

        for ep in range(self.num_epochs):
            self.cnn.train()
            self.g.train()
            self.mlp.train()

            for batch_idx, (data, target) in enumerate(self.train_loader):
                data = data.to(self.device)
                self.optimizer.zero_grad()

                support_input = data[:self.N_way_train *
                                     self.N_shot_train, :, :, :]
                query_input = data[self.N_way_train *
                                   self.N_shot_train:, :, :, :]

                label_encoder = {
                    target[i * self.N_shot_train]: i
                    for i in range(self.N_way_train)
                }
                query_label = torch.cuda.LongTensor([
                    label_encoder[class_name]
                    for class_name in target[self.N_way_train *
                                             self.N_shot_train:]
                ])

                support = self.cnn(support_input)
                queries = self.cnn(query_input)

                sample_idx = torch.tensor([
                    torch.randint(self.N_shot_train * i,
                                  self.N_shot_train * (i + 1),
                                  (self.M_aug_train, )).numpy()
                    for i in range(self.N_way_train)
                ]).reshape(-1)

                sample = support[sample_idx]
                noise = torch.randn(
                    (self.N_way_train * self.M_aug_train, self.nz),
                    device=self.device)

                support_g = self.g(sample,
                                   noise).reshape(self.N_way_train,
                                                  self.M_aug_train, -1)
                support = support.reshape(self.N_way_train, self.N_shot_train,
                                          -1)

                support_aug = torch.cat([support, support_g], dim=1)
                support_aug = support_aug.reshape(
                    self.N_way_train * (self.N_shot_train + self.M_aug_train),
                    -1)

                prototypes = self.mlp(support_aug)
                prototypes = prototypes.reshape(
                    self.N_way_train, self.N_shot_train + self.M_aug_train,
                    -1).mean(dim=1)
                queries = self.mlp(queries)

                if self.matching_fn == 'parametric':
                    distances = pairwise_distances(queries, prototypes,
                                                   self.matching_fn,
                                                   self.parametric)

                else:
                    distances = pairwise_distances(queries, prototypes,
                                                   self.matching_fn)

                loss = criterion(-distances, query_label)
                loss.backward()
                self.optimizer.step()

                y_pred = (-distances).softmax(dim=1).max(1, keepdim=True)[1]
                episodic_acc.append(
                    1. * y_pred.eq(query_label.view_as(y_pred)).sum().item() /
                    len(query_label))

                if (iteration + 1) % self.log_interval == 0:
                    episodic_acc = np.array(episodic_acc)
                    mean = episodic_acc.mean()
                    std = episodic_acc.std()

                    print(
                        'Epoch: {:3d} [{:d}/{:d}]\tIteration: {:5d}\tLoss: {:.6f}\tAccuracy: {:.2f} +- {:.2f} %'
                        .format(
                            ep, (batch_idx + 1), len(self.train_loader),
                            iteration + 1, loss.item(), mean * 100,
                            1.96 * std / (self.log_interval)**(1 / 2) * 100))

                    if self.use_wandb:
                        import wandb
                        wandb.log(
                            {
                                "loss":
                                loss.item(),
                                "acc_mean":
                                mean * 100,
                                "acc_ci":
                                1.96 * std /
                                (self.log_interval)**(1 / 2) * 100,
                                'lr':
                                self.optimizer.param_groups[0]['lr']
                            },
                            step=iteration + 1)

                    episodic_acc = []

                if (iteration + 1) % self.ckp_interval == 0:
                    loss, mean, std = self.eval()
                    if mean > best_mean:
                        best_mean = mean
                        self.save_checkpoint(iteration)
                        if self.use_wandb:
                            wandb.run.summary[
                                "best_accuracy"] = best_mean * 100

                    if self.use_wandb:
                        import wandb
                        wandb.log(
                            {
                                "val_loss": loss,
                                "val_acc_mean": mean * 100,
                                "val_acc_ci": 1.96 * std / (600)**(1 / 2) * 100
                            },
                            step=iteration + 1,
                            commit=False)

                iteration += 1

            self.scheduler.step()
        self.save_checkpoint(iteration)

    def eval(self):
        criterion = nn.CrossEntropyLoss()
        self.cnn.eval()
        self.g.eval()
        self.mlp.eval()
        episodic_acc = []
        loss = []

        with torch.no_grad():
            for b_idx, (data, target) in enumerate(self.val_loader):
                data = data.to(self.device)
                support_input = data[:self.N_way_val *
                                     self.N_shot_val, :, :, :]
                query_input = data[self.N_way_val * self.N_shot_val:, :, :, :]

                label_encoder = {
                    target[i * self.N_shot_val]: i
                    for i in range(self.N_way_val)
                }
                query_label = torch.cuda.LongTensor([
                    label_encoder[class_name]
                    for class_name in target[self.N_way_val * self.N_shot_val:]
                ])

                support = self.cnn(support_input)
                queries = self.cnn(query_input)

                sample_idx = self.sample_idx_val[b_idx]
                sample = support[sample_idx]

                noise = self.noise_val[b_idx]

                support_g = self.g(sample,
                                   noise).reshape(self.N_way_val,
                                                  self.M_aug_val, -1)
                support = support.reshape(self.N_way_val, self.N_shot_val, -1)

                support_aug = torch.cat([support, support_g], dim=1)
                support_aug = support_aug.reshape(
                    self.N_way_val * (self.N_shot_val + self.M_aug_val), -1)

                prototypes = self.mlp(support_aug)
                prototypes = prototypes.reshape(
                    self.N_way_val, self.N_shot_val + self.M_aug_val,
                    -1).mean(dim=1)
                queries = self.mlp(queries)

                if self.matching_fn == 'parametric':
                    distances = pairwise_distances(queries, prototypes,
                                                   self.matching_fn,
                                                   self.parametric)
                else:
                    distances = pairwise_distances(queries, prototypes,
                                                   self.matching_fn)

                loss.append(criterion(-distances, query_label).item())
                y_pred = (-distances).softmax(dim=1).max(1, keepdim=True)[1]
                episodic_acc.append(
                    1. * y_pred.eq(query_label.view_as(y_pred)).sum().item() /
                    len(query_label))

        loss = np.array(loss)
        episodic_acc = np.array(episodic_acc)
        loss = loss.mean()
        mean = episodic_acc.mean()
        std = episodic_acc.std()

        print('\nLoss: {:.6f}\tAccuracy: {:.2f} +- {:.2f} %\n'.format(
            loss, mean * 100, 1.96 * std / (600)**(1 / 2) * 100))

        return loss, mean, std
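
`pairwise_distances` is not defined in this snippet; from its use (negated distances fed to cross-entropy), it should return an (n_queries, n_prototypes) matrix where smaller means more similar. A hedged sketch covering the default metric and the 'parametric' case used above (the squared-Euclidean default is an assumption):

import torch

def pairwise_distances(x, y, matching_fn='l2', parametric=None):
    """Hypothetical distance matrix between queries x and prototypes y."""
    n, m = x.size(0), y.size(0)
    if matching_fn == 'parametric':
        # score every (query, prototype) pair with the small parametric network
        pairs = torch.cat([
            x.unsqueeze(1).expand(n, m, -1),
            y.unsqueeze(0).expand(n, m, -1),
        ], dim=-1).reshape(n * m, -1)
        return parametric(pairs).reshape(n, m)
    # default: squared Euclidean distance
    return ((x.unsqueeze(1) - y.unsqueeze(0)) ** 2).sum(dim=-1)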
Example #13
class Test():
    def __init__(self, config_path):
        config = configparser.ConfigParser()
        config.read(config_path)

        self.save_dir = Path(config.get("general", "save_dir"))
        if not self.save_dir.exists():
            self.save_dir.mkdir(parents=True)
        self.clf_th = config.getfloat("general", "clf_th")

        self.mlp_model_path = config.get("model", "mlp")
        assert Path(self.mlp_model_path).exists()

        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        bert_config_path = config.get("bert", "config_path")
        assert Path(bert_config_path).exists()
        self.bert_config = LongformerConfig.from_json_file(bert_config_path)
        self.max_seq_length = self.bert_config.max_position_embeddings - 2
        self.bert_tokenizer = LongformerTokenizer.from_pretrained(
            'allenai/longformer-base-4096')
        # bert_tokenizer_path = config.get("bert", "tokenizer_path")
        # assert Path(bert_config_path).exists()
        # self.bert_tokenizer = LongformerTokenizer.from_pretrained(bert_tokenizer_path)
        bert_model_path = config.get("bert", "model_path")
        assert Path(bert_model_path).exists()
        self.bert_model = LongformerModel.from_pretrained(
            bert_model_path, config=self.bert_config)
        self.bert_model.to(self.device)
        self.bert_model.eval()

        gold_dir = Path(config.get("data", "gold_dir"))
        assert Path(gold_dir).exists()
        self.gold_dataset = ConllDataset(gold_dir)
        target_dir = Path(config.get("data", "target_dir"))
        assert Path(target_dir).exists()
        self.target_dataset = ConllDataset(target_dir)

    def transforms(self, example, label_list, is_gold):
        feature = convert_single_example(example, label_list,
                                         self.max_seq_length,
                                         self.bert_tokenizer)
        label_ids = feature.label_ids
        label_map = feature.label_map
        if is_gold:
            gold_labels = [-1] * self.max_seq_length
            # Get "Element" or "Main" token indices
            for i, lid in enumerate(label_ids):
                if lid == label_map['B-Element']:
                    gold_labels[i] = 0
                elif lid == label_map['B-Main']:
                    gold_labels[i] = 1
                elif lid in (label_map['I-Element'], label_map['I-Main']):
                    gold_labels[i] = 2
                elif lid == label_map['X']:
                    gold_labels[i] = 3
        else:
            gold_labels = [-1] * self.max_seq_length
            # Get "Element" or "Main" token indices
            for i, lid in enumerate(label_ids):
                if lid == label_map['B-Element']:
                    gold_labels[i] = 0
                elif lid == label_map['I-Element']:
                    gold_labels[i] = 2
                elif lid == label_map['X']:
                    gold_labels[i] = 3
        # feed the token ids through the Longformer encoder
        input_ids = torch.tensor(feature.input_ids).unsqueeze(0).to(
            self.device)
        with torch.no_grad():
            bert_output = self.bert_model(input_ids)
        # last hidden states from the Longformer output
        bert_fv = bert_output[0]
        input_ids = torch.tensor(feature.input_ids)
        label_ids = torch.tensor(feature.label_ids)
        return bert_fv, input_ids, label_ids, label_map, gold_labels

    def load_model(self):
        # MLP
        self.mlp = MLP(self.bert_config.hidden_size)
        self.mlp.load_state_dict(torch.load(self.mlp_model_path))
        self.mlp.to(self.device)
        self.mlp.eval()

    def eval(self):
        self.load_model()

        correct_save_dir = self.save_dir / "correct"
        if not correct_save_dir.exists():
            correct_save_dir.mkdir(parents=True)
        incorrect_save_dir = self.save_dir / "incorrect"
        if not incorrect_save_dir.exists():
            incorrect_save_dir.mkdir(parents=True)

        tp, fp, tn, fn = 0, 0, 0, 0
        with torch.no_grad():
            for gold_data, target_data in tqdm(
                    zip(self.gold_dataset, self.target_dataset)):
                # encode the gold and target examples with the Longformer
                gold_fname, gold_example = gold_data
                target_fname, target_example = target_data
                assert gold_fname == target_fname

                _, _, _, _, gold_labels = self.transforms(
                    gold_example, self.gold_dataset.label_list, is_gold=True)
                fvs, input_ids, label_ids, label_map, pred_labels = self.transforms(
                    target_example,
                    self.target_dataset.label_list,
                    is_gold=False)

                # extract Element/Main tokens
                is_correct = True
                _, ent_gold_labels, golds_mask = Trainer.extract_tokens(
                    fvs.squeeze(0), gold_labels)
                golds = {}
                if len(ent_gold_labels) >= 1:
                    i = 0
                    while True:
                        try:
                            ent_start = golds_mask.index(i)
                        except ValueError:
                            break
                        for n, j in enumerate(golds_mask[ent_start:]):
                            if j != i:
                                ent_end = ent_start + n - 1
                                break
                        else:
                            # the entity runs to the end of the mask
                            ent_end = len(golds_mask) - 1
                        golds[(ent_start, ent_end)] = ent_gold_labels[i]
                        i += 1

                ents, ent_pred_labels, preds_mask = Trainer.extract_tokens(
                    fvs.squeeze(0), pred_labels)

                preds = {}
                if len(ent_pred_labels) >= 1:
                    i = 0
                    while True:
                        try:
                            ent_start = preds_mask.index(i)
                        except ValueError:
                            break
                        for n, j in enumerate(preds_mask[ent_start:]):
                            if j != i:
                                ent_end = ent_start + n - 1
                                break
                        else:
                            # the entity runs to the end of the mask
                            ent_end = len(preds_mask) - 1
                        preds[(ent_start, ent_end)] = ent_pred_labels[i]
                        i += 1
                for gold_span, gold_label in golds.items():
                    if gold_span not in preds.keys():
                        if gold_label == 1:
                            fn += 1
                            is_correct = False

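                # score each predicted Element span with the MLP: average its token
                # features and threshold the output at clf_th to decide whether it
                # should be promoted to "Main"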
                ents_pred = [0] * len(ents)
                for i, pred in enumerate(preds):
                    # convert to torch.tensor
                    inputs = torch.empty(
                        [len(ents[i]),
                         self.bert_config.hidden_size]).to(self.device)
                    for j, token in enumerate(ents[i]):
                        inputs[j, :] = token

                    inputs = torch.mean(inputs, dim=0, keepdim=True)
                    outputs = self.mlp(inputs)

                    if pred in golds.keys():
                        target = golds[pred]
                        if target == 1:
                            if outputs < self.clf_th:
                                fn += 1
                                is_correct = False
                            else:
                                tp += 1
                        else:
                            if outputs < self.clf_th:
                                tn += 1
                            else:
                                fp += 1
                                is_correct = False
                    else:
                        if outputs < self.clf_th:
                            pass
                        else:
                            fp += 1
                            is_correct = False

                    outputs_ = outputs.to('cpu').detach().numpy().copy()
                    if np.all(outputs_ > self.clf_th):
                        ents_pred[i] = 1

                if is_correct:
                    save_dir = correct_save_dir
                else:
                    save_dir = incorrect_save_dir
                save_path = save_dir / (target_fname + ".conll")
                lines = []
                elem_cnt = -1
                for i in range(len(target_example.text)):
                    text = target_example.text[i]
                    label = target_example.label[i]
                    start = target_example.start[i]
                    end = target_example.end[i]
                    if label == "B-Element":
                        elem_cnt += 1
                        if ents_pred[elem_cnt] == 1:
                            lines.append(f"B-Main\t{start}\t{end}\t{text}")
                        elif ents_pred[elem_cnt] == 0:
                            lines.append(f"{label}\t{start}\t{end}\t{text}")
                    elif label == "I-Element":
                        if ents_pred[elem_cnt] == 1:
                            lines.append(f"I-Main\t{start}\t{end}\t{text}")
                        elif ents_pred[elem_cnt] == 0:
                            lines.append(f"{label}\t{start}\t{end}\t{text}")
                    else:
                        lines.append(f"{label}\t{start}\t{end}\t{text}")

                with save_path.open("w") as f:
                    f.write("\n".join(lines))

        return Score(tp, fp, tn, fn).calc_score()
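
A minimal usage sketch for the Test class above; the config path is hypothetical and the structure returned by Score(...).calc_score() is assumed to be printable as-is:

# Hypothetical driver; the .ini path below is an assumption, not from the source.
if __name__ == "__main__":
    tester = Test("config/test.ini")
    print(tester.eval())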
Exemplo n.º 14
0
def test(args):
    # setup multiprocessing instance
    torch.multiprocessing.set_sharing_strategy('file_system')

    # setup data_loader instances
    if args.arch == "MLP":
        test_data_loader = EdgeDataLoader(mode="test",
                                          data_path=args.data,
                                          batch_size=1,
                                          shuffle=True,
                                          num_workers=4,
                                          batch_type="large_batch")
    elif args.arch == "DeepSetMLP":
        test_data_loader = SubGraphDataLoader(mode="test",
                                              data_path=args.data,
                                              batch_size=1,
                                              shuffle=True,
                                              num_workers=4,
                                              batch_type="large_batch")
    elif args.arch == "DeepAPGMLP":
        test_data_loader = AnchorParentDataLoader(mode="test",
                                                  data_path=args.data,
                                                  batch_size=1,
                                                  shuffle=True,
                                                  num_workers=4,
                                                  batch_type="large_batch")

    # setup device
    device = torch.device(
        f'cuda:{args.device}' if torch.cuda.is_available() else 'cpu')

    # load model
    if args.arch == "MLP":
        model = MLP(vocab_size=29654,
                    embed_dim=250,
                    first_hidden=1000,
                    second_hidden=500,
                    activation=nn.LeakyReLU())
        # model = MLP(vocab_size=431416, embed_dim=250, first_hidden=1000, second_hidden=500, activation=nn.LeakyReLU())
    elif args.arch == "DeepSetMLP":
        model = DeepSetMLP(vocab_size=29654,
                           embed_dim=250,
                           first_hidden=1500,
                           second_hidden=1000,
                           activation=nn.LeakyReLU())
        # model = DeepSetMLP(vocab_size=431416, embed_dim=250, first_hidden=1500, second_hidden=1000, activation=nn.LeakyReLU())
    elif args.arch == "DeepAPGMLP":
        model = DeepAPGMLP(vocab_size=29654,
                           embed_dim=250,
                           first_hidden=2000,
                           second_hidden=1000,
                           activation=nn.LeakyReLU())
    checkpoint = torch.load(args.resume)
    state_dict = checkpoint['state_dict']
    model.load_state_dict(state_dict)
    model = model.to(device)
    model.eval()

    # get function handles of loss and metrics
    loss_fn = bce_loss
    metric_fn = [
        macro_averaged_rank, batched_topk_hit_1, batched_topk_hit_3,
        batched_topk_hit_5, batched_scaled_MRR
    ]

    # start evaluation on test data
    total_loss = 0.0
    total_metrics = torch.zeros(len(metric_fn))

    with torch.no_grad():
        for batched_examples in tqdm(test_data_loader):
            energy_scores = []
            all_labels = []
            if len(batched_examples) == 3:
                batched_parents, batched_children, batched_labels = batched_examples[
                    0], batched_examples[1], batched_examples[2]
                for parents, children, labels in zip(batched_parents,
                                                     batched_children,
                                                     batched_labels):
                    parents, children = parents.to(device), children.to(device)
                    prediction = model(parents, children).to(device)
                    loss = loss_fn(prediction, labels.to(device))
                    total_loss += loss.item()
                    energy_scores.extend(prediction.squeeze_().tolist())
                    all_labels.extend(labels.tolist())
            elif len(batched_examples) == 4:
                batched_parents, batched_siblings, batched_children, batched_labels = batched_examples[
                    0], batched_examples[1], batched_examples[
                        2], batched_examples[3]
                for parents, siblings, children, labels in zip(
                        batched_parents, batched_siblings, batched_children,
                        batched_labels):
                    parents, siblings, children = parents.to(
                        device), siblings.to(device), children.to(device)
                    prediction = model(parents, siblings, children).to(device)
                    loss = loss_fn(prediction, labels.to(device))
                    total_loss += loss.item()
                    energy_scores.extend(prediction.squeeze_().tolist())
                    all_labels.extend(labels.tolist())
            elif len(batched_examples) == 5:
                batched_parents, batched_siblings, batched_grand_parents, batched_children, batched_labels = batched_examples[
                    0], batched_examples[1], batched_examples[
                        2], batched_examples[3], batched_examples[4]
                for parents, siblings, grand_parents, children, labels in zip(
                        batched_parents, batched_siblings,
                        batched_grand_parents, batched_children,
                        batched_labels):
                    parents, siblings, grand_parents, children = parents.to(
                        device), siblings.to(device), grand_parents.to(
                            device), children.to(device)
                    prediction = model(parents, siblings, grand_parents,
                                       children).to(device)
                    loss = loss_fn(prediction, labels.to(device))
                    total_loss += loss.item()
                    energy_scores.extend(prediction.squeeze_().tolist())
                    all_labels.extend(labels.tolist())

            energy_scores = torch.tensor(energy_scores).unsqueeze_(1)
            all_labels = torch.tensor(all_labels)

            # computing metrics on test set
            for i, metric in enumerate(metric_fn):
                total_metrics[i] += metric(energy_scores, all_labels)

    n_samples = test_data_loader.n_samples
    print(f"Test loss: {total_loss / n_samples}")
    for i in range(len(metric_fn)):
        print(
            f"{metric_fn[i].__name__} : {total_metrics[i].item() / n_samples}")
Exemplo n.º 15
0
class Trainer:
    def __init__(self,
                 M1_dim: Tuple[int, int],
                 M2_dim: Tuple[int, int],
                 hidden_layers: List[int],
                 log_dir: str,
                 learning_rate: float = 1e-3,
                 batch_size: int = 32,
                 buffer_size: int = 1000,
                 n_steps: int = int(1e6),
                 val_every=1e4,
                 loss: str = "mse",
                 optimizer: str = "adam",
                 activation: str = "ReLU",
                 layer="affine",
                 x_min=-100,
                 x_max=100,
                 **kwargs):

        self.M1_dim = M1_dim
        self.M2_dim = M2_dim
        self.x_min = x_min
        self.x_max = x_max

        self.out_dim = (M1_dim[0], M2_dim[1])
        if layer.lower() in ("prod", "product"):
            self.mlp = ProdMLP(M1_dim=M1_dim,
                               M2_dim=M2_dim,
                               hiddens=hidden_layers,
                               activation=activation)
        else:
            self.mlp = MLP(M1_dim=M1_dim,
                           M2_dim=M2_dim,
                           hiddens=hidden_layers,
                           activation=activation)

        self.lr = learning_rate
        if optimizer.lower() == "adam":
            self.optimizer = optim.Adam(self.mlp.parameters(),
                                        lr=self.lr,
                                        betas=(0.9, 0.999),
                                        eps=1e-08,
                                        weight_decay=0,
                                        amsgrad=False)
        else:
            print("using SGD instead of Adam")
            self.optimizer = optim.SGD(self.mlp.parameters(), lr=self.lr)
        if loss == "mse":
            self.loss = nn.MSELoss()
        elif loss == "huber":
            self.loss = nn.SmoothL1Loss()
        else:
            print(f"{loss} not supported, using MSE")
            self.loss = nn.MSELoss()
        self.val_loss = nn.L1Loss()
        self.batch_size = batch_size
        self.buffer_size = buffer_size
        self.buffer = deque(maxlen=self.buffer_size)

        self.n_steps = n_steps
        self.val_every = val_every

        self.writer = SummaryWriter(log_dir)

    # Fill the buffer with randomly generated examples to train on.
    def _fill_buffer(self):
        for _ in range(self.buffer_size):
            self.buffer.append(self._make_data())

    def _make_data(self):
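        # Draw two random matrices, each scaled by a random integer in
        # [x_min, x_max]; the input is their flattened concatenation and the
        # target is the flattened product M1 @ M2.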
        M1 = torch.rand(self.M1_dim) * random.randint(self.x_min, self.x_max)
        M2 = torch.rand(self.M2_dim) * random.randint(self.x_min, self.x_max)
        x = torch.cat((M1.reshape(-1), M2.reshape(-1)), dim=0)
        y = (M1 @ M2).reshape(-1)
        return x, y

    def _train_batch(self):
        X, y = zip(*random.sample(self.buffer, self.batch_size))
        X = torch.stack(X, dim=0)
        y = torch.stack(y, dim=0)
        y_hat = self.mlp(X)

        loss = self.loss(y_hat, y)
        self.optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(self.mlp.parameters(), 1)
        self.optimizer.step()

        return loss.item()

    def _validate(self):
        X = []
        y = []
        for _ in range(self.batch_size):
            x_sample, y_sample = self._make_data()
            X.append(x_sample)
            y.append(y_sample)
        X = torch.stack(X, dim=0)
        y = torch.stack(y, dim=0)
        with torch.no_grad():
            self.mlp.eval()
            y_hat = self.mlp(X)
            loss = self.val_loss(y_hat, y)
            self.mlp.train()
            mean_val = torch.mean(torch.abs(y))
        M1 = X[0][:np.prod(self.M1_dim)].reshape(self.M1_dim)
        M2 = X[0][np.prod(self.M1_dim):].reshape(self.M2_dim)
        M_out = y[0].reshape(self.out_dim)
        M_fitted = y_hat[0].reshape(self.out_dim)

        print(f"average loss per matrix element is {loss}")
        print("-" * 40)
        print("-" * 40)
        print(f"Matrix 1 is {M1}")
        print("-" * 40)
        print(f"Matrix 2 is {M2}")
        print("-" * 40)
        print(f"predicted output is {M_fitted}")
        print("-" * 40)
        print(f"reference is is {M_out}")
        print("-" * 40)
        print(f"diff is is {M_out - M_fitted}")

        return loss, mean_val

    def train(self):
        self._fill_buffer()
        self.mlp.train()
        for step in trange(1, self.n_steps + 1):
            loss = self._train_batch()
            self.writer.add_scalar("Train/avg_loss", loss, step)
            self.buffer.append(self._make_data())
            if step % self.val_every == 0:
                val_loss, mean_val = self._validate()
                self.writer.add_scalar("Validate/avg_error", val_loss,
                                       step // self.val_every)
                self.writer.add_scalar("Validate/percent_off",
                                       (val_loss / mean_val) * 100,
                                       step // self.val_every)
                print(f"average loss per matrix element is {val_loss}")
Exemplo n.º 16
0
def test(n, run_number):
    df_4_40 = pd.read_csv('./test_{}/merged_config_test_4_40.csv'.format(run_number))
    df_4_60 = pd.read_csv('./test_{}/merged_config_test_4_60.csv'.format(run_number))
    df_4_80 = pd.read_csv('./test_{}/merged_config_test_4_80.csv'.format(run_number))
    df_4_100 = pd.read_csv('./test_{}/merged_config_test_4_100.csv'.format(run_number))
    df_8_40 = pd.read_csv('./test_{}/merged_config_test_8_40.csv'.format(run_number))
    df_8_60 = pd.read_csv('./test_{}/merged_config_test_8_60.csv'.format(run_number))
    df_8_80 = pd.read_csv('./test_{}/merged_config_test_8_80.csv'.format(run_number))
    df_8_100 = pd.read_csv('./test_{}/merged_config_test_8_100.csv'.format(run_number))
    best_config = pd.read_csv('./test_{}/best_config_file.csv'.format(run_number))
    df_keys = {0: df_4_40, 1: df_4_60, 2: df_4_80, 3: df_4_100,
               4: df_8_40, 5: df_8_60, 6: df_8_80, 7: df_8_100}

    min_rows = get_min_rows(df_keys, 0)

    if n == 1:
        model = MLP(15, 16, 8)
    else:
        model = MLP(7, 16, 8)
    model.load_state_dict(torch.load('checkpoint/MLP_model_19_train.pwf', map_location='cpu'))
    model.eval()

    data_point = list(df_8_100.iloc[0, [1, 2, 3, 5, 6, 7, 8]].values)

    if n == 1:
        one_hot_y = [0, 0, 0, 0, 0, 0, 0, 0]
        data_point = torch.Tensor(data_point + one_hot_y)
    else:
        data_point = torch.Tensor(data_point)

    with open("parameters.txt", "w") as f:
        f.write("Parameters \n")
        for i, param in enumerate(list(model.parameters())):
            if i % 2 == 0:
                weight = "weight for {} layer: ".format(i / 2 + 1) + str(param) + "\n"
                f.write(weight)
            else:
                bias = "bias for {} layer: ".format(int(i / 2) + 1) + str(param) + "\n"
                f.write(bias)

    cycles = df_8_100.iloc[0, 4]
    cycles_complete = df_8_100.iloc[0, 4]
    best_cycles = df_keys[best_config.iloc[0, -1]].iloc[0, 4]
    predicted = model.forward(data_point.reshape(1, -1))
    predicted = np.argmax(predicted.detach().cpu().numpy(), axis=-1)
    cycles_array = [int(cycles)]
    cores = [8]
    llc = [100]
    x_pos = [0]
    for i in range(1, min_rows):
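        # Roll forward: read the features of the configuration chosen at the
        # previous step, accumulate its cycle count, and let the model pick the
        # configuration for the next interval; cycles_complete and best_cycles
        # track the fixed 8-core/100 LLC and per-row best configurations.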
        data_point = list(df_keys[predicted[0]].iloc[i, [1, 2, 3, 5, 6, 7, 8]].values)
        if n == 1:
            one_hot_y = oneHotEncoding(predicted)[0]
            data_point = torch.Tensor(data_point + one_hot_y)
        else:
            data_point = torch.Tensor(data_point)
        x_pos.append(cycles)
        cycles_array.append(int(df_keys[predicted[0]].iloc[i, 4]))
        cores.append(cores_llc_dict[predicted[0]]['cores'])
        llc.append(cores_llc_dict[predicted[0]]['llc'])
        cycles = cycles + df_keys[predicted[0]].iloc[i, 4]
        predicted = model.forward(data_point.reshape(1, -1))
        predicted = np.argmax(predicted.detach().cpu().numpy(), axis=-1)
        cycles_complete = cycles_complete + df_8_100.iloc[i, 4]
        best_cycles = best_cycles + df_keys[best_config.iloc[i, -1]].iloc[i, 4]

    print('About to plot the graphs for run_number: {}'.format(run_number))
    font = {'family': 'serif',
            'color': 'darkred',
            'weight': 'normal',
            'size': 32,
            }

    widths = [cycle * 10**-8 * 0.8 for cycle in cycles_array]
    x_pos_reduced = [x * 10**-8 for x in x_pos]
    plot_test_results(cores, font, run_number, widths, x_pos_reduced, 'Cores')
    plot_test_results(llc, font, run_number, widths, x_pos_reduced, 'LLC')

    print('run number:', run_number)
    print('cycles calculated:', cycles)
    print('cycles for complete configuration:', cycles_complete)
    print('best configuration cycles:', best_cycles)
    print('complete cycle percentage', cycles/cycles_complete * 100)
    print('best cycle percentage', cycles/best_cycles*100)
    print('\n')
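
test() above also depends on helpers defined elsewhere (oneHotEncoding, get_min_rows, cores_llc_dict, plot_test_results). A minimal sketch of oneHotEncoding consistent with the call site oneHotEncoding(predicted)[0], assuming the eight configurations ({4, 8} cores x {40, 60, 80, 100} LLC) implied by the hard-coded one_hot_y list:

# Hypothetical helper; assumes 8 configuration classes as implied above.
def oneHotEncoding(predicted, num_classes=8):
    return [[1 if j == int(p) else 0 for j in range(num_classes)]
            for p in predicted]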