def ft_data_loader_init(args, data_length, image_tmpl, train_transforms, test_transforms, eval_transforms):
    """Build the fine-tuning train / val / eval data loaders.

    The dataset class is chosen from ``args.ft_dataset``: ``data.dataset.DataSet``
    for 'ucf101', 'hmdb51', 'diving48' and 'sth_v1', and
    ``data.video_dataset.VideoDataSet`` for 'kinetics'.

    Args:
        args: options object; the ``ft_*`` attributes (root, list files, stride,
            mode, batch size, workers) and the flow prefix are read from it.
        data_length: forwarded to the dataset as ``new_length``.
        image_tmpl: file-name template used when ``args.ft_mode`` is "rgb" or
            "RGBDiff"; otherwise a flow template built from the flow prefix is used.
        train_transforms: transform applied to the training dataset.
        test_transforms: transform applied to the validation dataset.
        eval_transforms: transform applied to the full-video evaluation dataset.

    Returns:
        ``(train_loader, val_loader, eval_loader, n_train, n_val, n_eval)``.

    Raises:
        ValueError: if ``args.ft_dataset`` is not one of the supported names.
    """
    if args.ft_dataset in ['ucf101', 'hmdb51', 'diving48', 'sth_v1']:
        from data.dataset import DataSet as DataSet
    elif args.ft_dataset == 'kinetics':
        from data.video_dataset import VideoDataSet as DataSet
    else:
        # The exception used to be constructed but never raised, so an unknown
        # dataset only failed later with an unrelated UnboundLocalError.
        raise ValueError("unsupported dataset: {}".format(args.ft_dataset))

    uses_image_tmpl = args.ft_mode in ["rgb", "RGBDiff"]

    # NOTE(review): train/val read ``args.flow_prefix`` while eval reads
    # ``args.ft_flow_prefix``; kept as-is, confirm which attribute is intended.
    train_tmpl = image_tmpl if uses_image_tmpl else args.flow_prefix + "{}_{:05d}.jpg"
    train_dataset = DataSet(args, args.ft_root, args.ft_train_list, num_segments=1, new_length=data_length,
                            stride=args.ft_stride, modality=args.ft_mode, dataset=args.ft_dataset,
                            test_mode=False, image_tmpl=train_tmpl, transform=train_transforms)
    print("training samples:{}".format(len(train_dataset)))
    train_data_loader = torch.utils.data.DataLoader(train_dataset, batch_size=args.ft_batch_size, shuffle=True,
                                                    num_workers=args.ft_workers, pin_memory=True)

    val_tmpl = image_tmpl if uses_image_tmpl else args.flow_prefix + "{}_{:05d}.jpg"
    val_dataset = DataSet(args, args.ft_root, args.ft_val_list, num_segments=1, new_length=data_length,
                          stride=args.ft_stride, modality=args.ft_mode, test_mode=True, dataset=args.ft_dataset,
                          image_tmpl=val_tmpl, random_shift=False, transform=test_transforms)
    print("val samples:{}".format(len(val_dataset)))
    val_data_loader = torch.utils.data.DataLoader(val_dataset, batch_size=args.ft_batch_size, shuffle=False,
                                                  num_workers=args.ft_workers, pin_memory=True)

    # Same list file as validation, but with ``full_video=True``.
    eval_tmpl = image_tmpl if uses_image_tmpl else args.ft_flow_prefix + "{}_{:05d}.jpg"
    eval_dataset = DataSet(args, args.ft_root, args.ft_val_list, num_segments=1, new_length=data_length,
                           stride=args.ft_stride, modality=args.ft_mode, test_mode=True, dataset=args.ft_dataset,
                           image_tmpl=eval_tmpl, random_shift=False, transform=eval_transforms, full_video=True)
    print("eval samples:{}".format(len(eval_dataset)))
    eval_data_loader = torch.utils.data.DataLoader(eval_dataset, batch_size=args.ft_batch_size, shuffle=False,
                                                   num_workers=args.ft_workers, pin_memory=True)

    return (train_data_loader, val_data_loader, eval_data_loader,
            len(train_dataset), len(val_dataset), len(eval_dataset))
示例#2
0
    def test(self, epochs=99):
        """Evaluate ``self.net`` on the test list and return ``(top1, top5)``.

        Accuracy comes from ``self.validate``. When ``(epochs + 1)`` is a
        multiple of 10 the shift and diagonal robustness checks are run as
        well. The network is put back into training mode before returning.
        """
        test_set = DataSet(self.args.img_path, self.args.img_txt_test, self.test_transform)
        test_loader = DataLoader(test_set, batch_size=32, shuffle=False, num_workers=8)

        loss_fn = nn.CrossEntropyLoss().cuda()
        # top-1 / top-5 accuracy on the test list
        top1, top5 = self.validate(test_loader, loss_fn, self.args)

        # Robustness checks, see [ICML 2019] Making Convolutional Networks
        # Shift-Invariant Again, https://arxiv.org/pdf/1904.11486.pdf
        # (these metrics are not considered in our paper).
        val_dataset = DataSet(self.args.img_path, self.args.img_txt_test, self.val_transform)
        run_robustness = (epochs + 1) % 10 == 0
        if run_robustness:
            # robustness accuracy under shift transform
            print('start to validate shift:')
            robust_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=8)
            validate_shift(robust_loader, self.net, self.args)

            # robustness accuracy under diagonal transform (batch size 1)
            print('start to validate diagonal:')
            torch.cuda.empty_cache()
            robust_loader = DataLoader(val_dataset, batch_size=1, shuffle=False, num_workers=8)
            validate_diagonal(robust_loader, self.net, self.args)

        self.net.train()
        return top1, top5
示例#3
0
    def __init__(self, nn_lr=0.005, kg_lr=0.0005, l2_penalty=0.0001, conv_window=3, hidden_dim=230, k=50, use_alt=-1, load_model=None):
        """Load vocabulary and data, index every example, and build the model.

        ``use_alt < 0`` selects ``DataSet`` with its own split; otherwise
        ``AltDataSet`` is used and the split is loaded via
        ``load_from_file(use_alt)``. ``load_model``, when truthy, is a path
        passed to ``torch.load`` to restore the model weights. Two Adam
        optimizers over the same parameters are created, with learning rates
        ``nn_lr`` and ``kg_lr`` and weight decay ``l2_penalty``.
        """
        self.vocab, pretrained = get_pretrained_skipgram()
        self.word2idx = {word: idx for idx, word in enumerate(self.vocab)}

        if use_alt < 0:
            self.dataset = DataSet()
        else:
            self.dataset = AltDataSet()
        self.dataset.featurize_all()
        self.rels = sorted(self.dataset.all_relations)
        self.ents = sorted(self.dataset.all_entities)
        self.rel2idx = {rel: idx for idx, rel in enumerate(self.rels)}
        self.idx2rel = {idx: rel for rel, idx in self.rel2idx.items()}
        self.ent2idx = {ent: idx for idx, ent in enumerate(self.ents)}
        if use_alt < 0:
            self.train_data, self.test_data = self.dataset.split()
        else:
            self.train_data, self.test_data = self.dataset.load_from_file(use_alt)

        lookup = self.word2idx.get

        def to_indices(text):
            # Words missing from the vocabulary map to index 0.
            return [lookup(token, 0) for token in text.split(' ')]

        def iter_examples():
            # Every example of the train split followed by the test split.
            for info in list(self.train_data.values()) + list(self.test_data.values()):
                yield from info.examples

        # First pass: index pre / mid / post (joined by a 0 separator) and
        # track the longest sequence.
        self.max_len = 0
        for example in iter_examples():
            example.indices = (
                to_indices(example.pre) + [0] +
                to_indices(example.mid) + [0] +
                to_indices(example.post)
            )
            self.set_position(example)
            self.max_len = max(self.max_len, len(example.indices))
        # Second pass: padding runs only once max_len is final.
        for example in iter_examples():
            self.pad_example(example)

        self.model = Joint(
            num_relations=len(self.rels),
            num_entities=len(self.ents),
            pretrained=pretrained,
            conv_window=conv_window,
            hidden_dim=hidden_dim,
            k=k,
        )
        if CUDA:
            self.model = self.model.cuda()
        if load_model:
            self.model.load_state_dict(torch.load(load_model))

        self.nn_optimizer = optim.Adam(self.model.parameters(), lr=nn_lr, weight_decay=l2_penalty)
        self.kg_optimizer = optim.Adam(self.model.parameters(), lr=kg_lr, weight_decay=l2_penalty)
示例#4
0
    def __init__(self, use_alt=-1):
        """Load vocabulary and dataset, build index maps, and split the data.

        ``use_alt < 0`` selects ``DataSet`` and its own ``split()``; any other
        value selects ``AltDataSet`` and ``load_from_file(use_alt)``.
        ``model``, ``optimizer`` and ``loss`` are left as ``None``.
        """
        words, self.pretrained = get_pretrained_skipgram()
        self.word2idx = {word: position for position, word in enumerate(words)}

        use_default = use_alt < 0
        if use_default:
            self.dataset = DataSet()
        else:
            self.dataset = AltDataSet()

        self.rels = sorted(self.dataset.all_relations)
        self.ents = sorted(self.dataset.all_entities)
        self.rel2idx = {rel: position for position, rel in enumerate(self.rels)}
        self.idx2rel = {position: rel for rel, position in self.rel2idx.items()}
        self.ent2idx = {ent: position for position, ent in enumerate(self.ents)}

        if use_default:
            self.train_data, self.test_data = self.dataset.split()
        else:
            self.train_data, self.test_data = self.dataset.load_from_file(use_alt)
        self.enrich_data()
        self.num_examples_per_batch = 5

        self.model = None
        self.optimizer = None
        self.loss = None
示例#5
0
文件: agent.py 项目: epochstamp/mdrli
 def startMode(self, mode, epochLength, n_episodes=None):
     """Switch the agent into a non-training mode and reset its temporary dataset.

     Raises AgentError when called during an episode or with ``mode == -1``.
     ``n_episodes`` falls back to ``self._n_episodes_init`` when it is None.
     """
     if self._in_episode:
         raise AgentError(
             "Trying to start mode while current episode is not yet finished. This method can be "
             "called only *between* episodes for testing and validation.")
     if mode == -1:
         raise AgentError(
             "Mode -1 is reserved and means 'training mode'; use resumeTrainingMode() instead."
         )

     if n_episodes is None:
         self._n_episodes = self._n_episodes_init
     else:
         self._n_episodes = n_episodes
     self._mode = mode
     self._mode_epochs_length = epochLength
     self._total_mode_reward = 0.

     # Drop the previous temporary dataset before building a fresh one.
     del self._tmp_dataset
     fresh_dataset = DataSet(
         self._environment,
         self._random_state,
         max_size=self._replay_memory_size,
         only_full_history=self._only_full_history)
     self._tmp_dataset = fresh_dataset
def import_data_test(data_name, ratio_train_size, test_size):
    """Load a dataset and return ``(train, test)`` wrapped in ``DataSet`` objects.

    Args:
        data_name: one of "mnist", "cifar10" or "cifar100".
        ratio_train_size: fraction of the training data to keep. A value of 1
            returns the full training set unchanged.
        test_size: number of leading test samples to keep.

    Returns:
        A ``(data, test)`` tuple. For ``ratio_train_size != 1`` the training
        subset is class-balanced (the same number of samples per class, taken
        in data order) when enough samples of every class exist; otherwise it
        falls back to the first ``train_size`` samples.

    Raises:
        ValueError: if ``data_name`` is not a supported dataset name.
    """
    if data_name == "mnist":
        all_data_X, all_data_Y, all_test_X, all_test_Y = import_mnist()
        nb_class = 10
    elif data_name == "cifar10":
        all_data_X, all_data_Y, all_test_X, all_test_Y = import_cifar10()
        nb_class = 10
    elif data_name == "cifar100":
        all_data_X, all_data_Y, all_test_X, all_test_Y = import_cifar100()
        nb_class = 100
    else:
        # Previously fell through to a NameError on the first use of the data.
        raise ValueError("unsupported data_name: {!r}".format(data_name))

    test = DataSet(all_test_X[0:test_size], all_test_Y[0:test_size])
    if ratio_train_size == 1:
        return DataSet(all_data_X, all_data_Y), test

    all_data_size = len(all_data_Y)
    train_size = int(all_data_size * ratio_train_size)
    nb_each_class = int(train_size / nb_class)
    target_size = nb_each_class * nb_class

    train_X = []
    train_Y = []
    cur_nbEachClass = np.zeros([nb_class])
    for ind in range(all_data_size):
        if len(train_X) >= target_size:
            break
        class_nb = all_data_Y[ind]
        if cur_nbEachClass[class_nb] < nb_each_class:
            train_X.append(all_data_X[ind])
            train_Y.append(all_data_Y[ind])
            cur_nbEachClass[class_nb] += 1

    # Fall back to a plain prefix only when the balanced subset could not be
    # filled. The old test (index reached the end of the data) also fired when
    # the subset was completed by the very last sample, discarding it.
    if len(train_X) < target_size:
        train_X = all_data_X[0:train_size]
        train_Y = all_data_Y[0:train_size]

    data = DataSet(np.array(train_X), np.array(train_Y))
    return data, test
示例#7
0
    def test_ensemble(self, epochs=99):
        """Evaluate an ensemble of ResNet-50 checkpoints on the test list.

        One network is built per path in ``self.args.ensemble``. The softmax
        outputs of all networks are summed and top-1 / top-5 accuracy is
        computed on the sum. Results are printed; nothing is returned.
        ``epochs`` is not used.
        """
        pth = self.args.ensemble
        cnt = len(pth)
        print(f'>>>>>>>>>>>ensemble {cnt} model<<<<<<<<<<<<<')

        # Build every member first, then load weights, then move to GPU/eval.
        members = [resnet.resnet50(num_classes=self.args.num_classes) for _ in range(cnt)]
        for member, checkpoint in zip(members, pth):
            print(checkpoint)
            member.load_state_dict(torch.load(checkpoint), strict=True)
        for member in members:
            member.cuda()
            member.eval()

        dataset = DataSet(self.args.img_path, self.args.img_txt_test, self.test_transform)
        val_loader = DataLoader(dataset, batch_size=32, shuffle=False, num_workers=8)

        top1 = AvgMeter()
        top5 = AvgMeter()

        with torch.no_grad():
            for step, (images, target) in enumerate(val_loader):
                images = images.cuda(non_blocking=True)
                target = target.cuda(non_blocking=True)
                target = target.squeeze(1)

                # Sum of per-model class probabilities.
                output = torch.softmax(members[0](images), dim=1)
                for member in members[1:]:
                    output += torch.softmax(member(images), dim=1)

                # measure accuracy
                acc1, acc5 = accuracy(output, target, topk=(1, 5))
                batch = images.size(0)
                top1.update(acc1[0], batch)
                top5.update(acc5[0], batch)

                if step % self.args.print_freq == 0:
                    print('Test: [{0}/{1}]\t'
                          'Acc@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                          'Acc@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                           step, len(val_loader), top1=top1, top5=top5))

            print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'
                  .format(top1=top1, top5=top5))
        print(f'Top-1 ACC: {top1.avg}, Top-5 ACC: {top5.avg}')
示例#8
0
    def train(self):
        """Train ``self.net`` for ``self.args.epochs`` epochs, checkpointing each epoch.

        The loss is picked from ``self.args.method`` ('baseline', 'ls', 'ols'
        or 'tfkd'). For 'ols' the soft cross-entropy term is added to the hard
        cross-entropy and ``self.loss_lams`` is refreshed after every epoch
        from the statistics accumulated in ``self.cur_epoch_lams`` /
        ``self.cur_epoch_cnt``.

        After every epoch the model is evaluated with ``self.test()``, saved to
        ``<expname>/<epoch>.pth``, and saved to ``<expname>/best.pth`` when
        top-1 improves. The best accuracies are appended to
        ``<expname>/log.txt``, and the loss and accuracies are written to a
        ``SummaryWriter``.
        """
        num_classes = self.args.num_classes
        self.cur_epoch_lams = torch.zeros(num_classes, num_classes, dtype=torch.float32).cuda()
        self.cur_epoch_cnt = torch.zeros(num_classes, dtype=torch.float32).cuda()

        self.net = self.net.cuda()
        self.net.train()
        loss_recoder = AvgMeter()
        dataset = DataSet(self.args.img_path, self.args.img_txt_train, self.train_transform)
        dataloader = DataLoader(dataset, batch_size=self.args.batch_size, shuffle=True, num_workers=8)

        optimizer = torch.optim.SGD(self.net.parameters(), lr=self.args.lr, momentum=0.9, weight_decay=self.args.wd)
        if self.args.method == 'baseline':
            self.criterion = nn.CrossEntropyLoss().cuda()
        elif self.args.method == 'ls':
            self.criterion = label_smooth(self.args.num_classes).cuda()
        elif self.args.method == 'ols':
            self.criterion = nn.CrossEntropyLoss().cuda()
        elif self.args.method == 'tfkd':
            self.criterion = tf_kd_loss

        scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[45, 80], gamma=0.1)

        # Report roughly twice per epoch. Clamped to 1 because the raw value is
        # 0 (ZeroDivisionError in the modulo) when the dataset holds fewer than
        # two batches.
        print_interval = max(1, len(dataset) // 2 // self.args.batch_size)

        cur_loss = 0
        best_top1, best_top5 = 0.0, 0.0
        istep = 0
        # The context manager / finally ensure the log file and the summary
        # writer are closed even when training is interrupted by an exception.
        with open('{}/log.txt'.format(self.args.expname), 'w') as f:
            writer = SummaryWriter(self.args.expname)
            try:
                for epoch in range(self.args.epochs):
                    # Per-epoch statistics start from zero.
                    self.cur_epoch_lams = torch.zeros(num_classes, num_classes, dtype=torch.float32).cuda()
                    self.cur_epoch_cnt = torch.zeros(num_classes, dtype=torch.float32).cuda()
                    for i, (x, y) in enumerate(dataloader):
                        x = x.cuda()
                        target = y.cuda().squeeze(1)
                        optimizer.zero_grad()

                        output = self.net(x)
                        if self.args.method == 'ols':
                            self.update_loss_lams(output, target)
                            # Soft and hard terms are both weighted 1.0.
                            loss = self.soft_cross_entropy(output, target) * 1.0 + self.criterion(output, target) * 1.0
                        else:
                            # Every other method uses its criterion directly
                            # (the old per-method branches were identical).
                            loss = self.criterion(output, target)

                        loss.backward()
                        optimizer.step()

                        loss_value = loss.item()
                        loss_recoder.update(loss_value, 1)
                        cur_loss += loss_value

                        # record loss
                        writer.add_scalar("loss", loss_value, istep)
                        istep += 1

                        if (i + 1) % print_interval == 0:
                            time_now = datetime.now().strftime('%H:%M:%S')
                            # cur_loss covers exactly print_interval iterations,
                            # so average over that rather than a fixed 100.
                            print('%s [epoch %d/%d, iter %d/%d] lr = %f cur_loss = %f avg_loss = %f' % (time_now, epoch, self.args.epochs, i, len(dataloader), optimizer.param_groups[0]['lr'], cur_loss / print_interval, loss_recoder.avg))
                            cur_loss = 0

                    if self.args.method == 'ols':
                        for cls in range(self.args.num_classes):
                            if self.cur_epoch_cnt[cls].max() < 0.5:
                                # Nothing accumulated for this class this epoch: uniform.
                                self.loss_lams[cls] = 1. / self.args.num_classes
                            else:
                                # It is empirically found that adding restrictions here can be better for fine-grained classification
                                # Not for ImageNet or CIFAR
                                if self.loss_lams[cls].max() >= 0.88:
                                    continue
                                else:
                                    self.loss_lams[cls] = self.cur_epoch_lams[cls] / self.cur_epoch_cnt[cls]

                    scheduler.step()
                    top1, top5 = self.test()
                    writer.add_scalar("Top-1 ACC", top1, epoch)
                    writer.add_scalar("Top-5 ACC", top5, epoch)
                    torch.save(self.net.state_dict(), f'./{self.args.expname}/{epoch}.pth')
                    if top1 > best_top1:
                        best_top1, best_top5 = top1, top5
                        torch.save(self.net.state_dict(), '{}/best.pth'.format(self.args.expname))
                    summary = 'Currently Best top-1 = {}, top-5 = {}'.format(best_top1, best_top5)
                    print(summary)
                    # One entry per line; entries used to be written back to back.
                    f.write(summary + '\n')
            finally:
                writer.close()
示例#9
0
class BaseRunner:
    """Base class holding the data setup and the generic train / predict loops.

    ``model``, ``optimizer`` and ``loss`` are ``None`` here and must be set by
    a subclass before ``train`` / ``predict`` are used. Subclasses also
    implement ``predict_single``, ``forward_with_batch`` and ``add_to_batch``.
    """

    def __init__(self, use_alt=-1):
        """Load vocabulary and dataset, build index maps, and split the data.

        ``use_alt < 0`` selects ``DataSet`` and its own ``split()``; any other
        value selects ``AltDataSet`` and ``load_from_file(use_alt)``.
        """
        vocab, self.pretrained = get_pretrained_skipgram()
        self.word2idx = {word: idx for idx, word in enumerate(vocab)}
        self.dataset = DataSet() if use_alt < 0 else AltDataSet()
        self.rels = sorted(self.dataset.all_relations)
        self.ents = sorted(self.dataset.all_entities)
        self.rel2idx = {rel: idx for idx, rel in enumerate(self.rels)}
        self.idx2rel = {idx: rel for rel, idx in self.rel2idx.items()}
        self.ent2idx = {ent: idx for idx, ent in enumerate(self.ents)}
        self.train_data, self.test_data = self.dataset.split(
        ) if use_alt < 0 else self.dataset.load_from_file(use_alt)
        self.enrich_data()
        self.num_examples_per_batch = 5

        self.model = None
        self.optimizer = None
        self.loss = None

    def enrich_data(self):
        """Hook called at the end of ``__init__``; does nothing by default."""
        pass

    def train(self,
              epochs=10,
              batch_size=100,
              persist_path='../trained_models/na'):
        """Train the model, evaluating and saving it after every epoch.

        Args:
            epochs: number of passes over a freshly sampled training space.
            batch_size: number of sampled ``(pair, info)`` items accumulated
                before one optimizer step.
            persist_path: file the model ``state_dict`` is saved to after
                every epoch.

        After each epoch ``predict('tmp')`` is run (writing its results to a
        file named ``tmp``) and the precision/recall summary is printed.
        """
        self.model.train()
        print("start training")

        # NOTE: the counter and the partial batch are created outside the
        # epoch loop, so an incomplete batch carries over into the next epoch.
        num = 0
        batch = defaultdict(list)
        for i in range(epochs):
            sample_space = self.sample(self.train_data)
            loss_so_far = 0.0
            for pair, info in sample_space:
                num += 1
                self.add_to_batch(pair, info, batch)
                if num == batch_size:
                    output = self.forward_with_batch(batch)
                    target = torch.FloatTensor(batch['target'])
                    if CUDA:
                        target = target.cuda()
                    loss = self.loss(output, target)
                    self.optimizer.zero_grad()
                    loss.backward()
                    self.optimizer.step()
                    loss_so_far += loss.item()

                    num = 0
                    batch = defaultdict(list)
            print("Epoch {} with loss {}".format(i, loss_so_far))

            predict_scores, facts, predict_rels, pairs = self.predict('tmp')
            precision, recall, _ = precision_recall_curve(facts,
                                                          predict_scores,
                                                          pos_label=1)
            self.print_curve(precision, recall)
            # predict() leaves the model in eval mode; switch back.
            self.model.train()

            torch.save(self.model.state_dict(), persist_path)

    def predict(self, result_filename, topk=1):
        """Score every test pair and write the top-k predictions to a file.

        Pairs with more than 500 examples are skipped. Each output line is
        ``score,fact,relation,pair[0],pair[1]`` where ``fact`` is 1 when the
        predicted relation is in ``info.relations`` and 0 otherwise.

        Returns:
            ``(predict_scores, facts, predict_rels, pairs)`` as parallel lists.
        """
        # A bare ``torch.no_grad()`` statement used to sit here; it only built
        # and discarded a context manager. Gradients are disabled by the
        # ``with`` block inside the loop.
        self.model.eval()
        print("start predicting")

        predict_scores = []
        predict_rels = []
        facts = []
        pairs = []
        count = 0
        sample_space = self.sample(self.test_data, True)
        for pair, info in sample_space:
            if len(info.examples) > 500:
                continue
            with torch.no_grad():
                output = self.predict_single(pair, info)
                pairs.extend([pair] * topk)
                top, top_indices = torch.topk(output.squeeze(), k=topk, dim=-1)
                for idx in top_indices.tolist():
                    relation = self.idx2rel[idx]
                    predict_rels.append(relation)
                    facts.append(1 if relation in info.relations else 0)
                predict_scores.extend(top.tolist())

                if CUDA:
                    torch.cuda.empty_cache()
                count += 1
                if count % 1000 == 0:
                    print("Predicted {}".format(count))
        assert len(predict_scores) == len(facts) == len(predict_rels) == len(
            pairs)
        with open(result_filename, 'w') as file:
            for predict_score, fact, predict_rel, pair in zip(
                    predict_scores, facts, predict_rels, pairs):
                file.write("{},{},{},{},{}\n".format(predict_score, fact,
                                                     predict_rel, pair[0],
                                                     pair[1]))
        return predict_scores, facts, predict_rels, pairs

    def predict_single(self, pair, info):
        """Return the model output for one ``(pair, info)`` item (subclass hook)."""
        raise NotImplementedError

    def forward_with_batch(self, batch):
        """Run the model on an accumulated batch (subclass hook)."""
        raise NotImplementedError

    def add_to_batch(self, pair, info, batch):
        """Append one ``(pair, info)`` item to ``batch`` (subclass hook)."""
        raise NotImplementedError

    def sample(self, data, use_all=False):
        """Return a shuffled list of ``(pair, info)`` items drawn from ``data``.

        All items without the ``'[NA]'`` relation are always included. With
        ``use_all`` every ``'[NA]'`` item is added too; otherwise a random
        subset of ``len(non_na) // len(self.rels)`` of them is added, capped at
        the number of ``'[NA]'`` items available.
        """
        with_relation = [(pair, info) for pair, info in data.items()
                         if '[NA]' not in info.relations]
        na_items = [(pair, info) for pair, info in data.items()
                    if '[NA]' in info.relations]
        if use_all:
            sample_space = with_relation + na_items
        else:
            wanted = len(with_relation) // len(self.rels)
            # random.sample raises ValueError when asked for more items than
            # the population holds, so never request more than exist.
            sample_space = with_relation + sample(na_items,
                                                  min(wanted, len(na_items)))
        shuffle(sample_space)
        return sample_space

    def print_curve(self, precision, recall):
        """Print the precision at the recall points closest to fixed thresholds."""
        def find_closest_idx(arr, tgt):
            # Index of the first element of arr nearest to tgt (-1 if none).
            idx = -1
            delta = 100000
            for i, n in enumerate(arr):
                new_delta = abs(n - tgt)
                if new_delta < delta:
                    delta = new_delta
                    idx = i
            return idx

        thresholds = [
            0.05, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95
        ]
        results = [precision[find_closest_idx(recall, t)] for t in thresholds]
        print(results)
示例#10
0
    def _gen_data(self, num_validation):
        """Build per-image MNIST tasks and pickle the train/test/validation maps.

        ``num_validation`` training images are chosen at random (without
        replacement) and moved to the validation split. Every image becomes one
        ``DataSet`` task whose ``train`` and ``test`` parts share the same
        pixel-coordinate inputs ``x`` and pixel values ``y``. The three
        ``TaskSubMap`` objects are stored on ``self`` and dumped to
        ``MNIST_DATA_FILENAME``.
        """
        import tensorflow as tf

        (y_train, train_labels), (y_test, test_labels) = (
            tf.keras.datasets.mnist.load_data()
        )
        image_shape = (MNIST_HEIGHT_PIXELS, MNIST_WIDTH_PIXELS)
        assert y_train.shape == (MNIST_N_TRAIN,) + image_shape
        assert y_test.shape == (MNIST_N_TEST,) + image_shape

        # Flatten each image to n pixels and scale values by 1/255.
        n = MNIST_WIDTH_PIXELS * MNIST_HEIGHT_PIXELS
        y_train = y_train.reshape(MNIST_N_TRAIN, MNIST_OUTPUT_DIM, n) / 255.0
        y_test = y_test.reshape(MNIST_N_TEST, MNIST_OUTPUT_DIM, n) / 255.0

        # Boolean mask marking the training images moved to validation.
        chosen = np.random.choice(
            MNIST_N_TRAIN,
            num_validation,
            replace=False,
        )
        is_validation = np.full(MNIST_N_TRAIN, False)
        is_validation[chosen] = True
        y_validation, y_train = y_train[is_validation], y_train[~is_validation]
        validation_labels, train_labels = (
            train_labels[is_validation],
            train_labels[~is_validation],
        )

        # Pixel coordinates on [-1, 1] x [-1, 1]; second coordinate is flipped.
        grid = np.meshgrid(
            np.linspace(-1, 1, MNIST_WIDTH_PIXELS),
            np.linspace(-1, 1, MNIST_HEIGHT_PIXELS),
        )
        x = np.stack([grid[0].ravel(), np.flip(grid[1].ravel())], axis=0)
        assert x.shape == (MNIST_INPUT_DIM, n)

        self.train = TaskSubMap()
        self.test = TaskSubMap()
        self.validation = TaskSubMap()

        splits = (
            (self.train, y_train, train_labels),
            (self.test, y_test, test_labels),
            (self.validation, y_validation, validation_labels),
        )
        for submap, images, labels in splits:
            for pixels, label in zip(images, labels):
                task = DataSet()
                task.input_dim = MNIST_INPUT_DIM
                task.output_dim = MNIST_OUTPUT_DIM
                # train and test parts of a task carry identical data
                for part in (task.train, task.test):
                    part.x = x
                    part.y = pixels
                    part.n = n
                    part.label = label
                submap.add_task(task, label)

        if not os.path.isdir(MNIST_DATA_DIRNAME):
            os.makedirs(MNIST_DATA_DIRNAME)

        payload = [self.train, self.test, self.validation]
        with open(MNIST_DATA_FILENAME, "wb") as f:
            pickle.dump(payload, f)
示例#11
0
        print('loading checkpoint {}'.format(opt.resume_path))
        checkpoint = torch.load(opt.resume_path)
        assert opt.arch == checkpoint['arch']

        opt.begin_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        if not opt.no_train:
            optimizer.load_state_dict(checkpoint['optimizer'])
            step_scheduler.load_state_dict(checkpoint['step_scheduler'])
    print('run')
    # One iteration per epoch, from opt.begin_epoch through opt.n_epochs inclusive.
    for i in range(opt.begin_epoch, opt.n_epochs + 1):
        if not opt.no_train:
            # Regenerate the training-list file for this epoch every time: sample
            # 4 frames per gradual transition, 1 frame each before and after the
            # transition, and skip the 2 frames immediately adjacent to it.
            # NOTE(review): how the literals 4, 0.25, 2 map onto that description
            # is not visible here — confirm against generate_train_samples.main.
            generate_train_samples.main(opt.train_gts_json_path,4,0.25,2,opt.train_list_path)
            # The dataset and loader are rebuilt each epoch from the list file
            # regenerated just above.
            training_data = DataSet(opt.train_subdir,opt.train_list_path,
                                spatial_transform=spatial_transform,
                                temporal_transform=temporal_transform,
                                target_transform=target_transform, sample_duration=opt.sample_duration)
            train_loader = torch.utils.data.DataLoader(
                                training_data,
                                batch_size=opt.batch_size,
                                shuffle=True,
                                num_workers=opt.n_threads,
                                pin_memory=True)
            # The scheduler is stepped before the epoch's training, and is also
            # passed into train_epoch.
            step_scheduler.step()
            train_epoch(i, train_loader, model, criterion, optimizer, opt,
                        train_logger, train_batch_logger,step_scheduler)
    if opt.test:
        spatial_transform = Compose([
            Scale(opt.sample_size),
            CenterCrop(opt.sample_size),
            ToTensor(opt.norm_value), norm_method
示例#12
0
文件: env.py 项目: epochstamp/mdrli
            angle = 2*PI - abs(angle)
        elif (angle > PI):
            angle = -(2*PI - angle)

        return angle

    def inputDimensions(self):
        """Return the stored input dimensions (``self._input_dim``)."""
        dims = self._input_dim
        return dims

    def nActions(self):
        """Return the number of actions available at each time step (``self._n_actions``)."""
        action_count = self._n_actions
        return action_count

    def observe(self):
        """Return the current observation as produced by ``self.convert_repr()``."""
        observation = self.convert_repr()
        return observation

if __name__ == "__main__":
    # Manual smoke run: take 1000 random actions in the environment, record
    # every transition in a DataSet, then let the environment summarize it.
    random_state = np.random.RandomState()
    cartpole = Doublecartpole(random_state, min_f=-5, max_f=5, stepsize=5)
    cartpole.reset(mode=-1)
    history = DataSet(cartpole)

    print(cartpole.nActions())
    for _ in range(1000):
        action = random_state.randint(cartpole.nActions())
        reward = cartpole.act(action)
        observation = cartpole.observe()
        history.addSample(observation, action, reward, False, 0)
    cartpole.summarizePerformance(history, "./test/", "testoide")
示例#13
0
文件: agent.py 项目: epochstamp/mdrli
    def __init__(self,
                 environments,
                 q_networks,
                 replay_memory_size=1000000,
                 replay_start_size=None,
                 batch_size=32,
                 random_state=np.random.RandomState(),
                 exp_priority=0,
                 train_policy=None,
                 train_policy_kwargs=None,
                 test_policy=None,
                 test_policy_kwargs=None,
                 only_full_history=True,
                 init_env=0):
        """Set up one dataset, state buffer and policy pair per environment.

        Args:
            environments: sequence of environments; ``init_env`` selects the
                active one.
            q_networks: sequence of networks, indexed like ``environments``.
            replay_memory_size: ``max_size`` given to every ``DataSet``.
            replay_start_size: when None it is set to the largest history
                length (first entry of each input dimension) of the first
                environment processed.
            batch_size: stored as ``self._batch_size``.
            random_state: random generator shared by the datasets and policies.
            exp_priority: forwarded to ``DataSet`` as ``use_priority``.
            train_policy: policy class for training; ``EpsilonGreedyPolicy``
                with 0.1 when None.
            train_policy_kwargs: extra keyword arguments for ``train_policy``;
                None is treated as no extra arguments.
            test_policy: policy class for testing; ``GreedyPolicy`` when None.
            test_policy_kwargs: extra keyword arguments for ``test_policy``;
                None is treated as no extra arguments.
            only_full_history: forwarded to every ``DataSet``.
            init_env: index of the initially active environment.

        Raises:
            AgentError: if ``replay_start_size`` is smaller than the largest
                history length of an environment.
        """
        # NOTE(review): the ``random_state`` default is evaluated once at
        # definition time, so agents built without an explicit generator share
        # one RandomState. Left unchanged to keep the signature identical.

        # ``**None`` raises TypeError, so a policy class passed without kwargs
        # used to crash; treat None as "no extra keyword arguments".
        if train_policy_kwargs is None:
            train_policy_kwargs = {}
        if test_policy_kwargs is None:
            test_policy_kwargs = {}

        self._controllers = []
        self._environments = environments
        self._networks = q_networks
        self._e = init_env
        self._environment = environments[self._e]
        self._network = self._networks[self._e]
        self._network_memory = []
        self._replay_memory_size = replay_memory_size
        self._replay_start_size = replay_start_size
        self._batch_size = batch_size
        self._random_state = random_state
        self._exp_priority = exp_priority
        self._only_full_history = only_full_history
        self._datasets = [
            DataSet(env,
                    max_size=replay_memory_size,
                    random_state=random_state,
                    use_priority=self._exp_priority,
                    only_full_history=self._only_full_history)
            for env in self._environments
        ]
        self._dataset = self._datasets[self._e]
        self._tmp_dataset = None  # Will be created by startTesting() when necessary
        self._mode = -1
        self._mode_epochs_length = 0
        self._total_mode_reward = 0
        self._training_loss_averages = []
        self._Vs_on_last_episode = []
        self._in_episode = False
        self._selected_action = -1
        self._selected_batch = None
        self._train_policies = [None] * len(environments)
        self._test_policies = [None] * len(environments)
        self._states = [None] * len(self._environments)
        for i, env in enumerate(self._environments):
            inputDims = env.inputDimensions()
            # The first entry of each input dimension is its history length.
            longest_history = max(dims[0] for dims in inputDims)

            if self._replay_start_size is None:
                self._replay_start_size = longest_history
            elif self._replay_start_size < longest_history:
                raise AgentError(
                    "Replay_start_size should be greater than the biggest history of a state."
                )
            # One zero-filled buffer per input of this environment.
            self._states[i] = [
                np.zeros(dims, dtype=config.floatX) for dims in inputDims
            ]

            if train_policy is None:
                self._train_policies[i] = EpsilonGreedyPolicy(
                    env.nActions(), random_state, 0.1)
            else:
                #Todo : change the number of actions. Listify the policies
                self._train_policies[i] = train_policy(
                    env.nActions(), random_state, **train_policy_kwargs)
            self._train_policies[i].setAttribute("model", q_networks[i])

            if test_policy is None:
                self._test_policies[i] = GreedyPolicy(
                    env.nActions(), random_state)
            else:
                self._test_policies[i] = test_policy(
                    env.nActions(), random_state, **test_policy_kwargs)
            self._test_policies[i].setAttribute("model", q_networks[i])

        self._test_policy = self._test_policies[self._e]
        self._train_policy = self._train_policies[self._e]
        self._state = self._states[self._e]
示例#14
0
文件: agent.py 项目: epochstamp/mdrli
class NeuralAgent(object):
    """The NeuralAgent class wraps a deep Q-network for training and testing in a given environment.
    
    Attach controllers to it in order to conduct an experiment (when to train the agent, when to test,...).
    
    Parameters
    -----------
    environments : list of objects from class Environment
        The environments in which the agent interacts (one of them is active at a time)
    q_networks : list of objects from class QNetwork
        The q_networks associated to the agent, one per environment
    replay_memory_size : int
        Size of the replay memory. Default : 1000000
    replay_start_size : int
        Number of observations (=number of time steps taken) in the replay memory before starting learning. 
        Default: minimum possible according to environment.inputDimensions().
    batch_size : int
        Number of tuples taken into account for each iteration of gradient descent. Default : 32
    random_state : numpy random number generator
        Default : random seed.
    exp_priority : float
        The exponent that determines how much prioritization is used, default is 0 (uniform priority).
        One may check out Schaul et al. (2016) - Prioritized Experience Replay.
    train_policy : object from class Policy
        Policy followed when in training mode (mode -1)
    test_policy : object from class Policy
        Policy followed when in other modes than training (validation and test modes)
    only_full_history : boolean
        Whether we wish to train the neural network only on full histories or we wish to fill with zeroes the 
        observations before the beginning of the episode
    """
    def __init__(self,
                 environments,
                 q_networks,
                 replay_memory_size=1000000,
                 replay_start_size=None,
                 batch_size=32,
                 random_state=np.random.RandomState(),
                 exp_priority=0,
                 train_policy=None,
                 train_policy_kwargs=None,
                 test_policy=None,
                 test_policy_kwargs=None,
                 only_full_history=True,
                 init_env=0):
        """Create one dataset, one state buffer and one policy pair per environment.

        The main parameters are described in the class docstring; the ones below
        complete it.

        Parameters
        -----------
        train_policy_kwargs : dict or None
            Extra keyword arguments forwarded to `train_policy` when it is given.
            None is treated as "no extra arguments".
        test_policy_kwargs : dict or None
            Extra keyword arguments forwarded to `test_policy` when it is given.
            None is treated as "no extra arguments".
        init_env : int
            Index (in `environments` / `q_networks`) of the initially active environment.

        Raises
        ------
        AgentError
            If an explicit `replay_start_size` is smaller than the biggest history
            size of one of the environments.
        """
        # NOTE: the default `random_state` object is created once, when the
        # method is defined, and is therefore shared by all agents that do not
        # pass their own generator.

        # The kwargs default to None, which cannot be unpacked with `**`.
        if train_policy_kwargs is None:
            train_policy_kwargs = {}
        if test_policy_kwargs is None:
            test_policy_kwargs = {}

        self._controllers = []
        self._environments = environments
        self._networks = q_networks
        self._e = init_env
        self._environment = environments[self._e]
        self._network = self._networks[self._e]
        self._network_memory = []
        self._replay_memory_size = replay_memory_size
        self._replay_start_size = replay_start_size
        self._batch_size = batch_size
        self._random_state = random_state
        self._exp_priority = exp_priority
        self._only_full_history = only_full_history
        self._datasets = list()
        for env in self._environments:
            self._datasets.append(
                DataSet(env,
                        max_size=replay_memory_size,
                        random_state=random_state,
                        use_priority=self._exp_priority,
                        only_full_history=self._only_full_history))
        self._dataset = self._datasets[self._e]
        self._tmp_dataset = None  # Will be created by startTesting() when necessary
        self._mode = -1
        self._mode_epochs_length = 0
        self._total_mode_reward = 0
        self._training_loss_averages = []
        self._Vs_on_last_episode = []
        self._in_episode = False
        self._selected_action = -1
        self._selected_batch = None
        self._train_policies = [None] * len(environments)
        self._test_policies = [None] * len(environments)
        self._states = [None] * len(self._environments)
        for i, env in enumerate(self._environments):
            self._states[i] = []
            inputDims = env.inputDimensions()
            biggest_history = max(dims[0] for dims in inputDims)

            if replay_start_size is None:
                # No explicit value: use the biggest history over *all*
                # environments, not only the first one.
                if (self._replay_start_size is None
                        or self._replay_start_size < biggest_history):
                    self._replay_start_size = biggest_history
            elif replay_start_size < biggest_history:
                raise AgentError(
                    "Replay_start_size should be greater than the biggest history of a state."
                )
            for dims in inputDims:
                self._states[i].append(np.zeros(dims, dtype=config.floatX))

            if train_policy is None:
                self._train_policies[i] = EpsilonGreedyPolicy(
                    env.nActions(), random_state, 0.1)
            else:
                #Todo : change the number of actions. Listify the policies
                self._train_policies[i] = train_policy(
                    env.nActions(), random_state, **train_policy_kwargs)
            self._train_policies[i].setAttribute("model", q_networks[i])

            if test_policy is None:
                self._test_policies[i] = GreedyPolicy(
                    env.nActions(), random_state)
            else:
                self._test_policies[i] = test_policy(
                    env.nActions(), random_state, **test_policy_kwargs)
            self._test_policies[i].setAttribute("model", q_networks[i])

        self._test_policy = self._test_policies[self._e]
        self._train_policy = self._train_policies[self._e]
        self._state = self._states[self._e]

    def setEnvironment(self, e, reset=False):
        """ Change the environment and the related dataset
        """
        self._e = e
        self._dataset = self._datasets[self._e]
        self._state = self._states[e]
        if reset:
            self._state[...] = 0
            self._dataset.flush()
            if self._tmp_dataset is not None:
                self._tmp_dataset.flush()
        self._environment = self._environments[self._e]
        self._train_policy = self._train_policies[self._e]
        self._test_policy = self._test_policies[self._e]
        self._network = self._networks[self._e]

    def setControllersActive(self, toDisable, active):
        """ Activate controller
        """
        for i in toDisable:
            self._controllers[i].setActive(active)

    def setLearningRate(self, lr):
        """ Set the learning rate for the gradient descent
        """
        self._network.setLearningRate(lr)

    def learningRate(self):
        """ Get the learning rate
        """
        return self._network.learningRate()

    def setDiscountFactor(self, df):
        """ Set the discount factor
        """
        self._network.setDiscountFactor(df)

    def discountFactor(self):
        """ Get the discount factor
        """
        return self._network.discountFactor()

    def overrideNextAction(self, action):
        """ Possibility to override the chosen action. This possibility should be used on the signal OnActionChosen.
        """
        self._selected_action = action

    def avgBellmanResidual(self):
        """ Returns the average training loss on the epoch
        """
        if (len(self._training_loss_averages) == 0):
            return -1
        return np.average(self._training_loss_averages)

    def avgEpisodeVValue(self):
        """ Returns the average V value on the episode (on time steps where a non-random action has been taken)
        """
        if (len(self._Vs_on_last_episode) == 0):
            return -1
        if (np.trim_zeros(self._Vs_on_last_episode) != []):
            return np.average(np.trim_zeros(self._Vs_on_last_episode))
        else:
            return 0

    def totalRewardOverLastTest(self):
        """ Returns the average sum of rewards per episode and the number of episode
        """

        return self._total_mode_reward / self._totalModeNbrEpisode, self._totalModeNbrEpisode

    def statRewardsOverLastTests(self):
        """ Returns the average sum of rewards per episode and the number of episode
        """

        return np.mean(self._mode_rewards), np.var(self._mode_rewards), np.std(
            self._mode_rewards), self._totalModeNbrEpisode

    def bestAction(self):
        """ Returns the best Action
        """
        action = self._network.chooseBestAction(self._state)
        V = max(self._network.qValues(self._state))
        return action, V

    def attach(self, controller):
        """ Append `controller` to the list of attached controllers.

        Raises a TypeError when the object is not an instance of Controller.
        """
        if not isinstance(controller, Controller):
            raise TypeError(
                "The object you try to attach is not a Controller.")
        self._controllers.append(controller)

    def detach(self, controllerIdx):
        return self._controllers.pop(controllerIdx)

    def mode(self):
        return self._mode

    def startMode(self, mode, epochLength, n_episodes=None):
        """ Switch to a non-training mode and start it with a fresh temporary dataset.

        Parameters
        -----------
        mode : int
            Identifier of the mode to start; -1 is refused
        epochLength : int
            Stored as the epoch length used while in this mode
        n_episodes : int or None
            Number of episodes for this mode; None means the value given to run()

        Raises
        ------
        AgentError
            If an episode is in progress or if `mode` is -1.
        """
        if self._in_episode:
            raise AgentError(
                "Trying to start mode while current episode is not yet finished. This method can be "
                "called only *between* episodes for testing and validation.")
        if mode == -1:
            raise AgentError(
                "Mode -1 is reserved and means 'training mode'; use resumeTrainingMode() instead."
            )

        if n_episodes is None:
            self._n_episodes = self._n_episodes_init
        else:
            self._n_episodes = n_episodes
        self._mode = mode
        self._mode_epochs_length = epochLength
        self._total_mode_reward = 0.
        # Drop the previous temporary dataset before building the new one
        del self._tmp_dataset
        self._tmp_dataset = DataSet(
            self._environment,
            self._random_state,
            max_size=self._replay_memory_size,
            only_full_history=self._only_full_history)

    def resumeTrainingMode(self, n_episodes=None):
        self._n_episodes = self._n_episodes_init if n_episodes is None else self._n_episodes
        self._mode = -1

    def summarizeTestPerformance(self, **kwargs):
        if self._mode == -1:
            raise AgentError(
                "Cannot summarize test performance outside test environment.")

        self._environment.summarizePerformance(self._tmp_dataset, **kwargs)

    def generateAndStoreBatch(self):
        self._selected_batch = self._dataset.randomBatch(
            self._batch_size, self._exp_priority)
        return copy.deepcopy(self._selected_batch)

    def train(self):
        """
        This function selects a random batch of data (with self._dataset.randomBatch) and performs a 
        Q-learning iteration (with self._network.train).        
        """
        # We make sure that the number of elements in the replay memory
        # is strictly superior to self._replay_start_size before taking
        # a random batch and perform training
        if self._dataset.n_elems <= self._replay_start_size:
            return

        try:
            states, actions, rewards, next_states, terminals, rndValidIndices = self._dataset.randomBatch(
                self._batch_size, self._exp_priority
            ) if self._selected_batch is None else self._selected_batch
            loss, loss_ind = self._network.train(states, actions, rewards,
                                                 next_states, terminals)
            self._training_loss_averages.append(loss)
            if (self._exp_priority):

                self._dataset.updatePriorities(
                    pow(loss_ind, self._exp_priority) + 0.0001,
                    rndValidIndices[1])
            self._selected_batch = None
        except SliceError as e:
            warn("Training not done - " + str(e), AgentWarning)

    def dumpNetwork(self, fname, nEpoch=-1, path="."):
        """ Dump the parameters of the active network in <path>/nnets/

        Previous dumps of the same network (the file `fname` itself and the
        files `fname.epoch=...`) are removed first.

        Parameters
        -----------
        fname : string
            Name of the file where the network will be dumped
        nEpoch : int
            Epoch number (Optional). When >= 0 the file is named
            "<fname>.epoch=<nEpoch>", otherwise "<fname>" (compressed).
        path : string
            Directory in which the "nnets" directory is created (Optional)
        """
        nnets_dir = os.path.join(path, "nnets")
        try:
            os.makedirs(nnets_dir)
        except OSError:
            # An already existing directory is expected; any other failure
            # (permissions, a file in the way, ...) must not be hidden.
            if not os.path.isdir(nnets_dir):
                raise
        basename = os.path.join(nnets_dir, fname)

        # Only delete dumps of *this* network: a plain substring test would
        # also remove unrelated files whose name merely contains `fname`.
        epoch_prefix = fname + ".epoch="
        for f in os.listdir(nnets_dir):
            if f == fname or f.startswith(epoch_prefix):
                os.remove(os.path.join(nnets_dir, f))

        all_params = self._network.getAllParams()

        if (nEpoch >= 0):
            joblib.dump(all_params, basename + ".epoch={}".format(nEpoch))
        else:
            joblib.dump(all_params, basename, compress=True)

    def storeNetwork(self, **kwargs):
        """ Store a copy of the network in memory
        """
        self._network_memory.append((self._network.getCopy(), kwargs))

    def getNetworks(self):
        """ Store a copy of the network in memory
        """
        return self._network_memory

    def setNetwork(self, fname, nEpoch=-1, path="."):
        """ Set values into the network

        Parameters
        -----------
        fname : string
            Name of the file where the values are
        nEpoch : int
            Epoch number (Optional). When >= 0 the file read is
            "<fname>.epoch=<nEpoch>", otherwise "<fname>".
        path : string
            Directory that contains the "nnets" directory (Optional). Same
            meaning as in dumpNetwork(), so that a network dumped with a given
            path can be loaded back; the default keeps reading from "./nnets".
        """

        basename = os.path.join(path, "nnets", fname)

        # NOTE(review): joblib.load unpickles the file; only load trusted dumps.
        if (nEpoch >= 0):
            all_params = joblib.load(basename + ".epoch={}".format(nEpoch))
        else:
            all_params = joblib.load(basename)

        self._network.setAllParams(all_params)

    def run(self, n_epochs, epoch_length, n_episodes=1):
        """
        This function encapsulates the whole process of the learning.
        It starts by calling the controllers method "onStart", 
        Then it runs a given number of epochs where an epoch is made up of one or many episodes (called with 
        agent._runEpisode) and where an epoch ends up after the number of steps reaches the argument "epoch_length".
        It ends up by calling the controllers method "end".

        Parameters
        -----------
        n_epochs : number of epochs 
            int
        epoch_length : maximum number of steps for a given epoch
            int
        """
        self._n_episodes = n_episodes
        self._n_episodes_init = n_episodes
        for c in self._controllers:
            c.onStart(self)
        i = 0
        while i < n_epochs:
            self._training_loss_averages = []

            if self._mode != -1:
                self._totalModeNbrEpisode = 0
                self._mode_rewards = []
                while self._totalModeNbrEpisode < self._n_episodes:
                    mode_epoch_length = self._mode_epochs_length
                    self._runEpisode(mode_epoch_length)
                    self._mode_rewards.append(self._total_mode_reward)
                    self._total_mode_reward = 0
                    self._totalModeNbrEpisode += 1


#                    mode_epoch_length = self._mode_epochs_length
#                    while mode_epoch_length > 0 and self._totalModeNbrEpisode < self._n_episodes:
#                        self._totalModeNbrEpisode += 1
#                        mode_epoch_length = self._runEpisode(mode_epoch_length)
#
#                        self._mode_rewards.append(self._total_mode_reward)
#                        self._total_mode_reward = 0

            else:
                length = epoch_length
                n_episodes = self._n_episodes
                while n_episodes > 0:
                    while length > 0:
                        length = self._runEpisode(length)
                    n_episodes -= 1
                i += 1

            for c in self._controllers:
                c.onEpochEnd(self)
        self._environment.end()
        for c in self._controllers:
            c.onEnd(self)

    def _runEpisode(self, maxSteps):
        """
        This function runs an episode of learning. An episode ends up when the environment method "inTerminalState" 
        returns True (or when the number of steps reaches the argument "maxSteps")
        
        Parameters
        -----------
        maxSteps : maximum number of steps before automatically ending the episode
            int
        """
        self._in_episode = True
        initState = self._environment.reset(self._mode)
        inputDims = self._environment.inputDimensions()
        for i in range(len(inputDims)):
            if inputDims[i][0] > 1:
                self._state[i][1:] = initState[i][1:]

        self._Vs_on_last_episode = []
        while maxSteps > 0:
            maxSteps -= 1

            obs = self._environment.observe()

            for i in range(len(obs)):
                self._state[i][0:-1] = self._state[i][1:]
                self._state[i][-1] = obs[i]

            V, action, reward = self._step()

            self._Vs_on_last_episode.append(V)
            if self._mode != -1:
                self._total_mode_reward += reward

            is_terminal = self._environment.inTerminalState() or maxSteps == 0

            self._addSample(obs, action, reward, is_terminal)
            for c in self._controllers:
                c.onActionTaken(self)

            if is_terminal:
                break

        self._in_episode = False
        for c in self._controllers:
            c.onEpisodeEnd(self, is_terminal, reward)
        return maxSteps

    def _step(self):
        """
        This method is called at each time step. If the agent is currently in testing mode, and if its *test* replay 
        memory has enough samples, it will select the best action it can. If there are not enough samples, FIXME.
        In the case the agent is not in testing mode, if its replay memory has enough samples, it will select the best 
        action it can with probability 1-CurrentEpsilon and a random action otherwise. If there are not enough samples, 
        it will always select a random action.
        Parameters
        -----------
        state : ndarray
            An ndarray(size=number_of_inputs, dtype='object), where states[input] is a 1+D matrix of dimensions
               input.historySize x "shape of a given ponctual observation for this input".
        Returns
        -------
        action : int
            The id of the action selected by the agent.
        V : float
            Estimated value function of current state.
        """

        action, V = self._chooseAction()
        reward = self._environment.act(action)

        return V, action, reward

    def _addSample(self, ponctualObs, action, reward, is_terminal):
        if self._mode != -1:
            self._tmp_dataset.addSample(ponctualObs,
                                        action,
                                        reward,
                                        is_terminal,
                                        priority=1)
        else:
            self._dataset.addSample(ponctualObs,
                                    action,
                                    reward,
                                    is_terminal,
                                    priority=1)

    def _chooseAction(self):

        if self._mode != -1:
            # Act according to the test policy if not in training mode
            action, V = self._test_policy.action(self._state)
        else:
            if self._dataset.n_elems > self._replay_start_size:
                # follow the train policy
                action, V = self._train_policy.action(
                    self._state
                )  #is self._state the only way to store/pass the state?
            else:
                # Still gathering initial data: choose dummy action
                action, V = self._train_policy.randomAction()

        for c in self._controllers:
            c.onActionChosen(self, action)
        return action, V
示例#15
0
class Runner:
    def __init__(self, nn_lr=0.005, kg_lr=0.0005, l2_penalty=0.0001, conv_window=3, hidden_dim=230, k=50, use_alt=-1, load_model=None):
        """Load the data, index and pad every example, then build the model and its two optimizers.

        nn_lr / kg_lr are the learning rates of the two Adam optimizers and
        l2_penalty their weight decay. conv_window, hidden_dim and k are passed
        to the Joint model. A negative use_alt selects DataSet (and its split());
        otherwise AltDataSet is used and use_alt is given to load_from_file().
        When load_model is set, the model weights are loaded from that file.
        """
        self.vocab, pretrained = get_pretrained_skipgram()
        self.word2idx = {word: idx for idx, word in enumerate(self.vocab)}

        if use_alt < 0:
            self.dataset = DataSet()
        else:
            self.dataset = AltDataSet()
        self.dataset.featurize_all()
        self.rels = sorted(self.dataset.all_relations)
        self.ents = sorted(self.dataset.all_entities)
        self.rel2idx = {rel: idx for idx, rel in enumerate(self.rels)}
        self.idx2rel = {idx: rel for rel, idx in self.rel2idx.items()}
        self.ent2idx = {ent: idx for idx, ent in enumerate(self.ents)}
        if use_alt < 0:
            self.train_data, self.test_data = self.dataset.split()
        else:
            self.train_data, self.test_data = self.dataset.load_from_file(use_alt)

        def to_indices(text):
            # Words missing from the vocabulary are mapped to index 0
            return [self.word2idx.get(word, 0) for word in text.split(' ')]

        all_infos = list(self.train_data.values()) + list(self.test_data.values())

        # First pass: index the examples and find the longest one
        self.max_len = 0
        for info in all_infos:
            for example in info.examples:
                # A 0 separates the pre / mid / post parts
                example.indices = (
                    to_indices(example.pre) + [0] +
                    to_indices(example.mid) + [0] +
                    to_indices(example.post)
                )
                self.set_position(example)
                self.max_len = max(self.max_len, len(example.indices))
        # Second pass: pad everything to the longest length
        for info in all_infos:
            for example in info.examples:
                self.pad_example(example)

        self.model = Joint(
            num_relations=len(self.rels),
            num_entities=len(self.ents),
            pretrained=pretrained,
            conv_window=conv_window,
            hidden_dim=hidden_dim,
            k=k,
        )
        if CUDA:
            self.model = self.model.cuda()
        if load_model:
            self.model.load_state_dict(torch.load(load_model))

        # Both optimizers work on the same parameters; only the learning rate differs
        self.nn_optimizer = optim.Adam(
            self.model.parameters(),
            lr=nn_lr,
            weight_decay=l2_penalty,
        )
        self.kg_optimizer = optim.Adam(
            self.model.parameters(),
            lr=kg_lr,
            weight_decay=l2_penalty,
        )

    def set_position(self, example):
        pre_len = len(example.pre.split(' '))
        mid_len = len(example.mid.split(' '))
        post_len = len(example.post.split(' '))
        example.pos_pos_1 = [0] * pre_len + list(range(mid_len + post_len + 2))
        example.neg_pos_1 = list(range(pre_len + 1))
        example.neg_pos_1.reverse()
        example.neg_pos_1 += [0] * (mid_len + post_len + 1)
        example.entity_pos_1 = [0] * (pre_len + mid_len + post_len + 2)
        example.entity_pos_1[pre_len] = 1
        example.pos_pos_2 = [0] * (pre_len + mid_len + 1) + list(range(post_len + 1))
        example.neg_pos_2 = list(range(pre_len + mid_len + 2))
        example.neg_pos_2.reverse()
        example.neg_pos_2 += [0] * post_len
        example.entity_pos_2 = [0] * (pre_len + mid_len + post_len + 2)
        example.entity_pos_2[pre_len + mid_len + 1] = 1

        if example.h_idx > example.t_idx:
            tmp = example.pos_pos_1
            example.pos_pos_1 = example.pos_pos_2
            example.pos_pos_2 = tmp
            tmp = example.neg_pos_1
            example.neg_pos_1 = example.neg_pos_2
            example.neg_pos_2 = tmp
            tmp = example.entity_pos_1
            example.entity_pos_1 = example.entity_pos_2
            example.entity_pos_2 = tmp
        self.cap_position(example.pos_pos_1)
        self.cap_position(example.neg_pos_1)
        self.cap_position(example.pos_pos_2)
        self.cap_position(example.neg_pos_2)
        self.cap_position(example.entity_pos_1)
        self.cap_position(example.entity_pos_2)

    def cap_position(self, position):
        for i in range(len(position)):
            if position[i] > 99:
                position[i] = 99

    def pad_example(self, example):
        example.indices += [0] * (self.max_len - len(example.indices))
        example.pos_pos_1 += [0] * (self.max_len - len(example.pos_pos_1))
        example.neg_pos_1 += [0] * (self.max_len - len(example.neg_pos_1))
        example.entity_pos_1 += [0] * (self.max_len - len(example.entity_pos_1))
        example.pos_pos_2 += [0] * (self.max_len - len(example.pos_pos_2))
        example.neg_pos_2 += [0] * (self.max_len - len(example.neg_pos_2))
        example.entity_pos_2 += [0] * (self.max_len - len(example.entity_pos_2))
        assert len(example.pos_pos_1) == self.max_len

    def train(self, iters=10, batch_size=100, persist_path='../trained_models/han.mod'):
        """Train the model for `iters` passes over the sampled training pairs.

        In each pass, all pairs without the '[NA]' relation are used, plus a
        random sample of '[NA]' pairs. Every `batch_size` pairs, one step of the
        nn optimizer (BCE loss) and one step of the kg optimizer (sum of three
        NLL losses) are performed. Every 10th pass (starting with the first one)
        the losses are printed, the weights are saved to `persist_path` and the
        precision/recall curve of predict('tmp') is printed.
        """
        def to_long(values):
            tensor = torch.LongTensor(values)
            return tensor.cuda() if CUDA else tensor

        def to_float(values):
            tensor = torch.FloatTensor(values)
            return tensor.cuda() if CUDA else tensor

        print("start training")
        self.model.train()

        # relation -> list of the training pairs that carry it
        rel2pairs = {}
        for pair, info in self.train_data.items():
            for rel in info.relations:
                rel2pairs.setdefault(rel, []).append(pair)

        nn_losser = nn.BCELoss()
        kg_losser = nn.NLLLoss()

        position_keys = (
            'pos_pos_1', 'pos_pos_2',
            'neg_pos_1', 'neg_pos_2',
            'entity_pos_1', 'entity_pos_2',
        )

        # `num` and `batch` live across passes: an incomplete batch is carried
        # over to the next pass
        num = 0
        batch = defaultdict(list)
        for i in range(iters):
            positives = [(pair, info) for pair, info in self.train_data.items() if '[NA]' not in info.relations]
            negatives = [(pair, info) for pair, info in self.train_data.items() if '[NA]' in info.relations]
            sample_space = positives + sample(negatives, len(positives) // len(self.rels))
            shuffle(sample_space)

            total_nn_loss = 0.0
            total_kg_loss = 0.0
            for pair, info in sample_space:
                num += 1
                # for nn
                batch['h_idx'].append(self.ent2idx[pair[0]])
                batch['t_idx'].append(self.ent2idx[pair[1]])
                examples = [choice(info.examples) for _ in range(20)]
                batch['X'].append([ex.indices for ex in examples])
                for key in position_keys:
                    batch[key].append([getattr(ex, key) for ex in examples])
                # The target weight is shared equally by the relations of the pair
                target = [0] * len(self.rel2idx)
                for rel in info.relations:
                    target[self.rel2idx[rel]] = 1.0 / len(info.relations)
                batch['target'].append(target)
                # for kg
                r = choice(info.relations)
                all_h_indices = [self.ent2idx[t[0]] for t in rel2pairs[r]]
                all_t_indices = [self.ent2idx[t[1]] for t in rel2pairs[r]]
                batch['r_idx'].append(self.rel2idx[r])
                batch['h_indices'].append([choice(all_h_indices) for _ in range(20)])
                batch['t_indices'].append([choice(all_t_indices) for _ in range(20)])

                if num != batch_size:
                    continue

                nn_output = self.model(
                    X=to_long(batch['X']),
                    positions=[to_long(batch[key]) for key in position_keys],
                    h_idx=to_long(batch['h_idx']),
                    t_idx=to_long(batch['t_idx']),
                    r_idx=None,
                    h_indices=None,
                    t_indices=None,
                    nn=True,
                )

                nn_loss = nn_losser(nn_output, to_float(batch['target']))
                self.nn_optimizer.zero_grad()
                nn_loss.backward()
                self.nn_optimizer.step()
                total_nn_loss += nn_loss.item()

                # Drop the references before the second forward pass
                nn_output = None
                nn_loss = None
                if CUDA:
                    torch.cuda.empty_cache()

                pr, ph, pt = self.model(
                    X=None,
                    positions=None,
                    h_idx=to_long(batch['h_idx']),
                    t_idx=to_long(batch['t_idx']),
                    r_idx=to_long(batch['r_idx']),
                    h_indices=to_long(batch['h_indices']),
                    t_indices=to_long(batch['t_indices']),
                    nn=False,
                )

                kg_loss = (
                    kg_losser(pr, to_long(batch['r_idx'])) +
                    kg_losser(ph, to_long(batch['h_idx'])) +
                    kg_losser(pt, to_long(batch['t_idx']))
                )
                self.kg_optimizer.zero_grad()
                kg_loss.backward()
                self.kg_optimizer.step()
                total_kg_loss += kg_loss.item()

                num = 0
                batch = defaultdict(list)

            if i % 10 == 0:
                print("Iter {} nn loss {} kg loss {}".format(i, total_nn_loss, total_kg_loss))
                torch.save(self.model.state_dict(), persist_path)

                predict_scores, facts, predict_rels, pairs = self.predict('tmp')
                precision, recall, _ = precision_recall_curve(facts, predict_scores, pos_label=1)
                self.print_curve(precision, recall)
                # predict() leaves the model in eval mode
                self.model.train()


    def predict(self, result_filename, topk=1):
        """Score every test pair, write the predictions to a file and return them.

        The model is put in eval mode and run without gradient tracking. Pairs
        without the '[NA]' relation are processed first. For each pair, the
        `topk` best-scored relations are kept; a prediction is a fact (1) when
        the relation belongs to the relations of the pair, else 0.

        One line "score,fact,relation,head,tail" per prediction is written to
        `result_filename`.

        Returns
        -------
        (predict_scores, facts, predict_rels, pairs) : four lists of equal
        length, `topk` entries per test pair.
        """
        def to_long(values):
            tensor = torch.LongTensor(values)
            return tensor.cuda() if CUDA else tensor

        self.model.eval()

        predict_scores = []
        predict_rels = []
        facts = []
        pairs = []
        count = 0
        to_be_tested = [t for t in self.test_data.items() if '[NA]' not in t[1].relations]
        to_be_tested += [t for t in self.test_data.items() if '[NA]' in t[1].relations]

        position_keys = (
            'pos_pos_1', 'pos_pos_2',
            'neg_pos_1', 'neg_pos_2',
            'entity_pos_1', 'entity_pos_2',
        )

        # torch.no_grad() only disables gradient tracking when it is used as a
        # context manager (or decorator); a bare call has no effect.
        with torch.no_grad():
            for pair, info in to_be_tested:
                h_idx = self.ent2idx[pair[0]]
                t_idx = self.ent2idx[pair[1]]
                # A batch of one pair holding all the examples of that pair
                output = self.model(
                    X=to_long([[ex.indices for ex in info.examples]]),
                    positions=[
                        to_long([[getattr(ex, key) for ex in info.examples]])
                        for key in position_keys
                    ],
                    h_idx=to_long([h_idx]),
                    t_idx=to_long([t_idx]),
                    r_idx=None,
                    h_indices=None,
                    t_indices=None,
                    nn=True,
                )
                pairs.extend([pair] * topk)
                top, top_indices = torch.topk(output.squeeze(), k=topk, dim=-1)
                best_rels = [self.idx2rel[idx] for idx in top_indices.tolist()]
                predict_rels.extend(best_rels)
                facts.extend(1 if rel in info.relations else 0 for rel in best_rels)
                predict_scores.extend(top.tolist())

                count += 1
                if count % 100 == 0:
                    print("Predicted {}".format(count))

        assert len(predict_scores) == len(facts) == len(predict_rels) == len(pairs)
        with open(result_filename, 'w') as file:
            for predict_score, fact, predict_rel, pair in zip(predict_scores, facts, predict_rels, pairs):
                file.write("{},{},{},{},{}\n".format(predict_score, fact, predict_rel, pair[0], pair[1]))
        return predict_scores, facts, predict_rels, pairs

    def print_curve(self, precision, recall):
        def find_closest_idx(arr, tgt):
            idx = -1
            delta = 100000
            for i, n in enumerate(arr):
                new_delta = abs(n - tgt)
                if new_delta < delta:
                    delta = new_delta
                    idx = i
            return idx
        thresholds = [0.05, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95]
        results = [precision[find_closest_idx(recall, t)] for t in thresholds]
        print(results)  
示例#16
0
from config import VALIDATION_SET_PATH, TRAINING_SET_PATH, EPOCHS_TO_TRAIN

from data.dataset import DataSet
from data.dataset_generator import DataSetGenerator

from model import PBPModel

from training import fit_epochs

if __name__ == '__main__':
    # Script entry point: build the model, the training set generator and the
    # validation set (in that order), then fit for EPOCHS_TO_TRAIN epochs.
    fit_epochs(
        EPOCHS_TO_TRAIN,
        PBPModel().get_model(),
        DataSetGenerator(data_root=TRAINING_SET_PATH),
        DataSet(data_root=VALIDATION_SET_PATH),
    )