def one_fold(fold_int, is_nine_folds):
    fold_id = str(fold_int)
    if is_nine_folds:
        fold_path = 'data/Folds_9_Emotions/fold_' + fold_id
        num_labels = 9
    else:
        fold_path = 'data/Folds/fold_' + fold_id
        num_labels = 16

    vocab_size = 5000
    pad_len = 30
    batch_size = 64
    embedding_dim = 200
    hidden_dim = 600

    es = EarlyStop(2)
    word2id, id2word = build_vocab(fold_path, vocab_size, use_unk=True)
    train_data = DataSet(os.path.join(fold_path, 'train.csv'), pad_len,
                         word2id, num_labels)
    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)

    test_data = DataSet(os.path.join(fold_path, 'test.csv'), pad_len, word2id,
                        num_labels)
    test_loader = DataLoader(test_data, batch_size=batch_size)

    model = AttentionLSTMClassifier(embedding_dim, hidden_dim, vocab_size,
                                    word2id, num_labels, batch_size)
    model.load_glove_embedding(id2word)
    model.cuda()

    optimizer = optim.Adam(model.parameters())
    loss_criterion = nn.MSELoss()
    for epoch in range(4):
        print('Epoch:', epoch, '===================================')
        train_loss = 0
        for i, (data, seq_len, label) in enumerate(train_loader):
            data, label, seq_len = sort_batch(data, label, seq_len.view(-1))
            y_pred = model(Variable(data).cuda(), seq_len)
            optimizer.zero_grad()
            loss = loss_criterion(y_pred, Variable(label).cuda())
            loss.backward()
            optimizer.step()
            train_loss += loss.data[0]
        pred_list = []
        gold_list = []
        test_loss = 0
        for i, (data, seq_len, label) in enumerate(test_loader):
            data, label, seq_len = sort_batch(data, label, seq_len.view(-1))
            y_pred = model(Variable(data, volatile=True).cuda(), seq_len)
            loss = loss_criterion(y_pred,
                                  Variable(label, volatile=True).cuda())
            test_loss += loss.data[0]
            pred_list.append(y_pred.data.cpu().numpy())
            gold_list.append(label.numpy())

        print("Train Loss: ", train_loss, " Evaluation: ", test_loss)
        es.new_loss(test_loss)
        if es.if_stop():
            print('Stopping early: the model is starting to overfit')
            break
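
These loops use the pre-0.4 PyTorch API (Variable, volatile=True, loss.data[0]). A minimal sketch of the same evaluation pass on PyTorch 0.4 or newer, assuming the model, loss_criterion, test_loader and sort_batch defined above:

model.eval()
test_loss = 0.0
with torch.no_grad():                      # replaces Variable(..., volatile=True)
    for data, seq_len, label in test_loader:
        data, label, seq_len = sort_batch(data, label, seq_len.view(-1))
        y_pred = model(data.cuda(), seq_len)
        test_loss += loss_criterion(y_pred, label.cuda()).item()  # replaces loss.data[0]
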
def one_fold(X_train, y_train, X_test, y_test):
    num_labels = NUM_CLASS
    vocab_size = 20000
    pad_len = 30
    batch_size = 100
    embedding_dim = 200
    hidden_dim = 400
    __use_unk = False
    es = EarlyStop(2)
    word2id, id2word = build_vocab(X_train, vocab_size, use_unk=__use_unk)

    train_data = DataSet(X_train, y_train, pad_len, word2id, num_labels, use_unk=__use_unk)
    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=False)

    test_data = DataSet(X_test, y_test, pad_len, word2id, num_labels, use_unk=__use_unk)
    test_loader = DataLoader(test_data, batch_size=batch_size)

    model = AttentionLSTMClassifier(embedding_dim, hidden_dim, vocab_size, word2id,
                                    num_labels, batch_size, use_att=True)
    model.load_glove_embedding(id2word)
    model.cuda()

    optimizer = optim.Adam(model.parameters())
    loss_criterion = nn.MSELoss()
    for epoch in range(4):
        print('Epoch:', epoch, '===================================')
        train_loss = 0
        for i, (data, seq_len, label) in enumerate(train_loader):
            # print(i)
            data, label, seq_len = sort_batch(data, label, seq_len.view(-1))
            y_pred = model(Variable(data).cuda(), seq_len)
            optimizer.zero_grad()
            loss = loss_criterion(y_pred, Variable(label).cuda())
            loss.backward()
            optimizer.step()
            train_loss += loss.data[0]
        pred_list = []
        gold_list = []
        test_loss = 0
        for i, (data, seq_len, label) in enumerate(test_loader):
            data, label, seq_len = sort_batch(data, label, seq_len.view(-1))
            y_pred = model(Variable(data, volatile=True).cuda(), seq_len)
            loss = loss_criterion(y_pred, Variable(label, volatile=True).cuda())
            test_loss += loss.data[0]
            pred_list.append(y_pred.data.cpu().numpy())
            gold_list.append(label.numpy())

        print("Train Loss: ", train_loss, " Evaluation: ", test_loss)
        es.new_loss(test_loss)
        if es.if_stop():
            print('Stopping early: the model is starting to overfit')
            break

    return np.concatenate(pred_list, axis=0), np.concatenate(gold_list, axis=0)
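
The returned arrays are raw per-label scores and multi-hot gold labels. A minimal scoring sketch with scikit-learn, assuming a fixed 0.5 decision threshold (the CalculateFM helper used elsewhere in these examples is not shown):

from sklearn.metrics import f1_score

pred, gold = one_fold(X_train, y_train, X_test, y_test)
pred_bin = (pred >= 0.5).astype(int)       # binarize the per-label scores
gold_bin = gold.astype(int)
print('Macro F1:', f1_score(gold_bin, pred_bin, average='macro'),
      'Micro F1:', f1_score(gold_bin, pred_bin, average='micro'))
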
Example 3
    def load_config(self, config_path: Path, data_path: Path) -> None:
        with config_path.open() as f:
            self.config = config = parse_config(yaml.load(f))
        if self.device == 'cpu':
            warnings.warn("Using CPU will be slow")
        elif self.dist:
            if self.gpu is not None:
                config['mini_batch_size'] = int(config['mini_batch_size'] /
                                                self.ngpus_per_node)
                config['num_workers'] = int(
                    (config['num_workers'] + self.ngpus_per_node - 1) /
                    self.ngpus_per_node)
        self.train_loader, self.train_sampler,\
            self.test_loader, self.num_classes = get_data(
                name=config['dataset'], root=data_path,
                mini_batch_size=config['mini_batch_size'],
                num_workers=config['num_workers'],
                dist=self.dist)
        self.criterion = torch.nn.CrossEntropyLoss().cuda(self.gpu) if \
            config['loss'] == 'cross_entropy' else None
        if np.less(float(config['early_stop_threshold']), 0):
            print("AdaS: Notice: early stop will not be used as it was " +
                  f"set to {config['early_stop_threshold']}, " +
                  "training till completion")
        elif config['optimizer'] != 'SGD' and \
                config['scheduler'] != 'AdaS':
            print("AdaS: Notice: early stop will not be used as it is not " +
                  "SGD with AdaS, training till completion")
            config['early_stop_threshold'] = -1.
        self.early_stop = EarlyStop(
            patience=int(config['early_stop_patience']),
            threshold=float(config['early_stop_threshold']))
        cudnn.benchmark = True
        if self.resume is not None:
            if self.gpu is None:
                self.checkpoint = torch.load(str(self.resume))
            else:
                self.checkpoint = torch.load(str(self.resume),
                                             map_location=f'cuda:{self.gpu}')
            self.start_epoch = self.checkpoint['epoch']
            self.start_trial = self.checkpoint['trial']
            self.best_acc1 = self.checkpoint['best_acc1']
            print(f'Resuming config for trial {self.start_trial} at ' +
                  f'epoch {self.start_epoch}')
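
A side note on this snippet: yaml.load(f) without an explicit Loader is deprecated since PyYAML 5.1 and emits a warning. Assuming the config file uses only standard YAML tags, a safe equivalent would be:

config = parse_config(yaml.load(f, Loader=yaml.SafeLoader))
# or simply: config = parse_config(yaml.safe_load(f))
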
Example 4
def one_fold(fold_int, is_nine_folds):
    fold_id = str(fold_int)
    if is_nine_folds:
        fold_path = 'data/Folds_9_Emotions/fold_' + fold_id
        num_labels = 9
    else:
        fold_path = 'data/Folds/fold_' + fold_id
        num_labels = 16

    vocab_size = 5000
    pad_len = 30
    batch_size = 64
    hidden_dim = 600

    es = EarlyStop(2)
    word2id, id2word = build_vocab(fold_path, vocab_size, use_unk=True)
    embedding_dim = len(word2id)
    train_data = DataSet(os.path.join(fold_path, 'train.csv'), pad_len,
                         word2id, num_labels)
    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)

    test_data = DataSet(os.path.join(fold_path, 'test.csv'), pad_len, word2id,
                        num_labels)
    test_loader = DataLoader(test_data, batch_size=batch_size)

    model = AttentionLSTMClassifier(embedding_dim, hidden_dim, vocab_size,
                                    word2id, num_labels, batch_size)
    model.load_bog_embedding(word2id)
    model.cuda()

    optimizer = optim.Adam(
        filter(lambda p: p.requires_grad, model.parameters()))
    loss_criterion = nn.BCELoss()
    for epoch in range(4):
        print('Epoch:', epoch, '===================================')
        train_loss = 0
        for i, (data, seq_len, label) in enumerate(train_loader):
            data, label, seq_len = sort_batch(data, label, seq_len.view(-1))
            y_pred = model(Variable(data).cuda(), seq_len)
            optimizer.zero_grad()
            loss = loss_criterion(y_pred, Variable(label).cuda())
            loss.backward()
            optimizer.step()
            train_loss += loss.data[0]
        pred_list = []
        gold_list = []
        test_loss = 0
        for i, (data, seq_len, label) in enumerate(test_loader):
            data, label, seq_len = sort_batch(data, label, seq_len.view(-1))
            y_pred = model(Variable(data, volatile=True).cuda(), seq_len)
            loss = loss_criterion(y_pred,
                                  Variable(label, volatile=True).cuda())
            test_loss += loss.data[0]
            pred_list.append(y_pred.data.cpu().numpy())
            gold_list.append(label.numpy())

        print("Train Loss: ", train_loss, " Evaluation: ", test_loss)
        es.new_loss(test_loss)
        if es.if_stop():
            print('Stopping early: the model is starting to overfit')
            break
    f_ma = []
    f_mi = []
    for threshold in range(0, 100, 5):
        threshold /= 100
        tmp = CalculateFM(np.concatenate(pred_list, axis=0),
                          np.concatenate(gold_list, axis=0),
                          threshold=threshold)
        f_ma.append(tmp['MacroFM'])
        f_mi.append(tmp['MicroFM'])
    return f_ma, f_mi
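
Since f_ma and f_mi each hold one score per threshold in range(0, 100, 5), selecting an operating point is a small post-processing step. A sketch, assuming the caller wants the threshold that maximizes macro F1:

thresholds = [t / 100 for t in range(0, 100, 5)]
f_ma, f_mi = one_fold(0, is_nine_folds=False)
best = max(range(len(f_ma)), key=lambda i: f_ma[i])
print('best threshold:', thresholds[best],
      'macro F1:', f_ma[best], 'micro F1:', f_mi[best])
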
Example 5
    def __init__(self, config):
        self.config = config

        # start tensorboard summary writer
        self.writer = SummaryWriter(config.log_path)

        # load training dataset generator
        if self.config.random_flip or self.config.random_crop:
            self.train_loader = LMDBDataLoaderAugmenter(
                self.config, self.config.train_source)
        else:
            self.train_loader = LMDBDataLoader(self.config,
                                               self.config.train_source)
        print(f"Training with {len(self.train_loader.dataset)} images.")

        # loads validation dataset generator if a validation dataset is given
        if self.config.val_source is not None:
            self.val_loader = LMDBDataLoader(self.config,
                                             self.config.val_source, False)

        # creates model
        self.img2pose_model = img2poseModel(
            depth=self.config.depth,
            min_size=self.config.min_size,
            max_size=self.config.max_size,
            device=self.config.device,
            pose_mean=self.config.pose_mean,
            pose_stddev=self.config.pose_stddev,
            distributed=self.config.distributed,
            gpu=self.config.gpu,
            threed_68_points=np.load(self.config.threed_68_points),
            threed_5_points=np.load(self.config.threed_5_points),
        )
        # optimizer for the backbone and heads

        if args.optimizer == "Adam":
            self.optimizer = optim.Adam(
                self.img2pose_model.fpn_model.parameters(),
                lr=self.config.lr,
                weight_decay=self.config.weight_decay,
            )
        elif args.optimizer == "SGD":
            self.optimizer = optim.SGD(
                self.img2pose_model.fpn_model.parameters(),
                lr=self.config.lr,
                weight_decay=self.config.weight_decay,
                momentum=self.config.momentum,
            )
        else:
            raise Exception(
                "No optimizer founded, please select between SGD or Adam.")

        # loads a model with optimizer so that it can continue training where it stopped
        if self.config.resume_path:
            print(f"Resuming training from {self.config.resume_path}")
            load_model(
                self.img2pose_model.fpn_model,
                self.config.resume_path,
                model_only=False,
                optimizer=self.optimizer,
                cpu_mode=str(self.config.device) == "cpu",
            )

        # loads a pretrained model without loading the optimizer
        if self.config.pretrained_path:
            print(
                f"Loading pretrained weights from {self.config.pretrained_path}"
            )
            load_model(
                self.img2pose_model.fpn_model,
                self.config.pretrained_path,
                model_only=True,
                cpu_mode=str(self.config.device) == "cpu",
            )

        # saves configuration to file for easier retrieval later
        print(self.config)
        self.save_file(self.config, "config.txt")

        # saves optimizer config to file for easier retrieval later
        print(self.optimizer)

        self.save_file(self.optimizer, "optimizer.txt")

        self.tensorboard_loss_every = max(len(self.train_loader) // 100, 1)
        # self.evaluate_every = max(len(self.train_loader) // 1, 1)

        # reduce learning rate when the validation loss stops decreasing
        if self.config.lr_plateau:
            self.scheduler = ReduceLROnPlateau(
                self.optimizer,
                mode="min",
                factor=0.1,
                patience=3,
                verbose=True,
                threshold=0.001,
                cooldown=1,
                min_lr=0.00001,
            )

        # stops training before the defined number of epochs if the validation loss stops decreasing
        if self.config.early_stop:
            self.early_stop = EarlyStop(mode="min", patience=5)
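
ReduceLROnPlateau is stepped with the monitored metric rather than once per epoch without arguments. A minimal sketch of how the scheduler configured above would typically be driven, assuming a val_loss computed on self.val_loader each epoch (the training loop itself is not part of this snippet):

# at the end of each epoch, after evaluating on the validation set
if self.config.lr_plateau:
    self.scheduler.step(val_loss)  # ReduceLROnPlateau reacts to the metric, not the epoch count
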
Example 6
class TrainingAgent:
    config: Dict[str, Any] = None
    train_loader = None
    test_loader = None
    train_sampler = None
    num_classes: int = None
    network: torch.nn.Module = None
    optimizer: torch.optim.Optimizer = None
    scheduler = None
    loss = None
    output_filename: Path = None
    checkpoint = None

    def __init__(self,
                 config_path: Path,
                 device: str,
                 output_path: Path,
                 data_path: Path,
                 checkpoint_path: Path,
                 resume: Path = None,
                 save_freq: int = 25,
                 gpu: int = None,
                 ngpus_per_node: int = 0,
                 world_size: int = -1,
                 rank: int = -1,
                 dist: bool = False,
                 mpd: bool = False,
                 dist_url: str = None,
                 dist_backend: str = None) -> None:

        self.gpu = gpu
        self.mpd = mpd
        self.dist = dist
        self.rank = rank
        self.best_acc1 = 0.
        self.start_epoch = 0
        self.start_trial = 0
        self.device = device
        self.resume = resume
        self.dist_url = dist_url
        self.save_freq = save_freq
        self.world_size = world_size
        self.dist_backend = dist_backend
        self.ngpus_per_node = ngpus_per_node

        self.data_path = data_path
        self.output_path = output_path
        self.checkpoint_path = checkpoint_path

        self.load_config(config_path, data_path)
        print("AdaS: Experiment Configuration")
        print("-" * 45)
        for k, v in self.config.items():
            if isinstance(v, list) or isinstance(v, dict):
                print(f"    {k:<20} {v}")
            else:
                print(f"    {k:<20} {v:<20}")
        print("-" * 45)

    def load_config(self, config_path: Path, data_path: Path) -> None:
        with config_path.open() as f:
            self.config = config = parse_config(yaml.load(f))
        if self.device == 'cpu':
            warnings.warn("Using CPU will be slow")
        elif self.dist:
            if self.gpu is not None:
                config['mini_batch_size'] = int(config['mini_batch_size'] /
                                                self.ngpus_per_node)
                config['num_workers'] = int(
                    (config['num_workers'] + self.ngpus_per_node - 1) /
                    self.ngpus_per_node)
        self.train_loader, self.train_sampler,\
            self.test_loader, self.num_classes = get_data(
                name=config['dataset'], root=data_path,
                mini_batch_size=config['mini_batch_size'],
                num_workers=config['num_workers'],
                dist=self.dist)
        self.criterion = torch.nn.CrossEntropyLoss().cuda(self.gpu) if \
            config['loss'] == 'cross_entropy' else None
        if np.less(float(config['early_stop_threshold']), 0):
            print("AdaS: Notice: early stop will not be used as it was " +
                  f"set to {config['early_stop_threshold']}, " +
                  "training till completion")
        elif config['optimizer'] != 'SGD' and \
                config['scheduler'] != 'AdaS':
            print("AdaS: Notice: early stop will not be used as it is not " +
                  "SGD with AdaS, training till completion")
            config['early_stop_threshold'] = -1.
        self.early_stop = EarlyStop(
            patience=int(config['early_stop_patience']),
            threshold=float(config['early_stop_threshold']))
        cudnn.benchmark = True
        if self.resume is not None:
            if self.gpu is None:
                self.checkpoint = torch.load(str(self.resume))
            else:
                self.checkpoint = torch.load(str(self.resume),
                                             map_location=f'cuda:{self.gpu}')
            self.start_epoch = self.checkpoint['epoch']
            self.start_trial = self.checkpoint['trial']
            self.best_acc1 = self.checkpoint['best_acc1']
            print(f'Resuming config for trial {self.start_trial} at ' +
                  f'epoch {self.start_epoch}')
        # self.reset()
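        # Note on the distributed branch above: when one process is spawned
        # per GPU, the global mini-batch size and worker count are split
        # across processes. With hypothetical values mini_batch_size=256,
        # num_workers=8 and ngpus_per_node=4, each process would use
        # int(256 / 4) = 64 samples per step and int((8 + 4 - 1) / 4) = 2
        # data-loading workers (a ceiling division).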

    def reset(self, learning_rate: float) -> None:
        self.performance_statistics = dict()
        self.network = get_network(name=self.config['network'],
                                   num_classes=self.num_classes)
        self.metrics = Metrics(list(self.network.parameters()),
                               p=self.config['p'])
        # TODO add other parallelisms
        if self.device == 'cpu':
            print("Resetting cpu-based network")
        elif self.dist:
            if self.gpu is not None:
                torch.cuda.set_device(self.gpu)
                self.network.cuda(self.gpu)
                self.network = torch.nn.parallel.DistributedDataParallel(
                    self.network, device_ids=[self.gpu])
            else:
                self.network.cuda()
                self.network = torch.nn.parallel.DistributedDataParallel(
                    self.network)
        elif self.gpu is not None:
            torch.cuda.set_device(self.gpu)
            self.network = self.network.cuda(self.gpu)
        else:
            if isinstance(self.network, VGG):
                self.network.features = torch.nn.DataParallel(
                    self.network.features)
                self.network.cuda()
            else:
                self.network = torch.nn.DataParallel(self.network)
        self.optimizer, self.scheduler = get_optimizer_scheduler(
            optim_method=self.config['optimizer'],
            lr_scheduler=self.config['scheduler'],
            init_lr=learning_rate,
            net_parameters=self.network.parameters(),
            listed_params=list(self.network.parameters()),
            train_loader_len=len(self.train_loader),
            mini_batch_size=self.config['mini_batch_size'],
            max_epochs=self.config['max_epochs'],
            optimizer_kwargs=self.config['optimizer_kwargs'],
            scheduler_kwargs=self.config['scheduler_kwargs'])
        self.early_stop.reset()

    def train(self) -> None:
        if not isinstance(self.config['init_lr'], list):
            list_lr = [self.config['init_lr']]
        else:
            list_lr = self.config['init_lr']
        for learning_rate in list_lr:
            lr_output_path = self.output_path / f'lr-{learning_rate}'
            lr_output_path.mkdir(exist_ok=True, parents=True)
            for trial in range(self.start_trial, self.config['n_trials']):
                self.reset(learning_rate)
                if trial == self.start_trial and self.resume is not None:
                    print("Resuming Network/Optimizer")
                    self.network.load_state_dict(
                        self.checkpoint['state_dict_network'])
                    self.optimizer.load_state_dict(
                        self.checkpoint['state_dict_optimizer'])
                    if not isinstance(self.scheduler, AdaS) \
                            and self.scheduler is not None:
                        self.scheduler.load_state_dict(
                            self.checkpoint['state_dict_scheduler'])
                    else:
                        self.metrics.historical_metrics = \
                            self.checkpoint['historical_metrics']
                    epochs = range(self.start_epoch, self.config['max_epochs'])
                    self.output_filename = self.checkpoint['output_filename']
                    self.performance_statistics = self.checkpoint[
                        'performance_statistics']
                else:
                    epochs = range(0, self.config['max_epochs'])
                    self.output_filename = "results_" +\
                        f"date={datetime.now().strftime('%Y-%m-%d-%H-%M-%S')}_" +\
                        f"trial={trial}_" +\
                        f"{self.config['network']}_" +\
                        f"{self.config['dataset']}_" +\
                        f"{self.config['optimizer']}" +\
                        '_'.join([f"{k}={v}" for k, v in
                                  self.config['optimizer_kwargs'].items()]) +\
                        f"_{self.config['scheduler']}" +\
                        '_'.join([f"{k}={v}" for k, v in
                                  self.config['scheduler_kwargs'].items()]) +\
                        f"_LR={learning_rate}" +\
                        ".xlsx".replace(' ', '-')
                self.output_filename = str(lr_output_path /
                                           self.output_filename)
                self.run_epochs(trial, epochs)

    def run_epochs(self, trial: int, epochs: List[int]) -> None:
        for epoch in epochs:
            if self.dist:
                self.train_sampler.set_epoch(epoch)
            start_time = time.time()
            train_loss, (train_acc1,
                         train_acc5) = self.epoch_iteration(trial, epoch)
            test_loss, (test_acc1, test_acc5) = self.validate(epoch)
            end_time = time.time()
            if isinstance(self.scheduler, StepLR):
                self.scheduler.step()
            total_time = time.time()
            scheduler_string = f" w/ {self.config['scheduler']}" if \
                self.scheduler is not None else ''
            print(
                f"{self.config['optimizer']}{scheduler_string} " +
                f"on {self.config['dataset']}: " +
                f"T {trial + 1}/{self.config['n_trials']} | " +
                f"E {epoch + 1}/{epochs[-1] + 1} Ended | " +
                "E Time: {:.3f}s | ".format(end_time - start_time) +
                "~Time Left: {:.3f}s | ".format(
                    (total_time - start_time) * (epochs[-1] - epoch)),
                "Train Loss: {:.4f}% | Train Acc. {:.4f}% | ".format(
                    train_loss, train_acc1 * 100) +
                "Test Loss: {:.4f}% | Test Acc. {:.4f}%".format(
                    test_loss, test_acc1 * 100))
            df = pd.DataFrame(data=self.performance_statistics)

            df.to_excel(self.output_filename)
            if self.early_stop(train_loss):
                print("AdaS: Early stop activated.")
                break
            if not self.mpd or \
                    (self.mpd and self.rank % self.ngpus_per_node == 0):
                data = {
                    'epoch':
                    epoch + 1,
                    'trial':
                    trial,
                    'config':
                    self.config,
                    'state_dict_network':
                    self.network.state_dict(),
                    'state_dict_optimizer':
                    self.optimizer.state_dict(),
                    'state_dict_scheduler':
                    self.scheduler.state_dict()
                    if not isinstance(self.scheduler, AdaS)
                    and self.scheduler is not None else None,
                    'best_acc1':
                    self.best_acc1,
                    'performance_statistics':
                    self.performance_statistics,
                    'output_filename':
                    Path(self.output_filename).name,
                    'historical_metrics':
                    self.metrics.historical_metrics
                }
                if epoch % self.save_freq == 0:
                    filename = f'trial_{trial}_epoch_{epoch}.pth.tar'
                    torch.save(data, str(self.checkpoint_path / filename))
                if np.greater(test_acc1, self.best_acc1):
                    self.best_acc1 = test_acc1
                    torch.save(data,
                               str(self.checkpoint_path / 'best.pth.tar'))
        torch.save(data, str(self.checkpoint_path / 'last.pth.tar'))

    def epoch_iteration(self, trial: int, epoch: int):
        # logging.info(f"Adas: Train: Epoch: {epoch}")
        # global net, performance_statistics, metrics, adas, config
        self.network.train()
        train_loss = 0
        top1 = AverageMeter()
        top5 = AverageMeter()
        # correct = 0
        # total = 0
        """train CNN architecture"""
        for batch_idx, (inputs, targets) in enumerate(self.train_loader):
            # start = time.time()
            # print(f'{batch_idx} / {len(train_loader)}')
            if self.gpu is not None:
                inputs = inputs.cuda(self.gpu, non_blocking=True)
            if self.device == 'cuda':
                targets = targets.cuda(self.gpu, non_blocking=True)
            # inputs, targets = inputs.to(self.device), targets.to(self.device)
            if isinstance(self.scheduler, CosineAnnealingWarmRestarts):
                self.scheduler.step(epoch + batch_idx / len(self.train_loader))
            self.optimizer.zero_grad()
            if isinstance(self.optimizer, SLS) or \
                    isinstance(self.optimizer, AdaSLS):

                def closure():
                    outputs = self.network(inputs)
                    loss = self.criterion(outputs, targets)
                    return loss, outputs

                loss, outputs = self.optimizer.step(closure=closure)
            else:
                outputs = self.network(inputs)
                loss = self.criterion(outputs, targets)
                loss.backward()
                if isinstance(self.scheduler, AdaS):
                    self.optimizer.step(self.metrics.layers_index_todo,
                                        self.scheduler.lr_vector)
                elif isinstance(self.optimizer, SPS):
                    self.optimizer.step(loss=loss)
                else:
                    self.optimizer.step()

            train_loss += loss.item()
            # _, predicted = outputs.max(1)
            # total += targets.size(0)
            # correct += predicted.eq(targets).sum().item()
            acc1, acc5 = accuracy(outputs, targets, (1, 5))
            top1.update(acc1[0], inputs.size(0))
            top5.update(acc5[0], inputs.size(0))
            if isinstance(self.scheduler, OneCycleLR):
                self.scheduler.step()
        self.performance_statistics[f'train_acc1_epoch_{epoch}'] = \
            top1.avg.cpu().item() / 100.
        self.performance_statistics[f'train_acc5_epoch_{epoch}'] = \
            top5.avg.cpu().item() / 100.
        self.performance_statistics[f'train_loss_epoch_{epoch}'] = \
            train_loss / (batch_idx + 1)

        io_metrics = self.metrics.evaluate(epoch)
        self.performance_statistics[f'in_S_epoch_{epoch}'] = \
            io_metrics.input_channel_S
        self.performance_statistics[f'out_S_epoch_{epoch}'] = \
            io_metrics.output_channel_S
        self.performance_statistics[f'fc_S_epoch_{epoch}'] = \
            io_metrics.fc_S
        self.performance_statistics[f'in_rank_epoch_{epoch}'] = \
            io_metrics.input_channel_rank
        self.performance_statistics[f'out_rank_epoch_{epoch}'] = \
            io_metrics.output_channel_rank
        self.performance_statistics[f'fc_rank_epoch_{epoch}'] = \
            io_metrics.fc_rank
        self.performance_statistics[f'in_condition_epoch_{epoch}'] = \
            io_metrics.input_channel_condition

        self.performance_statistics[f'out_condition_epoch_{epoch}'] = \
            io_metrics.output_channel_condition
        # if GLOBALS.ADAS is not None:
        if isinstance(self.scheduler, AdaS):
            lrmetrics = self.scheduler.step(epoch, self.metrics)
            self.performance_statistics[f'rank_velocity_epoch_{epoch}'] = \
                lrmetrics.rank_velocity
            self.performance_statistics[f'learning_rate_epoch_{epoch}'] = \
                lrmetrics.r_conv
        else:
            # if GLOBALS.CONFIG['optim_method'] == 'SLS' or \
            #         GLOBALS.CONFIG['optim_method'] == 'SPS':
            if isinstance(self.optimizer, SLS) or isinstance(
                    self.optimizer, SPS) or isinstance(self.optimizer, AdaSLS):
                self.performance_statistics[f'learning_rate_epoch_{epoch}'] = \
                    self.optimizer.state['step_size']
            else:
                self.performance_statistics[
                    f'learning_rate_epoch_{epoch}'] = \
                    self.optimizer.param_groups[0]['lr']
        return train_loss / (batch_idx + 1), (top1.avg.cpu().item() / 100.,
                                              top5.avg.cpu().item() / 100.)

    def validate(self, epoch: int):
        self.network.eval()
        test_loss = 0
        # correct = 0
        # total = 0
        top1 = AverageMeter()
        top5 = AverageMeter()
        with torch.no_grad():
            for batch_idx, (inputs, targets) in enumerate(self.test_loader):
                # inputs, targets = \
                #     inputs.to(self.device), targets.to(self.device)
                if self.gpu is not None:
                    inputs = inputs.cuda(self.gpu, non_blocking=True)
                if self.device == 'cuda':
                    targets = targets.cuda(self.gpu, non_blocking=True)
                outputs = self.network(inputs)
                loss = self.criterion(outputs, targets)
                test_loss += loss.item()
                # _, predicted = outputs.max(1)
                # total += targets.size(0)
                # correct += predicted.eq(targets).sum().item()
                acc1, acc5 = accuracy(outputs, targets, topk=(1, 5))
                top1.update(acc1[0], inputs.size(0))
                top5.update(acc5[0], inputs.size(0))

        # Save checkpoint.
        # acc = 100. * correct / total
        # if acc > self.best_acc:
        #     # print('Adas: Saving checkpoint...')
        #     state = {
        #         'net': self.network.state_dict(),
        #         'acc': acc,
        #         'epoch': epoch + 1,
        #     }
        #     if not isinstance(self.scheduler, AdaS):
        #         state['historical_io_metrics'] = \
        #             self.metrics.historical_metrics
        #     torch.save(state, str(self.checkpoint_path / 'ckpt.pth'))
        #     self.best_acc = acc
        self.performance_statistics[f'test_acc1_epoch_{epoch}'] = (
            top1.avg.cpu().item() / 100.)
        self.performance_statistics[f'test_acc5_epoch_{epoch}'] = (
            top5.avg.cpu().item() / 100.)
        self.performance_statistics[f'test_loss_epoch_{epoch}'] = test_loss / (
            batch_idx + 1)
        return test_loss / (batch_idx + 1), (top1.avg.cpu().item() / 100,
                                             top5.avg.cpu().item() / 100)
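
The accuracy helper used in epoch_iteration and validate is not shown here. Judging from how its result is indexed (acc1[0]) and later divided by 100, it appears to follow the conventional top-k implementation; a sketch of that convention, as an assumption rather than this repository's actual code:

def accuracy(outputs, targets, topk=(1,)):
    # conventional top-k accuracy: percentage of samples whose target is
    # among the k highest-scoring classes; returns one 1-element tensor per k
    maxk = max(topk)
    batch_size = targets.size(0)
    _, pred = outputs.topk(maxk, dim=1, largest=True, sorted=True)
    pred = pred.t()
    correct = pred.eq(targets.view(1, -1).expand_as(pred))
    res = []
    for k in topk:
        correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res
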
Example 7
def main(args: APNamespace):
    root_path = Path(args.root).expanduser()
    config_path = Path(args.config).expanduser()
    data_path = root_path / Path(args.data).expanduser()
    output_path = root_path / Path(args.output).expanduser()
    global checkpoint_path, config
    checkpoint_path = root_path / Path(args.checkpoint).expanduser()

    if not config_path.exists():
        # logging.critical(f"AdaS: Config path {config_path} does not exist")
        print(f"AdaS: Config path {config_path} does not exist")
        raise ValueError
    if not data_path.exists():
        print(f"AdaS: Data dir {data_path} does not exist, building")
        data_path.mkdir(exist_ok=True, parents=True)
    if not output_path.exists():
        print(f"AdaS: Output dir {output_path} does not exist, building")
        output_path.mkdir(exist_ok=True, parents=True)
    if not checkpoint_path.exists():
        if args.resume:
            print(f"AdaS: Cannot resume from checkpoint without specifying " +
                  "checkpoint dir")
            raise ValueError
        checkpoint_path.mkdir(exist_ok=True, parents=True)
    with config_path.open() as f:
        config = yaml.load(f)
    print("Adas: Argument Parser Options")
    print("-" * 45)
    print(f"    {'config':<20}: {args.config:<40}")
    print(f"    {'data':<20}: {str(Path(args.root) / args.data):<40}")
    print(f"    {'output':<20}: {str(Path(args.root) / args.output):<40}")
    print(f"    {'checkpoint':<20}: " +
          f"{str(Path(args.root) / args.checkpoint):<40}")
    print(f"    {'root':<20}: {args.root:<40}")
    print(f"    {'resume':<20}: {'True' if args.resume else 'False':<20}")
    print("\nAdas: Train: Config")
    print(f"    {'Key':<20} {'Value':<20}")
    print("-" * 45)
    for k, v in config.items():
        print(f"    {k:<20} {v:<20}")
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print(f"AdaS: Pytorch device is set to {device}")
    global best_acc
    best_acc = 0  # best test accuracy
    start_epoch = 0  # start from epoch 0 or last checkpoint epoch
    if np.less(float(config['early_stop_threshold']), 0):
        print("AdaS: Notice: early stop will not be used as it was set to " +
              f"{config['early_stop_threshold']}, training till completion.")

    for trial in range(config['n_trials']):
        if config['lr_scheduler'] == 'AdaS':
            filename = \
                f"stats_{config['optim_method']}_AdaS_trial={trial}_" +\
                f"beta={config['beta']}_initlr={config['init_lr']}_" +\
                f"net={config['network']}_dataset={config['dataset']}.csv"
        else:
            filename = \
                f"stats_{config['optim_method']}_{config['lr_scheduler']}_" +\
                f"trial={trial}_initlr={config['init_lr']}" +\
                f"net={config['network']}_dataset={config['dataset']}.csv"
        Profiler.filename = output_path / filename
        # Data
        # logging.info("Adas: Preparing Data")
        train_loader, test_loader = get_data(
            root=data_path,
            dataset=config['dataset'],
            mini_batch_size=config['mini_batch_size'])
        global performance_statistics, net, metrics, adas
        performance_statistics = {}

        # logging.info("AdaS: Building Model")
        net = get_net(config['network'],
                      num_classes=10 if config['dataset'] == 'CIFAR10' else
                      100 if config['dataset'] == 'CIFAR100' else
                      1000 if config['dataset'] == 'ImageNet' else 10)
        metrics = Metrics(list(net.parameters()), p=config['p'])
        if config['lr_scheduler'] == 'AdaS':
            adas = AdaS(parameters=list(net.parameters()),
                        beta=config['beta'],
                        zeta=config['zeta'],
                        init_lr=float(config['init_lr']),
                        min_lr=float(config['min_lr']),
                        p=config['p'])

        net = net.to(device)

        global criterion
        criterion = get_loss(config['loss'])

        optimizer, scheduler = get_optimizer_scheduler(
            net_parameters=net.parameters(),
            init_lr=float(config['init_lr']),
            optim_method=config['optim_method'],
            lr_scheduler=config['lr_scheduler'],
            train_loader_len=len(train_loader),
            max_epochs=int(config['max_epoch']))
        early_stop = EarlyStop(patience=int(config['early_stop_patience']),
                               threshold=float(config['early_stop_threshold']))

        if device == 'cuda':
            net = torch.nn.DataParallel(net)
            cudnn.benchmark = True

        if args.resume:
            # Load checkpoint.
            print("Adas: Resuming from checkpoint...")
            checkpoint = torch.load(str(checkpoint_path / 'ckpt.pth'))
            # if checkpoint_path.is_dir():
            #     checkpoint = torch.load(str(checkpoint_path / 'ckpt.pth'))
            # else:
            #     checkpoint = torch.load(str(checkpoint_path))
            net.load_state_dict(checkpoint['net'])
            best_acc = checkpoint['acc']
            start_epoch = checkpoint['epoch']
            if adas is not None:
                metrics.historical_metrics = \
                    checkpoint['historical_io_metrics']

        # model_parameters = filter(lambda p: p.requires_grad,
        #                           net.parameters())
        # params = sum([np.prod(p.size()) for p in model_parameters])
        # print(params)
        epochs = range(start_epoch, start_epoch + config['max_epoch'])
        for epoch in epochs:
            start_time = time.time()
            # print(f"AdaS: Epoch {epoch}/{epochs[-1]} Started.")
            train_loss, train_accuracy, test_loss, test_accuracy = epoch_iteration(
                train_loader, test_loader, epoch, device, optimizer, scheduler)
            end_time = time.time()
            if config['lr_scheduler'] == 'StepLR':
                scheduler.step()
            total_time = time.time()
            print(
                f"AdaS: Trial {trial}/{config['n_trials'] - 1} | " +
                f"Epoch {epoch}/{epochs[-1]} Ended | " +
                "Total Time: {:.3f}s | ".format(total_time - start_time) +
                "Epoch Time: {:.3f}s | ".format(end_time - start_time) +
                "~Time Left: {:.3f}s | ".format(
                    (total_time - start_time) * (epochs[-1] - epoch)),
                "Train Loss: {:.4f}% | Train Acc. {:.4f}% | ".format(
                    train_loss, train_accuracy) +
                "Test Loss: {:.4f}% | Test Acc. {:.4f}%".format(
                    test_loss, test_accuracy))
            df = pd.DataFrame(data=performance_statistics)
            if config['lr_scheduler'] == 'AdaS':
                xlsx_name = \
                    f"{config['optim_method']}_AdaS_trial={trial}_" +\
                    f"beta={config['beta']}_initlr={config['init_lr']}_" +\
                    f"net={config['network']}_dataset={config['dataset']}.xlsx"
            else:
                xlsx_name = \
                    f"{config['optim_method']}_{config['lr_scheduler']}_" +\
                    f"trial={trial}_initlr={config['init_lr']}" +\
                    f"net={config['network']}_dataset={config['dataset']}.xlsx"

            df.to_excel(str(output_path / xlsx_name))
            if early_stop(train_loss):
                print("AdaS: Early stop activated.")
                break
    return
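
main expects an argparse-style namespace with the attributes read above (root, config, data, output, checkpoint, resume). A hedged invocation sketch; the flag names and defaults below are assumptions inferred from those attributes, not the script's actual parser:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--root', default='.')          # hypothetical defaults
parser.add_argument('--config', default='config.yaml')
parser.add_argument('--data', default='data')
parser.add_argument('--output', default='output')
parser.add_argument('--checkpoint', default='checkpoint')
parser.add_argument('--resume', action='store_true')
main(parser.parse_args())
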
def one_fold(X_train, y_train, X_dev, y_dev, class_weight):

    num_labels = NUM_CLASS
    vocab_size = 20000
    pad_len = 40
    batch_size = 64
    embedding_dim = 200
    hidden_dim = 500
    __use_unk = False

    word2id, id2word = build_vocab(X_train, vocab_size)

    train_data = DataSet(X_train,
                         y_train,
                         pad_len,
                         word2id,
                         num_labels,
                         use_unk=__use_unk)
    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)

    dev_data = DataSet(X_dev,
                       y_dev,
                       pad_len,
                       word2id,
                       num_labels,
                       use_unk=__use_unk)
    dev_loader = DataLoader(dev_data, batch_size=batch_size, shuffle=False)

    # test_data = TestDataSet(X_test, pad_len, word2id, num_labels, use_unk=__use_unk)
    # test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

    model = AttentionLSTMClassifier(embedding_dim,
                                    hidden_dim,
                                    vocab_size,
                                    word2id,
                                    num_labels,
                                    batch_size,
                                    use_att=False)
    model.load_glove_embedding(id2word)
    model.cuda()
    es = EarlyStop(2)
    optimizer = optim.Adam(model.parameters())

    for epoch in range(30):
        print('Epoch:', epoch, '===================================')
        train_loss = 0
        for i, (data, seq_len, label) in enumerate(train_loader):
            weight = torch.FloatTensor(class_weight)  # re-weight
            weight_expanded = weight.expand(len(data), -1)
            loss_criterion = nn.BCELoss(weight=weight_expanded.cuda())  #
            data, label, seq_len = sort_batch(data, label, seq_len.view(-1))
            y_pred = model(Variable(data).cuda(), seq_len)

            #roc_reward = roc_auc_score(label.numpy().argmax(axis=1), y_pred.data.cpu().numpy()[:, 1])
            optimizer.zero_grad()
            loss = loss_criterion(y_pred, Variable(label).cuda())
            # * Variable(torch.FloatTensor([roc_reward])).cuda()
            loss.backward()
            optimizer.step()
            train_loss += loss.data[0]

        pred_list = []
        gold_list = []
        test_loss = 0
        for _, (_data, _seq_len, _label) in enumerate(dev_loader):
            data, label, seq_len = sort_batch(_data, _label, _seq_len.view(-1))
            y_pred = model(Variable(data, volatile=True).cuda(), seq_len)
            weight = torch.FloatTensor(class_weight)  # re-weight
            weight_expanded = weight.expand(len(data), -1)
            loss_criterion = nn.BCELoss(weight=weight_expanded.cuda())
            loss = loss_criterion(y_pred,
                                  Variable(label, volatile=True).cuda())
            test_loss += loss.data[0]
            pred_list.append(y_pred.data.cpu().numpy())
            gold_list.append(label.numpy())

        pred_list_2 = np.concatenate(pred_list, axis=0)[:, 1]
        pred_list = np.concatenate(pred_list, axis=0).argmax(axis=1)
        gold_list = np.concatenate(gold_list, axis=0).argmax(axis=1)
        roc = roc_auc_score(gold_list, pred_list_2)
        print('roc:', roc)
        a = accuracy_score(gold_list, pred_list)
        p = precision_score(gold_list, pred_list, average='binary')
        r = recall_score(gold_list, pred_list, average='binary')
        f1 = f1_score(gold_list, pred_list, average='binary')
        print('accuracy:', a, 'precision_score:', p, 'recall:', r, 'f1:', f1)
        print("Train Loss: ", train_loss, " Evaluation: ", test_loss)
        es.new_loss(test_loss)
        if es.if_stop():
            print('Stopping early: the model is starting to overfit')
            break

    return gold_list, pred_list
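
Here the BCELoss weight is a per-label vector expanded over the batch, so class_weight needs one entry per output label. A sketch of deriving balanced (inverse-frequency) weights, assuming y_train is a multi-hot numpy array; the caller's actual weighting scheme is not shown:

import numpy as np

label_counts = np.asarray(y_train).sum(axis=0)                 # positives per label
class_weight = (label_counts.sum() /
                (len(label_counts) * np.maximum(label_counts, 1))).tolist()
gold, pred = one_fold(X_train, y_train, X_dev, y_dev, class_weight)
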
Example 9
def one_fold(fold_path):

    vocab_size = 20000
    pad_len = 30
    batch_size = 64
    embedding_dim = 200
    hidden_dim = 800
    num_labels = NUM_CLASS

    X, y = cbet_data(os.path.join(fold_path, 'train.csv'))

    train_index, dev_index = stratified_shuffle_split(X, y)
    y = np.asarray(y)
    X_train, X_dev = [X[i] for i in train_index], [X[i] for i in dev_index]
    y_train, y_dev = y[train_index], y[dev_index]

    word2id, id2word = build_vocab(X_train, vocab_size)
    # __X, __y, __pad_len, __word2id, __num_labels
    train_data = DataSet(X_train, y_train, pad_len, word2id, num_labels)
    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)

    dev_data = DataSet(X_dev, y_dev, pad_len, word2id, num_labels)
    dev_loader = DataLoader(dev_data, batch_size=batch_size, shuffle=True)

    X_test, y_test = cbet_data(os.path.join(fold_path, 'test.csv'))
    test_data = DataSet(X_test, y_test, pad_len, word2id, num_labels)
    test_loader = DataLoader(test_data, batch_size=batch_size)

    model = AttentionLSTMClassifier(embedding_dim,
                                    hidden_dim,
                                    vocab_size,
                                    word2id,
                                    num_labels,
                                    batch_size,
                                    use_att=True,
                                    soft_last=True)
    model.load_glove_embedding(id2word)
    model.cuda()

    optimizer = optim.Adam(model.parameters())
    loss_criterion = nn.BCELoss()
    es = EarlyStop(2)
    old_model = None
    for epoch in range(10):
        print('Epoch:', epoch, '===================================')
        train_loss = 0
        for i, (data, seq_len, label) in enumerate(train_loader):
            data, label, seq_len = sort_batch(data, label, seq_len.view(-1))
            y_pred = model(Variable(data).cuda(), seq_len)
            optimizer.zero_grad()
            loss = loss_criterion(y_pred, Variable(label).cuda())
            loss.backward()
            optimizer.step()
            train_loss += loss.data[0]
        pred_list = []
        gold_list = []
        test_loss = 0
        # evaluation
        for i, (data, seq_len, label) in enumerate(dev_loader):
            data, label, seq_len = sort_batch(data, label, seq_len.view(-1))
            y_pred = model(Variable(data, volatile=True).cuda(), seq_len)
            loss = loss_criterion(y_pred,
                                  Variable(label, volatile=True).cuda())
            test_loss += loss.data[0]
            pred_list.append(y_pred.data.cpu().numpy())
            gold_list.append(label.numpy())

        if old_model is not None:
            del old_model
            old_model = copy.deepcopy(model)
        else:
            old_model = copy.deepcopy(model)
        print("Train Loss: ", train_loss, " Evaluation: ", test_loss)
        es.new_loss(test_loss)
        if es.if_stop():
            print('Stopping early: the model is starting to overfit')
            del model
            model = old_model
            break

    # testing
    pred_list = []
    gold_list = []
    test_loss = 0
    for i, (data, seq_len, label) in enumerate(test_loader):
        data, label, seq_len = sort_batch(data, label, seq_len.view(-1))
        y_pred = model(Variable(data, volatile=True).cuda(), seq_len)
        loss = loss_criterion(y_pred, Variable(label, volatile=True).cuda())
        test_loss += loss.data[0]
        pred_list.append(y_pred.data.cpu().numpy())
        gold_list.append(label.numpy())

    return np.concatenate(pred_list, axis=0), np.concatenate(gold_list, axis=0)
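
Note that the snapshot above (old_model) is taken after the current epoch but before the early-stop check, so the weights restored on stopping come from the same epoch that triggered it. A sketch of rolling back to the last epoch before the dev loss degraded, using state_dict copies rather than deep-copying the whole module; train_one_epoch and evaluate_on_dev are hypothetical stand-ins for the loops above:

import copy

prev_state = None
for epoch in range(10):
    train_one_epoch()                          # hypothetical stand-in for the training loop
    dev_loss = evaluate_on_dev()               # hypothetical stand-in for the dev pass
    es.new_loss(dev_loss)
    if es.if_stop():
        if prev_state is not None:
            model.load_state_dict(prev_state)  # roll back to pre-degradation weights
        break
    prev_state = copy.deepcopy(model.state_dict())
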
def one_fold(X_train, y_train, X_dev, y_dev):
    num_labels = NUM_CLASS
    vocab_size = 30000
    pad_len = 40
    batch_size = 64
    embedding_dim = 200
    hidden_dim = 600
    __use_unk = False

    word2id, id2word = build_vocab(X_train, vocab_size)

    train_data = DataSet(X_train,
                         y_train,
                         pad_len,
                         word2id,
                         num_labels,
                         use_unk=__use_unk)
    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)

    dev_data = DataSet(X_dev,
                       y_dev,
                       pad_len,
                       word2id,
                       num_labels,
                       use_unk=__use_unk)
    dev_loader = DataLoader(dev_data, batch_size=batch_size, shuffle=False)

    # test_data = TestDataSet(X_test, pad_len, word2id, num_labels, use_unk=__use_unk)
    # test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

    model = AttentionLSTMClassifier(embedding_dim,
                                    hidden_dim,
                                    vocab_size,
                                    word2id,
                                    num_labels,
                                    batch_size,
                                    use_att=True,
                                    soft_last=False)
    model.load_glove_embedding(id2word)
    model.cuda()
    es = EarlyStop(2)
    optimizer = optim.Adam(model.parameters(), lr=1e-5)
    loss_criterion = nn.MSELoss()  #
    old_model = None
    for epoch in range(20):
        print('Epoch:', epoch, '===================================')
        train_loss = 0
        model.train()
        for i, (data, seq_len, label) in enumerate(train_loader):
            data, label, seq_len = sort_batch(data, label, seq_len.view(-1))
            y_pred = model(Variable(data).cuda(), seq_len)
            #roc_reward = roc_auc_score(label.numpy().argmax(axis=1), y_pred.data.cpu().numpy()[:, 1])
            optimizer.zero_grad()
            loss = loss_criterion(y_pred, Variable(label).cuda())
            # * Variable(torch.FloatTensor([roc_reward])).cuda()
            loss.backward()
            optimizer.step()
            train_loss += loss.data[0] * batch_size

        pred_list = []
        gold_list = []
        test_loss = 0
        model.eval()
        for _, (_data, _seq_len, _label) in enumerate(dev_loader):
            data, label, seq_len = sort_batch(_data, _label, _seq_len.view(-1))
            y_pred = model(Variable(data, volatile=True).cuda(), seq_len)
            loss = loss_criterion(y_pred, Variable(label).cuda())
            # * Variable(torch.FloatTensor([roc_reward])).cuda()
            test_loss += loss.data[0] * batch_size
            y_pred = y_pred.data.cpu().numpy()
            pred_list.append(y_pred)  # x[np.where( x > 3.0 )]
            gold_list.append(label.numpy())

        # pred_list_2 = np.concatenate(pred_list, axis=0)[:, 1]
        pred_list = np.concatenate(pred_list, axis=0)
        gold_list = np.concatenate(gold_list, axis=0)
        # roc = roc_auc_score(gold_list, pred_list_2)
        # print('roc:', roc)
        # a = accuracy_score(gold_list, pred_list)
        # p = precision_score(gold_list, pred_list, average='binary')
        # r = recall_score(gold_list, pred_list, average='binary')
        # f1 = f1_score(gold_list, pred_list, average='binary')
        # print('accuracy:', a, 'precision_score:', p, 'recall:', r, 'f1:', f1)
        print("Train Loss: ", train_loss / len(train_data), " Evaluation: ",
              test_loss / len(dev_data))
        es.new_loss(test_loss)
        if old_model is not None:
            del old_model, old_pred_list
            old_model = copy.deepcopy(model)
            old_pred_list = copy.deepcopy(pred_list)

        else:
            old_model = copy.deepcopy(model)
            old_pred_list = copy.deepcopy(pred_list)

        if es.if_stop():
            print('Stopping early: the model is starting to overfit')
            del model
            model = old_model
            pred_list = old_pred_list
            torch.save(model.state_dict(),
                       open(os.path.join('checkpoint', 'cbet.model'), 'wb'))
            with open('checkpoint/some_data.pkl', 'wb') as f:
                pickle.dump([word2id, id2word], f)
            break

    return gold_list, pred_list, model, pad_len, word2id, num_labels
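
The early-stop branch above writes the model weights to checkpoint/cbet.model and the vocabulary to checkpoint/some_data.pkl. A hedged sketch of loading both back for inference, reusing this function's hyperparameters and the AttentionLSTMClassifier constructor arguments shown above:

import os
import pickle
import torch

with open('checkpoint/some_data.pkl', 'rb') as f:
    word2id, id2word = pickle.load(f)

model = AttentionLSTMClassifier(embedding_dim, hidden_dim, vocab_size, word2id,
                                num_labels, batch_size, use_att=True,
                                soft_last=False)
model.load_state_dict(torch.load(os.path.join('checkpoint', 'cbet.model')))
model.cuda()
model.eval()
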