Пример #1
0
    def _predict_model(self):
        """Run inference over ``self._dataloaders``.

        Returns:
            tuple: (running_loss, running_metrics) — sample-weighted averages
            of the criterion and the metrics over all batches.
        """
        torch.backends.cudnn.benchmark = True

        epoch_loss = 0.0
        epoch_metrics = 0.0
        total = 0
        # Defined up-front so an empty dataloader returns (0.0, 0.0) instead
        # of raising UnboundLocalError at the final return.
        running_loss = 0.0
        running_metrics = 0.0
        with torch.no_grad():
            for data in self._dataloaders:
                inputs = add_device(data[self._input_key], self._device)
                # Already inside no_grad; the nested `with torch.no_grad():`
                # around the preprocess call was redundant and is removed.
                inputs = self._preprocess(inputs)
                inputs = inputs.to(self._device)
                labels = add_device(data[self._target_key], self._device)
                outputs = self._model(inputs)

                loss = self._criterion(outputs, labels)
                # Weight by batch size so the average is per-sample.
                epoch_loss += loss.item() * inputs.size(0)
                total += inputs.size(0)

                running_loss = epoch_loss / total

                if self._activation:
                    outputs = self._activation(outputs)
                preds = tensor_to_array(outputs)
                labels = tensor_to_array(labels)
                metrics = self._metrics(labels, preds)
                epoch_metrics += metrics.item() * inputs.size(0)

                running_metrics = epoch_metrics / total

        return running_loss, running_metrics
Пример #2
0
    def _predict_model(self):
        """Predict on the dataloader and score against GP confidence bounds.

        Returns:
            tuple: (running_loss, ratio) — the sample-weighted average loss
            and the fraction of prediction rows lying inside the
            [lower, upper] interval loaded from ./logs/GP_{upper,lower}.csv.
        """
        torch.backends.cudnn.benchmark = True

        # Collect per-batch predictions and concatenate once at the end.
        # The previous `if not results.any()` test silently discarded every
        # batch accumulated so far whenever its predictions were all zeros.
        batch_preds = []
        epoch_loss = 0.0
        total = 0
        running_loss = 0.0  # defined even when the dataloader is empty
        with torch.no_grad():
            for data in self._dataloaders:
                inputs = add_device(data[self._input_key], self._device)
                labels = add_device(data[self._target_key], self._device)
                outputs = self._model(inputs)
                loss = self._criterion(outputs, labels)
                # inputs is indexed here, so it is a sequence of tensors;
                # weight the loss by the first tensor's batch size.
                epoch_loss += loss.item() * inputs[0].size(0)
                total += inputs[0].size(0)

                running_loss = epoch_loss / total

                batch_preds.append(tensor_to_array(outputs))

        results = np.concatenate(batch_preds) if batch_preds else np.array([])

        upper = np.loadtxt('./logs/GP_upper.csv', delimiter=',')
        lower = np.loadtxt('./logs/GP_lower.csv', delimiter=',')

        from models.sub_task import set_phi_within_valid_range

        def reshape3vec(data):
            # GP bounds and predictions are treated as rows of 3-vectors.
            return data.reshape(-1, 3)

        results = set_phi_within_valid_range(reshape3vec(results))
        upper = set_phi_within_valid_range(reshape3vec(upper))
        lower = set_phi_within_valid_range(reshape3vec(lower))
        # A row counts as covered when every component is inside the bound.
        # NOTE(review): the `upper < lower` branches presumably handle the
        # periodic phi wrap-around after set_phi_within_valid_range — confirm.
        inside = (
            ((lower < upper)
             & (results < upper)
             & (lower < results))
            | ((upper < lower)
               & (upper < results)
               & (lower < results))
            | ((upper < lower)
               & (results < upper)
               & (results < lower))
        )
        ratio = np.sum(inside.all(axis=1)) / len(results)

        return running_loss, ratio
Пример #3
0
def valid(model,
          dataloader,
          input_key,
          target_key,
          device=torch.device('cpu'),
          activation=None):
    """Run `model` over `dataloader` and collect outputs and targets.

    Args:
        model: callable model applied to each batch of inputs.
        dataloader: iterable of dicts holding inputs/targets.
        input_key: key of the input entry in each batch dict.
        target_key: key of the target entry in each batch dict.
        device: device the batches are moved to.
        activation: optional activation applied to the raw model output.

    Returns:
        tuple: (outputs_data, targets_data) as flat Python lists.
    """
    outputs_data = []
    targets_data = []
    # Inference only: disable autograd so no graph is built (the original
    # version tracked gradients for every forward pass, wasting memory).
    with torch.no_grad():
        for step, data in enumerate(dataloader):
            inputs = add_device(data[input_key], device)
            targets = add_device(data[target_key], device)
            outputs = model(inputs)
            if activation:
                outputs = activation(outputs)
            outputs_data.extend(outputs.tolist())
            targets_data.extend(targets.tolist())
    return outputs_data, targets_data
Пример #4
0
def train(model,
          dataloader,
          input_key,
          target_key,
          optimizer,
          loss_func,
          device=torch.device('cpu')):
    """Run one training epoch over `dataloader`.

    Args:
        model: model to train (already on `device`).
        dataloader: tqdm-wrapped loader (set_postfix is called on it).
        input_key: key of the input entry in each batch dict.
        target_key: key of the target entry in each batch dict.
        optimizer: optimizer stepped once per batch.
        loss_func: criterion(outputs, targets) -> scalar tensor.
        device: device the batches are moved to.

    Returns:
        float | None: mean per-batch loss, or None for an empty dataloader.
        (Backward-compatible: the function previously returned None always.)
    """
    train_loss = 0.0
    num_batches = 0
    for step, data in enumerate(dataloader):
        inputs = add_device(data[input_key], device)
        targets = add_device(data[target_key], device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = loss_func(outputs, targets)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
        num_batches = step + 1
        postfix = {'train_loss': f'{(train_loss / (step + 1)):.5f}'}
        dataloader.set_postfix(log=postfix)
    return train_loss / num_batches if num_batches else None
Пример #5
0
    def _train_batch_model(self, epoch, phase):
        """Run one epoch of `phase` ('train' or 'valid') and return mean loss.

        Gradients are enabled and the optimizer stepped only when
        phase == 'train'; in other phases the same loop runs forward-only.
        The loss is averaged over samples, weighted by batch size.

        Args:
            epoch: current epoch number (for progress/log display).
            phase: dataset mode name; also selects train vs eval behavior.

        Returns:
            float: sample-weighted average loss over the epoch.
        """
        from tqdm import tqdm
        epoch_loss = 0.0
        total = 0
        # Defined up-front so an empty dataloader returns 0.0 instead of
        # raising UnboundLocalError at the logging/return below.
        running_loss = 0.0

        # Switch the shared dataset into the requested phase.
        getattr(self._dataloaders.dataset, phase)()
        with tqdm(total=len(self._dataloaders), unit="batch",
                  ncols=120) as pbar:
            pbar.set_description(
                f"Epoch [{epoch}/{self._hp_epochs}] ({phase.ljust(5)})")

            for data in self._dataloaders:
                inputs = add_device(data[self._input_key], self._device)
                labels = add_device(data[self._target_key], self._device)
                self._optimizer.zero_grad()
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = self._model(inputs)
                    loss = self._criterion(outputs, labels)

                    if phase == 'train':
                        loss.backward()
                        self._optimizer.step()

                    # inputs is indexed, so it is a sequence of tensors;
                    # weight by the first tensor's batch size.
                    epoch_loss += loss.item() * inputs[0].size(0)
                    total += inputs[0].size(0)

                    running_loss = epoch_loss / total

                    pbar.set_postfix({
                        "loss": f'{running_loss:.4f}',
                        "lr": f'{self._lr:.4f}'
                    })
                    pbar.update(1)
        if phase == 'valid':
            logger.info(f'Epoch [{epoch}/{self._hp_epochs}] ({phase.ljust(6)})'
                        f'{self._model.__class__.__name__} (Loss_1ST) :'
                        f'{running_loss}')
        return running_loss
def evaluate(model, conf, dataloader, metrics, result, choice=None):
    with torch.no_grad():
        logger.info('start eval mode')
        model.eval()
        dataloader.dataset.test()
        test_dataset = dataloader.dataset
        test_dataloader = DataLoader(test_dataset,
                                     batch_size=100,
                                     shuffle=False)
        A = range(len(conf.sub_task_params.tau4vec.tasks))
        B = range(len(conf.sub_task_params.higgsId.tasks))
        num_name_conb = {
            num: f'{f}_{s}'
            for num, (f, s) in zip(
                product(A, B), product(FIRST_MODEL_NAME, SECOND_MODEL_NAME))
        }
        outputs_data = []
        targets_data = []
        temp_outputs_data = []
        temp_targets_data = []
        for data in test_dataloader:
            inputs = add_device(data['inputs'], DEVICE)
            targets = add_device(data['targets'], DEVICE)
            outputs, now_choice = model(inputs, choice)
            outputs_data.extend(tensor_to_array(outputs[1]))
            targets_data.extend(tensor_to_array(targets[1]))
            temp_outputs_data.extend(tensor_to_array(outputs[0]))
            temp_targets_data.extend(tensor_to_array(targets[0]))
        targets_data = np.array(targets_data)
        outputs_data = np.array(outputs_data)
        auc_score = metrics(targets_data, outputs_data)
        result['AUC'][num_name_conb[choice]].append(auc_score)
        temp_outputs_data = np.array(temp_outputs_data)
        temp_targets_data = np.array(temp_targets_data)
        upper = np.loadtxt('./logs/GP_upper.csv', delimiter=',')
        lower = np.loadtxt('./logs/GP_lower.csv', delimiter=',')

        c_1 = set_module([torch.nn, MyLoss], conf.SPOS_NAS, 'loss_first')
        c_2 = set_module([torch.nn, MyLoss], conf.SPOS_NAS, 'loss_second')
        loss_1st = c_1(torch.tensor(temp_outputs_data),
                       torch.tensor(temp_targets_data))
        loss_2nd = c_2(torch.tensor(outputs_data), torch.tensor(targets_data))

        from models.sub_task import set_phi_within_valid_range

        def reshape3vec(data):
            return data.reshape(-1, 3)

        temp_outputs_data = set_phi_within_valid_range(
            reshape3vec(temp_outputs_data))
        upper = set_phi_within_valid_range(reshape3vec(upper))
        lower = set_phi_within_valid_range(reshape3vec(lower))
        ratio = np.sum(
            np.where(((lower < upper)
                      & (temp_outputs_data < upper)
                      & (lower < temp_outputs_data))
                     | ((upper < lower)
                        & (upper < temp_outputs_data)
                        & (lower < temp_outputs_data))
                     | ((upper < lower)
                        & (temp_outputs_data < upper)
                        & (temp_outputs_data < lower)), True,
                     False).all(axis=1)) / (len(temp_outputs_data))
        only_pt_ratio = np.sum(
            np.where(((lower[:, 0] < upper[:, 0])
                      & (temp_outputs_data[:, 0] < upper[:, 0])
                      & (lower[:, 0] < temp_outputs_data[:, 0]))
                     | ((upper[:, 0] < lower[:, 0])
                        & (upper[:, 0] < temp_outputs_data[:, 0])
                        & (lower[:, 0] < temp_outputs_data[:, 0]))
                     | ((upper[:, 0] < lower[:, 0])
                        & (temp_outputs_data[:, 0] < upper[:, 0])
                        & (temp_outputs_data[:, 0] < lower[:, 0])), True,
                     False)) / (len(temp_outputs_data))

        result['RATIO'][num_name_conb[choice]].append(ratio)
        result['ONLY_PT_RATIO'][num_name_conb[choice]].append(only_pt_ratio)
        result['LOSS_1ST'][num_name_conb[choice]].append(loss_1st.item())
        result['LOSS_2ND'][num_name_conb[choice]].append(loss_2nd.item())
        logger.info(f'[Choice:{now_choice} / auc:{auc_score:.6f}] / ' +
                    f'first_loss: {loss_1st:.6f} / ' +
                    f'ratio: {ratio:.6f} / ' +
                    f'only_pt_ratio: {only_pt_ratio:.6f} / ')

    logger.info(result)
    return result
Пример #7
0
    def _train_batch_model(self, epoch, phase):
        """Run one epoch of `phase` ('train' or 'valid') and return mean loss.

        Inputs are passed through ``self._preprocess`` under no_grad before
        the model. Gradients are enabled and the optimizer stepped only when
        phase == 'train'. If ``self._metrics`` is set, a sample-weighted
        running metric is tracked alongside the loss.

        Args:
            epoch: current epoch number (for progress/log display).
            phase: dataset mode name; also selects train vs eval behavior.

        Returns:
            float: sample-weighted average loss over the epoch.
        """
        from tqdm import tqdm
        epoch_loss = 0.0
        epoch_metrics = 0.0
        total = 0
        # Defined up-front so an empty dataloader cannot leave running_loss
        # or log_s unbound at the logging/return below.
        running_loss = 0.0
        log_s = ''

        # Switch the shared dataset into the requested phase.
        getattr(self._dataloaders.dataset, phase)()
        with tqdm(total=len(self._dataloaders), unit="batch",
                  ncols=120) as pbar:
            pbar.set_description(
                f"Epoch [{epoch}/{self._hp_epochs}] ({phase.ljust(5)})")

            for data in self._dataloaders:
                inputs = add_device(data[self._input_key], self._device)
                with torch.no_grad():
                    inputs = self._preprocess(inputs)
                # Bug fix: `device` was an undefined name here; the sibling
                # predict method moves inputs with self._device.
                inputs = inputs.to(self._device)
                labels = add_device(data[self._target_key], self._device)
                self._optimizer.zero_grad()
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = self._model(inputs)
                    loss = self._criterion(outputs, labels)

                    if phase == 'train':
                        loss.backward()
                        self._optimizer.step()

                    # Weight by batch size so the average is per-sample.
                    epoch_loss += loss.item() * inputs.size(0)
                    total += inputs.size(0)

                    running_loss = epoch_loss / total
                    if self._metrics is not None:
                        if self._activation:
                            outputs = self._activation(outputs)
                        outputs = tensor_to_array(outputs)
                        labels = tensor_to_array(labels)
                        metrics = self._metrics(labels, outputs)
                        epoch_metrics += metrics.item() * inputs.size(0)
                        running_metrics = epoch_metrics / total
                        s = {
                            "loss": f'{running_loss:.4f}',
                            "metrics": f'{running_metrics:.4f}',
                            "lr": f'{self._lr:.4f}'
                        }
                        log_s = (
                            f'Epoch [{epoch}/{self._hp_epochs}] ({phase.ljust(6)})'
                            f'{self._model.__class__.__name__} (Loss_2ND) :'
                            f'loss/{running_loss}  '
                            f'metrics/{running_metrics}')
                    else:
                        s = {
                            "loss": f'{running_loss:.4f}',
                            "lr": f'{self._lr:.4f}'
                        }
                        log_s = (
                            f'Epoch [{epoch}/{self._hp_epochs}] ({phase.ljust(6)})'
                            f'{self._model.__class__.__name__} (Loss_2ND) :'
                            f'loss/{running_loss}')

                    pbar.set_postfix(s)
                    pbar.update(1)
        if phase == 'valid':
            logger.info(log_s)
        return running_loss
Пример #8
0
    def fit(self,
            epochs,
            dataloader,
            device,
            optimizer,
            scheduler,
            patience=3,
            choice=None):
        """Train with per-epoch validation and early stopping.

        Each epoch: switch the dataset to train mode, run a full training
        pass (optimizer and scheduler stepped per batch), then a no-grad
        validation pass whose weighted loss feeds EarlyStopping. The best
        checkpoint saved by EarlyStopping is reloaded before returning.

        Args:
            epochs: maximum number of epochs.
            dataloader: loader whose dataset exposes train()/valid() switches.
            device: torch device for the model and batches.
            optimizer: optimizer stepped once per batch.
            scheduler: LR scheduler stepped once per batch.
            patience: EarlyStopping patience in epochs.
            choice: optional architecture choice forwarded to the model.
        """
        self.to(device)
        logger.info(f'save at {self._save_path}')
        early_stopping = EarlyStopping(patience=patience,
                                       verbose=True,
                                       path=self._save_path,
                                       save=True)
        sigmoid = nn.Sigmoid()
        metrics = Calc_Auc()
        for epoch in range(epochs):
            self.train()
            train_loss = 0.0
            dataloader.dataset.train()
            train_data = tqdm(dataloader)
            lr = scheduler.get_last_lr()[0]
            train_data.set_description(
                f'[Epoch:{epoch+1:04d}/{epochs:04d} lr:{lr:.5f}]')
            for step, data in enumerate(train_data):
                inputs = add_device(data[self._input_key], device)
                targets = add_device(data[self._target_key], device)
                optimizer.zero_grad()
                outputs, now_choice = self.__call__(inputs, choice)
                # Total loss is the weighted sum over the per-output criteria;
                # outputs with a None weight are excluded entirely.
                loss = 0.0
                for criterion, weight, output, target in zip(
                        self._loss_func, self._loss_weight, outputs, targets):
                    if weight is not None:
                        loss += criterion(output, target) * weight
                loss.backward()
                optimizer.step()
                scheduler.step()
                train_loss += loss.item()
                running_loss = train_loss / (step + 1)
                postfix = {
                    'train_loss': f'{running_loss:.5f}',
                    'choice': f'{now_choice}'
                }
                train_data.set_postfix(log=postfix)
            with torch.no_grad():
                self.eval()
                dataloader.dataset.valid()
                outputs_data = []
                targets_data = []
                valid_loss = 0.0
                for step, data in enumerate(dataloader):
                    inputs = add_device(data[self._input_key], device)
                    targets = add_device(data[self._target_key], device)
                    outputs, now_choice = self.__call__(inputs, choice)
                    loss = 0.0
                    for criterion, weight, output, target in zip(
                            self._loss_func, self._loss_weight, outputs,
                            targets):
                        if weight is not None:
                            loss += criterion(output, target) * weight
                    # AUC is computed on sigmoid of the second output head.
                    outputs_data.extend(tensor_to_array(sigmoid(outputs[1])))
                    targets_data.extend(tensor_to_array(targets[1]))
                    valid_loss += loss.item()
                targets_data = np.array(targets_data)
                outputs_data = np.array(outputs_data)
                auc_score = metrics(targets_data, outputs_data)
                s = f'[Epoch:{epoch+1:04d}|valid| / '\
                    f'auc:{auc_score:.6f} / '\
                    f'loss:{valid_loss/(step+1):.6f}]'
                logger.info(s)
                # Mean validation loss per batch drives early stopping.
                _ = early_stopping(valid_loss / (step + 1), self)
                if early_stopping.early_stop:
                    logger.info("Early stopping")
                    break

        # Restore the best (lowest validation loss) checkpoint.
        self.load_state_dict(
            torch.load(os.path.join(self._save_path, 'checkpoint.pt')))
Пример #9
0
def evaluate(model, conf, dataloader, metrics, result, is_gp_3dim):
    """Evaluate every (first-stage, second-stage) model combination.

    For each choice pair, records AUC, GP-interval coverage ratios (or -1.0
    sentinels when the GP check is skipped), and both stage losses into
    `result`, then returns the mutated `result`.
    """
    # NOTE(review): GP bounds are apparently only valid for the 50k-event
    # dataset; coverage ratios are skipped otherwise — confirm.
    is_gp_check = False
    if conf['dataset']['params']['max_events'] == 50000:
        is_gp_check = True
    with torch.no_grad():
        logger.info('start eval mode')
        model.eval()
        dataloader.dataset.test()
        test_dataset = dataloader.dataset
        # Fixed, unshuffled loader so outputs line up row-for-row with the
        # GP bound CSVs loaded below.
        test_dataloader = DataLoader(test_dataset,
                                     batch_size=100,
                                     shuffle=False)
        A = range(len(FIRST_MODEL_NAME))
        B = range(len(SECOND_MODEL_NAME))
        count = 0
        # count-th pair name "first_second" for combined (AUC / 2nd-loss) keys.
        num_name_conb = {
            num: f'{f}_{s}' for num, (f, s) in enumerate(
                product(
                    FIRST_MODEL_NAME, SECOND_MODEL_NAME
                )
            )
        }
        # count-th first-stage name for first-stage-only (ratio / 1st-loss) keys.
        num_name_1st = {
            num: f for num, (f, s) in enumerate(
                product(
                    FIRST_MODEL_NAME, SECOND_MODEL_NAME
                )
            )
        }
        for choice in product(A, B):
            outputs_data = []       # second-stage predictions
            targets_data = []       # second-stage targets
            temp_outputs_data = []  # first-stage predictions
            temp_targets_data = []  # first-stage targets
            for data in test_dataloader:
                inputs = add_device(data['inputs'], DEVICE)
                targets = add_device(data['targets'], DEVICE)
                # NOTE(review): this reassigns the loop variable `choice`
                # with whatever the model returns; the sibling evaluate()
                # keeps them separate as `now_choice` — confirm intended.
                outputs, choice = model(inputs, choice)
                outputs_data.extend(tensor_to_array(outputs[1]))
                targets_data.extend(tensor_to_array(targets[1]))
                temp_outputs_data.extend(tensor_to_array(outputs[0]))
                temp_targets_data.extend(tensor_to_array(targets[0]))
            targets_data = np.array(targets_data)
            outputs_data = np.array(outputs_data)
            auc_score = metrics(targets_data, outputs_data)
            result['AUC'][num_name_conb[count]].append(auc_score)
            temp_outputs_data = np.array(temp_outputs_data)
            temp_targets_data = np.array(temp_targets_data)
            upper = np.loadtxt('./logs/GP_upper.csv', delimiter=',')
            lower = np.loadtxt('./logs/GP_lower.csv', delimiter=',')

            # Rebuild both stage criteria from config and score the collected
            # predictions offline on CPU tensors.
            c_1 = set_module([torch.nn, MyLoss], conf.SPOS_NAS, 'loss_first')
            c_2 = set_module([torch.nn, MyLoss], conf.SPOS_NAS, 'loss_second')
            loss_1st = c_1(torch.tensor(temp_outputs_data),
                           torch.tensor(temp_targets_data))
            loss_2nd = c_2(torch.tensor(outputs_data),
                           torch.tensor(targets_data))

            if is_gp_check:
                from models.sub_task import set_phi_within_valid_range

                def reshape3vec(data):
                    # Bounds and first-stage outputs as rows of 3-vectors.
                    return data.reshape(-1, 3)

                temp_outputs_data = set_phi_within_valid_range(
                    reshape3vec(temp_outputs_data)
                )
                upper = set_phi_within_valid_range(
                    reshape3vec(upper)
                )
                lower = set_phi_within_valid_range(
                    reshape3vec(lower)
                )
                # 6-dim mode keeps both tau candidates in a single row.
                if not is_gp_3dim:
                    temp_outputs_data = temp_outputs_data.reshape(-1, 6)
                    upper = upper.reshape(-1, 6)
                    lower = lower.reshape(-1, 6)

                # Row is covered when every component lies inside the bound.
                # NOTE(review): the `upper < lower` branches presumably handle
                # the periodic phi wrap-around — confirm.
                query = (
                    ((lower < upper)
                     & (temp_outputs_data < upper)
                     & (lower < temp_outputs_data))
                    | ((upper < lower)
                       & (upper < temp_outputs_data)
                       & (lower < temp_outputs_data))
                    | ((upper < lower)
                       & (temp_outputs_data < upper)
                       & (temp_outputs_data < lower))
                )
                ratio = np.sum(
                    np.where(query, True, False).all(axis=1)
                )/(len(temp_outputs_data))
                result['RATIO'][num_name_1st[count]] = [ratio]

                # Same coverage test restricted to the pT column(s):
                # column 0 in 3-dim mode, columns 0 and 3 in 6-dim mode.
                query = (
                    ((lower[:, 0] < upper[:, 0])
                     & (temp_outputs_data[:, 0] < upper[:, 0])
                     & (lower[:, 0] < temp_outputs_data[:, 0]))
                    | ((upper[:, 0] < lower[:, 0])
                       & (upper[:, 0] < temp_outputs_data[:, 0])
                       & (lower[:, 0] < temp_outputs_data[:, 0]))
                    | ((upper[:, 0] < lower[:, 0])
                       & (temp_outputs_data[:, 0] < upper[:, 0])
                       & (temp_outputs_data[:, 0] < lower[:, 0]))
                )
                if not is_gp_3dim:
                    query = (
                        ((lower[:, [0, 3]] < upper[:, [0, 3]])
                         & (temp_outputs_data[:, [0, 3]] < upper[:, [0, 3]])
                         & (lower[:, [0, 3]] < temp_outputs_data[:, [0, 3]]))
                        | ((upper[:, [0, 3]] < lower[:, [0, 3]])
                           & (upper[:, [0, 3]] < temp_outputs_data[:, [0, 3]])
                           & (lower[:, [0, 3]] < temp_outputs_data[:, [0, 3]]))
                        | ((upper[:, [0, 3]] < lower[:, [0, 3]])
                           & (temp_outputs_data[:, [0, 3]] < upper[:, [0, 3]])
                           & (temp_outputs_data[:, [0, 3]] < lower[:, [0, 3]]))
                    )
                only_pt_ratio = np.sum(
                    np.where(query, True, False)
                )/(len(temp_outputs_data))
                result['ONLY_PT_RATIO'][num_name_1st[count]] = [only_pt_ratio]
            else:
                # GP check skipped: record sentinel values.
                ratio = -1.0
                only_pt_ratio = -1.0
                result['RATIO'][num_name_1st[count]] = [ratio]
                result['ONLY_PT_RATIO'][num_name_1st[count]] = [only_pt_ratio]

            result['LOSS_1ST'][num_name_1st[count]] = [loss_1st.item()]

            result['LOSS_2ND'][num_name_conb[count]].append(loss_2nd.item())
            logger.info(f'[Choice:{choice} / auc:{auc_score:.6f}] / ' +
                        f'first_loss: {loss_1st:.6f} / ' +
                        f'ratio: {ratio:.6f} / ' +
                        f'only_pt_ratio: {only_pt_ratio:.6f}')
            count += 1

    logger.info(result)
    return result