Example #1
    def _fit(self, modules: nn.ModuleDict, train_dl: DeviceDataLoader,
             valid_dl: DeviceDataLoader):
        r""" Fits \p modules' learners to the training and validation \p DataLoader objects """
        self._configure_fit_vars(modules)

        for mod_name, module in modules.items():
            lr = config.get_learner_val(mod_name,
                                        LearnerParams.Attribute.LEARNING_RATE)
            wd = config.get_learner_val(mod_name,
                                        LearnerParams.Attribute.WEIGHT_DECAY)
            # Linear models (no hidden layers) on synthetic data are fit with LBFGS
            is_lin_ff = (config.DATASET.is_synthetic()
                         and module.module.num_hidden_layers == 0)
            if is_lin_ff:
                module.optim = LBFGS(module.parameters(), lr=lr)
            else:
                module.optim = AdamW(module.parameters(),
                                     lr=lr,
                                     weight_decay=wd,
                                     amsgrad=True)
            logging.debug(
                f"{mod_name} Optimizer: {module.optim.__class__.__name__}")

        for ep in range(1, config.NUM_EPOCH + 1):
            # noinspection PyUnresolvedReferences
            for module in modules.values():
                module.epoch_start()

            for batch in train_dl:
                for module in modules.values():
                    module.process_batch(batch)

            for module in modules.values():
                module.calc_valid_loss(valid_dl)
            self._log_epoch(ep, modules)
        self._restore_best_model(modules)
        self.eval()
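
A side note on the optimizer branch above: LBFGS and AdamW are driven differently inside a training step, because torch.optim.LBFGS.step() expects a closure that re-evaluates the loss. A minimal standalone sketch (the toy linear module and MSE loss are illustrative assumptions, not the repository's process_batch logic):

import torch
from torch import nn
from torch.optim import LBFGS, AdamW

x, y = torch.randn(64, 10), torch.randn(64, 1)
module = nn.Linear(10, 1)
loss_fn = nn.MSELoss()

use_lbfgs = True  # mirrors the is_lin_ff branch above
if use_lbfgs:
    optim = LBFGS(module.parameters(), lr=0.1)

    def closure():
        # LBFGS may evaluate the loss several times per step
        optim.zero_grad()
        loss = loss_fn(module(x), y)
        loss.backward()
        return loss

    optim.step(closure)
else:
    optim = AdamW(module.parameters(), lr=1e-3, weight_decay=1e-2, amsgrad=True)
    optim.zero_grad()
    loss_fn(module(x), y).backward()
    optim.step()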
Example #2
def _write_results_to_disk(dest_dir: Path, start_time: str, all_res: dict) -> None:
    r""" Logs the results to disk for later analysis """
    def _log_val(_v) -> str:
        if isinstance(_v, str): return _v
        if isinstance(_v, bool): return str(_v)
        if isinstance(_v, int): return f"{_v:d}"
        if isinstance(_v, Tensor): _v = float(_v.item())
        if isinstance(_v, float): return f"{_v:.15f}"
        if isinstance(_v, Enum): return _v.name
        if isinstance(_v, set):
            return ",".join([_log_val(_x) for _x in sorted(_v)])
        if _v is None: return "NA"
        if isinstance(_v, list):
            lst_str = ", ".join([str(ele) for ele in _v])
            return f"\"[{lst_str}]\""
        raise ValueError(f"Unknown value type \"{type(_v)}\" to log")

    classifier_name_idx = 1
    loss_name_idx = classifier_name_idx + 1
    num_epoch_idx = None  # Start of fields to ignore for PUc learner
    kernel_type_idx = None

    header = ["start-time", "classifier-name", "loss-name"]
    base_fields = [start_time, None, None]
    for key, val in vars(config).items():
        # Exclude dunders in all modules
        if key.startswith("__") and key.endswith("__"): continue
        # Exclude any functions in config
        if callable(val): continue
        # Exclude any imported modules
        if isinstance(val, ModuleType): continue
        if key.lower().endswith("_key"): continue
        if key.upper() not in config.__all__:
            logging.debug(f"Skipping {key} as not in config.__all__")
            continue
        if key.lower() == config.NUM_EPOCH_KEY.lower():
            assert num_epoch_idx is None, "Number of epochs field should be None by default"
            num_epoch_idx = len(base_fields)

        header.append(key.replace("_", "-"))
        if key == "bias" and isinstance(val, list):
            base_fields.append(",".join([f"{x:.2f}" for _, x in val]))
            continue
        if key.lower() == config.KERNEL_KEY.lower():
            kernel_type_idx = len(base_fields)
            base_fields.append("N/A")
            continue
        base_fields.append(_log_val(val))

    all_fields = []
    for i, (block_name, block_res) in enumerate(all_res.items()):
        fields = copy.deepcopy(base_fields)
        fields[classifier_name_idx] = block_name
        if PUcLearner.BASE_NAME.lower() != block_name.lower():
            fields[loss_name_idx] = block_res.loss_name
        else:
            for itr in range(num_epoch_idx, len(fields)):
                fields[itr] = "N/A"
            fields[loss_name_idx] = "squared_loss"
            fields[kernel_type_idx] = config.KERNEL_TYPE

        # Add learner specific parameters
        for attr in LearnerParams.Attribute:
            if i == 0: header.append(attr.name)
            attr_val = config.get_learner_val(block_name, attr)
            assert attr_val is not None, "Attribute value unset"
            fields.append(_log_val(attr_val))

        for field_name in ("valid_loss", "step1_soft_err", "step1_hard_err", "step1_topk_err",
                           "decision_m", "decision_b"):
            if i == 0: header.append(field_name)
            fields.append(_log_val(getattr(block_res, field_name)))

        for res_name in ("unlabel_train", "tr_test", "unlabel_test", "test"):
            res_val = getattr(block_res, res_name)
            for fld_name, fld_val in vars(res_val).items():
                if i == 0: header.append(f"{res_name}-{fld_name}".replace("_", "-"))
                fields.append(_log_val(fld_val))
        # Add the results
        all_fields.append(fields)

    # Writes the file
    filename = construct_filename(prefix="res", out_dir=dest_dir, file_ext="csv",
                                  add_timestamp=True)
    with open(str(filename), "w+") as f_out:
        f_out.write(LearnerResults.FIELD_SEP.join(header))
        for block_fields in all_fields:
            f_out.write("\n")
            f_out.write(LearnerResults.FIELD_SEP.join(block_fields))
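
For reference, the field formatting encoded in _log_val can be checked in isolation. The sketch below inlines an equivalent converter (the helper itself is nested inside _write_results_to_disk, so the name to_field is a stand-in) and asserts the expected CSV field strings; note that the bool check must precede the int check because bool is a subclass of int.

from enum import Enum
import torch

class Split(Enum):
    TRAIN = 1

def to_field(v) -> str:
    if isinstance(v, str): return v
    if isinstance(v, bool): return str(v)          # bool before int: bool subclasses int
    if isinstance(v, int): return f"{v:d}"
    if isinstance(v, torch.Tensor): v = float(v.item())
    if isinstance(v, float): return f"{v:.15f}"
    if isinstance(v, Enum): return v.name
    if isinstance(v, set): return ",".join(to_field(x) for x in sorted(v))
    if v is None: return "NA"
    if isinstance(v, list): return "\"[" + ", ".join(str(e) for e in v) + "]\""
    raise ValueError(f"Unknown value type \"{type(v)}\" to log")

assert to_field(7) == "7"
assert to_field(True) == "True"
assert to_field(torch.tensor(0.25)) == "0.250000000000000"
assert to_field(Split.TRAIN) == "TRAIN"
assert to_field({3, 1, 2}) == "1,2,3"
assert to_field(None) == "NA"
assert to_field([1, 2]) == '"[1, 2]"'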
Example #3
    def __init__(self,
                 base_module: nn.Module,
                 sigma: Optional[CalibratedLearner] = None,
                 rho_vals: Optional[Iterable] = np.linspace(start=0.0,
                                                            stop=1.0,
                                                            num=11),
                 bias_priors: bool = False):
        super().__init__("aPU")

        # train_loss = log_loss
        # train_loss = ramp_loss
        train_loss = sigmoid_loss
        valid_loss = sigmoid_loss
        # valid_loss = loss_0_1

        self._pu_blocks = nn.ModuleDict()

        tr_priors, te_priors = [config.TRAIN_PRIOR], [config.TEST_PRIOR]
        if bias_priors:
            biases = [0.8, 1.2]
            tr_priors.extend([bias * config.TRAIN_PRIOR for bias in biases])
            te_priors.extend([bias * config.TEST_PRIOR for bias in biases])

        self.s1_soft_acc = self.s1_hard_acc = self.s1_topk_acc = None
        self._sigma = sigma
        # Guard the Optional default: sigma is dereferenced below
        if self._sigma is None:
            raise ValueError("Sigma learner must be specified")
        if not self._sigma.is_calibrated():
            raise ValueError("Sigma method is not calibrated")
        self._sigma.freeze()

        # Construct the WUU learners over the grid of step-1 methods and priors
        s1_methods = self.get_step1_methods()
        wuu_itrs = itertools.product(s1_methods, tr_priors, te_priors)
        for s1_method, tr_prior, te_prior in wuu_itrs:
            wuu = WUU(train_prior=tr_prior,
                      test_prior=te_prior,
                      u_tr_label=Labels.Training.U_TRAIN.value,
                      u_te_label=Labels.Training.U_TEST.value,
                      train_loss=train_loss,
                      valid_loss=valid_loss,
                      abs_nn=True,
                      s1_method=s1_method)

            wuu.gamma = config.get_learner_val(wuu.name(),
                                               LearnerParams.Attribute.GAMMA)
            self._pu_blocks[wuu.name()] = apu.utils.ClassifierBlock(
                base_module, wuu)

        # Construct the PURR learners
        purr_itrs = itertools.product(tr_priors, te_priors)
        for tr_prior, te_prior in purr_itrs:
            l_purr = PURR(train_prior=tr_prior,
                          test_prior=te_prior,
                          train_loss=train_loss,
                          valid_loss=valid_loss,
                          abs_nn=True)

            l_purr.gamma = config.get_learner_val(
                l_purr.name(), LearnerParams.Attribute.GAMMA)
            self._pu_blocks[l_purr.name()] = apu.utils.ClassifierBlock(
                base_module, l_purr)

        # Construct the aPNU learners with varied values of rho
        apnu_itrs = itertools.product(s1_methods, rho_vals, tr_priors,
                                      te_priors)
        for s1_method, rho, tr_prior, te_prior in apnu_itrs:
            nn_pnu = APNU(train_prior=tr_prior,
                          test_prior=te_prior,
                          rho=rho,
                          train_loss=train_loss,
                          valid_loss=valid_loss,
                          abs_nn=True,
                          s1_method=s1_method)

            nn_pnu.gamma = config.get_learner_val(
                nn_pnu.name(), LearnerParams.Attribute.GAMMA)
            classifier = apu.utils.ClassifierBlock(base_module, nn_pnu)
            self._pu_blocks[nn_pnu.name()] = classifier

        # Construct the nnPU learners
        for i in range(2):
            if i == 0:
                num_unlabeled_pos = (config.TRAIN_PRIOR * config.N_U_TRAIN +
                                     config.TEST_PRIOR * config.N_U_TEST)
                tot_u_size = config.N_U_TRAIN + config.N_U_TEST
                prior = num_unlabeled_pos / tot_u_size

                only_u_test = False
            elif i == 1:
                prior = config.TEST_PRIOR
                only_u_test = True
            else:
                raise ValueError("Unknown configuration")
            nnpu = NNPU(prior=prior,
                        train_loss=train_loss,
                        valid_loss=valid_loss,
                        only_u_test=only_u_test)
            nnpu.gamma = config.get_learner_val(nnpu.name(),
                                                LearnerParams.Attribute.GAMMA)
            self._pu_blocks[nnpu.name()] = ClassifierBlock(base_module, nnpu)

        pn = PN(train_loss=train_loss,
                valid_loss=valid_loss,
                is_train=False,
                prior=config.TEST_PRIOR)
        self._pn_test = nn.ModuleDict(
            {"te_pn": ClassifierBlock(base_module, pn)})

        self.to(device=TORCH_DEVICE)
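
To gauge how large the learner grid above grows, the sketch below expands the same itertools.product enumeration with placeholder step-1 method names, priors, and rho values (none of these come from config):

import itertools

s1_methods = ["soft", "hard"]         # placeholder step-1 method names
train_prior, test_prior = 0.5, 0.3    # placeholder class priors
biases = [0.8, 1.2]

tr_priors = [train_prior] + [b * train_prior for b in biases]   # [0.5, 0.4, 0.6]
te_priors = [test_prior] + [b * test_prior for b in biases]     # [0.3, 0.24, 0.36]

wuu_grid = list(itertools.product(s1_methods, tr_priors, te_priors))
apnu_grid = list(itertools.product(s1_methods, [0.0, 0.5, 1.0], tr_priors, te_priors))
assert len(wuu_grid) == 2 * 3 * 3        # 18 WUU blocks
assert len(apnu_grid) == 2 * 3 * 3 * 3   # 54 aPNU blocks for three rho values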
Example #4
    def __init__(self,
                 base_module: nn.Module,
                 sigma: Optional[CalibratedLearner] = None,
                 rho_vals: Optional[Iterable] = np.linspace(start=0.0,
                                                            stop=1.0,
                                                            num=11)):
        super().__init__("aPU")

        # train_loss = log_loss
        # train_loss = ramp_loss
        train_loss = sigmoid_loss
        valid_loss = sigmoid_loss
        # valid_loss = loss_0_1

        self._pu_blocks = nn.ModuleDict()

        self._sigma = sigma
        if self._sigma is not None:
            if not self._sigma.is_calibrated():
                raise ValueError("Sigma method is not calibrated")
            self._sigma.freeze()
            wuu = WUU(train_prior=config.TRAIN_PRIOR,
                      test_prior=config.TEST_PRIOR,
                      u_tr_label=Labels.Training.U_TRAIN.value,
                      u_te_label=Labels.Training.U_TEST.value,
                      train_loss=train_loss,
                      valid_loss=valid_loss,
                      abs_nn=config.USE_ABS)

            wuu.gamma = config.get_learner_val(wuu.name(),
                                               LearnerParams.Attribute.GAMMA)
            self._pu_blocks[wuu.name()] = apu.utils.ClassifierBlock(
                base_module, wuu)

        # Construct the PURR learners
        l_purr = PURR(train_prior=config.TRAIN_PRIOR,
                      test_prior=config.TEST_PRIOR,
                      train_loss=train_loss,
                      valid_loss=valid_loss,
                      abs_nn=config.USE_ABS)

        l_purr.gamma = config.get_learner_val(l_purr.name(),
                                              LearnerParams.Attribute.GAMMA)
        self._pu_blocks[l_purr.name()] = apu.utils.ClassifierBlock(
            base_module, l_purr)

        # Construct aPNU with varied values of rho
        if self._sigma is not None:
            for rho in rho_vals:
                nn_pnu = APNU(train_prior=config.TRAIN_PRIOR,
                              test_prior=config.TEST_PRIOR,
                              rho=rho,
                              train_loss=train_loss,
                              valid_loss=valid_loss,
                              abs_nn=config.USE_ABS)

                nn_pnu.gamma = config.get_learner_val(
                    nn_pnu.name(), LearnerParams.Attribute.GAMMA)
                classifier = apu.utils.ClassifierBlock(base_module, nn_pnu)

                self._pu_blocks[nn_pnu.name()] = classifier

        # Construct the nnPU learners
        for i in range(2):
            if i == 0:
                num_unlabeled_pos = (config.TRAIN_PRIOR * config.N_U_TRAIN +
                                     config.TEST_PRIOR * config.N_U_TEST)
                tot_u_size = config.N_U_TRAIN + config.N_U_TEST
                prior = num_unlabeled_pos / tot_u_size

                only_u_test = False
            elif i == 1:
                prior = config.TEST_PRIOR
                only_u_test = True
            else:
                raise ValueError("Unknown configuration")
            nnpu = NNPU(prior=prior,
                        train_loss=train_loss,
                        valid_loss=valid_loss,
                        only_u_train=False,
                        only_u_test=only_u_test)
            nnpu.gamma = config.get_learner_val(nnpu.name(),
                                                LearnerParams.Attribute.GAMMA)
            self._pu_blocks[nnpu.name()] = ClassifierBlock(base_module, nnpu)

        pn = PN(train_loss=train_loss, valid_loss=valid_loss, is_train=True)
        self._pn_train = nn.ModuleDict(
            {"tr_pn": ClassifierBlock(base_module, pn)})

        pn = PN(train_loss=train_loss, valid_loss=valid_loss, is_train=False)
        self._pn_test = nn.ModuleDict(
            {"te_pn": ClassifierBlock(base_module, pn)})

        self.to(device=TORCH_DEVICE)
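
The prior passed to the first nnPU block above is simply the size-weighted mix of the train and test class priors over the pooled unlabeled data. A worked example with illustrative numbers (not values from config):

train_prior, test_prior = 0.5, 0.3    # placeholders for config.TRAIN_PRIOR / config.TEST_PRIOR
n_u_train, n_u_test = 6000, 4000      # placeholders for config.N_U_TRAIN / config.N_U_TEST

num_unlabeled_pos = train_prior * n_u_train + test_prior * n_u_test   # 3000 + 1200
prior = num_unlabeled_pos / (n_u_train + n_u_test)                    # 4200 / 10000 = 0.42
assert abs(prior - 0.42) < 1e-9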