def __init__(self, configs=None):
    """Initialise the learner with its default transfer settings.

    Args:
        configs: Configuration object forwarded to the parent constructor.
            When omitted (``None``), a new ``MethodConfigs()`` is created
            for this instance.
    """
    # A ``MethodConfigs()`` default written in the signature is evaluated
    # once, when the function is defined, so every instance constructed
    # without arguments would share the same configuration object. Build a
    # fresh one per call instead.
    if configs is None:
        configs = MethodConfigs()
    super(FuseTransfer, self).__init__(configs)
    self.use_oracle = False
    # Fraction of the total instance weight given to target instances;
    # ``train`` also accepts None here, in which case it leaves all
    # instance weights at 1.
    self.target_weight_scale = .75
    self.label_transform = NanLabelBinarizer()
def __init__(self, configs=None):
    """Set up a FuseTransfer instance with its default settings.

    Args:
        configs: Passed unchanged to the parent constructor.
    """
    super(FuseTransfer, self).__init__(configs)
    # Transformer applied to the labels; a new NanLabelBinarizer is created
    # for each instance.
    self.label_transform = NanLabelBinarizer()
    # Default weighting between target and source instances.
    self.target_weight_scale = 0.75
    # Oracle mode is off by default.
    self.use_oracle = False
class FuseTransfer(TargetTranfer):
    """Transfer method that trains on source and target instances together.

    Before delegating to the parent ``train``, instance weights are set so
    that (when ``target_weight_scale`` is not None) each domain's weights are
    divided by that domain's number of labeled training instances and then
    scaled by ``target_weight_scale`` (target) or ``1 - target_weight_scale``
    (source).
    """

    def __init__(self, configs=None):
        """Initialise the learner with its default transfer settings.

        Args:
            configs: Configuration object forwarded to the parent
                constructor. When omitted (``None``), a new
                ``MethodConfigs()`` is created for this instance.
        """
        # A ``MethodConfigs()`` default in the signature is evaluated once at
        # definition time and would be shared by every instance built
        # without arguments; create it per call instead.
        if configs is None:
            configs = MethodConfigs()
        super(FuseTransfer, self).__init__(configs)
        self.use_oracle = False
        # Fraction of the total weight given to target instances; None
        # disables the re-weighting in ``train``.
        self.target_weight_scale = .75
        self.label_transform = NanLabelBinarizer()

    def train(self, data):
        """Assign per-domain instance weights, then run the parent ``train``.

        ``data.instance_weights`` is overwritten. ``data.y`` is temporarily
        replaced by the output of ``label_transform.fit_transform`` (when a
        transform is set) and restored afterwards, even if training raises.

        Args:
            data: Data object exposing ``is_labeled``, ``is_train``,
                ``is_source``, ``is_target``, ``n`` and ``y``.

        Raises:
            ValueError: If ``target_weight_scale`` is set but outside [0, 1].
        """
        is_labeled_train = data.is_labeled & data.is_train
        # Count labeled training instances separately for each domain so the
        # normalisation below really is per-domain.
        n_labeled_target = (data.is_target & is_labeled_train).sum()
        n_labeled_source = (data.is_source & is_labeled_train).sum()
        data.instance_weights = np.ones(data.n)
        scale = self.target_weight_scale
        if scale is not None:
            if not 0 <= scale <= 1:
                raise ValueError(
                    'target_weight_scale must be in [0, 1], got %r' % (scale,)
                )
            # Skip the normalisation for a domain with no labeled training
            # instances rather than dividing by zero.
            if n_labeled_source > 0:
                data.instance_weights[data.is_source] /= n_labeled_source
            if n_labeled_target > 0:
                data.instance_weights[data.is_target] /= n_labeled_target
            data.instance_weights[data.is_target] *= scale
            data.instance_weights[data.is_source] *= (1 - scale)
        y_old = data.y
        if self.label_transform is not None:
            data.y = self.label_transform.fit_transform(data.y)
        try:
            super(FuseTransfer, self).train(data)
        finally:
            # Always hand the caller back its original labels.
            data.y = y_old

    def _prepare_data(self, data, include_unlabeled=True):
        """Return a deep copy of ``data`` with source instances marked.

        If ``data.data_set_ids`` is already set, instances whose id matches
        one of ``configs.source_labels`` are typed as source and the copy is
        returned immediately. Otherwise source instances are located per
        source label, relabelled to the target labels (classification only),
        marked as source training instances, and given a 1-based data set id.

        Args:
            data: Data object to prepare; it is not modified.
            include_unlabeled: Not used by this implementation.

        Returns:
            The prepared copy of ``data``.
        """
        source_labels = self.configs.source_labels
        target_labels = self.configs.target_labels
        data_copy = copy.deepcopy(data)
        if data.data_set_ids is not None:
            for i in source_labels:
                data_copy.type[data_copy.data_set_ids == i] = data_lib.TYPE_SOURCE
            return data_copy
        if self.use_oracle:
            # In oracle mode keep only the oracle and target labels.
            oracle_labels = self.configs.oracle_labels
            data_copy = data_copy.get_transfer_subset(
                np.concatenate((oracle_labels.ravel(), target_labels.ravel())),
                include_unlabeled=True
            )
        data_copy.data_set_ids = np.zeros(data_copy.n)
        for i, s in enumerate(source_labels):
            source_inds = data_copy.get_transfer_inds(s)
            if not data_copy.is_regression:
                data_copy.change_labels(s, target_labels)
            data_copy.type[source_inds] = data_lib.TYPE_SOURCE
            data_copy.is_train[source_inds] = True
            # Id 0 is left for everything that is not a source set.
            data_copy.data_set_ids[source_inds] = i + 1
        # ``use_all_source`` may be absent on this instance; treat that as off.
        if getattr(self, 'use_all_source', False):
            data_copy.reveal_labels(data_copy.is_source)
        return data_copy

    @property
    def prefix(self):
        """Name string built from the base learner prefix and active options."""
        s = 'FuseTransfer+' + self.base_learner.prefix
        # The ``__dict__`` membership checks tolerate instances that lack
        # these attributes (presumably objects created before the attributes
        # existed -- TODO confirm).
        if 'target_weight_scale' in self.__dict__ and self.target_weight_scale is not None:
            s += '-tws=' + str(self.target_weight_scale)
        if 'use_oracle' in self.__dict__ and self.use_oracle:
            s += '-Oracle'
        return s
class FuseTransfer(TargetTranfer):
    """Transfer method that trains on source and target instances together.

    Before delegating to the parent ``train``, instance weights are set so
    that (when ``target_weight_scale`` is not None) each domain's weights are
    divided by that domain's number of labeled training instances and then
    scaled by ``target_weight_scale`` (target) or ``1 - target_weight_scale``
    (source).
    """

    def __init__(self, configs=None):
        """Initialise the learner with its default transfer settings.

        Args:
            configs: Forwarded unchanged to the parent constructor.
                NOTE(review): ``_prepare_data`` reads ``self.configs``;
                confirm the parent copes with ``None`` here.
        """
        super(FuseTransfer, self).__init__(configs)
        self.use_oracle = False
        # Fraction of the total weight given to target instances; None
        # disables the re-weighting in ``train``.
        self.target_weight_scale = 0.75
        self.label_transform = NanLabelBinarizer()

    def train(self, data):
        """Assign per-domain instance weights, then run the parent ``train``.

        ``data.instance_weights`` is overwritten. ``data.y`` is temporarily
        replaced by the output of ``label_transform.fit_transform`` (when a
        transform is set) and restored afterwards, even if training raises.

        Args:
            data: Data object exposing ``is_labeled``, ``is_train``,
                ``is_source``, ``is_target``, ``n`` and ``y``.

        Raises:
            ValueError: If ``target_weight_scale`` is set but outside [0, 1].
        """
        is_labeled_train = data.is_labeled & data.is_train
        # Count labeled training instances separately for each domain so the
        # normalisation below really is per-domain.
        n_labeled_target = (data.is_target & is_labeled_train).sum()
        n_labeled_source = (data.is_source & is_labeled_train).sum()
        data.instance_weights = np.ones(data.n)
        scale = self.target_weight_scale
        if scale is not None:
            if not 0 <= scale <= 1:
                raise ValueError(
                    "target_weight_scale must be in [0, 1], got %r" % (scale,)
                )
            # Skip the normalisation for a domain with no labeled training
            # instances rather than dividing by zero.
            if n_labeled_source > 0:
                data.instance_weights[data.is_source] /= n_labeled_source
            if n_labeled_target > 0:
                data.instance_weights[data.is_target] /= n_labeled_target
            data.instance_weights[data.is_target] *= scale
            data.instance_weights[data.is_source] *= 1 - scale
        y_old = data.y
        if self.label_transform is not None:
            data.y = self.label_transform.fit_transform(data.y)
        try:
            super(FuseTransfer, self).train(data)
        finally:
            # Always hand the caller back its original labels.
            data.y = y_old

    def _prepare_data(self, data, include_unlabeled=True):
        """Return a deep copy of ``data`` with source instances marked.

        If ``data.data_set_ids`` is already set, instances whose id matches
        one of ``configs.source_labels`` are typed as source and the copy is
        returned immediately. Otherwise source instances are located per
        source label, relabelled to the target labels (classification only),
        marked as source training instances, given a 1-based data set id,
        and finally have their labels revealed.

        Args:
            data: Data object to prepare; it is not modified.
            include_unlabeled: Not used by this implementation.

        Returns:
            The prepared copy of ``data``.
        """
        source_labels = self.configs.source_labels
        target_labels = self.configs.target_labels
        data_copy = copy.deepcopy(data)
        if data.data_set_ids is not None:
            for i in source_labels:
                data_copy.type[data_copy.data_set_ids == i] = data_lib.TYPE_SOURCE
            return data_copy
        if self.use_oracle:
            # In oracle mode keep only the oracle and target labels.
            oracle_labels = self.configs.oracle_labels
            data_copy = data_copy.get_transfer_subset(
                np.concatenate((oracle_labels.ravel(), target_labels.ravel())),
                include_unlabeled=True,
            )
        data_copy.data_set_ids = np.zeros(data_copy.n)
        for i, s in enumerate(source_labels):
            source_inds = data_copy.get_transfer_inds(s)
            if not data_copy.is_regression:
                data_copy.change_labels(s, target_labels)
            data_copy.type[source_inds] = data_lib.TYPE_SOURCE
            data_copy.is_train[source_inds] = True
            # Id 0 is left for everything that is not a source set.
            data_copy.data_set_ids[source_inds] = i + 1
        data_copy.reveal_labels(data_copy.is_source)
        return data_copy

    @property
    def prefix(self):
        """Name string built from the base learner prefix and active options."""
        s = "FuseTransfer+" + self.base_learner.prefix
        # The ``__dict__`` membership checks tolerate instances that lack
        # these attributes (presumably objects created before the attributes
        # existed -- TODO confirm).
        if "target_weight_scale" in self.__dict__ and self.target_weight_scale is not None:
            s += "-tws=" + str(self.target_weight_scale)
        if "use_oracle" in self.__dict__ and self.use_oracle:
            s += "-Oracle"
        return s