Example #1
0
 def __init__(self, name, datanode, seed=1):
     """Set up optimizer state rooted at *datanode*.

     Args:
         name: identifier used for the logger name.
         datanode: root data node; becomes both the initial incumbent
             and the root of the transformation graph.
         seed (int): random seed, only used to disambiguate the logger.
     """
     self.name = name
     self._seed = seed
     # Best node found so far; starts as the untransformed input.
     self.incumbent = datanode
     self.root_node = datanode
     # Graph recording every transformation applied to the root node.
     self.graph = TransformationGraph()
     self.graph.add_node(self.root_node)
     # Budgets are unset here; presumably assigned by callers before
     # optimization starts — TODO confirm.
     self.time_budget = None
     self.maximum_evaluation_num = None
     logger_name = '%s(%d)' % (self.name, self._seed)
     self.logger = get_logger(logger_name)
    def optimize_explore_first(self):
        """Explore-first bandit optimization over ``self.arms``.

        Phase 1: pull every arm ``self.alpha`` times in round-robin order.
        Phase 2: repeatedly pull each surviving candidate once, then
        estimate per-arm upper bounds by linearly extrapolating the last
        ``self.alpha`` rewards, and reject any arm whose upper bound is
        below some other arm's lower bound (its latest reward).

        Returns:
            list: ``self.final_rewards``, the reward recorded at each pull.

        Side effects: updates ``self.rewards``, ``self.action_sequence``,
        ``self.final_rewards``, ``self.time_records``,
        ``self.best_lower_bounds``, ``self.incumbent_perf``,
        ``self.optimal_algo_id`` and ``self.nbest_algo_ids``.
        """
        # Initialize the parameters.
        arm_num = len(self.arms)
        arm_candidate = self.arms.copy()
        self.best_lower_bounds = np.zeros(arm_num)
        _iter_id = 0
        # The trial budget must cover the initial round-robin phase.
        assert arm_num * self.alpha <= self.trial_num

        while _iter_id < self.trial_num:
            if _iter_id < arm_num * self.alpha:
                # Phase 1: round-robin exploration of all arms.
                _arm = self.arms[_iter_id % arm_num]
                self.logger.info('PULLING %s in %d-th round' % (_arm, _iter_id))
                reward = self.sub_bandits[_arm].play_once()

                self.rewards[_arm].append(reward)
                self.action_sequence.append(_arm)
                self.final_rewards.append(reward)
                self.time_records.append(time.time() - self.start_time)
                if reward > self.incumbent_perf:
                    self.incumbent_perf = reward
                    self.optimal_algo_id = _arm

                if self.shared_mode:
                    self.update_global_datanodes(_arm)

                self.logger.info('Rewards for pulling %s = %.4f' % (_arm, reward))
                _iter_id += 1
            else:
                # Pull each arm in the candidate once.
                # NOTE(review): unlike phase 1, this branch does not update
                # self.incumbent_perf / self.optimal_algo_id per pull; the
                # final choice below relies on best_lower_bounds instead.
                for _arm in arm_candidate:
                    self.logger.info('PULLING %s in %d-th round' % (_arm, _iter_id))
                    reward = self.sub_bandits[_arm].play_once()
                    self.rewards[_arm].append(reward)
                    self.action_sequence.append(_arm)
                    self.final_rewards.append(reward)
                    self.time_records.append(time.time() - self.start_time)

                    if self.shared_mode:
                        self.update_global_datanodes(_arm)

                    self.logger.info('Rewards for pulling %s = %.4f' % (_arm, reward))
                    _iter_id += 1

            if _iter_id >= arm_num * self.alpha:
                # Update the upper/lower bound estimation.
                upper_bounds, lower_bounds = list(), list()
                for _arm in arm_candidate:
                    rewards = self.rewards[_arm]
                    # Slope of the last `alpha` rewards, extrapolated over the
                    # remaining trials and clipped at the maximum reward 1.0.
                    slope = (rewards[-1] - rewards[-self.alpha]) / self.alpha
                    upper_bound = np.min([1.0, rewards[-1] + slope * (self.trial_num - _iter_id)])
                    upper_bounds.append(upper_bound)
                    # Lower bound is simply the most recent observed reward.
                    lower_bounds.append(rewards[-1])
                    self.best_lower_bounds[self.arms.index(_arm)] = rewards[-1]

                # Reject the sub-optimal arms.
                # Arm i is rejected when any other arm's lower bound already
                # exceeds arm i's optimistic upper bound.
                n = len(arm_candidate)
                flags = [False] * n
                for i in range(n):
                    for j in range(n):
                        if i != j:
                            if upper_bounds[i] < lower_bounds[j]:
                                flags[i] = True

                # NOTE(review): if every flag is set, all arms are removed and
                # arm_candidate becomes empty; this is only logged, not fixed.
                if np.sum(flags) == n:
                    self.logger.error('Removing all the arms simultaneously!')
                self.logger.info('Candidates : %s' % ','.join(arm_candidate))
                self.logger.info('Upper bound: %s' % ','.join(['%.4f' % val for val in upper_bounds]))
                self.logger.info('Lower bound: %s' % ','.join(['%.4f' % val for val in lower_bounds]))
                self.logger.info('Remove Arms: %s' % [item for idx, item in enumerate(arm_candidate) if flags[idx]])

                # Update the arm_candidates.
                arm_candidate = [item for index, item in enumerate(arm_candidate) if not flags[index]]

            if _iter_id >= self.trial_num - 1:
                # Budget (nearly) exhausted: pick the winner and the set of
                # near-best arms from the recorded lower bounds.
                _lower_bounds = self.best_lower_bounds.copy()
                algo_idx = np.argmax(_lower_bounds)
                self.optimal_algo_id = self.arms[algo_idx]
                _best_perf = _lower_bounds[algo_idx]

                # Keep up to the top-3 arms within 96% of the best lower bound.
                threshold = 0.96
                idxs = np.argsort(-_lower_bounds)[:3]
                _algo_ids = [self.arms[idx] for idx in idxs]
                self.nbest_algo_ids = list()
                for _idx, _arm in zip(idxs, _algo_ids):
                    if _lower_bounds[_idx] >= threshold * _best_perf:
                        self.nbest_algo_ids.append(_arm)
                assert len(self.nbest_algo_ids) > 0

                self.logger.info('=' * 50)
                self.logger.info('Best_algo_perf:    %s' % str(_best_perf))
                self.logger.info('Best_algo_id:      %s' % str(self.optimal_algo_id))
                self.logger.info('Arm candidates:    %s' % str(self.arms))
                self.logger.info('Best_lower_bounds: %s' % str(self.best_lower_bounds))
                self.logger.info('Nbest_algo_ids   : %s' % str(self.nbest_algo_ids))
                self.logger.info('=' * 50)

            # Sync the features data nodes.
            # Only every other iteration, once past the exploration phase,
            # and only while more than one candidate remains.
            if self.shared_mode and _iter_id >= arm_num * self.alpha \
                    and _iter_id % 2 == 0 and len(arm_candidate) > 1:
                self.logger.info('Start to SYNC features among all arms!')
                data_nodes = list()
                for _arm in arm_candidate:
                    data_nodes.extend(self.fe_datanodes[_arm])
                # Sample #beam_size-1 nodes.
                beam_size = self.sub_bandits[arm_candidate[0]].optimizer['fe'].beam_width
                # TODO: how to generate the global nodes.
                global_nodes = TransformationGraph.sort_nodes_by_score(data_nodes)[:beam_size - 1]
                for _arm in arm_candidate:
                    self.sub_bandits[_arm].sync_global_incumbents(global_nodes)

        return self.final_rewards
Example #3
0
class Optimizer(object, metaclass=abc.ABCMeta):
    """Abstract base class for transformation-graph optimizers.

    Maintains a :class:`TransformationGraph` rooted at the input data
    node, tracks the incumbent (best-so-far) node, and provides helpers
    to replay a transformation path and to enumerate the transformers
    applicable to a node's feature types.
    """

    def __init__(self, name, datanode, seed=1):
        """Initialize optimizer state rooted at *datanode*.

        Args:
            name: identifier used for the logger name.
            datanode: root data node; becomes both the initial incumbent
                and the root of the transformation graph.
            seed (int): random seed, only used to disambiguate the logger.
        """
        self.name = name
        self._seed = seed
        # Best node found so far; starts as the untransformed input.
        self.incumbent = datanode
        self.root_node = datanode
        self.graph = TransformationGraph()
        self.graph.add_node(self.root_node)
        # Budgets are unset here; presumably assigned by callers before
        # optimization starts — TODO confirm.
        self.time_budget = None
        self.maximum_evaluation_num = None
        logger_name = '%s(%d)' % (self.name, self._seed)
        self.logger = get_logger(logger_name)

    @abc.abstractmethod
    def optimize(self):
        """Run the full optimization; must be implemented by subclasses."""
        raise NotImplementedError()

    @abc.abstractmethod
    def iterate(self):
        """Perform one optimization step; must be implemented by subclasses."""
        pass

    def get_incumbent(self):
        """Return the best data node found so far."""
        return self.incumbent

    def apply(self, data_node: DataNode, ref_node: DataNode):
        """Replay the transformation path that produced *ref_node* on *data_node*.

        Walks the recorded path from the graph root to *ref_node*,
        re-applying each edge's transformer to *data_node*'s values.

        Args:
            data_node: new input data to push through the recorded path.
            ref_node: previously produced node whose path is replayed.

        Returns:
            A copy of the final node on the path, holding the transformed
            values of *data_node*.
        """
        path_ids = self.graph.get_path_nodes(ref_node)
        self.logger.info('The path ids: %s' % str(path_ids))
        # Seed the path's root node with the new input values.
        inputnode = self.graph.get_node(path_ids[0])
        inputnode.set_values(data_node)

        for node_id in path_ids[1:]:
            # A node may have several input nodes (e.g. a merge step);
            # pass a single node when there is only one input.
            input_node_list = list()
            for input_id in self.graph.input_data_dict[node_id]:
                inputnode = self.graph.get_node(input_id)
                input_node_list.append(inputnode)
            inputnode = input_node_list[0] if len(input_node_list) == 1 else input_node_list

            edge = self.graph.get_edge(self.graph.input_edge_dict[node_id])
            self.logger.info('Transformation: %s - %d' % (edge.transformer.name, edge.transformer.type))
            outputnode = edge.transformer.operate(inputnode, edge.target_fields)
            self.logger.info('%s => %s' % (str(inputnode.shape), str(outputnode.shape)))
            self.graph.get_node(node_id).set_values(outputnode)
        output_node = self.graph.get_node(path_ids[-1]).copy_()
        self.logger.info('returned shape: %s' % str(output_node.shape))
        return output_node

    def get_available_transformations(self, node: DataNode, trans_types: typing.List):
        """Return transformers applicable to *node*'s feature types."""
        return self.get_transformations(list(set(node.feature_types)), trans_types)

    @staticmethod
    def get_transformations(feat_type: typing.Union[str, typing.List[str]],
                            trans_types: typing.List):
        """Enumerate transformer instances for the given feature type(s).

        BUG FIX: the annotation was ``str or list[str]``, which is not a
        type union — ``or`` short-circuits at definition time and the
        annotation silently evaluated to just ``str``.

        Args:
            feat_type: a single feature-type name or a list of them.
            trans_types: transformer types to include; others are skipped.

        Returns:
            list: one transformer instance per (transformer, param) combo.
        """
        if isinstance(feat_type, str):
            feat_type = [feat_type]

        trans_ids = list()
        for _type in feat_type:
            trans_ids.extend(_type_infos[_type])
        # Deduplicate ids shared by several feature types.
        trans_ids = list(set(trans_ids))
        transformers = list()

        # Renamed loop variable (was `id`) to avoid shadowing the builtin.
        for trans_id in trans_ids:
            if _transformers[trans_id]().type not in trans_types:
                continue

            params = _params_infos[trans_id]
            if len(params) == 0:
                transformers.append(_transformers[trans_id]())
            else:
                # Instantiate one transformer per candidate parameter value.
                for param in params:
                    transformer = _transformers[trans_id](param=param)
                    transformers.append(transformer)
        return transformers
def evaluate_transformation_graph():
    """Smoke-test the TransformationGraph: build a small pipeline on a
    4x3 training matrix (impute -> one-hot -> scale, plus a merged
    branch), then replay the recorded graph on fresh test data.

    Fixes over the previous version:
    - ``datanode.node_id()`` -> ``datanode.get_node_id()`` to match the
      accessor used at every other call site.
    - ``feature_types`` (undefined name, NameError at runtime) ->
      ``feature_type``, the local defined above.
    """
    data = (np.array([[np.nan, 2, 1], [1, 2, 2], [3, 4, 2],
                      [5, np.nan, 1]]), np.array([1, 2, 3, 4]))
    feature_type = [NUMERICAL, NUMERICAL, CATEGORICAL]
    datanode = DataNode(data, feature_type)

    graph = TransformationGraph()
    graph.add_node(datanode)

    # Impute the missing values in the two numerical columns.
    transformer = ImputationTransformation()
    output_datanode1 = transformer.operate(datanode, target_fields=[0, 1])
    graph.add_node(output_datanode1)
    # BUG FIX: was `datanode.node_id()`; every other call uses get_node_id().
    graph.add_edge(datanode.get_node_id(), output_datanode1.get_node_id(), transformer)

    # One-hot encode the categorical column.
    transformer = OneHotTransformation()
    output_datanode2 = transformer.operate(output_datanode1)
    graph.add_node(output_datanode2)
    graph.add_edge(output_datanode1.get_node_id(),
                   output_datanode2.get_node_id(), transformer)

    # Scale and append the scaled columns to the originals.
    transformer = ScaleTransformation()
    transformer.concatenate = True
    output_datanode3 = transformer.operate(output_datanode2)
    graph.add_node(output_datanode3)
    graph.add_edge(output_datanode2.get_node_id(),
                   output_datanode3.get_node_id(), transformer)

    print(output_datanode3)
    print(output_datanode3.data)

    # Second branch: scale without concatenation.
    transformer = ScaleTransformation()
    transformer.concatenate = False
    output_datanode4 = transformer.operate(output_datanode2)
    graph.add_node(output_datanode4)
    graph.add_edge(output_datanode2.get_node_id(),
                   output_datanode4.get_node_id(), transformer)

    # Merge both branches into a single node.
    transformer = Merger()
    output_datanode5 = transformer.operate(
        [output_datanode3, output_datanode4])
    graph.add_node(output_datanode5)
    # NOTE(review): add_transformation (not add_edge) appears to be the
    # multi-input edge API — confirm against TransformationGraph.
    graph.add_transformation(
        [output_datanode3.get_node_id(),
         output_datanode4.get_node_id()], output_datanode5.get_node_id(),
        transformer)

    print(output_datanode5)
    print(output_datanode5.data)

    # Replay the recorded pipeline on unseen test data (labels unknown).
    order_ids = graph.topological_sort()
    print(order_ids)
    test_data = (np.array([[np.nan, 2, 1], [1, 2, 1], [3, 2, 1],
                           [3, np.nan, 1]]), None)
    # BUG FIX: was `feature_types`, an undefined name.
    test_node = DataNode(test_data, feature_type)

    inputnode = graph.get_node(order_ids[0])
    inputnode.set_values(test_node)

    for idx in range(1, len(order_ids)):
        node_id = order_ids[idx]

        # Collect the input node(s); pass a single node when there is one.
        input_node_list = list()
        for input_id in graph.input_data_dict[node_id]:
            inputnode = graph.get_node(input_id)
            input_node_list.append(inputnode)
        inputnode = input_node_list[0] if len(
            input_node_list) == 1 else input_node_list

        edge = graph.get_edge(graph.input_edge_dict[node_id])
        outputnode = edge.transformer.operate(inputnode, edge.target_fields)
        graph.get_node(node_id).set_values(outputnode)
    output_node = graph.get_node(order_ids[-1])
    print(output_node)
    print(output_node.data)