示例#1
0
 def _create_q_score_net(self) -> None:
     """Build the Q-value scoring net and register it in the workspace.

     A ``ModelHelper`` named ``"q_score_" + self.model_id`` becomes the
     active C2 model while ``self.get_q_values('states', 'actions', True)``
     adds its ops; the returned blob is kept in ``self.q_score_output``.
     The parameter-init net is then run once and the main net is created.

     The active C2 model is reset to ``None`` on every exit path, so a
     failure while building the graph cannot leave a half-built model
     installed as global state for later builders.
     """
     self.q_score_model = ModelHelper(name="q_score_" + self.model_id)
     C2.set_model(self.q_score_model)
     try:
         self.q_score_output = self.get_q_values('states', 'actions', True)
         workspace.RunNetOnce(self.q_score_model.param_init_net)
         workspace.CreateNet(self.q_score_model.net)
     finally:
         # Always clear the global model, even when construction raised.
         C2.set_model(None)
示例#2
0
    def process(
        self,
        sorted_features: List[int],
        sparse_data: StackedAssociativeArray,
        set_missing_value_to_zero: bool = False,
    ) -> Tuple[str, List[str]]:
        """Add ops that turn a sparse feature batch into a dense blob.

        A scalar fill blob is created, fed into the workspace, and also
        produced by a ``GivenTensorFill`` op on the active net. It is then
        handed to ``SparseToDenseMask`` together with the keys, values and
        lengths blobs of ``sparse_data``.

        :param sorted_features: Feature ids used as the ``mask`` argument of
            ``SparseToDenseMask``. Must not be empty.
        :param sparse_data: Supplies the ``lengths``, ``keys`` and ``values``
            blobs.
        :param set_missing_value_to_zero: When true the fill scalar is 0.0,
            otherwise it is ``MISSING_VALUE``.
        :return: ``(dense_input, parameters)``; ``parameters`` is a
            one-element list holding the fill-scalar blob.
        :raises AssertionError: If ``sorted_features`` is empty.
        """
        lengths = sparse_data.lengths
        keys = sparse_data.keys
        values = sparse_data.values

        fill_blob = C2.NextBlob("MISSING_SCALAR")
        if set_missing_value_to_zero:
            fill_value = 0.0
        else:
            fill_value = MISSING_VALUE
        fill_array = np.array([fill_value], dtype=np.float32)
        workspace.FeedBlob(fill_blob, fill_array)
        C2.net().GivenTensorFill(
            [], [fill_blob], shape=[], values=[fill_value])

        parameters: List[str] = [fill_blob]

        assert len(sorted_features) > 0, "Sorted features is empty"
        mask_outputs = C2.SparseToDenseMask(
            keys, values, fill_blob, lengths, mask=sorted_features)

        # Only the first output of the op is returned to the caller.
        return mask_outputs[0], parameters
示例#3
0
    def get_max_q_values(
        self,
        next_states: str,
        possible_next_actions: StackedArray,
        use_target_network: bool,
    ) -> str:
        """
        Add ops computing, per next state, the maximum Q-value over its
        possible next actions.

        Each row of ``next_states`` is tiled according to
        ``possible_next_actions.lengths`` so it lines up with the rows of
        ``possible_next_actions.values``; Q-values are computed for every
        (state, action) pair via ``self.get_q_values`` and then reduced with
        ``LengthsMax`` using the same lengths.

        :param next_states: Blob containing state features, one state per row.
        :param possible_next_actions: Stacked array whose ``values`` blob
            holds the candidate actions and whose ``lengths`` blob gives the
            number of candidates for each state.
        :param use_target_network: Forwarded unchanged to
            ``self.get_q_values``.
        :return: Blob holding one maximum Q-value per entry of ``lengths``.
        """
        action_counts = possible_next_actions.lengths
        tiled_states = C2.LengthsTile(next_states, action_counts)
        pair_q_values = self.get_q_values(
            tiled_states,
            possible_next_actions.values,
            use_target_network,
        )
        return C2.LengthsMax(pair_q_values, possible_next_actions.lengths)
    def update_model(self, states: str, actions: str,
                     q_vals_target: str) -> None:
        """
        Add the training ops for Q(states, actions) to the active C2 model.

        Steps, in order:
            1. Stop gradients from flowing into the target blob.
            2. Concatenate states and actions along axis 1 and run the
               forward pass of ``self.ml_trainer`` into a fresh output blob.
            3. Generate the loss against the target (kept in
               ``self.loss_blob``) and add gradient operators for it.
            4. Attach a ``NanCheck`` op to every parameter gradient.
            5. Add the trainer's parameter-update ops.

        :param states: Blob with one state representation per row
            (described upstream as shape (batch_size, state_dim)).
        :param actions: Blob with one action representation per row
            (described upstream as shape (batch_size, action_dim)).
        :param q_vals_target: Blob with the training labels, one row per
            transition (described upstream as shape (batch_size, 1)).
        """
        model = C2.model()
        frozen_target = C2.StopGradient(q_vals_target)
        train_output = C2.NextBlob("train_output")
        state_action_pairs, _ = C2.Concat(states, actions, axis=1)
        self.ml_trainer.make_forward_pass_ops(
            model, state_action_pairs, train_output, False)

        self.loss_blob = self.ml_trainer.generateLossOps(
            model, train_output, frozen_target)
        model.AddGradientOperators([self.loss_blob])
        for param in model.params:
            if param not in model.param_to_grad:
                continue
            # The op is added for its side effect on the net; its output
            # blob is not written back into model.param_to_grad.
            C2.NanCheck(model.param_to_grad[param])
        self.ml_trainer.addParameterUpdateOps(model)
示例#5
0
    def process(
            self, sparse_data: StackedAssociativeArray
    ) -> Tuple[str, str, List[str]]:
        """Add ops that densify ``sparse_data`` and report feature presence.

        A scalar fill blob is created (fed into the workspace and also
        emitted by a ``GivenTensorFill`` op), then ``SparseToDenseMask`` is
        run with ``return_presence_mask=True`` over ``self.sorted_features``.
        When ``self.set_missing_value_to_zero`` is set, the fill value is 0.0
        and the presence output is replaced by a mask computed from the
        dense values themselves.

        :param sparse_data: Supplies the ``lengths``, ``keys`` and ``values``
            blobs.
        :return: ``(dense_input, dense_input_presence, parameters)``;
            ``parameters`` is a one-element list holding the fill-scalar blob.
        :raises AssertionError: If ``self.sorted_features`` is empty.
        """
        lengths = sparse_data.lengths
        keys = sparse_data.keys
        values = sparse_data.values

        fill_blob = C2.NextBlob("MISSING_SCALAR")
        if self.set_missing_value_to_zero:
            fill_value = 0.0
        else:
            fill_value = MISSING_VALUE
        workspace.FeedBlob(
            fill_blob, np.array([fill_value], dtype=np.float32))
        C2.net().GivenTensorFill(
            [], [fill_blob], shape=[], values=[fill_value])

        parameters: List[str] = [fill_blob]

        assert len(self.sorted_features) > 0, "Sorted features is empty"
        dense_input = C2.NextBlob("dense_input")
        dense_input_presence = C2.NextBlob("dense_input_presence")
        op_inputs = [keys, values, fill_blob, lengths]
        op_outputs = [dense_input, dense_input_presence]
        C2.net().SparseToDenseMask(
            op_inputs,
            op_outputs,
            mask=self.sorted_features,
            return_presence_mask=True,
        )

        if self.set_missing_value_to_zero:
            # NOTE(review): this mask is true where the dense value lies in
            # (-1e-4, 1e-4), i.e. where it is approximately zero. Confirm
            # that this is the intended meaning of "presence" here.
            above_lower = C2.GT(dense_input, -1e-4, broadcast=1)
            below_upper = C2.LT(dense_input, 1e-4, broadcast=1)
            dense_input_presence = C2.And(above_lower, below_upper)

        return dense_input, dense_input_presence, parameters
示例#6
0
    def test_preprocessing_network(self):
        """Check that the C2 preprocessing net matches the NumPy reference.

        Normalization parameters are identified for every feature returned
        by ``read_data()``, the reference output is produced by
        ``NumpyFeatureProcessor.preprocess``, and the same features are run
        through a ``PreprocessorNet``. Outputs must agree within a
        per-feature tolerance.
        """
        feature_value_map = read_data()

        normalization_parameters = {
            name: normalization.identify_parameter(
                name, values, feature_type=self._feature_type_override(name))
            for name, values in feature_value_map.items()
        }
        test_features = NumpyFeatureProcessor.preprocess(
            feature_value_map, normalization_parameters)

        net = core.Net("PreprocessingTestNet")
        C2.set_net(net)
        preprocessor = PreprocessorNet()
        name_preprocessed_blob_map = {}
        for feature_name in feature_value_map:
            blob_name = str(feature_name)
            # Placeholder value so the blob exists while ops are added.
            workspace.FeedBlob(blob_name, np.array([0], dtype=np.int32))
            preprocessed_blob, _ = preprocessor.preprocess_blob(
                blob_name, [normalization_parameters[feature_name]])
            name_preprocessed_blob_map[feature_name] = preprocessed_blob

        workspace.CreateNet(net)

        # Feed the real data as column vectors, then execute the net.
        for feature_name, feature_value in feature_value_map.items():
            workspace.FeedBlob(
                str(feature_name), np.expand_dims(feature_value, -1))
        workspace.RunNetOnce(net)

        for feature_name in feature_value_map:
            normalized_features = workspace.FetchBlob(
                name_preprocessed_blob_map[feature_name])
            if feature_name != ENUM_FEATURE_ID:
                normalized_features = np.squeeze(normalized_features, -1)

            # At the limit, boxcox has some numerical instability
            tolerance = 0.5 if feature_name == BOXCOX_FEATURE_ID else 0.01
            expected = test_features[feature_name]
            is_close = np.isclose(
                normalized_features,
                expected,
                rtol=tolerance,
                atol=tolerance,
            )
            non_matching = np.where(np.logical_not(is_close))
            failure_message = "{} does not match: {} {}".format(
                feature_name,
                normalized_features[non_matching].tolist(),
                test_features[feature_name][non_matching].tolist(),
            )
            self.assertTrue(np.all(is_close), failure_message)
示例#7
0
    def normalize_dense_matrix(
        self,
        input_matrix: str,
        features: List[str],
        normalization_parameters: Dict[str, NormalizationParameters],
        blobname_prefix: str,
    ) -> Tuple[str, List[str]]:
        """
        Add ops that normalize a dense feature matrix, one feature type at a
        time.

        The columns of ``input_matrix`` are expected to be grouped by feature
        type. For every type in ``FEATURE_TYPES`` that has at least one
        column, the matching column range is sliced out, preprocessed with
        ``self.preprocess_blob``, and the results are concatenated along
        axis 1 and passed through ``NanCheck``.

        All ops are created under a CPU device scope; the original note
        states that the Caffe2 BatchBoxCox operator isn't implemented on CUDA
        GPU.

        :param input_matrix: Blob holding the matrix to normalize.
        :param features: Feature names in column order; the slice
            ``features[start:end]`` selects the parameters for each type.
        :param normalization_parameters: Mapping from feature names to
            NormalizationParameters.
        :param blobname_prefix: Prefix passed to ``self._get_input_blob``
            when naming the per-type sliced blobs.
        :return: ``(normalized_blob, parameters)`` where ``parameters``
            collects the blobs reported by every ``preprocess_blob`` call.
        """
        with core.DeviceScope(core.DeviceOption(caffe2_pb2.CPU)):
            feature_starts = self._get_type_boundaries(
                features, normalization_parameters)
            last_type_index = len(FEATURE_TYPES) - 1

            normalized_input_blobs = []
            parameters: List[str] = []
            for type_index, feature_type in enumerate(FEATURE_TYPES):
                start_index = feature_starts[type_index]
                # NOTE(review): the final boundary is taken from
                # len(normalization_parameters), not len(features); this
                # assumes both have the same size. Confirm.
                if type_index == last_type_index:
                    end_index = len(normalization_parameters)
                else:
                    end_index = feature_starts[type_index + 1]
                if start_index == end_index:
                    continue  # No features of this type

                sliced_input_features = self._get_input_blob(
                    blobname_prefix, feature_type)
                C2.net().Slice(
                    [input_matrix],
                    [sliced_input_features],
                    starts=[0, start_index],
                    ends=[-1, end_index],
                )
                type_parameters = [
                    normalization_parameters[name]
                    for name in features[start_index:end_index]
                ]
                normalized_input_blob, blob_parameters = self.preprocess_blob(
                    sliced_input_features, type_parameters)
                parameters.extend(blob_parameters)
                normalized_input_blobs.append(normalized_input_blob)

            for position, blob in enumerate(normalized_input_blobs):
                logger.info("input# {}: {}".format(position, blob))

            # The second Concat output is not needed here.
            concatenated_input_blob, _ = C2.Concat(
                *normalized_input_blobs, axis=1)
            return C2.NanCheck(concatenated_input_blob), parameters
示例#8
0
 def _create_reward_train_net(self) -> None:
     """Build the reward-training net and register it in the workspace.

     A ``ModelHelper`` named ``"reward_train_" + self.model_id`` becomes
     the active C2 model while ``self.update_model`` adds training ops
     that use the ``'rewards'`` blob as the target. The parameter-init net
     is then run once and the main net is created.

     The active C2 model is reset to ``None`` on every exit path, so a
     failure while building the graph cannot leave a half-built model
     installed as global state.
     """
     self.reward_train_model = ModelHelper(name="reward_train_" +
                                           self.model_id)
     C2.set_model(self.reward_train_model)
     try:
         self.update_model('states', 'actions', 'rewards')
         workspace.RunNetOnce(self.reward_train_model.param_init_net)
         workspace.CreateNet(self.reward_train_model.net)
     finally:
         # Always clear the global model, even when construction raised.
         C2.set_model(None)
示例#9
0
 def _create_q_score_net(self) -> None:
     """Build the Q-value scoring net and register it in the workspace.

     A ``ModelHelper`` named ``"q_score_" + self.model_id`` becomes the
     active C2 model while ``self.get_q_values("states", "actions", True)``
     adds its ops; the returned blob is kept in ``self.q_score_output``.
     After the parameter-init net has run once, the net proto's
     ``num_workers`` is set to ``RLTrainer.DEFAULT_TRAINING_NUM_WORKERS``
     and the net is created.

     The active C2 model is reset to ``None`` on every exit path, so a
     failure while building the graph cannot leave a half-built model
     installed as global state.
     """
     self.q_score_model = ModelHelper(name="q_score_" + self.model_id)
     C2.set_model(self.q_score_model)
     try:
         self.q_score_output = self.get_q_values("states", "actions", True)
         workspace.RunNetOnce(self.q_score_model.param_init_net)
         self.q_score_model.net.Proto().num_workers = \
             RLTrainer.DEFAULT_TRAINING_NUM_WORKERS
         workspace.CreateNet(self.q_score_model.net)
     finally:
         # Always clear the global model, even when construction raised.
         C2.set_model(None)
示例#10
0
 def _create_internal_policy_net(self) -> None:
     """Build the internal policy net and register it in the workspace.

     A ``ModelHelper`` named ``"internal_policy_" + self.model_id`` becomes
     the active C2 model while
     ``self.get_q_values_all_actions("states", False)`` adds its ops; the
     returned blob is kept in ``self.internal_policy_output``. The
     parameter-init net is then run once and the main net is created.

     The active C2 model is reset to ``None`` on every exit path, so a
     failure while building the graph cannot leave a half-built model
     installed as global state.
     """
     self.internal_policy_model = ModelHelper(name="internal_policy_" +
                                              self.model_id)
     C2.set_model(self.internal_policy_model)
     try:
         self.internal_policy_output = self.get_q_values_all_actions(
             "states", False)
         workspace.RunNetOnce(self.internal_policy_model.param_init_net)
         workspace.CreateNet(self.internal_policy_model.net)
     finally:
         # Always clear the global model, even when construction raised.
         C2.set_model(None)
示例#11
0
 def _create_internal_policy_net(self) -> None:
     """Build the internal policy net and register it in the workspace.

     While the new ``ModelHelper`` is the active C2 model, the blob
     returned by ``self.get_q_values('states', 'actions', False)`` is
     flattened with ``FlattenToVec`` and kept in
     ``self.internal_policy_output``. The parameter-init net is then run
     once and the main net is created.

     The active C2 model is reset to ``None`` on every exit path, so a
     failure while building the graph cannot leave a half-built model
     installed as global state.
     """
     # NOTE(review): the helper is named with the "q_score_" prefix even
     # though it backs the internal policy net; confirm this is intended.
     self.internal_policy_model = ModelHelper(name="q_score_" +
                                              self.model_id)
     C2.set_model(self.internal_policy_model)
     try:
         self.internal_policy_output = C2.FlattenToVec(
             self.get_q_values('states', 'actions', False))
         workspace.RunNetOnce(self.internal_policy_model.param_init_net)
         workspace.CreateNet(self.internal_policy_model.net)
     finally:
         # Always clear the global model, even when construction raised.
         C2.set_model(None)
示例#12
0
 def __init__(self, params: PolicyEvaluatorParameters) -> None:
     """Create the policy-evaluator net and index the value input models.

     A ``core.Net`` named ``"policy_evaluator"`` is created and installed
     as the active C2 net, ``PolicySimulator.plan`` is invoked on it with
     ``params`` and ``params.db_type``, and ``params.value_input_models``
     is indexed by each model's ``name``.

     :param params: Evaluator configuration; its ``db_type`` and
         ``value_input_models`` attributes are read here.
     """
     self.params = params
     slate_net = core.Net("policy_evaluator")
     self.process_slate_net = slate_net
     C2.set_net(slate_net)
     self.action_probabilities = PolicySimulator.plan(
         slate_net, params, self.params.db_type)
     # The net is only planned here; this flag starts out false.
     self.created_net = False
     self.value_input_models: Dict[str, ValueInputModelParameters] = {
         model.name: model for model in self.params.value_input_models
     }
示例#13
0
 def _create_reward_train_net(self) -> None:
     """Build the reward-training net and register it in the workspace.

     A ``ModelHelper`` named ``"reward_train_" + self.model_id`` becomes
     the active C2 model while ``self.update_model`` adds training ops
     that use the ``'rewards'`` blob as the target. After the
     parameter-init net has run once, the net proto's ``num_workers`` is
     set to ``RLTrainer.DEFAULT_TRAINING_NUM_WORKERS`` and the net is
     created.

     The active C2 model is reset to ``None`` on every exit path, so a
     failure while building the graph cannot leave a half-built model
     installed as global state.
     """
     self.reward_train_model = ModelHelper(name="reward_train_" +
                                           self.model_id)
     C2.set_model(self.reward_train_model)
     try:
         self.update_model('states', 'actions', 'rewards')
         workspace.RunNetOnce(self.reward_train_model.param_init_net)
         self.reward_train_model.net.Proto().num_workers = \
             RLTrainer.DEFAULT_TRAINING_NUM_WORKERS
         workspace.CreateNet(self.reward_train_model.net)
     finally:
         # Always clear the global model, even when construction raised.
         C2.set_model(None)
示例#14
0
    def update_model(
        self,
        states: str,
        actions: str,
        q_vals_target: str,
    ) -> None:
        """
        Add the training ops for Q(states, actions) to the active C2 model.

        Steps, in order:
            1. Stop gradients from flowing into the target blob.
            2. Concatenate states and actions along axis 1 and build the
               forward pass with ``MakeForwardPassOps`` into a fresh blob.
            3. Generate the loss with ``GenerateLossOps`` (kept in
               ``self.loss_blob``) and add gradient operators for it.
            4. Attach a ``NanCheck`` op to every parameter gradient.
            5. Add parameter-update ops configured from this trainer's
               optimizer, learning rate, gamma and learning-rate policy.

        :param states: Blob with one state representation per row
            (described upstream as shape (batch_size, state_dim)).
        :param actions: Blob with one action representation per row
            (described upstream as shape (batch_size, action_dim)).
        :param q_vals_target: Blob with the training labels, one row per
            transition (described upstream as shape (batch_size, 1)).
        """
        model = C2.model()
        frozen_target = C2.StopGradient(q_vals_target)
        train_output = C2.NextBlob("train_output")
        state_action_pairs, _ = C2.Concat(states, actions, axis=1)
        MakeForwardPassOps(
            model,
            self.model_id,
            state_action_pairs,
            train_output,
            self.weights,
            self.biases,
            self.activations,
            self.layers,
            self.dropout_ratio,
            False,
        )

        self.loss_blob = GenerateLossOps(model, train_output, frozen_target)
        model.AddGradientOperators([self.loss_blob])
        for param in model.params:
            if param not in model.param_to_grad:
                continue
            # The op is added for its side effect on the net; its output
            # blob is not written back into model.param_to_grad.
            C2.NanCheck(model.param_to_grad[param])
        AddParameterUpdateOps(
            model,
            optimizer_input=self.optimizer,
            base_learning_rate=self.learning_rate,
            gamma=self.gamma,
            policy=self.lr_policy,
        )
示例#15
0
 def _sum_deterministic_policy(self, model_names, path):
     """Save a net that picks the arg-max action of summed model outputs.

     For every name in ``model_names`` a ``"<name>_Output"`` input blob is
     fed with a placeholder value. The net sums those blobs, transposes the
     sum, takes the arg-max, flattens it, and uses ``SparseToDense`` to
     write the value from the ``one`` blob at that index into
     ``'ActionProbabilities'``. The result is exported to a minidb file.

     :param model_names: Iterable of model names whose outputs are summed.
     :param path: Destination passed to ``save_to_db``.
     """
     net = core.Net('DeterministicPolicy')
     C2.set_net(net)
     output = 'ActionProbabilities'
     workspace.FeedBlob(output, np.array([1.0]))

     model_outputs = ['{}_Output'.format(model) for model in model_names]
     for blob_name in model_outputs:
         # Placeholder so the input blob exists while ops are added.
         workspace.FeedBlob(blob_name, np.array([1.0], dtype=np.float32))

     summed = C2.Sum(*model_outputs)
     best_index = C2.ArgMax(C2.Transpose(summed))
     max_action = C2.FlattenToVec(best_index)

     one_blob = C2.NextBlob('one')
     workspace.FeedBlob(one_blob, np.array([1.0], dtype=np.float32))
     sparse_to_dense_inputs = [max_action, one_blob, model_outputs[0]]
     C2.net().SparseToDense(sparse_to_dense_inputs, [output])

     meta = PredictorExportMeta(net, [one_blob], model_outputs, [output])
     save_to_db('minidb', path, meta)
示例#16
0
 def _create_internal_policy_net(self) -> None:
     """Build the internal policy net and register it in the workspace.

     While the new ``ModelHelper`` is the active C2 model, the blob
     returned by ``self.get_q_values('states', 'actions', False)`` is
     flattened with ``FlattenToVec`` and kept in
     ``self.internal_policy_output``. After the parameter-init net has run
     once, the net proto's ``num_workers`` is set to
     ``RLTrainer.DEFAULT_TRAINING_NUM_WORKERS`` and the net is created.

     The active C2 model is reset to ``None`` on every exit path, so a
     failure while building the graph cannot leave a half-built model
     installed as global state.
     """
     # NOTE(review): the helper is named with the "q_score_" prefix even
     # though it backs the internal policy net; confirm this is intended.
     self.internal_policy_model = ModelHelper(name="q_score_" +
                                              self.model_id)
     C2.set_model(self.internal_policy_model)
     try:
         self.internal_policy_output = C2.FlattenToVec(
             self.get_q_values('states', 'actions', False))
         workspace.RunNetOnce(self.internal_policy_model.param_init_net)
         self.internal_policy_model.net.Proto().num_workers = \
             RLTrainer.DEFAULT_TRAINING_NUM_WORKERS
         workspace.CreateNet(self.internal_policy_model.net)
     finally:
         # Always clear the global model, even when construction raised.
         C2.set_model(None)
示例#17
0
 def _create_internal_policy_net(self) -> None:
     """Build the internal policy net and register it in the workspace.

     A ``ModelHelper`` named ``"internal_policy_" + self.model_id`` becomes
     the active C2 model while
     ``self.get_q_values_all_actions("states", False)`` adds its ops; the
     returned blob is kept in ``self.internal_policy_output``. After the
     parameter-init net has run once, the net proto's ``num_workers`` is
     set to ``RLTrainer.DEFAULT_TRAINING_NUM_WORKERS`` and the net is
     created.

     The active C2 model is reset to ``None`` on every exit path, so a
     failure while building the graph cannot leave a half-built model
     installed as global state.
     """
     self.internal_policy_model = ModelHelper(name="internal_policy_" +
                                              self.model_id)
     C2.set_model(self.internal_policy_model)
     try:
         self.internal_policy_output = self.get_q_values_all_actions(
             "states", False)
         workspace.RunNetOnce(self.internal_policy_model.param_init_net)
         self.internal_policy_model.net.Proto().num_workers = \
             RLTrainer.DEFAULT_TRAINING_NUM_WORKERS
         workspace.CreateNet(self.internal_policy_model.net)
     finally:
         # Always clear the global model, even when construction raised.
         C2.set_model(None)
 def _create_reward_train_net(self) -> None:
     """Build the reward-training net and register it in the workspace.

     A ``ModelHelper`` named ``"reward_train_" + self.model_id`` becomes
     the active C2 model while ``self.update_model`` adds training ops
     that use the ``"rewards"`` blob as the target. After the
     parameter-init net has run once, the net proto is configured with
     ``RLTrainer.DEFAULT_TRAINING_NUM_WORKERS`` workers and the
     ``"async_scheduling"`` net type, and the net is created.

     The active C2 model is reset to ``None`` on every exit path, so a
     failure while building the graph cannot leave a half-built model
     installed as global state.
     """
     self.reward_train_model = ModelHelper(name="reward_train_" +
                                           self.model_id)
     C2.set_model(self.reward_train_model)
     try:
         self.update_model("states", "actions", "rewards")
         workspace.RunNetOnce(self.reward_train_model.param_init_net)
         self.reward_train_model.net.Proto().num_workers = (
             RLTrainer.DEFAULT_TRAINING_NUM_WORKERS)
         self.reward_train_model.net.Proto().type = "async_scheduling"
         workspace.CreateNet(self.reward_train_model.net)
     finally:
         # Always clear the global model, even when construction raised.
         C2.set_model(None)
示例#19
0
 def _create_all_q_score_net(self) -> None:
     """Build the all-actions Q-score net and register it in the workspace.

     A ``ModelHelper`` named ``"all_q_score_" + self.model_id`` becomes the
     active C2 model while
     ``self.get_q_values_all_actions("states", False)`` adds its ops; the
     returned blob is kept in ``self.all_q_score_output``. After the
     parameter-init net has run once, the net proto is configured with
     ``RLTrainer.DEFAULT_TRAINING_NUM_WORKERS`` workers and the
     ``"async_scheduling"`` net type, and the net is created.

     The active C2 model is reset to ``None`` on every exit path, so a
     failure while building the graph cannot leave a half-built model
     installed as global state.
     """
     self.all_q_score_model = ModelHelper(name="all_q_score_" +
                                          self.model_id)
     C2.set_model(self.all_q_score_model)
     try:
         self.all_q_score_output = self.get_q_values_all_actions(
             "states", False)
         workspace.RunNetOnce(self.all_q_score_model.param_init_net)
         self.all_q_score_model.net.Proto().num_workers = (
             RLTrainer.DEFAULT_TRAINING_NUM_WORKERS)
         self.all_q_score_model.net.Proto().type = "async_scheduling"
         workspace.CreateNet(self.all_q_score_model.net)
     finally:
         # Always clear the global model, even when construction raised.
         C2.set_model(None)
示例#20
0
 def concat_states_and_possible_next_actions(
     self,
     next_state_preprocessed_matrix_blob: str,
     possible_next_actions_blob: str,
     possible_next_actions_lengths_blob: str,
 ) -> str:
     """Add ops pairing every next state with its possible next actions.

     The state blob is tiled with ``LengthsTile`` using the lengths blob,
     then concatenated with the actions blob along axis 1.

     :param next_state_preprocessed_matrix_blob: Blob of next states.
     :param possible_next_actions_blob: Blob of candidate actions.
     :param possible_next_actions_lengths_blob: Blob of lengths used to tile
         the states.
     :return: Blob holding the concatenated state-action pairs.
     """
     tiled_states = C2.LengthsTile(
         next_state_preprocessed_matrix_blob,
         possible_next_actions_lengths_blob,
     )
     # Concat yields two outputs; only the concatenated blob is returned.
     concat_outputs = C2.Concat(
         tiled_states, possible_next_actions_blob, axis=1)
     pairs_blob, _ = concat_outputs
     return pairs_blob
示例#21
0
    def test_preprocessing_network(self):
        """Check that the C2 preprocessing net matches ``self.preprocess``.

        Normalization parameters are identified for every feature returned
        by ``preprocessing_util.read_data()``, the reference output comes
        from ``self.preprocess``, and the same features are run through a
        ``PreprocessorNet``. Each ``<feature>_preprocessed`` blob must agree
        with the reference within a per-feature tolerance.
        """
        _, feature_value_map = preprocessing_util.read_data()
        normalization_parameters = {
            name: normalization.identify_parameter(values)
            for name, values in feature_value_map.items()
        }
        test_features = self.preprocess(feature_value_map,
                                        normalization_parameters)

        net = core.Net("PreprocessingTestNet")
        C2.set_net(net)
        preprocessor = PreprocessorNet(net, False)
        for feature_name in feature_value_map:
            # Placeholder value so the blob exists while ops are added.
            workspace.FeedBlob(feature_name, np.array([0], dtype=np.int32))
            preprocessor.preprocess_blob(
                feature_name, [normalization_parameters[feature_name]])

        workspace.CreateNet(net)

        # Feed the real data as column vectors, then execute the net.
        for feature_name, feature_value in feature_value_map.items():
            workspace.FeedBlob(
                feature_name, np.expand_dims(feature_value, -1))
        workspace.RunNetOnce(net)

        for feature_name in feature_value_map:
            normalized_features = workspace.FetchBlob(feature_name +
                                                      "_preprocessed")
            if feature_name != identify_types.ENUM:
                normalized_features = np.squeeze(normalized_features, -1)

            # At the limit, boxcox has some numerical instability
            tolerance = 0.5 if feature_name == BOXCOX else 0.01
            is_close = np.isclose(
                normalized_features,
                test_features[feature_name],
                rtol=tolerance,
                atol=tolerance,
            )
            non_matching = np.where(np.logical_not(is_close))
            failure_message = '{} does not match: {} {}'.format(
                feature_name, normalized_features[non_matching].tolist(),
                test_features[feature_name][non_matching].tolist())
            self.assertTrue(np.all(is_close), failure_message)
示例#22
0
 def get_q_values(self, states: str, actions: str, use_target_network: bool) -> str:
     """Add forward-pass ops producing Q-values for state-action pairs.

     States and actions are concatenated along axis 1 and pushed through
     either ``self.target_network`` or ``self.ml_trainer``.

     :param states: Blob of states.
     :param actions: Blob of actions, concatenated to ``states``.
     :param use_target_network: Selects ``self.target_network`` when true,
         ``self.ml_trainer`` otherwise.
     :return: Name of the blob the forward pass writes into.
     """
     state_action_pairs, _ = C2.Concat(states, actions, axis=1)
     q_values = C2.NextBlob("q_values")
     network = self.target_network if use_target_network else self.ml_trainer
     # Both networks expose the same make_forward_pass_ops interface.
     network.make_forward_pass_ops(
         C2.model(), state_action_pairs, q_values, True
     )
     return q_values
示例#23
0
 def __init__(
     self,
     params: PolicyEvaluatorParameters,
     db_type: str,
 ) -> None:
     """Create the policy-evaluator net and plan the policy simulation.

     A ``core.Net`` named ``'policy_evaluator'`` is created and installed
     as the active C2 net, then ``PolicySimulator.plan`` is invoked on it.

     :param params: Evaluator configuration, stored on the instance and
         forwarded to ``PolicySimulator.plan``.
     :param db_type: Forwarded unchanged to ``PolicySimulator.plan``.
     """
     self.params = params
     slate_net = core.Net('policy_evaluator')
     self.process_slate_net = slate_net
     C2.set_net(slate_net)
     self.action_probabilities = PolicySimulator.plan(
         slate_net, params, db_type)
     # The net is only planned here; this flag starts out false.
     self.created_net = False
示例#24
0
    def _create_rl_train_net(self) -> None:
        """Build the RL training net and register it in the workspace.

        A ``ModelHelper`` named ``"rl_train_" + self.model_id`` becomes the
        active C2 model. Next-state Q-values come from
        ``self.get_max_q_values`` when ``self.maxq_learning`` is set and
        from ``self.get_q_values`` on the ``'next_actions'`` blob otherwise.
        The training target is::

            rewards + float(not_terminals) * rl_discount_rate * next_q_values

        ``self.update_model`` then adds the training ops, the parameter-init
        net is run once and the main net is created.

        The active C2 model is reset to ``None`` on every exit path, so a
        failure while building the graph cannot leave a half-built model
        installed as global state.
        """
        self.rl_train_model = ModelHelper(name="rl_train_" + self.model_id)
        C2.set_model(self.rl_train_model)
        try:
            if self.maxq_learning:
                next_q_values = self.get_max_q_values(
                    'next_states',
                    self.get_possible_next_actions(),
                    True,
                )
            else:
                next_q_values = self.get_q_values(
                    'next_states', 'next_actions', True)

            # Multiplying by the cast not_terminals blob zeroes the
            # discounted next-state term wherever that blob is zero.
            q_vals_target = C2.Add(
                'rewards',
                C2.Mul(
                    C2.Mul(
                        C2.Cast('not_terminals',
                                to=caffe2_pb2.TensorProto.FLOAT),  # type: ignore
                        self.rl_discount_rate,
                        broadcast=1,
                    ),
                    next_q_values))

            self.update_model('states', 'actions', q_vals_target)
            workspace.RunNetOnce(self.rl_train_model.param_init_net)
            workspace.CreateNet(self.rl_train_model.net)
        finally:
            # Always clear the global model, even when construction raised.
            C2.set_model(None)
    def update_model(self, states: str, actions: str, q_vals_target: str) -> None:
        """
        Add the training ops for the all-actions Q-network to the active C2
        model.

        Steps, in order:
            1. Stop gradients from flowing into the target blob.
            2. If ``self.conv_ml_trainer`` is set, run its convolutional
               pass on the states and use that output as the network input.
            3. Run the forward pass of ``self.ml_trainer``; multiply its
               output by ``actions`` and sum over the last dimension to
               select the Q-value of the taken action, then expand to a
               column.
            4. Generate the loss (kept in ``self.loss_blob``) and add
               gradient operators for it.
            5. Attach a ``NanCheck`` op to every parameter gradient and add
               the trainer's parameter-update ops.

        :param states: Blob with one state representation per row
            (described upstream as shape (batch_size, state_dim)).
        :param actions: Blob with the one-hotted action per row
            (described upstream as shape (batch_size, action_dim)).
        :param q_vals_target: Blob with the training labels, one row per
            transition (described upstream as shape (batch_size, 1)).
        """
        model = C2.model()
        frozen_target = C2.StopGradient(q_vals_target)
        output_blob = C2.NextBlob("train_output")

        network_input = states
        if self.conv_ml_trainer is not None:
            conv_output_blob = C2.NextBlob("conv_output")
            self.conv_ml_trainer.make_conv_pass_ops(
                model, states, conv_output_blob)
            network_input = conv_output_blob

        self.ml_trainer.make_forward_pass_ops(
            model, network_input, output_blob, False)
        masked_output = C2.Mul(output_blob, actions)
        selected_q = C2.ReduceBackSum(masked_output)
        q_values = C2.ExpandDims(selected_q, dims=[1])

        self.loss_blob = self.ml_trainer.generateLossOps(
            model, q_values, frozen_target)
        model.AddGradientOperators([self.loss_blob])
        for param in model.params:
            if param not in model.param_to_grad:
                continue
            # The op is added for its side effect on the net; its output
            # blob is not written back into model.param_to_grad.
            C2.NanCheck(model.param_to_grad[param])
        self.ml_trainer.addParameterUpdateOps(model)
示例#26
0
    def test_normalize_dense_matrix_enum(self):
        """Check dense normalization of mixed continuous and enum features.

        Feature 2 is continuous; features 1 and 3 are enums with possible
        values [12, 4, 2] and [15, 3]. The expected output keeps the
        continuous column and one-hot encodes each enum column, with a
        missing enum value producing an all-zero encoding.
        """
        enum_values_1 = [12, 4, 2]
        enum_values_3 = [15, 3]
        normalization_parameters = {
            1: NormalizationParameters(
                identify_types.ENUM, None, None, None, None,
                enum_values_1, None, None, None),
            2: NormalizationParameters(
                identify_types.CONTINUOUS, None, 0, 0, 1,
                None, None, None, None),
            3: NormalizationParameters(
                identify_types.ENUM, None, None, None, None,
                enum_values_3, None, None, None),
        }
        norm_net = core.Net("net")
        C2.set_net(norm_net)
        preprocessor = PreprocessorNet()

        feature_ids = [2, 1, 3]  # Sorted according to feature type
        column_values = {
            1: [12, 4, 2, 2],
            2: [1.0, 2.0, 3.0, 3.0],
            3: [15, 3, 15, normalization.MISSING_VALUE],
        }
        inputs = np.zeros([4, 3], dtype=np.float32)
        for feature_id, column in column_values.items():
            inputs[:, feature_ids.index(feature_id)] = column

        input_blob = C2.NextBlob("input_blob")
        # Placeholder so the blob exists while ops are added.
        workspace.FeedBlob(input_blob, np.array([0], dtype=np.float32))
        normalized_output_blob, _ = preprocessor.normalize_dense_matrix(
            input_blob, feature_ids, normalization_parameters, "", False)
        workspace.FeedBlob(input_blob, inputs)
        workspace.RunNetOnce(norm_net)
        normalized_feature_matrix = workspace.FetchBlob(normalized_output_blob)

        expected = np.array([
            [1.0, 1, 0, 0, 1, 0],
            [2.0, 0, 1, 0, 0, 1],
            [3.0, 0, 0, 1, 1, 0],
            [3.0, 0, 0, 1, 0, 0],  # Missing values should go to all 0
        ])
        np.testing.assert_allclose(expected, normalized_feature_matrix)
示例#27
0
    def test_normalize_dense_matrix_enum(self):
        """Verify one-hot expansion of enum columns in dense normalization.

        The input has one continuous feature (id 2) and two enum features
        (ids 1 and 3). After normalization the continuous column is kept
        and each enum column becomes a one-hot group; the row with a missing
        value for feature 3 must encode to all zeros for that group.
        """

        def enum_parameters(possible_values):
            # Only the type and the list of possible values are set.
            return NormalizationParameters(
                identify_types.ENUM,
                None,
                None,
                None,
                None,
                possible_values,
                None,
                None,
                None,
            )

        normalization_parameters = {
            1: enum_parameters([12, 4, 2]),
            2: NormalizationParameters(
                identify_types.CONTINUOUS, None, 0, 0, 1, None, None, None, None
            ),
            3: enum_parameters([15, 3]),
        }
        norm_net = core.Net("net")
        C2.set_net(norm_net)
        preprocessor = PreprocessorNet()

        feature_ids = [2, 1, 3]  # Sorted according to feature type
        inputs = np.zeros([4, 3], dtype=np.float32)
        columns = (
            (1, [12, 4, 2, 2]),
            (2, [1.0, 2.0, 3.0, 3.0]),
            (3, [15, 3, 15, normalization.MISSING_VALUE]),
        )
        for feature_id, column in columns:
            inputs[:, feature_ids.index(feature_id)] = column

        input_blob = C2.NextBlob("input_blob")
        # Placeholder so the blob exists while ops are added.
        workspace.FeedBlob(input_blob, np.array([0], dtype=np.float32))
        normalized_output_blob, _ = preprocessor.normalize_dense_matrix(
            input_blob, feature_ids, normalization_parameters, "", False
        )
        workspace.FeedBlob(input_blob, inputs)
        workspace.RunNetOnce(norm_net)
        normalized_feature_matrix = workspace.FetchBlob(normalized_output_blob)

        expected_rows = [
            [1.0, 1, 0, 0, 1, 0],
            [2.0, 0, 1, 0, 0, 1],
            [3.0, 0, 0, 1, 1, 0],
            [3.0, 0, 0, 1, 0, 0],  # Missing values should go to all 0
        ]
        np.testing.assert_allclose(
            np.array(expected_rows), normalized_feature_matrix
        )
示例#28
0
 def _dummy_model_copy(self, model_name, path):
     """Save a minimal net that copies ``'Input'`` to ``'Output'``.

     Both blobs are fed with a placeholder value, a single ``Copy`` op is
     added to a net named ``model_name``, and the net is exported to a
     minidb file with no parameters.

     :param model_name: Name given to the created net.
     :param path: Destination passed to ``save_to_db``.
     """
     net = core.Net(model_name)
     C2.set_net(net)
     inp, output = 'Input', 'Output'
     placeholder_blobs = (inp, output)
     for blob_name in placeholder_blobs:
         workspace.FeedBlob(blob_name, np.array([1.0]))
     net.Copy([inp], [output])
     # The second argument (parameters) is deliberately empty.
     meta = PredictorExportMeta(net, [], [inp], [output])
     save_to_db('minidb', path, meta)
示例#29
0
 def _store_parameter(self, parameters, name, value):
     """Materialize ``value`` as a uniquely named blob and return its name.

     When ``C2.init_net()`` is falsy the value is fed straight into the
     workspace and the blob name is appended to ``parameters``. Otherwise a
     ``GivenTensorFill`` op producing the value is added to the init net
     and the blob is registered as an external output of that net; in that
     case ``parameters`` is left untouched.

     :param parameters: List that receives the blob name on the
         workspace-feed path.
     :param name: Prefix passed to ``C2.NextBlob``.
     :param value: Array to store; its ``shape``, ``dtype`` and flattened
         contents are used.
     :return: The generated blob name.
     """
     c2_name = C2.NextBlob(name)
     if not C2.init_net():
         workspace.FeedBlob(c2_name, value)
         parameters.append(c2_name)
         return c2_name

     fill_dtype = schema.data_type_for_dtype(value.dtype)
     C2.init_net().GivenTensorFill(
         [],
         c2_name,
         shape=value.shape,
         values=value.flatten(),
         dtype=fill_dtype,
     )
     C2.init_net().AddExternalOutput(c2_name)
     return c2_name
    def normalize_sparse_matrix(
        self,
        lengths_blob: str,
        keys_blob: str,
        values_blob: str,
        normalization_parameters: Dict[str, NormalizationParameters],
        blobname_prefix: str,
        split_expensive_feature_groups: bool = False,
    ) -> Tuple[str, List[str]]:
        """Densify a sparse feature batch and normalize the result.

        Features are ordered with ``sort_features_by_normalization``, the
        sparse input is converted with ``SparseToDenseMask`` (using
        ``self.MISSING_SCALAR`` as the default value and the integer
        feature ids as the mask), and the dense blob is passed on to
        ``self.normalize_dense_matrix``.

        :param lengths_blob: Lengths blob of the sparse input.
        :param keys_blob: Keys blob of the sparse input.
        :param values_blob: Values blob of the sparse input.
        :param normalization_parameters: Mapping from feature names to
            NormalizationParameters.
        :param blobname_prefix: Forwarded to ``normalize_dense_matrix``.
        :param split_expensive_feature_groups: Forwarded to
            ``normalize_dense_matrix``.
        :return: Whatever ``normalize_dense_matrix`` returns.
        """
        sorted_features, _ = sort_features_by_normalization(
            normalization_parameters)
        # SparseToDenseMask takes integer ids, so convert the sorted names.
        mask_ids = list(map(int, sorted_features))

        dense_input, _ = C2.SparseToDenseMask(
            keys_blob,
            values_blob,
            self.MISSING_SCALAR,
            lengths_blob,
            mask=mask_ids,
        )
        normalized = self.normalize_dense_matrix(
            dense_input,
            sorted_features,
            normalization_parameters,
            blobname_prefix,
            split_expensive_feature_groups,
        )
        return normalized
示例#31
0
    def get_max_q_values(self, states: str, possible_actions: str,
                         use_target_network: bool) -> str:
        """
        Add ops computing, per state, the maximum Q-value over the actions
        marked as possible.

        Q-values for all actions are obtained from
        ``self.get_q_values_all_actions``. A penalty equal to
        ``(1 - possible_actions) * self.ACTION_NOT_POSSIBLE_VAL`` is added
        so that actions marked 0 are shifted by that constant before the
        maximum over the last dimension is taken.

        :param states: Blob with one state representation per row
            (described upstream as shape (batch_size, state_dim)).
        :param possible_actions: Blob where entry [i][j] is 1 iff the agent
            can take action j from state i (described upstream as shape
            (batch_size, action_dim)).
        :param use_target_network: Forwarded to
            ``self.get_q_values_all_actions``.
        :return: Blob of per-state maxima, expanded to a column.
        """
        all_q_values = self.get_q_values_all_actions(
            states, use_target_network)

        # Build (1 - possible_actions) as a float mask of impossible actions.
        ones = C2.ConstantFill(possible_actions, value=1.0)
        possible_actions_float = C2.Cast(
            possible_actions, to=core.DataType.FLOAT)
        impossible_mask = C2.Sub(ones, possible_actions_float)

        # ACTION_NOT_POSSIBLE_VAL is presumably a very large negative
        # number, which keeps impossible actions from winning the max.
        penalty = C2.Mul(
            impossible_mask, self.ACTION_NOT_POSSIBLE_VAL, broadcast=1)
        penalized_q_values = C2.Add(all_q_values, penalty)

        best_q_values = C2.ReduceBackMax(penalized_q_values, num_reduce_dims=1)
        return C2.ExpandDims(best_q_values, dims=[1])
示例#32
0
def sparse_to_dense(
    lengths_blob: str,
    keys_blob: str,
    values_blob: str,
    sorted_features: List[int],
) -> Tuple[str, List[str]]:
    """
    Add a SparseToDenseMask op that turns a (lengths, keys, values) sparse
    triple into a dense matrix with one column per entry of sorted_features.

    Entries that are absent from the sparse input are filled from a freshly
    created scalar blob holding MISSING_VALUE.

    :param lengths_blob: Name of the blob with the per-row entry counts.
    :param keys_blob: Name of the blob with the feature ids.
    :param values_blob: Name of the blob with the feature values.
    :param sorted_features: Feature ids used as the mask; must be non-empty.
    :return: Tuple of (dense matrix blob, list containing the fill-value blob).
    """
    fill_blob = C2.NextBlob("MISSING_SCALAR")
    fill_array = np.array([MISSING_VALUE], dtype=np.float32)
    # The blob is both fed into the workspace and (re)filled by the net.
    workspace.FeedBlob(fill_blob, fill_array)
    C2.net().GivenTensorFill(
        [], [fill_blob], shape=[], values=[MISSING_VALUE]
    )

    assert len(sorted_features) > 0, "Sorted features is empty"
    mask_outputs = C2.SparseToDenseMask(
        keys_blob, values_blob, fill_blob, lengths_blob, mask=sorted_features
    )

    created_blobs: List[str] = [fill_blob]
    return mask_outputs[0], created_blobs
示例#33
0
def sparse_to_dense(
    lengths_blob: str,
    keys_blob: str,
    values_blob: str,
    sorted_features: List[int],
    set_missing_value_to_zero: bool = False,
) -> Tuple[str, List[str]]:
    """
    Add a SparseToDenseMask op that turns a (lengths, keys, values) sparse
    triple into a dense matrix with one column per entry of sorted_features.

    :param lengths_blob: Name of the blob with the per-row entry counts.
    :param keys_blob: Name of the blob with the feature ids.
    :param values_blob: Name of the blob with the feature values.
    :param sorted_features: Feature ids used as the mask; must be non-empty.
    :param set_missing_value_to_zero: When true, absent entries are filled
        with 0.0 instead of MISSING_VALUE.
    :return: Tuple of (dense matrix blob, list containing the fill-value blob).
    """
    if set_missing_value_to_zero:
        fill_value = 0.0
    else:
        fill_value = MISSING_VALUE

    fill_blob = C2.NextBlob("MISSING_SCALAR")
    # The blob is both fed into the workspace and (re)filled by the net.
    workspace.FeedBlob(fill_blob, np.array([fill_value], dtype=np.float32))
    C2.net().GivenTensorFill([], [fill_blob], shape=[], values=[fill_value])

    assert len(sorted_features) > 0, "Sorted features is empty"
    mask_outputs = C2.SparseToDenseMask(
        keys_blob, values_blob, fill_blob, lengths_blob, mask=sorted_features
    )

    created_blobs: List[str] = [fill_blob]
    return mask_outputs[0], created_blobs
示例#34
0
def benchmark(num_forward_passes):
    """
    Benchmark preprocessor speeds:
        1 - PyTorch
        2 - PyTorch -> ONNX -> C2
        3 - C2

    Every backend runs ``num_forward_passes`` forward passes over the same
    generated input matrix and the elapsed time is written to the log.

    :param num_forward_passes: Number of forward passes timed per backend.
    """

    feature_value_map = gen_data(
        num_binary_features=10,
        num_boxcox_features=10,
        num_continuous_features=10,
        num_enum_features=10,
        num_prob_features=10,
        num_quantile_features=10,
    )

    normalization_parameters = {
        name: normalization.identify_parameter(name, values, 10)
        for name, values in feature_value_map.items()
    }

    sorted_features, _ = sort_features_by_normalization(normalization_parameters)

    # Dummy input
    input_matrix = np.zeros([10000, len(sorted_features)], dtype=np.float32)

    # PyTorch Preprocessor
    pytorch_preprocessor = Preprocessor(normalization_parameters, False)
    for i, feature in enumerate(sorted_features):
        input_matrix[:, i] = feature_value_map[feature]

    #################### time pytorch ############################
    # perf_counter is monotonic, so the measurement cannot be skewed by
    # system clock adjustments while the benchmark runs.
    start = time.perf_counter()
    for _ in range(num_forward_passes):
        _ = pytorch_preprocessor.forward(input_matrix)
    end = time.perf_counter()
    logger.info(
        "PyTorch: {} forward passes done in {} seconds".format(
            num_forward_passes, end - start
        )
    )

    ################ time pytorch -> ONNX -> caffe2 ####################
    buffer = PytorchCaffe2Converter.pytorch_net_to_buffer(
        pytorch_preprocessor, len(sorted_features), False
    )
    input_blob, output_blob, caffe2_netdef = PytorchCaffe2Converter.buffer_to_caffe2_netdef(
        buffer
    )
    # Copy the converted net's blobs into the workspace the nets run in.
    torch_workspace = caffe2_netdef.workspace
    parameters = torch_workspace.Blobs()
    for blob_str in parameters:
        workspace.FeedBlob(blob_str, torch_workspace.FetchBlob(blob_str))
    torch_init_net = core.Net(caffe2_netdef.init_net)
    torch_predict_net = core.Net(caffe2_netdef.predict_net)
    workspace.FeedBlob(input_blob, input_matrix)
    workspace.RunNetOnce(torch_init_net)
    start = time.perf_counter()
    for _ in range(num_forward_passes):
        workspace.RunNetOnce(torch_predict_net)
        _ = workspace.FetchBlob(output_blob)
    end = time.perf_counter()
    logger.info(
        "PyTorch -> ONNX -> Caffe2: {} forward passes done in {} seconds".format(
            num_forward_passes, end - start
        )
    )

    #################### time caffe2 ############################
    norm_net = core.Net("net")
    C2.set_net(norm_net)
    preprocessor = PreprocessorNet()
    input_matrix_blob = "input_matrix_blob"
    # Feed an empty placeholder while the net is built; the real matrix is
    # fed once the normalization ops have been added.
    workspace.FeedBlob(input_matrix_blob, np.array([], dtype=np.float32))
    output_blob, _ = preprocessor.normalize_dense_matrix(
        input_matrix_blob, sorted_features, normalization_parameters, "", False
    )
    workspace.FeedBlob(input_matrix_blob, input_matrix)
    start = time.perf_counter()
    for _ in range(num_forward_passes):
        workspace.RunNetOnce(norm_net)
        _ = workspace.FetchBlob(output_blob)
    end = time.perf_counter()
    logger.info(
        "Caffe2: {} forward passes done in {} seconds".format(
            num_forward_passes, end - start
        )
    )
示例#35
0
    def preprocess_samples_discrete(
        self,
        samples: Samples,
        minibatch_size: int,
        one_hot_action: bool = True,
        use_gpu: bool = False,
    ) -> List[TrainingDataPage]:
        """
        Convert logged discrete-action samples into a list of training pages.

        The samples are shuffled, the sparse states are densified through a
        Caffe2 net (built on the first call and kept on ``self``), everything
        is converted to torch tensors, the states are run through a
        Preprocessor, and the result is cut into minibatches. A trailing
        batch smaller than ``minibatch_size`` is dropped.

        :param samples: Logged samples to convert.
        :param minibatch_size: Number of rows per returned page.
        :param one_hot_action: When true the page's actions are one-hot rows,
            otherwise the argmax index of each one-hot row.
        :param use_gpu: Selects torch.cuda.FloatTensor instead of
            torch.FloatTensor as the page type.
        :return: One TrainingDataPage per full minibatch.
        """

        def one_hot(action_names):
            # Compare every logged action against the full action list.
            matches = np.array(action_names).reshape(-1, 1) == np.array(self.ACTIONS)
            return torch.tensor(matches.astype(np.int64))

        def availability_mask(action_lists, num_rows):
            # Pad the per-sample action lists with "" to a rectangular array,
            # then mark, column by column, which actions appear in each row.
            padded = np.array(
                list(itertools.zip_longest(*action_lists, fillvalue=""))
            ).T
            mask = torch.zeros([num_rows, len(self.ACTIONS)])
            for column, action in enumerate(self.ACTIONS):
                present = np.max(padded == action, axis=1)
                mask[:, column] = torch.tensor(present.astype(np.int64))
            return mask

        logger.info("Shuffling...")
        samples = shuffle_samples(samples)
        logger.info("Preprocessing...")

        if self.sparse_to_dense_net is not None:
            # Net already built: only rebuild the sparse arrays (return values
            # unused; presumably this refreshes the input blobs - confirm).
            StackedAssociativeArray.from_dict_list(samples.states, "states")
            StackedAssociativeArray.from_dict_list(samples.next_states, "next_states")
        else:
            self.sparse_to_dense_net = core.Net("gridworld_sparse_to_dense")
            C2.set_net(self.sparse_to_dense_net)
            state_saa = StackedAssociativeArray.from_dict_list(
                samples.states, "states"
            )
            feature_ids, _ = sort_features_by_normalization(self.normalization)
            self.state_matrix, _ = sparse_to_dense(
                state_saa.lengths, state_saa.keys, state_saa.values, feature_ids
            )
            next_state_saa = StackedAssociativeArray.from_dict_list(
                samples.next_states, "next_states"
            )
            self.next_state_matrix, _ = sparse_to_dense(
                next_state_saa.lengths,
                next_state_saa.keys,
                next_state_saa.values,
                feature_ids,
            )
            C2.set_net(None)
        workspace.RunNetOnce(self.sparse_to_dense_net)

        logger.info("Converting to Torch...")
        actions_one_hot = one_hot(samples.actions)
        actions = actions_one_hot.argmax(dim=1, keepdim=True)
        rewards = torch.tensor(samples.rewards, dtype=torch.float32).reshape(-1, 1)
        propensities = torch.tensor(samples.action_probabilities, dtype=torch.float32)
        action_probabilities = propensities.reshape(-1, 1)
        next_actions_one_hot = one_hot(samples.next_actions)

        logger.info("Converting PA to Torch...")
        possible_actions_mask = availability_mask(
            samples.possible_actions, len(samples.actions)
        )

        logger.info("Converting PNA to Torch...")
        possible_next_actions_mask = availability_mask(
            samples.possible_next_actions, len(samples.next_actions)
        )
        terminals = torch.tensor(samples.terminals, dtype=torch.int32).reshape(-1, 1)
        not_terminal = 1 - terminals
        logger.info("Converting RT to Torch...")

        time_diffs = torch.ones([len(samples.states), 1])

        logger.info("Preprocessing...")
        preprocessor = Preprocessor(self.normalization, False)

        states_ndarray = preprocessor.forward(workspace.FetchBlob(self.state_matrix))
        next_states_ndarray = preprocessor.forward(
            workspace.FetchBlob(self.next_state_matrix)
        )

        logger.info("Batching...")
        page_actions = actions_one_hot if one_hot_action else actions
        num_rows = states_ndarray.shape[0]
        pages = []
        # Only full minibatches are emitted; the stop bound excludes any
        # start index whose batch would run past the last row.
        for start in range(0, num_rows - minibatch_size + 1, minibatch_size):
            rows = slice(start, start + minibatch_size)
            page = TrainingDataPage(
                states=states_ndarray[rows],
                actions=page_actions[rows],
                propensities=action_probabilities[rows],
                rewards=rewards[rows],
                next_states=next_states_ndarray[rows],
                not_terminal=not_terminal[rows],
                next_actions=next_actions_one_hot[rows],
                possible_actions_mask=possible_actions_mask[rows],
                possible_next_actions_mask=possible_next_actions_mask[rows],
                time_diffs=time_diffs[rows],
            )
            page.set_type(torch.cuda.FloatTensor if use_gpu else torch.FloatTensor)
            pages.append(page)
        return pages
示例#36
0
    def preprocess_samples(
        self,
        samples: Samples,
        minibatch_size: int,
        use_gpu: bool = False,
        one_hot_action: bool = True,
        normalize_actions: bool = True,
    ) -> List[TrainingDataPage]:
        """
        Convert logged parametric-action samples into a list of training pages.

        States, actions, next states/actions and the (padded) possible next
        actions are densified through a fresh Caffe2 net, run through the
        state/action Preprocessors and cut into minibatches. A trailing batch
        smaller than ``minibatch_size`` is dropped.

        :param samples: Logged samples to convert.
        :param minibatch_size: Number of rows per returned page.
        :param use_gpu: Selects torch.cuda.FloatTensor instead of
            torch.FloatTensor as the page type.
        :param one_hot_action: Accepted for interface compatibility; not
            referenced in this method.
        :param normalize_actions: When true, actions and next actions are run
            through the action Preprocessor. The possible next actions are
            always preprocessed.
        :return: One TrainingDataPage per full minibatch.
        """

        def densify(dict_list, blob_name, feature_ids):
            # Stack the dicts into a sparse triple and add the op that
            # produces the matching dense matrix blob.
            stacked = StackedAssociativeArray.from_dict_list(dict_list, blob_name)
            dense_blob, _ = sparse_to_dense(
                stacked.lengths, stacked.keys, stacked.values, feature_ids
            )
            return dense_blob

        logger.info("Shuffling...")
        samples = shuffle_samples(samples)

        logger.info("Sparse2Dense...")
        net = core.Net("gridworld_preprocessing")
        C2.set_net(net)

        sorted_state_features, _ = sort_features_by_normalization(self.normalization)
        state_matrix = densify(samples.states, "states", sorted_state_features)
        next_state_matrix = densify(
            samples.next_states, "next_states", sorted_state_features
        )

        sorted_action_features, _ = sort_features_by_normalization(
            self.normalization_action
        )
        action_matrix = densify(samples.actions, "action", sorted_action_features)
        next_action_matrix = densify(
            samples.next_actions, "next_action", sorted_action_features
        )

        propensities = torch.tensor(samples.action_probabilities, dtype=torch.float32)
        action_probabilities = propensities.reshape(-1, 1)
        rewards = torch.tensor(samples.rewards, dtype=torch.float32).reshape(-1, 1)

        max_action_size = 4

        # Pad every sample's possible next actions to max_action_size entries:
        # the mask records which slots are real, empty dicts fill the rest.
        mask_rows: List[List[int]] = []
        flat_candidates: List[Dict[str, float]] = []
        for candidates in samples.possible_next_actions:
            padding = max_action_size - len(candidates)
            mask_rows.append([1] * len(candidates) + [0] * padding)
            flat_candidates.extend(candidates)
            flat_candidates.extend({} for _ in range(padding))  # Filler
        possible_next_actions_matrix = densify(
            flat_candidates, "possible_next_actions", sorted_action_features
        )
        pnas_mask = torch.Tensor(mask_rows)

        workspace.RunNetOnce(net)

        logger.info("Preprocessing...")
        state_preprocessor = Preprocessor(self.normalization, False)
        action_preprocessor = Preprocessor(self.normalization_action, False)

        states_ndarray = state_preprocessor.forward(workspace.FetchBlob(state_matrix))

        actions_ndarray = torch.from_numpy(workspace.FetchBlob(action_matrix))
        if normalize_actions:
            actions_ndarray = action_preprocessor.forward(actions_ndarray)

        next_states_ndarray = state_preprocessor.forward(
            workspace.FetchBlob(next_state_matrix)
        )

        # One copy of each next state per possible-next-action slot.
        state_width = next_states_ndarray.shape[1]
        state_pnas_tile = next_states_ndarray.repeat(1, max_action_size).reshape(
            -1, state_width
        )

        next_actions_ndarray = torch.from_numpy(workspace.FetchBlob(next_action_matrix))
        if normalize_actions:
            next_actions_ndarray = action_preprocessor.forward(next_actions_ndarray)

        logged_possible_next_actions = action_preprocessor.forward(
            workspace.FetchBlob(possible_next_actions_matrix)
        )

        assert state_pnas_tile.shape[0] == logged_possible_next_actions.shape[0], (
            "Invalid shapes: "
            + str(state_pnas_tile.shape)
            + " != "
            + str(logged_possible_next_actions.shape)
        )
        logged_possible_next_state_actions = torch.cat(
            (state_pnas_tile, logged_possible_next_actions), dim=1
        )

        logger.info("Reward Timeline to Torch...")
        time_diffs = torch.ones([len(samples.states), 1])

        pages = []
        logger.info("Batching...")
        num_rows = states_ndarray.shape[0]
        # Only full minibatches are emitted; the stop bound excludes any
        # start index whose batch would run past the last row.
        for start in range(0, num_rows - minibatch_size + 1, minibatch_size):
            end = start + minibatch_size
            rows = slice(start, end)
            # Each sample owns max_action_size consecutive rows of the
            # state/possible-action concatenation.
            pna_rows = slice(start * max_action_size, end * max_action_size)
            mask_slice = pnas_mask[start:end, :]
            page = TrainingDataPage(
                states=states_ndarray[rows],
                actions=actions_ndarray[rows],
                propensities=action_probabilities[rows],
                rewards=rewards[rows],
                next_states=next_states_ndarray[rows],
                next_actions=next_actions_ndarray[rows],
                not_terminal=(mask_slice.sum(dim=1, keepdim=True) > 0),
                time_diffs=time_diffs[rows],
                possible_next_actions_mask=mask_slice,
                possible_next_actions_state_concat=logged_possible_next_state_actions[
                    pna_rows, :
                ],
            )
            page.set_type(torch.cuda.FloatTensor if use_gpu else torch.FloatTensor)
            pages.append(page)
        return pages
示例#37
0
    def export(
        cls,
        trainer,
        actions,
        state_normalization_parameters,
        int_features=False,
        model_on_gpu=False,
        set_missing_value_to_zero=False,
    ):
        """Export caffe2 preprocessor net and pytorch DQN forward pass as one
        caffe2 net.

        :param trainer DQNTrainer
        :param actions action names; fed into the net as the lookup table that
            the selected action indices are gathered from
        :param state_normalization_parameters state NormalizationParameters;
            when None the "input/image" blob is used as the network input
        :param int_features boolean indicating if int features blob will be present
        :param model_on_gpu boolean indicating if the model is a GPU model or CPU model
        :param set_missing_value_to_zero forwarded to sparse_to_dense
        :return DQNPredictor built from the combined net, the converted init
            net and the collected parameter blobs
        """

        input_dim = trainer.num_features

        # Export the wrapped module when the Q-network is a DataParallel.
        q_network = (
            trainer.q_network.module
            if isinstance(trainer.q_network, DataParallel)
            else trainer.q_network
        )

        buffer = PytorchCaffe2Converter.pytorch_net_to_buffer(
            q_network, input_dim, model_on_gpu
        )
        qnet_input_blob, qnet_output_blob, caffe2_netdef = PytorchCaffe2Converter.buffer_to_caffe2_netdef(
            buffer
        )
        torch_workspace = caffe2_netdef.workspace

        # Copy every blob of the converted net's workspace into `workspace`.
        parameters = torch_workspace.Blobs()
        for blob_str in parameters:
            workspace.FeedBlob(blob_str, torch_workspace.FetchBlob(blob_str))

        torch_init_net = core.Net(caffe2_netdef.init_net)
        torch_predict_net = core.Net(caffe2_netdef.predict_net)
        logger.info("Generated ONNX predict net:")
        logger.info(str(torch_predict_net.Proto()))
        # While converting to metanetdef, the external_input of predict_net
        # will be recomputed. Add the real output of init_net to parameters
        # to make sure they will be counted.
        parameters.extend(
            set(caffe2_netdef.init_net.external_output)
            - set(caffe2_netdef.init_net.external_input)
        )

        model = model_helper.ModelHelper(name="predictor")
        net = model.net
        C2.set_model(model)

        # Feed zero-valued placeholders for the predictor's input blobs.
        workspace.FeedBlob("input/image", np.zeros([1, 1, 1, 1], dtype=np.int32))
        workspace.FeedBlob("input/float_features.lengths", np.zeros(1, dtype=np.int32))
        workspace.FeedBlob("input/float_features.keys", np.zeros(1, dtype=np.int64))
        workspace.FeedBlob("input/float_features.values", np.zeros(1, dtype=np.float32))

        # Names of the sparse feature triple consumed by sparse_to_dense below.
        input_feature_lengths = "input_feature_lengths"
        input_feature_keys = "input_feature_keys"
        input_feature_values = "input_feature_values"

        if int_features:
            # Cast the int feature values to float and merge them with the
            # float features into the single sparse triple named above.
            workspace.FeedBlob(
                "input/int_features.lengths", np.zeros(1, dtype=np.int32)
            )
            workspace.FeedBlob("input/int_features.keys", np.zeros(1, dtype=np.int64))
            workspace.FeedBlob("input/int_features.values", np.zeros(1, dtype=np.int32))
            C2.net().Cast(
                ["input/int_features.values"],
                ["input/int_features.values_float"],
                dtype=caffe2_pb2.TensorProto.FLOAT,
            )
            C2.net().MergeMultiScalarFeatureTensors(
                [
                    "input/float_features.lengths",
                    "input/float_features.keys",
                    "input/float_features.values",
                    "input/int_features.lengths",
                    "input/int_features.keys",
                    "input/int_features.values_float",
                ],
                [input_feature_lengths, input_feature_keys, input_feature_values],
            )
        else:
            # No int features: the float features are copied through as-is.
            C2.net().Copy(["input/float_features.lengths"], [input_feature_lengths])
            C2.net().Copy(["input/float_features.keys"], [input_feature_keys])
            C2.net().Copy(["input/float_features.values"], [input_feature_values])

        if state_normalization_parameters is not None:
            # Sparse features -> dense matrix -> normalized dense matrix.
            sorted_feature_ids = sort_features_by_normalization(
                state_normalization_parameters
            )[0]
            dense_matrix, new_parameters = sparse_to_dense(
                input_feature_lengths,
                input_feature_keys,
                input_feature_values,
                sorted_feature_ids,
                set_missing_value_to_zero=set_missing_value_to_zero,
            )
            parameters.extend(new_parameters)
            preprocessor_net = PreprocessorNet()
            state_normalized_dense_matrix, new_parameters = preprocessor_net.normalize_dense_matrix(
                dense_matrix,
                sorted_feature_ids,
                state_normalization_parameters,
                "state_norm_",
                True,
            )
            parameters.extend(new_parameters)
        else:
            # Image input.  Note: Currently this does the wrong thing if
            #   more than one image is passed at a time.
            state_normalized_dense_matrix = "input/image"

        # Wire the preprocessed state into the converted Q-network's input.
        net.Copy([state_normalized_dense_matrix], [qnet_input_blob])

        workspace.RunNetOnce(model.param_init_net)
        workspace.RunNetOnce(torch_init_net)

        net.AppendNet(torch_predict_net)

        new_parameters, q_values = RLPredictor._forward_pass(
            model, trainer, state_normalized_dense_matrix, actions, qnet_output_blob
        )
        parameters.extend(new_parameters)

        # Get 1 x n action index tensor under the max_q policy
        max_q_act_idxs = "max_q_policy_actions"
        C2.net().Flatten([C2.ArgMax(q_values)], [max_q_act_idxs], axis=0)
        shape_of_num_of_states = "num_states_shape"
        C2.net().FlattenToVec([max_q_act_idxs], [shape_of_num_of_states])
        # Element count of the flattened index vector, reshaped to shape [1];
        # used below as the tile count for the output keys.
        num_states, _ = C2.Reshape(C2.Size(shape_of_num_of_states), shape=[1])

        # Get 1 x n action index tensor under the softmax policy
        temperature = C2.NextBlob("temperature")
        parameters.append(temperature)
        workspace.FeedBlob(
            temperature, np.array([trainer.rl_temperature], dtype=np.float32)
        )
        tempered_q_values = C2.Div(q_values, temperature, broadcast=1)
        softmax_values = C2.Softmax(tempered_q_values)
        softmax_act_idxs_nested = "softmax_act_idxs_nested"
        C2.net().WeightedSample([softmax_values], [softmax_act_idxs_nested])
        softmax_act_idxs = "softmax_policy_actions"
        C2.net().Flatten([softmax_act_idxs_nested], [softmax_act_idxs], axis=0)

        # Lookup table of action names that the indices are gathered from.
        action_names = C2.NextBlob("action_names")
        parameters.append(action_names)
        workspace.FeedBlob(action_names, np.array(actions))

        # Concat action index tensors to get 2 x n tensor - [[max_q], [softmax]]
        # transpose & flatten to get [a1_maxq, a1_softmax, a2_maxq, a2_softmax, ...]
        max_q_act_blob = C2.Cast(max_q_act_idxs, to=caffe2_pb2.TensorProto.INT32)
        softmax_act_blob = C2.Cast(softmax_act_idxs, to=caffe2_pb2.TensorProto.INT32)
        C2.net().Append([max_q_act_blob, softmax_act_blob], [max_q_act_blob])
        transposed_action_idxs = C2.Transpose(max_q_act_blob)
        flat_transposed_action_idxs = C2.FlattenToVec(transposed_action_idxs)
        workspace.FeedBlob(OUTPUT_SINGLE_CAT_VALS_NAME, np.zeros(1, dtype=np.int64))
        C2.net().Gather(
            [action_names, flat_transposed_action_idxs], [OUTPUT_SINGLE_CAT_VALS_NAME]
        )

        # Output lengths: the constant 2 for every state (one max_q entry and
        # one softmax entry, matching the interleaved values above).
        workspace.FeedBlob(OUTPUT_SINGLE_CAT_LENGTHS_NAME, np.zeros(1, dtype=np.int32))
        C2.net().ConstantFill(
            [shape_of_num_of_states],
            [OUTPUT_SINGLE_CAT_LENGTHS_NAME],
            value=2,
            dtype=caffe2_pb2.TensorProto.INT32,
        )

        # Output keys: the pair (0, 1) tiled num_states times and flattened.
        workspace.FeedBlob(OUTPUT_SINGLE_CAT_KEYS_NAME, np.zeros(1, dtype=np.int64))
        output_keys_tensor, _ = C2.Concat(
            C2.ConstantFill(shape=[1, 1], value=0, dtype=caffe2_pb2.TensorProto.INT64),
            C2.ConstantFill(shape=[1, 1], value=1, dtype=caffe2_pb2.TensorProto.INT64),
            axis=0,
        )
        output_key_tile = C2.Tile(output_keys_tensor, num_states, axis=0)
        C2.net().FlattenToVec([output_key_tile], [OUTPUT_SINGLE_CAT_KEYS_NAME])

        workspace.CreateNet(net)
        return DQNPredictor(net, torch_init_net, parameters, int_features)
示例#38
0
    def export_actor(
        cls,
        trainer,
        state_normalization_parameters,
        action_feature_ids,
        min_action_range_tensor_serving,
        max_action_range_tensor_serving,
        int_features=False,
        model_on_gpu=False,
    ):
        """Export caffe2 preprocessor net and pytorch actor forward pass as one
        caffe2 net.

        :param trainer DDPGTrainer
        :param state_normalization_parameters state NormalizationParameters
        :param action_feature_ids feature ids of the action dimensions; tiled
            once per example to form the output keys
        :param min_action_range_tensor_serving pytorch tensor that specifies
            min action value for each dimension
        :param max_action_range_tensor_serving pytorch tensor that specifies
            max action value for each dimension
        :param int_features boolean indicating if int features blob will be present
        :param model_on_gpu boolean indicating if the model is a GPU model or CPU model
        :return DDPGPredictor built from the combined net, the converted init
            net and the collected parameter blobs
        """
        model = model_helper.ModelHelper(name="predictor")
        net = model.net
        C2.set_model(model)
        parameters: List[str] = []

        # Feed zero-valued placeholders for the predictor's input blobs.
        workspace.FeedBlob("input/float_features.lengths", np.zeros(1, dtype=np.int32))
        workspace.FeedBlob("input/float_features.keys", np.zeros(1, dtype=np.int64))
        workspace.FeedBlob("input/float_features.values", np.zeros(1, dtype=np.float32))

        # Names of the sparse feature triple consumed by sparse_to_dense below.
        input_feature_lengths = "input_feature_lengths"
        input_feature_keys = "input_feature_keys"
        input_feature_values = "input_feature_values"

        if int_features:
            # Cast the int feature values to float and merge them with the
            # float features into the single sparse triple named above.
            workspace.FeedBlob(
                "input/int_features.lengths", np.zeros(1, dtype=np.int32)
            )
            workspace.FeedBlob("input/int_features.keys", np.zeros(1, dtype=np.int64))
            workspace.FeedBlob("input/int_features.values", np.zeros(1, dtype=np.int32))
            C2.net().Cast(
                ["input/int_features.values"],
                ["input/int_features.values_float"],
                dtype=caffe2_pb2.TensorProto.FLOAT,
            )
            C2.net().MergeMultiScalarFeatureTensors(
                [
                    "input/float_features.lengths",
                    "input/float_features.keys",
                    "input/float_features.values",
                    "input/int_features.lengths",
                    "input/int_features.keys",
                    "input/int_features.values_float",
                ],
                [input_feature_lengths, input_feature_keys, input_feature_values],
            )
        else:
            # No int features: the float features are copied through as-is.
            C2.net().Copy(["input/float_features.lengths"], [input_feature_lengths])
            C2.net().Copy(["input/float_features.keys"], [input_feature_keys])
            C2.net().Copy(["input/float_features.values"], [input_feature_values])

        # Sparse features -> dense matrix -> normalized dense matrix.
        preprocessor = PreprocessorNet()
        sorted_features, _ = sort_features_by_normalization(
            state_normalization_parameters
        )
        state_dense_matrix, new_parameters = sparse_to_dense(
            input_feature_lengths,
            input_feature_keys,
            input_feature_values,
            sorted_features,
        )
        parameters.extend(new_parameters)
        state_normalized_dense_matrix, new_parameters = preprocessor.normalize_dense_matrix(
            state_dense_matrix,
            sorted_features,
            state_normalization_parameters,
            "state_norm",
            False,
        )
        parameters.extend(new_parameters)

        torch_init_net, torch_predict_net, new_parameters, actor_input_blob, actor_output_blob, min_action_training_blob, max_action_training_blob, min_action_serving_blob, max_action_serving_blob = DDPGPredictor.generate_train_net(
            trainer,
            model,
            min_action_range_tensor_serving,
            max_action_range_tensor_serving,
            model_on_gpu,
        )
        parameters.extend(new_parameters)
        # Wire the preprocessed state into the converted actor's input.
        net.Copy([state_normalized_dense_matrix], [actor_input_blob])

        workspace.RunNetOnce(model.param_init_net)
        workspace.RunNetOnce(torch_init_net)

        net.AppendNet(torch_predict_net)

        # Scale actors actions from [-1, 1] to serving range
        prev_range = C2.Sub(max_action_training_blob, min_action_training_blob)
        new_range = C2.Sub(max_action_serving_blob, min_action_serving_blob)
        subtract_prev_min = C2.Sub(actor_output_blob, min_action_training_blob)
        div_by_prev_range = C2.Div(subtract_prev_min, prev_range)
        scaled_for_serving_actions = C2.Add(
            C2.Mul(div_by_prev_range, new_range), min_action_serving_blob
        )

        # Output lengths: the out_features of the actor's last layer, filled
        # into a tensor shaped like the flattened ArgMax of the actor output.
        output_lengths = "output/float_features.lengths"
        workspace.FeedBlob(output_lengths, np.zeros(1, dtype=np.int32))
        C2.net().ConstantFill(
            [C2.FlattenToVec(C2.ArgMax(actor_output_blob))],
            [output_lengths],
            value=trainer.actor.layers[-1].out_features,
            dtype=caffe2_pb2.TensorProto.INT32,
        )

        action_feature_ids_blob = C2.NextBlob("action_feature_ids")
        workspace.FeedBlob(
            action_feature_ids_blob, np.array(action_feature_ids, dtype=np.int64)
        )
        parameters.append(action_feature_ids_blob)

        # Output keys: the action feature ids tiled along axis 1, with the
        # tile count taken from the size of the input lengths blob.
        output_keys = "output/float_features.keys"
        workspace.FeedBlob(output_keys, np.zeros(1, dtype=np.int64))
        num_examples, _ = C2.Reshape(C2.Size("input/float_features.lengths"), shape=[1])
        C2.net().Tile([action_feature_ids_blob, num_examples], [output_keys], axis=1)

        # Output values: the serving-scaled actions flattened to a vector.
        output_values = "output/float_features.values"
        workspace.FeedBlob(output_values, np.zeros(1, dtype=np.float32))
        C2.net().FlattenToVec([scaled_for_serving_actions], [output_values])

        workspace.CreateNet(net)
        return DDPGPredictor(net, torch_init_net, parameters, int_features)
示例#39
0
    def test_prepare_normalization_and_normalize(self):
        feature_value_map = read_data()

        normalization_parameters = {}
        for name, values in feature_value_map.items():
            normalization_parameters[name] = normalization.identify_parameter(
                name, values, 10, feature_type=self._feature_type_override(name)
            )
        for k, v in normalization_parameters.items():
            if id_to_type(k) == CONTINUOUS:
                self.assertEqual(v.feature_type, CONTINUOUS)
                self.assertIs(v.boxcox_lambda, None)
                self.assertIs(v.boxcox_shift, None)
            elif id_to_type(k) == BOXCOX:
                self.assertEqual(v.feature_type, BOXCOX)
                self.assertIsNot(v.boxcox_lambda, None)
                self.assertIsNot(v.boxcox_shift, None)
            else:
                assert v.feature_type == id_to_type(k)
        sorted_features, _ = sort_features_by_normalization(normalization_parameters)

        norm_net = core.Net("net")
        C2.set_net(norm_net)
        preprocessor = PreprocessorNet()
        input_matrix = np.zeros([10000, len(sorted_features)], dtype=np.float32)
        for i, feature in enumerate(sorted_features):
            input_matrix[:, i] = feature_value_map[feature]
        input_matrix_blob = "input_matrix_blob"
        workspace.FeedBlob(input_matrix_blob, np.array([], dtype=np.float32))
        output_blob, _ = preprocessor.normalize_dense_matrix(
            input_matrix_blob, sorted_features, normalization_parameters, "", False
        )
        workspace.FeedBlob(input_matrix_blob, input_matrix)
        workspace.RunNetOnce(norm_net)
        normalized_feature_matrix = workspace.FetchBlob(output_blob)

        normalized_features = {}
        on_column = 0
        for feature in sorted_features:
            norm = normalization_parameters[feature]
            if norm.feature_type == ENUM:
                column_size = len(norm.possible_values)
            else:
                column_size = 1
            normalized_features[feature] = normalized_feature_matrix[
                :, on_column : (on_column + column_size)
            ]
            on_column += column_size

        self.assertTrue(
            all(
                [
                    np.isfinite(parameter.stddev) and np.isfinite(parameter.mean)
                    for parameter in normalization_parameters.values()
                ]
            )
        )
        for k, v in six.iteritems(normalized_features):
            self.assertTrue(np.all(np.isfinite(v)))
            feature_type = normalization_parameters[k].feature_type
            if feature_type == identify_types.PROBABILITY:
                sigmoidv = special.expit(v)
                self.assertTrue(
                    np.all(
                        np.logical_and(np.greater(sigmoidv, 0), np.less(sigmoidv, 1))
                    )
                )
            elif feature_type == identify_types.ENUM:
                possible_values = normalization_parameters[k].possible_values
                self.assertEqual(v.shape[0], len(feature_value_map[k]))
                self.assertEqual(v.shape[1], len(possible_values))

                possible_value_map = {}
                for i, possible_value in enumerate(possible_values):
                    possible_value_map[possible_value] = i

                for i, row in enumerate(v):
                    original_feature = feature_value_map[k][i]
                    self.assertEqual(
                        possible_value_map[original_feature], np.where(row == 1)[0][0]
                    )
            elif feature_type == identify_types.QUANTILE:
                for i, feature in enumerate(v[0]):
                    original_feature = feature_value_map[k][i]
                    expected = NumpyFeatureProcessor.value_to_quantile(
                        original_feature, normalization_parameters[k].quantiles
                    )
                    self.assertAlmostEqual(feature, expected, 2)
            elif feature_type == identify_types.BINARY:
                pass
            elif (
                feature_type == identify_types.CONTINUOUS
                or feature_type == identify_types.BOXCOX
            ):
                one_stddev = np.isclose(np.std(v, ddof=1), 1, atol=0.01)
                zero_stddev = np.isclose(np.std(v, ddof=1), 0, atol=0.01)
                zero_mean = np.isclose(np.mean(v), 0, atol=0.01)
                self.assertTrue(
                    np.all(zero_mean),
                    "mean of feature {} is {}, not 0".format(k, np.mean(v)),
                )
                self.assertTrue(np.all(np.logical_or(one_stddev, zero_stddev)))
            elif feature_type == identify_types.CONTINUOUS_ACTION:
                less_than_max = v < 1
                more_than_min = v > -1
                self.assertTrue(
                    np.all(less_than_max),
                    "values are not less than 1: {}".format(v[less_than_max == False]),
                )
                self.assertTrue(
                    np.all(more_than_min),
                    "values are not more than -1: {}".format(v[more_than_min == False]),
                )
            else:
                raise NotImplementedError()
示例#40
0
    def test_preprocessing_network(self):
        """
        Compare per-feature output of the Caffe2 preprocessing net with the
        output of NumpyFeatureProcessor.preprocess on the same data.
        """
        feature_value_map = read_data()

        normalization_parameters = {
            name: normalization.identify_parameter(
                name, values, feature_type=self._feature_type_override(name)
            )
            for name, values in feature_value_map.items()
        }
        expected_features = NumpyFeatureProcessor.preprocess(
            feature_value_map, normalization_parameters
        )

        # Build one preprocessing sub-graph per feature inside a single net.
        net = core.Net("PreprocessingTestNet")
        C2.set_net(net)
        preprocessor = PreprocessorNet()
        output_blob_by_feature = {}
        for feature_name in feature_value_map:
            input_blob = str(feature_name)
            # Placeholder value so the input blob exists while the net is built.
            workspace.FeedBlob(input_blob, np.array([0], dtype=np.int32))
            output_blob, _ = preprocessor.preprocess_blob(
                input_blob, [normalization_parameters[feature_name]]
            )
            output_blob_by_feature[feature_name] = output_blob

        workspace.CreateNet(net)

        # Feed the real data as column vectors and run the net.
        for feature_name, raw_values in feature_value_map.items():
            workspace.FeedBlob(str(feature_name), np.expand_dims(raw_values, -1))
        workspace.RunNetOnce(net)

        for feature_name in feature_value_map:
            actual = workspace.FetchBlob(output_blob_by_feature[feature_name])
            if feature_name != ENUM_FEATURE_ID:
                actual = np.squeeze(actual, -1)
            expected = expected_features[feature_name]

            # At the limit, boxcox has some numerical instability
            tolerance = 0.5 if feature_name == BOXCOX_FEATURE_ID else 0.01

            close = np.isclose(actual, expected, rtol=tolerance, atol=tolerance)
            mismatched = np.where(np.logical_not(close))
            self.assertTrue(
                np.all(close),
                "{} does not match: {} {}".format(
                    feature_name,
                    actual[mismatched].tolist(),
                    expected[mismatched].tolist(),
                ),
            )
示例#41
0
    def create_net(self):
        """
        Build the feature extraction net (and its init net) for training data.

        The input record holds the state / next-state feature maps, the action
        and next action, and the not-terminal flag. When
        ``self.include_possible_actions`` is set it also holds the possible
        (next) action masks and, when action features exist, the possible
        (next) action feature maps. Feature maps are densified with
        ``self.extract_float_features`` and, when ``self.normalize`` is set,
        passed through ``PreprocessorNet.normalize_dense_matrix``.

        Whether action features exist is decided once, from the truthiness of
        ``self.sorted_action_features``, and that single decision drives the
        input schema, extraction, normalization and the output schema. An
        empty feature list is therefore handled exactly like ``None`` instead
        of taking the "scalar action" path in some places and the "feature
        map" path in others.

        :return: FeatureExtractorNet wrapping the net and its init net.
        """
        net = core.Net("feature_extractor")
        init_net = core.Net("feature_extractor_init")
        missing_scalar = self.create_const(init_net, "MISSING_SCALAR", MISSING_VALUE)

        # Single source of truth for every action-feature branch below.
        has_action_features = bool(self.sorted_action_features)

        action_schema = map_schema() if has_action_features else schema.Scalar()

        input_schema = schema.Struct(
            (InputColumn.STATE_FEATURES, map_schema()),
            (InputColumn.NEXT_STATE_FEATURES, map_schema()),
            (InputColumn.ACTION, action_schema),
            (InputColumn.NEXT_ACTION, action_schema),
            (InputColumn.NOT_TERMINAL, schema.Scalar()),
        )
        if self.include_possible_actions:
            input_schema += schema.Struct(
                (InputColumn.POSSIBLE_ACTIONS_MASK, schema.List(schema.Scalar())),
                (InputColumn.POSSIBLE_NEXT_ACTIONS_MASK, schema.List(schema.Scalar())),
            )
            if has_action_features:
                input_schema += schema.Struct(
                    (InputColumn.POSSIBLE_ACTIONS, schema.List(map_schema())),
                    (InputColumn.POSSIBLE_NEXT_ACTIONS, schema.List(map_schema())),
                )

        input_record = net.set_input_record(input_schema)

        state = self.extract_float_features(
            net,
            "state",
            input_record[InputColumn.STATE_FEATURES],
            self.sorted_state_features,
            missing_scalar,
        )
        next_state = self.extract_float_features(
            net,
            "next_state",
            input_record[InputColumn.NEXT_STATE_FEATURES],
            self.sorted_state_features,
            missing_scalar,
        )

        if has_action_features:
            action = self.extract_float_features(
                net,
                InputColumn.ACTION,
                input_record[InputColumn.ACTION],
                self.sorted_action_features,
                missing_scalar,
            )
            next_action = self.extract_float_features(
                net,
                InputColumn.NEXT_ACTION,
                input_record[InputColumn.NEXT_ACTION],
                self.sorted_action_features,
                missing_scalar,
            )
            if self.include_possible_actions:
                possible_action_features = self.extract_float_features(
                    net,
                    InputColumn.POSSIBLE_ACTIONS,
                    input_record[InputColumn.POSSIBLE_ACTIONS]["values"],
                    self.sorted_action_features,
                    missing_scalar,
                )
                possible_next_action_features = self.extract_float_features(
                    net,
                    InputColumn.POSSIBLE_NEXT_ACTIONS,
                    input_record[InputColumn.POSSIBLE_NEXT_ACTIONS]["values"],
                    self.sorted_action_features,
                    missing_scalar,
                )
        else:
            # No action features: the action columns are passed through as-is.
            action = input_record[InputColumn.ACTION]
            next_action = input_record[InputColumn.NEXT_ACTION]

        if self.normalize:
            # Normalization ops are emitted through the C2 helper, so point it
            # at this net / init net for the duration of the block.
            C2.set_net_and_init_net(net, init_net)
            state, _ = PreprocessorNet().normalize_dense_matrix(
                state,
                self.sorted_state_features,
                self.state_normalization_parameters,
                blobname_prefix="state",
                split_expensive_feature_groups=True,
            )
            next_state, _ = PreprocessorNet().normalize_dense_matrix(
                next_state,
                self.sorted_state_features,
                self.state_normalization_parameters,
                blobname_prefix="next_state",
                split_expensive_feature_groups=True,
            )
            if has_action_features:
                action, _ = PreprocessorNet().normalize_dense_matrix(
                    action,
                    self.sorted_action_features,
                    self.action_normalization_parameters,
                    blobname_prefix="action",
                    split_expensive_feature_groups=True,
                )
                next_action, _ = PreprocessorNet().normalize_dense_matrix(
                    next_action,
                    self.sorted_action_features,
                    self.action_normalization_parameters,
                    blobname_prefix="next_action",
                    split_expensive_feature_groups=True,
                )
                if self.include_possible_actions:
                    possible_action_features, _ = PreprocessorNet().normalize_dense_matrix(
                        possible_action_features,
                        self.sorted_action_features,
                        self.action_normalization_parameters,
                        blobname_prefix="possible_action",
                        split_expensive_feature_groups=True,
                    )
                    possible_next_action_features, _ = PreprocessorNet().normalize_dense_matrix(
                        possible_next_action_features,
                        self.sorted_action_features,
                        self.action_normalization_parameters,
                        blobname_prefix="possible_next_action",
                        split_expensive_feature_groups=True,
                    )
            C2.set_net_and_init_net(None, None)

        output_schema = schema.Struct(
            (InputColumn.STATE_FEATURES, state),
            (InputColumn.NEXT_STATE_FEATURES, next_state),
            (InputColumn.ACTION, action),
            (InputColumn.NEXT_ACTION, next_action),
            (InputColumn.NOT_TERMINAL, input_record[InputColumn.NOT_TERMINAL]),
        )

        if self.include_possible_actions:
            # Drop the "lengths" blob from possible_actions_mask since we know
            # it's just a list of [max_num_actions, max_num_actions, ...]
            output_schema += schema.Struct(
                (
                    InputColumn.POSSIBLE_ACTIONS_MASK,
                    input_record[InputColumn.POSSIBLE_ACTIONS_MASK]["values"],
                ),
                (
                    InputColumn.POSSIBLE_NEXT_ACTIONS_MASK,
                    input_record[InputColumn.POSSIBLE_NEXT_ACTIONS_MASK]["values"],
                ),
            )
            if has_action_features:
                output_schema += schema.Struct(
                    (InputColumn.POSSIBLE_ACTIONS, possible_action_features),
                    (InputColumn.POSSIBLE_NEXT_ACTIONS, possible_next_action_features),
                )

        net.set_output_record(output_schema)
        return FeatureExtractorNet(net, init_net)
示例#42
0
    def create_net(self):
        """
        Build the feature extraction net (and its init net) for prediction.

        The single "float_features" input map is densified into a state matrix
        and, when ``self.sorted_action_features`` is non-empty, an action
        matrix. Both are normalized when ``self.normalize`` is set. The output
        record exposes "state" and, if present, "action".

        :return: FeatureExtractorNet wrapping the net and its init net.
        """
        net = core.Net("feature_extractor")
        init_net = core.Net("feature_extractor_init")
        missing_scalar = self.create_const(init_net, "MISSING_SCALAR", MISSING_VALUE)

        float_features_map = schema.Map(
            keys=core.BlobReference("input/float_features.keys"),
            values=core.BlobReference("input/float_features.values"),
            lengths_blob=core.BlobReference("input/float_features.lengths"),
        )
        input_record = net.set_input_record(
            schema.Struct(("float_features", float_features_map))
        )

        with_actions = bool(self.sorted_action_features)

        # State and action features are both read from the same input map.
        state_blob = self.extract_float_features(
            net,
            "state",
            input_record.float_features,
            self.sorted_state_features,
            missing_scalar,
        )
        if with_actions:
            action_blob = self.extract_float_features(
                net,
                "action",
                input_record.float_features,
                self.sorted_action_features,
                missing_scalar,
            )

        if self.normalize:
            # Route the normalization ops into this net / init net.
            C2.set_net_and_init_net(net, init_net)
            state_blob, _ = PreprocessorNet().normalize_dense_matrix(
                state_blob,
                self.sorted_state_features,
                self.state_normalization_parameters,
                blobname_prefix="state",
                split_expensive_feature_groups=True,
            )
            if with_actions:
                action_blob, _ = PreprocessorNet().normalize_dense_matrix(
                    action_blob,
                    self.sorted_action_features,
                    self.action_normalization_parameters,
                    blobname_prefix="action",
                    split_expensive_feature_groups=True,
                )
            C2.set_net_and_init_net(None, None)

        output_record = schema.Struct(("state", state_blob))
        if with_actions:
            output_record += schema.Struct(("action", action_blob))
        net.set_output_record(output_record)

        return FeatureExtractorNet(net, init_net)
示例#43
0
    def preprocess_blob(self, blob, normalization_parameters):
        """
        Takes in a blob and its normalization parameters. Outputs a tuple
        whose first element is a blob containing the normalized input blob
        and whose second element contains all the parameter blobs used to
        create it.

        Call this from a CPU context and ensure the input blob exists in it.

        :param blob: Input blob to normalize.
        :param normalization_parameters: Non-empty sequence of normalization
            parameters; every entry must have the same ``feature_type``.
        :return: Tuple of (normalized output blob, list of parameter blobs).
            For every feature type except ENUM, entries within 1e-4 of
            MISSING_VALUE are replaced by 0 and the result goes through
            NanCheck. The ENUM branch returns its one-hot output directly.
        :raises Exception: If the feature types differ, or an ENUM feature
            has a negative possible value.
        :raises NotImplementedError: If the feature type is not handled.
        """

        parameters: List[str] = []

        # A value counts as "missing" when it lies strictly inside
        # (MISSING_VALUE - 1e-4, MISSING_VALUE + 1e-4). The mask is computed on
        # the raw input, before any transformation below.
        MISSING_U = self._store_parameter(
            parameters, "MISSING_U", np.array([MISSING_VALUE + 1e-4], dtype=np.float32)
        )
        MISSING_L = self._store_parameter(
            parameters, "MISSING_L", np.array([MISSING_VALUE - 1e-4], dtype=np.float32)
        )

        is_empty_l = C2.GT(blob, MISSING_L, broadcast=1)
        is_empty_u = C2.LT(blob, MISSING_U, broadcast=1)
        is_empty = C2.And(is_empty_l, is_empty_u)

        # All parameters must share one feature type; compare neighbours.
        for current, following in zip(
            normalization_parameters, normalization_parameters[1:]
        ):
            if current.feature_type != following.feature_type:
                raise Exception(
                    "Only one feature type is allowed per call to preprocess_blob!"
                )
        feature_type = normalization_parameters[0].feature_type
        if feature_type == identify_types.BINARY:
            # 1.0 where the value is more than TOLERANCE away from zero,
            # 0.0 otherwise.
            TOLERANCE = self._store_parameter(
                parameters, "TOLERANCE", np.array(1e-3, dtype=np.float32)
            )
            ZERO = self._store_parameter(
                parameters, "ZERO", np.array([0], dtype=np.float32)
            )
            is_gt_zero = C2.GT(blob, C2.Add(ZERO, TOLERANCE, broadcast=1), broadcast=1)
            is_lt_zero = C2.LT(blob, C2.Sub(ZERO, TOLERANCE, broadcast=1), broadcast=1)
            bool_blob = C2.Or(is_gt_zero, is_lt_zero)
            blob = C2.Cast(bool_blob, to=caffe2_pb2.TensorProto.FLOAT)
        elif feature_type == identify_types.PROBABILITY:
            # Computes -log(1 / p - 1) on p clipped to [0.01, 0.99].
            ONE = self._store_parameter(
                parameters, "ONE", np.array([1], dtype=np.float32)
            )
            NEGATIVE_ONE = self._store_parameter(
                parameters, "NEGATIVE_ONE", np.array([-1], dtype=np.float32)
            )
            clipped = C2.Clip(blob, min=0.01, max=0.99)
            blob = C2.Mul(
                C2.Log(C2.Sub(C2.Pow(clipped, exponent=-1.0), ONE, broadcast=1)),
                NEGATIVE_ONE,
                broadcast=1,
            )
        elif feature_type == identify_types.ENUM:
            for parameter in normalization_parameters:
                possible_values = parameter.possible_values
                for x in possible_values:
                    if x < 0:
                        logger.fatal(
                            "Invalid enum possible value for feature: "
                            + str(x)
                            + " "
                            + str(parameter.possible_values)
                        )
                        # str() so the message is built the same way whether
                        # blob is a plain name or a blob reference object.
                        raise Exception(
                            "Invalid enum possible value for feature "
                            + str(blob)
                            + ": "
                            + str(x)
                            + " "
                            + str(parameter.possible_values)
                        )

            int_blob = C2.Cast(blob, to=core.DataType.INT32)

            # Batch one hot transform with MISSING_VALUE as a possible value
            feature_lengths = [
                len(p.possible_values) + 1 for p in normalization_parameters
            ]
            feature_lengths_blob = self._store_parameter(
                parameters,
                "feature_lengths_blob",
                np.array(feature_lengths, dtype=np.int32),
            )

            feature_values = [
                x
                for p in normalization_parameters
                for x in p.possible_values + [int(MISSING_VALUE)]
            ]
            feature_values_blob = self._store_parameter(
                parameters,
                "feature_values_blob",
                np.array(feature_values, dtype=np.int32),
            )

            one_hot_output = C2.BatchOneHot(
                int_blob, feature_lengths_blob, feature_values_blob
            )
            flattened_one_hot = C2.FlattenToVec(one_hot_output)

            # Remove missing values with a mask
            cols_to_include = [
                [1] * len(p.possible_values) + [0] for p in normalization_parameters
            ]
            cols_to_include = [x for col in cols_to_include for x in col]
            mask = self._store_parameter(
                parameters, "mask", np.array(cols_to_include, dtype=np.int32)
            )

            # Broadcast the per-column mask to the full one-hot shape.
            zero_vec = C2.ConstantFill(
                one_hot_output, value=0, dtype=caffe2_pb2.TensorProto.INT32
            )

            repeated_mask_bool = C2.Cast(
                C2.Add(zero_vec, mask, broadcast=1), to=core.DataType.BOOL
            )

            flattened_repeated_mask = C2.FlattenToVec(repeated_mask_bool)

            flattened_one_hot_proc = C2.NextBlob("flattened_one_hot_proc")
            flattened_one_hot_proc_indices = C2.NextBlob(
                "flattened_one_hot_proc_indices"
            )
            C2.net().BooleanMask(
                [flattened_one_hot, flattened_repeated_mask],
                [flattened_one_hot_proc, flattened_one_hot_proc_indices],
            )

            one_hot_shape = C2.Shape(one_hot_output)

            # One "missing" column was dropped per feature, so the second
            # dimension shrinks by the number of features.
            shape_delta = self._store_parameter(
                parameters,
                "shape_delta",
                np.array([0, len(normalization_parameters)], dtype=np.int64),
            )

            target_shape = C2.Sub(one_hot_shape, shape_delta, broadcast=1)
            output_int_blob = C2.NextBlob("output_int_blob")
            output_int_blob_old_shape = C2.NextBlob("output_int_blob_old_shape")
            C2.net().Reshape(
                [flattened_one_hot_proc, target_shape],
                [output_int_blob, output_int_blob_old_shape],
            )

            output_blob = C2.Cast(output_int_blob, to=core.DataType.FLOAT)

            # ENUM output skips the missing-value replacement and NanCheck below.
            return output_blob, parameters
        elif feature_type == identify_types.QUANTILE:
            # This transformation replaces a set of values with their quantile.
            # The quantile boundaries are provided in the normalization params.

            quantile_sizes = [len(norm.quantiles) for norm in normalization_parameters]
            num_boundaries_blob = self._store_parameter(
                parameters,
                "num_boundaries_blob",
                np.array(quantile_sizes, dtype=np.int32),
            )

            # Rows of (boundary value, label), where labels are evenly spaced
            # from 0 to 1 across each feature's boundaries.
            quantile_values = np.array([], dtype=np.float32)
            quantile_labels = np.array([], dtype=np.float32)
            for norm in normalization_parameters:
                quantile_values = np.append(
                    quantile_values, np.array(norm.quantiles, dtype=np.float32)
                )
                quantile_labels = np.append(
                    quantile_labels,
                    np.arange(len(norm.quantiles), dtype=np.float32)
                    / float(len(norm.quantiles) - 1.0),
                )
            quantiles = np.vstack([quantile_values, quantile_labels]).T
            quantiles_blob = self._store_parameter(
                parameters, "quantiles_blob", quantiles
            )

            quantile_blob = C2.Percentile(blob, quantiles_blob, num_boundaries_blob)
            blob = quantile_blob
        elif (
            feature_type == identify_types.CONTINUOUS
            or feature_type == identify_types.BOXCOX
        ):
            boxcox_shifts = []
            boxcox_lambdas = []
            means = []
            stddevs = []

            for norm in normalization_parameters:
                if feature_type == identify_types.BOXCOX:
                    assert (
                        norm.boxcox_shift is not None and norm.boxcox_lambda is not None
                    )
                    boxcox_shifts.append(norm.boxcox_shift)
                    boxcox_lambdas.append(norm.boxcox_lambda)
                means.append(norm.mean)
                stddevs.append(norm.stddev)

            if feature_type == identify_types.BOXCOX:
                boxcox_shift_blob = self._store_parameter(
                    parameters,
                    "boxcox_shift",
                    np.array(boxcox_shifts, dtype=np.float32),
                )
                # Stored under its own name; it previously reused
                # "boxcox_shift" for the lambda values.
                boxcox_lambda_blob = self._store_parameter(
                    parameters,
                    "boxcox_lambda",
                    np.array(boxcox_lambdas, dtype=np.float32),
                )

                blob = C2.BatchBoxCox(blob, boxcox_lambda_blob, boxcox_shift_blob)

            means_blob = self._store_parameter(
                parameters, "means_blob", np.array([means], dtype=np.float32)
            )
            stddevs_blob = self._store_parameter(
                parameters, "stddevs_blob", np.array([stddevs], dtype=np.float32)
            )

            # Standardize, then clip to the allowed feature range.
            blob = C2.Sub(blob, means_blob, broadcast=1, axis=0)
            blob = C2.Div(blob, stddevs_blob, broadcast=1, axis=0)
            blob = C2.Clip(blob, min=MIN_FEATURE_VALUE, max=MAX_FEATURE_VALUE)
        elif feature_type == identify_types.CONTINUOUS_ACTION:
            # Linearly map [min_value, max_value] onto [-1 + EPS, 1 - EPS].
            serving_min_value = np.array(
                [norm.min_value for norm in normalization_parameters], dtype=np.float32
            )
            serving_max_value = np.array(
                [norm.max_value for norm in normalization_parameters], dtype=np.float32
            )

            training_min_value = (
                np.ones(len(normalization_parameters), dtype=np.float32) * -1 + EPS
            )

            scaling_factor = (
                (np.ones(len(normalization_parameters), dtype=np.float32) - EPS)
                * 2
                / (serving_max_value - serving_min_value)
            )

            serving_min_blob = self._store_parameter(
                parameters, "serving_min_blob", serving_min_value
            )
            training_min_blob = self._store_parameter(
                parameters, "training_min_blob", training_min_value
            )
            scaling_factor_blob = self._store_parameter(
                parameters, "scaling_factor_blob", scaling_factor
            )

            blob = C2.Sub(blob, serving_min_blob, broadcast=1, axis=1)
            blob = C2.Mul(blob, scaling_factor_blob, broadcast=1, axis=1)
            blob = C2.Add(blob, training_min_blob, broadcast=1, axis=1)
            blob = C2.Clip(blob, min=-1 + EPS, max=1 - EPS)
        else:
            raise NotImplementedError("Invalid feature type: {}".format(feature_type))

        # Replace entries that were missing in the input with 0.
        zeros = C2.ConstantFill(blob, value=0.0)
        output_blob = C2.Where(is_empty, zeros, blob)
        output_blob = C2.NanCheck(output_blob)
        return output_blob, parameters
示例#44
0
    def normalize_dense_matrix(
        self,
        input_matrix: str,
        features: List[int],
        normalization_parameters: Dict[int, NormalizationParameters],
        blobname_prefix: str,
        split_expensive_feature_groups: bool,
    ) -> Tuple[str, List[str]]:
        """
        Normalizes inputs according to parameters. Expects a dense matrix whose ith
        column corresponds to feature i.

        Columns are processed in contiguous groups, one per entry of
        FEATURE_TYPES, using the boundaries from ``_get_type_boundaries``.
        Each group (or each sub-interval of it, when
        ``_should_split_feature_group`` asks for a split) is sliced out of the
        input, run through ``preprocess_blob``, and the results are
        concatenated along axis 1.

        Note that the Caffe2 BatchBoxCox operator isn't implemented on CUDA GPU so
        we need to use a CPU context.

        :param input_matrix: Input matrix to normalize.
        :param features: Array that maps feature ids to column indices.
        :param normalization_parameters: Mapping from feature names to
            NormalizationParameters.
        :param blobname_prefix: Prefix for input blobs to norm_net.
        :param split_expensive_feature_groups: Forwarded to
            ``_should_split_feature_group`` to decide whether a feature group
            is processed as several column intervals.
        :return: Tuple of (concatenated normalized blob, parameter blobs
            created by the ``preprocess_blob`` calls).
        """
        with core.DeviceScope(core.DeviceOption(caffe2_pb2.CPU)):
            feature_starts = self._get_type_boundaries(
                features, normalization_parameters
            )

            normalized_input_blobs = []
            parameters: List[str] = []
            for i, feature_type in enumerate(FEATURE_TYPES):
                start_index = feature_starts[i]
                if (i + 1) == len(FEATURE_TYPES):
                    # The boundaries index columns of the matrix, i.e.
                    # positions in `features`. The parameter dict may hold
                    # more features than are present here, so its length is
                    # not a valid upper bound.
                    end_index = len(features)
                else:
                    end_index = feature_starts[i + 1]
                if start_index == end_index:
                    continue  # No features of this type
                slices = []

                split_feature_group, split_intervals = self._should_split_feature_group(
                    split_expensive_feature_groups, start_index, end_index, feature_type
                )

                if split_feature_group:
                    # Consecutive entries of split_intervals delimit the
                    # column ranges to process separately.
                    for j in range(len(split_intervals) - 1):
                        slice_blob = self._get_input_blob_indexed(
                            blobname_prefix, feature_type, j
                        )
                        C2.net().Slice(
                            [input_matrix],
                            [slice_blob],
                            starts=[0, split_intervals[j]],
                            ends=[-1, split_intervals[j + 1]],
                        )
                        slices.append(
                            (slice_blob, split_intervals[j], split_intervals[j + 1])
                        )
                else:
                    sliced_input_features = self._get_input_blob(
                        blobname_prefix, feature_type
                    )

                    C2.net().Slice(
                        [input_matrix],
                        [sliced_input_features],
                        starts=[0, start_index],
                        ends=[-1, end_index],
                    )

                    slices.append((sliced_input_features, start_index, end_index))

                for (slice_blob, start, end) in slices:
                    normalized_input_blob, blob_parameters = self.preprocess_blob(
                        slice_blob,
                        [normalization_parameters[x] for x in features[start:end]],
                    )
                    logger.info(
                        "Processed split ({}, {}) for feature type {}".format(
                            start, end, feature_type
                        )
                    )
                    parameters.extend(blob_parameters)
                    normalized_input_blobs.append(normalized_input_blob)
            for i, inp in enumerate(normalized_input_blobs):
                logger.info("input# {}: {}".format(i, inp))
            # Concat yields two outputs; only the concatenated blob is used.
            concatenated_input_blob, _ = C2.Concat(*normalized_input_blobs, axis=1)
        return concatenated_input_blob, parameters
示例#45
0
    def _forward_pass(
        cls, model, trainer, normalized_dense_matrix, actions, qnet_output_blob
    ):
        """
        Add ops to ``model`` that copy the Q-network output into "q_values"
        and write it to the
        "output/string_weighted_multi_categorical_features.*" blobs.

        ``trainer`` and ``normalized_dense_matrix`` are accepted but not used
        in this method.

        :return: Tuple of (parameter blobs created here, name of the q-values
            blob).
        """
        C2.set_model(model)

        q_values = "q_values"
        C2.net().Copy([qnet_output_blob], [q_values])

        # Constant blobs fed straight into the workspace.
        action_names = C2.NextBlob("action_names")
        workspace.FeedBlob(action_names, np.array(actions))
        action_range = C2.NextBlob("action_range")
        workspace.FeedBlob(action_range, np.array(list(range(len(actions)))))
        parameters = [action_names, action_range]

        q_shape = C2.Shape(q_values)
        row_count = C2.Slice(q_shape, starts=[0], ends=[1])

        # First column of q_values, used as the shape template for the
        # ConstantFill ops below.
        first_column = C2.Slice(q_values, starts=[0, 0], ends=[-1, 1])

        # Outer keys: filled with 0.
        keys_blob = "output/string_weighted_multi_categorical_features.keys"
        workspace.FeedBlob(keys_blob, np.zeros(1, dtype=np.int64))
        keys_matrix = C2.ConstantFill(
            first_column, value=0, dtype=caffe2_pb2.TensorProto.INT64
        )
        # Note: sometimes we need to use an explicit output name, so we call
        #  C2.net().Fn(...)
        C2.net().FlattenToVec([keys_matrix], [keys_blob])

        # Outer lengths: filled with 1.
        lengths_blob = "output/string_weighted_multi_categorical_features.lengths"
        workspace.FeedBlob(lengths_blob, np.zeros(1, dtype=np.int32))
        lengths_matrix = C2.ConstantFill(
            first_column, value=1, dtype=caffe2_pb2.TensorProto.INT32
        )
        C2.net().FlattenToVec([lengths_matrix], [lengths_blob])

        # Inner keys: the action names tiled by the row count.
        value_keys_blob = (
            "output/string_weighted_multi_categorical_features.values.keys"
        )
        workspace.FeedBlob(value_keys_blob, np.array(["a"]))
        C2.net().Tile([action_names, row_count], [value_keys_blob], axis=1)

        # Inner lengths: filled with the number of actions.
        value_lengths_matrix = C2.ConstantFill(
            first_column, value=len(actions), dtype=caffe2_pb2.TensorProto.INT32
        )
        value_lengths_blob = (
            "output/string_weighted_multi_categorical_features.values.lengths"
        )
        workspace.FeedBlob(value_lengths_blob, np.zeros(1, dtype=np.int32))
        C2.net().FlattenToVec([value_lengths_matrix], [value_lengths_blob])

        # Inner values: the flattened q-values.
        value_values_blob = (
            "output/string_weighted_multi_categorical_features.values.values"
        )
        workspace.FeedBlob(value_values_blob, np.array([1.0]))
        C2.net().FlattenToVec([q_values], [value_values_blob])

        return parameters, q_values