def _create_q_score_net(self) -> None:
    """Build and instantiate the net that scores Q(states, actions)."""
    self.q_score_model = ModelHelper(name="q_score_" + self.model_id)
    C2.set_model(self.q_score_model)
    # True -> score with the target network (see get_q_values).
    self.q_score_output = self.get_q_values('states', 'actions', True)
    workspace.RunNetOnce(self.q_score_model.param_init_net)
    workspace.CreateNet(self.q_score_model.net)
    C2.set_model(None)
def process(
    self,
    sorted_features: List[int],
    sparse_data: StackedAssociativeArray,
    set_missing_value_to_zero: bool = False,
) -> Tuple[str, List[str]]:
    """
    Densify a sparse (lengths/keys/values) feature map into a matrix blob
    whose columns follow ``sorted_features``.

    :param sorted_features: feature ids, in the column order of the output.
    :param sparse_data: stacked sparse representation to densify.
    :param set_missing_value_to_zero: fill absent entries with 0.0 instead
        of the MISSING_VALUE sentinel.
    :return: (dense blob name, list of parameter blob names created).
    """
    lengths_blob = sparse_data.lengths
    keys_blob = sparse_data.keys
    values_blob = sparse_data.values

    MISSING_SCALAR = C2.NextBlob("MISSING_SCALAR")
    missing_value = 0.0 if set_missing_value_to_zero else MISSING_VALUE
    # Feed the constant now for immediate runs, and also add a fill op so
    # the value is reproduced when the net is re-run or exported.
    workspace.FeedBlob(MISSING_SCALAR, np.array([missing_value], dtype=np.float32))
    C2.net().GivenTensorFill([], [MISSING_SCALAR], shape=[], values=[missing_value])
    parameters: List[str] = [MISSING_SCALAR]

    assert len(sorted_features) > 0, "Sorted features is empty"
    dense_input = C2.SparseToDenseMask(
        keys_blob, values_blob, MISSING_SCALAR, lengths_blob,
        mask=sorted_features)[0]
    return dense_input, parameters
def get_max_q_values(
    self,
    next_states: str,
    possible_next_actions: StackedArray,
    use_target_network: bool,
) -> str:
    """
    Takes in an array of next_states and outputs an array of the same shape
    whose ith entry = max_{pna} Q(state_i, pna). Uses target network for
    Q(state_i, pna) approximation.

    :param next_states: Blob containing state features. Each row contains
        a representation of a state.
    :param possible_next_actions: List of sets of possible next actions. The
        ith element of this list is a matrix PNA_i such that PNA_i[j] is the
        parametric representation of the jth possible action from the ith
        next_state. These have not been normalized.
    :param use_target_network: if True, score with the target network
        instead of the live trainer network.
    """
    # Repeat each next_state row once per candidate action so states and
    # candidate actions line up row-for-row.
    stacked_states = C2.LengthsTile(next_states, possible_next_actions.lengths)
    all_q_values = self.get_q_values(
        stacked_states,
        possible_next_actions.values,
        use_target_network,
    )
    # Segmented max: one max-Q value per original next_state.
    max_q_values = C2.LengthsMax(
        all_q_values,
        possible_next_actions.lengths,
    )
    return max_q_values
def update_model(self, states: str, actions: str, q_vals_target: str) -> None:
    """
    Takes in states, actions, and target q values. Updates the model:

        Runs the forward pass, computing Q(states, actions).
            Q(states, actions)[i][j] is an approximation of Q*(states[i], action_j).
        Computes Loss of Q(states, actions) with respect to q_vals_targets.
        Updates Q Network's weights according to loss and optimizer.

    :param states: Numpy array with shape (batch_size, state_dim). The ith
        row is a representation of the ith transition's state.
    :param actions: Numpy array with shape (batch_size, action_dim). The ith
        row is a representation of the ith transition's action.
    :param q_vals_targets: Numpy array with shape (batch_size, 1). The ith
        row is the label to train against for the data from the ith
        transition.
    """
    model = C2.model()
    # Targets are labels, not differentiable inputs.
    q_vals_target = C2.StopGradient(q_vals_target)
    q_values = C2.NextBlob("train_output")
    state_action_pairs, _ = C2.Concat(states, actions, axis=1)
    self.ml_trainer.make_forward_pass_ops(model, state_action_pairs, q_values, False)

    self.loss_blob = self.ml_trainer.generateLossOps(
        model, q_values, q_vals_target)
    model.AddGradientOperators([self.loss_blob])
    for param in model.params:
        if param in model.param_to_grad:
            param_grad = model.param_to_grad[param]
            # NanCheck inserts an op into the net; its output blob is not
            # consumed further -- presumably only the runtime NaN assertion
            # matters. TODO(review): confirm the renamed gradient is not
            # expected to feed the update ops.
            param_grad = C2.NanCheck(param_grad)
    self.ml_trainer.addParameterUpdateOps(model)
def process(
    self, sparse_data: StackedAssociativeArray
) -> Tuple[str, str, List[str]]:
    """
    Densify ``sparse_data`` into a dense matrix blob plus a per-entry
    presence mask, with columns ordered by ``self.sorted_features``.

    :param sparse_data: stacked sparse (lengths/keys/values) feature map.
    :return: (dense blob, presence-mask blob, parameter blob names created).
    """
    lengths_blob = sparse_data.lengths
    keys_blob = sparse_data.keys
    values_blob = sparse_data.values

    MISSING_SCALAR = C2.NextBlob("MISSING_SCALAR")
    missing_value = 0.0 if self.set_missing_value_to_zero else MISSING_VALUE
    # Feed immediately and add a fill op so re-runs/exports reproduce it.
    workspace.FeedBlob(MISSING_SCALAR, np.array([missing_value], dtype=np.float32))
    C2.net().GivenTensorFill([], [MISSING_SCALAR], shape=[], values=[missing_value])
    parameters: List[str] = [MISSING_SCALAR]

    assert len(self.sorted_features) > 0, "Sorted features is empty"
    dense_input = C2.NextBlob("dense_input")
    dense_input_presence = C2.NextBlob("dense_input_presence")
    C2.net().SparseToDenseMask(
        [keys_blob, values_blob, MISSING_SCALAR, lengths_blob],
        [dense_input, dense_input_presence],
        mask=self.sorted_features,
        return_presence_mask=True,
    )
    if self.set_missing_value_to_zero:
        # NOTE(review): this recomputes "presence" as |value| < 1e-4, i.e.
        # true exactly where the densified value is approximately zero --
        # which looks inverted for a presence mask. Confirm the intended
        # semantics when missing values are encoded as zero.
        dense_input_presence = C2.And(
            C2.GT(dense_input, -1e-4, broadcast=1),
            C2.LT(dense_input, 1e-4, broadcast=1),
        )
    return dense_input, dense_input_presence, parameters
def test_preprocessing_network(self):
    """End-to-end check: the Caffe2 preprocessing net's output matches the
    numpy reference preprocessor on the same data, within tolerance."""
    feature_value_map = read_data()

    normalization_parameters = {}
    for name, values in feature_value_map.items():
        normalization_parameters[name] = normalization.identify_parameter(
            name, values, feature_type=self._feature_type_override(name))
    # Reference output computed in pure numpy.
    test_features = NumpyFeatureProcessor.preprocess(
        feature_value_map, normalization_parameters)

    net = core.Net("PreprocessingTestNet")
    C2.set_net(net)
    preprocessor = PreprocessorNet()
    name_preprocessed_blob_map = {}
    for feature_name in feature_value_map:
        # Dummy feed so the blob exists when the net is created.
        workspace.FeedBlob(str(feature_name), np.array([0], dtype=np.int32))
        preprocessed_blob, _ = preprocessor.preprocess_blob(
            str(feature_name), [normalization_parameters[feature_name]])
        name_preprocessed_blob_map[feature_name] = preprocessed_blob

    workspace.CreateNet(net)

    for feature_name, feature_value in six.iteritems(feature_value_map):
        feature_value = np.expand_dims(feature_value, -1)
        workspace.FeedBlob(str(feature_name), feature_value)
    workspace.RunNetOnce(net)

    for feature_name in feature_value_map:
        normalized_features = workspace.FetchBlob(
            name_preprocessed_blob_map[feature_name])
        # Enum features one-hot to multiple columns; others are (N, 1).
        if feature_name != ENUM_FEATURE_ID:
            normalized_features = np.squeeze(normalized_features, -1)

        tolerance = 0.01
        if feature_name == BOXCOX_FEATURE_ID:
            # At the limit, boxcox has some numerical instability
            tolerance = 0.5
        non_matching = np.where(
            np.logical_not(
                np.isclose(
                    normalized_features,
                    test_features[feature_name],
                    rtol=tolerance,
                    atol=tolerance,
                )))
        self.assertTrue(
            np.all(
                np.isclose(
                    normalized_features,
                    test_features[feature_name],
                    rtol=tolerance,
                    atol=tolerance,
                )),
            "{} does not match: {} {}".format(
                feature_name,
                normalized_features[non_matching].tolist(),
                test_features[feature_name][non_matching].tolist(),
            ),
        )
def normalize_dense_matrix(
    self,
    input_matrix: str,
    features: List[str],
    normalization_parameters: Dict[str, NormalizationParameters],
    blobname_prefix: str,
) -> Tuple[str, List[str]]:
    """
    Normalizes inputs according to parameters. Expects a dense matrix whose
    ith column corresponds to feature i.

    Note that the Caffe2 BatchBoxCox operator isn't implemented on CUDA GPU
    so we need to use a CPU context.

    :param input_matrix: Input matrix to normalize.
    :param features: Array that maps feature ids to column indices.
    :param normalization_parameters: Mapping from feature names to
        NormalizationParameters.
    :param blobname_prefix: Prefix for input blobs to norm_net.
    :return: (concatenated normalized blob, parameter blob names created).
    """
    with core.DeviceScope(core.DeviceOption(caffe2_pb2.CPU)):
        # Columns are grouped contiguously by feature type; find each
        # group's starting column.
        feature_starts = self._get_type_boundaries(
            features, normalization_parameters)

        normalized_input_blobs = []
        parameters: List[str] = []
        for i, feature_type in enumerate(FEATURE_TYPES):
            start_index = feature_starts[i]
            if (i + 1) == len(FEATURE_TYPES):
                end_index = len(normalization_parameters)
            else:
                end_index = feature_starts[i + 1]
            if start_index == end_index:
                continue  # No features of this type
            sliced_input_features = self._get_input_blob(
                blobname_prefix, feature_type)
            # Slice out this type's contiguous column range.
            C2.net().Slice(
                [input_matrix],
                [sliced_input_features],
                starts=[0, start_index],
                ends=[-1, end_index],
            )
            normalized_input_blob, blob_parameters = self.preprocess_blob(
                sliced_input_features,
                [
                    normalization_parameters[x]
                    for x in features[start_index:end_index]
                ],
            )
            parameters.extend(blob_parameters)
            normalized_input_blobs.append(normalized_input_blob)
        for i, inp in enumerate(normalized_input_blobs):
            logger.info("input# {}: {}".format(i, inp))
        concatenated_input_blob, concatenated_input_blob_dim = C2.Concat(
            *normalized_input_blobs, axis=1)
        # Fail fast at runtime if normalization produced NaNs.
        concatenated_input_blob = C2.NanCheck(concatenated_input_blob)
        return concatenated_input_blob, parameters
def _create_reward_train_net(self) -> None:
    """Build the reward-regression training net (labels = raw rewards)."""
    self.reward_train_model = ModelHelper(name="reward_train_" + self.model_id)
    C2.set_model(self.reward_train_model)
    self.update_model('states', 'actions', 'rewards')
    workspace.RunNetOnce(self.reward_train_model.param_init_net)
    workspace.CreateNet(self.reward_train_model.net)
    C2.set_model(None)
def _create_q_score_net(self) -> None:
    """Build the net that scores Q(states, actions) with the target network."""
    self.q_score_model = ModelHelper(name="q_score_" + self.model_id)
    C2.set_model(self.q_score_model)
    self.q_score_output = self.get_q_values("states", "actions", True)
    workspace.RunNetOnce(self.q_score_model.param_init_net)
    # Configure worker-thread count before net instantiation.
    self.q_score_model.net.Proto().num_workers = \
        RLTrainer.DEFAULT_TRAINING_NUM_WORKERS
    workspace.CreateNet(self.q_score_model.net)
    C2.set_model(None)
def _create_internal_policy_net(self) -> None:
    """Build the net scoring all actions for internal policy decisions."""
    self.internal_policy_model = ModelHelper(name="internal_policy_" + self.model_id)
    C2.set_model(self.internal_policy_model)
    # False -> use the live trainer network, not the target network.
    self.internal_policy_output = self.get_q_values_all_actions(
        "states", False)
    workspace.RunNetOnce(self.internal_policy_model.param_init_net)
    workspace.CreateNet(self.internal_policy_model.net)
    C2.set_model(None)
def _create_internal_policy_net(self) -> None:
    """Build the net whose flattened Q(states, actions) output drives the
    internal policy."""
    # NOTE(review): model is named "q_score_..." although this builds the
    # internal policy net -- looks like a copy-paste name; confirm.
    self.internal_policy_model = ModelHelper(name="q_score_" + self.model_id)
    C2.set_model(self.internal_policy_model)
    self.internal_policy_output = C2.FlattenToVec(
        self.get_q_values('states', 'actions', False))
    workspace.RunNetOnce(self.internal_policy_model.param_init_net)
    workspace.CreateNet(self.internal_policy_model.net)
    C2.set_model(None)
def __init__(self, params: PolicyEvaluatorParameters) -> None:
    """Set up the slate-processing net and index value-input models by name.

    :param params: evaluator configuration; ``params.db_type`` selects the
        predictor database backend.
    """
    self.params = params
    self.process_slate_net = core.Net("policy_evaluator")
    C2.set_net(self.process_slate_net)
    self.action_probabilities = PolicySimulator.plan(
        self.process_slate_net, params, self.params.db_type)
    # The net is created lazily; see created_net consumers.
    self.created_net = False
    # Name -> model parameters, for quick lookup at evaluation time.
    self.value_input_models: Dict[str, ValueInputModelParameters] = {}
    for model in self.params.value_input_models:
        self.value_input_models[model.name] = model
def _create_reward_train_net(self) -> None:
    """Build the reward-regression training net (labels = raw rewards)."""
    self.reward_train_model = ModelHelper(name="reward_train_" + self.model_id)
    C2.set_model(self.reward_train_model)
    self.update_model('states', 'actions', 'rewards')
    workspace.RunNetOnce(self.reward_train_model.param_init_net)
    # Configure worker-thread count before net instantiation.
    self.reward_train_model.net.Proto().num_workers = \
        RLTrainer.DEFAULT_TRAINING_NUM_WORKERS
    workspace.CreateNet(self.reward_train_model.net)
    C2.set_model(None)
def update_model(
    self,
    states: str,
    actions: str,
    q_vals_target: str,
) -> None:
    """
    Takes in states, actions, and target q values. Updates the model:

        Runs the forward pass, computing Q(states, actions).
            Q(states, actions)[i][j] is an approximation of Q*(states[i], action_j).
        Computes Loss of Q(states, actions) with respect to q_vals_targets.
        Updates Q Network's weights according to loss and optimizer.

    :param states: Numpy array with shape (batch_size, state_dim). The ith
        row is a representation of the ith transition's state.
    :param actions: Numpy array with shape (batch_size, action_dim). The ith
        row is a representation of the ith transition's action.
    :param q_vals_targets: Numpy array with shape (batch_size, 1). The ith
        row is the label to train against for the data from the ith
        transition.
    """
    model = C2.model()
    # Targets are labels, not differentiable inputs.
    q_vals_target = C2.StopGradient(q_vals_target)
    q_values = C2.NextBlob("train_output")
    state_action_pairs, _ = C2.Concat(states, actions, axis=1)
    MakeForwardPassOps(
        model,
        self.model_id,
        state_action_pairs,
        q_values,
        self.weights,
        self.biases,
        self.activations,
        self.layers,
        self.dropout_ratio,
        False,
    )

    self.loss_blob = GenerateLossOps(
        model,
        q_values,
        q_vals_target,
    )
    model.AddGradientOperators([self.loss_blob])
    for param in model.params:
        if param in model.param_to_grad:
            param_grad = model.param_to_grad[param]
            # NanCheck inserts a runtime NaN assertion op; its output blob
            # is not consumed further -- presumably intentional. Confirm.
            param_grad = C2.NanCheck(param_grad)
    AddParameterUpdateOps(
        model,
        optimizer_input=self.optimizer,
        base_learning_rate=self.learning_rate,
        gamma=self.gamma,
        policy=self.lr_policy,
    )
def _sum_deterministic_policy(self, model_names, path):
    """
    Build and export a net that deterministically picks the action whose
    summed score across all models is highest, emitting a one-hot
    'ActionProbabilities' vector.

    :param model_names: names of models whose '<name>_Output' blobs are
        summed.
    :param path: minidb path the predictor is saved to.
    """
    net = core.Net('DeterministicPolicy')
    C2.set_net(net)
    output = 'ActionProbabilities'
    workspace.FeedBlob(output, np.array([1.0]))
    model_outputs = []
    for model in model_names:
        model_output = '{}_Output'.format(model)
        # Placeholder feed so the blob exists before export.
        workspace.FeedBlob(model_output, np.array([1.0], dtype=np.float32))
        model_outputs.append(model_output)
    # Sum scores across models, then take the argmax action index.
    max_action = C2.FlattenToVec(
        C2.ArgMax(C2.Transpose(C2.Sum(*model_outputs)))
    )
    one_blob = C2.NextBlob('one')
    workspace.FeedBlob(one_blob, np.array([1.0], dtype=np.float32))
    # Scatter 1.0 at the argmax index: one-hot "probabilities".
    C2.net().SparseToDense(
        [
            max_action,
            one_blob,
            model_outputs[0],
        ],
        [output],
    )
    meta = PredictorExportMeta(
        net,
        [one_blob],
        model_outputs,
        [output],
    )
    save_to_db('minidb', path, meta)
def _create_internal_policy_net(self) -> None:
    """Build the net whose flattened Q(states, actions) output drives the
    internal policy."""
    # NOTE(review): model is named "q_score_..." although this builds the
    # internal policy net -- looks like a copy-paste name; confirm.
    self.internal_policy_model = ModelHelper(name="q_score_" + self.model_id)
    C2.set_model(self.internal_policy_model)
    self.internal_policy_output = C2.FlattenToVec(
        self.get_q_values('states', 'actions', False))
    workspace.RunNetOnce(self.internal_policy_model.param_init_net)
    # Configure worker-thread count before net instantiation.
    self.internal_policy_model.net.Proto().num_workers = \
        RLTrainer.DEFAULT_TRAINING_NUM_WORKERS
    workspace.CreateNet(self.internal_policy_model.net)
    C2.set_model(None)
def _create_internal_policy_net(self) -> None:
    """Build the net scoring all actions for internal policy decisions."""
    self.internal_policy_model = ModelHelper(name="internal_policy_" + self.model_id)
    C2.set_model(self.internal_policy_model)
    # False -> use the live trainer network, not the target network.
    self.internal_policy_output = self.get_q_values_all_actions(
        "states", False)
    workspace.RunNetOnce(self.internal_policy_model.param_init_net)
    # Configure worker-thread count before net instantiation.
    self.internal_policy_model.net.Proto().num_workers = \
        RLTrainer.DEFAULT_TRAINING_NUM_WORKERS
    workspace.CreateNet(self.internal_policy_model.net)
    C2.set_model(None)
def _create_reward_train_net(self) -> None:
    """Build the reward-regression training net (labels = raw rewards)."""
    self.reward_train_model = ModelHelper(name="reward_train_" + self.model_id)
    C2.set_model(self.reward_train_model)
    self.update_model("states", "actions", "rewards")
    workspace.RunNetOnce(self.reward_train_model.param_init_net)
    # Worker count and executor type must be set before CreateNet.
    self.reward_train_model.net.Proto().num_workers = (
        RLTrainer.DEFAULT_TRAINING_NUM_WORKERS)
    self.reward_train_model.net.Proto().type = "async_scheduling"
    workspace.CreateNet(self.reward_train_model.net)
    C2.set_model(None)
def _create_all_q_score_net(self) -> None:
    """Build the net that scores Q values for every action given states."""
    self.all_q_score_model = ModelHelper(name="all_q_score_" + self.model_id)
    C2.set_model(self.all_q_score_model)
    # False -> use the live trainer network, not the target network.
    self.all_q_score_output = self.get_q_values_all_actions(
        "states", False)
    workspace.RunNetOnce(self.all_q_score_model.param_init_net)
    # Worker count and executor type must be set before CreateNet.
    self.all_q_score_model.net.Proto().num_workers = (
        RLTrainer.DEFAULT_TRAINING_NUM_WORKERS)
    self.all_q_score_model.net.Proto().type = "async_scheduling"
    workspace.CreateNet(self.all_q_score_model.net)
    C2.set_model(None)
def concat_states_and_possible_next_actions(
    self,
    next_state_preprocessed_matrix_blob: str,
    possible_next_actions_blob: str,
    possible_next_actions_lengths_blob: str,
) -> str:
    """Tile each next-state row once per candidate next action and append
    the candidate action features column-wise.

    :param next_state_preprocessed_matrix_blob: dense next-state matrix.
    :param possible_next_actions_blob: stacked candidate action features.
    :param possible_next_actions_lengths_blob: candidate count per state.
    :return: blob of concatenated (state, action) rows.
    """
    tiled_states = C2.LengthsTile(
        next_state_preprocessed_matrix_blob,
        possible_next_actions_lengths_blob,
    )
    pairs, _ = C2.Concat(
        tiled_states,
        possible_next_actions_blob,
        axis=1,
    )
    return pairs
def test_preprocessing_network(self):
    """The Caffe2 preprocessing net's output matches the numpy reference
    preprocessor on the same data, within tolerance."""
    features, feature_value_map = preprocessing_util.read_data()

    normalization_parameters = {}
    for name, values in feature_value_map.items():
        normalization_parameters[name] = normalization.identify_parameter(
            values)
    # Reference output computed outside Caffe2.
    test_features = self.preprocess(feature_value_map,
                                    normalization_parameters)

    net = core.Net("PreprocessingTestNet")
    C2.set_net(net)
    preprocessor = PreprocessorNet(net, False)
    for feature_name in feature_value_map:
        # Dummy feed so the blob exists when the net is created.
        workspace.FeedBlob(feature_name, np.array([0], dtype=np.int32))
        preprocessor.preprocess_blob(
            feature_name, [normalization_parameters[feature_name]])

    workspace.CreateNet(net)

    for feature_name, feature_value in six.iteritems(feature_value_map):
        feature_value = np.expand_dims(feature_value, -1)
        workspace.FeedBlob(feature_name, feature_value)
    workspace.RunNetOnce(net)

    for feature_name in feature_value_map:
        normalized_features = workspace.FetchBlob(feature_name +
                                                  "_preprocessed")
        # Enum features one-hot to multiple columns; others are (N, 1).
        if feature_name != identify_types.ENUM:
            normalized_features = np.squeeze(normalized_features, -1)

        tolerance = 0.01
        if feature_name == BOXCOX:
            # At the limit, boxcox has some numerical instability
            tolerance = 0.5
        non_matching = np.where(
            np.logical_not(
                np.isclose(
                    normalized_features,
                    test_features[feature_name],
                    rtol=tolerance,
                    atol=tolerance,
                )))
        self.assertTrue(
            np.all(
                np.isclose(
                    normalized_features,
                    test_features[feature_name],
                    rtol=tolerance,
                    atol=tolerance,
                )),
            '{} does not match: {} {}'.format(
                feature_name, normalized_features[non_matching].tolist(),
                test_features[feature_name][non_matching].tolist()))
def get_q_values(self, states: str, actions: str, use_target_network: bool) -> str:
    """Score Q(states, actions): concatenate the pair row-wise and run a
    forward pass on either the target network or the live trainer.

    :param states: blob of state feature rows.
    :param actions: blob of action feature rows (same row count).
    :param use_target_network: score with the target network when True.
    :return: name of the blob holding the Q values.
    """
    pair_blob, _ = C2.Concat(states, actions, axis=1)
    q_out = C2.NextBlob("q_values")
    scorer = self.target_network if use_target_network else self.ml_trainer
    scorer.make_forward_pass_ops(C2.model(), pair_blob, q_out, True)
    return q_out
def __init__(
    self,
    params: PolicyEvaluatorParameters,
    db_type: str,
) -> None:
    """Build the slate-processing net for policy evaluation.

    :param params: evaluator configuration.
    :param db_type: predictor database backend passed to PolicySimulator.
    """
    self.params = params
    self.process_slate_net = core.Net('policy_evaluator')
    C2.set_net(self.process_slate_net)
    self.action_probabilities = PolicySimulator.plan(
        self.process_slate_net,
        params,
        db_type,
    )
    # The net is created lazily; see created_net consumers.
    self.created_net = False
def _create_rl_train_net(self) -> None:
    """Build the TD training net: compute bootstrapped target Q values and
    wire the model-update ops."""
    self.rl_train_model = ModelHelper(name="rl_train_" + self.model_id)
    C2.set_model(self.rl_train_model)

    if self.maxq_learning:
        # Q-learning: bootstrap with the max over possible next actions.
        next_q_values = self.get_max_q_values(
            'next_states',
            self.get_possible_next_actions(),
            True,
        )
    else:
        # SARSA: bootstrap with the logged next action.
        next_q_values = self.get_q_values('next_states', 'next_actions', True)

    # target = rewards + not_terminals * rl_discount_rate * next_q_values
    q_vals_target = C2.Add(
        'rewards',
        C2.Mul(
            C2.Mul(
                C2.Cast('not_terminals',
                        to=caffe2_pb2.TensorProto.FLOAT),  # type: ignore
                self.rl_discount_rate,
                broadcast=1,
            ),
            next_q_values))

    self.update_model('states', 'actions', q_vals_target)
    workspace.RunNetOnce(self.rl_train_model.param_init_net)
    workspace.CreateNet(self.rl_train_model.net)
    C2.set_model(None)
def update_model(self, states: str, actions: str, q_vals_target: str) -> None:
    """
    Takes in states, actions, and target q values. Updates the model:

        Runs the forward pass, computing Q(states, actions).
            Q(states, actions)[i][j] is an approximation of Q*(states[i], action_j).
        Computes Loss of Q(states, actions) with respect to q_vals_targets.
        Updates Q Network's weights according to loss and optimizer.

    :param states: Numpy array with shape (batch_size, state_dim). The ith
        row is a representation of the ith transition's state.
    :param actions: Numpy array with shape (batch_size, action_dim). The ith
        row contains the one-hotted representation of the ith action.
    :param q_vals_targets: Numpy array with shape (batch_size, 1). The ith
        row is the label to train against for the data from the ith
        transition.
    """
    model = C2.model()
    # Targets are labels, not differentiable inputs.
    q_vals_target = C2.StopGradient(q_vals_target)
    output_blob = C2.NextBlob("train_output")
    if self.conv_ml_trainer is not None:
        # Run the convolutional feature extractor first when configured.
        conv_output_blob = C2.NextBlob("conv_output")
        self.conv_ml_trainer.make_conv_pass_ops(model, states,
                                                conv_output_blob)
        states = conv_output_blob
    self.ml_trainer.make_forward_pass_ops(model, states, output_blob, False)

    # Select the Q value of the taken (one-hot) action for each row and
    # keep a (batch_size, 1) column shape.
    q_val_select = C2.ReduceBackSum(C2.Mul(output_blob, actions))
    q_values = C2.ExpandDims(q_val_select, dims=[1])

    self.loss_blob = self.ml_trainer.generateLossOps(model, q_values,
                                                     q_vals_target)
    model.AddGradientOperators([self.loss_blob])
    for param in model.params:
        if param in model.param_to_grad:
            param_grad = model.param_to_grad[param]
            # NanCheck inserts a runtime NaN assertion op; its output blob
            # is not consumed further -- presumably intentional. Confirm.
            param_grad = C2.NanCheck(param_grad)
    self.ml_trainer.addParameterUpdateOps(model)
def test_normalize_dense_matrix_enum(self):
    """Enum features one-hot correctly, missing enum rows one-hot to all
    zeros, and the continuous column passes through unchanged."""
    normalization_parameters = {
        1: NormalizationParameters(
            identify_types.ENUM,
            None,
            None,
            None,
            None,
            [12, 4, 2],
            None,
            None,
            None,
        ),
        2: NormalizationParameters(identify_types.CONTINUOUS, None, 0, 0, 1,
                                   None, None, None, None),
        3: NormalizationParameters(identify_types.ENUM, None, None, None,
                                   None, [15, 3], None, None, None),
    }
    norm_net = core.Net("net")
    C2.set_net(norm_net)
    preprocessor = PreprocessorNet()

    inputs = np.zeros([4, 3], dtype=np.float32)
    feature_ids = [2, 1, 3]  # Sorted according to feature type
    inputs[:, feature_ids.index(1)] = [12, 4, 2, 2]
    inputs[:, feature_ids.index(2)] = [1.0, 2.0, 3.0, 3.0]
    inputs[:, feature_ids.index(3)] = [
        15, 3, 15, normalization.MISSING_VALUE
    ]
    input_blob = C2.NextBlob("input_blob")
    # Dummy feed so the blob exists when the norm ops are built.
    workspace.FeedBlob(input_blob, np.array([0], dtype=np.float32))
    normalized_output_blob, _ = preprocessor.normalize_dense_matrix(
        input_blob, feature_ids, normalization_parameters, "", False)
    workspace.FeedBlob(input_blob, inputs)
    workspace.RunNetOnce(norm_net)
    normalized_feature_matrix = workspace.FetchBlob(normalized_output_blob)

    np.testing.assert_allclose(
        np.array([
            [1.0, 1, 0, 0, 1, 0],
            [2.0, 0, 1, 0, 0, 1],
            [3.0, 0, 0, 1, 1, 0],
            [3.0, 0, 0, 1, 0, 0],  # Missing values should go to all 0
        ]),
        normalized_feature_matrix,
    )
def test_normalize_dense_matrix_enum(self):
    """Enum features one-hot correctly, missing enum rows one-hot to all
    zeros, and the continuous column passes through unchanged."""
    normalization_parameters = {
        1: NormalizationParameters(
            identify_types.ENUM,
            None,
            None,
            None,
            None,
            [12, 4, 2],
            None,
            None,
            None,
        ),
        2: NormalizationParameters(
            identify_types.CONTINUOUS, None, 0, 0, 1, None, None, None, None
        ),
        3: NormalizationParameters(
            identify_types.ENUM, None, None, None, None, [15, 3], None, None, None
        ),
    }
    norm_net = core.Net("net")
    C2.set_net(norm_net)
    preprocessor = PreprocessorNet()

    inputs = np.zeros([4, 3], dtype=np.float32)
    feature_ids = [2, 1, 3]  # Sorted according to feature type
    inputs[:, feature_ids.index(1)] = [12, 4, 2, 2]
    inputs[:, feature_ids.index(2)] = [1.0, 2.0, 3.0, 3.0]
    inputs[:, feature_ids.index(3)] = [15, 3, 15, normalization.MISSING_VALUE]
    input_blob = C2.NextBlob("input_blob")
    # Dummy feed so the blob exists when the norm ops are built.
    workspace.FeedBlob(input_blob, np.array([0], dtype=np.float32))
    normalized_output_blob, _ = preprocessor.normalize_dense_matrix(
        input_blob, feature_ids, normalization_parameters, "", False
    )
    workspace.FeedBlob(input_blob, inputs)
    workspace.RunNetOnce(norm_net)
    normalized_feature_matrix = workspace.FetchBlob(normalized_output_blob)

    np.testing.assert_allclose(
        np.array(
            [
                [1.0, 1, 0, 0, 1, 0],
                [2.0, 0, 1, 0, 0, 1],
                [3.0, 0, 0, 1, 1, 0],
                [3.0, 0, 0, 1, 0, 0],  # Missing values should go to all 0
            ]
        ),
        normalized_feature_matrix,
    )
def _dummy_model_copy(self, model_name, path):
    """Export a trivial identity net (Copy op from 'Input' to 'Output')
    under ``model_name`` to the minidb at ``path``."""
    copy_net = core.Net(model_name)
    C2.set_net(copy_net)
    source_blob = 'Input'
    dest_blob = 'Output'
    # Feed placeholder values so both blobs exist before export.
    for blob_name in (source_blob, dest_blob):
        workspace.FeedBlob(blob_name, np.array([1.0]))
    copy_net.Copy([source_blob], [dest_blob])
    export_meta = PredictorExportMeta(
        copy_net,
        [],
        [source_blob],
        [dest_blob],
    )
    save_to_db('minidb', path, export_meta)
def _store_parameter(self, parameters, name, value):
    """Register ``value`` as a uniquely-named constant blob.

    When an init net is active, a GivenTensorFill op (plus an external
    output) is added to it; otherwise the value is fed directly into the
    workspace. The blob name is appended to ``parameters``.

    :param parameters: list collecting the parameter blob names (mutated).
    :param name: base name for the blob; C2.NextBlob makes it unique.
    :param value: numpy array holding the constant.
    :return: the generated blob name.
    """
    c2_name = C2.NextBlob(name)
    init_net = C2.init_net()
    if init_net:
        init_net.GivenTensorFill(
            [],
            c2_name,
            shape=value.shape,
            values=value.flatten(),
            dtype=schema.data_type_for_dtype(value.dtype),
        )
        init_net.AddExternalOutput(c2_name)
    else:
        workspace.FeedBlob(c2_name, value)
    parameters.append(c2_name)
    return c2_name
def normalize_sparse_matrix(
    self,
    lengths_blob: str,
    keys_blob: str,
    values_blob: str,
    normalization_parameters: Dict[str, NormalizationParameters],
    blobname_prefix: str,
    split_expensive_feature_groups: bool = False,
) -> Tuple[str, List[str]]:
    """Densify a sparse feature map and normalize the resulting matrix.

    :param lengths_blob: per-example feature counts.
    :param keys_blob: feature ids, concatenated across examples.
    :param values_blob: feature values aligned with ``keys_blob``.
    :param normalization_parameters: feature id -> NormalizationParameters.
    :param blobname_prefix: prefix for intermediate blob names.
    :param split_expensive_feature_groups: forwarded to
        normalize_dense_matrix.
    :return: (normalized dense blob, parameter blob names created).
    """
    sorted_features, _ = sort_features_by_normalization(
        normalization_parameters)
    int_features = [int(feature) for feature in sorted_features]
    # Missing entries are filled from self.MISSING_SCALAR.
    dense_input, _ = C2.SparseToDenseMask(keys_blob,
                                          values_blob,
                                          self.MISSING_SCALAR,
                                          lengths_blob,
                                          mask=int_features)
    return self.normalize_dense_matrix(
        dense_input,
        sorted_features,
        normalization_parameters,
        blobname_prefix,
        split_expensive_feature_groups,
    )
def get_max_q_values(self, states: str, possible_actions: str,
                     use_target_network: bool) -> str:
    """
    Takes in an array of states and outputs an array of the same shape
    whose ith entry = max_{pa} Q(state_i, pa) over the possible actions.

    :param states: Numpy array with shape (batch_size, state_dim). Each row
        contains a representation of a state.
    :param possible_actions: Numpy array with shape
        (batch_size, action_dim). possible_actions[i][j] = 1 iff the agent
        can take action j from state i.
    :param use_target_network: Boolean that indicates whether or not to use
        this trainer's TargetNetwork to compute Q values.
    """
    q_values = self.get_q_values_all_actions(states, use_target_network)

    # Set the q values of impossible actions to a very large negative
    # number.
    inverse_pna = C2.ConstantFill(possible_actions, value=1.0)
    possible_actions_float = C2.Cast(possible_actions, to=core.DataType.FLOAT)
    # inverse_pna[i][j] = 1 where action j is NOT possible from state i.
    inverse_pna = C2.Sub(inverse_pna, possible_actions_float)
    inverse_pna = C2.Mul(inverse_pna, self.ACTION_NOT_POSSIBLE_VAL,
                         broadcast=1)
    q_values = C2.Add(q_values, inverse_pna)

    q_values_max = C2.ReduceBackMax(q_values, num_reduce_dims=1)
    # Keep a (batch_size, 1) column shape for downstream ops.
    return C2.ExpandDims(q_values_max, dims=[1])
def sparse_to_dense(lengths_blob: str, keys_blob: str, values_blob: str,
                    sorted_features: List[int]) -> Tuple[str, List[str]]:
    """Densify sparse (lengths/keys/values) features into a matrix blob
    whose columns follow ``sorted_features``; absent entries are filled with
    MISSING_VALUE.

    :return: (dense blob name, parameter blob names created).
    """
    MISSING_SCALAR = C2.NextBlob("MISSING_SCALAR")
    # Feed now for immediate runs, and add a fill op so re-runs/exports
    # reproduce the constant.
    workspace.FeedBlob(MISSING_SCALAR,
                       np.array([MISSING_VALUE], dtype=np.float32))
    C2.net().GivenTensorFill([], [MISSING_SCALAR], shape=[],
                             values=[MISSING_VALUE])
    parameters: List[str] = [MISSING_SCALAR]

    assert len(sorted_features) > 0, "Sorted features is empty"
    dense_input = C2.SparseToDenseMask(keys_blob, values_blob, MISSING_SCALAR,
                                       lengths_blob, mask=sorted_features)[0]
    return dense_input, parameters
def sparse_to_dense(
    lengths_blob: str,
    keys_blob: str,
    values_blob: str,
    sorted_features: List[int],
    set_missing_value_to_zero: bool = False,
) -> Tuple[str, List[str]]:
    """Densify sparse (lengths/keys/values) features into a matrix blob
    whose columns follow ``sorted_features``.

    :param set_missing_value_to_zero: fill absent entries with 0.0 instead
        of the MISSING_VALUE sentinel.
    :return: (dense blob name, parameter blob names created).
    """
    missing_blob = C2.NextBlob("MISSING_SCALAR")
    if set_missing_value_to_zero:
        fill_value = 0.0
    else:
        fill_value = MISSING_VALUE
    # Feed for immediate use; the fill op reproduces the constant on re-run.
    workspace.FeedBlob(missing_blob, np.array([fill_value], dtype=np.float32))
    C2.net().GivenTensorFill([], [missing_blob], shape=[], values=[fill_value])
    parameters: List[str] = [missing_blob]

    assert len(sorted_features) > 0, "Sorted features is empty"
    dense_matrix = C2.SparseToDenseMask(
        keys_blob, values_blob, missing_blob, lengths_blob, mask=sorted_features
    )[0]
    return dense_matrix, parameters
def benchmark(num_forward_passes):
    """
    Benchmark preprocessor speeds:
        1 - PyTorch
        2 - PyTorch -> ONNX -> C2
        3 - C2

    :param num_forward_passes: number of timed forward passes per backend.
    """
    feature_value_map = gen_data(
        num_binary_features=10,
        num_boxcox_features=10,
        num_continuous_features=10,
        num_enum_features=10,
        num_prob_features=10,
        num_quantile_features=10,
    )

    normalization_parameters = {}
    for name, values in feature_value_map.items():
        normalization_parameters[name] = normalization.identify_parameter(
            name, values, 10
        )
    sorted_features, _ = sort_features_by_normalization(normalization_parameters)

    # Dummy input
    input_matrix = np.zeros([10000, len(sorted_features)], dtype=np.float32)

    # PyTorch Preprocessor
    pytorch_preprocessor = Preprocessor(normalization_parameters, False)
    for i, feature in enumerate(sorted_features):
        input_matrix[:, i] = feature_value_map[feature]

    #################### time pytorch ############################
    start = time.time()
    # FIX: previously this loop (and the two below) used the module-level
    # NUM_FORWARD_PASSES constant, silently ignoring the num_forward_passes
    # argument.
    for _ in range(num_forward_passes):
        _ = pytorch_preprocessor.forward(input_matrix)
    end = time.time()
    logger.info(
        "PyTorch: {} forward passes done in {} seconds".format(
            num_forward_passes, end - start
        )
    )

    ################ time pytorch -> ONNX -> caffe2 ####################
    buffer = PytorchCaffe2Converter.pytorch_net_to_buffer(
        pytorch_preprocessor, len(sorted_features), False
    )
    input_blob, output_blob, caffe2_netdef = PytorchCaffe2Converter.buffer_to_caffe2_netdef(
        buffer
    )
    torch_workspace = caffe2_netdef.workspace
    parameters = torch_workspace.Blobs()
    for blob_str in parameters:
        workspace.FeedBlob(blob_str, torch_workspace.FetchBlob(blob_str))
    torch_init_net = core.Net(caffe2_netdef.init_net)
    torch_predict_net = core.Net(caffe2_netdef.predict_net)
    workspace.FeedBlob(input_blob, input_matrix)
    workspace.RunNetOnce(torch_init_net)
    start = time.time()
    for _ in range(num_forward_passes):
        workspace.RunNetOnce(torch_predict_net)
        _ = workspace.FetchBlob(output_blob)
    end = time.time()
    logger.info(
        # FIX: this literal was previously broken across a raw newline.
        "PyTorch -> ONNX -> Caffe2: {} forward passes done in {} seconds".format(
            num_forward_passes, end - start
        )
    )

    #################### time caffe2 ############################
    norm_net = core.Net("net")
    C2.set_net(norm_net)
    preprocessor = PreprocessorNet()
    input_matrix_blob = "input_matrix_blob"
    workspace.FeedBlob(input_matrix_blob, np.array([], dtype=np.float32))
    output_blob, _ = preprocessor.normalize_dense_matrix(
        input_matrix_blob, sorted_features, normalization_parameters, "", False
    )
    workspace.FeedBlob(input_matrix_blob, input_matrix)
    start = time.time()
    for _ in range(num_forward_passes):
        workspace.RunNetOnce(norm_net)
        _ = workspace.FetchBlob(output_blob)
    end = time.time()
    logger.info(
        "Caffe2: {} forward passes done in {} seconds".format(
            num_forward_passes, end - start
        )
    )
def preprocess_samples_discrete(
    self,
    samples: Samples,
    minibatch_size: int,
    one_hot_action: bool = True,
    use_gpu: bool = False,
) -> List[TrainingDataPage]:
    """
    Convert raw gridworld ``samples`` into minibatched TrainingDataPages
    for a discrete-action trainer.

    The sparse->dense Caffe2 net is built once and cached on
    ``self.sparse_to_dense_net``; later calls only re-feed the input blobs
    and re-run it.

    :param samples: raw transition samples to preprocess.
    :param minibatch_size: rows per TrainingDataPage; a trailing partial
        batch is dropped.
    :param one_hot_action: emit one-hot action matrices instead of indices.
    :param use_gpu: produce CUDA tensors when True.
    """
    logger.info("Shuffling...")
    samples = shuffle_samples(samples)

    logger.info("Preprocessing...")

    if self.sparse_to_dense_net is None:
        # First call: build and cache the densification net.
        self.sparse_to_dense_net = core.Net("gridworld_sparse_to_dense")
        C2.set_net(self.sparse_to_dense_net)
        saa = StackedAssociativeArray.from_dict_list(samples.states, "states")
        sorted_features, _ = sort_features_by_normalization(self.normalization)
        self.state_matrix, _ = sparse_to_dense(
            saa.lengths, saa.keys, saa.values, sorted_features
        )
        saa = StackedAssociativeArray.from_dict_list(
            samples.next_states, "next_states"
        )
        self.next_state_matrix, _ = sparse_to_dense(
            saa.lengths, saa.keys, saa.values, sorted_features
        )
        C2.set_net(None)
    else:
        # Net already cached: just re-feed the input blobs it reads.
        StackedAssociativeArray.from_dict_list(samples.states, "states")
        StackedAssociativeArray.from_dict_list(samples.next_states, "next_states")
    workspace.RunNetOnce(self.sparse_to_dense_net)

    logger.info("Converting to Torch...")
    # One-hot by comparing each action string against the action vocabulary.
    actions_one_hot = torch.tensor(
        (np.array(samples.actions).reshape(-1, 1) == np.array(self.ACTIONS)).astype(
            np.int64
        )
    )
    actions = actions_one_hot.argmax(dim=1, keepdim=True)
    rewards = torch.tensor(samples.rewards, dtype=torch.float32).reshape(-1, 1)
    action_probabilities = torch.tensor(
        samples.action_probabilities, dtype=torch.float32
    ).reshape(-1, 1)
    next_actions_one_hot = torch.tensor(
        (
            np.array(samples.next_actions).reshape(-1, 1) == np.array(self.ACTIONS)
        ).astype(np.int64)
    )
    logger.info("Converting PA to Torch...")
    # Pad ragged possible-action lists to a rectangular string array.
    possible_action_strings = np.array(
        list(itertools.zip_longest(*samples.possible_actions, fillvalue=""))
    ).T
    possible_actions_mask = torch.zeros([len(samples.actions), len(self.ACTIONS)])
    for i, action in enumerate(self.ACTIONS):
        possible_actions_mask[:, i] = torch.tensor(
            np.max(possible_action_strings == action, axis=1).astype(np.int64)
        )
    logger.info("Converting PNA to Torch...")
    possible_next_action_strings = np.array(
        list(itertools.zip_longest(*samples.possible_next_actions, fillvalue=""))
    ).T
    possible_next_actions_mask = torch.zeros(
        [len(samples.next_actions), len(self.ACTIONS)]
    )
    for i, action in enumerate(self.ACTIONS):
        possible_next_actions_mask[:, i] = torch.tensor(
            np.max(possible_next_action_strings == action, axis=1).astype(np.int64)
        )
    terminals = torch.tensor(samples.terminals, dtype=torch.int32).reshape(-1, 1)
    not_terminal = 1 - terminals
    logger.info("Converting RT to Torch...")

    time_diffs = torch.ones([len(samples.states), 1])

    logger.info("Preprocessing...")
    preprocessor = Preprocessor(self.normalization, False)

    states_ndarray = workspace.FetchBlob(self.state_matrix)
    states_ndarray = preprocessor.forward(states_ndarray)

    next_states_ndarray = workspace.FetchBlob(self.next_state_matrix)
    next_states_ndarray = preprocessor.forward(next_states_ndarray)

    logger.info("Batching...")
    tdps = []
    for start in range(0, states_ndarray.shape[0], minibatch_size):
        end = start + minibatch_size
        if end > states_ndarray.shape[0]:
            # Drop the trailing partial minibatch.
            break
        tdp = TrainingDataPage(
            states=states_ndarray[start:end],
            actions=actions_one_hot[start:end]
            if one_hot_action
            else actions[start:end],
            propensities=action_probabilities[start:end],
            rewards=rewards[start:end],
            next_states=next_states_ndarray[start:end],
            not_terminal=not_terminal[start:end],
            next_actions=next_actions_one_hot[start:end],
            possible_actions_mask=possible_actions_mask[start:end],
            possible_next_actions_mask=possible_next_actions_mask[start:end],
            time_diffs=time_diffs[start:end],
        )
        tdp.set_type(torch.cuda.FloatTensor if use_gpu else torch.FloatTensor)
        tdps.append(tdp)
    return tdps
def preprocess_samples(
    self,
    samples: Samples,
    minibatch_size: int,
    use_gpu: bool = False,
    one_hot_action: bool = True,
    normalize_actions: bool = True,
) -> List[TrainingDataPage]:
    """Convert parametric-action gridworld Samples into TrainingDataPage minibatches.

    Densifies state and action feature dicts via a caffe2 net, normalizes them
    with Preprocessor, pads possible-next-action sets to a fixed width, and
    slices the result into fixed-size pages (trailing partial batch dropped).

    :param samples: Samples with per-step feature-dict states/actions.
    :param minibatch_size: rows per TrainingDataPage; remainder discarded.
    :param use_gpu: cast pages to ``torch.cuda.FloatTensor`` when True.
    :param one_hot_action: accepted for signature parity; not read in this
        body (actions here are parametric feature vectors).
    :param normalize_actions: run actions through the action Preprocessor.
    """
    logger.info("Shuffling...")
    samples = shuffle_samples(samples)
    logger.info("Sparse2Dense...")
    net = core.Net("gridworld_preprocessing")
    C2.set_net(net)
    saa = StackedAssociativeArray.from_dict_list(samples.states, "states")
    sorted_state_features, _ = sort_features_by_normalization(self.normalization)
    state_matrix, _ = sparse_to_dense(
        saa.lengths, saa.keys, saa.values, sorted_state_features
    )
    saa = StackedAssociativeArray.from_dict_list(samples.next_states, "next_states")
    next_state_matrix, _ = sparse_to_dense(
        saa.lengths, saa.keys, saa.values, sorted_state_features
    )
    sorted_action_features, _ = sort_features_by_normalization(
        self.normalization_action
    )
    saa = StackedAssociativeArray.from_dict_list(samples.actions, "action")
    action_matrix, _ = sparse_to_dense(
        saa.lengths, saa.keys, saa.values, sorted_action_features
    )
    saa = StackedAssociativeArray.from_dict_list(
        samples.next_actions, "next_action"
    )
    next_action_matrix, _ = sparse_to_dense(
        saa.lengths, saa.keys, saa.values, sorted_action_features
    )
    action_probabilities = torch.tensor(
        samples.action_probabilities, dtype=torch.float32
    ).reshape(-1, 1)
    rewards = torch.tensor(samples.rewards, dtype=torch.float32).reshape(-1, 1)
    # NOTE(review): hard-coded padding width for possible next actions —
    # presumably the 4 gridworld moves; confirm if reused beyond gridworld.
    max_action_size = 4
    pnas_mask_list: List[List[int]] = []
    pnas_flat: List[Dict[str, float]] = []
    for pnas in samples.possible_next_actions:
        # 1 for each real possible action, 0 for padding slots.
        pnas_mask_list.append([1] * len(pnas) + [0] * (max_action_size - len(pnas)))
        pnas_flat.extend(pnas)
        for _ in range(max_action_size - len(pnas)):
            pnas_flat.append({})  # Filler
    saa = StackedAssociativeArray.from_dict_list(pnas_flat, "possible_next_actions")
    pnas_mask = torch.Tensor(pnas_mask_list)
    possible_next_actions_matrix, _ = sparse_to_dense(
        saa.lengths, saa.keys, saa.values, sorted_action_features
    )
    workspace.RunNetOnce(net)
    logger.info("Preprocessing...")
    state_preprocessor = Preprocessor(self.normalization, False)
    action_preprocessor = Preprocessor(self.normalization_action, False)
    states_ndarray = workspace.FetchBlob(state_matrix)
    states_ndarray = state_preprocessor.forward(states_ndarray)
    actions_ndarray = torch.from_numpy(workspace.FetchBlob(action_matrix))
    if normalize_actions:
        actions_ndarray = action_preprocessor.forward(actions_ndarray)
    next_states_ndarray = workspace.FetchBlob(next_state_matrix)
    next_states_ndarray = state_preprocessor.forward(next_states_ndarray)
    # Repeat each next_state once per (padded) possible action so rows line
    # up with the flattened possible-next-action matrix.
    state_pnas_tile = next_states_ndarray.repeat(1, max_action_size).reshape(
        -1, next_states_ndarray.shape[1]
    )
    next_actions_ndarray = torch.from_numpy(workspace.FetchBlob(next_action_matrix))
    if normalize_actions:
        next_actions_ndarray = action_preprocessor.forward(next_actions_ndarray)
    logged_possible_next_actions = action_preprocessor.forward(
        workspace.FetchBlob(possible_next_actions_matrix)
    )
    assert state_pnas_tile.shape[0] == logged_possible_next_actions.shape[0], (
        "Invalid shapes: "
        + str(state_pnas_tile.shape)
        + " != "
        + str(logged_possible_next_actions.shape)
    )
    logged_possible_next_state_actions = torch.cat(
        (state_pnas_tile, logged_possible_next_actions), dim=1
    )
    logger.info("Reward Timeline to Torch...")
    time_diffs = torch.ones([len(samples.states), 1])
    tdps = []
    pnas_start = 0
    logger.info("Batching...")
    for start in range(0, states_ndarray.shape[0], minibatch_size):
        end = start + minibatch_size
        if end > states_ndarray.shape[0]:
            # Drop the final partial minibatch.
            break
        # The concat matrix has max_action_size rows per example.
        pnas_end = pnas_start + (minibatch_size * max_action_size)
        tdp = TrainingDataPage(
            states=states_ndarray[start:end],
            actions=actions_ndarray[start:end],
            propensities=action_probabilities[start:end],
            rewards=rewards[start:end],
            next_states=next_states_ndarray[start:end],
            next_actions=next_actions_ndarray[start:end],
            # A row is non-terminal iff it has at least one possible next action.
            not_terminal=(pnas_mask[start:end, :].sum(dim=1, keepdim=True) > 0),
            time_diffs=time_diffs[start:end],
            possible_next_actions_mask=pnas_mask[start:end, :],
            possible_next_actions_state_concat=logged_possible_next_state_actions[
                pnas_start:pnas_end, :
            ],
        )
        pnas_start = pnas_end
        tdp.set_type(torch.cuda.FloatTensor if use_gpu else torch.FloatTensor)
        tdps.append(tdp)
    return tdps
def export(
    cls,
    trainer,
    actions,
    state_normalization_parameters,
    int_features=False,
    model_on_gpu=False,
    set_missing_value_to_zero=False,
):
    """Export caffe2 preprocessor net and pytorch DQN forward pass as one
    caffe2 net.

    :param trainer DQNTrainer
    :param actions list of action names, in Q-output order
    :param state_normalization_parameters state NormalizationParameters
        (None means the input is an image blob instead of float features)
    :param int_features boolean indicating if int features blob will be present
    :param model_on_gpu boolean indicating if the model is a GPU model or CPU model
    :param set_missing_value_to_zero replace missing features with 0 instead of
        the MISSING_VALUE sentinel during sparse->dense
    """
    input_dim = trainer.num_features
    # Unwrap DataParallel so ONNX export sees the bare module.
    q_network = (
        trainer.q_network.module
        if isinstance(trainer.q_network, DataParallel)
        else trainer.q_network
    )
    buffer = PytorchCaffe2Converter.pytorch_net_to_buffer(
        q_network, input_dim, model_on_gpu
    )
    qnet_input_blob, qnet_output_blob, caffe2_netdef = PytorchCaffe2Converter.buffer_to_caffe2_netdef(
        buffer
    )
    torch_workspace = caffe2_netdef.workspace
    parameters = torch_workspace.Blobs()
    # Copy converted network weights into the active workspace.
    for blob_str in parameters:
        workspace.FeedBlob(blob_str, torch_workspace.FetchBlob(blob_str))
    torch_init_net = core.Net(caffe2_netdef.init_net)
    torch_predict_net = core.Net(caffe2_netdef.predict_net)
    logger.info("Generated ONNX predict net:")
    logger.info(str(torch_predict_net.Proto()))
    # While converting to metanetdef, the external_input of predict_net
    # will be recomputed. Add the real output of init_net to parameters
    # to make sure they will be counted.
    parameters.extend(
        set(caffe2_netdef.init_net.external_output)
        - set(caffe2_netdef.init_net.external_input)
    )
    model = model_helper.ModelHelper(name="predictor")
    net = model.net
    C2.set_model(model)
    # Feed placeholder input blobs so net construction/validation can run.
    workspace.FeedBlob("input/image", np.zeros([1, 1, 1, 1], dtype=np.int32))
    workspace.FeedBlob("input/float_features.lengths", np.zeros(1, dtype=np.int32))
    workspace.FeedBlob("input/float_features.keys", np.zeros(1, dtype=np.int64))
    workspace.FeedBlob("input/float_features.values", np.zeros(1, dtype=np.float32))
    input_feature_lengths = "input_feature_lengths"
    input_feature_keys = "input_feature_keys"
    input_feature_values = "input_feature_values"
    if int_features:
        # Merge int features (cast to float) with the float feature tensors.
        workspace.FeedBlob(
            "input/int_features.lengths", np.zeros(1, dtype=np.int32)
        )
        workspace.FeedBlob("input/int_features.keys", np.zeros(1, dtype=np.int64))
        workspace.FeedBlob("input/int_features.values", np.zeros(1, dtype=np.int32))
        C2.net().Cast(
            ["input/int_features.values"],
            ["input/int_features.values_float"],
            dtype=caffe2_pb2.TensorProto.FLOAT,
        )
        C2.net().MergeMultiScalarFeatureTensors(
            [
                "input/float_features.lengths",
                "input/float_features.keys",
                "input/float_features.values",
                "input/int_features.lengths",
                "input/int_features.keys",
                "input/int_features.values_float",
            ],
            [input_feature_lengths, input_feature_keys, input_feature_values],
        )
    else:
        C2.net().Copy(["input/float_features.lengths"], [input_feature_lengths])
        C2.net().Copy(["input/float_features.keys"], [input_feature_keys])
        C2.net().Copy(["input/float_features.values"], [input_feature_values])
    if state_normalization_parameters is not None:
        sorted_feature_ids = sort_features_by_normalization(
            state_normalization_parameters
        )[0]
        dense_matrix, new_parameters = sparse_to_dense(
            input_feature_lengths,
            input_feature_keys,
            input_feature_values,
            sorted_feature_ids,
            set_missing_value_to_zero=set_missing_value_to_zero,
        )
        parameters.extend(new_parameters)
        preprocessor_net = PreprocessorNet()
        state_normalized_dense_matrix, new_parameters = preprocessor_net.normalize_dense_matrix(
            dense_matrix,
            sorted_feature_ids,
            state_normalization_parameters,
            "state_norm_",
            True,
        )
        parameters.extend(new_parameters)
    else:
        # Image input. Note: Currently this does the wrong thing if
        # more than one image is passed at a time.
        state_normalized_dense_matrix = "input/image"
    net.Copy([state_normalized_dense_matrix], [qnet_input_blob])
    workspace.RunNetOnce(model.param_init_net)
    workspace.RunNetOnce(torch_init_net)
    net.AppendNet(torch_predict_net)
    new_parameters, q_values = RLPredictor._forward_pass(
        model, trainer, state_normalized_dense_matrix, actions, qnet_output_blob
    )
    parameters.extend(new_parameters)
    # Get 1 x n action index tensor under the max_q policy
    max_q_act_idxs = "max_q_policy_actions"
    C2.net().Flatten([C2.ArgMax(q_values)], [max_q_act_idxs], axis=0)
    shape_of_num_of_states = "num_states_shape"
    C2.net().FlattenToVec([max_q_act_idxs], [shape_of_num_of_states])
    num_states, _ = C2.Reshape(C2.Size(shape_of_num_of_states), shape=[1])
    # Get 1 x n action index tensor under the softmax policy
    temperature = C2.NextBlob("temperature")
    parameters.append(temperature)
    workspace.FeedBlob(
        temperature, np.array([trainer.rl_temperature], dtype=np.float32)
    )
    tempered_q_values = C2.Div(q_values, temperature, broadcast=1)
    softmax_values = C2.Softmax(tempered_q_values)
    softmax_act_idxs_nested = "softmax_act_idxs_nested"
    C2.net().WeightedSample([softmax_values], [softmax_act_idxs_nested])
    softmax_act_idxs = "softmax_policy_actions"
    C2.net().Flatten([softmax_act_idxs_nested], [softmax_act_idxs], axis=0)
    action_names = C2.NextBlob("action_names")
    parameters.append(action_names)
    workspace.FeedBlob(action_names, np.array(actions))
    # Concat action index tensors to get 2 x n tensor - [[max_q], [softmax]]
    # transpose & flatten to get [a1_maxq, a1_softmax, a2_maxq, a2_softmax, ...]
    max_q_act_blob = C2.Cast(max_q_act_idxs, to=caffe2_pb2.TensorProto.INT32)
    softmax_act_blob = C2.Cast(softmax_act_idxs, to=caffe2_pb2.TensorProto.INT32)
    C2.net().Append([max_q_act_blob, softmax_act_blob], [max_q_act_blob])
    transposed_action_idxs = C2.Transpose(max_q_act_blob)
    flat_transposed_action_idxs = C2.FlattenToVec(transposed_action_idxs)
    workspace.FeedBlob(OUTPUT_SINGLE_CAT_VALS_NAME, np.zeros(1, dtype=np.int64))
    C2.net().Gather(
        [action_names, flat_transposed_action_idxs], [OUTPUT_SINGLE_CAT_VALS_NAME]
    )
    workspace.FeedBlob(OUTPUT_SINGLE_CAT_LENGTHS_NAME, np.zeros(1, dtype=np.int32))
    # Two categorical outputs (max_q and softmax actions) per state.
    C2.net().ConstantFill(
        [shape_of_num_of_states],
        [OUTPUT_SINGLE_CAT_LENGTHS_NAME],
        value=2,
        dtype=caffe2_pb2.TensorProto.INT32,
    )
    workspace.FeedBlob(OUTPUT_SINGLE_CAT_KEYS_NAME, np.zeros(1, dtype=np.int64))
    # Keys [0, 1] repeated per state: 0 -> max_q action, 1 -> softmax action.
    output_keys_tensor, _ = C2.Concat(
        C2.ConstantFill(shape=[1, 1], value=0, dtype=caffe2_pb2.TensorProto.INT64),
        C2.ConstantFill(shape=[1, 1], value=1, dtype=caffe2_pb2.TensorProto.INT64),
        axis=0,
    )
    output_key_tile = C2.Tile(output_keys_tensor, num_states, axis=0)
    C2.net().FlattenToVec([output_key_tile], [OUTPUT_SINGLE_CAT_KEYS_NAME])
    workspace.CreateNet(net)
    return DQNPredictor(net, torch_init_net, parameters, int_features)
def export_actor(
    cls,
    trainer,
    state_normalization_parameters,
    action_feature_ids,
    min_action_range_tensor_serving,
    max_action_range_tensor_serving,
    int_features=False,
    model_on_gpu=False,
):
    """Export caffe2 preprocessor net and pytorch actor forward pass as one
    caffe2 net.

    :param trainer DDPGTrainer
    :param state_normalization_parameters state NormalizationParameters
    :param action_feature_ids feature ids emitted as output keys, one per
        action dimension
    :param min_action_range_tensor_serving pytorch tensor that specifies
        min action value for each dimension
    :param max_action_range_tensor_serving pytorch tensor that specifies
        max action value for each dimension
    :param int_features boolean indicating if int features blob will be present
    :param model_on_gpu boolean indicating if the model is a GPU model or CPU model
    """
    model = model_helper.ModelHelper(name="predictor")
    net = model.net
    C2.set_model(model)
    parameters: List[str] = []
    # Feed placeholder input blobs so net construction/validation can run.
    workspace.FeedBlob("input/float_features.lengths", np.zeros(1, dtype=np.int32))
    workspace.FeedBlob("input/float_features.keys", np.zeros(1, dtype=np.int64))
    workspace.FeedBlob("input/float_features.values", np.zeros(1, dtype=np.float32))
    input_feature_lengths = "input_feature_lengths"
    input_feature_keys = "input_feature_keys"
    input_feature_values = "input_feature_values"
    if int_features:
        # Merge int features (cast to float) with the float feature tensors.
        workspace.FeedBlob(
            "input/int_features.lengths", np.zeros(1, dtype=np.int32)
        )
        workspace.FeedBlob("input/int_features.keys", np.zeros(1, dtype=np.int64))
        workspace.FeedBlob("input/int_features.values", np.zeros(1, dtype=np.int32))
        C2.net().Cast(
            ["input/int_features.values"],
            ["input/int_features.values_float"],
            dtype=caffe2_pb2.TensorProto.FLOAT,
        )
        C2.net().MergeMultiScalarFeatureTensors(
            [
                "input/float_features.lengths",
                "input/float_features.keys",
                "input/float_features.values",
                "input/int_features.lengths",
                "input/int_features.keys",
                "input/int_features.values_float",
            ],
            [input_feature_lengths, input_feature_keys, input_feature_values],
        )
    else:
        C2.net().Copy(["input/float_features.lengths"], [input_feature_lengths])
        C2.net().Copy(["input/float_features.keys"], [input_feature_keys])
        C2.net().Copy(["input/float_features.values"], [input_feature_values])
    preprocessor = PreprocessorNet()
    sorted_features, _ = sort_features_by_normalization(
        state_normalization_parameters
    )
    state_dense_matrix, new_parameters = sparse_to_dense(
        input_feature_lengths,
        input_feature_keys,
        input_feature_values,
        sorted_features,
    )
    parameters.extend(new_parameters)
    state_normalized_dense_matrix, new_parameters = preprocessor.normalize_dense_matrix(
        state_dense_matrix,
        sorted_features,
        state_normalization_parameters,
        "state_norm",
        False,
    )
    parameters.extend(new_parameters)
    torch_init_net, torch_predict_net, new_parameters, actor_input_blob, \
        actor_output_blob, min_action_training_blob, max_action_training_blob, \
        min_action_serving_blob, max_action_serving_blob = DDPGPredictor.generate_train_net(
            trainer,
            model,
            min_action_range_tensor_serving,
            max_action_range_tensor_serving,
            model_on_gpu,
        )
    parameters.extend(new_parameters)
    net.Copy([state_normalized_dense_matrix], [actor_input_blob])
    workspace.RunNetOnce(model.param_init_net)
    workspace.RunNetOnce(torch_init_net)
    net.AppendNet(torch_predict_net)
    # Scale actors actions from [-1, 1] to serving range
    prev_range = C2.Sub(max_action_training_blob, min_action_training_blob)
    new_range = C2.Sub(max_action_serving_blob, min_action_serving_blob)
    subtract_prev_min = C2.Sub(actor_output_blob, min_action_training_blob)
    div_by_prev_range = C2.Div(subtract_prev_min, prev_range)
    scaled_for_serving_actions = C2.Add(
        C2.Mul(div_by_prev_range, new_range), min_action_serving_blob
    )
    # Each output row has one value per action dimension (actor output width).
    output_lengths = "output/float_features.lengths"
    workspace.FeedBlob(output_lengths, np.zeros(1, dtype=np.int32))
    C2.net().ConstantFill(
        [C2.FlattenToVec(C2.ArgMax(actor_output_blob))],
        [output_lengths],
        value=trainer.actor.layers[-1].out_features,
        dtype=caffe2_pb2.TensorProto.INT32,
    )
    action_feature_ids_blob = C2.NextBlob("action_feature_ids")
    workspace.FeedBlob(
        action_feature_ids_blob, np.array(action_feature_ids, dtype=np.int64)
    )
    parameters.append(action_feature_ids_blob)
    # Tile the action feature ids once per example to form the output keys.
    output_keys = "output/float_features.keys"
    workspace.FeedBlob(output_keys, np.zeros(1, dtype=np.int64))
    num_examples, _ = C2.Reshape(C2.Size("input/float_features.lengths"), shape=[1])
    C2.net().Tile([action_feature_ids_blob, num_examples], [output_keys], axis=1)
    output_values = "output/float_features.values"
    workspace.FeedBlob(output_values, np.zeros(1, dtype=np.float32))
    C2.net().FlattenToVec([scaled_for_serving_actions], [output_values])
    workspace.CreateNet(net)
    return DDPGPredictor(net, torch_init_net, parameters, int_features)
def test_prepare_normalization_and_normalize(self):
    """End-to-end check: identify normalization parameters from test data,
    normalize the full feature matrix through the caffe2 PreprocessorNet,
    and verify each feature type's statistical properties after the fact."""
    feature_value_map = read_data()
    normalization_parameters = {}
    for name, values in feature_value_map.items():
        normalization_parameters[name] = normalization.identify_parameter(
            name, values, 10, feature_type=self._feature_type_override(name)
        )
    # Sanity-check identified parameters: boxcox fields are only set for
    # BOXCOX features; the detected type must match the id-encoded type.
    for k, v in normalization_parameters.items():
        if id_to_type(k) == CONTINUOUS:
            self.assertEqual(v.feature_type, CONTINUOUS)
            self.assertIs(v.boxcox_lambda, None)
            self.assertIs(v.boxcox_shift, None)
        elif id_to_type(k) == BOXCOX:
            self.assertEqual(v.feature_type, BOXCOX)
            self.assertIsNot(v.boxcox_lambda, None)
            self.assertIsNot(v.boxcox_shift, None)
        else:
            assert v.feature_type == id_to_type(k)
    sorted_features, _ = sort_features_by_normalization(normalization_parameters)
    norm_net = core.Net("net")
    C2.set_net(norm_net)
    preprocessor = PreprocessorNet()
    input_matrix = np.zeros([10000, len(sorted_features)], dtype=np.float32)
    for i, feature in enumerate(sorted_features):
        input_matrix[:, i] = feature_value_map[feature]
    input_matrix_blob = "input_matrix_blob"
    # Feed an empty placeholder first so normalize_dense_matrix can build the
    # net; the real matrix is fed before the net actually runs.
    workspace.FeedBlob(input_matrix_blob, np.array([], dtype=np.float32))
    output_blob, _ = preprocessor.normalize_dense_matrix(
        input_matrix_blob, sorted_features, normalization_parameters, "", False
    )
    workspace.FeedBlob(input_matrix_blob, input_matrix)
    workspace.RunNetOnce(norm_net)
    normalized_feature_matrix = workspace.FetchBlob(output_blob)
    # Split the output matrix back into per-feature column blocks (ENUM
    # features expand to one column per possible value).
    normalized_features = {}
    on_column = 0
    for feature in sorted_features:
        norm = normalization_parameters[feature]
        if norm.feature_type == ENUM:
            column_size = len(norm.possible_values)
        else:
            column_size = 1
        normalized_features[feature] = normalized_feature_matrix[
            :, on_column : (on_column + column_size)
        ]
        on_column += column_size
    self.assertTrue(
        all(
            [
                np.isfinite(parameter.stddev) and np.isfinite(parameter.mean)
                for parameter in normalization_parameters.values()
            ]
        )
    )
    for k, v in six.iteritems(normalized_features):
        self.assertTrue(np.all(np.isfinite(v)))
        feature_type = normalization_parameters[k].feature_type
        if feature_type == identify_types.PROBABILITY:
            # Normalization is a logit; applying sigmoid must land in (0, 1).
            sigmoidv = special.expit(v)
            self.assertTrue(
                np.all(
                    np.logical_and(np.greater(sigmoidv, 0), np.less(sigmoidv, 1))
                )
            )
        elif feature_type == identify_types.ENUM:
            possible_values = normalization_parameters[k].possible_values
            self.assertEqual(v.shape[0], len(feature_value_map[k]))
            self.assertEqual(v.shape[1], len(possible_values))
            possible_value_map = {}
            for i, possible_value in enumerate(possible_values):
                possible_value_map[possible_value] = i
            # Each row must be a one-hot at the original value's column.
            for i, row in enumerate(v):
                original_feature = feature_value_map[k][i]
                self.assertEqual(
                    possible_value_map[original_feature], np.where(row == 1)[0][0]
                )
        elif feature_type == identify_types.QUANTILE:
            for i, feature in enumerate(v[0]):
                original_feature = feature_value_map[k][i]
                expected = NumpyFeatureProcessor.value_to_quantile(
                    original_feature, normalization_parameters[k].quantiles
                )
                self.assertAlmostEqual(feature, expected, 2)
        elif feature_type == identify_types.BINARY:
            pass
        elif (
            feature_type == identify_types.CONTINUOUS
            or feature_type == identify_types.BOXCOX
        ):
            # After z-scoring: mean ~0 and stddev ~1 (or ~0 for constant cols).
            one_stddev = np.isclose(np.std(v, ddof=1), 1, atol=0.01)
            zero_stddev = np.isclose(np.std(v, ddof=1), 0, atol=0.01)
            zero_mean = np.isclose(np.mean(v), 0, atol=0.01)
            self.assertTrue(
                np.all(zero_mean),
                "mean of feature {} is {}, not 0".format(k, np.mean(v)),
            )
            self.assertTrue(np.all(np.logical_or(one_stddev, zero_stddev)))
        elif feature_type == identify_types.CONTINUOUS_ACTION:
            # Continuous actions are rescaled into the open interval (-1, 1).
            less_than_max = v < 1
            more_than_min = v > -1
            self.assertTrue(
                np.all(less_than_max),
                "values are not less than 1: {}".format(v[less_than_max == False]),
            )
            self.assertTrue(
                np.all(more_than_min),
                "values are not more than -1: {}".format(v[more_than_min == False]),
            )
        else:
            raise NotImplementedError()
def test_preprocessing_network(self):
    """Verify the caffe2 preprocessing net agrees with the numpy reference
    implementation (NumpyFeatureProcessor) for every feature in the test data."""
    feature_value_map = read_data()
    normalization_parameters = {
        name: normalization.identify_parameter(
            name, values, feature_type=self._feature_type_override(name)
        )
        for name, values in feature_value_map.items()
    }
    test_features = NumpyFeatureProcessor.preprocess(
        feature_value_map, normalization_parameters
    )

    # Build one preprocessing subgraph per feature on a shared net.
    net = core.Net("PreprocessingTestNet")
    C2.set_net(net)
    preprocessor = PreprocessorNet()
    name_preprocessed_blob_map = {}
    for feature_name in feature_value_map:
        workspace.FeedBlob(str(feature_name), np.array([0], dtype=np.int32))
        out_blob, _ = preprocessor.preprocess_blob(
            str(feature_name), [normalization_parameters[feature_name]]
        )
        name_preprocessed_blob_map[feature_name] = out_blob
    workspace.CreateNet(net)

    # Feed the real column values as Nx1 matrices, then run once.
    for feature_name, feature_value in six.iteritems(feature_value_map):
        workspace.FeedBlob(str(feature_name), np.expand_dims(feature_value, -1))
    workspace.RunNetOnce(net)

    for feature_name in feature_value_map:
        fetched = workspace.FetchBlob(name_preprocessed_blob_map[feature_name])
        if feature_name != ENUM_FEATURE_ID:
            fetched = np.squeeze(fetched, -1)
        # At the limit, boxcox has some numerical instability, so relax the
        # tolerance for that feature only.
        tolerance = 0.5 if feature_name == BOXCOX_FEATURE_ID else 0.01
        close = np.isclose(
            fetched,
            test_features[feature_name],
            rtol=tolerance,
            atol=tolerance,
        )
        non_matching = np.where(np.logical_not(close))
        self.assertTrue(
            np.all(close),
            "{} does not match: {} {}".format(
                feature_name,
                fetched[non_matching].tolist(),
                test_features[feature_name][non_matching].tolist(),
            ),
        )
def create_net(self):
    """Build the caffe2 feature-extractor net for training data.

    Densifies state/next-state (and, for parametric actions, action) feature
    maps, optionally normalizes them, and wires the results into an output
    schema record. Returns a FeatureExtractorNet(net, init_net).
    """
    net = core.Net("feature_extractor")
    init_net = core.Net("feature_extractor_init")
    missing_scalar = self.create_const(init_net, "MISSING_SCALAR", MISSING_VALUE)
    # Parametric actions arrive as feature maps; discrete actions as scalars.
    action_schema = map_schema() if self.sorted_action_features else schema.Scalar()
    input_schema = schema.Struct(
        (InputColumn.STATE_FEATURES, map_schema()),
        (InputColumn.NEXT_STATE_FEATURES, map_schema()),
        (InputColumn.ACTION, action_schema),
        (InputColumn.NEXT_ACTION, action_schema),
        (InputColumn.NOT_TERMINAL, schema.Scalar()),
    )
    if self.include_possible_actions:
        input_schema += schema.Struct(
            (InputColumn.POSSIBLE_ACTIONS_MASK, schema.List(schema.Scalar())),
            (InputColumn.POSSIBLE_NEXT_ACTIONS_MASK, schema.List(schema.Scalar())),
        )
        if self.sorted_action_features is not None:
            input_schema += schema.Struct(
                (InputColumn.POSSIBLE_ACTIONS, schema.List(map_schema())),
                (InputColumn.POSSIBLE_NEXT_ACTIONS, schema.List(map_schema())),
            )
    input_record = net.set_input_record(input_schema)
    state = self.extract_float_features(
        net,
        "state",
        input_record[InputColumn.STATE_FEATURES],
        self.sorted_state_features,
        missing_scalar,
    )
    next_state = self.extract_float_features(
        net,
        "next_state",
        input_record[InputColumn.NEXT_STATE_FEATURES],
        self.sorted_state_features,
        missing_scalar,
    )
    if self.sorted_action_features:
        # Parametric actions: densify action feature maps too.
        action = self.extract_float_features(
            net,
            InputColumn.ACTION,
            input_record[InputColumn.ACTION],
            self.sorted_action_features,
            missing_scalar,
        )
        next_action = self.extract_float_features(
            net,
            InputColumn.NEXT_ACTION,
            input_record[InputColumn.NEXT_ACTION],
            self.sorted_action_features,
            missing_scalar,
        )
        if self.include_possible_actions:
            possible_action_features = self.extract_float_features(
                net,
                InputColumn.POSSIBLE_ACTIONS,
                input_record[InputColumn.POSSIBLE_ACTIONS]["values"],
                self.sorted_action_features,
                missing_scalar,
            )
            possible_next_action_features = self.extract_float_features(
                net,
                InputColumn.POSSIBLE_NEXT_ACTIONS,
                input_record[InputColumn.POSSIBLE_NEXT_ACTIONS]["values"],
                self.sorted_action_features,
                missing_scalar,
            )
    else:
        # Discrete actions pass through unchanged.
        action = input_record[InputColumn.ACTION]
        next_action = input_record[InputColumn.NEXT_ACTION]
    if self.normalize:
        # Normalization ops are recorded on both net and init_net via the
        # C2 global net/init-net pair.
        C2.set_net_and_init_net(net, init_net)
        state, _ = PreprocessorNet().normalize_dense_matrix(
            state,
            self.sorted_state_features,
            self.state_normalization_parameters,
            blobname_prefix="state",
            split_expensive_feature_groups=True,
        )
        next_state, _ = PreprocessorNet().normalize_dense_matrix(
            next_state,
            self.sorted_state_features,
            self.state_normalization_parameters,
            blobname_prefix="next_state",
            split_expensive_feature_groups=True,
        )
        if self.sorted_action_features is not None:
            action, _ = PreprocessorNet().normalize_dense_matrix(
                action,
                self.sorted_action_features,
                self.action_normalization_parameters,
                blobname_prefix="action",
                split_expensive_feature_groups=True,
            )
            next_action, _ = PreprocessorNet().normalize_dense_matrix(
                next_action,
                self.sorted_action_features,
                self.action_normalization_parameters,
                blobname_prefix="next_action",
                split_expensive_feature_groups=True,
            )
            if self.include_possible_actions:
                possible_action_features, _ = PreprocessorNet().normalize_dense_matrix(
                    possible_action_features,
                    self.sorted_action_features,
                    self.action_normalization_parameters,
                    blobname_prefix="possible_action",
                    split_expensive_feature_groups=True,
                )
                possible_next_action_features, _ = PreprocessorNet().normalize_dense_matrix(
                    possible_next_action_features,
                    self.sorted_action_features,
                    self.action_normalization_parameters,
                    blobname_prefix="possible_next_action",
                    split_expensive_feature_groups=True,
                )
        C2.set_net_and_init_net(None, None)
    output_schema = schema.Struct(
        (InputColumn.STATE_FEATURES, state),
        (InputColumn.NEXT_STATE_FEATURES, next_state),
        (InputColumn.ACTION, action),
        (InputColumn.NEXT_ACTION, next_action),
        (InputColumn.NOT_TERMINAL, input_record[InputColumn.NOT_TERMINAL]),
    )
    if self.include_possible_actions:
        # Drop the "lengths" blob from possible_actions_mask since we know
        # it's just a list of [max_num_actions, max_num_actions, ...]
        output_schema += schema.Struct(
            (
                InputColumn.POSSIBLE_ACTIONS_MASK,
                input_record[InputColumn.POSSIBLE_ACTIONS_MASK]["values"],
            ),
            (
                InputColumn.POSSIBLE_NEXT_ACTIONS_MASK,
                input_record[InputColumn.POSSIBLE_NEXT_ACTIONS_MASK]["values"],
            ),
        )
        if self.sorted_action_features is not None:
            output_schema += schema.Struct(
                (InputColumn.POSSIBLE_ACTIONS, possible_action_features),
                (InputColumn.POSSIBLE_NEXT_ACTIONS, possible_next_action_features),
            )
    net.set_output_record(output_schema)
    return FeatureExtractorNet(net, init_net)
def create_net(self):
    """Build the caffe2 feature-extractor net for serving-style input.

    Reads the "input/float_features.*" map blobs, densifies the sorted state
    (and, when configured, action) features, optionally normalizes both, and
    returns a FeatureExtractorNet pairing the extractor net with its init net.
    """
    extract_net = core.Net("feature_extractor")
    extract_init_net = core.Net("feature_extractor_init")
    missing = self.create_const(extract_init_net, "MISSING_SCALAR", MISSING_VALUE)

    float_feature_map = schema.Map(
        keys=core.BlobReference("input/float_features.keys"),
        values=core.BlobReference("input/float_features.values"),
        lengths_blob=core.BlobReference("input/float_features.lengths"),
    )
    input_record = extract_net.set_input_record(
        schema.Struct(("float_features", float_feature_map))
    )

    state = self.extract_float_features(
        extract_net,
        "state",
        input_record.float_features,
        self.sorted_state_features,
        missing,
    )
    has_action = bool(self.sorted_action_features)
    if has_action:
        action = self.extract_float_features(
            extract_net,
            "action",
            input_record.float_features,
            self.sorted_action_features,
            missing,
        )

    if self.normalize:
        # Normalization ops must land on both nets via the C2 globals.
        C2.set_net_and_init_net(extract_net, extract_init_net)
        state, _ = PreprocessorNet().normalize_dense_matrix(
            state,
            self.sorted_state_features,
            self.state_normalization_parameters,
            blobname_prefix="state",
            split_expensive_feature_groups=True,
        )
        if has_action:
            action, _ = PreprocessorNet().normalize_dense_matrix(
                action,
                self.sorted_action_features,
                self.action_normalization_parameters,
                blobname_prefix="action",
                split_expensive_feature_groups=True,
            )
        C2.set_net_and_init_net(None, None)

    output_record = schema.Struct(("state", state))
    if has_action:
        output_record += schema.Struct(("action", action))
    extract_net.set_output_record(output_record)
    return FeatureExtractorNet(extract_net, extract_init_net)
def preprocess_blob(self, blob, normalization_parameters):
    """
    Takes in a blob and its normalization parameters. Outputs a tuple
    whose first element is a blob containing the normalized input blob
    and whose second element contains all the parameter blobs used to
    create it.

    Call this from a CPU context and ensure the input blob exists in it.

    :param blob: name of the caffe2 blob holding the feature matrix; all
        columns must share a single feature type.
    :param normalization_parameters: list of NormalizationParameters, one
        per column of ``blob``.
    :raises Exception: if the parameters mix feature types, or an ENUM
        possible value is negative.
    """
    parameters: List[str] = []
    # Detect MISSING_VALUE entries with a small tolerance band so float
    # round-trips still compare equal.
    MISSING_U = self._store_parameter(
        parameters, "MISSING_U", np.array([MISSING_VALUE + 1e-4], dtype=np.float32)
    )
    MISSING_L = self._store_parameter(
        parameters, "MISSING_L", np.array([MISSING_VALUE - 1e-4], dtype=np.float32)
    )
    is_empty_l = C2.GT(blob, MISSING_L, broadcast=1)
    is_empty_u = C2.LT(blob, MISSING_U, broadcast=1)
    is_empty = C2.And(is_empty_l, is_empty_u)
    for i in range(len(normalization_parameters) - 1):
        if (
            normalization_parameters[i].feature_type
            != normalization_parameters[i + 1].feature_type
        ):
            raise Exception(
                "Only one feature type is allowed per call to preprocess_blob!"
            )
    feature_type = normalization_parameters[0].feature_type
    if feature_type == identify_types.BINARY:
        # Map any value outside [-tol, tol] to 1.0, else 0.0.
        TOLERANCE = self._store_parameter(
            parameters, "TOLERANCE", np.array(1e-3, dtype=np.float32)
        )
        ZERO = self._store_parameter(
            parameters, "ZERO", np.array([0], dtype=np.float32)
        )
        is_gt_zero = C2.GT(blob, C2.Add(ZERO, TOLERANCE, broadcast=1), broadcast=1)
        is_lt_zero = C2.LT(blob, C2.Sub(ZERO, TOLERANCE, broadcast=1), broadcast=1)
        bool_blob = C2.Or(is_gt_zero, is_lt_zero)
        blob = C2.Cast(bool_blob, to=caffe2_pb2.TensorProto.FLOAT)
    elif feature_type == identify_types.PROBABILITY:
        # Clip away 0/1 and apply a logit transform: -log(1/p - 1).
        ONE = self._store_parameter(
            parameters, "ONE", np.array([1], dtype=np.float32)
        )
        NEGATIVE_ONE = self._store_parameter(
            parameters, "NEGATIVE_ONE", np.array([-1], dtype=np.float32)
        )
        clipped = C2.Clip(blob, min=0.01, max=0.99)
        blob = C2.Mul(
            C2.Log(C2.Sub(C2.Pow(clipped, exponent=-1.0), ONE, broadcast=1)),
            NEGATIVE_ONE,
            broadcast=1,
        )
    elif feature_type == identify_types.ENUM:
        # Validate: BatchOneHot requires non-negative enum values.
        for parameter in normalization_parameters:
            possible_values = parameter.possible_values
            for x in possible_values:
                if x < 0:
                    logger.fatal(
                        "Invalid enum possible value for feature: "
                        + str(x)
                        + " "
                        + str(parameter.possible_values)
                    )
                    raise Exception(
                        "Invalid enum possible value for feature "
                        + blob
                        + ": "
                        + str(x)
                        + " "
                        + str(parameter.possible_values)
                    )
        int_blob = C2.Cast(blob, to=core.DataType.INT32)
        # Batch one hot transform with MISSING_VALUE as a possible value
        feature_lengths = [
            len(p.possible_values) + 1 for p in normalization_parameters
        ]
        feature_lengths_blob = self._store_parameter(
            parameters,
            "feature_lengths_blob",
            np.array(feature_lengths, dtype=np.int32),
        )
        feature_values = [
            x
            for p in normalization_parameters
            for x in p.possible_values + [int(MISSING_VALUE)]
        ]
        feature_values_blob = self._store_parameter(
            parameters,
            "feature_values_blob",
            np.array(feature_values, dtype=np.int32),
        )
        one_hot_output = C2.BatchOneHot(
            int_blob, feature_lengths_blob, feature_values_blob
        )
        flattened_one_hot = C2.FlattenToVec(one_hot_output)
        # Remove missing values with a mask: keep real-value columns, drop
        # the per-feature MISSING_VALUE column.
        cols_to_include = [
            [1] * len(p.possible_values) + [0] for p in normalization_parameters
        ]
        cols_to_include = [x for col in cols_to_include for x in col]
        mask = self._store_parameter(
            parameters, "mask", np.array(cols_to_include, dtype=np.int32)
        )
        zero_vec = C2.ConstantFill(
            one_hot_output, value=0, dtype=caffe2_pb2.TensorProto.INT32
        )
        repeated_mask_bool = C2.Cast(
            C2.Add(zero_vec, mask, broadcast=1), to=core.DataType.BOOL
        )
        flattened_repeated_mask = C2.FlattenToVec(repeated_mask_bool)
        flattened_one_hot_proc = C2.NextBlob("flattened_one_hot_proc")
        flattened_one_hot_proc_indices = C2.NextBlob(
            "flattened_one_hot_proc_indices"
        )
        C2.net().BooleanMask(
            [flattened_one_hot, flattened_repeated_mask],
            [flattened_one_hot_proc, flattened_one_hot_proc_indices],
        )
        # Reshape the masked vector back to (rows, total_possible_values).
        one_hot_shape = C2.Shape(one_hot_output)
        shape_delta = self._store_parameter(
            parameters,
            "shape_delta",
            np.array([0, len(normalization_parameters)], dtype=np.int64),
        )
        target_shape = C2.Sub(one_hot_shape, shape_delta, broadcast=1)
        output_int_blob = C2.NextBlob("output_int_blob")
        output_int_blob_old_shape = C2.NextBlob("output_int_blob_old_shape")
        C2.net().Reshape(
            [flattened_one_hot_proc, target_shape],
            [output_int_blob, output_int_blob_old_shape],
        )
        output_blob = C2.Cast(output_int_blob, to=core.DataType.FLOAT)
        # ENUM handles missing values via the one-hot mask above, so it
        # returns early without the Where/NanCheck tail.
        return output_blob, parameters
    elif feature_type == identify_types.QUANTILE:
        # This transformation replaces a set of values with their quantile.
        # The quantile boundaries are provided in the normalization params.
        quantile_sizes = [len(norm.quantiles) for norm in normalization_parameters]
        num_boundaries_blob = self._store_parameter(
            parameters,
            "num_boundaries_blob",
            np.array(quantile_sizes, dtype=np.int32),
        )
        quantile_values = np.array([], dtype=np.float32)
        quantile_labels = np.array([], dtype=np.float32)
        for norm in normalization_parameters:
            quantile_values = np.append(
                quantile_values, np.array(norm.quantiles, dtype=np.float32)
            )
            # Labels are evenly spaced in [0, 1] across the boundaries.
            quantile_labels = np.append(
                quantile_labels,
                np.arange(len(norm.quantiles), dtype=np.float32)
                / float(len(norm.quantiles) - 1.0),
            )
        quantiles = np.vstack([quantile_values, quantile_labels]).T
        quantiles_blob = self._store_parameter(
            parameters, "quantiles_blob", quantiles
        )
        quantile_blob = C2.Percentile(blob, quantiles_blob, num_boundaries_blob)
        blob = quantile_blob
    elif (
        feature_type == identify_types.CONTINUOUS
        or feature_type == identify_types.BOXCOX
    ):
        boxcox_shifts = []
        boxcox_lambdas = []
        means = []
        stddevs = []
        for norm in normalization_parameters:
            if feature_type == identify_types.BOXCOX:
                assert (
                    norm.boxcox_shift is not None and norm.boxcox_lambda is not None
                )
                boxcox_shifts.append(norm.boxcox_shift)
                boxcox_lambdas.append(norm.boxcox_lambda)
            means.append(norm.mean)
            stddevs.append(norm.stddev)
        if feature_type == identify_types.BOXCOX:
            boxcox_shift_blob = self._store_parameter(
                parameters,
                "boxcox_shift",
                np.array(boxcox_shifts, dtype=np.float32),
            )
            # BUG FIX: the lambda blob was previously stored under the name
            # "boxcox_shift", colliding with/mislabeling the shift parameter.
            boxcox_lambda_blob = self._store_parameter(
                parameters,
                "boxcox_lambda",
                np.array(boxcox_lambdas, dtype=np.float32),
            )
            blob = C2.BatchBoxCox(blob, boxcox_lambda_blob, boxcox_shift_blob)
        # Z-score, then clip outliers to a fixed range.
        means_blob = self._store_parameter(
            parameters, "means_blob", np.array([means], dtype=np.float32)
        )
        stddevs_blob = self._store_parameter(
            parameters, "stddevs_blob", np.array([stddevs], dtype=np.float32)
        )
        blob = C2.Sub(blob, means_blob, broadcast=1, axis=0)
        blob = C2.Div(blob, stddevs_blob, broadcast=1, axis=0)
        blob = C2.Clip(blob, min=MIN_FEATURE_VALUE, max=MAX_FEATURE_VALUE)
    elif feature_type == identify_types.CONTINUOUS_ACTION:
        # Affinely rescale the serving range into (-1 + EPS, 1 - EPS).
        serving_min_value = np.array(
            [norm.min_value for norm in normalization_parameters], dtype=np.float32
        )
        serving_max_value = np.array(
            [norm.max_value for norm in normalization_parameters], dtype=np.float32
        )
        training_min_value = (
            np.ones(len(normalization_parameters), dtype=np.float32) * -1 + EPS
        )
        scaling_factor = (
            (np.ones(len(normalization_parameters), dtype=np.float32) - EPS)
            * 2
            / (serving_max_value - serving_min_value)
        )
        serving_min_blob = self._store_parameter(
            parameters, "serving_min_blob", serving_min_value
        )
        training_min_blob = self._store_parameter(
            parameters, "training_min_blob", training_min_value
        )
        scaling_factor_blob = self._store_parameter(
            parameters, "scaling_factor_blob", scaling_factor
        )
        blob = C2.Sub(blob, serving_min_blob, broadcast=1, axis=1)
        blob = C2.Mul(blob, scaling_factor_blob, broadcast=1, axis=1)
        blob = C2.Add(blob, training_min_blob, broadcast=1, axis=1)
        blob = C2.Clip(blob, min=-1 + EPS, max=1 - EPS)
    else:
        raise NotImplementedError("Invalid feature type: {}".format(feature_type))
    # Zero out entries that were MISSING_VALUE, then fail fast on NaNs.
    zeros = C2.ConstantFill(blob, value=0.0)
    output_blob = C2.Where(is_empty, zeros, blob)
    output_blob = C2.NanCheck(output_blob)
    return output_blob, parameters
def normalize_dense_matrix(
    self,
    input_matrix: str,
    features: List[int],
    normalization_parameters: Dict[int, NormalizationParameters],
    blobname_prefix: str,
    split_expensive_feature_groups: bool,
) -> Tuple[str, List[str]]:
    """
    Normalizes inputs according to parameters. Expects a dense matrix whose ith
    column corresponds to feature i.

    Note that the Caffe2 BatchBoxCox operator isn't implemented on CUDA GPU so
    we need to use a CPU context.

    :param input_matrix: Input matrix to normalize.
    :param features: Array that maps feature ids to column indices.
    :param normalization_parameters: Mapping from feature names to
        NormalizationParameters.
    :param blobname_prefix: Prefix for input blobs to norm_net.
    :param split_expensive_feature_groups: If True, feature groups whose
        preprocessing is expensive may be sliced into several smaller chunks
        (as decided by self._should_split_feature_group) and preprocessed
        chunk-by-chunk instead of as one wide slice.
    :return: Tuple of (name of the blob holding the concatenated normalized
        matrix, list of parameter blob names fed into the workspace).
    """
    # BatchBoxCox (used for BOXCOX features) has no CUDA implementation,
    # so force every op added here onto the CPU.
    with core.DeviceScope(core.DeviceOption(caffe2_pb2.CPU)):
        # Column offsets at which each feature type's contiguous group of
        # columns begins; assumes `features` is ordered so that columns of
        # the same type are adjacent — TODO confirm against the caller.
        feature_starts = self._get_type_boundaries(
            features, normalization_parameters
        )

        normalized_input_blobs = []
        parameters: List[str] = []
        for i, feature_type in enumerate(FEATURE_TYPES):
            start_index = feature_starts[i]
            # The last type's group extends to the final column.
            if (i + 1) == len(FEATURE_TYPES):
                end_index = len(normalization_parameters)
            else:
                end_index = feature_starts[i + 1]
            if start_index == end_index:
                continue  # No features of this type
            slices = []

            split_feature_group, split_intervals = self._should_split_feature_group(
                split_expensive_feature_groups, start_index, end_index, feature_type
            )

            if split_feature_group:
                # Slice the group into consecutive column intervals
                # [split_intervals[j], split_intervals[j+1]); each interval is
                # preprocessed independently below.
                for j in range(len(split_intervals) - 1):
                    slice_blob = self._get_input_blob_indexed(
                        blobname_prefix, feature_type, j
                    )
                    C2.net().Slice(
                        [input_matrix],
                        [slice_blob],
                        starts=[0, split_intervals[j]],
                        ends=[-1, split_intervals[j + 1]],
                    )
                    slices.append(
                        (slice_blob, split_intervals[j], split_intervals[j + 1])
                    )
            else:
                # One slice covering all columns of this feature type.
                sliced_input_features = self._get_input_blob(
                    blobname_prefix, feature_type
                )

                C2.net().Slice(
                    [input_matrix],
                    [sliced_input_features],
                    starts=[0, start_index],
                    ends=[-1, end_index],
                )
                slices.append((sliced_input_features, start_index, end_index))

            for (slice_blob, start, end) in slices:
                # preprocess_blob adds the normalization ops for this slice and
                # returns the normalized blob plus any parameter blobs it fed.
                normalized_input_blob, blob_parameters = self.preprocess_blob(
                    slice_blob,
                    [normalization_parameters[x] for x in features[start:end]],
                )
                logger.info(
                    "Processed split ({}, {}) for feature type {}".format(
                        start, end, feature_type
                    )
                )
                parameters.extend(blob_parameters)
                normalized_input_blobs.append(normalized_input_blob)
        for i, inp in enumerate(normalized_input_blobs):
            logger.info("input# {}: {}".format(i, inp))
        # Re-assemble the per-group normalized slices, side by side, into a
        # single dense matrix (C2.Concat also returns the split-info blob,
        # which is unused here).
        concatenated_input_blob, concatenated_input_blob_dim = C2.Concat(
            *normalized_input_blobs, axis=1
        )
        return concatenated_input_blob, parameters
def _forward_pass(
    cls, model, trainer, normalized_dense_matrix, actions, qnet_output_blob
):
    """
    Append the serving-side output ops to `model`: copy the Q-network's
    output into a blob named "q_values" and wire up the
    "output/string_weighted_multi_categorical_features.*" blobs that expose
    per-action Q-values in the string-weighted-multi-categorical output
    format expected by the serving layer.

    :param model: Caffe2 ModelHelper to which the output ops are added
        (installed as the active model via C2.set_model).
    :param trainer: Unused here. NOTE(review): kept for signature
        compatibility with callers — confirm before removing.
    :param normalized_dense_matrix: Unused here. NOTE(review): presumably
        the net input produced upstream — confirm against callers.
    :param actions: Sequence of action name strings; len(actions) must equal
        the number of columns in qnet_output_blob — TODO confirm.
    :param qnet_output_blob: Blob holding the Q-network output, assumed
        shape (batch_size, len(actions)) — TODO confirm.
    :return: Tuple of (list of parameter blob names fed into the workspace,
        name of the "q_values" output blob).
    """
    C2.set_model(model)

    parameters = []
    q_values = "q_values"
    C2.net().Copy([qnet_output_blob], [q_values])

    # Feed the action metadata as workspace blobs so they become net
    # parameters rather than graph-computed values.
    action_names = C2.NextBlob("action_names")
    parameters.append(action_names)
    workspace.FeedBlob(action_names, np.array(actions))
    action_range = C2.NextBlob("action_range")
    parameters.append(action_range)
    workspace.FeedBlob(action_range, np.array(list(range(len(actions)))))

    # Shape helpers derived from q_values:
    #   output_shape_row_count — 1-element blob holding the batch size
    #   output_row_shape       — first column of q_values, i.e. one entry per
    #                            batch row, used as a fill template below
    output_shape = C2.Shape(q_values)
    output_shape_row_count = C2.Slice(output_shape, starts=[0], ends=[1])

    output_row_shape = C2.Slice(q_values, starts=[0, 0], ends=[-1, 1])

    # One int64 key (0) per example. The FeedBlob calls below only seed
    # placeholder values; the nets written here overwrite them at run time.
    output_feature_keys = "output/string_weighted_multi_categorical_features.keys"
    workspace.FeedBlob(output_feature_keys, np.zeros(1, dtype=np.int64))
    output_feature_keys_matrix = C2.ConstantFill(
        output_row_shape, value=0, dtype=caffe2_pb2.TensorProto.INT64
    )
    # Note: sometimes we need to use an explicit output name, so we call
    # C2.net().Fn(...)
    C2.net().FlattenToVec([output_feature_keys_matrix], [output_feature_keys])

    # Each example contributes exactly one feature (length 1 per row).
    output_feature_lengths = (
        "output/string_weighted_multi_categorical_features.lengths"
    )
    workspace.FeedBlob(output_feature_lengths, np.zeros(1, dtype=np.int32))
    output_feature_lengths_matrix = C2.ConstantFill(
        output_row_shape, value=1, dtype=caffe2_pb2.TensorProto.INT32
    )
    C2.net().FlattenToVec([output_feature_lengths_matrix], [output_feature_lengths])

    # Per-example value keys: the action-name list tiled once per batch row.
    output_keys = "output/string_weighted_multi_categorical_features.values.keys"
    workspace.FeedBlob(output_keys, np.array(["a"]))
    C2.net().Tile([action_names, output_shape_row_count], [output_keys], axis=1)

    # Each example's single feature carries len(actions) weighted values.
    output_lengths_matrix = C2.ConstantFill(
        output_row_shape, value=len(actions), dtype=caffe2_pb2.TensorProto.INT32
    )
    output_lengths = (
        "output/string_weighted_multi_categorical_features.values.lengths"
    )
    workspace.FeedBlob(output_lengths, np.zeros(1, dtype=np.int32))
    C2.net().FlattenToVec([output_lengths_matrix], [output_lengths])

    # The weights themselves: q_values flattened row-major, matching the
    # tiled action-name keys above.
    output_values = (
        "output/string_weighted_multi_categorical_features.values.values"
    )
    workspace.FeedBlob(output_values, np.array([1.0]))
    C2.net().FlattenToVec([q_values], [output_values])
    return parameters, q_values