def update_model(self, states: str, actions: str, q_vals_target: str) -> None:
    """
    Takes in states, actions, and target q values. Updates the model:
        Runs the forward pass, computing Q(states, actions).
            Q(states, actions)[i][j] is an approximation of Q*(states[i], action_j).
        Computes the loss of Q(states, actions) with respect to q_vals_target.
        Updates the Q-network's weights according to the loss and optimizer.

    :param states: Numpy array with shape (batch_size, state_dim). The ith
        row is a representation of the ith transition's state.
    :param actions: Numpy array with shape (batch_size, action_dim). The ith
        row is a representation of the ith transition's action.
    :param q_vals_target: Numpy array with shape (batch_size, 1). The ith
        row is the label to train against for the data from the ith transition.
    """
    model = C2.model()
    q_vals_target = C2.StopGradient(q_vals_target)
    q_values = C2.NextBlob("train_output")
    state_action_pairs, _ = C2.Concat(states, actions, axis=1)
    self.ml_trainer.make_forward_pass_ops(
        model, state_action_pairs, q_values, False
    )

    self.loss_blob = self.ml_trainer.generateLossOps(model, q_values, q_vals_target)
    model.AddGradientOperators([self.loss_blob])
    for param in model.params:
        if param in model.param_to_grad:
            param_grad = model.param_to_grad[param]
            param_grad = C2.NanCheck(param_grad)
    self.ml_trainer.addParameterUpdateOps(model)
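# A minimal sketch (not part of the original source) of how the q_vals_target
# labels fed to update_model are typically formed for a parametric-action DQN:
# a one-step Bellman backup over the sampled batch. The array names
# (rewards, next_q_max, not_terminal) and the default discount are
# illustrative assumptions, not the trainer's actual attributes.
import numpy as np

def make_q_targets(rewards, next_q_max, not_terminal, discount=0.99):
    # rewards, next_q_max, not_terminal: numpy arrays of shape (batch_size, 1).
    # Target is r + gamma * max_a' Q(s', a'), zeroed out at terminal states.
    return (rewards + discount * not_terminal * next_q_max).astype(np.float32)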
def update_model(self, states: str, actions: str, q_vals_target: str) -> None:
    """
    Takes in states, actions, and target q values. Updates the model:
        Runs the forward pass, computing Q(states, actions).
            Q(states, actions)[i][j] is an approximation of Q*(states[i], action_j).
        Computes the loss of Q(states, actions) with respect to q_vals_target.
        Updates the Q-network's weights according to the loss and optimizer.

    :param states: Numpy array with shape (batch_size, state_dim). The ith
        row is a representation of the ith transition's state.
    :param actions: Numpy array with shape (batch_size, action_dim). The ith
        row contains the one-hotted representation of the ith action.
    :param q_vals_target: Numpy array with shape (batch_size, 1). The ith
        row is the label to train against for the data from the ith transition.
    """
    model = C2.model()
    q_vals_target = C2.StopGradient(q_vals_target)
    output_blob = C2.NextBlob("train_output")

    if self.conv_ml_trainer is not None:
        conv_output_blob = C2.NextBlob("conv_output")
        self.conv_ml_trainer.make_conv_pass_ops(model, states, conv_output_blob)
        states = conv_output_blob

    self.ml_trainer.make_forward_pass_ops(model, states, output_blob, False)

    q_val_select = C2.ReduceBackSum(C2.Mul(output_blob, actions))
    q_values = C2.ExpandDims(q_val_select, dims=[1])

    self.loss_blob = self.ml_trainer.generateLossOps(model, q_values, q_vals_target)
    model.AddGradientOperators([self.loss_blob])
    for param in model.params:
        if param in model.param_to_grad:
            param_grad = model.param_to_grad[param]
            param_grad = C2.NanCheck(param_grad)
    self.ml_trainer.addParameterUpdateOps(model)
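# A numpy sketch (an assumption for illustration, not library code) of what the
# C2.Mul / C2.ReduceBackSum / C2.ExpandDims sequence above computes: selecting
# each row's Q-value for the action actually taken via the one-hot action mask.
import numpy as np

def select_taken_q_values(q_all, actions_one_hot):
    # q_all: (batch_size, num_actions) Q-values from the forward pass.
    # actions_one_hot: (batch_size, num_actions) one-hot action indicators.
    # Returns shape (batch_size, 1), matching the q_vals_target labels.
    return np.sum(q_all * actions_one_hot, axis=1, keepdims=True)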
def normalize_dense_matrix(
    self,
    input_matrix: str,
    features: List[str],
    normalization_parameters: Dict[str, NormalizationParameters],
    blobname_prefix: str,
) -> Tuple[str, List[str]]:
    """
    Normalizes inputs according to parameters. Expects a dense matrix whose
    ith column corresponds to feature i.

    Note that the Caffe2 BatchBoxCox operator isn't implemented on CUDA GPU,
    so we need to use a CPU context.

    :param input_matrix: Input matrix to normalize.
    :param features: Array that maps feature ids to column indices.
    :param normalization_parameters: Mapping from feature names to
        NormalizationParameters.
    :param blobname_prefix: Prefix for input blobs to norm_net.
    """
    with core.DeviceScope(core.DeviceOption(caffe2_pb2.CPU)):
        feature_starts = self._get_type_boundaries(
            features, normalization_parameters
        )

        normalized_input_blobs = []
        parameters: List[str] = []
        for i, feature_type in enumerate(FEATURE_TYPES):
            start_index = feature_starts[i]
            if (i + 1) == len(FEATURE_TYPES):
                end_index = len(normalization_parameters)
            else:
                end_index = feature_starts[i + 1]
            if start_index == end_index:
                continue  # No features of this type
            sliced_input_features = self._get_input_blob(
                blobname_prefix, feature_type
            )
            C2.net().Slice(
                [input_matrix],
                [sliced_input_features],
                starts=[0, start_index],
                ends=[-1, end_index],
            )
            normalized_input_blob, blob_parameters = self.preprocess_blob(
                sliced_input_features,
                [
                    normalization_parameters[x]
                    for x in features[start_index:end_index]
                ],
            )
            parameters.extend(blob_parameters)
            normalized_input_blobs.append(normalized_input_blob)
        for i, inp in enumerate(normalized_input_blobs):
            logger.info("input# {}: {}".format(i, inp))
        concatenated_input_blob, concatenated_input_blob_dim = C2.Concat(
            *normalized_input_blobs, axis=1
        )
        concatenated_input_blob = C2.NanCheck(concatenated_input_blob)
        return concatenated_input_blob, parameters
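# An illustrative numpy sketch (an assumption, not the library's code path) of
# the slicing logic above: columns are grouped by feature type using the
# computed type boundaries, each contiguous block is normalized on its own, and
# the normalized blocks are concatenated back along axis 1. normalize_block is
# a hypothetical stand-in for the per-type work done by preprocess_blob.
import numpy as np

def normalize_dense_matrix_numpy(input_matrix, feature_starts, normalize_block):
    # input_matrix: (batch_size, num_features), columns sorted by feature type.
    # feature_starts: start column index for each feature type, in order.
    blocks = []
    boundaries = list(feature_starts) + [input_matrix.shape[1]]
    for start, end in zip(boundaries[:-1], boundaries[1:]):
        if start == end:
            continue  # no features of this type
        blocks.append(normalize_block(input_matrix[:, start:end]))
    return np.concatenate(blocks, axis=1)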
def update_model(self, states: str, actions: str, q_vals_target: str) -> None:
    """
    Takes in states, actions, and target q values. Updates the model:
        Runs the forward pass, computing Q(states, actions).
            Q(states, actions)[i][j] is an approximation of Q*(states[i], action_j).
        Computes the loss of Q(states, actions) with respect to q_vals_target.
        Updates the Q-network's weights according to the loss and optimizer.

    :param states: Numpy array with shape (batch_size, state_dim). The ith
        row is a representation of the ith transition's state.
    :param actions: Numpy array with shape (batch_size, action_dim). The ith
        row contains the one-hotted representation of the ith action.
    :param q_vals_target: Numpy array with shape (batch_size, 1). The ith
        row is the label to train against for the data from the ith transition.
    """
    model = C2.model()
    q_vals_target = C2.StopGradient(q_vals_target)
    output_blob = C2.NextBlob("train_output")
    MakeForwardPassOps(
        model,
        self.model_id,
        states,
        output_blob,
        self.weights,
        self.biases,
        self.activations,
        self.layers,
        self.dropout_ratio,
        False,
    )
    q_val_select = C2.ReduceBackSum(C2.Mul(output_blob, actions))
    q_values = C2.ExpandDims(q_val_select, dims=[1])

    self.loss_blob = GenerateLossOps(model, q_values, q_vals_target)
    model.AddGradientOperators([self.loss_blob])
    for param in model.params:
        if param in model.param_to_grad:
            param_grad = model.param_to_grad[param]
            param_grad = C2.NanCheck(param_grad)
    AddParameterUpdateOps(
        model,
        optimizer_input=self.optimizer,
        base_learning_rate=self.learning_rate,
        gamma=self.gamma,
        policy=self.lr_policy,
    )
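# A hedged numpy sketch of what the loss step above amounts to, assuming
# GenerateLossOps builds a mean-squared-error loss between the selected
# Q-values and the stop-gradient targets. This is an assumption for
# illustration; the real AddParameterUpdateOps dispatches to the configured
# Caffe2 optimizer and learning-rate policy rather than a hand-written update.
import numpy as np

def mse_loss_and_grad(q_values, q_vals_target):
    # Both arrays have shape (batch_size, 1); the target carries no gradient.
    diff = q_values - q_vals_target
    loss = float(np.mean(diff ** 2))
    grad_wrt_q_values = 2.0 * diff / diff.shape[0]
    return loss, grad_wrt_q_values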
def preprocess_blob(self, blob, normalization_parameters):
    """
    Takes in a blob and its normalization parameters. Outputs a tuple whose
    first element is a blob containing the normalized input blob and whose
    second element contains all the parameter blobs used to create it.

    Call this from a CPU context and ensure the input blob exists in it.
    """
    parameters: List[str] = []
    MISSING_U = self._store_parameter(
        parameters, "MISSING_U", np.array([MISSING_VALUE + 1e-4], dtype=np.float32)
    )
    MISSING_L = self._store_parameter(
        parameters, "MISSING_L", np.array([MISSING_VALUE - 1e-4], dtype=np.float32)
    )

    is_empty_l = C2.GT(blob, MISSING_L, broadcast=1)
    is_empty_u = C2.LT(blob, MISSING_U, broadcast=1)
    is_empty = C2.And(is_empty_l, is_empty_u)

    for i in range(len(normalization_parameters) - 1):
        if (
            normalization_parameters[i].feature_type
            != normalization_parameters[i + 1].feature_type
        ):
            raise Exception(
                "Only one feature type is allowed per call to preprocess_blob!"
            )
    feature_type = normalization_parameters[0].feature_type
    if feature_type == identify_types.BINARY:
        TOLERANCE = self._store_parameter(
            parameters, "TOLERANCE", np.array(1e-3, dtype=np.float32)
        )
        ZERO = self._store_parameter(
            parameters, "ZERO", np.array([0], dtype=np.float32)
        )
        is_gt_zero = C2.GT(blob, C2.Add(ZERO, TOLERANCE, broadcast=1), broadcast=1)
        is_lt_zero = C2.LT(blob, C2.Sub(ZERO, TOLERANCE, broadcast=1), broadcast=1)
        bool_blob = C2.Or(is_gt_zero, is_lt_zero)
        blob = C2.Cast(bool_blob, to=caffe2_pb2.TensorProto.FLOAT)
    elif feature_type == identify_types.PROBABILITY:
        ONE = self._store_parameter(
            parameters, "ONE", np.array([1], dtype=np.float32)
        )
        NEGATIVE_ONE = self._store_parameter(
            parameters, "NEGATIVE_ONE", np.array([-1], dtype=np.float32)
        )
        clipped = C2.Clip(blob, min=0.01, max=0.99)
        blob = C2.Mul(
            C2.Log(C2.Sub(C2.Pow(clipped, exponent=-1.0), ONE, broadcast=1)),
            NEGATIVE_ONE,
            broadcast=1,
        )
    elif feature_type == identify_types.ENUM:
        for parameter in normalization_parameters:
            possible_values = parameter.possible_values
            for x in possible_values:
                if x < 0:
                    logger.fatal(
                        "Invalid enum possible value for feature: "
                        + str(x)
                        + " "
                        + str(parameter.possible_values)
                    )
                    raise Exception(
                        "Invalid enum possible value for feature "
                        + blob
                        + ": "
                        + str(x)
                        + " "
                        + str(parameter.possible_values)
                    )

        int_blob = C2.Cast(blob, to=core.DataType.INT32)

        # Batch one hot transform with MISSING_VALUE as a possible value
        feature_lengths = [
            len(p.possible_values) + 1 for p in normalization_parameters
        ]
        feature_lengths_blob = self._store_parameter(
            parameters,
            "feature_lengths_blob",
            np.array(feature_lengths, dtype=np.int32),
        )

        feature_values = [
            x
            for p in normalization_parameters
            for x in p.possible_values + [int(MISSING_VALUE)]
        ]
        feature_values_blob = self._store_parameter(
            parameters,
            "feature_values_blob",
            np.array(feature_values, dtype=np.int32),
        )

        one_hot_output = C2.BatchOneHot(
            int_blob, feature_lengths_blob, feature_values_blob
        )
        flattened_one_hot = C2.FlattenToVec(one_hot_output)

        # Remove missing values with a mask
        cols_to_include = [
            [1] * len(p.possible_values) + [0] for p in normalization_parameters
        ]
        cols_to_include = [x for col in cols_to_include for x in col]
        mask = self._store_parameter(
            parameters, "mask", np.array(cols_to_include, dtype=np.int32)
        )

        zero_vec = C2.ConstantFill(
            one_hot_output, value=0, dtype=caffe2_pb2.TensorProto.INT32
        )
        repeated_mask_bool = C2.Cast(
            C2.Add(zero_vec, mask, broadcast=1), to=core.DataType.BOOL
        )
        flattened_repeated_mask = C2.FlattenToVec(repeated_mask_bool)

        flattened_one_hot_proc = C2.NextBlob("flattened_one_hot_proc")
C2.NextBlob("flattened_one_hot_proc") flattened_one_hot_proc_indices = C2.NextBlob( "flattened_one_hot_proc_indices" ) C2.net().BooleanMask( [flattened_one_hot, flattened_repeated_mask], [flattened_one_hot_proc, flattened_one_hot_proc_indices], ) one_hot_shape = C2.Shape(one_hot_output) shape_delta = self._store_parameter( parameters, "shape_delta", np.array([0, len(normalization_parameters)], dtype=np.int64), ) target_shape = C2.Sub(one_hot_shape, shape_delta, broadcast=1) output_int_blob = C2.NextBlob("output_int_blob") output_int_blob_old_shape = C2.NextBlob("output_int_blob_old_shape") C2.net().Reshape( [flattened_one_hot_proc, target_shape], [output_int_blob, output_int_blob_old_shape], ) output_blob = C2.Cast(output_int_blob, to=core.DataType.FLOAT) return output_blob, parameters elif feature_type == identify_types.QUANTILE: # This transformation replaces a set of values with their quantile. # The quantile boundaries are provided in the normalization params. quantile_sizes = [len(norm.quantiles) for norm in normalization_parameters] num_boundaries_blob = self._store_parameter( parameters, "num_boundaries_blob", np.array(quantile_sizes, dtype=np.int32), ) quantile_values = np.array([], dtype=np.float32) quantile_labels = np.array([], dtype=np.float32) for norm in normalization_parameters: quantile_values = np.append( quantile_values, np.array(norm.quantiles, dtype=np.float32) ) quantile_labels = np.append( quantile_labels, np.arange(len(norm.quantiles), dtype=np.float32) / float(len(norm.quantiles) - 1.0), ) quantiles = np.vstack([quantile_values, quantile_labels]).T quantiles_blob = self._store_parameter( parameters, "quantiles_blob", quantiles ) quantile_blob = C2.Percentile(blob, quantiles_blob, num_boundaries_blob) blob = quantile_blob elif ( feature_type == identify_types.CONTINUOUS or feature_type == identify_types.BOXCOX ): boxcox_shifts = [] boxcox_lambdas = [] means = [] stddevs = [] for norm in normalization_parameters: if feature_type == identify_types.BOXCOX: assert ( norm.boxcox_shift is not None and norm.boxcox_lambda is not None ) boxcox_shifts.append(norm.boxcox_shift) boxcox_lambdas.append(norm.boxcox_lambda) means.append(norm.mean) stddevs.append(norm.stddev) if feature_type == identify_types.BOXCOX: boxcox_shift_blob = self._store_parameter( parameters, "boxcox_shift", np.array(boxcox_shifts, dtype=np.float32), ) boxcox_lambda_blob = self._store_parameter( parameters, "boxcox_shift", np.array(boxcox_lambdas, dtype=np.float32), ) blob = C2.BatchBoxCox(blob, boxcox_lambda_blob, boxcox_shift_blob) means_blob = self._store_parameter( parameters, "means_blob", np.array([means], dtype=np.float32) ) stddevs_blob = self._store_parameter( parameters, "stddevs_blob", np.array([stddevs], dtype=np.float32) ) blob = C2.Sub(blob, means_blob, broadcast=1, axis=0) blob = C2.Div(blob, stddevs_blob, broadcast=1, axis=0) blob = C2.Clip(blob, min=MIN_FEATURE_VALUE, max=MAX_FEATURE_VALUE) elif feature_type == identify_types.CONTINUOUS_ACTION: serving_min_value = np.array( [norm.min_value for norm in normalization_parameters], dtype=np.float32 ) serving_max_value = np.array( [norm.max_value for norm in normalization_parameters], dtype=np.float32 ) training_min_value = ( np.ones(len(normalization_parameters), dtype=np.float32) * -1 + EPS ) scaling_factor = ( (np.ones(len(normalization_parameters), dtype=np.float32) - EPS) * 2 / (serving_max_value - serving_min_value) ) serving_min_blob = self._store_parameter( parameters, "serving_min_blob", serving_min_value ) training_min_blob = 
        scaling_factor_blob = self._store_parameter(
            parameters, "scaling_factor_blob", scaling_factor
        )

        blob = C2.Sub(blob, serving_min_blob, broadcast=1, axis=1)
        blob = C2.Mul(blob, scaling_factor_blob, broadcast=1, axis=1)
        blob = C2.Add(blob, training_min_blob, broadcast=1, axis=1)
        blob = C2.Clip(blob, min=-1 + EPS, max=1 - EPS)
    else:
        raise NotImplementedError("Invalid feature type: {}".format(feature_type))

    zeros = C2.ConstantFill(blob, value=0.0)
    output_blob = C2.Where(is_empty, zeros, blob)
    output_blob = C2.NanCheck(output_blob)
    return output_blob, parameters
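# A numpy sketch (an assumption, not the production operator graph) of the
# CONTINUOUS branch plus the missing-value handling at the end of
# preprocess_blob: z-score the columns, clip to the allowed feature range, and
# zero out entries that were marked as missing. missing_value and the clip
# bounds stand in for MISSING_VALUE, MIN_FEATURE_VALUE, and MAX_FEATURE_VALUE;
# the per-column means/stddevs are taken as given inputs here.
import numpy as np

def normalize_continuous_numpy(
    x, means, stddevs, missing_value, min_value=-10.0, max_value=10.0
):
    # x: (batch_size, num_features); means/stddevs: (num_features,) arrays.
    is_missing = np.isclose(x, missing_value, atol=1e-4)
    normalized = np.clip((x - means) / stddevs, min_value, max_value)
    return np.where(is_missing, 0.0, normalized)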