def get_relevance_model(self, feature_layer_keys_to_fns=None) -> RelevanceModel:
    """
    Creates a RankingModel by wiring together the interaction model,
    loss, scorer, metrics and optimizer defined by this pipeline's
    arguments and config files

    Parameters
    ----------
    feature_layer_keys_to_fns : dict of (str, function), optional
        Dictionary of function names mapped to tensorflow compatible
        function definitions that can be used in the InteractionModel
        as feature functions to transform input features.
        Defaults to an empty dict.

    Returns
    -------
    `RelevanceModel`
        RankingModel that can be used for training and evaluating

    Notes
    -----
    Override this method to create custom loss, scorer, model objects
    """
    # Use a None sentinel instead of a literal {} default: a mutable
    # default would be shared across every call of this method
    if feature_layer_keys_to_fns is None:
        feature_layer_keys_to_fns = {}

    # Define interaction model
    interaction_model: InteractionModel = UnivariateInteractionModel(
        feature_config=self.feature_config,
        feature_layer_keys_to_fns=feature_layer_keys_to_fns,
        tfrecord_type=self.tfrecord_type,
        max_sequence_size=self.args.max_sequence_size,
    )

    # Define loss object from loss key
    loss: RelevanceLossBase = loss_factory.get_loss(
        loss_key=self.loss_key, scoring_type=self.scoring_type)

    # Define scorer
    scorer: ScorerBase = RelevanceScorer.from_model_config_file(
        model_config_file=self.model_config_file,
        interaction_model=interaction_model,
        loss=loss,
        output_name=self.args.output_name,
    )

    # Define metrics objects from metrics keys
    metrics: List[Union[Type[Metric], str]] = [
        metric_factory.get_metric(metric_key=metric_key)
        for metric_key in self.metrics_keys
    ]

    # Define optimizer from the pipeline's learning-rate arguments
    optimizer: Optimizer = get_optimizer(
        optimizer_key=self.optimizer_key,
        learning_rate=self.args.learning_rate,
        learning_rate_decay=self.args.learning_rate_decay,
        learning_rate_decay_steps=self.args.learning_rate_decay_steps,
        gradient_clip_value=self.args.gradient_clip_value,
    )

    # Combine the above to define a RelevanceModel
    relevance_model: RelevanceModel = RankingModel(
        feature_config=self.feature_config,
        tfrecord_type=self.tfrecord_type,
        scorer=scorer,
        metrics=metrics,
        optimizer=optimizer,
        model_file=self.args.model_file,
        compile_keras_model=self.args.compile_keras_model,
        output_name=self.args.output_name,
        logger=self.logger,
    )

    return relevance_model
def test_cyclic_lr_in_training_pipeline(self):
    """Test a cyclic learning rate in model training

    Trains a small RankingModel for 2 epochs and checks that the
    per-batch learning rates recorded by LrCallback match the
    expected cyclic (triangular, decaying-amplitude) schedule.
    """
    # NOTE(review): this log path concatenates INPUT_DIR and 'ranklib'
    # with no separator, unlike the data_dir below which uses
    # INPUT_DIR + '/ranklib' — confirm this is intentional
    logger = logging_utils.setup_logging(
        reset=True,
        file_name=os.path.join(INPUT_DIR + 'ranklib', "output_log.csv"),
        log_to_file=True,
    )
    io = LocalIO()
    feature_config = self.parse_config(
        TFRecordTypeKey.SEQUENCE_EXAMPLE,
        self.feature_config_yaml_convert_to_clicks,
        io,
    )
    dataset = RelevanceDataset(
        data_dir=INPUT_DIR + '/ranklib',
        data_format=DataFormatKey.RANKLIB,
        feature_config=feature_config,
        tfrecord_type=TFRecordTypeKey.SEQUENCE_EXAMPLE,
        batch_size=2,
        file_io=io,
        preprocessing_keys_to_fns={},
        logger=logger,
        keep_additional_info=KEEP_ADDITIONAL_INFO,
        non_zero_features_only=NON_ZERO_FEATURES_ONLY,
        max_sequence_size=319,
    )

    # Define interaction model
    interaction_model: InteractionModel = UnivariateInteractionModel(
        feature_config=feature_config,
        feature_layer_keys_to_fns={},
        tfrecord_type=TFRecordTypeKey.SEQUENCE_EXAMPLE,
        max_sequence_size=319,
        file_io=io,
    )

    # Define loss object from loss key
    loss: RelevanceLossBase = loss_factory.get_loss(
        loss_key=LossKey.RANK_ONE_LISTNET,
        scoring_type=ScoringTypeKey.POINTWISE)

    # Define scorer
    scorer: ScorerBase = RelevanceScorer.from_model_config_file(
        model_config_file=self.model_config_file,
        interaction_model=interaction_model,
        loss=loss,
        logger=logger,
        file_io=io,
    )

    # Optimizer (with the cyclic LR schedule) comes from the model config
    optimizer: Optimizer = get_optimizer(
        model_config=io.read_yaml(self.model_config_file))

    # Combine the above to define a RelevanceModel
    relevance_model: RelevanceModel = RankingModel(
        feature_config=feature_config,
        tfrecord_type=TFRecordTypeKey.SEQUENCE_EXAMPLE,
        scorer=scorer,
        optimizer=optimizer,
        model_file=None,
        file_io=io,
        logger=logger,
    )

    # LrCallback records the learning rate seen on each batch
    lr_callback = LrCallback()
    relevance_model.model.fit(
        x=dataset.train,
        validation_data=dataset.validation,
        epochs=2,
        verbose=True,
        callbacks=[lr_callback],
    )
    lr_list = lr_callback.get_lr_list()

    # Expected triangular cyclic schedule with halving amplitude
    lr_gold = [
        0.001, 0.020800006, 0.040599994, 0.0604, 0.080199994, 0.1,
        0.080199994, 0.0604, 0.040599994, 0.020800006, 0.001,
        0.010900003, 0.020800006, 0.030699994, 0.040599994, 0.050499998,
        0.040599994, 0.030699994, 0.020800006, 0.010900003, 0.001,
        0.0059499955, 0.010900003, 0.015849996, 0.020800006, 0.02575,
        0.020800006, 0.015849996, 0.010900003, 0.0059499955, 0.001,
        0.0034749978, 0.0059500015, 0.008424998, 0.010900003, 0.013375,
        0.010900003, 0.008424998, 0.0059500015, 0.0034749978, 0.001,
        0.0022374988, 0.0034749978, 0.0047125025, 0.0059500015,
        0.0071875, 0.0059500015, 0.0047125025
    ]
    # Guard the length first: the original indexed lr_gold with
    # lr_list's length, which would raise IndexError (not a clean
    # assertion failure) if more batches were recorded than expected
    assert len(lr_list) <= len(lr_gold)
    for expected, actual in zip(lr_gold, lr_list):
        assert np.isclose(expected, actual)
def get_relevance_model(self, feature_layer_keys_to_fns=None) -> RelevanceModel:
    """
    Creates a RankingModel that can be used for training and evaluating

    Parameters
    ----------
    feature_layer_keys_to_fns : dict of (str, function), optional
        dictionary of function names mapped to tensorflow compatible
        function definitions that can now be used in the InteractionModel
        as a feature function to transform input features.
        Defaults to an empty dict.

    Returns
    -------
    `RankingModel`
        RankingModel that can be used for training and evaluating
        a ranking model

    Notes
    -----
    Override this method to create custom loss, scorer, model objects
    """
    # Use a None sentinel instead of a literal {} default: a mutable
    # default would be shared across every call of this method
    if feature_layer_keys_to_fns is None:
        feature_layer_keys_to_fns = {}

    # Define interaction model
    interaction_model: InteractionModel = UnivariateInteractionModel(
        feature_config=self.feature_config,
        feature_layer_keys_to_fns=feature_layer_keys_to_fns,
        tfrecord_type=self.tfrecord_type,
        max_sequence_size=self.args.max_sequence_size,
        file_io=self.file_io,
    )

    # Define loss object from loss key
    loss: RelevanceLossBase = loss_factory.get_loss(
        loss_key=self.loss_key, scoring_type=self.scoring_type)

    # Define scorer
    scorer: ScorerBase = RelevanceScorer(
        feature_config=self.feature_config,
        model_config=self.model_config,
        interaction_model=interaction_model,
        loss=loss,
        output_name=self.args.output_name,
        logger=self.logger,
        file_io=self.file_io,
    )

    # Define metrics objects from metrics keys
    metrics: List[Union[Type[Metric], str]] = [
        metric_factory.get_metric(metric_key=metric_key)
        for metric_key in self.metrics_keys
    ]

    # Optimizer (and any LR schedule) comes from the model config
    optimizer: Optimizer = get_optimizer(model_config=self.model_config)

    # Pick the model class based on the configured architecture
    if self.model_config["architecture_key"] == ArchitectureKey.LINEAR:
        RankingModelClass = LinearRankingModel
    else:
        RankingModelClass = RankingModel

    # Combine the above to define a RelevanceModel
    relevance_model: RelevanceModel = RankingModelClass(
        feature_config=self.feature_config,
        tfrecord_type=self.tfrecord_type,
        scorer=scorer,
        metrics=metrics,
        optimizer=optimizer,
        model_file=self.model_file,
        # Layer init/freeze specs arrive as string-encoded Python
        # literals on the CLI; literal_eval parses them safely
        initialize_layers_dict=ast.literal_eval(
            self.args.initialize_layers_dict),
        freeze_layers_list=ast.literal_eval(self.args.freeze_layers_list),
        compile_keras_model=self.args.compile_keras_model,
        output_name=self.args.output_name,
        # NOTE(review): uses self.local_io while the rest of this method
        # uses self.file_io — confirm this distinction is intentional
        file_io=self.local_io,
        logger=self.logger,
    )

    return relevance_model
def test_reduce_lr_on_plateau_in_training_pipeline(self):
    """Test reduce lr on plateau

    Trains a small RankingModel for 10 epochs with the
    reduce-on-plateau scheduler callback and checks the per-epoch
    learning rates recorded by LrCallback against the expected
    halving sequence.
    """
    self.model_config_file = MODEL_CONFIG_REDUCE_LR_ON_PLATEAU
    # NOTE(review): this log path concatenates INPUT_DIR and 'ranklib'
    # with no separator, unlike the data_dir below which uses
    # INPUT_DIR + '/ranklib' — confirm this is intentional
    logger = logging_utils.setup_logging(
        reset=True,
        file_name=os.path.join(INPUT_DIR + 'ranklib', "output_log.csv"),
        log_to_file=True,
    )
    io = LocalIO()
    feature_config = self.parse_config(
        TFRecordTypeKey.SEQUENCE_EXAMPLE,
        self.feature_config_yaml_convert_to_clicks,
        io,
    )
    model_config = io.read_yaml(self.model_config_file)
    dataset = RelevanceDataset(
        data_dir=INPUT_DIR + '/ranklib',
        data_format=DataFormatKey.RANKLIB,
        feature_config=feature_config,
        tfrecord_type=TFRecordTypeKey.SEQUENCE_EXAMPLE,
        batch_size=32,
        file_io=io,
        preprocessing_keys_to_fns={},
        logger=logger,
        keep_additional_info=KEEP_ADDITIONAL_INFO,
        non_zero_features_only=NON_ZERO_FEATURES_ONLY,
        max_sequence_size=319,
    )

    # Define interaction model
    interaction_model: InteractionModel = UnivariateInteractionModel(
        feature_config=feature_config,
        feature_layer_keys_to_fns={},
        tfrecord_type=TFRecordTypeKey.SEQUENCE_EXAMPLE,
        max_sequence_size=319,
        file_io=io,
    )

    # Define loss object from loss key
    loss: RelevanceLossBase = loss_factory.get_loss(
        loss_key=LossKey.RANK_ONE_LISTNET,
        scoring_type=ScoringTypeKey.POINTWISE
    )

    # Define scorer
    scorer: ScorerBase = RelevanceScorer.from_model_config_file(
        model_config_file=self.model_config_file,
        interaction_model=interaction_model,
        loss=loss,
        logger=logger,
        file_io=io,
    )

    # Optimizer comes from the reduce-on-plateau model config
    optimizer: Optimizer = get_optimizer(model_config=model_config)

    # Combine the above to define a RelevanceModel
    relevance_model: RelevanceModel = RankingModel(
        feature_config=feature_config,
        tfrecord_type=TFRecordTypeKey.SEQUENCE_EXAMPLE,
        scorer=scorer,
        optimizer=optimizer,
        model_file=None,
        file_io=io,
        logger=logger,
    )

    # Scheduler callback applies ReduceLROnPlateau; LrCallback records
    # the learning rate seen on each epoch
    lr_callback = LrCallback()
    callback_list = [
        relevance_model.define_scheduler_as_callback(None, model_config),
        lr_callback,
    ]
    relevance_model.model.fit(
        x=dataset.train.shard(2, 0),
        validation_data=dataset.validation.shard(2, 1),
        epochs=10,
        verbose=True,
        callbacks=callback_list,
    )
    lr_list = lr_callback.get_lr_reduce_on_plateau_list()

    # Expected halving schedule with a floor of 1.0
    lr_gold = [50.0, 50.0, 25.0, 12.5, 6.25, 3.125, 1.5625, 1.0, 1.0, 1.0]
    # Check lengths explicitly: np.isclose on mismatched lengths raises
    # a broadcast ValueError instead of a clean assertion failure
    assert len(lr_list) == len(lr_gold)
    assert np.all(np.isclose(lr_gold, lr_list))
def get_ranking_model(
    self,
    loss_key: str,
    metrics_keys: List,
    feature_config: FeatureConfig,
    feature_layer_keys_to_fns=None,
    initialize_layers_dict=None,
    freeze_layers_list=None,
) -> RelevanceModel:
    """
    Creates a RankingModel

    Parameters
    ----------
    loss_key : str
        Key identifying the loss to build via the loss factory
    metrics_keys : list
        Keys identifying the metrics to build via the metric factory
    feature_config : `FeatureConfig`
        Feature configuration used by the interaction model and the
        ranking model
    feature_layer_keys_to_fns : dict of (str, function), optional
        Dictionary of function names mapped to tensorflow compatible
        feature functions for the InteractionModel. Defaults to {}.
    initialize_layers_dict : dict, optional
        Layer-name to weight-file mapping used to initialize layers.
        Defaults to {}.
    freeze_layers_list : list, optional
        Names of layers to freeze during training. Defaults to [].

    Returns
    -------
    `RelevanceModel`
        RankingModel that can be used for training and evaluating

    Notes
    -----
    Override this method to create custom loss, scorer, model objects
    """
    # None sentinels replace the original mutable {} / [] defaults,
    # which would be shared across every call of this method
    if feature_layer_keys_to_fns is None:
        feature_layer_keys_to_fns = {}
    if initialize_layers_dict is None:
        initialize_layers_dict = {}
    if freeze_layers_list is None:
        freeze_layers_list = []

    # Define interaction model
    interaction_model: InteractionModel = UnivariateInteractionModel(
        feature_config=feature_config,
        feature_layer_keys_to_fns=feature_layer_keys_to_fns,
        tfrecord_type=self.args.tfrecord_type,
        max_sequence_size=self.args.max_sequence_size,
        file_io=self.file_io,
    )

    # Define loss object from loss key
    loss: RelevanceLossBase = loss_factory.get_loss(
        loss_key=loss_key, scoring_type=self.args.scoring_type)

    # Define scorer
    scorer: ScorerBase = RelevanceScorer.from_model_config_file(
        model_config_file=self.args.model_config,
        interaction_model=interaction_model,
        loss=loss,
        output_name=self.args.output_name,
        file_io=self.file_io,
    )

    # Define metrics objects from metrics keys
    metrics: List[Union[Type[Metric], str]] = [
        metric_factory.get_metric(metric_key=metric_key)
        for metric_key in metrics_keys
    ]

    # Define optimizer from the model config file
    optimizer: Optimizer = get_optimizer(
        file_io=self.file_io,
        model_config_file=self.args.model_config,
    )

    # Combine the above to define a RelevanceModel
    relevance_model: RelevanceModel = RankingModel(
        feature_config=feature_config,
        tfrecord_type=self.args.tfrecord_type,
        scorer=scorer,
        metrics=metrics,
        optimizer=optimizer,
        model_file=self.args.model_file,
        initialize_layers_dict=initialize_layers_dict,
        freeze_layers_list=freeze_layers_list,
        compile_keras_model=self.args.compile_keras_model,
        output_name=self.args.output_name,
        logger=self.logger,
        file_io=self.file_io,
    )

    return relevance_model