def CNorm(vst_onlyTokens, dl_terms, dl_associations, vso, nbEpochs=30, batchSize=64,
          l_numberOfFilters=[4000], l_filterSizes=[1], phraseMaxSize=15):

    # Preparing data for SLFNN and S-CNN components:
    dataSCNN, labels, l_unkownTokens, l_uncompleteExpressions = prepare2D_data(
        vst_onlyTokens, dl_terms, dl_associations, vso, phraseMaxSize)

    # SLFNN input: average of the non-padding token embeddings of each phrase
    dataSLFNN = numpy.zeros((dataSCNN.shape[0], dataSCNN.shape[2]))
    for i in range(dataSCNN.shape[0]):
        numberOfToken = 0
        for embedding in dataSCNN[i]:
            if numpy.any(embedding):
                numberOfToken += 1
                dataSLFNN[i] += embedding
        if numberOfToken > 0:
            dataSLFNN[i] = dataSLFNN[i] / numberOfToken

    # Input layers:
    inputLP = Input(shape=(dataSLFNN.shape[1],))
    inputCNN = Input(shape=(dataSCNN.shape[1], dataSCNN.shape[2]))

    # SLFNN component:
    ontoSpaceSize = labels.shape[2]
    denseLP = layers.Dense(units=ontoSpaceSize, use_bias=True,
                           kernel_initializer=initializers.GlorotUniform())(inputLP)
    modelLP = Model(inputs=inputLP, outputs=denseLP)

    # Shallow-CNN component:
    l_subLayers = list()
    for i, filterSize in enumerate(l_filterSizes):
        convLayer = layers.Conv1D(l_numberOfFilters[i], filterSize, strides=1,
                                  kernel_initializer=initializers.GlorotUniform())(inputCNN)
        outputSize = phraseMaxSize - filterSize + 1
        pool = layers.MaxPool1D(pool_size=outputSize)(convLayer)
        activationLayer = layers.LeakyReLU(alpha=0.3)(pool)
        l_subLayers.append(activationLayer)

    if len(l_filterSizes) > 1:
        concatenateLayer = layers.Concatenate(axis=-1)(l_subLayers)  # concatenating on the last dimension
    else:
        concatenateLayer = l_subLayers[0]

    denseLayer = layers.Dense(ontoSpaceSize,
                              kernel_initializer=initializers.GlorotUniform())(concatenateLayer)
    modelCNN = Model(inputs=inputCNN, outputs=denseLayer)
    convModel = Model(inputs=inputCNN, outputs=concatenateLayer)

    # Note: this Sequential wrapper is never used below; the trained model is fullModel
    fullmodel = models.Sequential()
    fullmodel.add(convModel)

    # Combination of the two components:
    combinedLayer = layers.average([modelLP.output, modelCNN.output])
    fullModel = Model(inputs=[inputLP, inputCNN], outputs=combinedLayer)
    fullModel.summary()

    # Compile and train:
    fullModel.compile(optimizer=optimizers.Nadam(),
                      loss=losses.LogCosh(),
                      metrics=[metrics.CosineSimilarity(), metrics.MeanSquaredError()])
    fullModel.fit([dataSLFNN, dataSCNN], labels, epochs=nbEpochs, batch_size=batchSize)

    return fullModel, vso, l_unkownTokens
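# A minimal standalone sketch of how the SLFNN input above is built: each phrase's
# non-padding token embeddings are averaged. This is only an equivalent, vectorized
# illustration of the accumulation loop in CNorm, on toy data (shapes are made up).
import numpy

# 2 phrases, up to 4 token slots, embeddings of size 3; all-zero rows are padding.
dataSCNN_toy = numpy.array([
    [[1.0, 2.0, 3.0], [3.0, 2.0, 1.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]],
    [[2.0, 2.0, 2.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]],
])

mask = numpy.any(dataSCNN_toy, axis=-1)                         # True where a real token sits
counts = numpy.maximum(mask.sum(axis=1, keepdims=True), 1)      # avoid division by zero
dataSLFNN_toy = dataSCNN_toy.sum(axis=1) / counts               # per-phrase mean embedding

print(dataSLFNN_toy)
# [[2. 2. 2.]
#  [2. 2. 2.]]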
def scheduler(epoch):
    if epoch <= 2:
        return 0.0005 * strategy.num_replicas_in_sync
    elif epoch <= 4:
        return 0.0002 * strategy.num_replicas_in_sync
    else:
        return 0.0001 * strategy.num_replicas_in_sync


# Add this for TensorBoard
# callbacks = [tf.keras.callbacks.TensorBoard(log_dir='./logs', profile_batch=0)]
callbacks = [tf.keras.callbacks.LearningRateScheduler(scheduler)]

print('starting at ', time())
history = model.fit(train, steps_per_epoch=steps_per_epoch,
                    epochs=num_epochs, callbacks=callbacks, verbose=1)
print("finishing at:", time())

print("evaluating:", time())
model.evaluate(test, steps=validation_steps)
print("evaluated at:", time())

model_full_path = "/tmp/mymodel" + str(worker_number) + ".h5"
print("Training finished, now saving the model in h5 format to: " + model_full_path)
model.save(model_full_path, save_format="h5")
print("model saved.\n")

# print("..saving the model in tf format (TF 2.0) to: " + model_full_path)
# tf.keras.models.save_model(model, "/tmp/mymodel" + str(worker_number) + ".tf", save_format='tf')
# print("model saved.\n")
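# A quick standalone check of the step schedule above. `strategy` is defined elsewhere,
# so a hypothetical replica count of 4 is used here purely for illustration.
num_replicas_in_sync = 4  # stand-in for strategy.num_replicas_in_sync

def step_schedule(epoch):
    if epoch <= 2:
        return 0.0005 * num_replicas_in_sync
    elif epoch <= 4:
        return 0.0002 * num_replicas_in_sync
    return 0.0001 * num_replicas_in_sync

for epoch in range(6):
    print(epoch, step_schedule(epoch))
# epochs 0-2 -> 0.002, epochs 3-4 -> 0.0008, epoch 5+ -> 0.0004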
if config == 1:
    encoder_output, attention_weights = SelfAttention(
        size=128, num_hops=10, use_penalization=False)(encoder_output)
elif config == 2:
    encoder_output, attention_weights = Attention(
        context='many-to-one', alignment_type='global')(attention_input)
    encoder_output = Flatten()(encoder_output)
elif config == 3:
    encoder_output, attention_weights = Attention(
        context='many-to-one', alignment_type='local-p*',
        window_width=100, score_function='scaled_dot')(attention_input)
    encoder_output = Flatten()(encoder_output)

# Prediction Layer
Y = Dense(units=num_categories, activation='softmax')(encoder_output)

# Compile model
model = Model(inputs=X, outputs=Y)
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model.summary())

# Train multi-class classification model
model.fit(x=X_train, y=Y_train,
          validation_data=(X_test, Y_test),
          epochs=num_epochs, batch_size=batch_size)
class RelevanceModel: def __init__( self, feature_config: FeatureConfig, tfrecord_type: str, file_io: FileIO, scorer: Optional[ScorerBase] = None, metrics: List[Union[Type[kmetrics.Metric], str]] = [], optimizer: Optional[Optimizer] = None, model_file: Optional[str] = None, initialize_layers_dict: dict = {}, freeze_layers_list: list = [], compile_keras_model: bool = False, output_name: str = "score", logger=None, ): """ Constructor to instantiate a RelevanceModel that can be used for training and evaluating the search ML task Parameters ---------- feature_config : `FeatureConfig` object FeatureConfig object that defines the features to be loaded in the dataset and the preprocessing functions to be applied to each of them tfrecord_type : {"example", "sequence_example"} Type of the TFRecord protobuf message used for TFRecordDataset file_io : `FileIO` object file I/O handler objects for reading and writing data scorer : `ScorerBase` object Scorer object that wraps an InteractionModel and converts input features into scores metrics : list List of keras Metric classes that will be used for evaluating the trained model optimizer : `Optimizer` Tensorflow keras optimizer to be used for training the model model_file : str, optional Path to pretrained model file to be loaded for evaluation or retraining initialize_layers_dict : dict, optional Dictionary of tensorflow layer names mapped to the path of pretrained weights Use this for transfer learning with pretrained weights freeze_layers_list : list, optional List of model layer names to be frozen Use this for freezing pretrained weights from other ml4ir models compile_keras_model : bool, optional Whether the keras model loaded from disk should be compiled with loss, metrics and an optimizer output_name : str, optional Name of the output tensorflow node that captures the score logger : `Logger`, optional logging handler for status messages """ self.feature_config: FeatureConfig = feature_config self.logger: Logger = logger self.output_name = output_name self.scorer = scorer self.tfrecord_type = tfrecord_type self.file_io = file_io if scorer: self.max_sequence_size = scorer.interaction_model.max_sequence_size else: self.max_sequence_size = 0 # Load/Build Model if model_file and not compile_keras_model: """ If a model file is specified, load it without compiling into a keras model NOTE: This will allow the model to be only used for inference and cannot be used for retraining. 
""" self.model: Model = self.load(model_file) self.is_compiled = False else: """ Specify inputs to the model Individual input nodes are defined for each feature Each data point represents features for all records in a single query """ inputs: Dict[str, Input] = feature_config.define_inputs() scores, train_features, metadata_features = scorer(inputs) # Create model with functional Keras API self.model = Model(inputs=inputs, outputs={self.output_name: scores}) self.model.output_names = [self.output_name] # Get loss fn loss_fn = scorer.loss.get_loss_fn(**metadata_features) # Get metric objects metrics_impl: List[Union[str, kmetrics.Metric]] = get_metrics_impl( metrics=metrics, feature_config=feature_config, metadata_features=metadata_features ) # Compile model """ NOTE: Related Github issue: https://github.com/tensorflow/probability/issues/519 """ self.model.compile( optimizer=optimizer, loss=loss_fn, metrics=metrics_impl, experimental_run_tf_function=False, ) # Write model summary to logs model_summary = list() self.model.summary(print_fn=lambda x: model_summary.append(x)) if self.logger: self.logger.info("\n".join(model_summary)) if model_file: """ If model file is specified, load the weights from the SavedModel NOTE: The architecture, loss and metrics of self.model need to be the same as the loaded SavedModel """ self.load_weights(model_file) # Initialize layer weights for layer_name, layer_file in initialize_layers_dict.items(): layer = self.model.get_layer(layer_name) layer.set_weights(self.file_io.load_numpy_array(layer_file, unzip=True)) self.logger.info("Setting {} weights from {}".format(layer_name, layer_file)) # Freeze layer weights for layer_name in freeze_layers_list: layer = self.model.get_layer(layer_name) layer.trainable = False self.logger.info("Freezing {} layer".format(layer_name)) self.is_compiled = True @classmethod def from_relevance_scorer( cls, feature_config: FeatureConfig, interaction_model: InteractionModel, model_config: dict, loss: RelevanceLossBase, metrics: List[Union[kmetrics.Metric, str]], optimizer: Optimizer, tfrecord_type: str, file_io: FileIO, model_file: Optional[str] = None, initialize_layers_dict: dict = {}, freeze_layers_list: list = [], compile_keras_model: bool = False, output_name: str = "score", logger=None, ): """ Create a RelevanceModel with default Scorer function constructed from an InteractionModel Parameters ---------- feature_config : `FeatureConfig` object FeatureConfig object that defines the features to be loaded in the dataset and the preprocessing functions to be applied to each of them tfrecord_type : {"example", "sequence_example"} Type of the TFRecord protobuf message used for TFRecordDataset file_io : `FileIO` object file I/O handler objects for reading and writing data interaction_model : `InteractionModel` object InteractionModel object that converts input features into a dense feature representation loss : `RelevanceLossBase` object Loss object defining the final activation layer and the loss function metrics : list List of keras Metric classes that will be used for evaluating the trained model optimizer : `Optimizer` Tensorflow keras optimizer to be used for training the model model_file : str, optional Path to pretrained model file to be loaded for evaluation or retraining initialize_layers_dict : dict, optional Dictionary of tensorflow layer names mapped to the path of pretrained weights Use this for transfer learning with pretrained weights freeze_layers_list : list, optional List of model layer names to be frozen Use this 
for freezing pretrained weights from other ml4ir models compile_keras_model : bool, optional Whether the keras model loaded from disk should be compiled with loss, metrics and an optimizer output_name : str, optional Name of the output tensorflow node that captures the score logger : `Logger`, optional logging handler for status messages Returns ------- RelevanceModel RelevanceModel object with a default scorer build with a custom InteractionModel """ assert isinstance(interaction_model, InteractionModel) assert isinstance(loss, RelevanceLossBase) scorer: ScorerBase = RelevanceScorer( model_config=model_config, interaction_model=interaction_model, loss=loss, output_name=output_name, ) return cls( scorer=scorer, feature_config=feature_config, metrics=metrics, optimizer=optimizer, tfrecord_type=tfrecord_type, model_file=model_file, initialize_layers_dict=initialize_layers_dict, freeze_layers_list=freeze_layers_list, compile_keras_model=compile_keras_model, output_name=output_name, file_io=file_io, logger=logger, ) @classmethod def from_univariate_interaction_model( cls, model_config, feature_config: FeatureConfig, tfrecord_type: str, loss: RelevanceLossBase, metrics: List[Union[kmetrics.Metric, str]], optimizer: Optimizer, feature_layer_keys_to_fns: dict = {}, model_file: Optional[str] = None, initialize_layers_dict: dict = {}, freeze_layers_list: list = [], compile_keras_model: bool = False, output_name: str = "score", max_sequence_size: int = 0, file_io: FileIO = None, logger=None, ): """ Create a RelevanceModel with default UnivariateInteractionModel Parameters ---------- feature_config : `FeatureConfig` object FeatureConfig object that defines the features to be loaded in the dataset and the preprocessing functions to be applied to each of them model_config : dict dictionary defining the dense model architecture tfrecord_type : {"example", "sequence_example"} Type of the TFRecord protobuf message used for TFRecordDataset file_io : `FileIO` object file I/O handler objects for reading and writing data loss : `RelevanceLossBase` object Loss object defining the final activation layer and the loss function metrics : list List of keras Metric classes that will be used for evaluating the trained model optimizer : `Optimizer` Tensorflow keras optimizer to be used for training the model feature_layer_keys_to_fns : dict Dictionary of custom feature transformation functions to be applied on the input features as part of the InteractionModel model_file : str, optional Path to pretrained model file to be loaded for evaluation or retraining initialize_layers_dict : dict, optional Dictionary of tensorflow layer names mapped to the path of pretrained weights Use this for transfer learning with pretrained weights freeze_layers_list : list, optional List of model layer names to be frozen Use this for freezing pretrained weights from other ml4ir models compile_keras_model : bool, optional Whether the keras model loaded from disk should be compiled with loss, metrics and an optimizer output_name : str, optional Name of the output tensorflow node that captures the score max_sequence_size : int, optional Maximum length of the sequence to be used for SequenceExample protobuf objects logger : `Logger`, optional logging handler for status messages Returns ------- RelevanceModel RelevanceModel object with a UnivariateInteractionModel """ interaction_model: InteractionModel = UnivariateInteractionModel( feature_config=feature_config, feature_layer_keys_to_fns=feature_layer_keys_to_fns, tfrecord_type=tfrecord_type, 
max_sequence_size=max_sequence_size, ) return cls.from_relevance_scorer( interaction_model=interaction_model, model_config=model_config, feature_config=feature_config, loss=loss, metrics=metrics, optimizer=optimizer, tfrecord_type=tfrecord_type, model_file=model_file, initialize_layers_dict=initialize_layers_dict, freeze_layers_list=freeze_layers_list, compile_keras_model=compile_keras_model, output_name=output_name, file_io=file_io, logger=logger, ) def define_scheduler_as_callback(self, monitor_metric, model_config): """ Adding reduce lr on plateau as a callback if specified Parameters ---------- monitor_metric : string The metric to be monitored by the callback model_config : dict dictionary defining the dense model architecture Returns ------- reduce_lr The created scheduler callback object. """ if model_config and 'lr_schedule' in model_config: lr_schedule = model_config['lr_schedule'] lr_schedule_key = lr_schedule['key'] if lr_schedule_key == LearningRateScheduleKey.REDUCE_LR_ON_PLATEAU: if monitor_metric is None: reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(factor=lr_schedule.get('factor', 0.5), patience=lr_schedule.get('patience', 5), min_lr=lr_schedule.get('min_lr', 0.0001), mode=lr_schedule.get('mode', 'auto'), verbose=1) else: if not monitor_metric.startswith("val_"): monitor_metric = "val_{}".format(monitor_metric) reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor=monitor_metric, factor=lr_schedule.get('factor', 0.5), patience=lr_schedule.get('patience', 5), min_lr=lr_schedule.get('min_lr', 0.0001), mode=lr_schedule.get('mode', 'auto'), verbose=1) return reduce_lr def fit( self, dataset: RelevanceDataset, num_epochs: int, models_dir: str, logs_dir: Optional[str] = None, logging_frequency: int = 25, monitor_metric: str = "", monitor_mode: str = "", patience=2, ): """ Trains model for defined number of epochs and returns the training and validation metrics as a dictionary Parameters ---------- dataset : `RelevanceDataset` object RelevanceDataset object to be used for training and validation num_epochs : int Value specifying number of epochs to train for models_dir : str Directory to save model checkpoints logs_dir : str, optional Directory to save model logs If set to False, no progress logs will be written logging_frequency : int, optional Every #batches to log results monitor_metric : str, optional Name of the metric to monitor for early stopping, checkpointing monitor_mode : {"max", "min"} Whether to maximize or minimize the monitoring metric patience : int Number of epochs to wait before early stopping Returns ------- train_metrics : dict Train and validation metrics in a single dictionary where key is metric name and value is floating point metric value. 
This dictionary will be used for experiment tracking for each ml4ir run """ if not monitor_metric.startswith("val_"): monitor_metric = "val_{}".format(monitor_metric) callbacks_list: list = self._build_callback_hooks( models_dir=models_dir, logs_dir=logs_dir, is_training=True, logging_frequency=logging_frequency, monitor_mode=monitor_mode, monitor_metric=monitor_metric, patience=patience, ) if self.is_compiled: history = self.model.fit( x=dataset.train, validation_data=dataset.validation, epochs=num_epochs, verbose=True, callbacks=callbacks_list, ) # Write metrics for experiment tracking # Returns a dictionary train_metrics = dict() for metric, value in history.history.items(): if not metric.startswith("val_"): """ NOTE: Prepend "train_" to metrics on training dataset to differentiate from validation and test metrics in the final experiment results """ # History is a dict of key: list(values per epoch) # We are capturing the metrics of the last epoch (-1) train_metrics["train_{}".format(metric)] = value[-1] else: train_metrics[metric] = value[-1] return train_metrics else: raise NotImplementedError( "The model could not be trained. " "Check if the model was compiled correctly." " Training loaded SavedModel is not currently supported." ) def predict( self, test_dataset: data.TFRecordDataset, inference_signature: str = "serving_default", additional_features: dict = {}, logs_dir: Optional[str] = None, logging_frequency: int = 25, ): """ Predict the scores on the test dataset using the trained model Parameters ---------- test_dataset : `Dataset` object `Dataset` object for which predictions are to be made inference_signature : str, optional If using a SavedModel for prediction, specify the inference signature to be used for computing scores additional_features : dict, optional Dictionary containing new feature name and function definition to compute them. Use this to compute additional features from the scores. For example, converting ranking scores for each document into ranks for the query logs_dir : str, optional Path to directory to save logs logging_frequency : int Value representing how often(in batches) to log status Returns ------- `pd.DataFrame` pandas DataFrame containing the predictions on the test dataset made with the `RelevanceModel` """ if logs_dir: outfile = os.path.join(logs_dir, RelevanceModelConstants.MODEL_PREDICTIONS_CSV_FILE) # Delete file if it exists self.file_io.rm_file(outfile) _predict_fn = get_predict_fn( model=self.model, tfrecord_type=self.tfrecord_type, feature_config=self.feature_config, inference_signature=inference_signature, is_compiled=self.is_compiled, output_name=self.output_name, features_to_return=self.feature_config.get_features_to_log(), additional_features=additional_features, max_sequence_size=self.max_sequence_size, ) predictions_df_list = list() batch_count = 0 for predictions_dict in test_dataset.map(_predict_fn).take(-1): predictions_df = pd.DataFrame(predictions_dict) if logs_dir: np.set_printoptions( formatter={"all": lambda x: str(x.decode("utf-8")) if isinstance(x, bytes) else str(x)}, linewidth=sys.maxsize, threshold=sys.maxsize) # write the full line in the csv not the truncated version. 
# Decode bytes features to strings for col in predictions_df.columns: if isinstance(predictions_df[col].values[0], bytes): predictions_df[col] = predictions_df[col].str.decode("utf8") if os.path.isfile(outfile): predictions_df.to_csv(outfile, mode="a", header=False, index=False) else: # If writing first time, write headers to CSV file predictions_df.to_csv(outfile, mode="w", header=True, index=False) else: predictions_df_list.append(predictions_df) batch_count += 1 if batch_count % logging_frequency == 0: self.logger.info("Finished predicting scores for {} batches".format(batch_count)) predictions_df = None if logs_dir: self.logger.info("Model predictions written to -> {}".format(outfile)) else: predictions_df = pd.concat(predictions_df_list) return predictions_df def evaluate( self, test_dataset: data.TFRecordDataset, inference_signature: str = None, additional_features: dict = {}, group_metrics_min_queries: int = 50, logs_dir: Optional[str] = None, logging_frequency: int = 25, compute_intermediate_stats: bool = True, ): """ Evaluate the RelevanceModel Parameters ---------- test_dataset: an instance of tf.data.dataset inference_signature : str, optional If using a SavedModel for prediction, specify the inference signature to be used for computing scores additional_features : dict, optional Dictionary containing new feature name and function definition to compute them. Use this to compute additional features from the scores. For example, converting ranking scores for each document into ranks for the query group_metrics_min_queries : int, optional Minimum count threshold per group to be considered for computing groupwise metrics logs_dir : str, optional Path to directory to save logs logging_frequency : int Value representing how often(in batches) to log status compute_intermediate_stats : bool Determines if group metrics and other intermediate stats on the test set should be computed Returns ------- df_overall_metrics : `pd.DataFrame` object `pd.DataFrame` containing overall metrics df_groupwise_metrics : `pd.DataFrame` object `pd.DataFrame` containing groupwise metrics if group_metric_keys are defined in the FeatureConfig metrics_dict : dict metrics as a dictionary of metric names mapping to values Notes ----- You can directly do a `model.evaluate()` only if the keras model is compiled Override this method to implement your own evaluation metrics. """ if self.is_compiled: metrics_dict = self.model.evaluate(test_dataset) return None, None, dict(zip(self.model.metrics_names, metrics_dict)) else: raise NotImplementedError def run_ttest(self, mean, variance, n, ttest_pvalue_threshold): """ Compute the paired t-test statistic and its p-value given mean, standard deviation and sample count Parameters ---------- mean: float The mean of the rank differences for the entire dataset variance: float The variance of the rank differences for the entire dataset n: int The number of samples in the entire dataset ttest_pvalue_threshold: float P-value threshold for student t-test metrics_dict: dict dictionary of metrics to keep track Returns ------- t_test_metrics_dict: Dictionary A dictionary with the t-test metrics recorded. 
""" raise NotImplementedError def save( self, models_dir: str, preprocessing_keys_to_fns={}, postprocessing_fn=None, required_fields_only: bool = True, pad_sequence: bool = False, sub_dir: str = "final", dataset: Optional[RelevanceDataset] = None, experiment_details: Optional[dict] = None ): """ Save the RelevanceModel as a tensorflow SavedModel to the `models_dir` There are two different serving signatures currently used to save the model: * `default`: default keras model without any pre/post processing wrapper * `tfrecord`: serving signature that allows keras model to be served using TFRecord proto messages. Allows definition of custom pre/post processing logic Additionally, each model layer is also saved as a separate numpy zipped array to enable transfer learning with other ml4ir models. Parameters ---------- models_dir : str path to directory to save the model preprocessing_keys_to_fns : dict dictionary mapping function names to tf.functions that should be saved in the preprocessing step of the tfrecord serving signature postprocessing_fn: function custom tensorflow compatible postprocessing function to be used at serving time. Saved as part of the postprocessing layer of the tfrecord serving signature required_fields_only: bool boolean value defining if only required fields need to be added to the tfrecord parsing function at serving time pad_sequence: bool, optional Value defining if sequences should be padded for SequenceExample proto inputs at serving time. Set this to False if you want to not handle padded scores. sub_dir: str, optional sub directory name to save the model into dataset : `RelevanceDataset` object RelevanceDataset object that can optionally be passed to be used by downstream jobs that want to save the data along with the model. Note that this feature is currently unimplemented and is upto the users to override and customize. experiment_details: dict Dictionary containing metadata and results about the current experiment Notes ----- All the functions passed under `preprocessing_keys_to_fns` here must be serializable tensor graph operations """ model_file = os.path.join(models_dir, sub_dir) # Save model with default signature self.model.save(filepath=os.path.join(model_file, "default")) """ Save model with custom signatures Currently supported - signature to read TFRecord SequenceExample inputs """ self.model.save( filepath=os.path.join(model_file, "tfrecord"), signatures=define_serving_signatures( model=self.model, tfrecord_type=self.tfrecord_type, feature_config=self.feature_config, preprocessing_keys_to_fns=preprocessing_keys_to_fns, postprocessing_fn=postprocessing_fn, required_fields_only=required_fields_only, pad_sequence=pad_sequence, max_sequence_size=self.max_sequence_size, ), ) # Save individual layer weights self.file_io.make_directory(os.path.join(model_file, "layers"), clear_dir=True) for layer in self.model.layers: try: self.file_io.save_numpy_array( np_array=layer.get_weights(), file_path=os.path.join(model_file, "layers", "{}.npz".format(layer.name)), zip=True, ) except FileNotFoundError: self.logger.warning( "Error saving layer: {} due to FileNotFoundError. Skipping...".format(layer.name)) self.logger.info("Final model saved to : {}".format(model_file)) def load(self, model_file: str) -> Model: """ Loads model from the SavedModel file specified Parameters ---------- model_file : str path to file with saved tf keras model Returns ------- `tf.keras.Model` Tensorflow keras model loaded from file Notes ----- Retraining currently not supported! 
Would require compiling the model with the right loss and optimizer states """ """ NOTE: There is currently a bug in Keras Model with saving/loading models with custom losses and metrics. Therefore, we are currently loading the SavedModel with compile=False The saved model signatures can be used for inference at serving time Ref: https://github.com/keras-team/keras/issues/5916 https://github.com/tensorflow/tensorflow/issues/32348 https://github.com/keras-team/keras/issues/3977 """ model = tf.keras.models.load_model(model_file, compile=False) self.logger.info("Successfully loaded SavedModel from {}".format(model_file)) self.logger.warning("Retraining is not yet supported. Model is loaded with compile=False") return model def load_weights(self, model_file: str): """ Load saved model with compile=False Parameters ---------- model_file : str path to file with saved tf keras model """ loaded_model = self.load(model_file) # Set weights of Keras model from the loaded model weights self.model.set_weights(loaded_model.get_weights()) self.logger.info("Weights have been set from SavedModel. RankingModel can now be trained.") def _build_callback_hooks( self, models_dir: str, logs_dir: Optional[str] = None, is_training=True, logging_frequency=25, monitor_metric: str = "", monitor_mode: str = "", patience=2, ): """ Build callback hooks for the training and evaluation loop Parameters ---------- models_dir : str Path to directory to save model checkpoints logs_dir : str Path to directory to save tensorboard logs is_training : bool, optional Whether we are building callbacks for training or evaluation logging_frequency : int, optional How often, in number of epochs, to log training and evaluation progress monitor_metric : str, optional Name of metric to be used for ModelCheckpoint and EarlyStopping callbacks monitor_mode : {"max", "min"}, optional Mode for maximizing or minimizing the ModelCheckpoint and EarlyStopping patience : int, optional Number of epochs to wait before early stopping if metric change is below tolerance Returns ------- callbacks_list : list List of callbacks to be used with the RelevanceModel training and evaluation """ callbacks_list: list = list() if is_training: # Model checkpoint if models_dir and monitor_metric: checkpoints_path = os.path.join( models_dir, RelevanceModelConstants.CHECKPOINT_FNAME ) cp_callback = callbacks.ModelCheckpoint( filepath=checkpoints_path, save_weights_only=False, verbose=1, save_best_only=True, mode=monitor_mode, monitor=monitor_metric, ) callbacks_list.append(cp_callback) # Early Stopping if monitor_metric: early_stopping_callback = callbacks.EarlyStopping( monitor=monitor_metric, mode=monitor_mode, patience=patience, verbose=1, restore_best_weights=True, ) callbacks_list.append(early_stopping_callback) # TensorBoard if logs_dir: tensorboard_callback = callbacks.TensorBoard( log_dir=logs_dir, histogram_freq=1, update_freq=5 ) callbacks_list.append(tensorboard_callback) # Debugging/Logging callbacks_list.append(DebuggingCallback(self.logger, logging_frequency)) # Adding lr scheduler as a callback; used for `ReduceLROnPlateau` which we treat today as a callback scheduler_callback = self.define_scheduler_as_callback(monitor_metric, self.scorer.model_config) if scheduler_callback: callbacks_list.append(scheduler_callback) # Add more here return callbacks_list def calibrate(self, relevance_dataset, logger, logs_dir_local, **kwargs)\ -> Tuple[np.ndarray, ...]: """Calibrate model with temperature scaling Parameters ---------- relevance_dataset: 
RelevanceDataset RelevanceDataset object to be used for training and evaluating temperature scaling logger: Logger Logger object to log events logs_dir_local: str path to save the calibration results. (zipped csv file containing original probabilities, calibrated probabilities, ...) Returns ------- `Union[np.ndarray, Tuple[np.ndarray, ...]]` optimizer output containing temperature value learned during temperature scaling """ logger.info("=" * 50) logger.info("Calibrating the model with temperature scaling") return temperature_scale(model=self.model, scorer=self.scorer, dataset=relevance_dataset, logger=logger, logs_dir_local=logs_dir_local, file_io=self.file_io, **kwargs) def add_temperature_layer(self, temperature: float = 1.0, layer_name: str = 'temperature_layer'): """Add temperature layer to the input of last activation (softmax) layer Parameters ---------- self: RelevanceModel input RelevanceModel object that its last layer inputs will be divided by a temperature value temperature: float a scalar value to scale the last activation layer inputs layer_name: str name of the temperature scaling layer Returns ------- `RelevanceModel` updated RelevanceModel object with temperature """ # get last layer's output --> MUST **NOT** BE AN ACTIVATION (e.g. SOFTMAX) LAYER final_layer_name = self.scorer.model_config['layers'][-1]['name'] final_layer = self.model.get_layer(name=final_layer_name).output temperature_layer = TemperatureScalingLayer(name=layer_name, temperature=temperature)(final_layer) # using the `last layer` as final activation function before computing loss idx_activation = -1 if len(self.model.layers) > 0 and isinstance(self.model.layers[idx_activation], tf.keras.layers.Activation): # creating new activation layer activation_layer_name = self.model.get_layer(index=idx_activation).name activation_function = self.model.get_layer(index=idx_activation).activation activation_layer = tf.keras.layers.Activation( activation_function, name=activation_layer_name)(temperature_layer) # creating new keras Functional API model self.model = Model(self.model.inputs, activation_layer) self.logger.info(f'Temperature Scaling layer added and new Functional API model' f' replaced; temperature = {temperature}.') else: self.logger.info("Skipping adding Temperature Scaling layer because no activation " "exist in the last layer of Keras original model!")
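# add_temperature_layer above wires a TemperatureScalingLayer between the model's last
# dense layer and its softmax. The layer itself is not shown in this snippet; the sketch
# below shows what such a layer typically looks like (dividing incoming logits by a fixed
# temperature) — the actual ml4ir implementation may differ in details.
import tensorflow as tf

class TemperatureScalingLayerSketch(tf.keras.layers.Layer):
    """Minimal temperature-scaling layer: divides logits by a scalar temperature."""

    def __init__(self, temperature: float = 1.0, **kwargs):
        super().__init__(**kwargs)
        # Kept as a non-trainable variable so it is saved with the model
        self.temperature = tf.Variable(temperature, trainable=False, dtype=tf.float32)

    def call(self, logits):
        return logits / self.temperature

# Usage sketch: softmax(logits / T) flattens (T > 1) or sharpens (T < 1) the probabilities
logits = tf.constant([[2.0, 1.0, 0.1]])
scaled = TemperatureScalingLayerSketch(temperature=2.0)(logits)
print(tf.nn.softmax(logits).numpy(), tf.nn.softmax(scaled).numpy())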
# creating final model
solver = Model(grid, digit_placeholders)  # build the whole model

# compiling created model
solver.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
# grid ---> model ---> digit_placeholders

# ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
# Train Model

# First train
# In the first training round we don't delete any digit
solver.fit(
    delete_digits(Xtrain, 0),  # we don't delete any digit for now
    [Ytrain[:, i, j, :] for i in range(9) for j in range(9)],  # each digit of the solution
    batch_size=128,
    epochs=1,  # 1 epoch should be enough for the task
    verbose=1,
)

# Second train
early_stop = EarlyStopping(patience=2, verbose=1)

i = 1
for nb_epochs, nb_delete in zip(
        [5, 10, 10],   # [1, 2, 3, 4, 6, 8, 10, 10, 10, 10, 10, 15, 15, 15, 15, 15, 15, 20, 25, 30]  # epochs for each round
        [20, 55, 58]   # [1, 2, 3, 4, 6, 8, 10, 12, 14, 17, 20, 23, 25, 30, 35, 40, 45, 50, 55, 60]  # digits to pull off
):
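# The curriculum loop above relies on a delete_digits helper that is not shown here.
# A plausible sketch of such a helper, assuming the puzzles are integer grids of shape
# (n_samples, 9, 9) with 0 meaning "empty" — the real project may encode inputs
# differently (e.g. one-hot), in which case this would need adapting.
import numpy as np

def delete_digits_sketch(grids, n_delete):
    """Blank out `n_delete` random cells per puzzle (hypothetical implementation)."""
    grids = grids.copy()
    for grid in grids:
        # pick n_delete distinct cells among the 81 and clear them
        cells = np.random.choice(81, n_delete, replace=False)
        grid.flat[cells] = 0
    return grids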
infomax = DeepGraphInfomax(gcn_model, corrupted_generator) x_in, x_out = infomax.in_out_tensors() # train model model = Model(inputs=x_in, outputs=x_out) model.compile(loss=tf.nn.sigmoid_cross_entropy_with_logits, optimizer=Adam(lr=1e-3)) model.summary() # create a model image, print to file plot_model(model, show_shapes=True, to_file="model.png") epochs = 1000 es = EarlyStopping(monitor="loss", min_delta=0, patience=20) # next line triggers the model to train, run time 10 mins with a GTX 1070 # 90 minutes on CPU history = model.fit(gen, epochs=epochs, verbose=0, callbacks=[es]) plot_history(history) # playing with the embedding vectors, obtain trained node embedding model x_emb_in, x_emb_out = gcn_model.in_out_tensors() # for full batch models, squeeze out the batch dim (which is 1) x_out = tf.squeeze(x_emb_out, axis=0) emb_model = Model(inputs=x_emb_in, outputs=x_out) # get the target, document type is the best I can think of at short notice # data from `notebooks/recommend-content-dgi/01_data_pull_use.py` text_df = pd.read_csv('data/processed/text_use_large_2000_df.csv') text_df.head() text_df['document_type'].value_counts()
        return precision

    precision = precision(y_true, y_pred)
    recall = recall(y_true, y_pred)
    return 2 * ((precision * recall) / (precision + recall + K.epsilon()))


model.compile(optimizer='adam', loss='binary_crossentropy', metrics=[f1, 'accuracy'])

history = model.fit([X_train_II, X_train_V1], y_train,
                    epochs=epochs, batch_size=batch_size, verbose=1,
                    validation_data=([X_test_II, X_test_V1], y_test))

y_pred = model.predict([X_test_II, X_test_V1])
test_acc = model.evaluate([X_test_II, X_test_V1], y_test, verbose=1)
print("keras acc: ", test_acc)

y_pred = (y_pred > 0.4)
list(y_pred)

# print(classification_report(NSR_y_test.argmax(axis=1), y_pred.argmax(axis=1)))
print(classification_report(y_test, y_pred))
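# The snippet above opens inside the custom f1 metric, so the nested precision/recall
# helpers it relies on are cut off. For reference, a common Keras-backend formulation of
# that metric looks like the following — a sketch, not necessarily the exact code used here.
from tensorflow.keras import backend as K

def f1_sketch(y_true, y_pred):
    def recall(y_true, y_pred):
        true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
        possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
        return true_positives / (possible_positives + K.epsilon())

    def precision(y_true, y_pred):
        true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
        predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
        return true_positives / (predicted_positives + K.epsilon())

    precision = precision(y_true, y_pred)
    recall = recall(y_true, y_pred)
    return 2 * ((precision * recall) / (precision + recall + K.epsilon()))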
class SymmetricAutoencoder: def __init__(self, dims, act='relu', init='glorot_uniform', noise_stddev=0.0): n_stacks = len(dims) - 1 # input x = Input(shape=(dims[0], ), name='input') h = GaussianNoise(noise_stddev)(x) # internal layers in encoder for i in range(n_stacks - 1): h = Dense(dims[i + 1], activation=act, kernel_initializer=init, name='encoder_%d' % i)(h) # h = BatchNormalization(momentum=0.66)(h) # h = Dropout(0.3)(h) # hidden layer h = Dense( dims[-1], kernel_initializer=init, name='encoder_%d' % (n_stacks - 1))( h) # hidden bottleneck layer, features are extracted from here y = h # internal layers in decoder for i in range(n_stacks - 1, 0, -1): y = Dense(dims[i], activation=act, kernel_initializer=init, name='decoder_%d' % i)(y) # y = BatchNormalization(momentum=0.66)(y) # y = Dropout(0.3)(y) # output y = Dense(dims[0], kernel_initializer=init, name='decoder_0')(y) self.model = Model(inputs=x, outputs=y, name='AE') self.encoder = Model(inputs=x, outputs=h, name='encoder') def load_weights(self, weights): self.model.load_weights(weights, by_name=True) def load_encoder(self, model): self.encoder = load_model(model) def extract_features(self, x): return self.encoder.predict(x) def predict(self, x): y = self.model.predict(x, verbose=0, steps=1) return y def compile(self, optimizer='sgd', loss='mse'): self.model.compile(optimizer=optimizer, loss=loss) def summary(self): self.model.summary() def evaluate(self, x): predictions = self.model.predict(x) mse = np.mean(np.power(x - predictions, 2), axis=1) error_df = pd.DataFrame({'reconstruction_error': mse}) print('MSE: {}'.format(np.mean(mse))) print(error_df.describe()) def fit(self, dataset, epochs=10, steps_per_epoch=30, validation_data=None, validation_steps=None, save_dir='results/', file_name='ae_weights.h5', log_name='Autoencoder'): tensorboard = tf.keras.callbacks.TensorBoard( log_dir=(save_dir + "{}".format(log_name))) checkpoint = ModelCheckpoint(save_dir + "ae_weights.{epoch:02d}-{loss:.5f}.h5", monitor='loss', save_weights_only=True, period=10) self.model.fit(dataset, epochs=epochs, steps_per_epoch=steps_per_epoch, validation_data=validation_data, validation_steps=validation_steps, callbacks=[checkpoint, tensorboard]) self.model.save_weights(save_dir + file_name) print('Autoencoder weights are saved to %s/%s', save_dir, file_name)
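# A minimal usage sketch of SymmetricAutoencoder above on random data. Layer sizes,
# training settings and the results/ directory are illustrative only; fit() above uses
# ModelCheckpoint(period=...), which assumes a TF version that still accepts that argument.
import os
import numpy as np
import tensorflow as tf

os.makedirs('results/', exist_ok=True)

ae = SymmetricAutoencoder(dims=[784, 500, 500, 2000, 10], noise_stddev=0.1)
ae.compile(optimizer='adam', loss='mse')
ae.summary()

x = np.random.rand(256, 784).astype('float32')
dataset = tf.data.Dataset.from_tensor_slices((x, x)).batch(32).repeat()

ae.fit(dataset, epochs=2, steps_per_epoch=8, save_dir='results/')
features = ae.extract_features(x)    # bottleneck features from the encoder sub-model
ae.evaluate(x)                       # reconstruction-error summary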
class VariationalAutoencoder: def __init__(self, dims, act='relu', init='glorot_uniform', noise_stddev=0.0): n_stacks = len(dims) - 1 # input x = Input(shape=(dims[0], ), name='input') encoder = GaussianNoise(noise_stddev)(x) # internal layers in encoder for i in range(n_stacks - 1): encoder = Dense(dims[i + 1], activation=act, kernel_initializer=init, name='encoder_%d' % i)(encoder) encoder = BatchNormalization(momentum=0.66)(encoder) # encoder = Dropout(0.3)(h) # variational part z_mean = Dense(dims[-1], name='z_mean')(encoder) z_log_var = Dense(dims[-1], name='z_log_var')(encoder) # use reparameterization trick to push the sampling out as input z = Lambda(self.sampling, output_shape=(dims[-1], ), name='z')([z_mean, z_log_var]) encoder = Model(x, [z_mean, z_log_var, z], name='encoder') encoder.summary() latent_inputs = Input(shape=(dims[-1], ), name='z_sampling') y = latent_inputs # internal layers in decoder for i in range(n_stacks - 1, 0, -1): y = Dense(dims[i], activation=act, kernel_initializer=init, name='decoder_%d' % i)(y) # y = BatchNormalization(momentum=0.66)(decoder) # y = Dropout(0.3)(decoder) # output y = Dense(dims[0], kernel_initializer=init, name='decoder_0')(y) decoder = Model(latent_inputs, y, name='decoder') decoder.summary() outputs = decoder(encoder(x)[2]) self.model = Model(inputs=x, outputs=outputs, name='VAE') self.encoder = encoder def get_encoder(self): return Model(inputs=self.model.input, outputs=self.model.get_layer("encoder").output) @staticmethod def sampling(args): """Reparameterization trick by sampling from an isotropic unit Gaussian. # Arguments args (tensor): mean and log of variance of Q(z|X) # Returns z (tensor): sampled latent vector """ z_mean, z_log_var = args batch = K.shape(z_mean)[0] dim = K.int_shape(z_mean)[1] # by default, random_normal has mean = 0 and std = 1.0 epsilon = K.random_normal(shape=(batch, dim)) return z_mean + K.exp(0.5 * z_log_var) * epsilon def load_weights(self, weights): self.model.load_weights(weights, by_name=True) def load_encoder(self, model): self.encoder = load_model(model) def extract_features(self, x): return self.encoder.predict(x) def predict(self, x): y = self.model.predict(x, verbose=0, steps=1) return y def compile(self, optimizer='sgd', loss='mse'): self.model.compile(optimizer=optimizer, loss=loss) def summary(self): self.model.summary() def evaluate(self, x): predictions = self.model.predict(x) mse = np.mean(np.power(x - predictions, 2), axis=1) error_df = pd.DataFrame({'reconstruction_error': mse}) print('MSE: {}'.format(np.mean(mse))) print(error_df.describe()) def fit(self, dataset, epochs=10, steps_per_epoch=30, validation_data=None, validation_steps=None, save_dir='results/', file_name='vae_weights.h5', log_name='Variational Autoencoder'): tensorboard = tf.keras.callbacks.TensorBoard( log_dir=(save_dir + "{}".format(log_name))) checkpoint = ModelCheckpoint(save_dir + "ae_weights.{epoch:02d}-{loss:.5f}.h5", monitor='loss', save_weights_only=True, period=10) self.model.fit(dataset, epochs=epochs, steps_per_epoch=steps_per_epoch, validation_data=validation_data, validation_steps=validation_steps, callbacks=[checkpoint, tensorboard]) self.model.save_weights(save_dir + file_name) print('Autoencoder weights are saved to %s/%s', save_dir, file_name)
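# Note: VariationalAutoencoder above is compiled with a plain reconstruction loss ('mse'
# by default) and no KL-divergence term appears in this snippet, so the latent Gaussian is
# not regularized. A common way to add the KL term is the tf.keras 2.x add_loss API
# (Keras 3 removed symbolic add_loss). Below is a self-contained sketch of that pattern on
# a tiny stand-in VAE with arbitrary layer sizes, not the class above itself.
import numpy as np
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Input, Dense, Lambda
from tensorflow.keras.models import Model

input_dim, latent_dim = 20, 2

x_in = Input(shape=(input_dim,))
h = Dense(16, activation='relu')(x_in)
z_mean = Dense(latent_dim)(h)
z_log_var = Dense(latent_dim)(h)
# reparameterization trick: z = mu + sigma * epsilon
z = Lambda(lambda a: a[0] + K.exp(0.5 * a[1]) * K.random_normal(K.shape(a[0])))([z_mean, z_log_var])
x_out = Dense(input_dim)(Dense(16, activation='relu')(z))

vae = Model(x_in, x_out)
reconstruction_loss = K.sum(K.square(x_in - x_out), axis=-1)
kl_loss = -0.5 * K.sum(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
vae.add_loss(K.mean(reconstruction_loss + kl_loss))
vae.compile(optimizer='adam')   # loss comes entirely from add_loss

vae.fit(np.random.rand(128, input_dim).astype('float32'), epochs=1, batch_size=32)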
from tensorflow.keras import Model
from tensorflow.keras.layers import Input, Dense

input1 = Input(shape=(13, ))
dense1 = Dense(128, activation='relu')(input1)
dense1 = Dense(64, activation='relu')(dense1)
dense1 = Dense(64, activation='relu')(dense1)
dense1 = Dense(64, activation='relu')(dense1)
output1 = Dense(1)(dense1)
model = Model(inputs=input1, outputs=output1)

# 3. compile and fit
model.compile(optimizer='adam', loss='mse', metrics=['mae'])
model.fit(x_train, y_train, batch_size=4, epochs=150, verbose=1, validation_split=0.2)

# 4. evaluate and predict
mse, mae = model.evaluate(x_test, y_test, batch_size=4)
print("mse :", mse, "\nmae :", mae)

y_predict = model.predict(x_test)

# compute RMSE
from sklearn.metrics import mean_squared_error

def RMSE(y_test, y_predict):
    return np.sqrt(mean_squared_error(y_test, y_predict))
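# Quick usage of the RMSE helper above, together with R^2, which is often reported
# alongside RMSE for regression models (assumes y_test and y_predict from the code above).
from sklearn.metrics import r2_score

print("RMSE :", RMSE(y_test, y_predict))
print("R2   :", r2_score(y_test, y_predict))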
class MyModel: def __init__(self, input_size, vocab_size, greedy=False, beam_width=10, top_paths=1, stop_tolerance=20, reduce_tolerance=15): self.input_size = input_size self.vocab_size = vocab_size self.model = None self.greedy = greedy self.beam_width = beam_width self.top_paths = max(1, top_paths) self.stop_tolerance = stop_tolerance self.reduce_tolerance = reduce_tolerance def summary(self, output=None, target=None): self.model.summary() if target is not None: os.makedirs(output, exist_ok=True) with open(os.path.join(output, target), "w") as f: with redirect_stdout(f): self.model.summary() def load_checkpoint(self, target): if os.path.isfile(target): if self.model is None: self.compile() self.model.load_weights(target) def get_callbacks(self, logdir, checkpoint, monitor="val_loss", verbose=0): callbacks = [ CSVLogger(filename=os.path.join(logdir, "epochs.log"), separator=";", append=True), TensorBoard(log_dir=logdir, histogram_freq=10, profile_batch=0, write_graph=True, write_images=False, update_freq="epoch"), ModelCheckpoint(filepath=checkpoint, monitor=monitor, save_best_only=True, save_weights_only=True, verbose=verbose), EarlyStopping(monitor=monitor, min_delta=1e-8, patience=self.stop_tolerance, restore_best_weights=True, verbose=verbose), ReduceLROnPlateau(monitor=monitor, min_delta=1e-8, factor=0.2, patience=self.reduce_tolerance, verbose=verbose) ] return callbacks def compile(self, learning_rate=None, initial_step=0): # define inputs, outputs and optimizer of the chosen architecture inputs, outputs = self.architecture(self.input_size, self.vocab_size + 1) if learning_rate is None: learning_rate = CustomSchedule(d_model=self.vocab_size + 1, initial_step=initial_step) self.learning_schedule = True else: self.learning_schedule = False optimizer = tf.keras.optimizers.RMSprop(learning_rate=learning_rate) # create and compile self.model = Model(inputs=inputs, outputs=outputs) self.model.compile( optimizer=optimizer, loss=lambda y1, y2: tf.py_function(self.ctc_loss_lambda_func, [y1, y2], [tf.float32])) def fit(self, x=None, y=None, batch_size=None, epochs=1, verbose=1, callbacks=None, validation_split=0.0, validation_data=None, shuffle=True, class_weight=None, sample_weight=None, initial_epoch=0, steps_per_epoch=None, validation_steps=None, validation_freq=1, max_queue_size=10, workers=1, use_multiprocessing=False, **kwargs): # remove ReduceLROnPlateau (if exist) when use schedule learning rate if callbacks and self.learning_schedule: callbacks = [ x for x in callbacks if not isinstance(x, ReduceLROnPlateau) ] out = self.model.fit(x=x, y=y, batch_size=batch_size, epochs=epochs, verbose=verbose, callbacks=callbacks, validation_split=validation_split, validation_data=validation_data, shuffle=shuffle, class_weight=class_weight, sample_weight=sample_weight, initial_epoch=initial_epoch, steps_per_epoch=steps_per_epoch, validation_steps=validation_steps, validation_freq=validation_freq, max_queue_size=max_queue_size, workers=workers, use_multiprocessing=use_multiprocessing, **kwargs) return out def predict(self, x, batch_size=None, verbose=0, steps=1, callbacks=None, max_queue_size=10, workers=1, use_multiprocessing=False, ctc_decode=True): if verbose == 1: print("Model Predict") out = self.model.predict(x=x, batch_size=batch_size, verbose=verbose, steps=steps, callbacks=callbacks, max_queue_size=max_queue_size, workers=workers, use_multiprocessing=use_multiprocessing) if not ctc_decode: return np.log(out.clip(min=1e-8)), [] steps_done = 0 if verbose == 1: print("CTC Decode") progbar = 
tf.keras.utils.Progbar(target=steps) batch_size = int(np.ceil(len(out) / steps)) input_length = len(max(out, key=len)) predicts, probabilities = [], [] while steps_done < steps: index = steps_done * batch_size until = index + batch_size x_test = np.asarray(out[index:until]) x_test_len = np.asarray([input_length for _ in range(len(x_test))]) decode, log = self.ctc_decode(x_test, x_test_len, greedy=self.greedy, beam_width=self.beam_width, top_paths=self.top_paths) if not self.greedy: probabilities.extend([np.exp(x)[0] for x in log]) else: probabilities.extend([np.exp(-x)[0] for x in log]) decode = [[[int(p) for p in x if p != -1] for x in y] for y in decode] predicts.extend(np.swapaxes(decode, 0, 1)) steps_done += 1 if verbose == 1: progbar.update(steps_done) return (predicts, probabilities) def ctc_decode(self, y_pred, input_length, greedy=True, beam_width=100, top_paths=1): input_shape = y_pred.shape num_samples, num_steps = input_shape[0], input_shape[1] y_pred = tf.math.log( tf.transpose(y_pred, perm=[1, 0, 2]) + K.epsilon()) input_length = tf.cast(input_length, tf.int32) if greedy: (decoded, log_prob) = tf.nn.ctc_greedy_decoder(inputs=y_pred, sequence_length=input_length) else: (decoded, log_prob) = tf.nn.ctc_beam_search_decoder( inputs=y_pred, sequence_length=input_length, beam_width=beam_width, top_paths=top_paths) decoded_dense = [] for st in decoded: # st = tf.sparse.SparseTensor( # st.indices, st.values, (num_samples, num_steps)) decoded_dense.append( tf.sparse.to_dense(sp_input=st, default_value=-1)) return (decoded_dense, log_prob) @staticmethod def ctc_loss_lambda_func(y_true, y_pred): if len(y_true.shape) > 2: y_true = tf.squeeze(y_true) # y_pred.shape = (batch_size, string_length, alphabet_size_1_hot_encoded) # output of every model is softmax # so sum across alphabet_size_1_hot_encoded give 1 # string_length give string length input_length = tf.math.reduce_sum(y_pred, axis=-1, keepdims=False) input_length = tf.math.reduce_sum(input_length, axis=-1, keepdims=True) # y_true strings are padded with 0 # so sum of non-zero gives number of characters in this string label_length = tf.math.count_nonzero(y_true, axis=-1, keepdims=True, dtype="int64") loss = K.ctc_batch_cost(y_true, y_pred, input_length, label_length) # average loss across all entries in the batch loss = tf.reduce_mean(loss) return loss def architecture(self, input_size, d_model): input_data = Input(name="input", shape=input_size) cnn = Reshape((input_size[0] // 2, input_size[1] // 2, input_size[2] * 4))(input_data) cnn = Conv2D(filters=16, kernel_size=(3, 3), strides=(1, 2), padding="same", kernel_initializer="he_uniform")(cnn) cnn = PReLU(shared_axes=[1, 2])(cnn) cnn = BatchNormalization(renorm=True)(cnn) cnn = FullGatedConv2D(filters=16, kernel_size=(3, 3), padding="same")(cnn) cnn = Conv2D(filters=32, kernel_size=(3, 3), strides=(1, 1), padding="same", kernel_initializer="he_uniform")(cnn) cnn = PReLU(shared_axes=[1, 2])(cnn) cnn = BatchNormalization(renorm=True)(cnn) cnn = FullGatedConv2D(filters=32, kernel_size=(3, 3), padding="same")(cnn) cnn = Conv2D(filters=40, kernel_size=(2, 4), strides=(2, 4), padding="same", kernel_initializer="he_uniform")(cnn) cnn = PReLU(shared_axes=[1, 2])(cnn) cnn = BatchNormalization(renorm=True)(cnn) cnn = FullGatedConv2D(filters=40, kernel_size=(3, 3), padding="same", kernel_constraint=MaxNorm(4, [0, 1, 2]))(cnn) cnn = Dropout(rate=0.2)(cnn) cnn = Conv2D(filters=48, kernel_size=(3, 3), strides=(1, 1), padding="same", kernel_initializer="he_uniform")(cnn) cnn = PReLU(shared_axes=[1, 
2])(cnn) cnn = BatchNormalization(renorm=True)(cnn) cnn = FullGatedConv2D(filters=48, kernel_size=(3, 3), padding="same", kernel_constraint=MaxNorm(4, [0, 1, 2]))(cnn) cnn = Dropout(rate=0.2)(cnn) cnn = Conv2D(filters=56, kernel_size=(2, 4), strides=(2, 4), padding="same", kernel_initializer="he_uniform")(cnn) cnn = PReLU(shared_axes=[1, 2])(cnn) cnn = BatchNormalization(renorm=True)(cnn) cnn = FullGatedConv2D(filters=56, kernel_size=(3, 3), padding="same", kernel_constraint=MaxNorm(4, [0, 1, 2]))(cnn) cnn = Dropout(rate=0.2)(cnn) cnn = Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1), padding="same", kernel_initializer="he_uniform")(cnn) cnn = PReLU(shared_axes=[1, 2])(cnn) cnn = BatchNormalization(renorm=True)(cnn) shape = cnn.get_shape() bgru = Reshape((shape[1], shape[2] * shape[3]))(cnn) bgru = Bidirectional(GRU(units=128, return_sequences=True, dropout=0.5))(bgru) bgru = Dense(units=256)(bgru) bgru = Bidirectional(GRU(units=128, return_sequences=True, dropout=0.5))(bgru) bgru = Dense(units=256)(bgru) bgru = Bidirectional(GRU(units=128, return_sequences=True, dropout=0.5))(bgru) output_data = Dense(units=d_model, activation="softmax")(bgru) return (input_data, output_data)
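# An illustrative usage sketch of the MyModel class above. The input size and charset
# length are made up, and FullGatedConv2D / CustomSchedule are assumed importable from
# the surrounding project; this is not the project's own training script.
import numpy as np

input_size = (1024, 128, 1)          # (width, height, channels) of the line images
vocab_size = 97                      # size of the character set

htr = MyModel(input_size=input_size, vocab_size=vocab_size, beam_width=10)
htr.compile(learning_rate=0.001)     # passing a float skips the CustomSchedule path
htr.summary()

# Random stand-in batch; a real run would feed preprocessed text-line images
x = np.random.rand(2, *input_size).astype('float32')
predicts, probabilities = htr.predict(x, steps=1, ctc_decode=True)
print(predicts, probabilities)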
cnns = [] for _ in range(adjs): input_a = Input((*input_shape, 1)) x = Conv2D(8, 3)(input_a) x = MaxPool2D(2)(x) x = Flatten()(x) x = Model(inputs=[input_a], outputs=x) cnns.append(x) combine = Concatenate()([x.output for x in cnns]) z = Dense(classes, activation='softmax')(combine) model = Model(inputs=[x.input for x in cnns], outputs=z) model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy']) model.fit(x_train, y_train_cat, batch_size=32, epochs=50, shuffle=True, validation_split=0.1) y_pred = model.predict(x_test).argmax(axis=-1) acc = accuracy_score(y_test, y_pred) print("Fold accuracy: {:2.2f}".format(acc)) matrix = confusion_matrix(y_test, y_pred) print("Fold confusion matrix: \n {}".format(matrix)) total_acc += acc / n_splits print("Total model accuracy: {:2.2f}".format(total_acc))
sub_Y = Y[start:end, :]
train_X, train_Y, val_X, val_Y = train_test_split(sub_X, sub_Y, train_portion)

# Record the training history of this run
train_loss_list = []
train_mse_list = []
train_ic_list = []
val_loss_list = []
val_mse_list = []
val_ic_list = []

# Run one training pass
for epoch in range(EPOCHS):
    model.fit(train_X, train_Y, epochs=1, shuffle=False, verbose=0)
    # print("Epoch:" + str(epoch))

    # Loss and metrics on the training set
    pred_train = model.predict(train_X)
    train_loss = mean_absolute_error(pred_train, train_Y)
    train_mse = mean_squared_error(pred_train, train_Y)
    train_loss_list.append(train_loss)
    train_mse_list.append(train_mse)
    # print("train_loss:" + str(train_loss) + " train_mse:" + str(train_mse), end=" ")

    # Loss and metrics on the validation set
    pred_val = model.predict(val_X)
    val_loss = mean_absolute_error(pred_val, val_Y)
    val_mse = mean_squared_error(pred_val, val_Y)
class UNetModel(): def __init__(self, ckpt_name, loss, optimizer, metrics, monitor, dropout_rate=0.25, epochs=50, batch_size=32, input_size=101, input_layer=None, output_layer=None): if input_layer is None: self.input_layer = Input((input_size, input_size, 1)) else: self.input_layer = input_layer if output_layer is None: self.output_layer = self.build_model(self.input_layer, 16, dropout_rate) else: self.output_layer = output_layer self.model = Model(self.input_layer, self.output_layer) self.model.compile(loss=loss, optimizer=optimizer, metrics=metrics) self.model_name = ckpt_name self.model_checkpoint = ModelCheckpoint(self.model_name, monitor=monitor, mode='max', save_best_only=True, verbose=1) self.reduce_lr = ReduceLROnPlateau(monitor=monitor, mode='max', factor=0.5, patience=5, min_lr=0.0001, verbose=1) self.epochs = epochs self.batch_size = batch_size print('''Model info: model name: {} loss: {} optimizer: {} monitor: {} dropout rate: {} epoch: {} batch size: {}'''.format(ckpt_name, loss, optimizer, monitor, dropout_rate, epochs, batch_size)) def fit(self, x_train, y_train, x_valid, y_valid): self.history = self.model.fit( x_train, y_train, validation_data=[x_valid, y_valid], epochs=self.epochs, batch_size=self.batch_size, callbacks=[self.model_checkpoint, self.reduce_lr], verbose=2) return self.history def batch_activate(self, x): x = BatchNormalization()(x) x = Activation('relu')(x) return x def convolution_block(self, x, filters, size, strides=(1, 1), padding='same', activation=True): x = Conv2D(filters, size, strides=strides, padding=padding)(x) if activation is True: x = self.batch_activate(x) return x def residual_block(self, blockInput, num_filters=16, activation=False): x = self.batch_activate(blockInput) x = self.convolution_block(x, num_filters, (3, 3)) x = self.convolution_block(x, num_filters, (3, 3), activation=False) x = Add()([x, blockInput]) if activation: x = self.batch_activate(x) return x def squeeze_excite_block_cSE(self, input, ratio=2): init = input filters = K.int_shape(init)[-1] se_shape = (1, 1, filters) se = GlobalAveragePooling2D()(init) se = Reshape(se_shape)(se) se = Dense(filters // ratio, activation='relu', kernel_initializer='he_normal', use_bias=True)(se) se = Dense(filters, activation='sigmoid', kernel_initializer='he_normal', use_bias=True)(se) x = multiply([init, se]) return x def squeeze_excite_block_sSE(sekf, input): sSE_scale = Conv2D(1, (1, 1), activation='sigmoid', padding="same", use_bias=True)(input) return multiply([input, sSE_scale]) def unet_layer(self, blockInput, num_filters, use_csSE_ratio=2): x = Conv2D(num_filters, (3, 3), activation=None, padding="same")(blockInput) x = self.residual_block(x, num_filters) x = self.residual_block(x, num_filters, activation=True) if use_csSE_ratio > 0: sSEx = self.squeeze_excite_block_sSE(x) cSEx = self.squeeze_excite_block_cSE(x, ratio=use_csSE_ratio) x = Add()([sSEx, cSEx]) return x def build_model(self, input_layer, start_neurons, DropoutRatio=0.5, use_csSE_ratio=2): # 101 -> 50 conv1 = self.unet_layer(input_layer, start_neurons * 1, use_csSE_ratio) pool1 = MaxPooling2D((2, 2))(conv1) pool1 = Dropout(DropoutRatio / 2)(pool1) # 50 -> 25 conv2 = self.unet_layer(pool1, start_neurons * 2, use_csSE_ratio) pool2 = MaxPooling2D((2, 2))(conv2) pool2 = Dropout(DropoutRatio)(pool2) # 25 -> 12 conv3 = self.unet_layer(pool2, start_neurons * 4, use_csSE_ratio) pool3 = MaxPooling2D((2, 2))(conv3) pool3 = Dropout(DropoutRatio)(pool3) # 12 -> 6 conv4 = self.unet_layer(pool3, start_neurons * 8, use_csSE_ratio) pool4 
= MaxPooling2D((2, 2))(conv4) pool4 = Dropout(DropoutRatio)(pool4) # Middle convm = Conv2D(start_neurons * 16, (3, 3), activation=None, padding="same")(pool4) convm = self.residual_block(convm, start_neurons * 16) convm = self.residual_block(convm, start_neurons * 16, True) # 6 -> 12 deconv4 = Conv2DTranspose(start_neurons * 8, (3, 3), strides=(2, 2), padding="same")(convm) uconv4 = concatenate([deconv4, conv4]) uconv4 = Dropout(DropoutRatio)(uconv4) uconv4 = Conv2D(start_neurons * 8, (3, 3), activation=None, padding="same")(uconv4) uconv4 = self.residual_block(uconv4, start_neurons * 8) uconv4 = self.residual_block(uconv4, start_neurons * 8, True) # 12 -> 25 deconv3 = Conv2DTranspose(start_neurons * 4, (3, 3), strides=(2, 2), padding="valid")(uconv4) uconv3 = concatenate([deconv3, conv3]) uconv3 = Dropout(DropoutRatio)(uconv3) uconv3 = Conv2D(start_neurons * 4, (3, 3), activation=None, padding="same")(uconv3) uconv3 = self.residual_block(uconv3, start_neurons * 4) uconv3 = self.residual_block(uconv3, start_neurons * 4, True) # 25 -> 50 deconv2 = Conv2DTranspose(start_neurons * 2, (3, 3), strides=(2, 2), padding="same")(uconv3) uconv2 = concatenate([deconv2, conv2]) uconv2 = Dropout(DropoutRatio)(uconv2) uconv2 = Conv2D(start_neurons * 2, (3, 3), activation=None, padding="same")(uconv2) uconv2 = self.residual_block(uconv2, start_neurons * 2) uconv2 = self.residual_block(uconv2, start_neurons * 2, True) # 50 -> 101 deconv1 = Conv2DTranspose(start_neurons * 1, (3, 3), strides=(2, 2), padding="valid")(uconv2) uconv1 = concatenate([deconv1, conv1]) uconv1 = Dropout(DropoutRatio)(uconv1) uconv1 = Conv2D(start_neurons * 1, (3, 3), activation=None, padding="same")(uconv1) uconv1 = self.residual_block(uconv1, start_neurons * 1) uconv1 = self.residual_block(uconv1, start_neurons * 1, True) output_layer_noActi = Conv2D(1, (1, 1), padding="same", activation=None)(uconv1) output_layer = Activation('sigmoid')(output_layer_noActi) return output_layer
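# An illustrative instantiation of the UNetModel class above on random 101x101
# single-channel data. The loss, optimizer and metric names are placeholders; the real
# project likely uses its own loss/IoU metric, and file paths here are hypothetical.
import numpy as np
import tensorflow as tf

unet = UNetModel(ckpt_name='unet_best.h5',
                 loss='binary_crossentropy',
                 optimizer=tf.keras.optimizers.Adam(1e-3),
                 metrics=['accuracy'],
                 monitor='val_accuracy',
                 epochs=2,
                 batch_size=8,
                 input_size=101)

x_train = np.random.rand(16, 101, 101, 1).astype('float32')
y_train = (np.random.rand(16, 101, 101, 1) > 0.5).astype('float32')
x_valid, y_valid = x_train[:4], y_train[:4]

history = unet.fit(x_train, y_train, x_valid, y_valid)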
# Flow training images in batches of 20 using train_datagen generator train_generator = train_datagen.flow_from_directory(train_dir, batch_size = 20, class_mode = 'binary', target_size = (150, 150)) # Flow validation images in batches of 20 using test_datagen generator validation_generator = test_datagen.flow_from_directory( validation_dir, batch_size = 20, class_mode = 'binary', target_size = (150, 150)) history = model.fit( train_generator, validation_data = validation_generator, steps_per_epoch = 100, epochs = 20, validation_steps = 50, verbose = 2) import matplotlib.pyplot as plt acc = history.history['accuracy'] val_acc = history.history['val_accuracy'] loss = history.history['loss'] val_loss = history.history['val_loss'] epochs = range(len(acc)) plt.plot(epochs, acc, 'r', label='Training accuracy') plt.plot(epochs, val_acc, 'b', label='Validation accuracy') plt.title('Training and validation accuracy')
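# Hedged companion to the accuracy curves above: the same History object also
# records 'loss' / 'val_loss', so the loss trajectory can be plotted the same way.
plt.figure()
plt.plot(epochs, loss, 'r', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.legend()
plt.show()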
a5 = Dropout(0.2)(a5) a5 = Flatten()(a5) z6 = Dense(136)(a5) # ---------------------------- model = Model(input_img, z6) model.summary() # Compile and train model model.compile(optimizer=Adam(0.001), loss='mse') train = model.fit(X, y, batch_size=64, epochs=45) # ## **Plot loss** def plot_history(history): fig = plt.figure(figsize=(15, 8)) ax2 = fig.add_subplot(222) ax2.set_title('model loss') ax2.plot(history['loss']) ax2.set_ylabel('loss') ax2.set_xlabel('epoch') ax2.legend(['train', 'test'], loc='upper right')
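# Hedged usage of plot_history above, passing the dict recorded by model.fit;
# `train` is the History object returned a few lines earlier.
plot_history(train.history)
plt.show()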
        self.act = Dense(1, activation='sigmoid')

    def call(self, inputs, training=None, mask=None):
        x, mask = inputs
        x = self.embedding(x)          # embed the token ids, not the [ids, mask] list
        x = self.attention([x, x, x, mask])
        x = tf.reduce_mean(x, axis=1)
        x = self.act(x)
        return x


if __name__ == '__main__':
    imdb = tf.keras.datasets.imdb
    (x_train, y_train), (x_test, y_test) = imdb.load_data()
    x_train = pad_sequences(x_train, 200)

    x = Input([200, ], dtype='int32')
    mask = Input([200, ], dtype='int32')
    out = TestModel()([x, mask])       # instantiate the subclassed model, then call it on the inputs
    model = Model(inputs=[x, mask], outputs=out)
    model.compile(
        loss='binary_crossentropy',    # single sigmoid unit -> binary loss, not sparse categorical
        optimizer=tf.keras.optimizers.Adam(1e-5),
        metrics=['accuracy']
    )

    import numpy as np
    m = np.ones([25000, 200])
    model.fit([x_train, m], y_train, batch_size=32, epochs=1)
output_layer = unet_model(input_layer, 32)
#output_layer = improved_unet_model(input_layer, 32)

model = Model(input_layer, output_layer)

# Load previously saved weights
#model.load_weights(checkpoint_path)

# ################## Finalize the model with the compile method ###########
adam = tf.keras.optimizers.Adam(learning_rate=0.0005, beta_1=0.9, beta_2=0.999,
                                epsilon=1e-07, amsgrad=True, name='Adam')
model.compile(loss="mae", optimizer=adam,
              metrics=[maeOverFscore_keras, fscore_keras, "accuracy"])

# ################## Train with the fit method #############################
#model_history = model.fit_sample()
model_history = model.fit(train_dataset, epochs=10, verbose=1, shuffle=True,
                          callbacks=callbacks_list)

pred = model.predict(testGenerator())

submission = pd.read_csv('Submission_form.csv')
submission.iloc[:, 1:] = pred.reshape(-1, 1600)
submission.to_csv('Answer.csv', index=False)
with strategy.scope(): rfanet_x = RFDN() x = Input(shape=(120, 160, 3)) out = rfanet_x(x) parallel_model = Model(inputs=x, outputs=out) parallel_model.compile(loss=loss_fn, optimizer=optimizer, metrics=metrics) parallel_model.summary() print('Ready for training!\n') # Callbacks callbacks = get_nyu_callbacks(parallel_model, train_generator, val_generator, runPath, totaL_epochs=args.epochs, warmup_epoch=5, batch_size=args.bs, lr=args.lr, val_loss="val_loss_sirmse_baseline") # Start training parallel_model.fit(train_generator, validation_data=val_generator, callbacks=callbacks, epochs=args.epochs, shuffle=True, batch_size=args.bs, verbose=1)
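# `strategy` above is assumed to be a tf.distribute strategy created earlier in
# the script; a minimal sketch of how such a scope is typically obtained (this
# would run before the `with strategy.scope():` block above):
import tensorflow as tf
strategy = tf.distribute.MirroredStrategy()
print('Replicas in sync:', strategy.num_replicas_in_sync)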
#compiling the model by using categorical_crossentropy loss model.compile(optimizer=opti_flow, loss = 'categorical_crossentropy', metrics=['mae','accuracy']) model.summary() #visualizing the model on tensorboard tensorboard = TensorBoard(log_dir="logs\{}".format(time()),write_graph=True) #calling the datagenerator and passing the inputs to our model for training i=0 hist_frames=[] for x, y in datagen(): i=i+1 print(i) if(i == 15000): break history = model.fit(x,y, batch_size=64, epochs=1,callbacks=[tensorboard]) hist_frames.append(history.history) #saving training history print("\nhistory dict:",hist_frames) #saving the model after training model.save('C:\\Users\\Tanya Joon\\Documents\\MM 803 Image\\model.h5') #saving the training loss in an numpy array loss_array=[] for i in hist_frames: for j in i['loss']: loss_array.append(j) #saving the training accuracy in an numpy array
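# A minimal sketch of what the trailing comment above suggests, mirroring the
# loss extraction; 'accuracy' is assumed to be the metric key in each history dict.
accuracy_array = []
for i in hist_frames:
    for j in i['accuracy']:
        accuracy_array.append(j)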
backbone = ResNet50(weights=None, input_shape=(224, 224, 3), pooling='avg', include_top=False) x = backbone(backbone.input) x = Dense(64, activation='relu', kernel_initializer='he_uniform')(x) x = Dropout(0.5)(x) x = Dense(32, activation='relu', kernel_initializer='he_uniform')(x) x = Dropout(0.5)(x) output = Dense(1, activation='relu', kernel_initializer='he_uniform')(x) model = Model(backbone.input, output) model_checkpoint = ModelCheckpoint(str( models_path.joinpath('{epoch:02d}-{val_n_mae:.2f}.h5')), period=1) lr_sche = LearningRateScheduler(lr_schedule) model.compile(loss=loss, optimizer=tf.keras.optimizers.Adam(0.0001, decay=1e-3 / STEP_SIZE_TRAIN), metrics=[n_mae]) his = model.fit(train_dataset, validation_data=val_dataset, epochs=epochs, callbacks=[model_checkpoint, wandbcb, lr_sche], verbose=1) # %% Save history to csv and images history = his.history save_history(history_path, history) plot_history(history_path, history)
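# lr_schedule is referenced by LearningRateScheduler above but defined elsewhere;
# a hypothetical step-decay schedule with a compatible (epoch, lr) signature
# might look like this (illustrative values only):
def lr_schedule(epoch, lr=1e-4):
    # keep the current rate, then halve it once at epoch 10 and again at epoch 20
    if epoch in (10, 20):
        return lr * 0.5
    return lr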
conv_out_03 = layers.Flatten()(dense_01) drop_02 = layers.Dropout(0.5)(dense_01) out_layer = layers.Dense(n_cls, activation='softmax')(drop_02) # %% Trainable Model org_model = Model(inputs=in_layer, outputs=out_layer) org_model.compile(loss='categorical_crossentropy', optimizer='adadelta', metrics=['acc']) if not os.path.exists('./logs/features'): org_model.fit(X_train, to_categorical(y_train), batch_size=128, epochs=1000, callbacks=[EarlyStopping(patience=50)], validation_data=(X_test, to_categorical(y_test))) # %% Model for Analysis out_01_model = Model(inputs=in_layer, outputs=conv_out_01) out_02_model = Model(inputs=in_layer, outputs=conv_out_02) out_03_model = Model(inputs=in_layer, outputs=conv_out_03) # %% Embeddings for training data def _ext(X_data, y_data): sorted_X = [] sorted_y = [] for i in range(n_cls):
class Transformer(): """ Transformer Model. References: Bryan M. Li and FOR.ai A Transformer Chatbot Tutorial with TensorFlow 2.0, 2019 Medium: https://medium.com/tensorflow/a-transformer-chatbot-tutorial-with-tensorflow-2-0-88bf59e66fe2 Tensorflow documentation Transformer model for language understanding URL: https://www.tensorflow.org/tutorials/text/transformer Trung Tran Create The Transformer With Tensorflow 2.0 Machine Talk: https://machinetalk.org/2019/04/29/create-the-transformer-with-tensorflow-2-0/ Github: https://github.com/ChunML/NLP/tree/master/machine_translation Jupyter Notebook: https://colab.research.google.com/drive/1YhN8ZCZhrv18Hw0a_yIkuZ5tTh4EZDuG#scrollTo=ha0dNJogUPQN """ def __init__(self, tokenizer, num_layers, units, d_model, num_heads, dropout=0.0, stop_tolerance=20, reduce_tolerance=15): self.tokenizer = tokenizer self.num_layers = num_layers self.units = units self.d_model = d_model self.num_heads = num_heads self.dropout = dropout self.stop_tolerance = stop_tolerance self.reduce_tolerance = reduce_tolerance self.model = None self.encoder = None self.decoder = None def summary(self, output=None, target=None): """Show/Save model structure (summary)""" self.model.summary() if target is not None: os.makedirs(output, exist_ok=True) with open(os.path.join(output, target), "w") as f: with redirect_stdout(f): self.model.summary() def load_checkpoint(self, target): """Restore model to construct transformer/encoder/decoder""" if os.path.isfile(target): if self.model is None: self.compile() self.model.load_weights(target) def get_callbacks(self, logdir, checkpoint, monitor="val_loss", verbose=0): """Setup the list of callbacks for the model""" callbacks = [ CSVLogger( filename=os.path.join(logdir, "epochs.log"), separator=";", append=True), TensorBoard( log_dir=logdir, histogram_freq=10, profile_batch=0, write_graph=True, write_images=False, update_freq="epoch"), ModelCheckpoint( filepath=checkpoint, monitor=monitor, save_best_only=True, save_weights_only=True, verbose=verbose), EarlyStopping( monitor=monitor, min_delta=1e-8, patience=self.stop_tolerance, restore_best_weights=True, verbose=verbose), ReduceLROnPlateau( monitor=monitor, min_delta=1e-8, factor=0.2, patience=self.reduce_tolerance, verbose=verbose) ] return callbacks def compile(self, learning_rate=None, initial_step=0): """Build models (train, encoder and decoder)""" enc_input = Input(shape=(None,), name="enc_input") dec_input = Input(shape=(None,), name="dec_input") enc_padding_mask, look_ahead_mask, dec_padding_mask = create_masks(enc_input, dec_input) self.encoder = Encoder(num_layers=self.num_layers, d_model=self.d_model, num_heads=self.num_heads, dff=self.units, input_vocab_size=self.tokenizer.vocab_size, maximum_position_encoding=self.tokenizer.vocab_size, rate=self.dropout) self.decoder = Decoder(num_layers=self.num_layers, d_model=self.d_model, num_heads=self.num_heads, dff=self.units, target_vocab_size=self.tokenizer.vocab_size, maximum_position_encoding=self.tokenizer.vocab_size, rate=self.dropout) enc_output = self.encoder(enc_input, enc_padding_mask) dec_output, _ = self.decoder(dec_input, enc_output, look_ahead_mask, dec_padding_mask) if learning_rate is None: learning_rate = CustomSchedule(d_model=self.d_model, initial_step=initial_step) self.learning_schedule = True else: self.learning_schedule = False optimizer = Adam(learning_rate=learning_rate, beta_1=0.9, beta_2=0.98, epsilon=1e-9) self.model = Model(inputs=[enc_input, dec_input], outputs=dec_output, name="transformer") 
self.model.compile(optimizer=optimizer, loss=loss_func, metrics=["accuracy"]) def fit(self, x=None, y=None, batch_size=None, epochs=1, verbose=1, callbacks=None, validation_split=0.0, validation_data=None, shuffle=True, class_weight=None, sample_weight=None, initial_epoch=0, steps_per_epoch=None, validation_steps=None, validation_freq=1, max_queue_size=10, workers=1, use_multiprocessing=False, **kwargs): """ Model training on data yielded (fit function has support to generator). A fit() abstration function of TensorFlow 2 using the model_train. :param: See tensorflow.keras.Model.fit() :return: A history object """ # remove ReduceLROnPlateau (if exist) when use schedule learning rate if callbacks and self.learning_schedule: callbacks = [x for x in callbacks if not isinstance(x, ReduceLROnPlateau)] out = self.model.fit(x=x, y=y, batch_size=batch_size, epochs=epochs, verbose=verbose, callbacks=callbacks, validation_split=validation_split, validation_data=validation_data, shuffle=shuffle, class_weight=class_weight, sample_weight=sample_weight, initial_epoch=initial_epoch, steps_per_epoch=steps_per_epoch, validation_steps=validation_steps, validation_freq=validation_freq, max_queue_size=max_queue_size, workers=workers, use_multiprocessing=use_multiprocessing, **kwargs) return out def predict(self, x, batch_size=None, verbose=0, steps=1, callbacks=None, max_queue_size=10, workers=1, use_multiprocessing=False): """ Model predicting on data yielded (generator). A predict() abstration function of TensorFlow 2 using the encoder and decoder models :param: See tensorflow.keras.Model.predict() :return: A numpy array(s) of predictions. """ try: enqueuer = GeneratorEnqueuer(x, use_multiprocessing=use_multiprocessing) enqueuer.start(workers=workers, max_queue_size=max_queue_size) output_generator = enqueuer.get() steps_done = 0 if verbose == 1: print("Model Predict") progbar = Progbar(target=steps) predicts = [] while steps_done < steps: x = next(output_generator)[0] for sentence in x: enc_input = tf.expand_dims(sentence, axis=0) dec_input = tf.expand_dims([self.tokenizer.SOS], axis=0) for _ in range(self.tokenizer.maxlen): enc_padding_mask, look_ahead_mask, dec_padding_mask = create_masks(enc_input, dec_input) enc_output = self.encoder(enc_input, enc_padding_mask) # (batch_size, inp_seq_len, d_model) dec_output, _ = self.decoder(dec_input, enc_output, look_ahead_mask, dec_padding_mask) # select the last word from the seq_len dimension predictions = dec_output[:, -1:, :] # (batch_size, 1, vocab_size) predicted_id = tf.cast(tf.argmax(predictions, axis=-1), dtype=tf.int32) # return the result if the predicted_id is equal to the end token if tf.equal(predicted_id, self.tokenizer.EOS): break # concatentate the predicted_id to the output which is given to the decoder as its input. dec_input = tf.concat([dec_input, predicted_id], axis=-1) dec_input = tf.squeeze(dec_input, axis=0) dec_input = self.tokenizer.decode(dec_input) predicts.append(self.tokenizer.remove_tokens(dec_input)) steps_done += 1 if verbose == 1: progbar.update(steps_done) finally: enqueuer.stop() return predicts
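# A hedged usage sketch of the Transformer wrapper above. `tokenizer` is assumed
# to expose vocab_size / SOS / EOS / maxlen as in the surrounding code base, and
# `train_generator` is a hypothetical generator yielding ([enc_input, dec_input], target).
transformer = Transformer(tokenizer=tokenizer, num_layers=2, units=512,
                          d_model=128, num_heads=4, dropout=0.1)
transformer.compile()
callbacks = transformer.get_callbacks(logdir='./logs', checkpoint='./checkpoint.hdf5')
history = transformer.fit(x=train_generator, epochs=10, callbacks=callbacks)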
def model_cat(images, categories): imgs = [] labels = [] for i in range(int(len(categories))): if (images[i].shape == (128, 128, 3)): try: labels.append((categories[i][0])) imgs.append(images[i]) except: print(categories[i]) imgs_train = np.asarray(imgs[:int(len(imgs) * 0.98)]) imgs_validate = np.asarray(imgs[int(len(imgs) * 0.98):]) labels_train = np.asarray(labels[:int(len(labels) * 0.98)]) labels_validate = np.asarray(labels[int(len(labels) * 0.98):]) input = Input(shape=(imgs_train.shape[1], imgs_train.shape[2], 3)) # convolution layers (VGG structure) conv_layer1 = Conv2D(filters=32, kernel_size=3, activation='relu') conv_output1 = conv_layer1(input) conv_layer2 = Conv2D(filters=32, kernel_size=3, activation='relu') conv_output2 = conv_layer2(conv_output1) pool_layer1 = MaxPool2D(pool_size=(2, 2)) pool_output1 = pool_layer1(conv_output2) conv_layer3 = Conv2D(filters=64, kernel_size=3, activation='relu') conv_output3 = conv_layer3(pool_output1) conv_layer4 = Conv2D(filters=64, kernel_size=3, activation='relu') conv_output4 = conv_layer4(conv_output3) pool_layer2 = MaxPool2D(pool_size=(2, 2)) pool_output2 = pool_layer2(conv_output4) drop_out2 = Dropout(0.2)(pool_output2) Flatten_output = Flatten()(drop_out2) dense_sigmoid = Dense(16, activation='sigmoid')(Flatten_output) output = Dense(labels_train.shape[1], activation='softmax')(dense_sigmoid) Model_cat = Model(input, output) # optimizer optimizer = optimizers.Adam(lr=0.00002) Model_cat.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['mse', 'accuracy']) history = Model_cat.fit(imgs_train, labels_train, epochs=50, batch_size=256, validation_data=[imgs_validate, labels_validate]) Model_cat.save("Model_cat.hdf5") cat_precict = Model_cat.predict(imgs_validate) num_true_predict = 0 for i in range(labels_validate.shape[0]): for j in range(labels_validate.shape[1]): labels_validate[i][j] = np.argmax(labels_validate[i][j]) if np.argmax(cat_precict[i]) in labels_validate[i]: num_true_predict += 1 print(np.argmax(cat_precict[i])) print("test accuracy for categories is:", num_true_predict / labels_validate.shape[0]) return Model_cat, history.history
dropout_1 = Dropout(0.3, seed=42)(fc_1) fc_2 = Dense(32, activation='relu')(dropout_1) output = Dense(n_out, activation='softmax')(fc_2) # Build model model = Model(inputs=[X_in, A_in], outputs=output) optimizer = Adam(lr=learning_rate) model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['acc']) # Train model validation_data = (test_X, test_y) model.fit(train_X, train_y, batch_size=16, validation_data=validation_data, epochs=10, verbose=0) y_pred = model.predict(X_test, verbose=1) y_p = [] for row in y_pred: y_p.append(np.argmax(row)) target_names = ['0', '1', '2'] print("Fold: ", fold) fold += 1 # print(classification_report(y_test, y_p, target_names=target_names)) f1_weighted_per_fold.append( f1_score(y_test, y_p, average='weighted')) f1_macro_per_fold.append(f1_score(y_test, y_p, average='macro')) f1_micro_per_fold.append(f1_score(y_test, y_p, average='micro'))
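# After the cross-validation loop above, a short summary of the per-fold scores
# (assuming numpy is imported as np, as elsewhere in this script):
print('weighted F1: mean={:.4f}, std={:.4f}'.format(
    np.mean(f1_weighted_per_fold), np.std(f1_weighted_per_fold)))
print('macro F1:    mean={:.4f}, std={:.4f}'.format(
    np.mean(f1_macro_per_fold), np.std(f1_macro_per_fold)))
print('micro F1:    mean={:.4f}, std={:.4f}'.format(
    np.mean(f1_micro_per_fold), np.std(f1_micro_per_fold)))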
def model_caption(images, captions_word2vec, captions_onehot, word2vec): decoder_input_raw = [] decoder_output_raw = [] encoder_raw = [] encoder_test = np.asarray(images[:int(0.1 * len(images))]) decoder_input_test = np.asarray( captions_word2vec[:int(0.1 * len(captions_word2vec))]) decoder_output_test = np.asarray( captions_onehot[:int(0.1 * len(captions_onehot))]) encoder_test = encoder_test.reshape(encoder_test.shape[0], 1, encoder_test.shape[1], encoder_test.shape[2], 3) for i in range(int(len(captions_word2vec))): if (images[i].shape == (128, 128, 3)): decoder_input_raw += [captions_word2vec[i]] decoder_output_raw += [captions_onehot[i]] encoder_raw += [images[i]] max = 0 for i in range(len(captions_onehot)): if (max < len(captions_onehot[i])): max = len(captions_word2vec[i]) encoder_train_input = np.asarray(encoder_raw[:int(0.98 * len(encoder_raw))]) encoder_validate_input = np.asarray(encoder_raw[int(0.98 * len(encoder_raw)):]) decoder_input_raw = np.asarray(decoder_input_raw) decoder_output_raw = np.asarray(decoder_output_raw) decoder_input_raw = preprocessing.sequence.pad_sequences(decoder_input_raw, maxlen=None, dtype='float', padding='post', truncating='post', value=0) decoder_output_raw = preprocessing.sequence.pad_sequences( decoder_output_raw, maxlen=None, dtype='float', padding='post', truncating='post', value=0) decoder_input_raw = preprocessing.sequence.pad_sequences( decoder_input_raw, maxlen=decoder_input_raw.shape[1] + 1, dtype='float', padding='pre', truncating='pre', value=0) decoder_output_raw = preprocessing.sequence.pad_sequences( decoder_output_raw, maxlen=decoder_output_raw.shape[1] + 1, dtype='float', padding='post', truncating='post', value=0) decoder_train_input = decoder_input_raw[:int( len(decoder_input_raw) * 0.98)] decoder_train_output = decoder_output_raw[:int( len(decoder_output_raw) * 0.98)] decoder_validate_input = decoder_input_raw[ int(len(decoder_input_raw) * 0.98):] decoder_validate_output = decoder_output_raw[ int(len(decoder_output_raw) * 0.98):] encoder_input = Input(shape=(encoder_train_input.shape[1], encoder_train_input.shape[2], 3)) # convolution layers (VGG structure) conv_layer1 = Conv2D(filters=32, kernel_size=3, activation='relu') conv_output1 = conv_layer1(encoder_input) conv_layer2 = Conv2D(filters=32, kernel_size=3, activation='relu') conv_output2 = conv_layer2(conv_output1) pool_layer1 = MaxPool2D(pool_size=(2, 2)) pool_output1 = pool_layer1(conv_output2) conv_layer3 = Conv2D(filters=64, kernel_size=3, activation='relu') conv_output3 = conv_layer3(pool_output1) conv_layer4 = Conv2D(filters=64, kernel_size=3, activation='relu') conv_output4 = conv_layer4(conv_output3) pool_layer2 = MaxPool2D(pool_size=(2, 2)) pool_output2 = pool_layer2(conv_output4) drop_out2 = Dropout(0.2)(pool_output2) # flatten layer Flatten_layer = Flatten() Flatten_output = Flatten_layer(drop_out2) Full_connect_h = Dense(128, activation='relu') Full_connect_h_output = Full_connect_h(Flatten_output) Full_connect_c = Dense(128, activation='relu') Full_connect_c_output = Full_connect_c(Flatten_output) Full_connect_attention = Dense(512, activation="relu") Full_connect_attention_output = Full_connect_attention(Flatten_output) # decoder decoder_input = Input(shape=(None, decoder_train_input.shape[2])) num_hidden_lstm = int(Full_connect_h_output.shape[1]) decoder_lstm = LSTM(num_hidden_lstm, return_state=True, return_sequences=True) decoder_out, decoder_h, decoder_c = decoder_lstm( decoder_input, initial_state=[Full_connect_h_output, Full_connect_c_output]) # ensure a 
same dimmension for dot product in attention layer decoder_dense1 = Dense(Full_connect_attention_output.shape[1], activation='relu') decoder_dense1_output = decoder_dense1(decoder_out) # Attention layer attention = Attention() attention_distribute = attention( [decoder_dense1_output, Full_connect_attention_output]) # attention output into softmax for output decoder_dense2 = Dense(decoder_train_output.shape[2], activation='softmax') decoder_output = decoder_dense2(attention_distribute) Model_train = Model([encoder_input, decoder_input], decoder_output) # optimizer optimizer = optimizers.SGD(lr=0.000002, momentum=0.9) # , nesterov=True)# decay=1e-6) Model_train.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['mse', 'accuracy']) Model_train.load_weights("Model_train.h5") history = Model_train.fit( [encoder_train_input, decoder_train_input], decoder_train_output, epochs=5000, batch_size=256, validation_data=[[encoder_validate_input, decoder_validate_input], decoder_validate_output]) Model_train.save("Model_train.hdf5") # encoder model Model_encoder = Model(encoder_input, [ Full_connect_h_output, Full_connect_c_output, Full_connect_attention_output ]) Model_encoder.save("Model_encoder.hdf5") # decoder model h_decoder_input = Input(shape=(num_hidden_lstm, )) c_decoder_input = Input(shape=(num_hidden_lstm, )) attention_decoder_input = Input( shape=(Full_connect_attention_output.shape[1], )) decoder_out, decoder_h_output, decoder_c_output = decoder_lstm( decoder_input, initial_state=[h_decoder_input, c_decoder_input]) decoder_dense_out1 = decoder_dense1(decoder_out) attention_distribute = attention( [attention_decoder_input, decoder_dense_out1]) decoder_output = decoder_dense2(attention_distribute) Model_decoder = Model([ decoder_input, h_decoder_input, c_decoder_input, attention_decoder_input ], [decoder_output, decoder_h_output, decoder_c_output]) Model_decoder.save("Model_decoder.hdf5") # prediction # BLUE score list BLUE_score_list = [] # maximum words in a sentence max_word = 20 for i in range(len(encoder_test)): outputs_list = [] outputs = "" for k in range(len(decoder_input_test[i])): if (sum(decoder_output_test[i][k]) != 0): output = np.argmax(decoder_output_test[i][k]) output = wordforid[output] outputs = outputs + output + ' ' outputs_list.append(output) print("actually reference:", outputs) [h, c, attention_] = Model_encoder.predict(encoder_test[i]) # a blank image for normalisation blank_reference = np.ones((1, width, height, 3)) * 128 [h_reference, c_reference, attention_reference] = Model_encoder.predict(blank_reference) current_decoder_input = np.zeros([1, 1, len(decoder_input_test[0][0])]) current_decoder_input_reference = np.zeros( [1, 1, len(decoder_input_test[0][0])]) outputs_hat = "" outputs_hat_list = [] outputs_hatt = "" outputs_hatt_list = [] # for BLUE score counting BLUE = [] BLUE_element = {} for j in range(max_word): [current_decoder_output, h, c] = Model_decoder.predict( [current_decoder_input, h, c, attention_]) [current_decoder_output_reference, h_reference, c_reference] = Model_decoder.predict([ current_decoder_input_reference, h_reference, c_reference, attention_reference ]) # normalisation of decoder output current_decoder_out = (current_decoder_output - current_decoder_output_reference) / ( current_decoder_output**0.75) output_hat = np.argmax(current_decoder_out) output_hat = wordforid[output_hat] output_hatt = np.argmax(current_decoder_output) current_decoder_input = word2vec[output_hatt] current_decoder_input = 
current_decoder_input.reshape(1, 1, 300) output_hatt = wordforid[output_hatt] output_hattt = np.argmax(current_decoder_output_reference) current_decoder_input_reference = word2vec[output_hattt] current_decoder_input_reference = current_decoder_input_reference.reshape( 1, 1, 300) count_current_word = outputs_list.count(output_hat) # We use the advanced BLUE score, if number of the word in output is bigger than reference, use the # maximum word number in reference if (count_current_word > 0): try: if (BLUE_element[output_hat] < count_current_word): BLUE_element[output_hat] += 1 BLUE.append(1) except: BLUE_element[output_hat] = 1 BLUE.append(1) else: BLUE.append(0) outputs_hat = outputs_hat + output_hat + ' ' outputs_hat_list.append(output_hat) outputs_hatt = outputs_hatt + output_hatt + ' ' outputs_hatt_list.append(output_hatt) print("normal prediction:", outputs_hat) print("prediction:", outputs_hatt) if (len(BLUE) > 0): BLUE_score = sum(BLUE) / len(BLUE) BLUE_score_list.append(BLUE_score) print("BLUE score is", BLUE_score) print('\n') overall_BLUE_Score = sum(BLUE_score_list) / len(BLUE_score_list) print("overall BLUE score is:", overall_BLUE_Score) return Model_encoder, Model_decoder, history.history, overall_BLUE_Score
class ClassificationPipe(PipelineBase): """Cral pipeline for classification task.""" def __init__(self, *args, **kwargs): super(ClassificationPipe, self).__init__(task_type='classification', *args, **kwargs) def add_data(self, *args, **kwargs): """Parses dataset once for generating metadata and versions the data. Args: *args: Description **kwargs: Description Deleted Parameters: train_images_dir (str): path to images val_images_dir (str, optional): path to validation images split (float, optional): float to divide training dataset into training and validation """ self.dataset_hash, self.dataset_csv_path, self.dataset_json = classification_dataset_hasher( # noqa: E501 tempfile.gettempdir(), *args, **kwargs) # try_mlflow_log(log_artifact, local_path=dataset_csv_path) # try_mlflow_log(log_artifact, local_path=dataset_json) with open(self.dataset_json) as f: self.data_dict = json.loads(f.read()) self.update_project_file(self.data_dict) def set_aug(self, aug): """Sets the augmentation pipeline. Args: aug (TYPE): An albumentations data-augmentation pipeline """ # Do a check on data self.aug_pipeline = AugmentorClassification(aug) # update_json(self) def visualize_data(self, image_url, allow_aug=False): """Summary. Args: image_url (TYPE): Description allow_aug (bool, optional): Description Raises: ValueError: Description """ # tfrecord_dir = self.cral_meta_data['tfrecord_path'] # train_tfrecords = list( # glob.glob(os.path.join(tfrecord_dir, 'train*.tfrecord'))) # test_tfrecords = list( # glob.glob(os.path.join(tfrecord_dir, 'test*.tfrecord'))) if allow_aug is True and self.aug_pipeline is None: raise ValueError('No augmentation pipeline has been provided') def lock_data(self, gen_stats=False): """Parse Data and makes tf-records and creates meta-data. Args: gen_stats (bool, optional): If True uses tfdv to create stats graph """ meta_info = classification_tfrecord_creator( meta_json=os.path.join(tempfile.gettempdir(), 'dataset.json'), dataset_csv=os.path.join(tempfile.gettempdir(), 'dataset.csv'), out_path=tempfile.gettempdir()) self.update_project_file(meta_info) # generate cavets overview html graphs with tfdv and log # disabling due to version problem if gen_stats: from cral.data_feeder.utils import generate_stats generate_stats(os.path.join(tempfile.gettempdir(), 'statistics')) def set_algo(self, feature_extractor, config, weights='imagenet', base_trainable=False, preprocessing_fn=None, optimizer=tf.keras.optimizers.Adam(lr=1e-4, clipnorm=0.001), distribute_strategy=None): """Set model for training and prediction. Args: feature_extractor (str,model): Name of base model config: Is an instance of MLPConfig for the head of the network weights: one of `None` (random initialization),'imagenet' (pre-training on ImageNet),or the path to the weights file to be loaded base_trainable (bool, optional): If set False the base models layers will not be trainable useful fortransfer learning preprocessing_fn (func, optional): needs to to be set if a in built model is not being used Raises: ValueError: If network name assigned to `feature_extractor` is not yet supported. 
""" classification_algo_meta = dict(feature_extractor_from_cral=False, classification_meta=None) # if config is not None: assert isinstance( config, MLPConfig ), f'config has to be an object of MLPConfig but got{type(config)}' height = config.height width = config.width fully_connected_layer = config.fully_connected_layer dropout_rate = config.dropout_rate hidden_layer_activation = config.hidden_layer_activation final_layer_activation = config.final_layer_activation assert isinstance(feature_extractor, str), 'expected a string got {} instead'.format( type(feature_extractor)) feature_extractor = feature_extractor.lower() if feature_extractor not in classification_networks.keys(): raise ValueError('feature extractor has to be one of {}'.format( list(classification_networks.keys()))) if weights in ('imagenet', None): backbone, self.preprocessing_fn = classification_networks[ feature_extractor](weights=weights, input_shape=(height, width, 3)) elif tf.saved_model.contains_saved_model(weights): backbone, self.preprocessing_fn = classification_networks[ feature_extractor](weights=None, input_shape=(height, width, 3)) else: assert False, 'Weights file is not supported' if preprocessing_fn is not None: # assert preprocessing function once self.preprocessing_fn = preprocessing_fn # freeze/train backbone backbone.trainable = base_trainable num_classes = self.cral_meta_data['num_classes'] final_hidden_layer = densely_connected_head( feature_extractor_model=backbone, fully_connected_layer=fully_connected_layer, dropout_rate=dropout_rate, hidden_layer_Activation=hidden_layer_activation) output = Dense(units=num_classes, activation=final_layer_activation)(final_hidden_layer) # Assign resize dimensions resize_height = tf.constant(height, dtype=tf.int64) resize_width = tf.constant(width, dtype=tf.int64) @tf.function( input_signature=[tf.TensorSpec([None, None, 3], dtype=tf.uint8)]) def _preprocess(image_array): """tf.function-deocrated version of preprocess_""" im_arr = tf.image.resize(image_array, (resize_height, resize_width)) im_arr = self.preprocessing_fn(im_arr) input_batch = tf.expand_dims(im_arr, axis=0) return input_batch self.model = Model(inputs=backbone.inputs, outputs=output, name='{}_custom'.format(feature_extractor)) if tf.saved_model.contains_saved_model(weights): self.model.load_weights( os.path.join(weights, 'variables', 'variables')) # Attach function to Model self.model.preprocess = _preprocess # Attach resize dimensions to Model self.model.resize_height = resize_height self.model.resize_width = resize_width # Model parallelism if distribute_strategy is None: self.model.compile(optimizer=optimizer, loss=tf.keras.losses.CategoricalCrossentropy(), metrics=['accuracy']) else: with distribute_strategy.scope(): self.model.compile( optimizer=optimizer, loss=tf.keras.losses.CategoricalCrossentropy(), metrics=['accuracy']) classification_algo_meta['feature_extractor_from_cral'] = True classification_meta = dict(feature_extractor=feature_extractor, architecture='MLP', weights=weights, base_trainable=base_trainable, config=jsonpickle.encode(config)) classification_algo_meta['classification_meta'] = classification_meta self.height = height self.width = width # log_param('feature_extractor', feature_extractor) self.update_project_file(classification_algo_meta) self.update_project_file(classification_algo_meta) def train(self, num_epochs, snapshot_prefix, snapshot_path, snapshot_every_n, batch_size=2, validation_batch_size=None, validate_every_n=1, callbacks=[], steps_per_epoch=None, 
compile_options=None, log_evry_n_step=100): """This function starts the training loop, with metric logging enabled. Args: num_epochs (int): number of epochs to run training on snapshot_prefix (str): prefix to assign to the checkpoint file snapshot_path (str): a valid folder path where the checkpoints are to be stored snapshot_every_n (int): take a snapshot at every nth epoch batch_size (int, optional): batch size validation_batch_size (None, optional): the batch size for validation loop, if None(default) then equal to `batch_size` argument validate_every_n (int, optional): Run validation every nth epoch callbacks (list, optional): list of keras callbacks to be passed to model.fit() method steps_per_epoch (None, optional): steps size of each epoch compile_options (None, optional): A dictionary to be passed to model.compile method Raises: ValueError: If model is not defined """ assert isinstance(num_epochs, int), 'num epochs to run should be in `int`' assert os.path.isdir(snapshot_path), '{} doesnot exist'.format( snapshot_path) assert isinstance(callbacks, list) assert isinstance(validate_every_n, int) snapshot_prefix = str(snapshot_prefix) if validation_batch_size is None: validation_batch_size = batch_size # self.height = height # self.width = width num_classes = int(self.cral_meta_data['num_classes']) training_set_size = int(self.cral_meta_data['num_training_images']) test_set_size = int(self.cral_meta_data['num_test_images']) if self.model is None: raise ValueError( 'please define a model first using set_algo() function') if compile_options is not None: assert isinstance(compile_options, dict) self.model.compile(**compile_options) meta_info = dict(height=self.height, width=self.width, num_epochs=num_epochs, batch_size=batch_size) self.update_project_file(meta_info) tfrecord_dir = self.cral_meta_data['tfrecord_path'] train_tfrecords = os.path.join(tfrecord_dir, 'train*.tfrecord') test_tfrecords = os.path.join(tfrecord_dir, 'test*.tfrecord') train_input_function = classification_tfrecord_parser( filenames=train_tfrecords, height=self.height, width=self.width, num_classes=num_classes, processing_func=self.preprocessing_fn, augmentation=self.aug_pipeline, batch_size=batch_size) if test_set_size > 0: test_input_function = classification_tfrecord_parser( filenames=test_tfrecords, height=self.height, width=self.width, num_classes=num_classes, processing_func=self.preprocessing_fn, augmentation=self.aug_pipeline, batch_size=validation_batch_size, num_repeat=-1) validation_steps = test_set_size / validation_batch_size else: test_input_function = None validation_steps = None if steps_per_epoch is None: steps_per_epoch = training_set_size / batch_size # callbacks.append(KerasCallback(log_evry_n_step)) # callbacks.append(KerasCallback()) # callbacks.append( # checkpoint_callback( # snapshot_every_epoch=snapshot_every_n, # snapshot_path=snapshot_path, # checkpoint_prefix=snapshot_prefix, # save_h5=False)) # Attach segmind.cral as an asset tf.io.gfile.copy(self.cral_file, 'segmind.cral', overwrite=True) cral_asset_file = tf.saved_model.Asset('segmind.cral') self.model.cral_file = cral_asset_file # pred_model = tf.keras.models.load_model('saved_model') # location_to_cral_file = pred_model.cral_file.asset_path.numpy() # log_param('training_steps_per_epoch', int(steps_per_epoch)) # if test_set_size > 0: # log_param('val_steps_per_epoch', int(validation_steps)) # log_gpu_params() # Train & test self.model.fit(x=train_input_function, epochs=num_epochs, callbacks=callbacks, steps_per_epoch=steps_per_epoch, 
                       validation_data=test_input_function,
                       validation_steps=validation_steps,
                       validation_freq=validate_every_n)

        final_model_path = os.path.join(snapshot_path,
                                        str(snapshot_prefix) + '_final')
        self.model.save(filepath=final_model_path, overwrite=True)
        print('Saved the final Model to :\n {}'.format(final_model_path))

    def prediction_model(self, checkpoint_file):
        self.model = keras.models.load_model(checkpoint_file, compile=False)

        try:
            location_to_cral_file = self.model.cral_file.asset_path.numpy()
            with open(location_to_cral_file) as f:
                metainfo = json.loads(f.read())
        except AttributeError:
            print(
                "Couldn't locate any cral config file; this model was probably not trained using cral, or the file may be corrupted"  # noqa: E501
            )
            raise  # without the config file, `metainfo` would be undefined below

        for k, v in metainfo.items():
            print(k, v)

        # architecture = metainfo['classification_meta']['architecture']
        # num_classes = int(metainfo['num_classes'])
        feature_extractor = metainfo['classification_meta'][
            'feature_extractor']
        size = (metainfo['height'], metainfo['width'])

        _, preprocessing_fn = classification_networks[feature_extractor](
            weights=None)

        pred_object = ClassificationPredictor(
            model=self.model,
            preprocessing_func=preprocessing_fn,
            size=size)

        return pred_object.predict
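# A hedged end-to-end sketch of the pipeline above. Paths, the backbone name and
# the MLPConfig arguments are placeholders inferred from the attributes read in
# set_algo(), not values taken from the original project.
pipe = ClassificationPipe()
pipe.add_data(train_images_dir='data/train', val_images_dir='data/val')
pipe.lock_data()
pipe.set_algo(feature_extractor='resnet50',
              config=MLPConfig(height=224, width=224,
                               fully_connected_layer=[256],
                               dropout_rate=0.3,
                               hidden_layer_activation='relu',
                               final_layer_activation='softmax'))
pipe.train(num_epochs=10,
           snapshot_prefix='resnet50_run',
           snapshot_path='./snapshots',
           snapshot_every_n=5,
           batch_size=8)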
#encoded representation: encoded = layers.Dense(num_neurons, activation="relu", kernel_regularizer=regulariser)(stock_in) #decoded representation: decoded = layers.Dense(features, activation="linear", kernel_regularizer=regulariser)(encoded) autoencoder = Model(stock_in, decoded) encoder = Model(stock_in, encoded) encoded_input = Input(shape=(num_neurons, )) decoder_layer = autoencoder.layers[-1] decoder = Model(encoded_input, decoder_layer(encoded_input)) autoencoder.compile(optimizer='sgd', loss='mean_squared_error') ''' autoencoder.fit(X_train,X_train, epochs = 500, verbose = 1) encoded_imgs = encoder.predict(X_test) decoded_imgs = decoder.predict(encoded_imgs) encoded_Train = encoder.predict(X_train) encoded_Test = encoder.predict(X_test) ''' #============================================================================== #============================================================================== '''auto-encoding the folds:''' Epochs = 100 #fold 1: autoencoder.fit(fold1_train, fold1_train, epochs=Epochs, verbose=1)
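# Hedged follow-up: once the autoencoder has been fit on fold 1, the standalone
# encoder defined above can compress that fold into its latent representation.
encoded_fold1_train = encoder.predict(fold1_train)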
                     mode='min', verbose=1)
mc = ModelCheckpoint(fnModel, monitor='val_accuracy', verbose=1,
                     save_best_only=True, mode='max')

t0 = time.time()
h = m.fit(
    X_train, y_train,
    batch_size=500,  #1000, # 1000
    epochs=200,
    callbacks=[mc],
    #validation_split= 0.1
    validation_data=(X_val, y_val))

tf.keras.backend.clear_session()

fnModel = 'ryModel_3.hdf5'

# the data are normalized
X_train = X_train.reshape(-1, nTime, nFreq, 1).astype('float32')
X_val = X_val.reshape(-1, nTime, nFreq, 1).astype('float32')
X_test = X_test.reshape(-1, nTime, nFreq, 1).astype('float32')
#X_testREAL= X_testREAL.reshape( -1, nTime, nFreq, 1).astype('float32')
from tensorflow.keras.layers import Input, Dense, Dropout from tensorflow.keras import Model from tensorflow.keras.losses import SparseCategoricalCrossentropy pixels = 28 * 28 hidden_nodes = 64 dropout = 0.3 (xtr, ytr), (xte, yte) = tf.keras.datasets.mnist.load_data() xtr = xtr.reshape((60000, pixels)).astype(np.float32) / 255.0 xte = xte.reshape((10000, pixels)).astype(np.float32) / 255.0 inputs = Input(shape=(pixels, ), name='images') z = Dense(hidden_nodes, activation='relu', name='hidden1')(inputs) z = Dropout(dropout)(z) z = Dense(10, activation='softmax')(z) our_model = Model(inputs=inputs, outputs=z) our_model.summary() our_model.compile(optimizer='adam', loss=SparseCategoricalCrossentropy(), metrics=['accuracy']) results = our_model.fit(xtr, ytr, batch_size=32, epochs=10, validation_split=0.2) our_model.save('hw01_model.hdf5')
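# A minimal sketch (assuming 'hw01_model.hdf5' was written by the block above):
# reload the saved MLP and re-check its accuracy on the held-out MNIST digits.
restored_model = tf.keras.models.load_model('hw01_model.hdf5')
loss_value, acc_value = restored_model.evaluate(xte, yte, verbose=0)
print('restored test accuracy: {:.4f}'.format(acc_value))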