def train_model(model, scaler, data):
    """Compile, train, evaluate and save ``model``; return its metrics.

    Column 1 of ``data`` is used as the regression target and every other
    column as a feature.  15% of the rows are held out as a test set; Keras
    additionally holds out 20% of the remaining rows for validation.

    Args:
        model: Keras model to compile and fit.
        scaler: Not used by this function; kept for interface compatibility.
        data: 2-D array-like with at least two columns.

    Returns:
        dict with ``mse_test``/``rmse_test``/``mae_test`` taken from
        ``model.evaluate`` on the test split, and
        ``mse_train``/``rmse_train``/``mae_train`` taken from the last
        epoch's *validation* history (the ``val_*`` entries).
    """
    data = np.asarray(data)
    X = np.delete(data, 1, axis=1)
    y = data[:, 1]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15)

    model.compile(optimizer='adam',
                  loss='mean_squared_error',
                  metrics=[
                      metrics.MeanSquaredError(),
                      metrics.RootMeanSquaredError(),
                      metrics.MeanAbsoluteError()
                  ])
    epochs_hist = model.fit(X_train,
                            y_train,
                            epochs=50,
                            batch_size=15,
                            verbose=1,
                            validation_split=0.2)

    # Read the final epoch with [-1] rather than a literal 49 so the lookup
    # stays valid if the epoch count changes or training stops early.
    history = epochs_hist.history
    mse_training = history['val_loss'][-1]
    rmse_training = history['val_root_mean_squared_error'][-1]
    mae_training = history['val_mean_absolute_error'][-1]

    # evaluate() returns [loss, <compiled metrics in order>], hence 1..3.
    evaluation_test = model.evaluate(X_test, y_test)
    save_model(model)
    return {
        "mse_test": evaluation_test[1],
        "rmse_test": evaluation_test[2],
        "mae_test": evaluation_test[3],
        "mse_train": mse_training,
        "rmse_train": rmse_training,
        "mae_train": mae_training
    }
def create_BERT_model():
    """Build, compile and summarise a sentence-level regression model.

    Despite the function name, the text encoder loaded here is the frozen
    TF-Hub ``nnlm-en-dim128`` module, which takes raw ``tf.string``
    sentences.  Its output feeds a 64-32-16 ReLU stack and a single linear
    output unit.

    Returns:
        The compiled Keras ``Model`` (Adam, MSE loss, RMSE metric).
    """
    sentence_in = layers.Input(shape=(), dtype=tf.string, name="sentence_in")

    # The hub layer expects a tf.string input tensor.
    sentence_encoder = hub.KerasLayer(
        "https://tfhub.dev/google/nnlm-en-dim128/2", trainable=False)
    hidden = sentence_encoder(sentence_in)

    for width in (64, 32, 16):
        hidden = layers.Dense(width, activation='relu')(hidden)
    score = layers.Dense(1)(hidden)

    bertie = Model(inputs=[sentence_in], outputs=score)
    bertie.compile(optimizer=optimizers.Adam(),
                   loss="mean_squared_error",
                   metrics=[metrics.RootMeanSquaredError()])
    bertie.summary()
    return bertie
def create_KBLSTM_model():
    """Build, compile and summarise the knowledge-aware BiLSTM regressor.

    The model has two int32 inputs of length 25: token ids (``TextIn``) and
    entity ids (``EntityInput``).  Token ids go through a frozen 300-d
    embedding, a bidirectional LSTM and ``AttentionWeightedAverage``; the
    result is combined with frozen entity embeddings by ``KnowledgeLayer``
    (both custom layers are defined elsewhere) and added back residually
    before a single linear output unit.

    The word-embedding matrix is cached at ``embed_path``.  If the cache is
    missing it is rebuilt from the ``.vec`` text file and the training
    vocabulary, then saved.

    Returns:
        The compiled Keras ``Model`` (Adam, MSE loss, RMSE metric).
    """
    word_dim = 300
    text_in = layers.Input(shape=(25,), dtype='int32', name="TextIn")
    input_entities = layers.Input(shape=(25,), dtype='int32', name="EntityInput")

    embed_path = "../data/embeddings/numpy/GNews.npy"
    print("Loading embeddings...")
    if not os.path.isfile(embed_path):
        word_vectors = {}
        with codecs.open('../data/embeddings/wiki-news-300d-1m.vec', encoding='utf-8') as f:
            for line in tqdm.tqdm(f):
                values = line.rstrip().rsplit(' ')
                # Skip the "<count> <dim>" header (and any malformed line):
                # a short vector would otherwise be broadcast across a whole
                # matrix row if its first field happened to be in the vocab.
                if len(values) != word_dim + 1:
                    continue
                word_vectors[values[0]] = np.asarray(values[1:], dtype='float32')

        with codecs.open('../data/vocab/train_vocab.funlines.json', encoding='utf-8') as fp:
            vocab_dict = json.load(fp)

        embed_matrix = np.zeros((len(vocab_dict), word_dim))
        missing = 0
        for word, index in vocab_dict.items():
            try:
                embed_matrix[index] = word_vectors[word]
            except KeyError:
                # Out-of-vocabulary words keep their all-zero row.
                missing += 1
        print(len(vocab_dict), missing)
        np.save(embed_path, embed_matrix)
    else:
        # NOTE(review): allow_pickle=True executes pickled objects on load;
        # only safe while this cache file is produced by trusted code.
        embed_matrix = np.load(embed_path, allow_pickle=True)

    embed_layer = layers.Embedding(input_dim=len(embed_matrix),
                                   output_dim=word_dim,
                                   trainable=False,
                                   embeddings_initializer=initializers.Constant(embed_matrix))(text_in)

    entity_vectors = np.load('../data/NELL/embeddings/entity.npy')
    # Row count comes from the file instead of a hard-coded 181544; the
    # width stays 100 because the Dense(100) projection below must match it.
    entity_embedding = layers.Embedding(entity_vectors.shape[0],
                                        100,
                                        embeddings_initializer=initializers.Constant(entity_vectors),
                                        trainable=False,
                                        name="EntityEmbeddings")(input_entities)

    HIDDEN_LAYER_DIMENSION = 64
    state_vector = layers.Bidirectional(
        layers.LSTM(HIDDEN_LAYER_DIMENSION, dropout=0.5, return_sequences=True))(embed_layer)
    attention_layer = AttentionWeightedAverage()(state_vector)
    attention_layer = layers.Dense(100, activation='relu')(attention_layer)

    hidden = KnowledgeLayer()([attention_layer, entity_embedding])
    hidden = layers.add([hidden, attention_layer])
    preds = layers.Dense(1)(hidden)

    m = Model([text_in, input_entities], preds)
    m.compile(optimizer=optimizers.Adam(),
              loss="mean_squared_error",
              metrics=[metrics.RootMeanSquaredError()])
    m.summary()
    return m
def fit(cls, X, y):
    """Fit a one-unit dense regressor on ``X``/``y`` and store it as ``cls.m``.

    Args:
        X: 2-D feature array; ``X.shape[1]`` sets the input dimension.
        y: Target array; cast to float before training.

    The model uses the ``sigmoid_3`` activation (defined elsewhere), the
    Adam optimizer, MSE loss and an RMSE metric.  It trains for 20 epochs
    with 20% of the data held out for validation.
    """
    y = y.astype('float')
    cls.m = Sequential()
    cls.m.add(Dense(1, activation=sigmoid_3, input_dim=X.shape[1]))
    cls.m.compile(optimizer='adam',
                  loss='mse',
                  metrics=[metrics.RootMeanSquaredError()])
    # `nb_epoch` is the Keras 1 spelling; Keras 2 / tf.keras expect `epochs`.
    cls.m.fit(X, y, epochs=20, validation_split=0.2)
def main(train: bool):
    """Optionally train the collaborative-filtering model, then score the test set.

    Args:
        train: When true, fit the model on the training records (early
            stopping on the test records as validation data) and save its
            weights before evaluation.

    The evaluation model wraps the base model's output in a clamp to the
    range [1, 5], loads weights from ``checkpoints/``, evaluates on the test
    records, overwrites their ``rating`` field with predictions and writes
    them to ``results/<test basename>.base_prediction.txt``.
    """
    args = parse_args()
    train_data = read_records(args.train_path)
    test_data = read_records(args.test_path)

    # Embedding table sizes: largest id seen in either split, plus one.
    num_users = np.maximum(train_data['user_id'].max(),
                           test_data['user_id'].max()) + 1
    num_items = np.maximum(train_data['item_id'].max(),
                           test_data['item_id'].max()) + 1
    model = cf_model(num_users, num_items)
    model.compile(optimizer=optimizers.Adam(1e-3),
                  loss=losses.mean_squared_error,
                  metrics=[metrics.RootMeanSquaredError()])

    train_stem = path.splitext(path.basename(args.train_path))[0]
    test_stem = path.splitext(path.basename(args.test_path))[0]

    if train:
        model.fit(
            [train_data['user_id'], train_data['item_id']],
            train_data['rating'],
            validation_data=([test_data['user_id'], test_data['item_id']],
                             test_data['rating']),
            # Keras documents `callbacks` as a list of Callback instances.
            callbacks=[callbacks.EarlyStopping(patience=2)],
            batch_size=64,
            epochs=500,
            verbose=1)
        model.save_weights('checkpoints/' + train_stem)

    rating = layers.Lambda(lambda x: K.minimum(K.maximum(x, 1), 5))(
        model.output)
    final_model = models.Model(model.input, rating)
    final_model.compile(loss=losses.mean_squared_error,
                        metrics=[metrics.RootMeanSquaredError()])
    # NOTE(review): weights are saved under the *train* file's basename but
    # loaded under the *test* file's basename; this only works when the two
    # basenames are equal -- confirm that is the intended data layout.
    final_model.load_weights('checkpoints/' + test_stem)
    final_model.evaluate([test_data['user_id'], test_data['item_id']],
                         test_data['rating'])

    test_data['rating'] = final_model.predict(
        [test_data['user_id'], test_data['item_id']])
    write_records('results/' + test_stem + '.base_prediction.txt', test_data)
def create_model_mean_pooling(learn_rate, epoch_num, batches, outf_layer, outf_sum, filter_num, split_filters, which_sum, fc):
    """Build and compile a multi-scale CNN regressor for 98x98 RGB images.

    A first 5x5 convolution and max-pool produce a base feature map, which
    is average-pooled twice more to give three scales.  One shared second
    5x5 convolution is applied to all three scales; the flattened results
    are concatenated and reduced to the output either by a ReLU Dense(1)
    (``fc`` truthy) or by summing over the feature axis.

    Args:
        learn_rate: Learning rate for the Adam optimizer.
        outf_layer: Activation used by both convolution layers.
        filter_num: Three-element sequence; the first two entries are the
            filter counts of the first and second convolution.
        fc: Selects the Dense(1) head instead of the plain sum.
        epoch_num, batches, outf_sum, split_filters, which_sum: Not used
            inside this function.

    Returns:
        The compiled Keras ``Model`` (MSE loss, RMSE and MAE metrics).
    """
    input_shape = (98, 98, 3)
    inputs = Input(shape=input_shape, name='image_input')

    # Unpacking still enforces exactly three entries; the third is unused.
    filters_first, filters_second, _ = filter_num

    base_features = Conv2D(filters_first,
                           kernel_size=(5, 5),
                           strides=(1, 1),
                           activation=outf_layer,
                           input_shape=input_shape,
                           name='c_layer_1')(inputs)

    scale_full = MaxPooling2D(pool_size=(2, 2), strides=(2, 2),
                              name='p_layer_1')(base_features)
    scale_half = AveragePooling2D(pool_size=(2, 2), strides=(2, 2))(scale_full)
    scale_quarter = AveragePooling2D(pool_size=(2, 2), strides=(2, 2))(scale_half)

    # A single layer instance, so all scales share the same weights.
    shared_conv = Conv2D(filters_second,
                         kernel_size=(5, 5),
                         strides=(1, 1),
                         activation=outf_layer,
                         name='c_layer_2')
    per_scale = [shared_conv(scale)
                 for scale in (scale_full, scale_half, scale_quarter)]
    flattened = [Flatten()(features) for features in per_scale]
    merged = concatenate(flattened)

    if fc:
        head = Dense(1, activation=tf.keras.activations.relu)(merged)
    else:
        head = tf.reduce_sum(merged, axis=[1])

    model = Model(inputs=inputs, outputs=head)
    model.compile(
        loss=losses.MeanSquaredError(),
        optimizer=optimizers.Adam(learning_rate=learn_rate, name='Adam'),
        metrics=[metrics.RootMeanSquaredError(), metrics.MeanAbsoluteError()])
    return model
def create_MultiCNN_model():
    """Build, compile and summarise a four-branch text CNN regressor.

    The 25-token int32 input is embedded with four frozen 300-d embedding
    matrices loaded from disk.  Each branch applies Conv1D(32, 4) ->
    Dropout(0.5) -> MaxPooling1D(2) -> Flatten; the branches are
    concatenated and passed through Dense(10, relu) and a linear Dense(1).

    Returns:
        The compiled Keras ``Model`` (Adam, MSE loss, RMSE metric).
    """
    text_in = layers.Input(shape=(25,), dtype='int32', name="TextIn")

    # (weights file, embedding layer name), in branch order.
    embedding_sources = (
        ('../data/embeddings/numpy/GloVe.npy', "GloveEmbedding"),
        ('../data/embeddings/numpy/fasttext.npy', "FastTextEmbedding"),
        ('../data/embeddings/numpy/GNews.npy', "GNewsEmbedding"),
        ('../data/embeddings/numpy/headline.npy', "CustomEmbedding"),
    )

    branch_outputs = []
    for weights_path, layer_name in embedding_sources:
        weights = np.load(weights_path, allow_pickle=True)
        embedded = layers.Embedding(
            input_dim=len(weights),
            output_dim=300,
            embeddings_initializer=initializers.Constant(weights),
            trainable=False,
            name=layer_name)(text_in)
        convolved = layers.Conv1D(filters=32, kernel_size=4,
                                  activation='relu')(embedded)
        dropped = layers.Dropout(0.5)(convolved)
        pooled = layers.MaxPooling1D(pool_size=2)(dropped)
        branch_outputs.append(layers.Flatten()(pooled))

    merged = layers.concatenate(branch_outputs)

    # interpretation head
    x = layers.Dense(10, activation='relu')(merged)
    x = layers.Dense(1)(x)

    m = Model(text_in, x)
    m.compile(optimizer=optimizers.Adam(),
              loss="mean_squared_error",
              metrics=[metrics.RootMeanSquaredError()])
    m.summary()
    return m
def __get_metric(self, metric):
    """Return a new metric object from module ``m`` for a short metric name.

    Args:
        metric: One of the keys of the table below (e.g. ``"rmse"``).

    Returns:
        A freshly constructed instance of the matching class in ``m``.

    Raises:
        Exception: If ``metric`` is not a known name.
    """
    # Name -> class name in `m`.  The class is resolved with getattr only
    # for the requested metric, so an unrelated class missing from `m`
    # cannot break lookups of the others.
    metric_class_names = {
        "auc": "AUC",
        "accuracy": "Accuracy",
        "binary_accuracy": "BinaryAccuracy",
        "categorical_accuracy": "CategoricalAccuracy",
        "binary_crossentropy": "BinaryCrossentropy",
        "categorical_crossentropy": "CategoricalCrossentropy",
        "sparse_categorical_crossentropy": "SparseCategoricalCrossentropy",
        "kl_divergence": "KLDivergence",
        # Was misspelled `Poission`, which raised AttributeError.
        "poisson": "Poisson",
        "mse": "MeanSquaredError",
        "rmse": "RootMeanSquaredError",
        "mae": "MeanAbsoluteError",
        "mean_absolute_percentage_error": "MeanAbsolutePercentageError",
        # Was `MeanSquaredLogarithmError`; the class is ...LogarithmicError.
        "mean_squared_logarithm_error": "MeanSquaredLogarithmicError",
        "cosine_similarity": "CosineSimilarity",
        "log_cosh_error": "LogCoshError",
        "precision": "Precision",
        "recall": "Recall",
        "true_positive": "TruePositives",
        "true_negative": "TrueNegatives",
        "false_positive": "FalsePositives",
        "false_negative": "FalseNegatives",
    }
    try:
        class_name = metric_class_names[metric]
    except (KeyError, TypeError):
        # TypeError covers unhashable input, which the old ==-chain also
        # reported as an undefined metric.
        raise Exception("specified metric not defined") from None
    return getattr(m, class_name)()
def _build_model(self):
    """Build and compile the two-input, two-output song/tag network.

    Inputs are a vector of width ``self.total_song_num`` and one of width
    ``len(self.tag_list)``.  Each is projected to 32 units, the projections
    are concatenated and expanded to 1024 units, then split into two
    64-unit heads that end in sigmoid outputs of the same widths as the
    inputs.  Every hidden block is Dense (L2 1e-4) -> BatchNorm -> ReLU.

    Returns:
        The compiled Keras model (Adam 1e-3, binary cross-entropy loss,
        RMSE metric).
    """

    def dense_bn_relu(units, tensor):
        # One hidden block: regularised Dense, batch norm, then ReLU.
        projected = layers.Dense(
            units, kernel_regularizer=regularizers.l2(0.0001))(tensor)
        normalised = layers.BatchNormalization()(projected)
        return layers.Activation(activation='relu')(normalised)

    input_songs = layers.Input(shape=(self.total_song_num, ), name='in_songs')
    input_tags = layers.Input(shape=(len(self.tag_list), ), name='in_tags')

    song_encoded = dense_bn_relu(32, input_songs)
    tag_encoded = dense_bn_relu(32, input_tags)

    joint = layers.Concatenate(axis=1)([song_encoded, tag_encoded])
    shared = dense_bn_relu(1024, joint)

    song_head = dense_bn_relu(64, shared)
    tag_head = dense_bn_relu(64, shared)

    output_songs = layers.Dense(self.total_song_num,
                                activation='sigmoid',
                                name='out_songs')(song_head)
    output_tags = layers.Dense(len(self.tag_list),
                               activation='sigmoid',
                               name='out_tags')(tag_head)

    model = models.Model(inputs=[input_songs, input_tags],
                         outputs=[output_songs, output_tags])
    model.compile(optimizer=optimizers.Adam(learning_rate=1e-3),
                  loss='binary_crossentropy',
                  metrics=[metrics.RootMeanSquaredError()])
    return model
def get_model():
    """Build and compile a small two-output dense regression network.

    The input width is taken from the module-level ``X_train``
    (``X_train.shape[1]``).  The network is Dense(32, relu) ->
    Dense(32, relu) -> Dense(2), trained with Adam (1e-3) on MSE.

    Returns:
        The compiled Keras model with metrics named ``rmse`` and ``mae``.
    """
    model = keras.Sequential([
        layers.Dense(32, activation='relu', input_shape=[X_train.shape[1]]),
        layers.Dense(32, activation='relu'),
        layers.Dense(2)
    ])

    # `lr` is the deprecated alias; `learning_rate` is the supported keyword.
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

    model.compile(
        loss='mse',
        optimizer=optimizer,
        metrics=[
            # Explicit names keep the history/log keys short and consistent.
            metrics.RootMeanSquaredError(name="rmse"),
            metrics.MeanAbsoluteError(name="mae")
        ])
    return model
def test_model(model_dir, learn_rate):
    """Load a saved model, evaluate it on the test photos and log the result.

    Args:
        model_dir: Directory containing ``model.json`` (architecture) and
            ``model.h5`` (weights).
        learn_rate: Learning rate for the Adam optimizer used to compile.

    One row ``(model_number, loss, rmse, mae)`` is appended to
    ``../results/test_results.csv``, where ``model_number`` is ``model_dir``
    with the ``models/klasicke/model`` prefix text removed.
    """
    # Context manager so the handle is closed even if read() raises.
    with open(model_dir + "/model.json", 'r') as json_file:
        loaded_model_json = json_file.read()

    model_number = model_dir.replace("models/klasicke/model", "")
    model = model_from_json(loaded_model_json)
    model.load_weights(model_dir + "/model.h5")
    model.compile(
        loss=losses.MeanSquaredError(),
        optimizer=optimizers.Adam(learning_rate=learn_rate, name='Adam'),
        metrics=[metrics.RootMeanSquaredError(), metrics.MeanAbsoluteError()])

    images, labels = CNNutils.load_test_data("new_photos/labels/labels.csv",
                                             "new_photos/test_crops/")
    # results == [loss, rmse, mae], following the compile() metric order.
    results = model.evaluate(images, labels, batch_size=16)
    # The old label said "test acc", but no accuracy metric is compiled.
    print('test loss, test rmse, test mae:', results)

    with open("../results/test_results.csv", 'a', newline='') as f:
        writer = csv.writer(f)
        writer.writerow((model_number, results[0], results[1], results[2]))
def forecast(self, local_mse, local_normalized_scaled_unit_sales, local_mean_unit_complete_time_serie, local_raw_unit_sales, local_settings):
    """Retrain (or reload) a joint model for the high-loss time series and score it.

    Time series whose previous MSE (``local_mse[:, 1]``) exceeds
    ``local_settings['poor_results_mse_threshold']`` are selected.  A
    Dense / bidirectional peephole-LSTM / Dense network is built from the
    hyperparameter JSON file.  It is trained on sliding windows of the
    selected series when ``repeat_training_in_block == "True"``; otherwise
    previously saved weights are loaded.  The model forecasts the last
    ``forecast_horizon_days``; the forecast is denormalised and rescaled,
    and its MSE per series is compared against the previous one.  Results
    are saved under ``models_evaluation_path`` and the model under
    ``models_path``.

    Args:
        local_mse: 2-D array; column 0 holds the time-series index and
            column 1 the previous MSE.
        local_normalized_scaled_unit_sales: 2-D array indexed
            ``[time_serie, day]`` with the preprocessed sales.
        local_mean_unit_complete_time_serie: Per-series mean, indexed by
            time-series id and passed to ``general_mean_rescaler``.
        local_raw_unit_sales: 2-D array indexed ``[time_serie, day]`` with
            the raw sales used as ground truth.
        local_settings: dict of paths and settings read throughout.

    Returns:
        bool: ``True`` on success; ``False`` if the data-approach setting
        is unknown or any exception is raised (the exception is logged).
    """
    try:
        print(
            'starting high loss (mse in previous LSTM) time_series in-block forecast submodule'
        )
        # set training parameters
        with open(''.join([local_settings['hyperparameters_path'],
                           'in_block_time_serie_based_model_hyperparameters.json'])) \
                as local_r_json_file:
            model_hyperparameters = json.loads(local_r_json_file.read())
        # NOTE(review): this array is never used below; the load is kept
        # because it fails fast when the file is missing.
        local_time_series_group = np.load(''.join(
            [local_settings['train_data_path'], 'time_serie_group.npy']),
            allow_pickle=True)
        time_steps_days = int(local_settings['time_steps_days'])
        epochs = int(model_hyperparameters['epochs'])
        batch_size = int(model_hyperparameters['batch_size'])
        workers = int(model_hyperparameters['workers'])
        optimizer_function = model_hyperparameters['optimizer']
        optimizer_learning_rate = model_hyperparameters['learning_rate']
        if optimizer_function == 'adam':
            optimizer_function = optimizers.Adam(optimizer_learning_rate)
        elif optimizer_function == 'ftrl':
            optimizer_function = optimizers.Ftrl(optimizer_learning_rate)

        losses_list = []
        loss_1 = model_hyperparameters['loss_1']
        loss_2 = model_hyperparameters['loss_2']
        loss_3 = model_hyperparameters['loss_3']
        union_settings_losses = [loss_1, loss_2, loss_3]
        if 'mape' in union_settings_losses:
            losses_list.append(losses.MeanAbsolutePercentageError())
        if 'mse' in union_settings_losses:
            losses_list.append(losses.MeanSquaredError())
        if 'mae' in union_settings_losses:
            losses_list.append(losses.MeanAbsoluteError())
        if 'm_mape' in union_settings_losses:
            losses_list.append(modified_mape())
        if 'customized_loss_function' in union_settings_losses:
            losses_list.append(customized_loss())

        metrics_list = []
        metric1 = model_hyperparameters['metrics1']
        metric2 = model_hyperparameters['metrics2']
        union_settings_metrics = [metric1, metric2]
        if 'rmse' in union_settings_metrics:
            metrics_list.append(metrics.RootMeanSquaredError())
        if 'mse' in union_settings_metrics:
            metrics_list.append(metrics.MeanSquaredError())
        if 'mae' in union_settings_metrics:
            metrics_list.append(metrics.MeanAbsoluteError())
        if 'mape' in union_settings_metrics:
            metrics_list.append(metrics.MeanAbsolutePercentageError())

        l1 = model_hyperparameters['l1']
        l2 = model_hyperparameters['l2']
        if model_hyperparameters['regularizers_l1_l2'] == 'True':
            activation_regularizer = regularizers.l1_l2(l1=l1, l2=l2)
        else:
            activation_regularizer = None

        # searching for time_series with high loss forecast
        time_series_treated = []
        poor_results_mse_threshold = local_settings[
            'poor_results_mse_threshold']
        poor_result_time_serie_list = []
        nof_features_for_training = 0
        for result in local_mse:
            if result[1] > poor_results_mse_threshold:
                nof_features_for_training += 1
                poor_result_time_serie_list.append(int(result[0]))
        nof_features_for_training = len(poor_result_time_serie_list)

        # creating model
        forecaster_in_block = tf.keras.Sequential()
        print(
            'current model for specific high loss time_series: Mix_Bid_PeepHole_LSTM_Dense_ANN'
        )
        # first layer (DENSE)
        if model_hyperparameters['units_layer_1'] > 0:
            forecaster_in_block.add(
                layers.Dense(
                    units=model_hyperparameters['units_layer_1'],
                    activation=model_hyperparameters['activation_1'],
                    input_shape=(model_hyperparameters['time_steps_days'],
                                 nof_features_for_training),
                    activity_regularizer=activation_regularizer))
            forecaster_in_block.add(
                layers.Dropout(
                    rate=float(model_hyperparameters['dropout_layer_1'])))
        # second LSTM layer
        if model_hyperparameters['units_layer_2'] > 0:
            forecaster_in_block.add(
                layers.Bidirectional(
                    layers.RNN(PeepholeLSTMCell(
                        units=model_hyperparameters['units_layer_2'],
                        activation=model_hyperparameters['activation_2'],
                        activity_regularizer=activation_regularizer,
                        dropout=float(
                            model_hyperparameters['dropout_layer_2'])),
                        return_sequences=False)))
            forecaster_in_block.add(
                RepeatVector(model_hyperparameters['repeat_vector']))
        # third LSTM layer
        if model_hyperparameters['units_layer_3'] > 0:
            forecaster_in_block.add(
                layers.Bidirectional(
                    layers.RNN(PeepholeLSTMCell(
                        units=model_hyperparameters['units_layer_3'],
                        activation=model_hyperparameters['activation_3'],
                        activity_regularizer=activation_regularizer,
                        dropout=float(
                            model_hyperparameters['dropout_layer_3'])),
                        return_sequences=False)))
            forecaster_in_block.add(
                RepeatVector(model_hyperparameters['repeat_vector']))
        # fourth layer (DENSE)
        if model_hyperparameters['units_layer_4'] > 0:
            forecaster_in_block.add(
                layers.Dense(
                    units=model_hyperparameters['units_layer_4'],
                    activation=model_hyperparameters['activation_4'],
                    activity_regularizer=activation_regularizer))
            forecaster_in_block.add(
                layers.Dropout(
                    rate=float(model_hyperparameters['dropout_layer_4'])))
        # final layer
        forecaster_in_block.add(
            TimeDistributed(layers.Dense(units=nof_features_for_training)))
        forecast_horizon_days = local_settings['forecast_horizon_days']
        forecaster_in_block.build(input_shape=(1, forecast_horizon_days,
                                               nof_features_for_training))
        forecaster_in_block.compile(optimizer=optimizer_function,
                                    loss=losses_list,
                                    metrics=metrics_list)
        forecaster_in_block_json = forecaster_in_block.to_json()
        with open(
                ''.join([
                    local_settings['models_path'], 'forecaster_in_block.json'
                ]), 'w') as json_file:
            json_file.write(forecaster_in_block_json)
        forecaster_in_block_untrained = forecaster_in_block
        print('specific time_serie model initialized and compiled')

        nof_selling_days = local_normalized_scaled_unit_sales.shape[1]
        last_learning_day_in_year = np.mod(nof_selling_days, 365)
        max_selling_time = local_settings['max_selling_time']
        days_in_focus_frame = model_hyperparameters['days_in_focus_frame']
        window_input_length = local_settings['moving_window_input_length']
        window_output_length = local_settings[
            'moving_window_output_length']
        moving_window_length = window_input_length + window_output_length
        nof_years = local_settings['number_of_years_ceil']

        # training data: one row per selected (poor result) time series
        nof_poor_result_time_series = len(poor_result_time_serie_list)
        time_serie_data = np.zeros(shape=(nof_poor_result_time_series,
                                          max_selling_time))
        for row, time_serie in enumerate(poor_result_time_serie_list):
            time_serie_data[row, :] = local_normalized_scaled_unit_sales[
                time_serie, :]

        if local_settings['repeat_training_in_block'] == "True":
            print(
                'starting in-block training of model for high_loss time_series in previous model'
            )
            nof_selling_days = time_serie_data.shape[1]
            remainder_days = np.mod(nof_selling_days, moving_window_length)
            window_first_days = [
                first_day for first_day in range(0, nof_selling_days,
                                                 moving_window_length)
            ]
            length_window_walk = len(window_first_days)
            if remainder_days != 0:
                window_first_days[
                    length_window_walk -
                    1] = nof_selling_days - moving_window_length
            day_in_year = [
                last_learning_day_in_year + year * 365
                for year in range(nof_years)
            ]
            stride_window_walk = model_hyperparameters[
                'stride_window_walk']

            print('defining x_train')
            x_train = []
            if local_settings['train_model_input_data_approach'] == "all":
                x_train.extend(
                    time_serie_data[:, day - time_steps_days:day -
                                    window_output_length]
                    for day in range(time_steps_days, max_selling_time,
                                     stride_window_walk))
            elif local_settings[
                    'train_model_input_data_approach'] == "focused":
                x_train.extend(
                    time_serie_data[:, day:day + time_steps_days]
                    for last_day in day_in_year[:-1]
                    for day in range(
                        last_day + window_output_length, last_day +
                        window_output_length - days_in_focus_frame,
                        -stride_window_walk))
                # border condition: the last year has no following data, so
                # the window is zero-padded up to time_steps_days
                x_train.extend(
                    np.concatenate(
                        (time_serie_data[:, day - window_output_length:day],
                         np.zeros(shape=(nof_poor_result_time_series,
                                         time_steps_days -
                                         window_output_length))),
                        axis=1)
                    for last_day in day_in_year[-1:]
                    for day in range(last_day,
                                     last_day - days_in_focus_frame,
                                     -stride_window_walk))
            else:
                logging.info(
                    "\ntrain_model_input_data_approach is not defined")
                print('-a problem occurs with the data_approach settings')
                # Was `return False, None`: a non-empty tuple is truthy, so
                # callers testing the result saw this failure as success.
                return False

            print('defining y_train')
            y_train = []
            if local_settings['train_model_input_data_approach'] == "all":
                y_train.extend(
                    time_serie_data[:, day - time_steps_days:day]
                    for day in range(time_steps_days, max_selling_time,
                                     stride_window_walk))
            elif local_settings[
                    'train_model_input_data_approach'] == "focused":
                y_train.extend(
                    time_serie_data[:, day:day + time_steps_days]
                    for last_day in day_in_year[:-1]
                    for day in range(
                        last_day + window_output_length, last_day +
                        window_output_length - days_in_focus_frame,
                        -stride_window_walk))
                # border condition, same zero-padding as for x_train
                y_train.extend(
                    np.concatenate(
                        (time_serie_data[:, day - window_output_length:day],
                         np.zeros(shape=(nof_poor_result_time_series,
                                         time_steps_days -
                                         window_output_length))),
                        axis=1)
                    for last_day in day_in_year[-1:]
                    for day in range(last_day,
                                     last_day - days_in_focus_frame,
                                     -stride_window_walk))

            # if time_enhance is active, assigns more weight to the last
            # time_steps according to enhance_last_stride
            if local_settings['time_enhance'] == 'True':
                enhance_last_stride = local_settings['enhance_last_stride']
                last_elements = []
                length_x_y_train = len(x_train)
                x_train_enhanced, y_train_enhanced = [], []
                enhance_iterator = 1
                # NOTE(review): with a positive enhance_last_stride this
                # range is empty (start < stop with step -1), so nothing is
                # duplicated and the slices below simply drop the last
                # samples.  Left unchanged because the intended direction
                # and weighting are unclear -- confirm and fix separately.
                for position in range(
                        length_x_y_train - enhance_last_stride,
                        length_x_y_train, -1):
                    repeats = range(1, 3 * (enhance_iterator + 1))
                    x_train_enhanced.extend(x_train[position]
                                            for _ in repeats)
                    y_train_enhanced.extend(y_train[position]
                                            for _ in repeats)
                    enhance_iterator += 1
                x_train = x_train[:-enhance_last_stride]
                x_train.extend(x_train_enhanced)
                y_train = y_train[:-enhance_last_stride]
                y_train.extend(y_train_enhanced)

            # converts lists to np arrays and applies the last pre-training
            # preprocessing (amplification)
            x_train = np.array(x_train)
            y_train = np.array(y_train)
            print('x_train_shape:  ', x_train.shape)
            if local_settings['amplification'] == 'True':
                factor = local_settings[
                    'amplification_factor']  # factor tuning was done previously
                for time_serie_iterator in range(np.shape(x_train)[1]):
                    max_time_serie = np.amax(
                        x_train[:, time_serie_iterator, :])
                    x_train[:, time_serie_iterator, :][x_train[:, time_serie_iterator, :] > 0] = \
                        max_time_serie * factor
                    max_time_serie = np.amax(
                        y_train[:, time_serie_iterator, :])
                    y_train[:, time_serie_iterator, :][y_train[:, time_serie_iterator, :] > 0] = \
                        max_time_serie * factor
            print('x_train and y_train built done')

            # define callbacks, checkpoints namepaths
            model_weights = ''.join([
                local_settings['checkpoints_path'],
                'check_point_model_for_high_loss_time_serie_',
                model_hyperparameters['current_model_name'],
                "_loss_-{loss:.4f}-.hdf5"
            ])
            callback1 = cb.EarlyStopping(
                monitor='loss',
                patience=model_hyperparameters['early_stopping_patience'])
            callback2 = cb.ModelCheckpoint(model_weights,
                                           monitor='loss',
                                           verbose=1,
                                           save_best_only=True,
                                           mode='min')
            callbacks = [callback1, callback2]
            # NOTE(review): reshape() swaps the last two dimension sizes but
            # does not transpose the data; kept as-is because saved weights
            # were trained on this layout -- confirm this is intended.
            x_train = x_train.reshape(
                (np.shape(x_train)[0], np.shape(x_train)[2],
                 np.shape(x_train)[1]))
            y_train = y_train.reshape(
                (np.shape(y_train)[0], np.shape(y_train)[2],
                 np.shape(y_train)[1]))
            print('input_shape: ', np.shape(x_train))

            forecaster_in_block.fit(x_train,
                                    y_train,
                                    batch_size=batch_size,
                                    epochs=epochs,
                                    workers=workers,
                                    callbacks=callbacks,
                                    shuffle=False)
            # print summary (informative; but if says "shape = multiple", probably useless)
            forecaster_in_block.summary()
            forecaster_in_block.save(''.join([
                local_settings['models_path'],
                '_high_loss_time_serie_model_forecaster_in_block_.h5'
            ]))
            forecaster_in_block.save_weights(''.join([
                local_settings['models_path'],
                '_weights_high_loss_ts_model_forecaster_in_block_.h5'
            ]))
            print(
                'high loss time_series model trained and saved in hdf5 format .h5'
            )
        else:
            forecaster_in_block.load_weights(''.join([
                local_settings['models_path'],
                '_weights_high_loss_ts_model_forecaster_in_block_.h5'
            ]))
            print('weights of previously trained model loaded')

        # evaluating model and comparing with aggregated (in-block) LSTM
        print('evaluating the model trained..')
        time_serie_data = time_serie_data.reshape(
            (1, time_serie_data.shape[1], time_serie_data.shape[0]))
        x_input = time_serie_data[:, -forecast_horizon_days:, :]
        y_pred_normalized = forecaster_in_block.predict(x_input)
        time_serie_data = time_serie_data.reshape(
            (time_serie_data.shape[2], time_serie_data.shape[1]))

        # inverse reshape of the prediction; loop-invariant, so done once
        y_pred_by_series = y_pred_normalized.reshape(
            (y_pred_normalized.shape[2], y_pred_normalized.shape[1]))

        improved_time_series_forecast = []
        time_series_not_improved = []
        improved_mse = []
        for time_serie_iterator, time_serie in enumerate(
                poor_result_time_serie_list):
            y_truth = local_raw_unit_sales[time_serie:time_serie + 1,
                                           -forecast_horizon_days:]
            # reversing preprocess: rescale, denormalize, reshape
            y_pred_reshaped = y_pred_by_series[
                time_serie_iterator:time_serie_iterator + 1, :]
            # inverse transform (first moving_windows denormalizing and
            # then general rescaling)
            time_serie_normalized_window_mean = np.mean(
                time_serie_data[time_serie_iterator,
                                -moving_window_length:])
            local_denormalized_array = window_based_denormalizer(
                y_pred_reshaped, time_serie_normalized_window_mean,
                forecast_horizon_days)
            local_point_forecast = general_mean_rescaler(
                local_denormalized_array,
                local_mean_unit_complete_time_serie[time_serie],
                forecast_horizon_days)

            # calculating MSE and comparing with the previous model's MSE
            local_error_metric_mse = mean_squared_error(
                y_truth, local_point_forecast)
            previous_result = local_mse[:, 1][local_mse[:, 0] ==
                                              time_serie].item()
            time_series_treated.append(
                [int(time_serie), previous_result, local_error_metric_mse])
            if local_error_metric_mse < previous_result:
                print(time_serie, 'MSE improved from ', previous_result,
                      'to ', local_error_metric_mse)
                improved_time_series_forecast.append(int(time_serie))
                improved_mse.append(local_error_metric_mse)
            else:
                time_series_not_improved.append(int(time_serie))

        time_series_treated = np.array(time_series_treated)
        improved_mse = np.array(improved_mse)
        average_mse_in_block_forecast = np.mean(time_series_treated[:, 2])
        average_mse_improved_ts = np.mean(improved_mse)
        print('poor result time serie list len:',
              len(poor_result_time_serie_list))
        print('mean_mse for in-block forecast:',
              average_mse_in_block_forecast)
        print(
            'number of time series with better results with this forecast: ',
            len(improved_time_series_forecast))
        print(
            'mean_mse of time series with better results with this forecast: ',
            average_mse_improved_ts)
        print('not improved time series =', len(time_series_not_improved))
        improved_time_series_forecast = np.array(
            improved_time_series_forecast)
        time_series_not_improved = np.array(time_series_not_improved)
        poor_result_time_serie_array = np.array(
            poor_result_time_serie_list)

        # store data of (individual-approach) time_series forecast
        # successfully improved and those that not
        np.save(
            ''.join([
                local_settings['models_evaluation_path'],
                'poor_result_time_serie_array'
            ]), poor_result_time_serie_array)
        np.save(
            ''.join([
                local_settings['models_evaluation_path'],
                'time_series_forecast_results'
            ]), time_series_treated)
        np.save(
            ''.join([
                local_settings['models_evaluation_path'],
                'improved_time_series_forecast'
            ]), improved_time_series_forecast)
        np.save(
            ''.join([
                local_settings['models_evaluation_path'],
                'time_series_not_improved'
            ]), time_series_not_improved)
        np.savetxt(''.join([
            local_settings['models_evaluation_path'],
            'time_series_forecast_results.csv'
        ]),
                   time_series_treated,
                   fmt='%10.15f',
                   delimiter=',',
                   newline='\n')
        forecaster_in_block_json = forecaster_in_block.to_json()
        with open(''.join([local_settings['models_path'],
                           'high_loss_time_serie_model_forecaster_in_block.json']), 'w') \
                as json_file:
            json_file.write(forecaster_in_block_json)
        print('trained model weights and architecture saved')
        print('metadata (results, time_serie with high loss) saved')
        print(
            'forecast improvement done. (high loss time_serie focused) submodule has finished'
        )
    except Exception as submodule_error:
        print('time_series in-block forecast submodule_error: ',
              submodule_error)
        logger.info(
            'error in forecast of in-block time_series (high_loss_identified_ts_forecast submodule)'
        )
        logger.error(str(submodule_error), exc_info=True)
        return False
    return True
def train(self, local_settings, local_raw_unit_sales, local_model_hyperparameters, local_time_series_not_improved,
          raw_unit_sales_ground_truth):
    """Build one Dense / Peephole-LSTM model and train it time serie by time serie.

    The x/y training arrays come from ``build_x_y_train_arrays``. A single
    ``tf.keras.Sequential`` model is assembled from the ``units_layer_N``
    hyperparameters, saved as a template (``.h5`` and ``.json``) under
    ``local_settings['models_path']`` and then fitted successively on every
    time serie index in ``range(number_of_time_series)`` that is NOT contained
    in ``local_time_series_not_improved``. The same model instance is reused
    across iterations, so weights carry over from one serie to the next.
    After each fit the weights are saved, a forecast is predicted and passed
    through ``cof_zeros``; all forecasts are finally written to
    ``point_forecast_NN_LSTM_simulation.csv``.

    Args:
        local_settings: dict of general settings (paths, 'forecast_horizon_days',
            'time_steps_days', 'number_of_time_series', 'mini_ts_evaluator',
            'competition_stage').
        local_raw_unit_sales: 2-D array-like indexed as [time_serie, day];
            converted to float32 here.
        local_model_hyperparameters: dict with optimizer, losses, metrics,
            regularizer and per-layer settings.
        local_time_series_not_improved: collection of time serie indexes to
            leave out of the training loop.
        raw_unit_sales_ground_truth: array indexed as [time_serie, day], only
            read when the mini evaluator is enabled.

    Returns:
        True on success; False if any exception was raised (it is printed and
        logged, not propagated).
    """
    try:
        local_forecast_horizon_days = local_settings['forecast_horizon_days']
        # build the x / y training arrays (delegated to the shared builder)
        local_x_train, local_y_train = build_x_y_train_arrays(local_raw_unit_sales, local_settings,
                                                              local_model_hyperparameters,
                                                              local_time_series_not_improved)
        local_features_for_each_training = 1
        print('starting neural network - individual time_serie training')

        # set training parameters
        local_time_steps_days = int(local_settings['time_steps_days'])
        local_epochs = int(local_model_hyperparameters['epochs'])
        local_batch_size = int(local_model_hyperparameters['batch_size'])
        local_workers = int(local_model_hyperparameters['workers'])
        local_optimizer_function = local_model_hyperparameters['optimizer']
        local_optimizer_learning_rate = local_model_hyperparameters['learning_rate']
        # any other optimizer value is handed to compile() unchanged
        if local_optimizer_function == 'adam':
            local_optimizer_function = optimizers.Adam(local_optimizer_learning_rate)
        elif local_optimizer_function == 'ftrl':
            local_optimizer_function = optimizers.Ftrl(local_optimizer_learning_rate)

        # losses: every configured name adds one loss object, in this fixed order
        local_losses_list = []
        local_union_settings_losses = [local_model_hyperparameters['loss_1'],
                                       local_model_hyperparameters['loss_2'],
                                       local_model_hyperparameters['loss_3']]
        if 'mape' in local_union_settings_losses:
            local_losses_list.append(losses.MeanAbsolutePercentageError())
        if 'mse' in local_union_settings_losses:
            local_losses_list.append(losses.MeanSquaredError())
        if 'mae' in local_union_settings_losses:
            local_losses_list.append(losses.MeanAbsoluteError())
        if 'm_mape' in local_union_settings_losses:
            local_losses_list.append(modified_mape())
        if 'customized_loss_function' in local_union_settings_losses:
            local_losses_list.append(customized_loss())
        if 'pinball_loss_function' in local_union_settings_losses:
            local_losses_list.append(pinball_function_loss())

        # metrics: same scheme as the losses
        local_metrics_list = []
        local_union_settings_metrics = [local_model_hyperparameters['metrics1'],
                                        local_model_hyperparameters['metrics2']]
        if 'rmse' in local_union_settings_metrics:
            local_metrics_list.append(metrics.RootMeanSquaredError())
        if 'mse' in local_union_settings_metrics:
            local_metrics_list.append(metrics.MeanSquaredError())
        if 'mae' in local_union_settings_metrics:
            local_metrics_list.append(metrics.MeanAbsoluteError())
        if 'mape' in local_union_settings_metrics:
            local_metrics_list.append(metrics.MeanAbsolutePercentageError())

        # the flag is stored as the string 'True', not as a bool
        local_l1 = local_model_hyperparameters['l1']
        local_l2 = local_model_hyperparameters['l2']
        if local_model_hyperparameters['regularizers_l1_l2'] == 'True':
            local_activation_regularizer = regularizers.l1_l2(l1=local_l1, l2=local_l2)
        else:
            local_activation_regularizer = None

        # callbacks: early stopping on the training loss
        local_callback1 = cb.EarlyStopping(monitor='loss',
                                           patience=local_model_hyperparameters['early_stopping_patience'])
        local_callbacks = [local_callback1]

        print('building current model: Mix_Bid_PeepHole_LSTM_Dense_ANN')
        local_base_model = tf.keras.Sequential()
        # first layer (DENSE)
        if local_model_hyperparameters['units_layer_1'] > 0:
            # strictly dim 1 of input_shape is ['time_steps_days'] (dim 0 is number of batches: None)
            local_base_model.add(layers.Dense(units=local_model_hyperparameters['units_layer_1'],
                                              activation=local_model_hyperparameters['activation_1'],
                                              input_shape=(local_time_steps_days,
                                                           local_features_for_each_training),
                                              activity_regularizer=local_activation_regularizer))
            local_base_model.add(layers.Dropout(rate=float(local_model_hyperparameters['dropout_layer_1'])))
        # second layer (peephole LSTM); it carries the input shape when there is no first layer
        if local_model_hyperparameters['units_layer_2']:
            if local_model_hyperparameters['units_layer_1'] == 0:
                local_base_model.add(layers.RNN(
                    PeepholeLSTMCell(units=local_model_hyperparameters['units_layer_2'],
                                     activation=local_model_hyperparameters['activation_2'],
                                     input_shape=(local_time_steps_days,
                                                  local_features_for_each_training),
                                     dropout=float(local_model_hyperparameters['dropout_layer_2']))))
            else:
                local_base_model.add(layers.RNN(
                    PeepholeLSTMCell(units=local_model_hyperparameters['units_layer_2'],
                                     activation=local_model_hyperparameters['activation_2'],
                                     dropout=float(local_model_hyperparameters['dropout_layer_2']))))
        # third layer (DENSE)
        if local_model_hyperparameters['units_layer_3'] > 0:
            local_base_model.add(layers.Dense(units=local_model_hyperparameters['units_layer_3'],
                                              activation=local_model_hyperparameters['activation_3'],
                                              activity_regularizer=local_activation_regularizer))
            local_base_model.add(layers.Dropout(rate=float(local_model_hyperparameters['dropout_layer_3'])))
        # fourth layer (peephole LSTM)
        if local_model_hyperparameters['units_layer_4'] > 0:
            local_base_model.add(layers.RNN(
                PeepholeLSTMCell(units=local_model_hyperparameters['units_layer_4'],
                                 activation=local_model_hyperparameters['activation_4'],
                                 dropout=float(local_model_hyperparameters['dropout_layer_4']))))
        # output layer: one unit per forecast day
        local_base_model.add(layers.Dense(units=local_forecast_horizon_days))

        # build and compile model
        local_base_model.build(input_shape=(1, local_time_steps_days, local_features_for_each_training))
        local_base_model.compile(optimizer=local_optimizer_function,
                                 loss=local_losses_list,
                                 metrics=local_metrics_list)

        # save model architecture (template for specific models)
        local_base_model.save(''.join([local_settings['models_path'],
                                       'generic_forecaster_template_individual_ts.h5']))
        local_base_model_json = local_base_model.to_json()
        # the with-block closes the file; no explicit close() is needed
        with open(''.join([local_settings['models_path'],
                           'generic_forecaster_template_individual_ts.json']), 'w') as json_file:
            json_file.write(local_base_model_json)
        local_base_model.summary()

        # all input data in the correct type
        local_x_train = np.array(local_x_train, dtype=np.dtype('float32'))
        local_y_train = np.array(local_y_train, dtype=np.dtype('float32'))
        local_raw_unit_sales = np.array(local_raw_unit_sales, dtype=np.dtype('float32'))

        # specific time_serie models training loop
        local_y_pred_list = []
        local_nof_time_series = local_settings['number_of_time_series']
        remainder = np.array([time_serie for time_serie in range(local_nof_time_series)
                              if time_serie not in local_time_series_not_improved])
        for time_serie in remainder:
            # key point: the model is NOT re-initialised between iterations, so the weights
            # learned on the previous time serie are the starting point for this one
            print('training time_serie:', time_serie)
            local_x, local_y = local_x_train[:, time_serie: time_serie + 1, :], \
                local_y_train[:, time_serie: time_serie + 1, :]
            # the sliced axis has length 1, so this only moves it to the last position
            local_x = local_x.reshape(local_x.shape[0], local_x.shape[2], 1)
            local_y = local_y.reshape(local_y.shape[0], local_y.shape[2], 1)

            # training, saving model and storing forecasts
            local_base_model.fit(local_x, local_y, batch_size=local_batch_size, epochs=local_epochs,
                                 workers=local_workers, callbacks=local_callbacks, shuffle=False)
            local_base_model.save_weights(''.join([local_settings['models_path'],
                                                   '/weights_last_year/_individual_ts_',
                                                   str(time_serie), '_model_weights_.h5']))

            # NOTE(review): the prediction window is the last forecast_horizon_days values while
            # the model is built for time_steps_days steps - confirm both settings agree
            local_x_input = local_raw_unit_sales[time_serie: time_serie + 1, -local_forecast_horizon_days:]
            local_x_input = cof_zeros(local_x_input, local_settings)
            local_x_input = local_x_input.reshape(1, local_x_input.shape[1], 1)
            print('x_input shape:', local_x_input.shape)
            local_y_pred = local_base_model.predict(local_x_input)
            print('x_input:\n', local_x_input)
            print('y_pred shape:', local_y_pred.shape)
            local_y_pred = local_y_pred.reshape(local_y_pred.shape[1])
            local_y_pred = cof_zeros(local_y_pred, local_settings)

            if local_settings['mini_ts_evaluator'] == "True" and \
                    local_settings['competition_stage'] != 'submitting_after_June_1th_using_1941days':
                mini_evaluator = mini_evaluator_submodule()
                evaluation = mini_evaluator.evaluate_ts_forecast(
                    raw_unit_sales_ground_truth[time_serie, -local_forecast_horizon_days:], local_y_pred)
                print('ts:', time_serie, 'with cof_zeros ts mse:', evaluation)
            else:
                print('ts:', time_serie)
                print(local_y_pred)
            local_y_pred_list.append(local_y_pred)

        local_point_forecast_array = np.array(local_y_pred_list)
        local_point_forecast = local_point_forecast_array.reshape(
            (local_point_forecast_array.shape[0], local_point_forecast_array.shape[1]))

        # save points forecast
        np.savetxt(''.join([local_settings['others_outputs_path'], 'point_forecast_NN_LSTM_simulation.csv']),
                   local_point_forecast, fmt='%10.15f', delimiter=',', newline='\n')
        print('point forecasts saved to file')
        print('submodule for build, train and forecast time_serie individually finished successfully')
        return True
    except Exception as submodule_error:
        print('train model and forecast individual time_series submodule_error: ', submodule_error)
        logger.info('error in training and forecast-individual time_serie schema')
        logger.error(str(submodule_error), exc_info=True)
        return False
def create_model(learn_rate, epoch_num, batches, outf_layer, outf_sum, filter_num, split_filters, which_sum, fc):
    """Build and compile a three-stage CNN whose prediction is a sum of activations.

    The network takes 98x98x3 images through three 5x5 convolutions with max
    pooling after the first two. The last stage is reduced to one value per
    sample (``s3``), either by a ``Dense(1)`` head (``fc`` truthy) or by
    summing all activations. The per-sample sums of the first two stages
    (``s1``, ``s2``) are added to the prediction when the matching entry of
    ``which_sum`` equals 1.

    Args:
        learn_rate: learning rate for the Adam optimizer.
        epoch_num: unused here; kept for interface compatibility.
        batches: unused here; kept for interface compatibility.
        outf_layer: activation of the first two convolution stages.
        outf_sum: activation of the third stage, the summing convolutions and
            the optional Dense head.
        filter_num: 3-tuple with the number of filters per stage.
        split_filters: when truthy, stages one and two give up 3 filters each
            and ``s1`` / ``s2`` are computed from separate 3-filter
            convolutions instead of the stage output.
        which_sum: indexable with at least two entries; entry ``i == 1`` adds
            the sum of stage ``i + 1`` to the prediction.
        fc: when truthy, use ``Flatten`` + ``Dense(1)`` for the last stage.

    Returns:
        The compiled Keras ``Model`` (MSE loss, Adam, RMSE and MAE metrics).
    """
    input_shape = (98, 98, 3)
    inputs = Input(shape=input_shape, name='image_input')

    # filter number settings; with split filters 3 of them are reserved for summing
    (f1, f2, f3) = filter_num
    if split_filters:
        (f1, f2, f3) = (int(f1 - 3), int(f2 - 3), int(f3))

    def _sample_sum(tensor, name):
        # One total per sample (never across the batch). With the Dense head the
        # prediction is (batch, 1), so the sum gets a trailing axis to avoid an
        # accidental (batch, batch) broadcast when both are added.
        total = tf.reduce_sum(tensor, axis=[1, 2, 3], name=name)
        return tf.expand_dims(total, axis=-1) if fc else total

    # stage 1
    convolution_1 = Conv2D(f1, kernel_size=(5, 5), strides=(1, 1), activation=outf_layer,
                           input_shape=input_shape, name='c_layer_1')(inputs)
    pooling_1 = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), name='p_layer_1')(convolution_1)
    if split_filters:
        # dedicated sum "layer"
        sum_source_1 = Conv2D(3, kernel_size=(5, 5), strides=(1, 1), activation=outf_sum,
                              input_shape=input_shape)(inputs)
    else:
        sum_source_1 = convolution_1
    s1 = _sample_sum(sum_source_1, 'c_layer_1_sum')

    # stage 2
    convolution_2 = Conv2D(f2, kernel_size=(5, 5), strides=(1, 1), activation=outf_layer,
                           input_shape=input_shape, name='c_layer_2')(pooling_1)
    pooling_2 = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), name='p_layer_2')(convolution_2)
    if split_filters:
        sum_source_2 = Conv2D(3, kernel_size=(5, 5), strides=(1, 1), activation=outf_sum,
                              input_shape=input_shape)(pooling_1)
    else:
        sum_source_2 = convolution_2
    s2 = _sample_sum(sum_source_2, 'c_layer_2_sum')

    # stage 3
    convolution_3 = Conv2D(f3, kernel_size=(5, 5), strides=(1, 1), activation=outf_sum,
                           input_shape=input_shape, name='c_layer_3')(pooling_2)
    if fc:
        flat = Flatten()(convolution_3)
        s3 = Dense(1, activation=outf_sum)(flat)
    else:
        s3 = tf.reduce_sum(convolution_3, axis=[1, 2, 3], name='c_layer_3_sum')

    # add the selected stage sums to the prediction
    y_pred = s3
    for i, s in enumerate([s1, s2]):
        if which_sum[i] == 1:
            y_pred = y_pred + s

    # the model must output the accumulated prediction, otherwise which_sum has no effect
    model = Model(inputs=inputs, outputs=y_pred)
    model.compile(
        loss=losses.MeanSquaredError(),
        optimizer=optimizers.Adam(learning_rate=learn_rate, name='Adam'),
        metrics=[metrics.RootMeanSquaredError(), metrics.MeanAbsoluteError()])
    return model
def train_model(self, local_settings, local_raw_unit_sales, local_model_hyperparameters):
    """Build an LSTM / Bidirectional / Dense model and train it on every time serie.

    A ``tf.keras.Sequential`` template is assembled from the ``units_layer_N``
    hyperparameters, saved (``.h5`` and ``.json``) under
    ``local_settings['models_path']`` and then fitted successively on each row
    of ``local_raw_unit_sales``. The same model instance is reused, so weights
    carry over between series. After each fit the weights are saved and a
    forecast is predicted from the last ``forecast_horizon_days`` values of the
    serie. Forecasts are clipped at zero and written as two ``.npy`` files and
    one ``.csv`` file.

    NOTE(review): the 'days_in_focus_frame' hyperparameter is not applied here;
    training uses whatever ``build_x_y_train_arrays`` returns - confirm that is
    intended.

    Args:
        local_settings: dict of general settings (paths, 'forecast_horizon_days',
            'time_steps_days').
        local_raw_unit_sales: 2-D numpy array indexed as [time_serie, day].
        local_model_hyperparameters: dict with optimizer, losses, metrics,
            regularizer and per-layer settings.

    Returns:
        ``(True, forecasts)`` on success, where ``forecasts`` is a float32 array
        with one row per time serie and ``forecast_horizon_days`` columns;
        ``(False, [])`` if any exception was raised (it is printed and logged,
        not propagated).
    """
    try:
        local_nof_ts = local_raw_unit_sales.shape[0]
        local_forecast_horizon_days = local_settings['forecast_horizon_days']
        local_features_for_each_training = 1
        print('starting neural network - individual time_serie training unit_sale_approach')

        # set training parameters
        local_time_steps_days = int(local_settings['time_steps_days'])
        local_epochs = int(local_model_hyperparameters['epochs'])
        local_batch_size = int(local_model_hyperparameters['batch_size'])
        local_workers = int(local_model_hyperparameters['workers'])
        local_optimizer_function = local_model_hyperparameters['optimizer']
        local_optimizer_learning_rate = local_model_hyperparameters['learning_rate']
        local_validation_split = local_model_hyperparameters['validation_split']
        # any other optimizer value is handed to compile() unchanged
        if local_optimizer_function == 'adam':
            local_optimizer_function = optimizers.Adam(local_optimizer_learning_rate)
        elif local_optimizer_function == 'ftrl':
            local_optimizer_function = optimizers.Ftrl(local_optimizer_learning_rate)

        # losses: every configured name adds one loss object, in this fixed order
        local_losses_list = []
        local_union_settings_losses = [local_model_hyperparameters['loss_1'],
                                       local_model_hyperparameters['loss_2'],
                                       local_model_hyperparameters['loss_3']]
        if 'mape' in local_union_settings_losses:
            local_losses_list.append(losses.MeanAbsolutePercentageError())
        if 'mse' in local_union_settings_losses:
            local_losses_list.append(losses.MeanSquaredError())
        if 'mae' in local_union_settings_losses:
            local_losses_list.append(losses.MeanAbsoluteError())
        if 'm_mape' in local_union_settings_losses:
            local_losses_list.append(modified_mape())
        if 'customized_loss_function' in local_union_settings_losses:
            local_losses_list.append(customized_loss())
        if 'pinball_loss_function' in local_union_settings_losses:
            local_losses_list.append(pinball_function_loss())

        # metrics: same scheme as the losses
        local_metrics_list = []
        local_union_settings_metrics = [local_model_hyperparameters['metrics1'],
                                        local_model_hyperparameters['metrics2']]
        if 'rmse' in local_union_settings_metrics:
            local_metrics_list.append(metrics.RootMeanSquaredError())
        if 'mse' in local_union_settings_metrics:
            local_metrics_list.append(metrics.MeanSquaredError())
        if 'mae' in local_union_settings_metrics:
            local_metrics_list.append(metrics.MeanAbsoluteError())
        if 'mape' in local_union_settings_metrics:
            local_metrics_list.append(metrics.MeanAbsolutePercentageError())

        # the flag is stored as the string 'True', not as a bool
        local_l1 = local_model_hyperparameters['l1']
        local_l2 = local_model_hyperparameters['l2']
        if local_model_hyperparameters['regularizers_l1_l2'] == 'True':
            local_activation_regularizer = regularizers.l1_l2(l1=local_l1, l2=local_l2)
        else:
            local_activation_regularizer = None

        # callbacks: early stopping on the training loss
        local_callback1 = cb.EarlyStopping(
            monitor='loss',
            patience=local_model_hyperparameters['early_stopping_patience'])
        local_callbacks = [local_callback1]

        print('building current model: individual_time_serie_acc_freq_LSTM_Dense_ANN')
        local_base_model = tf.keras.Sequential()
        # first layer (LSTM)
        # NOTE(review): input_shape reads 'time_steps_days' from the hyperparameters while
        # build() below uses the value from local_settings - confirm both agree
        if local_model_hyperparameters['units_layer_1'] > 0:
            local_base_model.add(
                layers.LSTM(
                    units=local_model_hyperparameters['units_layer_1'],
                    activation=local_model_hyperparameters['activation_1'],
                    input_shape=(local_model_hyperparameters['time_steps_days'],
                                 local_features_for_each_training),
                    dropout=float(local_model_hyperparameters['dropout_layer_1']),
                    activity_regularizer=local_activation_regularizer,
                    return_sequences=True))
        # second layer (bidirectional LSTM); RepeatVector restores a time axis afterwards
        if local_model_hyperparameters['units_layer_2'] > 0:
            local_base_model.add(
                layers.Bidirectional(
                    layers.LSTM(
                        units=local_model_hyperparameters['units_layer_2'],
                        activation=local_model_hyperparameters['activation_2'],
                        activity_regularizer=local_activation_regularizer,
                        dropout=float(local_model_hyperparameters['dropout_layer_2']),
                        return_sequences=False)))
            local_base_model.add(
                RepeatVector(local_model_hyperparameters['repeat_vector']))
        # third layer (bidirectional peephole LSTM)
        if local_model_hyperparameters['units_layer_3'] > 0:
            local_base_model.add(
                layers.Bidirectional(
                    layers.RNN(
                        PeepholeLSTMCell(
                            units=local_model_hyperparameters['units_layer_3'],
                            dropout=float(local_model_hyperparameters['dropout_layer_3'])),
                        activity_regularizer=local_activation_regularizer,
                        return_sequences=False)))
            local_base_model.add(
                RepeatVector(local_model_hyperparameters['repeat_vector']))
        # fourth layer (DENSE)
        if local_model_hyperparameters['units_layer_4'] > 0:
            local_base_model.add(
                layers.Dense(
                    units=local_model_hyperparameters['units_layer_4'],
                    activation=local_model_hyperparameters['activation_4'],
                    activity_regularizer=local_activation_regularizer))
            local_base_model.add(
                layers.Dropout(rate=float(local_model_hyperparameters['dropout_layer_4'])))
        # final layer
        local_base_model.add(
            layers.Dense(units=local_model_hyperparameters['units_final_layer']))

        # build and compile model
        local_base_model.build(
            input_shape=(1, local_time_steps_days, local_features_for_each_training))
        local_base_model.compile(optimizer=local_optimizer_function,
                                 loss=local_losses_list,
                                 metrics=local_metrics_list)

        # save model architecture (template for specific models)
        local_base_model.save(''.join([
            local_settings['models_path'],
            '_unit_sales_forecaster_template_individual_ts.h5'
        ]))
        local_base_model_json = local_base_model.to_json()
        # the with-block closes the file; no explicit close() is needed
        with open(''.join([local_settings['models_path'],
                           '_unit_sales_forecaster_forecaster_template_individual_ts.json']), 'w') \
                as json_file:
            json_file.write(local_base_model_json)
        local_base_model.summary()

        # loading x_train and y_train
        local_builder = local_bxy_x_y_builder()
        local_x_train, local_y_train = local_builder.build_x_y_train_arrays(
            local_raw_unit_sales, local_settings, local_model_hyperparameters)
        # NOTE(review): reshape() re-reads the buffer with the last two dimension sizes
        # exchanged; it does not swap the axes. If the builder returns
        # (samples, time_series, days) a transpose(0, 2, 1) may be what is wanted - TODO confirm
        local_x_train = local_x_train.reshape(local_x_train.shape[0],
                                              local_x_train.shape[2],
                                              local_x_train.shape[1])
        # targets must be derived from y_train (not from x_train)
        local_y_train = local_y_train.reshape(local_y_train.shape[0],
                                              local_y_train.shape[2],
                                              local_y_train.shape[1])

        # start training time_serie by time_serie
        local_y_pred_array = np.zeros(shape=(local_raw_unit_sales.shape[0],
                                             local_forecast_horizon_days),
                                      dtype=np.dtype('float32'))
        for time_serie in range(local_nof_ts):
            print('training time_serie:', time_serie)
            local_x, local_y = local_x_train[:, :, time_serie: time_serie + 1], \
                local_y_train[:, :, time_serie: time_serie + 1]

            # training, saving model and storing forecasts
            local_base_model.fit(local_x,
                                 local_y,
                                 batch_size=local_batch_size,
                                 epochs=local_epochs,
                                 workers=local_workers,
                                 callbacks=local_callbacks,
                                 shuffle=False,
                                 validation_split=local_validation_split)
            local_base_model.save_weights(''.join([
                local_settings['models_path'],
                '/_weights_unit_sales_NN_35_days/_individual_ts_',
                str(time_serie), '_model_weights_.h5'
            ]))

            # forecast from the most recent forecast_horizon_days values of this serie
            local_x_input = local_raw_unit_sales[
                time_serie:time_serie + 1, -local_forecast_horizon_days:]
            local_x_input = local_x_input.reshape(1, local_x_input.shape[1], 1)
            local_y_pred = local_base_model.predict(local_x_input)
            local_y_pred = local_y_pred.reshape(local_y_pred.shape[1])
            local_y_pred_array[time_serie:time_serie + 1, :] = local_y_pred

        # negative forecasts are clipped to zero
        local_point_forecast = local_y_pred_array.clip(0)

        # save points forecast
        np.save(
            ''.join([
                local_settings['train_data_path'],
                'point_forecast_NN_from_unit_sales_training'
            ]), local_point_forecast)
        np.save(
            ''.join([
                local_settings['train_data_path'],
                'eleventh_model_NN_unit_sales_forecast_data'
            ]), local_point_forecast)
        np.savetxt(''.join([
            local_settings['others_outputs_path'],
            'point_forecast_NN_from_unit_sales_training.csv'
        ]),
                   local_point_forecast,
                   fmt='%10.15f',
                   delimiter=',',
                   newline='\n')
        print('point forecasts saved to file')
        print('submodule for build, train and forecast time_serie unit_sales individually finished successfully')
        return True, local_point_forecast
    except Exception as submodule_error:
        print('train model and forecast individual time_series units_sales_ submodule_error: ',
              submodule_error)
        logger.info('error in training and forecast-individual time_serie unit_sales_ schema')
        logger.error(str(submodule_error), exc_info=True)
        return False, []
update_freq='epoch', profile_batch=2, embeddings_freq=0, embeddings_metadata=None) callbacks = [ ReduceLROnPlateau(monitor='val_loss', patience=5, cooldown=0), EarlyStopping(monitor='val_acc', min_delta=1e-6, patience=15), tensor_board ] mymetrics = [ 'acc', metrics.Precision(), metrics.Recall(), metrics.AUC(), metrics.RootMeanSquaredError() ] training_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=mymetrics) history = training_model.fit(X_data, y_data, batch_size=4096, epochs=50, validation_split=0.05, callbacks=callbacks, verbose=1) # Saving the tokenizer with open('Tokenizer.pickle', 'wb') as file:
def build(self, hp):
    """Build and compile the tunable humor-regression model for one tuner trial.

    Three branches are concatenated into a single linear output unit:

    * feature branch: two Dense(relu) + Dropout(0.5) layers over
      ``FeatureInput`` (width ``self.feature_len``);
    * entity branch: frozen embeddings loaded from
      ``../data/NELL/embeddings/entity.npy``, summed over axis 1, followed by
      1-4 tunable Dense/Dropout layers;
    * sentence branch: a shared sub-model (hub layer at ``self.nnlm_path`` plus
      1-4 tunable Dense/Dropout layers) applied to both ``ReplacedInput`` and
      ``ReplacementInput``.

    Args:
        hp: hyperparameter object providing ``Int`` and ``Float``; the calls
            below define the search space.

    Returns:
        The compiled Keras ``Model`` (Adam, MSE loss, RMSE metric) with inputs
        ordered ``[features, replaced, replacement, entities]``.
    """
    ###### Feature Part
    input_features = layers.Input(shape=(self.feature_len,), dtype='float32', name="FeatureInput")
    input_entities = layers.Input(shape=(self.kb_len,), dtype='int32', name="EntityInput")

    feature_dense = layers.Dense(
        units=hp.Int('feature_units1', min_value=8, max_value=128, step=16, default=24),
        activation='relu', name="FeatureDense1")(input_features)
    feature_dense = layers.Dropout(rate=0.5)(feature_dense)
    feature_dense = layers.Dense(
        units=hp.Int('feature_units2', min_value=8, max_value=128, step=16, default=24),
        activation='relu', name="FeatureDense2")(feature_dense)
    feature_dense = layers.Dropout(rate=0.5)(feature_dense)

    ###### Entity Part
    embeddings = np.load('../data/NELL/embeddings/entity.npy')
    # take vocabulary size and vector width from the loaded matrix so the layer
    # always matches the file it is initialised from
    entity_embedding = layers.Embedding(
        embeddings.shape[0], embeddings.shape[1],
        embeddings_initializer=initializers.Constant(embeddings),
        trainable=False, name="EntityEmbeddings")(input_entities)
    # collapse the per-entity vectors into one vector per sample
    sum_layer = layers.Lambda(lambda x: backend.sum(x, axis=1, keepdims=False))(entity_embedding)

    entity_dense = sum_layer
    for i in range(hp.Int('entity_layers', 1, 4)):
        entity_dense = layers.Dense(
            units=hp.Int(f'entity_units{i}', min_value=8, max_value=128, step=16, default=24),
            activation='relu', name=f"EntityDense{i}")(entity_dense)
        entity_dense = layers.Dropout(rate=hp.Float(
            f'entity_dropout_{i}', min_value=0.0, max_value=0.5, default=0.20, step=0.1,
        ))(entity_dense)

    ###### Sentence Part
    input_replaced = layers.Input(shape=(), dtype=tf.string, name="ReplacedInput")
    input_replacement = layers.Input(shape=(), dtype=tf.string, name="ReplacementInput")

    sentence_in = layers.Input(shape=(), dtype=tf.string, name="sentence_in")
    sentence_dense = hub.KerasLayer(self.nnlm_path)(sentence_in)  # Expects a tf.string input tensor.
    for i in range(hp.Int('sentence_layers', 1, 4)):
        sentence_dense = layers.Dense(
            units=hp.Int(f'sentence_units{i}', min_value=64, max_value=512, step=64, default=128),
            activation='relu', name=f"SentenceDense{i}")(sentence_dense)
        sentence_dense = layers.Dropout(rate=hp.Float(
            f'sentence_dropout_{i}', min_value=0.0, max_value=0.5, default=0.20, step=0.1,
        ))(sentence_dense)

    # one sub-model, applied twice, so both sentences share the same weights
    sentence_model = Model(sentence_in, sentence_dense, name="SentenceModel")
    concat_sentence = layers.Concatenate()([sentence_model(input_replaced),
                                            sentence_model(input_replacement)])

    ###### Common Part
    concat = layers.Concatenate()([feature_dense, concat_sentence, entity_dense])
    output = layers.Dense(1, name="Output")(concat)

    HUMOR = Model(inputs=[input_features, input_replaced, input_replacement, input_entities],
                  outputs=output)

    # `learning_rate` is the supported keyword; `lr` is a deprecated alias
    opt = optimizers.Adam(learning_rate=hp.Float(
        'learning_rate', min_value=1e-5, max_value=1e-1, sampling='LOG', default=1e-2
    ))

    HUMOR.compile(optimizer=opt, loss="mean_squared_error", metrics=[metrics.RootMeanSquaredError()])
    HUMOR.summary()

    return HUMOR
def build(self, hp):
    """Build and compile the "KBHumor" model; only the ALBERT context head is tuned.

    Four branches are concatenated into a single linear output unit:

    * feature branch: two Dense + Dropout layers over ``FeatureInput``;
    * entity branch: frozen embeddings loaded from
      ``../data/NELL/embeddings/entity.npy``, summed over axis 1, then two
      Dense + Dropout layers;
    * sentence branch: a shared sub-model (hub layer at ``self.nnlm_path`` plus
      three Dense + Dropout layers) applied to both ``ReplacedInput`` and
      ``ReplacementInput``;
    * context branch: the frozen hub layer at ``self.albert_path``, whose pooled
      output feeds two Dense + Dropout(0.5) layers with units and activations
      drawn from ``hp``.

    Sizes, activations, dropout rates and the learning rate of the first three
    branches are read from ``self.parameters``.

    Args:
        hp: hyperparameter object providing ``Int`` and ``Choice``.

    Returns:
        The compiled Keras ``Model`` (Adam, MSE loss, RMSE metric) with inputs
        ordered ``[features, entities, replaced, replacement, input_word_ids,
        input_mask, segment_ids]``.
    """
    ###### Feature Part
    input_features = layers.Input(shape=(self.feature_len, ),
                                  dtype='float32',
                                  name="FeatureInput")
    input_entities = layers.Input(shape=(self.kb_len, ),
                                  dtype='int32',
                                  name="EntityInput")

    feature_dense = layers.Dense(
        self.parameters["feature_units1"],
        activation=self.parameters["feature_dense_activation1"],
        name="FeatureDense1")(input_features)
    feature_dense = layers.Dropout(
        self.parameters["feature_dropout_1"])(feature_dense)
    feature_dense = layers.Dense(
        self.parameters["feature_units2"],
        activation=self.parameters["feature_dense_activation2"],
        name="FeatureDense2")(feature_dense)
    feature_dense = layers.Dropout(
        self.parameters["feature_dropout_2"])(feature_dense)

    ###### Entity Part
    embeddings = np.load('../data/NELL/embeddings/entity.npy')
    # take vocabulary size and vector width from the loaded matrix so the layer
    # always matches the file it is initialised from
    entity_embedding = layers.Embedding(
        embeddings.shape[0],
        embeddings.shape[1],
        embeddings_initializer=initializers.Constant(embeddings),
        trainable=False,
        name="EntityEmbeddings")(input_entities)
    # collapse the per-entity vectors into one vector per sample
    sum_layer = layers.Lambda(
        lambda x: backend.sum(x, axis=1, keepdims=False))(entity_embedding)

    entity_dense = layers.Dense(
        self.parameters["entity_units1"],
        activation=self.parameters["entity_dense_activation1"],
        name="EntityDense1")(sum_layer)
    entity_dense = layers.Dropout(
        self.parameters["entity_dropout_1"])(entity_dense)
    entity_dense = layers.Dense(
        self.parameters["entity_units2"],
        activation=self.parameters["entity_dense_activation2"],
        name="EntityDense2")(entity_dense)
    entity_dense = layers.Dropout(
        self.parameters["entity_dropout_2"])(entity_dense)

    ###### Sentence Part
    input_replaced = layers.Input(shape=(),
                                  dtype=tf.string,
                                  name="ReplacedInput")
    input_replacement = layers.Input(shape=(),
                                     dtype=tf.string,
                                     name="ReplacementInput")

    sentence_in = layers.Input(shape=(), dtype=tf.string, name="sentence_in")
    word_embed = hub.KerasLayer(self.nnlm_path)(sentence_in)
    sentence_dense = layers.Dense(
        self.parameters["sentence_units1"],
        activation=self.parameters["sentence_dense_activation1"],
        name="SentenceDense1")(word_embed)
    sentence_dense = layers.Dropout(
        self.parameters["sentence_dropout_1"])(sentence_dense)
    sentence_dense = layers.Dense(
        self.parameters["sentence_units2"],
        activation=self.parameters["sentence_dense_activation2"],
        name="SentenceDense2")(sentence_dense)
    sentence_dense = layers.Dropout(
        self.parameters["sentence_dropout_2"])(sentence_dense)
    sentence_dense = layers.Dense(
        self.parameters["sentence_units3"],
        activation=self.parameters["sentence_dense_activation3"],
        name="SentenceDense3")(sentence_dense)
    sentence_dense = layers.Dropout(
        self.parameters["sentence_dropout_3"])(sentence_dense)

    # one sub-model, applied twice, so both sentences share the same weights
    sentence_model = Model(sentence_in, sentence_dense, name="WordModel")
    concat_sentence = layers.Concatenate()([
        sentence_model(input_replaced),
        sentence_model(input_replacement)
    ])

    ###### Albert
    input_word_ids = tf.keras.layers.Input(shape=(self.max_seq_length, ),
                                           dtype=tf.int32,
                                           name="input_word_ids")
    input_mask = tf.keras.layers.Input(shape=(self.max_seq_length, ),
                                       dtype=tf.int32,
                                       name="input_mask")
    segment_ids = tf.keras.layers.Input(shape=(self.max_seq_length, ),
                                        dtype=tf.int32,
                                        name="segment_ids")
    albert_layer = hub.KerasLayer(self.albert_path, trainable=False)
    # only the pooled output is used; the per-token sequence output is discarded
    pooled_output, _sequence_output = albert_layer(
        [input_word_ids, input_mask, segment_ids])

    context_dense = layers.Dense(hp.Int('contextUnits1',
                                        min_value=32,
                                        max_value=512,
                                        step=32),
                                 activation=hp.Choice(
                                     'activation1',
                                     ['relu', 'tanh', 'sigmoid']),
                                 name="ContextDense1")(pooled_output)
    context_dense = layers.Dropout(0.5)(context_dense)
    context_dense = layers.Dense(hp.Int('contextUnits2',
                                        min_value=32,
                                        max_value=512,
                                        step=32),
                                 activation=hp.Choice(
                                     'activation2',
                                     ['relu', 'tanh', 'sigmoid']),
                                 name="ContextDense2")(context_dense)
    context_dense = layers.Dropout(0.5)(context_dense)

    ###### Common Part
    concat = layers.Concatenate()(
        [feature_dense, concat_sentence, entity_dense, context_dense])
    output = layers.Dense(1, name="Output")(concat)

    HUMOR = Model(inputs=[
        input_features, input_entities, input_replaced, input_replacement,
        input_word_ids, input_mask, segment_ids
    ],
                  outputs=output,
                  name="KBHumor")

    # `learning_rate` is the supported keyword; `lr` is a deprecated alias
    opt = optimizers.Adam(learning_rate=self.parameters["learning_rate"])

    HUMOR.compile(optimizer=opt,
                  loss="mean_squared_error",
                  metrics=[metrics.RootMeanSquaredError()])
    HUMOR.summary()

    return HUMOR
def test_rest(scan_object):
    """Smoke-test the talos utilities that the other tests do not cover.

    Exercises, in order: ``Deploy`` / ``Restore`` of ``scan_object``, the
    ``val_split`` and ``rescale_meanzero`` helpers, the ``early_stopper`` and
    ``ExperimentLogCallback`` callbacks together with a set of Keras metric
    objects, the ``generator`` and ``SequenceGenerator`` data feeders, and
    ``create_param_space`` on the restored results. The function asserts
    nothing itself; it passes when no call raises.

    Args:
        scan_object: a completed talos Scan object to deploy.
    """
    print('\n >>> start testing the rest... \n')

    import talos
    import random

    from tensorflow.keras.models import Sequential
    from tensorflow.keras.layers import Dense
    from tensorflow.keras import metrics

    # random suffix so repeated runs do not collide on the deploy archive name
    deploy_filename = 'test' + str(random.randint(1, 20000000000))

    print('\n ...Deploy()... \n')
    talos.Deploy(scan_object, deploy_filename, 'val_acc')

    print('\n ...Restore()... \n')
    restored = talos.Restore(deploy_filename + '.zip')

    x, y = talos.templates.datasets.breast_cancer()
    x = x[:50]
    y = y[:50]

    # the split is only exercised; the models below are fitted on the full 50 rows
    x_train, y_train, x_val, y_val = talos.utils.val_split(x, y, .2)
    x = talos.utils.rescale_meanzero(x)

    callbacks = [
        talos.utils.early_stopper(10),
        talos.utils.ExperimentLogCallback('test', {})
    ]

    # named differently from the imported `metrics` module to avoid shadowing it
    metric_objects = [
        metrics.MeanAbsolutePercentageError(),
        metrics.MeanSquaredLogarithmicError(),
        metrics.RootMeanSquaredError(),
        metrics.Precision(),
        metrics.Recall()
    ]

    print('\n ...callbacks and metrics... \n')

    # every model needs a loss to be trainable; 'logcosh' matches model3 below
    model1 = Sequential()
    model1.add(Dense(10, input_dim=x.shape[1]))
    model1.add(Dense(1))
    model1.compile('adam', 'logcosh', metrics=metric_objects)
    model1.fit(x, y, callbacks=callbacks)

    print('\n ...generator... \n')

    model2 = Sequential()
    model2.add(Dense(10, input_dim=x.shape[1]))
    model2.add(Dense(1))
    model2.compile('adam', 'logcosh')
    model2.fit_generator(talos.utils.generator(x, y, 10), 5)

    print('\n ...SequenceGenerator... \n')

    model3 = Sequential()
    model3.add(Dense(10, input_dim=x.shape[1]))
    model3.add(Dense(1))
    model3.compile('adam', 'logcosh')
    model3.fit_generator(talos.utils.SequenceGenerator(x, y, 10))

    from talos.utils.test_utils import create_param_space
    create_param_space(restored.results, 5)

    print('finished testing the rest \n')
def create_HUMOR2_model(feature_len: int, kb_len: int, max_seq_length: int,
                        parameters) -> Model:
    """Create the four-branch humor regression model.

    The model concatenates a feature branch, an entity (knowledge base)
    branch, a shared sentence encoder applied to the replaced and the
    replacement input, and an ALBERT context branch, and maps the result
    to a single linear output. It is compiled with Adam, mean squared
    error loss and an RMSE metric.

    Arguments:
        feature_len {int} -- Width of the "FeatureInput" tensor.
        kb_len {int} -- Width of the "EntityInput" tensor.
        max_seq_length {int} -- Length of the three ALBERT inputs.
        parameters -- Mapping indexed by string keys. Read here:
            feature_units1/2, feature_dense_activation1/2,
            feature_dropout_1/2, entity_units1/2,
            entity_dense_activation1/2, entity_dropout_1/2,
            sentence_units1/2/3, sentence_dense_activation1/2/3,
            sentence_dropout_1/2/3, context_units1/2,
            context_activation1/2 and learning_rate.

    Returns:
        Model -- The compiled keras model named "KBHumor".
    """
    ###### Feature Part
    input_features = layers.Input(shape=(feature_len, ),
                                  dtype='float32',
                                  name="FeatureInput")
    input_entities = layers.Input(shape=(kb_len, ),
                                  dtype='int32',
                                  name="EntityInput")

    feature_dense = layers.Dense(
        parameters["feature_units1"],
        activation=parameters["feature_dense_activation1"],
        name="FeatureDense1")(input_features)
    feature_dense = layers.Dropout(
        parameters["feature_dropout_1"])(feature_dense)
    feature_dense = layers.Dense(
        parameters["feature_units2"],
        activation=parameters["feature_dense_activation2"],
        name="FeatureDense2")(feature_dense)
    feature_dense = layers.Dropout(
        parameters["feature_dropout_2"])(feature_dense)

    ###### Knowledge Part
    # Frozen embedding table loaded from disk; the looked-up vectors are
    # summed over axis 1 before the dense layers.
    embeddings = np.load('../data/NELL/embeddings/entity.npy')
    entity_embedding = layers.Embedding(
        len(embeddings),
        embeddings.shape[1],
        embeddings_initializer=initializers.Constant(embeddings),
        trainable=False,
        name="EntityEmbeddings")(input_entities)
    sum_layer = layers.Lambda(
        lambda x: backend.sum(x, axis=1, keepdims=False))(entity_embedding)

    entity_dense = layers.Dense(
        parameters["entity_units1"],
        activation=parameters["entity_dense_activation1"],
        name="EntityDense1")(sum_layer)
    entity_dense = layers.Dropout(parameters["entity_dropout_1"])(entity_dense)
    entity_dense = layers.Dense(
        parameters["entity_units2"],
        activation=parameters["entity_dense_activation2"],
        name="EntityDense2")(entity_dense)
    entity_dense = layers.Dropout(parameters["entity_dropout_2"])(entity_dense)

    ###### Sentence Part
    input_replaced = layers.Input(shape=(),
                                  dtype=tf.string,
                                  name="ReplacedInput")
    input_replacement = layers.Input(shape=(),
                                     dtype=tf.string,
                                     name="ReplacementInput")
    sentence_in = layers.Input(shape=(), dtype=tf.string, name="sentence_in")

    word_embed = hub.KerasLayer('https://tfhub.dev/google/nnlm-en-dim128/2')(
        sentence_in)
    sentence_dense = layers.Dense(
        parameters["sentence_units1"],
        activation=parameters["sentence_dense_activation1"],
        name="SentenceDense1")(word_embed)
    sentence_dense = layers.Dropout(
        parameters["sentence_dropout_1"])(sentence_dense)
    sentence_dense = layers.Dense(
        parameters["sentence_units2"],
        activation=parameters["sentence_dense_activation2"],
        name="SentenceDense2")(sentence_dense)
    sentence_dense = layers.Dropout(
        parameters["sentence_dropout_2"])(sentence_dense)
    sentence_dense = layers.Dense(
        parameters["sentence_units3"],
        activation=parameters["sentence_dense_activation3"],
        name="SentenceDense3")(sentence_dense)
    sentence_dense = layers.Dropout(
        parameters["sentence_dropout_3"])(sentence_dense)

    # One encoder, applied to both string inputs so the weights are shared.
    sentence_model = Model(sentence_in, sentence_dense, name="SentenceModel")
    concat_sentence = layers.Concatenate()(
        [sentence_model(input_replaced),
         sentence_model(input_replacement)])

    ###### Albert
    input_word_ids = tf.keras.layers.Input(shape=(max_seq_length, ),
                                           dtype=tf.int32,
                                           name="input_word_ids")
    input_mask = tf.keras.layers.Input(shape=(max_seq_length, ),
                                       dtype=tf.int32,
                                       name="input_mask")
    segment_ids = tf.keras.layers.Input(shape=(max_seq_length, ),
                                        dtype=tf.int32,
                                        name="segment_ids")
    albert_layer = hub.KerasLayer(
        "https://tfhub.dev/tensorflow/albert_en_base/1", trainable=False)
    # Only the pooled output feeds the context branch.
    pooled_output, _sequence_output = albert_layer(
        [input_word_ids, input_mask, segment_ids])

    context_dense = layers.Dense(parameters["context_units1"],
                                 activation=parameters["context_activation1"],
                                 name="ContextDense1")(pooled_output)
    context_dense = layers.Dropout(0.5)(context_dense)
    context_dense = layers.Dense(parameters["context_units2"],
                                 activation=parameters["context_activation2"],
                                 name="ContextDense2")(context_dense)
    context_dense = layers.Dropout(0.5)(context_dense)

    ###### Common Part
    concat = layers.Concatenate()(
        [feature_dense, concat_sentence, entity_dense, context_dense])
    output = layers.Dense(1, name="Output")(concat)

    HUMOR = Model(inputs=[
        input_features, input_entities, input_replaced, input_replacement,
        input_word_ids, input_mask, segment_ids
    ],
                  outputs=output,
                  name="KBHumor")

    # `learning_rate` is the supported keyword; the old `lr` alias is
    # deprecated and rejected by newer Keras releases.
    opt = optimizers.Adam(learning_rate=parameters["learning_rate"])
    HUMOR.compile(optimizer=opt,
                  loss="mean_squared_error",
                  metrics=[metrics.RootMeanSquaredError()])
    HUMOR.summary()

    return HUMOR
def create_HUMOR_model(feature_len: int, kb_len: int, kb_part: bool,
                       word_encoder: bool, replaced: bool,
                       replacement: bool) -> Model:
    """Create a humor model.

    Feature length and KB length is for defining input sizes.
    KB part is for defining whether you want the knowledge base part or not.
    To select the word encoder you will have to set word_encoder.
    Subsequently you will have to set replaced and replacement according to
    which inputs you want. If you don't at all want the word encoder set it
    to false.

    Arguments:
        feature_len {int} -- The number of features
        kb_len {int} -- The length of the KB Vector
        kb_part {bool} -- Whether you want the KB part or not
        word_encoder {bool} -- Whether you want the word encoder or not.
        replaced {bool} -- If word encoder is inplace do you then want the
            replaced input.
        replacement {bool} -- If word encoder is inplace do you then want the
            replacement input.

    Returns:
        Model -- The compiled keras model.

    Raises:
        ValueError -- If word_encoder is set but neither replaced nor
            replacement is, since the encoder would have no input.
    """
    # Fail fast, before any layer is built: the sentence branch needs at
    # least one string input, otherwise there is nothing to append below.
    if word_encoder and not (replaced or replacement):
        raise ValueError(
            "word_encoder=True requires at least one of 'replaced' or "
            "'replacement' to be True")

    ###### Feature Part
    input_features = layers.Input(shape=(feature_len, ),
                                  dtype='float32',
                                  name="FeatureInput")

    feature_dense = layers.Dense(16, activation='relu',
                                 name="FeatureDense1")(input_features)
    feature_dense = layers.Dropout(0.5)(feature_dense)
    feature_dense = layers.Dense(16, activation='relu',
                                 name="FeatureDense2")(feature_dense)
    feature_dense = layers.Dropout(0.5)(feature_dense)

    # Branch outputs and model inputs are collected as branches are enabled.
    outputs = [feature_dense]
    inputs = [input_features]

    ###### Knowledge Part
    if kb_part:
        input_entities = layers.Input(shape=(kb_len, ),
                                      dtype='int32',
                                      name="EntityInput")

        # Frozen embedding table loaded from disk; the looked-up vectors
        # are summed over axis 1 before the dense layers.
        embeddings = np.load('../data/NELL/embeddings/entity.npy')
        entity_embedding = layers.Embedding(
            len(embeddings),
            embeddings.shape[1],
            embeddings_initializer=initializers.Constant(embeddings),
            trainable=False,
            name="EntityEmbeddings")(input_entities)
        sum_layer = layers.Lambda(
            lambda x: backend.sum(x, axis=1, keepdims=False))(entity_embedding)

        entity_dense = layers.Dense(32, activation='relu',
                                    name="EntityDense1")(sum_layer)
        entity_dense = layers.Dropout(0.5)(entity_dense)
        entity_dense = layers.Dense(16, activation='relu',
                                    name="EntityDense2")(entity_dense)
        entity_dense = layers.Dropout(0.5)(entity_dense)

        outputs.append(entity_dense)
        inputs.append(input_entities)

    ###### Sentence Part
    if word_encoder:
        sentence_in = layers.Input(shape=(),
                                   dtype=tf.string,
                                   name="sentence_in")
        # Expects a tf.string input tensor.
        embed = hub.KerasLayer('https://tfhub.dev/google/nnlm-en-dim128/2')(
            sentence_in)

        sentence_dense = layers.Dense(64, activation='relu',
                                      name="SentenceDense1")(embed)
        sentence_dense = layers.Dropout(0.5)(sentence_dense)
        sentence_dense = layers.Dense(32, activation='relu',
                                      name="SentenceDense2")(sentence_dense)
        sentence_dense = layers.Dropout(0.5)(sentence_dense)
        sentence_dense = layers.Dense(16, activation='relu',
                                      name="SentenceDense3")(sentence_dense)
        sentence_dense = layers.Dropout(0.5)(sentence_dense)

        # One encoder, applied to every selected string input so the
        # weights are shared.
        sentence_model = Model(sentence_in, sentence_dense, name="WordModel")

        concat_vector = []
        if replaced:
            input_replaced = layers.Input(shape=(),
                                          dtype=tf.string,
                                          name="ReplacedInput")
            concat_vector.append(sentence_model(input_replaced))
            inputs.append(input_replaced)

        if replacement:
            input_replacement = layers.Input(shape=(),
                                             dtype=tf.string,
                                             name="ReplacementInput")
            concat_vector.append(sentence_model(input_replacement))
            inputs.append(input_replacement)

        # The guard at the top guarantees at least one encoded sentence.
        if len(concat_vector) > 1:
            outputs.append(layers.Concatenate()(concat_vector))
        else:
            outputs.append(concat_vector[0])

    ###### Common Part
    # Concatenate needs at least two tensors, so a single branch is fed to
    # the output layer directly.
    merged = layers.Concatenate()(outputs) if len(outputs) > 1 else outputs[0]
    output = layers.Dense(1, name="Output")(merged)

    HUMOR = Model(inputs=inputs, outputs=output, name="KBHumor")

    # `learning_rate` is the supported keyword; the old `lr` alias is
    # deprecated and rejected by newer Keras releases.
    opt = optimizers.Adam(learning_rate=0.001)
    HUMOR.compile(optimizer=opt,
                  loss="mean_squared_error",
                  metrics=[metrics.RootMeanSquaredError()])
    HUMOR.summary()

    return HUMOR
def build_model(local_bm_hyperparameters, local_bm_settings):
    """Assemble, save and compile the in-block forecaster network.

    The Sequential model is put together from up to four optional layers
    (dense, two bidirectional LSTMs, dense) followed by a time-distributed
    dense output. Each optional layer is switched on by a positive
    ``units_layer_N`` hyperparameter. The structure is saved in 'tf' format
    and as JSON under ``local_bm_settings['models_path']``.

    :param local_bm_hyperparameters: mapping with the model hyperparameters
    :param local_bm_settings: mapping with 'models_path' and
        'forecast_horizon_days'
    :return: tuple ``(True, model_built)``; ``model_built`` is always 0 here
    """
    hp = local_bm_hyperparameters
    model_built = 0

    # These entries are converted to int but not used further here.
    for int_key in ('time_steps_days', 'epochs', 'batch_size', 'workers'):
        int(hp[int_key])

    # optimizer: any name other than 'adam' / 'ftrl' is passed through as is
    optimizer_function = hp['optimizer']
    optimizer_learning_rate = hp['learning_rate']
    if optimizer_function == 'adam':
        optimizer_function = optimizers.Adam(optimizer_learning_rate)
    elif optimizer_function == 'ftrl':
        optimizer_function = optimizers.Ftrl(optimizer_learning_rate)

    # losses: emitted in this fixed order, whatever the order in the settings
    requested_losses = [hp['loss_1'], hp['loss_2'], hp['loss_3']]
    loss_factories = (
        ('mape', lambda: losses.MeanAbsolutePercentageError()),
        ('mse', lambda: losses.MeanSquaredError()),
        ('mae', lambda: losses.MeanAbsoluteError()),
        ('m_mape', lambda: modified_mape()),
        ('customized_loss_function', lambda: customized_loss()),
    )
    losses_list = [
        make_loss() for loss_key, make_loss in loss_factories
        if loss_key in requested_losses
    ]

    # metrics: same fixed-order selection
    requested_metrics = [hp['metrics1'], hp['metrics2']]
    metric_factories = (
        ('rmse', lambda: metrics.RootMeanSquaredError()),
        ('mse', lambda: metrics.MeanSquaredError()),
        ('mae', lambda: metrics.MeanAbsoluteError()),
        ('mape', lambda: metrics.MeanAbsolutePercentageError()),
    )
    metrics_list = [
        make_metric() for metric_key, make_metric in metric_factories
        if metric_key in requested_metrics
    ]

    # the flag is compared against the string 'True', not a boolean
    l1 = hp['l1']
    l2 = hp['l2']
    activation_regularizer = (regularizers.l1_l2(l1=l1, l2=l2)
                              if hp['regularizers_l1_l2'] == 'True' else None)

    nof_features_for_training = hp['nof_features_for_training']

    # creating model
    forecaster_in_block = tf.keras.Sequential()
    add_layer = forecaster_in_block.add
    print('creating the ANN model...')

    # first layer (DENSE)
    if hp['units_layer_1'] > 0:
        add_layer(
            layers.Dense(units=hp['units_layer_1'],
                         activation=hp['activation_1'],
                         input_shape=(hp['time_steps_days'],
                                      nof_features_for_training),
                         activity_regularizer=activation_regularizer))
        add_layer(layers.Dropout(rate=float(hp['dropout_layer_1'])))

    # second LSTM layer (only together with the first layer)
    if hp['units_layer_2'] > 0 and hp['units_layer_1'] > 0:
        second_lstm = layers.LSTM(
            units=hp['units_layer_2'],
            activation=hp['activation_2'],
            activity_regularizer=activation_regularizer,
            dropout=float(hp['dropout_layer_2']),
            return_sequences=False)
        add_layer(layers.Bidirectional(second_lstm))
        add_layer(RepeatVector(hp['repeat_vector']))

    # third LSTM layer
    if hp['units_layer_3'] > 0:
        third_lstm = layers.LSTM(
            units=hp['units_layer_3'],
            activation=hp['activation_3'],
            activity_regularizer=activation_regularizer,
            dropout=float(hp['dropout_layer_3']),
            return_sequences=True)
        add_layer(layers.Bidirectional(third_lstm))
        if hp['units_layer_4'] == 0:
            add_layer(RepeatVector(hp['repeat_vector']))

    # fourth layer (DENSE)
    if hp['units_layer_4'] > 0:
        add_layer(
            layers.Dense(units=hp['units_layer_4'],
                         activation=hp['activation_4'],
                         activity_regularizer=activation_regularizer))
        add_layer(layers.Dropout(rate=float(hp['dropout_layer_4'])))

    # final layer
    add_layer(TimeDistributed(layers.Dense(units=nof_features_for_training)))

    # the structure is saved before the explicit build / compile below
    structure_path = ''.join(
        [local_bm_settings['models_path'], 'in_block_NN_model_structure_'])
    forecaster_in_block.save(structure_path, save_format='tf')

    forecast_horizon_days = local_bm_settings['forecast_horizon_days']
    forecaster_in_block.build(input_shape=(1, forecast_horizon_days + 1,
                                           nof_features_for_training))
    forecaster_in_block.compile(optimizer=optimizer_function,
                                loss=losses_list,
                                metrics=metrics_list)

    forecaster_in_block_json = forecaster_in_block.to_json()
    json_path = ''.join([
        local_bm_settings['models_path'], 'freq_acc_forecaster_in_block.json'
    ])
    # the with-statement closes the file on exit
    with open(json_path, 'w') as json_file:
        json_file.write(forecaster_in_block_json)

    print(
        'build_model function finish (model structure saved in json and ts formats)'
    )
    return True, model_built
def construct_lstm_cnn(look_forward, look_back=30, compile=True, single_output=False):
    """Build the combined CNN + LSTM model with two auxiliary branches.

    Three identical fully connected heads (500 -> 100 -> 25, each followed
    by 0.5 dropout) are attached to the concatenated CNN/LSTM features, to
    the flattened CNN output alone, and to the LSTM output alone. Each head
    ends in `look_forward` linear outputs, or in a single one when
    `single_output` is set.

    :param look_forward: number of outputs per branch; also passed to
        `construct_cnn` and `construct_lstm`
    :param look_back: passed to `construct_lstm`
    :param compile: when true the model is compiled with Adam, `rmse_loss`
        and weights of 1 for the merged outputs and 0.2 for the auxiliary ones
    :param single_output: emit one output per branch instead of `look_forward`
    :return: the keras Model named "lstm-cnn"
    """
    cnn = construct_cnn(look_forward, fc=False)
    cnn_flatten = Flatten()(cnn.output)
    lstm = construct_lstm(look_forward, look_back, 2, fc=False)

    def _head(features, multi_prefix, single_prefix):
        # Fully connected stack followed by the linear prediction layer(s).
        hidden = features
        for width in (500, 100, 25):
            hidden = Dense(width, activation='relu')(hidden)
            hidden = Dropout(0.5)(hidden)
        if single_output:
            return [
                Dense(1,
                      activation='linear',
                      name=single_prefix + str(look_forward))(hidden)
            ]
        predictions = []
        for step in range(look_forward):
            predictions.append(
                Dense(1, activation='linear',
                      name=multi_prefix + str(step))(hidden))
        return predictions

    # Merged layer
    cnn_lstm = concatenate([cnn_flatten, lstm.output])
    merged_outputs = _head(cnn_lstm, 'merged_output_', 'merged_output_t_')

    # Auxiliary branch for cnn
    cnn_outputs = _head(cnn_flatten, 'cnn_aux_output_', 'cnn_aux_output_t_')

    # Auxiliary branch for lstm (its single-output name carries no "_t_")
    lstm_outputs = _head(lstm.output, 'lstm_aux_output_', 'lstm_aux_output_')

    # Final model with three branches
    model = Model(inputs=[cnn.input, lstm.input],
                  outputs=(merged_outputs + cnn_outputs + lstm_outputs),
                  name="lstm-cnn")

    if compile:
        if single_output:
            loss_weights = [1, 0.2, 0.2]
        else:
            loss_weights = ([1] * look_forward + [0.2] * look_forward +
                            [0.2] * look_forward)

        model.compile(optimizer='adam',
                      loss=rmse_loss,
                      loss_weights=loss_weights,
                      metrics=[
                          metrics.RootMeanSquaredError(name='rmse'),
                          metrics.MeanAbsolutePercentageError(name='mape'),
                          metrics.MeanAbsoluteError(name='mae')
                      ])
    return model
def forecast(self, local_mse, local_normalized_scaled_unit_sales,
             local_mean_unit_complete_time_serie, local_raw_unit_sales,
             local_settings):
    """Train one model per poorly forecast time series and keep the better ones.

    Reads the hyperparameters JSON, builds and compiles a Sequential
    network, and then walks over ``local_mse``. Every series whose MSE is
    above ``local_settings['poor_results_mse_threshold']`` and that has no
    saved model yet is trained (or its saved model is loaded), evaluated on
    the last ``forecast_horizon_days`` days, and saved when the new MSE is
    lower than the one in ``local_mse``. The indices of improved and
    not-improved series are stored with ``np.save``.

    :param local_mse: iterable of pairs; ``result[0]`` is used as the series
        index and ``result[1]`` as its previous MSE
    :param local_normalized_scaled_unit_sales: 2-D array indexed as
        ``[time_serie, :]`` holding the training data
    :param local_mean_unit_complete_time_serie: indexed by series; passed to
        ``general_mean_rescaler``
    :param local_raw_unit_sales: 2-D array used as ground truth
    :param local_settings: mapping with paths and general settings
    :return: ``True`` when the submodule finishes, ``False`` when an
        exception is caught, ``(False, None)`` when
        ``train_model_input_data_approach`` is neither "all" nor "focused"
    """
    # NOTE(review): the return shape is inconsistent (bool vs. tuple) -
    # confirm what callers expect.
    try:
        print(
            'starting high loss (mse in aggregated LSTM) specific time_serie forecast submodule'
        )
        # set training parameters
        with open(''.join([local_settings['hyperparameters_path'],
                           'individual_time_serie_based_model_hyperparameters.json'])) \
                as local_r_json_file:
            model_hyperparameters = json.loads(local_r_json_file.read())
            # redundant: the with-statement already closes the file
            local_r_json_file.close()
        time_steps_days = int(local_settings['time_steps_days'])
        epochs = int(model_hyperparameters['epochs'])
        batch_size = int(model_hyperparameters['batch_size'])
        workers = int(model_hyperparameters['workers'])
        # any optimizer name other than 'adam' / 'ftrl' is passed through as is
        optimizer_function = model_hyperparameters['optimizer']
        optimizer_learning_rate = model_hyperparameters['learning_rate']
        if optimizer_function == 'adam':
            optimizer_function = optimizers.Adam(optimizer_learning_rate)
        elif optimizer_function == 'ftrl':
            optimizer_function = optimizers.Ftrl(optimizer_learning_rate)
        # losses are appended in this fixed order, whatever the order in the settings
        losses_list = []
        loss_1 = model_hyperparameters['loss_1']
        loss_2 = model_hyperparameters['loss_2']
        loss_3 = model_hyperparameters['loss_3']
        union_settings_losses = [loss_1, loss_2, loss_3]
        if 'mape' in union_settings_losses:
            losses_list.append(losses.MeanAbsolutePercentageError())
        if 'mse' in union_settings_losses:
            losses_list.append(losses.MeanSquaredError())
        if 'mae' in union_settings_losses:
            losses_list.append(losses.MeanAbsoluteError())
        if 'm_mape' in union_settings_losses:
            losses_list.append(modified_mape())
        if 'customized_loss_function' in union_settings_losses:
            losses_list.append(customized_loss())
        metrics_list = []
        metric1 = model_hyperparameters['metrics1']
        metric2 = model_hyperparameters['metrics2']
        union_settings_metrics = [metric1, metric2]
        if 'rmse' in union_settings_metrics:
            metrics_list.append(metrics.RootMeanSquaredError())
        if 'mse' in union_settings_metrics:
            metrics_list.append(metrics.MeanSquaredError())
        if 'mae' in union_settings_metrics:
            metrics_list.append(metrics.MeanAbsoluteError())
        if 'mape' in union_settings_metrics:
            metrics_list.append(metrics.MeanAbsolutePercentageError())
        l1 = model_hyperparameters['l1']
        l2 = model_hyperparameters['l2']
        # the flag is compared against the string 'True', not a boolean
        if model_hyperparameters['regularizers_l1_l2'] == 'True':
            activation_regularizer = regularizers.l1_l2(l1=l1, l2=l2)
        else:
            activation_regularizer = None
        nof_features_by_training = 1
        forecaster = tf.keras.Sequential()
        print(
            'current model for specific high loss time_series: Mix_Bid_PeepHole_LSTM_Dense_ANN'
        )
        # first layer (DENSE)
        if model_hyperparameters['units_layer_1'] > 0:
            forecaster.add(
                layers.Dense(
                    units=model_hyperparameters['units_layer_1'],
                    activation=model_hyperparameters['activation_1'],
                    activity_regularizer=activation_regularizer))
            forecaster.add(
                layers.Dropout(
                    rate=float(model_hyperparameters['dropout_layer_1'])))
        # second LSTM layer
        if model_hyperparameters['units_layer_2'] > 0:
            forecaster.add(
                layers.Bidirectional(
                    layers.RNN(PeepholeLSTMCell(
                        units=model_hyperparameters['units_layer_2'],
                        activation=model_hyperparameters['activation_2'],
                        activity_regularizer=activation_regularizer,
                        dropout=float(
                            model_hyperparameters['dropout_layer_2'])),
                               return_sequences=False)))
            forecaster.add(
                RepeatVector(model_hyperparameters['repeat_vector']))
        # third LSTM layer
        if model_hyperparameters['units_layer_3'] > 0:
            forecaster.add(
                layers.Bidirectional(
                    layers.RNN(PeepholeLSTMCell(
                        units=model_hyperparameters['units_layer_3'],
                        activation=model_hyperparameters['activation_3'],
                        activity_regularizer=activation_regularizer,
                        dropout=float(
                            model_hyperparameters['dropout_layer_3'])),
                               return_sequences=False)))
            forecaster.add(
                RepeatVector(model_hyperparameters['repeat_vector']))
        # fourth layer (DENSE)
        if model_hyperparameters['units_layer_4'] > 0:
            forecaster.add(
                layers.Dense(
                    units=model_hyperparameters['units_layer_4'],
                    activation=model_hyperparameters['activation_4'],
                    activity_regularizer=activation_regularizer))
            forecaster.add(
                layers.Dropout(
                    rate=float(model_hyperparameters['dropout_layer_4'])))
        # final layer
        forecaster.add(layers.Dense(units=nof_features_by_training))
        forecaster.compile(optimizer=optimizer_function,
                           loss=losses_list,
                           metrics=metrics_list)
        # forecaster.saves(''.join([local_settings['models_path'], '_model_structure_']),
        #                 save_format='tf')
        forecaster.build(
            input_shape=(1, local_settings['forecast_horizon_days'], 1))
        # NOTE(review): Model.to_yaml is not available in recent TensorFlow
        # releases - confirm against the pinned version.
        forecaster_yaml = forecaster.to_yaml()
        with open(
                ''.join([local_settings['models_path'], 'forecaster.yaml']),
                'w') as yaml_file:
            yaml_file.write(forecaster_yaml)
        # NOTE(review): this is an alias, not a copy - the "untrained" model
        # is the same object that gets fitted inside the loop below.
        forecaster_untrained = forecaster
        print('specific time_serie model initialized and compiled')
        poor_results_mse_threshold = local_settings[
            'poor_results_mse_threshold']
        nof_selling_days = local_normalized_scaled_unit_sales.shape[1]
        last_learning_day_in_year = np.mod(nof_selling_days, 365)
        max_selling_time = local_settings['max_selling_time']
        days_in_focus_frame = model_hyperparameters['days_in_focus_frame']
        window_input_length = local_settings['moving_window_input_length']
        window_output_length = local_settings[
            'moving_window_output_length']
        moving_window_length = window_input_length + window_output_length
        nof_years = local_settings['number_of_years_ceil']
        time_series_individually_treated = []
        time_series_not_improved = []
        dirname = os.path.dirname(__file__)
        for result in local_mse:
            time_serie = int(result[0])
            file_path = os.path.join(
                dirname, ''.join([
                    '.', local_settings['models_path'],
                    'specific_time_serie_',
                    str(time_serie), 'model_forecast_.h5'
                ]))
            # skip series that already have a saved model or whose previous
            # MSE is not above the threshold
            if os.path.isfile(
                    file_path) or result[1] <= poor_results_mse_threshold:
                continue
            # training
            print('\ntime_serie: ', time_serie)
            time_serie_data = local_normalized_scaled_unit_sales[
                time_serie, :]
            time_serie_data = time_serie_data.reshape(
                time_serie_data.shape[0])
            nof_selling_days = time_serie_data.shape[0]
            # nof_moving_windows = np.int32(nof_selling_days / moving_window_length)
            remainder_days = np.mod(nof_selling_days, moving_window_length)
            window_first_days = [
                first_day for first_day in range(0, nof_selling_days,
                                                 moving_window_length)
            ]
            length_window_walk = len(window_first_days)
            # last_window_start = window_first_days[length_window_walk - 1]
            # when the windows do not tile the series exactly, the last
            # window is moved back so that it ends on the last day
            if remainder_days != 0:
                window_first_days[
                    length_window_walk -
                    1] = nof_selling_days - moving_window_length
            day_in_year = []
            [
                day_in_year.append(last_learning_day_in_year + year * 365)
                for year in range(nof_years)
            ]
            stride_window_walk = model_hyperparameters[
                'stride_window_walk']
            print('defining x_train')
            x_train = []
            if local_settings['train_model_input_data_approach'] == "all":
                # NOTE(review): in this branch x_train is never converted
                # with np.array, so the boolean-mask assignment further down
                # looks like it would fail on a plain list - confirm.
                [
                    x_train.append(
                        time_serie_data[day - time_steps_days:day -
                                        window_output_length])
                    for day in range(time_steps_days, max_selling_time,
                                     stride_window_walk)
                ]
            elif local_settings[
                    'train_model_input_data_approach'] == "focused":
                [
                    x_train.append(time_serie_data[day:day +
                                                   window_input_length])
                    for last_day in day_in_year[:-1] for day in range(
                        last_day + window_output_length, last_day +
                        window_output_length -
                        days_in_focus_frame, -stride_window_walk)
                ]
                # border condition, take care with last year, working with last data available
                [
                    x_train.append(
                        time_serie_data[day - window_input_length:day])
                    for last_day in day_in_year[-1:] for day in range(
                        last_day, last_day -
                        days_in_focus_frame, -stride_window_walk)
                ]
                x_train = np.array(x_train)
                print('x_train_shape:  ', x_train.shape)
            else:
                logging.info(
                    "\ntrain_model_input_data_approach is not defined")
                print('-a problem occurs with the data_approach settings')
                return False, None
            print('defining y_train')
            y_train = []
            if local_settings['train_model_input_data_approach'] == "all":
                [
                    y_train.append(
                        time_serie_data[day - window_output_length:day])
                    for day in range(time_steps_days, max_selling_time,
                                     stride_window_walk)
                ]
            elif local_settings[
                    'train_model_input_data_approach'] == "focused":
                [
                    y_train.append(time_serie_data[day:day +
                                                   window_output_length])
                    for last_day in day_in_year[:-1] for day in range(
                        last_day + window_output_length, last_day +
                        window_output_length -
                        days_in_focus_frame, -stride_window_walk)
                ]
                # border condition, take care with last year, working with last data available
                [
                    y_train.append(
                        time_serie_data[day - window_output_length:day])
                    for last_day in day_in_year[-1:] for day in range(
                        last_day, last_day -
                        days_in_focus_frame, -stride_window_walk)
                ]
                y_train = np.array(y_train)
            # every positive entry is replaced by (array maximum * factor)
            factor = local_settings['amplification_factor']
            max_time_serie = np.amax(x_train)
            x_train[x_train > 0] = max_time_serie * factor
            max_time_serie = np.amax(y_train)
            y_train[y_train > 0] = max_time_serie * factor
            print('x_train and y_train built done')
            # define callbacks, checkpoints namepaths
            model_weights = ''.join([
                local_settings['checkpoints_path'],
                'model_for_specific_time_serie_',
                str(time_serie),
                model_hyperparameters['current_model_name'],
                "_loss_-{loss:.4f}-.hdf5"
            ])
            callback1 = cb.EarlyStopping(
                monitor='loss',
                patience=model_hyperparameters['early_stopping_patience'])
            callback2 = cb.ModelCheckpoint(model_weights,
                                           monitor='loss',
                                           verbose=1,
                                           save_best_only=True,
                                           mode='min')
            callbacks = [callback1, callback2]
            # add a trailing axis of size 1 to both arrays
            x_train = x_train.reshape(
                (np.shape(x_train)[0], np.shape(x_train)[1], 1))
            y_train = y_train.reshape(
                (np.shape(y_train)[0], np.shape(y_train)[1], 1))
            print('input_shape: ', np.shape(x_train))
            # train for each time_serie
            # check settings for repeat or not the training
            need_store_time_serie = True
            # load model
            # NOTE(review): the list is reloaded from disk on every
            # iteration, so entries appended in earlier iterations appear to
            # be discarded before the final np.save - confirm.
            time_series_individually_treated = np.load(''.join([
                local_settings['models_evaluation_path'],
                'improved_time_series_forecast.npy'
            ]))
            time_series_individually_treated = time_series_individually_treated.tolist(
            )
            model_name = ''.join([
                'specific_time_serie_',
                str(time_serie), 'model_forecast_.h5'
            ])
            model_path = ''.join(
                [local_settings['models_path'], model_name])
            if os.path.isfile(model_path) and model_hyperparameters[
                    'repeat_one_by_one_training'] == "False":
                forecaster = models.load_model(model_path,
                                               custom_objects={
                                                   'modified_mape':
                                                   modified_mape,
                                                   'customized_loss':
                                                   customized_loss
                                               })
                need_store_time_serie = False
            elif model_hyperparameters['one_by_one_feature_training_done'] == "False"\
                    or model_hyperparameters['repeat_one_by_one_training'] == "True":
                forecaster = forecaster_untrained
                forecaster.fit(x_train,
                               y_train,
                               batch_size=batch_size,
                               epochs=epochs,
                               workers=workers,
                               callbacks=callbacks,
                               shuffle=False)
                # print summary (informative; but if says "shape = multiple", probably useless)
                forecaster.summary()
            # compile model and make forecast
            forecaster.compile(optimizer='adam', loss='mse')
            # evaluating model and comparing with aggregated (in-block) LSTM
            print('evaluating the model trained..')
            forecast_horizon_days = local_settings['forecast_horizon_days']
            time_serie_data = time_serie_data.reshape(
                (1, time_serie_data.shape[0], 1))
            # the last forecast_horizon_days days are the prediction input
            x_input = time_serie_data[:, -forecast_horizon_days:, :]
            y_pred_normalized = forecaster.predict(x_input)
            print('output shape: ', y_pred_normalized.shape)
            y_truth = local_raw_unit_sales[time_serie,
                                           -forecast_horizon_days:]
            y_truth = y_truth.reshape(1, np.shape(y_truth)[0])
            print('y_truth shape:', y_truth.shape)
            # reversing preprocess: rescale, denormalize, reshape
            # inverse reshape
            y_pred_reshaped = y_pred_normalized.reshape(
                (y_pred_normalized.shape[2], y_pred_normalized.shape[1]))
            print('y_pred_reshaped shape:', y_pred_reshaped.shape)
            # inverse transform (first moving_windows denormalizing and then general rescaling)
            time_serie_data = time_serie_data.reshape(
                np.shape(time_serie_data)[1], 1)
            print('time_serie data shape: ', np.shape(time_serie_data))
            time_serie_normalized_window_mean = np.mean(
                time_serie_data[-moving_window_length:])
            print('mean of this time serie (normalized values): ',
                  time_serie_normalized_window_mean)
            local_denormalized_array = window_based_denormalizer(
                y_pred_reshaped, time_serie_normalized_window_mean,
                forecast_horizon_days)
            local_point_forecast = general_mean_rescaler(
                local_denormalized_array,
                local_mean_unit_complete_time_serie[time_serie],
                forecast_horizon_days)
            print('rescaled denormalized forecasts array shape: ',
                  local_point_forecast.shape)
            # calculating MSE
            local_error_metric_mse = mean_squared_error(
                y_truth, local_point_forecast)
            print('time_serie: ', time_serie, '\tMean_Squared_Error: ',
                  local_error_metric_mse)
            # keep the specific model only when it beats the previous MSE
            if local_error_metric_mse < result[1]:
                print(
                    'better results with time_serie specific model training'
                )
                print('MSE improved from ', result[1], 'to ',
                      local_error_metric_mse)
                # save models for this time serie
                forecaster.save(''.join([
                    local_settings['models_path'], 'specific_time_serie_',
                    str(time_serie), 'model_forecast_.h5'
                ]))
                print('model for time_serie ', str(time_serie), " saved")
                if need_store_time_serie:
                    time_series_individually_treated.append(
                        int(time_serie))
            else:
                print(
                    'no better results with time serie specific model training'
                )
                time_series_not_improved.append(int(time_serie))
        time_series_individually_treated = np.array(
            time_series_individually_treated)
        time_series_not_improved = np.array(time_series_not_improved)
        # store data of (individual-approach) time_series forecast successfully improved and those that not
        np.save(
            ''.join([
                local_settings['models_evaluation_path'],
                'improved_time_series_forecast'
            ]), time_series_individually_treated)
        np.save(
            ''.join([
                local_settings['models_evaluation_path'],
                'time_series_not_improved'
            ]), time_series_not_improved)
        print(
            'forecast improvement done. (specific time_serie focused) submodule has finished'
        )
    except Exception as submodule_error:
        # any failure is logged and reported as False, not re-raised
        print('time_series individual forecast submodule_error: ',
              submodule_error)
        logger.info(
            'error in forecast of individual (high_loss_identified_ts_forecast submodule)'
        )
        logger.error(str(submodule_error), exc_info=True)
        return False
    return True
def _build_embedding_matrix(vectors_path, vocab_path, embed_dim):
    """Build the (vocab_size, embed_dim) embedding matrix from a text vectors file.

    Args:
        vectors_path: Path to a text file with one ``word v1 v2 ... vN`` entry
            per line, separated by single spaces.
        vocab_path: Path to a JSON file mapping each word to its row index.
        embed_dim: Expected number of coefficients per word.

    Returns:
        A ``numpy`` array of shape ``(len(vocab), embed_dim)``. Rows of words
        that have no vector in ``vectors_path`` stay all-zero.
    """
    with codecs.open(vocab_path, encoding='utf-8') as fp:
        vocab_dict = json.load(fp)

    embed_matrix = np.zeros((len(vocab_dict), embed_dim))
    found_words = set()
    with codecs.open(vectors_path, encoding='utf-8') as f:
        for line in tqdm.tqdm(f):
            values = line.rstrip().split(' ')
            word = values[0]
            # Only vocabulary words are needed, so do not keep the whole
            # vectors file in memory.
            if word not in vocab_dict:
                continue
            # Skip lines without exactly embed_dim coefficients (e.g. a
            # "<count> <dim>" header line); a short vector would otherwise
            # be broadcast across the row or raise on assignment.
            if len(values) - 1 != embed_dim:
                continue
            # Later duplicates overwrite earlier ones, as a dict would.
            embed_matrix[vocab_dict[word]] = np.asarray(values[1:],
                                                        dtype='float32')
            found_words.add(word)

    # Vocabulary size and number of vocabulary words without a vector.
    print(len(vocab_dict), len(vocab_dict) - len(found_words))
    return embed_matrix


def create_CNN_model():
    """Create and compile the CNN + Highway regression model.

    The model takes a sequence of 25 int32 token ids, looks them up in a
    frozen 300-dimensional embedding matrix, applies three Conv1D layers
    (100 filters each, kernel sizes 5, 6 and 7, ReLU, L2 regularisation of
    0.01), each followed by Dropout(0.5), then max-pooling, flattening, a
    Dense(64) layer, three Highway layers and a single-unit Dense output.

    The embedding matrix is loaded from a ``.npy`` cache when that file
    exists; otherwise it is built from the vectors and vocabulary files and
    saved to the cache.

    Returns:
        The model compiled with Adam, mean squared error loss and a
        RootMeanSquaredError metric. The model summary is printed as a side
        effect.
    """
    embed_dim = 300
    text_in = layers.Input(shape=(25, ), dtype='int32', name="TextIn")

    # NOTE(review): the cache file is named "GloVe" but the vectors are read
    # from the wiki-news .vec file -- confirm the name is intentional.
    embed_path = "../data/embeddings/numpy/GloVe.npy"
    print("Loading embeddings...")
    if not os.path.isfile(embed_path):
        embed_matrix = _build_embedding_matrix(
            '../data/embeddings/wiki-news-300d-1m.vec',
            '../data/vocab/train_vocab.funlines.json',
            embed_dim)
        np.save(embed_path, embed_matrix)
    else:
        # NOTE(review): the cache written above is a plain numeric array, so
        # allow_pickle should not be required; kept for compatibility with
        # existing cache files -- consider dropping it.
        embed_matrix = np.load(embed_path, allow_pickle=True)

    embed_layer = layers.Embedding(
        input_dim=len(embed_matrix),
        output_dim=embed_dim,
        embeddings_initializer=initializers.Constant(embed_matrix),
        trainable=False)(text_in)

    # Stack of convolutions with growing kernel size, each with dropout.
    x = embed_layer
    for kernel_size in (5, 6, 7):
        x = layers.Conv1D(100,
                          kernel_size,
                          activation='relu',
                          kernel_regularizer=regularizers.l2(0.01))(x)
        x = layers.Dropout(0.5)(x)
    x = layers.MaxPooling1D(pool_size=2)(x)
    x = layers.Flatten()(x)

    x = layers.Dense(64)(x)
    for _ in range(3):
        x = Highway()(x)

    x = layers.Dense(1)(x)

    m = Model(text_in, x)
    m.compile(optimizer=optimizers.Adam(),
              loss="mean_squared_error",
              metrics=[metrics.RootMeanSquaredError()])
    m.summary()
    return m