def _baseline_model(self, input_dim):
    # create model
    model = Sequential()
    model.add(Dense(12,
                    input_dim=input_dim,
                    kernel_initializer='normal',
                    activation='relu'))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(8, activation='relu'))
    model.add(Dense(6, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    # Compile model
    model.compile(loss=losses.MeanSquaredError(),
                  optimizer='adam',
                  metrics=[losses.MeanSquaredError(),
                           losses.MeanAbsoluteError(),
                           losses.MeanAbsolutePercentageError()])
    tensorboard = TensorBoard(
        log_dir=f"./logs/target_prediction_on_high_pca_percentage_loss/{str(round(time()))}",
        histogram_freq=5)
    keras_callbacks = [tensorboard]
    return model, keras_callbacks
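# A minimal sketch of driving the model above; the random data and the direct
# call (ignoring `self`, since it is unused) are assumptions for illustration only.
import numpy as np
x = np.random.rand(64, 20)
y = np.random.rand(64, 1)  # targets in [0, 1), matching the sigmoid output
model, callbacks = _baseline_model(None, input_dim=20)  # hypothetical call site
model.fit(x, y, epochs=2, callbacks=callbacks)  # TensorBoard logs land under ./logs/...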
def compile_train(model,
                  encoder_bioma=None,
                  encoder_domain=None,
                  reconstruction_error=losses.MeanSquaredError(),
                  encoded_comparison_error=losses.MeanAbsoluteError(),
                  metrics=[[metrics.MeanSquaredError(),
                            metrics.MeanAbsoluteError(),
                            metrics.MeanAbsolutePercentageError()],
                           [metrics.MeanSquaredError(),
                            metrics.MeanAbsoluteError(),
                            metrics.MeanAbsolutePercentageError()],
                           [metrics.MeanAbsoluteError()]],
                  optimizer=optimizers.SGD(learning_rate=0.01)):
    if encoder_domain is not None and encoder_bioma is not None:
        model.compile(optimizer=optimizer,
                      loss=[reconstruction_error,
                            reconstruction_error,
                            encoded_comparison_error],
                      metrics=metrics)
    elif encoder_bioma is not None:
        model.compile(optimizer=optimizer,
                      loss=reconstruction_error,
                      metrics=metrics[0])
    elif encoder_domain is not None:
        model.compile(optimizer=optimizer,
                      loss=reconstruction_error,
                      metrics=metrics[1])
    else:
        raise ValueError('Neither encoder_bioma nor encoder_domain was provided')
def create_model(print_data=False):
    bioma_shape = data_microbioma_train.shape[1]
    if data_domain_train is not None:
        domain_shape = data_domain_train.shape[1]
    else:
        domain_shape = None
    models = autoencoder(bioma_shape=bioma_shape,  # bioma_shape=717
                         domain_shape=domain_shape,
                         output_shape=bioma_shape,  # output_shape=717
                         latent_space=latent_space,
                         bioma_layers=layers,
                         domain_layers=domain_layers,
                         input_transform=input_transform,
                         output_transform=output_transform,
                         activation_function_encoder=activation,
                         activation_function_decoder=activation,
                         activation_function_latent=activation_latent)
    model, encoder_bioma, encoder_domain, decoder_bioma = models
    if print_data:
        plot_models(model, encoder_bioma, encoder_domain, decoder_bioma)
    compile_train(model,
                  encoder_bioma=encoder_bioma,
                  encoder_domain=encoder_domain,
                  reconstruction_error=reconstruction_loss,
                  encoded_comparison_error=losses.MeanAbsoluteError(),
                  metrics=get_experiment_metrics(input_transform, output_transform),
                  optimizer=optimizer)
    return model, encoder_bioma, encoder_domain, decoder_bioma
def train_step(inputs, target):
    with tf.GradientTape() as tape:
        predictions = model(inputs, training=True)
        loss = loss_fn(target, predictions)
        mae = losses.MeanAbsoluteError()(target, predictions)
        mape = losses.MeanAbsolutePercentageError()(target, predictions)
        loss += sum(model.losses)
    gradients = tape.gradient(loss, model.trainable_variables)
    opt.apply_gradients(zip(gradients, model.trainable_variables))
    return loss, mae, mape
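# A variant sketch of the step above: hoisting the stateless loss objects out of
# the traced function so they are constructed once rather than on every call.
# Assumes the same surrounding globals (model, loss_fn, opt) as the original.
mae_fn = losses.MeanAbsoluteError()
mape_fn = losses.MeanAbsolutePercentageError()

@tf.function
def train_step_hoisted(inputs, target):
    with tf.GradientTape() as tape:
        predictions = model(inputs, training=True)
        loss = loss_fn(target, predictions) + sum(model.losses)
    opt.apply_gradients(zip(tape.gradient(loss, model.trainable_variables),
                            model.trainable_variables))
    return loss, mae_fn(target, predictions), mape_fn(target, predictions)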
def create_model(self):
    self.model = keras.Sequential([
        Dense(128, activations.relu, input_shape=(7, )),
        Dense(128, activations.relu),
        Dense(128, activations.relu),
        Dense(128, activations.relu),
        Dense(128, activations.relu),
        Dense(1, activation=activations.linear),
    ])
    self.model.compile(optimizer=optimizers.Adam(),
                       loss=losses.MeanAbsoluteError(),
                       metrics=['mae'])
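# Smoke-test sketch for the regressor above (random data; the bare holder object
# standing in for the real class instance is a hypothetical convenience).
import numpy as np
x = np.random.rand(32, 7).astype("float32")
y = np.random.rand(32, 1).astype("float32")
obj = type("Holder", (), {})()  # hypothetical stand-in for the enclosing class
create_model(obj)
obj.model.fit(x, y, epochs=1, verbose=0)
print(obj.model.evaluate(x, y, verbose=0))  # [MAE loss, 'mae' metric]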
def get_loss_fn(argument):
    '''
    Return loss function.
    '''
    if argument in ["MSE", "mse", "Mse", "MeanSquaredError"]:
        loss = kl.MeanSquaredError()
    elif argument in ["MAE", "mae", "Mae", "MeanAbsoluteError"]:
        loss = kl.MeanAbsoluteError()
    else:
        # raising is clearer than returning an error-message tuple,
        # which a caller could mistake for a loss object
        raise ValueError("You asked for loss function: " + str(argument) +
                         ". Available loss functions are: MSE, MAE.")
    return loss
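# Usage sketch (assumes `from tensorflow.keras import losses as kl` in the file header):
loss = get_loss_fn("mae")
print(loss([1.0, 2.0], [1.5, 2.5]).numpy())  # 0.5
# get_loss_fn("huber")  # raises ValueError: only MSE and MAE are recognized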
def create_model():
    # # ENCODER DECODER
    # encoder_inputs = Input(shape=(128, 3))
    #
    # # masked_encoder_inputs = Masking()(encoder_inputs)
    # masked_encoder_inputs = encoder_inputs
    # encoder_lstm = Bidirectional(LSTM(500, return_state=True))
    #
    # # We discard `encoder_outputs` and only keep the states.
    # _, forward_h, forward_c, backward_h, backward_c = encoder_lstm(masked_encoder_inputs)
    # state_c = Concatenate()([forward_c, backward_c])
    # state_h = Concatenate()([forward_h, backward_h])
    # encoder_states = [state_h, state_c]
    #
    # # Bottleneck Here
    #
    # decoder_inputs = Input(shape=(128, 3))
    # # masked_decoder_inputs = Masking()(decoder_inputs)
    # masked_decoder_inputs = decoder_inputs
    # decoder_lstm = LSTM(1000, return_state=True, return_sequences=True)
    # decoder_outputs, _, _ = decoder_lstm(masked_decoder_inputs, initial_state=encoder_states)
    #
    # outputs = TimeDistributed(Dense(2, activation=capped_relu))(decoder_outputs)
    #
    # model = Model([encoder_inputs, decoder_inputs], outputs)

    # SEQUENTIAL
    model = models.Sequential([
        Input((128, 3)),
        Bidirectional(LSTM(500, return_sequences=True)),
        Bidirectional(LSTM(500, return_sequences=True)),
        TimeDistributed(Dense(2, activation=capped_relu))
    ])
    model.compile(optimizer=optimizers.Adam(learning_rate=0.0001),
                  loss=losses.MeanAbsoluteError(),
                  metrics=["accuracy"])
    model.summary()  # summary() prints itself; wrapping it in print() emits a stray "None"
    return model
def simple_nn(input_shape: tuple,
              output_shape: int,
              hidden_size_list: list = [],
              print_flag=False,
              learning_type: "reg clf" = 'clf'):
    input_layer = Input(shape=input_shape)
    if not hidden_size_list:
        out = Dense(output_shape,
                    activation=tf.nn.softmax if learning_type == 'clf' else 'linear',
                    use_bias=True,
                    bias_initializer='glorot_uniform')(input_layer)
    elif len(hidden_size_list) == 1:
        x = Dense(hidden_size_list[0], activation=tf.nn.leaky_relu)(input_layer)
        out = Dense(output_shape,
                    activation=tf.nn.softmax if learning_type == 'clf' else 'linear',
                    use_bias=True,
                    bias_initializer='glorot_uniform')(x)
    else:
        x = Dense(hidden_size_list[0], activation=tf.nn.leaky_relu)(input_layer)
        for i in hidden_size_list[1:]:
            x = Dense(i, activation=tf.nn.leaky_relu)(x)
        out = Dense(output_shape,
                    activation=tf.nn.softmax if learning_type == 'clf' else 'linear',
                    use_bias=True,
                    bias_initializer='glorot_uniform')(x)
    clf = Model(input_layer, out)
    if print_flag:
        print(clf.summary())
    clf.compile(loss='categorical_crossentropy' if learning_type == 'clf'
                else losses.MeanAbsoluteError(),
                optimizer='adam',
                metrics=['accuracy'] if learning_type == 'clf'
                else [tf.keras.metrics.MeanSquaredError()])
    return clf
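# Usage sketch (hypothetical shapes): a two-hidden-layer regressor and a
# three-class classifier built from the same helper.
reg = simple_nn(input_shape=(10, ), output_shape=1,
                hidden_size_list=[32, 16], learning_type='reg')
clf = simple_nn(input_shape=(10, ), output_shape=3,
                hidden_size_list=[32], learning_type='clf', print_flag=True)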
def forecast(self, local_mse, local_normalized_scaled_unit_sales,
             local_mean_unit_complete_time_serie, local_raw_unit_sales,
             local_settings):
    try:
        print('starting high loss (mse in aggregated LSTM) specific time_serie forecast submodule')
        # set training parameters
        with open(''.join([local_settings['hyperparameters_path'],
                           'individual_time_serie_based_model_hyperparameters.json'])) \
                as local_r_json_file:
            model_hyperparameters = json.loads(local_r_json_file.read())
            local_r_json_file.close()
        time_steps_days = int(local_settings['time_steps_days'])
        epochs = int(model_hyperparameters['epochs'])
        batch_size = int(model_hyperparameters['batch_size'])
        workers = int(model_hyperparameters['workers'])
        optimizer_function = model_hyperparameters['optimizer']
        optimizer_learning_rate = model_hyperparameters['learning_rate']
        if optimizer_function == 'adam':
            optimizer_function = optimizers.Adam(optimizer_learning_rate)
        elif optimizer_function == 'ftrl':
            optimizer_function = optimizers.Ftrl(optimizer_learning_rate)
        losses_list = []
        loss_1 = model_hyperparameters['loss_1']
        loss_2 = model_hyperparameters['loss_2']
        loss_3 = model_hyperparameters['loss_3']
        union_settings_losses = [loss_1, loss_2, loss_3]
        if 'mape' in union_settings_losses:
            losses_list.append(losses.MeanAbsolutePercentageError())
        if 'mse' in union_settings_losses:
            losses_list.append(losses.MeanSquaredError())
        if 'mae' in union_settings_losses:
            losses_list.append(losses.MeanAbsoluteError())
        if 'm_mape' in union_settings_losses:
            losses_list.append(modified_mape())
        if 'customized_loss_function' in union_settings_losses:
            losses_list.append(customized_loss())
        metrics_list = []
        metric1 = model_hyperparameters['metrics1']
        metric2 = model_hyperparameters['metrics2']
        union_settings_metrics = [metric1, metric2]
        if 'rmse' in union_settings_metrics:
            metrics_list.append(metrics.RootMeanSquaredError())
        if 'mse' in union_settings_metrics:
            metrics_list.append(metrics.MeanSquaredError())
        if 'mae' in union_settings_metrics:
            metrics_list.append(metrics.MeanAbsoluteError())
        if 'mape' in union_settings_metrics:
            metrics_list.append(metrics.MeanAbsolutePercentageError())
        l1 = model_hyperparameters['l1']
        l2 = model_hyperparameters['l2']
        if model_hyperparameters['regularizers_l1_l2'] == 'True':
            activation_regularizer = regularizers.l1_l2(l1=l1, l2=l2)
        else:
            activation_regularizer = None
        nof_features_by_training = 1
        forecaster = tf.keras.Sequential()
        print('current model for specific high loss time_series: Mix_Bid_PeepHole_LSTM_Dense_ANN')
        # first layer (DENSE)
        if model_hyperparameters['units_layer_1'] > 0:
            forecaster.add(layers.Dense(
                units=model_hyperparameters['units_layer_1'],
                activation=model_hyperparameters['activation_1'],
                activity_regularizer=activation_regularizer))
            forecaster.add(layers.Dropout(
                rate=float(model_hyperparameters['dropout_layer_1'])))
        # second LSTM layer
        if model_hyperparameters['units_layer_2'] > 0:
            forecaster.add(layers.Bidirectional(
                layers.RNN(PeepholeLSTMCell(
                    units=model_hyperparameters['units_layer_2'],
                    activation=model_hyperparameters['activation_2'],
                    activity_regularizer=activation_regularizer,
                    dropout=float(model_hyperparameters['dropout_layer_2'])),
                    return_sequences=False)))
            forecaster.add(RepeatVector(model_hyperparameters['repeat_vector']))
        # third LSTM layer
        if model_hyperparameters['units_layer_3'] > 0:
            forecaster.add(layers.Bidirectional(
                layers.RNN(PeepholeLSTMCell(
                    units=model_hyperparameters['units_layer_3'],
                    activation=model_hyperparameters['activation_3'],
                    activity_regularizer=activation_regularizer,
                    dropout=float(model_hyperparameters['dropout_layer_3'])),
                    return_sequences=False)))
            forecaster.add(RepeatVector(model_hyperparameters['repeat_vector']))
        # fourth layer (DENSE)
        if model_hyperparameters['units_layer_4'] > 0:
            forecaster.add(layers.Dense(
                units=model_hyperparameters['units_layer_4'],
                activation=model_hyperparameters['activation_4'],
                activity_regularizer=activation_regularizer))
            forecaster.add(layers.Dropout(
                rate=float(model_hyperparameters['dropout_layer_4'])))
        # final layer
        forecaster.add(layers.Dense(units=nof_features_by_training))
        forecaster.compile(optimizer=optimizer_function,
                           loss=losses_list,
                           metrics=metrics_list)
        # forecaster.save(''.join([local_settings['models_path'], '_model_structure_']),
        #                 save_format='tf')
        forecaster.build(input_shape=(1, local_settings['forecast_horizon_days'], 1))
        forecaster_yaml = forecaster.to_yaml()
        with open(''.join([local_settings['models_path'], 'forecaster.yaml']),
                  'w') as yaml_file:
            yaml_file.write(forecaster_yaml)
        forecaster_untrained = forecaster
        print('specific time_serie model initialized and compiled')
        poor_results_mse_threshold = local_settings['poor_results_mse_threshold']
        nof_selling_days = local_normalized_scaled_unit_sales.shape[1]
        last_learning_day_in_year = np.mod(nof_selling_days, 365)
        max_selling_time = local_settings['max_selling_time']
        days_in_focus_frame = model_hyperparameters['days_in_focus_frame']
        window_input_length = local_settings['moving_window_input_length']
        window_output_length = local_settings['moving_window_output_length']
        moving_window_length = window_input_length + window_output_length
        nof_years = local_settings['number_of_years_ceil']
        time_series_individually_treated = []
        time_series_not_improved = []
        dirname = os.path.dirname(__file__)
        for result in local_mse:
            time_serie = int(result[0])
            file_path = os.path.join(dirname, ''.join(
                ['.', local_settings['models_path'], 'specific_time_serie_',
                 str(time_serie), 'model_forecast_.h5']))
            if os.path.isfile(file_path) or result[1] <= poor_results_mse_threshold:
                continue
            # training
            print('\ntime_serie: ', time_serie)
            time_serie_data = local_normalized_scaled_unit_sales[time_serie, :]
            time_serie_data = time_serie_data.reshape(time_serie_data.shape[0])
            nof_selling_days = time_serie_data.shape[0]
            # nof_moving_windows = np.int32(nof_selling_days / moving_window_length)
            remainder_days = np.mod(nof_selling_days, moving_window_length)
            window_first_days = [first_day for first_day in
                                 range(0, nof_selling_days, moving_window_length)]
            length_window_walk = len(window_first_days)
            # last_window_start = window_first_days[length_window_walk - 1]
            if remainder_days != 0:
                window_first_days[length_window_walk - 1] = \
                    nof_selling_days - moving_window_length
            day_in_year = []
            [day_in_year.append(last_learning_day_in_year + year * 365)
             for year in range(nof_years)]
            stride_window_walk = model_hyperparameters['stride_window_walk']
            print('defining x_train')
            x_train = []
            if local_settings['train_model_input_data_approach'] == "all":
                [x_train.append(time_serie_data[day - time_steps_days:day - window_output_length])
                 for day in range(time_steps_days, max_selling_time, stride_window_walk)]
            elif local_settings['train_model_input_data_approach'] == "focused":
                [x_train.append(time_serie_data[day:day + window_input_length])
                 for last_day in day_in_year[:-1]
                 for day in range(last_day + window_output_length,
                                  last_day + window_output_length - days_in_focus_frame,
                                  -stride_window_walk)]
                # border condition, take care with last year, working with last data available
                [x_train.append(time_serie_data[day - window_input_length:day])
                 for last_day in day_in_year[-1:]
                 for day in range(last_day, last_day - days_in_focus_frame,
                                  -stride_window_walk)]
                x_train = np.array(x_train)
                print('x_train_shape: ', x_train.shape)
            else:
                logging.info("\ntrain_model_input_data_approach is not defined")
                print('-a problem occurs with the data_approach settings')
                return False, None
            print('defining y_train')
            y_train = []
            if local_settings['train_model_input_data_approach'] == "all":
                [y_train.append(time_serie_data[day - window_output_length:day])
                 for day in range(time_steps_days, max_selling_time, stride_window_walk)]
            elif local_settings['train_model_input_data_approach'] == "focused":
                [y_train.append(time_serie_data[day:day + window_output_length])
                 for last_day in day_in_year[:-1]
                 for day in range(last_day + window_output_length,
                                  last_day + window_output_length - days_in_focus_frame,
                                  -stride_window_walk)]
                # border condition, take care with last year, working with last data available
                [y_train.append(time_serie_data[day - window_output_length:day])
                 for last_day in day_in_year[-1:]
                 for day in range(last_day, last_day - days_in_focus_frame,
                                  -stride_window_walk)]
            y_train = np.array(y_train)
            factor = local_settings['amplification_factor']
            max_time_serie = np.amax(x_train)
            x_train[x_train > 0] = max_time_serie * factor
            max_time_serie = np.amax(y_train)
            y_train[y_train > 0] = max_time_serie * factor
            print('x_train and y_train built done')
            # define callbacks, checkpoints namepaths
            model_weights = ''.join([local_settings['checkpoints_path'],
                                     'model_for_specific_time_serie_',
                                     str(time_serie),
                                     model_hyperparameters['current_model_name'],
                                     "_loss_-{loss:.4f}-.hdf5"])
            callback1 = cb.EarlyStopping(
                monitor='loss',
                patience=model_hyperparameters['early_stopping_patience'])
            callback2 = cb.ModelCheckpoint(model_weights,
                                           monitor='loss',
                                           verbose=1,
                                           save_best_only=True,
                                           mode='min')
            callbacks = [callback1, callback2]
            x_train = x_train.reshape((np.shape(x_train)[0], np.shape(x_train)[1], 1))
            y_train = y_train.reshape((np.shape(y_train)[0], np.shape(y_train)[1], 1))
            print('input_shape: ', np.shape(x_train))
            # train for each time_serie
            # check settings for repeating the training or not
            need_store_time_serie = True
            # load model
            time_series_individually_treated = np.load(''.join([
                local_settings['models_evaluation_path'],
                'improved_time_series_forecast.npy']))
            time_series_individually_treated = time_series_individually_treated.tolist()
            model_name = ''.join(['specific_time_serie_', str(time_serie),
                                  'model_forecast_.h5'])
            model_path = ''.join([local_settings['models_path'], model_name])
            if os.path.isfile(model_path) and \
                    model_hyperparameters['repeat_one_by_one_training'] == "False":
                forecaster = models.load_model(model_path,
                                               custom_objects={
                                                   'modified_mape': modified_mape,
                                                   'customized_loss': customized_loss})
                need_store_time_serie = False
            elif model_hyperparameters['one_by_one_feature_training_done'] == "False" \
                    or model_hyperparameters['repeat_one_by_one_training'] == "True":
                forecaster = forecaster_untrained
                forecaster.fit(x_train, y_train,
                               batch_size=batch_size,
                               epochs=epochs,
                               workers=workers,
                               callbacks=callbacks,
                               shuffle=False)
                # print summary (informative; but if it says "shape = multiple", probably useless)
                forecaster.summary()
            # compile model and make forecast
            forecaster.compile(optimizer='adam', loss='mse')
            # evaluating model and comparing with aggregated (in-block) LSTM
            print('evaluating the model trained..')
            forecast_horizon_days = local_settings['forecast_horizon_days']
            time_serie_data = time_serie_data.reshape((1, time_serie_data.shape[0], 1))
            x_input = time_serie_data[:, -forecast_horizon_days:, :]
            y_pred_normalized = forecaster.predict(x_input)
            print('output shape: ', y_pred_normalized.shape)
            y_truth = local_raw_unit_sales[time_serie, -forecast_horizon_days:]
            y_truth = y_truth.reshape(1, np.shape(y_truth)[0])
            print('y_truth shape:', y_truth.shape)
            # reversing preprocess: rescale, denormalize, reshape
            # inverse reshape
            y_pred_reshaped = y_pred_normalized.reshape(
                (y_pred_normalized.shape[2], y_pred_normalized.shape[1]))
            print('y_pred_reshaped shape:', y_pred_reshaped.shape)
            # inverse transform (first moving_windows denormalizing and then general rescaling)
            time_serie_data = time_serie_data.reshape(np.shape(time_serie_data)[1], 1)
            print('time_serie data shape: ', np.shape(time_serie_data))
            time_serie_normalized_window_mean = np.mean(
                time_serie_data[-moving_window_length:])
            print('mean of this time serie (normalized values): ',
                  time_serie_normalized_window_mean)
            local_denormalized_array = window_based_denormalizer(
                y_pred_reshaped, time_serie_normalized_window_mean,
                forecast_horizon_days)
            local_point_forecast = general_mean_rescaler(
                local_denormalized_array,
                local_mean_unit_complete_time_serie[time_serie],
                forecast_horizon_days)
            print('rescaled denormalized forecasts array shape: ',
                  local_point_forecast.shape)
            # calculating MSE
            local_error_metric_mse = mean_squared_error(y_truth, local_point_forecast)
            print('time_serie: ', time_serie, '\tMean_Squared_Error: ',
                  local_error_metric_mse)
            if local_error_metric_mse < result[1]:
                print('better results with time_serie specific model training')
                print('MSE improved from ', result[1], 'to ', local_error_metric_mse)
                # save model for this time serie
                forecaster.save(''.join([local_settings['models_path'],
                                         'specific_time_serie_', str(time_serie),
                                         'model_forecast_.h5']))
                print('model for time_serie ', str(time_serie), " saved")
                if need_store_time_serie:
                    time_series_individually_treated.append(int(time_serie))
            else:
                print('no better results with time serie specific model training')
                time_series_not_improved.append(int(time_serie))
        time_series_individually_treated = np.array(time_series_individually_treated)
        time_series_not_improved = np.array(time_series_not_improved)
        # store data of (individual-approach) time_series forecasts successfully improved and those that were not
        np.save(''.join([local_settings['models_evaluation_path'],
                         'improved_time_series_forecast']),
                time_series_individually_treated)
        np.save(''.join([local_settings['models_evaluation_path'],
                         'time_series_not_improved']),
                time_series_not_improved)
        print('forecast improvement done. (specific time_serie focused) submodule has finished')
    except Exception as submodule_error:
        print('time_series individual forecast submodule_error: ', submodule_error)
        logger.info('error in forecast of individual (high_loss_identified_ts_forecast submodule)')
        logger.error(str(submodule_error), exc_info=True)
        return False
    return True
def main():
    session = Session()
    # query with condition? alternative
    all_users = session.query(User).all()
    all_movies = session.query(Movie).all()
    user_rating_counts = session.query(
        Rating.user_id, func.count(Rating.user_id)).group_by(Rating.user_id).all()
    # users with more than 60 ratings
    user_filtered = filter(lambda x: x[1] > 60, user_rating_counts)
    actual_users_index = [elem[0] for elem in user_filtered]
    actor_dict, director_dict, rated_dict, genre_dict = get_movie_dict('movie_dict.json')
    # author_dict, publisher_dict = get_book_dict('book_dict.json')
    with open('movie_user_zipcodes.json', 'r') as f:
        zipcodes = json.load(f)
    zipcode_dict = dict(zip(zipcodes, range(len(zipcodes))))
    all_users_id = [elem.id for elem in all_users]
    all_users_data = [{'gender': elem.gender,
                       'occupation': elem.occupation,
                       'age': elem.age,
                       'zipcode': elem.zipcode} for elem in all_users]
    all_users_df = pd.DataFrame(all_users_data, index=all_users_id)
    # occupation doesn't need hashing
    occu_dict_size = all_users_df.occupation.max() + 1
    all_users_df.gender = (all_users_df.gender == 'M').astype(int)
    all_users_df.zipcode = all_users_df.zipcode.apply(lambda x: zipcode_dict[x])
    user_ages = sorted(all_users_df.age.unique())
    # age may be quantifiable, but people in each age period have their own culture and style
    age_dict = dict(zip(user_ages, range(len(user_ages))))
    all_users_df.age = all_users_df.age.apply(lambda x: age_dict[x])
    all_movies_id = [elem.id for elem in all_movies]
    all_movies_data = [{'year': elem.year,
                        'actor': elem.actor,
                        'title': elem.title,
                        'rated': elem.rated,
                        'director': elem.director,
                        'genre': elem.genre} for elem in all_movies]
    all_movies_df = pd.DataFrame(all_movies_data, index=all_movies_id)
    all_movies_df.actor = all_movies_df.actor.apply(lambda x: actor_dict[x])
    all_movies_df.director = all_movies_df.director.apply(lambda x: director_dict[x])
    all_movies_df.rated = all_movies_df.rated.apply(lambda x: rated_dict[x])
    all_movies_df.genre = all_movies_df.genre.apply(lambda x: genre_dict[x])
    all_movies_df.year = all_movies_df.year - MOVIE_MIN_YEAR
    existing_movies_df = all_movies_df[all_movies_df.year < 1998 - MOVIE_MIN_YEAR]
    new_movies_df = all_movies_df[all_movies_df.year > 1997 - MOVIE_MIN_YEAR]
    # user_mask = np.random.rand(len(all_users_df)) < 0.8
    # user_existing = all_users_df[user_mask]
    # user_new = all_users_df[~user_mask]
    user_existing = all_users_df[all_users_df.index.isin(actual_users_index)]
    user_new = all_users_df[~all_users_df.index.isin(actual_users_index)]
    rating_existing = session.query(Rating).join(User).filter(
        User.id.in_(user_existing.index)).join(Movie).filter(Movie.year < 1998).all()
    # rating_exist_new = session.query(Rating).join(User).filter(User.id.in_(user_existing.index)).join(Movie).filter(Movie.year > 1997).all()
    # rating_new_exist = session.query(Rating).join(User).filter(User.id.in_(user_new.index)).join(Movie).filter(Movie.year < 1998).all()
    # rating_new_new = session.query(Rating).join(User).filter(User.id.in_(user_new.index)).join(Movie).filter(Movie.year > 1997).all()
    '''
    train_genders = [1 if elem.user.genre == 'M' else 0 for elem in rating_existing]
    train_occupations = [elem.user.occupation for elem in rating_existing]
    train_ages = [elem.user.age for elem in rating_existing]
    train_zipcodes = [all_users_df.loc[elem.user_id].zipcode for elem in rating_existing]
    train_actors = [all_movies_df.loc[elem.movie_id].actor for elem in rating_existing]
    train_directors = [all_movies_df.loc[elem.movie_id].director for elem in rating_existing]
    train_genres = [all_movies_df.loc[elem.movie_id].genre for elem in rating_existing]
    train_rateds = [all_movies_df.loc[elem.movie_id].rated for elem in rating_existing]
    train_labels = [(elem.rate - 1) * 0.25 for elem in rating_existing]
    '''
    rating_existing_group = [[] for _ in range(MAX_USER_ID + 1)]
    for rating in rating_existing:
        # 40 ratings per user, + 10 queries
        if len(rating_existing_group[rating.user_id]) < scenario_len + query_len:
            rating_existing_group[rating.user_id].append(rating)
    actual_users_index2 = [idx for idx, elem in enumerate(rating_existing_group)
                           if len(elem) > scenario_len + query_len - 1]
    dict_sizes = {'zipcode': len(zipcode_dict),
                  'actor': len(actor_dict),
                  'authdir': len(director_dict),
                  'rated': len(rated_dict),
                  'year': MOVIE_MAX_YEAR - MOVIE_MIN_YEAR + 1,
                  'occu': occu_dict_size,
                  'age': len(age_dict),
                  'genre': len(genre_dict)}
    emb_sizes = {'zipcode': 100, 'actor': 50, 'authdir': 50, 'rated': 5,
                 'year': 15, 'occu': 4, 'age': 2, 'genre': 15}
    global_model = MeluGlobal(dict_sizes, emb_sizes, 1)
    emb_input_size = sum([v for k, v in emb_sizes.items()])
    local_model = MeluLocal(emb_input_size, [64, 32, 16, 4])
    print(global_model.summary())
    print(local_model.summary())
    utils.plot_model(global_model, 'global.png', True, expand_nested=True)
    utils.plot_model(local_model, 'local.png', True, expand_nested=True)
    USER_BATCH_SIZE = 128
    # task batch size should divide scenario length
    TASK_BATCH_SIZE = 20
    total_batch = floor(len(actual_users_index2) / USER_BATCH_SIZE)
    # remaining_users = len(actual_users_index2) % USER_BATCH_SIZE
    local_loss_fn = losses.MeanAbsoluteError()
    local_optimizer = optimizers.Adam(alpha)
    global_optimizer = optimizers.Adam(beta)
    # global_loss_fn = losses.MeanAbsoluteError()
    # local_model.compile(local_optimizer, local_loss_fn, [metrics.MeanAbsoluteError()])
    # global_model.compile(global_optimizer, global_loss_fn, [metrics.MeanAbsoluteError()])
    # local_model.save_weights('theta2.h5')
    local_model_weights = local_model.get_weights()
    # prepare training metric
    # val_metric = metrics.MeanAbsoluteError()
    for epoch in range(30):
        print('start epoch {}'.format(epoch))
        # previous train losses used to decide early stopping
        # prev_train_loss  - epoch-1 loss
        # prev2_train_loss - epoch-2 loss
        # prev3_train_loss - epoch-3 loss
        if epoch > 19:
            prev3_train_loss = prev2_train_loss
            prev2_train_loss = prev_train_loss
            prev_train_loss = total_train_loss
        elif epoch == 19:
            prev2_train_loss = prev_train_loss
            prev_train_loss = total_train_loss
        elif epoch == 18:
            prev_train_loss = total_train_loss
        total_train_loss = 0
        for i in range(total_batch):
            print('user batch # {}'.format(i))
            users = [rating_existing_group[elem] for elem in
                     actual_users_index2[i * USER_BATCH_SIZE:(i + 1) * USER_BATCH_SIZE]]
            theta2_user_weights = []
            # calculate local weights per user
            for j, user in enumerate(users):
                # local_model.load_weights('theta2.h5')
                local_model.set_weights(local_model_weights)
                # [authdir, year, age, actor, rated, genre, occu, zipcode]
                user_data = [[existing_movies_df.loc[elem.movie_id].director,
                              existing_movies_df.loc[elem.movie_id].year,
                              all_users_df.loc[elem.user_id].age,
                              existing_movies_df.loc[elem.movie_id].actor,
                              existing_movies_df.loc[elem.movie_id].rated,
                              existing_movies_df.loc[elem.movie_id].genre,
                              all_users_df.loc[elem.user_id].occupation,
                              all_users_df.loc[elem.user_id].zipcode]
                             for elem in user[:scenario_len]]
                label_data = [elem.rate for elem in user[:scenario_len]]
                train_dataset = tf.data.Dataset.from_tensor_slices(
                    (user_data, label_data)).batch(TASK_BATCH_SIZE, True)
                for (user_batch, label_batch) in train_dataset:
                    batch_emb_out = global_model(user_batch)
                    with tf.GradientTape() as tape:
                        logits = local_model(batch_emb_out)
                        local_loss = local_loss_fn(label_batch, logits)
                    local_grads = tape.gradient(local_loss, local_model.trainable_weights)
                    local_optimizer.apply_gradients(
                        zip(local_grads, local_model.trainable_weights))
                # local_model.save_weights('theta2_{}.h5'.format(j))
                theta2_user_weights.append(local_model.get_weights())
            # calculate gradients for each user
            theta1_grads = []
            theta1_losses = 0
            for j, user in enumerate(users):
                # local_model.load_weights('theta2_{}.h5'.format(j))
                local_model.set_weights(theta2_user_weights[j])
                user_query = [[existing_movies_df.loc[elem.movie_id].director,
                               existing_movies_df.loc[elem.movie_id].year,
                               all_users_df.loc[elem.user_id].age,
                               existing_movies_df.loc[elem.movie_id].actor,
                               existing_movies_df.loc[elem.movie_id].rated,
                               existing_movies_df.loc[elem.movie_id].genre,
                               all_users_df.loc[elem.user_id].occupation,
                               all_users_df.loc[elem.user_id].zipcode]
                              for elem in user[scenario_len:]]
                label_data = [elem.rate for elem in user[scenario_len:]]
                train_dataset = tf.data.Dataset.from_tensor_slices(
                    (user_query, label_data)).batch(query_len)
                (query_batch, label_batch) = next(iter(train_dataset))
                with tf.GradientTape() as tape:
                    emb_out = global_model(query_batch)
                    logits = local_model(emb_out)
                    local_loss = local_loss_fn(label_batch, logits)
                theta1_losses += local_loss.numpy()
                # there will be USER_BATCH_SIZE * scenario_len / TASK_BATCH_SIZE gradients
                grad = tape.gradient(local_loss, global_model.trainable_weights)
                theta1_grads.append(grad)
            # apply the averaged gradients to the embedding layer weights
            final_theta1_grad = []
            theta2_losses = 0
            for k in range(len(theta1_grads[0])):
                data = [elem[k] for elem in theta1_grads]
                final_data = tf.add_n(data) / USER_BATCH_SIZE
                final_theta1_grad.append(final_data)
            global_optimizer.apply_gradients(
                zip(final_theta1_grad, global_model.trainable_weights))
            # calculate each local gradient per user for the updated global theta1
            theta2_grads = []
            for j, user in enumerate(users):
                # local_model.load_weights('theta2_{}.h5'.format(j))
                # below line is wrong (maybe)
                # local_model.set_weights(theta2_user_weights[j])
                local_model.set_weights(local_model_weights)
                user_query = [[existing_movies_df.loc[elem.movie_id].director,
                               existing_movies_df.loc[elem.movie_id].year,
                               all_users_df.loc[elem.user_id].age,
                               existing_movies_df.loc[elem.movie_id].actor,
                               existing_movies_df.loc[elem.movie_id].rated,
                               existing_movies_df.loc[elem.movie_id].genre,
                               all_users_df.loc[elem.user_id].occupation,
                               all_users_df.loc[elem.user_id].zipcode]
                              for elem in user[scenario_len:]]
                label_data = [elem.rate for elem in user[scenario_len:]]
                train_dataset = tf.data.Dataset.from_tensor_slices(
                    (user_query, label_data)).batch(query_len)
                (query_batch, label_batch) = next(iter(train_dataset))
                emb_out = global_model(query_batch)
                with tf.GradientTape() as tape:
                    logits = local_model(emb_out)
                    local_loss = local_loss_fn(label_batch, logits)
                theta2_losses += local_loss.numpy()
                theta2_grads.append(
                    tape.gradient(local_loss, local_model.trainable_weights))
            # update local dense layer weights
            final_theta2_grad = []
            for k in range(len(theta2_grads[0])):
                data = [elem[k] for elem in theta2_grads]
                final_data = tf.add_n(data) / USER_BATCH_SIZE
                final_theta2_grad.append(final_data)
            global_optimizer.apply_gradients(
                zip(final_theta2_grad, local_model.trainable_weights))
            # local_model.save_weights('theta2.h5')
            local_model_weights = local_model.get_weights()
            # To Do: evaluate validation
            # use MAE (paper's choice)
            '''
            batch_val_loss = 0
            for j, user in enumerate(users):
                validation_batch = user[scenario_len:scenario_len + validation_len]  # this is actually all of it
                batch_input = [[existing_movies_df.loc[elem.movie_id].director,
                                existing_movies_df.loc[elem.movie_id].year,
                                all_users_df.loc[elem.user_id].age,
                                existing_movies_df.loc[elem.movie_id].actor,
                                existing_movies_df.loc[elem.movie_id].rated,
                                existing_movies_df.loc[elem.movie_id].genre,
                                all_users_df.loc[elem.user_id].occupation,
                                all_users_df.loc[elem.user_id].zipcode]
                               for elem in validation_batch]
                batch_labels = [elem.rate for elem in validation_batch]
                # only one batch, so it needs to be in a one-item list
                val_embedded = global_model.predict_on_batch([batch_input])
                val_logits = local_model.predict_on_batch(val_embedded)
                val_metric(batch_labels, val_logits)
                batch_val_loss = batch_val_loss + val_metric.result()
            print('validation loss: %s' % (float(batch_val_loss),))
            total_train_loss += batch_val_loss
            # To do: end training if validation loss increases or is not reduced enough - early stopping
            '''
            # measure total training loss
            print('batch #{} theta1 loss:{}'.format(i, theta1_losses))
            print('batch #{} theta2 loss:{}'.format(i, theta2_losses))
            total_train_loss += theta1_losses + theta2_losses
        print('current train loss at epoch {}: '.format(epoch), total_train_loss)
        if epoch % 5 == 0:
            local_model.save('models/local_model_{}.h5'.format(epoch))
            global_model.save('models/global_model_{}.h5'.format(epoch))
        if epoch > 19:
            min_prev_loss = min([prev_train_loss, prev2_train_loss, prev3_train_loss])
            print('previous train loss: ', min_prev_loss)
            if total_train_loss > min_prev_loss:
                print('total train loss increases, end training')
                break
    local_model.save('models/local_model_{}_final.h5'.format(epoch))
    global_model.save('models/global_model_{}_final.h5'.format(epoch))
def main(args):
    print(f"Load {args.DATASET} dataset.....")
    datasets_root = "../../datasets"  # Please edit your root path of datasets
    train_domain_A_path = os.path.join(datasets_root, args.DATASET, "trainA")
    train_domain_B_path = os.path.join(datasets_root, args.DATASET, "trainB")
    val_domain_A_path = os.path.join(datasets_root, args.DATASET, "valA")
    val_domain_B_path = os.path.join(datasets_root, args.DATASET, "valB")
    train_A = np.array([load_img(os.path.join(train_domain_A_path, img), args.IMG_SIZE)
                        for img in sorted(os.listdir(train_domain_A_path))]) / 127.5 - 1
    train_B = np.array([load_img(os.path.join(train_domain_B_path, img), args.IMG_SIZE)
                        for img in sorted(os.listdir(train_domain_B_path))]) / 127.5 - 1
    val_A = np.array([load_img(os.path.join(val_domain_A_path, img), args.IMG_SIZE)
                      for img in sorted(os.listdir(val_domain_A_path))]) / 127.5 - 1
    val_B = np.array([load_img(os.path.join(val_domain_B_path, img), args.IMG_SIZE)
                      for img in sorted(os.listdir(val_domain_B_path))]) / 127.5 - 1
    print("\nTraining data shape")
    print(f"Domain A: {train_A.shape}")
    print(f"Domain B: {train_B.shape}")
    print("\nValidation data shape")
    print(f"Domain A: {val_A.shape}")
    print(f"Domain B: {val_B.shape}")
    print("================ Building Network ================")
    A_channel = train_A.shape[-1]
    B_channel = train_B.shape[-1]
    n_layers = 3
    G = generator_unet(input_size=args.IMG_SIZE, A_channel=A_channel,
                       B_channel=B_channel, name="Generator")
    G.summary()
    D = discriminator(input_size=args.IMG_SIZE, A_channel=A_channel,
                      B_channel=B_channel, n_layers=n_layers, name="Discriminator")
    D.summary()
    D.compile(optimizer=optimizers.Adam(learning_rate=0.0001, epsilon=1e-8),
              loss=losses.BinaryCrossentropy())
    D.trainable = False
    A_img = layers.Input(shape=(args.IMG_SIZE, args.IMG_SIZE, A_channel), name="GAN_Input_A")
    B_img = layers.Input(shape=(args.IMG_SIZE, args.IMG_SIZE, B_channel), name="GAN_Input_B")
    fake_B = G(A_img)
    D_output = D([A_img, fake_B])
    A = models.Model(inputs=[A_img, B_img], outputs=[D_output, fake_B], name='GAN')
    A.summary()
    A.compile(optimizer=optimizers.Adam(learning_rate=0.0001, epsilon=1e-8),
              loss=[losses.BinaryCrossentropy(), losses.MeanAbsoluteError()],
              loss_weights=[1, 100])
    print("==================================================\n")
    print("================ Training Network ================")
    d_output_size = args.IMG_SIZE // (2**(n_layers - 1))
    epochs = args.EPOCHS
    batch_size = args.BATCH_SIZE
    train_length = len(train_A)
    val_length = len(val_A)
    num_iter = int(np.ceil(train_length / batch_size))
    num_val_iter = int(np.ceil(val_length / batch_size))
    CKPT_PATH = './ckpt'
    os.makedirs(CKPT_PATH, exist_ok=True)
    model_json = A.to_json()
    with open(os.path.join(CKPT_PATH, "GAN.json"), "w") as json_file:
        json_file.write(model_json)
    print("\nModel Saved!\n")
    SAMPLE_PATH = './result'
    os.makedirs(SAMPLE_PATH, exist_ok=True)
    for epoch in range(epochs):
        g_total = 0
        g_ad = 0
        g_mae = 0
        d_ad = 0
        shuffle_idx = np.random.choice(train_length, train_length, replace=False)
        epoch_progbar = Progbar(num_iter, width=15)
        for i, step in enumerate(range(0, train_length, batch_size)):
            step_idx = shuffle_idx[step:step + batch_size]
            real_label = np.ones((len(step_idx), d_output_size, d_output_size, 1))
            fake_label = np.zeros((len(step_idx), d_output_size, d_output_size, 1))
            # Generate fake images
            fake_imgs = G.predict(train_A[step_idx])
            # Train Discriminator
            dis_label = np.concatenate([fake_label, real_label])
            Set_A = np.concatenate([fake_imgs, train_A[step_idx]], axis=0)
            Set_B = np.concatenate([train_B[step_idx], train_B[step_idx]], axis=0)
            # [Ad]
            D_Loss = D.train_on_batch([Set_A, Set_B], dis_label)
            # Train Generator
            # [Ad + 100*mae, Ad, mae]
            G_Loss = A.train_on_batch([train_A[step_idx], train_B[step_idx]],
                                      [real_label, train_B[step_idx]])
            g_total += G_Loss[0]
            g_ad += G_Loss[1]
            g_mae += G_Loss[2]
            d_ad += D_Loss
            if i < num_iter:
                epoch_progbar.update(i + 1, [("G_Total", G_Loss[0]),
                                             ("G_Ad", G_Loss[1]),
                                             ("G_MAE", G_Loss[2]),
                                             ("D_Ad", D_Loss)])
        val_g_total = 0
        val_g_ad = 0
        val_g_mae = 0
        for j, val_idx in enumerate(range(0, val_length, batch_size)):
            val_label = np.ones([len(val_A[val_idx:val_idx + batch_size]),
                                 d_output_size, d_output_size, 1])
            # the combined model takes both domain images as inputs and
            # its targets follow the output order [D_output, fake_B]
            V_loss = A.test_on_batch(
                [val_A[val_idx:val_idx + batch_size],
                 val_B[val_idx:val_idx + batch_size]],
                [val_label, val_B[val_idx:val_idx + batch_size]])
            val_g_total += V_loss[0]
            val_g_ad += V_loss[1]
            val_g_mae += V_loss[2]
        epoch_progbar.update(i + 1, [("Val_G_Total", val_g_total / num_val_iter),
                                     ("Val_G_Ad", val_g_ad / num_val_iter),
                                     ("Val_G_MAE", val_g_mae / num_val_iter)])
        A.save_weights(os.path.join(CKPT_PATH, f"{epoch:04d}_params.h5"))
        train_float2int = np.concatenate((train_B[step_idx][0], fake_imgs[0]), axis=1)
        train_float2int = (train_float2int + 1) * 127.5
        train_float2int = cv.cvtColor(train_float2int.astype(np.uint8), cv.COLOR_RGB2BGR)
        Train_Result_PATH = os.path.join(SAMPLE_PATH, f"{epoch+1:04d}_train_result.jpg")
        cv.imwrite(Train_Result_PATH, train_float2int)
        val_result = G.predict(val_A[:1])
        val_float2int = np.concatenate((val_B[0], val_result[0]), axis=1)
        val_float2int = (val_float2int + 1) * 127.5
        val_float2int = cv.cvtColor(val_float2int.astype(np.uint8), cv.COLOR_RGB2BGR)
        Val_Result_PATH = os.path.join(SAMPLE_PATH, f"{epoch+1:04d}_val_result.jpg")
        cv.imwrite(Val_Result_PATH, val_float2int)
    print("Training Done ! ")
import tensorflow as tf
from tensorflow.keras import Model, Input, losses
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import make_moons, make_classification, make_regression
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from xgboost import XGBRegressor

loss_func = losses.CategoricalCrossentropy(from_logits=True)
loss_func_reg = losses.MeanAbsoluteError()


def plot_moon(mx, my):
    x_1 = [mx[i][0] for i in range(len(mx))]
    x_2 = [mx[i][1] for i in range(len(mx))]
    plt.scatter(x_1, x_2, c=my)
    plt.show()


def simple_nn(input_shape: tuple,
              output_shape: int,
              hidden_size_list: list = [],
              print_flag=False,
              learning_type: "reg clf" = 'clf'):
    input_layer = Input(shape=input_shape)
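# A small sketch of what from_logits=True means for loss_func above: raw scores
# are passed in and the softmax is applied inside the loss (values are arbitrary).
logits = tf.constant([[2.0, 0.5, -1.0]])
labels = tf.constant([[1.0, 0.0, 0.0]])
print(loss_func(labels, logits).numpy())              # cross-entropy on softmax(logits)
print(loss_func_reg([1.0, 2.0], [1.5, 2.5]).numpy())  # plain MAE: 0.5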
def main(args):
    import os
    import cv2 as cv
    import numpy as np
    import tensorflow as tf
    from tensorflow.keras import models, layers, losses, optimizers
    from tensorflow.keras.utils import Progbar
    from models import *
    tf.random.set_seed(42)
    # For Efficiency
    gpus = tf.config.experimental.list_physical_devices('GPU')
    if gpus:
        try:
            for gpu in gpus:
                tf.config.experimental.set_memory_growth(gpu, True)
            logical_gpus = tf.config.experimental.list_logical_devices('GPU')
            print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
        except RuntimeError as e:
            print(e)

    def load_img(path, size):
        img = cv.imread(path)
        img = cv.cvtColor(img, cv.COLOR_BGR2RGB)
        img = cv.resize(img, (size, size))
        return img

    print(f"Load {args.DATASET} dataset.....")
    datasets_root = "../../datasets"  # Please edit your root path of datasets
    train_domain_A_path = os.path.join(datasets_root, args.DATASET, "trainA")
    train_domain_B_path = os.path.join(datasets_root, args.DATASET, "trainB")
    train_A = np.array([load_img(os.path.join(train_domain_A_path, img), args.IMG_SIZE)
                        for img in sorted(os.listdir(train_domain_A_path))]) / 127.5 - 1
    train_B = np.array([load_img(os.path.join(train_domain_B_path, img), args.IMG_SIZE)
                        for img in sorted(os.listdir(train_domain_B_path))]) / 127.5 - 1
    print("\nTrain data shape")
    print(f"Domain A: {train_A.shape}")
    print(f"Domain B: {train_B.shape}")
    val_domain_A_path = os.path.join(datasets_root, args.DATASET, "testA")
    val_domain_B_path = os.path.join(datasets_root, args.DATASET, "testB")
    val_A = np.array([load_img(os.path.join(val_domain_A_path, img), args.IMG_SIZE)
                      for img in sorted(os.listdir(val_domain_A_path))]) / 127.5 - 1
    val_B = np.array([load_img(os.path.join(val_domain_B_path, img), args.IMG_SIZE)
                      for img in sorted(os.listdir(val_domain_B_path))]) / 127.5 - 1
    print("\nTest data shape")
    print(f"Domain A: {val_A.shape}")
    print(f"Domain B: {val_B.shape}")
    print("================ Building Network ================")
    A_channel = train_A.shape[-1]
    B_channel = train_B.shape[-1]
    n_layers = 3
    G_B2A = ResnetGenerator(input_size=args.IMG_SIZE, A_channel=A_channel,
                            B_channel=B_channel, norm_type="IN", name="G_A")
    D_A = NLayerDiscriminator(input_size=args.IMG_SIZE, A_channel=A_channel,
                              B_channel=B_channel, n_layers=n_layers, name="D_A")
    G_A2B = ResnetGenerator(input_size=args.IMG_SIZE, A_channel=A_channel,
                            B_channel=B_channel, norm_type="IN", name="G_B")
    D_B = NLayerDiscriminator(input_size=args.IMG_SIZE, A_channel=A_channel,
                              B_channel=B_channel, n_layers=n_layers, name="D_B")
    D_A.compile(optimizer=optimizers.Adam(learning_rate=0.0001, epsilon=1e-8),
                loss=losses.BinaryCrossentropy())
    D_A.trainable = False
    D_B.compile(optimizer=optimizers.Adam(learning_rate=0.0001, epsilon=1e-8),
                loss=losses.BinaryCrossentropy())
    D_B.trainable = False
    A_img = layers.Input(shape=(args.IMG_SIZE, args.IMG_SIZE, A_channel), name="GAN_Input_A")
    B_img = layers.Input(shape=(args.IMG_SIZE, args.IMG_SIZE, B_channel), name="GAN_Input_B")
    fake_A = G_B2A(B_img)
    D_A_output = D_A(fake_A)
    recon_B = G_A2B(fake_A)
    id_B = G_A2B(B_img)
    A_B2A = models.Model(inputs=B_img, outputs=[D_A_output, recon_B, id_B], name='GAN_B2A')
    A_B2A.compile(optimizer=optimizers.Adam(learning_rate=0.0001, epsilon=1e-8),
                  loss=[losses.BinaryCrossentropy(),
                        losses.MeanAbsoluteError(),
                        losses.MeanAbsoluteError()],
                  loss_weights=[1, 10, 0.5])
    fake_B = G_A2B(A_img)
    D_B_output = D_B(fake_B)
    recon_A = G_B2A(fake_B)
    id_A = G_B2A(A_img)
    A_A2B = models.Model(inputs=A_img, outputs=[D_B_output, recon_A, id_A], name='GAN_A2B')
    A_A2B.compile(optimizer=optimizers.Adam(learning_rate=0.0001, epsilon=1e-8),
                  loss=[losses.BinaryCrossentropy(),
                        losses.MeanAbsoluteError(),
                        losses.MeanAbsoluteError()],
                  loss_weights=[1, 10, 0.5])
    print("==================================================\n")
    print("================ Training Network ================")
    d_output_size = args.IMG_SIZE // (2**(n_layers - 1))
    epochs = args.EPOCHS
    batch_size = args.BATCH_SIZE
    train_length = len(train_A)
    val_length = len(val_A)
    num_iter = int(np.ceil(train_length / batch_size))
    num_val_iter = int(np.ceil(val_length / batch_size))
    CKPT_PATH = './ckpt'
    os.makedirs(CKPT_PATH, exist_ok=True)
    model_json = A_B2A.to_json()
    with open(os.path.join(CKPT_PATH, "GAN_B2A.json"), "w") as json_file:
        json_file.write(model_json)
    model_json = A_A2B.to_json()
    with open(os.path.join(CKPT_PATH, "GAN_A2B.json"), "w") as json_file:
        json_file.write(model_json)
    print("\nModel Saved!\n")
    SAMPLE_PATH = './result'
    os.makedirs(SAMPLE_PATH, exist_ok=True)
    for epoch in range(epochs):
        g_a2b_total = 0
        g_a2b_ad = 0
        g_a2b_cyc = 0
        g_a2b_idt = 0
        d_a_ad = 0
        g_b2a_total = 0
        g_b2a_ad = 0
        g_b2a_cyc = 0
        g_b2a_idt = 0
        d_b_ad = 0
        shuffle_idx = np.random.choice(train_length, train_length, replace=False)
        epoch_progbar = Progbar(num_iter, width=15)
        for i, step in enumerate(range(0, train_length, batch_size)):
            step_idx = shuffle_idx[step:step + batch_size]
            real_label = np.ones((len(step_idx), d_output_size, d_output_size, 1))
            fake_label = np.zeros((len(step_idx), d_output_size, d_output_size, 1))
            # Generate fake images
            fake_A_imgs = G_B2A.predict(train_B[step_idx])
            fake_B_imgs = G_A2B.predict(train_A[step_idx])
            # Train Discriminator
            dis_label = np.concatenate([fake_label, real_label])
            Set_A = np.concatenate([fake_A_imgs, train_A[step_idx]], axis=0)
            Set_B = np.concatenate([fake_B_imgs, train_B[step_idx]], axis=0)
            # [Ad]
            D_A_Loss = D_A.train_on_batch(Set_A, dis_label)
            D_B_Loss = D_B.train_on_batch(Set_B, dis_label)
            # Train Generator
            # [Ad + 10*cyc + 0.5*idt, Ad, mae, idt]
            # A_B2A = models.Model(inputs=B_img, outputs=[D_A_output, recon_B, id_B], name='GAN_B2A')
            G_B2A_Loss = A_B2A.train_on_batch(
                train_B[step_idx],
                [real_label, train_B[step_idx], train_B[step_idx]])
            G_A2B_Loss = A_A2B.train_on_batch(
                train_A[step_idx],
                [real_label, train_A[step_idx], train_A[step_idx]])
            g_a2b_total += G_A2B_Loss[0]
            g_a2b_ad += G_A2B_Loss[1]
            g_a2b_cyc += G_A2B_Loss[2]
            g_a2b_idt += G_A2B_Loss[3]
            d_a_ad += D_A_Loss
            g_b2a_total += G_B2A_Loss[0]
            g_b2a_ad += G_B2A_Loss[1]
            g_b2a_cyc += G_B2A_Loss[2]
            g_b2a_idt += G_B2A_Loss[3]
            d_b_ad += D_B_Loss
            if i < num_iter:
                epoch_progbar.update(i + 1,
                                     [("G_A2B_Total", G_A2B_Loss[0]),
                                      ("G_A2B_Ad", G_A2B_Loss[1]),
                                      ("G_A2B_Cyc", G_A2B_Loss[2]),
                                      ("G_A2B_Idt", G_A2B_Loss[3]),
                                      ("D_A_Ad", D_A_Loss),
                                      ("G_B2A_Total", G_B2A_Loss[0]),
                                      ("G_B2A_Ad", G_B2A_Loss[1]),
                                      ("G_B2A_Cyc", G_B2A_Loss[2]),
                                      ("G_B2A_Idt", G_B2A_Loss[3]),
                                      ("D_B_Ad", D_B_Loss)])
        A_A2B.save_weights(os.path.join(CKPT_PATH, f"{epoch:04d}_A2B_params.h5"))
        A_B2A.save_weights(os.path.join(CKPT_PATH, f"{epoch:04d}_B2A_params.h5"))
        train_float2int = np.concatenate((train_B[step_idx][0], fake_B_imgs[0]), axis=1)
        train_float2int = (train_float2int + 1) * 127.5
        train_float2int = cv.cvtColor(train_float2int.astype(np.uint8), cv.COLOR_RGB2BGR)
        Train_Result_PATH = os.path.join(SAMPLE_PATH, f"{epoch+1:04d}_A2B_result.jpg")
        cv.imwrite(Train_Result_PATH, train_float2int)
        train_float2int = np.concatenate((train_A[step_idx][0], fake_A_imgs[0]), axis=1)
        train_float2int = (train_float2int + 1) * 127.5
        train_float2int = cv.cvtColor(train_float2int.astype(np.uint8), cv.COLOR_RGB2BGR)
        Train_Result_PATH = os.path.join(SAMPLE_PATH, f"{epoch+1:04d}_B2A_result.jpg")
        cv.imwrite(Train_Result_PATH, train_float2int)
    print("Training Done ! ")
def build_model(local_bm_hyperparameters, local_bm_settings):
    model_built = 0
    time_steps_days = int(local_bm_hyperparameters['time_steps_days'])
    epochs = int(local_bm_hyperparameters['epochs'])
    batch_size = int(local_bm_hyperparameters['batch_size'])
    workers = int(local_bm_hyperparameters['workers'])
    optimizer_function = local_bm_hyperparameters['optimizer']
    optimizer_learning_rate = local_bm_hyperparameters['learning_rate']
    if optimizer_function == 'adam':
        optimizer_function = optimizers.Adam(optimizer_learning_rate)
    elif optimizer_function == 'ftrl':
        optimizer_function = optimizers.Ftrl(optimizer_learning_rate)
    losses_list = []
    loss_1 = local_bm_hyperparameters['loss_1']
    loss_2 = local_bm_hyperparameters['loss_2']
    loss_3 = local_bm_hyperparameters['loss_3']
    union_settings_losses = [loss_1, loss_2, loss_3]
    if 'mape' in union_settings_losses:
        losses_list.append(losses.MeanAbsolutePercentageError())
    if 'mse' in union_settings_losses:
        losses_list.append(losses.MeanSquaredError())
    if 'mae' in union_settings_losses:
        losses_list.append(losses.MeanAbsoluteError())
    if 'm_mape' in union_settings_losses:
        losses_list.append(modified_mape())
    if 'customized_loss_function' in union_settings_losses:
        losses_list.append(customized_loss())
    metrics_list = []
    metric1 = local_bm_hyperparameters['metrics1']
    metric2 = local_bm_hyperparameters['metrics2']
    union_settings_metrics = [metric1, metric2]
    if 'rmse' in union_settings_metrics:
        metrics_list.append(metrics.RootMeanSquaredError())
    if 'mse' in union_settings_metrics:
        metrics_list.append(metrics.MeanSquaredError())
    if 'mae' in union_settings_metrics:
        metrics_list.append(metrics.MeanAbsoluteError())
    if 'mape' in union_settings_metrics:
        metrics_list.append(metrics.MeanAbsolutePercentageError())
    l1 = local_bm_hyperparameters['l1']
    l2 = local_bm_hyperparameters['l2']
    if local_bm_hyperparameters['regularizers_l1_l2'] == 'True':
        activation_regularizer = regularizers.l1_l2(l1=l1, l2=l2)
    else:
        activation_regularizer = None
    nof_features_for_training = local_bm_hyperparameters['nof_features_for_training']
    # creating model
    forecaster_in_block = tf.keras.Sequential()
    print('creating the ANN model...')
    # first layer (DENSE)
    if local_bm_hyperparameters['units_layer_1'] > 0:
        forecaster_in_block.add(layers.Dense(
            units=local_bm_hyperparameters['units_layer_1'],
            activation=local_bm_hyperparameters['activation_1'],
            input_shape=(local_bm_hyperparameters['time_steps_days'],
                         nof_features_for_training),
            activity_regularizer=activation_regularizer))
        forecaster_in_block.add(layers.Dropout(
            rate=float(local_bm_hyperparameters['dropout_layer_1'])))
    # second LSTM layer
    if local_bm_hyperparameters['units_layer_2'] > 0 and \
            local_bm_hyperparameters['units_layer_1'] > 0:
        forecaster_in_block.add(layers.Bidirectional(
            layers.LSTM(
                units=local_bm_hyperparameters['units_layer_2'],
                activation=local_bm_hyperparameters['activation_2'],
                activity_regularizer=activation_regularizer,
                dropout=float(local_bm_hyperparameters['dropout_layer_2']),
                return_sequences=False)))
        forecaster_in_block.add(RepeatVector(local_bm_hyperparameters['repeat_vector']))
    # third LSTM layer
    if local_bm_hyperparameters['units_layer_3'] > 0:
        forecaster_in_block.add(layers.Bidirectional(
            layers.LSTM(
                units=local_bm_hyperparameters['units_layer_3'],
                activation=local_bm_hyperparameters['activation_3'],
                activity_regularizer=activation_regularizer,
                dropout=float(local_bm_hyperparameters['dropout_layer_3']),
                return_sequences=True)))
        if local_bm_hyperparameters['units_layer_4'] == 0:
            forecaster_in_block.add(
                RepeatVector(local_bm_hyperparameters['repeat_vector']))
    # fourth layer (DENSE)
    if local_bm_hyperparameters['units_layer_4'] > 0:
        forecaster_in_block.add(layers.Dense(
            units=local_bm_hyperparameters['units_layer_4'],
            activation=local_bm_hyperparameters['activation_4'],
            activity_regularizer=activation_regularizer))
        forecaster_in_block.add(layers.Dropout(
            rate=float(local_bm_hyperparameters['dropout_layer_4'])))
    # final layer
    forecaster_in_block.add(TimeDistributed(layers.Dense(units=nof_features_for_training)))
    forecaster_in_block.save(''.join([local_bm_settings['models_path'],
                                      'in_block_NN_model_structure_']),
                             save_format='tf')
    forecast_horizon_days = local_bm_settings['forecast_horizon_days']
    forecaster_in_block.build(input_shape=(1, forecast_horizon_days + 1,
                                           nof_features_for_training))
    forecaster_in_block.compile(optimizer=optimizer_function,
                                loss=losses_list,
                                metrics=metrics_list)
    forecaster_in_block_json = forecaster_in_block.to_json()
    with open(''.join([local_bm_settings['models_path'],
                       'freq_acc_forecaster_in_block.json']), 'w') as json_file:
        json_file.write(forecaster_in_block_json)
        json_file.close()
    print('build_model function finish (model structure saved in json and tf formats)')
    return True, model_built
def pipeline():
    featurearr, simarr, labelarr = load_data()
    xarr, yarr, aarr, edge_attrarr = graphdatageneration(featurearr, simarr, labelarr)
    dataset = MyDataset(xarr, yarr, aarr, edge_attrarr)
    np.random.seed(10)
    # Train/test split
    idxs = np.random.permutation(len(dataset))
    split = int(0.8 * len(dataset))
    idx_tr, idx_te = np.split(idxs, [split])
    dataset_tr, dataset_te = dataset[idx_tr], dataset[idx_te]
    loader_tr = DisjointLoader(dataset_tr, batch_size=32, epochs=30, shuffle=True)
    loader_te = DisjointLoader(dataset_te, batch_size=32, epochs=1, shuffle=True)
    model = buildmodel(dataset)
    opt = optimizers.Adam(learning_rate=learning_rate)
    loss_fn = losses.MeanSquaredError()

    @tf.function(input_signature=loader_tr.tf_signature(), experimental_relax_shapes=True)
    def train_step(inputs, target):
        with tf.GradientTape() as tape:
            predictions = model(inputs, training=True)
            loss = loss_fn(target, predictions)
            mae = losses.MeanAbsoluteError()(target, predictions)
            mape = losses.MeanAbsolutePercentageError()(target, predictions)
            loss += sum(model.losses)
        gradients = tape.gradient(loss, model.trainable_variables)
        opt.apply_gradients(zip(gradients, model.trainable_variables))
        return loss, mae, mape

    print("training")
    current_batch = 0
    model_loss = 0
    total_mape = 0
    total_mae = 0
    for batch in loader_tr:
        outs, mae, mape = train_step(*batch)
        model_loss += outs
        total_mae += mae
        total_mape += mape
        current_batch += 1
        if current_batch == loader_tr.steps_per_epoch:
            print("MSE: {}".format(model_loss / loader_tr.steps_per_epoch),
                  "MAE: {}".format(total_mae / loader_tr.steps_per_epoch),
                  "MAPE: {}".format(total_mape / loader_tr.steps_per_epoch))
            model_loss = 0
            total_mae = 0
            total_mape = 0
            current_batch = 0
    print("testing")
    model_loss = 0
    model_mae = 0
    model_mape = 0
    for batch in loader_te:
        inputs, target = batch
        predictions = model(inputs, training=False)
        model_loss += loss_fn(target, predictions)
        model_mae += losses.MeanAbsoluteError()(target, predictions)
        model_mape += losses.MeanAbsolutePercentageError()(target, predictions)
    model_loss /= loader_te.steps_per_epoch
    model_mae /= loader_te.steps_per_epoch
    model_mape /= loader_te.steps_per_epoch
    print("Done. Test MSE: {}".format(model_loss),
          "Test MAE: {}".format(model_mae),
          "Test MAPE: {}".format(model_mape))
    model.save('/home/som/lab/seed-yzj/newpaper4/laboratory/model/fusion.hdf5')
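# Assumed imports for the pipeline above; the loader API matches Spektral's
# disjoint-mode loader, so this header is an educated guess, not the original file's.
import numpy as np
import tensorflow as tf
from spektral.data import DisjointLoader
from tensorflow.keras import losses, optimizers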
                     kernel_size=kernel_size,
                     strides=strides,
                     padding="valid",
                     data_format="channels_last",
                     activation=activations.selu,
                     kernel_initializer="lecun_normal",
                     name="conv1_2")(conv1)
rnn = layers.GRU(units=1, activation='sigmoid', return_sequences=True, name="rnn")(conv1)
rnn_reshape = layers.Lambda(lambda x: tf.keras.backend.squeeze(x, 2))(rnn)
wave_model = models.Model(inputs, rnn_reshape, name='wave')
loss_func = losses.MeanAbsoluteError()


def compute_loss(y_true, y_pred):
    reconstr_loss = \
        -tf.reduce_sum(y_true * tf.math.log(1e-10 + y_pred) +
                       (1 - y_true) * tf.math.log(1e-10 + 1 - y_pred), 1)
    latent_loss = -0.5 * tf.reduce_sum(
        1 + y_pred - tf.math.square(y_true) - tf.math.exp(y_pred), 1)
    return tf.reduce_mean(reconstr_loss + latent_loss)


spec_optimizer = optimizers.Adam(learning_rate=1e-4)
wave_optimizer = optimizers.Adam(learning_rate=1e-4)
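# Quick smoke test of compute_loss above on toy tensors (arbitrary values, just
# to show it returns a scalar combining the reconstruction and KL-style terms).
y_true = tf.constant([[0.0, 1.0], [1.0, 0.0]])
y_pred = tf.constant([[0.1, 0.9], [0.8, 0.2]])
print(compute_loss(y_true, y_pred).numpy())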
def forecast(self, local_mse, local_normalized_scaled_unit_sales,
             local_mean_unit_complete_time_serie, local_raw_unit_sales,
             local_settings):
    try:
        print('starting high loss (mse in previous LSTM) time_series in-block forecast submodule')
        # set training parameters
        with open(''.join([local_settings['hyperparameters_path'],
                           'in_block_time_serie_based_model_hyperparameters.json'])) \
                as local_r_json_file:
            model_hyperparameters = json.loads(local_r_json_file.read())
            local_r_json_file.close()
        local_time_series_group = np.load(''.join([local_settings['train_data_path'],
                                                   'time_serie_group.npy']),
                                          allow_pickle=True)
        time_steps_days = int(local_settings['time_steps_days'])
        epochs = int(model_hyperparameters['epochs'])
        batch_size = int(model_hyperparameters['batch_size'])
        workers = int(model_hyperparameters['workers'])
        optimizer_function = model_hyperparameters['optimizer']
        optimizer_learning_rate = model_hyperparameters['learning_rate']
        if optimizer_function == 'adam':
            optimizer_function = optimizers.Adam(optimizer_learning_rate)
        elif optimizer_function == 'ftrl':
            optimizer_function = optimizers.Ftrl(optimizer_learning_rate)
        losses_list = []
        loss_1 = model_hyperparameters['loss_1']
        loss_2 = model_hyperparameters['loss_2']
        loss_3 = model_hyperparameters['loss_3']
        union_settings_losses = [loss_1, loss_2, loss_3]
        if 'mape' in union_settings_losses:
            losses_list.append(losses.MeanAbsolutePercentageError())
        if 'mse' in union_settings_losses:
            losses_list.append(losses.MeanSquaredError())
        if 'mae' in union_settings_losses:
            losses_list.append(losses.MeanAbsoluteError())
        if 'm_mape' in union_settings_losses:
            losses_list.append(modified_mape())
        if 'customized_loss_function' in union_settings_losses:
            losses_list.append(customized_loss())
        metrics_list = []
        metric1 = model_hyperparameters['metrics1']
        metric2 = model_hyperparameters['metrics2']
        union_settings_metrics = [metric1, metric2]
        if 'rmse' in union_settings_metrics:
            metrics_list.append(metrics.RootMeanSquaredError())
        if 'mse' in union_settings_metrics:
            metrics_list.append(metrics.MeanSquaredError())
        if 'mae' in union_settings_metrics:
            metrics_list.append(metrics.MeanAbsoluteError())
        if 'mape' in union_settings_metrics:
            metrics_list.append(metrics.MeanAbsolutePercentageError())
        l1 = model_hyperparameters['l1']
        l2 = model_hyperparameters['l2']
        if model_hyperparameters['regularizers_l1_l2'] == 'True':
            activation_regularizer = regularizers.l1_l2(l1=l1, l2=l2)
        else:
            activation_regularizer = None
        # searching for time_series with high loss forecast
        time_series_treated = []
        poor_results_mse_threshold = local_settings['poor_results_mse_threshold']
        poor_result_time_serie_list = []
        nof_features_for_training = 0
        for result in local_mse:
            if result[1] > poor_results_mse_threshold:
                nof_features_for_training += 1
                poor_result_time_serie_list.append(int(result[0]))
        # nof_features_for_training = local_normalized_scaled_unit_sales.shape[0]
        nof_features_for_training = len(poor_result_time_serie_list)
        # creating model
        forecaster_in_block = tf.keras.Sequential()
        print('current model for specific high loss time_series: Mix_Bid_PeepHole_LSTM_Dense_ANN')
        # first layer (DENSE)
        if model_hyperparameters['units_layer_1'] > 0:
            forecaster_in_block.add(layers.Dense(
                units=model_hyperparameters['units_layer_1'],
                activation=model_hyperparameters['activation_1'],
                input_shape=(model_hyperparameters['time_steps_days'],
                             nof_features_for_training),
                activity_regularizer=activation_regularizer))
            forecaster_in_block.add(layers.Dropout(
                rate=float(model_hyperparameters['dropout_layer_1'])))
        # second LSTM layer
        if model_hyperparameters['units_layer_2'] > 0:
            forecaster_in_block.add(layers.Bidirectional(
                layers.RNN(PeepholeLSTMCell(
                    units=model_hyperparameters['units_layer_2'],
                    activation=model_hyperparameters['activation_2'],
                    activity_regularizer=activation_regularizer,
                    dropout=float(model_hyperparameters['dropout_layer_2'])),
                    return_sequences=False)))
            forecaster_in_block.add(RepeatVector(model_hyperparameters['repeat_vector']))
        # third LSTM layer
        if model_hyperparameters['units_layer_3'] > 0:
            forecaster_in_block.add(layers.Bidirectional(
                layers.RNN(PeepholeLSTMCell(
                    units=model_hyperparameters['units_layer_3'],
                    activation=model_hyperparameters['activation_3'],
                    activity_regularizer=activation_regularizer,
                    dropout=float(model_hyperparameters['dropout_layer_3'])),
                    return_sequences=False)))
            forecaster_in_block.add(RepeatVector(model_hyperparameters['repeat_vector']))
        # fourth layer (DENSE)
        if model_hyperparameters['units_layer_4'] > 0:
            forecaster_in_block.add(layers.Dense(
                units=model_hyperparameters['units_layer_4'],
                activation=model_hyperparameters['activation_4'],
                activity_regularizer=activation_regularizer))
            forecaster_in_block.add(layers.Dropout(
                rate=float(model_hyperparameters['dropout_layer_4'])))
        # final layer
        forecaster_in_block.add(TimeDistributed(layers.Dense(units=nof_features_for_training)))
        # forecaster_in_block.save(''.join([local_settings['models_path'], '_model_structure_']),
        #                          save_format='tf')
        forecast_horizon_days = local_settings['forecast_horizon_days']
        forecaster_in_block.build(input_shape=(1, forecast_horizon_days,
                                               nof_features_for_training))
        forecaster_in_block.compile(optimizer=optimizer_function,
                                    loss=losses_list,
                                    metrics=metrics_list)
        forecaster_in_block_json = forecaster_in_block.to_json()
        with open(''.join([local_settings['models_path'],
                           'forecaster_in_block.json']), 'w') as json_file:
            json_file.write(forecaster_in_block_json)
            json_file.close()
        forecaster_in_block_untrained = forecaster_in_block
        print('specific time_serie model initialized and compiled')
        nof_selling_days = local_normalized_scaled_unit_sales.shape[1]
        last_learning_day_in_year = np.mod(nof_selling_days, 365)
        max_selling_time = local_settings['max_selling_time']
        days_in_focus_frame = model_hyperparameters['days_in_focus_frame']
        window_input_length = local_settings['moving_window_input_length']
        window_output_length = local_settings['moving_window_output_length']
        moving_window_length = window_input_length + window_output_length
        nof_years = local_settings['number_of_years_ceil']
        # training
        # time_serie_data = local_normalized_scaled_unit_sales
        nof_poor_result_time_series = len(poor_result_time_serie_list)
        time_serie_data = np.zeros(shape=(nof_poor_result_time_series, max_selling_time))
        time_serie_iterator = 0
        for time_serie in poor_result_time_serie_list:
            time_serie_data[time_serie_iterator, :] = \
                local_normalized_scaled_unit_sales[time_serie, :]
            time_serie_iterator += 1
        if local_settings['repeat_training_in_block'] == "True":
            print('starting in-block training of model for high_loss time_series in previous model')
            nof_selling_days = time_serie_data.shape[1]
            # nof_moving_windows = np.int32(nof_selling_days / moving_window_length)
            remainder_days = np.mod(nof_selling_days, moving_window_length)
            window_first_days = [first_day for first_day in
                                 range(0, nof_selling_days, moving_window_length)]
            length_window_walk = len(window_first_days)
            # last_window_start = window_first_days[length_window_walk - 1]
            if remainder_days != 0:
                window_first_days[length_window_walk - 1] = \
                    nof_selling_days - moving_window_length
            day_in_year = []
            [day_in_year.append(last_learning_day_in_year + year * 365)
             for year in range(nof_years)]
            stride_window_walk = model_hyperparameters['stride_window_walk']
            print('defining x_train')
            x_train = []
            if local_settings['train_model_input_data_approach'] == "all":
                [x_train.append(time_serie_data[:, day - time_steps_days:day - window_output_length])
                 for day in range(time_steps_days, max_selling_time, stride_window_walk)]
            elif local_settings['train_model_input_data_approach'] == "focused":
                [x_train.append(time_serie_data[:, day:day + time_steps_days])
                 for last_day in day_in_year[:-1]
                 for day in range(last_day + window_output_length,
                                  last_day + window_output_length - days_in_focus_frame,
                                  -stride_window_walk)]
                # border condition, take care with last year, working with last data available, yeah really!!
                [x_train.append(np.concatenate(
                    (time_serie_data[:, day - window_output_length:day],
                     np.zeros(shape=(nof_poor_result_time_series,
                                     time_steps_days - window_output_length))),
                    axis=1))
                 for last_day in day_in_year[-1:]
                 for day in range(last_day, last_day - days_in_focus_frame,
                                  -stride_window_walk)]
            else:
                logging.info("\ntrain_model_input_data_approach is not defined")
                print('-a problem occurs with the data_approach settings')
                return False, None
            print('defining y_train')
            y_train = []
            if local_settings['train_model_input_data_approach'] == "all":
                [y_train.append(time_serie_data[:, day - time_steps_days:day])
                 for day in range(time_steps_days, max_selling_time, stride_window_walk)]
            elif local_settings['train_model_input_data_approach'] == "focused":
                [y_train.append(time_serie_data[:, day:day + time_steps_days])
                 for last_day in day_in_year[:-1]
                 for day in range(last_day + window_output_length,
                                  last_day + window_output_length - days_in_focus_frame,
                                  -stride_window_walk)]
                # border condition, take care with last year, working with last data available, yeah really!!
[ y_train.append( np.concatenate( (time_serie_data[:, day - window_output_length:day], np.zeros(shape=(nof_poor_result_time_series, time_steps_days - window_output_length))), axis=1)) for last_day in day_in_year[-1:] for day in range( last_day, last_day - days_in_focus_frame, -stride_window_walk) ] # if time_enhance is active, assigns more weight to the last time_steps according to enhance_last_stride if local_settings['time_enhance'] == 'True': enhance_last_stride = local_settings['enhance_last_stride'] last_elements = [] length_x_y_train = len(x_train) x_train_enhanced, y_train_enhanced = [], [] enhance_iterator = 1 for position in range( length_x_y_train - enhance_last_stride, length_x_y_train, -1): [ x_train_enhanced.append(x_train[position]) for enhance in range(1, 3 * (enhance_iterator + 1)) ] [ y_train_enhanced.append(y_train[position]) for enhance in range(1, 3 * (enhance_iterator + 1)) ] enhance_iterator += 1 x_train = x_train[:-enhance_last_stride] [ x_train.append(time_step) for time_step in x_train_enhanced ] y_train = y_train[:-enhance_last_stride] [ y_train.append(time_step) for time_step in y_train_enhanced ] # broadcasts lists to np arrays and applies the last pre-training preprocessing (amplification) x_train = np.array(x_train) y_train = np.array(y_train) print('x_train_shape: ', x_train.shape) if local_settings['amplification'] == 'True': factor = local_settings[ 'amplification_factor'] # factor tuning was done previously for time_serie_iterator in range(np.shape(x_train)[1]): max_time_serie = np.amax( x_train[:, time_serie_iterator, :]) x_train[:, time_serie_iterator, :][x_train[:, time_serie_iterator, :] > 0] = \ max_time_serie * factor max_time_serie = np.amax( y_train[:, time_serie_iterator, :]) y_train[:, time_serie_iterator, :][y_train[:, time_serie_iterator, :] > 0] = \ max_time_serie * factor print('x_train and y_train built done') # define callbacks, checkpoints namepaths model_weights = ''.join([ local_settings['checkpoints_path'], 'check_point_model_for_high_loss_time_serie_', model_hyperparameters['current_model_name'], "_loss_-{loss:.4f}-.hdf5" ]) callback1 = cb.EarlyStopping( monitor='loss', patience=model_hyperparameters['early_stopping_patience']) callback2 = cb.ModelCheckpoint(model_weights, monitor='loss', verbose=1, save_best_only=True, mode='min') callbacks = [callback1, callback2] x_train = x_train.reshape( (np.shape(x_train)[0], np.shape(x_train)[2], np.shape(x_train)[1])) y_train = y_train.reshape( (np.shape(y_train)[0], np.shape(y_train)[2], np.shape(y_train)[1])) print('input_shape: ', np.shape(x_train)) # train for each time_serie # check settings for repeat or not the training forecaster_in_block.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, workers=workers, callbacks=callbacks, shuffle=False) # print summary (informative; but if says "shape = multiple", probably useless) forecaster_in_block.summary() forecaster_in_block.save(''.join([ local_settings['models_path'], '_high_loss_time_serie_model_forecaster_in_block_.h5' ])) forecaster_in_block.save_weights(''.join([ local_settings['models_path'], '_weights_high_loss_ts_model_forecaster_in_block_.h5' ])) print( 'high loss time_series model trained and saved in hdf5 format .h5' ) else: forecaster_in_block.load_weights(''.join([ local_settings['models_path'], '_weights_high_loss_ts_model_forecaster_in_block_.h5' ])) # forecaster_in_block = models.load_model(''.join([local_settings['models_path'], # '_high_loss_time_serie_model_forecaster_.h5'])) print('weights of previously trained 
model loaded') # compile model and make forecast (not necessary) # forecaster_in_block.compile(optimizer='adam', loss='mse') # evaluating model and comparing with aggregated (in-block) LSTM print('evaluating the model trained..') time_serie_data = time_serie_data.reshape( (1, time_serie_data.shape[1], time_serie_data.shape[0])) x_input = time_serie_data[:, -forecast_horizon_days:, :] y_pred_normalized = forecaster_in_block.predict(x_input) # print('output shape: ', y_pred_normalized.shape) time_serie_data = time_serie_data.reshape( (time_serie_data.shape[2], time_serie_data.shape[1])) # print('time_serie data shape: ', np.shape(time_serie_data)) time_serie_iterator = 0 improved_time_series_forecast = [] time_series_not_improved = [] improved_mse = [] for time_serie in poor_result_time_serie_list: # for time_serie in range(local_normalized_scaled_unit_sales.shape[0]): y_truth = local_raw_unit_sales[time_serie:time_serie + 1, -forecast_horizon_days:] # print('y_truth shape:', y_truth.shape) # reversing preprocess: rescale, denormalize, reshape # inverse reshape y_pred_reshaped = y_pred_normalized.reshape( (y_pred_normalized.shape[2], y_pred_normalized.shape[1])) y_pred_reshaped = y_pred_reshaped[ time_serie_iterator:time_serie_iterator + 1, :] # print('y_pred_reshaped shape:', y_pred_reshaped.shape) # inverse transform (first moving_windows denormalizing and then general rescaling) time_serie_normalized_window_mean = np.mean( time_serie_data[time_serie_iterator, -moving_window_length:]) # print('mean of this time serie (normalized values): ', time_serie_normalized_window_mean) local_denormalized_array = window_based_denormalizer( y_pred_reshaped, time_serie_normalized_window_mean, forecast_horizon_days) local_point_forecast = general_mean_rescaler( local_denormalized_array, local_mean_unit_complete_time_serie[time_serie], forecast_horizon_days) # print('rescaled denormalized forecasts array shape: ', local_point_forecast.shape) # calculating MSE # print(y_truth.shape) # print(local_point_forecast.shape) local_error_metric_mse = mean_squared_error( y_truth, local_point_forecast) # print('time_serie: ', time_serie, '\tMean_Squared_Error: ', local_error_metric_mse) previous_result = local_mse[:, 1][local_mse[:, 0] == time_serie].item() time_series_treated.append( [int(time_serie), previous_result, local_error_metric_mse]) if local_error_metric_mse < previous_result: # print('better results with time_serie specific model training') print(time_serie, 'MSE improved from ', previous_result, 'to ', local_error_metric_mse) improved_time_series_forecast.append(int(time_serie)) improved_mse.append(local_error_metric_mse) else: # print('no better results with time serie specific model training') # print('MSE not improved from: ', previous_result, '\t current mse: ', local_error_metric_mse) time_series_not_improved.append(int(time_serie)) time_serie_iterator += 1 time_series_treated = np.array(time_series_treated) improved_mse = np.array(improved_mse) average_mse_in_block_forecast = np.mean(time_series_treated[:, 2]) average_mse_improved_ts = np.mean(improved_mse) print('poor result time serie list len:', len(poor_result_time_serie_list)) print('mean_mse for in-block forecast:', average_mse_in_block_forecast) print( 'number of time series with better results with this forecast: ', len(improved_time_series_forecast)) print( 'mean_mse of time series with better results with this forecast: ', average_mse_improved_ts) print('not improved time series =', len(time_series_not_improved)) time_series_treated = 
np.array(time_series_treated) improved_time_series_forecast = np.array( improved_time_series_forecast) time_series_not_improved = np.array(time_series_not_improved) poor_result_time_serie_array = np.array( poor_result_time_serie_list) # store data of (individual-approach) time_series forecast successfully improved and those that not np.save( ''.join([ local_settings['models_evaluation_path'], 'poor_result_time_serie_array' ]), poor_result_time_serie_array) np.save( ''.join([ local_settings['models_evaluation_path'], 'time_series_forecast_results' ]), time_series_treated) np.save( ''.join([ local_settings['models_evaluation_path'], 'improved_time_series_forecast' ]), improved_time_series_forecast) np.save( ''.join([ local_settings['models_evaluation_path'], 'time_series_not_improved' ]), time_series_not_improved) np.savetxt(''.join([ local_settings['models_evaluation_path'], 'time_series_forecast_results.csv' ]), time_series_treated, fmt='%10.15f', delimiter=',', newline='\n') forecaster_in_block_json = forecaster_in_block.to_json() with open(''.join([local_settings['models_path'], 'high_loss_time_serie_model_forecaster_in_block.json']), 'w') \ as json_file: json_file.write(forecaster_in_block_json) json_file.close() print('trained model weights and architecture saved') print('metadata (results, time_serie with high loss) saved') print( 'forecast improvement done. (high loss time_serie focused) submodule has finished' ) except Exception as submodule_error: print('time_series in-block forecast submodule_error: ', submodule_error) logger.info( 'error in forecast of in-block time_series (high_loss_identified_ts_forecast submodule)' ) logger.error(str(submodule_error), exc_info=True) return False return True
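# window_based_denormalizer and general_mean_rescaler are project helpers whose implementations
# are not shown in this file. The sketch below is only an assumption consistent with the call
# sites above (add back the moving-window mean, then rescale by the stored per-series mean);
# the *_sketch names are hypothetical and not part of the project.
import numpy as np


def window_based_denormalizer_sketch(y_pred_window, window_mean, forecast_horizon_days):
    # undo the moving-window normalization by adding the window mean back to every forecast step
    return y_pred_window + np.full((1, forecast_horizon_days), window_mean)


def general_mean_rescaler_sketch(denormalized_array, series_mean, forecast_horizon_days):
    # undo the general scaling by multiplying by the mean of the complete time_serie
    return denormalized_array.reshape(1, forecast_horizon_days) * series_mean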
def train(self, local_settings, local_raw_unit_sales, local_model_hyperparameters,
          local_time_series_not_improved, raw_unit_sales_ground_truth):
    try:
        # data normalization
        local_forecast_horizon_days = local_settings['forecast_horizon_days']
        local_x_train, local_y_train = build_x_y_train_arrays(local_raw_unit_sales, local_settings,
                                                              local_model_hyperparameters,
                                                              local_time_series_not_improved)
        local_features_for_each_training = 1
        print('starting neural network - individual time_serie training')
        # building architecture and compiling model_template
        # set training parameters
        local_time_steps_days = int(local_settings['time_steps_days'])
        local_epochs = int(local_model_hyperparameters['epochs'])
        local_batch_size = int(local_model_hyperparameters['batch_size'])
        local_workers = int(local_model_hyperparameters['workers'])
        local_optimizer_function = local_model_hyperparameters['optimizer']
        local_optimizer_learning_rate = local_model_hyperparameters['learning_rate']
        if local_optimizer_function == 'adam':
            local_optimizer_function = optimizers.Adam(local_optimizer_learning_rate)
        elif local_optimizer_function == 'ftrl':
            local_optimizer_function = optimizers.Ftrl(local_optimizer_learning_rate)
        # select losses and metrics according to the hyperparameters file
        local_losses_list = []
        local_loss_1 = local_model_hyperparameters['loss_1']
        local_loss_2 = local_model_hyperparameters['loss_2']
        local_loss_3 = local_model_hyperparameters['loss_3']
        local_union_settings_losses = [local_loss_1, local_loss_2, local_loss_3]
        if 'mape' in local_union_settings_losses:
            local_losses_list.append(losses.MeanAbsolutePercentageError())
        if 'mse' in local_union_settings_losses:
            local_losses_list.append(losses.MeanSquaredError())
        if 'mae' in local_union_settings_losses:
            local_losses_list.append(losses.MeanAbsoluteError())
        if 'm_mape' in local_union_settings_losses:
            local_losses_list.append(modified_mape())
        if 'customized_loss_function' in local_union_settings_losses:
            local_losses_list.append(customized_loss())
        if 'pinball_loss_function' in local_union_settings_losses:
            local_losses_list.append(pinball_function_loss())
        local_metrics_list = []
        local_metric1 = local_model_hyperparameters['metrics1']
        local_metric2 = local_model_hyperparameters['metrics2']
        local_union_settings_metrics = [local_metric1, local_metric2]
        if 'rmse' in local_union_settings_metrics:
            local_metrics_list.append(metrics.RootMeanSquaredError())
        if 'mse' in local_union_settings_metrics:
            local_metrics_list.append(metrics.MeanSquaredError())
        if 'mae' in local_union_settings_metrics:
            local_metrics_list.append(metrics.MeanAbsoluteError())
        if 'mape' in local_union_settings_metrics:
            local_metrics_list.append(metrics.MeanAbsolutePercentageError())
        local_l1 = local_model_hyperparameters['l1']
        local_l2 = local_model_hyperparameters['l2']
        if local_model_hyperparameters['regularizers_l1_l2'] == 'True':
            local_activation_regularizer = regularizers.l1_l2(l1=local_l1, l2=local_l2)
        else:
            local_activation_regularizer = None
        # define callbacks, checkpoints namepaths
        local_callback1 = cb.EarlyStopping(monitor='loss',
                                           patience=local_model_hyperparameters['early_stopping_patience'])
        local_callbacks = [local_callback1]
        print('building current model: Mix_Bid_PeepHole_LSTM_Dense_ANN')
        local_base_model = tf.keras.Sequential()
        # first layer (DENSE)
        if local_model_hyperparameters['units_layer_1'] > 0:
            # strictly, dim 1 of input_shape is ['time_steps_days'] (dim 0 is the batch dimension: None)
            local_base_model.add(layers.Dense(
                units=local_model_hyperparameters['units_layer_1'],
                activation=local_model_hyperparameters['activation_1'],
                input_shape=(local_time_steps_days, local_features_for_each_training),
                activity_regularizer=local_activation_regularizer))
            local_base_model.add(layers.Dropout(rate=float(local_model_hyperparameters['dropout_layer_1'])))
        # second layer (peephole LSTM)
        if local_model_hyperparameters['units_layer_2']:
            if local_model_hyperparameters['units_layer_1'] == 0:
                local_base_model.add(layers.RNN(PeepholeLSTMCell(
                    units=local_model_hyperparameters['units_layer_2'],
                    activation=local_model_hyperparameters['activation_2'],
                    input_shape=(local_time_steps_days, local_features_for_each_training),
                    dropout=float(local_model_hyperparameters['dropout_layer_2']))))
            else:
                local_base_model.add(layers.RNN(PeepholeLSTMCell(
                    units=local_model_hyperparameters['units_layer_2'],
                    activation=local_model_hyperparameters['activation_2'],
                    dropout=float(local_model_hyperparameters['dropout_layer_2']))))
        # third layer (DENSE)
        if local_model_hyperparameters['units_layer_3'] > 0:
            local_base_model.add(layers.Dense(
                units=local_model_hyperparameters['units_layer_3'],
                activation=local_model_hyperparameters['activation_3'],
                activity_regularizer=local_activation_regularizer))
            local_base_model.add(layers.Dropout(rate=float(local_model_hyperparameters['dropout_layer_3'])))
        # fourth layer (peephole LSTM)
        if local_model_hyperparameters['units_layer_4'] > 0:
            local_base_model.add(layers.RNN(PeepholeLSTMCell(
                units=local_model_hyperparameters['units_layer_4'],
                activation=local_model_hyperparameters['activation_4'],
                dropout=float(local_model_hyperparameters['dropout_layer_4']))))
        # final layer
        local_base_model.add(layers.Dense(units=local_forecast_horizon_days))
        # build and compile model
        local_base_model.build(input_shape=(1, local_time_steps_days, local_features_for_each_training))
        local_base_model.compile(optimizer=local_optimizer_function, loss=local_losses_list,
                                 metrics=local_metrics_list)
        # save model architecture (template for specific models)
        local_base_model.save(''.join([local_settings['models_path'],
                                       'generic_forecaster_template_individual_ts.h5']))
        local_base_model_json = local_base_model.to_json()
        with open(''.join([local_settings['models_path'],
                           'generic_forecaster_template_individual_ts.json']), 'w') as json_file:
            json_file.write(local_base_model_json)
        local_base_model.summary()
        # training model
        local_moving_window_length = local_settings['moving_window_input_length'] + \
            local_settings['moving_window_output_length']
        # cast all input data to the correct type
        local_x_train = np.array(local_x_train, dtype=np.dtype('float32'))
        local_y_train = np.array(local_y_train, dtype=np.dtype('float32'))
        local_raw_unit_sales = np.array(local_raw_unit_sales, dtype=np.dtype('float32'))
        # specific time_serie models training loop
        local_y_pred_list = []
        local_nof_time_series = local_settings['number_of_time_series']
        remainder = np.array([time_serie for time_serie in range(local_nof_time_series)
                              if time_serie not in local_time_series_not_improved])
        for time_serie in remainder:
            # ----------------------------------------key_point----------------------------------------
            # note that on each loop the weights and internal last states of the previous training are
            # kept, which probably saves time and, for (aggregated or ordered) connected time series,
            # improves results
            # ----------------------------------------key_point----------------------------------------
            print('training time_serie:', time_serie)
            local_x, local_y = local_x_train[:, time_serie: time_serie + 1, :], \
                local_y_train[:, time_serie: time_serie + 1, :]
            local_x = local_x.reshape(local_x.shape[0], local_x.shape[2], 1)
            local_y = local_y.reshape(local_y.shape[0], local_y.shape[2], 1)
            # training, saving model and storing forecasts
            local_base_model.fit(local_x, local_y, batch_size=local_batch_size, epochs=local_epochs,
                                 workers=local_workers, callbacks=local_callbacks, shuffle=False)
            local_base_model.save_weights(''.join([local_settings['models_path'],
                                                   '/weights_last_year/_individual_ts_',
                                                   str(time_serie), '_model_weights_.h5']))
            local_x_input = local_raw_unit_sales[time_serie: time_serie + 1, -local_forecast_horizon_days:]
            local_x_input = cof_zeros(local_x_input, local_settings)
            local_x_input = local_x_input.reshape(1, local_x_input.shape[1], 1)
            print('x_input shape:', local_x_input.shape)
            local_y_pred = local_base_model.predict(local_x_input)
            print('x_input:\n', local_x_input)
            print('y_pred shape:', local_y_pred.shape)
            local_y_pred = local_y_pred.reshape(local_y_pred.shape[1])
            local_y_pred = cof_zeros(local_y_pred, local_settings)
            if local_settings['mini_ts_evaluator'] == "True" and \
                    local_settings['competition_stage'] != 'submitting_after_June_1th_using_1941days':
                mini_evaluator = mini_evaluator_submodule()
                evaluation = mini_evaluator.evaluate_ts_forecast(
                    raw_unit_sales_ground_truth[time_serie, -local_forecast_horizon_days:], local_y_pred)
                print('ts:', time_serie, 'with cof_zeros ts mse:', evaluation)
            else:
                print('ts:', time_serie)
                print(local_y_pred)
            local_y_pred_list.append(local_y_pred)
        local_point_forecast_array = np.array(local_y_pred_list)
        local_point_forecast_normalized = local_point_forecast_array.reshape(
            (local_point_forecast_array.shape[0], local_point_forecast_array.shape[1]))
        local_point_forecast = local_point_forecast_normalized
        # save point forecasts
        np.savetxt(''.join([local_settings['others_outputs_path'], 'point_forecast_NN_LSTM_simulation.csv']),
                   local_point_forecast, fmt='%10.15f', delimiter=',', newline='\n')
        print('point forecasts saved to file')
        print('submodule to build, train and forecast each time_serie individually finished successfully')
        return True
    except Exception as submodule_error:
        print('train model and forecast individual time_series submodule_error: ', submodule_error)
        logger.info('error in training and forecast-individual time_serie schema')
        logger.error(str(submodule_error), exc_info=True)
        return False
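# cof_zeros is another project helper not defined in this file. Given that it is applied to raw
# unit_sales inputs and to predictions, a plausible (hypothetical) behavior is clipping negative
# values to zero, since unit sales cannot be negative; the real helper may also use local_settings.
def cof_zeros_sketch(array, local_settings):
    # zero out negative entries; local_settings is kept only to mirror the original signature
    array[array < 0] = 0
    return array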
def __init__(self, scope='MAE'):
    super(MAE, self).__init__(scope)
    self.cost = losses.MeanAbsoluteError(reduction=losses.Reduction.SUM)
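# For reference, the reduction argument controls how Keras aggregates per-sample errors:
# Reduction.SUM adds them, while the default averages over the batch size. A quick check:
import tensorflow as tf
from tensorflow.keras import losses

y_true = tf.constant([[0.0, 1.0], [0.0, 0.0]])
y_pred = tf.constant([[1.0, 1.0], [1.0, 0.0]])
print(losses.MeanAbsoluteError()(y_true, y_pred).numpy())  # 0.5 (mean over the batch)
print(losses.MeanAbsoluteError(reduction=losses.Reduction.SUM)(y_true, y_pred).numpy())  # 1.0 (sum)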
def train_model(self, local_settings, local_raw_unit_sales, local_model_hyperparameters):
    try:
        # loading hyperparameters
        local_days_in_focus = local_model_hyperparameters['days_in_focus_frame']
        local_raw_unit_sales_data = local_raw_unit_sales[:, -local_days_in_focus:]
        local_nof_ts = local_raw_unit_sales.shape[0]
        local_forecast_horizon_days = local_settings['forecast_horizon_days']
        local_features_for_each_training = 1
        print('starting neural network - individual time_serie training unit_sale_approach')
        # building architecture and compiling model_template
        # set training parameters
        local_time_steps_days = int(local_settings['time_steps_days'])
        local_epochs = int(local_model_hyperparameters['epochs'])
        local_batch_size = int(local_model_hyperparameters['batch_size'])
        local_workers = int(local_model_hyperparameters['workers'])
        local_optimizer_function = local_model_hyperparameters['optimizer']
        local_optimizer_learning_rate = local_model_hyperparameters['learning_rate']
        local_validation_split = local_model_hyperparameters['validation_split']
        if local_optimizer_function == 'adam':
            local_optimizer_function = optimizers.Adam(local_optimizer_learning_rate)
        elif local_optimizer_function == 'ftrl':
            local_optimizer_function = optimizers.Ftrl(local_optimizer_learning_rate)
        # select losses and metrics according to the hyperparameters file
        local_losses_list = []
        local_loss_1 = local_model_hyperparameters['loss_1']
        local_loss_2 = local_model_hyperparameters['loss_2']
        local_loss_3 = local_model_hyperparameters['loss_3']
        local_union_settings_losses = [local_loss_1, local_loss_2, local_loss_3]
        if 'mape' in local_union_settings_losses:
            local_losses_list.append(losses.MeanAbsolutePercentageError())
        if 'mse' in local_union_settings_losses:
            local_losses_list.append(losses.MeanSquaredError())
        if 'mae' in local_union_settings_losses:
            local_losses_list.append(losses.MeanAbsoluteError())
        if 'm_mape' in local_union_settings_losses:
            local_losses_list.append(modified_mape())
        if 'customized_loss_function' in local_union_settings_losses:
            local_losses_list.append(customized_loss())
        if 'pinball_loss_function' in local_union_settings_losses:
            local_losses_list.append(pinball_function_loss())
        local_metrics_list = []
        local_metric1 = local_model_hyperparameters['metrics1']
        local_metric2 = local_model_hyperparameters['metrics2']
        local_union_settings_metrics = [local_metric1, local_metric2]
        if 'rmse' in local_union_settings_metrics:
            local_metrics_list.append(metrics.RootMeanSquaredError())
        if 'mse' in local_union_settings_metrics:
            local_metrics_list.append(metrics.MeanSquaredError())
        if 'mae' in local_union_settings_metrics:
            local_metrics_list.append(metrics.MeanAbsoluteError())
        if 'mape' in local_union_settings_metrics:
            local_metrics_list.append(metrics.MeanAbsolutePercentageError())
        local_l1 = local_model_hyperparameters['l1']
        local_l2 = local_model_hyperparameters['l2']
        if local_model_hyperparameters['regularizers_l1_l2'] == 'True':
            local_activation_regularizer = regularizers.l1_l2(l1=local_l1, l2=local_l2)
        else:
            local_activation_regularizer = None
        # define callbacks, checkpoints namepaths
        local_callback1 = cb.EarlyStopping(monitor='loss',
                                           patience=local_model_hyperparameters['early_stopping_patience'])
        local_callbacks = [local_callback1]
        print('building current model: individual_time_serie_acc_freq_LSTM_Dense_ANN')
        local_base_model = tf.keras.Sequential()
        # first layer (LSTM)
        if local_model_hyperparameters['units_layer_1'] > 0:
            local_base_model.add(layers.LSTM(
                units=local_model_hyperparameters['units_layer_1'],
                activation=local_model_hyperparameters['activation_1'],
                input_shape=(local_model_hyperparameters['time_steps_days'],
                             local_features_for_each_training),
                dropout=float(local_model_hyperparameters['dropout_layer_1']),
                activity_regularizer=local_activation_regularizer,
                return_sequences=True))
        # second layer (bidirectional LSTM)
        if local_model_hyperparameters['units_layer_2'] > 0:
            local_base_model.add(layers.Bidirectional(layers.LSTM(
                units=local_model_hyperparameters['units_layer_2'],
                activation=local_model_hyperparameters['activation_2'],
                activity_regularizer=local_activation_regularizer,
                dropout=float(local_model_hyperparameters['dropout_layer_2']),
                return_sequences=False)))
            local_base_model.add(RepeatVector(local_model_hyperparameters['repeat_vector']))
        # third layer (bidirectional peephole LSTM)
        if local_model_hyperparameters['units_layer_3'] > 0:
            local_base_model.add(layers.Bidirectional(layers.RNN(
                PeepholeLSTMCell(units=local_model_hyperparameters['units_layer_3'],
                                 dropout=float(local_model_hyperparameters['dropout_layer_3'])),
                activity_regularizer=local_activation_regularizer,
                return_sequences=False)))
            local_base_model.add(RepeatVector(local_model_hyperparameters['repeat_vector']))
        # fourth layer (DENSE)
        if local_model_hyperparameters['units_layer_4'] > 0:
            local_base_model.add(layers.Dense(
                units=local_model_hyperparameters['units_layer_4'],
                activation=local_model_hyperparameters['activation_4'],
                activity_regularizer=local_activation_regularizer))
            local_base_model.add(layers.Dropout(rate=float(local_model_hyperparameters['dropout_layer_4'])))
        # final layer
        local_base_model.add(layers.Dense(units=local_model_hyperparameters['units_final_layer']))
        # build and compile model
        local_base_model.build(input_shape=(1, local_time_steps_days, local_features_for_each_training))
        local_base_model.compile(optimizer=local_optimizer_function, loss=local_losses_list,
                                 metrics=local_metrics_list)
        # save model architecture (template for specific models)
        local_base_model.save(''.join([local_settings['models_path'],
                                       '_unit_sales_forecaster_template_individual_ts.h5']))
        local_base_model_json = local_base_model.to_json()
        with open(''.join([local_settings['models_path'],
                           '_unit_sales_forecaster_forecaster_template_individual_ts.json']),
                  'w') as json_file:
            json_file.write(local_base_model_json)
        local_base_model.summary()
        # training model
        local_moving_window_length = local_settings['moving_window_input_length'] + \
            local_settings['moving_window_output_length']
        # load x_train and y_train, previously built for the third and fourth model trainings
        local_builder = local_bxy_x_y_builder()
        local_x_train, local_y_train = local_builder.build_x_y_train_arrays(
            local_raw_unit_sales, local_settings, local_model_hyperparameters)
        local_x_train = local_x_train.reshape(local_x_train.shape[0], local_x_train.shape[2],
                                              local_x_train.shape[1])
        local_y_train = local_y_train.reshape(local_y_train.shape[0], local_y_train.shape[2],
                                              local_y_train.shape[1])
        # start training time_serie by time_serie
        local_y_pred_array = np.zeros(shape=(local_raw_unit_sales.shape[0], local_forecast_horizon_days),
                                      dtype=np.dtype('float32'))
        for time_serie in range(local_nof_ts):
            print('training time_serie:', time_serie)
            local_x, local_y = local_x_train[:, :, time_serie: time_serie + 1], \
                local_y_train[:, :, time_serie: time_serie + 1]
            # training, saving model and storing forecasts
            local_base_model.fit(local_x, local_y, batch_size=local_batch_size, epochs=local_epochs,
                                 workers=local_workers, callbacks=local_callbacks, shuffle=False,
                                 validation_split=local_validation_split)
            local_base_model.save_weights(''.join([local_settings['models_path'],
                                                   '/_weights_unit_sales_NN_35_days/_individual_ts_',
                                                   str(time_serie), '_model_weights_.h5']))
            local_x_input = local_raw_unit_sales[time_serie: time_serie + 1, -local_forecast_horizon_days:]
            local_x_input = local_x_input.reshape(1, local_x_input.shape[1], 1)
            local_y_pred = local_base_model.predict(local_x_input)
            local_y_pred = local_y_pred.reshape(local_y_pred.shape[1])
            local_y_pred_array[time_serie: time_serie + 1, :] = local_y_pred
        local_point_forecast_normalized = local_y_pred_array.reshape(
            (local_y_pred_array.shape[0], local_y_pred_array.shape[1]))
        local_point_forecast = local_point_forecast_normalized.clip(0)
        # save point forecasts
        np.save(''.join([local_settings['train_data_path'], 'point_forecast_NN_from_unit_sales_training']),
                local_point_forecast)
        np.save(''.join([local_settings['train_data_path'], 'eleventh_model_NN_unit_sales_forecast_data']),
                local_point_forecast)
        np.savetxt(''.join([local_settings['others_outputs_path'],
                            'point_forecast_NN_from_unit_sales_training.csv']),
                   local_point_forecast, fmt='%10.15f', delimiter=',', newline='\n')
        print('point forecasts saved to file')
        print('submodule to build, train and forecast each time_serie unit_sales individually '
              'finished successfully')
        return True, local_point_forecast
    except Exception as submodule_error:
        print('train model and forecast individual time_series units_sales_ submodule_error: ',
              submodule_error)
        logger.info('error in training and forecast-individual time_serie unit_sales_ schema')
        logger.error(str(submodule_error), exc_info=True)
        return False, []
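# pinball_function_loss is referenced in the loss selection above but not defined in this file.
# One common formulation of the pinball (quantile) loss is sketched below as an assumption about
# what that helper computes; the class name and the default quantile are illustrative only.
import tensorflow as tf


class PinballLossSketch(tf.keras.losses.Loss):
    def __init__(self, quantile=0.5, name='pinball_loss'):
        super().__init__(name=name)
        self.quantile = quantile

    def call(self, y_true, y_pred):
        error = y_true - y_pred
        # under-forecasts are penalized by quantile, over-forecasts by (1 - quantile)
        return tf.reduce_mean(tf.maximum(self.quantile * error, (self.quantile - 1.0) * error))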
def _generator_loss(real_image, generate_image, discriminator_output):
    # adversarial term: the generator wants the discriminator to output "real" (ones) for its images
    discriminate_loss = losses.BinaryCrossentropy(from_logits=True)(
        tf.ones_like(discriminator_output), discriminator_output)
    # reconstruction term: L1 distance between the real and the generated image
    generate_loss = losses.MeanAbsoluteError()(real_image, generate_image)
    return (discriminate_loss * 3 + generate_loss * 100,
            discriminate_loss * 3,
            generate_loss * 100)
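# A quick sanity check of the 3:100 weighting in _generator_loss, using dummy tensors
# (the shapes are illustrative; any consistent image and logit shapes work):
import tensorflow as tf

real = tf.zeros((1, 64, 64, 3))
fake = tf.ones((1, 64, 64, 3))
disc_out = tf.zeros((1, 1))  # raw logits from a hypothetical discriminator
total, adversarial, reconstruction = _generator_loss(real, fake, disc_out)
# MAE(real, fake) is 1.0, so reconstruction == 100.0; adversarial = 3 * BCE(ones, zero logits)
print(total.numpy(), adversarial.numpy(), reconstruction.numpy())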