virus_name = "measles" species = ["Avian", "Ebola", "Lassa", "Measles", "Mumps", "Zika"] tree_fig = {} mapbox_access_token = "pk.eyJ1IjoicGxvdGx5bWFwYm94IiwiYSI6ImNrOWJqb2F4djBnMjEzbG50amg0dnJieG4ifQ.Zme1-Uzoi75IaFbieBDl3A" tree_file, metadata_file, metadata_file_stat = create_paths_file(virus_name, level1="", level2="", level3="") # To know the minimum and the maximum values of date for slicer df_stat_metadata = pd.read_csv(metadata_file_stat) min_date, max_date = min_max_date(df_stat_metadata) # create the dictionary of slider marks_data = slicer(min_date, max_date) min_max_date_value = [min_date, max_date] fig = create_tree(virus_name, tree_file, metadata_file, "Country") tree_fig[tree_file] = fig fig_map_bubble = create_map_bubble_year(virus_name, metadata_file_stat, 2, min_date, max_date) fig_curve_line = create_curve_line(df_stat_metadata, virus_name, min_date, max_date) ######################################### MAIN APP ######################################### app.layout = html.Div([ # Banner display html.Div(
def _update_slicer(
    virus_name,
    mumps,
    dengue,
    lassa,
    avian_opt1,
    avian_opt2,
    flu_opt1,
    flu_opt2,
    flu_opt3,
):
    virus_name = virus_name.lower()

    # Map each virus to the (level1, level2, level3) options its paths need
    levels = {
        "ebola": ("", "", ""),
        "zika": ("", "", ""),
        "measles": ("", "", ""),
        "mumps": (mumps, "", ""),
        "dengue": (dengue, "", ""),
        "lassa": (lassa, "", ""),
        "avian": (avian_opt1, avian_opt2, ""),
        "flu": (flu_opt1, flu_opt2, flu_opt3),
    }
    level1, level2, level3 = levels[virus_name]
    (
        tree_file_filtered,
        metadata_file_filtered,
        metadata_file_stat_filtered,
    ) = create_paths_file(virus_name, level1=level1, level2=level2, level3=level3)

    df = pd.read_csv(metadata_file_stat_filtered)
    min_date, max_date = min_max_date(df)

    # Create the dictionary of slider marks
    marks_data = slicer(min_date, max_date)
    min_max_date_value = [min_date, max_date]

    # Keep only the data between min_date and max_date
    df = df[(df["Year"] >= min_date) & (df["Year"] <= max_date)]

    return dcc.RangeSlider(
        id="id-year",
        min=min_date,
        max=max_date,
        step=1,
        marks=marks_data,
        value=min_max_date_value,
    )
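# `min_max_date` and `slicer` are imported from the app's utils and not shown
# in this file. Below is a minimal sketch of what they plausibly do, inferred
# from how the callbacks use them -- the names match the call sites, but the
# bodies are assumptions, not the actual utils code:
def min_max_date(df):
    # Earliest and latest sampling year in the metadata table
    return int(df["Year"].min()), int(df["Year"].max())


def slicer(min_date, max_date):
    # Marks dict consumed by dcc.RangeSlider: {year: "year", ...}
    return {year: str(year) for year in range(min_date, max_date + 1)}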
import h5py
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras import constraints as cnt

from utils import slicer, split

# cd simulations
dt_fl = "nn_data.h5"
dt_dst = "scaled_data"

n_train = 0.8
n_valid = 0.1

# Open data file
f = h5py.File(dt_fl, "r")
dt = f[dt_dst]

# Split the snapshots into train/validation/test slices
idxs = split(dt.shape[0], n_train, n_valid)
slc_trn, slc_vld, slc_tst = slicer(dt.shape, idxs)
trn = dt[slc_trn][:, :, :, np.newaxis]
vld = dt[slc_vld][:, :, :, np.newaxis]

act = "tanh"
cnt_mm = cnt.MinMaxNorm(min_value=-1, max_value=2)

# Encoder
tf.keras.backend.clear_session()
inputs = layers.Input(shape=(200, 100, 1))
ed = fconv(inputs, 4, 2, 3)
e = fconv(ed, 3, 2, 9)
e = fconv(e, 3, 5, 27)

# Latent space
# l = layers.Flatten()(e)
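# `fconv` is not defined in this snippet. Judging from the calls above (the
# strides 2, 2, 5 mirror the encoder strides in the tuning scripts below),
# it is most likely a small Conv2D block builder; the argument order and the
# body here are assumptions, not the project's actual helper:
def fconv(x, k_sz, strd, flt, act="tanh"):
    # flt filters, square kernel of size k_sz, stride strd, "same" padding
    return layers.Conv2D(
        flt, (k_sz, k_sz), strides=strd, activation=act, padding="same"
    )(x)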
def objective(trial):
    # Open data files
    f_in = h5py.File(DT_FL_IN, "r")
    dt_in = f_in[DT_DST_IN]

    f_out = h5py.File(DT_FL_OUT, "r")
    dt_out = f_out[DT_DST_OUT]

    WD = 2
    # Dummy y_data
    x_data, _ = format_data(dt_in, wd=WD, get_y=True)
    _, y_data = format_data(dt_out, wd=WD, get_y=True)
    x_data = np.squeeze(x_data)

    # Split data and get slices
    idxs = split(x_data.shape[0], N_TRAIN, N_VALID, test_last=dt_in.attrs["idx"])
    slc_trn, slc_vld, slc_tst = slicer(x_data.shape, idxs)

    # Get data
    x_train = x_data[slc_trn[0]]
    y_train = y_data[slc_trn[0]]
    x_val = x_data[slc_vld[0]]
    y_val = y_data[slc_vld[0]]

    conv_shape = y_train.shape[1:3]

    # Strides cfg
    strd = [2, 2, 5, 5]

    # Limits and options
    epochs = 60
    # Filters
    flt_lm = [[4, 128], [4, 128], [4, 128]]
    d_lm = [1, 50]
    # Kernel
    k_lm = [3, 5]
    # Regularizer
    l2_lm = [1e-7, 1e-3]
    # Activation functions
    act_opts = ["relu", "elu", "tanh", "linear"]
    # Latent space cfg
    lt_sz = [5, 150]
    lt_dv = [0.3, 0.7]
    # Learning rate
    lm_lr = [1e-5, 1e-1]

    # Clear tensorflow session
    tf.keras.backend.clear_session()

    # Input
    inputs = layers.Input(shape=x_train.shape[1:])

    # Decoder
    n_layers = trial.suggest_int("n_layers", 1, 3)
    flt = trial.suggest_int("nl_flt", d_lm[0], d_lm[1])
    # Reduction from output
    red = np.prod(strd[:n_layers])
    # Decoder first shape
    lt_shp = (np.array(conv_shape) / red).astype(int)
    # Decoder dense size
    n_flat = np.prod(lt_shp) * flt
    # Format stride list
    strd = strd[::-1][-n_layers:]

    # Latent -> Decoder layer
    # Activation
    act_lt = trial.suggest_categorical("lt_activation", act_opts)
    # Regularization
    l2_lt = trial.suggest_loguniform("lt_l2", l2_lm[0], l2_lm[1])
    l2_reg = regularizers.l2(l=l2_lt)
    # Flat input to the decoder
    d = layers.Dense(
        n_flat,
        activation=act_lt,
        kernel_regularizer=l2_reg,
        name="l1_dense_decoder",
    )(inputs)
    # Reshape to the output of the encoder
    d = layers.Reshape(list(lt_shp) + [flt])(d)

    # Dropout disabled
    dp = 0
    # Generate the convolutional layers
    for i in range(n_layers):
        # Get number of filters
        flt = trial.suggest_int("n{}_flt".format(i), flt_lm[i][0], flt_lm[i][1])
        # Get the kernel size
        k_sz = trial.suggest_categorical("d{}_kernel_size".format(i), k_lm)
        # Get the activation function
        act = trial.suggest_categorical("d{}_activation".format(i), act_opts)
        # Regularization value
        l2 = trial.suggest_loguniform("d{}_l2".format(i), l2_lm[0], l2_lm[1])
        l2_reg = regularizers.l2(l=l2)
        # Convolutional layer
        d = layers.Conv2DTranspose(
            flt,
            (k_sz, k_sz),
            strides=strd[i],
            activation=act,
            padding="same",
            kernel_regularizer=l2_reg,
            name="{}_decoder".format(i + 1),
        )(d)
        # Dropout layers
        if dp > 0:
            d = layers.Dropout(dp, name="{}_dropout_decoder".format(i + 1))(d)

    decoded = layers.Conv2DTranspose(
        y_train.shape[3],
        (5, 5),
        activation="linear",
        padding="same",
        name="output_decoder",
    )(d)
    ae = Model(inputs, decoded, name="Decoder_nxt")

    # Early stopping monitoring the loss of the validation dataset
    monitor = "val_loss_norm_error"
    patience = int(epochs * 0.3)
    es = EarlyStopping(
        monitor=monitor, mode="min", patience=patience, restore_best_weights=True
    )

    opt = "adam"
    if opt == "adam":
        k_optf = optimizers.Adam
    elif opt == "nadam":
        k_optf = optimizers.Nadam
    elif opt == "adamax":
        k_optf = optimizers.Adamax

    lr = trial.suggest_loguniform("lr", lm_lr[0], lm_lr[1])
    if lr > 0:
        k_opt = k_optf(learning_rate=lr)
    else:
        k_opt = k_optf()

    ae.compile(optimizer=k_opt, loss=loss_norm_error, metrics=["mse", loss_norm_error])

    batch_size = int(trial.suggest_uniform("batch_sz", 2, 32))
    ae.summary()
    hist = ae.fit(
        x_train,
        y_train,
        epochs=epochs,
        batch_size=batch_size,
        shuffle=True,
        validation_data=(x_val, y_val),
        callbacks=[KerasPruningCallback(trial, "val_loss_norm_error"), es],
        verbose=1,
    )

    txt = PREFIX + SUFFIX
    ae.save(txt.format(RUN_VERSION, trial.number))
    return min(hist.history["val_loss_norm_error"])
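# `loss_norm_error` is imported from the project's helpers and not shown in
# this file. Given that it is used both as the loss and as a metric, a
# plausible reading is a norm-relative reconstruction error; the formula
# below is an assumption, not the actual implementation:
from tensorflow.keras import backend as K


def loss_norm_error(y_true, y_pred):
    # L2 norm of the error, normalized by the L2 norm of the target
    return K.sqrt(K.sum(K.square(y_true - y_pred))) / K.sqrt(K.sum(K.square(y_true)))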
n_valid = 0.1

# Select the variable to train
# 0: Temperature - 1: Pressure - 2: Velocity - None: all
var = 2

# %%
# Open data file
f = h5py.File(dt_fl, "r")
dt = f[dt_dst]

x_data, y_data = format_data(dt, wd=3, var=var, get_y=True, cont=True)

# Split data file
idxs = split(x_data.shape[0], n_train, n_valid)
slc_trn, slc_vld, slc_tst = slicer(x_data.shape, idxs)
# Slice data
x_train = x_data[slc_trn]
x_val = x_data[slc_vld]

slc_trn, slc_vld, slc_tst = slicer(y_data.shape, idxs)
y_train = y_data[slc_trn]
y_val = y_data[slc_vld]

# %%
# LSTM neural network settings
# Activation function
act = "tanh"
# Convolutional layers: number of filters of each layer
flt = [20, 20, 20, 30]
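# `format_data` is another project helper that is not shown here. From the
# call sites (wd = window width, var = variable index, get_y, cont), it
# likely builds sliding windows of snapshots with the following snapshot as
# the target. Everything in this sketch, including how `cont` is handled,
# is an assumption rather than the real code:
def format_data(dt, wd=3, var=None, get_y=False, cont=False):
    data = np.asarray(dt)
    if var is not None:
        # Keep a single variable (channel) of the field
        data = data[..., var : var + 1]
    # Windows of `wd` consecutive snapshots
    x = np.stack([data[i : i + wd] for i in range(data.shape[0] - wd)])
    if not get_y:
        return x, None
    # Target: the snapshot that follows each window (`cont`, presumably a
    # flag for treating the cases as one continuous run, is ignored here)
    y = data[wd:]
    return x, y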
def objective(trial):
    # Open data file
    f = h5py.File(DT_FL, "r")
    dt = f[DT_DST]

    # Split data and get slices
    idxs = split(dt.shape[0], N_TRAIN, N_VALID)
    slc_trn, slc_vld, slc_tst = slicer(dt.shape, idxs)

    # Get data
    x_train = dt[slc_trn]
    x_val = dt[slc_vld]

    # Limits and options
    # Filters
    # flt_lm = [4, 128]
    flt_lm = [[4, 128], [4, 128], [4, 128]]
    # Kernel
    k_lm = [3, 5]
    # Regularizer
    l2_lm = [1e-7, 1e-3]
    # Activation functions
    act_opts = ["relu", "elu", "tanh", "linear"]
    # Latent space cfg
    lt_sz = [5, 150]
    lt_dv = [0.3, 0.7]
    # Learning rate
    lm_lr = [1e-5, 1e-2]

    # Clear tensorflow session
    tf.keras.backend.clear_session()

    # Input
    inputs = layers.Input(shape=x_train.shape[1:])
    e = inputs

    # Encoder
    flt, k_sz, act, l2 = [], [], [], []
    strd = [2, 2, 5]
    # n_layers = trial.suggest_int("n_layers", 2, 3)
    n_layers = 3
    # Dropout disabled
    dp = 0
    for i in range(n_layers):
        # Get values
        flt += [trial.suggest_int("n{}_flts".format(i), flt_lm[i][0], flt_lm[i][1])]
        k_sz += [trial.suggest_categorical("e{}_kernel_size".format(i), k_lm)]
        act += [trial.suggest_categorical("e{}_activation".format(i), act_opts)]
        l2 += [trial.suggest_loguniform("e{}_l2".format(i), l2_lm[0], l2_lm[1])]
        l2_reg = regularizers.l2(l=l2[-1])
        # l2_reg = regularizers.l2(l=0)
        # Set layer
        e = layers.Conv2D(
            flt[-1],
            (k_sz[-1], k_sz[-1]),
            strides=strd[i],
            activation=act[-1],
            padding="same",
            kernel_regularizer=l2_reg,
            name="{}_encoder".format(i + 1),
        )(e)
        # Side branch: compress the first encoder output to one channel
        if i == 0:
            ed = layers.Conv2D(
                1,
                (1, 1),
                padding="same",
                kernel_regularizer=l2_reg,
                name="l2_input",
            )(e)
        # Dropout
        if dp > 0:
            e = layers.Dropout(dp, name="{}_dropout_encoder".format(i + 1))(e)

    # Latent space
    act_lt = trial.suggest_categorical("lt_activation", act_opts)
    l2_lt = trial.suggest_loguniform("lt_l2", l2_lm[0], l2_lm[1])
    l2_reg = regularizers.l2(l=l2_lt)
    sz_lt = trial.suggest_int("lt_sz", lt_sz[0], lt_sz[1])
    dv_lt = trial.suggest_uniform("lt_div", lt_dv[0], lt_dv[1])
    # Dense latent sizes
    latent_1 = int(sz_lt * dv_lt)
    latent_2 = sz_lt - latent_1
    lt1 = layers.Flatten()(e)
    lt1 = layers.Dense(
        latent_1, activation=act_lt, kernel_regularizer=l2_reg, name="l1_latent"
    )(lt1)
    lt2 = layers.Flatten()(ed)
    lt2 = layers.Dense(
        latent_2, activation=act_lt, kernel_regularizer=l2_reg, name="l2_latent"
    )(lt2)

    # Decoder
    # Flat input to the decoder
    n_flat = np.prod(backend.int_shape(e)[1:])
    d = layers.Dense(
        n_flat, activation=act_lt, kernel_regularizer=l2_reg, name="l1_dense_decoder"
    )(lt1)
    # Consider using only one filter with convolution
    # Reshape to the output of the encoder
    d = layers.Reshape(backend.int_shape(e)[1:])(d)

    # Generate the convolutional layers
    for i in range(n_layers):
        # Settings index
        j = -i - 1
        # Set the regularizer
        l2_reg = regularizers.l2(l=l2[j])
        # Add the latent space
        if i == n_layers - 1:
            d1 = layers.Dense(
                np.prod(backend.int_shape(ed)[1:]),
                activation="linear",
                kernel_regularizer=l2_reg,
                name="l2_dense_decoder",
            )(lt2)
            d1 = layers.Reshape(
                backend.int_shape(ed)[1:], name="l2_reshape_decoder"
            )(d1)
            d1 = layers.Conv2D(
                flt[j + 1],
                (1, 1),
                padding="same",
                name="l2_compat_decoder",
                kernel_regularizer=l2_reg,
            )(d1)
            d = layers.Add()([d1, d])
        # Convolutional layer
        d = layers.Conv2DTranspose(
            flt[j],
            (k_sz[j], k_sz[j]),
            strides=strd[j],
            activation=act[j],
            padding="same",
            kernel_regularizer=l2_reg,
            name="{}_decoder".format(i + 1),
        )(d)
        # Dropout layers
        if dp > 0:
            d = layers.Dropout(dp, name="{}_dropout_decoder".format(i + 1))(d)

    decoded = layers.Conv2DTranspose(
        x_train.shape[-1],
        (5, 5),
        activation="linear",
        padding="same",
        kernel_regularizer=l2_reg,
        name="output_decoder",
    )(d)
    ae = Model(inputs, decoded,
name="auto_encoder_add") opt = "adam" if opt == "adam": k_optf = optimizers.Adam elif opt == "nadam": k_optf = optimizers.Nadam elif opt == "adamax": k_optf = optimizers.Adamax lr = trial.suggest_loguniform("lr", lm_lr[0], lm_lr[1]) if lr > 0: k_opt = k_optf(learning_rate=lr) else: k_opt = k_optf() ae.compile(optimizer=k_opt, loss=loss_norm_error, metrics=["mse", loss_norm_error]) batch_size = int(trial.suggest_uniform("batch_sz", 2, 32)) ae.summary() hist = ae.fit( x_train, x_train, epochs=30, batch_size=batch_size, shuffle=True, validation_data=(x_val, x_val), callbacks=[KerasPruningCallback(trial, "val_loss_norm_error")], verbose=1, ) txt = PREFIX + SUFFIX ae.save(txt.format(RUN_VERSION, trial.number)) return hist.history["val_loss_norm_error"][-1]
def objective(trial):
    # Open data file
    f = h5py.File(DT_FL, "r")
    dt = f[DT_DST]

    # Format data for LSTM training
    x_data, y_data = format_data(dt, wd=WD, get_y=True)
    x_data = np.squeeze(x_data)

    # Split data and get slices
    idxs = split(x_data.shape[0], N_TRAIN, N_VALID)
    slc_trn, slc_vld, slc_tst = slicer(x_data.shape, idxs)

    # Get data -- the targets are increments relative to the inputs
    x_train = x_data[slc_trn[0]]
    y_train = y_data[slc_trn[0]] - x_train
    x_val = x_data[slc_vld[0]]
    y_val = y_data[slc_vld[0]] - x_val

    # Limits and options
    # Number of units
    # n_lstm = [[4, 128], [4, 128], [4, 128]]
    n_lstm = [[4, 196], [4, 196], [4, 196]]
    # Regularizer
    l2_lm = [1e-7, 1e-3]
    # Activation functions
    act_opts = ["relu", "elu", "tanh", "linear"]
    # Latent space cfg
    lt_sz = [5, 150]
    lt_dv = [0.3, 0.7]
    # Learning rate
    lm_lr = [1e-5, 1]

    # Clear tensorflow session
    tf.keras.backend.clear_session()

    # Input
    inputs = layers.Input(shape=x_train.shape[1:])
    p = inputs

    # Dense layers
    # n_lyr_dense = trial.suggest_int("n_lyr_dense", 0, 2)
    n_lyr_dense = trial.suggest_int("n_lyr_dense", 1, 3)
    for i in range(n_lyr_dense):
        # For the current layer
        # Get the number of units
        n = trial.suggest_int("n{}_dense".format(i), n_lstm[i][0], n_lstm[i][1])
        # Get the activation function
        act = trial.suggest_categorical("d{}_activation".format(i), act_opts)
        # Regularization value
        l2 = trial.suggest_loguniform("d{}_l2".format(i), l2_lm[0], l2_lm[1])
        l2_reg = regularizers.l2(l=l2)
        # Set layer
        p = layers.Dense(
            n,
            activation=act,
            # kernel_regularizer=l2_reg,
            name="{}_dense".format(i + 1),
        )(p)
        # Dropout
        dp = trial.suggest_uniform("d{}_dropout".format(i), 0, 1)
        p = layers.Dropout(dp, name="{}_dropout_dense".format(i + 1))(p)
        bn = trial.suggest_categorical("d{}_batchnorm".format(i), [0, 1])
        if bn == 1:
            p = layers.BatchNormalization(name="{}_bnorm_dense".format(i + 1))(p)

    out = layers.Dense(y_data.shape[1], activation="linear")(p)
    pred = Model(inputs, out, name="dense_predictor")

    # opt_opts = ["adam", "nadam", "adamax", "RMSprop"]
    # opt = trial.suggest_categorical("optimizer", opt_opts)
    opt = "adam"
    if opt == "adam":
        k_optf = optimizers.Adam
    elif opt == "nadam":
        k_optf = optimizers.Nadam
    elif opt == "adamax":
        k_optf = optimizers.Adamax
    elif opt == "RMSprop":
        k_optf = optimizers.RMSprop

    lr = trial.suggest_loguniform("lr", lm_lr[0], lm_lr[1])
    if lr > 0:
        k_opt = k_optf(learning_rate=lr)
    else:
        k_opt = k_optf()

    pred.compile(optimizer=k_opt, loss="mse", metrics=["mse", loss_norm_error])

    batch_size = int(trial.suggest_uniform("batch_sz", 2, 32))
    pred.summary()
    hist = pred.fit(
        x_train,
        y_train,
        epochs=100,
        batch_size=batch_size,
        shuffle=True,
        validation_data=(x_val, y_val),
        callbacks=[KerasPruningCallback(trial, "val_mse")],
        verbose=1,
    )

    txt = PREFIX + SUFFIX
    pred.save(txt.format(RUN_VERSION, trial.number))
    return hist.history["val_mse"][-1]
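# Since the targets above are increments (y - x), recovering the next
# snapshot at inference time means adding the network output back onto its
# input. An illustrative one-liner, reusing `pred` and `x_val` from the
# objective:
x_next = x_val + pred.predict(x_val)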
def objective(trial):
    # Open data file
    f = h5py.File(DT_FL, "r")
    dt = f[DT_DST]

    # Target: the time derivative of the state, computed case by case
    y_data = np.empty_like(dt)
    for idx in dt.attrs["idx"]:
        y_data[idx[0]:idx[1]] = np.gradient(dt[idx[0]:idx[1]], 10, axis=0)

    # Split data file
    idxs = split(dt.shape[0], N_TRAIN, N_VALID, test_last=dt.attrs["idx"])
    slc_trn, slc_vld, slc_tst = slicer(dt.shape, idxs)

    # Slice data
    x_train = dt[slc_trn]
    y_train = y_data[slc_trn]
    x_val = dt[slc_vld]
    y_val = y_data[slc_vld]

    # Limits and options
    epochs = 500
    # Neurons per layer (one range per possible layer)
    n_n = [[30, 150], [30, 150], [30, 150]]
    # Regularizer
    l2_lm = [1e-7, 1e-2]
    # Activation functions
    act_opts = ["relu", "elu", "tanh", "linear"]
    # Learning rate
    lm_lr = [1e-5, 1e-1]

    # Clear tensorflow session
    tf.keras.backend.clear_session()

    # Input
    inputs = layers.Input(shape=x_train.shape[1:])
    d = inputs

    # FCNN
    n_layers = trial.suggest_int("n_layers", 1, 3)
    for i in range(n_layers):
        # For the current layer
        # Get the number of neurons
        n = trial.suggest_int("l{}_n_neurons".format(i), n_n[i][0], n_n[i][1])
        # Get the activation function
        act = trial.suggest_categorical("l{}_activation".format(i), act_opts)
        # Regularization value
        l2 = trial.suggest_loguniform("l{}_l2".format(i), l2_lm[0], l2_lm[1])
        l2_reg = regularizers.l2(l=l2)
        # Set layer
        d = layers.Dense(
            n,
            activation=act,
            kernel_regularizer=l2_reg,
            name="l{}_fc".format(i),
        )(d)

    dd = layers.Dense(x_train.shape[1], activation="linear")(d)
    fcnn = Model(inputs, dd, name="FCNN")

    # Early stopping monitoring the loss of the validation dataset
    monitor = "val_loss_norm_error"
    patience = int(epochs * 0.1)
    es = EarlyStopping(
        monitor=monitor, mode="min", patience=patience, restore_best_weights=True
    )

    opt = "adam"
    if opt == "adam":
        k_optf = optimizers.Adam
    elif opt == "nadam":
        k_optf = optimizers.Nadam
    elif opt == "adamax":
        k_optf = optimizers.Adamax

    lr = trial.suggest_loguniform("lr", lm_lr[0], lm_lr[1])
    if lr > 0:
        k_opt = k_optf(learning_rate=lr)
    else:
        k_opt = k_optf()

    fcnn.compile(
        optimizer=k_opt, loss=loss_norm_error, metrics=["mse", loss_norm_error]
    )

    batch_size = int(trial.suggest_uniform("batch_sz", 2, 32))
    fcnn.summary()
    hist = fcnn.fit(
        x_train,
        y_train,
        epochs=epochs,
        batch_size=batch_size,
        shuffle=True,
        validation_data=(x_val, y_val),
        callbacks=[KerasPruningCallback(trial, "val_loss_norm_error"), es],
        verbose=1,
    )

    txt = PREFIX + SUFFIX
    fcnn.save(txt.format(RUN_VERSION, trial.number))
    return hist.history["val_loss_norm_error"][-1]
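# The FCNN above regresses the time derivative of the state (np.gradient
# with a spacing of 10), so a trained model can advance a trajectory with an
# explicit Euler step. A hedged sketch reusing `fcnn` and `x_val` from the
# objective; the step size matching np.gradient's spacing is an assumption:
dt_step = 10.0
x = x_val[:1]
for _ in range(5):
    # x_{t+1} = x_t + dt * f(x_t)
    x = x + dt_step * fcnn.predict(x)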
from utils import devide_to_train_test, slicer

if __name__ == "__main__":
    devide_to_train_test()
    slicer()