def train_model(
    sample_size: int,
    workers: int,
    random_optimizer: bool,
    experiment: mlflow.entities.experiment.Experiment,
) -> None:
    logger.info("Load IMDB reviews")
    df_train, _ = load_data(folder=base_folder, sample_size=sample_size)

    # Anonymize the data before the pipeline, since this step is slow and constant
    logger.info("Preprocess reviews with spaCy. This may take a while...")
    anonymized_reviews = Anonymizer().transform(df_train.review)

    # Perform hyperparameter optimization
    optimizer = optimize(
        X=anonymized_reviews,
        y=df_train.sentiment,
        workers=workers,
        random_optimizer=random_optimizer,
    )

    # Log results to MLflow
    logger.info("Write results to MLflow experiment: %s", experiment.name)
    mlflow_sklearn_logging(
        optimizer=optimizer,
        experiment_id=experiment.experiment_id,
        sample_size=sample_size,
        data=base_folder / "train.csv",
    )
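# A minimal sketch of what `optimize` above might look like. This is
# hypothetical, since its body is not shown here: it assumes `random_optimizer`
# toggles between scikit-learn's RandomizedSearchCV and GridSearchCV over an
# assumed TF-IDF + logistic-regression pipeline; the real pipeline and search
# space may differ.
def optimize(X, y, workers, random_optimizer):
    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
    from sklearn.pipeline import Pipeline

    pipeline = Pipeline(
        [("tfidf", TfidfVectorizer()), ("clf", LogisticRegression(max_iter=1000))]
    )
    params = {  # assumed search space
        "tfidf__min_df": [1, 5, 10],
        "clf__C": [0.01, 0.1, 1.0, 10.0],
    }
    if random_optimizer:
        search = RandomizedSearchCV(pipeline, params, n_iter=10, cv=5, n_jobs=workers)
    else:
        search = GridSearchCV(pipeline, params, cv=5, n_jobs=workers)
    # Return the fitted search object, as consumed by mlflow_sklearn_logging
    return search.fit(X, y)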
def train_model(
    sample_size: int,
    workers: int,
    trials: int,
    experiment: mlflow.entities.experiment.Experiment,
) -> None:
    logger.info("Load IMDB reviews")
    df_train, _ = load_data(folder=base_folder, sample_size=sample_size)

    # Anonymize the data before the pipeline, since this step is slow and constant
    logger.info("Preprocess reviews with spaCy. This may take a while...")
    anonymized_reviews = Anonymizer().transform(df_train.review)

    logger.info("Explore search space")
    study = optuna.create_study(direction="maximize")
    study.set_user_attr(key="sample_size", value=sample_size)
    study.set_user_attr(key="experiment", value=experiment)
    study.set_user_attr(key="data", value=base_folder / "train.csv")

    # Perform hyperparameter optimization and log results
    study.optimize(
        lambda trial: objective(
            trial,
            X=anonymized_reviews,
            y=df_train.sentiment,
            workers=workers,
        ),
        n_trials=trials,
        callbacks=[terminal_logging, mlflow_optuna_logging],
    )
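# A minimal sketch of the `objective` referenced above. This is hypothetical,
# since its body is not shown here: it assumes the trial samples
# hyperparameters for an assumed TF-IDF + logistic-regression pipeline and
# returns the mean cross-validated score that the study maximizes.
def objective(trial, X, y, workers):
    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import cross_val_score
    from sklearn.pipeline import Pipeline

    pipeline = Pipeline(
        [
            ("tfidf", TfidfVectorizer(min_df=trial.suggest_int("min_df", 1, 10))),
            ("clf", LogisticRegression(
                C=trial.suggest_float("C", 1e-3, 1e2, log=True),
                max_iter=1000,
            )),
        ]
    )
    # Mean CV accuracy; Optuna maximizes this because direction="maximize"
    return cross_val_score(pipeline, X, y, cv=5, n_jobs=workers).mean()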
import sys
from os import path

import numpy as np
from keras.models import Sequential
from keras.layers.convolutional import Convolution2D, Convolution3D
from keras.layers.recurrent_convolutional import LSTMConv2D
from keras.layers.core import TimeDistributedDense
from keras.callbacks import ModelCheckpoint

sys.path.append(path.dirname(path.dirname(path.abspath(__file__))))
from helpers.preprocessing import load_data
from helpers.util import shuffle_in_unison_inplace

# Load processed data
all_data = np.loadtxt('../final_weekly_data_250.txt')
# Load data preprocessed with averages and probabilities (avg and proba)
input_data = load_data('weekly', 250)

height = 107
width = 72
input_length = 3

# Halve the grid dimensions (integer division); if the half-height is odd,
# round it up so the split is even
height_red = height // 2
width_red = width // 2
height_red = height_red + 1 if (height_red % 2) == 1 else height_red

# Reshape to samples x height x width x channel
all_data = all_data.reshape((all_data.shape[0], height, width, 1))
input_data = input_data.reshape(all_data.shape)

# Keep only the SE part
SE_input = input_data[:, height_red:, 0:width_red, :]
import sys
from os import path

import numpy as np
from keras.models import Sequential
from keras.layers.convolutional import Convolution2D, Convolution3D
from keras.layers.recurrent_convolutional import LSTMConv2D
from keras.layers.core import TimeDistributedDense
from keras.callbacks import ModelCheckpoint

sys.path.append(path.dirname(path.dirname(path.abspath(__file__))))
from helpers.preprocessing import load_data
from helpers.util import shuffle_in_unison_inplace

# Load processed data
all_data = np.loadtxt('../final_weekly_data_750.txt')
# Load data preprocessed with averages and probabilities (avg and proba)
input_data = load_data('weekly', 750)

height = 36
width = 24
input_length = 3

# Halve the grid dimensions (integer division); the odd-height adjustment is
# not needed at this resolution
height_red = height // 2
width_red = width // 2
# height_red = height_red + 1 if (height_red % 2) == 1 else height_red

# Reshape to samples x height x width x channel
all_data = all_data.reshape((all_data.shape[0], height, width, 1))
input_data = input_data.reshape(all_data.shape)

# Keep only the SE part
SE_input = input_data[:, height_red:, 0:width_red, :]
import sys
from os import path

import numpy as np
from keras.models import Sequential
from keras.layers.convolutional import Convolution2D, Convolution3D
from keras.layers.recurrent_convolutional import LSTMConv2D
from keras.layers.core import TimeDistributedDense
from keras.callbacks import ModelCheckpoint

sys.path.append(path.dirname(path.dirname(path.abspath(__file__))))
from helpers.preprocessing import load_data
from helpers.util import shuffle_in_unison_inplace

# Load processed data
all_data = np.loadtxt('../final_weekly_data_500.txt')
# Load data preprocessed with averages and probabilities (avg and proba)
input_data = load_data('weekly', 500)

height = 54
width = 36
input_length = 3

# Halve the grid dimensions (integer division); the odd-height adjustment is
# not needed at this resolution
height_red = height // 2
width_red = width // 2
# height_red = height_red + 1 if (height_red % 2) == 1 else height_red

# Reshape to samples x height x width x channel
all_data = all_data.reshape((all_data.shape[0], height, width, 1))
input_data = input_data.reshape(all_data.shape)

# Keep only the SE part
SE_input = input_data[:, height_red:, 0:width_red, :]
import sys
from os import path

import numpy as np
from keras.models import Sequential
from keras.layers.convolutional import Convolution2D, Convolution3D
from keras.layers.recurrent_convolutional import LSTMConv2D
from keras.layers.core import TimeDistributedDense
from keras.callbacks import ModelCheckpoint

sys.path.append(path.dirname(path.dirname(path.abspath(__file__))))
from helpers.preprocessing import load_data
from helpers.util import shuffle_in_unison_inplace

# Load processed data
all_data = np.loadtxt('../final_daily_data_500.txt')
# Load data preprocessed with averages and probabilities (avg and proba)
input_data = load_data('daily', 500)

height = 54
width = 36
input_length = 3

# Halve the grid dimensions (integer division); the odd-height adjustment is
# not needed at this resolution
height_red = height // 2
width_red = width // 2
# height_red = height_red + 1 if (height_red % 2) == 1 else height_red

# Reshape to samples x height x width x channel
all_data = all_data.reshape((all_data.shape[0], height, width, 1))
input_data = input_data.reshape(all_data.shape)

# Keep only the SE part
SE_input = input_data[:, height_red:, 0:width_red, :]
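# A minimal sketch of how the otherwise-unused `input_length = 3` above is
# presumably applied. This is an assumption, since the windowing code is not
# shown in these scripts: stack `input_length` consecutive frames into
# (samples, time, height, width, channels) sequences, the input shape a
# convolutional-recurrent layer such as LSTMConv2D expects, with the frame
# that follows each window as the prediction target.
def make_sequences(frames, input_length):
    """Build sliding windows over the leading (time) axis of `frames`."""
    # frames: (num_frames, height, width, channels)
    X = np.stack(
        [frames[i:i + input_length] for i in range(len(frames) - input_length)]
    )  # -> (samples, input_length, height, width, channels)
    y = frames[input_length:]  # the frame following each window
    return X, y

seq_X, seq_y = make_sequences(SE_input, input_length)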