def _data_generator(self, mode=DataGeneratorMode.training):
    """Yield an endless stream of ``(x, y)`` batches for the embedding model.

    ``x`` is built by mapping every song of a playlist through
    ``self.vocab_index`` and passing the batch to ``pad_sequences`` with
    ``maxlen=self.max_length``.  ``y`` contains, for every playlist, the row
    of ``self.embeddings_matrix`` selected by the target song's vocabulary
    index.

    Args:
        mode: ``DataGeneratorMode.training`` selects the training part of
            the train/validation split, ``DataGeneratorMode.validation``
            selects the validation part, and any other value selects the
            test data returned by ``load(self.data_mode)``.

    Yields:
        ``(x, y)`` tuples.  When a batch cannot be built (end of data or an
        unexpected error) the position is reset to the start and the
        previously built batch is yielded again; before any batch has been
        built this is a pair of empty lists.
    """

    def split_training_data():
        # Split the training data once and cache both parts on the instance
        # so that training and validation generators see disjoint samples.
        (x_train, y_train), (_, _), _ = load(self.data_mode)
        pairs = list(zip(x_train, y_train))
        pos = int(len(x_train) * self.validation_split)

        def unzip(chunk):
            # zip(*[]) produces nothing, which cannot be unpacked into two
            # names, so an empty side of the split is handled explicitly.
            return tuple(zip(*chunk)) if chunk else ((), ())

        self.validation_x, self.validation_y = unzip(pairs[:pos])
        self.train_x, self.train_y = unzip(pairs[pos:])

    if mode in (DataGeneratorMode.training, DataGeneratorMode.validation):
        # The split is created by whichever generator runs first; the data
        # each mode receives no longer depends on that order.
        if len(self.train_x) == 0 and len(self.validation_x) == 0:
            split_training_data()
        if mode == DataGeneratorMode.validation:
            inputs, outputs = self.validation_x, self.validation_y
        else:
            inputs, outputs = self.train_x, self.train_y
    else:
        (_, _), (x_test, y_test), _ = load(self.data_mode)
        inputs, outputs = x_test, y_test

    def get_batch(start):
        # Build one full batch beginning at ``start``.  A batch containing a
        # song that is missing from the vocabulary is skipped as a whole and
        # the next batch is tried (iteratively, so a long run of such
        # batches cannot exhaust the recursion limit).  IndexError from
        # running past the end of the data propagates to the caller.
        while True:
            try:
                vectorized_inp = []
                vectorized_out = []
                for offset in range(self.batch_size):
                    playlist = inputs[start + offset]
                    vectorized_inp.append(
                        [self.vocab_index[str(song)] for song in playlist])
                    vectorized_out.append(
                        self.embeddings_matrix[self.vocab_index[str(
                            outputs[start + offset])]])
            except KeyError:
                start += self.batch_size
                continue
            x = np.asarray(
                pad_sequences(vectorized_inp, maxlen=self.max_length))
            y = np.asarray(vectorized_out)
            return x, y, start

    index = 0
    last_batch_x, last_batch_y = [], []
    while True:
        try:
            last_batch_x, last_batch_y, index = get_batch(index)
            index += self.batch_size
        except IndexError:
            # Ran past the end of the data: start over on the next round.
            index = 0
        except Exception as exp:
            # The message needs a placeholder; without it the logging module
            # fails while formatting and the error text is lost.
            logging.error('Data generator error: %s', exp)
            index = 0
        yield last_batch_x, last_batch_y
def gen_training_data():
    """Split the training data into validation/training parts and cache them.

    ``self`` is not a parameter of this function; it is resolved from the
    surrounding scope.  The first ``int(len(x_train) * self.validation_split)``
    samples are stored in ``self.validation_x`` / ``self.validation_y`` and
    the remaining ones in ``self.train_x`` / ``self.train_y``.  An empty side
    of the split is stored as a pair of empty tuples.

    Returns:
        The complete, unsplit ``(x_train, y_train)`` pair as returned by
        ``load(self.data_mode)``.
    """
    (x_train, y_train), (_, _), _ = load(self.data_mode)
    pairs = list(zip(x_train, y_train))
    pos = int(len(x_train) * self.validation_split)

    def unzip(chunk):
        # zip(*[]) produces nothing, which cannot be unpacked into two names
        # (ValueError), so an empty side of the split is handled explicitly.
        return tuple(zip(*chunk)) if chunk else ((), ())

    self.validation_x, self.validation_y = unzip(pairs[:pos])
    self.train_x, self.train_y = unzip(pairs[pos:])
    return x_train, y_train
def __init__(self, mode=DatasetMode.small, model_name=ModelName.simple_gru):
    """Set hyper-parameters, read dataset statistics and build the model.

    Args:
        mode: Dataset selector; stored as ``self.data_mode`` and passed to
            ``load``.
        model_name: ``ModelName`` member choosing which architecture is
            built and which weight/log locations are used.

    Raises:
        Exception: If ``model_name`` is none of the supported members.
    """
    # Training configuration.
    self.optimizer = Adam(lr=0.001, beta_1=0.9, beta_2=0.999,
                          epsilon=1e-08, decay=0.0)
    self.activation = 'softmax'
    self.loss = 'categorical_crossentropy'
    self.metrics = top_k_accuracy_func_list([50, 100, 200, 300, 400, 500])
    self.epochs = 15
    self.batch_size = 512
    self.validation_split = 0.1
    self.data_mode = mode

    # Index of songs in x_train (or test) starts from 1 because of zero
    # padding.  Index of songs in y_train (or test) starts from zero like
    # the song hash.
    # For instance, if the index of songA is 21 in the dataset:
    #     songA's index is 22 in x_train (or test)
    #     songA's index is 21 in y_train (or test)
    # The goal is the neural network having the ability to ignore
    # zero-paddings.
    (train_inputs, _train_targets), (test_inputs, _test_targets), song_table = load(mode)
    self.max_length = max(map(len, train_inputs))
    self.song_hash = song_table
    self.train_len = len(train_inputs)
    self.test_len = len(test_inputs)

    # Filled lazily by the data generator.
    self.validation_x, self.validation_y = [], []
    self.train_x, self.train_y = [], []

    # Pick the artefact names and the builder for the requested model; the
    # builder is only invoked after both locations have been stored.
    if model_name is ModelName.simple_gru:
        weights_file = 'simple_gru_weights.best.hdf5'
        logs_dir = 'simple_gru_logs'
        build_model = self.create_simple_gru_model
    elif model_name is ModelName.bi_directional_lstm:
        weights_file = 'bi_lstm_weights.best.hdf5'
        logs_dir = 'bi_lstm_logs'
        build_model = self.create_bi_lstm_model
    elif model_name is ModelName.attention_bilstm:
        weights_file = 'ablstm_model_weights.best.hdf5'
        logs_dir = 'ablstm_model_logs'
        build_model = self.create_ablstm_model
    else:
        raise Exception("Unknown Model Name; ", model_name)

    self.weights_loc = TRAINED_MODELS_BASE_PATH + weights_file
    self.tb_logs_path = LOGS_BASE_PATH + logs_dir
    self.model = build_model()

    self.callbacks = self._get_callback_functions()
def __init__(self, mode=DatasetMode.small, model_name=ModelName.simple_gru):
    """Set hyper-parameters, load song embeddings and build the model.

    Args:
        mode: Dataset selector; stored as ``self.data_mode`` and passed to
            ``load``.
        model_name: ``ModelName`` member choosing which architecture is
            built and which weight/log locations are used.

    Raises:
        Exception: If ``model_name`` is none of the supported members.
    """
    # Training configuration.
    self.optimizer = RMSprop(lr=0.001, rho=0.9, epsilon=None, decay=0.0)
    self.activation = 'tanh'
    self.loss = 'cosine'
    self.metrics = [
        'mae',
        'mse',
        'acc',
        'mape',
        'cosine',
        'categorical_crossentropy',
    ]
    self.epochs = 50
    self.batch_size = 512
    self.validation_split = 0.1
    self.data_mode = mode

    # Vocabulary index and embedding matrix provided by SongEmbeddings.
    vocabulary, matrix = SongEmbeddings().get_embeddings_matrix()
    self.embeddings_matrix = matrix
    self.vocab_index = vocabulary

    # Filled lazily by the data generator.
    self.validation_x, self.validation_y = [], []
    self.train_x, self.train_y = [], []

    # Index of songs in x_train (or test) starts from 1 because of zero
    # padding.  Index of songs in y_train (or test) starts from zero like
    # the song hash.
    # For instance, if the index of songA is 21 in the dataset:
    #     songA's index is 22 in x_train (or test)
    #     songA's index is 21 in y_train (or test)
    # The goal is the neural network having the ability to ignore
    # zero-paddings.
    (train_inputs, _train_targets), (test_inputs, _), song_table = load(mode)
    self.max_length = max(map(len, train_inputs))
    self.song_hash = song_table
    self.train_len = len(train_inputs)
    self.test_len = len(test_inputs)

    # Pick the artefact names and the builder for the requested model; the
    # builder is only invoked after both locations have been stored.
    if model_name is ModelName.simple_gru:
        weights_file = 'e_simple_gru_weights.best.hdf5'
        logs_dir = 'e_simple_gru_logs'
        build_model = self._create_simple_gru_model
    elif model_name is ModelName.bi_directional_lstm:
        weights_file = 'e_bi_lstm_weights.best.hdf5'
        logs_dir = 'e_bi_lstm_logs'
        build_model = self._create_bi_lstm_model
    elif model_name is ModelName.attention_bilstm:
        weights_file = 'e_ablstm_model_weights.best.hdf5'
        logs_dir = 'e_ablstm_model_logs'
        build_model = self._create_ablstm_model
    else:
        raise Exception("Unknown Model Name; ", model_name)

    self.weights_loc = TRAINED_MODELS_BASE_PATH + weights_file
    self.tb_logs_path = LOGS_BASE_PATH + logs_dir
    self.model = build_model()

    self.callbacks = self._get_callback_functions()
def _data_generator(self, mode=DataGeneratorMode.training):
    """Yield an endless stream of ``(x, y)`` batches with one-hot targets.

    ``x`` is the current slice of playlists passed through
    ``sequence.pad_sequences`` with ``maxlen=self.max_length`` and converted
    to an ``int64`` array.  ``y`` is the matching slice of targets passed
    through ``to_categorical`` with ``len(self.song_hash) + 1`` classes.

    Args:
        mode: ``DataGeneratorMode.training`` selects the training part of
            the train/validation split, ``DataGeneratorMode.validation``
            selects the validation part, and any other value selects the
            test data returned by ``load(self.data_mode)``.

    Yields:
        ``(x, y)`` tuples.  The last batch of a pass may be shorter than
        ``self.batch_size``.  If building a batch fails, the error is
        logged, the position is reset to the start and the previously built
        batch is yielded again.
    """

    def split_training_data():
        # Split the training data once and cache both parts on the instance
        # so that training and validation generators see disjoint samples.
        (x_train, y_train), (_, _), _ = load(self.data_mode)
        pairs = list(zip(x_train, y_train))
        pos = int(len(x_train) * self.validation_split)

        def unzip(chunk):
            # zip(*[]) produces nothing, which cannot be unpacked into two
            # names, so an empty side of the split is handled explicitly.
            return tuple(zip(*chunk)) if chunk else ((), ())

        self.validation_x, self.validation_y = unzip(pairs[:pos])
        self.train_x, self.train_y = unzip(pairs[pos:])

    if mode in (DataGeneratorMode.training, DataGeneratorMode.validation):
        # The split is created by whichever generator runs first; the data
        # each mode receives no longer depends on that order.
        if len(self.train_x) == 0 and len(self.validation_x) == 0:
            split_training_data()
        if mode == DataGeneratorMode.validation:
            inputs, outputs = self.validation_x, self.validation_y
        else:
            inputs, outputs = self.train_x, self.train_y
    else:
        (_, _), (x_test, y_test), _ = load(self.data_mode)
        inputs, outputs = x_test, y_test

    index = 0
    last_batch_x, last_batch_y = [], []
    while True:
        # Slicing past the end returns an empty sequence instead of raising
        # IndexError, so the wrap-around has to be done explicitly.
        if index >= len(inputs):
            index = 0
        try:
            stop = index + self.batch_size
            last_batch_x = np.asarray(
                sequence.pad_sequences(inputs[index:stop],
                                       maxlen=self.max_length),
                dtype="int64")
            last_batch_y = to_categorical(
                outputs[index:stop],
                len(self.song_hash) + 1)  # Zero is included
            index = stop
        except Exception as exp:
            # The message needs a placeholder; without it the logging module
            # fails while formatting and the error text is lost.
            logging.error('Data generator error: %s', exp)
            index = 0
        yield last_batch_x, last_batch_y