import numpy as np
from sklearn import preprocessing
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential

import pre_processing  # project-local helper module


def create_model():
    # Save the best-performing weights (by training accuracy) during fitting.
    checkpoint = ModelCheckpoint('sdr_model.h5', monitor='accuracy', verbose=1,
                                 save_best_only=True, mode='max')
    callbacks_list = [checkpoint]

    # Load the pre-extracted feature arrays and labels.
    gray_data = np.load("npy_data/gray_dataset.npy")
    color_data = np.load("npy_data/color_dataset.npy")
    # img_pixel_dataset = np.load("npy_data/img_pixel_dataset.npy")
    label = np.load("npy_data/label.npy")

    # Concatenate the gray and color feature sets into one dataset.
    dataset = pre_processing.npy_dataset_concatenate(gray_data, color_data)
    # corr_matrix = np.corrcoef(dataset)
    # print(corr_matrix)

    # Encode string labels as integers for sparse categorical cross-entropy.
    le = preprocessing.LabelEncoder()
    label = le.fit_transform(label)

    x_train, x_test, y_train, y_test = train_test_split(dataset, label,
                                                        test_size=0.20, shuffle=True)

    model = Sequential()
    model.add(Dense(14, input_dim=14, activation=None))
    model.add(Dense(128, activation='tanh'))
    model.add(Dense(256, activation='sigmoid'))
    model.add(Dense(3, activation='softmax'))
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    model.fit(x_train, y_train, epochs=150, verbose=0, batch_size=20,
              shuffle=True, callbacks=callbacks_list)

    # Sequential.predict_classes was removed in recent Keras releases;
    # take the argmax of the softmax output instead.
    pred_y_test = np.argmax(model.predict(x_test), axis=1)
    acc_model = accuracy_score(y_test, pred_y_test)
    print("Prediction Acc model:", acc_model)
    print("Org. Labels:", y_test[:30])
    print("Pred Labels:", pred_y_test[:30])
    # c_report = classification_report(y_test, pred_y_test, zero_division=0)
    # print(c_report)
    print("\n\n")
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential


def sequential_nn_model(X_train, y_train):
    # Small fully-connected regression network; `rmsle` is a custom loss
    # expected to be defined elsewhere in the project.
    model = Sequential([
        Dense(100, activation='relu', input_shape=(X_train.shape[1],)),
        Dense(40, activation='relu'),
        Dense(20, activation='relu'),
        Dense(1, activation='relu')
    ])
    model.compile(optimizer='nadam', loss=rmsle,
                  metrics=['mean_squared_logarithmic_error'])
    hist = model.fit(X_train, y_train, epochs=50)
    return model
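The `rmsle` loss passed to `compile` above is not defined in this snippet. A minimal sketch of a Keras-compatible root-mean-squared-logarithmic-error loss, written here as an assumption rather than the project's original definition, could look like this:

import tensorflow as tf

def rmsle(y_true, y_pred):
    # Assumed implementation: RMSLE with log1p so zero-valued targets stay finite.
    y_true = tf.cast(y_true, y_pred.dtype)
    return tf.sqrt(tf.reduce_mean(
        tf.square(tf.math.log1p(y_pred) - tf.math.log1p(y_true))))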
from typing import Iterable, Union

import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential

# PredictionModel, Data and Label are provided by the surrounding project.


class KerasNeuralNetwork(PredictionModel):
    def __init__(self, layers: Iterable[int], funcs: Union[str, Iterable[str]],
                 batch_size=None, max_cores=8):
        # Limit the number of CPU devices TensorFlow may use.
        session_conf = tf.compat.v1.ConfigProto(device_count={"CPU": max_cores})
        sess = tf.compat.v1.Session(config=session_conf)
        tf.compat.v1.keras.backend.set_session(sess)

        layers = list(layers)
        # The first entry is the input dimension; the remaining entries are
        # the sizes of the dense layers. The network is built lazily on the
        # first call to fit/predict.
        self.input_shape = (layers.pop(0),)
        if isinstance(funcs, str):
            funcs = [funcs] * len(layers)
        self.__keras_network = Sequential([
            Dense(size, activation=funcs[i])
            for i, size in enumerate(layers)
        ])
        self.__keras_network.compile(
            optimizer='adam',
            loss='categorical_crossentropy',
            metrics=['accuracy'],
        )
        self.batch_size = batch_size

    def predict(self, data: Data) -> Label:
        # Accept a single sample as a 1-D array and return a flat result.
        flatten = False
        if len(data.shape) == 1:
            data = data.reshape(1, -1)
            flatten = True
        res = self.__keras_network.predict(data)
        if flatten:
            res = res.flatten()
        return res

    def train(self, data: Data, label: Label):
        return self.__keras_network.fit(data, label,
                                        batch_size=self.batch_size,
                                        verbose=False)
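A minimal usage sketch for the class above, with toy data invented for illustration. The first element of `layers` is the input dimension, and labels are one-hot encoded because the model compiles with categorical cross-entropy:

import numpy as np

# Hypothetical toy data: 200 samples, 10 features, 3 one-hot classes.
features = np.random.rand(200, 10)
labels = np.eye(3)[np.random.randint(0, 3, size=200)]

# Input dimension 10, hidden layer of 32 units, softmax output of size 3.
net = KerasNeuralNetwork(layers=[10, 32, 3], funcs=['relu', 'softmax'],
                         batch_size=16)
net.train(features, labels)
print(net.predict(features[0]))  # flattened class probabilities for one sample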
import numpy as np
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential

# `X`, `y`, `split` and `dataset` are prepared earlier in the script.
# Chronological split: the first `split` rows train the model, the rest test it.
X_train, X_test, y_train, y_test = X[:split], X[split:], y[:split], y[split:]

sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
y_train = y_train.to_numpy()

classifier = Sequential()
classifier.add(Dense(units=64, activation='relu', input_dim=X.shape[1]))
classifier.add(Dense(units=64, activation='relu'))
classifier.add(Dense(units=1, activation='sigmoid'))
classifier.compile(optimizer='adam', loss='mean_squared_error',
                   metrics=['accuracy'])
classifier.fit(X_train, y_train, batch_size=10, epochs=50)

y_pred = classifier.predict(X_test)
# y_pred = (y_pred > 0.5)
# y_pred = (np.round(y_pred * 2) - 1)
# Map the sigmoid output to a long/short signal in {-1, +1}.
y_pred = (2 * np.round(y_pred) - 1)

# Attach the predictions to the tail of the original dataset.
dataset['y_pred'] = np.nan
dataset.iloc[(len(dataset) - len(y_pred)):, -1:] = y_pred
trade_dataset = dataset.dropna()

# Log return of the next day's close relative to today's close.
trade_dataset['Tomorrows Returns'] = 0.
trade_dataset['Tomorrows Returns'] = np.log(
    trade_dataset['Close'] / trade_dataset['Close'].shift(1))
trade_dataset['Tomorrows Returns'] = trade_dataset['Tomorrows Returns'].shift(-1)
import os
import pickle

import numpy as np
import pandas as pd
from tensorflow import data
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.models import Sequential

# MultivariateData, UnivariateForecastItem, MetricCollector, ModelType,
# convert_freq, TIMESTAMP and VALUE come from the surrounding forecasting package.


class LSTMModel:
    def __init__(self, num_hidden, window, end_time, future_target_size,
                 validation_ratio, validation_freq, effective_factor,
                 mean_absolute_percentage_error=None, describe=None, epoc=10,
                 metric_sender=None):
        self.__num_hidden = num_hidden
        self.__future_target = future_target_size
        self.__window = window
        self.__epochs = epoc
        self.__mean_absolute_percentage_error = mean_absolute_percentage_error
        self.__end_time = end_time
        self.__effective_factor = effective_factor
        # One LSTM layer followed by a dense head that emits one value per
        # future step.
        self.__model = Sequential([
            LSTM(num_hidden, input_shape=(window, len(effective_factor))),
            Dense(self.__future_target)
        ])
        self.__model.compile(optimizer='adam', loss='mean_squared_error')
        self.__validation_ratio = validation_ratio
        self.__validation_freq = validation_freq
        self.__fit_model = ModelType.LSTM
        self.__describe = describe
        self.__metric_collector = MetricCollector(epochs=self.__epochs,
                                                  metric_sender=metric_sender)

    def train(self, input_data: MultivariateData, batch_size, steps_per_epoc):
        # Align all factor series on a common timestamp index and fill gaps.
        input_factors = input_data.generate_outer_join_factors()
        input_factors = MultivariateData.generate_filled_missing_frame(
            input_factors,
            input_data.get_gran(),
            input_data.get_custom_in_seconds(),
            fill_type=input_data.fill_type,
            fill_value=input_data.fill_value)
        merged_input = MultivariateData.generate_inner_join_frame(
            [input_data.get_target(), input_factors])
        input_target = merged_input[[TIMESTAMP, VALUE]]
        input_factors = merged_input.drop([TIMESTAMP, VALUE], axis=1)
        input_factors = input_factors.reindex(columns=self.__effective_factor)
        self.__describe = merged_input.describe().T

        train, label = input_data.get_normalized_batch(
            self.__window, self.__future_target,
            label=input_target, factors=input_factors)

        batch_size = int(min(batch_size, max(1, len(train) / steps_per_epoc)))

        # Split the windowed samples into training and validation datasets.
        train_multi = data.Dataset.from_tensor_slices(
            (train[:-int(len(train) * self.__validation_ratio)],
             label[:-int(len(label) * self.__validation_ratio)]))
        train_multi = train_multi.cache().shuffle(
            len(train) * 100).batch(batch_size).repeat()
        val_multi = data.Dataset.from_tensor_slices(
            (train[-int(len(train) * self.__validation_ratio):],
             label[-int(len(label) * self.__validation_ratio):]))
        val_multi = val_multi.cache().batch(batch_size).repeat()

        self.__model.fit(
            train_multi,
            epochs=self.__epochs,
            shuffle=False,
            validation_data=val_multi,
            validation_freq=self.__validation_freq,
            # steps_per_epoch and validation_steps must be integers.
            steps_per_epoch=int(len(train) * (1 - self.__validation_ratio) / batch_size),
            validation_steps=int(len(train) * self.__validation_ratio / batch_size),
            callbacks=[self.__metric_collector])

        # Estimate the per-step MAPE on the validation slice; it is used later
        # to build the forecast confidence boundaries.
        validation_result = self.__model.predict(
            train[-int(len(train) * self.__validation_ratio):])
        validation_labels = label[-int(len(label) * self.__validation_ratio):]
        mean_average_percentage_error = np.abs(
            validation_result - validation_labels) / np.abs(validation_labels)
        mean_average_percentage_error[np.isinf(mean_average_percentage_error)] = np.nan
        mean_average_percentage_error = np.nanmean(mean_average_percentage_error,
                                                   axis=0)
        self.__mean_absolute_percentage_error = mean_average_percentage_error

    def get_mean_absolute_percentage_error(self):
        return list(self.__mean_absolute_percentage_error)

    def get_effective_factor(self):
        return self.__effective_factor

    def save_model(self, model_dir):
        # Persist the metadata needed to rebuild the model, plus the weights.
        with open(os.path.join(model_dir, 'LSTM-Meta.pkl'), "wb") as f:
            meta = {
                'mean_absolute_percentage_error':
                    self.__mean_absolute_percentage_error,
                'end_time': self.__end_time,
                'future_target': self.__future_target,
                'window': self.__window,
                'effective_factor': self.__effective_factor,
                'num_hidden': self.__num_hidden,
                'describe': self.__describe
            }
            pickle.dump(meta, f)
        self.__model.save_weights(
            os.path.join(model_dir, self.__fit_model.name))

    def get_model_type(self):
        return self.__fit_model

    @staticmethod
    def load_model_meta(model_dir):
        with open(os.path.join(model_dir, 'LSTM-Meta.pkl'), "rb") as f:
            meta = pickle.load(f)
            return {
                'mean_absolute_percentage_error':
                    meta['mean_absolute_percentage_error'],
                'end_time': meta['end_time'],
                'window': meta['window'],
                'effective_factor': meta['effective_factor'],
                'future_target': meta['future_target'],
                'num_hidden': meta['num_hidden'],
                'describe': meta['describe']
            }

    def inference(self, input_data: MultivariateData, window, timestamp, **kwargs):
        input_factors = input_data.generate_outer_join_factors()
        if timestamp is None:
            ts = input_factors[TIMESTAMP].max()
        else:
            ts = pd.to_datetime(timestamp)
            ts = ts.tz_localize(None)
        input_factors = MultivariateData.gen_filled_missing_by_period(
            input_factors,
            input_data.get_gran(),
            input_data.get_custom_in_seconds(),
            end_time=ts,
            periods=window,
            fill_type=input_data.fill_type,
            fill_value=input_data.fill_value)
        input_factors = input_factors[self.__effective_factor]
        input_factors = input_factors.reindex(columns=self.__effective_factor)
        input_factors = input_factors.tail(window)
        # print(input_factors)

        # Min-max normalize each factor with the statistics captured at
        # training time.
        for column in self.__effective_factor:
            min_value = self.__describe.loc[column]['min']
            max_value = self.__describe.loc[column]['max']
            if max_value == min_value:
                input_factors[column] = 0
            else:
                input_factors[column] = (input_factors[column] -
                                         min_value) / (max_value - min_value)
        input_factors = input_factors.values
        # Zero out values that fall outside the training range.
        input_factors[(input_factors < 0) | (input_factors > 1)] = 0

        predicted = self.__model.predict(np.array([input_factors]))
        predicted = predicted.reshape(self.__future_target)
        # De-normalize the prediction back to the original value scale.
        predicted = predicted * (self.__describe.loc[VALUE]['max'] -
                                 self.__describe.loc[VALUE]['min']
                                 ) + self.__describe.loc[VALUE]['min']

        target_timestamps = pd.date_range(
            start=timestamp,
            periods=self.__future_target,
            freq=convert_freq(input_data.get_gran(),
                              input_data.get_custom_in_seconds()))
        # Build confidence boundaries from the validation MAPE per step.
        lower_boundary = [
            predicted[i] - np.abs(predicted[i]) *
            self.__mean_absolute_percentage_error[i]
            for i in range(0, len(predicted))
        ]
        upper_boundary = [
            predicted[i] + np.abs(predicted[i]) *
            self.__mean_absolute_percentage_error[i]
            for i in range(0, len(predicted))
        ]
        return [
            UnivariateForecastItem(
                predicted[i], lower_boundary[i], upper_boundary[i],
                (1 - self.__mean_absolute_percentage_error[i]),
                timestamp=target_timestamps[i]).to_dict()
            for i in range(0, len(predicted))
        ]

    def load_model(self, model_dir):
        self.__model.load_weights(
            os.path.join(model_dir, self.__fit_model.name))

    def get_end_time(self):
        return self.__end_time
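A hedged sketch of how a persisted model might be restored using only the methods defined above. The directory path and the validation settings are placeholders, since they are not stored in the saved metadata:

model_dir = "./lstm_model"  # placeholder path

# Rebuild the model from the saved metadata, then load the trained weights.
meta = LSTMModel.load_model_meta(model_dir)
restored = LSTMModel(
    num_hidden=meta['num_hidden'],
    window=meta['window'],
    end_time=meta['end_time'],
    future_target_size=meta['future_target'],
    validation_ratio=0.2,   # assumed value; not stored in the metadata
    validation_freq=1,      # assumed value; not stored in the metadata
    effective_factor=meta['effective_factor'],
    mean_absolute_percentage_error=meta['mean_absolute_percentage_error'],
    describe=meta['describe'])
restored.load_model(model_dir)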
import matplotlib.pyplot as plt
from tensorflow import keras
from tensorflow.keras import layers

# `encoder`, `embedding_dim`, `train_batches` and `test_batches` come from the
# dataset preparation earlier in the script.
model = keras.Sequential([
    layers.Embedding(encoder.vocab_size, embedding_dim),
    layers.GlobalAveragePooling1D(),
    layers.Dense(16, activation='relu'),
    layers.Dense(1, activation='sigmoid')
])
model.summary()

model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

history = model.fit(
    train_batches,
    epochs=10,
    validation_data=test_batches,
    validation_steps=20)

# Collect the per-epoch metrics recorded by fit() for plotting.
history_dict = history.history
acc = history_dict['accuracy']
val_acc = history_dict['val_accuracy']
loss = history_dict['loss']
val_loss = history_dict['val_loss']
epochs = range(1, len(acc) + 1)

plt.figure(figsize=(12, 9))
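The snippet stops right after opening the figure; a typical continuation, sketched here as an assumption rather than the original code, plots the training and validation loss gathered above:

# Assumed continuation: plot training vs. validation loss per epoch.
plt.plot(epochs, loss, 'bo', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()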