class KerasNeuralNetwork(PredictionModel):
    """Fully connected Keras network exposed through the PredictionModel API.

    The network is a ``Sequential`` stack of ``Dense`` layers compiled with
    the Adam optimizer, categorical cross-entropy loss and an accuracy
    metric.
    """

    def __init__(self,
                 layers: Iterable[int],
                 funcs: Union[str, Iterable[str]],
                 batch_size=None,
                 max_cores=8):
        """Build and compile the network.

        Args:
            layers: Layer sizes. The first entry is taken as the input width
                and stored in ``input_shape``; every remaining entry becomes
                one ``Dense`` layer.
            funcs: Activation name(s). A single string is reused for every
                ``Dense`` layer; otherwise one name per ``Dense`` layer, in
                order.
            batch_size: Batch size forwarded to ``fit`` by :meth:`train`.
            max_cores: Value used for the ``"CPU"`` entry of the session's
                ``device_count``.

        Raises:
            IndexError: If ``layers`` is empty, or ``funcs`` provides fewer
                activations than there are ``Dense`` layers.
        """
        # NOTE(review): ``device_count`` caps the number of CPU *devices*;
        # if the goal is to limit threads/cores, the thread-pool options of
        # ConfigProto may be what was intended -- confirm.
        session_conf = tf.compat.v1.ConfigProto(
            device_count={"CPU": max_cores})
        sess = tf.compat.v1.Session(config=session_conf)
        tf.compat.v1.keras.backend.set_session(sess)

        layers = list(layers)
        # Stored as a 1-tuple; it is not passed to any layer in this class.
        self.input_shape = (layers.pop(0),)

        if isinstance(funcs, str):
            funcs = [funcs] * len(layers)
        elif not hasattr(funcs, '__getitem__'):
            # The annotation allows any iterable; materialize generators,
            # sets, etc. so they can be indexed below.
            funcs = list(funcs)

        self.__keras_network = Sequential([
            Dense(size, activation=funcs[i]) for i, size in enumerate(layers)
        ])
        self.__keras_network.compile(
            optimizer='adam',
            loss='categorical_crossentropy',
            metrics=['accuracy'],
        )
        self.batch_size = batch_size

    def predict(self, data: Data) -> Label:
        """Run the network on ``data``.

        A one-dimensional input is treated as a single sample: it is reshaped
        to one row before prediction and the result is flattened back to one
        dimension. Any other input is passed to Keras unchanged.
        """
        single_sample = len(data.shape) == 1
        if single_sample:
            data = data.reshape(1, -1)
        res = self.__keras_network.predict(data)
        if single_sample:
            res = res.flatten()
        return res

    def train(self, data: Data, label: Label):
        """Fit the network on ``data``/``label`` and return Keras' result.

        Uses the ``batch_size`` given at construction time and disables
        Keras' progress output.
        """
        return self.__keras_network.fit(data,
                                        label,
                                        batch_size=self.batch_size,
                                        verbose=False)
示例#2
0
# Append five ReLU Dense layers (57, 100, 40, 20 and 1 units), each with the
# 'normal' kernel initializer, in that order.
for _units in (57, 100, 40, 20, 1):
    NNmodel.add(
        Dense(_units, kernel_initializer='normal', activation='relu'))

# Mean squared logarithmic error is used both as the loss and as the
# reported metric; optimization is done with Adam.
NNmodel.compile(optimizer='adam',
                loss='mean_squared_logarithmic_error',
                metrics=['mean_squared_logarithmic_error'])

# Add a constant 'weather_4' column to the test frame.
# NOTE(review): presumably the test data lacks this category -- confirm.
df_test['weather_4'] = 0

# Keep, in all_columns order, every column whose name starts with one of the
# training column names. The prefix tuple is built once rather than once per
# column.
train_prefixes = tuple(train_columns)
df_test = df_test[[x for x in all_columns if x.startswith(train_prefixes)]]
test_array = df_test.to_numpy()
predictions = NNmodel.predict(test_array)

print('hello')

# Convert each prediction with transform_list and clamp negatives to 0.
individual_predictions = [
    0 if value < 0 else value
    for value in (transform_list(x) for x in predictions)
]

# Write the predictions as an (Id, Predicted) CSV without the index column.
submission = pd.DataFrame()
submission['Id'] = range(len(individual_predictions))
submission['Predicted'] = individual_predictions
submission.to_csv("submission.csv", index=False)
示例#3
0
    # Standardize features: fit the scaler on the training split only, then
    # apply the same transform to the test split.
    sc = StandardScaler()
    X_train = sc.fit_transform(X_train)
    X_test = sc.transform(X_test)
    y_train = y_train.to_numpy()

    # Two hidden ReLU layers of 64 units and a single sigmoid output.
    # NOTE(review): input_dim is read from ``X`` (defined outside this
    # fragment), not from X_train -- confirm they have the same width.
    classifier = Sequential()
    classifier.add(Dense(units=64, activation='relu', input_dim=X.shape[1]))
    classifier.add(Dense(units=64, activation='relu'))
    classifier.add(Dense(units=1, activation='sigmoid'))
    classifier.compile(optimizer='adam',
                       loss='mean_squared_error',
                       metrics=['accuracy'])
    classifier.fit(X_train, y_train, batch_size=10, epochs=50)

    # Round the sigmoid output to 0/1, then map it to -1/+1.
    y_pred = classifier.predict(X_test)
    y_pred = (2 * np.round(y_pred) - 1)

    # ``np.nan`` is used because the ``np.NaN`` alias was removed in
    # NumPy 2.0.
    dataset['y_pred'] = np.nan
    # Fill the last len(y_pred) rows of the last column with the predictions.
    dataset.iloc[(len(dataset) - len(y_pred)):, -1:] = y_pred
    # Copy so the column assignments below act on an independent frame
    # instead of a possible view of ``dataset``.
    trade_dataset = dataset.dropna().copy()

    # Log return of 'Close' versus the previous row, shifted up one row so
    # each row carries the following row's return.
    log_returns = np.log(
        trade_dataset['Close'] / trade_dataset['Close'].shift(1))
    trade_dataset['Tomorrows Returns'] = log_returns.shift(-1)

    trade_dataset['Strategy Returns'] = 0.
示例#4
0
class LSTMModel:
    """Single-layer LSTM forecaster over a window of factor columns.

    The network is an ``LSTM`` layer followed by a ``Dense`` layer with one
    output per forecast step, compiled with Adam and mean squared error.
    Column statistics captured during training (``describe``) are used at
    inference time for min-max scaling.
    """

    def __init__(self,
                 num_hidden,
                 window,
                 end_time,
                 future_target_size,
                 validation_ratio,
                 validation_freq,
                 effective_factor,
                 mean_absolute_percentage_error=None,
                 describe=None,
                 epoc=10,
                 metric_sender=None):
        """Build and compile the network.

        Args:
            num_hidden: Number of LSTM units.
            window: Number of time steps in the model's input shape.
            end_time: Stored and returned by :meth:`get_end_time`.
            future_target_size: Number of forecast steps (Dense output size).
            validation_ratio: Fraction of the batches held out for
                validation in :meth:`train`.
            validation_freq: Forwarded to Keras ``fit``.
            effective_factor: Names of the factor columns used as input
                features; its length is the feature dimension.
            mean_absolute_percentage_error: Previously computed per-step
                error, if any; overwritten by :meth:`train`.
            describe: Previously computed column statistics, if any;
                overwritten by :meth:`train`.
            epoc: Number of training epochs.
            metric_sender: Forwarded to ``MetricCollector``.
        """
        self.__num_hidden = num_hidden
        self.__future_target = future_target_size
        self.__window = window
        self.__epochs = epoc
        self.__mean_absolute_percentage_error = mean_absolute_percentage_error
        self.__end_time = end_time
        self.__effective_factor = effective_factor
        self.__model = Sequential([
            LSTM(num_hidden,
                 input_shape=(window, len(effective_factor))),
            Dense(self.__future_target)
        ])
        self.__model.compile(optimizer='adam', loss='mean_squared_error')
        self.__validation_ratio = validation_ratio
        self.__validation_freq = validation_freq
        self.__fit_model = ModelType.LSTM
        self.__describe = describe
        self.__metric_collector = MetricCollector(epochs=self.__epochs,
                                                  metric_sender=metric_sender)

    def train(self, input_data: MultivariateData, batch_size, steps_per_epoc):
        """Fit the model and record the per-step validation error.

        The trailing ``validation_ratio`` share of the batches is held out
        for validation; the mean absolute percentage error on that share is
        stored per forecast step.

        Args:
            input_data: Source of the target series and factor series.
            batch_size: Upper bound for the batch size.
            steps_per_epoc: Used only to cap the batch size at
                ``len(train) / steps_per_epoc`` (minimum 1).

        Raises:
            ValueError: If the validation ratio leaves the training or the
                validation split empty.
        """
        input_factors = input_data.generate_outer_join_factors()
        input_factors = MultivariateData.generate_filled_missing_frame(
            input_factors,
            input_data.get_gran(),
            input_data.get_custom_in_seconds(),
            fill_type=input_data.fill_type,
            fill_value=input_data.fill_value)

        merged_input = MultivariateData.generate_inner_join_frame(
            [input_data.get_target(), input_factors])
        input_target = merged_input[[TIMESTAMP, VALUE]]
        input_factors = merged_input.drop([TIMESTAMP, VALUE], axis=1)
        input_factors = input_factors.reindex(columns=self.__effective_factor)
        # Transposed so rows are column names and 'min'/'max' are columns;
        # inference() reads it in that layout.
        self.__describe = merged_input.describe().T
        train, label = input_data.get_normalized_batch(self.__window,
                                                       self.__future_target,
                                                       label=input_target,
                                                       factors=input_factors)
        batch_size = int(min(batch_size, max(1, len(train) / steps_per_epoc)))

        # Split by an explicit index. Slicing with ``[:-n]`` / ``[-n:]``
        # inverts the split when n rounds down to 0 (empty training set,
        # everything in validation).
        # NOTE(review): assumes train and label have the same length, which
        # from_tensor_slices requires of each pair anyway -- confirm.
        val_size = int(len(train) * self.__validation_ratio)
        split = len(train) - val_size
        if val_size <= 0 or split <= 0:
            raise ValueError(
                'validation_ratio={} leaves an empty training or validation '
                'split for {} batches'.format(self.__validation_ratio,
                                              len(train)))

        train_multi = data.Dataset.from_tensor_slices(
            (train[:split], label[:split]))
        train_multi = train_multi.cache().shuffle(
            len(train) * 100).batch(batch_size).repeat()

        val_multi = data.Dataset.from_tensor_slices(
            (train[split:], label[split:]))
        val_multi = val_multi.cache().batch(batch_size).repeat()

        # Keras documents both step counts as integers; round up so every
        # sample of each split is covered once per epoch.
        steps_per_epoch = int(np.ceil(split / batch_size))
        validation_steps = int(np.ceil(val_size / batch_size))

        self.__model.fit(train_multi,
                         epochs=self.__epochs,
                         shuffle=False,
                         validation_data=val_multi,
                         validation_freq=self.__validation_freq,
                         steps_per_epoch=steps_per_epoch,
                         validation_steps=validation_steps,
                         callbacks=[self.__metric_collector])

        validation_result = self.__model.predict(train[split:])
        validation_labels = label[split:]
        mean_average_percentage_error = np.abs(
            validation_result - validation_labels) / np.abs(validation_labels)
        # Division by a zero label yields inf; treat those as missing so
        # they do not dominate the mean.
        mean_average_percentage_error[np.isinf(
            mean_average_percentage_error)] = np.nan
        mean_average_percentage_error = np.nanmean(
            mean_average_percentage_error, axis=0)
        self.__mean_absolute_percentage_error = mean_average_percentage_error

    def get_mean_absolute_percentage_error(self):
        """Return the stored per-step error as a list."""
        return list(self.__mean_absolute_percentage_error)

    def get_effective_factor(self):
        """Return the factor column names given at construction."""
        return self.__effective_factor

    def save_model(self, model_dir):
        """Write the metadata pickle and the model weights to ``model_dir``.

        Metadata goes to ``LSTM-Meta.pkl``; weights are saved under the name
        of the model type.
        """
        meta = {
            'mean_absolute_percentage_error':
            self.__mean_absolute_percentage_error,
            'end_time': self.__end_time,
            'future_target': self.__future_target,
            'window': self.__window,
            'effective_factor': self.__effective_factor,
            'num_hidden': self.__num_hidden,
            'describe': self.__describe
        }
        with open(os.path.join(model_dir, 'LSTM-Meta.pkl'), "wb") as f:
            pickle.dump(meta, f)
        self.__model.save_weights(
            os.path.join(model_dir, self.__fit_model.name))

    def get_model_type(self):
        """Return the model type constant set at construction."""
        return self.__fit_model

    @staticmethod
    def load_model_meta(model_dir):
        """Load ``LSTM-Meta.pkl`` from ``model_dir`` and return its fields.

        Raises:
            KeyError: If an expected field is missing from the pickle.
        """
        # NOTE(security): pickle.load can execute arbitrary code; only load
        # metadata from model directories this service wrote itself.
        with open(os.path.join(model_dir, 'LSTM-Meta.pkl'), "rb") as f:
            meta = pickle.load(f)
        return {
            'mean_absolute_percentage_error':
            meta['mean_absolute_percentage_error'],
            'end_time':
            meta['end_time'],
            'window':
            meta['window'],
            'effective_factor':
            meta['effective_factor'],
            'future_target':
            meta['future_target'],
            'num_hidden':
            meta['num_hidden'],
            'describe':
            meta['describe']
        }

    def inference(self, input_data: MultivariateData, window, timestamp,
                  **kwargs):
        """Forecast ``future_target`` steps from the latest ``window`` rows.

        Factors are min-max scaled with the statistics stored in
        ``describe``; scaled values outside [0, 1] are replaced by 0. The
        prediction is scaled back with the target column's min/max.

        Args:
            input_data: Source of the factor series.
            window: Number of trailing rows fed to the model.
            timestamp: End time of the input window and start of the
                forecast range. When ``None``, the latest factor timestamp
                is used for both.
            **kwargs: Accepted and ignored.

        Returns:
            A list with one ``UnivariateForecastItem`` dict per forecast
            step.
        """
        input_factors = input_data.generate_outer_join_factors()
        if timestamp is None:
            ts = input_factors[TIMESTAMP].max()
            # Without an explicit timestamp the forecast range must start at
            # the derived one; passing None to pd.date_range would fail.
            forecast_start = ts
        else:
            ts = pd.to_datetime(timestamp)
            ts = ts.tz_localize(None)
            forecast_start = timestamp

        input_factors = MultivariateData.gen_filled_missing_by_period(
            input_factors,
            input_data.get_gran(),
            input_data.get_custom_in_seconds(),
            end_time=ts,
            periods=window,
            fill_type=input_data.fill_type,
            fill_value=input_data.fill_value)
        input_factors = input_factors[self.__effective_factor]
        input_factors = input_factors.reindex(columns=self.__effective_factor)

        # Copy so the per-column writes below act on an independent frame.
        input_factors = input_factors.tail(window).copy()
        for column in self.__effective_factor:
            min_value = self.__describe.loc[column]['min']
            max_value = self.__describe.loc[column]['max']
            if max_value == min_value:
                # Constant column during training: no range to scale by.
                input_factors[column] = 0
            else:
                input_factors[column] = (input_factors[column] -
                                         min_value) / (max_value - min_value)
        # Explicit copy: the array is modified in place below and must be
        # writable.
        input_factors = input_factors.to_numpy(copy=True)
        input_factors[(input_factors < 0) | (input_factors > 1)] = 0

        predicted = self.__model.predict(np.array([input_factors]))
        predicted = predicted.reshape(self.__future_target)
        value_min = self.__describe.loc[VALUE]['min']
        value_max = self.__describe.loc[VALUE]['max']
        predicted = predicted * (value_max - value_min) + value_min

        target_timestamps = pd.date_range(
            start=forecast_start,
            periods=self.__future_target,
            freq=convert_freq(input_data.get_gran(),
                              input_data.get_custom_in_seconds()))

        forecasts = []
        for i, value in enumerate(predicted):
            step_error = self.__mean_absolute_percentage_error[i]
            # Boundaries are the prediction -/+ its magnitude times the
            # per-step validation error.
            margin = np.abs(value) * step_error
            forecasts.append(
                UnivariateForecastItem(
                    value,
                    value - margin,
                    value + margin,
                    (1 - step_error),
                    timestamp=target_timestamps[i]).to_dict())
        return forecasts

    def load_model(self, model_dir):
        """Load the weights written by :meth:`save_model` from ``model_dir``."""
        self.__model.load_weights(
            os.path.join(model_dir, self.__fit_model.name))

    def get_end_time(self):
        """Return the end time given at construction."""
        return self.__end_time
# Turn off the tensorflow_datasets progress bars.
tfds.disable_progress_bar()

# One-layer model: an Embedding over a 1000-entry vocabulary that produces
# 64-dimensional vectors for integer inputs of length 10.
model = Sequential()
model.add(Embedding(input_dim=1000, output_dim=64, input_length=10))
# Input is an integer matrix of shape (batch, input_length); the largest
# index must not exceed 999 (the vocabulary size minus one).
# model.output_shape is then (None, 10, 64), None being the batch dimension.

# 32 random sequences of 10 word indices drawn from [0, 1000).
input_array = np.random.randint(low=1000, size=(32, 10))

model.compile(optimizer='rmsprop', loss='mse')
output_array = model.predict(input_array)
assert output_array.shape == (32, 10, 64)

# A standalone Embedding layer mapping 1000 indices to 5-dimensional vectors.
embedding_layer = layers.Embedding(input_dim=1000, output_dim=5)

# Look up a 1-D batch of indices ...
result = embedding_layer(tf.constant([1, 2, 3]))
print(result.numpy())

# ... and a 2-D batch of indices.
result = embedding_layer(tf.constant([[0, 1, 2], [3, 4, 5]]))
print(result.numpy())


# Load the train and test splits of the IMDB reviews (8k subword) dataset as
# (input, label) pairs, together with the dataset info object.
(train_data, test_data), info = tfds.load(
    'imdb_reviews/subwords8k',
    split=(tfds.Split.TRAIN, tfds.Split.TEST),
    with_info=True,
    as_supervised=True)