Example #1
    def train_test_split_df(self):
        x, y = self.create_np_array()
        x_train, x_test, y_train, y_test = train_test_split(x,
                                                            y,
                                                            test_size=0.10,
                                                            shuffle=False)

        # Since we are working with time series data, we create batches of sequences to predict the next y
        train_gen = TimeseriesGenerator(data=x_train,
                                        targets=y_train,
                                        length=5,
                                        batch_size=1,
                                        shuffle=False,
                                        reverse=False,
                                        start_index=0,
                                        end_index=None)
        test_gen = TimeseriesGenerator(x_test,
                                       y_test,
                                       length=5,
                                       sampling_rate=1,
                                       batch_size=1,
                                       shuffle=False,
                                       reverse=False,
                                       start_index=0,
                                       end_index=None)
        self.train_generator = train_gen
        self.test_generator = test_gen
        return (train_gen, test_gen)
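
A quick way to sanity-check what these generators yield is to index one directly: each item is a (batch_x, batch_y) pair, where batch_x has shape (batch_size, length, n_features). A minimal sketch with made-up data (not from the original project):

import numpy as np
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator

x = np.arange(20, dtype="float32").reshape(-1, 1)  # 20 timesteps, 1 feature
gen = TimeseriesGenerator(x, x, length=5, batch_size=1, shuffle=False)

batch_x, batch_y = gen[0]
print(batch_x.shape)  # (1, 5, 1): one window of the 5 preceding steps
print(batch_y)        # [[5.]]: the value immediately after that window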
Example #2
    def setupData(self, series, val_days=450):
        """
        splits data, scales data, creates generators for the model
        """
        assert val_days > self.length, "val_days must exceed length"
        
        #split data into train and validation
        self.train = series.iloc[:-val_days]
        self.validation = series.iloc[-val_days:]
        
        # apply smoothing filters
        self.train_smooth = \
            gaussian_filter1d(self.train, self.g_filt).reshape(-1, 1)

        self.validation_smooth = \
            gaussian_filter1d(self.validation, self.g_filt).reshape(-1, 1)

        # create time series generators
        self.generator = TimeseriesGenerator(data=self.train_smooth,
                                             targets=self.train_smooth,
                                             length=self.length,
                                             batch_size=self.batch_size)

        self.val_generator = TimeseriesGenerator(data=self.validation_smooth,
                                                 targets=self.validation_smooth,
                                                 length=self.length,
                                                 batch_size=self.batch_size)
Example #3
    def setupData(self, series, val_days=450):
        """
        splits data, scales data, creates generators for the model
        """
        assert val_days > self.length, "val_days must exceed length"

        #split data into train and validation
        self.train = series.iloc[:-val_days]
        self.validation = series.iloc[-val_days:]

        #scale data for neural network suitability
        self.scaler = MinMaxScaler()
        self.scaler.fit(self.train.values.reshape(-1, 1))

        self.train_scaled = \
            self.scaler.transform(self.train.values.reshape(-1, 1))

        self.validation_scaled = \
            self.scaler.transform(self.validation.values.reshape(-1, 1))

        # create time series generators
        self.generator = TimeseriesGenerator(data=self.train_scaled,
                                             targets=self.train_scaled,
                                             length=self.length,
                                             batch_size=self.batch_size)

        self.val_generator = TimeseriesGenerator(data=self.validation_scaled,
                                                 targets=self.validation_scaled,
                                                 length=self.length,
                                                 batch_size=self.batch_size)
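
Because the fitted MinMaxScaler is kept on the instance, predictions made against the scaled generators can be mapped back to the original units. A minimal usage sketch (assuming a trained Keras model is stored as self.model, which is not part of this snippet):

# hypothetical usage; self.model is whatever model consumed self.generator
scaled_preds = self.model.predict(self.val_generator)  # values in [0, 1]
preds = self.scaler.inverse_transform(scaled_preds)    # back to original units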
Example #4
def rnn(company):
    df = pd.read_csv('pg4_data.csv', parse_dates=True, index_col='date')
    df = df[df.company == company]
    df.drop(['ticker', 'company'], inplace=True, axis=1)
    df['price'] = df.price.apply(lambda x: x.replace(',', ''))
    df['price'] = pd.to_numeric(df.price, errors='coerce')
    train_data = df[:-7]
    test_data = df[-7:]
    scaler = MinMaxScaler(feature_range=(0, 1))
    train_scaled = scaler.fit_transform(train_data)
    test_scaled = scaler.transform(test_data)
    generator = TimeseriesGenerator(train_scaled, train_scaled, length=3, batch_size=1)
    model = Sequential()
    model.add(SimpleRNN(132, input_shape=(3, 1)))
    model.add(Dense(64))
    model.add(Dense(1))
    early_stops = EarlyStopping(monitor='val_loss', patience=2)
    validation = TimeseriesGenerator(test_scaled, test_scaled, length=3, batch_size=1)
    model.compile(optimizer='adam', loss='mse')
    model.fit(generator, epochs=20, validation_data=validation, callbacks=[early_stops])

    test_prediction = []
    first_eval_batch = test_scaled[-3:]
    current_batch = first_eval_batch.reshape(1, 3, 1)

    current_pred = model.predict(current_batch)[0]
    test_prediction.append(current_pred)
    current_batch = np.append(current_batch[:, 1:, :], [[current_pred]], axis=1)
    true_predictions = scaler.inverse_transform(test_prediction)
    return round(true_predictions[0][0], 2)
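
As written, rnn returns a single one-step-ahead price, although the np.append line already maintains the rolling window needed for longer horizons. A sketch of the multi-step extension (an assumption, not part of the original function):

horizon = 7  # hypothetical forecast length
for _ in range(horizon):
    current_pred = model.predict(current_batch)[0]
    test_prediction.append(current_pred)
    # drop the oldest timestep and append the newest prediction
    current_batch = np.append(current_batch[:, 1:, :], [[current_pred]], axis=1)
true_predictions = scaler.inverse_transform(test_prediction)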
Example #5
def build_model(df, ticker):

    # nas/split
    df.dropna(inplace=True)
    X = df.drop(
        columns=['target', 'ticker', 'price open', 'price close', 'price low'])
    y = df['target']
    y = to_categorical(y)
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        shuffle=False,
                                                        test_size=.2)

    #scale
    ss = StandardScaler()
    X_train_sc = ss.fit_transform(X_train)
    X_test_sc = ss.transform(X_test)

    # time series gen
    tsg_len = 5
    tsg_batch = 512
    train_seq = TimeseriesGenerator(X_train_sc,
                                    y_train,
                                    length=tsg_len,
                                    batch_size=tsg_batch)
    test_seq = TimeseriesGenerator(X_test_sc,
                                   y_test,
                                   length=tsg_len,
                                   batch_size=tsg_batch)
    # Design RNN
    model = Sequential()
    model.add(GRU(32, input_dim=X.shape[1],
                  return_sequences=True))  # True if next layer is RNN
    model.add(GRU(16, return_sequences=False))  # False if next layer is Dense
    model.add(Dense(8, activation='relu'))
    model.add(Dense(4, activation='relu'))
    # output layer
    model.add(Dense(3, activation='softmax'))
    model.compile(loss='categorical_crossentropy',
                  optimizer=Adam(),
                  metrics=['accuracy'])
    hist = model.fit(train_seq,
                     epochs=100,
                     validation_data=test_seq,
                     verbose=0)

    plot_acc(hist, ticker)
    plot_loss(hist, ticker)

    # metrics:
    # https://stackoverflow.com/questions/54875846/how-to-print-labels-and-column-names-for-confusion-matrix
    preds = np.argmax(model.predict(test_seq), axis=-1)
    labels = ['Down', 'Flat', 'Up']
    y_cats = np.argmax(y_test, axis=1)
    cf = confusion_matrix(y_cats[tsg_len:], preds)
    cf_df = pd.DataFrame(cf, columns=labels, index=labels)
    cf_df.to_csv(f'./charts/rnn/{resample}/cm/{ticker}.csv', index=True)
    #pickle model
    model.save(f'./models/rnn/{resample}/{ticker}_rnn')
    return hist
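
The y_cats[tsg_len:] slice matters because a TimeseriesGenerator with length=tsg_len never emits the first tsg_len targets: sample i pairs rows i..i+tsg_len-1 with the label at row i+tsg_len. A quick standalone check of that alignment:

import numpy as np
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator

n, tsg_len = 10, 5
X = np.arange(n).reshape(-1, 1)
y = np.arange(n)
gen = TimeseriesGenerator(X, y, length=tsg_len, batch_size=n)
_, targets = gen[0]
print(targets)  # [5 6 7 8 9] == y[tsg_len:]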
Example #6
    def setupData(self, series, val_days=450):
        """
        splits data, scales data, creates generators for the model
        """
        assert val_days > self.length, "val_days must exceed length"

        #split data into train and validation
        self.train = series.iloc[:-val_days]
        self.validation = series.iloc[-val_days:]

        # =============================================================================
        #       APPLY Smoothing filters
        # =============================================================================
        self.train_smooth = self.train
        #self.train_smooth = medfilt(self.train,7)
        self.train_smooth = gaussian_filter1d(self.train_smooth, 0.8)

        self.validation_smooth = self.validation
        #self.validation_smooth = medfilt(self.validation,7)
        self.validation_smooth = gaussian_filter1d(self.validation_smooth, 0.8)

        # =============================================================================
        #       SCALE AND GEN THAT
        # =============================================================================
        # =============================================================================
        #         #scale data for neural network suitability
        #         self.scaler = MinMaxScaler()
        #         self.scaler.fit(self.train_smooth.reshape(-1,1))
        #
        #         self.train_scaled = \
        #             self.scaler.transform(self.train_smooth.reshape(-1,1))
        #
        #         self.validation_scaled = \
        #              self.scaler.transform(self.validation_smooth.reshape(-1,1))
        # =============================================================================

        # =============================================================================
        #              NO SCALE
        # =============================================================================
        self.train_scaled = \
            self.train_smooth.reshape(-1,1)

        self.validation_scaled = \
             self.validation_smooth.reshape(-1,1)

        # create time series generators
        self.generator = TimeseriesGenerator(data=self.train_scaled,
                                             targets=self.train_scaled,
                                             length=self.length,
                                             batch_size=self.batch_size)

        self.val_generator = TimeseriesGenerator(data=self.validation_scaled,
                                                 targets=self.validation_scaled,
                                                 length=self.length,
                                                 batch_size=self.batch_size)
Example #7
def series_generator(scaled_train, scaled_validation, n_input):

    train_generator = TimeseriesGenerator(scaled_train,
                                          scaled_train,
                                          length=n_input,
                                          batch_size=1)
    validation_generator = TimeseriesGenerator(scaled_validation,
                                               scaled_validation,
                                               length=n_input,
                                               batch_size=1)
    return train_generator, validation_generator
Example #8
def timeserieGenerator(length=12, batch_size=1):
    # scale_train and scale_test are module-level globals in the source project
    train_generator = TimeseriesGenerator(scale_train,
                                          scale_train,
                                          length=length,
                                          batch_size=batch_size)

    validation_generator = TimeseriesGenerator(scale_test,
                                               scale_test,
                                               length=length,
                                               batch_size=batch_size)

    return train_generator, validation_generator, length
Example #9
def data_generator(data, backward, forward, mean, std):
    data = data.values
    N = len(data)
    normalized = (data - mean) / std
    # first pass: each "sample" is a window of the next `forward` values,
    # so target[i] holds normalized[i:i + forward]
    target, _ = TimeseriesGenerator(normalized,
                                    normalized,
                                    length=forward,
                                    batch_size=N)[0]
    # second pass: window `backward` past steps against those multi-step rows
    input, output = TimeseriesGenerator(normalized[:-forward],
                                        target,
                                        length=backward,
                                        batch_size=N - forward)[0]
    return input, output
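
The nested generators are a compact way to build sequence-to-multi-step pairs: the first call turns the series into rows holding the next forward values, and the second windows backward past steps against those rows. A shape check with toy numbers (assumed usage, not from the source project):

import numpy as np
import pandas as pd

series = pd.Series(np.arange(30, dtype="float64"))
inp, out = data_generator(series, backward=5, forward=3,
                          mean=series.mean(), std=series.std())
print(inp.shape, out.shape)  # (22, 5) input windows, (22, 3) multi-step targets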
Example #10
    def genarate_timeseries(self, x_train, y_train, x_val, y_val, x_test):

        train_timeseries = TimeseriesGenerator(x_train,
                                               y_train,
                                               length=self.past_days,
                                               batch_size=512)
        val_timeseries = TimeseriesGenerator(x_val,
                                             y_val,
                                             length=self.past_days,
                                             batch_size=512)
        # dummy targets: only the inputs matter at prediction time
        test_timeseries = TimeseriesGenerator(x_test,
                                              np.zeros((x_test.shape[0], 1)),
                                              length=self.past_days,
                                              batch_size=512)

        return train_timeseries, val_timeseries, test_timeseries
Example #11
def generator_builder(data, targets, TIMESTEPS):

    generator = TimeseriesGenerator(data=data,
                                    targets=targets,
                                    length=TIMESTEPS)

    return generator
Example #12
def trainSimpleLSTM(model, data, history_window, n_batch, n_epochs):

    print(data)

    trainingData = data

    #Calculate this once
    scaler = MinMaxScaler()
    scaler.fit(trainingData)

    trainingData = scaler.transform(trainingData)

    generator = TimeseriesGenerator(trainingData,
                                    trainingData,
                                    length=history_window,
                                    batch_size=n_batch)

    optimizer = keras.optimizers.Adam(learning_rate=0.001)
    model.compile(optimizer=optimizer, loss='mse')

    history = model.fit(generator, epochs=n_epochs, verbose=1)

    hist = pd.DataFrame(history.history)
    hist['epoch'] = history.epoch
    """
    plt.scatter(x=hist['epoch'],y=hist['loss'])
    plt.show()
    """

    return trainingData, scaler
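
Since the function returns both the scaled series and the fitted scaler, a caller can roll the last window forward and map the result back to original units. A hypothetical usage sketch (assumes univariate data and a model with a single output):

scaled, scaler = trainSimpleLSTM(model, data, history_window=10, n_batch=32, n_epochs=50)
last_window = scaled[-10:].reshape(1, 10, 1)  # univariate: one feature
next_scaled = model.predict(last_window)      # shape (1, 1)
next_value = scaler.inverse_transform(next_scaled)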
Example #13
 def train(self, training_data_tuple=None, verbose=False):
     try:
         training_params = self._model_details.get("model_config")
         if training_params is None:
             msg = f"model_config not in {self._model_details}"
             raise KeyError(msg)
         elif training_data_tuple is None:
             msg = "training_data_tuple cannot be None"
             raise ValueError(msg)
         else:
             X_train, X_test, y_train, y_test, train_test_split = training_data_tuple
             num_inputs = training_params.get("num_inputs")
             batch_size = training_params.get("batch_size")
             epochs = training_params.get("epochs")
             generator = TimeseriesGenerator(X_train,
                                             y_train,
                                             length=num_inputs,
                                             batch_size=batch_size)
             self._model.fit(generator, epochs=epochs)
             persisted_model_path = self._model_details.get(
                 "persisted_model_path")
             self._model.save(persisted_model_path)
             if verbose:
                 print(f"Saved recurrent_nn model type to "
                       f"{persisted_model_path}")
     except Exception as e:
         msg = "Error in train"
         raise RuntimeError(msg) from e
Example #14
def route_to_ts(f, window_len, max_len=250):
    route = np.load(f).astype("float32")[-max_len:]

    X = route[1:, :-1]
    y = route[:-1, -1]

    data_gen = TimeseriesGenerator(X, y, length=window_len, batch_size=len(X))

    X_ts = data_gen[0][0]
    y_ts = data_gen[0][1] * SECONDS_PER_YEAR / 60

    return X_ts, y_ts
Example #15
def pred(f1):
    # proceed only when the input is a positive integer string
    if f1.isdigit():
        f = int(f1)
        if f > 0:
            n_input = f
            n_features = 1
            generator = TimeseriesGenerator(train,
                                            train,
                                            length=n_input,
                                            batch_size=6)

            model = Sequential()
            model.add(
                LSTM(200, activation='relu',
                     input_shape=(n_input, n_features)))
            model.add(Dropout(0.15))
            model.add(Dense(1))
            optimizer = keras.optimizers.Adam(learning_rate=0.001)
            model.compile(optimizer=optimizer, loss='mse')
            history = model.fit(generator, epochs=100, verbose=1)

            pred_list = []
            batch = train[-n_input:].reshape((1, n_input, n_features))
            for i in range(n_input):
                pred_list.append(model.predict(batch)[0])
                batch = np.append(batch[:, 1:, :], [[pred_list[i]]], axis=1)

            # pandas (pd) is assumed to be imported at module level;
            # df and scaler are globals in the source snippet
            add_dates = [
                pd.Timestamp(df.index[-1]) + pd.DateOffset(months=x)
                for x in range(0, f + 1)
            ]
            future_dates = pd.DataFrame(index=add_dates[1:],
                                        columns=df.columns)
            df_predict = pd.DataFrame(scaler.inverse_transform(pred_list),
                                      index=future_dates[-n_input:].index,
                                      columns=['Prediction'])
            df_proj = pd.concat([df, df_predict], axis=1)
            res = df_predict.reset_index()
            res.columns = ['Date', 'count of cases']
            res['count of cases'] = res['count of cases'].apply(np.int64)

            return res
Example #16
def create_generator(dataset, params, shuffle=True):
    # DECONSTRUCT DATASET
    features = dataset['features']
    labels = dataset['labels']

    # DECONSTRUCT PARAMS
    batch = params['batch']
    window = params['window']

    # GENERATE & RETURN
    return TimeseriesGenerator(features,
                               labels,
                               length=window,
                               batch_size=batch,
                               shuffle=shuffle)
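
Note that shuffle=True shuffles which windows land in each batch, not the order of timesteps inside a window, and the batch count follows from (n_samples - window) / batch. A quick check with dummy data (assumed usage):

import numpy as np

gen = create_generator({'features': np.zeros((100, 4)), 'labels': np.zeros(100)},
                       {'batch': 8, 'window': 10})
print(len(gen))  # 90 usable windows / 8 per batch -> 12 batches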
Example #17
def forecast(df, length_generator, fc_period):
    full_scaler = MinMaxScaler()
    scaled_full_data = full_scaler.fit_transform(df)

    length = length_generator
    n_features = 1
    generator = TimeseriesGenerator(scaled_full_data,
                                    scaled_full_data,
                                    length=length,
                                    batch_size=1)

    model = Sequential()
    model.add(LSTM(100, activation="relu",
                   input_shape=(length, n_features)))  # can add dropout too
    model.add(Dense(1))
    model.compile(optimizer="adam", loss="mse")
    model.fit(generator, epochs=75)

    forecast = []
    forecast_period = fc_period
    first_eval_batch = scaled_full_data[-length:]
    current_batch = first_eval_batch.reshape((1, length, n_features))

    for i in range(forecast_period):

        # get prediction 1 time stamp ahead ([0] grabs just the number inside the brackets)
        current_pred = model.predict(current_batch)[0]

        # store prediction
        forecast.append(current_pred)

        # update batch to now include prediction and drop first value
        current_batch = np.append(current_batch[:, 1:, :], [[current_pred]],
                                  axis=1)

    forecast = full_scaler.inverse_transform(forecast)
    forecast_index = pd.date_range(start="2020-05-05",
                                   periods=forecast_period,
                                   freq="D")
    forecast_df = pd.DataFrame(data=forecast,
                               index=forecast_index,
                               columns=["Forecast"])
    forecast_df["Forecast"] = forecast_df["Forecast"].apply(lambda x: int(x))

    return forecast_df
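
A hypothetical call, assuming df is a single-column DataFrame of daily values; note the hard-coded start="2020-05-05" means the forecast index only lines up for data ending 2020-05-04:

forecast_df = forecast(df, length_generator=12, fc_period=30)
print(forecast_df.head())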
Example #18
def nn_model(scaled_train_array, y_scaled_train_array, length, epochs):

    # length = size of each input window (in timesteps)
    batch_size = 1  # number of timeseries samples in each batch
    generator = TimeseriesGenerator(scaled_train_array, y_scaled_train_array,
                                    length=length, batch_size=batch_size)
    
    model = Sequential()
    
    model.add(tf.keras.layers.Conv1D(filters=32, kernel_size=5,
                          strides=1, padding="causal",
                          activation="tanh",
                          input_shape=(None,(scaled_train_array.shape[1]))))
 
    model.add(tf.keras.layers.LSTM(512, activation = 'tanh', dropout=0.25, recurrent_dropout= 0.25, return_sequences=True))

    model.add(tf.keras.layers.LSTM(512, dropout=0.25, recurrent_dropout= 0.25, activation = 'tanh', return_sequences=True))
    model.add(tf.keras.layers.LSTM(512,  activation = 'tanh'))
    
    model.add(tf.keras.layers.Dense(100))
    model.add(tf.keras.layers.Dense(1))
    
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)
    model.compile(optimizer=optimizer, loss='mse')
    
    model.summary()
    # ---------------------------------
    # optional early stopping with a validation generator:
    # from tensorflow.keras.callbacks import EarlyStopping
    # early_stop = EarlyStopping(monitor='val_loss', patience=1)
    # validation_generator = TimeseriesGenerator(scaled_test_array, y_scaled_test_array,
    #                                            length=length, batch_size=batch_size)

    model.fit(generator, epochs=epochs)
    # with validation: model.fit(generator, epochs=epochs,
    #                            validation_data=validation_generator,
    #                            callbacks=[early_stop])

    losses = pd.DataFrame(model.history.history)
    losses.plot()
    return model
Example #19
    def create_univariate_dataset(self,
                                  data_type: str = 'train',
                                  sampling_rate: int = 1,
                                  stride: int = 1,
                                  batch_size: int = 1):

        if data_type == 'train':
            data = self.train
        elif data_type == 'test':
            data = self.test
        else:
            raise ValueError("data_type must be 'train' or 'test'")

        generator = TimeseriesGenerator(data,
                                        data,
                                        length=self.look_back,
                                        sampling_rate=sampling_rate,
                                        stride=stride,
                                        batch_size=batch_size)
        return generator
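
sampling_rate and stride are easy to confuse: sampling_rate thins the timesteps inside each window, while stride spaces out where consecutive windows start. A standalone toy illustration (not from the original class):

import numpy as np
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator

data = np.arange(12).reshape(-1, 1)
gen = TimeseriesGenerator(data, data, length=6,
                          sampling_rate=2, stride=3, batch_size=1)
x, y = gen[0]
print(x.ravel())  # [0 2 4]: every 2nd step from the 6-step window
print(y)          # [[6]]: the target right after the window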
Example #20
 def __init__(self,
              x_path,
              folder_name,
              y_path,
              to_fit=True,
              batch_size=2,
              seq_len=15):
     self.x_path = x_path + folder_name
     self.folder_name = folder_name
     self.y_path = y_path
     self.to_fit = to_fit
     self.all_frames = self.get_all_frames(self.x_path)
     self.targets = self.get_Y(y_path, folder_name)
     self.series_data = TimeseriesGenerator(self.all_frames,
                                            self.targets,
                                            length=seq_len,
                                            batch_size=batch_size)
     self.len = len(self.series_data)
Example #21
def windowed_dataset(x, y, win_sz, batch_sz, kind='regress'):
    """
    Helper to prepare a windowed data set from a series

    kind : "regress" or "class"
    """

    if kind == 'class':
        # to class labels
        y = y > 0

    dataset = TimeseriesGenerator(x,
                                  y,
                                  win_sz,
                                  sampling_rate=1,
                                  shuffle=True,
                                  batch_size=batch_sz)
    return dataset
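
In 'class' mode the targets become booleans (value went up or not), which pairs with a sigmoid output layer; 'regress' keeps the raw values. A hypothetical usage:

import numpy as np

returns = np.random.randn(500, 1).astype("float32")
ds = windowed_dataset(returns, returns.ravel(), win_sz=20, batch_sz=32, kind='class')
xb, yb = ds[0]
print(xb.shape, yb.dtype)  # (32, 20, 1) windows with boolean targets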
Example #22
 def predict(self, pred_data=None):
     try:
         if pred_data is None:
             msg = "pred_data cannot be None type"
             raise Exception(msg)
         else:
             pred_params_dict = self._model_details.get("model_config")
             generator_test = TimeseriesGenerator(
                 pred_data,
                 pred_data,
                 length=pred_params_dict["num_inputs"] - 1,
                 batch_size=1)
             pred_result = self._format_predict(
                 self._model.predict(generator_test))
             return pred_result
     except Exception as e:
         msg = "Error in predict"
         raise RuntimeError(msg) from e
Example #23
    def __getitem__(self, index):
        images_folder = self.list_X[index]
        images_list = sorted(os.listdir(self.x_path + images_folder))
        all_frames = []
        for img in images_list:
            all_frames.append(
                np.array(cv2.imread(self.x_path + images_folder + '/' + img)))

        all_frames = np.stack(all_frames).astype(np.float16)

        key = images_folder.split('_')[:2]
        key = '_'.join(key)
        Y = np.array(self.dict_Y[key])
        all_frames, targets = self.check(all_frames, Y)
        series_data = TimeseriesGenerator(all_frames,
                                          targets,
                                          length=self.seq_len,
                                          batch_size=self.batch_size)

        return series_data
Example #24
    def create_multivariate_dataset(self,
                                    data_type='train',
                                    outcome: str = 'adjclose',
                                    sampling_rate: int = 1,
                                    stride: int = 1,
                                    batch_size: int = 1):

        if data_type == 'train':
            outcome = self.train['outcome']
            data = self.train['predictors']
        elif data_type == 'test':
            outcome = self.test['outcome']
            data = self.test['predictors']
        else:
            raise ValueError("data_type must be 'train' or 'test'")

        generator = TimeseriesGenerator(data,
                                        outcome,
                                        length=self.look_back,
                                        sampling_rate=sampling_rate,
                                        stride=stride,
                                        batch_size=batch_size)
        return generator
Example #25
File: convlstm.py  Project: fmidev/trains
    def predict_proba(self, X):
        """ Predict proba """
        if not self.fitted:
            raise NotFittedError()

        data_gen = TimeseriesGenerator(X,
                                       np.empty(len(X)),
                                       length=self.length,
                                       sampling_rate=1,
                                       batch_size=self.batch_size)
        y_pred = self.model.predict(data_gen)

        # expand the single sigmoid output into two-class probabilities
        y_pred_proba = np.zeros((len(y_pred), 2))
        y_pred_proba[:, 1] = y_pred.ravel()
        y_pred_proba[:, 0] = 1 - y_pred.ravel()

        self.y_pred = y_pred
        self.y_pred_proba = y_pred_proba

        return y_pred_proba
Example #26
    def _fitRNN(self):
        treinar = True
        while treinar:
            gerador = TimeseriesGenerator(numpy.array(self._data_set[0]) /
                                          255.0,
                                          numpy.array(self._data_set[1]),
                                          length=self.time_steps,
                                          batch_size=self.batch_size)
            historico = inteligencia.modelo.fit(gerador,
                                                epochs=self.epochs,
                                                shuffle=self.suffle,
                                                verbose=1)
            self._atualizarLog(historico)

            if self._iterativo:
                try:
                    # prompt reads "Continue (y/n)?"; 's' ("sim") means yes
                    treinar = input("Continuar (s/n)? ") == 's'
                except (EOFError, KeyboardInterrupt):
                    treinar = False
            else:
                treinar = False
Example #27
yTrain = np.array([
    data[:trainLen, 3][i:i + stepsForward]
    for i in range(trainLen - stepsForward)
])
yTest = np.array([
    data[trainLen + xLen + 2:, 3][i:i + stepsForward]
    for i in range(valLen - xLen - stepsForward - 1)
])
yScaler = StandardScaler()
yScaler.fit(yTrain)
yTrain = yScaler.transform(yTrain)
yTest = yScaler.transform(yTest)

# Create the training generator
trainDataGen = TimeseriesGenerator(xTrain,
                                   yTrain,
                                   length=xLen,
                                   sampling_rate=1,
                                   batch_size=20)

# Create a matching generator for validation during training
testDataGen = TimeseriesGenerator(xTest,
                                  yTest,
                                  length=xLen,
                                  sampling_rate=1,
                                  batch_size=20)

# Build the network
modelD = Sequential()
modelD.add(Dense(150, input_shape=(xLen, 5),
                 activation="linear"))  # 5 = number of input channels
modelD.add(Flatten())
Example #28
epochs = 5
train_split_size = 0.7
test_set_per_tf = []

# Train
for timeframe_idx in range(len(time_frames)):
    df = cross_timeframe_dfs[timeframe_idx].copy()
    Xtrain, Xtest, ytrain, ytest, train_test_split = create_train_test_set(
        df=df, train_split=train_split_size)
    n_features = Xtrain.shape[1]
    test_set_per_tf.append((Xtest, ytest))

    # Setup model & TimeseriesGenerator, and train the model
    model = create_model(n_inputs=n_inputs, n_features=n_features)
    generator = TimeseriesGenerator(Xtrain,
                                    ytrain,
                                    length=n_inputs,
                                    batch_size=batch_size)
    model.fit(generator, epochs=epochs)

    print(f"model saved as model_tf_{time_frames[timeframe_idx]}")
    model.save(f"model_tf_{time_frames[timeframe_idx]}.h5")

    # Visualize the loss function over the training epochs
    loss_val_per_epoch = model.history.history['loss']
    plt.plot(range(len(loss_val_per_epoch)), loss_val_per_epoch)
    plt.title('Loss vs. training epochs')
    plt.ylabel('Loss')
    plt.xlabel('epochs')
    plt.show()

# #### Section D.1 - Model Validation (Linear Regression Feature)
Example #29
scaler_X = MinMaxScaler()
scaler_y = MinMaxScaler()

data_X_scaled = scaler_X.fit_transform(np.array(data_X).reshape(-1, 2))
data_y_scaled = scaler_y.fit_transform(np.array(data_y).reshape(-1, 1))

# Split the data into training and test sets:
train_X = data_X_scaled[:-90]
train_y = data_y_scaled[:-90]

test_X = data_X_scaled[-90:]
test_y = data_y_scaled[-90:]

# Create time series generators (each window uses the last 3 values):
train_data_gen = TimeseriesGenerator(train_X, train_y,
                                     length=3, sampling_rate=1, stride=1,
                                     batch_size=50)

test_data_gen = TimeseriesGenerator(test_X, test_y,
                                    length=3, sampling_rate=1, stride=1,
                                    batch_size=10)

# Build the model:
model = Sequential([LSTM(4, recurrent_dropout=0.15, return_sequences=True, input_shape=(3, 2)),
                    LSTM(4, recurrent_dropout=0.15, return_sequences=False),
                    Dense(1)])

model.compile(optimizer='adam', loss='mse')
model.summary()

# Train the model (tracking the MSE metric during training)
Example #30
train = covid_cases[:test_ind]
test = covid_cases[test_ind:]
print(len(test), 'test')
print(len(train), 'train')
#we divide the historical data into train and test clusters
#length of train dataset = 223
#length of test dataset = 25
scaler = MinMaxScaler()
scaler.fit(np.reshape(train,(223,1)))
scaled_train = scaler.transform(np.reshape(train,(223,1)))
scaled_test = scaler.transform(np.reshape(test,(25,1)))
# length of each input window
pred_len = 20
# batch size for the generators
batch_size = 1
# Create the training generator
generator = TimeseriesGenerator(scaled_train, scaled_train,
                                length=pred_len, batch_size=batch_size)
# Create the validation generator
val_generator = TimeseriesGenerator(scaled_test, scaled_test,
                                    length=pred_len, batch_size=batch_size)
# number of features per timestep
n_features = 1

#Create the LSTM Model

model = Sequential()
model.add(LSTM(60,input_shape=(pred_len,n_features)))
model.add(Dense(1))
model.compile(optimizer='rmsprop',loss='mse')
#Declare an early stop
early_stop = EarlyStopping(monitor='val_loss',patience=2)
#Fit the training data into the model
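
The snippet cuts off before the actual training call; presumably it continues along these lines (a sketch, not the original code):

model.fit(generator,
          epochs=25,  # hypothetical epoch count
          validation_data=val_generator,
          callbacks=[early_stop])  # halts once val_loss stalls for 2 epochs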