import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from keras.models import Model, Sequential
from keras.layers import (Input, LSTM, RepeatVector, Dense, Dropout,
                          Embedding, SpatialDropout1D)
# trainKeras (the parent class) and t_s (a metrics helper exposing
# calcMetrics) are project-local modules, assumed importable from the
# surrounding package.


class timeSeries(trainKeras):
    """keras models on time series"""
    def __init__(self, X, y=None):
        """initialise the parent class with the feature matrix X;
        y is kept for API compatibility but unused here"""
        trainKeras.__init__(self, X)

    def longShort(self, timesteps=1, input_dim=1, latent_dim=32):
        """define an LSTM sequence autoencoder and keep its encoder"""
        inputs = Input(shape=(timesteps, input_dim))
        encoded = LSTM(latent_dim)(inputs)
        decoded = RepeatVector(timesteps)(encoded)
        decoded = LSTM(input_dim, return_sequences=True)(decoded)
        self.autoencoder = Model(inputs, decoded)
        encoder = Model(inputs, encoded)
        self.model = encoder
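    # Sketch (hypothetical shapes): the stored encoder compresses windows of
    # shape (N, timesteps, input_dim) into (N, latent_dim) codes, while
    # self.autoencoder reconstructs the full window:
    #   ts.longShort(timesteps=10, input_dim=3, latent_dim=32)
    #   codes = ts.model.predict(windows)  # windows: (N, 10, 3)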

    def textModel(self, max_words=400, max_phrase_len=30):
        """define a classification model for text sequences"""
        model = Sequential()
        model.add(
            Embedding(input_dim=max_words,
                      output_dim=256,
                      input_length=max_phrase_len))
        model.add(SpatialDropout1D(0.3))
        model.add(LSTM(256, dropout=0.3, recurrent_dropout=0.3))
        model.add(Dense(256, activation='relu'))
        model.add(Dropout(0.3))
        model.add(Dense(5, activation='softmax'))  # 5 output classes
        model.compile(loss='categorical_crossentropy',
                      optimizer='adam',
                      metrics=['accuracy'])
        self.model = model
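    # Usage sketch, assuming texts were already tokenised and padded to
    # max_phrase_len (e.g. with keras pad_sequences) and labels one-hot
    # encoded into 5 classes; X_pad and y_onehot are hypothetical names:
    #   ts.textModel(max_words=400, max_phrase_len=30)
    #   ts.model.fit(X_pad, y_onehot, epochs=5)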

    def modelXgboost(self):
        """define an xgboost model, falling back to the LSTM baseline"""
        try:
            from xgboost import XGBRegressor
            self.model = XGBRegressor(objective='reg:squarederror',
                                      n_estimators=1000)
        except ImportError:
            self.newModel()

    def modelLongShort(self):
        """define a baseline single-layer LSTM model"""
        shape = self.X.shape
        # input_shape excludes the batch dimension: (timesteps, features)
        input_shape = (1, shape[1])
        model = Sequential()
        model.add(LSTM(50, input_shape=input_shape))
        # alternatives: a stateful LSTM via
        # batch_input_shape=(batch_size, timesteps, features),
        # or loss='mean_squared_error'
        model.add(Dense(1))
        model.compile(loss='mae', optimizer='adam')
        self.model = model

    def lstmBatch(self, batch_size=1, neurons=4, n_in=1, n_out=1):
        """stateful LSTM with a fixed batch size"""
        shape = self.X.shape
        model = Sequential()
        model.add(
            LSTM(neurons,
                 batch_input_shape=(batch_size, 1, n_in * shape[1]),
                 stateful=True))
        model.add(Dense(n_out * shape[1]))
        model.compile(loss='mean_squared_error', optimizer='adam')
        self.batch_size = batch_size
        self.model = model
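    # Note: stateful=True bakes batch_size into the graph, so later fit and
    # predict calls must reuse it, e.g.:
    #   ts.lstmBatch(batch_size=1)
    #   ts.train(batch_size=ts.batch_size, nb_epoch=100)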

    def longShortDeep(self, neurons=32, n_in=1, n_out=1):
        """three stacked LSTM layers with dropout"""
        shape = self.X.shape
        model = Sequential()
        model.add(
            LSTM(units=neurons,
                 return_sequences=True,
                 input_shape=(1, n_in * shape[1])))
        model.add(Dropout(0.2))
        model.add(LSTM(units=neurons, return_sequences=True))
        model.add(Dropout(0.2))
        model.add(LSTM(units=neurons))
        model.add(Dropout(0.2))
        model.add(Dense(units=n_out * shape[1]))
        model.compile(optimizer='adam', loss='mean_squared_error')
        self.model = model

    def newModel(self):
        """define a new model"""
        print("new long short term memory model")
        self.modelLongShort()

    def defModel(self):
        """create a model only if none exists yet"""
        if not hasattr(self, 'model'):
            self.newModel()

    def toSupervised(self, X, n_in=1, n_out=1):
        """shift the features to build a supervised set: n_in lagged
        input steps and n_out steps ahead as targets"""
        df = pd.DataFrame(X)
        pre = [df.shift(i) for i in range(1, n_in + 1)]
        post = [df.shift(-i) for i in range(0, n_out)]
        post = pd.concat(post, axis=1)
        pre = pd.concat(pre, axis=1)
        # drop the rows made invalid (NaN) by the shifts
        if n_out == 1:
            pre = pre[n_in:]
            post = post[n_in:]
        else:
            n_end = n_out - 1
            pre = pre[n_in:-n_end]
            post = post[n_in:-n_end]
        return pre.values, post.values
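    # Worked example: toSupervised([[1], [2], [3], [4]], n_in=1, n_out=1)
    # returns pre = [[1.], [2.], [3.]] and post = [[2], [3], [4]]: each
    # target row is the input row one step ahead.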

    def shiftColumns(self, X, n_in=1):
        """stack n_in lagged copies of each feature as extra columns"""
        df = pd.DataFrame(X)
        pre = [df.shift(i) for i in range(0, n_in)]
        pre = pd.concat(pre, axis=1)
        pre.fillna(0., inplace=True)
        return pre
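    # Worked example: shiftColumns([[1], [2], [3]], n_in=2) yields
    #   [[1., 0.], [2., 1.], [3., 2.]]
    # i.e. the raw column plus its one-step lag, NaNs filled with 0.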

    def splitSet(self, X1, portion=.75, n_in=1, n_out=1, shuffle=False):
        """override the parent split, preparing lagged Xy sets"""
        N = X1.shape[0]
        if shuffle:
            # rotate the series by a random offset: this preserves the
            # temporal order, unlike a true shuffle
            n = int(np.random.random() * N)
            shuffleL = np.roll(np.arange(0, N), n)
        else:
            shuffleL = list(range(N))
        X = np.array(X1)[shuffleL]
        ahead = int(X.shape[0] * portion)
        pre, post = self.toSupervised(X, n_in=n_in, n_out=n_out)
        X_train, X_test = pre[0:ahead], pre[ahead:]
        y_train, y_test = post[0:ahead, :], post[ahead:, :]
        X_train = X_train.reshape(X_train.shape[0], -1, X_train.shape[1])
        y_train = y_train.reshape(len(y_train), -1)
        if portion < 1.:
            X_test = X_test.reshape(X_test.shape[0], -1, X_test.shape[1])
            y_test = y_test.reshape(len(y_test), -1)
        if len(X_train) == 0:
            raise Exception('Empty training set')
        return X_train, y_train, X_test, y_test
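    # Shape sketch: for X1 of shape (100, F) with the defaults, toSupervised
    # drops one lag row (99 usable), ahead = 75, so X_train is (75, 1, F),
    # y_train (75, F), X_test (24, 1, F) and y_test (24, F).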

    def train(self,
              batch_size=1,
              nb_epoch=300,
              shuffle=False,
              calc_metrics=False,
              portion=0.75,
              n_in=1,
              n_out=1,
              **args):
        """train a baseline model resetting the states every nb_epochs"""
        X_train, y_train, X_test, y_test = self.splitSet(self.X,
                                                         shuffle=shuffle,
                                                         portion=portion,
                                                         n_in=n_in,
                                                         n_out=n_out)
        self.defModel()
        for i in range(nb_epoch):
            history = self.model.fit(X_train,
                                     y_train,
                                     epochs=1,
                                     batch_size=batch_size,
                                     validation_data=(X_test, y_test),
                                     verbose=2,
                                     shuffle=False)
            self.model.reset_states()
            self.history.append(history)
        if not calc_metrics:
            return {}
        self.model.predict(X_train, batch_size=batch_size)
        y_pred = self.predict(X_test, batch_size=batch_size)
        kpi = t_s.calcMetrics(y_pred[:, 0], y_test[:, 0])
        return kpi
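    # Minimal usage sketch on project data (ts and X are hypothetical):
    #   ts = timeSeries(X)       # X: array of shape (n_samples, n_features)
    #   ts.modelLongShort()
    #   kpi = ts.train(nb_epoch=50, calc_metrics=True)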

    def evaluate(self,
                 shuffle=True,
                 mode="forecast",
                 portion=0.8,
                 n_in=1,
                 n_out=1):
        """evaluate performances"""
        X_train, y_train, X_test, y_test = self.splitSet(self.X,
                                                         shuffle=shuffle,
                                                         portion=portion,
                                                         n_in=n_in,
                                                         n_out=n_out)
        if mode == "forecast":
            y_pred = self.forecast(X_test)
        else:
            y_pred = self.predict(X_test)
        kpi = t_s.calcMetrics(y_pred[:, 0], y_test[:, 0])
        return kpi

    def trainCross(self,
                   X_test,
                   batch_size=1,
                   nb_epoch=300,
                   calc_metrics=False,
                   n_in=1,
                   n_out=1):
        """train a baseline model resetting the states every nb_epochs"""
        X_train, y_train, X_train1, y_train1 = self.splitSet(self.X,
                                                             portion=1.,
                                                             n_in=n_in,
                                                             n_out=n_out)
        X_test = np.asarray(self.scaleX(X_test))
        X_test, y_test, X_test1, y_test1 = self.splitSet(X_test,
                                                         portion=1.,
                                                         n_in=n_in,
                                                         n_out=n_out)
        self.defModel()
        for i in range(nb_epoch):
            history = self.model.fit(X_train,
                                     y_train,
                                     epochs=1,
                                     batch_size=batch_size,
                                     validation_data=(X_test, y_test),
                                     verbose=2,
                                     shuffle=False)
            self.model.reset_states()
            self.history.append(history)
        if not calc_metrics:
            return {}
        # prime a stateful model on the training data before predicting
        self.model.predict(X_train, batch_size=batch_size)
        y_pred = self.predict(X_test, batch_size=batch_size)
        kpi = t_s.calcMetrics(y_pred[:, 0], y_test[:, 0])
        return kpi

    def trainBoost(self,
                   shuffle=False,
                   calc_metrics=False,
                   portion=0.75,
                   n_in=1,
                   n_out=1,
                   **args):
        """train a baseline model resetting the states every nb_epochs"""
        from xgboost import XGBRegressor
        from sklearn.metrics import mean_absolute_error
        N = self.X.shape[0]
        if shuffle:
            shuffleL = np.arange(0, N)
            n = int(np.random.random(1) * N)
            shuffleL = np.roll(shuffleL, n)
        else:
            shuffleL = list(range(N))
        X = np.array(self.X)[shuffleL]
        ahead = int(X.shape[0] * portion)
        pre, post = self.toSupervised(X, n_in=n_in, n_out=n_out)
        X_train, X_test = pre[0:ahead, :], pre[ahead:, :]
        y_train, y_test = post[0:ahead, -1], post[ahead:, -1]
        y_pred = list()
        history = [x for x in X_train]
        for i in range(len(X_test)):
            testX, testy = X_test[i, :], X_test[i, -1]
            train = np.asarray(history)
            self.model = XGBRegressor(objective='reg:squarederror',
                                      n_estimators=1000)
            self.model.fit(X_train, y_train)
            testX = testX.reshape((1, testX.shape[0]))
            yhat = self.model.predict(testX)[0]
            y_pred.append(yhat)
            history.append(testX)
        t = list(range(ahead, len(self.X)))
        # if not calc_metrics:
        #     return {}
        # error = mean_absolute_error(testX[:, 1], y_pred)
        # kpi = {"error":error}
        return t, y_pred
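    # Walk-forward sketch: the first portion of the series fits the initial
    # regressor; each test point is predicted, then added to the history and
    # the model is refit on the expanded window:
    #   t, y_pred = ts.trainBoost(portion=0.75)
    #   plt.plot(t, y_pred)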

    def predict(self, X_test, batch_size=1, n_in=1):
        """one-step-ahead prediction with the LSTM model"""
        y_pred = []
        if n_in > 1:
            X_test = self.shiftColumns(X_test, n_in=n_in)
        if len(X_test.shape) == 2:
            X_test = self.reshape(X_test)
        for i in range(len(X_test)):
            # predict each sample on its own so stateful models keep
            # their running state between steps
            X1 = X_test[i, :]
            X1 = X1.reshape(1, 1, X1.shape[1])
            pred = self.model.predict(X1, batch_size=batch_size)
            y_pred.append([x for x in pred[0, :]])
        return np.asarray(y_pred)

    def forecast(self, X_test, batch_size=1, n_in=1):
        """recursive forecast: feed each prediction back as the next input"""
        X_arr = np.asarray(X_test)
        if X_arr.ndim == 2:
            X_arr = self.reshape(X_arr)
        y_fore = np.zeros((X_arr.shape[0], X_arr.shape[2]))
        # seed the first n_in steps with the observed values
        for i in range(0, n_in):
            y_fore[i] = X_arr[i, 0]
        for i in range(len(X_arr) - n_in):
            X1 = y_fore[i].reshape(1, -1)
            if n_in > 1:
                # rebuild the lagged columns from the forecasts so far
                X1 = self.shiftColumns(X1, n_in=n_in).values
            X1 = self.reshape(X1)
            pred = self.model.predict(X1, batch_size=batch_size)
            y_fore[i + n_in] = pred[-1]
        return y_fore
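    # Recursive-forecast sketch: only the first n_in rows of X_test seed the
    # loop; every later value is the model's own previous output:
    #   y_fore = ts.forecast(X_test, n_in=1)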

    def forecastLongShort(self, y, ahead=28, epoch=50, n_in=2, n_out=1,
                          plot=False):
        """forecast the last `ahead` time points of y"""
        portion = 1. - ahead / len(y)
        self.setLimit(y)
        X_train, y_train, X_test, y_test = self.splitSet(self.X,
                                                         portion=portion,
                                                         n_in=n_in,
                                                         n_out=n_out)
        self.defModel()
        history = self.model.fit(
            X_train,
            y_train,
            epochs=epoch,
            batch_size=72,
            validation_data=(X_test, y_test),
            verbose=0,
            shuffle=False)  #,callbacks=[TensorBoard(log_dir='/tmp/lstm')])
        self.history.append(history)
        y_scaled = self.model.predict(X_test)
        # invert the min-max scaling (assumes setLimit stored yMin/yMax)
        y_pred = y_scaled[:, 0] * (self.yMax - self.yMin) + self.yMin
        y_test1 = y_test[:, 0] * (self.yMax - self.yMin) + self.yMin
        if plot:
            plt.plot(y[-ahead:], label="test")
            plt.plot(y_pred, label="pred")
            plt.legend()
            plt.show()

        kpi = t_s.calcMetrics(y_pred, y_test1)
        return self.model, kpi

    def reshape(self, X, mode=None, batch_size=1):
        """reshape a 2d dataset into (samples, batch, features)"""
        X_arr = np.asarray(X)
        return X_arr.reshape((X_arr.shape[0], batch_size, X_arr.shape[1]))
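

# Minimal end-to-end sketch on synthetic data. It assumes the parent class
# trainKeras stores the features as self.X and initialises self.history as
# a list; adjust to the actual project API.
if __name__ == "__main__":
    X_demo = np.sin(0.1 * np.arange(0, 200)).reshape(-1, 1)
    ts = timeSeries(X_demo)
    ts.modelLongShort()
    ts.train(nb_epoch=10)                 # one-epoch fits with state resets
    _, _, X_te, y_te = ts.splitSet(ts.X)
    print(ts.predict(X_te)[:5])           # first five one-step predictions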