def transform(self, X: dt.Frame):
        """Predict ``yhat`` for the rows of ``X`` using the per-group models.

        The columns listed in ``self.tgc`` are extracted from ``X`` as a pandas
        frame, the time column is renamed to ``ds`` and the rows are split by
        the remaining group columns. Each group is scored with the model stored
        in ``self.models`` under the group's hash. Groups that have no entry in
        ``self.models``, or whose entry is ``None``, are filled with
        ``self.nan_value``.

        Args:
            X: Frame supporting ``X[:, columns].to_pandas()`` that contains
                every column named in ``self.tgc``.

        Returns:
            A ``pandas.Series`` named ``yhat``, sorted by the row index of the
            pandas frame extracted from ``X``.
        """
        frame = X[:, self.tgc].to_pandas()
        frame.rename(columns={self.time_column: "ds"}, inplace=True)
        tgc_wo_time = list(np.setdiff1d(self.tgc, self.time_column))
        if len(tgc_wo_time) > 0:
            groups = frame.groupby(tgc_wo_time)
        else:
            # No grouping columns: treat the whole frame as a single group.
            groups = [([None], frame)]

        preds = []
        for key, group in groups:
            # NOTE(review): this hash is the lookup key into self.models, so it
            # presumably has to be built exactly as it was when the models were
            # stored -- confirm against the fitting code before changing it.
            key = key if isinstance(key, list) else [key]
            grp_hash = '_'.join(map(str, key))
            print("prophet - transforming data of shape: %s for group: %s" %
                  (str(group.shape), grp_hash))

            model = self.models[grp_hash] if grp_hash in self.models else None
            if model is None:
                # Unseen group or invalid model. Built as a Series (not a
                # DataFrame) so that every piece handed to pd.concat has the
                # same type; mixing Series and DataFrame row-wise can produce
                # a spurious extra column depending on the pandas version.
                group_preds = pd.Series(
                    np.full(group.shape[0], self.nan_value),
                    index=group.index,
                    name='yhat',
                )
            else:
                # Facebook Prophet returns the predictions ordered by time, so
                # the predictions are indexed by date and mapped back onto the
                # input dates to keep the order of the input rows.
                # A date may repeat inside a group; Prophet returns the same
                # value for a given date, so group by date and keep the max.
                by_date = (
                    model.predict(group)[['ds', 'yhat']].groupby('ds').max()
                )
                input_dates = pd.to_datetime(group['ds'])
                # Series.map keeps the group's original row index.
                group_preds = input_dates.map(by_date['yhat']).rename('yhat')
            preds.append(group_preds)

        # Restore the input row order across all groups.
        return pd.concat(preds, axis=0).sort_index()
Exemplo n.º 2
0
    CREATE TABLE test.some_table
    (
        id INTEGER NOT NULL,
        char1 VARCHAR(8) NOT NULL,
        char2 VARCHAR(8) NOT NULL,
        num1 INTEGER NOT NULL,
        num2 INTEGER NOT NULL,
        PRIMARY KEY (`id`)
    )
    ENGINE=InnoDB
    DEFAULT CHARSET=utf8mb4;
    """)


# With ~ 25% rows changed:
# DF_NEW is a copy of DF in which the column at index 4 is set to 1 for
# every row whose id is below NROW / 2.
DF_NEW = DF.copy()
DF_NEW[f.id < NROW / 2, 4] = 1

# Time each update strategy from the same starting point: reload DF with
# mode='truncate', wait, then send DF_NEW inside the stopwatch.
for _label, _mode in (
    ('Send with update', 'update'),
    ('Send update diffs: 25%', 'update_diffs'),
):
    with mysql.Database(URL) as db:
        db.send_data(DF, 'some_table', mode='truncate')
        # NOTE(review): presumably lets the database settle before timing --
        # confirm the 200 second wait is intentional.
        time.sleep(200)
        with Stopwatch(_label):
            db.send_data(DF_NEW, 'some_table', _mode)

# With ~5% changes