from ai_cloud_etl import (data_extract_e, data_filter, extract_queues,
                          feature_eng, feature_transform)
from sklearn.model_selection import train_test_split
from vecstack import stacking  # assumed source: the call below matches vecstack's stacking() API

# NOTE: load_labels, load_models and save_data are project persistence helpers;
# their home module is not shown here, so no import is listed for them.


def main():
    # Data Extraction
    df = data_extract_e('e_20190609_15.pkl')

    # Data Transformation and Engineering
    df = feature_eng(df)
    df = extract_queues(df)
    dept_encoder, queue_encoder = load_labels('dept_encoder.pkl',
                                              'queue_encoder.pkl',
                                              df=df)
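    # dept_encoder / queue_encoder are assumed to be persisted label encoders for
    # the department and queue categorical columns, consumed by feature_transform() below.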
    df = feature_transform(df,
                           dept_encoder=dept_encoder,
                           queue_encoder=queue_encoder)

    # Training/Test Split
    x, y = data_filter(df)
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=0.2,
        random_state=1357)  # set to 2468 to reproduce the same split as the individual base-model scripts

    # Load models from persistent files
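    # load_models() is assumed to return the list of individually trained base
    # regressors that serve as the level-0 estimators for stacking below.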
    models = load_models()
    print(models)

    # Stacking
    # Produces a new set of features based on the predictions of base models
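    # Assuming vecstack's stacking(): x_train_s gets one column of out-of-fold
    # predictions per base model, and x_test_s gets the corresponding test-set
    # predictions averaged over the folds, ready for a level-1 meta-model.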
    x_train_s, x_test_s = stacking(models,
                                   x_train,
                                   y_train,
                                   x_test,
                                   n_folds=10,
                                   shuffle=True,
                                   verbose=0,
                                   regression=True)

    save_data(x_train_s, 'x_train_s.pkl')
    save_data(y_train, 'y_train.pkl')
    save_data(x_test_s, 'x_test_s.pkl')
    save_data(y_test, 'y_test.pkl')
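

# Assumed entry point so the stacking pipeline runs when executed as a script
if __name__ == '__main__':
    main()
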
import pandas as pd
import numpy as np
from ai_cloud_etl import data_extract_e, data_filter, feature_eng, feature_transform, extract_queues, fit_labels
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from tpot import TPOTRegressor
import xgb_config

# Data Extraction
df = data_extract_e('e_20190609_15.pkl')

# Data Transformation and Engineering
df = feature_eng(df)
df = extract_queues(df)
dept_encoder, queue_encoder = fit_labels(df)
df = feature_transform(df,
                       dept_encoder=dept_encoder,
                       queue_encoder=queue_encoder)  # keywords avoid relying on positional order

# Training/Test Split
x, y = data_filter(df)
x_train, x_test, y_train, y_test = train_test_split(x,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=2468)

# Using TPOT AutoML
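# config_dict restricts TPOT's operator/hyperparameter search space;
# xgb_config.xgb_config_dict is project-specific and presumably limits it to XGBoost pipelines.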
tpot = TPOTRegressor(n_jobs=-1,
                     verbosity=1,
                     config_dict=xgb_config.xgb_config_dict)
tpot.fit(x_train, y_train)  # fit() trains the TPOT object in place, so no reassignment is needed
y_pred = tpot.predict(x_train)
print('XGB TPOT training R2 score: ', r2_score(y_train, y_pred))
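
# A minimal addition: score the otherwise-unused held-out split and export the
# best pipeline TPOT found (output filename is hypothetical).
y_pred_test = tpot.predict(x_test)
print('XGB TPOT test R2 score: ', r2_score(y_test, y_pred_test))
tpot.export('tpot_xgb_pipeline.py')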