def get_df(cols, load_from_temp, temp_path):
    """
    Build the train dataframe (or reload a cached copy) and load the weather dataframe.

    Parameters
    ==========
    cols: list, the features to create and keep. The list passed in is left untouched;
        'ASS_ASSIGNMENT', 'DATE' and 'CSPL_RECEIVED_CALLS' are added to the selection
        internally when they are missing.
    load_from_temp: bool, when true the train dataframe is read back from `temp_path`
        instead of being rebuilt through FeatureFactory.
    temp_path: str or None, path of the CSV cache. Mandatory when `load_from_temp` is
        true. When rebuilding, None means the result is not written to disk.

    Returns
    =======
    train_df: pd.Dataframe, the selected features (FeatureFactory.X) or the cached CSV.
    weather_df: pd.Dataframe, the weather data, with its index reset in place.

    Raises
    ======
    ValueError: if `load_from_temp` is true and `temp_path` is None.
    """
    # Validate before any loading happens, and with a real exception: an `assert`
    # disappears under `python -O` and would let `pd.read_csv(None)` fail obscurely.
    if load_from_temp and temp_path is None:
        raise ValueError('temp_path must be provided when load_from_temp is true')

    # Log before the load so the message reflects what is actually happening.
    logger.info('Loading weather Dataframe...')
    weather_df = load_weather_df(CONFIG.preprocessed_meteo_path_complete)
    if not load_from_temp:
        logger.info('Loading train Dataframe...')
        train_df = load_train_df(CONFIG.preprocessed_train_path_means)

        logger.info('Creating features...')
        ff = FeatureFactory(train_df, weather_df)
        for col in cols:
            # Lazy %-args: the message is only formatted if the record is emitted.
            logger.info('Creating %s feature...', col)
            ff(col)

        # Work on a copy so the caller's list is never mutated (the previous
        # `cols += [...]` extended the caller's list in place).
        selected = list(cols)
        if 'ASS_ASSIGNMENT' not in selected:
            selected.insert(0, 'ASS_ASSIGNMENT')
        if 'DATE' not in selected:
            selected.insert(0, 'DATE')
        if 'CSPL_RECEIVED_CALLS' not in selected:
            selected.append('CSPL_RECEIVED_CALLS')

        logger.info('Selecting features...')
        ff.select_features(selected)
        train_df = ff.X
        if temp_path is not None:
            # Cache the computed features so a later call can use load_from_temp.
            train_df.to_csv(temp_path)
    else:
        logger.info('Loading train Dataframe...')
        train_df = pd.read_csv(temp_path, encoding='latin-1', index_col=0, parse_dates=['DATE'])
    weather_df.reset_index(inplace=True)
    return train_df, weather_df
def get_cross_validation_parameters(df, columns, weather_df=None, k_fold=5, weights=None, out_path=None, label=None):
    """
    Prepare the inputs expected by sklearn cross-validation helpers.

    Parameters
    ==========
    df: pd.Dataframe, the input data. Its index is reset in place (old index dropped).
    columns: list, the features to compute through FeatureFactory and then keep.
    weather_df: pd.Dataframe or None, the weather data handed to FeatureFactory.
    k_fold: int, number of splits; each test partition has a relative size of 1/k_fold.
    weights: dict or None, per-column weights passed to FeatureFactory.apply_weights
        (e.g. to change a column's impact on KNN). None is treated as an empty dict.
    out_path: str or None, path of a CSV file in which the feature table is saved,
        to avoid recomputing features. None not to save this file.
    label: str or None, name of the 'calls' column (CSPL_RECEIVED_CALLS or prediction).
        None selects 'CSPL_RECEIVED_CALLS'.

    Returns
    =======
    X: np.array, the selected (and weighted) features.
    y: the `label` column minus the 'MEAN' column.
        NOTE(review): the right-hand operand is a pandas Series, so this is likely
        returned as a Series rather than a plain np.array - confirm before relying
        on array-only behaviour.
    cv: DateShuffleSplit, to be used as a cross-validation generator.
    dates: np.array, the 'full_date' feature of the data set.
    """
    label = 'CSPL_RECEIVED_CALLS' if label is None else label
    weights = {} if weights is None else weights

    # Row positions must be contiguous, so the previous index is discarded.
    df.reset_index(inplace=True, drop=True)
    factory = FeatureFactory(df, weather_df)

    full_dates = factory('full_date')
    dates = np.array(full_dates)

    # Target: label values with the 'MEAN' column subtracted.
    label_values = np.array(df[label])
    y = label_values - df['MEAN']

    for feature_name in columns:
        logger.debug(feature_name)
        factory(feature_name)
    factory.select_features(columns)
    factory.apply_weights(weights)

    X = np.array(factory.X)
    if out_path is not None:
        factory.X.to_csv(out_path)

    test_fraction = 1.0 / k_fold
    cv = DateShuffleSplit(dates, n_iter=k_fold, test_size=test_fraction)
    return X, y, cv, dates