Example No. 1
from safe_logger import SafeLogger


logger = SafeLogger("api-connect plugin Pagination")


class Pagination(object):

    def __init__(self, config=None, skip_key=None, limit_key=None, total_key=None, next_page_key=None):
        self.next_page_key = None
        self.skip_key = None
        self.limit_key = None
        self.total_key = None
        self.total = None
        self.next_page_url = None
        self.remaining_records = None
        self.records_to_skip = None
        self.pagination_type = ""
        self.counting_key = None
        self.counter = None
        self.is_last_batch_empty = None
        self.is_first_batch = None
        self.is_paging_started = None
        self.next_page_number = None

    def configure_paging(self, config=None, skip_key=None, limit_key=None, total_key=None, next_page_key=None, url=None, pagination_type="na"):
        config = {} if config is None else config
        self.limit_key = config.get("limit_key", limit_key)
        self.pagination_type = config.get("pagination_type", pagination_type)
        if self.pagination_type == "next_page":
            self.next_page_key = config.get("next_page_key", next_page_key)
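
As a minimal, hypothetical usage sketch of the configuration path shown above (the endpoint URL and config keys are made up, not taken from the plugin):

# Hypothetical sketch: configure "next_page"-style pagination from an endpoint config dict.
pagination = Pagination()
pagination.configure_paging(
    config={"pagination_type": "next_page", "next_page_key": "next", "limit_key": "limit"},
    url="https://example.com/api/items",
)
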
import pandas as pd
import numpy as np
from gluonts_forecasts.utils import concat_timeseries_per_identifiers, concat_all_timeseries, add_row_origin, quantile_forecasts_series
from constants import METRICS_DATASET, METRICS_COLUMNS_DESCRIPTIONS, TIMESERIES_KEYS, ROW_ORIGIN, CUSTOMISABLE_FREQUENCIES_OFFSETS
from gluonts.model.forecast import QuantileForecast
from safe_logger import SafeLogger

logger = SafeLogger("Forecast plugin")


class TrainedModel:
    """
    Wrapper class to make forecasts using a GluonTS Predictor and a training GluonTS ListDataset, and to output a well-formatted forecasts dataframe


    Attributes:
        predictor (gluonts.model.predictor.Predictor)
        gluon_dataset (gluonts.dataset.common.ListDataset): GluonTS ListDataset generated by the GluonDataset class (with extra fields to name timeseries)
        prediction_length (int): Number of time steps to predict (at most the prediction length used in training)
        quantiles (list): List of forecasts quantiles to compute in the forecasts_df
        include_history (bool): True to append to the forecasts dataframe the training data
        history_length_limit (int): Maximum number of values to retrieve from historical data per timeseries. Defaults to None, which means all.
        time_column_name (str): Time column name used in training
        identifiers_columns (list): List of timeseries identifiers column names used in training.
        forecasts_df (DataFrame): Dataframe with the different quantiles forecasts and the training data if include_history is True
    """
    def __init__(self,
                 predictor,
                 gluon_dataset,
                 prediction_length,
                 quantiles,
from dku_io_utils.recipe_config_loading import load_predict_config
from dku_io_utils.utils import set_column_description
from dku_io_utils.checks_utils import external_features_check
from dku_io_utils.model_selection import ModelSelection
from gluonts_forecasts.utils import add_future_external_features
from gluonts_forecasts.trained_model import TrainedModel
from safe_logger import SafeLogger
from time import perf_counter

logger = SafeLogger("Forecast plugin")
params = load_predict_config()

start = perf_counter()
logger.info("Forecasting future values...")

model_selection = ModelSelection(
    folder=params["model_folder"],
    partition_root=params["partition_root"],
)

if params["manual_selection"]:
    model_selection.set_manual_selection_parameters(
        session_name=params["selected_session"],
        model_label=params["selected_model_label"])
else:
    model_selection.set_auto_selection_parameters(
        performance_metric=params["performance_metric"])

predictor = model_selection.get_model_predictor()

gluon_train_dataset = model_selection.get_gluon_train_dataset()
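
For context, a hypothetical sketch of the configuration keys the code above reads from load_predict_config() (values are illustrative, not from the plugin):

# Illustrative only: the keys mirror those accessed above, the values are made up.
example_params = {
    "model_folder": None,          # a dataiku.Folder handle in a real run
    "partition_root": None,
    "manual_selection": True,
    "selected_session": "2024-01-01T00:00:00Z",
    "selected_model_label": "DeepAR",
    "performance_metric": "MASE",  # only used when manual_selection is False
}
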
import os
import requests
from time import sleep

from onedrive_item import OneDriveItem
from onedrive_constants import OneDriveConstants
from safe_logger import SafeLogger
from common import get_value_from_path

logger = SafeLogger("onedrive plugin", forbiden_keys=["onedrive_credentials"])


class OneDriveClient():
    access_token = None
    CHUNK_SIZE = 320 * 1024
    DRIVE_API_URL = "https://graph.microsoft.com/v1.0/me/drive/"
    ITEMS_API_URL = "https://graph.microsoft.com/v1.0/me/drive/items/"
    SHARED_API_URL = "https://graph.microsoft.com/v1.0/drives/{drive_id}/root:/{file_path}:"
    SHARED_WITH_ME_URL = "https://graph.microsoft.com/v1.0/me/drive/sharedWithMe"

    def __init__(self, access_token, shared_folder_root=""):
        self.access_token = access_token
        self.shared_with_me = None
        self.drive_id = None
        self.shared_folder_root = shared_folder_root
        if shared_folder_root:
            self.shared_with_me = self.get_shared_with_me()
            self.drive_id = self.get_shared_directory_drive_id(shared_folder_root)

    def upload(self, path, file_handle):
        # https://docs.microsoft.com/fr-fr/onedrive/developer/rest-api/api/driveitem_createuploadsession?view=odsp-graph-online
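
As a minimal, hypothetical construction sketch based on the __init__ shown above (the token string is a placeholder):

# Hypothetical usage sketch: the token would come from the plugin's OAuth credentials in a real run.
client = OneDriveClient(access_token="<access-token>")
# client.upload(path, file_handle) then creates an upload session per the linked documentation.
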
Example No. 5
from safe_logger import SafeLogger

logger = SafeLogger("Timeseries preparation plugin")


class DecompositionInputValidator(object):
    """Checks if the input dataframe is consistent with a decomposition

    Attributes:
        dku_config(DecompositionConfig): mapping structure storing the recipe parameters
        minimum_observations(int): Minimum number of observations required by the decomposition method
    """

    def __init__(self, dku_config):
        self.dku_config = dku_config
        self.minimum_observations = 3

    def check(self, df):
        """Checks if the input dataframe is compatible with the decomposition method

        :param df: Input dataframe
        :type df: pd.DataFrame
        """
        logger.info("Checking input values:...")
        self._check_model_compatibility(df)
        self._check_size(df)
        logger.info("Checking input values: the recipe parameters are consistent with the input dataset ")

    def _check_model_compatibility(self, df):
        """Checks if the input dataframe is compatible with the decomposition model
Example No. 6
# -*- coding: utf-8 -*-
import dataiku
from dataiku.customrecipe import get_input_names_for_role, get_recipe_config, get_output_names_for_role
import pandas as pd
from safe_logger import SafeLogger
from dku_utils import get_dku_key_values, get_endpoint_parameters
from rest_api_recipe_session import RestApiRecipeSession


logger = SafeLogger("api-connect plugin", forbiden_keys=["token", "password"])


def get_partitioning_keys(id_list, dku_flow_variables):
    partitioning_keys = {}
    partitioning = id_list.get_config().get("partitioning")
    if partitioning:
        dimensions_types = partitioning.get("dimensions", [])
        dimensions = []
        for dimension_type in dimensions_types:
            dimensions.append(dimension_type.get("name"))
        for dimension in dimensions:
            dimension_src = "DKU_DST_{}".format(dimension)
            if dimension_src in dku_flow_variables:
                partitioning_keys[dimension] = dku_flow_variables.get(dimension_src)
    return partitioning_keys


input_A_names = get_input_names_for_role('input_A_role')
config = get_recipe_config()
dku_flow_variables = dataiku.get_flow_variables()
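
A minimal sketch of what get_partitioning_keys above expects and returns, using a hypothetical stand-in for the dataiku dataset object:

class _FakeDataset:
    """Hypothetical stand-in exposing only the get_config() call used above."""
    def get_config(self):
        return {"partitioning": {"dimensions": [{"name": "country"}]}}

# With the matching DKU_DST_country flow variable present, the dimension value is picked up.
assert get_partitioning_keys(_FakeDataset(), {"DKU_DST_country": "FR"}) == {"country": "FR"}
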
Example No. 7
from gluonts.model.estimator import Estimator
from gluonts.model.forecast import SampleForecast
from gluonts.model.predictor import RepresentablePredictor
from gluonts.support.pandas import frequency_add
from gluonts.core.component import validated
from gluonts_forecasts.custom_models.utils import cast_kwargs
from constants import TIMESERIES_KEYS
from pmdarima.arima.utils import nsdiffs
import pmdarima as pm
import numpy as np
from safe_logger import SafeLogger
from tqdm import tqdm
from threadpoolctl import threadpool_limits

logger = SafeLogger("Forecast plugin - AutoARIMA")


class AutoARIMAPredictor(RepresentablePredictor):
    """
    An abstract predictor that can be subclassed by models that are not based
    on Gluon. Subclasses should have @validated() constructors.
    (De)serialization and value equality are all implemented on top of the
    @validated() logic.

    Parameters
    ----------
    prediction_length
        Prediction horizon.
    freq
        Frequency of the predicted data.
    """
import requests
import time
from pagination import Pagination
from safe_logger import SafeLogger
from loop_detector import LoopDetector
from dku_utils import get_dku_key_values
from dku_constants import DKUConstants

logger = SafeLogger("api-connect plugin", forbiden_keys=["token", "password"])


def template_dict(dictionnary, **kwargs):
    """ Recurses into dictionnary and replace template {{keys}} with the matching values present in the kwargs dictionnary"""
    ret = dict.copy(dictionnary)
    for key in ret:
        if isinstance(ret[key], dict):
            ret[key] = template_dict(ret[key], **kwargs)
        if isinstance(ret[key], str):
            ret[key] = format_template(ret[key], **kwargs)
            return ret
    return ret


def format_template(template, **kwargs):
    """ Replace {{keys}} elements in template with the matching value in the kwargs dictionnary"""
    if template is None:
        return None
    formated = template
    for key in kwargs:
        replacement = kwargs.get(key, "")
        formated = formated.replace("{{{{{}}}}}".format(key), str(replacement))
    return formated
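
A small usage sketch of the two helpers above (hypothetical keys and values):

# {{user_id}} placeholders are replaced by the matching keyword argument.
assert format_template("/users/{{user_id}}/items", user_id=42) == "/users/42/items"
assert template_dict({"url": "/users/{{user_id}}"}, user_id=42) == {"url": "/users/42"}
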
# -*- coding: utf-8 -*-
from gluonts_forecasts.mxnet_utils import set_mxnet_context

from dataiku.customrecipe import get_recipe_config
from datetime import datetime
from dku_io_utils.utils import set_column_description
from gluonts_forecasts.training_session import TrainingSession
from dku_io_utils.recipe_config_loading import load_training_config, get_models_parameters
from dku_io_utils.utils import write_to_folder
from gluonts_forecasts.model_handler import get_model_label
from dku_constants import ObjectType
from timeseries_preparation.preparation import TimeseriesPreparator
from safe_logger import SafeLogger
from time import perf_counter

logger = SafeLogger("Forecast plugin")
session_name = datetime.utcnow().isoformat() + "Z"
logger.info("Starting training session {}...".format(session_name))

config = get_recipe_config()
params = load_training_config(config)

mxnet_context = set_mxnet_context(params["gpu_devices"])

models_parameters = get_models_parameters(
    config, is_training_multivariate=params["is_training_multivariate"])
start = perf_counter()

training_df = params["training_dataset"].get_dataframe()

timeseries_preparator = TimeseriesPreparator(
from gluonts.model.estimator import Estimator
from gluonts.model.forecast import SampleForecast
from gluonts.model.predictor import RepresentablePredictor
from gluonts.support.pandas import frequency_add
from gluonts.core.component import validated
from gluonts_forecasts.custom_models.utils import cast_kwargs
from constants import TIMESERIES_KEYS
from statsmodels.tsa.api import STLForecast
from statsmodels.tsa.exponential_smoothing.ets import ETSModel
import numpy as np
import pandas as pd
from safe_logger import SafeLogger
from tqdm import tqdm

logger = SafeLogger("Forecast plugin - SeasonalTrend")


class SeasonalTrendPredictor(RepresentablePredictor):
    """
    An abstract predictor that can be subclassed by models that are not based
    on Gluon. Subclasses should have @validated() constructors.
    (De)serialization and value equality are all implemented on top of the
    @validated() logic.

    Parameters
    ----------
    prediction_length
        Prediction horizon.
    freq
        Frequency of the predicted data.
    """
import sys  # required by the version check below; PluginCodeEnvError is defined elsewhere in the plugin

if sys.version_info.major == 2:
    raise PluginCodeEnvError(
        "This custom recipe requires a Python 3 code env. You are using Python {}.{}. Please ask an administrator to delete the "
        "existing Python 2 code environment and create a new Python 3 code environment if you want to use the decomposition "
        "recipe".format(sys.version_info.major, sys.version_info.minor))

from time import perf_counter

from dataiku.customrecipe import get_recipe_config

from io_utils import get_input_output, set_column_description
from recipe_config_loading import get_decomposition_params
from safe_logger import SafeLogger
from timeseries_preparation.preparation import TimeseriesPreparator

logger = SafeLogger("Timeseries preparation plugin")

(input_dataset, output_dataset) = get_input_output()
config = get_recipe_config()
input_dataset_columns = [
    column["name"] for column in input_dataset.read_schema()
]
(dku_config, input_validator,
 decomposition) = get_decomposition_params(config, input_dataset_columns)

timeseries_preparator = TimeseriesPreparator(dku_config)
input_df = input_dataset.get_dataframe(infer_with_pandas=False)
df_prepared = timeseries_preparator.prepare_timeseries_dataframe(input_df)
input_validator.check(df_prepared)

start = perf_counter()
import json
from powerbi import PowerBI, generate_access_token
from dataiku.exporter import Exporter
from math import isnan
from safe_logger import SafeLogger

logger = SafeLogger(
    "power-bi-v2 plugin",
    forbiden_keys=["ms-oauth_credentials", "password", "client-secret"])


class PowerBIExporter(Exporter):

    EMPTY_CONNECTION = {
        "username": None,
        "password": None,
        "client-id": None,
        "client-secret": None
    }

    def __init__(self, config, plugin_config):
        logger.info("config={}, plugin_config={}".format(
            logger.filter_secrets(config),
            logger.filter_secrets(plugin_config)))
        self.config = config
        self.plugin_config = plugin_config
        self.row_index = 0
        self.row_buffer = {}
        self.row_buffer["rows"] = []

        self.pbi_dataset = self.config.get("dataset", None)