Пример #1
0
def create_data_merge_pipeline(mode: str = 'train', **kwargs):
    """Build the feature-merge pipeline for training or inference.

    Args:
        mode: Either ``'train'`` or ``'inference'``.
        kwargs: Ignored; accepted for forward compatibility.

    Returns:
        A ``Pipeline`` that merges the features listed in
        ``data_engineering_parameters.yml`` into ``model_input_features``.

    Raises:
        ValueError: If ``mode`` is not ``'train'`` or ``'inference'``.
    """
    conf_loader = ConfigLoader('conf/local/parameters/')
    use_feature_names = conf_loader.get(
        'data_engineering_parameters.yml')['use_feature_names']
    if mode == 'train':
        return Pipeline([
            node(merge_features,
                 inputs=['params:data_primary_key', *use_feature_names],
                 outputs='model_input_features'),
            node(extract_feature_columns,
                 inputs='model_input_features',
                 outputs='output_columns')
        ])
    if mode == 'inference':
        return Pipeline([
            node(merge_features,
                 inputs=['params:data_primary_key', *use_feature_names],
                 outputs='tmp_model_input_features'),
            node(format_features_for_inference,
                 inputs=['tmp_model_input_features', 'output_columns'],
                 outputs='model_input_features')
        ])
    # Previously fell through and returned None, hiding typos in `mode`.
    raise ValueError(
        f"Unknown mode: {mode!r}; expected 'train' or 'inference'")
Пример #2
0
    def _create_config_loader(  # pylint: disable=no-self-use
            self, conf_paths: Iterable[str]) -> ConfigLoader:
        """A factory method for the ConfigLoader instantiation.

        Returns:
            Instance of `ConfigLoader`.

        """
        hooks = get_hook_manager().hook
        loader = hooks.register_config_loader(  # pylint: disable=no-member
            conf_paths=conf_paths)
        if loader:
            return loader
        # Fall back to a plain ConfigLoader for backwards compatibility when
        # no hook implementation supplied one.
        return ConfigLoader(conf_paths)
Пример #3
0
def test_table_embedding() -> None:
    """Run the autoencoder pipeline end-to-end from YAML configuration."""
    loader: ConfigLoader = ConfigLoader(
        conf_paths=["eos/conf/base", "eos/conf/local"]
    )

    # Configure logging first so subsequent steps are reported.
    logging.config.dictConfig(loader.get("logging*", "logging*/**"))

    catalog: DataCatalog = DataCatalog.from_config(
        loader.get("catalog*", "catalog*/**"))

    parameters: Dict[str, Any] = loader.get("parameters*", "parameters*/**")
    catalog.add_feed_dict(feed_dict=get_feed_dict(params=parameters))

    pipelines_conf: Dict[str, Any] = loader.get("pipelines*", "pipelines*/**")
    pipeline: FlexiblePipeline = HatchDict(pipelines_conf).get(
        "autoencoder_pipeline")

    SequentialRunner().run(pipeline=pipeline, catalog=catalog)
Пример #4
0
def get_config(project_path: str, env: str = None, **kwargs) -> ConfigLoader:
    """Loads Kedro's configuration at the root of the project.

    Args:
        project_path: The root directory of the Kedro project.
        env: The environment used for loading configuration.
        kwargs: Ignore any additional arguments added in the future.

    Returns:
        ConfigLoader which can be queried to access the project config.

    """
    # Keep the str parameter untouched; work on a Path copy instead.
    root = Path(project_path)
    environment = env or DEFAULT_RUN_ENV
    return ConfigLoader([
        str(root / CONF_ROOT / "base"),
        str(root / CONF_ROOT / environment),
    ])
Пример #5
0
def test_viz() -> None:
    """Build three pipelines from YAML config, export kedro-viz JSON to a
    local static site, and verify the site file was produced."""
    dir_static_site: str = "./public"
    # Configure pipeline and catalog objects
    conf_loader: ConfigLoader = ConfigLoader(
        conf_paths=["eos/conf/base", "eos/conf/local"])

    conf_logging: Dict[str, Any] = conf_loader.get("logging*", "logging*/**")
    logging.config.dictConfig(conf_logging)

    conf_catalog: Dict[str, Any] = conf_loader.get("catalog*", "catalog*/**")
    data_catalog: DataCatalog = DataCatalog.from_config(conf_catalog)

    conf_params: Dict[str, Any] = conf_loader.get("parameters*",
                                                  "parameters*/**")
    data_catalog.add_feed_dict(feed_dict=get_feed_dict(params=conf_params))

    conf_pipeline: Dict[str, Any] = conf_loader.get("pipelines*",
                                                    "pipelines*/**")
    ae_pipeline: FlexiblePipeline = HatchDict(conf_pipeline).get(
        "autoencoder_pipeline")
    nx_pipeline: FlexiblePipeline = HatchDict(conf_pipeline).get(
        "networkx_pipeline")
    dgl_pipeline: FlexiblePipeline = HatchDict(conf_pipeline).get(
        "dgl_pipeline")

    pipelines: Dict[str, FlexiblePipeline] = {
        "autoencoder_pipeline": ae_pipeline,
        "networkx_pipeline": nx_pipeline,
        "dgl_pipeline": dgl_pipeline,
        "master_pipeline": ae_pipeline + nx_pipeline + dgl_pipeline,
    }
    # Parse Python object information into JSON form and export to local
    call_viz(dir_static_site=dir_static_site,
             catalog=data_catalog,
             pipelines=pipelines)
    # Serve the static website from local
    # run_static_server(directory = dir_static_site, port = 4141)
    # Fixed: a bare Path object is always truthy, so the old assert could
    # never fail; we must check that the exported file actually exists.
    assert Path(dir_static_site).joinpath("pipeline.json").exists()
Пример #6
0
    def test_overlapping_patterns(self, tmp_path, caplog):
        """Check that same configuration file is not loaded more than once."""
        paths = [
            str(tmp_path / "base"),
            str(tmp_path / "dev"),
            str(tmp_path / "dev" / "user1"),
        ]
        # Fixture config files: base, dev overlay, and two user1 files.
        fixtures = {
            tmp_path / "base" / "catalog0.yml": {
                "env": "base",
                "common": "common",
            },
            tmp_path / "dev" / "catalog1.yml": {
                "env": "dev",
                "dev_specific": "wiz",
            },
            tmp_path / "dev" / "user1" / "catalog2.yml": {"user1_c2": True},
            tmp_path / "dev" / "user1" / "catalog3.yml": {"user1_c3": True},
        }
        for file_path, body in fixtures.items():
            _write_yaml(file_path, body)

        # "user1/catalog2*" overlaps with "catalog*/**"; the file must still
        # be loaded only once.
        catalog = ConfigLoader(paths).get("catalog*", "catalog*/**",
                                          "user1/catalog2*")
        assert catalog == {
            "env": "dev",
            "common": "common",
            "dev_specific": "wiz",
            "user1_c2": True,
            "user1_c3": True,
        }

        log_messages = [record.getMessage() for record in caplog.records]
        expected_path = (tmp_path / "dev" / "user1" / "catalog2.yml").resolve()
        expected_message = (
            f"Config file(s): {expected_path} already processed, skipping loading..."
        )
        assert expected_message in log_messages
Пример #7
0
 def test_load_base_config(self, tmp_path, conf_paths, base_config):
     """Test config loading if `local/` directory is empty"""
     (tmp_path / "local").mkdir(exist_ok=True)
     loaded = ConfigLoader(conf_paths).get("catalog*.yml")
     assert loaded == base_config
Пример #8
0
import pandas as pd
import logging
import numpy as np
from one_two_trip.crossval.crossval import CV_score

from kedro.config import ConfigLoader

# Load credentials, parameters and catalog from the standard kedro config
# locations ('conf/local' entries override 'conf/base').
conf_paths = ['conf/base', 'conf/local']
conf_loader = ConfigLoader(conf_paths)
conf_credentials = conf_loader.get('credentials*', 'credentials*/**')
conf_parameters = conf_loader.get('parameters*', 'parameters*/**')
conf_catalog = conf_loader.get('catalog*', 'catalog*/**')

# Model settings hoisted to module level for use by the node functions below.
cols_target = conf_parameters['model']['cols_target']
col_id = conf_parameters['model']['col_id']
col_client = conf_parameters['model']['col_client']
cols_cat = conf_parameters['model']['cols_cat']
cv_byclient = conf_parameters['model']['cv_byclient']
n_splits = conf_parameters['model']['n_splits']
n_repeats = conf_parameters['model']['n_repeats']

# LightGBM hyper-parameters.
params = conf_parameters['lightgbm']['params']


def union_data_node(train: pd.DataFrame, test: pd.DataFrame) -> pd.DataFrame:
    """Stack train and test row-wise, discarding the original indices."""
    return pd.concat([train, test], sort=False, ignore_index=True)


def mean_byuser_node(df_union: pd.DataFrame) -> pd.DataFrame:
    cols = list(
Пример #9
0
 def __init__(self):
     """Load merged credentials from kedro's base/local config folders."""
     loader = ConfigLoader(["conf/base", "conf/local"])
     self.credentials = loader.get("credentials*", "credentials*/**")
Пример #10
0
 def register_config_loader(self, conf_paths):  # pylint: disable=no-self-use
     """Hook implementation: build the project's ConfigLoader."""
     loader = ConfigLoader(conf_paths)
     return loader
Пример #11
0
def config_loader():
    """Return a ConfigLoader rooted at the current working directory."""
    source = str(Path.cwd())
    return ConfigLoader(conf_source=source)
Пример #12
0
 def register_config_loader(self, conf_paths) -> ConfigLoader:
     """Hook implementation: create a ConfigLoader over `conf_paths`."""
     loader = ConfigLoader(conf_paths)
     return loader
Пример #13
0
 def register_config_loader(self,
                            conf_paths: Iterable[str]) -> ConfigLoader:
     """Hook implementation: log the config paths, then build the loader."""
     extra = {"conf_paths": conf_paths}
     self.logger.info("Registering config loader", extra=extra)
     return ConfigLoader(conf_paths)
Пример #14
0
import logging
from typing import Any, Dict

import numpy as np
import pandas as pd
import discord
from discord.ext import commands
import datetime

from urllib import parse, request
import re

from kedro.config import ConfigLoader

# Load bot credentials (e.g. the Discord token) from kedro's config folders.
conf_paths = ["conf/base", "conf/local"]
conf_loader = ConfigLoader(conf_paths)
credentials = conf_loader.get("credentials*", "credentials*/**")


def run_bot_odesla():
    """Configure the ODESLA Discord bot and register its commands.

    NOTE(review): this example appears truncated — the bot is built and a
    command is registered, but no ``bot.run(token)`` call is visible here.
    """

    bot = commands.Bot(command_prefix='!', description="Bot de ODESLA")

    @bot.command()
    async def info(ctx):
        # Build an embed with guild metadata.
        # NOTE(review): `descripcion` is not a discord.Embed keyword argument;
        # `description` was likely intended — confirm against discord.py docs.
        embed = discord.Embed(title=f"{ctx.guild.name}",
                              descripcion='Test',
                              timestamp=datetime.datetime.utcnow(),
                              color=discord.Color.blue())
        embed.add_field(name="1", value=f"{ctx.guild.created_at}")
        embed.add_field(name="2", value=f"{ctx.guild.created_at}")
Пример #15
0
import matplotlib.pyplot as plt
from kedro.config import ConfigLoader
from kedro.io import DataCatalog
from kedro.pipeline import Pipeline, node
from kedro.runner import SequentialRunner

# Find the configuration (catalog.yaml) in the current working directory and load it
conf_loader = ConfigLoader(".")
conf_catalog = conf_loader.get("catalog*")

# Create the Data Catalog from the catalog.yml file
io = DataCatalog.from_config(conf_catalog)
# Load the raw training data registered in the catalog.
df = io.load("titanic_training_data")


# Create nodes by writing Python functions
# Remove NaN values
def clean_raw_data(df):
    """Drop the Ticket/Cabin columns and any rows containing NaN values."""
    return df.drop(["Ticket", "Cabin"], axis=1).dropna()


# Plot the amount of people who survived and who died.
def plot_survival_breakdown(df):
    """Plot the amount of people who survived and who died.

    Args:
        df: DataFrame with a ``Survived`` column (0 = died, 1 = survived).

    Returns:
        The matplotlib Figure containing the horizontal bar chart.
    """
    # Fixed: the original called plt.figure(figsize=(6, 4)) and then
    # plt.subplots(), which orphaned the first figure and left the returned
    # one at the default size. Create a single, correctly sized figure.
    fig, ax = plt.subplots(figsize=(6, 4))
    df.Survived.value_counts().plot(kind="barh", color="blue", alpha=0.65)
    ax.set_ylim(-1, len(df.Survived.value_counts()))
    plt.title("Survival Breakdown (1 = Survived, 0 = Died)")
    return fig
Пример #16
0
 def test_nested_subdirs(self, tmp_path):
     """Test loading the config from subdirectories"""
     catalog = ConfigLoader(str(tmp_path / "base")).get("**/catalog*")
     expected_type = "CSVLocalDataSet"
     assert catalog["cars"]["type"] == expected_type
     assert catalog["prod"]["cars"]["type"] == expected_type
     assert catalog["cars"]["save_args"]["index"] is True
Пример #17
0
def get_model_params() -> Dict[str, Any]:
    """Load the best evaluated hyper-parameters from the model-params YAML."""
    config = ConfigLoader(MODEL_PARAMETERS_PATH).get('*.yaml')
    return config['best_evaluated_params']
Пример #18
0
def __get_creds():
    """Return merged credentials from conf/base and conf/local."""
    loader = ConfigLoader(["conf/base", "conf/local"])
    return loader.get("credentials*", "credentials*/**")
Пример #19
0
from kedro.config import ConfigLoader
from typing import Dict

# Load the merged catalog configuration once at import time; the functions
# below read this module-level dict.
conf_paths = ["conf/base", "conf/local"]
conf_loader = ConfigLoader(conf_paths)

conf_catalog = conf_loader.get("catalog*", "catalog*/**")


def fetch_catalog() -> Dict[str, Dict]:
    """Return the module-level catalog configuration.

    Returns:
        Dict[str, Dict]: catalog dict
    """
    return conf_catalog


def print_catalog_path(conf_catalog: Dict[str, Dict]):
    """Print the catalog entries' path

    Args:
        conf_catalog (Dict[str, Dict]): catalog dict
    """
    print("Catalog entry path:")
    for dataset, entry in conf_catalog.items():
        print(f'{dataset} -- {entry["filepath"]}')
Пример #20
0
    def register_config_loader(self,
                               conf_paths: Iterable[str]) -> ConfigLoader:
        """Hook implementation that installs a custom config-file parser.

        Returns:
            A ConfigLoader whose file loading goes through `_load_config`.
        """
        import kedro.config.config

        # Monkey-patch kedro's private loader so every ConfigLoader created
        # from here on parses files with the project's `_load_config`.
        # NOTE(review): this relies on a private kedro internal and may break
        # across kedro versions — confirm against the pinned release.
        kedro.config.config._load_config = _load_config
        return ConfigLoader(conf_paths)
Пример #21
0
 def test_empty_patterns(self, conf_paths):
     """Check the error if no config patterns were specified"""
     pattern = (r"`patterns` must contain at least one glob pattern "
                r"to match config filenames against")
     loader = ConfigLoader(conf_paths)
     with pytest.raises(ValueError, match=pattern):
         loader.get()
def kedro_conf(mock_config):
    """Return the merged catalog configuration.

    `mock_config` is presumably a fixture consumed for its setup side
    effects; it is not read here.
    """
    loader = ConfigLoader(["conf/base", "conf/local"])
    return loader.get("catalog*", "catalog*/**")
Пример #23
0
def valid_rf(race_results_df_processed_valid, model_rf, parameters):
    """Validate the random-forest rank model and log betting accuracies.

    Predicts finishing ranks for the validation races, then computes hit
    rates for three bet types — exacta-style top-1, quinella-style top-2
    and trio-style top-3 — and logs them to mlflow. Optionally sends the
    run summary to LINE Notify.

    Args:
        race_results_df_processed_valid: Validation DataFrame indexed by
            race id, with a 'rank' column as the target.
        model_rf: Fitted model exposing `predict`.
        parameters: Dict read for the 'is_notify' flag.
    """
    # mlflow
    print('FILE_DIR: ' + FILE_DIR)
    mlflow.set_tracking_uri(FILE_DIR + '/../../../logs/mlruns/')
    mlflow.set_experiment('forecast_keiba_valid')
    run_info = mlflow.start_run()
    # NOTE(review): tag says 'lr' but the model argument is model_rf —
    # possibly a leftover from a logistic-regression variant; confirm.
    mlflow.set_tag('model', 'lr')

    # Prepare the validation data
    race_results_df_processed_valid = race_results_df_processed_valid
    # Extract the explanatory variables (features)
    X_valid = race_results_df_processed_valid.drop(['rank'], axis=1)
    # Extract the target variable
    y_valid = race_results_df_processed_valid['rank']

    # Run inference
    y_valid_pred = model_rf.predict(X_valid)

    # Reshape predictions vs. actuals for aggregation; the index is race id.
    valid_results_df = pd.DataFrame({'pred': y_valid_pred, 'actual': y_valid})
    race_id_list = list(set(list(valid_results_df.index)))
    valid_results_list = valid_results_df.reset_index().values.tolist()
    # Shuffle so ties are broken randomly
    random.shuffle(valid_results_list)

    # Aggregate (exacta: horse predicted 1st actually finished 1st)
    correct_count = 0
    for race_id in race_id_list:
        pred_cnt_by_race = 0
        cnt_by_race = 0
        for rank in [1]:
            for i in range(len(valid_results_list)):
                # Horses in this race that were predicted to finish {rank}
                if valid_results_list[i][0] == race_id and valid_results_list[
                        i][1] == rank:
                    pred_cnt_by_race += 1
                    if pred_cnt_by_race <= 1 and (valid_results_list[i][2]
                                                  == 1):
                        cnt_by_race += 1
        if cnt_by_race == 1:
            correct_count += 1
    # NOTE(review): divides by a hard-coded 100 — assumes exactly 100
    # validation races; confirm, otherwise use len(race_id_list).
    acc_exacta_1 = correct_count / 100
    print('acc_exacta_1: ' + str(acc_exacta_1))

    # Aggregate (quinella: predicted top-2 both finished in the top 2)
    correct_count = 0
    for race_id in race_id_list:
        pred_cnt_by_race = 0
        cnt_by_race = 0
        for rank in [1, 2]:
            for i in range(len(valid_results_list)):
                # Horses in this race that were predicted to finish {rank}
                if valid_results_list[i][0] == race_id and valid_results_list[
                        i][1] == rank:
                    pred_cnt_by_race += 1
                    if pred_cnt_by_race <= 2 and (valid_results_list[i][2] == 1
                                                  or valid_results_list[i][2]
                                                  == 2):
                        cnt_by_race += 1
        if cnt_by_race == 2:
            correct_count += 1
    acc_quinella_2 = correct_count / 100
    print('acc_quinella_2: ' + str(acc_quinella_2))

    # Aggregate (trio: predicted top-3 all finished in the top 3)
    correct_count = 0
    for race_id in race_id_list:
        pred_cnt_by_race = 0
        cnt_by_race = 0
        for rank in [1, 2, 3]:
            for i in range(len(valid_results_list)):
                # Horses in this race that were predicted to finish {rank}
                if valid_results_list[i][0] == race_id and valid_results_list[
                        i][1] == rank:
                    pred_cnt_by_race += 1
                    if pred_cnt_by_race <= 3 and (
                            valid_results_list[i][2] == 1
                            or valid_results_list[i][2] == 2
                            or valid_results_list[i][2] == 3):
                        cnt_by_race += 1
        if cnt_by_race == 3:
            correct_count += 1
    acc_trio_3 = correct_count / 100
    print('acc_trio_3: ' + str(acc_trio_3))

    mlflow.log_metric("acc_exacta_1", acc_exacta_1)
    mlflow.log_metric("acc_quinella_2", acc_quinella_2)
    mlflow.log_metric("acc_trio_3", acc_trio_3)

    # Notification (LINE Notify)
    if parameters['is_notify']:
        run_result_dict = mlflow.get_run(run_info.info.run_id).to_dictionary()
        run_result_str = json.dumps(run_result_dict, indent=4)

        conf_paths = [
            FILE_DIR + "/../../../conf/base", FILE_DIR + "/../../../conf/local"
        ]
        conf_loader = ConfigLoader(conf_paths)
        credentials = conf_loader.get("credentials*", "credentials*/**")
        token = credentials['dev_line']['access_token']

        url = "https://notify-api.line.me/api/notify"
        headers = {"Authorization": "Bearer " + token}
        payload = {"message": "model_rf" + run_result_str}
        requests.post(url, headers=headers, data=payload)

    mlflow.end_run()
Пример #24
0
def kedro_conf_path() -> dict:
    """Return the merged catalog configuration as a dict."""
    loader = ConfigLoader(["conf/base", "conf/local"])
    return loader.get("catalog*", "catalog*/**")
Пример #25
0
from kedro.config import ConfigLoader

# CONFIG
# Load database credentials from kedro's conf/base directory; the SQL
# statements below are executed elsewhere against that database.
conf_paths = ['conf/base']
conf_loader = ConfigLoader(conf_paths)
config = conf_loader.get('credentials*', 'credentials*/**')

# DROP TABLES
# Idempotent teardown statements for the warehouse schema (staging + star
# schema tables), run before re-creating the tables.
staging_events_table_drop = "DROP TABLE IF EXISTS staging_events;"
staging_songs_table_drop = "DROP TABLE IF EXISTS staging_songs;"
songplay_table_drop = "DROP TABLE IF EXISTS songplays;"
user_table_drop = "DROP TABLE IF EXISTS users;"
song_table_drop = "DROP TABLE IF EXISTS songs;"
artist_table_drop = "DROP TABLE IF EXISTS artists;"
time_table_drop = "DROP TABLE IF EXISTS time;"

# CREATE TABLES

staging_events_table_create = ("""
CREATE TABLE staging_events 
(
  artist        VARCHAR(200),
  auth          VARCHAR(50),
  firstName     VARCHAR(200),
  gender        CHAR(1),
  itemInSession INTEGER,
  lastname      VARCHAR(50),
  length        NUMERIC(10,5),
  level         VARCHAR(10),
  location      VARCHAR(50),
  method        VARCHAR(10),
import lightgbm as lgb
import numpy as np
import pandas as pd
import shap
import datetime
import matplotlib.pyplot as plt
import seaborn as sns
import logging
from hyperopt import STATUS_OK
from digital_reputation_challenge.nodes.datatransform import get_fold_data

from kedro.config import ConfigLoader

# Load model parameters at import time for use by the CV classes below.
conf_paths = ['conf/base', 'conf/local']
conf_loader = ConfigLoader(conf_paths)
conf_parameters = conf_loader.get('parameters*', 'parameters*/**')

from sklearn.model_selection import StratifiedKFold, KFold, RepeatedStratifiedKFold


class CV_score:
    def __init__(self,
                 params,
                 cols_all,
                 col_target,
                 cols_cat='auto',
                 num_boost_round=99999,
                 early_stopping_rounds=50,
                 valid=True):
        self.params = params
        self.cols_all = cols_all
Пример #27
0
 def register_config_loader(
     self, conf_paths: Iterable[str], env: str, extra_params: Dict[str, Any]
 ) -> ConfigLoader:
     """Hook implementation returning the project's ConfigLoader.

     `env` and `extra_params` are required by the hook signature but unused.
     """
     loader = ConfigLoader(conf_paths)
     return loader
Пример #28
0
def config_loader():
    """Return a ConfigLoader over the project's configured CONF_SOURCE."""
    source = Path.cwd() / settings.CONF_SOURCE
    return ConfigLoader(conf_source=str(source))
Пример #29
0
 def register_config_loader(self,
                            conf_paths: Iterable[str]) -> ConfigLoader:
     """Hook implementation: create the project's ConfigLoader."""
     loader = ConfigLoader(conf_paths)
     return loader
Пример #30
0
# AUTOGENERATED! DO NOT EDIT! File to edit: catalog.ipynb (unless otherwise specified).

__all__ = ['conf_loader', 'conf_test_data_catalog', 'test_data_catalog']

# Cell

from kedro.config import ConfigLoader
from kedro.io import DataCatalog

# Cell
# Build the test DataCatalog from the YAML catalog definitions under
# conf/base. NOTE: this file is generated from catalog.ipynb — change it there.
conf_loader = ConfigLoader("conf/base")
conf_test_data_catalog = conf_loader.get("catalog*.yaml", "catalog*/*.yaml")
test_data_catalog = DataCatalog.from_config(conf_test_data_catalog)