Пример #1
0
def google_sheets_to_csv_notebook(
    work_policy,
    google_sheets_export_policy,
    csv_transform_policy,
    csv_load_policy,
    flowitem_model,
):
    """Yield an ETLNotebook pairing the Google Sheets provider with the CSV loader.

    Before the notebook is built, a ``date_col`` entry is written into
    ``csv_transform_policy.column_schema`` (the passed-in policy object is
    modified in place) and ``flowitem_model.clear`` is called with the
    notebook name.

    Presumably used as a generator-style test fixture; the decorator is not
    visible here.
    """
    from flowmaster.operators.etl.loaders import Loaders
    from flowmaster.operators.etl.policy import ETLNotebook
    from flowmaster.operators.etl.providers import Providers

    notebook_name = "__test_google_sheets_to_csv__"

    # Schema entry for the date column; stored under the "date_col" key.
    date_column = dict(
        name="date",
        dtype="date",
        errors="default",
        dt_format="%Y-%m-%d",
        allow_null=True,
    )
    csv_transform_policy.column_schema["date_col"] = date_column

    flowitem_model.clear(notebook_name)

    notebook = ETLNotebook(
        name=notebook_name,
        provider=Providers.GoogleSheetsProvider.name,
        storage=Loaders.CSVLoader.name,
        work=work_policy,
        export=google_sheets_export_policy,
        transform=csv_transform_policy,
        load=csv_load_policy,
    )
    yield notebook
Пример #2
0
def ymm_goals_to_csv_notebook(
    flowitem_model,
    work_policy,
    yandex_metrika_management_credentials,
    csv_transform_policy,
    csv_load_policy,
):
    """Yield an ETLNotebook exporting Yandex Metrika Management goals to CSV.

    ``flowitem_model.clear`` is called with the notebook name first. The
    export policy is then built from the provider's policy model with the
    ``goals`` resource and the given credentials mapping, which is unpacked
    as keyword arguments into ``CredentialsPolicy``.
    """
    from flowmaster.operators.etl.loaders import Loaders
    from flowmaster.operators.etl.policy import ETLNotebook
    from flowmaster.operators.etl.providers import Providers

    notebook_name = "__test_ya_metrika_goals_to_csv__"
    flowitem_model.clear(notebook_name)

    # Short aliases for the long dotted paths used below.
    provider = Providers.YandexMetrikaManagementProvider
    export_model = provider.policy_model

    credentials = export_model.CredentialsPolicy(
        **yandex_metrika_management_credentials
    )
    export_policy = export_model(
        resource=export_model.ResourceNames.goals,
        credentials=credentials,
    )

    yield ETLNotebook(
        name=notebook_name,
        provider=provider.name,
        storage=Loaders.CSVLoader.name,
        work=work_policy,
        export=export_policy,
        transform=csv_transform_policy,
        load=csv_load_policy,
    )
Пример #3
0
def seconds_interval_work_policy(pendulum_utcnow):
    """Return a WorkPolicy whose schedule trigger uses ``interval=60``.

    The schedule uses the "Europe/Moscow" timezone, a start time of
    "00:00:00" and no ``from_date``.

    ``pendulum_utcnow`` is not used in the body; presumably it is requested
    only for its side effect as a fixture. TODO confirm.
    """
    from flowmaster.operators.etl.policy import ETLNotebook

    policy_cls = ETLNotebook.WorkPolicy
    triggers_cls = policy_cls.TriggersPolicy

    schedule = triggers_cls.SchedulePolicy(
        timezone="Europe/Moscow",
        start_time="00:00:00",
        from_date=None,
        interval=60,
    )
    triggers = triggers_cls(schedule=schedule)
    return policy_cls(triggers=triggers)
Пример #4
0
def work_policy(pendulum_utcnow):
    """Return a WorkPolicy with a "daily" schedule starting five days back.

    ``from_date`` is ``pendulum.today()`` minus ``dt.timedelta(5)``. The
    schedule uses the "Europe/Moscow" timezone and a start time of
    "00:00:00".

    ``pendulum_utcnow`` is not used in the body; presumably it is requested
    only for its side effect as a fixture. TODO confirm.
    """
    from flowmaster.operators.etl.policy import ETLNotebook

    policy_cls = ETLNotebook.WorkPolicy
    triggers_cls = policy_cls.TriggersPolicy
    schedule_cls = triggers_cls.SchedulePolicy

    five_days_ago = pendulum.today() - dt.timedelta(5)
    schedule = schedule_cls(
        timezone="Europe/Moscow",
        start_time="00:00:00",
        from_date=five_days_ago,
        interval="daily",
    )
    return policy_cls(triggers=triggers_cls(schedule=schedule))
Пример #5
0
def fakedata_to_csv_notebook(flowitem_model, work_policy, csv_transform_policy,
                             csv_load_policy):
    """Yield an ETLNotebook pairing the fake-data provider with the CSV loader.

    ``flowitem_model.clear`` is called with the notebook name before the
    notebook is built. The export policy is the provider's policy model
    constructed with ``rows=1``.
    """
    from flowmaster.operators.etl.providers import Providers
    from flowmaster.operators.etl.loaders import Loaders
    from flowmaster.operators.etl.policy import ETLNotebook

    # NOTE(review): this name mentions "ya_metrika_stats" although the provider
    # is FakeDataProvider, and ymstats_to_csv_notebook uses the same string.
    # Possibly a copy-paste leftover; confirm before renaming.
    notebook_name = "__test_ya_metrika_stats_to_csv__"
    flowitem_model.clear(notebook_name)

    fake_provider = Providers.FakeDataProvider
    export_policy = fake_provider.policy_model(rows=1)

    notebook_kwargs = {
        "name": notebook_name,
        "provider": fake_provider.name,
        "storage": Loaders.CSVLoader.name,
        "work": work_policy,
        "export": export_policy,
        "transform": csv_transform_policy,
        "load": csv_load_policy,
    }
    yield ETLNotebook(**notebook_kwargs)
Пример #6
0
def ya_direct_report_to_csv_notebook(
    flowitem_model,
    work_policy,
    csv_transform_policy,
    csv_load_policy,
    yandex_direct_credentials,
):
    """Yield an ETLNotebook exporting a Yandex Direct report to CSV.

    The export policy requests the "reports" resource with an
    ACCOUNT_PERFORMANCE_REPORT body (fields ``CampaignType`` and ``Cost``,
    page limit 10) and the ``return_money_in_micros`` header enabled.
    ``flowitem_model.clear`` is called with the notebook name both before
    the notebook is built and again after the generator is resumed.
    """
    from flowmaster.operators.etl.providers import Providers
    from flowmaster.operators.etl.loaders import Loaders
    from flowmaster.operators.etl.policy import ETLNotebook

    notebook_name = "__test2_ya_direct_report_to_csv__"
    flowitem_model.clear(notebook_name)

    # Short aliases for the deeply nested policy classes.
    direct = Providers.YandexDirectProvider
    export_model = direct.policy_model
    body_model = export_model.ReportBodyPolicy
    params_model = body_model.ReportParamsPolicy

    credentials = export_model.CredentialsPolicy(**yandex_direct_credentials)
    headers = export_model.HeadersPolicy(return_money_in_micros=True)
    report_params = params_model(
        ReportType="ACCOUNT_PERFORMANCE_REPORT",
        DateRangeType="AUTO",
        FieldNames=["CampaignType", "Cost"],
        IncludeVAT="NO",
        Page=params_model.PagePolicy(Limit=10),
    )
    export_policy = export_model(
        credentials=credentials,
        resource="reports",
        headers=headers,
        body=body_model(params=report_params),
    )

    yield ETLNotebook(
        name=notebook_name,
        provider=direct.name,
        storage=Loaders.CSVLoader.name,
        work=work_policy,
        export=export_policy,
        transform=csv_transform_policy,
        load=csv_load_policy,
    )

    # Runs once the consumer resumes the generator after the yield.
    flowitem_model.clear(notebook_name)
Пример #7
0
def test_codex_telegram():
    """Run the ETL flow for one day with Codex Telegram success notifications on.

    The module-level ``ya_metrika_logs_to_csv_notebook`` gets a notifications
    policy with ``on_success=True`` and the link taken from
    ``credentials["codex_telegram"]``. The Yandex Metrika Logs export class is
    stubbed so that no real export is performed.

    The stub replaces ``__call__`` on the export *class*, so it is undone in
    a ``finally`` block; otherwise the mock would stay in place for any code
    that runs later in the same process.
    """
    def export_func(start_period,
                    end_period) -> Iterator[tuple[dict, list, list]]:
        # Stand-in for the real export: yields one 3-tuple holding an empty
        # dict, a single "date" column name and one row with the start period.
        yield ({}, ["date"], [[start_period]])

    ya_metrika_logs_to_csv_notebook.work.notifications = ETLNotebook.WorkPolicy.NotificationsPolicy(
        codex_telegram=ETLNotebook.WorkPolicy.NotificationsPolicy.
        CodexTelegramPolicy(
            links=[credentials["codex_telegram"]],
            on_success=True,
        ))
    notebook = ETLNotebook(**dict(ya_metrika_logs_to_csv_notebook))

    export_class = Providers.YandexMetrikaLogsProvider.export_class
    # Look only at the class's own namespace: if __call__ is inherited, the
    # correct restoration is to delete the override rather than pin the
    # parent's function onto this class.
    not_defined = object()
    original_call = vars(export_class).get("__call__", not_defined)

    export_class.__call__ = Mock(side_effect=export_func)
    try:
        etl_flow = ETLOperator(notebook)

        # Drain the generator so the whole flow actually executes.
        list(
            etl_flow(start_period=dt.datetime(2021, 1, 1),
                     end_period=dt.datetime(2021, 1, 1)))
    finally:
        if original_call is not_defined:
            del export_class.__call__
        else:
            export_class.__call__ = original_call
Пример #8
0
def criteo_to_csv_notebook(
    work_policy,
    criteo_export_policy,
    csv_transform_policy,
    csv_load_policy,
    flowitem_model,
):
    """Yield an ETLNotebook pairing the Criteo provider with the CSV loader.

    ``flowitem_model.clear`` is called with the notebook name before the
    notebook is built; all policies are passed through unchanged.
    """
    from flowmaster.operators.etl.loaders import Loaders
    from flowmaster.operators.etl.policy import ETLNotebook
    from flowmaster.operators.etl.providers import Providers

    notebook_name = "__test_criteo_to_csv__"
    flowitem_model.clear(notebook_name)

    notebook_kwargs = {
        "name": notebook_name,
        "provider": Providers.CriteoProvider.name,
        "storage": Loaders.CSVLoader.name,
        "work": work_policy,
        "export": criteo_export_policy,
        "transform": csv_transform_policy,
        "load": csv_load_policy,
    }
    yield ETLNotebook(**notebook_kwargs)
Пример #9
0
def csv_to_clickhouse_notebook(flowitem_model, work_policy, clickhouse_credentials):
    """Yield an ETLNotebook pairing the CSV provider with the ClickHouse loader.

    ``flowitem_model.clear`` is called with the notebook name before the
    notebook is built. The export policy uses an empty ``file_path`` and a
    single ``date`` column, which the transform policy maps to ``Date``.
    The load policy targets table ``<notebook name>`` in database "default"
    and carries one SQL statement to run before and one after.
    """
    from flowmaster.operators.etl.loaders import Loaders
    from flowmaster.operators.etl.policy import ETLNotebook
    from flowmaster.operators.etl.providers import Providers

    notebook_name = "__test_csv_to_clickhouse__"
    flowitem_model.clear(notebook_name)

    csv_provider = Providers.CSVProvider
    clickhouse = Loaders.ClickhouseLoader
    load_model = clickhouse.policy_model

    export_policy = csv_provider.policy_model(
        file_path="", with_columns=False, columns=["date"]
    )
    transform_policy = clickhouse.transform_policy_model(
        error_policy="default",
        partition_columns=["Date"],
        column_map={"date": "Date"},
    )
    table_schema = load_model.TableSchemaPolicy(
        db="default",
        table=notebook_name,
        columns=["Date Date"],
        orders=["Date"],
        partition=["Date"],
    )
    load_policy = load_model(
        credentials=load_model.CredentialsPolicy(**clickhouse_credentials),
        table_schema=table_schema,
        data_cleaning_mode=clickhouse.DataCleaningMode.off,
        sql_before=["SELECT 1"],
        sql_after=["SELECT 2"],
    )

    yield ETLNotebook(
        name=notebook_name,
        provider=csv_provider.name,
        storage=clickhouse.name,
        work=work_policy,
        export=export_policy,
        transform=transform_policy,
        load=load_policy,
    )
Пример #10
0
def ya_metrika_logs_to_csv_notebook2(
    tmp_path,
    work_policy,
    ya_metrika_logs_export_policy,
    csv_transform_policy,
    csv_load_policy,
    flowitem_model,
):
    """Yield an ETLNotebook pairing the Yandex Metrika Logs provider with CSV.

    ``flowitem_model.clear`` is called with the notebook name before the
    notebook is built; all policies are passed through unchanged.
    ``tmp_path`` is accepted but not used in the body.
    """
    from flowmaster.operators.etl.loaders import Loaders
    from flowmaster.operators.etl.policy import ETLNotebook
    from flowmaster.operators.etl.providers import Providers

    notebook_name = "__test_ya_metrika_logs_to_csv2__"
    flowitem_model.clear(notebook_name)

    notebook_kwargs = {
        "name": notebook_name,
        "provider": Providers.YandexMetrikaLogsProvider.name,
        "storage": Loaders.CSVLoader.name,
        "work": work_policy,
        "export": ya_metrika_logs_export_policy,
        "transform": csv_transform_policy,
        "load": csv_load_policy,
    }
    yield ETLNotebook(**notebook_kwargs)
Пример #11
0
def ymstats_to_csv_notebook(
    flowitem_model,
    work_policy,
    yandex_metrika_stats_credentials,
    csv_transform_policy,
    csv_load_policy,
):
    """Yield an ETLNotebook pairing the Yandex Metrika Stats provider with CSV.

    ``flowitem_model.clear`` is called with the notebook name before the
    notebook is built. The export policy is the provider's policy model
    built by unpacking ``yandex_metrika_stats_credentials`` as keyword
    arguments.
    """
    from flowmaster.operators.etl.providers import Providers
    from flowmaster.operators.etl.loaders import Loaders
    from flowmaster.operators.etl.policy import ETLNotebook

    notebook_name = "__test_ya_metrika_stats_to_csv__"
    flowitem_model.clear(notebook_name)

    stats_provider = Providers.YandexMetrikaStatsProvider
    export_policy = stats_provider.policy_model(
        **yandex_metrika_stats_credentials
    )

    yield ETLNotebook(
        name=notebook_name,
        provider=stats_provider.name,
        storage=Loaders.CSVLoader.name,
        work=work_policy,
        export=export_policy,
        transform=csv_transform_policy,
        load=csv_load_policy,
    )
Пример #12
0
def flowmasterdata_items_to_csv_notebook(
    tmp_path,
    seconds_interval_work_policy,
    flowmasterdata_items_export_policy,
    csv_transform_policy,
    csv_load_policy,
    flowitem_model,
):
    """Yield an ETLNotebook pairing the Flowmaster data provider with CSV.

    ``flowitem_model.clear`` is called with the notebook name before the
    notebook is built. The work policy comes from
    ``seconds_interval_work_policy``; the other policies are passed through
    unchanged. ``tmp_path`` is accepted but not used in the body.
    """
    from flowmaster.operators.etl.loaders.csv.service import CSVLoader
    from flowmaster.operators.etl.policy import ETLNotebook
    from flowmaster.operators.etl.providers.flowmaster_data import (
        FlowmasterDataProvider, )

    notebook_name = "__test_flowmasterdata_items_to_csv__"
    flowitem_model.clear(notebook_name)

    notebook_kwargs = {
        "name": notebook_name,
        "provider": FlowmasterDataProvider.name,
        "storage": CSVLoader.name,
        "work": seconds_interval_work_policy,
        "export": flowmasterdata_items_export_policy,
        "transform": csv_transform_policy,
        "load": csv_load_policy,
    }
    yield ETLNotebook(**notebook_kwargs)