def google_sheets_to_csv_notebook(
    work_policy,
    google_sheets_export_policy,
    csv_transform_policy,
    csv_load_policy,
    flowitem_model,
):
    """Yield an ETL notebook that exports Google Sheets data into a CSV file.

    Also extends the shared transform schema with a typed ``date`` column and
    clears any previously-recorded flow items for this notebook's name.
    """
    from flowmaster.operators.etl.loaders import Loaders
    from flowmaster.operators.etl.policy import ETLNotebook
    from flowmaster.operators.etl.providers import Providers

    # Register a date column in the transform schema before building the notebook.
    date_column = {
        "name": "date",
        "dtype": "date",
        "errors": "default",
        "dt_format": "%Y-%m-%d",
        "allow_null": True,
    }
    csv_transform_policy.column_schema["date_col"] = date_column

    flow_name = "__test_google_sheets_to_csv__"
    flowitem_model.clear(flow_name)

    yield ETLNotebook(
        name=flow_name,
        provider=Providers.GoogleSheetsProvider.name,
        storage=Loaders.CSVLoader.name,
        work=work_policy,
        export=google_sheets_export_policy,
        transform=csv_transform_policy,
        load=csv_load_policy,
    )
def ymm_goals_to_csv_notebook(
    flowitem_model,
    work_policy,
    yandex_metrika_management_credentials,
    csv_transform_policy,
    csv_load_policy,
):
    """Yield an ETL notebook exporting Yandex Metrika management goals to CSV."""
    from flowmaster.operators.etl.loaders import Loaders
    from flowmaster.operators.etl.policy import ETLNotebook
    from flowmaster.operators.etl.providers import Providers

    provider = Providers.YandexMetrikaManagementProvider
    # Build the export policy against the "goals" resource with the supplied credentials.
    export_policy = provider.policy_model(
        resource=provider.policy_model.ResourceNames.goals,
        credentials=provider.policy_model.CredentialsPolicy(
            **yandex_metrika_management_credentials
        ),
    )

    flow_name = "__test_ya_metrika_goals_to_csv__"
    flowitem_model.clear(flow_name)

    yield ETLNotebook(
        name=flow_name,
        provider=provider.name,
        storage=Loaders.CSVLoader.name,
        work=work_policy,
        export=export_policy,
        transform=csv_transform_policy,
        load=csv_load_policy,
    )
def seconds_interval_work_policy(pendulum_utcnow):
    """Build a work policy that triggers every 60 seconds (Moscow timezone)."""
    from flowmaster.operators.etl.policy import ETLNotebook

    triggers_cls = ETLNotebook.WorkPolicy.TriggersPolicy
    schedule = triggers_cls.SchedulePolicy(
        timezone="Europe/Moscow",
        start_time="00:00:00",
        from_date=None,
        # Integer interval means "every N seconds" here rather than a named period.
        interval=60,
    )
    return ETLNotebook.WorkPolicy(triggers=triggers_cls(schedule=schedule))
def work_policy(pendulum_utcnow):
    """Build a daily work policy starting five days before today (Moscow timezone)."""
    from flowmaster.operators.etl.policy import ETLNotebook

    triggers_cls = ETLNotebook.WorkPolicy.TriggersPolicy
    # NOTE(review): relies on module-level `pendulum` and `dt` (datetime) imports
    # that are outside this view — confirm they exist at the top of the file.
    schedule = triggers_cls.SchedulePolicy(
        timezone="Europe/Moscow",
        start_time="00:00:00",
        from_date=pendulum.today() - dt.timedelta(5),
        interval="daily",
    )
    return ETLNotebook.WorkPolicy(triggers=triggers_cls(schedule=schedule))
def fakedata_to_csv_notebook(flowitem_model, work_policy, csv_transform_policy, csv_load_policy):
    """Yield an ETL notebook that loads one row of fake provider data into CSV.

    Clears previously-recorded flow items for this notebook's name before yielding.
    """
    from flowmaster.operators.etl.providers import Providers
    from flowmaster.operators.etl.loaders import Loaders
    from flowmaster.operators.etl.policy import ETLNotebook

    # BUG FIX: the original reused "__test_ya_metrika_stats_to_csv__" — a
    # copy-paste from the Yandex Metrika stats fixture — so both fixtures
    # cleared and shared the same flow-item records. Use a unique name.
    name = "__test_fakedata_to_csv__"
    flowitem_model.clear(name)
    yield ETLNotebook(
        name=name,
        provider=Providers.FakeDataProvider.name,
        storage=Loaders.CSVLoader.name,
        work=work_policy,
        export=Providers.FakeDataProvider.policy_model(rows=1),
        transform=csv_transform_policy,
        load=csv_load_policy,
    )
def ya_direct_report_to_csv_notebook(
    flowitem_model,
    work_policy,
    csv_transform_policy,
    csv_load_policy,
    yandex_direct_credentials,
):
    """Yield an ETL notebook exporting a Yandex Direct account report to CSV.

    Flow items are cleared before the notebook is yielded and again on teardown.
    """
    from flowmaster.operators.etl.providers import Providers
    from flowmaster.operators.etl.loaders import Loaders
    from flowmaster.operators.etl.policy import ETLNotebook

    provider = Providers.YandexDirectProvider
    report_body_cls = provider.policy_model.ReportBodyPolicy

    report_params = report_body_cls.ReportParamsPolicy(
        ReportType="ACCOUNT_PERFORMANCE_REPORT",
        DateRangeType="AUTO",
        FieldNames=["CampaignType", "Cost"],
        IncludeVAT="NO",
        Page=report_body_cls.ReportParamsPolicy.PagePolicy(Limit=10),
    )
    export_policy = provider.policy_model(
        credentials=provider.policy_model.CredentialsPolicy(**yandex_direct_credentials),
        resource="reports",
        headers=provider.policy_model.HeadersPolicy(return_money_in_micros=True),
        body=report_body_cls(params=report_params),
    )

    name = "__test2_ya_direct_report_to_csv__"
    flowitem_model.clear(name)

    yield ETLNotebook(
        name=name,
        provider=provider.name,
        storage=Loaders.CSVLoader.name,
        work=work_policy,
        export=export_policy,
        transform=csv_transform_policy,
        load=csv_load_policy,
    )

    # Teardown: remove flow items created while the fixture was in use.
    flowitem_model.clear(name)
def test_codex_telegram():
    """Run an ETL flow with codex-telegram success notifications enabled.

    Replaces the logs provider's export callable with a stub so no network
    call to Yandex Metrika is made.
    """

    def fake_export(start_period, end_period) -> Iterator[tuple[dict, list, list]]:
        # Minimal export stub: a single chunk with one "date" column and one row.
        yield ({}, ["date"], [[start_period]])

    notifications = ETLNotebook.WorkPolicy.NotificationsPolicy(
        codex_telegram=ETLNotebook.WorkPolicy.NotificationsPolicy.CodexTelegramPolicy(
            links=[credentials["codex_telegram"]],
            on_success=True,
        )
    )
    ya_metrika_logs_to_csv_notebook.work.notifications = notifications

    notebook = ETLNotebook(**dict(ya_metrika_logs_to_csv_notebook))
    Providers.YandexMetrikaLogsProvider.export_class.__call__ = Mock(
        side_effect=fake_export
    )

    operator = ETLOperator(notebook)
    period = dt.datetime(2021, 1, 1)
    # The operator returns a lazy iterator; drain it so the flow actually runs.
    list(operator(start_period=period, end_period=period))
def criteo_to_csv_notebook(
    work_policy,
    criteo_export_policy,
    csv_transform_policy,
    csv_load_policy,
    flowitem_model,
):
    """Yield an ETL notebook that exports Criteo data into a CSV file."""
    from flowmaster.operators.etl.loaders import Loaders
    from flowmaster.operators.etl.policy import ETLNotebook
    from flowmaster.operators.etl.providers import Providers

    flow_name = "__test_criteo_to_csv__"
    # Drop any flow items left behind by a previous run of this notebook.
    flowitem_model.clear(flow_name)

    yield ETLNotebook(
        name=flow_name,
        provider=Providers.CriteoProvider.name,
        storage=Loaders.CSVLoader.name,
        work=work_policy,
        export=criteo_export_policy,
        transform=csv_transform_policy,
        load=csv_load_policy,
    )
def csv_to_clickhouse_notebook(flowitem_model, work_policy, clickhouse_credentials):
    """Yield an ETL notebook that loads a CSV export into a ClickHouse table."""
    from flowmaster.operators.etl.loaders import Loaders
    from flowmaster.operators.etl.policy import ETLNotebook
    from flowmaster.operators.etl.providers import Providers

    loader = Loaders.ClickhouseLoader
    name = "__test_csv_to_clickhouse__"
    flowitem_model.clear(name)

    export_policy = Providers.CSVProvider.policy_model(
        file_path="", with_columns=False, columns=["date"]
    )
    transform_policy = loader.transform_policy_model(
        error_policy="default",
        partition_columns=["Date"],
        column_map={"date": "Date"},
    )
    # Target table: single Date column, ordered and partitioned by it.
    table_schema = loader.policy_model.TableSchemaPolicy(
        db="default",
        table=name,
        columns=["Date Date"],
        orders=["Date"],
        partition=["Date"],
    )
    load_policy = loader.policy_model(
        credentials=loader.policy_model.CredentialsPolicy(**clickhouse_credentials),
        table_schema=table_schema,
        data_cleaning_mode=loader.DataCleaningMode.off,
        sql_before=["SELECT 1"],
        sql_after=["SELECT 2"],
    )

    yield ETLNotebook(
        name=name,
        provider=Providers.CSVProvider.name,
        storage=loader.name,
        work=work_policy,
        export=export_policy,
        transform=transform_policy,
        load=load_policy,
    )
def ya_metrika_logs_to_csv_notebook2(
    tmp_path,
    work_policy,
    ya_metrika_logs_export_policy,
    csv_transform_policy,
    csv_load_policy,
    flowitem_model,
):
    """Yield an ETL notebook that exports Yandex Metrika logs into a CSV file."""
    from flowmaster.operators.etl.loaders import Loaders
    from flowmaster.operators.etl.policy import ETLNotebook
    from flowmaster.operators.etl.providers import Providers

    flow_name = "__test_ya_metrika_logs_to_csv2__"
    # Remove any flow items recorded under this name by earlier runs.
    flowitem_model.clear(flow_name)

    yield ETLNotebook(
        name=flow_name,
        provider=Providers.YandexMetrikaLogsProvider.name,
        storage=Loaders.CSVLoader.name,
        work=work_policy,
        export=ya_metrika_logs_export_policy,
        transform=csv_transform_policy,
        load=csv_load_policy,
    )
def ymstats_to_csv_notebook(
    flowitem_model,
    work_policy,
    yandex_metrika_stats_credentials,
    csv_transform_policy,
    csv_load_policy,
):
    """Yield an ETL notebook that exports Yandex Metrika stats into a CSV file."""
    from flowmaster.operators.etl.loaders import Loaders
    from flowmaster.operators.etl.policy import ETLNotebook
    from flowmaster.operators.etl.providers import Providers

    provider = Providers.YandexMetrikaStatsProvider
    flow_name = "__test_ya_metrika_stats_to_csv__"
    flowitem_model.clear(flow_name)

    yield ETLNotebook(
        name=flow_name,
        provider=provider.name,
        storage=Loaders.CSVLoader.name,
        work=work_policy,
        export=provider.policy_model(**yandex_metrika_stats_credentials),
        transform=csv_transform_policy,
        load=csv_load_policy,
    )
def flowmasterdata_items_to_csv_notebook(
    tmp_path,
    seconds_interval_work_policy,
    flowmasterdata_items_export_policy,
    csv_transform_policy,
    csv_load_policy,
    flowitem_model,
):
    """Yield an ETL notebook exporting Flowmaster's own item data into CSV.

    Uses the seconds-interval work policy rather than the daily one.
    """
    from flowmaster.operators.etl.loaders.csv.service import CSVLoader
    from flowmaster.operators.etl.policy import ETLNotebook
    from flowmaster.operators.etl.providers.flowmaster_data import (
        FlowmasterDataProvider,
    )

    flow_name = "__test_flowmasterdata_items_to_csv__"
    flowitem_model.clear(flow_name)

    yield ETLNotebook(
        name=flow_name,
        provider=FlowmasterDataProvider.name,
        storage=CSVLoader.name,
        work=seconds_interval_work_policy,
        export=flowmasterdata_items_export_policy,
        transform=csv_transform_policy,
        load=csv_load_policy,
    )