def build_scenario(build_plan,
                   filter_on='ready',
                   connection='dataiku_workspace',
                   ref_table='referentialclient',
                   ref_project='DIReferential',
                   add_ecrm_context=True,
                   finish_on_client=None,
                   single_client=None):
    """Run ``build_plan`` once per client listed in the client referential.

    The referential is read from the SQL table named
    ``<env>_<ref_project>_<ref_table>``, where ``env`` comes from the
    current project's local variables. It is then filtered, optionally
    reordered or restricted, and iterated row by row. For each row the
    client context is set with ``set_client_context`` and the plan is run
    with ``run_scenario``.

    Args:
        build_plan: Plan forwarded to ``run_scenario`` as ``table_plan``.
        filter_on: A pandas ``query`` expression, or a list of them, that
            are AND-ed together to filter the referential. ``None`` or an
            empty list disables filtering.
        connection: Name of the SQL connection used to read the
            referential; also forwarded to ``set_client_context``.
        ref_table: Table-name part of the referential table.
        ref_project: Project-name part of the referential table.
        add_ecrm_context: Forwarded to ``set_client_context``.
        finish_on_client: ``clientName`` to move to the end of the run
            order. Ignored when null.
        single_client: If given and present in the filtered referential,
            only this ``clientName`` is built. If it is not present, all
            filtered clients are built.

    Returns:
        list: Names of the clients processed, in run order.
    """
    scenario = Scenario()

    # Normalise filter_on to a list of non-empty query expressions.
    if filter_on is None:
        filter_on = []
    elif not isinstance(filter_on, list):
        filter_on = [filter_on]
    filter_on = [expression for expression in filter_on if expression]

    project_name = dataiku.default_project_key()
    project = dataiku.api_client().get_project(project_name)
    local_variables = project.get_variables()['local']
    env = local_variables['env']
    kut.display_message('reading client context referential')

    executor = SQLExecutor2(connection=connection)
    sql_query_referential_client = "SELECT * FROM " + '_'.join(
        [env, ref_project, ref_table])
    client_ref = executor.query_to_df(sql_query_referential_client)
    filter_query = ' & '.join(filter_on)
    if filter_query:
        client_ref = client_ref.query(filter_query)

    # unique() returns an array; join it so a single readable string is shown
    # instead of an element-wise concatenation result.
    ready_clients = ', '.join(
        str(name) for name in client_ref.clientName.unique())
    kut.display_message('Client ready for automation  : ' + ready_clients)

    kut.display_message('run configuration')
    print(build_plan)

    if not pd.isnull(finish_on_client):
        finish_client = client_ref[client_ref.clientName == finish_on_client]
        if finish_client.empty:
            kut.display_message(
                'finish client not found in plan ' + str(finish_on_client) +
                ' is the client name valid ?'
            )
        # Move the requested client to the end of the run order.
        other_clients = client_ref[client_ref.clientName != finish_on_client]
        client_ref = pd.concat([other_clients, finish_client],
                               ignore_index=True)

    success = []
    if single_client is not None:
        requested_client = client_ref[client_ref.clientName == single_client]
        # Test the filtered frame, not the requested name, so an unknown
        # client really falls back to building every allowed client.
        if requested_client.empty:
            kut.display_message(
                'requested single client is not found,building all allowed clients'
            )
        else:
            client_ref = requested_client

    for _, client_row in client_ref.iterrows():
        variables = set_client_context(client_row=client_row,
                                       add_ecrm_context=add_ecrm_context,
                                       connection=connection)
        client_name = variables['local']['clientName']
        kut.display_message('starting builds on ' + client_name)

        run_scenario(table_plan=build_plan, scenario=scenario)
        success.append(client_name)
        # Publish progress after every client so a later failure still
        # leaves the list of completed clients in the global variables.
        scenario.set_global_variables(successfullRun=success)
        print('done_________________' + client_name)
    return success
Exemplo n.º 2
0
from dataiku.scenario import Scenario

# Create the handle used to drive scenario actions from this script.
scenario = Scenario()

# Request a Hive metastore synchronization for the dataset named "mydataset".
scenario.synchronize_hive_metastore("mydataset")
Exemplo n.º 3
0
# this part can be used in a custom scenario's script, or in a "Execute python" step in a step-based scenario
import dataiku
from dataiku.scenario import Scenario

# Handle used to interact with the scenario from this script.
s = Scenario()

# Dataset to build, and the partitioned dataset whose partitions drive it.
output_name = 'output'
dataset_name = 'input_partitioned'

# List the partitions of the input dataset.
ds = dataiku.Dataset(dataset_name)
all_partitions = ds.list_partitions()
print("Dataset %s has %s partitions" % (dataset_name, len(all_partitions)))

# Narrow the selection here if only some partitions should be built.
partitions_to_build = all_partitions

# Store the selection as a comma-separated string in a scenario variable.
partition_list_value = ','.join(partitions_to_build)
s.set_scenario_variables(partition_list=partition_list_value)

# Step-based scenario: add a build step for the output dataset and use
# ${partition_list} as its partition identifier.

# Custom scenario: launch the build from here, referencing the variable.
s.build_dataset(output_name, partitions='${partition_list}')
# Alternatively, in a custom scenario, pass the value directly instead of
# going through a variable:
# s.build_dataset(output_name, partitions=partition_list_value)
Exemplo n.º 4
0
import dataiku
import pandas as pd, numpy as np
from dataiku import pandasutils as pdu
import uuid
from dataiku.scenario import Scenario
PARTNER, MODEL_TYPE = 'IHG', 'BG'

# Project handle and its variables (read below for the current dates).
proj_handle = dataiku.api_client().get_project("IHG_AUTO_V1_1")
proj_var = proj_handle.get_variables()

# Main handle to interact with the scenario, and the outputs of the steps
# reported by get_previous_steps_outputs().
scen = Scenario()
step = scen.get_previous_steps_outputs()


def _first_step_result(step_outputs, step_name):
    """Return the 'result' of the first entry whose 'stepName' matches.

    Raises IndexError when no entry in *step_outputs* has that name.
    """
    matching = [d for d in step_outputs if d['stepName'] == step_name]
    return matching[0]['result']


# Score: first cell of the first row returned by the 'query_date' step.
result_date = _first_step_result(step, 'query_date')
new_score_date = result_date['rows'][0][0]

# Train: same extraction for the 'train_ref_date' step.
result_train_ref_date = _first_step_result(step, 'train_ref_date')
new_train_date = result_train_ref_date['rows'][0][0]

# Valid: same extraction for the 'valid_ref_date' step.
result_valid_ref_date = _first_step_result(step, 'valid_ref_date')
new_valid_date = result_valid_ref_date['rows'][0][0]

# Previous values, as currently stored in the project's standard variables.
standard_vars = proj_var["standard"]
cur_score_date = standard_vars["scoring_file_date"]
cur_train_date = standard_vars["train_file_date"]