示例#1
0
from dataiku.scenario import Scenario

# Handle on which the build step below is launched
scenario = Scenario()

# The partitions to build are given as a string written in the
# partitions spec syntax.
partition_spec = "partition1|partition2"
scenario.build_dataset("mydataset", partitions=partition_spec)
示例#2
0
# This part can be used in a custom scenario's script, or in an "Execute python" step in a step-based scenario
import dataiku
from dataiku.scenario import Scenario

# Handle used to set the scenario variable and to launch the build
scenario = Scenario()

source_name = 'input_partitioned'
target_name = 'output'

# List the partitions of the source dataset and report how many there are
source_partitions = dataiku.Dataset(source_name).list_partitions()
print("Dataset %s has %s partitions" % (source_name, len(source_partitions)))

# Hook for narrowing down the selection; as written, every listed
# partition is kept
selected_partitions = source_partitions

# Join the selected partitions into one comma-separated string and store
# it in the scenario variable named partition_list
partition_spec = ','.join(selected_partitions)
scenario.set_scenario_variables(partition_list=partition_spec)

# Step-based scenario:
# add a build step for the output dataset and use ${partition_list} as
# its partition identifier

# Custom scenario:
# launch the build from here, referring to the variable set above
scenario.build_dataset(target_name, partitions='${partition_list}')
# Alternative for a custom scenario: skip the variable and pass the value
# directly:
# scenario.build_dataset(target_name, partitions=partition_spec)
# This sample code helps you get started with the custom scenario API.
# For more details and samples, please see our documentation.
from dataiku.scenario import Scenario

# Every step below is launched through this Scenario handle
scenario = Scenario()

# Example steps

# Build a dataset, restricted to the given partitions spec
scenario.build_dataset("customers_prepared", partitions="2015-01-03")

# Train a model, then activate the new version only when its AUC
# is above 0.85
trained_model = scenario.train_model("uSEkldfsm").get_trained_model()
new_version_metrics = trained_model.get_new_version_metrics()
performance = new_version_metrics.get_performance_values()
if performance["AUC"] > 0.85:
    trained_model.activate_new_version()

# Send a custom report
# get_message_sender() returns the handle for a messaging channel
mail_sender = scenario.get_message_sender("mail-scenario", "local-mail")
mail_sender.set_params(
    sender="*****@*****.**",
    recipient="*****@*****.**",
)
mail_sender.send(
    subject="The scenario is doing well",
    message="All is good",
)
示例#4
0
###########################################################################################
# !! CUSTOM SCENARIO EXAMPLE !!                                                           #
# See https://doc.dataiku.com/dss/latest/scenarios/custom_scenarios.html for more details #
###########################################################################################

import time
import dataiku
from dataiku.scenario import Scenario, BuildFlowItemsStepDefHelper
from dataikuapi.dss.future import DSSFuture

# Maximum time, in seconds, the asynchronous step may run before it is aborted
TIMEOUT_SECONDS = 3600
# Pause, in seconds, between two status checks of the running step
POLL_INTERVAL_SECONDS = 5

s = Scenario()

# Replace this commented block by your Scenario steps
# Example: build a Dataset
# asynchronous=True is passed so that the returned handle can be polled below
step_handle = s.build_dataset("your_dataset_name", asynchronous=True)

start = time.time()
while not step_handle.is_done():
    end = time.time()
    print("Duration: {}s".format(end - start))
    if end - start > TIMEOUT_SECONDS:
        # Abort the future behind the step, then raise to stop this script
        f = DSSFuture(dataiku.api_client(), step_handle.future_id)
        f.abort()
        raise Exception("Scenario was aborted because it took too much time.")
    # Wait before checking again instead of spinning: without this pause the
    # loop calls is_done() and prints a line as fast as it can.
    time.sleep(POLL_INTERVAL_SECONDS)
示例#5
0
# This sample code helps you get started with the custom scenario API.
# For more details and samples, please see our documentation.
from dataiku.scenario import Scenario

# Every step below is launched through this Scenario handle
scenario = Scenario()

# Example steps: two dataset builds using the same build mode

forced_recursive = "RECURSIVE_FORCED_BUILD"

# Build "scores", passing an explicit project key
scenario.build_dataset("scores", build_mode=forced_recursive, project_key="FRAUD_MODEL")

# Build "unseen_scored" without passing a project key
scenario.build_dataset(
    "unseen_scored",
    build_mode=forced_recursive,
)
示例#6
0
from dataiku.scenario import Scenario

# Handle on which the build step is launched
scenario = Scenario()

# Minimal build call: only the dataset name is passed
target_dataset = "mydataset"
scenario.build_dataset(target_dataset)