Exemplo n.º 1
0
class Extract(Task):
    """First ETL stage: supplies a small, fixed list of integers."""

    def run(self) -> list:
        """Build the sample data, echo it to stdout, and hand it back.

        Returns:
            The list ``[1, 2, 3]``.
        """
        values = list(range(1, 4))
        message = "Here's your output data: {}".format(values)
        print(message)
        return values


class Transform(Task):
    """Middle ETL stage: scales every element it receives by ten."""

    def run(self, data: list) -> list:
        """Return a new list holding each item of ``data`` multiplied by 10.

        Args:
            data: The items to scale; the input list itself is not modified.

        Returns:
            A new list with one scaled entry per input item, in order.
        """
        scaled = [item * 10 for item in data]
        return scaled


class Load(Task):
    """Final ETL stage: reports the data it was handed."""

    def run(self, data: list):
        """Write ``data`` to stdout to show that it arrived.

        Args:
            data: The value to report; it is only formatted, never modified.
        """
        message = "Here's your output data: {}".format(data)
        print(message)


# Instantiate one task per ETL stage, plus the flow that will hold them
e = Extract()
t = Transform()
l = Load()
flow = Flow('ETL')

# Wire the stages together: each earlier stage is bound to the `data`
# keyword argument of the stage that follows it
flow.set_dependencies(task=t, keyword_tasks=dict(data=e))
flow.set_dependencies(task=l, keyword_tasks=dict(data=t))

# flow.run()  # Extract prints "Here's your output data: [1, 2, 3]",
#             # then Load prints "Here's your output data: [10, 20, 30]"
Exemplo n.º 2
0
We run our flow on a 1 minute interval schedule and observe that the output of
`return_random_number` only changes every other run, due to output caching.
"""
import datetime
import random

from prefect import Flow, task
from prefect.schedules import IntervalSchedule


@task(cache_for=datetime.timedelta(seconds=90))
def return_random_number():
    """Return a pseudo-random float drawn by ``random.random()``.

    The decorator's ``cache_for`` argument is set to 90 seconds
    (1 minute 30 seconds), which is longer than the flow's 1 minute interval.
    """
    number = random.random()
    return number


@task
def print_number(num):
    """Print ``num`` framed above and below by a rule of 50 ``=`` characters.

    Args:
        num: The value to display; it is formatted with ``str.format``.
    """
    rule = "=" * 50
    print(rule)
    print("Value: {}".format(num))
    print(rule)


# Fire the flow once per minute, starting from the moment this script runs.
# NOTE(review): utcnow() returns a naive datetime -- confirm the schedule
# treats it as UTC.
schedule = IntervalSchedule(start_date=datetime.datetime.utcnow(),
                            interval=datetime.timedelta(minutes=1))

flow = Flow("cached-task", schedule=schedule)
# print_number requires a `num` argument, so the random number has to be bound
# as a keyword task; listing it only under upstream_tasks would order the two
# tasks without passing any value to `num`.
flow.set_dependencies(task=print_number,
                      keyword_tasks={'num': return_random_number})

# Save the rendered graph next to this script, named after it without the
# ".py" suffix. Only a trailing ".py" is stripped: str.replace would also
# remove ".py" occurring elsewhere in the path (e.g. a ".pyenv" directory).
flow.visualize(
    format='png',
    filename=__file__[:-len('.py')] if __file__.endswith('.py') else __file__,
)
Exemplo n.º 3
0
from prefect import Flow

from etl import extract, transform, load

# Assemble the ETL flow from the tasks imported from the `etl` module
flow = Flow('ETL')

# Bind each earlier stage to the `data` keyword argument of the next stage
flow.set_dependencies(task=transform, keyword_tasks={'data': extract})
flow.set_dependencies(task=load, keyword_tasks={'data': transform})

# Expected to print "Here's your data: [10, 20, 30]" (the exact text depends
# on the `etl` module, which is not shown here)
flow.run()
Exemplo n.º 4
0
flow_full_data_pipeline = Flow("Imperative-MTG-NLP-full-flow")

# %% INSTANTIATE TASKS

# One instance per pipeline stage
create_cards_database = CreateCardsDatabase()
load_decks_into_database = LoadDecksIntoDatabase()
enhance_cards_with_nlp = EnhanceCardsDataWithNLP()
build_individual_cards_graph = BuildIndividualCardsInOutGraph()
build_text_to_entity_graphs = BuildTextToEntityGraphs()
build_graph_for_a_few_cards_and_save_pics = (
    BuildGraphForAFewCardsAndSaveInPics())

# %% SET DEPENDENCIES
# Edges are declared from the last stage back towards the first; each call
# names a task and the single task that must come before it.
# NOTE(review): load_decks_into_database is instantiated above but is not
# wired into the flow in the lines visible here -- confirm this is intended.
flow_full_data_pipeline.set_dependencies(
    build_graph_for_a_few_cards_and_save_pics,
    upstream_tasks=[build_text_to_entity_graphs])
flow_full_data_pipeline.set_dependencies(
    build_text_to_entity_graphs,
    upstream_tasks=[build_individual_cards_graph])
flow_full_data_pipeline.set_dependencies(
    build_individual_cards_graph,
    upstream_tasks=[enhance_cards_with_nlp])

flow_full_data_pipeline.set_dependencies(
    enhance_cards_with_nlp,
    upstream_tasks=[create_cards_database])