class Extract(Task):
    """Source step of the ETL example: supplies the raw data."""

    def run(self) -> list:
        """Return the hard-coded sample list, printing it first."""
        values = [1, 2, 3]
        message = "Here's your output data: {}".format(values)
        print(message)
        return values


class Transform(Task):
    """Middle step of the ETL example: scales each element."""

    def run(self, data: list) -> list:
        """Return a new list with every element of ``data`` multiplied by 10."""
        scaled = [element * 10 for element in data]
        return scaled


class Load(Task):
    """Final step of the ETL example: reports the data it was handed."""

    def run(self, data: list):
        """Print ``data`` to show that it was received."""
        message = "Here's your output data: {}".format(data)
        print(message)


# Create one instance of each task and the flow that will hold them.
e = Extract()
t = Transform()
l = Load()
flow = Flow('ETL')

# Build the dependency graph imperatively: each downstream task is given its
# upstream task under the ``data`` keyword of its ``run`` method.
flow.set_dependencies(t, keyword_tasks=dict(data=e))
flow.set_dependencies(l, keyword_tasks=dict(data=t))

# flow.run()
# Extract prints "Here's your output data: [1, 2, 3]" and
# Load prints "Here's your output data: [10, 20, 30]".
"""
We run our flow on a 1 minute interval schedule and observe that the output of
`return_random_number` only changes every other run, due to output caching.
"""
import datetime
import random

from prefect import Flow, task
from prefect.schedules import IntervalSchedule


@task(cache_for=datetime.timedelta(minutes=1, seconds=30))
def return_random_number():
    """Return a random float from ``random.random()``.

    The task is declared with ``cache_for`` set to 1 minute 30 seconds, which
    is longer than the 1 minute schedule interval used below.
    """
    return random.random()


@task
def print_number(num):
    """Print ``num`` framed by two 50-character ``=`` rules."""
    print("=" * 50)
    print("Value: {}".format(num))
    print("=" * 50)


schedule = IntervalSchedule(
    start_date=datetime.datetime.utcnow(),
    interval=datetime.timedelta(minutes=1),
)

flow = Flow("cached-task", schedule=schedule)

# ``print_number`` needs its ``num`` argument, so the upstream task has to be
# bound through ``keyword_tasks``. ``upstream_tasks`` alone only orders the two
# tasks and passes no data, which would leave ``num`` unset at run time.
flow.set_dependencies(
    task=print_number,
    keyword_tasks={'num': return_random_number},
)

# Strip only a trailing ".py" from the script path. ``str.replace`` would also
# remove ".py" occurring elsewhere in the path (e.g. inside ".pyenv").
_output_path = __file__
if _output_path.endswith('.py'):
    _output_path = _output_path[:-len('.py')]

flow.visualize(format='png', filename=_output_path)
from prefect import Flow

from etl import extract, transform, load

# Assemble the ETL graph imperatively: ``transform`` is given ``extract`` and
# ``load`` is given ``transform``, each under the ``data`` keyword.
flow = Flow('ETL')
flow.set_dependencies(transform, keyword_tasks={'data': extract})
flow.set_dependencies(load, keyword_tasks={'data': transform})

# Run the flow as soon as this script executes.
flow.run()  # prints "Here's your data: [10, 20, 30]"
flow_full_data_pipeline = Flow("Imperative-MTG-NLP-full-flow")

# %% INSTANTIATE TASKS
# One instance per pipeline step, created in the intended running order.
create_cards_database = CreateCardsDatabase()
load_decks_into_database = LoadDecksIntoDatabase()
enhance_cards_with_nlp = EnhanceCardsDataWithNLP()
build_individual_cards_graph = BuildIndividualCardsInOutGraph()
build_text_to_entity_graphs = BuildTextToEntityGraphs()
build_graph_for_a_few_cards_and_save_pics = BuildGraphForAFewCardsAndSaveInPics()

# %% SET DEPENDENCIES
# Each pair is (task, the single upstream task it must run after). The pairs
# are registered from the end of the chain back toward its start.
# NOTE(review): ``load_decks_into_database`` is instantiated above but does not
# appear in any pair here -- confirm whether it is wired elsewhere or was
# left out of the flow unintentionally.
_DEPENDENCY_PAIRS = (
    (build_graph_for_a_few_cards_and_save_pics, build_text_to_entity_graphs),
    (build_text_to_entity_graphs, build_individual_cards_graph),
    (build_individual_cards_graph, enhance_cards_with_nlp),
    (enhance_cards_with_nlp, create_cards_database),
)

for _downstream, _upstream in _DEPENDENCY_PAIRS:
    flow_full_data_pipeline.set_dependencies(
        task=_downstream,
        upstream_tasks=[_upstream],
    )