示例#1
0
# Placeholder project id; every reference below is derived from it.
PROJECT_ID = 'put-you-project-id-here'

# Dataset handle used by the rest of this example: it registers one external
# table under the alias '311_requests' and one internal table name.
dataset = Dataset(
    project_id=PROJECT_ID,
    dataset_name='bigflow_cheatsheet',
    external_tables={'311_requests': '{}.external_data.311_requests'.format(PROJECT_ID)},
    internal_tables=['request_aggregate'],
)

# Sensor component over the '311_requests' table alias of `dataset`.
# The where clause matches rows whose created_date falls on the calendar date
# of '{dt}' shifted back by 24 hours.
# NOTE(review): '{dt}' is presumably substituted by bigflow at run time -- confirm.
wait_for_requests = bf.bigquery.sensor_component(
    '311_requests',
    ds=dataset,
    where_clause="DATE(TIMESTAMP(created_date)) = DATE(TIMESTAMP_ADD(TIMESTAMP('{dt}'), INTERVAL -24 HOUR))",
)

# Module-level log of job ids, appended to in the order the jobs are run.
started_jobs = []


class ExampleJob:
    """Minimal job that records its own id in ``started_jobs`` when run."""

    def __init__(self, id):
        # ``id`` shadows the builtin, but the parameter name is part of the
        # constructor's interface, so it is kept unchanged.
        self.id = id

    def run(self, runtime):
        """Append this job's id to ``started_jobs``; ``runtime`` is not used."""
        job_id = self.id
        started_jobs.append(job_id)

# Two workflows built from sensor jobs; only the first sets a schedule interval.
workflow_1 = bf.Workflow(
    workflow_id="ID_1",
    definition=[
        wait_for_requests.to_job(),
        wait_for_requests.to_job(),
    ],
    schedule_interval="@once",
)
workflow_2 = bf.Workflow(
    workflow_id="ID_2",
    definition=[wait_for_requests.to_job()],
)

# Two workflows built from plain ExampleJob instances.
workflow_3 = bf.Workflow(
    workflow_id="ID_3",
    definition=[
        ExampleJob("J_ID_3"),
        ExampleJob("J_ID_4"),
    ],
)
workflow_4 = bf.Workflow(
    workflow_id="ID_4",
    definition=[ExampleJob("J_ID_5")],
)

# Printed once, when this module is executed or imported.
print("AAA")
示例#2
0
import bigflow as bf


# Two workflows with empty definitions and identical logging configuration;
# only the first one sets a schedule interval.
workflow_1 = bf.Workflow(
    workflow_id="ID_1",
    definition=[],
    schedule_interval="@once",
    log_config={'gcp_project_id': 'some-project-id', 'log_level': 'INFO'},
)
workflow_2 = bf.Workflow(
    workflow_id="ID_2",
    definition=[],
    log_config={'gcp_project_id': 'some-project-id', 'log_level': 'INFO'},
)
示例#3
0
import bigflow as bf
from .job import ExampleJob

# Single-job workflow; ExampleJob comes from the sibling ``job`` module.
workflow2 = bf.Workflow(
    workflow_id='workflow2',
    definition=[ExampleJob('job1')],
)
示例#4
0
import datetime
import bigflow

class DailyJob(bigflow.Job):
    """Job that prints the one-day time window starting at its runtime."""

    id = 'daily_job'

    def execute(self, context):
        """Print the window from ``context.runtime`` to one second short of a day later."""
        window_start = context.runtime
        # One day minus one second, so the end stays inside the same daily window.
        window_length = datetime.timedelta(days=1, seconds=-1)
        window_end = window_start + window_length
        print(f'I should process data with timestamps from: {window_start} to {window_end}')

# Workflow with a '@daily' schedule interval and a single DailyJob.
daily_workflow = bigflow.Workflow(
    workflow_id='daily_workflow',
    definition=[DailyJob()],
    schedule_interval='@daily',
)

# When executed as a script, run the workflow once for 2020-01-01 00:00.
if __name__ == '__main__':
    daily_workflow.run(datetime.datetime(year=2020, month=1, day=1))
示例#5
0
        '311_requests': '{}.external_data.311_requests'.format(PROJECT_ID)
    },
    internal_tables=['request_aggregate'])

# Sensor over the '311_requests' table alias of `dataset`.
# The where clause matches rows whose created_date falls on the calendar date
# of '{dt}' shifted back by 24 hours.
# NOTE(review): '{dt}' is presumably substituted by bigflow at run time -- confirm.
wait_for_requests = bf.bigquery.sensor(
    '311_requests',
    ds=dataset,
    where_clause="DATE(TIMESTAMP(created_date)) = DATE(TIMESTAMP_ADD(TIMESTAMP('{dt}'), INTERVAL -24 HOUR))",
)

# Module-level log of job ids, appended to in the order the jobs are executed.
started_jobs = []


class ExampleJob:
    """Minimal job that records its own id in ``started_jobs`` when executed."""

    def __init__(self, id):
        # ``id`` shadows the builtin, but the parameter name is part of the
        # constructor's interface, so it is kept unchanged.
        self.id = id

    def execute(self, context):
        """Append this job's id to ``started_jobs``; ``context`` is not used."""
        job_id = self.id
        started_jobs.append(job_id)

# Two sensor-based workflows; each carries its own logging configuration
# (note the different 'gcp_project_id' values).
workflow_1 = bf.Workflow(
    workflow_id="ID_1",
    definition=[
        wait_for_requests.to_job(),
        wait_for_requests.to_job(),
    ],
    schedule_interval="@once",
    log_config={'gcp_project_id': 'some-project-id', 'log_level': 'INFO'},
)
workflow_2 = bf.Workflow(
    workflow_id="ID_2",
    definition=[wait_for_requests.to_job()],
    log_config={'gcp_project_id': 'another-project-id', 'log_level': 'INFO'},
)

# Two workflows built from plain ExampleJob instances, without log_config.
workflow_3 = bf.Workflow(
    workflow_id="ID_3",
    definition=[
        ExampleJob("J_ID_3"),
        ExampleJob("J_ID_4"),
    ],
)
workflow_4 = bf.Workflow(
    workflow_id="ID_4",
    definition=[ExampleJob("J_ID_5")],
)

# Printed once, when this module is executed or imported.
print("AAA")
示例#6
0
from pathlib import Path

import bigflow
from bigflow.resources import get_resource_absolute_path


class PrintResourceJob(bigflow.Job):
    """Job that prints the contents of the ``example_resource.txt`` resource."""

    id = 'print_resource_job'

    def execute(self, context: bigflow.JobContext):
        """Locate the resource via ``get_resource_absolute_path`` and print its text."""
        # The lookup is given this module's own path as the second argument.
        this_module = Path(__file__)
        resource_path = get_resource_absolute_path('example_resource.txt', this_module)
        with open(resource_path) as resource_file:
            print(resource_file.read())


# Workflow consisting of the single resource-printing job.
resources_workflow = bigflow.Workflow(workflow_id='resources_workflow', definition=[PrintResourceJob()])
示例#7
0
import bigflow


class TheJob(bigflow.Job):
    """Job whose ``execute`` performs no work."""

    def execute(self, context: bigflow.JobContext):
        """Do nothing; ``context`` is ignored."""
        return None


# Workflow wrapping a single TheJob instance.
# NOTE(review): TheJob defines no __init__ of its own, so passing 'the_job'
# relies on bigflow.Job accepting a positional argument -- confirm.
the_workflow = bigflow.Workflow(workflow_id='workflow_one', definition=[TheJob('the_job')])
示例#8
0
import bigflow


class HelloWorldJob(bigflow.Job):
    """Job that prints a greeting containing the context's runtime."""

    id = 'hello_world'

    def execute(self, context: bigflow.JobContext):
        """Print ``Hello world on <runtime>!`` using ``context.runtime``."""
        greeting = f'Hello world on {context.runtime}!'
        print(greeting)


class SayGoodbyeJob(bigflow.Job):
    """Job that prints a fixed farewell message."""

    id = 'say_goodbye'

    def execute(self, context: bigflow.JobContext):
        """Print ``Goodbye!``; ``context`` is not used."""
        # Plain literal: the original f-string had no placeholders, so the
        # ``f`` prefix was a no-op that linters flag (F541).
        print('Goodbye!')


# Workflow listing the greeting job first and the farewell job second.
hello_world_workflow = bigflow.Workflow(
    workflow_id='hello_world_workflow',
    definition=[HelloWorldJob(), SayGoodbyeJob()],
)
示例#9
0
import bigflow as bf
from .Unused1 import ExampleJob

# Plain integer defined before the workflow; nothing in this snippet reads it.
int_1 = 123

# Single-job workflow; ExampleJob is imported from the sibling ``Unused1`` module.
workflow_1 = bf.Workflow(
    workflow_id="ID_5",
    definition=[ExampleJob("J_ID_6")],
)

# Two more plain integers defined after the workflow.
int_2, int_3 = 456, 789
示例#10
0
import datetime
import bigflow
from bigflow.workflow import hourly_start_time


class HourlyJob(bigflow.Job):
    """Job that prints the one-hour time window starting at its runtime."""

    id = 'hourly_job'

    def execute(self, context):
        """Print the window from ``context.runtime`` to 59 min 59 s later."""
        window_start = context.runtime
        window_end = window_start + datetime.timedelta(minutes=59, seconds=59)
        print(f'I should process data with timestamps from: {window_start} to {window_end}')


# Workflow with an '@hourly' schedule interval; start times come from
# bigflow's ``hourly_start_time`` factory.
hourly_workflow = bigflow.Workflow(
    workflow_id='hourly_workflow',
    definition=[HourlyJob()],
    schedule_interval='@hourly',
    start_time_factory=hourly_start_time,
)

# When executed as a script, run the workflow once for 2020-01-01 10:00.
if __name__ == '__main__':
    hourly_workflow.run(datetime.datetime(year=2020, month=1, day=1, hour=10))
示例#11
0
# Two named configurations, 'dev' and 'prod', each supplying its own value
# for the 'message_to_print' property.
config = bigflow.Config(
    name='dev',
    properties={'message_to_print': 'Message to print on DEV'},
).add_configuration(
    name='prod',
    properties={'message_to_print': 'Message to print on PROD'},
)


class HelloConfigJob(bigflow.Job):
    """Job that prints the message it was constructed with."""

    id = 'hello_config_job'

    def __init__(self, message_to_print):
        # Keep the message on the instance so ``execute`` can print it later.
        self.message_to_print = message_to_print

    def execute(self, context: bigflow.JobContext):
        """Print the stored message; ``context`` is not used."""
        message = self.message_to_print
        print(message)


# Workflow whose only job prints the 'message_to_print' property resolved
# from `config` at module load time.
hello_world_workflow = bigflow.Workflow(
    workflow_id='hello_config_workflow',
    definition=[HelloConfigJob(config.resolve_property('message_to_print'))],
)