def build_project(project_name, global_props, project_props, jobs, files, version):
    """Build a ``Project`` from its properties, jobs and files.

    Args:
        project_name: Name passed to the ``Project`` constructor.
        global_props: Mapping of properties used as the base of the project's
            properties. It is copied, never modified, so the same mapping can
            safely be reused for several builds.
        project_props: Mapping of properties merged on top of
            ``global_props``; its values win when a key appears in both.
        jobs: Mapping of job name to job definition. Each definition is
            wrapped in ``Job`` before being added to the project.
        files: Iterable of ``(file, target)`` pairs, each forwarded to
            ``Project.add_file``.
        version: Version passed to the ``Project`` constructor (also logged).

    Returns:
        The populated ``Project`` instance.
    """
    logger.info("Building workflow %s, version: %s.", project_name, version)
    project = Project(project_name, root=os.curdir, version=version)
    # Start from a copy: assigning ``global_props`` itself and then calling
    # ``update`` would write the project-specific entries into the caller's
    # shared mapping and leak them into every later build.
    project.properties = dict(global_props)
    project.properties.update(project_props)
    for job_name, job_definition in jobs.items():
        project.add_job(job_name, Job(job_definition))
    # ``path`` rather than ``file`` to avoid shadowing the Python 2 builtin.
    for path, target in files:
        project.add_file(path, target)
    return project
#!/usr/bin/env python # encoding: utf-8 """ Azkaban example projects configuration script. • Azkaban CLI syntax definition to configure all examples in this project """ from azkaban import Job, Project PROJECT = Project('azkaban_examples', root=__file__) # Project level properties declared here are visible to all jobs. PROJECT.properties = { 'project_1': 'project-val1' } JOBS = { # `basic_flow` example 'basic_step_1.cmd': Job({'type': 'command', 'command': 'echo "job: basic_step_1.cmd"'}), 'basic_step_2.cmd': Job({'type': 'command', 'command': 'echo "job: basic_step_2.cmd"', 'dependencies': 'basic_step_1.cmd'}), 'basic_step_3.cmd': Job({'type': 'command', 'command': 'echo "job: basic_step_3.cmd"', 'dependencies': 'basic_step_2.cmd'}), 'basic_step_4.cmd': Job({'type': 'command', 'command': 'echo "job: basic_step_4.cmd"', 'dependencies': 'basic_step_3.cmd'}), 'basic_step_5.cmd': Job({'type': 'command', 'command': 'echo "job: basic_step_5.cmd"', 'dependencies': 'basic_step_4.cmd'}), 'basic_step_6.cmd': Job({'type': 'command', 'command': 'echo "job: basic_step_6.cmd"', 'dependencies': 'basic_step_4.cmd'}), 'basic_step_7.cmd': Job({'type': 'command', 'command': 'echo "job: basic_step_7.cmd"', 'dependencies': 'basic_step_2.cmd'}), 'basic_step_8.cmd': Job({'type': 'command', 'command': 'echo "job: basic_step_8.cmd"', 'dependencies': 'basic_step_2.cmd'}), 'basic_flow': Job({'type': 'noop' , 'dependencies': 'basic_step_5.cmd,basic_step_6.cmd,basic_step_7.cmd,basic_step_8.cmd'}), # `template_flow` example # • Demonstrates using one flow as a "template" that is embedded in another flow and reused multiple times. # • The only work performed by job in this example template is to echo out the variables it receives to the log. # NOTE: We have to `chmod 777` our script to make sure Azkaban can run it.
production and test, without any job duplication. """ from azkaban import Job, Project from getpass import getuser # Production project # ------------------ # # This project is configured to run in a production environment (e.g. using a # headless user with permissions to write to a specific directory). PROJECT = Project('azkabancli_sample', root=__file__) PROJECT.properties = { 'user.to.proxy': 'production_user', 'hdfs.root': '/jobs/sample/' } # dictionary of jobs, keyed by job name JOBS = { 'gather_data': Job({ 'type': 'hadoopJava', 'job.class': 'sample.GatherData', 'path.output': '${hdfs.root}data.avro', # note the property use here }), # ... } for name, job in JOBS.items():
#!/usr/bin/env python # encoding: utf-8 """ Azkaban example projects configuration script. • Azkaban CLI syntax definition to configure all examples in this project """ from azkaban import Job, Project PROJECT = Project('azkaban_examples', root=__file__) # Project level properties declared here are visible to all jobs. PROJECT.properties = {'project_1': 'project-val1'} JOBS = { # `basic_flow` example 'basic_step_1.cmd': Job({ 'type': 'command', 'command': 'echo "job: basic_step_1.cmd"' }), 'basic_step_2.cmd': Job({ 'type': 'command', 'command': 'echo "job: basic_step_2.cmd"', 'dependencies': 'basic_step_1.cmd' }), 'basic_step_3.cmd': Job({ 'type': 'command', 'command': 'echo "job: basic_step_3.cmd"', 'dependencies': 'basic_step_2.cmd'
""" from azkaban import Job, Project from getpass import getuser # Production project # ------------------ # # This project is configured to run in a production environment (e.g. using a # headless user with permissions to write to a specific directory). PROJECT = Project('azkabancli_sample', root=__file__) PROJECT.properties = { 'user.to.proxy': 'production_user', 'hdfs.root': '/jobs/sample/' } # dictionary of jobs, keyed by job name JOBS = { 'gather_data': Job({ 'type': 'hadoopJava', 'job.class': 'sample.GatherData', 'path.output': '${hdfs.root}data.avro', # note the property use here }), # ... }