def create_generate_training_data_operator(dag, task_id='generate_training_data'):
    """Build the container task that runs ``GENERATE_TRAINING_DATA_TEMPLATE``.

    ``get_grobid_trainer_image`` is passed to ``add_dag_macro`` under the same
    name, which is the name the templated ``image`` value below calls.

    Args:
        dag: DAG handed to ``add_dag_macro`` and to the operator.
        task_id: Task id given to the operator.

    Returns:
        The configured ``ContainerRunOperator``.
    """
    add_dag_macro(dag, 'get_grobid_trainer_image', get_grobid_trainer_image)

    # Namespace, image and name are template strings that read ``dag_run.conf``.
    run_name = (
        '{{ generate_run_name(dag_run.conf.sciencebeam_release_name, "training-data") }}'
    )
    return ContainerRunOperator(
        dag=dag,
        task_id=task_id,
        namespace='{{ dag_run.conf.namespace }}',
        image='{{ get_grobid_trainer_image(dag_run.conf) }}',
        name=run_name,
        preemptible=True,
        requests='cpu=900m,memory=4Gi',
        command=GENERATE_TRAINING_DATA_TEMPLATE,
    )
def test_should_add_requests(self, dag, airflow_context):
    """A literal ``requests`` value must appear as ``--requests`` in the rendered command."""
    expected_requests = 'cpu=123m,memory=123Mi'
    op = ContainerRunOperator(
        dag,
        task_id='task1',
        namespace='namespace1',
        image='image1',
        name='name1',
        command='command1',
        requests=expected_requests,
    )

    bash_command = create_and_render_command(op, airflow_context)
    parsed = parse_command_arg(bash_command, {'--requests': str})

    assert parsed.requests == expected_requests
def create_train_grobid_model_operator(dag, task_id='train_grobid_model'):
    """Build the container task that runs ``TRAIN_GROBID_MODEL_TEMPLATE``.

    ``get_grobid_trainer_image`` is passed to ``add_dag_macro`` under the same
    name, which is the name the templated ``image`` value below calls.

    Args:
        dag: DAG handed to ``add_dag_macro`` and to the operator.
        task_id: Task id given to the operator.

    Returns:
        The configured ``ContainerRunOperator``.
    """
    add_dag_macro(dag, 'get_grobid_trainer_image', get_grobid_trainer_image)

    # Namespace, image and name are template strings that read ``dag_run.conf``.
    run_name = (
        '{{ generate_run_name(dag_run.conf.sciencebeam_release_name, "train-grobid-model") }}'
    )
    return ContainerRunOperator(
        dag=dag,
        task_id=task_id,
        namespace='{{ dag_run.conf.namespace }}',
        image='{{ get_grobid_trainer_image(dag_run.conf) }}',
        name=run_name,
        preemptible=True,
        requests='cpu=900m,memory=10Gi',
        command=TRAIN_GROBID_MODEL_TEMPLATE,
    )
def create_build_grobid_image_operator(dag, task_id='build_grobid_image'):
    """Build the container task that runs ``BUILD_GROBID_IMAGE_TEMPLATE``.

    Two callables are passed to ``add_dag_macro`` first:
    ``get_grobid_trainer_tools_image`` (called by the templated ``image``
    value below) and ``get_gcp_project_id``.

    No ``preemptible`` argument is passed, so the operator's own default
    applies.

    Args:
        dag: DAG handed to ``add_dag_macro`` and to the operator.
        task_id: Task id given to the operator.

    Returns:
        The configured ``ContainerRunOperator``.
    """
    add_dag_macro(dag, 'get_grobid_trainer_tools_image', get_grobid_trainer_tools_image)
    add_dag_macro(dag, 'get_gcp_project_id', get_gcp_project_id)

    # Namespace, image and name are template strings that read ``dag_run.conf``.
    run_name = (
        '{{ generate_run_name(dag_run.conf.sciencebeam_release_name, "build-image") }}'
    )
    return ContainerRunOperator(
        dag=dag,
        task_id=task_id,
        namespace='{{ dag_run.conf.namespace }}',
        image='{{ get_grobid_trainer_tools_image(dag_run.conf) }}',
        name=run_name,
        requests='cpu=100m,memory=256Mi',
        command=BUILD_GROBID_IMAGE_TEMPLATE,
    )
def create_get_data_operator(dag, task_id='get_data'):
    """Build the container task that runs ``GET_DATA_TEMPLATE``.

    Two callables are passed to ``add_dag_macro`` first:
    ``get_grobid_trainer_tools_image`` (called by the templated ``image``
    value below) and ``get_source_dataset_args``.

    Args:
        dag: DAG handed to ``add_dag_macro`` and to the operator.
        task_id: Task id given to the operator.

    Returns:
        The configured ``ContainerRunOperator``.
    """
    add_dag_macro(dag, 'get_grobid_trainer_tools_image', get_grobid_trainer_tools_image)
    add_dag_macro(dag, 'get_source_dataset_args', get_source_dataset_args)

    # Namespace, image and name are template strings that read ``dag_run.conf``.
    run_name = (
        '{{ generate_run_name(dag_run.conf.sciencebeam_release_name, "get-data") }}'
    )
    return ContainerRunOperator(
        dag=dag,
        task_id=task_id,
        namespace='{{ dag_run.conf.namespace }}',
        image='{{ get_grobid_trainer_tools_image(dag_run.conf) }}',
        name=run_name,
        preemptible=True,
        requests='cpu=100m,memory=256Mi',
        command=GET_DATA_TEMPLATE,
    )
def create_train_autocut_model_operator(dag, task_id='train_autocut_model'):
    """Build the container task that runs ``TRAIN_AUTOCUT_MODEL_TEMPLATE``.

    ``get_sciencebeam_gym_image`` is passed to ``add_dag_macro`` under the
    same name, which is the name the templated ``image`` value below calls.

    Args:
        dag: DAG handed to ``add_dag_macro`` and to the operator.
        task_id: Task id given to the operator.

    Returns:
        The configured ``ContainerRunOperator``.
    """
    add_dag_macro(dag, 'get_sciencebeam_gym_image', get_sciencebeam_gym_image)

    # Keep the template as one plain literal. The previous form used
    # backslash continuations *inside* the quotes, which puts either stray
    # backslashes (an invalid ``\ `` escape) or source indentation into the
    # ``{{ ... }}`` expression. Wrapping happens outside the string instead.
    run_name = (
        '{{ generate_run_name('
        'dag_run.conf.sciencebeam_release_name, "train-autocut-model"'
        ') }}'
    )
    return ContainerRunOperator(
        dag=dag,
        task_id=task_id,
        namespace='{{ dag_run.conf.namespace }}',
        image='{{ get_sciencebeam_gym_image(dag_run.conf) }}',
        name=run_name,
        preemptible=True,
        requests='cpu=900m,memory=1Gi',
        command=TRAIN_AUTOCUT_MODEL_TEMPLATE,
    )
def create_generate_header_tei_xml_file_list_operator(
        dag, task_id='generate_header_tei_xml_file_list'):
    """Build the container task that runs ``GENERATE_HEADER_TEI_XML_FILE_LIST_TEMPLATE``.

    ``get_grobid_trainer_tools_image`` is passed to ``add_dag_macro`` under
    the same name, which is the name the templated ``image`` value below
    calls.

    Args:
        dag: DAG handed to ``add_dag_macro`` and to the operator.
        task_id: Task id given to the operator.

    Returns:
        The configured ``ContainerRunOperator``.
    """
    add_dag_macro(dag, 'get_grobid_trainer_tools_image', get_grobid_trainer_tools_image)

    # Namespace, image and name are template strings that read ``dag_run.conf``.
    run_name = (
        '{{ generate_run_name(dag_run.conf.sciencebeam_release_name, "tei-xml-file-list") }}'
    )
    return ContainerRunOperator(
        dag=dag,
        task_id=task_id,
        namespace='{{ dag_run.conf.namespace }}',
        image='{{ get_grobid_trainer_tools_image(dag_run.conf) }}',
        name=run_name,
        preemptible=True,
        requests='cpu=100m,memory=128Mi',
        command=GENERATE_HEADER_TEI_XML_FILE_LIST_TEMPLATE,
    )
def create_auto_annotate_header_operator(dag, task_id='auto_annotate_header'):
    """Build the container task that runs ``AUTO_ANNOTATE_HEADER_TEMPLATE``.

    Two values are passed to ``add_dag_macro`` first: the
    ``get_grobid_trainer_tools_image`` callable (called by the templated
    ``image`` value below) and the ``DEFAULT_GROBID_TRAIN_FIELDS`` constant.

    Args:
        dag: DAG handed to ``add_dag_macro`` and to the operator.
        task_id: Task id given to the operator.

    Returns:
        The configured ``ContainerRunOperator``.
    """
    add_dag_macro(dag, 'get_grobid_trainer_tools_image', get_grobid_trainer_tools_image)
    add_dag_macro(dag, 'DEFAULT_GROBID_TRAIN_FIELDS', DEFAULT_GROBID_TRAIN_FIELDS)

    # Namespace, image and name are template strings that read ``dag_run.conf``.
    run_name = (
        '{{ generate_run_name(dag_run.conf.sciencebeam_release_name, "auto-annotate") }}'
    )
    return ContainerRunOperator(
        dag=dag,
        task_id=task_id,
        namespace='{{ dag_run.conf.namespace }}',
        image='{{ get_grobid_trainer_tools_image(dag_run.conf) }}',
        name=run_name,
        preemptible=True,
        requests='cpu=300m,memory=800Mi',
        command=AUTO_ANNOTATE_HEADER_TEMPLATE,
    )
def create_get_output_file_list_op(
        dag,
        macros: ScienceBeamConvertMacros = None,
        task_id='get_output_file_list'):
    """Build the container task that runs ``SCIENCEBEAM_GET_OUTPUT_FILE_LIST_TEMPLATE``.

    ``add_sciencebeam_convert_dag_macros`` is called with ``dag`` and
    ``macros`` before the operator is created; its return value is not used
    here. The templated ``image`` value calls ``get_sciencebeam_image``,
    which is presumably one of the macros that helper installs (not visible
    from this function).

    Args:
        dag: DAG handed to the macro helper and to the operator.
        macros: Optional macros object forwarded unchanged to the helper.
        task_id: Task id given to the operator.

    Returns:
        The configured ``ContainerRunOperator``.
    """
    add_sciencebeam_convert_dag_macros(dag, macros)

    # Namespace, image and name are template strings that read ``dag_run.conf``.
    run_name = (
        '{{ generate_run_name(dag_run.conf.sciencebeam_release_name, "get-output-list") }}'
    )
    return ContainerRunOperator(
        dag=dag,
        task_id=task_id,
        namespace='{{ dag_run.conf.namespace }}',
        image='{{ get_sciencebeam_image(dag_run.conf) }}',
        name=run_name,
        preemptible=True,
        requests='cpu=100m,memory=256Mi',
        command=SCIENCEBEAM_GET_OUTPUT_FILE_LIST_TEMPLATE,
    )
def test_should_allow_requests_expression(self, dag, airflow_context, dag_run):
    """A templated ``requests`` value must render from ``dag_run.conf``.

    The operator receives ``'{{ dag_run.conf.container_requests }}'`` and the
    rendered command's ``--requests`` argument must equal the configured value.
    """
    expected_requests = 'cpu=123m,memory=123Mi'
    dag_run.conf = {'container_requests': expected_requests}
    op = ContainerRunOperator(
        dag,
        task_id='task1',
        namespace='namespace1',
        image='image1',
        name='name1',
        command='command1',
        requests='{{ dag_run.conf.container_requests }}',
    )

    bash_command = create_and_render_command(op, airflow_context)
    parsed = parse_command_arg(bash_command, {'--requests': str})

    assert parsed.requests == expected_requests
def create_sciencebeam_evaluate_op(
        dag,
        macros: Optional[ScienceBeamEvaluateMacros] = None,
        task_id: str = 'sciencebeam_evaluate'):
    """Build the container task that runs ``SCIENCEBEAM_EVALUATE_TEMPLATE``.

    ``add_sciencebeam_evaluate_dag_macros`` is called with ``dag`` and
    ``macros``; the object it returns supplies the
    ``get_sciencebeam_judge_container_kwargs`` callable passed to the operator
    as ``container_overrides_fn``.

    Args:
        dag: DAG handed to the macro helper and to the operator.
        macros: Optional macros object forwarded unchanged to the helper.
        task_id: Task id given to the operator.

    Returns:
        The configured ``ContainerRunOperator``.
    """
    resolved_macros = add_sciencebeam_evaluate_dag_macros(dag, macros)

    # Namespace, image and name are template strings that read ``dag_run.conf``.
    run_name = (
        '{{ generate_run_name(dag_run.conf.sciencebeam_release_name, "judge") }}'
    )
    return ContainerRunOperator(
        dag=dag,
        task_id=task_id,
        namespace='{{ dag_run.conf.namespace }}',
        image='{{ get_sciencebeam_judge_image(dag_run.conf) }}',
        name=run_name,
        preemptible=True,
        requests=DEFAULT_JUDGE_CONTAINER_REQUESTS,
        container_overrides_fn=resolved_macros.get_sciencebeam_judge_container_kwargs,
        command=SCIENCEBEAM_EVALUATE_TEMPLATE,
    )
def create_sciencebeam_convert_op(
        dag,
        macros: ScienceBeamConvertMacros = None,
        task_id='sciencebeam_convert') -> BashOperator:
    """Build the container task that runs ``SCIENCEBEAM_CONVERT_TEMPLATE``.

    ``add_sciencebeam_convert_dag_macros`` is called with ``dag`` and
    ``macros``; the object it returns supplies the
    ``get_sciencebeam_convert_container_kwargs`` callable passed to the
    operator as ``container_overrides_fn``.

    Args:
        dag: DAG handed to the macro helper and to the operator.
        macros: Optional macros object forwarded unchanged to the helper.
        task_id: Task id given to the operator.

    Returns:
        The configured ``ContainerRunOperator`` (annotated as ``BashOperator``).
    """
    resolved_macros = add_sciencebeam_convert_dag_macros(dag, macros)

    # Namespace, image and name are template strings that read ``dag_run.conf``.
    run_name = (
        '{{ generate_run_name(dag_run.conf.sciencebeam_release_name, "convert") }}'
    )
    return ContainerRunOperator(
        dag=dag,
        task_id=task_id,
        namespace='{{ dag_run.conf.namespace }}',
        image='{{ get_sciencebeam_image(dag_run.conf) }}',
        name=run_name,
        preemptible=True,
        requests=DEFAULT_CONVERT_CONTAINER_REQUESTS,
        container_overrides_fn=resolved_macros.get_sciencebeam_convert_container_kwargs,
        command=SCIENCEBEAM_CONVERT_TEMPLATE,
    )
def test_should_allow_preemptible_expression(
        self, dag, airflow_context, dag_run, preemptible: bool):
    """A templated ``preemptible`` value must render from ``dag_run.conf``.

    When ``preemptible`` is truthy the rendered command's ``--overrides``
    argument must equal ``_get_select_preemptible_json()``; otherwise no
    ``--overrides`` value is expected.
    """
    dag_run.conf = {'preemptible': preemptible}
    op = ContainerRunOperator(
        dag,
        task_id='task1',
        namespace='namespace1',
        image='image1',
        name='name1',
        command='command1',
        preemptible='{{ dag_run.conf.preemptible }}',
    )

    bash_command = create_and_render_command(op, airflow_context)
    parsed = parse_command_arg(bash_command, {'--overrides': str})

    if not preemptible:
        assert parsed.overrides is None
    else:
        assert parsed.overrides == _get_select_preemptible_json()
def test_should_allow_highcpu_expression(
        self, dag, airflow_context, dag_run, highcpu: bool):
    """A templated ``highcpu`` value must render from ``dag_run.conf``.

    Both ``highcpu`` and ``requests`` are given as template expressions. When
    ``highcpu`` is truthy the rendered command's ``--overrides`` argument must
    equal ``_get_highcpu_json()``; otherwise no ``--overrides`` value is
    expected.
    """
    requests_value = 'cpu=123m,memory=123Mi'
    dag_run.conf = {'highcpu': highcpu, 'requests': requests_value}
    op = ContainerRunOperator(
        dag,
        task_id='task1',
        namespace='namespace1',
        image='image1',
        name='name1',
        command='command1',
        highcpu='{{ dag_run.conf.highcpu }}',
        requests='{{ dag_run.conf.requests }}',
    )

    bash_command = create_and_render_command(op, airflow_context)
    parsed = parse_command_arg(bash_command, {'--overrides': str})

    if not highcpu:
        assert parsed.overrides is None
    else:
        assert parsed.overrides == _get_highcpu_json()