def create_generate_training_data_operator(dag,
                                           task_id='generate_training_data'):
    """Create the container task that runs GENERATE_TRAINING_DATA_TEMPLATE.

    ``add_dag_macro`` is called with ``get_grobid_trainer_image`` before the
    operator is built; the templated ``image`` expression refers to that
    name (presumably the call makes it available at render time).

    Args:
        dag: DAG the operator is attached to.
        task_id: task id given to the operator.

    Returns:
        The configured ``ContainerRunOperator``.
    """
    add_dag_macro(dag, 'get_grobid_trainer_image', get_grobid_trainer_image)
    run_name_template = (
        '{{ generate_run_name(dag_run.conf.sciencebeam_release_name, "training-data") }}'
    )
    operator = ContainerRunOperator(
        dag=dag,
        task_id=task_id,
        namespace='{{ dag_run.conf.namespace }}',
        image='{{ get_grobid_trainer_image(dag_run.conf) }}',
        name=run_name_template,
        preemptible=True,
        requests='cpu=900m,memory=4Gi',
        command=GENERATE_TRAINING_DATA_TEMPLATE,
    )
    return operator
示例#2
0
 def test_should_add_requests(self, dag, airflow_context):
     """A literal ``requests`` value shows up as ``--requests`` in the command."""
     expected_requests = 'cpu=123m,memory=123Mi'
     operator_kwargs = dict(
         task_id='task1',
         namespace='namespace1',
         image='image1',
         name='name1',
         command='command1',
         requests=expected_requests,
     )
     operator = ContainerRunOperator(dag, **operator_kwargs)
     rendered = create_and_render_command(operator, airflow_context)
     # Only the --requests option is parsed out of the rendered command.
     parsed = parse_command_arg(rendered, {'--requests': str})
     assert parsed.requests == expected_requests
def create_train_grobid_model_operator(dag, task_id='train_grobid_model'):
    """Create the container task that runs TRAIN_GROBID_MODEL_TEMPLATE.

    ``add_dag_macro`` is called with ``get_grobid_trainer_image`` first; the
    templated ``image`` expression refers to that name.

    Args:
        dag: DAG the operator is attached to.
        task_id: task id given to the operator.

    Returns:
        The configured ``ContainerRunOperator`` (preemptible, requesting
        ``cpu=900m,memory=10Gi``).
    """
    add_dag_macro(dag, 'get_grobid_trainer_image', get_grobid_trainer_image)
    operator_kwargs = {
        'dag': dag,
        'task_id': task_id,
        'namespace': '{{ dag_run.conf.namespace }}',
        'image': '{{ get_grobid_trainer_image(dag_run.conf) }}',
        'name': (
            '{{ generate_run_name(dag_run.conf.sciencebeam_release_name, "train-grobid-model") }}'
        ),
        'preemptible': True,
        'requests': 'cpu=900m,memory=10Gi',
        'command': TRAIN_GROBID_MODEL_TEMPLATE,
    }
    return ContainerRunOperator(**operator_kwargs)
def create_build_grobid_image_operator(dag, task_id='build_grobid_image'):
    """Create the container task that runs BUILD_GROBID_IMAGE_TEMPLATE.

    Two names are passed to ``add_dag_macro`` beforehand:
    ``get_grobid_trainer_tools_image`` (referenced by the ``image``
    expression) and ``get_gcp_project_id`` (not referenced in this function;
    presumably used inside the command template -- TODO confirm).

    Unlike the sibling factories, no ``preemptible`` argument is passed here.

    Args:
        dag: DAG the operator is attached to.
        task_id: task id given to the operator.

    Returns:
        The configured ``ContainerRunOperator``.
    """
    for macro_name, macro_fn in (
            ('get_grobid_trainer_tools_image', get_grobid_trainer_tools_image),
            ('get_gcp_project_id', get_gcp_project_id)):
        add_dag_macro(dag, macro_name, macro_fn)
    run_name_template = (
        '{{ generate_run_name(dag_run.conf.sciencebeam_release_name, "build-image") }}'
    )
    return ContainerRunOperator(
        dag=dag,
        task_id=task_id,
        namespace='{{ dag_run.conf.namespace }}',
        image='{{ get_grobid_trainer_tools_image(dag_run.conf) }}',
        name=run_name_template,
        requests='cpu=100m,memory=256Mi',
        command=BUILD_GROBID_IMAGE_TEMPLATE,
    )
def create_get_data_operator(dag, task_id='get_data'):
    """Create the container task that runs GET_DATA_TEMPLATE.

    ``add_dag_macro`` is called for ``get_grobid_trainer_tools_image``
    (referenced by the ``image`` expression) and ``get_source_dataset_args``
    (not referenced in this function; presumably used by the command
    template -- TODO confirm).

    Args:
        dag: DAG the operator is attached to.
        task_id: task id given to the operator.

    Returns:
        The configured ``ContainerRunOperator``.
    """
    add_dag_macro(
        dag, 'get_grobid_trainer_tools_image', get_grobid_trainer_tools_image)
    add_dag_macro(dag, 'get_source_dataset_args', get_source_dataset_args)
    run_name_template = (
        '{{ generate_run_name(dag_run.conf.sciencebeam_release_name, "get-data") }}'
    )
    operator = ContainerRunOperator(
        dag=dag,
        task_id=task_id,
        namespace='{{ dag_run.conf.namespace }}',
        image='{{ get_grobid_trainer_tools_image(dag_run.conf) }}',
        name=run_name_template,
        preemptible=True,
        requests='cpu=100m,memory=256Mi',
        command=GET_DATA_TEMPLATE,
    )
    return operator
示例#6
0
def create_train_autocut_model_operator(dag, task_id='train_autocut_model'):
    """Create the container task that runs TRAIN_AUTOCUT_MODEL_TEMPLATE.

    ``add_dag_macro`` is called with ``get_sciencebeam_gym_image`` first; the
    templated ``image`` expression refers to that name.

    Args:
        dag: DAG the operator is attached to.
        task_id: task id given to the operator.

    Returns:
        The configured ``ContainerRunOperator``.
    """
    add_dag_macro(dag, 'get_sciencebeam_gym_image', get_sciencebeam_gym_image)
    # The backslash continuations keep this one literal without newlines; the
    # leading whitespace of the continued lines is part of the string value,
    # so those lines must not be re-indented.
    run_name_template = '{{ \
            generate_run_name(dag_run.conf.sciencebeam_release_name, "train-autocut-model") \
            }}'
    operator = ContainerRunOperator(
        dag=dag,
        task_id=task_id,
        namespace='{{ dag_run.conf.namespace }}',
        image='{{ get_sciencebeam_gym_image(dag_run.conf) }}',
        name=run_name_template,
        preemptible=True,
        requests='cpu=900m,memory=1Gi',
        command=TRAIN_AUTOCUT_MODEL_TEMPLATE,
    )
    return operator
def create_generate_header_tei_xml_file_list_operator(
        dag, task_id='generate_header_tei_xml_file_list'):
    """Create the task that runs GENERATE_HEADER_TEI_XML_FILE_LIST_TEMPLATE.

    ``add_dag_macro`` is called with ``get_grobid_trainer_tools_image``
    first; the templated ``image`` expression refers to that name.

    Args:
        dag: DAG the operator is attached to.
        task_id: task id given to the operator.

    Returns:
        The configured ``ContainerRunOperator``.
    """
    add_dag_macro(
        dag, 'get_grobid_trainer_tools_image', get_grobid_trainer_tools_image)
    operator_kwargs = {
        'dag': dag,
        'task_id': task_id,
        'namespace': '{{ dag_run.conf.namespace }}',
        'image': '{{ get_grobid_trainer_tools_image(dag_run.conf) }}',
        'name': (
            '{{ generate_run_name(dag_run.conf.sciencebeam_release_name, "tei-xml-file-list") }}'
        ),
        'preemptible': True,
        'requests': 'cpu=100m,memory=128Mi',
        'command': GENERATE_HEADER_TEI_XML_FILE_LIST_TEMPLATE,
    }
    return ContainerRunOperator(**operator_kwargs)
def create_auto_annotate_header_operator(dag, task_id='auto_annotate_header'):
    """Create the container task that runs AUTO_ANNOTATE_HEADER_TEMPLATE.

    ``add_dag_macro`` is called for ``get_grobid_trainer_tools_image``
    (referenced by the ``image`` expression) and for the constant
    ``DEFAULT_GROBID_TRAIN_FIELDS`` (not referenced in this function;
    presumably used by the command template -- TODO confirm).

    Args:
        dag: DAG the operator is attached to.
        task_id: task id given to the operator.

    Returns:
        The configured ``ContainerRunOperator``.
    """
    add_dag_macro(
        dag, 'get_grobid_trainer_tools_image', get_grobid_trainer_tools_image)
    add_dag_macro(
        dag, 'DEFAULT_GROBID_TRAIN_FIELDS', DEFAULT_GROBID_TRAIN_FIELDS)
    run_name_template = (
        '{{ generate_run_name(dag_run.conf.sciencebeam_release_name, "auto-annotate") }}'
    )
    operator = ContainerRunOperator(
        dag=dag,
        task_id=task_id,
        namespace='{{ dag_run.conf.namespace }}',
        image='{{ get_grobid_trainer_tools_image(dag_run.conf) }}',
        name=run_name_template,
        preemptible=True,
        requests='cpu=300m,memory=800Mi',
        command=AUTO_ANNOTATE_HEADER_TEMPLATE,
    )
    return operator
def create_get_output_file_list_op(dag,
                                   macros: ScienceBeamConvertMacros = None,
                                   task_id='get_output_file_list'):
    """Create the task that runs SCIENCEBEAM_GET_OUTPUT_FILE_LIST_TEMPLATE.

    Args:
        dag: DAG the operator is attached to.
        macros: forwarded unchanged to ``add_sciencebeam_convert_dag_macros``
            (how a ``None`` value is handled is decided there).
        task_id: task id given to the operator.

    Returns:
        The configured ``ContainerRunOperator``.
    """
    # The helper's return value is not needed here, only its effect on dag.
    add_sciencebeam_convert_dag_macros(dag, macros)
    run_name_template = (
        '{{ generate_run_name(dag_run.conf.sciencebeam_release_name, "get-output-list") }}'
    )
    operator = ContainerRunOperator(
        dag=dag,
        task_id=task_id,
        namespace='{{ dag_run.conf.namespace }}',
        image='{{ get_sciencebeam_image(dag_run.conf) }}',
        name=run_name_template,
        preemptible=True,
        requests='cpu=100m,memory=256Mi',
        command=SCIENCEBEAM_GET_OUTPUT_FILE_LIST_TEMPLATE,
    )
    return operator
示例#10
0
 def test_should_allow_requests_expression(self, dag, airflow_context,
                                           dag_run):
     """A templated ``requests`` value is rendered from ``dag_run.conf``."""
     expected_requests = 'cpu=123m,memory=123Mi'
     dag_run.conf = {'container_requests': expected_requests}
     operator_kwargs = dict(
         task_id='task1',
         namespace='namespace1',
         image='image1',
         name='name1',
         command='command1',
         requests='{{ dag_run.conf.container_requests }}',
     )
     operator = ContainerRunOperator(dag, **operator_kwargs)
     rendered = create_and_render_command(operator, airflow_context)
     # Only the --requests option is parsed out of the rendered command.
     parsed = parse_command_arg(rendered, {'--requests': str})
     assert parsed.requests == expected_requests
示例#11
0
def create_sciencebeam_evaluate_op(
        dag,
        macros: Optional[ScienceBeamEvaluateMacros] = None,
        task_id: str = 'sciencebeam_evaluate'):
    """Create the container task that runs SCIENCEBEAM_EVALUATE_TEMPLATE.

    Args:
        dag: DAG the operator is attached to.
        macros: forwarded unchanged to
            ``add_sciencebeam_evaluate_dag_macros``; the object that call
            returns supplies the ``container_overrides_fn`` callback.
        task_id: task id given to the operator.

    Returns:
        The configured ``ContainerRunOperator``.
    """
    evaluate_macros = add_sciencebeam_evaluate_dag_macros(dag, macros)
    run_name_template = (
        '{{ generate_run_name(dag_run.conf.sciencebeam_release_name, "judge") }}'
    )
    operator = ContainerRunOperator(
        dag=dag,
        task_id=task_id,
        namespace='{{ dag_run.conf.namespace }}',
        image='{{ get_sciencebeam_judge_image(dag_run.conf) }}',
        name=run_name_template,
        preemptible=True,
        requests=DEFAULT_JUDGE_CONTAINER_REQUESTS,
        container_overrides_fn=(
            evaluate_macros.get_sciencebeam_judge_container_kwargs),
        command=SCIENCEBEAM_EVALUATE_TEMPLATE,
    )
    return operator
def create_sciencebeam_convert_op(
        dag,
        macros: ScienceBeamConvertMacros = None,
        task_id='sciencebeam_convert') -> BashOperator:
    """Create the container task that runs SCIENCEBEAM_CONVERT_TEMPLATE.

    Args:
        dag: DAG the operator is attached to.
        macros: forwarded unchanged to
            ``add_sciencebeam_convert_dag_macros``; the object that call
            returns supplies the ``container_overrides_fn`` callback.
        task_id: task id given to the operator.

    Returns:
        The configured ``ContainerRunOperator`` (annotated as
        ``BashOperator``; presumably its base class -- TODO confirm).
    """
    convert_macros = add_sciencebeam_convert_dag_macros(dag, macros)
    run_name_template = (
        '{{ generate_run_name(dag_run.conf.sciencebeam_release_name, "convert") }}'
    )
    operator = ContainerRunOperator(
        dag=dag,
        task_id=task_id,
        namespace='{{ dag_run.conf.namespace }}',
        image='{{ get_sciencebeam_image(dag_run.conf) }}',
        name=run_name_template,
        preemptible=True,
        requests=DEFAULT_CONVERT_CONTAINER_REQUESTS,
        container_overrides_fn=(
            convert_macros.get_sciencebeam_convert_container_kwargs),
        command=SCIENCEBEAM_CONVERT_TEMPLATE,
    )
    return operator
示例#13
0
 def test_should_allow_preemptible_expression(self, dag, airflow_context,
                                              dag_run, preemptible: bool):
     """A templated ``preemptible`` flag controls the ``--overrides`` option.

     When ``preemptible`` is truthy the parsed overrides must equal
     ``_get_select_preemptible_json()``; otherwise they must be ``None``.
     """
     dag_run.conf = {'preemptible': preemptible}
     operator_kwargs = dict(
         task_id='task1',
         namespace='namespace1',
         image='image1',
         name='name1',
         command='command1',
         preemptible='{{ dag_run.conf.preemptible }}',
     )
     operator = ContainerRunOperator(dag, **operator_kwargs)
     rendered = create_and_render_command(operator, airflow_context)
     parsed = parse_command_arg(rendered, {'--overrides': str})
     if not preemptible:
         assert parsed.overrides is None
         return
     assert parsed.overrides == _get_select_preemptible_json()
示例#14
0
 def test_should_allow_highcpu_expression(self, dag, airflow_context,
                                          dag_run, highcpu: bool):
     """A templated ``highcpu`` flag controls the ``--overrides`` option.

     When ``highcpu`` is truthy the parsed overrides must equal
     ``_get_highcpu_json()``; otherwise they must be ``None``.
     """
     requests_value = 'cpu=123m,memory=123Mi'
     dag_run.conf = {'highcpu': highcpu, 'requests': requests_value}
     operator_kwargs = dict(
         task_id='task1',
         namespace='namespace1',
         image='image1',
         name='name1',
         command='command1',
         highcpu='{{ dag_run.conf.highcpu }}',
         requests='{{ dag_run.conf.requests }}',
     )
     operator = ContainerRunOperator(dag, **operator_kwargs)
     rendered = create_and_render_command(operator, airflow_context)
     parsed = parse_command_arg(rendered, {'--overrides': str})
     if not highcpu:
         assert parsed.overrides is None
         return
     assert parsed.overrides == _get_highcpu_json()