def __init__(  # pylint: disable=too-many-arguments
         self, dag, namespace, release_name, chart_name, helm_args,
         preemptible=False, get_child_chart_names=None, **kwargs):
     """Assemble a ``helm upgrade --install --wait`` command for the parent.

     The ``get_helm_args`` macro is registered on ``dag`` so that the
     ``{{ get_helm_args(dag_run) }}`` placeholder embedded in the command
     can be resolved when the command is rendered.

     Args:
         dag: DAG the operator belongs to; also receives the macro.
         namespace: value placed after ``--namespace`` (double-quoted).
         release_name: release name (double-quoted in the command).
         chart_name: chart reference, appended last and unquoted.
         helm_args: extra helm arguments; surrounding whitespace is
             stripped before insertion.
         preemptible: stored on the instance as ``self.preemptible``.
         get_child_chart_names: stored on the instance unchanged.
         **kwargs: forwarded to the parent initialiser.
     """
     self._temp_dir = None
     add_dag_macro(dag, 'get_helm_args', self.get_helm_args)
     self.preemptible = preemptible
     self.get_child_chart_names = get_child_chart_names

     # The backslash-newline pairs are consumed by the string literal, so
     # the command ends up on a single line (with the indentation kept).
     command_template = '''
         helm upgrade --install --wait "{release_name}" \
             --namespace "{namespace}" \
             {get_helm_args} \
             {helm_args} \
             {chart_name}
         '''
     rendered_command = command_template.format(
         release_name=release_name,
         namespace=namespace,
         get_helm_args='{{ get_helm_args(dag_run) }}',
         helm_args=helm_args.strip(),
         chart_name=chart_name)
     super().__init__(
         dag=dag, bash_command=rendered_command.strip(), **kwargs)
def create_train_autocut_model_operator(dag, task_id='build_autocut_image'):
    """Return a ``BashOperator`` running ``BUILD_AUTOCUT_IMAGE_TEMPLATE``.

    The ``get_sciencebeam_gym_image`` macro is registered on ``dag`` before
    the operator is created.

    NOTE(review): the function name mentions training, while the default
    task id and the command template refer to building the autocut image;
    confirm the name is intended.
    """
    add_dag_macro(dag, 'get_sciencebeam_gym_image', get_sciencebeam_gym_image)
    operator = BashOperator(
        bash_command=BUILD_AUTOCUT_IMAGE_TEMPLATE,
        task_id=task_id,
        dag=dag,
    )
    return operator
def create_generate_target_xml_file_list_operator(
        dag, task_id='generate_target_xml_file_list'):
    """Return a ``BashOperator`` running the target XML file list template.

    Registers the ``get_grobid_trainer_tools_image`` macro on ``dag`` and
    then creates the operator with
    ``GENERATE_TARGET_XML_FILE_LIST_TEMPLATE`` as its bash command.
    """
    add_dag_macro(
        dag,
        'get_grobid_trainer_tools_image',
        get_grobid_trainer_tools_image,
    )
    operator = BashOperator(
        bash_command=GENERATE_TARGET_XML_FILE_LIST_TEMPLATE,
        task_id=task_id,
        dag=dag,
    )
    return operator
 def __init__(  # pylint: disable=too-many-arguments
         self,
         dag,
         namespace: str,
         image: str,
         name: str,
         command: str,
         preemptible: Union[str, bool] = False,
         prefer_preemptible: Union[str, bool] = False,
         highcpu: Union[str, bool] = False,
         requests='',
         container_overrides_fn: Optional[Callable[[dict], dict]] = None,
         **kwargs):
     """Record the container settings and initialise the parent operator.

     Two macros are registered on ``dag``: ``get_container_run_command``
     (bound to this instance) and ``generate_run_name``. The parent is
     given the fixed bash command ``{{ get_container_run_command() }}``.

     Args:
         dag: DAG the operator belongs to; also receives the macros.
         namespace, image, name, command, preemptible, prefer_preemptible,
             highcpu, requests: collected unchanged into
             ``self.container_args``.
         container_overrides_fn: optional callable taking and returning a
             dict; stored as ``self.container_overrides_fn``.
         **kwargs: forwarded to the parent initialiser.
     """
     add_dag_macro(dag, 'get_container_run_command',
                   self.get_container_run_command)
     add_dag_macro(dag, 'generate_run_name', generate_run_name)

     self.container_overrides_fn = container_overrides_fn
     self.container_overrides: Optional[dict] = None
     self.container_args = {
         'namespace': namespace,
         'image': image,
         'name': name,
         'command': command,
         'preemptible': preemptible,
         'prefer_preemptible': prefer_preemptible,
         'highcpu': highcpu,
         'requests': requests,
     }

     super().__init__(
         dag=dag,
         bash_command='{{ get_container_run_command() }}',
         **kwargs)
     # Note: the command is processed separately, so only ``env`` is kept
     # as a template field (set after the parent initialiser has run).
     self.template_fields = ('env', )
def create_generate_training_data_operator(dag,
                                           task_id='generate_training_data'):
    """Return a ``ContainerRunOperator`` for ``GENERATE_TRAINING_DATA_TEMPLATE``.

    Registers the ``get_grobid_trainer_image`` macro on ``dag``. The
    namespace, image and run name are passed as templates that reference
    ``dag_run.conf``; the operator is marked preemptible and requests
    ``cpu=900m,memory=4Gi``.
    """
    add_dag_macro(dag, 'get_grobid_trainer_image', get_grobid_trainer_image)
    run_name = (
        '{{ generate_run_name(dag_run.conf.sciencebeam_release_name, "training-data") }}'
    )
    return ContainerRunOperator(
        dag=dag,
        task_id=task_id,
        namespace='{{ dag_run.conf.namespace }}',
        image='{{ get_grobid_trainer_image(dag_run.conf) }}',
        name=run_name,
        command=GENERATE_TRAINING_DATA_TEMPLATE,
        requests='cpu=900m,memory=4Gi',
        preemptible=True,
    )
def create_train_grobid_model_operator(dag, task_id='train_grobid_model'):
    """Return a ``ContainerRunOperator`` for ``TRAIN_GROBID_MODEL_TEMPLATE``.

    Registers the ``get_grobid_trainer_image`` macro on ``dag``. The
    namespace, image and run name are passed as templates that reference
    ``dag_run.conf``; the operator is marked preemptible and requests
    ``cpu=900m,memory=10Gi``.
    """
    add_dag_macro(dag, 'get_grobid_trainer_image', get_grobid_trainer_image)
    operator_kwargs = {
        'dag': dag,
        'task_id': task_id,
        'namespace': '{{ dag_run.conf.namespace }}',
        'image': '{{ get_grobid_trainer_image(dag_run.conf) }}',
        'name': (
            '{{ generate_run_name(dag_run.conf.sciencebeam_release_name, "train-grobid-model") }}'
        ),
        'preemptible': True,
        'requests': 'cpu=900m,memory=10Gi',
        'command': TRAIN_GROBID_MODEL_TEMPLATE,
    }
    return ContainerRunOperator(**operator_kwargs)
def create_build_grobid_image_operator(dag, task_id='build_grobid_image'):
    """Return a ``ContainerRunOperator`` for ``BUILD_GROBID_IMAGE_TEMPLATE``.

    Registers the ``get_grobid_trainer_tools_image`` and
    ``get_gcp_project_id`` macros on ``dag``. The namespace, image and run
    name are passed as templates that reference ``dag_run.conf``; the
    operator requests ``cpu=100m,memory=256Mi``. ``preemptible`` is not
    passed, so the operator's own default applies.
    """
    add_dag_macro(
        dag,
        'get_grobid_trainer_tools_image',
        get_grobid_trainer_tools_image,
    )
    add_dag_macro(dag, 'get_gcp_project_id', get_gcp_project_id)
    run_name = (
        '{{ generate_run_name(dag_run.conf.sciencebeam_release_name, "build-image") }}'
    )
    return ContainerRunOperator(
        dag=dag,
        task_id=task_id,
        namespace='{{ dag_run.conf.namespace }}',
        image='{{ get_grobid_trainer_tools_image(dag_run.conf) }}',
        name=run_name,
        command=BUILD_GROBID_IMAGE_TEMPLATE,
        requests='cpu=100m,memory=256Mi',
    )
def create_get_data_operator(dag, task_id='get_data'):
    """Return a ``ContainerRunOperator`` for ``GET_DATA_TEMPLATE``.

    Registers the ``get_grobid_trainer_tools_image`` and
    ``get_source_dataset_args`` macros on ``dag``. The namespace, image and
    run name are passed as templates that reference ``dag_run.conf``; the
    operator is marked preemptible and requests ``cpu=100m,memory=256Mi``.
    """
    add_dag_macro(
        dag,
        'get_grobid_trainer_tools_image',
        get_grobid_trainer_tools_image,
    )
    add_dag_macro(dag, 'get_source_dataset_args', get_source_dataset_args)
    operator_kwargs = {
        'dag': dag,
        'task_id': task_id,
        'namespace': '{{ dag_run.conf.namespace }}',
        'image': '{{ get_grobid_trainer_tools_image(dag_run.conf) }}',
        'name': (
            '{{ generate_run_name(dag_run.conf.sciencebeam_release_name, "get-data") }}'
        ),
        'preemptible': True,
        'requests': 'cpu=100m,memory=256Mi',
        'command': GET_DATA_TEMPLATE,
    }
    return ContainerRunOperator(**operator_kwargs)
# Example #9
# 0
def create_train_autocut_model_operator(dag, task_id='train_autocut_model'):
    """Return a ``ContainerRunOperator`` for ``TRAIN_AUTOCUT_MODEL_TEMPLATE``.

    Registers the ``get_sciencebeam_gym_image`` macro on ``dag``. The
    namespace, image and run name are passed as templates that reference
    ``dag_run.conf``; the operator is marked preemptible and requests
    ``cpu=900m,memory=1Gi``.
    """
    add_dag_macro(dag, 'get_sciencebeam_gym_image', get_sciencebeam_gym_image)
    # The continuation-line indentation is part of the string; it is kept
    # exactly so the template text passed as ``name`` does not change.
    run_name = ('{{ \
            generate_run_name(dag_run.conf.sciencebeam_release_name, "train-autocut-model") \
            }}')
    return ContainerRunOperator(
        dag=dag,
        task_id=task_id,
        namespace='{{ dag_run.conf.namespace }}',
        image='{{ get_sciencebeam_gym_image(dag_run.conf) }}',
        name=run_name,
        command=TRAIN_AUTOCUT_MODEL_TEMPLATE,
        requests='cpu=900m,memory=1Gi',
        preemptible=True,
    )
def create_generate_header_tei_xml_file_list_operator(
        dag, task_id='generate_header_tei_xml_file_list'):
    """Return a ``ContainerRunOperator`` for the header TEI XML file list.

    Registers the ``get_grobid_trainer_tools_image`` macro on ``dag`` and
    runs ``GENERATE_HEADER_TEI_XML_FILE_LIST_TEMPLATE``. The namespace,
    image and run name are passed as templates that reference
    ``dag_run.conf``; the operator is marked preemptible and requests
    ``cpu=100m,memory=128Mi``.
    """
    add_dag_macro(
        dag,
        'get_grobid_trainer_tools_image',
        get_grobid_trainer_tools_image,
    )
    run_name = (
        '{{ generate_run_name(dag_run.conf.sciencebeam_release_name, "tei-xml-file-list") }}'
    )
    return ContainerRunOperator(
        dag=dag,
        task_id=task_id,
        namespace='{{ dag_run.conf.namespace }}',
        image='{{ get_grobid_trainer_tools_image(dag_run.conf) }}',
        name=run_name,
        command=GENERATE_HEADER_TEI_XML_FILE_LIST_TEMPLATE,
        requests='cpu=100m,memory=128Mi',
        preemptible=True,
    )
def create_auto_annotate_header_operator(dag, task_id='auto_annotate_header'):
    """Return a ``ContainerRunOperator`` for ``AUTO_ANNOTATE_HEADER_TEMPLATE``.

    Registers ``get_grobid_trainer_tools_image`` and the
    ``DEFAULT_GROBID_TRAIN_FIELDS`` value as macros on ``dag``. The
    namespace, image and run name are passed as templates that reference
    ``dag_run.conf``; the operator is marked preemptible and requests
    ``cpu=300m,memory=800Mi``.
    """
    add_dag_macro(
        dag,
        'get_grobid_trainer_tools_image',
        get_grobid_trainer_tools_image,
    )
    add_dag_macro(
        dag,
        'DEFAULT_GROBID_TRAIN_FIELDS',
        DEFAULT_GROBID_TRAIN_FIELDS,
    )
    operator_kwargs = {
        'dag': dag,
        'task_id': task_id,
        'namespace': '{{ dag_run.conf.namespace }}',
        'image': '{{ get_grobid_trainer_tools_image(dag_run.conf) }}',
        'name': (
            '{{ generate_run_name(dag_run.conf.sciencebeam_release_name, "auto-annotate") }}'
        ),
        'preemptible': True,
        'requests': 'cpu=300m,memory=800Mi',
        'command': AUTO_ANNOTATE_HEADER_TEMPLATE,
    }
    return ContainerRunOperator(**operator_kwargs)