def __init__(  # pylint: disable=too-many-arguments
        self,
        dag,
        namespace,
        release_name,
        chart_name,
        helm_args,
        preemptible=False,
        get_child_chart_names=None,
        **kwargs):
    """Set up a bash task that runs ``helm upgrade --install --wait``.

    Args:
        dag: DAG the task belongs to; ``self.get_helm_args`` is registered
            on it under the macro name ``get_helm_args``.
        namespace: value passed to ``--namespace`` (double-quoted in the
            command).
        release_name: helm release name (double-quoted in the command).
        chart_name: chart reference, placed last in the command.
        helm_args: extra helm arguments; surrounding whitespace is
            stripped. ``None`` or an empty string adds nothing.
        preemptible: stored on the instance as ``self.preemptible``.
        get_child_chart_names: stored on the instance as
            ``self.get_child_chart_names``.
        **kwargs: forwarded unchanged to the parent constructor.
    """
    self._temp_dir = None
    add_dag_macro(dag, 'get_helm_args', self.get_helm_args)
    self.preemptible = preemptible
    self.get_child_chart_names = get_child_chart_names

    command_parts = [
        'helm upgrade --install --wait "{release_name}"'.format(
            release_name=release_name
        ),
        '--namespace "{namespace}"'.format(namespace=namespace),
        # Left as a template expression; it is rendered at run time, not here.
        '{{ get_helm_args(dag_run) }}',
    ]
    extra_helm_args = (helm_args or '').strip()
    if extra_helm_args:
        command_parts.append(extra_helm_args)
    command_parts.append(str(chart_name))

    # Join with an explicit shell line continuation instead of relying on
    # backslash-newline inside a string literal: the latter silently turns
    # into an escaped space as soon as anything follows the backslash.
    bash_command = ' \\\n    '.join(command_parts).strip()
    super().__init__(dag=dag, bash_command=bash_command, **kwargs)
def create_train_autocut_model_operator(dag, task_id='build_autocut_image'):
    """Return a ``BashOperator`` that runs ``BUILD_AUTOCUT_IMAGE_TEMPLATE``.

    ``get_sciencebeam_gym_image`` is registered on ``dag`` through
    ``add_dag_macro`` before the operator is created.

    NOTE(review): the function name says "train" while the default task id
    and the template refer to building the autocut image - confirm the name
    is intended.
    """
    add_dag_macro(dag, 'get_sciencebeam_gym_image', get_sciencebeam_gym_image)
    operator_kwargs = {
        'dag': dag,
        'task_id': task_id,
        'bash_command': BUILD_AUTOCUT_IMAGE_TEMPLATE,
    }
    return BashOperator(**operator_kwargs)
def create_generate_target_xml_file_list_operator(
        dag,
        task_id='generate_target_xml_file_list'):
    """Return a ``BashOperator`` for ``GENERATE_TARGET_XML_FILE_LIST_TEMPLATE``.

    ``get_grobid_trainer_tools_image`` is registered on ``dag`` through
    ``add_dag_macro`` before the operator is created.
    """
    add_dag_macro(
        dag,
        'get_grobid_trainer_tools_image',
        get_grobid_trainer_tools_image
    )
    operator = BashOperator(
        dag=dag,
        task_id=task_id,
        bash_command=GENERATE_TARGET_XML_FILE_LIST_TEMPLATE
    )
    return operator
def __init__(  # pylint: disable=too-many-arguments
        self,
        dag,
        namespace: str,
        image: str,
        name: str,
        command: str,
        preemptible: Union[str, bool] = False,
        prefer_preemptible: Union[str, bool] = False,
        highcpu: Union[str, bool] = False,
        requests='',
        container_overrides_fn: Optional[Callable[[dict], dict]] = None,
        **kwargs):
    """Set up a bash task whose command comes from a DAG macro.

    The container settings (``namespace``, ``image``, ``name``, ``command``,
    ``preemptible``, ``prefer_preemptible``, ``highcpu``, ``requests``) are
    kept verbatim in ``self.container_args``. The bash command handed to the
    parent constructor is only the template
    ``{{ get_container_run_command() }}``.

    Args:
        dag: DAG the task belongs to; the macros
            ``get_container_run_command`` and ``generate_run_name`` are
            registered on it.
        container_overrides_fn: optional callable taking and returning a
            dict; stored as ``self.container_overrides_fn``.
        **kwargs: forwarded unchanged to the parent constructor.
    """
    add_dag_macro(dag, 'get_container_run_command', self.get_container_run_command)
    add_dag_macro(dag, 'generate_run_name', generate_run_name)

    self.container_args = {
        'namespace': namespace,
        'image': image,
        'name': name,
        'command': command,
        'preemptible': preemptible,
        'prefer_preemptible': prefer_preemptible,
        'highcpu': highcpu,
        'requests': requests,
    }
    self.container_overrides: Optional[dict] = None
    self.container_overrides_fn = container_overrides_fn

    super().__init__(
        dag=dag,
        bash_command='{{ get_container_run_command() }}',
        **kwargs
    )
    # Note: we are processing the command separately
    self.template_fields = ('env', )
def create_generate_training_data_operator(dag, task_id='generate_training_data'):
    """Return a ``ContainerRunOperator`` for ``GENERATE_TRAINING_DATA_TEMPLATE``.

    ``get_grobid_trainer_image`` is registered on ``dag`` first; the image
    template below references it. Namespace, image and run name are template
    strings that read from ``dag_run.conf``.
    """
    add_dag_macro(dag, 'get_grobid_trainer_image', get_grobid_trainer_image)
    run_name_template = (
        '{{ generate_run_name(dag_run.conf.sciencebeam_release_name, "training-data") }}'
    )
    operator_kwargs = {
        'dag': dag,
        'task_id': task_id,
        'namespace': '{{ dag_run.conf.namespace }}',
        'image': '{{ get_grobid_trainer_image(dag_run.conf) }}',
        'name': run_name_template,
        'preemptible': True,
        'requests': 'cpu=900m,memory=4Gi',
        'command': GENERATE_TRAINING_DATA_TEMPLATE,
    }
    return ContainerRunOperator(**operator_kwargs)
def create_train_grobid_model_operator(dag, task_id='train_grobid_model'):
    """Return a ``ContainerRunOperator`` for ``TRAIN_GROBID_MODEL_TEMPLATE``.

    ``get_grobid_trainer_image`` is registered on ``dag`` first; the image
    template below references it. The task is created with
    ``preemptible=True`` and requests ``cpu=900m,memory=10Gi``.
    """
    add_dag_macro(dag, 'get_grobid_trainer_image', get_grobid_trainer_image)
    namespace_template = '{{ dag_run.conf.namespace }}'
    image_template = '{{ get_grobid_trainer_image(dag_run.conf) }}'
    run_name_template = (
        '{{ generate_run_name(dag_run.conf.sciencebeam_release_name, "train-grobid-model") }}'
    )
    return ContainerRunOperator(
        dag=dag,
        task_id=task_id,
        namespace=namespace_template,
        image=image_template,
        name=run_name_template,
        preemptible=True,
        requests='cpu=900m,memory=10Gi',
        command=TRAIN_GROBID_MODEL_TEMPLATE
    )
def create_build_grobid_image_operator(dag, task_id='build_grobid_image'):
    """Return a ``ContainerRunOperator`` for ``BUILD_GROBID_IMAGE_TEMPLATE``.

    Registers ``get_grobid_trainer_tools_image`` and ``get_gcp_project_id``
    on ``dag`` before creating the operator. Unlike most sibling factories,
    no ``preemptible`` argument is passed here, so the operator's default
    applies.
    """
    # Same registration order as before: tools image first, project id second.
    for macro_name, macro in (
            ('get_grobid_trainer_tools_image', get_grobid_trainer_tools_image),
            ('get_gcp_project_id', get_gcp_project_id)):
        add_dag_macro(dag, macro_name, macro)

    run_name_template = (
        '{{ generate_run_name(dag_run.conf.sciencebeam_release_name, "build-image") }}'
    )
    return ContainerRunOperator(
        dag=dag,
        task_id=task_id,
        namespace='{{ dag_run.conf.namespace }}',
        image='{{ get_grobid_trainer_tools_image(dag_run.conf) }}',
        name=run_name_template,
        requests='cpu=100m,memory=256Mi',
        command=BUILD_GROBID_IMAGE_TEMPLATE
    )
def create_get_data_operator(dag, task_id='get_data'):
    """Return a ``ContainerRunOperator`` that runs ``GET_DATA_TEMPLATE``.

    Registers ``get_grobid_trainer_tools_image`` and
    ``get_source_dataset_args`` on ``dag`` before creating the operator.
    """
    add_dag_macro(
        dag, 'get_grobid_trainer_tools_image', get_grobid_trainer_tools_image
    )
    add_dag_macro(dag, 'get_source_dataset_args', get_source_dataset_args)

    container_settings = {
        'namespace': '{{ dag_run.conf.namespace }}',
        'image': '{{ get_grobid_trainer_tools_image(dag_run.conf) }}',
        'name': (
            '{{ generate_run_name(dag_run.conf.sciencebeam_release_name, "get-data") }}'
        ),
        'preemptible': True,
        'requests': 'cpu=100m,memory=256Mi',
        'command': GET_DATA_TEMPLATE,
    }
    return ContainerRunOperator(dag=dag, task_id=task_id, **container_settings)
def create_train_autocut_model_operator(dag, task_id='train_autocut_model'):
    """Return a ``ContainerRunOperator`` for ``TRAIN_AUTOCUT_MODEL_TEMPLATE``.

    ``get_sciencebeam_gym_image`` is registered on ``dag`` first; the image
    template below references it. The task is created with
    ``preemptible=True`` and requests ``cpu=900m,memory=1Gi``.
    """
    add_dag_macro(dag, 'get_sciencebeam_gym_image', get_sciencebeam_gym_image)
    # Plain one-line template, like the other operator factories. The earlier
    # backslash-continued literal put either stray backslashes or source
    # indentation inside the template expression.
    run_name_template = (
        '{{ generate_run_name(dag_run.conf.sciencebeam_release_name, "train-autocut-model") }}'
    )
    return ContainerRunOperator(
        dag=dag,
        task_id=task_id,
        namespace='{{ dag_run.conf.namespace }}',
        image='{{ get_sciencebeam_gym_image(dag_run.conf) }}',
        name=run_name_template,
        preemptible=True,
        requests='cpu=900m,memory=1Gi',
        command=TRAIN_AUTOCUT_MODEL_TEMPLATE,
    )
def create_generate_header_tei_xml_file_list_operator(
        dag,
        task_id='generate_header_tei_xml_file_list'):
    """Return a ``ContainerRunOperator`` for the header TEI XML file list.

    The container command is ``GENERATE_HEADER_TEI_XML_FILE_LIST_TEMPLATE``.
    ``get_grobid_trainer_tools_image`` is registered on ``dag`` first; the
    image template below references it.
    """
    add_dag_macro(
        dag, 'get_grobid_trainer_tools_image', get_grobid_trainer_tools_image
    )
    namespace_template = '{{ dag_run.conf.namespace }}'
    image_template = '{{ get_grobid_trainer_tools_image(dag_run.conf) }}'
    run_name_template = (
        '{{ generate_run_name(dag_run.conf.sciencebeam_release_name, "tei-xml-file-list") }}'
    )
    operator = ContainerRunOperator(
        dag=dag,
        task_id=task_id,
        namespace=namespace_template,
        image=image_template,
        name=run_name_template,
        preemptible=True,
        requests='cpu=100m,memory=128Mi',
        command=GENERATE_HEADER_TEI_XML_FILE_LIST_TEMPLATE
    )
    return operator
def create_auto_annotate_header_operator(dag, task_id='auto_annotate_header'):
    """Return a ``ContainerRunOperator`` for ``AUTO_ANNOTATE_HEADER_TEMPLATE``.

    Registers ``get_grobid_trainer_tools_image`` and
    ``DEFAULT_GROBID_TRAIN_FIELDS`` on ``dag`` (the latter under its own
    name) before creating the operator.
    """
    add_dag_macro(
        dag, 'get_grobid_trainer_tools_image', get_grobid_trainer_tools_image
    )
    add_dag_macro(dag, 'DEFAULT_GROBID_TRAIN_FIELDS', DEFAULT_GROBID_TRAIN_FIELDS)

    operator_kwargs = {
        'dag': dag,
        'task_id': task_id,
        'namespace': '{{ dag_run.conf.namespace }}',
        'image': '{{ get_grobid_trainer_tools_image(dag_run.conf) }}',
        'name': (
            '{{ generate_run_name(dag_run.conf.sciencebeam_release_name, "auto-annotate") }}'
        ),
        'preemptible': True,
        'requests': 'cpu=300m,memory=800Mi',
        'command': AUTO_ANNOTATE_HEADER_TEMPLATE,
    }
    return ContainerRunOperator(**operator_kwargs)