def execute(self, env: MasonEnvironment, response: Response,
            dry_run: bool = True) -> OperatorResponse:
    """Resolve this operator's module and either validate it (dry run)
    or run it with the configured parameters.

    Returns an OperatorResponse wrapping the (possibly errored) response.
    """
    try:
        loaded = self.module(env)
        if not isinstance(loaded, OperatorDefinition):
            # Resolution succeeded but the module is not a usable operator.
            response.add_error(
                f"Module does not contain a valid OperatorDefinition. See /examples for sample operator implementations. \nMessage: {loaded.reason}"
            )
            return OperatorResponse(response)
        if dry_run:
            # Dry run: report validity without executing anything.
            response.add_info(
                f"Valid Operator: {self.namespace}:{self.command} with specified parameters."
            )
            return OperatorResponse(response)
        return loaded.run(env, self.config, self.parameters, response)
    except ModuleNotFoundError as e:
        response.add_error(f"Module Not Found: {e}")
        return OperatorResponse(response)
def register_dag(self, schedule_name: str, valid_dag: ValidDag,
                 schedule: Optional[Schedule], response: Response):
    """Register a single-step TableInfer dag as a glue crawler schedule.

    Glue as a scheduler is only well defined for the Table Infer operator,
    so any other dag shape is rejected with an error.
    Returns a (schedule_name, response, dag) triple; the dag slot is always
    None since glue has no client-side dag representation.
    """
    if len(valid_dag.valid_steps) == 1 and valid_dag.valid_steps[0].operator.type_name() == "TableInfer":
        op = valid_dag.valid_steps[0].operator
        params = op.parameters
        db_name = params.get_required("database_name")
        storage_engine = op.config.storage()
        if isinstance(storage_engine, StorageClient):
            storage_path = storage_engine.path(
                params.get_required("storage_path"))
            response = self.register_schedule(db_name, storage_path,
                                              schedule_name, schedule,
                                              response)
        else:
            # BUG FIX: previously this branch rebound `response` to the
            # return value of add_error and then still fell through to
            # register_schedule with an unbound `storage_path`, raising
            # NameError. Registration now only happens on a valid client.
            response.add_error(
                f"Attempted to register_dag for invalid client: {storage_engine.reason}"
            )
    else:
        response.add_error(
            "Glue Scheduler only defined for TableInfer type which registers a glue crawler"
        )
    return (schedule_name, response, None)
def run(self, env: MasonEnvironment,
        response: Optional[Response] = None) -> OperatorResponse:
    """Register this workflow's dag with the configured scheduler and
    trigger the resulting schedule.

    BUG FIX: the signature previously used `response: Response = Response()`,
    a mutable default evaluated once at import time, so response state
    leaked between calls. A fresh Response is now created per call.
    """
    if response is None:
        response = Response()
    scheduler = self.config.scheduler()
    if isinstance(scheduler, SchedulerClient):
        response.add_info(
            f"Registering workflow dag {self.name} with {scheduler.client.name()}."
        )
        schedule_id, response, client_dag = scheduler.register_dag(
            self.name, self.dag, self.schedule, response)
        if not response.errored():
            response.add_info(f"Registered schedule {schedule_id}")
        # TODO: FIX
        # if client_dag and output_path:
        #     with tempfile.NamedTemporaryFile("w", delete=False) as f:
        #         json = client_dag.to_json()
        #         response.add_info(f"Saving client dag to {output_path}")
        #         f.write(json)
        #         f.close()
        #         response = self.config.storage.client.save_to(f.name, output_path, response)
        if self.schedule:
            # A cron-style schedule exists, but we trigger immediately anyway.
            response.add_warning(
                f"Triggering workflow off schedule: {self.schedule.definition}"
            )
        response.add_info(f"Triggering schedule: {schedule_id}")
        response = scheduler.trigger_schedule(schedule_id, response, env)
    else:
        response.add_error("Scheduler client not defined")
    return OperatorResponse(response)
def register_schedule(self, database_name: str, path: Path,
                      schedule_name: str, schedule: Optional[Schedule],
                      response: Response):
    """Create a glue crawler for the given database/path and translate
    the glue outcome onto the response (warnings for already-exists and
    already-running, info on success, error otherwise)."""
    result = self.create_glue_crawler(
        database=database_name,
        name=schedule_name,
        role=self.aws_role_arn or "",
        path=path.clean_path_str(),
        schedule=schedule)
    response.add_response(result)
    error, status, message = self.parse_response(result)

    if error == "AlreadyExistsException":
        # Idempotent create: an existing crawler is not a failure.
        response.add_warning(
            f"Table crawler {schedule_name} already exists. Skipping creation."
        )
        response.set_status(201)
    elif error == "CrawlerRunningException":
        response.add_warning(
            f"Table crawler {schedule_name} is already refreshing.")
        response.set_status(202)
    elif 200 <= status < 300:
        response.add_info(f"Created table crawler {schedule_name}.")
        response.set_status(201)
    else:
        response.add_error(message)
        response.set_status(status)
    return response
def save_to(self, inpath: Path, outpath: Path, response: Response):
    """Upload `inpath` to `outpath` via the backing storage client,
    recording any failure on the response instead of raising."""
    src = inpath.path_str
    dst = outpath.path_str
    try:
        self.client().upload(src, dst)
    except Exception as e:
        # Best-effort: surface both the failed paths and the root cause.
        response.add_error(f"Error saving {inpath} to {outpath.path_str}")
        response.add_error(message(e))
    return response
def to_response(self, response: Response) -> Response:
    """Fold every invalid table, then this object's own error (if any),
    into the given response."""
    for invalid in self.invalid_tables:
        response = invalid.to_response(response)
    if self.error:
        response.add_error(self.error)
    return response
def save(self, state_store: MasonStateStore, overwrite: bool = False,
         response: Optional[Response] = None) -> Response:
    """Malformed resources cannot be saved: report why on the response.

    BUG FIX: the previous default `response: Response = Response()` was a
    mutable default shared across every call; replaced with a per-call
    None sentinel.
    """
    if response is None:
        response = Response()
    msg = self.get_message()
    if msg:
        response.add_error(msg)
    return response
def execute(self, env: MasonEnvironment, response: Response,
            dry_run: bool = True, run_now: bool = False,
            schedule_name: Optional[str] = None) -> Response:
    """Invalid operators never execute: record the failure reason and
    mark the response as a 400."""
    response.add_error(f"Invalid Operator. Reason: {self.reason}")
    response.set_status(400)
    return response
def trigger_schedule(self, schedule_name: str, response: Response,
                     env: MasonEnvironment) -> Response:
    """Run the locally registered dag, or error if none is registered."""
    current = self.dag
    if not current:
        response.add_error("Dag not found. Run 'register_dag' first.")
        return response
    # Execute the dag and fold its results into the response.
    return WorkflowRun(current).run(env, response)
def save(self, state_store: MasonStateStore, overwrite: bool = False,
         response: Optional[Response] = None) -> Response:
    """Copy this operator's source tree into the state store.

    BUG FIX: the previous default `response: Response = Response()` was a
    mutable default shared across every call; replaced with a per-call
    None sentinel.
    """
    if response is None:
        response = Response()
    try:
        result = state_store.cp_source(self.source_path, "operator",
                                       self.namespace, self.command,
                                       overwrite)
        if isinstance(result, FailedOperation):
            response.add_error(f"{result.message}")
        else:
            # On success the state store returns a printable message.
            response.add_info(result)
    except Exception as e:
        response.add_error(f"Error copying source: {message(e)}")
    return response
def run(self, env: MasonEnvironment, response: Response) -> Response:
    """Drive the dag to completion, merge every executed step's response,
    and report invalid steps as a 400 workflow failure."""
    response.add_info(f"Running dag \n{self.dag.display()}")
    # Step until no runnable work remains.
    while not self.finished():
        self.step(env)
    for executed in sorted(self.executed_steps):
        response = response.merge(executed.operator_response.response)
    if self.invalid_steps:
        response.add_error(f"Workflow failed")
        for invalid in self.invalid_steps:
            response.add_error(invalid.reason)
        response.set_status(400)
    return response
def trigger_schedule_for_table(self, table_name: str, database_name: str,
                               response: Response):
    """Look up the crawler that created a table and trigger it.

    Tables created by a crawler carry a "crawler:<name>" marker in their
    created_by field; anything else cannot be refreshed this way.
    """
    table, response = self.get_table(database_name, table_name)
    if not isinstance(table, Table):
        response.add_error(f"Could not find table {table_name}")
        response.set_status(404)
        return response
    created_by = table.created_by
    cb = created_by or ""
    if "crawler:" in cb:
        crawler_name = cb.replace("crawler:", "")
        self.trigger_schedule(crawler_name, response)
    else:
        response.add_error(
            f"Table not created by crawler. created_by: {created_by}")
    return response
def delete_schedule(self, schedule_name: str, response: Response) -> Response:
    """Delete the glue crawler backing this schedule, mapping the glue
    outcome onto the response."""
    try:
        result = self.client().delete_crawler(Name=schedule_name)
    except ClientError as e:
        # Glue errors arrive on the exception's response payload.
        result = e.response
    error, status, message = self.parse_response(result)
    response.add_response(result)
    if error == "":
        response.add_info(
            f"Schedule {schedule_name} successfully deleted.")
    else:
        response.set_status(status)
        response.add_error(message)
    return response
def trigger_schedule(self, schedule_name: str, response: Response):
    """Kick off a refresh of the glue crawler behind `schedule_name`,
    translating the glue outcome onto the response."""
    result = self.refresh_glue_table(schedule_name)
    error, status, message = self.parse_response(result)
    response.add_response(result)
    if error == "CrawlerRunningException":
        # A refresh already in flight is not a failure: 202 Accepted.
        response.add_warning(
            f"Table crawler {schedule_name} is already refreshing.")
        response.add_data({})
        response.set_status(202)
    elif status and 200 <= status < 300:
        response.add_info(f"Refreshing Table Crawler: {schedule_name}")
        response.add_data({})
        response.set_status(201)
    elif status:
        response.add_error(message)
        response.set_status(status)
    return response
def config(config_id: Optional[str], set_current: bool = False,
           log_level: Optional[str] = None,
           env: Optional[MasonEnvironment] = None,
           printer: Printer = ApiPrinter()):
    """List configs, optionally pinning one as the session config first."""
    environment = env or MasonEnvironment().initialize()
    logger.set_level(log_level)
    response = Response()
    if set_current and config_id:
        outcome = Resources(environment).set_session_config(config_id)
        if isinstance(outcome, str):
            # A string result is an error message from the state store.
            response.add_error(outcome)
            response.set_status(404)
        else:
            response.add_info(f"Set session config to {config_id}")
            # After pinning, list all configs rather than just this one.
            config_id = None
    res = Resources(environment)
    configs = res.get_resources("config", config_id)
    response = printer.print_resources(configs, "config", config_id,
                                       environment=environment)
    return response.with_status()
def print_resources(
        self,
        resources: List[Union[Operator, Workflow, Config, MalformedResource]],
        type: Optional[str] = None,
        namespace: Optional[str] = None,
        command: Optional[str] = None,
        environment: Optional[MasonEnvironment] = None) -> Response:
    """Render the given resources, grouped by kind, onto a fresh Response.

    Empty input yields a 404; input containing only malformed resources
    yields a 400.
    """
    operators, workflows, configs, bad = sequence_4(
        resources, Operator, Workflow, Config, MalformedResource)
    response = Response()
    if not resources:
        response.add_error(self.none_message(type, namespace, command))
        response.set_status(404)
        return response
    if operators:
        response.add("Operators", [o.to_dict() for o in operators])
    if configs:
        current_id: Optional[str] = None
        if environment:
            # Highlight the active session config when listing.
            current_id = environment.state_store.get_session_config()
        response.add("Configs", [c.to_dict(current_id) for c in configs])
    if workflows:
        response.add("Workflows", [w.to_dict() for w in workflows])
    if bad:
        response.add("Errors", [b.get_message() for b in bad])
    if len(operators + configs + workflows) == 0:  # type: ignore
        response.set_status(400)
    return response
def run(resource_type: str, namespace: str, command: str,
        parameter_string: Optional[str] = None,
        param_file: Optional[str] = None,
        config_id: Optional[str] = None,
        log_level: Optional[str] = None,
        env: Optional[MasonEnvironment] = None,
        dry_run: bool = False,
        parameters: Optional[dict] = None,
        printer=ApiPrinter()):
    """Resolve a resource, config, and parameters, then run (or dry-run)
    the validated resource and print the outcome."""
    response = Response()
    environment: MasonEnvironment = env or MasonEnvironment().initialize()
    logger.set_level(log_level)
    res = base.Resources(environment)
    resource: Union[Resource, MalformedResource] = res.get_resource(
        resource_type, namespace, command)
    config: Union[Config, MalformedResource] = res.get_best_config(config_id)
    params: Union[Parameters, MalformedResource] = res.get_parameters(
        resource_type, parameter_string, param_file, parameters)

    if isinstance(resource, Resource) and isinstance(config, Config) \
            and isinstance(params, Parameters):
        validated = validate_resource(resource, config, params, environment)
        # Dispatch to dry_run or run; both fold back into the response.
        runner = validated.dry_run if dry_run else validated.run
        response = runner(environment, response).to_response(response)
    elif isinstance(resource, MalformedResource):
        response.add_error(f"Malformed Resource: {resource.get_message()}")
    elif isinstance(config, MalformedResource):
        response.add_error(f"Bad Config: {config.get_message()}")
    elif isinstance(params, MalformedResource):
        response.add_error(f"Bad Parameters: {params.get_message()}")
    return printer.print_response(response)
def to_response(self, response: Response):
    """Record this schema conflict as a 403 on the response, attaching
    the conflict details as data."""
    response.add_error(self.reason)
    response.add_data(self.schema_conflict.to_dict())
    response.set_status(403)
    return response
def _missing(response: Response, *args, **kwargs) -> Response:
    # Catch-all stand-in for any method on an invalid client: records the
    # reason the client is invalid and returns the response unchanged
    # otherwise. Extra positional/keyword args absorb whatever signature
    # the real client method would have had.
    # NOTE(review): `self` is not a parameter here — it is captured from
    # the enclosing scope as a closure; presumably the invalid-client
    # wrapper. Confirm at the definition site.
    response.add_error(f"Invalid Client: {self.reason}")
    return response
def to_response(self, response: Response):
    """Append this job's failure reason, if any, to the response."""
    reason = self.reason
    if reason:
        response.add_error(f"Job errored: " + reason)
    return response
def dry_run(self, env: MasonEnvironment,
            response: Optional[Response] = None) -> OperatorResponse:
    """Invalid resources fail dry-run validation: report the reason as
    a 400.

    BUG FIX: the previous default `response: Response = Response()` was a
    mutable default shared across every call; replaced with a per-call
    None sentinel.
    """
    if response is None:
        response = Response()
    response.add_error("Invalid Resource: " + self.reason)
    response.set_status(400)
    return OperatorResponse(response)
def to_response(self, response: Response):
    """Mark the response as a 404 carrying this object's failure reason."""
    response.add_error(self.reason)
    response.set_status(404)
    return response
def run(self, env: MasonEnvironment,
        response: Optional[Response] = None) -> OperatorResponse:
    """Invalid operators cannot run: report the reason as a 400.

    BUG FIX: the previous default `response: Response = Response()` was a
    mutable default shared across every call; replaced with a per-call
    None sentinel.
    """
    if response is None:
        response = Response()
    response.add_error(f"Invalid Operator. Reason: {self.reason}")
    response.set_status(400)
    return OperatorResponse(response)
def to_response(self, response: Response):
    """Record this object's failure reason on the response and return it."""
    response.add_error(self.reason)
    return response