def run_pynotebook(pyfile: str):
    """ Wraps the execution of a python3 script

    Parameters
    ----------
    pyfile : str
        The path and filename of the python3 script to run.
    """
    log.info(f'Running {pyfile}')
    try:
        result = subprocess.run(['python3', pyfile],
                                stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT,
                                text=True)
        print(result.stdout)
        if result.returncode != 0:
            log.exception(f'{pyfile} returned a non-zero exit code ...')
            traceback.print_stack()
            raise signals.FAIL()
    except Exception as e:
        log.exception(f'{pyfile} caused an exception ...')
        traceback.print_stack()
        raise signals.FAIL()
    return
def remote_mount(self, hosts: list):
    """ Create the symbolic link on each host

    Parameters
    ----------
    hosts : list of str
        The list of remote hosts
    """
    for host in hosts:
        result = subprocess.run(['ssh', host, 'sudo', 'rm', '-Rf', self.mountpath],
                                stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT)
        if result.returncode != 0:
            print(result.stdout)
            log.warning(f'Unable to remove {self.mountpath} on {host}')

        try:
            result = subprocess.run(['ssh', host, 'sudo', 'ln', '-s', self.mount, self.mountpath],
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.STDOUT)
            if result.returncode != 0:
                print(result.stdout)
                log.exception(f'Unable to mount scratch disk on {host}')
                raise signals.FAIL()
        except Exception as e:
            log.exception(f'Unable to mount scratch disk on {host}')
            traceback.print_stack()
            raise signals.FAIL()
    return
def remote_mount(self, hosts: list):
    """ Mount this FSx disk on remote hosts

    Parameters
    ----------
    hosts : list of str
        The list of remote hosts
    """
    # TODO: synchronization issue if two new jobs are started at the same time.
    # We need to make sure that the FSx disk is already spun up, otherwise this will fail.
    for host in hosts:
        try:
            result = subprocess.run(['ssh', host, 'sudo', 'mount', '-t', 'lustre',
                                     '-o', 'noatime,flock',
                                     f'{self.dnsname}@tcp:/{self.mountname}',
                                     self.mountpath],
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.STDOUT)
            if result.returncode != 0:
                print(result.stdout)
                log.exception(f'Unable to mount scratch disk on host: {host}')
                raise signals.FAIL()
        except Exception as e:
            log.exception(f'Unable to mount scratch disk on host: {host}')
            traceback.print_stack()
            raise signals.FAIL()
    return
def create_scratch(provider: str, configfile: str, mountpath: str = '/ptmp') -> ScratchDisk:
    """ Provides a high speed scratch disk if available. Creates and mounts the disk.

    Parameters
    ----------
    provider : str
        Name of an implemented provider.
    configfile : str
        The Job configuration file
    mountpath : str
        The path where the disk will be mounted. Default = /ptmp (optional)

    Returns
    -------
    scratch : ScratchDisk
        Returns the ScratchDisk object
    """
    if provider == 'FSx':
        scratch = FSxScratchDisk(configfile)
    elif provider == 'NFS':
        scratch = NFSScratchDisk(configfile)
    elif provider == 'Local':
        log.error('Coming soon ...')
        raise signals.FAIL()
    else:
        log.error('Unsupported provider')
        raise signals.FAIL()

    scratch.create(mountpath)
    return scratch
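# Hedged usage sketch for create_scratch (a minimal example; the config file path is
# illustrative, and the delete() call assumes ScratchDisk implementations expose one,
# as FSxScratchDisk.delete() below suggests):
def _example_create_scratch():
    scratch = create_scratch('FSx', '/path/to/job_config.json', mountpath='/ptmp')
    try:
        pass  # run steps that read and write under /ptmp here
    finally:
        scratch.delete()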
def _verify_workspace_requirements(self, workspace, families):
    """ Check that an existing workspace meets the required families

    Parameters
    ----------
    workspace: dict
        Dictionary representation of workspace details.

    families: dict
        Dictionary of family to version number with the required metadata
        families and versions. A ``None`` value means "latest".

    Returns
    -------
    None

    Raises
    ------
    prefect.signals.PrefectStateSignal
        A :py:class:`prefect.signals.FAIL` signal when the verification fails.
    """
    for name, version in families.items():
        if name not in workspace['families']:
            raise signals.FAIL('Workspace exists but does not have '
                               'the required families')
        # version == None means that we want the latest.
        # No check is done, but we could implement something more strict later.
        # Compare the required version against the version present in the workspace.
        if version is not None and version > workspace['families'][name]:
            raise signals.FAIL('Workspace does not meet family version '
                               'requirement')
def forecast_run(cluster: Cluster, job: Job):
    """ Run the forecast

    Parameters
    ----------
    cluster : Cluster
        The cluster to run on
    job : Job
        The job to run
    """
    PPN = cluster.getCoresPN()

    # Easier to read
    CDATE = job.CDATE
    HH = job.HH
    OFS = job.OFS
    NPROCS = job.NPROCS
    OUTDIR = job.OUTDIR
    #EXEC = job.EXEC

    runscript = f"{curdir}/fcst_launcher.sh"

    try:
        HOSTS = cluster.getHostsCSV()
    except Exception as e:
        log.exception('In driver: exception retrieving list of hostnames: ' + str(e))
        raise signals.FAIL()

    try:
        if OFS == "adnoc":
            time.sleep(60)
            result = subprocess.run([runscript, CDATE, HH, OUTDIR, str(NPROCS),
                                     str(PPN), HOSTS, OFS, job.EXEC],
                                    stderr=subprocess.STDOUT)
        else:
            result = subprocess.run([runscript, CDATE, HH, OUTDIR, str(NPROCS),
                                     str(PPN), HOSTS, OFS],
                                    stderr=subprocess.STDOUT)

        if result.returncode != 0:
            log.exception(f'Forecast failed ... result: {result.returncode}')
            raise signals.FAIL()
    except Exception as e:
        log.exception('In driver: Exception during subprocess.run: ' + str(e))
        raise signals.FAIL()

    log.info('Forecast finished successfully')

    curfcst = f"{job.COMROT}/current.fcst"
    with open(curfcst, 'w') as cf:
        cf.write(f"{OFS}.{CDATE}{HH}\n")
    return
def run_workflow(parametrised_workflow: Tuple[Flow, Dict[str, Any]]) -> None:
    """ Run a workflow.

    Parameters
    ----------
    parametrised_workflow : tuple (prefect.Flow, dict)
        Workflow to run, and parameters to run it with.

    Notes
    -----
    The workflow will run once, starting immediately. If the workflow has a
    schedule, the schedule will be ignored.
    """
    workflow, parameters = parametrised_workflow
    prefect.context.logger.info(
        f"Running workflow '{workflow.name}' with parameters {parameters}.")
    state = workflow.run(parameters=parameters, run_on_schedule=False)
    if state.is_successful():
        prefect.context.logger.info(
            f"Workflow '{workflow.name}' ran successfully with parameters {parameters}."
        )
    else:
        raise signals.FAIL(
            f"Workflow '{workflow.name}' failed when run with parameters {parameters}."
        )
def signal_task(message):
    # Raise a Prefect state signal based on the message content
    if message == 'go!':
        raise signals.SUCCESS(message='going!')
    elif message == 'stop!':
        raise signals.FAIL(message='stopping!')
    elif message == 'skip!':
        raise signals.SKIP(message='skipping!')
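# Minimal sketch of driving signal_task from a Prefect 1.x flow (assumes Prefect 1.x;
# `task` and `Flow` come from the top-level prefect package, and the flow name is illustrative):
from prefect import Flow, task

signal_demo = task(signal_task)  # wrap the plain function as a Prefect task

with Flow('signal-example') as demo_flow:
    signal_demo('go!')  # raises signals.SUCCESS, so the task finishes in a Success state

demo_state = demo_flow.run()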
def _acquire(mountpath: str):
    # Obtain an exclusive file-based lock for the given mount path
    tries = 0
    maxtries = 3
    delay = 0.1

    lockpath = f'{_LOCKROOT}{mountpath}'
    lockfile = f'{lockpath}/.lockctl'

    if not os.path.exists(lockpath):
        os.makedirs(lockpath)

    while tries < maxtries:
        # If lockfile exists, some other process is holding the lock
        if os.path.exists(lockfile):
            #print(f'lock not acquired ... trying again in {delay} seconds')
            time.sleep(delay)
            tries += 1
            continue
        else:
            lock = open(lockfile, "w")
            #print('lock acquired ')
            lock.close()
            return

    log.exception(
        f'ERROR: Unable to obtain lock on {lockfile}. You may need to delete it.'
    )
    traceback.print_stack()
    raise signals.FAIL()
def cluster(self, configfile):
    """ Creates a new Cluster object

    Parameters
    ----------
    configfile : string
        Full path and filename of a JSON configuration file for this cluster.

    Returns
    -------
    newcluster : Cluster
        Returns a new instance of a Cluster implementation.
    """
    cfdict = self.readconfig(configfile)
    provider = cfdict['platform']

    if provider == 'AWS':
        log.info(f'Attempting to make a new cluster : {provider}')
        try:
            newcluster = AWSCluster(configfile)
        except Exception as e:
            log.exception('Could not create cluster: ' + str(e))
            raise signals.FAIL()
    elif provider == 'Local':
        newcluster = LocalCluster(configfile)
    else:
        # Guard against an undefined newcluster when the platform is not recognized
        log.error(f'Unsupported platform: {provider}')
        raise signals.FAIL()

    log.info(f"Created new {provider} cluster")
    return newcluster
def run(self, threshold: int) -> int:
    # Draw a random value and fail the task if it exceeds the threshold
    r = Random()
    v = r.randint(a=self.min, b=self.max)
    if v > threshold:
        raise signals.FAIL(message=f'{v} is greater than {threshold}')
    self.logger.info(f'Value is {v}')
    return v
def task_3():
    logger = prefect.context.get("logger")
    # Sleep for a random interval; fail the flow if it exceeds 50 seconds
    interval = randrange(0, 60)
    logger.info(interval)
    time.sleep(interval)
    if interval > 50:
        logger.info("Failing flow...")
        raise signals.FAIL()
def execute_query(client, table_name):
    logger = prefect.context.get("logger")
    logger.info(f"Table Name: {table_name}")
    # Simulate a slow query; the "Users" table is treated as missing
    if table_name == "Users":
        time.sleep(9)
        raise signals.FAIL(
            message="TableNotFound: The table specified does not exist.")
    else:
        time.sleep(9)
        return table_name
def _release(mountpath: str):
    # Release the file-based lock for the given mount path
    try:
        os.remove(f'{_LOCKROOT}{mountpath}/.lockctl')
        #print('lock released')
    except Exception as e:
        log.exception(
            f'ERROR: error releasing lock {_LOCKROOT}{mountpath}/.lockctl')
        raise signals.FAIL()
    return
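# Hedged sketch of the intended lock lifecycle around a critical section (assumes
# _LOCKROOT, log, and signals are defined at module level as in the functions above;
# the mount path is illustrative):
def _example_locked_update(mountpath: str = '/ptmp'):
    _acquire(mountpath)      # retries briefly, raises signals.FAIL() if the lock is stuck
    try:
        pass                 # mutate shared scratch-disk state here
    finally:
        _release(mountpath)  # always release so other processes can proceed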
def subprocesscall(cmd, stdout=None):
    # Run a shell command, optionally redirecting stdout to a file
    print('SHELL CMD ----------------------------------')
    print(cmd)
    print('--------- ----------------------------------')
    if stdout is None:
        rslt = subprocess.call(cmd, shell=True)
    else:
        with open(stdout, 'w') as sout:
            rslt = subprocess.call(cmd, stdout=sout, shell=True)
    if rslt != 0:
        raise signals.FAIL(message=f'{cmd} returned non-zero result {rslt}')
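# Hedged usage sketch for subprocesscall (the command and log file name are illustrative;
# a non-zero exit status raises signals.FAIL as shown above):
def _example_subprocesscall():
    subprocesscall('ls -l /tmp')                   # stream output to the console
    subprocesscall('ls -l /tmp', stdout='ls.log')  # redirect output to ls.log instead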
def run(
    self,
    df: pd.DataFrame = None,
    expectations_path: str = None,
    keep_output: bool = None,
    **kwargs,
):
    ge_project_path = str(Path(expectations_path).parent)
    batch_kwargs = self._get_batch_kwargs(df)
    context = self._get_ge_context_local(ge_project_path)

    self.logger.info("Beginning validation run...")

    try:
        results = super().run(
            batch_kwargs=batch_kwargs,  # input data
            context=context,  # ~project config
            **kwargs,
        )
    except signals.FAIL as e:
        results = e.state.result

    # Show summary of results
    n_successful, n_expectations = self._get_stats_from_results(results)
    status = "success" if results.success else "failure"
    level = logging.INFO if results.success else logging.ERROR
    self.logger.log(
        msg=f"Validation finished with status '{status}'. {n_successful}/{n_expectations} test(s) passed.",
        level=level,
    )

    validation_ids = [res for res in results["run_results"]]
    validation_id = validation_ids[0]
    url_dicts = context.get_docs_sites_urls(resource_identifier=validation_id)
    validation_site_url = url_dicts[0]["site_url"]

    if keep_output:
        docs_msg = f"To explore the docs, visit {validation_site_url}"
        docs_msg += " or the 'Artifacts' tab on the Prefect flow run dashboard."
        self.logger.info(docs_msg)
    else:
        docs_path = os.path.join(ge_project_path, "uncommitted")
        checkpoints_path = os.path.join(ge_project_path, "checkpoints")
        shutil.rmtree(docs_path)
        shutil.rmtree(checkpoints_path)

    if not results.success:
        raise signals.FAIL(result=results)

    return results
def storage_init(provider: str) -> StorageService:
    """Class factory that returns an implementation of StorageService.

    StorageService is the abstract base class that provides a generic
    interface for multiple cloud platforms.

    Parameters
    ----------
    provider : str
        Name of an implemented provider.

    Returns
    -------
    service : StorageService
        Returns a specific implementation of the StorageService interface.

    Raises
    ------
    signals.FAIL
        Raises an exception if `provider` is not supported.

    Notes
    -----
    The following providers are implemented:
        AWS S3 - S3Storage
    """
    if provider == 'AWS':
        service = S3Storage()
    elif provider == 'Local':
        log.error('Coming soon ...')
        raise signals.FAIL()
    else:
        log.error('Unsupported provider')
        raise signals.FAIL()
    return service
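# Hedged usage sketch for storage_init (assumes S3Storage implements the StorageService
# interface; no specific methods are shown because they are not listed in the source):
def _example_storage_init():
    service = storage_init('AWS')  # returns an S3Storage instance
    return service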
def run(self,
        query: str,
        dialect: str = 'postgresql',
        workspace_id: Optional[int] = None,
        id_column: Optional[str] = None) -> List[ResultSetType]:
    """ Perform the Quetzal SQL query

    Parameters
    ----------
    query: str
        Quetzal query.
    dialect: str
        Dialect used to express the `query`.
    workspace_id: int
        Workspace where the query should be executed. If not set, it uses
        the global workspace.
    id_column: str
        Name of the column on the query that represents a Quetzal file id.

    Returns
    -------
    results
        A list of dictionaries, one for each result row.
    """
    if not query:
        raise signals.FAIL('Query is empty')

    self.logger.info('Querying Quetzal at %s with SQL (dialect %s)=\n%s',
                     self.client.configuration.host, dialect, query)
    rows, total = helpers.query(self.client, workspace_id, query, dialect)

    # Handle results
    self.logger.info('Query gave %d results', total)

    # Shuffle the results
    if self.shuffle:
        random.shuffle(rows)

    # Only keep N results
    if self.limit is not None and total > self.limit:
        rows = rows[:self.limit]
        total = len(rows)
        self.logger.info('Query was limited to %d results', total)

    if self._as_file_adapter:
        for i, row in enumerate(rows):
            rows[i] = QuetzalFile.retrieve(file_id=row['id'],
                                           workspace_id=workspace_id)

    return rows
def delete(self):
    """ Delete this FSx disk """

    log.debug(f'Attempting to delete FSx disk at {self.mountpath}')
    log.debug(f"This process's lockid: {self.lockid}")

    ScratchDiskModule.removelock(self.mountpath, self.lockid)

    # Is the disk in use by anyone else? There is a potential for a race condition here.
    # If another process is blocking on entering the mutex to add a lock, this process
    # will still remove the disk.
    # TODO: possibly make __acquire non-blocking
    if ScratchDiskModule.haslocks(self.mountpath):
        log.info(
            f'FSx disk at {self.mountpath} is currently in use. Unable to remove it.'
        )
        return

    log.info(f'Unmounting FSx disk at {self.mountpath} ...')
    try:
        # umount -f = force, -l = lazy
        result = subprocess.run(['sudo', 'umount', '-fl', self.mountpath],
                                stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT)
        if result.returncode != 0:
            print(result.stdout)
            log.exception(
                f'error while unmounting scratch disk at {self.mountpath} ...'
            )
    except Exception as e:
        log.exception(
            f'Exception while unmounting scratch disk at {self.mountpath} ...'
        )

    # Remove the AWS FSx resource
    client = boto3.client('fsx', region_name=self.region)
    try:
        response = client.delete_file_system(FileSystemId=self.filesystemid)
        if response['Lifecycle'] == 'DELETING':
            log.info(f'FSx disk {self.filesystemid} is DELETING')
            self.status = 'deleted'
        else:
            log.info(
                f'Something went wrong when deleting the FSx disk {self.filesystemid}'
                ' ... manually check the status'
            )
            self.status = 'error'
    except ClientError as e:
        log.exception('ClientError exception in AWSScratch.delete. ' + str(e))
        raise signals.FAIL()
def mkdirs(pdir: str, mode=0o775) -> None:
    # utility function to create a directory (recursively)
    if os.path.exists(pdir):
        #print(f"Directory {pdir} already exists")
        return

    # make output directory
    try:
        oumask = os.umask(0o777 - mode)  # e.g. 0o002 for mode 0o775
        os.makedirs(pdir, exist_ok=True, mode=mode)
        #os.chmod(pdir, mode)
        print(f"Directory {pdir} created successfully")
    except OSError as error:
        # The signal must be raised, otherwise the failure is silently ignored
        raise signals.FAIL(message=f"Directory {pdir} can not be created")
    finally:
        os.umask(oumask)
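# Hedged usage sketch for mkdirs (the paths are illustrative; the default mode 0o775
# makes the new directories group-writable):
def _example_mkdirs():
    mkdirs('/tmp/example/output/dir')           # creates parents as needed, no-op if present
    mkdirs('/tmp/example/private', mode=0o700)  # owner-only permissions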
def get_baseline(job: Job, sshuser=None):
    """ Retrieve operational forecast files for comparison to quasi-operational forecasts

    Parameters
    ----------
    job : Job
        The Job object.
    sshuser : str
        The user and host to use for retrieving data from a remote server.
        Required for LiveOcean.
    """
    cdate = job.CDATE
    ofs = job.OFS
    vdir = job.VERIFDIR
    hh = job.HH

    if ofs == 'liveocean':
        try:
            util.get_baseline_lo(cdate, vdir, sshuser)
        except Exception as e:
            log.exception('Retrieving baselines failed ...')
            raise signals.FAIL()
    elif ofs in util.nosofs_models:
        script = f"{curdir}/scripts/getNomadsProd.sh"
        result = subprocess.run([script, ofs, cdate, hh, vdir],
                                stderr=subprocess.STDOUT)
        if result.returncode != 0:
            log.exception(
                f'Retrieving baselines failed ... result: {result.returncode}')
            raise signals.FAIL()
    else:
        log.error(f'{ofs} is not supported')
        raise signals.FAIL()
    return
def cluster_start(cluster):
    """ Start the cluster

    Parameters
    ----------
    cluster : Cluster
    """
    log.info('Starting ' + str(cluster.nodeCount) + ' instances ...')
    log.info('Waiting for nodes to start ...')
    try:
        cluster.start()
    except Exception as e:
        log.exception('In driver: Exception while creating nodes: ' + str(e))
        raise signals.FAIL()
    return
def ramdisk_to_nfs(self, delete_outs1=False):
    """ Move outputs from local RAMDISK (srcdir) to NFS (dstdir),
        delete other files used in RAMDISK.
    """
    p = self.p
    nfs = p.get('nfs', '')
    ramdisk = p.get('ramdisk', '')
    inps = self.inps
    inps1 = self._inps
    tmps1 = self._tmps
    outs1 = self._outs

    if (not ramdisk) or (nfs == ramdisk):
        # no op
        return

    delete = []
    host = socket.gethostname()
    for k, src in outs1.items():
        if src.startswith(ramdisk):
            dst = src.replace(ramdisk, nfs)
            try:
                mkdirs(os.path.dirname(dst))
                if src != dst:
                    shutil.copyfile(src, dst)
                    os.chmod(dst, 0o660)
                    if delete_outs1:
                        os.unlink(src)
                    else:
                        delete.append((host, src))
                print('********** RAMDISK => NFS *********************************')
                print(f'{host}:{src}=>{dst}')
                print('***********************************************************')
            except Exception:
                raise signals.FAIL(message=f'shutil.copy {src} to {dst} failed.')

    # delete tmps1 regardless
    deletefiles(tmps1.values())
    # delete inps1 if not original
    tgts = [inps1[k] for k in inps1 if inps[k] != inps1[k]]
    deletefiles(tgts)
    # delete later
    self.param.delete = self.param.get('delete', []) + delete
def check_files(self):
    skip = False
    force = self.p.get('force', False)
    signalskip = self.p.get('signalskip', True)

    # check that all inputs exist
    for k, f in self.inps.items():
        if not os.path.exists(f):
            raise signals.FAIL(message=f'input file {k}:{f} does not exist')

    done = all([os.path.exists(x) for k, x in self.outs.items()])

    # if force, delete tmps and outs
    if force or (not done):
        # delete all existing
        deletefiles(list(self.tmps.values()) + list(self.outs.values()))
    # if all outs exist then skip
    elif done:
        deletefiles(list(self.tmps.values()))
        if signalskip:
            raise signals.SKIP(message=f'all outputs {self.outs} already exist')
        skip = True

    return skip
def create(self, mountpath: str = '/ptmp'):
    """ The NFS disk is assumed to be mounted; just create a symlink

    Parameters
    ----------
    mountpath : str
        The path where the disk will be mounted. Default = /ptmp (optional)
    """
    self.lockid = ScratchDiskModule.addlock(mountpath)

    # TODO: maybe create an additional EFS drive to use as /ptmp
    self.mountpath = mountpath

    if self._mountexists():
        log.info("Scratch disk already exists...")
        return
    elif ScratchDiskModule.get_lockcount(self.mountpath) == 1:
        # Mount does not exist, but another process might be creating it.
        # We just created a lock for this, so the lock count must be == 1
        # if we are the only one starting it.

        # Now mount it
        log.info("Creating symbolic link ...")

        # TODO: Check to make sure it is not in use
        subprocess.run(['sudo', 'rm', '-Rf', self.mountpath],
                       stdout=subprocess.PIPE,
                       stderr=subprocess.STDOUT)

        result = subprocess.run(['sudo', 'ln', '-s', self.mount, self.mountpath],
                                stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT)
        if result.returncode != 0:
            print(result.stdout)
            log.error('error attempting to create link to scratch disk ...')
            raise signals.FAIL()

    self.status = 'available'
    return
def ptmp2com(job: Job):
    """ Transfer completed run from scratch disk to com

    Parameters
    ----------
    job : Job
        The Job object with CDATE, PTMP, and COMROT attributes set.
    """
    # It takes 20 minutes to copy liveocean data (132 GB) from ptmp to /com.
    # If done in the cluster it costs ~$5.18 of compute, so do it on the head node instead.
    # NOS does it in the forecast script and renames the files in the process.
    if job.OFS == "liveocean":
        fdate = util.lo_date(job.CDATE)
        ptmp = f'{job.PTMP}/liveocean/{fdate}/*'
        comout = job.COMROT + '/liveocean/' + fdate
        if debug:
            print(f"ptmp: {ptmp}, comout: {comout}")

        try:
            cmd = f'cp -pf {ptmp} {comout}'
            result = subprocess.run(cmd,
                                    universal_newlines=True,
                                    shell=True,
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.STDOUT)
            if result.returncode != 0:
                log.error(result.stdout)
                log.error(f'error copying data from {ptmp} to {comout}')
        except Exception as e:
            log.exception(f'exception copying data from {ptmp} to {comout}')
            raise signals.FAIL()
    else:
        log.info("Skipping ... NOSOFS does this in the forecast script")
    return
def post_graph(appointment: ExternalAppointmentStruct) -> Dict:
    logger = prefect.context.get("logger")
    logger.info("Starting post_graph")

    s = ExternalAppointmentUpdateSummaryStruct()
    external_appointment_schema = ExternalAppointmentStructSchema()
    external_appointment_update_schema = ExternalAppointmentUpdateSummaryStructSchema()

    json_data = external_appointment_schema.dump(appointment)

    summary: ExternalAppointmentUpdateSummaryStruct
    logger.info("About to post")
    summary, err = common.post_to_endpoint(1,
                                           json_data,
                                           '/api/external_appointment/update',
                                           external_appointment_update_schema,
                                           commit=False)
    logger.info("Finished posting")
    if err:
        raise signals.FAIL(message=str(err))
    return summary
def get_forcing(job: Job, sshuser=None):
    """ Retrieve operational model forcing data and initial conditions

    Parameters
    ----------
    job : Job
        The Job object.
    sshuser : str
        The user and host to use for retrieving data from a remote server.
        Required for LiveOcean.
    """
    cdate = job.CDATE
    ofs = job.OFS
    comrot = job.COMROT
    hh = job.HH
    comdir = job.OUTDIR  # ex: /com/liveocean/f2020.MM.DD

    if ofs == 'liveocean':
        frcdir = job.COMROT + '/liveocean'
        try:
            util.get_ICs_lo(cdate, frcdir, sshuser)
        except Exception as e:
            log.exception('Problem encountered with downloading forcing data ...')
            raise signals.FAIL()

    # ROMS models
    elif ofs in ('cbofs', 'dbofs', 'tbofs', 'gomofs', 'ciofs'):
        #comdir = f"{comrot}/{ofs}.{cdate}"
        script = f"{curdir}/scripts/getICsROMS.sh"
        result = subprocess.run([script, cdate, hh, ofs, comdir],
                                stderr=subprocess.STDOUT)
        if result.returncode != 0:
            log.exception(
                f'Retrieving ICs failed ... result: {result.returncode}')
            raise signals.FAIL()

    # FVCOM models
    elif ofs in ('ngofs', 'nwgofs', 'negofs', 'leofs', 'sfbofs', 'lmhofs'):
        #comdir = f"{comrot}/{ofs}.{cdate}"
        script = f"{curdir}/scripts/getICsFVCOM.sh"
        result = subprocess.run([script, cdate, hh, ofs, comdir],
                                stderr=subprocess.STDOUT)
        if result.returncode != 0:
            log.exception(
                f'Retrieving ICs failed ... result: {result.returncode}')
            raise signals.FAIL()

    # Coupled WRF/ROMS
    elif ofs == 'wrfroms':
        #comdir = f"{comrot}/{ofs}/{cdate}"
        script = f"{curdir}/scripts/getICsWRFROMS.sh"
        result = subprocess.run([script, cdate, comdir],
                                stderr=subprocess.STDOUT)
        if result.returncode != 0:
            log.exception(
                f'Retrieving ICs failed ... result: {result.returncode}')
            raise signals.FAIL()

    else:
        log.error(f"Unsupported forecast: {ofs}")
        raise signals.FAIL()
    return
def handler(signal_received, frame):
    # SIGINT/Ctrl-C handler: fail the flow instead of exiting abruptly
    print('SIGINT or CTRL-C detected. Exiting gracefully')
    raise signals.FAIL()
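# Minimal sketch of registering the handler with the standard library signal module
# (imported under an alias to avoid shadowing prefect's `signals`):
import signal as _signal

_signal.signal(_signal.SIGINT, handler)  # Ctrl-C now raises signals.FAIL() via handler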
def run(
    self,
    checkpoint_name: str = None,
    ge_checkpoint: Checkpoint = None,
    checkpoint_kwargs: dict = None,
    context: ge.DataContext = None,
    assets_to_validate: list = None,
    batch_kwargs: dict = None,
    expectation_suite_name: str = None,
    context_root_dir: str = None,
    runtime_environment: Optional[dict] = None,
    run_name: str = None,
    run_info_at_end: bool = True,
    disable_markdown_artifact: bool = False,
    validation_operator: str = "action_list_operator",
    evaluation_parameters: Optional[dict] = None,
):
    """
    Task run method.

    Args:
        - checkpoint_name (str, optional): the name of a pre-configured checkpoint; should
            match the filename of the checkpoint without the extension. Either
            checkpoint_name or checkpoint_config is required when using the Great
            Expectations v3 API.
        - ge_checkpoint (Checkpoint, optional): an in-memory GE `Checkpoint` object used
            to perform validation. If not provided then `checkpoint_name` will be used to
            load the specified checkpoint.
        - checkpoint_kwargs (Dict, optional): A dictionary whose keys match the parameters
            of `CheckpointConfig` which can be used to update and populate the task's
            Checkpoint at runtime.
        - context (DataContext, optional): an in-memory GE `DataContext` object. e.g.
            `ge.data_context.DataContext()` If not provided then `context_root_dir` will
            be used to look for one.
        - assets_to_validate (list, optional): A list of assets to validate when running
            the validation operator. Only used in the Great Expectations v2 API
        - batch_kwargs (dict, optional): a dictionary of batch kwargs to be used when
            validating assets. Only used in the Great Expectations v2 API
        - expectation_suite_name (str, optional): the name of an expectation suite to be
            used when validating assets. Only used in the Great Expectations v2 API
        - context_root_dir (str, optional): the absolute or relative path to the directory
            holding your `great_expectations.yml`
        - runtime_environment (dict, optional): a dictionary of great expectation config
            key-value pairs to overwrite your config in `great_expectations.yml`
        - run_name (str, optional): the name of this Great Expectations validation run;
            defaults to the task slug
        - run_info_at_end (bool, optional): add run info to the end of the artifact
            generated by this task. Defaults to `True`.
        - disable_markdown_artifact (bool, optional): toggle the posting of a markdown
            artifact from this task. Defaults to `False`.
        - evaluation_parameters (Optional[dict], optional): the evaluation parameters to
            use when running validation. For more information, see
            [example](https://docs.prefect.io/api/latest/tasks/great_expectations.html#rungreatexpectationsvalidation)
            and
            [docs](https://docs.greatexpectations.io/en/latest/reference/core_concepts/evaluation_parameters.html).
        - validation_operator (str, optional): configure the actions to be executed after
            running validation. Defaults to `action_list_operator`.

    Raises:
        - 'signals.FAIL' if the validation was not a success

    Returns:
        - result
            ('great_expectations.validation_operators.types.validation_operator_result.ValidationOperatorResult'):
            The Great Expectations metadata returned from the validation if the v2
            (batch_kwargs) API is used.

            ('great_expectations.checkpoint.checkpoint.CheckpointResult'):
            The Great Expectations metadata returned from running the provided
            checkpoint if a checkpoint name is provided.
    """
    if version.parse(ge.__version__) < version.parse("0.13.8"):
        self.logger.warning(
            f"You are using great_expectations version {ge.__version__} which may cause "
            "errors in this task. Please upgrade great_expectations to 0.13.8 or later."
        )

    runtime_environment = runtime_environment or dict()
    checkpoint_kwargs = checkpoint_kwargs or dict()

    # Load context if not provided directly
    if not context:
        context = ge.DataContext(
            context_root_dir=context_root_dir,
            runtime_environment=runtime_environment,
        )

    # Check that the parameters are mutually exclusive
    if (
        sum(
            bool(x)
            for x in [
                (expectation_suite_name and batch_kwargs),
                assets_to_validate,
                checkpoint_name,
                ge_checkpoint,
            ]
        )
        != 1
    ):
        raise ValueError(
            "Exactly one of expectation_suite_name + batch_kwargs, assets_to_validate, "
            "checkpoint_name, or ge_checkpoint is required to run validation."
        )

    results = None
    # If there is a checkpoint or checkpoint name provided, run the checkpoint.
    # Checkpoints are the preferred deployment of validation configuration.
    if ge_checkpoint or checkpoint_name:
        ge_checkpoint = ge_checkpoint or context.get_checkpoint(checkpoint_name)
        results = ge_checkpoint.run(
            evaluation_parameters=evaluation_parameters,
            run_id={"run_name": run_name or prefect.context.get("task_slug")},
            **checkpoint_kwargs,
        )
    else:
        # If assets are not provided directly through `assets_to_validate` then they
        # need to be loaded; get a batch from `batch_kwargs` and `expectation_suite_name`
        if not assets_to_validate:
            assets_to_validate = [
                context.get_batch(batch_kwargs, expectation_suite_name)
            ]

        # Run validation operator
        results = context.run_validation_operator(
            validation_operator,
            assets_to_validate=assets_to_validate,
            run_id={"run_name": run_name or prefect.context.get("task_slug")},
            evaluation_parameters=evaluation_parameters,
        )

    # Generate artifact markdown
    if not disable_markdown_artifact:
        validation_results_page_renderer = (
            ge.render.renderer.ValidationResultsPageRenderer(
                run_info_at_end=run_info_at_end
            )
        )
        rendered_content_list = validation_results_page_renderer.render_validation_operator_result(
            # This also works with a CheckpointResult because of duck typing.
            # The passed in object needs a list_validation_results method that
            # returns a list of ExpectationSuiteValidationResult.
            validation_operator_result=results
        )
        markdown_artifact = " ".join(
            ge.render.view.DefaultMarkdownPageView().render(rendered_content_list)
        )

        create_markdown_artifact(markdown_artifact)

    if results.success is False:
        raise signals.FAIL(result=results)

    return results