def test_extract_flow_from_file_path(self, flow_path):
    """Flows can be loaded from a file path, with or without an explicit name."""
    # Without `flow_name` the fixture file is expected to yield "flow-1",
    # and that flow must be runnable.
    default_flow = extract_flow_from_file(file_path=flow_path)
    assert default_flow.name == "flow-1"
    assert default_flow.run().is_successful()

    # Asking for a flow by name must return exactly that flow.
    for wanted in ("flow-1", "flow-2"):
        named_flow = extract_flow_from_file(file_path=flow_path, flow_name=wanted)
        assert named_flow.name == wanted
def test_extract_flow_from_file(tmpdir):
    """Exercise every accepted and rejected argument combination of the extractor."""
    script = """from prefect import Flow\nf=Flow('test-flow')"""
    script_path = os.path.join(tmpdir, "flow.py")
    with open(script_path, "w") as handle:
        handle.write(script)

    # Each of these argument sets must yield a runnable flow.
    accepted = (
        {"file_path": script_path},
        {"file_contents": script},
        {"file_path": script_path, "flow_name": "test-flow"},
    )
    for kwargs in accepted:
        assert extract_flow_from_file(**kwargs).run().is_successful()

    # Unknown flow name, both sources at once, and no source at all are errors.
    rejected = (
        {"file_path": script_path, "flow_name": "not-real"},
        {"file_path": script_path, "file_contents": script},
        {},
    )
    for kwargs in rejected:
        with pytest.raises(ValueError):
            extract_flow_from_file(**kwargs)
def test_extract_flow_from_file_raises_on_run_register(self, tmpdir, method):
    """A script that calls `f.<method>()` must emit a warning while being loaded."""
    script_path = os.path.join(tmpdir, "flow.py")
    script = f"from prefect import Flow\nf=Flow('test-flow')\nf.{method}()"
    with open(script_path, "w") as handle:
        handle.write(script)

    # The warning is only expected while the `loading_flow` context flag is set.
    with prefect.context({"loading_flow": True}), pytest.warns(Warning):
        extract_flow_from_file(file_path=script_path)
def test_extract_flow_from_file_contents(self, flow_path):
    """Flows can be loaded from raw script text, with or without an explicit name."""
    with open(flow_path, "r") as handle:
        source = handle.read()

    # Without `flow_name` the fixture script is expected to yield "flow-1",
    # and that flow must be runnable.
    default_flow = extract_flow_from_file(file_contents=source)
    assert default_flow.name == "flow-1"
    assert default_flow.run().is_successful()

    # Asking for a flow by name must return exactly that flow.
    for wanted in ("flow-1", "flow-2"):
        named_flow = extract_flow_from_file(file_contents=source, flow_name=wanted)
        assert named_flow.name == wanted
def flow(file, name, project, label, skip_if_flow_metadata_unchanged):
    """
    Register a flow from a file. This call will pull a Flow object out of a `.py` file
    and call `flow.register` on it.

    \b
    Options:
        --file, -f      TEXT    The path to a local file which contains a flow [required]
        --name, -n      TEXT    The `flow.name` to pull out of the file provided. If a name
                                is not provided then the first flow object found will be registered.
        --project, -p   TEXT    The name of a Prefect project to register this flow
        --label, -l     TEXT    A label to set on the flow, extending any existing labels.
                                Multiple labels are supported, eg. `-l label1 -l label2`.
        --skip-if-flow-metadata-unchanged
                                If set, the flow will only be re-registered if its metadata or structure has changed.

    \b
    Examples:
        $ prefect register flow --file my_flow.py --name My-Flow -l label1 -l label2
    """
    script_path = os.path.abspath(file)

    # Load the script with `loading_flow` set so that extra `run` / `register`
    # calls inside the file are not executed as part of the import.
    load_context = {"loading_flow": True, "local_script_path": script_path}
    with prefect.context(load_context):
        loaded = extract_flow_from_file(file_path=script_path, flow_name=name)

    # Only compute the hash-based key when the caller asked to skip unchanged flows.
    if skip_if_flow_metadata_unchanged:
        key = loaded.serialized_hash()
    else:
        key = None

    loaded.register(project_name=project, labels=label, idempotency_key=key)
def get_flow(self, flow_location: str) -> "Flow":
    """
    Fetch the file at `flow_location` from this storage's GitHub repository and
    extract the Flow it defines.

    Args:
        - flow_location (str): path, inside the repository, of the committed flow file

    Returns:
        - Flow: the flow extracted from the downloaded file contents

    Raises:
        - UnknownObjectException: if the file cannot be retrieved; the error is
            logged and then re-raised
    """
    from github import UnknownObjectException

    repository = self._github_client.get_repo(self.repo)
    try:
        raw_script = repository.get_contents(flow_location).decoded_content
    except UnknownObjectException as exc:
        message = (
            "Error retrieving file contents from {} on repo {}. Ensure the file exists."
        ).format(flow_location, self.repo)
        self.logger.error(message)
        raise exc

    return extract_flow_from_file(file_contents=raw_script)
def get_flow(self, flow_location: str) -> "Flow":
    """
    Download the blob stored at `flow_location` from this storage's GCS bucket and
    turn it back into a Flow.

    Args:
        - flow_location (str): the blob name under which the flow was stored; it
            must be one of the values registered in `self.flows`

    Returns:
        - Flow: the flow extracted from the script (when `stored_as_script` is set)
            or unpickled from the blob content otherwise

    Raises:
        - ValueError: if `flow_location` is not registered in this storage
        - StorageError: if the bucket holds no blob with that name
    """
    if flow_location not in self.flows.values():
        raise ValueError("Flow is not contained in this Storage")

    bucket = self._gcs_client.get_bucket(self.bucket)
    self.logger.info("Downloading {} from {}".format(flow_location, self.bucket))

    blob = bucket.get_blob(flow_location)
    if not blob:
        raise StorageError(
            "Flow not found in bucket: flow={} bucket={}".format(
                flow_location, self.bucket
            )
        )

    # Prefer `download_as_bytes`; `download_as_string` is the older spelling and
    # is kept only as a fallback for clients that predate it (GCS < 1.31).
    if hasattr(blob, "download_as_bytes"):
        content = blob.download_as_bytes()
    else:
        content = blob.download_as_string()

    if self.stored_as_script:
        return extract_flow_from_file(file_contents=content)

    # NOTE(security): unpickling executes arbitrary code, so the bucket contents
    # must be trusted.
    return cloudpickle.loads(content)
def get_flow(self, flow_name: str) -> "Flow":
    """
    Load the named flow by fetching its script from the GitLab project.

    Args:
        - flow_name (str): name of a flow registered in this storage

    Returns:
        - Flow: the flow called `flow_name` extracted from the fetched file

    Raises:
        - ValueError: if `flow_name` is not registered in this storage
        - GitlabAuthenticationError: re-raised after logging when authentication fails
        - GitlabGetError: re-raised after logging when the project or file
            cannot be fetched
    """
    if flow_name not in self.flows:
        raise ValueError("Flow is not contained in this Storage")

    script_path = self.flows[flow_name]
    # Fall back to "master" when no ref was configured.
    branch = self.ref if self.ref else "master"

    from gitlab.exceptions import GitlabAuthenticationError, GitlabGetError

    try:
        project = self._gitlab_client.projects.get(quote_plus(self.repo))
        remote_file = project.files.get(file_path=script_path, ref=branch)
    except GitlabAuthenticationError:
        self.logger.error(
            "Unable to authenticate Gitlab account. Please check your credentials."
        )
        raise
    except GitlabGetError:
        self.logger.error(
            f"Error retrieving file contents at {script_path} in {self.repo}@{branch}. "
            "Ensure the project and file exist."
        )
        raise

    return extract_flow_from_file(
        file_contents=remote_file.decode(), flow_name=flow_name
    )
def get_flow(self, flow_name: str) -> "Flow":
    """
    Load the named flow by issuing this storage's configured HTTP request.

    Args:
        - flow_name (str): name of a flow registered in this storage

    Returns:
        - Flow: the flow extracted from the response body when `stored_as_script`
            is set, otherwise the flow unpickled from the raw response bytes

    Raises:
        - ValueError: if `flow_name` is not registered in this storage
    """
    if flow_name not in self.flows:
        raise ValueError("Flow is not contained in this Storage")

    http_call = self._method_to_function[self.get_flow_request_http_method]
    request_kwargs = _render_dict(self.get_flow_request_kwargs)

    response = http_call(**request_kwargs)  # type: ignore
    # Surface HTTP failures before touching the body.
    response.raise_for_status()

    if not self.stored_as_script:
        return flow_from_bytes_pickle(response.content)

    script_text = response.content.decode("utf-8")
    return extract_flow_from_file(file_contents=script_text, flow_name=flow_name)
def get_flow(self, flow_name: str) -> "Flow":
    """
    Load the named flow by fetching its script from the GitHub repository at `self.ref`.

    Args:
        - flow_name (str): name of a flow registered in this storage

    Returns:
        - Flow: the flow called `flow_name` extracted from the fetched file

    Raises:
        - ValueError: if `flow_name` is not registered in this storage
        - UnknownObjectException: re-raised after logging when the file cannot
            be retrieved
    """
    if flow_name not in self.flows:
        raise ValueError("Flow is not contained in this Storage")

    script_path = self.flows[flow_name]

    from github import UnknownObjectException

    repository = self._github_client.get_repo(self.repo)
    try:
        remote_file = repository.get_contents(script_path, ref=self.ref)
        raw_script = remote_file.decoded_content
    except UnknownObjectException as exc:
        message = (
            "Error retrieving file contents from {} on repo {}. Ensure the file exists."
        ).format(script_path, self.repo)
        self.logger.error(message)
        raise exc

    return extract_flow_from_file(file_contents=raw_script, flow_name=flow_name)
def get_flow(self, flow_name: str) -> "Flow":
    """
    Load the named flow by downloading its blob from this storage's GCS bucket.

    Args:
        - flow_name (str): name of a flow registered in this storage

    Returns:
        - Flow: the flow extracted from the script when `stored_as_script` is set,
            otherwise the flow unpickled from the blob content

    Raises:
        - ValueError: if `flow_name` is not registered in this storage
        - FlowStorageError: if the bucket holds no blob for the flow
    """
    if flow_name not in self.flows:
        raise ValueError("Flow is not contained in this Storage")

    blob_name = self.flows[flow_name]
    bucket = self._gcs_client.get_bucket(self.bucket)
    self.logger.info("Downloading {} from {}".format(blob_name, self.bucket))

    blob = bucket.get_blob(blob_name)
    if not blob:
        message = "Flow not found in bucket: flow={} bucket={}".format(
            blob_name, self.bucket
        )
        raise FlowStorageError(message)

    # Support GCS < 1.31, where `download_as_bytes` does not exist yet.
    if hasattr(blob, "download_as_bytes"):
        payload = blob.download_as_bytes()
    else:
        payload = blob.download_as_string()

    if not self.stored_as_script:
        return flow_from_bytes_pickle(payload)
    return extract_flow_from_file(file_contents=payload, flow_name=flow_name)
def get_flow(self, flow_location: str = "placeholder") -> "Flow":
    """
    Retrieve the flow by issuing this storage's configured HTTP request.

    Unless `stored_as_script` is set, the response body is passed to
    `cloudpickle.loads()`, so this should only be called in an environment
    that has all of the flow's dependencies.

    Args:
        - flow_location (str): unused; accepted only so the signature matches the
            interface of other storage objects. `Webhook` storage corresponds to
            a single flow, so the value has no meaning here.

    Returns:
        - Flow: the flow extracted from the response script or unpickled from
            the response bytes

    Raises:
        - requests.exceptions.HTTPError if getting the flow fails
    """
    self.logger.info("Retrieving flow")

    http_call = self._method_to_function[self.get_flow_request_http_method]
    request_kwargs = _render_dict(self.get_flow_request_kwargs)

    response = http_call(**request_kwargs)  # type: ignore
    # Surface HTTP failures before touching the body.
    response.raise_for_status()

    if not self.stored_as_script:
        return cloudpickle.loads(response.content)

    script_text = response.content.decode("utf-8")
    return extract_flow_from_file(file_contents=script_text)  # type: ignore
def get_flow(self, flow_location: str = None) -> "prefect.core.flow.Flow":
    """
    Load a flow from a file path inside this Docker container.

    Note that this method should only be run _within_ the container itself.

    Args:
        - flow_location (str, optional): file path of the flow within the
            container; falls back to `self.path` when omitted

    Returns:
        - Flow: the flow extracted from the script when `stored_as_script` is set,
            otherwise the flow unpickled from the file

    Raises:
        - ValueError: if `flow_location` is given but not registered in this
            storage, or if neither `flow_location` nor `self.path` is set
    """
    if not flow_location:
        if not self.path:
            raise ValueError("No flow location provided")
        flow_location = self.path
    elif flow_location not in self.flows.values():
        raise ValueError("Flow is not contained in this Storage")

    if not self.stored_as_script:
        with open(flow_location, "rb") as pickled:
            return cloudpickle.load(pickled)

    return extract_flow_from_file(file_path=flow_location)
def get_flow(self, flow_name: str) -> "Flow":
    """
    Load the named flow by fetching its script through the boto3 client's `get_file` call.

    Args:
        - flow_name (str): name of a flow registered in this storage

    Returns:
        - Flow: the flow called `flow_name` extracted from the fetched file

    Raises:
        - ValueError: if `flow_name` is not registered in this storage
        - Exception: any error raised while fetching or decoding the file is
            logged and re-raised
    """
    if flow_name not in self.flows:
        raise ValueError("Flow is not contained in this Storage")

    script_path = self.flows[flow_name]
    boto_client = self._boto3_client
    try:
        response = boto_client.get_file(
            repositoryName=self.repo,
            commitSpecifier=self.commit,
            filePath=script_path,
        )
        script_text = response["fileContent"].decode("utf-8")
    except Exception as exc:
        message = (
            "Error retrieving file contents from {} on repo {}. Ensure the file exists."
        ).format(script_path, self.repo)
        self.logger.error(message)
        raise exc

    return extract_flow_from_file(file_contents=script_text, flow_name=flow_name)
def flow(file, name, project, label):
    """
    Register a flow from a file. This call will pull a Flow object out of a `.py` file
    and call `flow.register` on it.

    \b
    Options:
        --file, -f      TEXT    The path to a local file which contains a flow [required]
        --name, -n      TEXT    The `flow.name` to pull out of the file provided. If a name
                                is not provided then the first flow object found will be registered.
        --project, -p   TEXT    The name of a Prefect project to register this flow
        --label, -l     TEXT    A label to set on the flow, extending any existing labels.
                                Multiple labels are supported, eg. `-l label1 -l label2`.

    \b
    Examples:
        $ prefect register flow --file my_flow.py --name My-Flow -l label1 -l label2
    """
    script_path = os.path.abspath(file)

    # Load the script with `loading_flow` set so that extra `run` / `register`
    # calls inside the file are not executed as part of the import.
    load_context = {"loading_flow": True, "local_script_path": script_path}
    with prefect.context(load_context):
        loaded = extract_flow_from_file(file_path=script_path, flow_name=name)

    loaded.register(project_name=project, labels=label)
def get_flow(self, flow_location: str) -> "Flow":
    """
    Download the blob stored at `flow_location` from this storage's Azure container
    and turn it back into a Flow.

    Args:
        - flow_location (str): the blob name under which the flow was stored; it
            must be one of the values registered in `self.flows`

    Returns:
        - Flow: the flow extracted from the script when `stored_as_script` is set,
            otherwise the flow unpickled from the blob content

    Raises:
        - ValueError: if `flow_location` is not registered in this storage
    """
    if flow_location not in self.flows.values():
        raise ValueError("Flow is not contained in this Storage")

    blob_client = self._azure_block_blob_service.get_blob_client(
        container=self.container, blob=flow_location
    )
    self.logger.info("Downloading {} from {}".format(flow_location, self.container))
    payload = blob_client.download_blob().content_as_bytes()

    if not self.stored_as_script:
        return cloudpickle.loads(payload)
    return extract_flow_from_file(file_contents=payload)  # type: ignore
def get_flow(self, flow_location: str = None) -> "Flow":
    """
    Load a flow from a local file path or from a python module path.

    Args:
        - flow_location (str, optional): file path or module path where the flow
            lives; falls back to `self.path` when omitted

    Returns:
        - Flow: the requested flow

    Raises:
        - ValueError: if `flow_location` is given but not registered in this
            storage, or if neither `flow_location` nor `self.path` is set
    """
    if not flow_location:
        if not self.path:
            raise ValueError("No flow location provided")
        flow_location = self.path
    elif flow_location not in self.flows.values():
        raise ValueError("Flow is not contained in this Storage")

    # Anything that is not an existing file is treated as a module path.
    if not os.path.isfile(flow_location):
        return extract_flow_from_module(module_str=flow_location)

    if self.stored_as_script:
        return extract_flow_from_file(file_path=flow_location)
    return prefect.core.flow.Flow.load(flow_location)
def get_flow(self, flow_name: str) -> "Flow":
    """
    Load the named flow from a local file path or from a python module path.

    Args:
        - flow_name (str): name of a flow registered in this storage

    Returns:
        - Flow: the requested flow

    Raises:
        - ValueError: if `flow_name` is not registered in this storage
    """
    if flow_name not in self.flows:
        raise ValueError("Flow is not contained in this Storage")

    location = self.flows[flow_name]

    # Anything that is not an existing file is treated as a module path.
    if not os.path.isfile(location):
        return extract_flow_from_module(module_str=location, flow_name=flow_name)

    if self.stored_as_script:
        return extract_flow_from_file(file_path=location, flow_name=flow_name)

    with open(location, "rb") as pickled:
        return flow_from_bytes_pickle(pickled.read())
def get_flow(self, flow_name: str) -> "Flow":
    """
    Load the named flow by downloading its blob from this storage's Azure container.

    Args:
        - flow_name (str): name of a flow registered in this storage

    Returns:
        - Flow: the flow extracted from the script when `stored_as_script` is set,
            otherwise the flow unpickled from the blob content

    Raises:
        - ValueError: if `flow_name` is not registered in this storage
        - Exception: any error raised while downloading is logged and re-raised
    """
    if flow_name not in self.flows:
        raise ValueError("Flow is not contained in this Storage")

    blob_name = self.flows[flow_name]
    try:
        blob_client = self._azure_block_blob_service.get_blob_client(
            container=self.container, blob=blob_name
        )
        self.logger.info("Downloading {} from {}".format(blob_name, self.container))
        payload = blob_client.download_blob().content_as_bytes()
    except Exception as err:
        self.logger.error("Error downloading Flow from Azure: {}".format(err))
        raise

    if not self.stored_as_script:
        return flow_from_bytes_pickle(payload)
    return extract_flow_from_file(file_contents=payload, flow_name=flow_name)  # type: ignore
def get_flow(self, flow_name: str) -> "Flow":
    """
    Load the named flow from GitHub, pinned to the commit the ref currently points at.

    The ref (or the repository's default branch when `self.ref` is unset) is
    resolved to a commit sha first, and the file is fetched at that sha.

    Args:
        - flow_name (str): name of a flow registered in this storage

    Returns:
        - Flow: the flow called `flow_name` extracted from the fetched file

    Raises:
        - ValueError: if `flow_name` is not registered in this storage
        - UnknownObjectException: re-raised after logging when the repository,
            the ref, or the file cannot be found
    """
    if flow_name not in self.flows:
        raise ValueError("Flow is not contained in this Storage")

    script_path = self.flows[flow_name]

    from github import UnknownObjectException

    # Describe the active storage object; `ref` is mentioned only when it was
    # explicitly set.
    ref_suffix = f", ref: {self.ref!r}" if self.ref is not None else ""
    self.logger.info(
        "Downloading flow from GitHub storage - repo: %r, path: %r%s",
        self.repo,
        script_path,
        ref_suffix,
    )

    try:
        repository = self._github_client.get_repo(self.repo)
    except UnknownObjectException:
        self.logger.error(
            "Repo %r not found. Check that it exists (and is spelled correctly), "
            "and that you have configured the proper credentials for accessing it.",
            self.repo,
        )
        raise

    # No explicit ref means the repository's default branch.
    ref = self.ref or repository.default_branch

    # Resolve the ref to the sha of its current commit.
    try:
        commit_sha = repository.get_commit(ref).sha
    except UnknownObjectException:
        self.logger.error("Ref %r not found in repo %r.", ref, self.repo)
        raise

    try:
        remote_file = repository.get_contents(script_path, ref=commit_sha)
        raw_script = remote_file.decoded_content
    except UnknownObjectException:
        self.logger.error(
            "File %r not found in repo %r, ref %r", script_path, self.repo, ref
        )
        raise

    self.logger.info("Flow successfully downloaded. Using commit: %s", commit_sha)

    return extract_flow_from_file(file_contents=raw_script, flow_name=flow_name)
def import_flow_from_script_check(flow_file_paths: list):
    """
    Extract a flow from every script path and report success.

    Args:
        - flow_file_paths (list): paths of the flow scripts to load

    Returns:
        - list: the flows extracted from the scripts, in the order of the paths given
    """
    from prefect.utilities.storage import extract_flow_from_file

    loaded = [extract_flow_from_file(file_path=path) for path in flow_file_paths]

    # Only reached when every script was loaded without raising.
    print("Flow import from script check: OK")
    return loaded
def get_flow(self, flow_location: str = None, ref: str = None) -> "Flow":
    """
    Fetch a flow file from the Bitbucket repository and extract the Flow it defines.

    Args:
        - flow_location (str, optional): path, inside the repository, of the
            committed flow file; falls back to `self.path` when omitted
        - ref (str, optional): a commit SHA-1 value or branch name. Falls back to
            `self.ref`, then to 'master'

    Returns:
        - Flow: the flow extracted from the file contents returned by the
            Bitbucket client

    Raises:
        - ValueError: if `flow_location` is given but not registered in this
            storage, or if neither `flow_location` nor `self.path` is set
        - HTTPError: re-raised after logging when the file cannot be fetched
    """
    if flow_location:
        if flow_location not in self.flows.values():
            raise ValueError("Flow is not contained in this Storage")
    elif self.path:
        flow_location = self.path
    else:
        raise ValueError("No flow location provided")

    # Use ref argument if exists, else use attribute, else default to 'master'
    ref = ref or self.ref or "master"

    try:
        contents = self._bitbucket_client.get_content_of_file(
            self.project,
            self.repo,
            flow_location,
            at=ref,
        )
    except HTTPError as err:
        # `HTTPError` is imported elsewhere in the file. urllib's variant carries
        # the status on `.code`, the requests variant on `.response.status_code`.
        # Resolve it defensively so the handler can never raise AttributeError
        # and hide the original error.
        status = getattr(err, "code", None)
        if status is None:
            status = getattr(getattr(err, "response", None), "status_code", None)

        if status == 401:
            self.logger.error("Access denied to repository. Please check credentials.")
        elif status == 404:
            self.logger.error(
                "Invalid address. Check that host, project, and repository are correct."
            )
        else:
            self.logger.error(
                f"Error retrieving contents at {flow_location} in {self.repo}@{ref}. "
                "Please check arguments passed to Bitbucket storage and verify project exists."
            )
        raise

    return extract_flow_from_file(file_contents=contents)
def test_extract_flow_from_file_raises_on_run_register(tmpdir):
    """Scripts that call `f.run()` or `f.register()` must raise while being loaded."""
    scripts = (
        """from prefect import Flow\nf=Flow('test-flow')\nf.run()""",
        """from prefect import Flow\nf=Flow('test-flow')\nf.register()""",
    )

    for script in scripts:
        # The same file is overwritten for each scenario.
        script_path = os.path.join(tmpdir, "flow.py")
        with open(script_path, "w") as handle:
            handle.write(script)

        with prefect.context({"loading_flow": True}):
            with pytest.raises(RuntimeError):
                extract_flow_from_file(file_path=script_path)
def test_extract_flow_from_file_errors(self, flow_path):
    """Each misuse of the extractor raises a ValueError with a specific message."""
    # Both sources supplied at once.
    with pytest.raises(ValueError, match="but not both"):
        extract_flow_from_file(file_path="", file_contents="")

    # No source supplied at all.
    with pytest.raises(ValueError, match="Provide either"):
        extract_flow_from_file()

    # Unknown flow name: the message lists the flows that were found.
    not_found_message = (
        "Flow 'not-real' not found in file. Found flows:\n- 'flow-1'\n- 'flow-2'"
    )
    with pytest.raises(ValueError, match=not_found_message):
        extract_flow_from_file(file_path=flow_path, flow_name="not-real")

    # Empty contents define no flows.
    with pytest.raises(ValueError, match="No flows found in file."):
        extract_flow_from_file(file_contents="")
def get_flow(self, flow_location: str = None) -> "Flow":
    """
    Download a flow from this storage's S3 bucket and turn it back into a Flow.

    Args:
        - flow_location (str, optional): the S3 object key under which the flow
            was stored; falls back to `self.key` when omitted

    Returns:
        - Flow: the flow extracted from the script when `stored_as_script` is set,
            otherwise the flow unpickled from the downloaded bytes

    Raises:
        - ValueError: if `flow_location` is given but not registered in this
            storage, or if neither `flow_location` nor `self.key` is set
        - botocore.ClientError: re-raised after logging when the download fails
    """
    if not flow_location:
        if not self.key:
            raise ValueError("No flow location provided")
        flow_location = self.key
    elif flow_location not in self.flows.values():
        raise ValueError("Flow is not contained in this Storage")

    buffer = io.BytesIO()
    self.logger.info("Downloading {} from {}".format(flow_location, self.bucket))

    # Download the object into the in-memory buffer.
    from botocore.exceptions import ClientError

    try:
        self._boto3_client.download_fileobj(
            Bucket=self.bucket, Key=flow_location, Fileobj=buffer
        )
    except ClientError as err:
        self.logger.error("Error downloading Flow from S3: {}".format(err))
        raise err

    # Everything written to the buffer, regardless of the current position.
    payload = buffer.getvalue()

    if not self.stored_as_script:
        return cloudpickle.loads(payload)
    return extract_flow_from_file(file_contents=payload)  # type: ignore
def get_flow(self, flow_location: str = None, ref: str = None) -> "Flow":
    """
    Fetch a flow file from the GitLab project and extract the Flow it defines.

    Args:
        - flow_location (str, optional): path, inside the repository, of the
            committed flow file; falls back to `self.path` when omitted
        - ref (str, optional): a commit SHA-1 value or branch name. Falls back to
            `self.ref`, then to 'master'

    Returns:
        - Flow: the flow extracted from the fetched file

    Raises:
        - ValueError: if `flow_location` is given but not registered in this
            storage, or if neither `flow_location` nor `self.path` is set
        - GitlabAuthenticationError: re-raised after logging when authentication fails
        - GitlabGetError: re-raised after logging when the project or file
            cannot be fetched
    """
    if not flow_location:
        if not self.path:
            raise ValueError("No flow location provided")
        flow_location = self.path
    elif flow_location not in self.flows.values():
        raise ValueError("Flow is not contained in this Storage")

    # Explicit argument wins, then the storage attribute, then 'master'.
    ref = ref or self.ref or "master"

    from gitlab.exceptions import GitlabAuthenticationError, GitlabGetError

    try:
        project = self._gitlab_client.projects.get(quote_plus(self.repo))
        remote_file = project.files.get(file_path=flow_location, ref=ref)
    except GitlabAuthenticationError:
        self.logger.error(
            "Unable to authenticate Gitlab account. Please check your credentials."
        )
        raise
    except GitlabGetError:
        self.logger.error(
            f"Error retrieving file contents at {flow_location} in {self.repo}@{ref}. "
            "Ensure the project and file exist."
        )
        raise

    return extract_flow_from_file(file_contents=remote_file.decode())
def get_flow(self, flow_location: str) -> "prefect.core.flow.Flow":
    """
    Load a flow from a file path inside this Docker container.

    Note that this method should only be run _within_ the container itself.

    Args:
        - flow_location (str): the file path of a flow within this container

    Returns:
        - Flow: the flow extracted from the script when `stored_as_script` is set,
            otherwise the flow unpickled from the file
    """
    if not self.stored_as_script:
        with open(flow_location, "rb") as pickled:
            return cloudpickle.load(pickled)

    return extract_flow_from_file(file_path=flow_location)
def get_flow(self, flow_name: str) -> "Flow":
    """
    Load the named flow by fetching its script from the Bitbucket repository.

    Args:
        - flow_name (str): name of a flow registered in this storage

    Returns:
        - Flow: the flow called `flow_name` extracted from the fetched file

    Raises:
        - ValueError: if `flow_name` is not registered in this storage
        - HTTPError: re-raised after logging when the file cannot be fetched
    """
    if flow_name not in self.flows:
        raise ValueError("Flow is not contained in this Storage")
    flow_location = self.flows[flow_name]

    # Use ref attribute if present, defaulting to "master"
    ref = self.ref or "master"

    client = self._get_bitbucket_client()

    try:
        contents = client.get_content_of_file(
            self.project,
            self.repo,
            flow_location,
            at=ref,
        )
    except HTTPError as err:
        # `HTTPError` is imported elsewhere in the file. urllib's variant carries
        # the status on `.code`, the requests variant on `.response.status_code`.
        # Resolve it defensively so the handler can never raise AttributeError
        # and hide the original error.
        status = getattr(err, "code", None)
        if status is None:
            status = getattr(getattr(err, "response", None), "status_code", None)

        if status == 401:
            self.logger.error("Access denied to repository. Please check credentials.")
        elif status == 404:
            self.logger.error(
                "Invalid address. Check that host, project, and repository are correct."
            )
        else:
            self.logger.error(
                f"Error retrieving contents at {flow_location} in {self.repo}@{ref}. "
                "Please check arguments passed to Bitbucket storage and verify project exists."
            )
        raise

    return extract_flow_from_file(file_contents=contents, flow_name=flow_name)
def get_flow(self, flow_location: str = None) -> "Flow":
    """
    Download a flow from this storage's GCS bucket and turn it back into a Flow.

    Args:
        - flow_location (str, optional): the blob name under which the flow was
            stored; falls back to `self.key` when omitted

    Returns:
        - Flow: the flow extracted from the script when `stored_as_script` is set,
            otherwise the flow unpickled from the blob content

    Raises:
        - ValueError: if `flow_location` is given but not registered in this
            storage, or if neither `flow_location` nor `self.key` is set
        - StorageError: if the bucket holds no blob with that name
    """
    if not flow_location:
        if not self.key:
            raise ValueError("No flow location provided")
        flow_location = self.key
    elif flow_location not in self.flows.values():
        raise ValueError("Flow is not contained in this Storage")

    bucket = self._gcs_client.get_bucket(self.bucket)
    self.logger.info("Downloading {} from {}".format(flow_location, self.bucket))

    blob = bucket.get_blob(flow_location)
    if not blob:
        message = "Flow not found in bucket: flow={} bucket={}".format(
            flow_location, self.bucket
        )
        raise StorageError(message)

    # Support GCS < 1.31, where `download_as_bytes` does not exist yet.
    if hasattr(blob, "download_as_bytes"):
        payload = blob.download_as_bytes()
    else:
        payload = blob.download_as_string()

    if not self.stored_as_script:
        return flow_from_bytes_pickle(payload)
    return extract_flow_from_file(file_contents=payload)
def flow(file, name, project, label, skip_if_flow_metadata_unchanged):
    """Register a flow (DEPRECATED)"""
    deprecation_notice = (
        "Warning: `prefect register flow` is deprecated, please transition to "
        "using `prefect register` instead."
    )
    click.secho(deprecation_notice, fg="yellow")

    script_path = os.path.abspath(file)

    # Load the script with `loading_flow` set so that extra `run` / `register`
    # calls inside the file are not executed as part of the import.
    load_context = {"loading_flow": True, "local_script_path": script_path}
    with prefect.context(load_context):
        loaded = extract_flow_from_file(file_path=script_path, flow_name=name)

    # Only compute the hash-based key when the caller asked to skip unchanged flows.
    if skip_if_flow_metadata_unchanged:
        key = loaded.serialized_hash()
    else:
        key = None

    loaded.register(project_name=project, labels=label, idempotency_key=key)