def graphql(
    self,
    query: Any,
    raise_on_error: bool = True,
    **variables: Union[bool, dict, str, int]
) -> GraphQLResult:
    """
    Run a query or mutation against the Prefect GraphQL API.

    Args:
        - query (Any): a representation of the GraphQL document to execute; it is
            converted with `parse_graphql()` before being sent
        - raise_on_error (bool): when `True`, a `ClientError` is raised if the
            response contains an `errors` entry
        - **variables (kwarg): GraphQL variables, keyed by the variable names the
            document accepts; they are JSON-encoded before being sent

    Returns:
        - GraphQLResult: the response converted via `as_nested_dict`

    Raises:
        - ClientError: if `raise_on_error` is set and the response contains `errors`
    """
    response = self.post(
        path="",
        query=parse_graphql(query),
        variables=json.dumps(variables),
        server=self.graphql_server,
    )

    # Guard clause: surface API-reported errors before converting the payload.
    if raise_on_error and "errors" in response:
        raise ClientError(response["errors"])

    return as_nested_dict(response, GraphQLResult)  # type: ignore
def get_key_value(key: str) -> Any:
    """
    Look up the value stored under a key.

    Args:
        - key (str): the name of the key

    Returns:
        - value (Any): the `value` field of the first matching key/value record

    Raises:
        - ValueError: if no record matches the key
        - ClientError: if the configured backend is not "cloud"
    """
    if prefect.config.backend != "cloud":
        raise ClientError(NON_CLOUD_BACKEND_ERROR_MESSAGE)

    key_filter = {"where": {"key": {"_eq": key}}}
    query = {"query": {with_args("key_value", key_filter): {"value"}}}

    result = Client().graphql(query)  # type: Any

    matches = result.data.key_value
    if len(matches) == 0:
        raise ValueError(f"No value found for key: {key}")

    first_match = matches[0]
    return first_match.value
def set_key_value(key: str, value: Any) -> str:
    """
    Store a key/value pair through the `set_key_value` mutation.

    Args:
        - key (str): the name of the key
        - value (Any): the value to store; it is sent as a GraphQL variable

    Returns:
        - id (str): the `id` returned by the `set_key_value` mutation

    Raises:
        - ClientError: if the configured backend is not "cloud"
    """
    if prefect.config.backend != "cloud":
        raise ClientError(NON_CLOUD_BACKEND_ERROR_MESSAGE)

    mutation = {
        "mutation($input: set_key_value_input!)": {
            "set_key_value(input: $input)": {"id"}
        }
    }

    client = Client()
    mutation_input = {"key": key, "value": value}
    response = client.graphql(query=mutation, variables={"input": mutation_input})

    return response.data.set_key_value.id
def set_key_value(key: str, value: Any) -> str:
    """
    Set key value pair, overwriting values for existing key

    Args:
        - key (str): the name of the key
        - value (Any): A json compatible value

    Returns:
        - id (str): the id of the key value pair

    Raises:
        - ClientError: if using Prefect Server instead of Cloud
        - ValueError: if the JSON-encoded `value` exceeds the 10 KB limit
    """
    if prefect.config.backend != "cloud":
        raise ClientError(NON_CLOUD_BACKEND_ERROR_MESSAGE)

    # check value is under size limit
    # note this will be enforced by the API
    #
    # The size is measured on the encoded JSON payload. `sys.getsizeof` would
    # report the in-memory footprint of the Python `str` object (object header
    # plus a character width that depends on the content), which is not the
    # number of bytes in the serialized value.
    value_size = len(json.dumps(value).encode("utf-8"))
    if value_size > 10000:  # 10 KB max
        raise ValueError("Value payload exceedes 10 KB limit.")

    mutation = {
        "mutation($input: set_key_value_input!)": {
            "set_key_value(input: $input)": {"id"}
        }
    }

    client = Client()
    result = client.graphql(
        query=mutation, variables=dict(input=dict(key=key, value=value))
    )

    return result.data.set_key_value.id
def get_flow_run_info(self, flow_run_id: str) -> FlowRunInfoResult:
    """
    Fetch version and current state information for a flow run and its task runs.

    Args:
        - flow_run_id (str): the id of the flow run to look up

    Returns:
        - FlowRunInfoResult: built from the `flow_run_by_pk` record, with
            `scheduled_start_time` parsed by `pendulum`, a deserialized `state`,
            and `task_runs` converted to `TaskRunInfoResult` objects

    Raises:
        - ClientError: if no flow run with the given id is returned
    """
    query = {
        "query": {
            with_args("flow_run_by_pk", {"id": flow_run_id}): {
                "parameters": True,
                "context": True,
                "version": True,
                "scheduled_start_time": True,
                "serialized_state": True,
                # only request task runs whose map_index is -1
                # (i.e. skip dynamic task runs)
                with_args("task_runs", {"where": {"map_index": {"_eq": -1}}}): {
                    "id",
                    "task_id",
                    "version",
                    "serialized_state",
                },
            }
        }
    }

    flow_run = self.graphql(query).data.flow_run_by_pk  # type: ignore
    if flow_run is None:
        raise ClientError('Flow run ID not found: "{}"'.format(flow_run_id))

    # the start time arrives as a string; turn it into a datetime
    flow_run.scheduled_start_time = pendulum.parse(flow_run.scheduled_start_time)

    # swap the raw "serialized_state" entry for a deserialized "state" attribute
    flow_run.state = prefect.engine.state.State.deserialize(
        flow_run.pop("serialized_state")
    )

    def _as_task_run_info(task_run: Any) -> TaskRunInfoResult:
        # same serialized_state -> state swap, applied to a single task run
        task_run.state = prefect.engine.state.State.deserialize(
            task_run.pop("serialized_state")
        )
        return TaskRunInfoResult(**task_run)

    flow_run.task_runs = [_as_task_run_info(tr) for tr in flow_run.task_runs]

    return FlowRunInfoResult(**flow_run)
def list_keys() -> List[str]:
    """
    Return every stored key, sorted.

    Returns:
        - keys (list): the `key` field of each `key_value` record, in sorted order

    Raises:
        - ClientError: if the configured backend is not "cloud"
    """
    if prefect.config.backend != "cloud":
        raise ClientError(NON_CLOUD_BACKEND_ERROR_MESSAGE)

    client = Client()
    result = client.graphql({"query": {"key_value": {"key"}}})  # type: ignore

    keys = [record["key"] for record in result.data.key_value]
    keys.sort()
    return keys
def graphql(
    self,
    query: Any,
    raise_on_error: bool = True,
    headers: Dict[str, str] = None,
    variables: Dict[str, JSONLike] = None,
    token: str = None,
) -> GraphQLResult:
    """
    Run a query or mutation against the Prefect GraphQL API.

    Args:
        - query (Any): a representation of the GraphQL document to execute; it is
            converted with `parse_graphql()` before being sent
        - raise_on_error (bool): when `True`, an exception is raised if the
            response contains an `errors` entry
        - headers (dict): additional headers forwarded to `self.post`
        - variables (dict): GraphQL variables, keyed by the variable names the
            document accepts; they are JSON-encoded before being sent
        - token (str): an auth token forwarded to `self.post`

    Returns:
        - GraphQLResult: the response wrapped in a `GraphQLResult`

    Raises:
        - AuthorizationError: if `raise_on_error` is set and the reported errors
            mention "UNAUTHENTICATED" or "Malformed Authorization header"
        - ClientError: if `raise_on_error` is set and any other errors are reported
    """
    request_params = dict(
        query=parse_graphql(query), variables=json.dumps(variables)
    )
    result = self.post(
        path="",
        server=self.api_server,
        headers=headers,
        params=request_params,
        token=token,
    )

    # Happy path first: nothing to raise, just wrap the payload.
    if not (raise_on_error and "errors" in result):
        return GraphQLResult(result)  # type: ignore

    errors = result["errors"]
    error_text = str(errors)
    auth_markers = ("UNAUTHENTICATED", "Malformed Authorization header")
    if any(marker in error_text for marker in auth_markers):
        raise AuthorizationError(errors)
    raise ClientError(errors)
def delete_key(key: str) -> bool:
    """
    Delete the key/value pair stored under a key.

    The record id is looked up by key first, then passed to the
    `delete_key_value` mutation.

    Args:
        - key (str): the name of the key

    Returns:
        - success (bool): the `success` field returned by the mutation

    Raises:
        - ValueError: if no record matches the key
        - ClientError: if the configured backend is not "cloud"
    """
    if prefect.config.backend != "cloud":
        raise ClientError(NON_CLOUD_BACKEND_ERROR_MESSAGE)

    key_filter = {"where": {"key": {"_eq": key}}}
    query = {"query": {with_args("key_value", key_filter): {"id"}}}
    mutation = {
        "mutation($input: delete_key_value_input!)": {
            "delete_key_value(input: $input)": {"success"}
        }
    }

    client = Client()

    lookup = client.graphql(query=query)
    if len(lookup.data.key_value) == 0:
        raise ValueError(f"No key {key} found to delete")

    delete_input = {"key_value_id": lookup.data.key_value[0].id}
    result = client.graphql(query=mutation, variables={"input": delete_input})

    return result.data.delete_key_value.success
def graphql(
    self,
    query: Any,
    raise_on_error: bool = True,
    headers: Dict[str, str] = None,
    variables: Dict[str, JSONLike] = None,
) -> GraphQLResult:
    """
    Run a query or mutation against the Prefect GraphQL API.

    Args:
        - query (Any): a representation of the GraphQL document to execute; it is
            converted with `parse_graphql()` before being sent
        - raise_on_error (bool): when `True`, a `ClientError` is raised if the
            response contains an `errors` entry
        - headers (dict): additional headers forwarded to `self.post`
        - variables (dict): GraphQL variables, keyed by the variable names the
            document accepts; they are JSON-encoded before being sent

    Returns:
        - GraphQLResult: the response converted via `as_nested_dict`

    Raises:
        - ClientError: if `raise_on_error` is set and the response contains `errors`
    """
    request_params = dict(
        query=parse_graphql(query), variables=json.dumps(variables)
    )
    response = self.post(
        path="",
        server=self.graphql_server,
        headers=headers,
        params=request_params,
    )

    # Guard clause: surface API-reported errors before converting the payload.
    if raise_on_error and "errors" in response:
        raise ClientError(response["errors"])

    return as_nested_dict(response, GraphQLResult)  # type: ignore
def deploy(
    self,
    flow: "Flow",
    project_name: str,
    build: bool = True,
    set_schedule_active: bool = True,
    compressed: bool = True,
) -> str:
    """
    Push a new flow to Prefect Cloud.

    Args:
        - flow (Flow): the flow to deploy
        - project_name (str): name of the project that should contain this flow
        - build (bool, optional): forwarded to `flow.serialize`; defaults to `True`
        - set_schedule_active (bool, optional): sent as `setScheduleActive` in the
            create mutation; defaults to `True`
        - compressed (bool, optional): if `True`, the serialized flow is passed
            through `compress` and the compressed-string mutation is used;
            defaults to `True`

    Returns:
        - str: the ID of the newly-deployed flow

    Raises:
        - ClientError: if the flow has a schedule and required parameters
        - ValueError: if the project is not found, or if the serialized flow
            cannot be loaded back through `FlowSchema`
    """
    params_without_defaults = {p for p in flow.parameters() if p.required}
    if flow.schedule is not None and params_without_defaults:
        raise ClientError(
            "Flows with required parameters can not be scheduled automatically."
        )

    project_filter = {"where": {"name": {"_eq": project_name}}}
    query_project = {"query": {with_args("project", project_filter): {"id": True}}}
    project = self.graphql(query_project).data.project  # type: ignore

    if not project:
        raise ValueError(
            'Project {} not found. Run `client.create_project("{}")` to create it.'.format(
                project_name, project_name
            )
        )

    serialized_flow = flow.serialize(build=build)  # type: Any

    # round-trip check: make sure what we are about to send can be loaded again
    try:
        prefect.serialization.flow.FlowSchema().load(serialized_flow)
    except Exception as exc:
        raise ValueError(
            "Flow could not be deserialized successfully. Error was: {}".format(
                repr(exc)
            )
        )

    # pick the mutation (and payload encoding) matching the `compressed` flag
    if compressed:
        serialized_flow = compress(serialized_flow)
        create_mutation = {
            "mutation($input: createFlowFromCompressedStringInput!)": {
                "createFlowFromCompressedString(input: $input)": {"id"}
            }
        }
    else:
        create_mutation = {
            "mutation($input: createFlowInput!)": {
                "createFlow(input: $input)": {"id"}
            }
        }

    mutation_input = dict(
        projectId=project[0].id,
        serializedFlow=serialized_flow,
        setScheduleActive=set_schedule_active,
    )
    res = self.graphql(
        create_mutation, variables=dict(input=mutation_input)
    )  # type: Any

    if compressed:
        flow_id = res.data.createFlowFromCompressedString.id
    else:
        flow_id = res.data.createFlow.id
    return flow_id
def register(
    self,
    flow: "Flow",
    project_name: str,
    build: bool = True,
    set_schedule_active: bool = True,
    version_group_id: str = None,
    compressed: bool = True,
    no_url: bool = False,
) -> str:
    """
    Push a new flow to Prefect Cloud

    Args:
        - flow (Flow): a flow to register
        - project_name (str): the project that should contain this flow.
        - build (bool, optional): if `True`, the flow's environment is built
            prior to serialization; defaults to `True`
        - set_schedule_active (bool, optional): if `False`, will set the
            schedule to inactive in the database to prevent auto-scheduling runs (if the Flow has a schedule).
            Defaults to `True`. This can be changed later.
        - version_group_id (str, optional): the UUID version group ID to use for versioning this Flow
            in Cloud; if not provided, the version group ID associated with this Flow's project and name
            will be used.
        - compressed (bool, optional): if `True`, the serialized flow will be
            compressed; defaults to `True`
        - no_url (bool, optional): if `True`, the stdout from this function will not contain the
            URL link to the newly-registered flow in the Cloud UI

    Returns:
        - str: the ID of the newly-registered flow

    Raises:
        - ClientError: if the flow is scheduled and some clock does not provide
            defaults for every required parameter
        - ValueError: if the project is not found, or if the serialized flow
            cannot be loaded back through `FlowSchema`
    """
    required_parameters = {p for p in flow.parameters() if p.required}
    if flow.schedule is not None and required_parameters:
        required_names = {p.name for p in required_parameters}
        # Every clock must cover all required parameters. A subset test is used
        # (rather than equality) so that a clock which additionally supplies
        # defaults for optional parameters is still accepted.
        if not all(
            required_names <= set(c.parameter_defaults.keys())
            for c in flow.schedule.clocks
        ):
            raise ClientError(
                "Flows with required parameters can not be scheduled automatically."
            )
    if any(e.key for e in flow.edges) and flow.result_handler is None:
        warnings.warn(
            "No result handler was specified on your Flow. "
            "Cloud features such as input caching and resuming task runs from failure may not work properly.",
            UserWarning,
        )
    if compressed:
        create_mutation = {
            "mutation($input: createFlowFromCompressedStringInput!)": {
                "createFlowFromCompressedString(input: $input)": {"id"}
            }
        }
    else:
        create_mutation = {
            "mutation($input: createFlowInput!)": {
                "createFlow(input: $input)": {"id"}
            }
        }

    query_project = {
        "query": {
            with_args("project", {"where": {"name": {"_eq": project_name}}}): {
                "id": True
            }
        }
    }

    project = self.graphql(query_project).data.project  # type: ignore

    if not project:
        raise ValueError(
            'Project {} not found. Run `client.create_project("{}")` to create it.'.format(
                project_name, project_name
            )
        )

    serialized_flow = flow.serialize(build=build)  # type: Any

    # verify that the serialized flow can be deserialized
    try:
        prefect.serialization.flow.FlowSchema().load(serialized_flow)
    except Exception as exc:
        raise ValueError(
            "Flow could not be deserialized successfully. Error was: {}".format(
                repr(exc)
            )
        )

    if compressed:
        serialized_flow = compress(serialized_flow)
    res = self.graphql(
        create_mutation,
        variables=dict(
            input=dict(
                projectId=project[0].id,
                serializedFlow=serialized_flow,
                setScheduleActive=set_schedule_active,
                versionGroupId=version_group_id,
            )
        ),
    )  # type: Any

    flow_id = (
        res.data.createFlowFromCompressedString.id
        if compressed
        else res.data.createFlow.id
    )

    if not no_url:
        # Generate direct link to Cloud flow
        flow_url = self.get_cloud_url("flow", flow_id)
        print("Flow: {}".format(flow_url))

    return flow_id
def _request(
    self,
    method: str,
    path: str,
    params: Dict[str, JSONLike] = None,
    server: str = None,
    headers: dict = None,
    token: str = None,
) -> "requests.models.Response":
    """
    Runs any specified request (GET, POST, DELETE) against the server

    Args:
        - method (str): The type of request to be made (GET, POST, DELETE)
        - path (str): Path of the API URL
        - params (dict, optional): Parameters used for the request
        - server (str, optional): The server to make requests against, base API
            server is used if not specified
        - headers (dict, optional): Headers to pass with the request; the dict is
            copied, so the caller's object is never modified
        - token (str): an auth token. If not supplied, the result of
            `self.get_auth_token()` is used.

    Returns:
        - requests.models.Response: The response returned from the request

    Raises:
        - ClientError: if the response body cannot be decoded as JSON
    """
    if server is None:
        server = self.api_server
    assert isinstance(server, str)  # mypy assert

    if token is None:
        token = self.get_auth_token()

    # 'import requests' is expensive time-wise, we should do this just-in-time to keep
    # the 'import prefect' time low
    import requests

    url = urljoin(server, path.lstrip("/")).rstrip("/")

    params = params or {}

    # Copy before adding entries: writing into the caller's dict would leak the
    # bearer token and version header into an object the caller may reuse.
    headers = dict(headers) if headers else {}
    if token:
        headers["Authorization"] = "Bearer {}".format(token)
    headers["X-PREFECT-CORE-VERSION"] = str(prefect.__version__)

    if self._attached_headers:
        headers.update(self._attached_headers)

    # The session is used as a context manager so its connection pool is
    # released on every exit path. Every response returned below has already
    # had `.json()` called on it, i.e. its body has been read.
    with requests.Session() as session:
        retries = requests.packages.urllib3.util.retry.Retry(
            total=6,
            backoff_factor=1,
            status_forcelist=[500, 502, 503, 504],
            method_whitelist=["DELETE", "GET", "POST"],
        )
        session.mount("https://", requests.adapters.HTTPAdapter(max_retries=retries))
        response = self._send_request(
            session=session, method=method, url=url, params=params, headers=headers
        )

        # parse the response
        try:
            json_resp = response.json()
        except json.JSONDecodeError:
            if prefect.config.backend == "cloud" and "Authorization" not in headers:
                raise ClientError(
                    "Malformed response received from Cloud - please ensure that you "
                    "have an API token properly configured."
                )
            else:
                raise ClientError("Malformed response received from API.")

        # check if there was an API_ERROR code in the response
        if "API_ERROR" in str(json_resp.get("errors")):
            success, retry_count = False, 0
            # retry up to six times, sleeping 0.1s, 0.2s, 0.4s, ... between attempts
            while success is False and retry_count < 6:
                response = self._send_request(
                    session=session,
                    method=method,
                    url=url,
                    params=params,
                    headers=headers,
                )
                if "API_ERROR" in str(response.json().get("errors")):
                    retry_count += 1
                    time.sleep(0.1 * (2 ** (retry_count - 1)))
                else:
                    success = True

    return response
def get_task_run_info(
    self, flow_run_id: str, task_id: str, map_index: Optional[int] = None
) -> TaskRunInfoResult:
    """
    Fetch version and current state information for a task run.

    The task run is first obtained through the `get_or_create_task_run`
    mutation, then its details are queried by primary key.

    Args:
        - flow_run_id (str): the id of the flow run that this task run lives in
        - task_id (str): the task id for this task run
        - map_index (int, optional): the mapping index for this task run; `None`
            is sent as `-1`

    Returns:
        - TaskRunInfoResult: holds `id`, `task_id`, `task_slug`, `version` and
            the deserialized `state`

    Raises:
        - ClientError: if the mutation yields no result, or the task run cannot
            be found by its id
    """
    effective_index = -1 if map_index is None else map_index
    task_run_input = {
        "flow_run_id": flow_run_id,
        "task_id": task_id,
        "map_index": effective_index,
    }
    mutation = {
        "mutation": {
            with_args("get_or_create_task_run", {"input": task_run_input}): {
                "id": True,
            }
        }
    }
    result = self.graphql(mutation)  # type: Any

    if result is None:
        raise ClientError("Failed to create task run.")

    task_run_id = result.data.get_or_create_task_run.id

    requested_fields = {
        "version": True,
        "serialized_state": True,
        "task": {"slug": True},
    }
    query = {
        "query": {with_args("task_run_by_pk", {"id": task_run_id}): requested_fields}
    }
    task_run = self.graphql(query).data.task_run_by_pk  # type: ignore

    if task_run is None:
        raise ClientError('Task run ID not found: "{}"'.format(task_run_id))

    state = prefect.engine.state.State.deserialize(task_run.serialized_state)
    return TaskRunInfoResult(
        id=task_run_id,
        task_id=task_id,
        task_slug=task_run.task.slug,
        version=task_run.version,
        state=state,
    )
def deploy(
    self,
    flow: "Flow",
    project_name: str,
    build: bool = True,
    set_schedule_active: bool = True,
) -> str:
    """
    Push a new flow to Prefect Cloud.

    Args:
        - flow (Flow): the flow to deploy
        - project_name (str): name of the project that should contain this flow
        - build (bool, optional): forwarded to `flow.serialize`; defaults to `True`
        - set_schedule_active (bool, optional): sent as `setScheduleActive` in the
            `createFlow` mutation; defaults to `True`

    Returns:
        - str: the ID of the newly-deployed flow

    Raises:
        - ClientError: if the flow has a schedule and required parameters
        - ValueError: if the project is not found
    """
    params_without_defaults = {p for p in flow.parameters() if p.required}
    if flow.schedule is not None and params_without_defaults:
        raise ClientError(
            "Flows with required parameters can not be scheduled automatically."
        )

    create_mutation = {
        "mutation($input: createFlowInput!)": {"createFlow(input: $input)": {"id"}}
    }

    project_filter = {"where": {"name": {"_eq": project_name}}}
    query_project = {"query": {with_args("project", project_filter): {"id": True}}}
    project = self.graphql(query_project).data.project  # type: ignore

    if not project:
        raise ValueError(
            "Project {} not found. Run `client.create_project({})` to create it.".format(
                project_name, project_name
            )
        )

    # the mutation variables are passed as keyword arguments to `self.graphql`
    mutation_input = dict(
        projectId=project[0].id,
        serializedFlow=flow.serialize(build=build),
        setScheduleActive=set_schedule_active,
    )
    res = self.graphql(create_mutation, input=mutation_input)  # type: Any

    return res.data.createFlow.id
def register(
    self,
    flow: "Flow",
    project_name: str = None,
    build: bool = True,
    set_schedule_active: bool = True,
    version_group_id: str = None,
    compressed: bool = True,
    no_url: bool = False,
) -> str:
    """
    Register a flow with the configured Prefect backend.

    Args:
        - flow (Flow): the flow to register
        - project_name (str, optional): name of the project that should contain
            this flow; required when the backend is "cloud"
        - build (bool, optional): forwarded to the first `flow.serialize` call;
            defaults to `True`
        - set_schedule_active (bool, optional): sent as `set_schedule_active` in
            the create mutation; defaults to `True`
        - version_group_id (str, optional): sent as `version_group_id` in the
            create mutation
        - compressed (bool, optional): if `True`, the serialized flow is passed
            through `compress` and the compressed-string mutation is used;
            defaults to `True`
        - no_url (bool, optional): if `True`, the flow URL is not printed

    Returns:
        - str: the ID of the newly-registered flow

    Raises:
        - ClientError: if the flow is scheduled and some clock does not provide
            defaults for every required parameter
        - TypeError: if the backend is "cloud" and no `project_name` is given
        - ValueError: if the project is not found, or if the serialized flow
            cannot be loaded back through `FlowSchema`
    """
    params_without_defaults = {p for p in flow.parameters() if p.required}
    if flow.schedule is not None and params_without_defaults:
        required_names = {p.name for p in params_without_defaults}
        # one entry per clock: does it supply defaults for all required names?
        clock_coverage = [
            required_names <= set(clock.parameter_defaults.keys())
            for clock in flow.schedule.clocks
        ]
        if not all(clock_coverage):
            raise ClientError(
                "Flows with required parameters can not be scheduled automatically."
            )

    if any(e.key for e in flow.edges) and flow.result is None:
        warnings.warn(
            "No result handler was specified on your Flow. Cloud features such as "
            "input caching and resuming task runs from failure may not work properly."
        )

    if compressed:
        create_mutation = {
            "mutation($input: create_flow_from_compressed_string_input!)": {
                "create_flow_from_compressed_string(input: $input)": {"id"}
            }
        }
    else:
        create_mutation = {
            "mutation($input: create_flow_input!)": {
                "create_flow(input: $input)": {"id"}
            }
        }

    # the project lookup only happens against the cloud backend
    project = None
    if prefect.config.backend == "cloud":
        if project_name is None:
            raise TypeError(
                "'project_name' is a required field when registering a flow with Cloud. "
                "If you are attempting to register a Flow with a local Prefect server "
                "you may need to run `prefect backend server` first."
            )

        project_filter = {"where": {"name": {"_eq": project_name}}}
        query_project = {
            "query": {with_args("project", project_filter): {"id": True}}
        }
        project = self.graphql(query_project).data.project  # type: ignore

        if not project:
            raise ValueError(
                'Project {} not found. Run `client.create_project("{}")` to create it.'.format(
                    project_name, project_name
                )
            )

    serialized_flow = flow.serialize(build=build)  # type: Any

    # Docker storage: record its image name in the environment metadata, then
    # re-serialize (without building) so the payload carries the change
    if isinstance(flow.storage, prefect.environments.storage.Docker):
        flow.environment.metadata["image"] = flow.storage.name
        serialized_flow = flow.serialize(build=False)

    # still no image: fall back to the all_extras image for the current version
    if not flow.environment.metadata.get("image"):
        version = prefect.__version__.split("+")[0]
        default_image = f"prefecthq/prefect:all_extras-{version}"
        flow.environment.metadata["image"] = default_image
        serialized_flow = flow.serialize(build=False)

    # round-trip check: make sure what we are about to send can be loaded again
    try:
        prefect.serialization.flow.FlowSchema().load(serialized_flow)
    except Exception as exc:
        raise ValueError(
            "Flow could not be deserialized successfully. Error was: {}".format(
                repr(exc)
            )
        )

    if compressed:
        serialized_flow = compress(serialized_flow)

    mutation_input = dict(
        project_id=(project[0].id if project else None),
        serialized_flow=serialized_flow,
        set_schedule_active=set_schedule_active,
        version_group_id=version_group_id,
    )
    res = self.graphql(
        create_mutation, variables=dict(input=mutation_input)
    )  # type: Any

    if compressed:
        flow_id = res.data.create_flow_from_compressed_string.id
    else:
        flow_id = res.data.create_flow.id

    if not no_url:
        # print a direct link to the flow
        flow_url = self.get_cloud_url("flow", flow_id)
        print("Flow: {}".format(flow_url))

    return flow_id