Пример #1
0
async def register_edges(flow_id: str, tenant_id: str,
                         edges: List[Union[EdgeSchema, dict]]) -> None:
    """
    Insert a flow's edges into the database in batches.

    Each edge names its upstream and downstream task by slug; the slugs are
    resolved to task ids using the tasks stored for the flow. All slugs are
    checked before the first batch is written, so a bad edge never leaves a
    partially registered set of edges behind.

    Args:
        - flow_id (str): the id of the flow the edges belong to
        - tenant_id (str): the tenant id written onto every edge
        - edges (List[Union[EdgeSchema, dict]]): the edges to register; dicts are
            parsed into `EdgeSchema` objects

    Raises:
        - ValueError: if no flow matches `flow_id`
        - ValueError: if any edge references a task slug that is not part of the flow
    """
    batch_insertion_size = config.insert_many_batch_size
    flow = await models.Flow.where(id=flow_id).first(
        {"tasks": {
            "slug": True,
            "id": True
        }})
    # `first` yields nothing for an unknown id; fail with a clear message
    # instead of an AttributeError on `flow.tasks`
    if not flow:
        raise ValueError("Invalid flow.")

    task_lookup = {t.slug: t.id for t in flow.tasks}

    edges = parse_obj_as(List[EdgeSchema], edges)

    # validate every reference up front: this keeps database errors from being
    # mistaken for a missing task and avoids inserting only some of the batches
    if any(e.upstream_task not in task_lookup
           or e.downstream_task not in task_lookup for e in edges):
        raise ValueError(
            "Edges could not be registered - some edges reference tasks that do not exist within this flow."
        )

    for edges_chunk in chunked_iterable(edges, batch_insertion_size):
        await models.Edge.insert_many(
            [
                models.Edge(
                    tenant_id=tenant_id,
                    flow_id=flow_id,
                    upstream_task_id=task_lookup[e.upstream_task],
                    downstream_task_id=task_lookup[e.downstream_task],
                    key=e.key,
                    mapped=e.mapped,
                ) for e in edges_chunk
            ],
            # rows that hit the constraint are given no columns to update
            on_conflict=dict(constraint="edge_flow_id_task_ids_key",
                             update_columns=[]),
        )
Пример #2
0
async def create_flow(
    serialized_flow: dict,
    project_id: str,
    version_group_id: str = None,
    set_schedule_active: bool = True,
    description: str = None,
) -> str:
    """
    Add a flow to the database.

    The flow is inserted together with its tasks and edges as the next version
    within its version group. A flow group named after the version group is
    created for the project's tenant when none exists yet.

    Args:
        - project_id (str): A project id
        - serialized_flow (dict): A dictionary of information used to represent a flow
        - version_group_id (str): A version group to add the Flow to; a new uuid is
            generated when omitted
        - set_schedule_active (bool): Whether to set the flow's schedule to active
        - description (str): a description of the flow being created

    Returns:
        str: The id of the new flow

    Raises:
        - ValueError: if the flow's version of Prefect Core falls below the cutoff
        - ValueError: if no project matches `project_id`
        - KeyError: if an edge references a task slug that is not among the flow's tasks

    """
    flow = FlowSchema(**serialized_flow)

    # core versions before 0.6.1 were used only for internal purposes-- this is our cutoff
    core_version = flow.environment.get("__version__", None)
    if core_version and module_version.parse(
            core_version) < module_version.parse(config.core_version_cutoff):
        raise ValueError(
            "Prefect Server requires new flows to be built with Prefect "
            f"{config.core_version_cutoff}+, but this flow was built with "
            f"Prefect {core_version}.")

    # load project
    project = await models.Project.where(id=project_id).first({"tenant_id"})
    if not project:
        raise ValueError("Invalid project.")
    tenant_id = project.tenant_id  # type: ignore

    # set up task detail info
    task_lookup = {t.slug: t for t in flow.tasks}
    tasks_with_upstreams = {e.downstream_task for e in flow.edges}
    tasks_with_downstreams = {e.upstream_task for e in flow.edges}
    # gathered in a single pass over the edges so that the per-task flags
    # below are plain set lookups rather than a rescan of every edge
    mapped_tasks = {e.downstream_task for e in flow.edges if e.mapped}
    # without explicit reference tasks, fall back to the tasks that have no
    # downstream edges
    reference_tasks = set(flow.reference_tasks) or {
        t.slug
        for t in flow.tasks if t.slug not in tasks_with_downstreams
    }

    for t in flow.tasks:
        t.mapped = t.slug in mapped_tasks
        t.is_reference_task = t.slug in reference_tasks
        t.is_root_task = t.slug not in tasks_with_upstreams
        t.is_terminal_task = t.slug not in tasks_with_downstreams

    # set up versioning
    version_group_id = version_group_id or str(uuid.uuid4())
    version_where = {
        "version_group_id": {
            "_eq": version_group_id
        },
        "tenant_id": {
            "_eq": tenant_id
        },
    }
    # set up a flow group if it's not already in the system
    flow_group = await models.FlowGroup.where({
        "_and": [
            {
                "tenant_id": {
                    "_eq": tenant_id
                }
            },
            {
                "name": {
                    "_eq": version_group_id
                }
            },
        ]
    }).first({"id", "schedule"})
    if flow_group is None:
        flow_group_id = await models.FlowGroup(
            tenant_id=tenant_id,
            name=version_group_id,
            settings={
                "heartbeat_enabled": True,
                "lazarus_enabled": True,
                "version_locking_enabled": False,
            },
        ).insert()
    else:
        flow_group_id = flow_group.id

    # highest version already stored for this group; falls back to 0 when the
    # aggregate has no value
    latest = await models.Flow.where(version_where).max({"version"})
    version = latest["version"] or 0

    # if there is no referenceable schedule for this Flow,
    # we should set its "schedule" to inactive to avoid confusion
    if flow.schedule is None and getattr(flow_group, "schedule", None) is None:
        set_schedule_active = False

    # task ids are taken from the parsed schema, so the edges can point at them
    # within the same insert
    flow_id = await models.Flow(
        tenant_id=tenant_id,
        project_id=project_id,
        name=flow.name,
        serialized_flow=serialized_flow,
        environment=flow.environment,
        core_version=core_version,
        storage=flow.storage,
        parameters=flow.parameters,
        version_group_id=version_group_id,
        version=version + 1,
        archived=False,
        flow_group_id=flow_group_id,
        description=description,
        schedule=serialized_flow.get("schedule"),
        # always stored inactive; activation happens in the step below
        is_schedule_active=False,
        tasks=[
            models.Task(
                id=t.id,
                tenant_id=tenant_id,
                name=t.name,
                slug=t.slug,
                type=t.type,
                max_retries=t.max_retries,
                tags=t.tags,
                retry_delay=t.retry_delay,
                trigger=t.trigger,
                mapped=t.mapped,
                auto_generated=t.auto_generated,
                cache_key=t.cache_key,
                is_reference_task=t.is_reference_task,
                is_root_task=t.is_root_task,
                is_terminal_task=t.is_terminal_task,
            ) for t in flow.tasks
        ],
        edges=[
            models.Edge(
                tenant_id=tenant_id,
                upstream_task_id=task_lookup[e.upstream_task].id,
                downstream_task_id=task_lookup[e.downstream_task].id,
                key=e.key,
                mapped=e.mapped,
            ) for e in flow.edges
        ],
    ).insert()

    # schedule runs
    if set_schedule_active:
        # we don't want to error the Flow creation call as it would prevent other archiving logic
        # from kicking in
        try:
            await api.flows.set_schedule_active(flow_id=flow_id)
        except ValueError:
            pass

    return flow_id
Пример #3
0
async def create_flow(
    serialized_flow: dict,
    project_id: str,
    version_group_id: str = None,
    set_schedule_active: bool = True,
    description: str = None,
    idempotency_key: str = None,
) -> str:
    """
    Add a flow to the database.

    The flow is inserted together with its tasks and edges as the next version
    within its version group. A flow group named after the version group is
    created for the project's tenant when none exists yet. When the supplied
    idempotency key equals the one stored on an existing flow group and that
    group still has an unarchived flow, that flow's id is returned and nothing
    is inserted.

    Args:
        - project_id (str): A project id
        - serialized_flow (dict): A dictionary of information used to represent a flow
        - version_group_id (str): A version group to add the Flow to; a new uuid is
            generated when omitted
        - set_schedule_active (bool): Whether to set the flow's schedule to active
        - description (str): a description of the flow being created
        - idempotency_key (optional, str): a key that, if matching the most recent call
            to `create_flow` for this flow group, will prevent the creation of another
            flow version

    Returns:
        str: The id of the new flow, or of the existing flow on an idempotency match

    Raises:
        - ValueError: if the flow's version of Prefect Core falls below the cutoff
        - ValueError: if no project matches `project_id`
        - KeyError: if an edge references a task slug that is not among the flow's tasks

    """
    flow = FlowSchema(**serialized_flow)

    # core versions before 0.6.1 were used only for internal purposes-- this is our cutoff
    core_version = flow.__version__
    if core_version and module_version.parse(
            core_version) < module_version.parse(config.core_version_cutoff):
        raise ValueError(
            "Prefect backends require new flows to be built with Prefect "
            f"{config.core_version_cutoff}+, but this flow was built with "
            f"Prefect {core_version}.")

    # load project
    project = await models.Project.where(id=project_id).first({"tenant_id"})
    if not project:
        raise ValueError("Invalid project.")
    tenant_id = project.tenant_id  # type: ignore

    # set up task detail info
    task_lookup = {t.slug: t for t in flow.tasks}
    tasks_with_upstreams = {e.downstream_task for e in flow.edges}
    tasks_with_downstreams = {e.upstream_task for e in flow.edges}
    # gathered in a single pass over the edges so that the per-task flags
    # below are plain set lookups rather than a rescan of every edge
    mapped_tasks = {e.downstream_task for e in flow.edges if e.mapped}
    # without explicit reference tasks, fall back to the tasks that have no
    # downstream edges
    reference_tasks = set(flow.reference_tasks) or {
        t.slug
        for t in flow.tasks if t.slug not in tasks_with_downstreams
    }

    for t in flow.tasks:
        t.mapped = t.slug in mapped_tasks
        t.is_reference_task = t.slug in reference_tasks
        t.is_root_task = t.slug not in tasks_with_upstreams
        t.is_terminal_task = t.slug not in tasks_with_downstreams

    # set up versioning
    version_group_id = version_group_id or str(uuid.uuid4())
    version_where = {
        "version_group_id": {
            "_eq": version_group_id
        },
        "tenant_id": {
            "_eq": tenant_id
        },
    }

    # lookup the associated flow group (may not exist yet)
    flow_group = await models.FlowGroup.where({
        "_and": [
            {
                "tenant_id": {
                    "_eq": tenant_id
                }
            },
            {
                "name": {
                    "_eq": version_group_id
                }
            },
        ]
    }).first({"id", "schedule", "settings"})

    # create the flow group or check for the idempotency key in the existing flow group
    # settings
    if flow_group is None:
        flow_group_id = await models.FlowGroup(
            tenant_id=tenant_id,
            name=version_group_id,
            settings={
                "heartbeat_enabled": True,
                "lazarus_enabled": True,
                "version_locking_enabled": False,
                "idempotency_key": idempotency_key,
            },
        ).insert()
    else:
        flow_group_id = flow_group.id

        # check idempotency key and exit early if we find a matching key and flow,
        # otherwise update the key for the group
        last_idempotency_key = flow_group.settings.get("idempotency_key", None)
        # a missing key on either side never counts as a match
        if idempotency_key and last_idempotency_key == idempotency_key:
            # get the most recent unarchived version, there should only be one
            # unarchived flow at a time but it is safer not to presume.
            # The lookup reuses `version_where` so it stays scoped to this
            # tenant like every other query in this function.
            flow_model = await models.Flow.where({
                **version_where,
                "archived": {
                    "_eq": False
                },
            }).first(order_by={"version": EnumValue("desc")})
            if flow_model:
                return flow_model.id
            # otherwise, despite the key matching we don't have a valid flow to return
            # and will continue as though the key did not match

        settings = flow_group.settings
        settings["idempotency_key"] = idempotency_key
        await models.FlowGroup.where({
            "id": {
                "_eq": flow_group.id
            }
        }).update(set={"settings": settings})

    # highest version already stored for this group; falls back to 0 when the
    # aggregate has no value
    latest = await models.Flow.where(version_where).max({"version"})
    version = latest["version"] or 0

    # if there is no referenceable schedule for this Flow,
    # we should set its "schedule" to inactive to avoid confusion
    if flow.schedule is None and getattr(flow_group, "schedule", None) is None:
        set_schedule_active = False

    # task ids are taken from the parsed schema, so the edges can point at them
    # within the same insert
    flow_id = await models.Flow(
        tenant_id=tenant_id,
        project_id=project_id,
        name=flow.name,
        serialized_flow=serialized_flow,
        environment=flow.environment,
        run_config=flow.run_config,
        core_version=core_version,
        storage=flow.storage,
        parameters=flow.parameters,
        version_group_id=version_group_id,
        version=version + 1,
        archived=False,
        flow_group_id=flow_group_id,
        description=description,
        schedule=serialized_flow.get("schedule"),
        # always stored inactive; activation happens in the step below
        is_schedule_active=False,
        tasks=[
            models.Task(
                id=t.id,
                tenant_id=tenant_id,
                name=t.name,
                slug=t.slug,
                type=t.type,
                max_retries=t.max_retries,
                tags=t.tags,
                retry_delay=t.retry_delay,
                trigger=t.trigger,
                mapped=t.mapped,
                auto_generated=t.auto_generated,
                cache_key=t.cache_key,
                is_reference_task=t.is_reference_task,
                is_root_task=t.is_root_task,
                is_terminal_task=t.is_terminal_task,
            ) for t in flow.tasks
        ],
        edges=[
            models.Edge(
                tenant_id=tenant_id,
                upstream_task_id=task_lookup[e.upstream_task].id,
                downstream_task_id=task_lookup[e.downstream_task].id,
                key=e.key,
                mapped=e.mapped,
            ) for e in flow.edges
        ],
    ).insert()

    # schedule runs
    if set_schedule_active:
        # we don't want to error the Flow creation call as it would prevent other archiving logic
        # from kicking in
        try:
            await api.flows.set_schedule_active(flow_id=flow_id)
        except ValueError:
            pass

    return flow_id