示例#1
0
def create_project_if_not_exists(client: "prefect.Client",
                                 project_name: str) -> None:
    """Create the Prefect project ``project_name`` unless it already exists.

    Queries Prefect Server for projects whose name equals ``project_name``
    and creates the project only when none is found.

    Args:
        client (prefect.Client): Prefect client instance
        project_name (str): name of the project to look up and, if missing,
            create

    Raises:
        ValueError: if more than 1 project named ``project_name`` is found.
    """
    # The lookup must use the same name that is passed to create_project();
    # it is sent as a GraphQL variable so that quotes or other special
    # characters in the name cannot break the query text.
    r = client.graphql(
        "query($name: String!){project(where: {name: {_eq: $name}}){name}}",
        variables={"name": project_name},
    )
    projects = r["data"]["project"]
    if not projects:
        print(f"{project_name} project does not exist, it will be created.")
        client.create_project(project_name)
    elif len(projects) == 1:
        print(f"{project_name} project already exists. "
              "Skipping project creation.")
    else:
        raise ValueError(
            f"Several projects with the name '{project_name}' were found.")
示例#2
0
        raise ("Failed to query " + api_endpoint)


@task
def get_weather(woeid: int):
    """Fetch the weather data that MetaWeather returns for a location.

    Args:
        woeid (int): location identifier inserted into the
            ``/api/location/<woeid>`` URL.

    Returns:
        The decoded JSON body of the response.

    Raises:
        RuntimeError: if the API answers with a status code other than 200.
    """
    logger.info("Getting weather of {}".format(woeid))
    api_endpoint = "https://www.metaweather.com/api/location/{}".format(woeid)
    response = requests.get(api_endpoint)
    if response.status_code != 200:
        # Raising a plain string is itself a TypeError in Python 3 and would
        # hide this message; raise a real exception that carries it instead.
        raise RuntimeError(
            "Failed to query {} (HTTP {})".format(api_endpoint,
                                                  response.status_code))
    weather_data = json.loads(response.text)
    logger.debug(weather_data)
    return weather_data


# Flow definition: look up the location id for Paris, then fetch its weather.
with Flow("Get Paris' weather") as flow:
    woeid = get_woeid("Paris")
    weather_data = get_weather(woeid)

# The flow is registered under the "weather" project below, so try to create
# that project first. Creation is best-effort: a ClientError does not abort
# the script.
try:
    client = Client()
    client.create_project(project_name="weather")
except prefect.utilities.exceptions.ClientError as e:
    # Most likely the project already exists, but include the actual error so
    # that other client failures are not mislabelled in the log.
    logger.info(
        "Project 'weather' was not created, assuming it already exists: %s",
        e)

flow.register(project_name="weather", labels=["development"])

# Optionally run the code now
flow.run()
示例#3
0
                                          lambda flow: flow['project']['name'])
}

# Each immediate sub-directory of this file's directory is handled as one
# project named after the directory.
(root, projects, _) = next(walk(path.dirname(__file__)))

for project_name in projects:
    # Remember whether the project is known so that it is created exactly
    # once, right before the first flow is registered. Tying creation to
    # "first file in the directory" skipped it whenever that file was not a
    # registrable flow module.
    project_exists = project_name in flows_by_project

    if project_exists:
        # Archive the flows currently recorded for this project before
        # registering the new versions.
        for old_flow in flows_by_project[project_name].keys():
            error = client.graphql(f'''mutation {{
                archive_flow(input: {{ flow_id: "{old_flow}" }}) {{ error }}
            }}''').to_dict()['data']['archive_flow']['error']

            print('Archiving existing flow %s.. %s' %
                  (old_flow, error or 'OK'))

    files = next(walk(path.join(root, project_name)))[-1]

    for file in files:
        if not file.endswith('.py'):
            continue
        module = path.splitext(file)[0]

        # Every module is expected to expose a module-level `flow` object.
        flow = getattr(
            import_module('.%s.%s' % (project_name, module), __name__), 'flow')
        if not callable(getattr(flow, 'register', None)):
            continue

        if not project_exists:
            client.create_project(project_name)
            project_exists = True

        flow.register(project_name)
        # Write a PNG rendering of the flow next to its source module.
        flow.visualize(format='png',
                       filename=path.join(root, project_name, module))
示例#4
0
    df = ph.read_clickhouse(query, connection=connection)
    return df


@prefect.task(max_retries=5, retry_delay=timedelta(seconds=2))
def agregate(df):
    """Group ``df`` by ``SalesDate`` and ``UserID`` and sum each group.

    The grouping keys stay as regular columns in the result
    (``as_index=False``).
    """
    group_keys = ['SalesDate', 'UserID']
    grouped = df.groupby(group_keys, as_index=False)
    return grouped.sum()


# Schedule: starts about 1 second after this module is evaluated, fires every
# 5 minutes, and ends 10 minutes after evaluation.
# NOTE(review): the commented-out 12-hour interval below looks like the
# intended long-term value — confirm which one should be active.
schedule = IntervalSchedule(
    start_date=datetime.utcnow() + timedelta(seconds=1),
    # interval=timedelta(hours=12))
    interval=timedelta(minutes=5),
    end_date=datetime.utcnow() + timedelta(minutes=10))

# Flow definition: the result of download() is passed to agregate().
with prefect.Flow(
        name="SQL",
        schedule=schedule,
        # state_handlers=[handler],
) as flow:
    dataframes = download(connection)
    fin = agregate(dataframes)

# Create the "SQL" project and register the flow under it.
client = Client()
client.create_project(project_name='SQL')
flow.register(project_name='SQL')
# Also run the flow from this process.
flow.run()