Example #1
    def _get_vault_secret(self, name: str):
        """
        Connect to vault service and retrieve a secret value
        Arguments:
            - name (str): name of the secret to retrieve, in the "<mount_point>/<path>" format

        Returns:
            - secret (dict): a dict of the secret key/value items retrieved

        Raises:
            - ValueError: unable to configure the vault client, attempting an unsupported auth
                method, or unable to parse the provided secret name into a
                "<mount_point>/<path>" pattern
            - RuntimeError: unable to authenticate, or the provided token/role is not
                authorised to access the secret
            - KeyError: unable to lookup secret in vault using the secret path
        """
        self.logger.debug(f"Looking up vault path: {name}")
        client = hvac.Client()
        # get vault address url
        vault_url = os.getenv("VAULT_ADDR") or os.getenv("vault_addr")
        if not vault_url:
            raise ValueError(
                "VAULT_ADDR url var not found. "
                'Either "VAULT_ADDR" or "vault_addr" env var required.')
        client.url = vault_url
        self.logger.debug(f"Vault addr set to: {client.url}")

        # get vault auth credentials from the PrefectSecret
        vault_creds = PrefectSecret(self.vault_credentials_secret).run()
        if "VAULT_TOKEN" in vault_creds.keys():
            client.token = vault_creds["VAULT_TOKEN"]
        elif ("VAULT_ROLE_ID" in vault_creds.keys()
              and "VAULT_SECRET_ID" in vault_creds.keys()):
            client.auth_approle(vault_creds["VAULT_ROLE_ID"],
                                vault_creds["VAULT_SECRET_ID"])
        else:
            raise ValueError("Unable to authenticate with vault service.  "
                             "Supported methods: token, appRole")
        if not client.is_authenticated():
            raise RuntimeError(
                "Unable to autheticate with vault using supplied credentials")
        self.logger.debug("Passed vault authentication check")
        # regex to parse path into 2 named parts: <mount_point>/<path>
        secret_path_re = r"^(?P<mount_point>[^/]+)/(?P<path>.+)$"
        m = re.fullmatch(secret_path_re, name)
        if m is None:
            raise ValueError(
                f'Invalid secret path: {name}.  Expected: "<mount_point>/<path>"'
            )
        vault_path = m.groupdict()
        value = ""
        try:
            vault_secret = client.secrets.kv.v2.read_secret_version(
                path=vault_path["path"], mount_point=vault_path["mount_point"])
            value = vault_secret["data"]["data"]
        except hvac.exceptions.InvalidPath as exc:
            raise KeyError(f"Secret not found: {name}") from exc
        except hvac.exceptions.Forbidden as exc:
            raise RuntimeError(f"Access forbidden: {name}") from exc
        return value
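
As a quick aside, here is a minimal standalone sketch of the "<mount_point>/<path>" parsing used above; the secret name "secret/data-eng/warehouse" is a made-up example.

import re

# Same pattern as in _get_vault_secret: the first path segment is the mount point,
# everything after the first "/" is the path within that mount.
secret_path_re = r"^(?P<mount_point>[^/]+)/(?P<path>.+)$"

m = re.fullmatch(secret_path_re, "secret/data-eng/warehouse")  # hypothetical name
print(m.groupdict())  # {'mount_point': 'secret', 'path': 'data-eng/warehouse'}
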
Example #2
 def test_secret_value_pulled_from_context(self):
     secret = PrefectSecret(name="test")
     with set_temporary_config({"cloud.use_local_secrets": True}):
         with prefect.context(secrets=dict(test=42)):
             assert secret.run() == 42
         with pytest.raises(ValueError):
             secret.run()
Example #3
def get_key_vault(credentials: str,
                  secret_client_kwargs: dict,
                  vault_name: str = None) -> SecretClient:
    if not vault_name:
        vault_name = PrefectSecret("AZURE_DEFAULT_KEYVAULT").run()
    if credentials:
        # set credentials as env variables so that they're discoverable by EnvironmentCredential
        key_vault_credentials = credentials["KEY_VAULT"][vault_name]
        os.environ["AZURE_TENANT_ID"] = key_vault_credentials[
            "AZURE_TENANT_ID"]
        os.environ["AZURE_CLIENT_ID"] = key_vault_credentials[
            "AZURE_CLIENT_ID"]
        os.environ["AZURE_CLIENT_SECRET"] = key_vault_credentials[
            "AZURE_CLIENT_SECRET"]
    else:
        try:
            # we can read the credentials automatically if the user uses the default name
            credentials = PrefectSecret("AZURE_CREDENTIALS").run()
            key_vault_credentials = credentials["KEY_VAULT"][vault_name]
            os.environ["AZURE_TENANT_ID"] = key_vault_credentials[
                "AZURE_TENANT_ID"]
            os.environ["AZURE_CLIENT_ID"] = key_vault_credentials[
                "AZURE_CLIENT_ID"]
            os.environ["AZURE_CLIENT_SECRET"] = key_vault_credentials[
                "AZURE_CLIENT_SECRET"]
        except ValueError:
            # go to step 3 (attempt to read from env)
            pass
    credentials = EnvironmentCredential()
    vault_url = f"https://{vault_name}.vault.azure.net"
    key_vault = SecretClient(vault_url=vault_url,
                             credential=credentials,
                             **secret_client_kwargs)
    return key_vault
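
A possible way to call the helper above; the vault and secret names are made up, and SecretClient.get_secret is the standard azure-keyvault-secrets call. With credentials=None the function falls back to the AZURE_CREDENTIALS Prefect secret or existing AZURE_* environment variables.

# Hypothetical usage; "my-vault" and "db-password" are placeholder names.
client = get_key_vault(credentials=None, secret_client_kwargs={}, vault_name="my-vault")
db_password = client.get_secret("db-password").value
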
Example #4
 def test_local_secrets_auto_load_json_strings(self):
     secret = PrefectSecret(name="test")
     with set_temporary_config({"cloud.use_local_secrets": True}):
         with prefect.context(secrets=dict(test='{"x": 42}')):
             assert secret.run() == {"x": 42}
         with pytest.raises(ValueError):
             secret.run()
Example #5
 def test_cloud_secrets_remain_plain_dictionaries(self, monkeypatch):
     response = {
         "data": {
             "secret_value": {
                 "a": "1234",
                 "b": [1, 2, {
                     "c": 3
                 }]
             }
         }
     }
     post = MagicMock(return_value=MagicMock(json=MagicMock(
         return_value=response)))
     session = MagicMock()
     session.return_value.post = post
     monkeypatch.setattr("requests.Session", session)
     with set_temporary_config({
             "cloud.auth_token": "secret_token",
             "cloud.use_local_secrets": False
     }):
         my_secret = PrefectSecret(name="the-key")
         val = my_secret.run()
     assert val == {"a": "1234", "b": [1, 2, {"c": 3}]}
     assert isinstance(val, dict) and not isinstance(val, box.Box)
     val2 = val["b"]
     assert isinstance(val2, list) and not isinstance(val2, box.BoxList)
     val3 = val["b"][2]
     assert isinstance(val3, dict) and not isinstance(val3, box.Box)
Example #6
 def test_local_secrets_remain_plain_dictionaries(self):
     secret = PrefectSecret(name="test")
     with set_temporary_config({"cloud.use_local_secrets": True}):
         with prefect.context(secrets=dict(test={"x": 42})):
             assert isinstance(prefect.context.secrets["test"], dict)
             val = secret.run()
             assert val == {"x": 42}
             assert isinstance(val, dict) and not isinstance(val, box.Box)
Example #7
    def test_reads_by_rerunning_task(self):
        task = PrefectSecret("foo")
        task.run = lambda *args, **kwargs: 42
        result = SecretResult(task)
        result.location == "foo"

        new_result = result.read("foo")
        assert new_result.value == 42
        new_result.location == "foo"
Example #8
 def test_secrets_use_client(self, monkeypatch):
     response = {"data": {"secret_value": '"1234"'}}
     post = MagicMock(return_value=MagicMock(json=MagicMock(return_value=response)))
     session = MagicMock()
     session.return_value.post = post
     monkeypatch.setattr("requests.Session", session)
     with set_temporary_config(
         {"cloud.auth_token": "secret_token", "cloud.use_local_secrets": False}
     ):
         my_secret = PrefectSecret(name="the-key")
         val = my_secret.run()
     assert val == "1234"
Example #9
    def test_cloud_secrets_auto_load_json_strings(self, monkeypatch):
        response = {"data": {"secret_value": '{"x": 42}'}}
        post = MagicMock(return_value=MagicMock(json=MagicMock(return_value=response)))
        session = MagicMock()
        session.return_value.post = post
        monkeypatch.setattr("requests.Session", session)
        with set_temporary_config(
            {"cloud.auth_token": "secret_token", "cloud.use_local_secrets": False}
        ):
            my_secret = PrefectSecret(name="the-key")
            val = my_secret.run()

        assert isinstance(val, dict)
Example #10
    def test_secret_value_depends_on_use_local_secrets(self, monkeypatch):
        response = {"errors": "Malformed Authorization header"}
        post = MagicMock(return_value=MagicMock(json=MagicMock(return_value=response)))
        session = MagicMock()
        session.return_value.post = post
        monkeypatch.setattr("requests.Session", session)

        secret = PrefectSecret(name="test")
        with set_temporary_config(
            {"cloud.use_local_secrets": False, "cloud.auth_token": None}
        ):
            with prefect.context(secrets=dict()):
                with pytest.raises(ClientError):
                    secret.run()
Example #11
 def test_secret_is_pickleable(self):
     secret = PrefectSecret(name="long name")
     new = cloudpickle.loads(cloudpickle.dumps(secret))
     assert new.name == "long name"
     assert new.max_retries == 2
     assert new.retry_delay.total_seconds() == 1.0
     assert isinstance(new.result_handler, SecretResultHandler)
Example #12
    def run(
        self,
        from_path: str = None,
        to_path: str = None,
        recursive: bool = None,
        overwrite: bool = None,
        gen: int = None,
        sp_credentials_secret: str = None,
        vault_name: str = None,
        max_retries: int = None,
        retry_delay: timedelta = None,
    ) -> None:
        """Task run method.

        Args:
            from_path (str): The path from which to upload the file(s).
            to_path (str): The destination path.
            recursive (bool): Set to true if uploading entire directories.
            overwrite (bool): Whether to overwrite the file(s) if they exist.
            gen (int): The generation of the Azure Data Lake.
            sp_credentials_secret (str, optional): The name of the Azure Key Vault secret containing a dictionary with
                ACCOUNT_NAME and Service Principal credentials (TENANT_ID, CLIENT_ID, CLIENT_SECRET). Defaults to None.
            vault_name (str, optional): The name of the vault from which to obtain the secret. Defaults to None.
        """

        if not sp_credentials_secret:
            # attempt to read a default for the service principal secret name
            try:
                sp_credentials_secret = PrefectSecret(
                    "AZURE_DEFAULT_ADLS_SERVICE_PRINCIPAL_SECRET").run()
            except ValueError:
                pass

        if sp_credentials_secret:
            azure_secret_task = AzureKeyVaultSecret()
            credentials_str = azure_secret_task.run(
                secret=sp_credentials_secret, vault_name=vault_name)
            credentials = json.loads(credentials_str)
        else:
            credentials = {
                "ACCOUNT_NAME": os.environ["AZURE_ACCOUNT_NAME"],
                "AZURE_TENANT_ID": os.environ["AZURE_TENANT_ID"],
                "AZURE_CLIENT_ID": os.environ["AZURE_CLIENT_ID"],
                "AZURE_CLIENT_SECRET": os.environ["AZURE_CLIENT_SECRET"],
            }
        lake = AzureDataLake(gen=gen, credentials=credentials)

        full_to_path = os.path.join(credentials["ACCOUNT_NAME"], to_path)
        self.logger.info(
            f"Uploading data from {from_path} to {full_to_path}...")
        lake.upload(
            from_path=from_path,
            to_path=to_path,
            recursive=recursive,
            overwrite=overwrite,
        )
        self.logger.info(f"Successfully uploaded data to {full_to_path}.")
Example #13
def _execute_flow_run():
    flow_run_id = prefect.context.get("flow_run_id")
    if not flow_run_id:
        click.echo("Not currently executing a flow within a Cloud context.")
        raise Exception(
            "Not currently executing a flow within a Cloud context.")

    query = {
        "query": {
            with_args("flow_run", {"where": {
                "id": {
                    "_eq": flow_run_id
                }
            }}): {
                "flow": {
                    "name": True,
                    "storage": True,
                    "environment": True
                },
                "version": True,
            }
        }
    }

    client = Client()
    result = client.graphql(query)
    flow_run = result.data.flow_run

    if not flow_run:
        click.echo("Flow run {} not found".format(flow_run_id))
        raise ValueError("Flow run {} not found".format(flow_run_id))

    try:
        flow_data = flow_run[0].flow
        storage_schema = prefect.serialization.storage.StorageSchema()
        storage = storage_schema.load(flow_data.storage)

        # populate global secrets
        secrets = prefect.context.get("secrets", {})
        for secret in storage.secrets:
            secrets[secret] = PrefectSecret(name=secret).run()

        with prefect.context(secrets=secrets, loading_flow=True):
            flow = storage.get_flow(storage.flows[flow_data.name])
            environment = flow.environment

            environment.setup(flow)
            environment.execute(flow)
    except Exception as exc:
        msg = "Failed to load and execute Flow's environment: {}".format(
            repr(exc))
        state = prefect.engine.state.Failed(message=msg)
        client.set_flow_run_state(flow_run_id=flow_run_id, state=state)
        click.echo(str(exc))
        raise exc
Example #14
    def run(
        self,
        path: str = None,
        gen: int = None,
        sp_credentials_secret: str = None,
        vault_name: str = None,
        max_retries: int = None,
        retry_delay: timedelta = None,
    ) -> List[str]:
        """Task run method.

        Args:
            path (str): The path to the directory whose contents you want to list. Defaults to None.
            gen (int): The generation of the Azure Data Lake. Defaults to None.
            sp_credentials_secret (str, optional): The name of the Azure Key Vault secret containing a dictionary with
                ACCOUNT_NAME and Service Principal credentials (TENANT_ID, CLIENT_ID, CLIENT_SECRET). Defaults to None.
            vault_name (str, optional): The name of the vault from which to obtain the secret. Defaults to None.

        Returns:
            List[str]: The list of paths to the contents of `path`. These paths
            do not include the container, eg. the path to the file located at
            "https://my_storage_acc.blob.core.windows.net/raw/supermetrics/test_file.txt"
            will be shown as "raw/supermetrics/test_file.txt".
        """

        if not sp_credentials_secret:
            # attempt to read a default for the service principal secret name
            try:
                sp_credentials_secret = PrefectSecret(
                    "AZURE_DEFAULT_ADLS_SERVICE_PRINCIPAL_SECRET").run()
            except ValueError:
                pass

        if sp_credentials_secret:
            azure_secret_task = AzureKeyVaultSecret()
            credentials_str = azure_secret_task.run(
                secret=sp_credentials_secret, vault_name=vault_name)
            credentials = json.loads(credentials_str)
        else:
            credentials = {
                "ACCOUNT_NAME": os.environ["AZURE_ACCOUNT_NAME"],
                "AZURE_TENANT_ID": os.environ["AZURE_TENANT_ID"],
                "AZURE_CLIENT_ID": os.environ["AZURE_CLIENT_ID"],
                "AZURE_CLIENT_SECRET": os.environ["AZURE_CLIENT_SECRET"],
            }
        lake = AzureDataLake(gen=gen, credentials=credentials)

        full_dl_path = os.path.join(credentials["ACCOUNT_NAME"], path)

        self.logger.info(f"Listing files in {full_dl_path}...")
        files = lake.ls(path)
        self.logger.info(f"Successfully listed files in {full_dl_path}.")

        return files
Example #15
    def test_reads_with_new_name(self):
        task = PrefectSecret("foo")
        result = SecretResult(task)

        with prefect.context(secrets=dict(x=99, foo="bar")):
            res1 = result.read("x")
            res2 = result.read("foo")

        assert res1.value == 99
        assert res1.location == "x"

        assert res2.value == "bar"
        assert res2.location == "foo"
Example #16
def select_session_csvs(local_csvs: list, job_size: int) -> list:
    return_list = []

    # LOCAL SET
    csv_set = set()
    for csv in local_csvs:
        csv_list = csv.split('/') if '/' in csv else csv.split('\\')
        csv_str = f'{csv_list[-2]}-{csv_list[-1]}'
        csv_set.add(csv_str)
    print(f'csvs from folder: {len(csv_set)}')

    year_db_csvs = PostgresFetch(
        db_name=local_config.DB_NAME,
        user=local_config.DB_USER,
        host=local_config.DB_HOST,
        port=local_config.DB_PORT,
        fetch="all",
        query=f"""
        select year, station from climate.csv_checker
        order by date_update
        """
    ).run(password=PrefectSecret('NOAA_LOCAL_DB').run())

    # DB SET
    year_db_set = set()
    for year_db in year_db_csvs:
        year_db_str = f'{year_db[0]}-{year_db[1]}'
        year_db_set.add(year_db_str)
    print(f'csv_checker set: {len(year_db_set)}')

    # SET DIFF, SORT
    new_set = csv_set.difference(year_db_set)
    new_set = sorted(new_set)
    print(f'new_set: {len(new_set)}')

    # CONVERT TO LIST, SELECT SHORT SUBSET
    new_list = []
    set_empty = False
    while len(new_list) < job_size and not set_empty:
        if len(new_set)>0:
            new_list.append(new_set.pop())
        else:
            set_empty = True
    new_list = [x.split('-') for x in new_list]
    new_list = new_list[:job_size]

    # REBUILD LIST OF FILE PATH LOCATIONS
    data_dir = Path(config.NOAA_TEMP_CSV_DIR)
    return_list = [f'{data_dir}/{x[0]}/{x[1]}' for x in new_list]
    print(f'return_list: {len(return_list)}')
    return return_list
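
A tiny standalone illustration of the set-difference step above, with made-up "year-station" keys:

# Keys built from local CSV paths vs. keys already recorded in climate.csv_checker (made-up values).
csv_set = {"2020-01001.csv", "2021-01002.csv", "2021-01003.csv"}
year_db_set = {"2020-01001.csv"}

new_set = sorted(csv_set.difference(year_db_set))
print(new_set)  # ['2021-01002.csv', '2021-01003.csv'] -- only the unprocessed files remain
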
Example #17
def test_secrets_are_rerun_on_restart():
    @prefect.task
    def identity(x):
        return x

    with Flow("test") as flow:
        secret = PrefectSecret("key")
        val = identity(secret)

    with prefect.context(secrets={"key": "val"}):
        state = FlowRunner(flow=flow).run(task_states={secret: Success()},
                                          return_tasks=[val])
    assert state.is_successful()
    assert state.result[val].result == "val"
Example #18
def test_secrets_dynamically_pull_from_context():
    flow = Flow(name="test")
    task1 = PrefectSecret("foo", max_retries=1, retry_delay=datetime.timedelta(0))

    flow.add_task(task1)

    flow_state = FlowRunner(flow=flow).run(return_tasks=[task1])
    assert flow_state.is_running()
    assert flow_state.result[task1].is_retrying()

    with prefect.context(secrets=dict(foo=42)):
        time.sleep(1)
        flow_state = FlowRunner(flow=flow).run(task_states=flow_state.result)

    assert flow_state.is_successful()
Example #19
def get_credentials(credentials_secret: str, vault_name: str = None):
    if not credentials_secret:
        # attempt to read a default for the service principal secret name
        try:
            credentials_secret = PrefectSecret(
                "AZURE_DEFAULT_SQLDB_SERVICE_PRINCIPAL_SECRET").run()
        except ValueError:
            pass

    if credentials_secret:
        azure_secret_task = AzureKeyVaultSecret()
        credentials_str = azure_secret_task.run(secret=credentials_secret,
                                                vault_name=vault_name)
        credentials = json.loads(credentials_str)

        return credentials
Example #20
File: bcp.py Project: dyvenia/viadot
    def run(
        self,
        path: str = None,
        schema: str = None,
        table: str = None,
        credentials_secret: str = None,
        vault_name: str = None,
        max_retries: int = None,
        retry_delay: timedelta = None,
        **kwargs,
    ) -> str:
        """
        Task run method.

        Args:
        - path (str, optional): the path to the local CSV file to be inserted
        - schema (str, optional): the destination schema
        - table (str, optional): the destination table
        - credentials_secret (str, optional): the name of the Key Vault secret containing database credentials
          (server, db_name, user, password)
        - vault_name (str): the name of the vault from which to fetch the secret

        Returns:
            str: the output of the bcp CLI command
        """
        if not credentials_secret:
            # attempt to read a default for the service principal secret name
            try:
                credentials_secret = PrefectSecret(
                    "AZURE_DEFAULT_SQLDB_SERVICE_PRINCIPAL_SECRET").run()
            except ValueError:
                pass

        if credentials_secret:
            credentials_str = AzureKeyVaultSecret(credentials_secret,
                                                  vault_name=vault_name).run()
            credentials = json.loads(credentials_str)

        fqn = f"{schema}.{table}" if schema else table

        server = credentials["server"]
        db_name = credentials["db_name"]
        uid = credentials["user"]
        pwd = credentials["password"]

        command = f"/opt/mssql-tools/bin/bcp {fqn} in {path} -S {server} -d {db_name} -U {uid} -P '{pwd}' -c -F 2 -b 5000 -h 'TABLOCK'"
        return super().run(command=command, **kwargs)
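
For illustration, a small standalone sketch of the command string assembled above; all values are placeholders, not real credentials:

# Placeholder values; in the task these come from the Key Vault secret and the run() arguments.
schema, table, path = "dbo", "sales", "sales.csv"
server, db_name, uid, pwd = "myserver.database.windows.net", "mydb", "loader", "***"

fqn = f"{schema}.{table}" if schema else table
command = (f"/opt/mssql-tools/bin/bcp {fqn} in {path} -S {server} -d {db_name} "
           f"-U {uid} -P '{pwd}' -c -F 2 -b 5000 -h 'TABLOCK'")
print(command)
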
Example #21
def list_db_years(waiting_for: str) -> list: #list of sets
    db_years = PostgresFetch(
        db_name=local_config.DB_NAME,
        user=local_config.DB_USER,
        host=local_config.DB_HOST,
        port=local_config.DB_PORT,
        fetch="all",
        query="""
        select distinct year, date_update from climate.csv_checker
        order by date_update
        """
    ).run(password=PrefectSecret('NOAA_LOCAL_DB').run())
    # Move the last item in the list to the front:
    # - We want to check the most recent year first, since csvs in that dir
    #   may not be complete (we are not doing the full number of csvs for some
    #   dirs with each run).
    # - Then we move to the oldest checked folder in the list to move forward.
    db_years.insert(0, db_years.pop())
    return db_years
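
A small illustration of the insert(0, pop()) rotation used above, with made-up rows:

# Rows ordered by date_update ascending, as the query above returns them (made-up data).
db_years = [("2018", "2021-01-05"), ("2019", "2021-02-10"), ("2021", "2021-03-15")]

db_years.insert(0, db_years.pop())  # move the most recently updated year to the front
print(db_years[0])  # ('2021', '2021-03-15')
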
Example #22
def get_report_flow(username: str = None, flow_name: str = None) -> Flow:
    """
    Get a flow that generates a progress report.

    Args:
       - username (str): MyFitnessPaw username to be used for flow generation and dispatch
       - flow_name (str, optional): An optional name to be applied to the flow

    Returns:
       - prefect.Flow: The created Prefect flow ready to be run

    Raises:
       - ValueError: if the `username` keyword argument is not provided
    """
    if not username:
        raise ValueError("An user must be provided for the flow")

    flow_name = flow_name or f"MyFitnessPaw Progress Report <{username.upper()}>"

    with Flow(name=flow_name) as progress_report_flow:
        usermail = PrefectSecret(f"MYFITNESSPAL_USERNAME_{username.upper()}")
        starting_date = Parameter(
            name="starting_date",
            default=datetime.datetime.strftime(datetime.datetime.now(),
                                               "%Y-%m-%d"),
        )
        end_goal = Parameter(name="end_goal", default=150000)
        num_rows_report_tbl = Parameter(name="num_rows_report_tbl", default=7)
        report_style = Parameter(name="report_style", default="default")

        user = tasks.get_user(username, usermail)
        report_data = tasks.mfp_select_progress_report_data(
            usermail, starting_date, end_goal, num_rows_report_tbl)
        report = tasks.make_report(user, report_data, report_style)
        report_html = tasks.render_html_email_report(report)
        t = tasks.save_email_report_locally(report_html)  # noqa
        r = tasks.send_email_report(report, report_html)  # noqa
    return progress_report_flow
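
A possible way to build and run the flow above; the username and parameter overrides are hypothetical, and the matching MYFITNESSPAL_USERNAME_* secret must be resolvable at run time.

# Hypothetical usage sketch; "jane" and the parameter values are made up.
flow = get_report_flow(username="jane")
state = flow.run(parameters={"end_goal": 200000, "num_rows_report_tbl": 14})
print(state.is_successful())
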
Example #23
def insert_stations(list_of_tuples: list):  #, password: str):
    insert = 0
    unique_key_violation = 0
    for row in list_of_tuples[1:2]:
        print(row)
        station = row[0]
        latitude = row[2] if row[2] != '' else None
        longitude = row[3] if row[3] != '' else None
        elevation = row[4] if row[4] != '' else None
        name = row[5]
        try:
            PostgresExecute(
                db_name=local_config.DB_NAME,  #'climatedb', 
                user=local_config.DB_USER,  #'postgres', 
                host=local_config.DB_HOST,  #'192.168.86.32', 
                port=local_config.DB_PORT,  #5432, 
                query="""
                insert into climate.noaa_global_temp_sites 
                    (station, latitude, longitude, elevation, name)
                values (%s, %s, %s, %s, %s)
                """,
                data=(station, latitude, longitude, elevation, name),
                commit=True,
            ).run(password=PrefectSecret('NOAA_LOCAL_DB').run())
            insert += 1
        except UniqueViolation:
            unique_key_violation += 1
        except InvalidTextRepresentation as e:
            print(e)
    print(
        f'STATION INSERT RESULT: inserted {insert} records | {unique_key_violation} duplicates'
    )
Example #24
def get_backup_flow(flow_name: str = None) -> Flow:
    """
    Get a backup flow to upload the MyFitnessPaw database to a dropbox location.

    Args:
       - flow_name (str, optional): An optional name to be applied to the flow

    Returns:
       - prefect.Flow: The created Prefect flow ready to be run
    """

    flow_name = flow_name or "MyFitnessPaw DB Backup"

    with Flow(flow_name) as backup_flow:
        dbx_mfp_dir = prefect.config.myfitnesspaw.backup.dbx_backup_dir
        dbx_token = PrefectSecret("MYFITNESSPAW_DROPBOX_ACCESS_TOKEN")
        backup_result = tasks.make_dropbox_backup(dbx_token,
                                                  dbx_mfp_dir)  # noqa
        avail_backups = tasks.dbx_list_available_backups(
            dbx_token, dbx_mfp_dir)
        res = tasks.apply_backup_rotation_scheme(  # noqa
            dbx_token, dbx_mfp_dir, avail_backups)

    return backup_flow
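
Similarly, a hedged sketch of running the backup flow locally; it assumes the MYFITNESSPAW_DROPBOX_ACCESS_TOKEN secret and the myfitnesspaw.backup.dbx_backup_dir config value are available.

# Hypothetical local run of the backup flow defined above.
backup_flow = get_backup_flow()
state = backup_flow.run()
print(state.is_successful())
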
Example #25
def flow_run():
    """
    Execute a flow run in the context of a backend API.
    """
    flow_run_id = prefect.context.get("flow_run_id")
    if not flow_run_id:
        click.echo("Not currently executing a flow within a Cloud context.")
        raise Exception(
            "Not currently executing a flow within a Cloud context.")

    query = {
        "query": {
            with_args("flow_run", {"where": {
                "id": {
                    "_eq": flow_run_id
                }
            }}): {
                "flow": {
                    "name": True,
                    "storage": True,
                    "run_config": True
                },
                "version": True,
            }
        }
    }

    client = Client()
    result = client.graphql(query)
    flow_run = result.data.flow_run

    if not flow_run:
        click.echo("Flow run {} not found".format(flow_run_id))
        raise ValueError("Flow run {} not found".format(flow_run_id))

    # Set the `running_with_backend` context variable to enable logging
    with prefect.context(running_with_backend=True):
        try:
            flow_data = flow_run[0].flow
            storage_schema = prefect.serialization.storage.StorageSchema()
            storage = storage_schema.load(flow_data.storage)

            # populate global secrets
            secrets = prefect.context.get("secrets", {})
            for secret in storage.secrets:
                secrets[secret] = PrefectSecret(name=secret).run()

            with prefect.context(secrets=secrets, loading_flow=True):
                flow = storage.get_flow(flow_data.name)

            with prefect.context(secrets=secrets):
                if flow_data.run_config is not None:
                    runner_cls = get_default_flow_runner_class()
                    runner_cls(flow=flow).run()
                else:
                    environment = flow.environment
                    environment.setup(flow)
                    environment.execute(flow)
        except Exception as exc:
            msg = "Failed to load and execute Flow's environment: {}".format(
                repr(exc))
            state = prefect.engine.state.Failed(message=msg)
            client.set_flow_run_state(flow_run_id=flow_run_id, state=state)
            client.write_run_logs(
                dict(
                    flow_run_id=flow_run_id,  # type: ignore
                    name="execute flow-run",
                    message=msg,
                    level="ERROR",
                ))
            click.echo(str(exc))
            raise exc
Example #26
 def test_secret_name_set_at_runtime(self):
     secret = PrefectSecret()
     with set_temporary_config({"cloud.use_local_secrets": True}):
         with prefect.context(secrets=dict(foo="bar")):
             assert secret.run(name="foo") == "bar"
Example #27
 def test_secret_raises_if_no_name_provided(self):
     secret = PrefectSecret()
     with set_temporary_config({"cloud.use_local_secrets": True}):
         with pytest.raises(ValueError,
                            match="secret name must be provided"):
             secret.run()
Example #28
def insert_records(filename):#list_of_tuples: list):#, waiting_for):
    with open(filename) as read_obj:
        csv_reader = reader(read_obj)
        # Get all rows of csv from csv_reader object as list of tuples
        list_of_tuples = list(map(tuple, csv_reader))
    
    #insert = 0
    if not list_of_tuples:
        return
    unique_key_violation = 0
    new_list = []
    for row in list_of_tuples[1:]:
        # print(row)
        date=row[1]
        station=row[0]
        latitude=row[2]# if row[2] != '' else None
        longitude=row[3]# if row[3] != '' else None
        elevation=row[4]# if row[4] != '' else None
        temp=row[6]
        temp_attributes=row[7]
        dewp=row[8]
        dewp_attributes=row[9]
        slp=row[10]
        slp_attributes=row[11]
        stp=row[12]
        stp_attributes=row[13]
        visib=row[14] 
        visib_attributes=row[15]
        wdsp=row[16]
        wdsp_attributes=row[17]
        mxspd=row[18]
        gust=row[19]
        max_v=row[20]
        max_attributes=row[21]
        min_v=row[22]
        min_attributes=row[23]
        prcp=row[24] 
        prcp_attributes=row[25]
        sndp=row[26]
        frshtt=row[27]
        name=row[5]
        new_tuple = (date, station, latitude, longitude, elevation, temp, temp_attributes, dewp, 
                    dewp_attributes, slp, slp_attributes, stp, stp_attributes, visib, visib_attributes, 
                    wdsp, wdsp_attributes, mxspd, gust, max_v, max_attributes, min_v, min_attributes, 
                    prcp, prcp_attributes, sndp, frshtt, name)
        new_list.append(new_tuple)

    insert = 0
    try:
        PostgresExecuteMany(
            db_name=local_config.DB_NAME, #'climatedb', 
            user=local_config.DB_USER, #'postgres', 
            host=local_config.DB_HOST, #'192.168.86.32', 
            port=local_config.DB_PORT, #5432,  
            query="""
            insert into climate.noaa_global_daily_temps 
                (date, station, latitude, longitude, elevation, temp, temp_attributes, dewp, dewp_attributes, slp, slp_attributes, 
                    stp, stp_attributes, visib, visib_attributes, wdsp, wdsp_attributes, mxspd, gust, 
                    max, max_attributes, min, min_attributes, prcp, prcp_attributes, sndp, frshtt, name)
            values (%s, %s, %s, %s, %s,  %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
            """, 
            data=new_list,
                 #(date, station, temp, temp_attributes, dewp, dewp_attributes, slp, slp_attributes, 
                 #   stp, stp_attributes, visib, visib_attributes, wdsp, wdsp_attributes, mxspd, gust, 
                 #   max_v, max_attributes, min_v, min_attributes, prcp, prcp_attributes, sndp, frshtt),
            commit=True,
        ).run(password=PrefectSecret('NOAA_LOCAL_DB').run())
        insert = len(new_list)
        # insert += 1
    except UniqueViolation:
        unique_key_violation += 1
    try:
        csv_filename = station + '.csv'
        PostgresExecute(
            db_name=local_config.DB_NAME, #'climatedb', 
            user=local_config.DB_USER, #'postgres', 
            host=local_config.DB_HOST, #'192.168.86.32', 
            port=local_config.DB_PORT, #5432,  
            query="""
            insert into climate.csv_checker 
                (station, date_create, date_update, year)
            values (%s, CURRENT_DATE, CURRENT_DATE, %s)
            """, 
            data=(csv_filename, date[0:4]),
            commit=True,
        ).run(password=PrefectSecret('NOAA_LOCAL_DB').run())
    except UniqueViolation:
        pass
    print(f'RECORD INSERT RESULT: inserted {insert} records | {unique_key_violation} duplicates')
Example #29
dtypes_dir = path.join(data_dir, "dtypes")
external_dir = path.join(data_dir, "external")
interim_dir = path.join(data_dir, "interim")
processed_dir = path.join(data_dir, "processed")

ber_publicsearch_filename = "BERPublicsearch"
cso_gas_filename = "cso_gas_2019"
dublin_postcode_geometries_filename = "dublin_postcodes"
small_area_statistics_filename = "small_area_statistics_2016"
small_area_glossary_filename = "small_area_glossary_2016"
small_area_geometries_filename = "small_area_geometries_2016"

# Get Prefect secrets
# -------------------
email_address = PrefectSecret("email_address")

# Setup Download Tasks
# --------------------
download_sa_statistics = Download(
    name="Download Small Area Statistics",
    url="https://www.cso.ie/en/media/csoie/census/census2016/census2016boundaryfiles/SAPS2016_SA2017.csv",
    dirpath=external_dir,
    filename=f"{small_area_statistics_filename}.zip",
)
download_sa_glossary = Download(
    name="Download Small Area Glossary",
    url="https://www.cso.ie/en/media/csoie/census/census2016/census2016boundaryfiles/SAPS_2016_Glossary.xlsx",
    dirpath=external_dir,
    filename=f"{small_area_glossary_filename}.xlsx",
)
Example #30
 def test_secret_raises_if_doesnt_exist(self):
     secret = PrefectSecret(name="test")
     with set_temporary_config({"cloud.use_local_secrets": True}):
         with pytest.raises(ValueError, match="not found"):
             secret.run()