class TestFixtureDummyExtractor(BaseExtractor):
    """Test fixture extractor that reports metadata at extract() time only.

    extract() returns a fixed StepMetadata with one input and one output
    dataset on a dummy source; extract_on_complete() deliberately reports
    nothing.
    """

    operator_class = TestFixtureDummyOperator
    # Shared dummy source all fixture datasets are attached to.
    source = Source(
        type="DummySource",
        name="dummy_source_name",
        connection_url="http://dummy/source/url")

    def __init__(self, operator):
        super().__init__(operator)

    def extract(self) -> StepMetadata:
        # Static metadata: one input and one output dataset on the
        # dummy source, plus a marker context entry.
        return StepMetadata(
            name=get_job_name(task=self.operator),
            inputs=[Dataset.from_table(self.source, "extract_input1")],
            outputs=[Dataset.from_table(self.source, "extract_output1")],
            context={"extract": "extract"})

    def extract_on_complete(self, task_instance) -> StepMetadata:
        # This fixture only reports metadata from extract().
        return None
def test_extract(mock_get_table_schemas):
    """PostgresExtractor.extract() maps the parsed SQL to dataset metadata.

    One input table schema is returned by the mocked lookup; the second
    lookup (output tables) returns nothing.
    """
    mock_get_table_schemas.side_effect = \
        [[DB_TABLE_SCHEMA], NO_DB_TABLE_SCHEMA]

    expected_inputs = [
        Dataset(type=DatasetType.DB_TABLE,
                name=f"{DB_SCHEMA_NAME}.{DB_TABLE_NAME.name}",
                source=Source(
                    type='POSTGRESQL',
                    name=CONN_ID,
                    connection_url=CONN_URI),
                fields=[])
    ]

    expected_context = {
        'sql': SQL,
    }

    # Set the environment variable for the connection
    os.environ[f"AIRFLOW_CONN_{CONN_ID.upper()}"] = CONN_URI

    # FIX: extract() returns a *list* containing a single StepMetadata
    # object (see PostgresExtractor.extract), so it must be unwrapped
    # before asserting on its attributes.
    step_metadata = PostgresExtractor(TASK).extract()[0]

    assert step_metadata.name == f"{DAG_ID}.{TASK_ID}"
    assert step_metadata.inputs == expected_inputs
    assert step_metadata.outputs == []
    assert step_metadata.context == expected_context
def test_extract(mock_get_table_schemas):
    """Verify extract() output for one input table and no output tables."""
    mock_get_table_schemas.side_effect = \
        [[DB_TABLE_SCHEMA], NO_DB_TABLE_SCHEMA]

    postgres_source = Source(
        type='POSTGRESQL',
        name=CONN_ID,
        connection_url=CONN_URI)
    expected_inputs = [
        Dataset(
            type=DatasetType.DB_TABLE,
            name=f"{DB_SCHEMA_NAME}.{DB_TABLE_NAME.name}",
            source=postgres_source,
            fields=[])
    ]
    expected_context = {'sql': SQL}

    # Make the Airflow connection resolvable from the environment.
    os.environ[f"AIRFLOW_CONN_{CONN_ID.upper()}"] = CONN_URI

    # NOTE: When extracting operator metadata, only a single StepMetadata
    # object is returned. We'll want to cleanup the Extractor interface to
    # not return an array.
    step_metadata = PostgresExtractor(TASK).extract()[0]

    assert step_metadata.name == f"{DAG_ID}.{TASK_ID}"
    assert step_metadata.inputs == expected_inputs
    assert step_metadata.outputs == []
    assert step_metadata.context == expected_context
def extract(self) -> [StepMetadata]:
    """Build step metadata for a Postgres operator from its SQL statement.

    Returns a single-element list holding the StepMetadata for the task.
    """
    # Parse the operator's SQL to discover input / output tables.
    sql_meta: SqlMeta = SqlParser.parse(self.operator.sql)

    # Every dataset defaults to the operator's own Postgres connection.
    # NOTE: We'll want to look into adding support for the `database`
    # property that is used to override the one defined in the connection.
    conn_id = self.operator.postgres_conn_id
    source = Source(
        type='POSTGRESQL',
        name=conn_id,
        connection_url=get_connection_uri(conn_id))

    # Input tables need their schema fetched so the dataset name can be
    # formatted as {schema_name}.{table_name}.
    input_schemas = self._get_table_schemas(sql_meta.in_tables)
    inputs = [
        Dataset.from_table(
            source=source,
            table_name=schema.table_name.name,
            schema_name=schema.schema_name)
        for schema in input_schemas
    ]
    output_schemas = self._get_table_schemas(sql_meta.out_tables)
    outputs = [
        Dataset.from_table_schema(source=source, table_schema=schema)
        for schema in output_schemas
    ]

    return [
        StepMetadata(
            name=f"{self.operator.dag_id}.{self.operator.task_id}",
            inputs=inputs,
            outputs=outputs,
            context={'sql': self.operator.sql})
    ]
class TestFixtureDummyExtractorOnComplete(BaseExtractor):
    """Test fixture extractor that reports metadata only on task completion.

    extract() deliberately returns nothing; extract_on_complete() emits a
    fixed StepMetadata with one schema-backed input and one output dataset.
    """

    operator_class = TestFixtureDummyOperator
    # Shared dummy source all fixture datasets are attached to.
    source = Source(
        type="DummySource",
        name="dummy_source_name",
        connection_url="http://dummy/source/url")

    def __init__(self, operator):
        super().__init__(operator)

    def extract(self) -> StepMetadata:
        # Metadata for this fixture is only produced after the task runs.
        return None

    def extract_on_complete(self, task_instance) -> StepMetadata:
        # A two-column input table schema, used to exercise the
        # schema-backed dataset construction path.
        input_schema = DbTableSchema(
            schema_name='schema',
            table_name=DbTableName('extract_on_complete_input1'),
            columns=[
                DbColumn(
                    name='field1',
                    type='text',
                    description='',
                    ordinal_position=1),
                DbColumn(
                    name='field2',
                    type='text',
                    description='',
                    ordinal_position=2),
            ])
        return StepMetadata(
            name=get_job_name(task=self.operator),
            inputs=[Dataset.from_table_schema(self.source, input_schema)],
            outputs=[Dataset.from_table(self.source,
                                        "extract_on_complete_output1")],
            context={"extract_on_complete": "extract_on_complete"})
def _source(self, bq_table) -> Source:
    """Build the Source for *bq_table* via the operator's connection.

    The connection URL embeds the fully-qualified BigQuery table name.
    """
    return Source(
        type="BIGQUERY",
        name=self.operator.bigquery_conn_id,
        connection_url=_BIGQUERY_CONN_URL.format(
            self._bq_table_name(bq_table)))
def _source(self) -> Source:
    """Build a Source describing the operator's BigQuery connection."""
    # The connection URL is the fixed BigQuery URL constant.
    return Source(
        type="BIGQUERY",
        name=self.operator.bigquery_conn_id,
        connection_url=_BIGQUERY_CONN_URL)