def test_extraction_with_multiple_query_result(self, mock_method):
        # type: (Any, Any) -> None
        """
        Successive extract() calls should hand back each query result in order.

        ``mock_method`` is unused here; presumably it is injected by a patch
        decorator that is outside this view.
        """
        expected = ['test_result', 'test_result2', 'test_result3']

        extractor = SQLAlchemyExtractor()
        # Give the extractor its own copy so the expectation stays independent.
        extractor.results = list(expected)
        scoped_conf = Scoped.get_scoped_conf(conf=self.conf,
                                             scope=extractor.get_scope())
        extractor.init(scoped_conf)

        result = []
        for _ in range(3):
            result.append(extractor.extract())

        self.assertEqual(len(result), 3)
        self.assertEqual(result, expected)
 def init(self, conf: ConfigTree) -> None:
     """
     Read the staleness-removal settings from ``conf`` and set up the Gremlin client.

     Values are looked up in the task-scoped config first, then in the unscoped
     ``conf``, and finally in ``NeptuneStalenessRemovalTask.DEFAULT_CONFIG``.

     :param conf: configuration tree passed to the task
     """
     task = NeptuneStalenessRemovalTask
     scoped = Scoped.get_scoped_conf(conf, self.get_scope())
     conf = scoped.with_fallback(conf).with_fallback(task.DEFAULT_CONFIG)

     # Going through a set removes duplicate entries (ordering is not kept).
     node_labels = conf.get_list(task.TARGET_NODES)
     self.target_nodes = list(set(node_labels))
     relation_labels = conf.get_list(task.TARGET_RELATIONS)
     self.target_relations = list(set(relation_labels))

     self.dry_run = conf.get_bool(task.DRY_RUN)
     self.staleness_pct = conf.get_int(task.STALENESS_MAX_PCT)
     self.staleness_pct_dict = conf.get(task.STALENESS_PCT_MAX_DICT)
     self.graph_label_id = conf.get(task.GRAPH_LABEL_ID_PROPERTY_NAME)

     # Cut-off timestamp: current UTC time minus the configured number of seconds.
     cut_off_seconds = conf.get_int(task.STALENESS_CUT_OFF_IN_SECONDS)
     self.staleness_cut_off_in_seconds = cut_off_seconds
     self.cutoff_datetime = datetime.utcnow() - timedelta(
         seconds=self.staleness_cut_off_in_seconds)

     # The client is initialised with its own scoped slice of the merged config.
     client = NeptuneSessionClient()
     self.gremlin_client = client
     client.init(Scoped.get_scoped_conf(conf, client.get_scope()))
    def test_extraction_with_model_class(self):
        # type: (Any) -> None
        """
        A raw result should come back as an instance of the configured model class.
        """
        settings = {
            Neo4jExtractor.GRAPH_URL_CONFIG_KEY: 'TEST_GRAPH_URL',
            Neo4jExtractor.CYPHER_QUERY_CONFIG_KEY: 'TEST_QUERY',
            Neo4jExtractor.NEO4J_AUTH_USER: 'TEST_USER',
            Neo4jExtractor.NEO4J_AUTH_PW: 'TEST_PW',
            Neo4jExtractor.MODEL_CLASS_CONFIG_KEY:
            'databuilder.models.table_elasticsearch_document.TableESDocument',
        }
        # Every key lives under the "extractor.neo4j" scope.
        config_dict = {}
        for key, value in settings.items():
            config_dict['extractor.neo4j.{}'.format(key)] = value

        self.conf = ConfigFactory.from_dict(config_dict)

        result_dict = {
            'database': 'test_database',
            'cluster': 'test_cluster',
            'schema': 'test_schema',
            'name': 'test_table_name',
            'display_name': 'test_schema.test_table_name',
            'key': 'test_table_key',
            'description': 'test_table_description',
            'last_updated_timestamp': 123456789,
            'column_names': ['test_col1', 'test_col2', 'test_col3'],
            'column_descriptions': ['test_description1', 'test_description2', ''],
            'total_usage': 100,
            'unique_usage': 5,
            'tags': ['hive'],
            'badges': ['badge1'],
            'schema_description': 'schema_description',
        }

        # _get_driver is patched out while the extractor is initialised and used.
        with patch.object(Neo4jExtractor, '_get_driver'):
            extractor = Neo4jExtractor()
            scoped_conf = Scoped.get_scoped_conf(conf=self.conf,
                                                 scope=extractor.get_scope())
            extractor.init(scoped_conf)

            extractor.results = [result_dict]
            result_obj = extractor.extract()

            self.assertIsInstance(result_obj, TableESDocument)
            self.assertDictEqual(vars(result_obj), result_dict)
Пример #4
0
    def init(self, conf: ConfigTree) -> None:
        """
        Build the REST API extractor and the dict-to-model transformer.

        :param conf: configuration tree for this extractor
        """
        self._conf = conf
        self._extractor = ModeDashboardUtils.create_mode_rest_api_extractor(
            restapi_query=self._build_restapi_query(),
            conf=self._conf
        )

        to_model = DictToModel()
        scoped_conf = Scoped.get_scoped_conf(self._conf, to_model.get_scope())
        # DashboardChart is only a fallback; a MODEL_CLASS in the scoped config wins.
        model_defaults = ConfigFactory.from_dict(
            {MODEL_CLASS: 'databuilder.models.dashboard.dashboard_chart.DashboardChart'})
        to_model.init(conf=scoped_conf.with_fallback(model_defaults))
        self._transformer = to_model
    def test_keypath_and_pagesize_can_be_set(self, mock_build: Any) -> None:
        """
        Initialising with a key path that does not exist should raise FileNotFoundError.

        ``mock_build`` is presumably injected by a patch decorator outside this view.
        """
        scope_prefix = 'extractor.bigquery_table_metadata'
        conf = ConfigFactory.from_dict({
            f'{scope_prefix}.{BigQueryMetadataExtractor.PROJECT_ID_KEY}': 'your-project-here',
            f'{scope_prefix}.{BigQueryMetadataExtractor.PAGE_SIZE_KEY}': 200,
            f'{scope_prefix}.{BigQueryMetadataExtractor.KEY_PATH_KEY}': '/tmp/doesnotexist',
        })

        mock_build.return_value = MockBigQueryClient(ONE_DATASET, ONE_TABLE, TABLE_DATA)
        extractor = BigQueryMetadataExtractor()

        with self.assertRaises(FileNotFoundError):
            scoped_conf = Scoped.get_scoped_conf(conf=conf, scope=extractor.get_scope())
            extractor.init(scoped_conf)
    def text_extraction_with_empty_query_result(self):
        # type: (Any) -> None
        """
        Extracting from a result list holding only an empty string should yield None.

        NOTE(review): the method name starts with ``text_`` rather than ``test_``,
        so unittest's default discovery will not collect it - confirm whether
        that is intentional before renaming.
        """
        with patch.object(Neo4jExtractor, '_get_driver'):
            extractor = Neo4jExtractor()
            scoped_conf = Scoped.get_scoped_conf(conf=self.conf,
                                                 scope=extractor.get_scope())
            extractor.init(scoped_conf)

            extractor.results = ['']
            self.assertIsNone(extractor.extract())
Пример #7
0
    def test_consume_success(self) -> None:
        """
        consume() should return exactly one record when the consumer yields one message.
        """
        kafka_extractor = KafkaSourceExtractor()
        scoped_conf = Scoped.get_scoped_conf(conf=self.conf,
                                             scope=kafka_extractor.get_scope())
        kafka_extractor.init(scoped_conf)

        with patch.object(kafka_extractor, 'consumer') as mock_consumer:
            polled_message = MagicMock()
            polled_message.error.return_value = False
            # only return once
            polled_message.value.side_effect = ['msg']
            mock_consumer.poll.return_value = polled_message

            records = kafka_extractor.consume()
            self.assertEqual(len(records), 1)
    def test_table_without_columns(self, mock_build):
        """
        A table without column data should still be extracted, with an empty column list.

        ``mock_build`` is presumably injected by a patch decorator outside this view.
        """
        mock_build.return_value = MockBigQueryClient(ONE_DATASET, ONE_TABLE, NO_COLS)
        extractor = BigQueryMetadataExtractor()
        scoped_conf = Scoped.get_scoped_conf(conf=self.conf,
                                             scope=extractor.get_scope())
        extractor.init(scoped_conf)
        result = extractor.extract()

        # Checked in this order; the first mismatch fails the test.
        expected_attributes = [
            ('database', 'bigquery'),
            ('cluster', 'your-project-here'),
            ('schema', 'fdgdfgh'),
            ('name', 'nested_recs'),
            ('description', ""),
            ('columns', []),
            ('is_view', False),
        ]
        for attribute, expected in expected_attributes:
            self.assertEqual(getattr(result, attribute), expected)
Пример #9
0
 def _init_extractor(self,
                     programmatic_description_enabled: bool = True) -> None:
     """
     Create ``self.extractor`` as a FeastExtractor pointed at the test repository.

     :param programmatic_description_enabled: value stored under the
         ``DESCRIBE_FEATURE_VIEWS`` config key
     """
     # The fixture directory is resolved relative to this file's grandparent directory.
     base_dir = pathlib.Path(__file__).parent.parent.resolve()
     repository_path = base_dir / "resources/extractor/feast/fs"

     conf = ConfigFactory.from_dict({
         f"extractor.feast.{FeastExtractor.FEAST_REPOSITORY_PATH}": repository_path,
         f"extractor.feast.{FeastExtractor.DESCRIBE_FEATURE_VIEWS}":
         programmatic_description_enabled,
     })

     extractor = FeastExtractor()
     self.extractor = extractor
     extractor.init(Scoped.get_scoped_conf(conf=conf, scope=extractor.get_scope()))
    def init(self, conf: ConfigTree) -> None:
        """
        Read the extractor settings, build the value transformer and start the consumer.

        :param conf: configuration tree for this extractor
        :raises Exception: if no topic is configured, if no transformer is configured,
            or if the configured transformer is not a ``Transformer``
        :raises RuntimeError: if the transformer class cannot be imported or
            instantiated, or if the consumer cannot be created or subscribed
        """
        self.conf = conf
        self.consumer_config = conf.get_config(KafkaSourceExtractor.CONSUMER_CONFIG).\
            as_plain_ordered_dict()

        self.topic_names: list = conf.get_list(
            KafkaSourceExtractor.TOPIC_NAME_LIST)

        if not self.topic_names:
            raise Exception('Kafka topic needs to be provided by the user.')

        self.consumer_total_timeout = conf.get_int(
            KafkaSourceExtractor.CONSUMER_TOTAL_TIMEOUT_SEC, default=10)

        self.consumer_poll_timeout = conf.get_int(
            KafkaSourceExtractor.CONSUMER_POLL_TIMEOUT_SEC, default=1)

        self.transformer_thrown_exception = conf.get_bool(
            KafkaSourceExtractor.TRANSFORMER_THROWN_EXCEPTION, default=False)

        # Transform the protoBuf message with a transformer.
        # An explicit None default lets a missing key reach the check below
        # instead of failing inside the config lookup.
        val_transformer = conf.get(KafkaSourceExtractor.RAW_VALUE_TRANSFORMER, None)
        if val_transformer is None:
            raise Exception('A message transformer should be provided.')

        try:
            # The setting is a dotted path: "<module path>.<class name>".
            module_name, class_name = val_transformer.rsplit(".", 1)
            mod = importlib.import_module(module_name)
            self.transformer = getattr(mod, class_name)()
        except Exception as err:
            # Keep the original failure attached as the explicit cause.
            raise RuntimeError(
                'The Kafka message value deserde class cant instantiated!') from err

        if not isinstance(self.transformer, Transformer):
            raise Exception(
                'The transformer needs to be subclass of the base transformer'
            )
        self.transformer.init(
            Scoped.get_scoped_conf(conf, self.transformer.get_scope()))

        # Consumer init
        try:
            # Disable enable.auto.commit
            self.consumer_config['enable.auto.commit'] = False

            self.consumer = Consumer(self.consumer_config)
            # TODO: to support only consume a subset of partitions.
            self.consumer.subscribe(self.topic_names)
        except Exception as err:
            raise RuntimeError('Consumer could not start correctly!') from err
    def test_extraction_with_model_class(self) -> None:
        """
        The first extracted record should carry the expected table attributes.
        """
        extractor = CsvExtractor()
        scoped_conf = Scoped.get_scoped_conf(conf=self.conf,
                                             scope=extractor.get_scope())
        extractor.init(scoped_conf)

        table = extractor.extract()
        self.assertEqual(table.name, 'test_table1')
        # The description is an object; its text is read from the private _text attribute.
        self.assertEqual(table.description._text, '1st test table')
        self.assertEqual(table.database, 'hive')
        self.assertEqual(table.cluster, 'gold')
        self.assertEqual(table.schema, 'test_schema')
    def init(self, conf: ConfigTree) -> None:
        """
        Build the REST API extractor and a three-step transformer chain.

        The chain fills in the ``url`` field from a template, escapes backslashes
        in ``query_text``, then converts each record dict into a model object.

        :param conf: configuration tree for this extractor
        """
        self._conf = conf

        self._extractor = ModeDashboardUtils.create_mode_rest_api_extractor(
            restapi_query=self._build_restapi_query(),
            conf=self._conf
        )

        # Constructing URL using several ID via TemplateVariableSubstitutionTransformer
        url_builder = TemplateVariableSubstitutionTransformer()
        url_scoped_conf = Scoped.get_scoped_conf(self._conf, url_builder.get_scope())
        url_defaults = ConfigFactory.from_dict({
            FIELD_NAME: 'url',
            TEMPLATE: 'https://app.mode.com/{organization}'
                      '/reports/{dashboard_id}/queries/{query_id}',
        })
        url_builder.init(conf=url_scoped_conf.with_fallback(url_defaults))

        # Escape backslash as it breaks Cypher statement.
        backslash_escaper = RegexStrReplaceTransformer()
        escaper_scoped_conf = Scoped.get_scoped_conf(self._conf, backslash_escaper.get_scope())
        escaper_defaults = ConfigFactory.from_dict({
            REGEX_REPLACE_TUPLE_LIST: [('\\', '\\\\')],
            ATTRIBUTE_NAME: 'query_text',
        })
        backslash_escaper.init(conf=escaper_scoped_conf.with_fallback(escaper_defaults))

        to_model = DictToModel()
        model_scoped_conf = Scoped.get_scoped_conf(self._conf, to_model.get_scope())
        model_defaults = ConfigFactory.from_dict(
            {MODEL_CLASS: 'databuilder.models.dashboard.dashboard_query.DashboardQuery'})
        to_model.init(conf=model_scoped_conf.with_fallback(model_defaults))

        # Transformers are listed in the order they were set up above.
        self._transformer = ChainedTransformer(
            transformers=[url_builder, backslash_escaper, to_model])
Пример #13
0
    def test_publish_with_data_and_no_old_index(self) -> None:
        """
        With data but no existing index, publish() should create the new index,
        bulk-load the document and point the alias at the new index.
        """
        document = {
            'KEY_DOESNOT_MATTER': 'NO_VALUE',
            'KEY_DOESNOT_MATTER2': 'NO_VALUE2',
        }
        mock_data = json.dumps(document)
        # An empty alias lookup stands in for "no old index".
        self.mock_es_client.indices.get_alias.return_value = {}

        with patch('builtins.open',
                   mock_open(read_data=mock_data)) as mock_file:
            publisher = ElasticsearchPublisher()
            scoped_conf = Scoped.get_scoped_conf(conf=self.conf,
                                                 scope=publisher.get_scope())
            publisher.init(conf=scoped_conf)

            # assert mock was called with test_file_path and test_file_mode
            mock_file.assert_called_once_with(self.test_file_path,
                                              self.test_file_mode)

            publisher.publish()

            # ensure indices create endpoint was called
            default_mapping = ElasticsearchPublisher.DEFAULT_ELASTICSEARCH_INDEX_MAPPING
            self.mock_es_client.indices.create.assert_called_once_with(
                index=self.test_es_new_index, body=default_mapping)

            # bulk endpoint called once
            index_action = {
                'index': {
                    '_type': self.test_doc_type,
                    '_index': self.test_es_new_index,
                }
            }
            self.mock_es_client.bulk.assert_called_once_with([index_action, document])

            # update alias endpoint called once
            add_alias_action = {
                "add": {
                    "index": self.test_es_new_index,
                    "alias": self.test_es_alias,
                }
            }
            self.mock_es_client.indices.update_aliases.assert_called_once_with(
                {'actions': [add_alias_action]})
Пример #14
0
    def test_publish_with_no_data(self) -> None:
        """
        Publishing an empty file should not call the Elasticsearch client mock.
        """
        with patch('builtins.open', mock_open(read_data='')) as mock_file:
            publisher = ElasticsearchPublisher()
            scoped_conf = Scoped.get_scoped_conf(conf=self.conf,
                                                 scope=publisher.get_scope())
            publisher.init(conf=scoped_conf)

            # assert mock was called with test_file_path and test_file_mode
            mock_file.assert_called_with(self.test_file_path, self.test_file_mode)

            publisher.publish()
            # no calls should be made through the elasticsearch client when there is no data
            self.assertEqual(self.mock_es_client.call_count, 0)
    def test_email_filter_not_counted(self, mock_build: Any) -> None:
        """
        With the email pattern set to 'emailFilter', extract() should return None.

        ``mock_build`` is presumably injected by a patch decorator outside this view.
        """
        settings = {
            BigQueryTableUsageExtractor.PROJECT_ID_KEY: 'your-project-here',
            BigQueryTableUsageExtractor.EMAIL_PATTERN: 'emailFilter',
        }
        config_dict = {}
        for key, value in settings.items():
            config_dict['extractor.bigquery_table_usage.{}'.format(key)] = value
        conf = ConfigFactory.from_dict(config_dict)

        mock_build.return_value = MockLoggingClient(CORRECT_DATA)
        extractor = BigQueryTableUsageExtractor()
        scoped_conf = Scoped.get_scoped_conf(conf=conf, scope=extractor.get_scope())
        extractor.init(scoped_conf)

        self.assertIsNone(extractor.extract())
def from_surrounding_config(conf: ConfigTree,
                            sql_stmt: str) -> SQLAlchemyExtractor:
    """
    Factory for a SQLAlchemyExtractor that is wrapped by another, specialized extractor.

    The SQLAlchemy settings are taken from the wrapping extractor's config, and the
    SQL statement is supplied as a fallback for ``SQLAlchemyExtractor.EXTRACT_SQL``.

    :param conf: A config tree from which the sqlalchemy config still needs to be taken.
    :param sql_stmt: The SQL statement to use for extraction. Expected to be set by the
        wrapping extractor implementation, and not by the config.
    :return: an initialised SQLAlchemyExtractor
    """
    alchemy_extractor = SQLAlchemyExtractor()
    scoped_conf = Scoped.get_scoped_conf(conf, alchemy_extractor.get_scope())
    sql_fallback = ConfigFactory.from_dict({SQLAlchemyExtractor.EXTRACT_SQL: sql_stmt})
    alchemy_extractor.init(scoped_conf.with_fallback(sql_fallback))
    return alchemy_extractor
    def init(self, conf):
        # type: (ConfigTree) -> None
        """
        Read the Druid settings, build the SQL statement and initialise the
        wrapped SQLAlchemyExtractor.

        :param conf: configuration tree; ``DruidMetadataExtractor.DEFAULT_CONFIG``
            is used as a fallback
        """
        conf = conf.with_fallback(DruidMetadataExtractor.DEFAULT_CONFIG)

        cluster = conf.get_string(DruidMetadataExtractor.CLUSTER_KEY)
        self._cluster = '{}'.format(cluster)

        # An absent where-clause suffix falls back to the empty string.
        where_clause_suffix = conf.get_string(
            DruidMetadataExtractor.WHERE_CLAUSE_SUFFIX_KEY, default='')
        self.sql_stmt = DruidMetadataExtractor.SQL_STATEMENT.format(
            where_clause_suffix=where_clause_suffix)

        self._alchemy_extractor = SQLAlchemyExtractor()
        scoped_conf = Scoped.get_scoped_conf(conf, self._alchemy_extractor.get_scope())
        sql_fallback = ConfigFactory.from_dict(
            {SQLAlchemyExtractor.EXTRACT_SQL: self.sql_stmt})
        sql_alch_conf = scoped_conf.with_fallback(sql_fallback)

        self._alchemy_extractor.init(sql_alch_conf)
        self._extract_iter = None  # type: Union[None, Iterator]
    def test_table_part_of_table_date_range(self, mock_build):
        """
        A date-range table should be extracted exactly once, under the name 'date_range_'.

        ``mock_build`` is presumably injected by a patch decorator outside this view.
        """
        mock_build.return_value = MockBigQueryClient(ONE_DATASET, TABLE_DATE_RANGE, TABLE_DATA)
        extractor = BigQueryMetadataExtractor()
        scoped_conf = Scoped.get_scoped_conf(conf=self.conf,
                                             scope=extractor.get_scope())
        extractor.init(scoped_conf)

        first_record = extractor.extract()
        table_name = first_record.name

        # Count records until extract() returns something falsy.
        count = 0
        current = first_record
        while current:
            count += 1
            current = extractor.extract()

        self.assertEqual(count, 1)
        self.assertEqual(table_name, 'date_range_')
    def test_accepts_dataset_filter_by_label(self, mock_build):
        """
        With a label filter configured, extract() should still return a TableMetadata.

        ``mock_build`` is presumably injected by a patch decorator outside this view.
        """
        settings = {
            BigQueryMetadataExtractor.PROJECT_ID_KEY: 'your-project-here',
            BigQueryMetadataExtractor.FILTER_KEY: 'label.key:value',
        }
        config_dict = {}
        for key, value in settings.items():
            config_dict['extractor.bigquery_table_metadata.{}'.format(key)] = value
        conf = ConfigFactory.from_dict(config_dict)

        mock_build.return_value = MockBigQueryClient(ONE_DATASET, ONE_TABLE, TABLE_DATA)
        extractor = BigQueryMetadataExtractor()
        scoped_conf = Scoped.get_scoped_conf(conf=conf, scope=extractor.get_scope())
        extractor.init(scoped_conf)

        self.assertIsInstance(extractor.extract(), TableMetadata)
Пример #20
0
    def init(self, conf: ConfigTree) -> None:
        """
        Store the GraphQL query, build the extractor and set up the transformer chain.

        The chain first runs TimestampStringToEpoch on the ``created_timestamp``
        field, then turns each record dict into a model object.

        :param conf: configuration tree for this extractor
        """
        self._conf = conf
        self.query = """query {
            workbooks {
                id
                name
                createdAt
                description
                projectName
                projectVizportalUrlId
                vizportalUrlId
            }
        }"""

        # Called only after self._conf and self.query have been assigned.
        self._extractor = self._build_extractor()

        to_epoch = TimestampStringToEpoch()
        epoch_scoped_conf = Scoped.get_scoped_conf(self._conf, to_epoch.get_scope())
        epoch_defaults = ConfigFactory.from_dict({
            FIELD_NAME: 'created_timestamp',
        })
        to_epoch.init(conf=epoch_scoped_conf.with_fallback(epoch_defaults))

        to_model = DictToModel()
        model_scoped_conf = Scoped.get_scoped_conf(self._conf, to_model.get_scope())
        model_defaults = ConfigFactory.from_dict({
            MODEL_CLASS:
            'databuilder.models.dashboard.dashboard_metadata.DashboardMetadata'
        })
        to_model.init(conf=model_scoped_conf.with_fallback(model_defaults))

        self._transformer = ChainedTransformer(transformers=[to_epoch, to_model])
    def init(self, conf):
        # type: (ConfigTree) -> None
        """
        Build the REST API extractor and a two-step transformer chain.

        The chain fills in the ``url`` field from a template, then converts each
        record dict into a model object.

        :param conf: configuration tree for this extractor
        """
        self._conf = conf

        self._extractor = ModeDashboardUtils.create_mode_rest_api_extractor(
            restapi_query=self._build_restapi_query(), conf=self._conf)

        # Constructing URL using several ID via TemplateVariableSubstitutionTransformer
        url_builder = TemplateVariableSubstitutionTransformer()
        url_scoped_conf = Scoped.get_scoped_conf(self._conf, url_builder.get_scope())
        url_defaults = ConfigFactory.from_dict({
            FIELD_NAME: 'url',
            TEMPLATE: 'https://app.mode.com/{organization}'
                      '/reports/{dashboard_id}/queries/{query_id}',
        })
        url_builder.init(conf=url_scoped_conf.with_fallback(url_defaults))

        to_model = DictToModel()
        model_scoped_conf = Scoped.get_scoped_conf(self._conf, to_model.get_scope())
        model_defaults = ConfigFactory.from_dict({
            MODEL_CLASS: 'databuilder.models.dashboard.dashboard_query.DashboardQuery',
        })
        to_model.init(conf=model_scoped_conf.with_fallback(model_defaults))

        self._transformer = ChainedTransformer(transformers=[url_builder, to_model])
Пример #22
0
 def _init_extractor(self,
                     programmatic_description_enabled: bool = True) -> None:
     """
     Create ``self.extractor`` as a FeastExtractor whose client is a MagicMock.

     :param programmatic_description_enabled: value stored under the
         ``DESCRIBE_FEATURE_TABLES`` config key
     """
     conf = ConfigFactory.from_dict({
         f'extractor.feast.{FeastExtractor.FEAST_ENDPOINT_CONFIG_KEY}':
         'feast-core.example.com:6565',
         f'extractor.feast.{FeastExtractor.FEAST_SERVICE_CONFIG_KEY}':
         'unittest-feast-instance',
         f'extractor.feast.{FeastExtractor.DESCRIBE_FEATURE_TABLES}':
         programmatic_description_enabled,
     })

     extractor = FeastExtractor()
     self.extractor = extractor
     extractor.init(Scoped.get_scoped_conf(conf=conf, scope=extractor.get_scope()))
     # Swap the client for a mock only after init() has run.
     extractor._client = MagicMock(return_value=None)
Пример #23
0
    def test_failed_jobs_should_not_be_counted(self, mock_build: Any) -> None:
        """
        When the logging client serves the FAILURE fixture, extract() should return None.

        ``mock_build`` is presumably injected by a patch decorator outside this view.
        """
        project_key = f'extractor.bigquery_table_usage.{BigQueryTableUsageExtractor.PROJECT_ID_KEY}'
        conf = ConfigFactory.from_dict({project_key: 'bigquery-public-data'})

        mock_build.return_value = MockLoggingClient(FAILURE)
        extractor = BigQueryTableUsageExtractor()
        scoped_conf = Scoped.get_scoped_conf(conf=conf, scope=extractor.get_scope())
        extractor.init(scoped_conf)

        self.assertIsNone(extractor.extract())
Пример #24
0
 def __init__(self, conf, task, publisher=None):
     # type: (ConfigTree, Task, Publisher) -> None
     """
     Store the job's collaborators and optionally set up a statsd client.

     :param conf: full configuration tree; the job-scoped part is kept in
         ``self.scoped_conf``
     :param task: task this job holds
     :param publisher: publisher this job holds. When omitted (or None) a new
         ``NoopPublisher`` is created for this job.
     """
     self.task = task
     self.conf = conf
     # A default in the signature would be evaluated once, at definition time,
     # and then shared by every job; create a fresh instance per job instead.
     self.publisher = NoopPublisher() if publisher is None else publisher
     self.scoped_conf = Scoped.get_scoped_conf(self.conf, self.get_scope())
     if self.scoped_conf.get_bool(DefaultJob.IS_STATSD_ENABLED, False):
         prefix = 'amundsen.databuilder.job.{}'.format(
             self.scoped_conf.get_string(DefaultJob.JOB_IDENTIFIER))
         LOGGER.info(
             'Setting statsd for job metrics with prefix: {}'.format(
                 prefix))
         self.statsd = StatsClient(prefix=prefix)
     else:
         # statsd is disabled unless IS_STATSD_ENABLED is set to true.
         self.statsd = None
Пример #25
0
    def init(self, conf: ConfigTree) -> None:
        """
        Initialise the extractor and transformer, then read the task's own settings.

        :param conf: full configuration tree; each component receives its scoped part
        :raises TypeError: if the configured document mapping is not a subclass of
            SearchableResource
        """
        # initialize extractor with configuration
        extractor_conf = Scoped.get_scoped_conf(conf, self.extractor.get_scope())
        self.extractor.init(extractor_conf)
        # initialize transformer with configuration
        transformer_conf = Scoped.get_scoped_conf(conf, self.transformer.get_scope())
        self.transformer.init(transformer_conf)

        # task configuration
        task_cls = SearchMetadatatoElasticasearchTask
        task_conf = Scoped.get_scoped_conf(conf, self.get_scope())

        # NOTE(review): assignment order is kept as-is; create_new_index_name()
        # may read attributes assigned above it - confirm before reordering.
        self.date = task_conf.get_string(task_cls.DATE, self.today)
        self.entity = task_conf.get_string(task_cls.ENTITY_TYPE).lower()
        self.elasticsearch_client = task_conf.get(task_cls.ELASTICSEARCH_CLIENT_CONFIG_KEY)
        self.elasticsearch_alias = task_conf.get(task_cls.ELASTICSEARCH_ALIAS_CONFIG_KEY)
        self.elasticsearch_new_index = task_conf.get(
            task_cls.ELASTICSEARCH_NEW_INDEX, self.create_new_index_name())
        self.document_mapping = task_conf.get(
            task_cls.MAPPING_CLASS, RESOURCE_TO_MAPPING[self.entity])

        is_searchable = issubclass(self.document_mapping, SearchableResource)
        LOGGER.info(is_searchable)

        if not is_searchable:
            msg = "Provided document_mapping should be instance" \
                f" of SearchableResource not {type(self.document_mapping)}"
            LOGGER.error(msg)
            raise TypeError(msg)

        self.elasticsearch_batch_size = task_conf.get(
            task_cls.ELASTICSEARCH_PUBLISHER_BATCH_SIZE, 10000)
        self.elasticsearch_timeout_sec = task_conf.get(
            task_cls.ELASTICSEARCH_TIMEOUT_SEC, 120)
Пример #26
0
    def test_failed_jobs_should_not_be_counted(self, mock_build):
        """
        When the logging client serves the FAILURE fixture, extract() should return None.

        ``mock_build`` is presumably injected by a patch decorator outside this view.
        """
        project_key = 'extractor.bigquery_table_usage.{}'.format(
            BigQueryTableUsageExtractor.PROJECT_ID_KEY)
        conf = ConfigFactory.from_dict({project_key: 'your-project-here'})

        mock_build.return_value = MockLoggingClient(FAILURE)
        extractor = BigQueryTableUsageExtractor()
        scoped_conf = Scoped.get_scoped_conf(conf=conf, scope=extractor.get_scope())
        extractor.init(scoped_conf)

        self.assertIsNone(extractor.extract())
Пример #27
0
    def _create_schema_by_table_mapping(self):
        # type: () -> dict
        """
        Build a mapping from lower-cased table name to lower-cased schema name.

        Records are pulled from a HiveTableMetadataExtractor until it returns
        something falsy.

        :return: dict of table name -> schema name
        """
        # TODO: Make extractor generic
        extractor = HiveTableMetadataExtractor()
        scoped_conf = Scoped.get_scoped_conf(self._conf, extractor.get_scope())
        extractor.init(scoped_conf)

        table_to_schema = {}
        while True:
            table_metadata = extractor.extract()
            if not table_metadata:
                break
            # TODO: deal with collision
            # For now a later table with the same name overwrites the earlier entry.
            table_to_schema[table_metadata.name.lower()] = table_metadata.schema_name.lower()
        return table_to_schema
    def test_transform_with_dict_object(self):
        # type: () -> None
        """
        Transforming a plain dict should raise an error that names the expected
        record type.
        """
        transformer = ElasticsearchDocumentTransformer()
        transformer.init(conf=Scoped.get_scoped_conf(
            conf=self.conf, scope=transformer.get_scope()))

        data = dict(test_key="DOES_NOT_MATTER", test_key2="DOES_NOT_MATTER2")

        with self.assertRaises(Exception) as context:
            transformer.transform(data)  # type: ignore
        # Compare against the exception's text: exception instances are not
        # iterable on Python 3, so `in context.exception` raises TypeError.
        self.assertIn(
            "ElasticsearchDocumentTransformer expects record of type 'Neo4jDataResult'!",
            str(context.exception))
 def _build_extractor(self) -> TableauGraphQLApiLastModifiedExtractor:
     """
     Create and initialise the extractor that runs ``self.query``.

     :return: An initialised TableauGraphQLApiLastModifiedExtractor.
     """
     extractor = TableauGraphQLApiLastModifiedExtractor()

     scoped_conf = Scoped.get_scoped_conf(self._conf, extractor.get_scope())
     query_defaults = ConfigFactory.from_dict({
         TableauGraphQLApiExtractor.QUERY: self.query,
         STATIC_RECORD_DICT: {'product': 'tableau'},
     })
     # Lookup order: scoped config, then the full config, then the defaults above.
     tableau_extractor_conf = scoped_conf.with_fallback(self._conf).with_fallback(query_defaults)

     extractor.init(conf=tableau_extractor_conf)
     return extractor
    def test_keypath_can_be_set(self, mock_build):
        """
        Initialising with a key path that does not exist should raise FileNotFoundError.

        ``mock_build`` is presumably injected by a patch decorator outside this view.
        """
        settings = {
            BigQueryWatermarkExtractor.PROJECT_ID_KEY: 'your-project-here',
            BigQueryWatermarkExtractor.KEY_PATH_KEY: '/tmp/doesnotexist',
        }
        config_dict = {}
        for key, value in settings.items():
            config_dict['extractor.bigquery_watermarks.{}'.format(key)] = value
        conf = ConfigFactory.from_dict(config_dict)

        mock_build.return_value = MockBigQueryClient(ONE_DATASET, ONE_TABLE,
                                                     None)
        extractor = BigQueryWatermarkExtractor()

        with self.assertRaises(FileNotFoundError):
            scoped_conf = Scoped.get_scoped_conf(conf=conf, scope=extractor.get_scope())
            extractor.init(scoped_conf)