def execute(self, context):
    """Parse the JSON index spec and submit it to Druid for ingestion.

    :param context: Airflow task context (unused here).
    """
    druid = DruidHook(
        druid_ingest_conn_id=self.conn_id,
        max_ingestion_time=self.max_ingestion_time,
    )
    self.log.info("Submitting %s", self.json_index_file)
    # NOTE(review): this variant submits a parsed dict, while sibling
    # operators pass the raw JSON string — confirm the hook accepts both.
    ingestion_spec = json.loads(self.json_index_file)
    druid.submit_indexing_job(ingestion_spec)
def execute(self, context):
    """Submit the raw index spec string to Druid for ingestion.

    :param context: Airflow task context (unused here).
    """
    druid = DruidHook(
        druid_ingest_conn_id=self.conn_id,
        max_ingestion_time=self.max_ingestion_time,
    )
    self.log.info("Submitting %s", self.index_spec_str)
    druid.submit_indexing_job(self.index_spec_str)
def execute(self, context: Dict[Any, Any]) -> None:
    """Submit the JSON index file to Druid through an ingest hook.

    :param context: Airflow task context (unused here).
    """
    druid = DruidHook(
        druid_ingest_conn_id=self.conn_id,
        timeout=self.timeout,
        max_ingestion_time=self.max_ingestion_time,
    )
    self.log.info("Submitting %s", self.json_index_file)
    druid.submit_indexing_job(self.json_index_file)
def test_get_conn_url(self, mock_get_connection):
    """get_conn_url builds '<conn_type>://<host>:<port>/<endpoint>' from the connection."""
    conn = MagicMock()
    conn.conn_type = 'https'
    conn.host = 'test_host'
    conn.port = '1'
    conn.extra_dejson = {'endpoint': 'ingest'}
    mock_get_connection.return_value = conn

    hook = DruidHook(timeout=1, max_ingestion_time=5)

    self.assertEqual('https://test_host:1/ingest', hook.get_conn_url())
def execute(self, context: Dict[str, Any]) -> None:
    """Materialize the SQL result into a temporary Hive table, then submit a
    Druid indexing job that ingests that table's HDFS files.

    The staging table is dropped in a ``finally`` block, so cleanup runs
    whether or not the Druid ingestion succeeds.

    :param context: Airflow task context; ``task_instance_key_str`` is used
        to derive a unique staging table name.
    """
    hive = HiveCliHook(hive_cli_conn_id=self.hive_cli_conn_id)
    self.log.info("Extracting data from Hive")
    # Unique staging table name derived from the task instance key
    # ('.' is invalid in table names, so swap it for '_').
    hive_table = 'druid.' + context['task_instance_key_str'].replace(
        '.', '_')
    # Strip a trailing ';' so the query can be embedded in CREATE TABLE ... AS.
    sql = self.sql.strip().strip(';')
    # Extra TBLPROPERTIES entries rendered as ", 'k' = 'v'" fragments
    # appended after the mandatory serialization.null.format property.
    tblproperties = ''.join([
        ", '{}' = '{}'".format(k, v)
        for k, v in self.hive_tblproperties.items()
    ])
    # Plain-text, uncompressed TSV output so Druid can read the files directly.
    hql = f"""\
    SET mapred.output.compress=false;
    SET hive.exec.compress.output=false;
    DROP TABLE IF EXISTS {hive_table};
    CREATE TABLE {hive_table}
    ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
    STORED AS TEXTFILE
    TBLPROPERTIES ('serialization.null.format' = ''{tblproperties})
    AS
    {sql}
    """
    self.log.info("Running command:\n %s", hql)
    hive.run_cli(hql)

    meta_hook = HiveMetastoreHook(self.metastore_conn_id)

    # Get the Hive table and extract the columns
    table = meta_hook.get_table(hive_table)
    columns = [col.name for col in table.sd.cols]

    # Get the path on hdfs
    static_path = meta_hook.get_table(hive_table).sd.location

    druid = DruidHook(druid_ingest_conn_id=self.druid_ingest_conn_id)

    try:
        index_spec = self.construct_ingest_query(
            static_path=static_path,
            columns=columns,
        )
        self.log.info("Inserting rows into Druid, hdfs path: %s", static_path)
        druid.submit_indexing_job(index_spec)
        self.log.info("Load seems to have succeeded!")
    finally:
        # Best-effort cleanup: always drop the staging table, even when
        # construct_ingest_query or the indexing job raised.
        self.log.info("Cleaning up by dropping the temp Hive table %s", hive_table)
        hql = "DROP TABLE IF EXISTS {}".format(hive_table)
        hive.run_cli(hql)