def test_construct_ingest_query(self):
    operator = HiveToDruidTransfer(
        task_id='hive_to_druid',
        dag=self.dag,
        **self.hook_config
    )

    provided_index_spec = operator.construct_ingest_query(
        **self.index_spec_config
    )

    expected_index_spec = {
        "hadoopDependencyCoordinates": self.hook_config['hadoop_dependency_coordinates'],
        "type": "index_hadoop",
        "spec": {
            "dataSchema": {
                "metricsSpec": self.hook_config['metric_spec'],
                "granularitySpec": {
                    "queryGranularity": self.hook_config['query_granularity'],
                    "intervals": self.hook_config['intervals'],
                    "type": "uniform",
                    "segmentGranularity": self.hook_config['segment_granularity'],
                },
                "parser": {
                    "type": "string",
                    "parseSpec": {
                        "columns": self.index_spec_config['columns'],
                        "dimensionsSpec": {
                            "dimensionExclusions": [],
                            "dimensions": self.index_spec_config['columns'],
                            "spatialDimensions": []
                        },
                        "timestampSpec": {
                            "column": self.hook_config['ts_dim'],
                            "format": "auto"
                        },
                        "format": "tsv"
                    }
                },
                "dataSource": self.hook_config['druid_datasource']
            },
            "tuningConfig": {
                "type": "hadoop",
                "jobProperties": self.hook_config['job_properties'],
                "partitionsSpec": {
                    "type": "hashed",
                    "targetPartitionSize": self.hook_config['target_partition_size'],
                    "numShards": self.hook_config['num_shards'],
                },
            },
            "ioConfig": {
                "inputSpec": {
                    "paths": self.index_spec_config['static_path'],
                    "type": "static"
                },
                "type": "hadoop"
            }
        }
    }

    # Make sure the generated ingest spec matches what we expect
    self.assertEqual(provided_index_spec, expected_index_spec)
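# NOTE: test_construct_ingest_query above relies on self.dag, self.hook_config and
# self.index_spec_config being prepared in the test case's setUp(). Those fixtures are
# not part of this excerpt; the sketch below is only an illustrative assumption of what
# they could look like -- the keys mirror the ones referenced above, the values are made up.
def setUp(self):
    # Imports placed here only to keep this illustrative sketch self-contained.
    from datetime import datetime
    from airflow import DAG

    self.dag = DAG(
        'hive_to_druid_test',
        default_args={'owner': 'airflow', 'start_date': datetime(2017, 1, 1)},
        schedule_interval='@once',
    )

    # Keyword arguments forwarded to HiveToDruidTransfer(**self.hook_config).
    self.hook_config = {
        'sql': 'SELECT * FROM mytable',
        'druid_datasource': 'our_datasource',
        'ts_dim': 'timedimension_column',
        'metric_spec': [
            {'name': 'count', 'type': 'count'},
            {'name': 'amountSum', 'type': 'doubleSum', 'fieldName': 'amount'},
        ],
        'hadoop_dependency_coordinates': 'org.apache.spark:spark-core_2.10:1.5.2',
        'intervals': '2016-01-01/2017-01-01',
        'num_shards': -1,
        'target_partition_size': 1837,
        'query_granularity': 'fifteen_minute',
        'segment_granularity': 'week',
        'job_properties': {
            'mapreduce.job.user.classpath.first': 'false',
            'mapreduce.map.output.compress': 'false',
            'mapreduce.output.fileoutputformat.compress': 'false',
        },
    }

    # Keyword arguments forwarded to operator.construct_ingest_query(**self.index_spec_config).
    self.index_spec_config = {
        'static_path': '/apps/db/warehouse/hive/',
        'columns': ['country', 'segment'],
    }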
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from airflow.operators.hive_to_druid import HiveToDruidTransfer
from airflow import DAG
from datetime import datetime

args = {
    'owner': 'qi_wang',
    'start_date': datetime(2015, 4, 4),
}

dag = DAG("test_druid", default_args=args)

HiveToDruidTransfer(
    task_id="load_dummy_test",
    sql="select * from qi.druid_test_dataset_w_platform_1 limit 10;",
    druid_datasource="airflow_test",
    ts_dim="ds",
    dag=dag,
)
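# Hypothetical usage sketch, not part of the original file: executing the task above end
# to end needs live Hive and Druid connections, but a local smoke test could look roughly
# like this (BaseOperator.run() executes the task for the given date range).
if __name__ == '__main__':
    task = dag.get_task('load_dummy_test')
    task.run(
        start_date=datetime(2015, 4, 4),
        end_date=datetime(2015, 4, 4),
    )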