def __init__(
    self,
    url="https://druid.broker.develop.otonomousmobility.com/",
    endpoint="druid/v2",
    datasource="mytaxi_gps_probes_index_parallel_v4",
    username=None,
    password=None,
):
    """Connect to a Druid broker and prepare a reusable default query.

    Credentials fall back to the USERNAME/PASSWORD environment variables
    when not passed explicitly (raises KeyError if neither is available).
    Performs a time-boundary query at construction time to learn the full
    min/max interval of the datasource.
    """
    super().__init__("druid")
    self.url = url
    self.endpoint = endpoint
    self.datasource = datasource
    self.connector = PyDruid(url, endpoint)
    self.connector.set_basic_auth_credentials(
        username or os.environ["USERNAME"],
        password or os.environ["PASSWORD"],
    )
    # Ask the broker for the datasource's time boundaries so that the
    # default query can cover every ingested row.
    bounds = self.connector.time_boundary(
        datasource=self.datasource
    ).result[0]["result"]
    self.interval = "{}/{}".format(bounds["minTime"], bounds["maxTime"])
    self.default_query = {
        "datasource": self.datasource,
        "granularity": "all",
        "intervals": self.interval,
        "paging_spec": {"paging_identifiers": {}, "threshold": 100},
    }
def get_conn(self):
    """
    Returns a druid connection object for query.

    Builds the broker URL from the stored Airflow-style connection's host
    and port; the ingestion endpoint comes from the connection's extra JSON
    (empty string when absent).
    """
    conn = self.get_connection(self.druid_query_conn_id)
    # f-string instead of str.format(**locals()): the original passed the
    # entire local namespace as keyword arguments just to resolve one
    # attribute access, which is fragile and obscures intent.
    return PyDruid(
        f"http://{conn.host}:{conn.port}",
        conn.extra_dejson.get('endpoint', ''),
    )
def get_client(self):
    """Build an authenticated PyDruid client from the stored broker connection."""
    conn = self.get_connection(self.druid_broker_conn_id)
    # Endpoint is configurable via the connection's extra JSON.
    endpoint = conn.extra_dejson.get("endpoint", "druid/v2/")
    client = PyDruid(url=f"{conn.schema}://{conn.host}", endpoint=endpoint)
    client.set_basic_auth_credentials(
        username=conn.login,
        password=conn.password,
    )
    return client
def query_druid():
    """Select user 'ethan' pageview events and return the raw query result."""
    client = PyDruid(DRUID_URL, 'druid/v2')
    selection = client.select(
        datasource='pageviews1',
        granularity='all',
        dimensions=["url", "user"],
        filter=Dimension('user') == 'ethan',
        paging_spec={"pagingIdentifiers": {}, "threshold": 5},
        intervals=["2016-07-08/2017-09-13"],
    )
    return selection.result
def test_cube_query(self):
    """Query the Qiniu cube endpoint for the top-10 countries by count.

    Prints the result as a pandas DataFrame and also writes it to
    ``top.tsv``.
    """
    client = PyDruid("http://pipeline.qiniu.com", 'v2/stream/cubes/query')
    # NOTE(review): empty credentials — fill these in for a real run.
    client.set_qiniu("", "")
    top = client.topn(
        datasource='domain_top_statics',
        granularity='all',
        intervals='2019-08-13/pt1h',
        aggregations={'count': doublesum('count')},
        metric='count',
        dimension='Country',
        threshold=10)
    # Export from the returned query object, not the client: pydruid
    # attaches export_pandas()/export_tsv() to the query result (the
    # original mixed the two, calling export_pandas on the client).
    df = top.export_pandas()
    print(df)
    top.export_tsv('top.tsv')
def druid_simple_groupby(dimensions, filter_list=None, filter_type="and",
                         datasource=settings.DRUID_SPRAYDAY_DATASOURCE):
    """Run a simple Druid groupby query and return its result rows.

    Inputs:
        dimensions => list of dimensions to group by
        filter_list => list of [dimension, operator, value] triples, e.g.
                       filter_list=[['target_area_id', operator.ne, 1],
                                    ['sprayable', operator.eq, "true"]]
        filter_type => type of Druid filter to perform ("and"/"or")
        datasource => Druid datasource to query

    Returns [] when the broker is unreachable.
    """
    # None sentinel instead of a mutable default list ([] is shared
    # across calls and could be mutated by a caller).
    if filter_list is None:
        filter_list = []
    query = PyDruid(get_druid_broker_url(), 'druid/v2')
    params = dict(
        datasource=datasource,
        granularity='all',
        intervals=settings.DRUID_INTERVAL,
        limit_spec={
            "type": "default",
            "limit": 50000,
        }
    )
    params['dimensions'] = dimensions
    if filter_list:
        fields = []
        for dim_name, comparison_operator, value in filter_list:
            # comparison_operator is e.g. operator.eq, applied to a
            # pydruid Dimension to build a filter field.
            fields.append(comparison_operator(filters.Dimension(dim_name), value))
        params['filter'] = filters.Filter(
            type=filter_type,
            fields=fields
        )
    try:
        request = query.groupby(**params)
    except OSError:
        # Broker unreachable: deliberately fall through to the empty result.
        pass
    else:
        return request.result
    return []
def get_pydruid_client(self):
    """Return a PyDruid client pointed at this instance's broker."""
    broker_url = f"http://{self.broker_host}:{self.broker_port}/"
    return PyDruid(broker_url, self.broker_endpoint)
def get_druid_data(dimensions=None, filter_list=None, filter_type="and",
                   order_by=None,
                   datasource=settings.DRUID_SPRAYDAY_DATASOURCE):
    """
    Runs a query against Druid, returns data with metrics
    Inputs:
        dimensions => list of dimensions to group by
        filter_list => list of list of things to filter with e.g.
                        filter_list=[['target_area_id', operator.ne, 1],
                                     ['sprayable', operator.eq, "true"],
                                     ['dimension', operator, "value"]])
        filter_type => type of Druid filter to perform,
        order_by => field(s) to order the data by
    Returns [] when the broker is unreachable.
    """
    # None sentinels instead of mutable default arguments ([] and
    # ["target_area_name"] were shared across calls and mutable).
    if filter_list is None:
        filter_list = []
    if order_by is None:
        order_by = ["target_area_name"]
    query = PyDruid(get_druid_broker_url(), 'druid/v2')
    params = dict(
        datasource=datasource,
        granularity='all',
        intervals=settings.DRUID_INTERVAL,
        # Each metric is a filtered longsum over the event count; the
        # *_no_duplicates variants additionally require is_duplicate=false.
        aggregations={
            'num_not_sprayable': aggregators.filtered(
                filters.Filter(
                    type='and',
                    fields=[filters.Dimension('sprayable') == 'false']
                ),
                aggregators.longsum('count')
            ),
            'num_not_sprayed': aggregators.filtered(
                filters.Filter(
                    type='and',
                    fields=[filters.Dimension('sprayable') == 'true',
                            filters.Dimension('sprayed') ==
                            settings.MSPRAY_WAS_NOT_SPRAYED_VALUE]
                ),
                aggregators.longsum('count')
            ),
            'num_sprayed': aggregators.filtered(
                filters.Dimension('sprayed') ==
                settings.MSPRAY_WAS_SPRAYED_VALUE,
                aggregators.longsum('count')
            ),
            'num_new': aggregators.filtered(
                filters.Dimension('is_new') == 'true',
                aggregators.longsum('count')
            ),
            'num_new_no_duplicates': aggregators.filtered(
                filters.Filter(
                    type='and',
                    fields=[filters.Dimension('is_duplicate') == 'false',
                            filters.Dimension('is_new') == 'true']
                ),
                aggregators.longsum('count')
            ),
            'num_duplicate': aggregators.filtered(
                filters.Dimension('is_duplicate') == 'true',
                aggregators.longsum('count')
            ),
            'num_sprayed_no_duplicates': aggregators.filtered(
                filters.Filter(
                    type='and',
                    fields=[filters.Dimension('is_duplicate') == 'false',
                            filters.Dimension('sprayed') ==
                            settings.MSPRAY_WAS_SPRAYED_VALUE]
                ),
                aggregators.longsum('count')
            ),
            'num_not_sprayed_no_duplicates': aggregators.filtered(
                filters.Filter(
                    type='and',
                    fields=[filters.Dimension('is_duplicate') == 'false',
                            filters.Dimension('sprayable') == 'true',
                            filters.Dimension('sprayed') ==
                            settings.MSPRAY_WAS_NOT_SPRAYED_VALUE]
                ),
                aggregators.longsum('count')
            ),
            'num_sprayed_duplicates': aggregators.filtered(
                filters.Filter(
                    type='and',
                    fields=[filters.Dimension('is_duplicate') == 'true',
                            filters.Dimension('sprayable') == 'true',
                            filters.Dimension('sprayed') ==
                            settings.MSPRAY_WAS_SPRAYED_VALUE]
                ),
                aggregators.longsum('count')
            ),
            'num_not_sprayable_no_duplicates': aggregators.filtered(
                filters.Filter(
                    type='and',
                    fields=[filters.Dimension('is_duplicate') == 'false',
                            filters.Dimension('sprayable') == 'false']
                ),
                aggregators.longsum('count')
            ),
            'num_refused': aggregators.filtered(
                filters.Filter(
                    type='and',
                    fields=[filters.Dimension('is_duplicate') == 'false',
                            filters.Dimension('is_refused') == 'true',
                            filters.Dimension('sprayed') ==
                            settings.MSPRAY_WAS_NOT_SPRAYED_VALUE]
                ),
                aggregators.longsum('count')
            ),
        },
        post_aggregations={
            # num_found counts all de-duplicated visits plus sprayed
            # duplicates.
            'num_found': Field('num_sprayed_no_duplicates') +
            Field('num_sprayed_duplicates') +
            Field('num_not_sprayed_no_duplicates')
        },
        limit_spec={
            "type": "default",
            "limit": 50000,
            "columns": order_by
        }
    )
    if filter_list:
        fields = []
        for this_filter in filter_list:
            compare_dim = filters.Dimension(this_filter[0])
            comparison_operator = this_filter[1]  # e.g. operator.eq
            compare_dim_value = this_filter[2]
            fields.append(comparison_operator(compare_dim, compare_dim_value))
        params['filter'] = filters.Filter(
            type=filter_type,
            fields=fields
        )
    if dimensions is None:
        params['dimensions'] = ['target_area_id', 'target_area_name',
                                'target_area_structures']
    else:
        params['dimensions'] = dimensions
    try:
        request = query.groupby(**params)
    except OSError:
        # Broker unreachable: return an empty result set.
        return []
    else:
        return request.result
def create_client():
    """Instantiate a PyDruid client against the local broker."""
    broker_url = "http://localhost:8083"
    return PyDruid(broker_url, "druid/v2/")
def __init__(self):
    """Create the PyDruid client used for all subsequent broker queries."""
    self.client = PyDruid(DRUID_BROKER_URL, 'druid/v2')
def init_druid():
    """Return a basic-auth-configured PyDruid client for the fraud cluster."""
    client = PyDruid(cm.FRAUD_DRUID_URL, cm.FRAUD_DRUID_PATH)
    client.set_basic_auth_credentials(cm.FRAUD_DRUID_USER,
                                      cm.FRAUD_DRUID_PASS)
    return client
def __init__(self, address, port=8082):
    """Create synchronous and asynchronous PyDruid clients for the broker
    listening at ``address:port``."""
    base_url = "http://{}:{}".format(address, port)
    self.async_client = AsyncPyDruid(base_url, 'druid/v2/')
    self.client = PyDruid(base_url, 'druid/v2/')