Пример #1
0
    def __init__(
        self,
        url="https://druid.broker.develop.otonomousmobility.com/",
        endpoint="druid/v2",
        datasource="mytaxi_gps_probes_index_parallel_v4",
        username=None,
        password=None,
    ):
        """Connect to a Druid broker and prepare the default query arguments.

        Args:
            url: Base URL handed to ``PyDruid``.
            endpoint: Query endpoint path handed to ``PyDruid``.
            datasource: Name of the datasource to query.
            username: Basic-auth user; when falsy,
                ``os.environ["USERNAME"]`` is used instead.
            password: Basic-auth password; when falsy,
                ``os.environ["PASSWORD"]`` is used instead.

        Raises:
            KeyError: If a credential is not supplied and the matching
                environment variable is not set.
        """
        super().__init__("druid")
        self.url, self.endpoint, self.datasource = url, endpoint, datasource

        self.connector = PyDruid(url, endpoint)
        # Explicit arguments win; the environment is only consulted as a
        # fallback for missing (falsy) credentials.
        auth_user = username if username else os.environ["USERNAME"]
        auth_password = password if password else os.environ["PASSWORD"]
        self.connector.set_basic_auth_credentials(auth_user, auth_password)

        # Use the datasource's time boundary (min/max timestamps reported by
        # the broker) as the interval for the default query.
        boundary_query = self.connector.time_boundary(
            datasource=self.datasource)
        bounds = boundary_query.result[0]["result"]
        self.interval = f'{bounds["minTime"]}/{bounds["maxTime"]}'

        paging = {"paging_identifiers": {}, "threshold": 100}
        self.default_query = {
            "datasource": self.datasource,
            "granularity": "all",
            "intervals": self.interval,
            "paging_spec": paging,
        }
Пример #2
0
 def get_conn(self):
     """
     Return a PyDruid client for the configured Druid query connection.

     The connection looked up via ``self.druid_query_conn_id`` supplies the
     host and port of the URL; the endpoint is read from the ``endpoint``
     key of the connection's ``extra_dejson`` and defaults to an empty
     string when that key is absent.
     """
     conn = self.get_connection(self.druid_query_conn_id)
     # Pass ``conn`` explicitly instead of ``**locals()`` so the template
     # cannot silently pick up (or lose) unrelated local names.
     broker_url = "http://{conn.host}:{conn.port}".format(conn=conn)
     endpoint = conn.extra_dejson.get('endpoint', '')
     return PyDruid(broker_url, endpoint)
Пример #3
0
 def get_client(self):
     """Build a PyDruid client with basic auth from the broker connection.

     The connection looked up via ``self.druid_broker_conn_id`` provides the
     schema and host of the URL plus the login and password; the endpoint
     comes from the ``endpoint`` key of the connection's ``extra_dejson``
     and falls back to ``"druid/v2/"``.
     """
     conn = self.get_connection(self.druid_broker_conn_id)
     broker_url = "{conn.schema}://{conn.host}".format(conn=conn)
     endpoint = conn.extra_dejson.get("endpoint", "druid/v2/")

     client = PyDruid(url=broker_url, endpoint=endpoint)
     client.set_basic_auth_credentials(
         username=conn.login, password=conn.password)
     return client
Пример #4
0
def query_druid():
    """Run a select query on ``pageviews1`` for user ``ethan``.

    Requests the ``url`` and ``user`` dimensions over a fixed interval with
    a paging threshold of 5, and returns the query's ``result``.
    """
    client = PyDruid(DRUID_URL, 'druid/v2')
    select_args = dict(
        datasource='pageviews1',
        granularity='all',
        dimensions=["url", "user"],
        filter=Dimension('user') == 'ethan',
        paging_spec={"pagingIdentifiers": {}, "threshold": 5},
        intervals=["2016-07-08/2017-09-13"],
    )
    return client.select(**select_args).result
Пример #5
0
 def test_cube_query(self):
     """Run a top-10 ``Country`` topN query and export the result.

     The result is printed as a pandas export and also written to
     ``top.tsv``.
     """
     client = PyDruid("http://pipeline.qiniu.com", 'v2/stream/cubes/query')
     client.set_qiniu("", "")

     topn_args = dict(
         datasource='domain_top_statics',
         granularity='all',
         # Start/duration interval; presumably one hour beginning
         # 2019-08-13 -- TODO confirm the intended window.
         intervals='2019-08-13/pt1h',
         aggregations={'count': doublesum('count')},
         metric='count',
         dimension='Country',
         threshold=10,
     )
     top_countries = client.topn(**topn_args)

     # NOTE(review): the pandas export is requested from the client while
     # the TSV export uses the returned query object -- confirm both are
     # meant to refer to the same (latest) query.
     frame = client.export_pandas()
     print(frame)
     top_countries.export_tsv('top.tsv')
Пример #6
0
def druid_simple_groupby(dimensions, filter_list=None, filter_type="and",
                         datasource=settings.DRUID_SPRAYDAY_DATASOURCE):
    """
    Run a Druid groupBy query over ``dimensions`` with optional filtering.

    Inputs:
        dimensions => list of dimensions to group by
        filter_list => optional list of list of things to filter with e.g.
                        filter_list=[['target_area_id', operator.ne, 1],
                                     ['sprayable', operator.eq, "true"],
                                     ['dimension', operator, "value"]])
                       None or an empty list means no filter is applied
        filter_type => type of Druid filter used to combine the entries
                       of filter_list
        datasource => name of the Druid datasource to query

    Returns:
        The ``result`` of the groupBy query, or an empty list when issuing
        the query raises ``OSError``.
    """
    query = PyDruid(get_druid_broker_url(), 'druid/v2')
    params = dict(
        datasource=datasource,
        granularity='all',
        intervals=settings.DRUID_INTERVAL,
        limit_spec={
            "type": "default",
            "limit": 50000,
        },
        dimensions=dimensions,
    )
    if filter_list:
        # Each entry is [dimension name, comparison operator, value]; the
        # operator (e.g. operator.eq) is applied to a Dimension object to
        # produce the corresponding filter.
        params['filter'] = filters.Filter(
            type=filter_type,
            fields=[
                entry[1](filters.Dimension(entry[0]), entry[2])
                for entry in filter_list
            ],
        )

    try:
        request = query.groupby(**params)
    except OSError:
        # Best effort: a failed request yields "no data" rather than an
        # exception for the caller.
        return []
    else:
        return request.result
Пример #7
0
 def get_pydruid_client(self):
     """Return a PyDruid client for this object's broker host, port and endpoint."""
     broker_url = "http://{0}:{1}/".format(self.broker_host, self.broker_port)
     return PyDruid(broker_url, self.broker_endpoint)
Пример #8
0
def _druid_dimension_equals(name, value):
    """Return the filter built by ``filters.Dimension(name) == value``."""
    return filters.Dimension(name) == value


def _druid_all_of(*conditions):
    """Wrap the given filters in a single Druid filter of type 'and'."""
    return filters.Filter(type='and', fields=list(conditions))


def _druid_filtered_count(condition):
    """Return a filtered aggregator: longsum of 'count' under ``condition``."""
    # A fresh longsum aggregator is created per call so that no aggregator
    # object is shared between different named aggregations.
    return aggregators.filtered(condition, aggregators.longsum('count'))


def get_druid_data(dimensions=None, filter_list=None, filter_type="and",
                   order_by=None,
                   datasource=settings.DRUID_SPRAYDAY_DATASOURCE):
    """
    Runs a groupBy query against Druid, returns data with metrics
    Inputs:
        dimensions => list of dimensions to group by; defaults to
                      target_area_id, target_area_name and
                      target_area_structures when None
        filter_list => optional list of list of things to filter with e.g.
                        filter_list=[['target_area_id', operator.ne, 1],
                                     ['sprayable', operator.eq, "true"],
                                     ['dimension', operator, "value"]])
                       None or an empty list means no filter is applied
        filter_type => type of Druid filter to perform,
        order_by => field(s) to order the data by; defaults to
                    ["target_area_name"] when None
        datasource => name of the Druid datasource to query
    Returns:
        The ``result`` of the groupBy query, or an empty list when issuing
        the query raises ``OSError``.
    """
    # None stands in for the defaults so that no list object is shared
    # between calls (the ordering list is handed straight to the query).
    if order_by is None:
        order_by = ["target_area_name"]

    query = PyDruid(get_druid_broker_url(), 'druid/v2')

    eq = _druid_dimension_equals
    sprayed = settings.MSPRAY_WAS_SPRAYED_VALUE
    not_sprayed = settings.MSPRAY_WAS_NOT_SPRAYED_VALUE
    aggregations = {
        'num_not_sprayable': _druid_filtered_count(
            _druid_all_of(eq('sprayable', 'false'))),
        'num_not_sprayed': _druid_filtered_count(
            _druid_all_of(eq('sprayable', 'true'),
                          eq('sprayed', not_sprayed))),
        'num_sprayed': _druid_filtered_count(eq('sprayed', sprayed)),
        'num_new': _druid_filtered_count(eq('is_new', 'true')),
        'num_new_no_duplicates': _druid_filtered_count(
            _druid_all_of(eq('is_duplicate', 'false'),
                          eq('is_new', 'true'))),
        'num_duplicate': _druid_filtered_count(eq('is_duplicate', 'true')),
        'num_sprayed_no_duplicates': _druid_filtered_count(
            _druid_all_of(eq('is_duplicate', 'false'),
                          eq('sprayed', sprayed))),
        'num_not_sprayed_no_duplicates': _druid_filtered_count(
            _druid_all_of(eq('is_duplicate', 'false'),
                          eq('sprayable', 'true'),
                          eq('sprayed', not_sprayed))),
        'num_sprayed_duplicates': _druid_filtered_count(
            _druid_all_of(eq('is_duplicate', 'true'),
                          eq('sprayable', 'true'),
                          eq('sprayed', sprayed))),
        'num_not_sprayable_no_duplicates': _druid_filtered_count(
            _druid_all_of(eq('is_duplicate', 'false'),
                          eq('sprayable', 'false'))),
        'num_refused': _druid_filtered_count(
            _druid_all_of(eq('is_duplicate', 'false'),
                          eq('is_refused', 'true'),
                          eq('sprayed', not_sprayed))),
    }

    params = dict(
        datasource=datasource,
        granularity='all',
        intervals=settings.DRUID_INTERVAL,
        aggregations=aggregations,
        post_aggregations={
            'num_found': Field('num_sprayed_no_duplicates') +
            Field('num_sprayed_duplicates') +
            Field('num_not_sprayed_no_duplicates')
        },
        limit_spec={
            "type": "default",
            "limit": 50000,
            "columns": order_by
        }
    )
    if filter_list:
        # Each entry is [dimension name, comparison operator, value]; the
        # operator (e.g. operator.eq) is applied to a Dimension object to
        # produce the corresponding filter.
        params['filter'] = filters.Filter(
            type=filter_type,
            fields=[
                entry[1](filters.Dimension(entry[0]), entry[2])
                for entry in filter_list
            ],
        )

    if dimensions is None:
        params['dimensions'] = ['target_area_id', 'target_area_name',
                                'target_area_structures']
    else:
        params['dimensions'] = dimensions

    try:
        request = query.groupby(**params)
    except OSError:
        # Best effort: a failed request yields "no data" rather than an
        # exception for the caller.
        return []
    else:
        return request.result
Пример #9
0
def create_client():
    """Return a PyDruid client for ``http://localhost:8083`` at ``druid/v2/``."""
    broker_url = "http://localhost:8083"
    endpoint = "druid/v2/"
    return PyDruid(broker_url, endpoint)
Пример #10
0
 def __init__(self):
     """Create the PyDruid client for ``DRUID_BROKER_URL`` and keep it on ``self.client``."""
     druid_client = PyDruid(DRUID_BROKER_URL, 'druid/v2')
     self.client = druid_client
Пример #11
0
 def init_druid():
     """Create the fraud Druid client and set its basic-auth credentials.

     URL, path, user and password are all read from the ``cm`` object.
     """
     client = PyDruid(cm.FRAUD_DRUID_URL, cm.FRAUD_DRUID_PATH)
     credentials = (cm.FRAUD_DRUID_USER, cm.FRAUD_DRUID_PASS)
     client.set_basic_auth_credentials(*credentials)
     return client
Пример #12
0
 def __init__(self, address, port=8082):
     """Create asynchronous and synchronous Druid clients for one broker.

     Args:
         address: Host part of the broker URL.
         port: Port part of the broker URL (8082 when omitted).
     """
     endpoint = 'druid/v2/'
     broker_url = f"http://{address}:{port}"
     self.async_client = AsyncPyDruid(broker_url, endpoint)
     self.client = PyDruid(broker_url, endpoint)