Python StatPipe示例

编程语言: Python

命名空间/包名称: hustle.core.stat

类/类型: StatPipe

hotexamples.com的示例: 4

Python StatPipe - 已找到4个示例。以下是从开源项目中提取的、评分最高的 hustle.core.stat.StatPipe 真实 Python 示例。您可以为示例评分，以帮助我们提高示例质量。

常用方法

显示 / 隐藏

StatPipe(2)

run(2)

wait(2)

示例#1

显示文件

文件： __init__.py 项目： lucciano/hustle

def stat(where, limit=16, **kwargs):
    """
    Run a ``StatPipe`` job over the blobs selected by *where* and summarise it.

    Every result of the job is a mapping of column name to a numeric value,
    plus a ``'_'`` entry.  The per-column values are combined into a weighted
    average, each result being weighted by its share of the summed ``'_'``
    values (presumably a row count -- confirm against ``StatPipe``).

    :param where: the table / where-expression to inspect; its ``_name`` is
        used to build the job name
    :type limit: int
    :param limit: passed to ``_get_blobs`` to bound the number of blobs used,
        default value is 16
    :param kwargs: overrides handed to ``Settings``; the resulting settings
        are also forwarded to the job as keyword arguments

    Return a dict mapping column name to ``int(weighted_average * 100)``.
    Columns whose scaled value truncates to 0 are omitted, and the extra key
    ``'_'`` holds the summed ``'_'`` total as an int.
    """
    from hustle.core.settings import Settings
    from hustle.core.stat import StatPipe
    from disco.core import result_iterator
    from collections import defaultdict

    settings = Settings(**kwargs)
    ddfs = settings['ddfs']
    # normalise each entry to a sorted tuple so duplicates collapse in the set
    job_blobs = set(tuple(sorted(w)) for w in _get_blobs(where, ddfs, limit))
    job = StatPipe(settings['server'])
    job.run(name="stat_" + where._name, input=job_blobs, **settings)
    res = job.wait()

    # Read the job output once: the entries are needed for the total and for
    # the weighting pass, and a second result_iterator() call would read the
    # output all over again.
    results = [cols for _, cols in result_iterator(res)]

    # first we need the total, so that we can calculate weighted average
    total = float(sum(cols['_'] for cols in results))
    if not total:
        # nothing to weight by -- bail out instead of dividing by zero
        return {'_': 0}

    final = defaultdict(int)
    for cols in results:
        weight = cols.pop('_') / total
        # items() rather than iteritems() so this also runs on Python 3
        for col, card in cols.items():
            final[col] += card * weight

    # scale by 100 and truncate to int; columns that end up at 0 are dropped
    really_final = {}
    for key, value in final.items():
        card = int(value * 100)
        if card > 0:
            really_final[key] = card
    really_final['_'] = int(total)

    return really_final

示例#2

显示文件

def stat(where, limit=16, **kwargs):
    """
    Run a ``StatPipe`` job over the blobs selected by *where* and summarise it.

    Every result of the job is a mapping of column name to a numeric value,
    plus a ``'_'`` entry.  The per-column values are combined into a weighted
    average, each result being weighted by its share of the summed ``'_'``
    values (presumably a row count -- confirm against ``StatPipe``).

    :param where: the table / where-expression to inspect; its ``_name`` is
        used to build the job name
    :type limit: int
    :param limit: passed to ``_get_blobs`` to bound the number of blobs used,
        default value is 16
    :param kwargs: overrides handed to ``Settings``; the resulting settings
        are also forwarded to the job as keyword arguments

    Return a dict mapping column name to ``int(weighted_average * 100)``.
    Columns whose scaled value truncates to 0 are omitted, and the extra key
    ``'_'`` holds the summed ``'_'`` total as an int.
    """
    from hustle.core.settings import Settings
    from hustle.core.stat import StatPipe
    from disco.core import result_iterator
    from collections import defaultdict

    settings = Settings(**kwargs)
    ddfs = settings['ddfs']
    # normalise each entry to a sorted tuple so duplicates collapse in the set
    job_blobs = set(tuple(sorted(w)) for w in _get_blobs(where, ddfs, limit))
    job = StatPipe(settings['server'])
    job.run(name="stat_" + where._name, input=job_blobs, **settings)
    res = job.wait()

    # Read the job output once: the entries are needed for the total and for
    # the weighting pass, and a second result_iterator() call would read the
    # output all over again.
    results = [cols for _, cols in result_iterator(res)]

    # first we need the total, so that we can calculate weighted average
    total = float(sum(cols['_'] for cols in results))
    if not total:
        # nothing to weight by -- bail out instead of dividing by zero
        return {'_': 0}

    final = defaultdict(int)
    for cols in results:
        weight = cols.pop('_') / total
        # items() rather than iteritems() so this also runs on Python 3
        for col, card in cols.items():
            final[col] += card * weight

    # scale by 100 and truncate to int; columns that end up at 0 are dropped
    really_final = {}
    for key, value in final.items():
        card = int(value * 100)
        if card > 0:
            really_final[key] = card
    really_final['_'] = int(total)

    return really_final

示例#3

显示文件

def stat(where, limit=16, **kwargs):
    """
    Fetch statistical information of a collection of selected `Table <hustle.Table>`.

    A ``StatPipe`` job is run over the selected blobs and the per-result
    column values are combined into one weighted average per column, each
    result being weighted by its share of the summed ``'_'`` values.

    :type where: sequence of :class:`Table <hustle.Table>` | :class:`Expr <hustle.core.marble.Expr>`
    :param where: the Tables to fetch data from, as well as the conditions in the *where clause*

    :type limit: int
    :param limit: the maximum number of blobs from the where clause, default value is 16

    :param kwargs: overrides handed to ``Settings``; the resulting settings
        are also forwarded to the job as keyword arguments

    Return a dict of column key cardinalities [0-100] for indexed columns in a table.
    Columns whose scaled value truncates to 0 are omitted, and the extra key
    ``'_'`` holds the summed ``'_'`` total as an int.
    """
    from hustle.core.settings import Settings
    from hustle.core.stat import StatPipe
    from disco.core import result_iterator
    from collections import defaultdict

    settings = Settings(**kwargs)
    ddfs = settings['ddfs']
    # normalise each entry to a sorted tuple so duplicates collapse in the set
    job_blobs = set(tuple(sorted(w)) for w in _get_blobs(where, ddfs, limit))
    job = StatPipe(settings['server'])
    job.run(name="stat_" + where._name, input=job_blobs, **settings)
    res = job.wait()

    # Read the job output once: the entries are needed for the total and for
    # the weighting pass, and a second result_iterator() call would read the
    # output all over again.
    results = [cols for _, cols in result_iterator(res)]

    # first we need the total, so that we can calculate weighted average
    total = float(sum(cols['_'] for cols in results))
    if not total:
        # nothing to weight by -- bail out instead of dividing by zero
        return {'_': 0}

    final = defaultdict(int)
    for cols in results:
        weight = cols.pop('_') / total
        # items() rather than iteritems() so this also runs on Python 3
        for col, card in cols.items():
            final[col] += card * weight

    # scale by 100 and truncate to int; columns that end up at 0 are dropped
    really_final = {}
    for key, value in final.items():
        card = int(value * 100)
        if card > 0:
            really_final[key] = card
    really_final['_'] = int(total)

    return really_final

示例#4

显示文件

文件： __init__.py 项目： tspurway/hustle

def stat(where, limit=16, **kwargs):
    """
    Fetch statistical information of a collection of selected `Table <hustle.Table>`.

    A ``StatPipe`` job is run over the selected blobs and the per-result
    column values are combined into one weighted average per column, each
    result being weighted by its share of the summed ``'_'`` values.

    :type where: sequence of :class:`Table <hustle.Table>` | :class:`Expr <hustle.core.marble.Expr>`
    :param where: the Tables to fetch data from, as well as the conditions in the *where clause*

    :type limit: int
    :param limit: the maximum number of blobs from the where clause, default value is 16

    :param kwargs: overrides handed to ``Settings``; the resulting settings
        are also forwarded to the job as keyword arguments

    Return a dict of column key cardinalities [0-100] for indexed columns in a table.
    Columns whose scaled value truncates to 0 are omitted, and the extra key
    ``'_'`` holds the summed ``'_'`` total as an int.
    """
    from hustle.core.settings import Settings
    from hustle.core.stat import StatPipe
    from disco.core import result_iterator
    from collections import defaultdict

    settings = Settings(**kwargs)
    ddfs = settings['ddfs']
    # normalise each entry to a sorted tuple so duplicates collapse in the set
    job_blobs = set(tuple(sorted(w)) for w in _get_blobs(where, ddfs, limit))
    job = StatPipe(settings['server'])
    job.run(name="stat_" + where._name, input=job_blobs, **settings)
    res = job.wait()

    # Read the job output once: the entries are needed for the total and for
    # the weighting pass, and a second result_iterator() call would read the
    # output all over again.
    results = [cols for _, cols in result_iterator(res)]

    # first we need the total, so that we can calculate weighted average
    total = float(sum(cols['_'] for cols in results))
    if not total:
        # nothing to weight by -- bail out instead of dividing by zero
        return {'_': 0}

    final = defaultdict(int)
    for cols in results:
        weight = cols.pop('_') / total
        # items() rather than iteritems() so this also runs on Python 3
        for col, card in cols.items():
            final[col] += card * weight

    # scale by 100 and truncate to int; columns that end up at 0 are dropped
    really_final = {}
    for key, value in final.items():
        card = int(value * 100)
        if card > 0:
            really_final[key] = card
    really_final['_'] = int(total)

    return really_final