def to_html_doc(title, items, image_base_path, image_width, image_height):
    doc, tag, text = Doc().tagtext()
    with tag('html'):
        with tag('head'):
            with tag('style'):
                doc.asis(
                    f'img {{max-width:{image_width}px;max-height:{image_height}px;width:auto;height:auto;}}'
                )
        with tag('body'):
            with tag('h1'):
                text(title)
            for make, make_items in groupby(lambda item: item['make'], items).items():
                for model, model_items in groupby(lambda item: item['model'], make_items).items():
                    for seller, seller_items in groupby(
                            lambda item: item['seller'], model_items).items():
                        with tag('h3'):
                            text(make + ' - ' + model + ' - ' + seller +
                                 ' (' + str(len(seller_items)) + ')')
                        for item in seller_items:
                            with tag('img',
                                     src=image_base_path + '/' + item['image_id'] + '.jpg?authuser=1',
                                     height=f'{image_height}',
                                     width=f'{image_width}'):
                                text('')
    return doc
def test_groupby_non_callable():
    assert groupby(0, [(1, 2), (1, 3), (2, 2), (2, 4)]) == \
        {1: [(1, 2), (1, 3)], 2: [(2, 2), (2, 4)]}
    assert groupby([0], [(1, 2), (1, 3), (2, 2), (2, 4)]) == \
        {(1,): [(1, 2), (1, 3)], (2,): [(2, 2), (2, 4)]}
    assert groupby([0, 0], [(1, 2), (1, 3), (2, 2), (2, 4)]) == \
        {(1, 1): [(1, 2), (1, 3)], (2, 2): [(2, 2), (2, 4)]}
def test_last_drop_take():
    l = list([1, 2, 3])
    assert_that(pvector(drop(2, l))).is_equal_to(v(3))
    assert_that(pvector(take(2, l))).is_equal_to(v(1, 2))
    assert_that(pmap(groupby(first, ['ABC', 'ABA', 'BAB', 'BAA']))).is_equal_to(
        m(A=['ABC', 'ABA'], B=['BAB', 'BAA']))
    assert_that(pmap(groupby(identity, ['ABC', 'ABA', 'BAB', 'BAA']))).is_equal_to(
        m(ABC=['ABC'], ABA=['ABA'], BAB=['BAB'], BAA=['BAA']))
def formFeatureMatrix(heroIDs, match):
    currentHeroAmount = len(heroIDs) + 1
    result = match['radiant_win']  # True if radiant won
    teams = groupby('team', match['players'])
    dire = teams['D']
    radiant = teams['R']
    # Dire is first, then Radiant
    matchVector = []
    for player in dire:
        matchVector.append(player['hero_id'])
    for player in radiant:
        matchVector.append(player['hero_id'])
    matchVector.append(result)
    finalVector = list(concat([(2 * currentHeroAmount) * [0], [0]]))
    for direPick in matchVector[:5]:
        normalizeDirePick = direPick - 1
        finalVector[normalizeDirePick] = 1
    for radiantPick in matchVector[5:10]:
        normalizeRadiantPick = currentHeroAmount + (radiantPick - 1)
        finalVector[normalizeRadiantPick] = 1
    if result > 0:
        finalVector[-1] = 0  # dire lost aka radiant won
    else:
        finalVector[-1] = 1  # dire|radiant|direwon
    return finalVector
def test_get_variable_counts_domain_not_in_df(pivot_df):
    # implementation detail to avoid repeating query
    variables = Variable.objects.all()
    var_lookup = groupby('id', variables.values('id', 'label', 'code'))
    df = get_variable_counts(pivot_df, var_lookup, "FAKEDOMAIN")
    assert df is None
def sample(dataset, key, limit):
    by_id = groupby(
        lambda x: (x['id'], x['make'], x['model'], x['seller'], x['color'], x['year']),
        dataset)
    classes = groupby(key, by_id.items())

    def sample_images(xs):
        ads = random.sample(xs, limit if len(xs) >= limit else len(xs))
        images = mapcat(second, ads)
        return list(take(limit, images))

    samples = map(sample_images, classes.values())
    sample = concat(samples)
    return sample
def read_netlist(directory):
    netlist_path = os.path.join(directory, "steps/pcb/netlists/cadnet/netlist")
    linerec = read_linerecords(netlist_path)
    netnames = parse_net_names(linerec)
    # All the following operations are performed lazily
    decoded = run_decoder(linerec["Netlist points"], netlist_decoder_options)
    decoded = filter(not_none, decoded)
    decoded_mapped = map(functools.partial(assign_net_name, netnames), decoded)
    return groupby(operator.attrgetter("netid"), decoded_mapped)
def get_tables_in_database(
        client: "ImmutaClient", config: Dict[str, Any]
) -> Dict[str, List[Dict[str, str]]]:
    """Returns a schema_name: [tables...] mapping for the database
    specified by the config"""
    # Grab list of all tables in all schemas in the database
    tables_in_database = client.get_table_names(config)
    # Group the tables per schema
    return groupby("tableSchema", tables_in_database)
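# A minimal sketch of what the grouping above produces, assuming `groupby` is
# toolz.itertoolz.groupby; the rows below are invented stand-ins for what
# client.get_table_names(config) might return.
from toolz.itertoolz import groupby

rows = [
    {"tableSchema": "public", "tableName": "users"},
    {"tableSchema": "public", "tableName": "orders"},
    {"tableSchema": "audit", "tableName": "events"},
]
print(groupby("tableSchema", rows))
# {'public': [{'tableSchema': 'public', 'tableName': 'users'},
#             {'tableSchema': 'public', 'tableName': 'orders'}],
#  'audit': [{'tableSchema': 'audit', 'tableName': 'events'}]}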
def process_shuffle_sort(in_seq):
    """
    Simulates shuffle-sort phase
    :param in_seq: (k, v) pairs from mapper application
    :return: shuffle-sorted (k, [v, v, v...]) pairs to be used for reduce
    """
    grp = groupby(lambda t: t[0], in_seq)
    for k, vs in grp.items():
        yield (k, [v[1] for v in vs])
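# A quick illustration of the shuffle-sort helper above, assuming `groupby`
# is toolz.itertoolz.groupby and process_shuffle_sort is in scope; the
# word-count pairs are made up for the example.
mapper_output = [("cat", 1), ("dog", 1), ("cat", 1), ("bird", 1), ("cat", 1)]
print(dict(process_shuffle_sort(mapper_output)))
# {'cat': [1, 1, 1], 'dog': [1], 'bird': [1]}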
def grouped_craft(self) -> str:
    groups = groupby('craft', [x.dict() for x in self.people])
    person_groups: List[PersonGroup] = [
        PersonGroup(craft=craft, number=len(persons), people=persons)
        for craft, persons in groups.items()
    ]
    return "\n\n".join([x.extract() for x in person_groups])
def collect_metrics(reactor, config, log, client=None, authenticator=None,
                    _print=False):
    """
    Start collecting the metrics

    :param reactor: Twisted reactor
    :param dict config: Configuration read from file containing all info
        needed to collect metrics
    :param :class:`silverberg.client.CQLClient` client: Optional cassandra
        client. A new client will be created if this is not given and
        disconnected before returning
    :param :class:`otter.auth.IAuthenticator` authenticator: Optional
        authenticator. A new authenticator will be created if this is not
        given
    :param bool _print: Should debug messages be printed to stdout?

    :return: :class:`Deferred` fired with ``list`` of `GroupMetrics`
    """
    _client = client or connect_cass_servers(reactor, config['cassandra'])
    authenticator = authenticator or generate_authenticator(reactor,
                                                            config['identity'])
    store = CassScalingGroupCollection(_client, reactor, 1000)
    dispatcher = get_dispatcher(reactor, authenticator, log,
                                get_service_configs(config), store)

    # calculate metrics on launch_server and non-paused groups
    groups = yield perform(dispatcher, Effect(GetAllValidGroups()))
    groups = [
        g for g in groups
        if json.loads(g["launch_config"]).get("type") == "launch_server" and
        (not g.get("paused", False))]
    tenanted_groups = groupby(lambda g: g["tenantId"], groups)
    group_metrics = yield get_all_metrics(
        dispatcher, tenanted_groups, log, _print=_print)

    # Add to cloud metrics
    metr_conf = config.get("metrics", None)
    if metr_conf is not None:
        eff = add_to_cloud_metrics(
            metr_conf['ttl'], config['region'], group_metrics,
            len(tenanted_groups), config, log, _print)
        eff = Effect(TenantScope(eff, metr_conf['tenant_id']))
        yield perform(dispatcher, eff)
        log.msg('added to cloud metrics')
        if _print:
            print('added to cloud metrics')
    if _print:
        group_metrics.sort(key=lambda g: abs(g.desired - g.actual),
                           reverse=True)
        print('groups sorted as per divergence')
        print('\n'.join(map(str, group_metrics)))

    # Disconnect only if we created the client
    if not client:
        yield _client.disconnect()

    defer.returnValue(group_metrics)
def process_shuffle_sort(in_seq):
    """
    Simulates shuffle-sort phase
    :param in_seq: (k, v) pairs from mapper application
    :return: shuffle-sorted (k, [v, v, v...]) pairs to be used for reduce
    """
    # If t[0] is a list it needs to be cast to a tuple, because lists can't
    # be used as dict keys in Python.
    grp = groupby(lambda t: (tuple(t[0]) if type(t[0]) is list else t[0]),
                  in_seq)
    for k, vs in grp.items():
        yield (k, [v[1] for v in vs])
def group_by(key, seq):
    """ Group a list by a key function

    >>> names = ['Alice', 'Bob', 'Charlie', 'Dan', 'Edith', 'Frank']
    >>> group_by(len, names)  # doctest: +SKIP
    {3: ['Bob', 'Dan'], 5: ['Alice', 'Edith', 'Frank'], 7: ['Charlie']}

    >>> iseven = lambda x: x % 2 == 0
    >>> group_by(iseven, [1, 2, 3, 4, 5, 6, 7, 8])  # doctest: +SKIP
    {False: [1, 3, 5, 7], True: [2, 4, 6, 8]}

    Non-callable keys imply grouping on a member.

    >>> group_by('gender', [{'name': 'Alice', 'gender': 'F'},
    ...                     {'name': 'Bob', 'gender': 'M'},
    ...                     {'name': 'Charlie', 'gender': 'M'}])  # doctest:+SKIP
    {'F': [{'gender': 'F', 'name': 'Alice'}],
     'M': [{'gender': 'M', 'name': 'Bob'},
           {'gender': 'M', 'name': 'Charlie'}]}

    See Also:
        countby
    """
    return groupby(key, seq)
def group_all(sanitized_data):
    """
    Handle all the required data manipulations in memory.
    Should simply map to the required objects for the front end.
    """
    grouped = groupby("current_dept_description", sanitized_data)
    assert isinstance(grouped, dict)
    assert grouped
    double_grouped = [
        {
            "name": key,
            "ethnicity": count_by_key(groupby("income_level", grouped[key]),
                                      "ethnic_code_description"),
            "gender": count_by_key(groupby("income_level", grouped[key]),
                                   "gender")
        }
        for key in grouped
    ]
    assert all(isinstance(key, dict) for key in double_grouped)
    return double_grouped
def select(self):
    d = groupby(attrgetter('fitness'), self.sorted_individuals)
    inds = []
    ks = np.sort(list(d.keys()))[::-1]
    while len(inds) < self.n_individuals:
        for k in ks:
            if d[k]:
                a = choice(d[k])
                inds.append(a)
                d[k].remove(a)
    self.individuals = inds
def generate_example_timeseries_table(filename: Path, conditions: List[str],
                                      strains: List[str], timelimit=100):
    logger.debug(
        f"generate_example_timeseries({filename.name}, {conditions}, {strains}, {timelimit})"
    )
    output_filename = folder_data / "example_timeseries.tsv"

    table = pandas.read_csv(filename, sep="\t").set_index('time')
    logger.debug(f"The input table has {len(table.columns)} columns.")
    cols = list()
    for column in table.columns:
        strain, condition, plate, replicate = column.split('.')
        # logger.debug(f"{strain, condition}")
        if strain in strains and condition in conditions:
            cols.append(column)
    logger.debug(f"Reduced the number of columns to {len(cols)}")
    table = table[cols]
    logger.debug(f"The second table has {len(table.columns)} columns.")
    table = table.loc[[i for i in table.index if i <= timelimit]]

    # Change the plate, replicate fields
    groups = itertoolz.groupby(lambda s: ".".join(s.split('.')[:2]),
                               table.columns)
    wanted = [
        '.1.1', '.1.2', '.1.3', '.2.1', '.2.2', '.2.3', '.3.1', '.3.2', '.3.3'
    ]
    columnmap = dict()
    for name, group in groups.items():
        if name in ['A224T.RKS', 'N455K.RKS', 'WT.RKS']:
            result = {col: col for col in group[:9]}
        else:
            result = dict()
            for item_wanted, item_actual in zip(wanted, group):
                s, c, p, r = item_actual.split('.')
                item = f"{s}.{c}" + item_wanted
                result[item_actual] = item
        columnmap.update(result)

    # First remove any columns not in `columnmap`
    table = table[sorted(columnmap.keys())]
    # Rename columns
    table.columns = [columnmap[i] for i in table.columns]

    table.to_csv(output_filename, sep="\t")
def json_to_LBConfigs(lbs_json):
    """
    Convert load balancer config from JSON to :obj:`CLBDescription`

    :param lbs_json: Sequence of load balancer configs
    :return: Sequence of :class:`ILBDescription` providers
    """
    by_type = groupby(lambda lb: lb.get('type', 'CloudLoadBalancer'), lbs_json)
    return pset(
        [CLBDescription(lb_id=str(lb['loadBalancerId']), port=lb['port'])
         for lb in by_type.get('CloudLoadBalancer', [])] +
        [RCv3Description(lb_id=str(lb['loadBalancerId']))
         for lb in by_type.get('RackConnectV3', [])]
    )
def partition_groups(grouper, seq, keys):
    """
    Partition a sequence based on a grouping function. This is like groupby,
    but it returns a tuple of fixed length instead of a dict of arbitrary
    length.

    :param callable grouper: A function which returns a key for an item.
    :param seq: A sequence of items.
    :param keys: A sequence of key names to expect.

    :return: A tuple of lists for which the grouper returned each key, in the
        same order as the keys argument.
    """
    groups = groupby(grouper, seq)
    return tuple(groups.get(key, []) for key in keys)
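# A short usage sketch for partition_groups, assuming `groupby` above is
# toolz.itertoolz.groupby; the job records and status values are invented
# for the example.
jobs = [
    {"id": 1, "status": "done"},
    {"id": 2, "status": "failed"},
    {"id": 3, "status": "done"},
]
done, failed, pending = partition_groups(
    lambda job: job["status"], jobs, ("done", "failed", "pending"))
# done    -> the two "done" records, in input order
# failed  -> the single "failed" record
# pending -> [] (keys with no matches come back as empty lists)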
def auto_combine(datasets, concat_dim=None):
    """Attempt to auto-magically combine the given datasets into one.

    This method attempts to combine a list of datasets into a single entity by
    inspecting metadata and using a combination of concat and merge.

    It does not concatenate along more than one dimension or align or sort
    data under any circumstances. It will fail in complex cases, for which you
    should use ``concat`` and ``merge`` explicitly.

    When ``auto_combine`` may succeed:

    * You have N years of data and M data variables. Each combination of a
      distinct time period and set of data variables is saved as its own
      dataset.

    Examples of when ``auto_combine`` fails:

    * In the above scenario, one file is missing, containing the data for one
      year's data for one variable.
    * In the most recent year, there is an additional data variable.
    * Your data includes "time" and "station" dimensions, and each year's data
      has a different set of stations.

    Parameters
    ----------
    datasets : sequence of xarray.Dataset
        Dataset objects to merge.
    concat_dim : str or DataArray or Index, optional
        Dimension along which to concatenate variables, as used by
        :py:func:`xarray.concat`. You only need to provide this argument if
        the dimension along which you want to concatenate is not a dimension
        in the original datasets, e.g., if you want to stack a collection of
        2D arrays along a third dimension.

    Returns
    -------
    combined : xarray.Dataset

    See also
    --------
    concat
    Dataset.merge
    """
    from toolz import itertoolz
    grouped = itertoolz.groupby(lambda ds: tuple(sorted(ds.data_vars)),
                                datasets).values()
    concatenated = [_auto_concat(ds, dim=concat_dim) for ds in grouped]
    merged = reduce(lambda ds, other: ds.merge(other), concatenated)
    return merged
def _get_balance(self, cr, uid, ids, name, args, context=None):
    """Computed as follows:

    A) Cleared Deposits, Credits, and Interest
       Amount: SUM of Amts of lines Cleared Deposits, Credits, and Interest
       # of Items: Number of lines
    B) Checks, Withdrawals, Debits, and Service Charges
       Amount: Checks, Withdrawals, Debits, and Service Charges Amount
       # of Items: Number of lines
    Cleared Balance: Total Sum of the Deposit Amount Cleared (A) -
       Total Sum of Checks Amount Cleared (B)
    Difference = (Ending Balance - Beginning Balance) - Cleared Balance;
       this should be zero.
    """
    res = {}
    account_precision = self.pool['decimal.precision'].precision_get(
        cr, uid, 'Account')
    for stmt in self.browse(cr, uid, ids, context=context):
        res[stmt.id] = {}
        cleared = lambda l: l.cleared_bank_account and 'Cleared' or 'Uncleared'
        get_amount = lambda l: [
            round(v.amount, account_precision) for v in l
        ]
        process_lines = compose(valmap(get_amount), groupby(cleared))
        for line_type in ('debit', 'credit'):
            r = process_lines(eval('stmt.%s_move_line_ids' % line_type))
            res[stmt.id].update({
                'sum_of_%ss' % line_type: sum(r.get('Cleared', [])),
                'sum_of_%ss_lines' % line_type: len(r.get('Cleared', [])),
                'sum_of_%ss_unclear' % line_type: sum(r.get('Uncleared', [])),
                'sum_of_%ss_lines_unclear' % line_type:
                    len(r.get('Uncleared', []))
            })
        res[stmt.id]['cleared_balance'] = round(
            res[stmt.id]['sum_of_debits'] - res[stmt.id]['sum_of_credits'],
            account_precision)
        res[stmt.id]['uncleared_balance'] = round(
            res[stmt.id]['sum_of_debits_unclear'] -
            res[stmt.id]['sum_of_credits_unclear'],
            account_precision)
        res[stmt.id]['difference'] = round(
            (stmt.ending_balance - stmt.starting_balance) -
            res[stmt.id]['cleared_balance'], account_precision)
    return res
def plot_points(title, X, L):
    plt.figure(figsize=(6, 6))
    for label, X_label in groupby(lambda t: L[t[0]], enumerate(X)).items():
        # these 2 lines are hacks!!!!
        X_label = list(map(lambda x: x[1], X_label))
        X_label = np.array(X_label)
        plt.scatter(X_label[:, 0], X_label[:, 1], label=label)
    plt.title(title)
    plt.xlabel('t-sne 1')
    plt.ylabel('t-sne 2')
    plt.legend()
    return plt
def test_get_variable_counts(pivot_df):
    # implementation detail to avoid repeating query
    variables = Variable.objects.all()
    var_lookup = groupby('id', variables.values('id', 'label', 'code'))
    df = get_variable_counts(pivot_df, var_lookup, "FOO")
    #    study study_label   FOO  id  count  var_code var_label
    # 0      9        ID#8  11.0  23     45      11.0       var
    # 1     10        ID#9  11.0  10     21      11.0       var
    assert set(df.columns) == set([
        'study', 'study_label', 'FOO', 'id', 'count', 'var_code', 'var_label',
        'subjects'
    ])
    pd.util.testing.assert_series_equal(
        df.var_label, pd.Series(data=["var", "var"], name='var_label'))
def group_by_plate_and_treatment(
        columns: List[str]) -> Dict[Tuple[str, str, str], List[str]]:
    """ Combines all of the columns in `table` based on their `plate` and
    `replicate` values.

    Parameters
    ----------
    columns: List[str]
        The columns to group
    """

    def groupbykey(s) -> Tuple[str, str, str]:
        a, b, c, d = s.split('.')
        return a, c, d

    # Group the columns by plate, replicate, and strain while ignoring the
    # condition.
    groups = itertoolz.groupby(groupbykey, columns)
    return groups
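# A small worked example, assuming the strain.condition.plate.replicate
# column naming used elsewhere in this module; the column names themselves
# are hypothetical.
columns = ["WT.RKS.1.1", "WT.ABC.1.1", "WT.RKS.2.3", "A224T.RKS.1.1"]
print(group_by_plate_and_treatment(columns))
# {('WT', '1', '1'): ['WT.RKS.1.1', 'WT.ABC.1.1'],
#  ('WT', '2', '3'): ['WT.RKS.2.3'],
#  ('A224T', '1', '1'): ['A224T.RKS.1.1']}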
def apply_sort(data, sort_keys):
    # Data is a list to be sorted. Sort_keys is a list of tuples (key, reverse)
    # where key is a dict key in a list item, and reverse says whether to sort
    # in reverse order or not. (i.e. False for ascending, True for descending)
    if not sort_keys:
        return data
    else:
        # Parse the first sort_key
        if isinstance(sort_keys[0], string_types):
            key = sort_keys[0]
            reverse = False
        else:
            key, reverse = sort_keys[0]
        remaining_sort_keys = sort_keys[1:]

        # Sort into groups by this key
        groups = groupby(itemgetter(key), data)
        try:
            key_sample = next((k for k in groups.keys() if k is not None))
        except StopIteration:
            key_sample = None

        if key_sample is None:
            key_fn = lambda _: True
        elif isinstance(key_sample, string_types):
            key_fn = lambda s: s.lower() if s is not None else ''
        elif isinstance(key_sample, bool):
            key_fn = bool
        elif isinstance(key_sample, numbers.Number):
            key_fn = lambda n: n if n is not None else 0
        else:
            # Unknown, so we'll just use ident
            key_fn = lambda x: x

        sorted_indices = sorted(list(groups.keys()), key=key_fn,
                                reverse=reverse)

        # Sort each group by remaining keys, and concat them together in an
        # order sorted by this key.
        return list(
            concat(
                apply_sort(groups[index], remaining_sort_keys)
                for index in sorted_indices))
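# A usage sketch for apply_sort, assuming its dependencies (groupby, concat,
# itemgetter, string_types, numbers) are in scope; the records are invented.
rows = [
    {"name": "carol", "age": 31},
    {"name": "alice", "age": 25},
    {"name": "bob", "age": 31},
]
# Sort by age descending, then break ties by name ascending.
print(apply_sort(rows, [("age", True), ("name", False)]))
# [{'name': 'bob', 'age': 31}, {'name': 'carol', 'age': 31},
#  {'name': 'alice', 'age': 25}]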
def action_process(self, cr, uid, ids, context=None):
    """Set the account move lines as 'Cleared' and assign
    'Bank Acc Rec Statement ID' for the statement lines which are marked
    as 'Cleared'."""
    aml_obj = self.pool['account.move.line']
    # If difference balance not zero prevent further processing
    self.check_difference_balance(cr, uid, ids, context=context)
    cleared = lambda l: l.cleared_bank_account and 'Cleared' or 'Uncleared'
    has_move = lambda l: l.move_line_id and True or False
    move_id = lambda res: [l.move_line_id.id for l in res]
    process_lines = compose(valmap(move_id), groupby(cleared),
                            filter(has_move))
    for stmt in self.browse(cr, uid, ids, context=context):
        statement_lines = process_lines(stmt.credit_move_line_ids +
                                        stmt.debit_move_line_ids)
        if statement_lines.get('Cleared'):
            aml_obj.write(cr, uid, statement_lines['Cleared'], {
                'cleared_bank_account': True,
                'bank_acc_rec_statement_id': stmt.id
            }, context=context)
        if statement_lines.get('Uncleared'):
            aml_obj.write(cr, uid, statement_lines['Uncleared'], {
                'cleared_bank_account': False,
                'bank_acc_rec_statement_id': False
            }, context=context)
        self.write(
            cr, uid, [stmt.id], {
                'state': 'done',
                'verified_by_user_id': uid,
                'verified_date': fields.date.context_today(
                    self, cr, uid, context=context)
            }, context=context)
    return True
def _iter_subplots(
    hist: CategoryBootstrapHistogram,
    xattr: str = "attenuation",
    groupbyattr: str = "scattering",
) -> Generator[_SubplotData, None, None]:
    signalonly = filter(lambda item: "ibd" in item.category.eventtype.lower(),
                        hist)
    groupedbyattr = groupby(lambda item: getattr(item.category, groupbyattr),
                            signalonly)
    for attrvalue, items in groupedbyattr.items():
        X = [getattr(it.category, xattr) for it in items]
        mean, meanerr = zip(*[_calcmu(it.histogram) for it in items])
        sigma, sigmaerr = zip(*[_calcsigma(it.histogram) for it in items])
        (X, mean, meanerr, sigma, sigmaerr) = (
            np.array(it) for it in (X, mean, meanerr, sigma, sigmaerr))
        yield _SubplotData(groupbyattr, attrvalue, xattr, X, mean, meanerr,
                           sigma, sigmaerr)
def get(self, request, *args, **kwargs):
    studies = self.resolve_studies()
    domain = Domain.objects.get(pk=kwargs.get('domain_id'))
    df = get_counts_df(studies)
    pivot_df = pivot_counts_df(df)
    # need for later, but storing for single query
    variables = Variable.objects.all()
    var_lookup = groupby('id', variables.values('id', 'label', 'code'))
    if self.by_age is True:
        domain_df = get_variable_count_by_variable(pivot_df, var_lookup,
                                                   domain.code)
    else:
        domain_df = get_variable_counts(pivot_df, var_lookup, domain.code)
    # Build response
    filename = self.get_filename(domain)
    content_type = 'text/csv'
    response = HttpResponse(content_type=content_type)
    response['Content-Disposition'] = 'attachment; filename="{0}"'.format(
        filename)
    domain_df.to_csv(response)
    return response
def _get_ngram_spans(
    self,
    doc: Doc,
):
    ngrams = []
    for sent in doc.sents:
        for n in range(1, len(list(sent))):
            tokens = [t for t in sent if not (t.is_stop or t.is_punct)]
            ngrams.extend(_ngrams(tokens, n))

    def ngram_key(ngram):
        return tuple(
            self.stemmer.stem(token.text).lower() for token in ngram)

    key_to_ngrams = itertoolz.groupby(ngram_key, ngrams)
    key_to_spans = {}
    for k, grouped_ngrams in key_to_ngrams.items():
        key_to_spans[k] = [(ngram[0].i, ngram[-1].i + 1)
                           for ngram in grouped_ngrams]
    return key_to_spans
def test_get_variable_count_by_variable(pivot_df):
    # implementation detail to avoid repeating query
    variables = Variable.objects.all()
    var_lookup = groupby('id', variables.values('id', 'label', 'code'))
    df = get_variable_count_by_variable(pivot_df, var_lookup, "FOO",
                                        qualifier_code="AGECAT")
    #    study study_label  AGECAT   FOO  id  count  var_code var_label  domain_code  qual_code qual_label
    # 0     11       ID#10    13.0  14.0  29     45      14.0       var            0       13.0        age
    assert set(df.columns) == set([
        'study', 'study_label', 'AGECAT', 'FOO', 'id', 'count', 'var_code',
        'var_label', 'qual_code', 'qual_label', 'subjects'
    ])
    pd.util.testing.assert_series_equal(
        df.var_label, pd.Series(data=["var"], name='var_label'))
    pd.util.testing.assert_series_equal(
        df.qual_label, pd.Series(data=["age"], name='qual_label'))
def max(expr, env):
    coll = eval(expr.coll, env)
    groups = groupby(itemgetter(1), coll.iteritems())
    maxgroup = max(groups)
    unique_max = max(groups) + (len(groups[maxgroup]) != 1)
    keys = members(typeof(expr), env)

    def maxdiff(ks):
        if len(ks) > 1:
            return max(abs(coll[a] - coll[b])
                       for a, b in combinations(ks, 2))
        else:
            if coll[ks[0]] == unique_max:
                return 0
            else:
                # TODO: How does this math change for sports with non-1
                # scores (e.g. rugby)?
                return 1 + maxgroup - coll[ks[0]]

    return {ks: maxdiff(ks) for ks in keys}
def to_html_doc(title, items, image_base_path, image_width, image_height):
    doc, tag, text = Doc().tagtext()
    with tag('html'):
        with tag('head'):
            with tag('style'):
                doc.asis(
                    f'img {{max-width:{image_width}px;max-height:{image_height}px;width:auto;height:auto;}}'
                )
        with tag('body'):
            with tag('h1'):
                text(title)
            for year, year_items in ordered_dict_sorted_by_key(
                    groupby(lambda item: item['year'], items)).items():
                with tag('h3'):
                    text(f'{year}')
                for item in year_items:
                    with tag('img',
                             src=image_base_path + '/' + item['image_id'] + '.jpg?authuser=1',
                             height=f'{image_height}',
                             width=f'{image_width}'):
                        text('')
    return doc
def generate_metadata(group_id, lb_descriptions):
    """
    Generate autoscale-specific Nova server metadata given the group ID and
    an iterable of :class:`ILBDescription` providers.

    :return: a metadata `dict` containing the group ID and LB information
    """
    metadata = {
        'rax:auto_scaling_group_id': group_id,
        'rax:autoscale:group:id': group_id
    }
    descriptions = groupby(lambda desc: (desc.lb_id, type(desc)),
                           lb_descriptions)
    for (lb_id, desc_type), descs in descriptions.iteritems():
        if desc_type == CLBDescription:
            key = 'rax:autoscale:lb:CloudLoadBalancer:{0}'.format(lb_id)
            metadata[key] = json.dumps([
                {'port': desc.port} for desc in descs])
        elif desc_type == RCv3Description:
            metadata['rax:autoscale:lb:RackConnectV3:{0}'.format(lb_id)] = ""
    return metadata
def _to_tasks(messages):
    return freeze(groupby(attrgetter('task_uuid'), messages))
def setUp(self):
    self.groups = groupby(
        lambda g: g["tenantId"],
        ([{"tenantId": "t1", "a": "1"}, {"tenantId": "t1", "a": "2"}] +
         [{"tenantId": "t{}".format(i), "b": str(i)} for i in range(2, 10)]))
def partition(expr, env):
    groups = groupby(attrgetter(expr.by), eval(expr.coll, env))
    return {k: groups.get(k, []) for k in members(typeof(expr.by), env)}
def max(expr, env):
    groups = groupby(itemgetter(1), eval(expr.coll, env).iteritems())
    return {k for k, v in groups[max(groups)]}
def converge_launch_server(desired_state, servers_with_cheese,
                           load_balancer_nodes, load_balancers, now,
                           timeout=3600):
    """
    Create steps that indicate how to transition from the state provided
    by the given parameters to the :obj:`DesiredServerGroupState` described
    by ``desired_state``.

    :param DesiredServerGroupState desired_state: The desired group state.
    :param set servers_with_cheese: a list of :obj:`NovaServer` instances.
        This must only contain servers that are being managed for the
        specified group.
    :param load_balancer_nodes: a set of :obj:`ILBNode` providers. This must
        contain all the load balancer mappings for all the load balancers
        (of all types) on the tenant.
    :param dict load_balancers: Collection of load balancer objects accessed
        based on its ID. The object is opaque and is not used by planner
        directly. It is intended to contain extra info for specific LB
        provider
    :param float now: number of seconds since the POSIX epoch indicating the
        time at which the convergence was requested.
    :param float timeout: Number of seconds after which we will delete a
        server in BUILD.
    :rtype: :obj:`pbag` of `IStep`
    """
    newest_to_oldest = sorted(servers_with_cheese, key=lambda s: -s.created)

    servers = defaultdict(lambda: [], groupby(get_destiny, newest_to_oldest))
    servers_in_active = servers[Destiny.CONSIDER_AVAILABLE]

    building_too_long, waiting_for_build = partition_bool(
        lambda server: now - server.created >= timeout,
        servers[Destiny.WAIT_WITH_TIMEOUT])

    create_server = CreateServer(server_config=desired_state.server_config)

    # delete any servers that have been building for too long
    delete_timeout_steps = [DeleteServer(server_id=server.id)
                            for server in building_too_long]

    # create servers
    create_steps = [create_server] * (
        desired_state.capacity - (
            len(servers_in_active) +
            len(waiting_for_build) +
            len(servers[Destiny.WAIT]) +
            len(servers[Destiny.AVOID_REPLACING])))

    # Scale down over capacity, starting with building, then WAIT, then
    # AVOID_REPLACING, then active, preferring older. Also, finish
    # draining/deleting servers already in draining state
    servers_in_preferred_order = (
        servers_in_active +
        servers[Destiny.AVOID_REPLACING] +
        servers[Destiny.WAIT] +
        waiting_for_build)
    servers_to_delete = servers_in_preferred_order[desired_state.capacity:]

    def drain_and_delete_a_server(server):
        return _drain_and_delete(
            server,
            desired_state.draining_timeout,
            [node for node in load_balancer_nodes if node.matches(server)],
            now)

    try:
        scale_down_steps = list(
            mapcat(drain_and_delete_a_server,
                   servers_to_delete + servers[Destiny.DRAIN]))
    except DrainingUnavailable as de:
        return pbag([fail_convergence(de)])

    # delete all servers in error - draining does not need to be
    # handled because servers in error presumably are not serving
    # traffic anyway
    delete_error_steps = [DeleteServer(server_id=server.id)
                          for server in servers[Destiny.DELETE]]

    # clean up all the load balancers from deleted and errored servers
    cleanup_errored_and_deleted_steps = [
        remove_node_from_lb(lb_node)
        for server in servers[Destiny.DELETE] + servers[Destiny.CLEANUP]
        for lb_node in load_balancer_nodes if lb_node.matches(server)]

    # converge all the servers that remain to their desired load balancer
    # state
    still_active_servers = filter(lambda s: s not in servers_to_delete,
                                  servers_in_active)
    try:
        lb_converge_steps = [
            step
            for server in still_active_servers
            for step in _converge_lb_state(
                server,
                [node for node in load_balancer_nodes
                 if node.matches(server)],
                load_balancers,
                now,
                # Temporarily using build timeout as node offline timeout.
                # See https://github.com/rackerlabs/otter/issues/1905
                timeout)
            ]
    except DrainingUnavailable as de:
        return pbag([fail_convergence(de)])

    # Converge again if we expect state transitions on any servers
    converge_later = []
    if any((s not in servers_to_delete for s in waiting_for_build)):
        converge_later = [
            ConvergeLater(reasons=[ErrorReason.String('waiting for servers')])]

    unavail_fmt = ('Waiting for server {server_id} to transition to ACTIVE '
                   'from {status}')
    reasons = [ErrorReason.UserMessage(unavail_fmt.format(server_id=s.id,
                                                          status=s.state.name))
               for s in servers[Destiny.WAIT] if s not in servers_to_delete]
    if reasons:
        converge_later.append(ConvergeLater(limited=True, reasons=reasons))

    return pbag(create_steps + scale_down_steps + delete_error_steps +
                cleanup_errored_and_deleted_steps + delete_timeout_steps +
                lb_converge_steps + converge_later)
def converge_launch_stack(desired_state, stacks):
    """
    Create steps that indicate how to transition from the state provided
    by the given parameters to the :obj:`DesiredStackGroupState` described by
    ``desired_state``.

    See note [Converging stacks] for more information.

    :param DesiredStackGroupState desired_state: The desired group state.
    :param set stacks: a set of :obj:`HeatStack` instances. This must only
        contain stacks that are being managed for the specified group.
    :rtype: :obj:`pbag` of `IStep`
    """
    config = desired_state.stack_config

    by_state = groupby(lambda stack: stack.get_state(), stacks)

    stacks_complete = by_state.get(StackState.CREATE_UPDATE_COMPLETE, [])
    stacks_failed = by_state.get(StackState.CREATE_UPDATE_FAILED, [])
    stacks_check_complete = by_state.get(StackState.CHECK_COMPLETE, [])
    stacks_check_failed = by_state.get(StackState.CHECK_FAILED, [])
    stacks_in_progress = by_state.get(StackState.IN_PROGRESS, [])
    stacks_delete_in_progress = by_state.get(StackState.DELETE_IN_PROGRESS, [])
    stacks_delete_failed = by_state.get(StackState.DELETE_FAILED, [])

    stacks_good = stacks_complete + stacks_check_complete
    stacks_amiss = (stacks_failed +
                    stacks_check_failed +
                    stacks_in_progress +
                    stacks_delete_in_progress)

    if stacks_delete_failed:
        reasons = [ErrorReason.String("Stacks in DELETE_FAILED found.")]
        return pbag([FailConvergence(reasons)])

    # If there are no stacks in CHECK_* or other work to be done, we assume
    # we're at the beginning of a convergence cycle and need to perform stack
    # checks.
    if stacks_complete and not (stacks_check_complete or stacks_amiss):
        return pbag([CheckStack(stack) for stack in stacks_complete])

    # Otherwise, if all stacks are in a good state and we have the right
    # number of stacks, we call update on the stacks in CHECK_COMPLETE and
    # return SUCCESS without waiting for it to finish (calling update on a
    # stack in CREATE_COMPLETE is essentially a no-op) so that there will be
    # no stacks in CREATE_* the next time otter tries to converge this group.
    # This will cause all of the stacks to be checked at that time and let
    # otter know if there are any stacks that have fallen into an error state.
    elif not stacks_amiss and len(stacks_good) == desired_state.capacity:
        return pbag([UpdateStack(stack=stack, stack_config=config,
                                 retry=False)
                     for stack in stacks_check_complete])

    def get_create_steps():
        create_stack = CreateStack(stack_config=config)
        good_or_fixable_stack_count = (len(stacks_good) +
                                       len(stacks_in_progress) +
                                       len(stacks_check_failed))
        return [create_stack] * (desired_state.capacity -
                                 good_or_fixable_stack_count)

    def get_scale_down_steps():
        stacks_in_preferred_order = (
            stacks_good + stacks_in_progress + stacks_check_failed)
        unneeded_stacks = stacks_in_preferred_order[desired_state.capacity:]
        return map(DeleteStack, unneeded_stacks)

    def get_fix_steps(scale_down_steps):
        num_stacks_to_update = (len(stacks_check_failed) -
                                len(scale_down_steps))
        stacks_to_update = (stacks_check_failed[:num_stacks_to_update]
                            if num_stacks_to_update > 0 else [])
        return [UpdateStack(stack=s, stack_config=config)
                for s in stacks_to_update]

    create_steps = get_create_steps()
    scale_down_steps = get_scale_down_steps()
    fix_steps = get_fix_steps(scale_down_steps)

    delete_stacks_failed_steps = map(DeleteStack, stacks_failed)

    converge_later = (
        [ConvergeLater([ErrorReason.String("Waiting for stacks to finish.")])]
        if stacks_delete_in_progress or stacks_in_progress
        else [])

    return pbag(create_steps + fix_steps + scale_down_steps +
                delete_stacks_failed_steps + converge_later)
def test_groupby():
    assert groupby(iseven, [1, 2, 3, 4]) == {True: [2, 4], False: [1, 3]}
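# The same assertion against toolz.itertoolz.groupby directly; `iseven` here
# is the helper the test above assumes is defined elsewhere.
from toolz.itertoolz import groupby

iseven = lambda x: x % 2 == 0
assert groupby(iseven, [1, 2, 3, 4]) == {True: [2, 4], False: [1, 3]}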
def auto_combine(datasets,
                 concat_dim=_CONCAT_DIM_DEFAULT,
                 compat='no_conflicts',
                 data_vars='all', coords='different'):
    """Attempt to auto-magically combine the given datasets into one.

    This method attempts to combine a list of datasets into a single entity by
    inspecting metadata and using a combination of concat and merge.

    It does not concatenate along more than one dimension or sort data under
    any circumstances. It does align coordinates, but different variables on
    datasets can cause it to fail under some scenarios. In complex cases, you
    may need to clean up your data and use ``concat``/``merge`` explicitly.

    ``auto_combine`` works well if you have N years of data and M data
    variables, and each combination of a distinct time period and set of data
    variables is saved as its own dataset.

    Parameters
    ----------
    datasets : sequence of xarray.Dataset
        Dataset objects to merge.
    concat_dim : str or DataArray or Index, optional
        Dimension along which to concatenate variables, as used by
        :py:func:`xarray.concat`. You only need to provide this argument if
        the dimension along which you want to concatenate is not a dimension
        in the original datasets, e.g., if you want to stack a collection of
        2D arrays along a third dimension.
        By default, xarray attempts to infer this argument by examining
        component files. Set ``concat_dim=None`` explicitly to disable
        concatenation.
    compat : {'identical', 'equals', 'broadcast_equals',
              'no_conflicts'}, optional
        String indicating how to compare variables of the same name for
        potential conflicts:

        - 'broadcast_equals': all values must be equal when variables are
          broadcast against each other to ensure common dimensions.
        - 'equals': all values and dimensions must be the same.
        - 'identical': all values, dimensions and attributes must be the
          same.
        - 'no_conflicts': only values which are not null in both datasets
          must be equal. The returned dataset then contains the combination
          of all non-null values.
    data_vars : {'minimal', 'different', 'all' or list of str}, optional
        Details are in the documentation of concat
    coords : {'minimal', 'different', 'all' or list of str}, optional
        Details are in the documentation of concat

    Returns
    -------
    combined : xarray.Dataset

    See also
    --------
    concat
    Dataset.merge
    """
    from toolz import itertoolz
    if concat_dim is not None:
        dim = None if concat_dim is _CONCAT_DIM_DEFAULT else concat_dim
        grouped = itertoolz.groupby(lambda ds: tuple(sorted(ds.data_vars)),
                                    datasets).values()
        concatenated = [_auto_concat(ds, dim=dim,
                                     data_vars=data_vars, coords=coords)
                        for ds in grouped]
    else:
        concatenated = datasets
    merged = merge(concatenated, compat=compat)
    return merged
def test_groupby_non_callable():
    assert groupby(0, [(1, 2), (1, 3), (2, 2), (2, 4)]) == \
        {1: [(1, 2), (1, 3)], 2: [(2, 2), (2, 4)]}