def to_html_doc(title, items, image_base_path, image_width, image_height):
    doc, tag, text = Doc().tagtext()
    with tag('html'):
        with tag('head'):
            with tag('style'):
                doc.asis(
                    f'img {{max-width:{image_width}px;max-height:{image_height}px;width:auto;height:auto;}}'
                )
        with tag('body'):
            with tag('h1'):
                text(title)
            for make, make_items in groupby(lambda item: item['make'], items).items():
                for model, model_items in groupby(lambda item: item['model'], make_items).items():
                    for seller, seller_items in groupby(
                            lambda item: item['seller'], model_items).items():
                        with tag('h3'):
                            text(make + ' - ' + model + ' - ' + seller +
                                 ' (' + str(len(seller_items)) + ')')
                        for item in seller_items:
                            with tag('img',
                                     src=image_base_path + '/' + item['image_id'] + '.jpg?authuser=1',
                                     height=f'{image_height}',
                                     width=f'{image_width}'):
                                text('')
    return doc
def test_groupby_non_callable():
    assert groupby(0, [(1, 2), (1, 3), (2, 2), (2, 4)]) == \
        {1: [(1, 2), (1, 3)], 2: [(2, 2), (2, 4)]}
    assert groupby([0], [(1, 2), (1, 3), (2, 2), (2, 4)]) == \
        {(1,): [(1, 2), (1, 3)], (2,): [(2, 2), (2, 4)]}
    assert groupby([0, 0], [(1, 2), (1, 3), (2, 2), (2, 4)]) == \
        {(1, 1): [(1, 2), (1, 3)], (2, 2): [(2, 2), (2, 4)]}
def test_last_drop_take():
    l = list([1, 2, 3])
    assert_that(pvector(drop(2, l))).is_equal_to(v(3))
    assert_that(pvector(take(2, l))).is_equal_to(v(1, 2))
    assert_that(pmap(groupby(first, ['ABC', 'ABA', 'BAB', 'BAA']))).is_equal_to(
        m(A=['ABC', 'ABA'], B=['BAB', 'BAA']))
    assert_that(pmap(groupby(identity, ['ABC', 'ABA', 'BAB', 'BAA']))).is_equal_to(
        m(ABC=['ABC'], ABA=['ABA'], BAB=['BAB'], BAA=['BAA']))
def formFeatureMatrix(heroIDs, match):
    currentHeroAmount = len(heroIDs) + 1
    result = match['radiant_win']  # True if radiant won
    teams = groupby('team', match['players'])
    dire = teams['D']
    radiant = teams['R']
    # Dire is first, then Radiant
    matchVector = []
    for player in dire:
        matchVector.append(player['hero_id'])
    for player in radiant:
        matchVector.append(player['hero_id'])
    matchVector.append(result)
    finalVector = list(concat([(2 * currentHeroAmount) * [0], [0]]))
    for direPick in matchVector[:5]:
        normalizeDirePick = direPick - 1
        finalVector[normalizeDirePick] = 1
    for radiantPick in matchVector[5:10]:
        normalizeRadiantPick = currentHeroAmount + (radiantPick - 1)
        finalVector[normalizeRadiantPick] = 1
    if result > 0:
        finalVector[-1] = 0  # dire lost aka radiant won
    else:
        finalVector[-1] = 1  # dire|radiant|direwon
    return finalVector
def test_get_variable_counts_domain_not_in_df(pivot_df):
    # implementation detail to avoid repeating query
    variables = Variable.objects.all()
    var_lookup = groupby('id', variables.values('id', 'label', 'code'))
    df = get_variable_counts(pivot_df, var_lookup, "FAKEDOMAIN")
    assert df is None
def sample(dataset, key, limit):
    by_id = groupby(
        lambda x: (x['id'], x['make'], x['model'], x['seller'], x['color'], x['year']),
        dataset)
    classes = groupby(key, by_id.items())

    def sample_images(xs):
        ads = random.sample(xs, limit if len(xs) >= limit else len(xs))
        images = mapcat(second, ads)
        return list(take(limit, images))

    samples = map(sample_images, classes.values())
    sample = concat(samples)
    return sample
def read_netlist(directory):
    netlist_path = os.path.join(directory, "steps/pcb/netlists/cadnet/netlist")
    linerec = read_linerecords(netlist_path)
    netnames = parse_net_names(linerec)
    # All the following operations are performed lazily
    decoded = run_decoder(linerec["Netlist points"], netlist_decoder_options)
    decoded = filter(not_none, decoded)
    decoded_mapped = map(functools.partial(assign_net_name, netnames), decoded)
    return groupby(operator.attrgetter("netid"), decoded_mapped)
def get_tables_in_database(
        client: "ImmutaClient", config: Dict[str, Any]
) -> Dict[str, List[Dict[str, str]]]:
    """Returns a schema_name: [tables...] mapping for the database
    specified by the config"""
    # Grab list of all tables in all schemas in the database
    tables_in_database = client.get_table_names(config)
    # Group the tables per schema
    return groupby("tableSchema", tables_in_database)
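# A minimal sketch of what the grouping above produces, assuming `groupby` is
# toolz.itertoolz.groupby; the rows below are invented stand-ins for what
# client.get_table_names(config) might return.
from toolz.itertoolz import groupby

rows = [
    {"tableSchema": "public", "tableName": "users"},
    {"tableSchema": "public", "tableName": "orders"},
    {"tableSchema": "audit", "tableName": "events"},
]
print(groupby("tableSchema", rows))
# {'public': [{'tableSchema': 'public', 'tableName': 'users'},
#             {'tableSchema': 'public', 'tableName': 'orders'}],
#  'audit': [{'tableSchema': 'audit', 'tableName': 'events'}]}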
def process_shuffle_sort(in_seq):
    """
    Simulates shuffle-sort phase
    :param in_seq: (k, v) pairs from mapper application
    :return: shuffle-sorted (k, [v, v, v...]) pairs to be used for reduce
    """
    grp = groupby(lambda t: t[0], in_seq)
    for k, vs in grp.items():
        yield (k, [v[1] for v in vs])
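# A quick illustration of the shuffle-sort helper above, assuming `groupby`
# is toolz.itertoolz.groupby and process_shuffle_sort is in scope; the
# word-count pairs are made up for the example.
mapper_output = [("cat", 1), ("dog", 1), ("cat", 1), ("bird", 1), ("cat", 1)]
print(dict(process_shuffle_sort(mapper_output)))
# {'cat': [1, 1, 1], 'dog': [1], 'bird': [1]}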
def grouped_craft(self) -> str:
    groups = groupby('craft', [x.dict() for x in self.people])
    person_groups: List[PersonGroup] = [
        PersonGroup(craft=craft, number=len(persons), people=persons)
        for craft, persons in groups.items()
    ]
    return "\n\n".join([x.extract() for x in person_groups])
def collect_metrics(reactor, config, log, client=None, authenticator=None,
                    _print=False):
    """
    Start collecting the metrics

    :param reactor: Twisted reactor
    :param dict config: Configuration read from file containing all info
        needed to collect metrics
    :param :class:`silverberg.client.CQLClient` client: Optional cassandra
        client. A new client will be created if this is not given and
        disconnected before returning
    :param :class:`otter.auth.IAuthenticator` authenticator: Optional
        authenticator. A new authenticator will be created if this is not
        given
    :param bool _print: Should debug messages be printed to stdout?

    :return: :class:`Deferred` fired with ``list`` of `GroupMetrics`
    """
    _client = client or connect_cass_servers(reactor, config['cassandra'])
    authenticator = authenticator or generate_authenticator(reactor,
                                                            config['identity'])
    store = CassScalingGroupCollection(_client, reactor, 1000)
    dispatcher = get_dispatcher(reactor, authenticator, log,
                                get_service_configs(config), store)

    # calculate metrics on launch_server and non-paused groups
    groups = yield perform(dispatcher, Effect(GetAllValidGroups()))
    groups = [
        g for g in groups
        if json.loads(g["launch_config"]).get("type") == "launch_server" and
        (not g.get("paused", False))]
    tenanted_groups = groupby(lambda g: g["tenantId"], groups)
    group_metrics = yield get_all_metrics(
        dispatcher, tenanted_groups, log, _print=_print)

    # Add to cloud metrics
    metr_conf = config.get("metrics", None)
    if metr_conf is not None:
        eff = add_to_cloud_metrics(
            metr_conf['ttl'], config['region'], group_metrics,
            len(tenanted_groups), config, log, _print)
        eff = Effect(TenantScope(eff, metr_conf['tenant_id']))
        yield perform(dispatcher, eff)
        log.msg('added to cloud metrics')
        if _print:
            print('added to cloud metrics')
    if _print:
        group_metrics.sort(key=lambda g: abs(g.desired - g.actual),
                           reverse=True)
        print('groups sorted as per divergence')
        print('\n'.join(map(str, group_metrics)))

    # Disconnect only if we created the client
    if not client:
        yield _client.disconnect()

    defer.returnValue(group_metrics)
def process_shuffle_sort(in_seq):
    """
    Simulates shuffle-sort phase
    :param in_seq: (k, v) pairs from mapper application
    :return: shuffle-sorted (k, [v, v, v...]) pairs to be used for reduce
    """
    # If t[0] is a list it needs to be cast to a tuple, because lists can't
    # be used as dict keys in Python.
    grp = groupby(lambda t: (tuple(t[0]) if type(t[0]) is list else t[0]),
                  in_seq)
    for k, vs in grp.items():
        yield (k, [v[1] for v in vs])
def group_by(key, seq):
    """ Group a list by a key function

    >>> names = ['Alice', 'Bob', 'Charlie', 'Dan', 'Edith', 'Frank']
    >>> group_by(len, names)  # doctest: +SKIP
    {3: ['Bob', 'Dan'], 5: ['Alice', 'Edith', 'Frank'], 7: ['Charlie']}

    >>> iseven = lambda x: x % 2 == 0
    >>> group_by(iseven, [1, 2, 3, 4, 5, 6, 7, 8])  # doctest: +SKIP
    {False: [1, 3, 5, 7], True: [2, 4, 6, 8]}

    Non-callable keys imply grouping on a member.

    >>> group_by('gender', [{'name': 'Alice', 'gender': 'F'},
    ...                     {'name': 'Bob', 'gender': 'M'},
    ...                     {'name': 'Charlie', 'gender': 'M'}])  # doctest:+SKIP
    {'F': [{'gender': 'F', 'name': 'Alice'}],
     'M': [{'gender': 'M', 'name': 'Bob'},
           {'gender': 'M', 'name': 'Charlie'}]}

    See Also:
        countby
    """
    return groupby(key, seq)
def group_all(sanitized_data):
    """
    Handle all the required data manipulations in memory.
    Should simply map to the required objects for the front end.
    """
    grouped = groupby("current_dept_description", sanitized_data)
    assert isinstance(grouped, dict)
    assert grouped
    double_grouped = [
        {
            "name": key,
            "ethnicity": count_by_key(groupby("income_level", grouped[key]),
                                      "ethnic_code_description"),
            "gender": count_by_key(groupby("income_level", grouped[key]),
                                   "gender")
        }
        for key in grouped
    ]
    assert all(isinstance(key, dict) for key in double_grouped)
    return double_grouped
def select(self):
    d = groupby(attrgetter('fitness'), self.sorted_individuals)
    inds = []
    ks = np.sort(list(d.keys()))[::-1]
    while len(inds) < self.n_individuals:
        for k in ks:
            if d[k]:
                a = choice(d[k])
                inds.append(a)
                d[k].remove(a)
    self.individuals = inds
def generate_example_timeseries_table(filename: Path, conditions: List[str],
                                      strains: List[str], timelimit=100):
    logger.debug(
        f"generate_example_timeseries({filename.name}, {conditions}, {strains}, {timelimit})"
    )
    output_filename = folder_data / "example_timeseries.tsv"

    table = pandas.read_csv(filename, sep="\t").set_index('time')
    logger.debug(f"The input table has {len(table.columns)} columns.")
    cols = list()
    for column in table.columns:
        strain, condition, plate, replicate = column.split('.')
        # logger.debug(f"{strain, condition}")
        if strain in strains and condition in conditions:
            cols.append(column)
    logger.debug(f"Reduced the number of columns to {len(cols)}")
    table = table[cols]
    logger.debug(f"The second table has {len(table.columns)} columns.")
    table = table.loc[[i for i in table.index if i <= timelimit]]

    # Change the plate, replicate fields
    groups = itertoolz.groupby(lambda s: ".".join(s.split('.')[:2]),
                               table.columns)
    wanted = [
        '.1.1', '.1.2', '.1.3', '.2.1', '.2.2', '.2.3', '.3.1', '.3.2', '.3.3'
    ]
    columnmap = dict()
    for name, group in groups.items():
        if name in ['A224T.RKS', 'N455K.RKS', 'WT.RKS']:
            result = {col: col for col in group[:9]}
        else:
            result = dict()
            for item_wanted, item_actual in zip(wanted, group):
                s, c, p, r = item_actual.split('.')
                item = f"{s}.{c}" + item_wanted
                result[item_actual] = item
        columnmap.update(result)

    # First remove any columns not in `columnmap`
    table = table[sorted(columnmap.keys())]
    # Rename columns
    table.columns = [columnmap[i] for i in table.columns]

    table.to_csv(output_filename, sep="\t")
def json_to_LBConfigs(lbs_json):
    """
    Convert load balancer config from JSON to :obj:`CLBDescription`

    :param lbs_json: Sequence of load balancer configs
    :return: Sequence of :class:`ILBDescription` providers
    """
    by_type = groupby(lambda lb: lb.get('type', 'CloudLoadBalancer'), lbs_json)
    return pset(
        [CLBDescription(lb_id=str(lb['loadBalancerId']), port=lb['port'])
         for lb in by_type.get('CloudLoadBalancer', [])] +
        [RCv3Description(lb_id=str(lb['loadBalancerId']))
         for lb in by_type.get('RackConnectV3', [])]
    )
def partition_groups(grouper, seq, keys):
    """
    Partition a sequence based on a grouping function. This is like groupby,
    but it returns a tuple of fixed length instead of a dict of arbitrary
    length.

    :param callable grouper: A function which returns a key for an item.
    :param seq: A sequence of items.
    :param keys: A sequence of key names to expect.

    :return: A tuple of lists for which the grouper returned each key, in the
        same order as the keys argument.
    """
    groups = groupby(grouper, seq)
    return tuple(groups.get(key, []) for key in keys)
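# A short usage sketch for partition_groups, assuming `groupby` above is
# toolz.itertoolz.groupby; the job records and status values are invented
# for the example.
jobs = [
    {"id": 1, "status": "done"},
    {"id": 2, "status": "failed"},
    {"id": 3, "status": "done"},
]
done, failed, pending = partition_groups(
    lambda job: job["status"], jobs, ("done", "failed", "pending"))
# done    -> the two "done" records, in input order
# failed  -> the single "failed" record
# pending -> [] (keys with no matches come back as empty lists)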
def auto_combine(datasets, concat_dim=None):
    """Attempt to auto-magically combine the given datasets into one.

    This method attempts to combine a list of datasets into a single entity by
    inspecting metadata and using a combination of concat and merge.

    It does not concatenate along more than one dimension or align or sort
    data under any circumstances. It will fail in complex cases, for which you
    should use ``concat`` and ``merge`` explicitly.

    When ``auto_combine`` may succeed:

    * You have N years of data and M data variables. Each combination of a
      distinct time period and set of data variables is saved as its own
      dataset.

    Examples of when ``auto_combine`` fails:

    * In the above scenario, one file is missing, containing the data for one
      year's data for one variable.
    * In the most recent year, there is an additional data variable.
    * Your data includes "time" and "station" dimensions, and each year's data
      has a different set of stations.

    Parameters
    ----------
    datasets : sequence of xarray.Dataset
        Dataset objects to merge.
    concat_dim : str or DataArray or Index, optional
        Dimension along which to concatenate variables, as used by
        :py:func:`xarray.concat`. You only need to provide this argument if
        the dimension along which you want to concatenate is not a dimension
        in the original datasets, e.g., if you want to stack a collection of
        2D arrays along a third dimension.

    Returns
    -------
    combined : xarray.Dataset

    See also
    --------
    concat
    Dataset.merge
    """
    from toolz import itertoolz
    grouped = itertoolz.groupby(lambda ds: tuple(sorted(ds.data_vars)),
                                datasets).values()
    concatenated = [_auto_concat(ds, dim=concat_dim) for ds in grouped]
    merged = reduce(lambda ds, other: ds.merge(other), concatenated)
    return merged
def _get_balance(self, cr, uid, ids, name, args, context=None):
    """Computed as follows:

    A) Cleared Deposits, Credits, and Interest
       Amount: SUM of Amts of lines Cleared Deposits, Credits, and Interest
       # of Items: Number of lines
    B) Checks, Withdrawals, Debits, and Service Charges
       Amount: Checks, Withdrawals, Debits, and Service Charges Amount
       # of Items: Number of lines
    Cleared Balance: Total Sum of the Deposit Amount Cleared (A) -
       Total Sum of Checks Amount Cleared (B)
    Difference = (Ending Balance - Beginning Balance) - Cleared Balance;
       this should be zero.
    """
    res = {}
    account_precision = self.pool['decimal.precision'].precision_get(
        cr, uid, 'Account')
    for stmt in self.browse(cr, uid, ids, context=context):
        res[stmt.id] = {}
        cleared = lambda l: l.cleared_bank_account and 'Cleared' or 'Uncleared'
        get_amount = lambda l: [
            round(v.amount, account_precision) for v in l
        ]
        process_lines = compose(valmap(get_amount), groupby(cleared))
        for line_type in ('debit', 'credit'):
            r = process_lines(eval('stmt.%s_move_line_ids' % line_type))
            res[stmt.id].update({
                'sum_of_%ss' % line_type: sum(r.get('Cleared', [])),
                'sum_of_%ss_lines' % line_type: len(r.get('Cleared', [])),
                'sum_of_%ss_unclear' % line_type: sum(r.get('Uncleared', [])),
                'sum_of_%ss_lines_unclear' % line_type:
                    len(r.get('Uncleared', []))
            })
        res[stmt.id]['cleared_balance'] = round(
            res[stmt.id]['sum_of_debits'] - res[stmt.id]['sum_of_credits'],
            account_precision)
        res[stmt.id]['uncleared_balance'] = round(
            res[stmt.id]['sum_of_debits_unclear'] -
            res[stmt.id]['sum_of_credits_unclear'],
            account_precision)
        res[stmt.id]['difference'] = round(
            (stmt.ending_balance - stmt.starting_balance) -
            res[stmt.id]['cleared_balance'], account_precision)
    return res
def plot_points(title, X, L):
    plt.figure(figsize=(6, 6))
    for label, X_label in groupby(lambda t: L[t[0]], enumerate(X)).items():
        # these 2 lines are hacks!!!!
        X_label = list(map(lambda x: x[1], X_label))
        X_label = np.array(X_label)
        plt.scatter(X_label[:, 0], X_label[:, 1], label=label)
    plt.title(title)
    plt.xlabel('t-sne 1')
    plt.ylabel('t-sne 2')
    plt.legend()
    return plt
def test_get_variable_counts(pivot_df):
    # implementation detail to avoid repeating query
    variables = Variable.objects.all()
    var_lookup = groupby('id', variables.values('id', 'label', 'code'))
    df = get_variable_counts(pivot_df, var_lookup, "FOO")
    #    study study_label   FOO  id  count  var_code var_label
    # 0      9        ID#8  11.0  23     45      11.0       var
    # 1     10        ID#9  11.0  10     21      11.0       var
    assert set(df.columns) == set([
        'study', 'study_label', 'FOO', 'id', 'count', 'var_code', 'var_label',
        'subjects'
    ])
    pd.util.testing.assert_series_equal(
        df.var_label, pd.Series(data=["var", "var"], name='var_label'))
def group_by_plate_and_treatment(
        columns: List[str]) -> Dict[Tuple[str, str, str], List[str]]:
    """ Combines all of the columns in `table` based on their `plate` and
    `replicate` values.

    Parameters
    ----------
    columns: List[str]
        The columns to group
    """

    def groupbykey(s) -> Tuple[str, str, str]:
        a, b, c, d = s.split('.')
        return a, c, d

    # Group the columns by plate, replicate, and strain while ignoring the
    # condition.
    groups = itertoolz.groupby(groupbykey, columns)
    return groups
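# A small worked example, assuming the strain.condition.plate.replicate
# column naming used elsewhere in this module; the column names themselves
# are hypothetical.
columns = ["WT.RKS.1.1", "WT.ABC.1.1", "WT.RKS.2.3", "A224T.RKS.1.1"]
print(group_by_plate_and_treatment(columns))
# {('WT', '1', '1'): ['WT.RKS.1.1', 'WT.ABC.1.1'],
#  ('WT', '2', '3'): ['WT.RKS.2.3'],
#  ('A224T', '1', '1'): ['A224T.RKS.1.1']}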
def apply_sort(data, sort_keys):
    # Data is a list to be sorted. Sort_keys is a list of tuples (key, reverse)
    # where key is a dict key in a list item, and reverse says whether to sort
    # in reverse order or not. (i.e. False for ascending, True for descending)
    if not sort_keys:
        return data
    else:
        # Parse the first sort_key
        if isinstance(sort_keys[0], string_types):
            key = sort_keys[0]
            reverse = False
        else:
            key, reverse = sort_keys[0]
        remaining_sort_keys = sort_keys[1:]

        # Sort into groups by this key
        groups = groupby(itemgetter(key), data)
        try:
            key_sample = next((k for k in groups.keys() if k is not None))
        except StopIteration:
            key_sample = None

        if key_sample is None:
            key_fn = lambda _: True
        elif isinstance(key_sample, string_types):
            key_fn = lambda s: s.lower() if s is not None else ''
        elif isinstance(key_sample, bool):
            key_fn = bool
        elif isinstance(key_sample, numbers.Number):
            key_fn = lambda n: n if n is not None else 0
        else:
            # Unknown, so we'll just use ident
            key_fn = lambda x: x

        sorted_indices = sorted(list(groups.keys()), key=key_fn,
                                reverse=reverse)

        # Sort each group by remaining keys, and concat them together in an
        # order sorted by this key.
        return list(
            concat(
                apply_sort(groups[index], remaining_sort_keys)
                for index in sorted_indices))
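# A usage sketch for apply_sort, assuming its dependencies (groupby, concat,
# itemgetter, string_types, numbers) are in scope; the records are invented.
rows = [
    {"name": "carol", "age": 31},
    {"name": "alice", "age": 25},
    {"name": "bob", "age": 31},
]
# Sort by age descending, then break ties by name ascending.
print(apply_sort(rows, [("age", True), ("name", False)]))
# [{'name': 'bob', 'age': 31}, {'name': 'carol', 'age': 31},
#  {'name': 'alice', 'age': 25}]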
def action_process(self, cr, uid, ids, context=None):
    """Set the account move lines as 'Cleared' and assign
    'Bank Acc Rec Statement ID' for the statement lines which are marked
    as 'Cleared'."""
    aml_obj = self.pool['account.move.line']
    # If difference balance not zero prevent further processing
    self.check_difference_balance(cr, uid, ids, context=context)
    cleared = lambda l: l.cleared_bank_account and 'Cleared' or 'Uncleared'
    has_move = lambda l: l.move_line_id and True or False
    move_id = lambda res: [l.move_line_id.id for l in res]
    process_lines = compose(valmap(move_id), groupby(cleared),
                            filter(has_move))
    for stmt in self.browse(cr, uid, ids, context=context):
        statement_lines = process_lines(stmt.credit_move_line_ids +
                                        stmt.debit_move_line_ids)
        if statement_lines.get('Cleared'):
            aml_obj.write(cr, uid, statement_lines['Cleared'], {
                'cleared_bank_account': True,
                'bank_acc_rec_statement_id': stmt.id
            }, context=context)
        if statement_lines.get('Uncleared'):
            aml_obj.write(cr, uid, statement_lines['Uncleared'], {
                'cleared_bank_account': False,
                'bank_acc_rec_statement_id': False
            }, context=context)
        self.write(
            cr, uid, [stmt.id], {
                'state': 'done',
                'verified_by_user_id': uid,
                'verified_date': fields.date.context_today(
                    self, cr, uid, context=context)
            }, context=context)
    return True
def _iter_subplots(
    hist: CategoryBootstrapHistogram,
    xattr: str = "attenuation",
    groupbyattr: str = "scattering",
) -> Generator[_SubplotData, None, None]:
    signalonly = filter(lambda item: "ibd" in item.category.eventtype.lower(),
                        hist)
    groupedbyattr = groupby(lambda item: getattr(item.category, groupbyattr),
                            signalonly)
    for attrvalue, items in groupedbyattr.items():
        X = [getattr(it.category, xattr) for it in items]
        mean, meanerr = zip(*[_calcmu(it.histogram) for it in items])
        sigma, sigmaerr = zip(*[_calcsigma(it.histogram) for it in items])
        (X, mean, meanerr, sigma, sigmaerr) = (
            np.array(it) for it in (X, mean, meanerr, sigma, sigmaerr))
        yield _SubplotData(groupbyattr, attrvalue, xattr, X, mean, meanerr,
                           sigma, sigmaerr)
def get(self, request, *args, **kwargs):
    studies = self.resolve_studies()
    domain = Domain.objects.get(pk=kwargs.get('domain_id'))
    df = get_counts_df(studies)
    pivot_df = pivot_counts_df(df)
    # need for later, but storing for single query
    variables = Variable.objects.all()
    var_lookup = groupby('id', variables.values('id', 'label', 'code'))
    if self.by_age is True:
        domain_df = get_variable_count_by_variable(pivot_df, var_lookup,
                                                   domain.code)
    else:
        domain_df = get_variable_counts(pivot_df, var_lookup, domain.code)
    # Build response
    filename = self.get_filename(domain)
    content_type = 'text/csv'
    response = HttpResponse(content_type=content_type)
    response['Content-Disposition'] = 'attachment; filename="{0}"'.format(
        filename)
    domain_df.to_csv(response)
    return response
def _get_ngram_spans(
    self,
    doc: Doc,
):
    ngrams = []
    for sent in doc.sents:
        for n in range(1, len(list(sent))):
            tokens = [t for t in sent if not (t.is_stop or t.is_punct)]
            ngrams.extend(_ngrams(tokens, n))

    def ngram_key(ngram):
        return tuple(
            self.stemmer.stem(token.text).lower() for token in ngram)

    key_to_ngrams = itertoolz.groupby(ngram_key, ngrams)
    key_to_spans = {}
    for k, grouped_ngrams in key_to_ngrams.items():
        key_to_spans[k] = [(ngram[0].i, ngram[-1].i + 1)
                           for ngram in grouped_ngrams]
    return key_to_spans
def test_get_variable_count_by_variable(pivot_df):
    # implementation detail to avoid repeating query
    variables = Variable.objects.all()
    var_lookup = groupby('id', variables.values('id', 'label', 'code'))
    df = get_variable_count_by_variable(pivot_df, var_lookup, "FOO",
                                        qualifier_code="AGECAT")
    #    study study_label  AGECAT   FOO  id  count  var_code var_label  domain_code  qual_code qual_label
    # 0     11       ID#10    13.0  14.0  29     45      14.0       var            0       13.0        age
    assert set(df.columns) == set([
        'study', 'study_label', 'AGECAT', 'FOO', 'id', 'count', 'var_code',
        'var_label', 'qual_code', 'qual_label', 'subjects'
    ])
    pd.util.testing.assert_series_equal(
        df.var_label, pd.Series(data=["var"], name='var_label'))
    pd.util.testing.assert_series_equal(
        df.qual_label, pd.Series(data=["age"], name='qual_label'))
def max(expr, env):
    coll = eval(expr.coll, env)
    groups = groupby(itemgetter(1), coll.iteritems())
    maxgroup = max(groups)
    unique_max = max(groups) + (len(groups[maxgroup]) != 1)
    keys = members(typeof(expr), env)

    def maxdiff(ks):
        if len(ks) > 1:
            return max(abs(coll[a] - coll[b])
                       for a, b in combinations(ks, 2))
        else:
            if coll[ks[0]] == unique_max:
                return 0
            else:
                # TODO: How does this math change for sports with non-1
                # scores (e.g. rugby)?
                return 1 + maxgroup - coll[ks[0]]

    return {ks: maxdiff(ks) for ks in keys}
def to_html_doc(title, items, image_base_path, image_width, image_height):
    doc, tag, text = Doc().tagtext()
    with tag('html'):
        with tag('head'):
            with tag('style'):
                doc.asis(
                    f'img {{max-width:{image_width}px;max-height:{image_height}px;width:auto;height:auto;}}'
                )
        with tag('body'):
            with tag('h1'):
                text(title)
            for year, year_items in ordered_dict_sorted_by_key(
                    groupby(lambda item: item['year'], items)).items():
                with tag('h3'):
                    text(f'{year}')
                for item in year_items:
                    with tag('img',
                             src=image_base_path + '/' + item['image_id'] + '.jpg?authuser=1',
                             height=f'{image_height}',
                             width=f'{image_width}'):
                        text('')
    return doc
def generate_metadata(group_id, lb_descriptions):
    """
    Generate autoscale-specific Nova server metadata given the group ID and
    an iterable of :class:`ILBDescription` providers.

    :return: a metadata `dict` containing the group ID and LB information
    """
    metadata = {
        'rax:auto_scaling_group_id': group_id,
        'rax:autoscale:group:id': group_id
    }
    descriptions = groupby(lambda desc: (desc.lb_id, type(desc)),
                           lb_descriptions)
    for (lb_id, desc_type), descs in descriptions.iteritems():
        if desc_type == CLBDescription:
            key = 'rax:autoscale:lb:CloudLoadBalancer:{0}'.format(lb_id)
            metadata[key] = json.dumps([
                {'port': desc.port} for desc in descs])
        elif desc_type == RCv3Description:
            metadata['rax:autoscale:lb:RackConnectV3:{0}'.format(lb_id)] = ""
    return metadata
def _to_tasks(messages):
    return freeze(groupby(attrgetter('task_uuid'), messages))
def setUp(self):
    self.groups = groupby(
        lambda g: g["tenantId"],
        ([{"tenantId": "t1", "a": "1"}, {"tenantId": "t1", "a": "2"}] +
         [{"tenantId": "t{}".format(i), "b": str(i)} for i in range(2, 10)]))
def partition(expr, env):
    groups = groupby(attrgetter(expr.by), eval(expr.coll, env))
    return {k: groups.get(k, []) for k in members(typeof(expr.by), env)}
def max(expr, env):
    groups = groupby(itemgetter(1), eval(expr.coll, env).iteritems())
    return {k for k, v in groups[max(groups)]}
def converge_launch_server(desired_state, servers_with_cheese,
                           load_balancer_nodes, load_balancers, now,
                           timeout=3600):
    """
    Create steps that indicate how to transition from the state provided
    by the given parameters to the :obj:`DesiredServerGroupState` described
    by ``desired_state``.

    :param DesiredServerGroupState desired_state: The desired group state.
    :param set servers_with_cheese: a list of :obj:`NovaServer` instances.
        This must only contain servers that are being managed for the
        specified group.
    :param load_balancer_nodes: a set of :obj:`ILBNode` providers. This must
        contain all the load balancer mappings for all the load balancers
        (of all types) on the tenant.
    :param dict load_balancers: Collection of load balancer objects accessed
        based on its ID. The object is opaque and is not used by planner
        directly. It is intended to contain extra info for specific LB
        provider
    :param float now: number of seconds since the POSIX epoch indicating the
        time at which the convergence was requested.
    :param float timeout: Number of seconds after which we will delete a
        server in BUILD.
    :rtype: :obj:`pbag` of `IStep`
    """
    newest_to_oldest = sorted(servers_with_cheese, key=lambda s: -s.created)

    servers = defaultdict(lambda: [], groupby(get_destiny, newest_to_oldest))
    servers_in_active = servers[Destiny.CONSIDER_AVAILABLE]

    building_too_long, waiting_for_build = partition_bool(
        lambda server: now - server.created >= timeout,
        servers[Destiny.WAIT_WITH_TIMEOUT])

    create_server = CreateServer(server_config=desired_state.server_config)

    # delete any servers that have been building for too long
    delete_timeout_steps = [DeleteServer(server_id=server.id)
                            for server in building_too_long]

    # create servers
    create_steps = [create_server] * (
        desired_state.capacity - (
            len(servers_in_active) +
            len(waiting_for_build) +
            len(servers[Destiny.WAIT]) +
            len(servers[Destiny.AVOID_REPLACING])))

    # Scale down over capacity, starting with building, then WAIT, then
    # AVOID_REPLACING, then active, preferring older. Also, finish
    # draining/deleting servers already in draining state
    servers_in_preferred_order = (
        servers_in_active +
        servers[Destiny.AVOID_REPLACING] +
        servers[Destiny.WAIT] +
        waiting_for_build)
    servers_to_delete = servers_in_preferred_order[desired_state.capacity:]

    def drain_and_delete_a_server(server):
        return _drain_and_delete(
            server,
            desired_state.draining_timeout,
            [node for node in load_balancer_nodes if node.matches(server)],
            now)

    try:
        scale_down_steps = list(
            mapcat(drain_and_delete_a_server,
                   servers_to_delete + servers[Destiny.DRAIN]))
    except DrainingUnavailable as de:
        return pbag([fail_convergence(de)])

    # delete all servers in error - draining does not need to be
    # handled because servers in error presumably are not serving
    # traffic anyway
    delete_error_steps = [DeleteServer(server_id=server.id)
                          for server in servers[Destiny.DELETE]]

    # clean up all the load balancers from deleted and errored servers
    cleanup_errored_and_deleted_steps = [
        remove_node_from_lb(lb_node)
        for server in servers[Destiny.DELETE] + servers[Destiny.CLEANUP]
        for lb_node in load_balancer_nodes if lb_node.matches(server)]

    # converge all the servers that remain to their desired load balancer
    # state
    still_active_servers = filter(lambda s: s not in servers_to_delete,
                                  servers_in_active)
    try:
        lb_converge_steps = [
            step
            for server in still_active_servers
            for step in _converge_lb_state(
                server,
                [node for node in load_balancer_nodes
                 if node.matches(server)],
                load_balancers,
                now,
                # Temporarily using build timeout as node offline timeout.
                # See https://github.com/rackerlabs/otter/issues/1905
                timeout)
            ]
    except DrainingUnavailable as de:
        return pbag([fail_convergence(de)])

    # Converge again if we expect state transitions on any servers
    converge_later = []
    if any((s not in servers_to_delete for s in waiting_for_build)):
        converge_later = [
            ConvergeLater(reasons=[ErrorReason.String('waiting for servers')])]

    unavail_fmt = ('Waiting for server {server_id} to transition to ACTIVE '
                   'from {status}')
    reasons = [ErrorReason.UserMessage(unavail_fmt.format(server_id=s.id,
                                                          status=s.state.name))
               for s in servers[Destiny.WAIT] if s not in servers_to_delete]
    if reasons:
        converge_later.append(ConvergeLater(limited=True, reasons=reasons))

    return pbag(create_steps + scale_down_steps + delete_error_steps +
                cleanup_errored_and_deleted_steps + delete_timeout_steps +
                lb_converge_steps + converge_later)
def converge_launch_stack(desired_state, stacks):
    """
    Create steps that indicate how to transition from the state provided
    by the given parameters to the :obj:`DesiredStackGroupState` described by
    ``desired_state``.

    See note [Converging stacks] for more information.

    :param DesiredStackGroupState desired_state: The desired group state.
    :param set stacks: a set of :obj:`HeatStack` instances. This must only
        contain stacks that are being managed for the specified group.
    :rtype: :obj:`pbag` of `IStep`
    """
    config = desired_state.stack_config

    by_state = groupby(lambda stack: stack.get_state(), stacks)

    stacks_complete = by_state.get(StackState.CREATE_UPDATE_COMPLETE, [])
    stacks_failed = by_state.get(StackState.CREATE_UPDATE_FAILED, [])
    stacks_check_complete = by_state.get(StackState.CHECK_COMPLETE, [])
    stacks_check_failed = by_state.get(StackState.CHECK_FAILED, [])
    stacks_in_progress = by_state.get(StackState.IN_PROGRESS, [])
    stacks_delete_in_progress = by_state.get(StackState.DELETE_IN_PROGRESS, [])
    stacks_delete_failed = by_state.get(StackState.DELETE_FAILED, [])

    stacks_good = stacks_complete + stacks_check_complete
    stacks_amiss = (stacks_failed +
                    stacks_check_failed +
                    stacks_in_progress +
                    stacks_delete_in_progress)

    if stacks_delete_failed:
        reasons = [ErrorReason.String("Stacks in DELETE_FAILED found.")]
        return pbag([FailConvergence(reasons)])

    # If there are no stacks in CHECK_* or other work to be done, we assume
    # we're at the beginning of a convergence cycle and need to perform stack
    # checks.
    if stacks_complete and not (stacks_check_complete or stacks_amiss):
        return pbag([CheckStack(stack) for stack in stacks_complete])

    # Otherwise, if all stacks are in a good state and we have the right
    # number of stacks, we call update on the stacks in CHECK_COMPLETE and
    # return SUCCESS without waiting for it to finish (calling update on a
    # stack in CREATE_COMPLETE is essentially a no-op) so that there will be
    # no stacks in CREATE_* the next time otter tries to converge this group.
    # This will cause all of the stacks to be checked at that time and let
    # otter know if there are any stacks that have fallen into an error state.
    elif not stacks_amiss and len(stacks_good) == desired_state.capacity:
        return pbag([UpdateStack(stack=stack, stack_config=config,
                                 retry=False)
                     for stack in stacks_check_complete])

    def get_create_steps():
        create_stack = CreateStack(stack_config=config)
        good_or_fixable_stack_count = (len(stacks_good) +
                                       len(stacks_in_progress) +
                                       len(stacks_check_failed))
        return [create_stack] * (desired_state.capacity -
                                 good_or_fixable_stack_count)

    def get_scale_down_steps():
        stacks_in_preferred_order = (
            stacks_good + stacks_in_progress + stacks_check_failed)
        unneeded_stacks = stacks_in_preferred_order[desired_state.capacity:]
        return map(DeleteStack, unneeded_stacks)

    def get_fix_steps(scale_down_steps):
        num_stacks_to_update = (len(stacks_check_failed) -
                                len(scale_down_steps))
        stacks_to_update = (stacks_check_failed[:num_stacks_to_update]
                            if num_stacks_to_update > 0 else [])
        return [UpdateStack(stack=s, stack_config=config)
                for s in stacks_to_update]

    create_steps = get_create_steps()
    scale_down_steps = get_scale_down_steps()
    fix_steps = get_fix_steps(scale_down_steps)

    delete_stacks_failed_steps = map(DeleteStack, stacks_failed)

    converge_later = (
        [ConvergeLater([ErrorReason.String("Waiting for stacks to finish.")])]
        if stacks_delete_in_progress or stacks_in_progress
        else [])

    return pbag(create_steps + fix_steps + scale_down_steps +
                delete_stacks_failed_steps + converge_later)
def test_groupby():
    assert groupby(iseven, [1, 2, 3, 4]) == {True: [2, 4], False: [1, 3]}
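# The same assertion against toolz.itertoolz.groupby directly; `iseven` here
# is the helper the test above assumes is defined elsewhere.
from toolz.itertoolz import groupby

iseven = lambda x: x % 2 == 0
assert groupby(iseven, [1, 2, 3, 4]) == {True: [2, 4], False: [1, 3]}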
def auto_combine(datasets,
                 concat_dim=_CONCAT_DIM_DEFAULT,
                 compat='no_conflicts',
                 data_vars='all', coords='different'):
    """Attempt to auto-magically combine the given datasets into one.

    This method attempts to combine a list of datasets into a single entity by
    inspecting metadata and using a combination of concat and merge.

    It does not concatenate along more than one dimension or sort data under
    any circumstances. It does align coordinates, but different variables on
    datasets can cause it to fail under some scenarios. In complex cases, you
    may need to clean up your data and use ``concat``/``merge`` explicitly.

    ``auto_combine`` works well if you have N years of data and M data
    variables, and each combination of a distinct time period and set of data
    variables is saved as its own dataset.

    Parameters
    ----------
    datasets : sequence of xarray.Dataset
        Dataset objects to merge.
    concat_dim : str or DataArray or Index, optional
        Dimension along which to concatenate variables, as used by
        :py:func:`xarray.concat`. You only need to provide this argument if
        the dimension along which you want to concatenate is not a dimension
        in the original datasets, e.g., if you want to stack a collection of
        2D arrays along a third dimension.
        By default, xarray attempts to infer this argument by examining
        component files. Set ``concat_dim=None`` explicitly to disable
        concatenation.
    compat : {'identical', 'equals', 'broadcast_equals',
              'no_conflicts'}, optional
        String indicating how to compare variables of the same name for
        potential conflicts:

        - 'broadcast_equals': all values must be equal when variables are
          broadcast against each other to ensure common dimensions.
        - 'equals': all values and dimensions must be the same.
        - 'identical': all values, dimensions and attributes must be the
          same.
        - 'no_conflicts': only values which are not null in both datasets
          must be equal. The returned dataset then contains the combination
          of all non-null values.
    data_vars : {'minimal', 'different', 'all' or list of str}, optional
        Details are in the documentation of concat
    coords : {'minimal', 'different', 'all' or list of str}, optional
        Details are in the documentation of concat

    Returns
    -------
    combined : xarray.Dataset

    See also
    --------
    concat
    Dataset.merge
    """
    from toolz import itertoolz
    if concat_dim is not None:
        dim = None if concat_dim is _CONCAT_DIM_DEFAULT else concat_dim
        grouped = itertoolz.groupby(lambda ds: tuple(sorted(ds.data_vars)),
                                    datasets).values()
        concatenated = [_auto_concat(ds, dim=dim,
                                     data_vars=data_vars, coords=coords)
                        for ds in grouped]
    else:
        concatenated = datasets
    merged = merge(concatenated, compat=compat)
    return merged
def test_groupby_non_callable():
    assert groupby(0, [(1, 2), (1, 3), (2, 2), (2, 4)]) == \
        {1: [(1, 2), (1, 3)], 2: [(2, 2), (2, 4)]}