def test_merge_with():
    dicts = {1: 1, 2: 2}, {1: 10, 2: 20}
    assert merge_with(sum, *dicts) == {1: 11, 2: 22}
    assert merge_with(tuple, *dicts) == {1: (1, 10), 2: (2, 20)}

    dicts = {1: 1, 2: 2, 3: 3}, {1: 10, 2: 20}
    assert merge_with(sum, *dicts) == {1: 11, 2: 22, 3: 3}
    assert merge_with(tuple, *dicts) == {1: (1, 10), 2: (2, 20), 3: (3,)}

    assert not merge_with(sum)

def test_merge_with(self):
    D, kw = self.D, self.kw
    dicts = D({1: 1, 2: 2}), D({1: 10, 2: 20})
    assert merge_with(sum, *dicts, **kw) == D({1: 11, 2: 22})
    assert merge_with(tuple, *dicts, **kw) == D({1: (1, 10), 2: (2, 20)})

    dicts = D({1: 1, 2: 2, 3: 3}), D({1: 10, 2: 20})
    assert merge_with(sum, *dicts, **kw) == D({1: 11, 2: 22, 3: 3})
    assert merge_with(tuple, *dicts, **kw) == D({1: (1, 10), 2: (2, 20), 3: (3,)})

    assert not merge_with(sum)

def compute_dev_s_scores():
    ## TODO: Write the values to the dataframe dynamically instead of at the very end
    key_name = 'dev_s'
    model_names, task_names, ngrams_list, seeds_list = get_settings_list()
    args = get_arguments()
    args.inp_dir = eutils.mkdir_p(f'./out_dir/{key_name}')

    avg_scores = []
    for outer_idx, (task, model, ngram) in enumerate(
            itertools.product(task_names, model_names, ngrams_list)):
        print(f'\nKey_name: {key_name}, Model: {model}, task: {task}, ngram: {ngram}\n')
        args.task_name = task
        args.model = model
        args.ngram = ngram

        tmp_res = []
        for inner_idx, seed in enumerate(seeds_list):
            args.seed = seed  ## Set the seed
            tmp_res.append(get_model_scores(args))

        # Gather the per-seed score dicts into lists and average each metric across seeds
        dict_of_list = merge_with(list, *tmp_res)
        dict_of_avg_val = {key: statistics.mean(val) for key, val in dict_of_list.items()}
        avg_scores.append({'model': model,
                           'task_name': task,
                           'ngram': ngram,
                           'word_order_sensitivity': word_order_sensitivity(dict_of_avg_val['accuracy']),
                           **dict_of_avg_val})

    df = pd.DataFrame(avg_scores)
    file_name = args.out_dir / f'{key_name}_scores.csv'
    df.to_csv(file_name, index=False)

def deep_merge(*ds):
    def combine(vals):
        if len(vals) == 1 or not all(isinstance(v, dict) for v in vals):
            return vals[-1]
        else:
            return deep_merge(*vals)
    return merge_with(combine, *ds)

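# Minimal, hypothetical usage sketch (not from the sources above): merge_with collects
# colliding values into a list, and deep_merge's combine() recurses when they are all
# dicts; otherwise the last value wins.
# >>> deep_merge({'a': {'x': 1}, 'b': 1}, {'a': {'y': 2}, 'b': 2})
# {'a': {'x': 1, 'y': 2}, 'b': 2}
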
def trials_to_dimensions(trials, precision=6, remove_single_values=True):
    """
    Converts hyperparameter configurations into Plotly dimensions, e.g. as used to
    plot parallel coordinates.

    Args:
        trials (list): Hyperparameter configurations for a sequence of trials. A trial
            is represented by a dict of hyperparameters and corresponding values.
        precision (int, optional): The precision for displaying values of type float.
        remove_single_values (bool, optional): If true, remove hyperparameters with a
            single unique value.

    Returns:
        :class:`list` of :class:`dict`: A list of dictionaries describing each
        hyperparameter dimension.
    """
    # TODO: If floats are below 1., switch to exponential notation
    # TODO: Scale axis to prevent ticks from overlapping
    def to_dimension_dict(hparam):
        label, values = hparam
        elem = values[0]
        if isinstance(elem, str):
            # Categorical values are mapped to integer indices for plotting
            unique = list(sorted(set(values)))
            stoi = {v: i for i, v in enumerate(unique)}
            tick_values = list(range(len(unique)))
            tick_text = unique
            values = [stoi[v] for v in values]
        elif isinstance(elem, float):
            fmt = '{{:.{}g}}'
            fmt = fmt.format(precision)
            tick_values = list(sorted(set(values)))
            tick_text = [fmt.format(v) for v in tick_values]
        else:
            tick_values = list(sorted(set(values)))
            tick_text = [str(v) for v in tick_values]
        dct = dict(range=[min(values), max(values)],
                   label=label,
                   values=values,
                   tickvals=tick_values,
                   ticktext=tick_text)
        return dct

    # merge_with(list, trials) turns the list of trial dicts into
    # hyperparameter -> list of values across trials
    hparams = dicttoolz.merge_with(list, trials)
    dimensions = [to_dimension_dict(hparam) for hparam in hparams.items()]
    if remove_single_values:
        dimensions = list(filter(lambda d: len(d['tickvals']) > 1, dimensions))
    return dimensions

def step(self, actions):
    actions = np.split(actions, self.env_nums)
    result = [
        env.step(np.squeeze(action))
        for env, action in zip(self.envs, actions)
    ]
    obs, rews, dones, infos = zip(*result)
    self._obs = np.stack(obs)
    infos = merge_with(np.array, *infos)
    return self._obs, np.stack(rews)[:, np.newaxis], \
        np.stack(dones)[:, np.newaxis], infos

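# Hedged illustration with made-up values: merge_with(np.array, *infos) batches the
# per-environment info dicts into a single dict of arrays keyed by field, e.g.
# >>> merge_with(np.array, {'lives': 3}, {'lives': 2})
# {'lives': array([3, 2])}
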
def _deepmerge(dicts):
    # merge_with expects a non-variadic function
    for maybe_a_dict in reversed(dicts):
        if not isinstance(maybe_a_dict, dict):
            # If we've got any non-dicts, the last non-dict wins.
            return maybe_a_dict
    else:
        # Otherwise we want to merge all these dicts, using deepmerge on any
        # collisions
        return merge_with(_deepmerge, *dicts)

def find_corners(self) -> List[int]:
    # Map each edge (canonicalised so an edge and its reverse compare equal)
    # to the ids of the tiles that contain it
    edges_dict_list = [{
        min(edge, edge[::-1]): tile.id
        for edge in tile.edges
    } for tile in self.tiles]
    edges = merge_with(lambda x: x, *edges_dict_list)
    # Edges shared by exactly two tiles are interior edges; a corner tile
    # appears on exactly two of them
    freqs = frequencies(
        concat((value for value in edges.values() if len(value) == 2)))
    corners = [id for id, count in freqs.items() if count == 2]
    if len(corners) != 4:
        raise ValueError("Wrong number of corners!")
    return corners

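# Hedged sketch with made-up edge strings and tile ids: merge_with hands the combining
# function the list of collected values, so an identity lambda keeps every tile id that
# shares a given edge:
# >>> merge_with(lambda x: x, {'ABCD': 1}, {'ABCD': 7}, {'XYZW': 7})
# {'ABCD': [1, 7], 'XYZW': [7]}
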
def computeRuleHitsForFileSet(self, xliffs):
    """
    For each file in the given filename -> PO object dictionary,
    compute the Rule -> Hits dictionary.

    Stores the information in the current instance.
    Does not return anything.
    """
    # Compute dict with sorted & prettified filenames
    self.files = sorted(xliffs.keys())
    # Add all futures to the executor
    futures = [
        self.executor.submit(self.computeRuleHits, filename)
        for filename in xliffs.keys()
    ]
    # Process the results in first-received order. Also keep track of rule performance
    self.fileRuleHits = collections.defaultdict(dict)
    n_finished = 0
    # Intermediate result storage
    raw_results = collections.defaultdict(dict)  # filename -> {rule: result}
    for future in concurrent.futures.as_completed(futures):
        # Extract result
        for filename, rule, result in future.result():
            self.fileRuleHits[filename][rule] = result
        # Track progress
        n_finished += 1
        if n_finished % 1000 == 0:
            percent_finished = n_finished * 100. / len(futures)
            print("Rule computation finished {0:.2f} %".format(percent_finished))
    # Compute total stats by file
    self.statsByFile = {
        filename: merge(self.ruleHitsToSeverityCountMap(ruleHits),
                        {"translation_url": self.translationURLs[filename]})
        for filename, ruleHits in self.fileRuleHits.items()
    }
    # Compute map filename -> {rule: numHits for rule}
    self.statsByFileAndRule = {
        filename: valmap(len, ruleHits)
        for filename, ruleHits in self.fileRuleHits.items()
    }
    # Compute map rule -> numHits for rule
    self.totalStatsByRule = merge_with(sum, *(self.statsByFileAndRule.values()))

def step(self, actions):
    actions = np.split(actions, self.proc_nums * self.env_nums_per_proc)
    for index, parent_pipe in enumerate(self.parent_pipes):
        parent_pipe.send((
            'step',
            actions[index * self.env_nums_per_proc:
                    (index + 1) * self.env_nums_per_proc]
        ))
    results = []
    for parent_pipe in self.parent_pipes:
        results += parent_pipe.recv()
    obs, rews, dones, infos = zip(*results)
    self._obs = np.stack(obs)
    infos = merge_with(np.array, *infos)
    return self._obs, np.stack(rews)[:, np.newaxis], \
        np.stack(dones)[:, np.newaxis], infos

def test_merge_with_non_dict_mappings():
    class Foo(Mapping):
        def __init__(self, d):
            self.d = d

        def __iter__(self):
            return iter(self.d)

        def __getitem__(self, key):
            return self.d[key]

        def __len__(self):
            return len(self.d)

    d = Foo({1: 1})
    assert merge(d) is d or merge(d) == {1: 1}
    assert merge_with(sum, d) == {1: 1}

def computeRuleHitsForFileSet(self, poFiles):
    """
    For each file in the given filename -> PO object dictionary,
    compute the Rule -> Hits dictionary.

    Stores the information in the current instance.
    Does not return anything.
    """
    # Compute dict with sorted & prettified filenames
    self.files = sorted(poFiles.keys())
    # Add all futures to the executor
    futures = list(itertools.chain(
        *(self.computeRuleHits(po, filename)
          for filename, po in poFiles.items())))
    # Process the results in first-received order. Also keep track of rule performance
    self.fileRuleHits = collections.defaultdict(dict)
    n_finished = 0
    # Intermediate result storage
    raw_results = collections.defaultdict(dict)  # filename -> {rule: result}
    for future in concurrent.futures.as_completed(futures):
        # Extract result
        filename, rule, result = future.result()
        self.fileRuleHits[filename][rule] = result
        # Track progress
        n_finished += 1
        if n_finished % 1000 == 0:
            percent_finished = n_finished * 100. / len(futures)
            print("Rule computation finished {0:.2f} %".format(percent_finished))
    # Compute total stats by file
    self.statsByFile = {
        filename: merge(self.ruleHitsToSeverityCountMap(ruleHits),
                        {"translation_url": self.translationURLs[filename]})
        for filename, ruleHits in self.fileRuleHits.items()
    }
    # Compute map filename -> {rule: numHits for rule}
    self.statsByFileAndRule = {
        filename: valmap(len, ruleHits)
        for filename, ruleHits in self.fileRuleHits.items()
    }
    # Compute map rule -> numHits for rule
    self.totalStatsByRule = merge_with(sum, *(self.statsByFileAndRule.values()))

import sys

from toolz.dicttoolz import merge_with, valfilter


def parse(line):
    parts = line.split()
    return (parts[1], parts[7])


pairs = [parse(line) for line in open('1')]
# Merge the per-pair singleton dicts, concatenating the value lists for duplicate keys
d = merge_with(lambda x: [z for y in x for z in y],
               *map(lambda x: dict([(x[0], [x[1]])]), pairs))
print(d)

avail = ''.join(list(d.keys()))
print(avail)
for v in d.values():
    for c in v:
        avail = avail.replace(c, '')
avail = sorted(avail)

order = ''
current = ''


def filter_avail(s):
    out = []
    for x in s:
        pres = list(valfilter(lambda y: x in y, d).keys())
        good = True
        for p in pres:
            if p not in order:
                good = False
                break
        if good:

def test_merge_with_iterable_arg():
    dicts = {1: 1, 2: 2}, {1: 10, 2: 20}
    assert merge_with(sum, *dicts) == {1: 11, 2: 22}
    assert merge_with(sum, dicts) == {1: 11, 2: 22}
    assert merge_with(sum, iter(dicts)) == {1: 11, 2: 22}

def test_merge_with_iterable_arg(self):
    D, kw = self.D, self.kw
    dicts = D({1: 1, 2: 2}), D({1: 10, 2: 20})
    assert merge_with(sum, *dicts, **kw) == D({1: 11, 2: 22})
    assert merge_with(sum, dicts, **kw) == D({1: 11, 2: 22})
    assert merge_with(sum, iter(dicts), **kw) == D({1: 11, 2: 22})

def GetRecommendedEvents(self, request, context):
    session = DBSession()
    try:
        user_id = request.user_id
        k_events = request.k_events
        user_recommendation_score = session.query(UserEvent)\
            .filter(UserEvent.user_id == user_id, EventRecommendation.score.isnot(None))\
            .join(EventRecommendation, UserEvent.event_id == EventRecommendation.event_id)\
            .with_entities(EventRecommendation.score)

        # Accumulate per-event scores across all of the user's recommendations
        dictionary = {}
        for item in user_recommendation_score:
            dictionary = merge_with(sum, dictionary, item[0])
        event_ids = list(dictionary.keys())

        # Check durations and drop events that have already started
        duration_query = session.query(EventDuration)\
            .filter(EventDuration.event_id.in_(event_ids))\
            .group_by(EventDuration.event_id)\
            .with_entities(EventDuration.event_id, func.min(EventDuration.start))
        event_ids_to_be_removed = []
        for item in duration_query:
            if datetime.now().replace(tzinfo=utc) > item[1].replace(tzinfo=utc):
                event_ids_to_be_removed.append(str(item[0]))
        event_ids_future = list(
            set(event_ids).difference(set(event_ids_to_be_removed)))

        # Get scores and convert to probabilities using softmax
        events_score = [dictionary[x] for x in event_ids_future]
        if len(events_score) == 0:
            return personalization_service.GetRecommendedEventsResponse(
                event_collection=[])
        prob = softmax(events_score)

        # Events to recommend
        size = min(k_events, len(event_ids_future))
        choice = np.random.choice(event_ids_future, p=prob, size=size, replace=False)

        query_events = session.query(Event).filter(Event.id.in_(choice)).all()
        data = map(
            lambda event: common.Event(
                id=event.id,
                organization_id=event.organization_id,
                location_id=getInt32Value(event.location_id),
                description=event.description,
                name=event.name,
                cover_image_url=getStringValue(event.cover_image_url),
                cover_image_hash=getStringValue(event.cover_image_hash),
                poster_image_url=getStringValue(event.poster_image_url),
                poster_image_hash=getStringValue(event.poster_image_hash),
                profile_image_url=getStringValue(event.profile_image_url),
                profile_image_hash=getStringValue(event.profile_image_hash),
                attendee_limit=event.attendee_limit,
                contact=getStringValue(event.contact),
                registration_due_date=getTimeStamp(event.registration_due_date),
            ),
            query_events,
        )
        return personalization_service.GetRecommendedEventsResponse(
            event_collection=data)
    except Exception as e:
        session.rollback()
        raise Exception(f"Something went wrong: {e}")
    finally:
        session.close()

    line = line.split()
    line = [stem(i) for i in line]
    for s in line:
        d[s] += 1
    return {k: d[k] for k in d.keys() if not drop_word(k)}


if __name__ == "__main__":
    workflow = (
        glob,
        mapcat(open),
        mapcat(str.split),
        map(stem),
        frequencies,
        keyfilter(drop_word),
    )
    wordcount = compose(*reversed(workflow))
    billboard = wordcount("lyrics/billboard/*")
    dylan = wordcount("lyrics/dylan/*")

    m = merge_with(sum, normalise(dylan), normalise(billboard, sign=-1))

    print("\nDylan:")
    col_print(sorted(m, key=m.get)[-50:])
    print("\nBillboard:")
    col_print(sorted(m, key=m.get)[:50])