def test_ordering(self):
    expect = self.expect
    fail = self.error_exec

    def order(arr, *args):
        return expr(arr).array_to_stream().order_by(*args)

    docs = [{"id": 100 + n, "a": n, "b": n % 3} for n in range(10)]
    from operator import itemgetter as get

    expect(order(docs, "a"), sorted(docs, key=get('a')))
    expect(order(docs, "-a"), sorted(docs, key=get('a'), reverse=True))

    self.clear_table()
    self.do_insert(docs)
    expect(self.table.order_by("a"), sorted(docs, key=get('a')))
    expect(self.table.order_by("-a"), sorted(docs, key=get('a'), reverse=True))
    # sort with an explicit key: dicts themselves are not orderable
    expect(self.table.filter({'b': 0}).order_by("a"),
           sorted((doc for doc in docs if doc['b'] == 0), key=get('a')))
    expect(self.table.filter({'b': 0}).order_by("a").delete(),
           {'deleted': len([doc for doc in docs if doc['b'] == 0])})
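# Quick reference for the `get` used throughout these snippets: it is
# operator.itemgetter, which builds a key- or index-lookup callable.
from operator import itemgetter as get

row = {'id': 7, 'a': 2, 'b': 1}
print(get('a')(row))         # 2
print(get('a', 'b')(row))    # (2, 1) -- multiple keys yield a tuple
print(get(0)(('x', 'y')))    # 'x' -- works on sequences by index too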
def SRTF(no, dictionary):
    print("SRTF processing.....")
    from operator import itemgetter as get  # fixed: operator has no `get`
    total = 0
    P_no = no
    dictionary.sort(key=get('arrival'))
    temp = dictionary[0]
    del dictionary[0]
    total = total + temp['arrival']
    while P_no:
        P_no2 = 0
        next_process = 0
        while P_no2 < P_no - 1:
            if dictionary[next_process]['burst'] < temp['burst']:
                if dictionary[next_process]['arrival'] <= total:
                    # a shorter job has arrived: preempt the running one
                    dictionary += [temp]
                    temp = dictionary[next_process]
                    del dictionary[next_process]
            else:
                next_process = next_process + 1
            P_no2 = P_no2 + 1
        temp['burst'] = temp['burst'] - 1
        total = total + 1
        if temp['burst'] == 0:
            print("%s takes %d seconds to complete." % (temp['name'], total))
            P_no = P_no - 1  # decrement first: when the last job finishes there is nothing left to pop
            if P_no != 0:
                dictionary.sort(key=get('arrival'))
                temp = dictionary[0]
                del dictionary[0]
    print("SRTF ENDS")
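# Hypothetical usage sketch for SRTF above; the field names ('name',
# 'arrival', 'burst') are the ones the function indexes. Pass a copy,
# since the function mutates both the list and the burst values.
procs = [
    {'name': 'P1', 'arrival': 0, 'burst': 7},
    {'name': 'P2', 'arrival': 2, 'burst': 4},
    {'name': 'P3', 'arrival': 4, 'burst': 1},
]
SRTF(len(procs), [dict(p) for p in procs])
# P3 takes 5 seconds, P2 takes 7, P1 takes 12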
def get_file_options_args(func):
    annotations = func.__annotations__.copy()
    del annotations['return']
    pos_args, opt_args = partition(lambda x: x[0] == 'opts', annotations.items())
    pos_args = list(map(get(1), pos_args))
    opt_args = list(map(get(1), opt_args))
    return pos_args, opt_args
def is_stars_intersect(_star1, _star2):
    center_dist = distance(get("x", "y")(_star1), get("x", "y")(_star2))
    if center_dist < _star1["r_inner"] + _star2["r_outer"]:
        return True
    elif center_dist < _star1["r_outer"] * 2:
        return hard_check(_star1, _star2)
    else:
        return False
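# Minimal sketch of the `distance` helper the star/ellipse snippets assume
# (not shown in the source): Euclidean distance between the (x, y) tuples
# produced by itemgetter("x", "y").
import math
from operator import itemgetter as get

def distance(p1, p2):
    return math.hypot(p1[0] - p2[0], p1[1] - p2[1])

star = {"x": 3.0, "y": 4.0, "r_inner": 1.0, "r_outer": 2.0}
origin = {"x": 0.0, "y": 0.0, "r_inner": 1.0, "r_outer": 2.0}
print(distance(get("x", "y")(star), get("x", "y")(origin)))  # 5.0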
def get_results(self, word, limit=100):
    results = []
    if re.match(self.lang_ru, word):
        results.extend(self.data[self.hash_ru(word)])
    if re.match(self.lang_en, word):
        results.extend(self.data[self.hash_en(word)])
    # materialize: a bare map object cannot be sliced on Python 3
    results = list(map(get(1), sorted(results, key=get(0), reverse=True)))
    if limit > 0:
        results = results[:limit]
    return results
def exec_fifth(self):
    e = EditDistance()
    a = Ngram()
    originale = raw_input("**** Enter a word --> ")
    parola = self.storpia(originale)
    print '**** Garbled word -->', parola

    # edit distance
    print '----- EDIT DISTANCE'
    # cost thresholds: 1, 2, 3, 4, 5
    for c in range(1, 6):
        with open('60000_parole_italiane.txt', 'r') as f:
            e_results = []
            for line in f:
                p = line.rstrip()
                _, op = e.edit_distance(parola, p)
                costo = e.op_sequence(op, len(parola) - 1, len(p) - 1, [])
                if costo < c:
                    e_results.append((p, costo))
        if any(originale in r for r in e_results):
            w = 'original word found!'
        else:
            w = 'original word not found!'
        print w, '(cost threshold %s, %s results)' % (
            c, len(e_results)), '-->', sorted(e_results, key=get(1))

    # ngram
    print '----- NGRAM'
    b = a.ngram(parola, self.numberOfGrams)
    # Jaccard coefficients: 0.5, 0.6, 0.7, 0.8, 0.9
    for j in np.arange(0.5, 1.0, 0.1):
        with open("%s_grams.txt" % self.numberOfGrams, 'r') as f:
            g_results = []
            for line in f:
                s = line.split(' -> ')
                p, g = s[0], s[1]
                score = a.jaccard(b, g)  # renamed from `f`, which shadowed the file handle
                if score > j:
                    g_results.append((p, score))
        if any(originale in r for r in g_results):
            w = 'original word found!'
        else:
            w = 'original word not found!'
        print w, '(jaccard %s, %s results)' % (
            j, len(g_results)), '-->', sorted(g_results, key=get(1), reverse=True)
def is_stars_intersect(_star1, _star2):
    r_dist = _star1["r_inner"] + _star2["r_outer"]
    center_dist = distance(get("x", "y")(_star1), get("x", "y")(_star2))
    print("R_DIST: ")
    print(r_dist)
    print("CENTER_DIST: ")
    print(center_dist)
    if center_dist < r_dist:
        return True
    elif center_dist < _star1["r_outer"] * 2:
        print("Stars are too close for additional check")
        return hard_check(_star1, _star2)
    else:
        return False
def predict(self, document, topics_map: Dict[int, str],
            num_topics: int) -> List[str]:
    """
    Predict topics distribution for a document.

    :params document: document to predict topics for.
    :params topics_map: a mapping of topic number to topic name.
    :params num_topics: return the top num_topics.
    :returns: a list of topic numbers sorted by their probabilities.
    """
    tokens = (seq([document])
              .map(self.preprocess_document)
              .map(lemmatize)  # type: ignore
              .map(self.tokenize)
              .map(self.create_trigrams)
              .flat_map(self.id2word.doc2bow)  # type: ignore
              .to_list())
    topics = (seq(self._lda_model[tokens][0])
              .sorted(key=lambda x: -x[1])
              .map(get(0))
              .filter(None)
              .distinct()
              .take(num_topics))
    if topics_map:
        topics = topics.map(lambda topic: topics_map[topic])
    return topics.to_list()
def predict(self, o, classification_method='SUM', only_positive=False):
    T_pos = filter(lambda r: r.antecedent <= o.antecedent, self.R_newpos)
    if classification_method == 'BEST':
        return max(T_pos, key=by('confidence')).class_
    else:
        if only_positive:
            T_neg = []
        else:
            T_neg = map(lambda r: r._replace(confidence=-r.confidence),
                        filter(lambda r: r.antecedent <= o.antecedent,
                               self.R_newneg))
        Ts = groupby(sorted(chain(T_pos, T_neg), key=by('class_')),
                     by('class_'))
        if classification_method == 'AVE':
            class_to_score_tuples = map(
                lambda k_v: (lambda v: (k_v[0],
                                        sum(map(by('confidence'), v)) / (len(v) or 1)))(list(k_v[1])),
                Ts)
        elif classification_method == 'SUM':
            class_to_score_tuples = map(
                lambda k_v: (k_v[0], sum(map(by('confidence'), k_v[1]))), Ts)
        else:
            raise ValueError(
                "Invalid classification_method value (should be one of: 'BEST', 'SUM', 'AVE')")
        class_score = defaultdict(lambda: 0)
        class_score.update(dict(class_to_score_tuples))
        return max([(c, class_score[c]) for c in self.classes], key=get(1))[0]
def remove_reacts(reacts):
    reacts_by_emoji = create_index(reacts, get('emoji'))
    # use clear_reactions() if we're deleting all reactions. calling it is a
    # bit risky since we might accidentally remove a new reaction as it
    # comes in, so we only use it if we're removing more than 1 set of reacts.
    # (`new_reacts`, `bot`, `chan_id`, and `main_id` come from the enclosing scope)
    if len(new_reacts) == 0 and len(reacts_by_emoji) > 1:
        return [bot.clear_reactions(chan_id, main_id)]
    return chain(*starmap(remove_reacts_by_emoji, reacts_by_emoji.items()))
def SJF(no, dictionary):
    print("SHORTEST JOB FIRST ALGORITHM IS RUNNING")
    from operator import itemgetter as get  # fixed: operator has no `get`
    total = 0
    P_no = 0
    dictionary.sort(key=get('burst'))
    while P_no < no:
        if P_no == 0:
            total = total + dictionary[P_no]['arrival'] + dictionary[P_no]['burst']
        else:
            total = total + dictionary[P_no]['burst']
        print("%s takes %d seconds to complete." % (dictionary[P_no]['name'], total))
        P_no = P_no + 1
    print("SJF ENDS \n")
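# Hypothetical usage sketch for SJF above, with the same field names.
# Note the function sorts the list in place and, as in the original,
# only the first job's arrival time contributes to the completion times.
jobs = [
    {'name': 'J1', 'arrival': 0, 'burst': 6},
    {'name': 'J2', 'arrival': 1, 'burst': 2},
    {'name': 'J3', 'arrival': 2, 'burst': 8},
]
SJF(len(jobs), jobs)
# J2 takes 3 seconds, J1 takes 9, J3 takes 17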
def main(args):
    incr = get_increment(args)
    if not os.path.isfile(CFG):
        exit("No config file found, nothing to do.")
    files, pre_cmds, post_cmds = read_conf()
    files_paths = list(map(get("path"), files))
    files_patterns = list(map(get("pattern"), files))
    files_lines = list(map(read_lines, files_paths))

    # check if versions are unique
    versions = flatten(map(get_versions, files_lines, files_patterns))
    current = check_versions(versions)
    newver = str(incr(semver(current)))
    if current == newver:
        exit("Nothing to do.")
    print("Bumping to version %s" % newver)

    t_vars = {"oldv": current, "newv": newver, "files": " ".join(files_paths)}
    exec_many(pre_cmds, t_vars)
    for pattern, lines, path in zip(files_patterns, files_lines, files_paths):
        splice_new_version = replace_first_with(pattern, newver)
        is_version_line = partial(re.search, pattern)
        new_lines = map(update_matching(is_version_line, splice_new_version), lines)
        copyfile(path, "%s.%d.bkp" % (path, epoch_now()))
        write_lines(path, new_lines)
    exec_many(post_cmds, t_vars)
def differing_rank(rows):
    # no domain
    ranks = reversed(['superkingdom', 'kingdom', 'phylum', 'class', 'order',
                      'superfamily', 'family', 'genus', 'species'])
    differ_at = 'MATCH'
    for rank in ranks:
        row_ranks = list(map(get(rank), rows))  # materialize so it can be traversed twice
        uniq = set(row_ranks)
        if len(uniq) > 1:
            differ_at = rank
            # none of the below are used
            popular_rank, popular_count = Counter(row_ranks).most_common(1)[0]
            percent_match = popular_count / float(len(rows))
    return differ_at
def extract_args(name_space):
    def extract(nt, args):
        vals = (getattr(name_space, arg) for arg in args)
        args_dict = dict(zip(args, vals))
        return nt(**args_dict)

    non_nt_types, nts = partition(compose(is_namedtuple, get(1)), types.items())
    nt_names, nt_types = zip(*nts)
    nt_args = map(lambda x: x._fields, nt_types)
    args_dict = dict(zip(nt_names, map(extract, nt_types, nt_args)))
    non_nt_types = list(non_nt_types)
    non_nts = dict((attr, getattr(name_space, attr))
                   for (attr, _) in non_nt_types)
    args_dict.update(non_nts)
    return args_dict
def blast2summary_dict(db, blastpath):  # (Path, Path) -> list[dict]
    """Reading in a blast output file, lookup all seqids to get taxids
    with a single blastdbcmd. Then, lookup the taxonomy using ETE2 via
    the taxid, and add that info to the blast info."""
    rows = csv.DictReader(open(blastpath), delimiter='\t', fieldnames=[
        'qseqid', 'sseqid', 'pid', 'alnlen', 'gapopen', 'qstart', 'qend',
        'sstart', 'send', 'evalue', 'bitscore'
    ])
    rows = list(rows)
    seqids = list(map(get('sseqid'), rows))  # materialize: consumed twice below
    taxids = get_taxid(db, seqids)
    gis = (s.split('|')[1] for s in seqids)
    matches = dict((taxids[gi], row)
                   for gi, row in zip(gis, rows) if gi in taxids)
    ncbi = NCBITaxa()  # downloads database and creates SQLite database if needed
    return dictmap(lambda tid, row: merge(row, taxonomy(ncbi, tid)), matches)
def blast2summary_dict(db, blastpath, ete2_db):  # (Path, Path) -> list[dict]
    """Reading in a blast output file, lookup all seqids to get taxids
    with a single blastdbcmd. Then, lookup the taxonomy using ETE2 via
    the taxid, and add that info to the blast info."""
    rows = csv.DictReader(open(blastpath), delimiter='\t', fieldnames=blast_columns)
    rows = list(rows)
    seqids = map(get('sseqid'), rows)
    taxids = get_taxid(db, seqids)

    def get_gi(s):
        fields = s.split('|')
        if len(fields) > 1:
            return fields[1]
        else:
            raise ValueError("Seq ID %s is missing GI fields and '|'" % s)

    gis = imap(get_gi, seqids)
    # TODO: change matches to use something unique--not the TAXID! actually,
    # why is it a dict in the first place? it should be a list of dictionaries,
    # and then map over the dictionaries to merge them with the taxonomy info.
    # this will replace the lines:
    #   matches = dict((taxids[gi], row) for gi, row in zip(gis, rows) if gi in taxids)
    #   items = dictmap(lambda tid, row: merge(row, taxonomy(ncbi, tid)), matches)
    ncbi = NCBITaxa(ete2_db)  # downloads database and creates SQLite database if needed
    matches = [assoc(row, 'taxid', taxids[gi])
               for gi, row in zip(gis, rows) if gi in taxids]
    items = [merge(row1, taxonomy(ncbi, row1['taxid'])) for row1 in matches]
    res = imap(partial(keyfilter, csv_fields.__contains__), items)
    return res
def process(refs_fn, query_fn, save_path=None, html=True):
    ref_seqs, ref_dates, ref_names = zip(*sorted(
        zip(*get_seqs_and_dates(refs_fn)), key=get(1)))
    # assert len(ref_seqs) > 1, "Need more than 1 reference sequence"
    ref_seqs = list(map(str.upper, ref_seqs))  # materialize so it can be indexed
    super_ref_seq, super_ref_date, super_ref_name = \
        ref_seqs[0], ref_dates[0], ref_names[0]
    print(super_ref_name)
    print(super_ref_date)
    get_mutations = partial(hamming, super_ref_seq)

    def get_relative_info(seqs, dates, names):
        muts = map(get_mutations, seqs)
        dists = [(yr - super_ref_date).days for yr in dates]
        return muts, dists, names

    ref_muts, ref_dists, ref_names = get_relative_info(ref_seqs, ref_dates, ref_names)
    query_muts, query_dists, query_names = get_relative_info(*get_seqs_and_dates(query_fn))
    do_plot(ref_dists, ref_muts, ref_names,
            query_dists, query_muts, query_names, save_path, html)
def loadMVP_m1(cast_no, z_bins=None, bin_data=True, lagT=True, mask_nans=False):
    """Load .m1 file and bin data into z_bins if desired

    Inputs
    ------
    cast_no : integer
        Cast number associated with the .m1 file
    z_bins : array
        Depth values to bin results into
    bin_data : bool
        Whether to bin data (influences what the function returns)
    lagT : bool
        Whether or not to lag temperature (set to False in
        obtain_phase_lag.py to see how the dt comes about)
    mask_nans : bool
        Whether to return binned values as masked arrays

    Returns
    -------
    xyt : dict
        Time and location information
    data : dict
        Vectors of length N_raw for each of the parameters in field
    binned : dict (returned only if bin_data is True)
        As for data, but binned into z_bins with results that are
        len(z_bins) - 1
    """
    filename = cast_no_to_filename(cast_no)

    # Get header information
    with open(filename, 'r') as f:
        # read in 5000 bytes to ensure all header is read
        header = f.read(5000)
    h = header_info(header)

    # Time and place
    xyt = {}
    xyt['lat'] = h.coords('lat')
    xyt['lon'] = h.coords('lon')
    xyt['time'] = h.date_time('time')
    xyt['date'] = h.date_time('date')
    xyt['cast'] = h.cast_number()
    xyt['bottom'] = h.bottom()

    # Hydrography
    fields = ['p', 'z', 'SV', 'T', 'C', 'S', 'rho', 'ANGL1', 'ANLG2', 'ANLG3']
    data = m1_to_dict(filename, fields)
    data['p_raw'], data['z_raw'] = data['p'].copy(), data['z'].copy()
    data['p'], data['z'] = smooth_p_and_z(*get('p_raw', 'z_raw')(data))

    if lagT:
        # Having the option to not lag temperature is helpful to show how
        # I got the lag
        data['T_unlagged'] = data['T'].copy()
        data['S_unlagged'] = data['S'].copy()
        data['S'], data['T'] = lag_temperature(*get('C', 'T', 'p')(data))

    # Note: I think I have covered my bases to ensure arrays aren't modified
    # in place, but I might have missed some. Anyway, I calculate dissipation
    # early on, just to be safe, since the functions after that include
    # smoothing procedures, which would ruin the diss calculation
    data['prho'], data['rho'] = calc_density(*get('S', 'T', 'p')(data))
    # data['eps'], data['L_T'] = calc_eps(*get('p_raw', 'prho', 'z')(data))
    # data['eps_zavg'], data['eps_z_integral'] = calc_eps_avg(
    #     data['eps'], data['z'], xyt['bottom'])
    data['theta'] = potential_temp(*get('S', 'T', 'p')(data))
    data['N2'] = calc_N2(*get('p', 'prho', 'z')(data))

    if bin_data:
        z_bins = np.arange(0, 250) if z_bins is None else z_bins
        binned = bin_fields(data, z_bins, mask_nans)
        binned['z_bins'] = z_bins
        hori, vert, c = calc_modes(binned['N2'], xyt['bottom'], z_bins)
        binned['hori_0'], binned['hori_1'], binned['hori_2'] = hori.T
        binned['vert_0'], binned['vert_1'], binned['vert_2'] = vert.T
        binned['c0'], binned['c1'], binned['c2'] = c
        return xyt, data, binned
    else:
        return xyt, data
def ellipse_dist(self, _ellipse1, _ellipse2):
    return distance(get("x", "y")(_ellipse1), get("x", "y")(_ellipse2))
def unzip(seq):
    t1, t2 = tee(seq)
    return imap(get(0), t1), imap(get(1), t2)
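# Hypothetical usage of unzip above (Python 2, given itertools.imap): it
# splits an iterable of pairs into two lazy streams. Note that tee()
# buffers items while the two streams are consumed unevenly.
pairs = [(1, 'a'), (2, 'b'), (3, 'c')]
firsts, seconds = unzip(pairs)
print list(firsts)   # [1, 2, 3]
print list(seconds)  # ['a', 'b', 'c']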
def build_pipeline(funcs: List[Callable[..., Any]], input) -> Callable[..., Any]:
    nodes = order_funcs(funcs, input)
    ordered_funcs = map(get(0), nodes)
    return reduce(compose, ordered_funcs)
def reacts_by_emoji(state):
    reacts_without_bot = (r for r in state.reacts
                          if r.user_id != state.bot.user_id)
    return defaultdict(set, create_index(reacts_without_bot, get('emoji')))
if args.influential_url:
    for nation in influential_nation_names:
        nat = id_str(nation)
        if nat not in wa_nation_set:
            if nat in resident_set:
                endos = get_nation_endos(nat)
                del endos['endorsers']
                endos['endos_given'] = 0
                infos[nat] = endos

res = {}
rows = args.rows
cols = args.columns
col_keys = {'endorsers': 'endos_given', 'endorsees': 'endos',
            'influential': 'influence_score'}
for col in cols:
    res[col] = sorted(infos.values(),
                      key=lambda info: info[col_keys[col]],
                      reverse=True)[0:rows]
res = dict(map(lambda item: (item[0], map(get('name'), item[1])),
               res.iteritems()))
res['pool'] = map(lambda n: infos[n],
                  apply(set.union, map(lambda l: set(l), res.values())))

if args.output:
    outf = open(args.output, "w+")
else:
    import sys
    outf = sys.stdout
json.dump(res, outf, separators=(',', ':'), sort_keys=True)
if not args.output:
    print ""
def process_user(target_user, filtered_calendars):
    filtered_calendar_by_email_dict = dict(
        zip(map(get('resource_email'), filtered_calendars), filtered_calendars))
    contacts_client = contacts(email=target_user, options=options())

    if options().undo:
        undo(contacts_client, target_user, ContactsFeed)
        return

    # Get Contacts Groups for user
    groups = contacts_client.get_groups().entry

    # Find Contact Group by extended property
    magic_group = get_magic_group(groups) or create_magic_group(contacts_client)
    magic_group_members = get_group_members(contacts_client, magic_group)
    magic_group_emails_set = map(get('address'),
                                 flatmap(get('email'), magic_group_members))

    # Find "My Contacts" group in Contacts
    my_contacts_group = next(
        iter(filter(
            lambda group: group.system_group and
            group.system_group.id == options().my_contacts_id,
            groups)),
        None)

    logging.info('%s: Using group called "%s" with %d members and ID %s',
                 target_user, magic_group.title.text,
                 len(magic_group_members), magic_group.id.text)

    # Add new Calendar Resources as Contacts
    with closing(Batch(contacts_client, ContactsFeed)) as batch:
        for cal in filter(lambda x: x.resource_email not in magic_group_emails_set,
                          filtered_calendars):
            new_contact = calendar_resource_to_contact(cal)

            # Add Contact to the relevant groups
            new_contact.group_membership_info.append(
                GroupMembershipInfo(href=magic_group.id.text))
            if options().my_contacts and my_contacts_group:
                new_contact.group_membership_info.append(
                    GroupMembershipInfo(href=my_contacts_group.id.text))

            # Set Contact extended property
            extprop = ExtendedProperty()
            extprop.name = options().contact_extended_property_name
            extprop.value = options().contact_extended_property_value
            new_contact.extended_property.append(extprop)

            logging.debug('%s: Creating contact "%s"',
                          target_user, new_contact.name.full_name.text)
            batch.put('add_insert', new_contact)

    # Sync data for existing Calendar Resources that were added by the
    # script. Remove those that have been deleted.
    with closing(Batch(contacts_client, ContactsFeed)) as batch:
        for existing_contact in filter(is_script_contact, magic_group_members):
            calendar_resource_to_copy = get_value_by_contact_email(
                filtered_calendar_by_email_dict, existing_contact)
            if calendar_resource_to_copy:
                calendar_contact = calendar_resource_to_contact(
                    calendar_resource_to_copy)
                if sync_contact(calendar_contact, existing_contact):
                    logging.info('%s: Modifying contact "%s" with ID %s',
                                 target_user,
                                 existing_contact.name.full_name.text,
                                 existing_contact.id.text)
                    batch.put('add_update', existing_contact)
            elif options().delete_old:
                logging.info(
                    '%s: Removing surplus auto-generated contact "%s" with ID %s',
                    target_user, existing_contact.name.full_name.text,
                    existing_contact.id.text)
                batch.put('add_delete', existing_contact)
cnt = count_word_freq(path)
if is_test:
    fn_out = '00-out.txt'
    with open(fn_out, 'w') as f:
        for key, value in sorted(cnt.items()):
            print(f'{key}\t{value}', file=f)
    message("[*] sh check.sh")
    # compare with 'test/00-answer.txt'
    subprocess.run(f'diff -s {fn_out} ../../test/00-answer.txt'.split())
    os.remove(fn_out)
else:
    print("[+] number of distinct words:", len(cnt), "types")
    print("[+] word frequencies (top 10 words only)")
    for key, value in sorted(cnt.items(), key=get(1), reverse=True)[:10]:
        print(key, value)

    message("[*] using collections.Counter instead")
    cnt = word_frequency_cnter(path)
    for key, value in cnt.most_common(10):
        print(key, value, file=sys.stderr)

    message("[*] with trans=lambda x: x.lower() specified")
    cnt = count_word_freq(path, trans=lambda x: x.lower())
    for key, value in sorted(cnt.items(), key=get(1), reverse=True)[:10]:
        print(key, value)

message("[+] Finished!")
def _run_sample(_dict):
    indir, sample_id, primer_file = get(
        'sample_directory', 'sample_id', 'primer_file')(_dict)
    return run_sample(indir, os.path.join(outdir, sample_id),
                      truseq, sample_id, primer_file)
def on_field(f: Union[str, List[str]], *vec):
    return make_pipeline(FunctionTransformer(get(f), validate=False), *vec)
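# Hypothetical usage of on_field above, assuming scikit-learn: itemgetter
# pulls one column out of a DataFrame-like object before vectorizing. The
# column names 'name' and 'description' are illustrative only.
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import make_union

vectorizer = make_union(
    on_field('name', TfidfVectorizer(max_features=1000)),
    on_field('description', TfidfVectorizer(max_features=5000)),
)
# X = vectorizer.fit_transform(df)  # df['name'] and df['description'] hold text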