def _get_larger_chroms(ref_file):
    """Retrieve larger chromosomes, avoiding the smaller ones for plotting.
    """
    # np, tz and ref are module-level imports (numpy, toolz, bcbio.bam.ref)
    # in the original source; pulled in here so the snippet is self-contained
    import numpy as np
    import toolz as tz
    from scipy.cluster.vq import kmeans, vq
    from bcbio.bam import ref
    all_sizes = []
    for c in ref.file_contigs(ref_file):
        all_sizes.append(float(c.size))
    all_sizes.sort()
    if len(all_sizes) > 5:
        # separate out smaller chromosomes and haplotypes with kmeans
        centroids, _ = kmeans(np.array(all_sizes), 2)
        idx, _ = vq(np.array(all_sizes), centroids)
        little_sizes = tz.first(tz.partitionby(lambda xs: xs[0], zip(idx, all_sizes)))
        little_sizes = [x[1] for x in little_sizes]
        # create one more cluster with the smaller, removing the haplotypes
        centroids2, _ = kmeans(np.array(little_sizes), 2)
        idx2, _ = vq(np.array(little_sizes), centroids2)
        little_sizes2 = tz.first(tz.partitionby(lambda xs: xs[0], zip(idx2, little_sizes)))
        little_sizes2 = [x[1] for x in little_sizes2]
        # get any chromosomes not in haplotype/random bin
        thresh = max(little_sizes2)
    else:
        thresh = 0
    larger_chroms = []
    for c in ref.file_contigs(ref_file):
        if c.size > thresh:
            larger_chroms.append(c.name)
    return larger_chroms
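A minimal sketch (not from the original source) of the clustering trick above, on made-up contig sizes: vq labels each size with its nearest centroid, and since the sizes are sorted, partitionby on that label splits them into contiguous runs, the first being the small-size cluster.

# Toy demo with hypothetical, well-separated contig sizes
import numpy as np
import toolz as tz
from scipy.cluster.vq import kmeans, vq

sizes = [0.5, 0.7, 1.0, 150.0, 180.0, 200.0]  # hypothetical sorted sizes
centroids, _ = kmeans(np.array(sizes), 2)
idx, _ = vq(np.array(sizes), centroids)
small_run = tz.first(tz.partitionby(lambda xs: xs[0], zip(idx, sizes)))
print([x[1] for x in small_run])  # -> [0.5, 0.7, 1.0]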
def test_partitionby():
    from toolz import first, identity, partitionby
    assert list(partitionby(identity, [])) == []
    vowels = "aeiou"
    # partitionby yields tuples, so the expected groups are tuples
    assert (list(partitionby(vowels.__contains__, "abcdefghi")) ==
            [("a",), ("b", "c", "d"), ("e",), ("f", "g", "h"), ("i",)])
    assert (list(map(first, partitionby(identity,
                                        [1, 1, 1, 2, 3, 3, 2, 2, 3]))) ==
            [1, 2, 3, 2, 3])
    assert ''.join(map(first,
                       partitionby(identity, "Khhhaaaaannnnn!!!!"))) == 'Khan!'
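Worth noting, since the test above compares against tuples: toolz.partitionby materializes each contiguous run as a tuple, not a list.

from toolz import partitionby
print(list(partitionby(lambda c: c in "aeiou", "abcdefghi")))
# -> [('a',), ('b', 'c', 'd'), ('e',), ('f', 'g', 'h'), ('i',)]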
def zhongji(ip='', username='', password=''):
    import pexpect
    from toolz import partition, partitionby, remove
    # telnet(), zte_prompt and zte_pager are helpers/constants defined
    # elsewhere in the original module
    try:
        result = []
        child = telnet(ip, username, password)
        child.sendline("show lacp internal")
        while True:
            index = child.expect([zte_prompt, zte_pager], timeout=120)
            if index == 0:
                result.append(child.before)
                child.sendline('exit')
                child.close()
                break
            else:
                result.append(child.before)
                child.send(' ')
                continue
    except (pexpect.EOF, pexpect.TIMEOUT):
        return ['fail', None, ip]
    rslt = ''.join(result).split('\r\n')[1:-1]
    # keep smartgroup headers and member ports marked 'selected'
    records = [x.replace('\x08', '').strip() for x in rslt
               if 'Smartgroup' in x or 'selected' in x]
    records = remove(lambda x: 'unselected' in x, records)
    rec1 = [x.split()[0].lower().replace(':', '') for x in records]
    # partitionby splits rec1 at each smartgroup header; partition(2, ...)
    # then pairs every header run with the member run that follows it
    rec2 = partition(2, partitionby(lambda x: 'smartgroup' in x, rec1))
    rec3 = {x[0][0]: x[1] for x in rec2}
    return ['success', rec3, ip]
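The partition(2, partitionby(...)) idiom in the function above pairs each smartgroup header with the run of member ports that follows it. A toy illustration with hypothetical port names:

from toolz import partition, partitionby

rec1 = ['smartgroup1', 'gei_1/5', 'gei_1/6', 'smartgroup2', 'gei_1/7']
rec2 = partition(2, partitionby(lambda x: 'smartgroup' in x, rec1))
print({x[0][0]: x[1] for x in rec2})
# -> {'smartgroup1': ('gei_1/5', 'gei_1/6'), 'smartgroup2': ('gei_1/7',)}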
def dedup_s2_datasets(dss):
    """
    De-duplicate Sentinel 2 datasets. Datasets that share the same timestamp
    and region code are considered to be duplicates.

    - Sort datasets by ``(time, region code, label)``
    - Find groups of datasets that share a common ``(time, region_code)``
    - Out of each duplicate group, pick the one with the most recent
      timestamp in the label (processing time)

    Above, ``label`` is something like this:
    ``S2B_MSIL2A_20190507T093039_N0212_R136_T32NPF_20190507T122231``

    The two timestamps are "capture time" and "processing time".

    :returns: Two lists of Datasets: the first contains the "freshest"
              datasets with no duplicates, the second the less fresh
              duplicates.
    """
    import toolz  # module-level import in the original source
    dss = sorted(dss, key=lambda ds: (ds.center_time,
                                      ds.metadata.region_code,
                                      ds.metadata_doc['label']))
    out = []
    skipped = []

    for chunk in toolz.partitionby(lambda ds: (ds.center_time,
                                               ds.metadata.region_code), dss):
        # labels sort ascending within a chunk, so the last element has
        # the most recent processing time
        out.append(chunk[-1])
        skipped.extend(chunk[:-1])
    return out, skipped
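The same keep-the-last-of-each-group pattern in isolation, on plain tuples standing in for datasets, assuming records are sorted by (key, version) so the freshest version ends each group:

import toolz

recs = [('t1', 'v1'), ('t1', 'v2'), ('t2', 'v1')]  # hypothetical (time, label)
out, skipped = [], []
for chunk in toolz.partitionby(lambda r: r[0], recs):
    out.append(chunk[-1])
    skipped.extend(chunk[:-1])
print(out, skipped)  # -> [('t1', 'v2'), ('t2', 'v1')] [('t1', 'v1')]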
def find_dupes_to_archive(dc, time, keep_threshold=0.05, freq="m"):
    # dataset_count, ordered_dss and overlap_info are helpers defined
    # elsewhere in the original module; dc is an opened datacube instance
    import toolz
    from tqdm import tqdm

    ds_s2_order = lambda ds: (
        ds.center_time,
        ds.metadata.region_code,
        ds.metadata_doc["label"],
    )

    query = dict(product="s2_l2a", time=time)

    n_total = dataset_count(dc.index, **query)
    dss = ordered_dss(dc, key=ds_s2_order, freq=freq, **query)
    dss = tqdm(dss, total=n_total)

    # keep only the (time, region_code) groups that contain duplicates
    groups = (group
              for group in toolz.partitionby(
                  lambda ds: (ds.center_time, ds.metadata.region_code), dss)
              if len(group) > 1)

    keep_groups = []
    to_archive = []

    for dss_group in groups:
        a_or, a_and = overlap_info(dss_group)
        # aa is in range [0, 1] with
        #  0 -- 100% overlap across dupes
        #  1 -- 0% overlap across dupes (disjoint footprints)
        aa = (a_or - a_and) / a_or
        if aa > keep_threshold:
            keep_groups.append(dss_group)
        else:
            to_archive.extend(ds.id for ds in dss_group[:-1])

    return to_archive, keep_groups, n_total
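A numeric sketch of the keep/archive rule above, with hypothetical footprint areas:

a_or, a_and = 10.0, 9.8      # union and intersection of duplicate footprints
aa = (a_or - a_and) / a_or   # 0.02: footprints almost fully overlap
print(aa > 0.05)             # -> False, so all but the freshest get archived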
def split_to_header_and_content(lines):
    from itertools import chain
    from toolz import partitionby
    if lines[0][0] != 'g':
        raise IOError('ASCII .nl file is expected.\nPlease generate the'
                      ' .nl file with "write gname;" in the AMPL .mod file\n'
                      'or as "ampl -ogname name.mod" in the command line.')
    # the header is the leading 'g' line plus the following lines that
    # start with a space; everything after that is content
    parts = list(partitionby(lambda l: l[0] == ' ', lines))
    header, content = list(chain(parts[0], parts[1])), parts[2]
    return header, content
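A toy illustration of the three runs produced above, with hypothetical .nl-style lines:

from itertools import chain
from toolz import partitionby

lines = ['g3 1 1 0', ' 2 1', ' 0 0', 'C0', 'o2']  # hypothetical lines
parts = list(partitionby(lambda l: l[0] == ' ', lines))
print(list(chain(parts[0], parts[1])), parts[2])
# -> ['g3 1 1 0', ' 2 1', ' 0 0'] ('C0', 'o2')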
def _read_old_solutions(probname):
    from toolz import partitionby
    # zero_fname, as_pairs and _gen_solutions are helpers defined elsewhere
    # in the original module
    # grab the lines
    with open(zero_fname(probname), 'r') as f:
        lines = [l.strip() for l in f]
    # first line gives the dimension
    dim = int(lines[0])
    # chunks are separated by empty lines
    chunks = list(partitionby(lambda l: not l, lines[1:]))
    gen_chunks = as_pairs(chunks)
    # first chunk gives the variable names in order
    blank, varnames = next(gen_chunks)
    assert blank, blank
    assert dim == len(varnames), (dim, varnames)
    # the remaining chunks give the solution vectors
    solutions = list(_gen_solutions(gen_chunks, dim))
    return varnames, solutions
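How the blank-line chunking above behaves, with a hypothetical as_pairs stand-in built from toolz.partition (the real helper lives in the original module):

from toolz import partition, partitionby

lines = ['', 'x1', 'x2', '', '1.0', '2.0']  # hypothetical file body
chunks = list(partitionby(lambda l: not l, lines))
# -> [('',), ('x1', 'x2'), ('',), ('1.0', '2.0')]
for blank, chunk in partition(2, chunks):   # as_pairs stand-in
    print(chunk)  # -> ('x1', 'x2') then ('1.0', '2.0')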
import os
from pathlib import Path
from typing import Sequence, Union

import toolz as tz


def list_files(paths: Union[Sequence[Union[str, Path]], str, Path]):
    if isinstance(paths, (str, Path)):
        if os.path.isdir(paths):
            return list(
                tz.concat((os.path.join(dp, f) for f in fn)
                          for dp, dn, fn in os.walk(paths)))
        elif os.path.isfile(paths):
            return [paths]
        else:
            raise ValueError(
                f"Input argument {paths} is not a path or a directory")
    elif isinstance(paths, Sequence):
        # sorting by os.path.isdir puts files (False) before dirs (True),
        # so partitionby yields two runs; note the unpacking assumes the
        # sequence contains at least one file and one directory
        sortd = sorted(paths, key=os.path.isdir)
        files, dirs = tuple(tz.partitionby(os.path.isdir, sortd))
        return list(tz.concatv(files, *tz.map(list_files, dirs)))
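The sort-then-partitionby trick above in isolation: sorting by a boolean predicate stably puts False items first, so partitionby yields exactly two runs, with the same caveat that both kinds must be present for the unpacking to succeed.

from toolz import partitionby

items = [4, 1, 3, 2]
sortd = sorted(items, key=lambda x: x % 2 == 0)  # odds first, evens last
odds, evens = tuple(partitionby(lambda x: x % 2 == 0, sortd))
print(odds, evens)  # -> (1, 3) (4, 2)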
def get_segments(nl_header, content):
    # Returns Segments. SEGMENTS, Segments, fix_empty_segments,
    # move_r_beyond_last_C, ADD_SEGMENT and ignored are defined elsewhere
    # in the original module.
    from collections import OrderedDict
    from toolz import partitionby
    n_cons = nl_header.n_cons
    n_objs = nl_header.n_objs
    allsegs = Segments(def_vars=OrderedDict(),
                       cons=[None]*n_cons,
                       objs=[None]*n_objs,
                       var_bnds=[],
                       initial_guess={},
                       con_jacobian=[],
                       eval_order=[],
                       con_blocks=[],
                       var_blocks=[])
    # Creates a sequence of tuples. Two adjacent tuples are the header and
    # the body of the segment:
    #   V68 0 0
    #   o3          ->  ('V68 0 0',), ('o3', 'v2', 'v53')
    #   v2
    #   v53
    segments_w_header = list(partitionby(lambda l: l[0] in SEGMENTS, content))
    segments_w_header = fix_empty_segments(segments_w_header)
    # Assumption: constraint segments precede the r segment. Enforce it:
    segments_w_header = move_r_beyond_last_C(segments_w_header)
    # Populate allsegs by dispatching to the corresponding segment handler.
    for header, segment in segments_w_header:
        ADD_SEGMENT.get(header[0], ignored)(header, segment, allsegs)
    return allsegs
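A toy version of the header/body split above, with a stand-in SEGMENTS marker set:

from toolz import partitionby

SEGMENTS = set('VCO')  # stand-in for the real marker set
content = ['V68 0 0', 'o3', 'v2', 'v53', 'C1', 'v7']
print(list(partitionby(lambda l: l[0] in SEGMENTS, content)))
# -> [('V68 0 0',), ('o3', 'v2', 'v53'), ('C1',), ('v7',)]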