def _get_larger_chroms(ref_file):
    """Retrieve larger chromosomes, avoiding the smaller ones for plotting.
    """
    # np, tz and ref are module-level imports (numpy, toolz, bcbio.bam.ref)
    # in the original source; pulled in here so the snippet is self-contained
    import numpy as np
    import toolz as tz
    from scipy.cluster.vq import kmeans, vq
    from bcbio.bam import ref
    all_sizes = []
    for c in ref.file_contigs(ref_file):
        all_sizes.append(float(c.size))
    all_sizes.sort()
    if len(all_sizes) > 5:
        # separate out smaller chromosomes and haplotypes with kmeans
        centroids, _ = kmeans(np.array(all_sizes), 2)
        idx, _ = vq(np.array(all_sizes), centroids)
        little_sizes = tz.first(tz.partitionby(lambda xs: xs[0], zip(idx, all_sizes)))
        little_sizes = [x[1] for x in little_sizes]
        # create one more cluster with the smaller, removing the haplotypes
        centroids2, _ = kmeans(np.array(little_sizes), 2)
        idx2, _ = vq(np.array(little_sizes), centroids2)
        little_sizes2 = tz.first(tz.partitionby(lambda xs: xs[0], zip(idx2, little_sizes)))
        little_sizes2 = [x[1] for x in little_sizes2]
        # get any chromosomes not in haplotype/random bin
        thresh = max(little_sizes2)
    else:
        thresh = 0
    larger_chroms = []
    for c in ref.file_contigs(ref_file):
        if c.size > thresh:
            larger_chroms.append(c.name)
    return larger_chroms
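A minimal sketch (not from the original source) of the clustering trick above, on made-up contig sizes: vq labels each size with its nearest centroid, and since the sizes are sorted, partitionby on that label splits them into contiguous runs, the first being the small-size cluster.

# Toy demo with hypothetical, well-separated contig sizes
import numpy as np
import toolz as tz
from scipy.cluster.vq import kmeans, vq

sizes = [0.5, 0.7, 1.0, 150.0, 180.0, 200.0]  # hypothetical sorted sizes
centroids, _ = kmeans(np.array(sizes), 2)
idx, _ = vq(np.array(sizes), centroids)
small_run = tz.first(tz.partitionby(lambda xs: xs[0], zip(idx, sizes)))
print([x[1] for x in small_run])  # -> [0.5, 0.7, 1.0]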
def test_partitionby():
    from toolz import first, identity, partitionby
    assert list(partitionby(identity, [])) == []
    vowels = "aeiou"
    # partitionby yields tuples, so the expected groups are tuples
    assert (list(partitionby(vowels.__contains__, "abcdefghi")) ==
            [("a",), ("b", "c", "d"), ("e",), ("f", "g", "h"), ("i",)])
    assert (list(map(first, partitionby(identity,
                                        [1, 1, 1, 2, 3, 3, 2, 2, 3]))) ==
            [1, 2, 3, 2, 3])
    assert ''.join(map(first,
                       partitionby(identity, "Khhhaaaaannnnn!!!!"))) == 'Khan!'
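Worth noting, since the test above compares against tuples: toolz.partitionby materializes each contiguous run as a tuple, not a list.

from toolz import partitionby
print(list(partitionby(lambda c: c in "aeiou", "abcdefghi")))
# -> [('a',), ('b', 'c', 'd'), ('e',), ('f', 'g', 'h'), ('i',)]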
def zhongji(ip='', username='', password=''):
    import pexpect
    from toolz import partition, partitionby, remove
    # telnet(), zte_prompt and zte_pager are helpers/constants defined
    # elsewhere in the original module
    try:
        result = []
        child = telnet(ip, username, password)
        child.sendline("show lacp internal")
        while True:
            index = child.expect([zte_prompt, zte_pager], timeout=120)
            if index == 0:
                result.append(child.before)
                child.sendline('exit')
                child.close()
                break
            else:
                result.append(child.before)
                child.send(' ')
                continue
    except (pexpect.EOF, pexpect.TIMEOUT):
        return ['fail', None, ip]
    rslt = ''.join(result).split('\r\n')[1:-1]
    # keep smartgroup headers and member ports marked 'selected'
    records = [x.replace('\x08', '').strip() for x in rslt
               if 'Smartgroup' in x or 'selected' in x]
    records = remove(lambda x: 'unselected' in x, records)
    rec1 = [x.split()[0].lower().replace(':', '') for x in records]
    # partitionby splits rec1 at each smartgroup header; partition(2, ...)
    # then pairs every header run with the member run that follows it
    rec2 = partition(2, partitionby(lambda x: 'smartgroup' in x, rec1))
    rec3 = {x[0][0]: x[1] for x in rec2}
    return ['success', rec3, ip]
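The partition(2, partitionby(...)) idiom in the function above pairs each smartgroup header with the run of member ports that follows it. A toy illustration with hypothetical port names:

from toolz import partition, partitionby

rec1 = ['smartgroup1', 'gei_1/5', 'gei_1/6', 'smartgroup2', 'gei_1/7']
rec2 = partition(2, partitionby(lambda x: 'smartgroup' in x, rec1))
print({x[0][0]: x[1] for x in rec2})
# -> {'smartgroup1': ('gei_1/5', 'gei_1/6'), 'smartgroup2': ('gei_1/7',)}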
def dedup_s2_datasets(dss):
    """
    De-duplicate Sentinel 2 datasets. Datasets that share the same timestamp
    and region code are considered to be duplicates.

    - Sort datasets by ``(time, region code, label)``
    - Find groups of datasets that share a common ``(time, region_code)``
    - Out of each duplicate group, pick the one with the most recent
      timestamp in the label (processing time)

    Above, ``label`` is something like this:
    ``S2B_MSIL2A_20190507T093039_N0212_R136_T32NPF_20190507T122231``

    The two timestamps are "capture time" and "processing time".

    :returns: Two lists of Datasets: the first contains the "freshest"
              datasets with no duplicates, the second the less fresh
              duplicates.
    """
    import toolz  # module-level import in the original source
    dss = sorted(dss, key=lambda ds: (ds.center_time,
                                      ds.metadata.region_code,
                                      ds.metadata_doc['label']))
    out = []
    skipped = []

    for chunk in toolz.partitionby(lambda ds: (ds.center_time,
                                               ds.metadata.region_code), dss):
        # labels sort ascending within a chunk, so the last element has
        # the most recent processing time
        out.append(chunk[-1])
        skipped.extend(chunk[:-1])
    return out, skipped
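The same keep-the-last-of-each-group pattern in isolation, on plain tuples standing in for datasets, assuming records are sorted by (key, version) so the freshest version ends each group:

import toolz

recs = [('t1', 'v1'), ('t1', 'v2'), ('t2', 'v1')]  # hypothetical (time, label)
out, skipped = [], []
for chunk in toolz.partitionby(lambda r: r[0], recs):
    out.append(chunk[-1])
    skipped.extend(chunk[:-1])
print(out, skipped)  # -> [('t1', 'v2'), ('t2', 'v1')] [('t1', 'v1')]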
def find_dupes_to_archive(dc, time, keep_threshold=0.05, freq="m"):
    # dataset_count, ordered_dss and overlap_info are helpers defined
    # elsewhere in the original module; dc is an opened datacube instance
    import toolz
    from tqdm import tqdm

    ds_s2_order = lambda ds: (
        ds.center_time,
        ds.metadata.region_code,
        ds.metadata_doc["label"],
    )

    query = dict(product="s2_l2a", time=time)

    n_total = dataset_count(dc.index, **query)
    dss = ordered_dss(dc, key=ds_s2_order, freq=freq, **query)
    dss = tqdm(dss, total=n_total)

    # keep only the (time, region_code) groups that contain duplicates
    groups = (group
              for group in toolz.partitionby(
                  lambda ds: (ds.center_time, ds.metadata.region_code), dss)
              if len(group) > 1)

    keep_groups = []
    to_archive = []

    for dss_group in groups:
        a_or, a_and = overlap_info(dss_group)
        # aa is in range [0, 1] with
        #  0 -- 100% overlap across dupes
        #  1 -- 0% overlap across dupes (disjoint footprints)
        aa = (a_or - a_and) / a_or
        if aa > keep_threshold:
            keep_groups.append(dss_group)
        else:
            to_archive.extend(ds.id for ds in dss_group[:-1])

    return to_archive, keep_groups, n_total
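A numeric sketch of the keep/archive rule above, with hypothetical footprint areas:

a_or, a_and = 10.0, 9.8      # union and intersection of duplicate footprints
aa = (a_or - a_and) / a_or   # 0.02: footprints almost fully overlap
print(aa > 0.05)             # -> False, so all but the freshest get archived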
def split_to_header_and_content(lines):
    from itertools import chain
    from toolz import partitionby
    if lines[0][0] != 'g':
        raise IOError('ASCII .nl file is expected.\nPlease generate the'
                      ' .nl file with "write gname;" in the AMPL .mod file\n'
                      'or as "ampl -ogname name.mod" in the command line.')
    # the header is the leading 'g' line plus the following lines that
    # start with a space; everything after that is content
    parts = list(partitionby(lambda l: l[0] == ' ', lines))
    header, content = list(chain(parts[0], parts[1])), parts[2]
    return header, content
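A toy illustration of the three runs produced above, with hypothetical .nl-style lines:

from itertools import chain
from toolz import partitionby

lines = ['g3 1 1 0', ' 2 1', ' 0 0', 'C0', 'o2']  # hypothetical lines
parts = list(partitionby(lambda l: l[0] == ' ', lines))
print(list(chain(parts[0], parts[1])), parts[2])
# -> ['g3 1 1 0', ' 2 1', ' 0 0'] ('C0', 'o2')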
def _read_old_solutions(probname):
    from toolz import partitionby
    # zero_fname, as_pairs and _gen_solutions are helpers defined elsewhere
    # in the original module
    # grab the lines
    with open(zero_fname(probname), 'r') as f:
        lines = [l.strip() for l in f]
    # first line gives the dimension
    dim = int(lines[0])
    # chunks are separated by empty lines
    chunks = list(partitionby(lambda l: not l, lines[1:]))
    gen_chunks = as_pairs(chunks)
    # first chunk gives the variable names in order
    blank, varnames = next(gen_chunks)
    assert blank, blank
    assert dim == len(varnames), (dim, varnames)
    # the remaining chunks give the solution vectors
    solutions = list(_gen_solutions(gen_chunks, dim))
    return varnames, solutions
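How the blank-line chunking above behaves, with a hypothetical as_pairs stand-in built from toolz.partition (the real helper lives in the original module):

from toolz import partition, partitionby

lines = ['', 'x1', 'x2', '', '1.0', '2.0']  # hypothetical file body
chunks = list(partitionby(lambda l: not l, lines))
# -> [('',), ('x1', 'x2'), ('',), ('1.0', '2.0')]
for blank, chunk in partition(2, chunks):   # as_pairs stand-in
    print(chunk)  # -> ('x1', 'x2') then ('1.0', '2.0')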
import os
from pathlib import Path
from typing import Sequence, Union

import toolz as tz


def list_files(paths: Union[Sequence[Union[str, Path]], str, Path]):
    if isinstance(paths, (str, Path)):
        if os.path.isdir(paths):
            return list(
                tz.concat((os.path.join(dp, f) for f in fn)
                          for dp, dn, fn in os.walk(paths)))
        elif os.path.isfile(paths):
            return [paths]
        else:
            raise ValueError(
                f"Input argument {paths} is not a path or a directory")
    elif isinstance(paths, Sequence):
        # sorting by os.path.isdir puts files (False) before dirs (True),
        # so partitionby yields two runs; note the unpacking assumes the
        # sequence contains at least one file and one directory
        sortd = sorted(paths, key=os.path.isdir)
        files, dirs = tuple(tz.partitionby(os.path.isdir, sortd))
        return list(tz.concatv(files, *tz.map(list_files, dirs)))
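The sort-then-partitionby trick above in isolation: sorting by a boolean predicate stably puts False items first, so partitionby yields exactly two runs, with the same caveat that both kinds must be present for the unpacking to succeed.

from toolz import partitionby

items = [4, 1, 3, 2]
sortd = sorted(items, key=lambda x: x % 2 == 0)  # odds first, evens last
odds, evens = tuple(partitionby(lambda x: x % 2 == 0, sortd))
print(odds, evens)  # -> (1, 3) (4, 2)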
def get_segments(nl_header, content):
    # Returns Segments. SEGMENTS, Segments, fix_empty_segments,
    # move_r_beyond_last_C, ADD_SEGMENT and ignored are defined elsewhere
    # in the original module.
    from collections import OrderedDict
    from toolz import partitionby
    n_cons = nl_header.n_cons
    n_objs = nl_header.n_objs
    allsegs = Segments(def_vars=OrderedDict(),
                       cons=[None]*n_cons,
                       objs=[None]*n_objs,
                       var_bnds=[],
                       initial_guess={},
                       con_jacobian=[],
                       eval_order=[],
                       con_blocks=[],
                       var_blocks=[])
    # Creates a sequence of tuples. Two adjacent tuples are the header and
    # the body of the segment:
    #   V68 0 0
    #   o3          ->  ('V68 0 0',), ('o3', 'v2', 'v53')
    #   v2
    #   v53
    segments_w_header = list(partitionby(lambda l: l[0] in SEGMENTS, content))
    segments_w_header = fix_empty_segments(segments_w_header)
    # Assumption: constraint segments precede the r segment. Enforce it:
    segments_w_header = move_r_beyond_last_C(segments_w_header)
    # Populate allsegs by dispatching to the corresponding segment handler.
    for header, segment in segments_w_header:
        ADD_SEGMENT.get(header[0], ignored)(header, segment, allsegs)
    return allsegs
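A toy version of the header/body split above, with a stand-in SEGMENTS marker set:

from toolz import partitionby

SEGMENTS = set('VCO')  # stand-in for the real marker set
content = ['V68 0 0', 'o3', 'v2', 'v53', 'C1', 'v7']
print(list(partitionby(lambda l: l[0] in SEGMENTS, content)))
# -> [('V68 0 0',), ('o3', 'v2', 'v53'), ('C1',), ('v7',)]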