Пример #1
0
 def _init_buffers_async(self, new_buffer_ids):
     """Fill the buffers of ``new_buffer_ids`` from concurrently fetched data.

     ``self._init_buffer(t, i)`` is submitted to a thread pool once per
     ``(t, i)`` pair (one worker per pair).  As each result arrives, its
     records are bucketed by ``int(record[1].mktime // self._isec)``; one
     ``self._iobj`` is appended to the pair's deque per bucket, with
     ``NULL`` placeholders appended for skipped bucket numbers.  After all
     appends for a pair, ``self._callback`` is invoked for the appended
     positions.

     An empty ``new_buffer_ids`` is a no-op.
     """
     n_ids = len(new_buffer_ids)
     if n_ids == 0:
         # ThreadPoolExecutor raises ValueError for max_workers=0
         return
     with _futures.ThreadPoolExecutor(max_workers=n_ids) as pool:
         futs = {
             pool.submit(self._init_buffer, t, i): (t, i)
             for t, i in new_buffer_ids
         }
         for fut in _futures.as_completed(futs):
             t, i = futs[fut]
             dat = fut.result()
             b = self._get_buffer_deque(t, i)
             laste = 0
             cb_count = 0
             # _groupby (presumably itertools.groupby) only merges
             # consecutive records and never sorts, so this relies on
             # dat already being ordered by time.
             for e, grp in _groupby(
                     dat, lambda rec: int(rec[1].mktime // self._isec)):
                 # fill in gaps
                 while laste and (e - laste > 1):
                     b.append(NULL(laste + 1, self._isec, self._tfunc))
                     cb_count += 1
                     laste += 1
                 # 'rec' (not 'i') so the buffer id used by the callbacks
                 # below can never be clobbered by this comprehension
                 d = [rec[0] for rec in grp]
                 b.append(self._iobj(d, e, self._isec, self._tfunc))
                 cb_count += 1
                 laste = e
             # we sort new to old so need do all callbacks after appends
             # NOTE(review): the range stops before 0, so position 0 never
             # gets a callback - presumably deliberate; confirm.
             for n in range(cb_count - 1, 0, -1):
                 self._callback(i, t, b, n)
Пример #2
0
 def _init_buffers_async(self, new_buffer_ids):
     """Populate the deques for ``new_buffer_ids`` using a thread pool.

     Each ``(t, i)`` pair gets its own ``self._init_buffer(t, i)`` task.
     Results are handled in completion order: records are grouped by
     ``int(record[1].mktime // self._isec)``, a ``self._iobj`` is appended
     per group, ``NULL`` objects are appended for group numbers that were
     skipped, and ``self._callback`` is then called for the appended
     positions.

     Returns immediately when ``new_buffer_ids`` is empty.
     """
     worker_count = len(new_buffer_ids)
     if worker_count == 0:
         # max_workers=0 makes ThreadPoolExecutor raise ValueError
         return
     with _futures.ThreadPoolExecutor(max_workers=worker_count) as pool:
         futs = {pool.submit(self._init_buffer, t, i): (t, i)
                 for t, i in new_buffer_ids}
         for fut in _futures.as_completed(futs):
             t, i = futs[fut]
             dat = fut.result()
             laste = 0
             b = self._get_buffer_deque(t, i)
             cb_count = 0
             # _groupby (presumably itertools.groupby) groups consecutive
             # records only; dat is assumed to arrive in time order.
             for e, grp in _groupby(
                     dat, lambda rec: int(rec[1].mktime // self._isec)):
                 # fill in gaps
                 while laste and (e - laste > 1):
                     obj = NULL(laste + 1, self._isec, self._tfunc)
                     b.append(obj)
                     cb_count += 1
                     laste += 1
                 # distinct loop name keeps the buffer id 'i' intact for
                 # the callbacks below
                 d = [rec[0] for rec in grp]
                 obj = self._iobj(d, e, self._isec, self._tfunc)
                 b.append(obj)
                 cb_count += 1
                 laste = e
             # we sort new to old so need do all callbacks after appends
             # NOTE(review): position 0 is excluded by the range below -
             # presumably on purpose; confirm.
             for n in range(cb_count - 1, 0, -1):
                 self._callback(i, t, b, n)
Пример #3
0
def _gap_sizes_1D(y):
    """Return, for every element of ``y``, the length of its NaN run.

    Elements that are not NaN map to 0; every element inside a run of
    consecutive NaNs maps to the length of that run.
    """
    nan_flags = _np.isnan(y).astype(int)
    sizes = []
    for is_valid, run in _groupby(nan_flags, lambda flag: flag == 0):
        members = list(run)
        if not is_valid:
            # a run of NaNs: each member reports the size of the whole run
            members = [len(members)] * len(members)
        sizes.extend(members)

    return _np.asarray(sizes)
Пример #4
0
    def load_vpc(self, source):
        """Append one ``PointCloud_Cohort`` per cohort found in ``source``.

        ``source[0]`` is read through the keys ``"column"``,
        ``"cohort_names"``, ``"cohort_times"`` and ``"embryospergene"``;
        ``source[1]`` is split column-wise (``axis=1``) into one chunk per
        cohort.  Column names (all but the first entry of ``"column"``) are
        grouped by the integer after their last ``"__"``.  ``source[0]`` and
        ``source[1]`` are also stored on ``self.all_headers`` and
        ``self.all_data``.
        """
        # TODO: handle file-like, filename, and an already read dataset.
        headers = source[0]
        data = source[1]

        cohort_colnames = []
        for _, members in _groupby(headers["column"][1:],
                                   lambda name: int(name.split("__")[-1])):
            cohort_colnames.append(
                [name.rsplit("__", 1)[0] for name in members])

        n_cols_per_cohort = list(map(len, cohort_colnames))
        boundaries = _np.cumsum(n_cols_per_cohort[:-1])
        split_columns = _np.split(data, boundaries, axis=1)

        for idx, cohort_name in enumerate(headers["cohort_names"]):
            cohort = PointCloud_Cohort(cohort_name,
                                       headers["cohort_times"][idx],
                                       cohort_colnames[idx],
                                       split_columns[idx],
                                       headers["embryospergene"][idx])
            self.cohorts.append(cohort)

        self.all_headers = headers
        self.all_data = data
Пример #5
0
def groupby(value, attribute, full_alphabet=False):
    """Group alphabetically a sequence of objects by a common attribute.

    Items are sorted and grouped by ``first_letter(getattr(item,
    attribute))``.

    :param value: iterable of objects to group.
    :param attribute: name of the attribute whose first letter is the key.
    :param full_alphabet: when true, the result has one entry for every
        letter yielded by ``alphabet()`` as well as every letter in use;
        letters without items get an empty list.
    :return: a sorted list of ``_GroupTuple`` objects.
    """
    def attr_getter(item):
        return first_letter(getattr(item, attribute))

    grouped = _groupby(sorted(value, key=attr_getter), attr_getter)

    if full_alphabet:
        # materialize every group while the groupby iterator is on it
        by_letter = {letter: list(items) for letter, items in grouped}

        # Union of used and alphabetic letters.  dict.keys() is a view on
        # Python 3 and cannot be concatenated with ``+``, so use a set.
        all_letters = sorted(set(by_letter).union(alphabet()))

        # return special tuples, empty list is used if there are no items
        # for given letter
        return [
            _GroupTuple((letter, by_letter.get(letter, [])))
            for letter in all_letters
        ]

    return sorted(map(_GroupTuple, grouped))
Пример #6
0
def nangdok(data_dir, batch_size, test_max_size, **kwargs):
    """Load Nangdok corpus data.

    The script file ``script_nmbd_by_sentence.txt`` (UTF-16-LE) in
    ``data_dir`` is split into texts at lines starting with ``<``; numbered
    lines (``"<digits>."``) are the sentences of the current text.  Every
    existing ``<participant>/<participant>_tXX_sYY.wav`` file is paired with
    its sentence.

    Args:
        data_dir: directory holding the script file and one sub-directory
            per participant (names matching ``[fm][v-z][0-9]+``).
        batch_size: number of shuffled held-out items returned as the
            validation set.
        test_max_size: maximum size of the test set; a falsy value means
            "no limit".
        **kwargs: ``test_sentences`` (sentences to hold out; default: one
            random sentence per text) and ``test_participants``
            (participants to hold out; default: one random participant per
            two-letter prefix).

    Returns:
        A ``(train, valid, test)`` tuple of lists of ``(wav_path, text)``.
    """
    def join(name):
        return _path.join(data_dir, name)

    texts = []
    with open(join("script_nmbd_by_sentence.txt"), encoding="utf-16-le") as f:
        tmp = []
        for line in f:
            if line.startswith("<"):
                texts.append(tmp)
                tmp = []
            elif _re.match(r"^\d+\..*", line):
                tmp.append(line)
    texts.append(tmp)
    # whatever was collected before the first "<" marker is not a text
    del texts[0]

    participants = sorted(
        name for name in _os.listdir(data_dir)
        if _re.match("^[fm][v-z][0-9]+", name))

    # The defaults are drawn even when overrides are supplied, so the
    # random stream consumed here does not depend on the keyword arguments.
    test_sentences = kwargs.get("test_sentences",
                                [_random.choice(ts) for ts in texts])
    test_participants = kwargs.get("test_participants", [
        _random.choice(list(g))
        for _, g in _groupby(participants, lambda p: p[:2])
    ])

    train = []
    test = []
    for participant in participants:
        for i, sentences in enumerate(texts, start=1):
            for j, text in enumerate(sentences, start=1):
                wav = join("{0}/{0}_t{1:0>2}_s{2:0>2}.wav".format(
                    participant, i, j))
                if not _path.isfile(wav):
                    continue
                # hold out by sentence or by the *current* participant
                if text in test_sentences or participant in test_participants:
                    test.append((wav, text))
                else:
                    train.append((wav, text))

    _random.shuffle(test)
    valid = test[:batch_size]
    if test_max_size and batch_size + test_max_size < len(test):
        test = test[batch_size:(batch_size + test_max_size)]
    else:
        test = test[batch_size:]
    return train, valid, test
Пример #7
0
def groupby(value, attribute, full_alphabet=False):
    """Group alphabetically a sequence of objects by a common attribute.

    The grouping key of an item is ``first_letter(getattr(item,
    attribute))``; items are sorted by that key before grouping.

    :param value: iterable of objects to group.
    :param attribute: name of the attribute whose first letter is the key.
    :param full_alphabet: when true, also emit an entry (with an empty
        list) for every letter yielded by ``alphabet()`` that has no items.
    :return: a sorted list of ``_GroupTuple`` objects.
    """
    def attr_getter(item):
        return first_letter(getattr(item, attribute))

    grouped = _groupby(sorted(value, key=attr_getter), attr_getter)

    if not full_alphabet:
        return sorted(map(_GroupTuple, grouped))

    # convert grouped to dict, listing each group before moving on
    by_letter = {letter: list(items) for letter, items in grouped}

    # Unique set of all used and alphabetic letters.  On Python 3
    # dict.keys() is a view and does not support ``+``, hence the set union.
    all_letters = sorted(set(by_letter).union(alphabet()))

    # return special tuples, empty list is used if there are no items
    # for given letter
    return [
        _GroupTuple((letter, by_letter.get(letter, [])))
        for letter in all_letters
    ]
Пример #8
0
def contiguous_ranges(list_in):
    r"""
    Return, for every unique entry of :obj:`list_in`, its contiguous index ranges

    Parameters
    ----------
    list_in : list

    Returns
    -------
    ranges : dict
        Keyed by the unique entries of list_in. Each value is a list with
        one integer array per uninterrupted run of that entry, holding the
        positions the run occupies in list_in.

    """
    found = _defdict(list)
    start = 0
    for value, run in _groupby(list_in):
        # the run covers positions [start, stop)
        stop = start + sum(1 for _ in run)
        found[value].append(_np.arange(start, stop))
        start = stop
    return dict(found)
Пример #9
0
def group_by(items, key=None):
    """Map ``_GroupTuple`` over the consecutive groups of ``items``.

    The grouping function is obtained from ``make_getter(key)``.  ``items``
    is not sorted here, so only adjacent items with equal keys end up in
    the same group.
    """
    runs = _groupby(items, make_getter(key))
    return map(_GroupTuple, runs)
Пример #10
0
def group_by(items, key=None):
    """Wrap each consecutive group of ``items`` in a ``_GroupTuple``.

    ``make_getter(key)`` supplies the grouping function; the input is
    grouped in the order given, without sorting.
    """
    grouped = _groupby(items, make_getter(key))
    return map(_GroupTuple, grouped)
Пример #11
0
def groupby(iterable, key, reverse=False):
    """Sort ``iterable`` by ``key``, then group it by the same ``key``.

    ``reverse`` is passed to ``sorted``.  Returns the group iterator
    yielding ``(key, group)`` pairs.
    """
    ordered = sorted(iterable, key=key, reverse=reverse)
    return _groupby(ordered, key=key)
Пример #12
0
def _normalize_time_option(opt):
    """Return ``opt`` (``D-HH:MM:SS`` or a fragment of it) as ``HH:MM:SS``.

    Days are folded into hours and seconds/minutes of 60 or more are
    carried into the next larger unit.  Raises OptionsError when ``opt``
    cannot be parsed.
    """
    try:
        if '-' in opt:
            day, clock = opt.split('-')
        else:
            day, clock = 0, opt
        fields = [int(i) for i in clock.split(':')]
        if not 1 <= len(fields) <= 3:
            raise ValueError('expected between one and three time fields')
        # Left-pad with zeros so SS, MM:SS and HH:MM:SS unpack the same way
        hours, mins, secs = [0] * (3 - len(fields)) + fields
        hours += int(day) * 24
    except (AttributeError, TypeError, ValueError):
        raise OptionsError('time must be formatted as D-HH:MM:SS ' +
                           'or a fragment of that (e.g. MM:SS) ' +
                           'it is formatted as {}'.format(opt))
    # Carry whole minutes/hours; also covers exactly 60 and values >= 120
    carry, secs = divmod(secs, 60)
    mins += carry
    carry, mins = divmod(mins, 60)
    hours += carry
    return '{}:{}:{}'.format(
        str(hours).rjust(2, '0'),
        str(mins).rjust(2, '0'),
        str(secs).rjust(2, '0'))


def _mem_string_to_mb(opt):
    """Convert a ``<number><unit>`` string such as ``10GB`` to integer MB.

    The result is never smaller than 5.  Raises ValueError when ``opt`` is
    not exactly a run of digits followed by a unit, or the unit is unknown.
    """
    memerror = ('mem is malformatted, should be a number '
                'of MB or a string like 24MB or 10GB, '
                'it is: {}'.format(opt))
    # Split into alternating digit / non-digit runs; each run is joined
    # while the groupby iterator is still positioned on it.
    runs = [(is_digit, ''.join(chars))
            for is_digit, chars in _groupby(opt, key=str.isdigit)]
    # Valid input is exactly two runs: digits first, then the unit
    if len(runs) != 2 or not runs[0][0]:
        raise ValueError(memerror)
    try:
        sval = int(runs[0][1])
    except ValueError:
        raise ValueError(memerror)
    sunit = runs[1][1].lower()
    if sunit == 'b':
        opt = int(float(sval) / float(1024) / float(1024))
    elif sunit in ('kb', 'k'):
        opt = int(float(sval) / float(1024))
    elif sunit in ('mb', 'm'):
        opt = sval
    elif sunit in ('gb', 'g'):
        opt = sval * 1024
    elif sunit in ('tb', 't'):
        # Crazy people
        opt = sval * 1024 * 1024
    else:
        raise ValueError(
            'Unknown memory unit opt {}'.format(sunit))
    # Don't allow 0, minimum memory req is 5MB
    return max(opt, 5)


def check_arguments(kwargs):
    """Make sure all keywords are allowed.

    Raises OptionsError on error, returns sanitized dictionary on success.

    Note: Checks in SYNONYMS if argument is not recognized, raises OptionsError
          if it is not found there either.

    Values whose type does not match ALLOWED_KWDS are converted where
    possible (list/tuple options via ``run.listify``, digit strings to int),
    otherwise TypeError is raised.  ``time`` is rewritten as ``HH:MM:SS``
    and a string ``mem`` is converted to an integer number of megabytes
    (ValueError if malformed).
    """
    new_kwds = {}
    # Make sure types are correct
    for arg, opt in kwargs.items():
        if arg not in ALLOWED_KWDS:
            if arg in SYNONYMS:
                arg = SYNONYMS[arg]
                assert arg in ALLOWED_KWDS
            else:
                raise OptionsError('Unrecognized argument {}'.format(arg))
        if opt is not None and not isinstance(opt, ALLOWED_KWDS[arg]):
            newtype = ALLOWED_KWDS[arg]
            # NOTE(review): the isinstance below inspects ``arg`` (the
            # keyword name), so it is always true; ``opt`` may have been
            # intended.  Left unchanged because switching it would stop
            # tuple<->list values from being listified - confirm intent.
            if (newtype is list or newtype is tuple) \
                    and not isinstance(arg, (list, tuple)):
                opt = run.listify(opt)
            elif newtype is int and isinstance(opt, str) and opt.isdigit():
                opt = int(opt)
            else:
                raise TypeError('arg "{}" must be {}, is {} ({})'.format(
                    arg, ALLOWED_KWDS[arg], opt, type(opt)))
        new_kwds[arg] = opt

    # Parse individual complex options (values of existing keys are
    # replaced in place, which is safe while iterating)
    for arg, opt in new_kwds.items():
        if arg == 'time':
            new_kwds[arg] = _normalize_time_option(opt)

        # Force memory into an integer of megabytes
        elif arg == 'mem' and isinstance(opt, str):
            if opt.isdigit():
                # NOTE(review): a bare number is taken as MB and is not
                # raised to the 5MB minimum applied to suffixed values -
                # confirm whether that is intended.
                opt = int(opt)
            else:
                opt = _mem_string_to_mb(opt)
            new_kwds[arg] = opt

    return new_kwds
Пример #13
0
def groupby(iterable: Iterable[V], key: Callable[[V], K]) -> Dict[K, List[V]]:
    """Sort ``iterable`` by ``key`` and collect it into a dict of lists.

    Each distinct key maps to the list of items that produced it; keys
    appear in ascending order.
    """
    ordered = sorted(iterable, key=key)  # type: ignore
    buckets = {}
    for bucket_key, members in _groupby(ordered, key):
        buckets[bucket_key] = list(members)
    return buckets