def merge_ownership_periods(mappings):
    """
    Given a dict of mappings where the values are lists of OwnershipPeriod
    objects, returns a dict with the same structure with new OwnershipPeriod
    objects adjusted so that the periods have no gaps.

    Orders the periods chronologically, and pushes forward the end date of
    each period to match the start date of the following period. The end date
    of the last period is pushed forward to the max Timestamp.
    """
    return valmap(
        lambda v: tuple(
            OwnershipPeriod(
                a.start,
                b.start,
                a.sid,
                a.value,
            ) for a, b in sliding_window(
                2,
                concatv(
                    sorted(v),
                    # concat with a fake ownership object to make the last
                    # end date be max timestamp
                    [OwnershipPeriod(
                        pd.Timestamp.max.tz_localize('utc'),
                        None,
                        None,
                        None,
                    )],
                ),
            )
        ),
        mappings,
    )

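# Illustrative sketch (not from the original codebase): the same
# sentinel-plus-pairwise idea using a hypothetical two-field Period
# namedtuple instead of zipline's OwnershipPeriod, assuming only that
# periods sort by their start field.
from collections import namedtuple

from toolz import concatv, sliding_window

Period = namedtuple('Period', ['start', 'end'])

def close_gaps(periods, sentinel_start):
    # Sort, append a sentinel, then pair neighbours so each period's end
    # becomes the next period's start.
    ordered = concatv(sorted(periods), [Period(sentinel_start, None)])
    return [Period(a.start, b.start) for a, b in sliding_window(2, ordered)]

# close_gaps([Period(3, 4), Period(1, 2)], sentinel_start=10)
# -> [Period(start=1, end=3), Period(start=3, end=10)]
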
def symbol_ownership_map(self):
    rows = sa.select(self.equity_symbol_mappings.c).execute().fetchall()

    mappings = {}
    for row in rows:
        mappings.setdefault(
            (row.company_symbol, row.share_class_symbol),
            [],
        ).append(
            SymbolOwnership(
                pd.Timestamp(row.start_date, unit="ns", tz="utc"),
                pd.Timestamp(row.end_date, unit="ns", tz="utc"),
                row.sid,
                row.symbol,
            )
        )

    return valmap(
        lambda v: tuple(
            SymbolOwnership(a.start, b.start, a.sid, a.symbol)
            for a, b in sliding_window(
                2,
                concatv(
                    sorted(v),
                    # concat with a fake ownership object to make the last
                    # end date be max timestamp
                    [SymbolOwnership(
                        pd.Timestamp.max.tz_localize("utc"),
                        None,
                        None,
                        None,
                    )],
                ),
            )
        ),
        mappings,
        factory=lambda: mappings,
    )

def test_stress_scatter_death(c, s, *workers):
    import random
    s.allowed_failures = 1000
    np = pytest.importorskip('numpy')
    L = yield c.scatter([np.random.random(10000) for i in range(len(workers))])
    yield c._replicate(L, n=2)

    adds = [delayed(slowadd, pure=True)(random.choice(L),
                                        random.choice(L),
                                        delay=0.05,
                                        dask_key_name='slowadd-1-%d' % i)
            for i in range(50)]

    adds = [delayed(slowadd, pure=True)(a, b, delay=0.02,
                                        dask_key_name='slowadd-2-%d' % i)
            for i, (a, b) in enumerate(sliding_window(2, adds))]

    futures = c.compute(adds)
    L = adds = None

    alive = list(workers)

    from distributed.scheduler import logger

    for i in range(7):
        yield gen.sleep(0.1)
        try:
            s.validate_state()
        except Exception as c:
            logger.exception(c)
            if config.get('log-on-err'):
                import pdb
                pdb.set_trace()
            else:
                raise

        w = random.choice(alive)
        yield w._close()
        alive.remove(w)

    try:
        yield gen.with_timeout(timedelta(seconds=25), c._gather(futures))
    except gen.TimeoutError:
        ws = {w.address: w for w in workers if w.status != 'closed'}
        print(s.processing)
        print(ws)
        print(futures)
        try:
            worker = [w for w in ws.values() if w.waiting_for_data][0]
        except Exception:
            pass
        if config.get('log-on-err'):
            import pdb
            pdb.set_trace()
        else:
            raise
    except CancelledError:
        pass
    finally:
        futures = None

def teach(self, texts):
    for text in texts:
        unigrams = ['__START__'] + text.split() + ['__END__']
        for left, right in toolz.sliding_window(2, unigrams):
            # using defaultdict and counter directly didn't always work
            if left not in self.brain:
                self.brain[left] = {}
            if right not in self.brain[left]:
                self.brain[left][right] = 0
            self.brain[left][right] += 1

def repeat(a, repeats, axis=None):
    if axis is None:
        if a.ndim == 1:
            axis = 0
        else:
            raise NotImplementedError("Must supply an integer axis value")

    if not isinstance(repeats, Integral):
        raise NotImplementedError("Only integer valued repeats supported")

    if -a.ndim <= axis < 0:
        axis += a.ndim
    elif not 0 <= axis <= a.ndim - 1:
        raise ValueError("axis(=%d) out of bounds" % axis)

    if repeats == 1:
        return a

    cchunks = np.cumsum((0,) + a.chunks[axis])
    slices = []
    for c_start, c_stop in sliding_window(2, cchunks):
        ls = np.linspace(c_start, c_stop, repeats).round(0)
        for ls_start, ls_stop in sliding_window(2, ls):
            if ls_start != ls_stop:
                slices.append(slice(ls_start, ls_stop))

    all_slice = slice(None, None, None)
    slices = [(all_slice,) * axis + (s,) + (all_slice,) * (a.ndim - axis - 1)
              for s in slices]

    slabs = [a[slc] for slc in slices]

    out = []
    for slab in slabs:
        chunks = list(slab.chunks)
        assert len(chunks[axis]) == 1
        chunks[axis] = (chunks[axis][0] * repeats,)
        chunks = tuple(chunks)
        result = slab.map_blocks(np.repeat, repeats, axis=axis, chunks=chunks,
                                 dtype=slab.dtype)
        out.append(result)

    return concatenate(out, axis=axis)

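# Small standalone illustration (made-up chunk sizes) of the boundary
# pairing used above: cumulative chunk sizes are paired into per-chunk
# (start, stop) ranges via sliding_window(2, ...).
import numpy as np
from toolz import sliding_window

chunk_sizes = (4, 4, 2)                           # hypothetical chunks along one axis
cchunks = np.cumsum((0,) + chunk_sizes)           # [0, 4, 8, 10]
print(list(sliding_window(2, cchunks.tolist())))  # [(0, 4), (4, 8), (8, 10)]
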
def test_clean_nbytes(c, s, a, b):
    L = [delayed(inc)(i) for i in range(10)]
    for i in range(5):
        L = [delayed(add)(x, y) for x, y in sliding_window(2, L)]
    total = delayed(sum)(L)

    future = c.compute(total)
    yield wait(future)

    yield gen.sleep(1)
    assert len(a.nbytes) + len(b.nbytes) == 1

def split_at_breaks(array, breaks, axis=0):
    """ Split an array into a list of arrays (using slices) at the given breaks

    >>> split_at_breaks(np.arange(6), [3, 5])
    [array([0, 1, 2]), array([3, 4]), array([5])]
    """
    padded_breaks = concat([[None], breaks, [None]])
    slices = [slice(i, j) for i, j in sliding_window(2, padded_breaks)]
    preslice = (slice(None),) * axis
    split_array = [array[preslice + (s,)] for s in slices]
    return split_array

def test_steal_related_tasks(e, s, a, b, c):
    futures = e.map(slowinc, range(20), delay=0.05, workers=a.address,
                    allow_other_workers=True)

    yield _wait(futures)

    nearby = 0
    for f1, f2 in sliding_window(2, futures):
        if s.who_has[f1.key] == s.who_has[f2.key]:
            nearby += 1

    assert nearby > 10

def _get_adjustments_in_range(self, cf, dts, field):
    if field == 'volume' or field == 'sid':
        return {}
    if cf.adjustment is None:
        return {}
    rf = self._roll_finders[cf.roll_style]
    partitions = []
    rolls = rf.get_rolls(cf.root_symbol, dts[0], dts[-1], cf.offset)
    tc = self._trading_calendar
    adjs = {}

    for front, back in sliding_window(2, rolls):
        front_sid, roll_dt = front
        back_sid = back[0]
        dt = tc.previous_session_label(roll_dt)
        if self._frequency == 'minute':
            dt = tc.open_and_close_for_session(dt)[1]
            roll_dt = tc.open_and_close_for_session(roll_dt)[0]
        partitions.append((front_sid, back_sid, dt, roll_dt))

    for partition in partitions:
        front_sid, back_sid, dt, roll_dt = partition
        last_front_dt = self._bar_reader.get_last_traded_dt(
            self._asset_finder.retrieve_asset(front_sid), dt)
        last_back_dt = self._bar_reader.get_last_traded_dt(
            self._asset_finder.retrieve_asset(back_sid), dt)
        if isnull(last_front_dt) or isnull(last_back_dt):
            continue
        front_close = self._bar_reader.get_value(
            front_sid, last_front_dt, 'close')
        back_close = self._bar_reader.get_value(
            back_sid, last_back_dt, 'close')
        adj_loc = dts.searchsorted(roll_dt)
        end_loc = adj_loc - 1
        adj = self._make_adjustment(cf.adjustment,
                                    front_close,
                                    back_close,
                                    end_loc)
        try:
            adjs[adj_loc].append(adj)
        except KeyError:
            adjs[adj_loc] = [adj]
    return adjs

def training(data, n, support=rangesupport):
    '''Train and predict a value; call next() on the returned generator.
    n is the length of the sliding window.'''
    data = list(data)
    onerdata, max_data, min_data = one(data)
    # onerdata = data
    windows = list(toolz.sliding_window(n, onerdata))

    def predict():
        lasted = toolz.tail(n - 1, onerdata)
        yingshe = {support(item, lasted): item for item in windows}
        minwindow = yingshe.get(min(yingshe.keys()), "1")
        onerdata.append(lasted[-1] * minwindow[-1] / minwindow[-2])

    while 1:
        predict()
        yield onerdata[-1] * (max_data - min_data) + min_data

def fast_combine_pairs(files, force_single, full_name, separators):
    """
    assume files that need to be paired are within 10 entries of each other,
    once the list is sorted
    """
    files = sort_filenames(files)
    chunks = tz.sliding_window(10, files)
    pairs = [combine_pairs(chunk, force_single, full_name, separators)
             for chunk in chunks]
    pairs = [y for x in pairs for y in x]
    longest = defaultdict(list)
    # for each file, save the longest pair it is in
    for pair in pairs:
        for file in pair:
            if len(longest[file]) < len(pair):
                longest[file] = pair
    # keep only unique pairs
    longest = {tuple(sort_filenames(x)) for x in longest.values()}
    # ensure filenames are R1 followed by R2
    return [sort_filenames(list(x)) for x in longest]

def markov_tables(tree):
    result = {}
    # for orders 1 and 2:
    for order in xrange(1, 3):
        keySet, averages = set(), {}
        # process the initial sliding window to just contain N/R's
        paths = [list(list(_.split('^')[0][1] if ':' in _ else _ for _ in elem)
                      for elem in t.sliding_window(order, path))
                 for path in list(unzip([], tree))]
        for i in xrange(len(paths)):
            order_counter = {}
            for window in paths[i]:
                if window[len(window) - 1] == 'R' or window[len(window) - 1] == 'N':
                    if ''.join(window) in order_counter:
                        order_counter[''.join(window)] += 1
                    else:
                        order_counter[''.join(window)] = 1
                    keySet.add(''.join(window))
            total = float(sum(list(order_counter.itervalues())))
            if total > 0:
                for key in order_counter:
                    order_counter[key] = float(order_counter[key]) / total
            # save the number of operations to original list
            paths[i] = order_counter
        # omit any empty paths
        paths = [path for path in paths if len(path) > 0]
        # combine all the probabilities from the different paths
        for key in keySet:
            for path in paths:
                if key in averages and key in path:
                    averages[key] += path[key]
                elif key in path:
                    averages[key] = path[key]
            # divide by number of paths to get final result
            averages[key] = float(averages[key]) / float(len(paths))
        result.update(averages)
    # update result to have new keys with 'markov_'
    keys = result.keys()
    for key in keys:
        result['markov_' + key] = str(result[key])
        del result[key]
    return result

def test_stress_steal(c, s, *workers):
    s.validate = False
    for w in workers:
        w.validate = False

    dinc = delayed(slowinc)
    L = [delayed(slowinc)(i, delay=0.005) for i in range(100)]
    for i in range(5):
        L = [delayed(slowsum)(part, delay=0.005)
             for part in sliding_window(5, L)]

    total = delayed(sum)(L)
    future = c.compute(total)

    while future.status != 'finished':
        yield gen.sleep(0.1)
        for i in range(3):
            a = random.choice(workers)
            b = random.choice(workers)
            if a is not b:
                s.work_steal(a.address, b.address, 0.5)
        if not s.processing:
            break

def test_stress_scatter_death(c, s, *workers):
    import random
    np = pytest.importorskip('numpy')
    L = yield c._scatter([np.random.random(10000) for i in range(len(workers))])
    yield c._replicate(L, n=2)

    adds = [delayed(slowadd, pure=True)(random.choice(L),
                                        random.choice(L),
                                        delay=0.05)
            for i in range(50)]

    adds = [delayed(slowadd, pure=True)(a, b, delay=0.02)
            for a, b in sliding_window(2, adds)]

    futures = c.compute(adds)

    alive = list(workers)

    from distributed.scheduler import logger

    for i in range(7):
        yield gen.sleep(0.1)
        try:
            s.validate_state()
        except Exception as c:
            logger.exception(c)
            import pdb; pdb.set_trace()

        w = random.choice(alive)
        yield w._close()
        alive.remove(w)

    try:
        yield gen.with_timeout(timedelta(seconds=10), c._gather(futures))
    except gen.TimeoutError:
        import pdb; pdb.set_trace()
    except CancelledError:
        pass

def ngram(words, n=1):
    return sliding_window(n, words)

def sorted_join(lkey, left, rkey, right):
    """Perform a join between two sequences sorted along their keys.

    This is useful when performing a join over very large lists, as it is a
    full streaming join.
    """
    if not callable(lkey):
        lkey = toolz.itertoolz.getter(lkey)
    if not callable(rkey):
        rkey = toolz.itertoolz.getter(rkey)

    left = toolz.sliding_window(2, left)
    right = toolz.sliding_window(2, right)

    cur_litem, next_litem = next(left)
    cur_ritem, next_ritem = next(right)

    cur_lkey = lkey(cur_litem)
    cur_rkey = rkey(cur_ritem)
    next_lkey = lkey(next_litem)
    next_rkey = rkey(next_ritem)

    # Compare left and right row by row
    # Always advance lowest "next index"
    while True:
        #print cur_lkey, cur_rkey
        if cur_rkey == cur_lkey:
            yield (cur_litem, cur_ritem)

        # Advance lowest index, advance both if equal
        if next_lkey <= next_rkey:
            try:
                cur_litem, next_litem = next(left)
                cur_lkey = lkey(cur_litem)
                next_lkey = lkey(next_litem)
            except StopIteration:
                if next_rkey == cur_lkey:
                    yield (cur_litem, next_ritem)
                if next_rkey == next_lkey:
                    yield (next_litem, next_ritem)
                for _, next_ritem in right:
                    next_rkey = rkey(next_ritem)
                    if next_rkey == next_lkey:
                        yield (next_litem, next_ritem)
        elif next_lkey > next_rkey:
            try:
                cur_ritem, next_ritem = next(right)
                cur_rkey = rkey(cur_ritem)
                next_rkey = rkey(next_ritem)
            except StopIteration:
                if cur_lkey == next_rkey:
                    yield (cur_litem, next_ritem)
                if next_lkey == cur_rkey:
                    yield (next_litem, cur_ritem)
                if next_lkey == next_rkey:
                    yield (next_litem, next_ritem)
                for _, next_litem in left:
                    next_lkey = lkey(next_litem)
                    if next_rkey == next_lkey:
                        yield (next_litem, next_ritem)
                break

def sliding_window(seq):
    return toolz.sliding_window(n, seq)

def sexy_triples():
    for ps in sliding_window(4, primes.primes()):
        if ps[0] + 6 == ps[1] and \
                ps[1] + 6 == ps[2] and \
                ps[2] + 6 == ps[3]:
            yield ps[1] + 3

def sample(seq, n_samples, window=2):
    windows = list(sliding_window(window, seq))
    random.shuffle(windows)
    return windows[:n_samples]

def verify_trace(self, trace, pipeline_start_date, pipeline_end_date, expected_chunks):
    # Percent complete should be monotonically increasing through the whole
    # execution.
    for before, after in toolz.sliding_window(2, trace):
        self.assertGreaterEqual(
            after.percent_complete,
            before.percent_complete,
        )

    # First publish should come from the start of the first chunk, with no
    # work yet.
    first = trace[0]
    expected_first = TestingProgressPublisher.TraceState(
        state='init',
        percent_complete=0.0,
        execution_bounds=(pipeline_start_date, pipeline_end_date),
        current_chunk_bounds=expected_chunks[0],
        current_work=None,
    )
    self.assertEqual(first, expected_first)

    # Last publish should have a state of success and be 100% complete.
    last = trace[-1]
    expected_last = TestingProgressPublisher.TraceState(
        state='success',
        percent_complete=100.0,
        execution_bounds=(pipeline_start_date, pipeline_end_date),
        current_chunk_bounds=expected_chunks[-1],
        # We don't know what the last work item will be, but it must be an
        # instance of a single ComputableTerm, because we only run
        # ComputableTerms one at a time, and a LoadableTerm will only be in
        # the graph if some ComputableTerm depends on it.
        current_work=[instance_of(ComputableTerm)],
    )
    self.assertEqual(last, expected_last)

    # Remaining updates should all be loads or computes.
    middle = trace[1:-1]
    for update in middle:
        self.assertIsInstance(update.current_work, list)
        if update.state == 'loading':
            for term in update.current_work:
                self.assertIsInstance(term, (LoadableTerm, AssetExists))
        elif update.state == 'computing':
            for term in update.current_work:
                self.assertIsInstance(term, ComputableTerm)
        else:
            raise AssertionError(
                "Unexpected state: {}".format(update.state),
            )

    # Break up the remaining updates by chunk.
    all_chunks = []
    grouped = itertools.groupby(middle, attrgetter('current_chunk_bounds'))
    for (chunk_start, chunk_stop), chunk_trace in grouped:
        all_chunks.append((chunk_start, chunk_stop))
        chunk_trace = list(chunk_trace)

        expected_end_progress = self.expected_chunk_progress(
            pipeline_start_date,
            pipeline_end_date,
            chunk_stop,
        )
        end_progress = chunk_trace[-1].percent_complete
        assert_almost_equal(
            end_progress,
            expected_end_progress,
        )

    self.assertEqual(all_chunks, expected_chunks)

def split(self, X, y=None):
    """Iterate tuples of data split into training and test sets.

    Parameters
    ----------
    X : dask object
        Training data. May be a ``da.Array``, ``db.Bag``, or
        ``dklearn.Matrix``.

    y : dask object, optional
        The target variable for supervised learning problems.

    Yields
    ------
    X_train, y_train, X_test, y_test : dask objects
        The split training and testing data, returned as the same type as
        the input. If y is not provided, ``y_train`` and ``y_test`` will be
        ``None``.
    """
    if self.n_folds < 2:
        raise ValueError("n_folds must be >= 2")
    X, y = check_X_y(X, y)
    if isinstance(X, da.Array):
        n = len(X)
        if n < self.n_folds:
            raise ValueError("n_folds must be <= n_samples")
    elif isinstance(X, (dm.Matrix, db.Bag)):
        n = X.npartitions
        if n < self.n_folds:
            raise ValueError("n_folds must be <= npartitions for Bag or "
                             "Matrix objects")
    else:
        raise TypeError("Expected an instance of ``da.Array``, "
                        "``db.Bag``, or ``dm.Matrix`` - got "
                        "{0}".format(type(X).__name__))
    fold_sizes = (n // self.n_folds) * np.ones(self.n_folds, dtype=np.int)
    fold_sizes[:n % self.n_folds] += 1
    folds = list(sliding_window(2, accumulate(add, fold_sizes, 0)))
    if isinstance(X, da.Array):
        x_parts = [X[start:stop] for start, stop in folds]
        if y is not None:
            y_parts = [y[start:stop] for start, stop in folds]
        for i in range(len(x_parts)):
            X_train = da.concatenate(x_parts[:i] + x_parts[i + 1:])
            X_test = x_parts[i]
            if y is not None:
                y_train = da.concatenate(y_parts[:i] + y_parts[i + 1:])
                y_test = y_parts[i]
            else:
                y_train = y_test = None
            yield X_train, y_train, X_test, y_test
    else:
        parts = list(range(n))
        for start, stop in folds:
            test = parts[start:stop]
            train = parts[:start] + parts[stop:]
            X_train = _part_split(X, train, 'X_train')
            X_test = _part_split(X, test, 'X_test')
            if y is not None:
                y_train = _part_split(y, train, 'y_train')
                y_test = _part_split(y, test, 'y_test')
            else:
                y_train = y_test = None
            yield X_train, y_train, X_test, y_test

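# Illustrative sketch of just the fold-boundary step above, with a made-up
# split of 8 samples into 3 folds: a running sum of fold sizes is paired
# into (start, stop) index ranges.
from operator import add

from toolz import accumulate, sliding_window

fold_sizes = [3, 3, 2]                         # 8 samples, 3 folds
boundaries = accumulate(add, fold_sizes, 0)    # 0, 3, 6, 8
print(list(sliding_window(2, boundaries)))     # [(0, 3), (3, 6), (6, 8)]
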
def alter_quad(quad):
    pairs = lmap("".join, sliding_window(2, quad))
    return merge(lmap(alter_pair, pairs))

def calc_part1(nums: Iterable[int]) -> int:
    return sum([1 if y > x else 0 for x, y in sliding_window(2, nums)])

def calc_part2(nums: Iterable[int]) -> int:
    return calc_part1([sum(group) for group in sliding_window(3, nums)])

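# For reference, what sliding_window yields for the two helpers above
# (the input numbers are made up):
from toolz import sliding_window

nums = [199, 200, 208, 210]
print(list(sliding_window(2, nums)))  # [(199, 200), (200, 208), (208, 210)]
print(list(sliding_window(3, nums)))  # [(199, 200, 208), (200, 208, 210)]
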
def letter_grams(word, n):
    return list(map(lambda x: x[0] + x[1], sliding_window(n, word)))

def lcs(word1, word2):
    for i in reversed(range(2, len(word2))):
        for subs in sliding_window(i, word2):
            if ''.join(subs) in word1:
                return i
    return 0

def create_conll12_dataset(in_file: str, out_file: str):
    """
    CoNLL-2012 -> jsonl

    CoNLL-2012 columns (paper):
    - 1: Document ID
    - 2: part number
    - 3: Word number
    * 4: Word
    * 5: Part of Speech
    * 6: Parse bit
    - 7: Lemma
    * 8: Predicate Frameset ID
    - 9: Word sense
    - 10: Speaker/Author
    - 11: Named Entities
    * 12:N: Predicate Arguments
    - N: Co-reference
    """
    TOKEN_IDX = 3
    POS_IDX = 4
    SYNTAX_IDX = 5
    VERB_IDX = 7
    TAG_IDX = 11
    MIN_LEN = 12

    with open(in_file) as fi, open(out_file, "w") as fo:
        _ = next(fi)
        for value, chunk in tqdm(groupby(fi, key=lambda x: bool(x.strip()))):
            if not value:
                continue
            lines = [line.rstrip("\n").split() for line in chunk]
            if lines[0][0].startswith("#end"):
                continue
            verb_indices = [
                idx for idx, line in enumerate(lines) if line[VERB_IDX] != "-"
            ]

            # Check
            assert len(lines[0]) == MIN_LEN + len(verb_indices)
            assert all(
                len(pair[0]) == len(pair[1])
                for pair in sliding_window(2, lines))

            tokens = [line[TOKEN_IDX] for line in lines]
            pos_tags = [line[POS_IDX] for line in lines]
            tree = [line[SYNTAX_IDX] for line in lines]
            labels = []
            for n, verb_idx in enumerate(verb_indices):
                tags = process_span_annotations_for_word(
                    [line[TAG_IDX + n] for line in lines])
                predicate_span = get_predicate_span(tags)
                assert verb_idx in predicate_span
                labels.append({"verb_span": predicate_span, "tags": tags})

            # Write
            json_line = json.dumps({
                "tokens": tokens,
                "labels": labels,
                "pos_tags": pos_tags,
                "tree": tree
            })
            print(json_line, file=fo)

def branch_classification(thres):
    """ Predict the extent of branching

    Parameters
    ----------
    thres: array
        thresholded image to be analysed

    Returns
    -------
    skel: array
        skeletonised image
    is_main: array
        flag per branch, 1 where the branch lies on the main (longest) path
    BLF: int/float
        branch length fraction
    """
    skeleton = skeletonize(thres)
    skel = Skeleton(skeleton, source_image=thres)
    summary = summarize(skel)

    is_main = np.zeros(summary.shape[0])
    us = summary['node-id-src']
    vs = summary['node-id-dst']
    ws = summary['branch-distance']

    edge2idx = {(u, v): i for i, (u, v) in enumerate(zip(us, vs))}
    edge2idx.update({(v, u): i for i, (u, v) in enumerate(zip(us, vs))})

    g = nx.Graph()
    g.add_weighted_edges_from(zip(us, vs, ws))

    for conn in nx.connected_components(g):
        curr_val = 0
        curr_pair = None
        h = g.subgraph(conn)
        p = dict(nx.all_pairs_dijkstra_path_length(h))
        for src in p:
            for dst in p[src]:
                val = p[src][dst]
                if (val is not None and np.isfinite(val) and val > curr_val):
                    curr_val = val
                    curr_pair = (src, dst)
        for i, j in tz.sliding_window(
                2,
                nx.shortest_path(h, source=curr_pair[0], target=curr_pair[1],
                                 weight='weight')):
            is_main[edge2idx[(i, j)]] = 1

    summary['main'] = is_main

    # Branch Length Fraction
    total_length = np.sum(skeleton)
    trunk_length = 0
    for i in range(summary.shape[0]):
        if summary['main'][i]:
            trunk_length += summary['branch-distance'][i]

    branch_length = total_length - trunk_length
    BLF = branch_length / total_length

    return skel, is_main, BLF

def process_two(data: list[int]) -> int:
    totals = map(sum, sliding_window(3, data))
    return len(lfilter(comparer, sliding_window(2, totals)))

def slices(input_string, n):
    if n > len(input_string) or n == 0:
        raise ValueError
    input_ = [int(c) for c in input_string]
    return [list(window) for window in sliding_window(n, input_)]

def is_sum_in_prior_n(arr, limit):
    for sw in sliding_window(limit + 1, arr):
        opts = sw[:limit]
        targ = sw[limit]
        if not is_sum_in(opts, targ):
            return targ

def test_sliding_window():
    list(sliding_window(3, seq))

def __init__(self, points, req_length):
    super().__init__(points, req_length)
    self._curves = [
        Bezier(subpoints, None) for subpoints in sliding_window(2, points)
    ]

def kmers(sequence, k):
    """Return a generator of all k-mers (substrings of length k) from a
    string, using an overlapping sliding window."""
    return (''.join(c) for c in sliding_window(k, sequence))

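# Quick usage check of the k-mer generator with a made-up sequence; the
# helper below mirrors the kmers function above so the snippet runs on
# its own.
from toolz import sliding_window

def _kmers(sequence, k):
    return (''.join(c) for c in sliding_window(k, sequence))

print(list(_kmers("ACGTA", 3)))  # ['ACG', 'CGT', 'GTA']
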
def verify_trace(
    self,
    trace,
    pipeline_start_date,
    pipeline_end_date,
    expected_chunks,
    empty=False,
):
    # Percent complete should be monotonically increasing through the whole
    # execution.
    for before, after in toolz.sliding_window(2, trace):
        assert after.percent_complete >= before.percent_complete

    # First publish should come from the start of the first chunk, with no
    # work yet.
    first = trace[0]
    expected_first = TestingProgressPublisher.TraceState(
        state="init",
        percent_complete=0.0,
        execution_bounds=(pipeline_start_date, pipeline_end_date),
        current_chunk_bounds=expected_chunks[0],
        current_work=None,
    )
    assert first == expected_first

    # Last publish should have a state of success and be 100% complete.
    last = trace[-1]
    expected_last = TestingProgressPublisher.TraceState(
        state="success",
        percent_complete=100.0,
        execution_bounds=(pipeline_start_date, pipeline_end_date),
        current_chunk_bounds=expected_chunks[-1],
        # We don't know what the last work item will be, but it must be an
        # instance of a single ComputableTerm, because we only run
        # ComputableTerms one at a time, and a LoadableTerm will only be in
        # the graph if some ComputableTerm depends on it.
        #
        # The one exception to this rule is that, if we run a completely
        # empty pipeline, the final work will be None.
        current_work=None if empty else [instance_of(ComputableTerm)],
    )
    assert last == expected_last

    # Remaining updates should all be loads or computes.
    middle = trace[1:-1]
    for update in middle:
        # For empty pipelines we never leave the 'init' state.
        if empty:
            assert update.state == "init"
            assert update.current_work is None
            continue

        if update.state in ("loading", "computing"):
            assert isinstance(update.current_work, list)

        if update.state == "loading":
            for term in update.current_work:
                assert isinstance(term, (LoadableTerm, AssetExists))
        elif update.state == "computing":
            for term in update.current_work:
                assert isinstance(term, ComputableTerm)
        else:
            raise AssertionError(
                "Unexpected state: {}".format(update.state),
            )

    # Break up the remaining updates by chunk.
    all_chunks = []
    grouped = itertools.groupby(middle, attrgetter("current_chunk_bounds"))
    for (chunk_start, chunk_stop), chunk_trace in grouped:
        all_chunks.append((chunk_start, chunk_stop))
        chunk_trace = list(chunk_trace)

        expected_end_progress = self.expected_chunk_progress(
            pipeline_start_date,
            pipeline_end_date,
            chunk_stop,
        )
        end_progress = chunk_trace[-1].percent_complete
        assert_almost_equal(
            end_progress,
            expected_end_progress,
        )

    assert all_chunks == expected_chunks

def smooth_depths_toolz(depths: List[int]) -> List[int]:
    return list(map(sum, sliding_window(3, depths)))

def process_one(data: list[int]) -> int:
    return len(lfilter(comparer, sliding_window(2, data)))

def test_stress_scatter_death(c, s, *workers): import random s.allowed_failures = 1000 np = pytest.importorskip("numpy") L = yield c.scatter([np.random.random(10000) for i in range(len(workers))]) yield c._replicate(L, n=2) adds = [ delayed(slowadd, pure=True)( random.choice(L), random.choice(L), delay=0.05, dask_key_name="slowadd-1-%d" % i, ) for i in range(50) ] adds = [ delayed(slowadd, pure=True)(a, b, delay=0.02, dask_key_name="slowadd-2-%d" % i) for i, (a, b) in enumerate(sliding_window(2, adds)) ] futures = c.compute(adds) L = adds = None alive = list(workers) from distributed.scheduler import logger for i in range(7): yield gen.sleep(0.1) try: s.validate_state() except Exception as c: logger.exception(c) if config.get("log-on-err"): import pdb pdb.set_trace() else: raise w = random.choice(alive) yield w._close() alive.remove(w) try: yield gen.with_timeout(timedelta(seconds=25), c._gather(futures)) except gen.TimeoutError: ws = {w.address: w for w in workers if w.status != "closed"} print(s.processing) print(ws) print(futures) try: worker = [w for w in ws.values() if w.waiting_for_data][0] except Exception: pass if config.get("log-on-err"): import pdb pdb.set_trace() else: raise except CancelledError: pass finally: futures = None