def get_batch_normalization_updates(training_graph, allow_duplicates=False):
    """Extract correspondences for learning BN population statistics.

    Parameters
    ----------
    training_graph : :class:`~blocks.graph.ComputationGraph`
        A graph of expressions wherein "training mode" batch normalization
        is taking place.
    allow_duplicates : bool, optional
        If `True`, allow multiple training-mode application calls from the
        same :class:`~blocks.bricks.BatchNormalization` instance, and
        return pairs corresponding to all of them. It's then the user's
        responsibility to do something sensible to resolve the duplicates.

    Returns
    -------
    update_pairs : list of tuples
        A list of 2-tuples where the first element of each tuple is the
        shared variable containing a "population" mean or standard
        deviation, and the second is a Theano variable for the
        corresponding statistics on a minibatch. Note that multiple
        applications of a single :class:`blocks.bricks.BatchNormalization`
        may appear in the graph, and therefore (if `allow_duplicates` is
        True) a single population variable may map to several different
        minibatch variables, and appear multiple times in this mapping.
        This can happen in recurrent models, siamese networks or other
        models that reuse pathways.

    Raises
    ------
    ValueError
        If the graph contains no training-mode application, or if the same
        brick is applied more than once and `allow_duplicates` is false.

    Notes
    -----
    Used in their raw form, these updates will simply overwrite the
    population statistics with the minibatch statistics at every gradient
    step. You will probably want to transform these pairs into something
    more sensible, such as keeping a moving average of minibatch values,
    or accumulating an average over the entire training set once every
    few epochs.

    """
    from ..bricks import BatchNormalization
    from ..filter import VariableFilter, get_application_call
    var_filter = VariableFilter(bricks=[BatchNormalization], roles=[OUTPUT])
    all_app_calls = map(get_application_call, var_filter(training_graph))
    train_app_calls = _training_mode_application_calls(all_app_calls)
    if len(train_app_calls) == 0:
        raise ValueError("no training mode BatchNormalization "
                         "applications found in graph")
    bricks = [c.application.brick for c in train_app_calls]
    if not allow_duplicates and not isdistinct(bricks):
        raise ValueError('multiple applications of the same '
                         'BatchNormalization brick; pass allow_duplicates '
                         '= True to override this check')

    # Build the flat list directly: for every application call emit the
    # (population mean, minibatch offset) pair followed by the
    # (population stdev, minibatch divisor) pair.  Appending keeps this
    # linear, unlike concatenating sub-lists with ``sum(..., [])``.
    update_pairs = []
    for app_call in train_app_calls:
        brick = app_call.application.brick
        update_pairs.append((brick.population_mean,
                             app_call.metadata['offset']))
        update_pairs.append((brick.population_stdev,
                             app_call.metadata['divisor']))
    return update_pairs
def test_scalar_symbols():
    """Check ``scalar_symbols`` on the two module-level expressions.

    The result must have one entry per input, the entries must carry
    distinct ``_name`` values, and each entry's ``dshape`` must equal the
    ``schema`` of the input at the same position.
    """
    inputs = [x, y]
    symbols = scalar_symbols(inputs)

    assert len(symbols) == len(inputs)

    names = [sym._name for sym in symbols]
    assert isdistinct(names)

    # Pairwise comparison, positionally aligned with the inputs.
    assert builtins.all(
        sym.dshape == expr.schema for sym, expr in zip(symbols, inputs)
    )
def merge(*exprs, **kwargs):
    """Combine positional and keyword expressions into one ``Merge``.

    A single argument is returned without building a ``Merge``: a lone
    positional expression is returned unchanged, a lone keyword expression
    is returned as ``value.label(keyword)``.

    Otherwise positional expressions are passed through ``_wrap`` with the
    name ``'_<index>'``, keyword expressions are wrapped, labelled with
    their keyword and appended in sorted key order.

    Raises
    ------
    TypeError
        If every resulting expression has ``ndim`` equal to 0.
    ValueError
        If the merged result has repeated field names.
    """
    if len(exprs) + len(kwargs) == 1:
        # Exactly one object was given, so no merge needs constructing.
        if exprs:
            # Only a positional argument: return it unchanged.
            return exprs[0]
        if kwargs:
            # Only a keyword argument: label it with its keyword.
            ((name, value),) = kwargs.items()
            return value.label(name)

    # Positional expressions first, then keyword ones in key order.
    positional = (
        _wrap(expr, '_%s' % index) for index, expr in enumerate(exprs)
    )
    keyword = (
        label(_wrap(value, name), name)
        for name, value in sorted(kwargs.items(), key=first)
    )
    exprs = tuple(concatv(positional, keyword))

    if all(ndim(expr) == 0 for expr in exprs):
        raise TypeError('cannot merge all scalar expressions')

    merged = Merge(exprs, varargsexpr(exprs), maxshape(map(shape, exprs)))
    if isdistinct(merged.fields):
        return merged

    # Report every field name that occurs more than once.
    repeated = ', '.join(
        field
        for field, count in frequencies(merged.fields).items()
        if count > 1
    )
    raise ValueError("Repeated columns found: " + repeated)
def test_join_suffixes():
    """Self-join on ``x`` with explicit suffixes.

    The joined fields must be distinct and consist of exactly ``x``,
    ``y_l`` and ``y_r``.
    """
    table = symbol('a', 'var * {x: int, y: int}')
    joined = join(table, table, 'x', suffixes=('_l', '_r'))

    fields = joined.fields
    assert isdistinct(fields)
    assert len(fields) == 3
    assert set(fields) == {'x', 'y_l', 'y_r'}
def test_join_on_same_table():
    """Joining a ``Symbol`` table with itself on ``x`` gives 3 distinct fields."""
    table = Symbol('a', 'var * {x: int, y: int}')
    joined = join(table, table, 'x')

    fields = joined.fields
    assert isdistinct(fields)
    assert len(fields) == 3
def test_join_on_same_table():
    """Joining a ``symbol`` table with itself on ``x`` gives 3 distinct fields."""
    table = symbol('a', 'var * {x: int, y: int}')
    self_join = join(table, table, 'x')

    columns = self_join.fields
    assert isdistinct(columns)
    assert len(columns) == 3
def test_join_on_same_columns():
    """Join two ``Symbol`` tables that share a non-key column ``y``.

    The result must have 5 distinct fields, including ``y_left`` and
    ``y_right``.
    """
    left = Symbol('a', 'var * {x: int, y: int, z: int}')
    right = Symbol('b', 'var * {x: int, y: int, w: int}')
    joined = join(left, right, 'x')

    fields = joined.fields
    assert isdistinct(fields)
    assert len(fields) == 5
    # The shared non-key column appears once per side.
    for expected in ('y_left', 'y_right'):
        assert expected in fields
def test_join_on_same_columns():
    """Join two ``symbol`` tables that share a non-key column ``y``.

    The result must have 5 distinct fields, including ``y_left`` and
    ``y_right``.
    """
    lhs = symbol('a', 'var * {x: int, y: int, z: int}')
    rhs = symbol('b', 'var * {x: int, y: int, w: int}')
    joined = join(lhs, rhs, 'x')

    columns = joined.fields
    assert isdistinct(columns)
    assert len(columns) == 5
    # The shared non-key column appears once per side.
    assert 'y_left' in columns
    assert 'y_right' in columns
def merge(*tables):
    """Merge ``tables`` over their common sub-expression.

    Raises
    ------
    ValueError
        If ``common_subexpression`` returns a falsy value, or if the merged
        result has repeated column names.
    """
    # All inputs must derive from one shared expression.
    shared = common_subexpression(*tables)
    if not shared:
        raise ValueError("No common sub expression found for input tables")

    merged = Merge(shared, tables)
    if isdistinct(merged.columns):
        return merged

    # Name every column that occurs more than once.
    duplicated = (
        column
        for column, count in frequencies(merged.columns).items()
        if count > 1
    )
    raise ValueError("Repeated columns found: " + ', '.join(duplicated))
def merge(*exprs):
    """Merge ``exprs`` over their common sub-expression.

    Returns
    -------
    The ``Merge`` built from the common sub-expression and ``exprs``.

    Raises
    ------
    ValueError
        If ``common_subexpression`` fails for the inputs (the original
        error is attached as ``__cause__``), or if the merged result has
        repeated field names.
    """
    # Get common sub expression.  Only ordinary errors are translated;
    # KeyboardInterrupt / SystemExit must propagate untouched.
    try:
        child = common_subexpression(*exprs)
    except Exception as err:
        raise ValueError(
            "No common sub expression found for input expressions") from err

    result = Merge(child, exprs)

    if not isdistinct(result.fields):
        # Name every field that occurs more than once.
        raise ValueError(
            "Repeated columns found: " + ', '.join(
                k for k, v in frequencies(result.fields).items() if v > 1))

    return result
def merge(*exprs, **kwargs):
    """Merge positional and keyword expressions over their common sub-expression.

    Keyword expressions are labelled with their keyword via ``label`` and
    appended after the positional ones, in ``kwargs`` iteration order.

    Returns
    -------
    The ``Merge`` built from the common sub-expression and all expressions.

    Raises
    ------
    ValueError
        If ``common_subexpression`` fails for the inputs (the original
        error is attached as ``__cause__``), or if the merged result has
        repeated field names.
    """
    exprs = exprs + tuple(label(v, k) for k, v in kwargs.items())

    # Get common sub expression.  Only ordinary errors are translated;
    # KeyboardInterrupt / SystemExit must propagate untouched.
    try:
        child = common_subexpression(*exprs)
    except Exception as err:
        raise ValueError(
            "No common sub expression found for input expressions") from err

    result = Merge(child, exprs)

    if not isdistinct(result.fields):
        # Name every field that occurs more than once.
        raise ValueError(
            "Repeated columns found: " + ', '.join(
                k for k, v in frequencies(result.fields).items() if v > 1))

    return result
def test_map(s, a, b):
    """Exercise ``Executor.map`` against the scheduler ``s``.

    Covers mapping over a plain range, over existing futures, over two
    futures lists, over two ranges of unequal length, and with a keyword
    argument given first as a plain value and then as a future.  ``a`` and
    ``b`` are accepted but not used in the body.
    """
    executor = Executor((s.ip, s.port), start=False)
    yield executor._start()

    # Plain inputs: one Future per element, each with its own key.
    firsts = executor.map(inc, range(5))
    assert len(firsts) == 5
    assert isdistinct(fut.key for fut in firsts)
    assert all(isinstance(fut, Future) for fut in firsts)

    value = yield firsts[0]._result()
    assert value == inc(0)
    assert len(s.dask) == 5

    # Futures as inputs: the new tasks reference the earlier keys.
    seconds = executor.map(inc, firsts)

    value = yield seconds[1]._result()
    assert value == inc(inc(1))
    assert len(s.dask) == 10
    assert firsts[0].key in s.dask[seconds[0].key]

    summed = executor.submit(sum, seconds)
    value = yield summed._result()
    assert value == sum(map(inc, map(inc, range(5))))

    # Two lists of futures mapped element-wise.
    pairwise = executor.map(add, firsts, seconds)
    value = yield pairwise[1]._result()
    assert value == inc(1) + inc(inc(1))

    # Ranges of different lengths; the comparison is only made on Python 3.
    uneven = executor.map(add, range(3), range(4))
    gathered = yield executor._gather(uneven)
    if sys.version_info[0] >= 3:
        assert gathered == list(map(add, range(3), range(4)))

    def f(x, y=10):
        return x + y

    # Keyword argument supplied as a plain value.
    with_value = executor.map(f, range(5), y=5)
    gathered = yield executor._gather(with_value)
    assert gathered == list(range(5, 10))

    # Keyword argument supplied as a future (f(10) == 20).
    y_future = executor.submit(f, 10)
    with_future = executor.map(f, range(5), y=y_future)
    gathered = yield executor._gather(with_future)
    assert gathered == list(range(20, 25))

    yield executor._shutdown()
def f(c, a, b):
    """Exercise ``Executor.map`` with the executor attached to ``c``.

    The executor's ``_go`` is scheduled on the current ``IOLoop`` rather
    than awaited.  Covers mapping over a plain range, over existing
    futures, over two futures lists, over two ranges of unequal length,
    and with a keyword argument given first as a plain value and then as a
    future.  ``a`` and ``b`` are accepted but not used in the body.
    """
    executor = Executor((c.ip, c.port), start=False)
    IOLoop.current().spawn_callback(executor._go)

    # Plain inputs: one Future per element, each with its own key.
    firsts = executor.map(inc, range(5))
    assert len(firsts) == 5
    assert isdistinct(fut.key for fut in firsts)
    assert all(isinstance(fut, Future) for fut in firsts)

    value = yield firsts[0]._result()
    assert value == inc(0)
    assert len(executor.dask) == 5

    # Futures as inputs: the new tasks reference the earlier keys.
    seconds = executor.map(inc, firsts)

    value = yield seconds[1]._result()
    assert value == inc(inc(1))
    assert len(executor.dask) == 10
    assert firsts[0].key in executor.dask[seconds[0].key]

    summed = executor.submit(sum, seconds)
    value = yield summed._result()
    assert value == sum(map(inc, map(inc, range(5))))

    # Two lists of futures mapped element-wise.
    pairwise = executor.map(add, firsts, seconds)
    value = yield pairwise[1]._result()
    assert value == inc(1) + inc(inc(1))

    # Ranges of different lengths; the comparison is only made on Python 3.
    uneven = executor.map(add, range(3), range(4))
    gathered = yield executor._gather(uneven)
    if sys.version_info[0] >= 3:
        assert gathered == list(map(add, range(3), range(4)))

    # NOTE: this local helper deliberately keeps the name ``f`` (shadowing
    # the enclosing function inside this body), as in the original.
    def f(x, y=10):
        return x + y

    # Keyword argument supplied as a plain value.
    with_value = executor.map(f, range(5), y=5)
    gathered = yield executor._gather(with_value)
    assert gathered == list(range(5, 10))

    # Keyword argument supplied as a future (f(10) == 20).
    y_future = executor.submit(f, 10)
    with_future = executor.map(f, range(5), y=y_future)
    gathered = yield executor._gather(with_future)
    assert gathered == list(range(20, 25))

    yield executor._shutdown()
def merge(*exprs, **kwargs):
    """Merge positional and keyword expressions over their common sub-expression.

    A single argument is returned without building a ``Merge``: a lone
    positional expression is returned unchanged, a lone keyword expression
    is returned as ``value.label(keyword)``.  Otherwise keyword expressions
    are labelled and appended after the positional ones in sorted key
    order.

    Raises
    ------
    ValueError
        If the merged result has repeated field names.
    """
    if len(exprs) + len(kwargs) == 1:
        if exprs:
            return exprs[0]
        if kwargs:
            ((name, value),) = kwargs.items()
            return value.label(name)

    # Label keyword expressions, sorted by keyword.
    labelled = tuple(
        label(value, name)
        for name, value in sorted(kwargs.items(), key=first)
    )
    exprs = exprs + labelled

    # Get common sub expression
    shared = common_subexpression(*exprs)
    merged = Merge(shared, exprs)

    if isdistinct(merged.fields):
        return merged

    # Name every field that occurs more than once.
    repeated = ', '.join(
        field
        for field, count in frequencies(merged.fields).items()
        if count > 1
    )
    raise ValueError("Repeated columns found: " + repeated)
def merge(*exprs, **kwargs):
    """Build a ``Merge`` of the given expressions.

    With exactly one argument no ``Merge`` is created: a positional
    expression comes back unchanged, a keyword expression comes back as
    ``value.label(keyword)``.  With more, keyword expressions are labelled,
    ordered by keyword and placed after the positional expressions.

    Raises
    ------
    ValueError
        If the merged result has repeated field names.
    """
    only_one = len(exprs) + len(kwargs) == 1
    if only_one and exprs:
        return exprs[0]
    if only_one and kwargs:
        key, expr = next(iter(kwargs.items()))
        return expr.label(key)

    # Get common sub expression
    ordered_kwargs = sorted(kwargs.items(), key=first)
    exprs += tuple(label(expr, key) for key, expr in ordered_kwargs)
    child = common_subexpression(*exprs)
    result = Merge(child, exprs)

    if not isdistinct(result.fields):
        # Collect the names that appear more than once for the message.
        counts = frequencies(result.fields)
        duplicates = (key for key, seen in counts.items() if seen > 1)
        raise ValueError("Repeated columns found: " + ', '.join(duplicates))

    return result
def merge(*exprs, **kwargs):
    """Merge positional and keyword expressions over their common sub-expression.

    A single argument is returned without building a ``Merge``: a lone
    positional expression is returned unchanged, a lone keyword expression
    is returned as ``value.label(keyword)``.  Otherwise keyword expressions
    are labelled via ``label`` and appended after the positional ones, in
    ``kwargs`` iteration order.

    Raises
    ------
    ValueError
        If ``common_subexpression`` fails for the inputs (the original
        error is attached as ``__cause__``), or if the merged result has
        repeated field names.
    """
    if len(exprs) + len(kwargs) == 1:
        if exprs:
            return exprs[0]
        if kwargs:
            [(k, v)] = kwargs.items()
            return v.label(k)

    exprs = exprs + tuple(label(v, k) for k, v in kwargs.items())

    # Get common sub expression.  Only ordinary errors are translated;
    # KeyboardInterrupt / SystemExit must propagate untouched.
    try:
        child = common_subexpression(*exprs)
    except Exception as err:
        raise ValueError(
            "No common sub expression found for input expressions") from err

    result = Merge(child, exprs)

    if not isdistinct(result.fields):
        # Name every field that occurs more than once.
        raise ValueError(
            "Repeated columns found: " + ', '.join(
                k for k, v in frequencies(result.fields).items() if v > 1))

    return result
async def test_asyncio_map():
    """Exercise ``AioClient.map`` inside an in-process client.

    Covers mapping over a plain range, over existing futures, over two
    futures lists, over two ranges of unequal length, and with a keyword
    argument given first as a plain value and then as a future.
    """
    async with AioClient(processes=False) as client:
        # Plain inputs: one Future per element, each with its own key.
        firsts = client.map(inc, range(5))
        assert len(firsts) == 5
        assert isdistinct(fut.key for fut in firsts)
        assert all(isinstance(fut, Future) for fut in firsts)

        value = await firsts[0]
        assert value == inc(0)

        # Futures as inputs.
        seconds = client.map(inc, firsts)

        value = await seconds[1]
        assert value == inc(inc(1))

        summed = client.submit(sum, seconds)
        value = await summed
        assert value == sum(map(inc, map(inc, range(5))))

        # Two lists of futures mapped element-wise.
        pairwise = client.map(add, firsts, seconds)
        value = await pairwise[1]
        assert value == inc(1) + inc(inc(1))

        # Ranges of different lengths.
        uneven = client.map(add, range(3), range(4))
        gathered = await client.gather(uneven)
        assert gathered == list(map(add, range(3), range(4)))

        def f(x, y=10):
            return x + y

        # Keyword argument supplied as a plain value.
        with_value = client.map(f, range(5), y=5)
        gathered = await client.gather(with_value)
        assert gathered == list(range(5, 10))

        # Keyword argument supplied as a future (f(10) == 20).
        y_future = client.submit(f, 10)
        with_future = client.map(f, range(5), y=y_future)
        gathered = await client.gather(with_future)
        assert gathered == list(range(20, 25))
async def test_asyncio_map():
    """Exercise ``AioClient.map`` inside an in-process client.

    Every mapped result is expected to be an ``AioFuture``.  Covers mapping
    over a plain range, over existing futures, over two futures lists, over
    two ranges of unequal length, and with a keyword argument given first
    as a plain value and then as a future.
    """
    async with AioClient(processes=False) as client:
        # Plain inputs: one AioFuture per element, each with its own key.
        firsts = client.map(inc, range(5))
        assert len(firsts) == 5
        assert isdistinct(fut.key for fut in firsts)
        assert all(isinstance(fut, AioFuture) for fut in firsts)

        value = await firsts[0]
        assert value == inc(0)

        # Futures as inputs.
        seconds = client.map(inc, firsts)

        value = await seconds[1]
        assert value == inc(inc(1))

        summed = client.submit(sum, seconds)
        value = await summed
        assert value == sum(map(inc, map(inc, range(5))))

        # Two lists of futures mapped element-wise.
        pairwise = client.map(add, firsts, seconds)
        value = await pairwise[1]
        assert value == inc(1) + inc(inc(1))

        # Ranges of different lengths.
        uneven = client.map(add, range(3), range(4))
        gathered = await client.gather(uneven)
        assert gathered == list(map(add, range(3), range(4)))

        def f(x, y=10):
            return x + y

        # Keyword argument supplied as a plain value.
        with_value = client.map(f, range(5), y=5)
        gathered = await client.gather(with_value)
        assert gathered == list(range(5, 10))

        # Keyword argument supplied as a future (f(10) == 20).
        y_future = client.submit(f, 10)
        with_future = client.map(f, range(5), y=y_future)
        gathered = await client.gather(with_future)
        assert gathered == list(range(20, 25))
def merge(*exprs, **kwargs):
    """Build a ``Merge`` from positional and keyword expressions.

    With exactly one argument no ``Merge`` is created: a positional
    expression comes back unchanged, a keyword expression comes back as
    ``value.label(keyword)``.

    With more, each positional expression is passed through ``_wrap`` under
    the name ``'_<index>'``; each keyword expression is wrapped, labelled
    with its keyword, and placed after the positional ones in sorted key
    order.

    Raises
    ------
    TypeError
        If every resulting expression has ``ndim`` equal to 0.
    ValueError
        If the merged result has repeated field names.
    """
    only_one = len(exprs) + len(kwargs) == 1
    if only_one and exprs:
        # A lone positional argument is returned unchanged.
        return exprs[0]
    if only_one and kwargs:
        # A lone keyword argument is labelled and returned.
        key, expr = next(iter(kwargs.items()))
        return expr.label(key)

    # Label all the kwargs and sort them in key order.
    wrapped_positional = (
        _wrap(expr, '_%s' % position) for position, expr in enumerate(exprs)
    )
    wrapped_keyword = (
        label(_wrap(expr, key), key)
        for key, expr in sorted(kwargs.items(), key=first)
    )
    exprs = tuple(concatv(wrapped_positional, wrapped_keyword))

    if all(ndim(expr) == 0 for expr in exprs):
        raise TypeError('cannot merge all scalar expressions')

    result = Merge(exprs, varargsexpr(exprs), maxshape(map(shape, exprs)))

    if not isdistinct(result.fields):
        # Collect the names that appear more than once for the message.
        counts = frequencies(result.fields)
        duplicates = (key for key, seen in counts.items() if seen > 1)
        raise ValueError("Repeated columns found: " + ', '.join(duplicates))

    return result