# Stdlib and third-party imports required by this section (normally at the top
# of the module); the helpers `ndict`, `G`, and `default_function_mode` are
# defined elsewhere in this module.
import math
import sys
import time
from collections import OrderedDict

import numpy as np
import theano


def function(x, y, lazy=False, _debug=False, checknan='raise', **kwargs):
    # Default keyword arguments
    if 'on_unused_input' not in kwargs:
        kwargs['on_unused_input'] = 'warn'
    if 'mode' not in kwargs:
        kwargs['mode'] = default_function_mode

    # Order the input dict
    x = ndict.ordered(ndict.flatten(x))

    # Check the output dict; wrap a single (non-dict) output so that one code
    # path handles both cases
    return_single_y = False
    if not isinstance(y, dict):
        return_single_y = True
        y = {str(y): y}
    y = ndict.ordered(y)

    # Lazily compiled function (compilation is expensive, so defer it when possible)
    f = [None]

    def _compile():
        t0 = time.time()
        print 'Compiling... ',
        sys.stdout.flush()
        f[0] = theano.function(x.values(), y.values(), **kwargs)
        print "%.2f" % (time.time() - t0), 's'

    if not lazy:
        _compile()

    # The function to be called
    def func(data, n_batch=0, randomorder=True, data_global=None):
        if data_global is None:
            data_global = {}
        data = ndict.ordered(ndict.flatten(data))
        data_global = ndict.ordered(ndict.flatten(data_global))

        # Check that every compiled input in 'x' has a matching key in the data
        allkeys = data.keys() + data_global.keys()
        for key in x.keys():
            if key not in allkeys:
                raise Exception('Non-matching keys: ' + str(allkeys) +
                                ' vs. ' + str(x.keys()))

        # Compile function if not already done
        if f[0] is None:
            _compile()

        if n_batch <= 0:
            # Evaluate on the full data in a single call
            _data = data.copy()
            _data.update(data_global)
            inputs_ordered = ndict.orderedvals((_data,))
            _result = f[0](*inputs_ordered)
            # Put the outputs in a dictionary with the corresponding keys
            result = {y.keys()[i]: _result[i] for i in range(len(y))}
        else:
            # Minibatch-based evaluation. This assumes that inputs and outputs
            # are tensors whose first dimension indexes datapoints.
            n_tot = data.itervalues().next().shape[0]
            n_minibatches = int(math.ceil(1. * n_tot / n_batch))

            # If the batch size exceeds the dataset size, tile the data to
            # fill a whole batch
            n_tile = 1
            if n_batch > n_tot:
                assert n_batch % n_tot == 0
                n_tile = n_batch // n_tot

            indices = np.tile(np.arange(n_tot), n_tile)
            if randomorder:
                np.random.shuffle(indices)
                # Inverse permutation, used below to restore the original row order
                adict = dict(zip(np.tile(np.arange(n_tot), n_tile), indices))
                indices_inverse = sorted(adict, key=adict.get)

            results = []
            for i in range(n_minibatches):
                data_minibatch = ndict.getRowsFromIndices(
                    data, indices[i * n_batch:(i + 1) * n_batch])
                data_minibatch.update(data_global)
                inputs_ordered = ndict.orderedvals((data_minibatch,))
                results.append(f[0](*inputs_ordered))
                if _debug:
                    print 'Function debug', i, results[-1]
                if checknan == 'raise':
                    if np.isnan(np.sum(results[-1])):
                        print results[-1]
                        raise Exception("NaN detected")

            # Concatenate the per-minibatch outputs along the datapoint axis
            result = {y.keys()[i]: np.concatenate([results[j][i]
                                                   for j in range(n_minibatches)])
                      for i in range(len(y))}
            if randomorder:
                result = ndict.getRowsFromIndices(result, indices_inverse)
                result = OrderedDict(sorted(result.items()))

        # Return a bare array if a single (non-dict) output was requested
        if return_single_y:
            return result[result.keys()[0]]
        return result

    # Return a callable struct that also exposes the compiled Theano function
    return G.Struct(__call__=func, f=f)
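# Usage sketch (an illustration, not part of the original module): assumes a
# working Theano install, that the module's `ndict`/`G` helpers are available,
# and that `G.Struct` dispatches `__call__` to the stored `func`. The names
# `x_sym` and `sumsq` below are hypothetical.
if __name__ == '__main__':
    import theano.tensor as T

    x_sym = T.matrix('x')
    # lazy=True defers Theano compilation until the first call
    sumsq = function({'x': x_sym}, {'sumsq': (x_sym ** 2).sum(axis=1)},
                     lazy=True)
    data = {'x': np.random.randn(100, 5).astype(theano.config.floatX)}

    out_full = sumsq(data)            # single call over all 100 rows
    out_mb = sumsq(data, n_batch=10)  # 10 shuffled minibatches; rows are
                                      # restored to their original order
    assert np.allclose(out_full['sumsq'], out_mb['sumsq'])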