def gpu_to_cpu_model(model):
    for layer in model.layers:
        for member, value in layer.__dict__.items():
            if is_shared_var(value):
                layer.__dict__[member] = T._shared(
                    np.array(value.get_value(), floatX),
                    name=value.name, borrow=False)
        for i in xrange(len(layer.params)):
            if is_shared_var(layer.params[i]):
                layer.params[i] = T._shared(
                    np.array(layer.params[i].get_value(), floatX),
                    name=layer.params[i].name, borrow=False)
    return model
def gpu_to_cpu_model(model):
    for layer in model.layers:
        for member, value in list(layer.__dict__.items()):
            if is_shared_var(value):
                layer.__dict__[member] = T._shared(
                    np.array(value.get_value(), floatX),
                    name=value.name, borrow=False)
        for i in range(len(layer.params)):
            if is_shared_var(layer.params[i]):
                layer.params[i] = T._shared(
                    np.array(layer.params[i].get_value(), floatX),
                    name=layer.params[i].name, borrow=False)
    return model
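Both variants above depend on an is_shared_var helper that is not shown in the snippet. A minimal sketch of such a predicate, assuming it only needs to recognize Theano shared variables (the exact project-specific check may differ):

import theano

def is_shared_var(value):
    # SharedVariable is the common base class of CPU and GPU shared
    # variables, so this covers both theano.shared and T._shared results.
    return isinstance(value, theano.compile.SharedVariable)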
def test_gemv2(self):
    """Test vector1 + dot(vector2, matrix)."""
    v1 = theano.shared(numpy.array(numpy.random.rand(5), dtype='float32'))
    v2 = tensor._shared(numpy.array(numpy.random.rand(2), dtype='float32'))
    m = theano.shared(numpy.array(numpy.random.rand(5, 2), dtype='float32'))

    no_gpu_f = theano.function([], v2 + theano.dot(v1, m),
                               mode=mode_without_gpu)
    gpu_f = theano.function([], v2 + theano.dot(v1, m), mode=mode_with_gpu)
    # gpu_f2 is needed to test the case when the input is not on the gpu
    # but the output is moved to the gpu.
    gpu_f2 = theano.function([], tcn.gpu_from_host(v2 + theano.dot(v1, m)),
                             mode=mode_with_gpu)

    # Assert they produce the same output
    assert numpy.allclose(no_gpu_f(), gpu_f(), atol=self.atol)
    assert numpy.allclose(no_gpu_f(), gpu_f2(), atol=self.atol)

    # Assert that the gpu version actually uses the gpu
    assert sum([node.op is gpu_gemv_inplace
                for node in gpu_f2.maker.fgraph.toposort()]) == 1
    assert sum([node.op is gpu_gemv_inplace
                for node in gpu_f.maker.fgraph.toposort()]) == 1
def arrays_to_tensors(self, arrays):
    # err = self.to_err_tensor()
    # return DatasetTensors(pos=self.to_tensor(), err=err, serr=err)
    if type(arrays) == np.ndarray:
        return tt._shared(arrays.T)
    elif type(arrays) == DatasetArrays:
        # Recurse through the method itself; the serr field comes from the
        # DatasetArrays being converted.
        return DatasetTensors(self.arrays_to_tensors(arrays.pos),
                              self.arrays_to_tensors(arrays.err),
                              self.arrays_to_tensors(arrays.serr))
def sharedX(value, name=None, borrow=True, keep_on_cpu=False):
    """Transform value into a shared variable of type floatX."""
    if keep_on_cpu:
        return T._shared(theano._asarray(value, dtype=theano.config.floatX),
                         name=name, borrow=borrow)
    return theano.shared(theano._asarray(value, dtype=theano.config.floatX),
                         name=name, borrow=borrow)
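A quick usage sketch of sharedX (the values are hypothetical): keep_on_cpu=True goes through T._shared and therefore always yields a plain CPU TensorType shared variable, while the default path lets theano.shared place the data on the active device.

import numpy as np

W = sharedX(np.random.randn(3, 4), name='W')       # device picked by theano.shared
W_cpu = sharedX(np.random.randn(3, 4), name='W_cpu',
                keep_on_cpu=True)                   # stays a CPU TensorType
print(W_cpu.get_value().dtype)                      # floatX, e.g. 'float32'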
def gen_vec(n, name, device='cpu'):
    self.rng = numpy.random.RandomState(123)
    vals = self.rng.uniform(size=(n,), low=-.0005,
                            high=.0005).astype('float32')
    if device == 'gpu':
        var = theano.shared(vals, name=name)
        print_mem(name)
    else:
        var = TT._shared(vals, name=name)
    return var
def test_local_gpu_subtensor():
    # Test shared forced on CPU.
    t = tensor._shared(np.zeros(20, "float32"))
    f = theano.function([], t[3:4], mode=mode_with_gpu)
    topo = f.maker.fgraph.toposort()
    assert any([type(node.op) is tensor.Subtensor for node in topo])
    assert not any([isinstance(node.op, GpuSubtensor) for node in topo])

    # Test graph input.
    t = tensor.fmatrix()
    f = theano.function([t], t[3:4], mode=mode_with_gpu)
    topo = f.maker.fgraph.toposort()
    assert any([type(node.op) is tensor.Subtensor for node in topo])
    assert not any([isinstance(node.op, GpuSubtensor) for node in topo])

    # Test multiple use of the input.
    # We want the subtensor to be on the GPU to prevent multiple transfers.
    t = tensor.fmatrix()
    f = theano.function([t], [t[3:4], t + 1], mode=mode_with_gpu)
    topo = f.maker.fgraph.toposort()
    assert not any([type(node.op) is tensor.Subtensor for node in topo])
    assert any([isinstance(node.op, GpuSubtensor) for node in topo])

    # Test multiple use of the input + input as output.
    # We want the subtensor to be on the GPU to prevent multiple transfers.
    t = tensor.fmatrix()
    f = theano.function([t], [t[3:4], t + 1, t], mode=mode_with_gpu)
    topo = f.maker.fgraph.toposort()
    assert not any([type(node.op) is tensor.Subtensor for node in topo])
    assert any([isinstance(node.op, GpuSubtensor) for node in topo])

    # Test shared forced on CPU and we do computation on the output of
    # the subtensor.
    t = tensor._shared(np.zeros(20, "float32"))
    f = theano.function([], t[3:4] + 1, mode=mode_with_gpu)
    topo = f.maker.fgraph.toposort()
    assert any([type(node.op) is tensor.Subtensor for node in topo])
    assert not any([isinstance(node.op, GpuSubtensor) for node in topo])
    # Our optimizer isn't smart enough to move the Elemwise to the GPU.
    # If it were just a little bit smarter, it could wrongly move it to
    # the GPU. If it were super smart, it would know it should not move
    # it to the GPU.
    assert any([isinstance(node.op, tensor.Elemwise) for node in topo])
def gen_mat(nin, nout, name, device='cpu', scale=.01):
    # NOTE: assumes tanh
    self.rng = numpy.random.RandomState(123)
    vals = self.rng.uniform(size=(nin, nout), low=-scale,
                            high=scale).astype('float32')
    if device == 'gpu':
        var = theano.shared(vals, name=name)
        print_mem(name)
    else:
        var = TT._shared(vals, name=name)
    return var
def test_local_gpu_subtensor():
    # Test shared forced on CPU.
    t = tensor._shared(numpy.zeros(20, "float32"))
    f = theano.function([], t[3:4], mode=mode_with_gpu)
    topo = f.maker.fgraph.toposort()
    assert any([type(node.op) is tensor.Subtensor for node in topo])
    assert not any([isinstance(node.op, cuda.GpuSubtensor) for node in topo])

    # Test graph input.
    t = tensor.fmatrix()
    f = theano.function([t], t[3:4], mode=mode_with_gpu)
    topo = f.maker.fgraph.toposort()
    assert any([type(node.op) is tensor.Subtensor for node in topo])
    assert not any([isinstance(node.op, cuda.GpuSubtensor) for node in topo])

    # Test multiple use of the input.
    # We want the subtensor to be on the GPU to prevent multiple transfers.
    t = tensor.fmatrix()
    f = theano.function([t], [t[3:4], t + 1], mode=mode_with_gpu)
    topo = f.maker.fgraph.toposort()
    assert not any([type(node.op) is tensor.Subtensor for node in topo])
    assert any([isinstance(node.op, cuda.GpuSubtensor) for node in topo])

    # Test multiple use of the input + input as output.
    # We want the subtensor to be on the GPU to prevent multiple transfers.
    t = tensor.fmatrix()
    f = theano.function([t], [t[3:4], t + 1, t], mode=mode_with_gpu)
    topo = f.maker.fgraph.toposort()
    assert not any([type(node.op) is tensor.Subtensor for node in topo])
    assert any([isinstance(node.op, cuda.GpuSubtensor) for node in topo])

    # Test shared forced on CPU and we do computation on the output of
    # the subtensor.
    t = tensor._shared(numpy.zeros(20, "float32"))
    f = theano.function([], t[3:4] + 1, mode=mode_with_gpu)
    topo = f.maker.fgraph.toposort()
    assert any([type(node.op) is tensor.Subtensor for node in topo])
    assert not any([isinstance(node.op, cuda.GpuSubtensor) for node in topo])
    assert any([isinstance(node.op, cuda.GpuElemwise) for node in topo])
def test_local_gpu_subtensor():
    # Test shared forced on CPU.
    t = tensor._shared(numpy.zeros(20, "float32"))
    f = theano.function([], t[3:4], mode=mode_with_gpu)
    topo = f.maker.fgraph.toposort()
    assert any([type(node.op) is tensor.Subtensor for node in topo])
    assert not any([isinstance(node.op, GpuSubtensor) for node in topo])

    # Test graph input.
    t = tensor.fmatrix()
    f = theano.function([t], t[3:4], mode=mode_with_gpu)
    topo = f.maker.fgraph.toposort()
    assert any([type(node.op) is tensor.Subtensor for node in topo])
    assert not any([isinstance(node.op, GpuSubtensor) for node in topo])

    # Test multiple use of the input.
    # We want the subtensor to be on the GPU to prevent multiple transfers.
    t = tensor.fmatrix()
    f = theano.function([t], [t[3:4], t + 1], mode=mode_with_gpu)
    topo = f.maker.fgraph.toposort()
    assert not any([type(node.op) is tensor.Subtensor for node in topo])
    assert any([isinstance(node.op, GpuSubtensor) for node in topo])

    # Test multiple use of the input + input as output.
    # We want the subtensor to be on the GPU to prevent multiple transfers.
    t = tensor.fmatrix()
    f = theano.function([t], [t[3:4], t + 1, t], mode=mode_with_gpu)
    topo = f.maker.fgraph.toposort()
    assert not any([type(node.op) is tensor.Subtensor for node in topo])
    assert any([isinstance(node.op, GpuSubtensor) for node in topo])

    # Test shared forced on CPU and we do computation on the output of
    # the subtensor.
    t = tensor._shared(numpy.zeros(20, "float32"))
    f = theano.function([], t[3:4] + 1, mode=mode_with_gpu)
    topo = f.maker.fgraph.toposort()
    assert any([type(node.op) is tensor.Subtensor for node in topo])
    assert not any([isinstance(node.op, GpuSubtensor) for node in topo])
    assert any([isinstance(node.op, GpuElemwise) for node in topo])
def categorical_sampler(rstream, p, draw_shape, dtype="int32"):
    if not isinstance(p, theano.Variable):
        p = tensor._shared(numpy.asarray(p, dtype=theano.config.floatX))
    if p.ndim != 1:
        raise NotImplementedError()
    if draw_shape.ndim != 1:
        raise TypeError()
    op = Categorical(
        False,
        tensor.TensorType(
            broadcastable=(False,) * tensor.get_vector_length(draw_shape),
            dtype=dtype))
    rstate = rstream.new_shared_rstate()
    new_rstate, out = op(rstate, p, draw_shape)
    rstream.add_default_update(out, rstate, new_rstate)
    return out
def shared_dataset_x(data_x, borrow=True):
    """Function that loads the dataset into shared variables.

    The reason we store our dataset in shared variables is to allow Theano
    to copy it into GPU memory (when the code is run on a GPU). Since
    copying data into the GPU is slow, copying a minibatch every time it is
    needed (the default behaviour if the data is not in a shared variable)
    would lead to a large decrease in performance.
    """
    shared_x = T._shared(np.asarray(data_x, dtype=theano.config.floatX),
                         borrow=borrow)
    # shared_x = theano.shared(data_x, borrow=borrow)
    return shared_x
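A usage sketch under the assumption that data_x is a plain (n_samples, n_features) numpy array: the dataset is pushed to the device once, and minibatches are then taken as symbolic slices, so no host-to-device copy happens per batch.

import numpy as np
import theano.tensor as T

data_x = np.random.rand(1000, 784)
shared_x = shared_dataset_x(data_x)

index = T.lscalar('index')      # minibatch index
batch_size = 100
x_batch = shared_x[index * batch_size:(index + 1) * batch_size]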
def build_model(self):
    """Builds then returns the pyMC model."""
    M = pm.Model()
    with M:
        # The three values here are div and deathrate;
        # assume just one IC50 for simplicity.
        lIC50 = pm.Normal("IC50s", 2.0)
        Emin_growth = pm.Uniform("Emin_growth", lower=0.0,
                                 upper=self.Emax_growth)
        Emax_death = pm.Lognormal("Emax_death", -2.0, 2.0)

        # Import drug concentrations into theano vector
        drugCs = T._shared(self.drugCs)

        # Drug term since we're using constant IC50 and hill slope
        drugTerm = 1.0 / (1.0 + T.pow(10.0, (lIC50 - drugCs)
                                      * pm.Lognormal("hill")))

        # Do actual conversion to parameters for each drug condition
        growthV = self.Emax_growth + (Emin_growth - self.Emax_growth) * drugTerm

        # Calculate the growth rate, assuming the death rate in the
        # absence of drug is zero
        GR = growthV - Emax_death * drugTerm

        # Calculate the number of live cells
        lnum = T.exp(GR * self.time)

        # Normalize live cell data to control, as is similar to measurements.
        # Residual between model prediction and measurement:
        residual = self.lObs - (lnum / lnum[0])
        pm.Normal("dataFitlnum", sd=T.std(residual), observed=residual)

    return M
def theanoCore(timeV, div, deathRate, apopfrac, d):
    """Assemble the core growth model."""
    # Make a vector of time and one for time-constant values
    timeV = T._shared(timeV)
    constV = T.ones_like(timeV)  # pylint: disable=no-member

    # Calculate the growth rate
    GR = T.outer(div - deathRate, constV)

    # cGRd is used later
    cGRd = T.outer(deathRate * apopfrac, constV) / (GR + d)

    # b is the rate straight to death
    b = T.outer(deathRate * (1 - apopfrac), constV)

    lnum = T.exp(GR * timeV)

    # Number of early apoptosis cells at start is 0.0
    eap = cGRd * (lnum - T.exp(-d * timeV))

    # Calculate dead cells via apoptosis and via necrosis
    deadnec = b * (lnum - 1) / GR
    deadapop = d * cGRd * (lnum - 1) / GR + cGRd * (T.exp(-d * timeV) - 1)

    return (lnum, eap, deadapop, deadnec)
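theanoCore only builds symbolic expressions; a minimal sketch of evaluating them, assuming the rate parameters are plain numpy arrays or floats (Theano converts them to constants), compiles the four outputs into one callable:

import numpy as np
import theano

timeV = np.linspace(0.0, 72.0, 10).astype(theano.config.floatX)
div = np.array([0.3, 0.2], dtype=theano.config.floatX)
deathRate = np.array([0.05, 0.1], dtype=theano.config.floatX)

lnum, eap, deadapop, deadnec = theanoCore(timeV, div, deathRate,
                                          apopfrac=0.5, d=0.1)
f = theano.function([], [lnum, eap, deadapop, deadnec])
live, early, apop, nec = f()    # each has shape (2, 10): condition x time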
parser = argparse.ArgumentParser()
parser.add_argument("source", type=str,
                    help="Pickled network to steal params from.")
parser.add_argument("dest", type=str, help="File to place new network in.")
parser.add_argument("--cpu", "-c", dest="cpu", action="store_const",
                    const=True, default=False,
                    help="Convert network to run on a CPU.")
args = parser.parse_args()

print "loading model..."
f = file(args.source, "rb")
old_network = cPickle.load(f)
f.close()

params = old_network.params
if args.cpu:
    print "converting gpu parameters..."
    new_params = []
    for param in params:
        param = T._shared(param.get_value())
        new_params.append(param)
    params = new_params

new_network = network(batch_size=None, params=params)

print "saving model..."
f = file(args.dest, "wb")
cPickle.dump(new_network, f, protocol=cPickle.HIGHEST_PROTOCOL)
f.close()
def __init__(self, options, channel, data, model):
    """
    Parameters:
        options: Dictionary
            `options` is expected to contain the following keys:
                `cbs` -> int
                    Number of samples to consider at a time when computing
                    some property of the model
                `gbs` -> int
                    Number of samples over which to compute the gradients
                `mbs` -> int
                    Number of samples over which to compute the krylov
                    subspace
                `ebs` -> int
                    Number of samples over which to evaluate the training
                    error
                `seed` -> int
                    Random number generator seed
                `profile` -> bool
                    Flag, if profiling should be on or not
                `verbose` -> int
                    Verbosity level
                `lbfgsIters` -> int
                `krylovDim` -> int
        channel: jobman channel or None
        data: dictionary-like object returned by numpy.load containing the
            data
        model: model
    """
    n_params = len(model.params)
    self.data = data
    xdata = theano.shared(data['train_x'], name='xdata')
    ydata = theano.shared(data['train_y'], name='ydata')
    self.xdata = xdata
    self.ydata = ydata
    shared_data = [xdata, ydata]
    self.rng = numpy.random.RandomState(options['seed'])
    n_samples = data['train_x'].shape[0]
    self.grad_batches = n_samples // options['gbs']
    self.metric_batches = n_samples // options['mbs']
    self.eval_batches = n_samples // options['ebs']
    self.verbose = options['verbose']
    rng = numpy.random.RandomState(options['seed'])
    self.rng = rng
    self.options = options
    self.channel = channel
    self.model = model
    n_dimensions = options['krylovDim']
    self.n_dimensions = n_dimensions

    if options['device'] == 'gpu':
        cfn_subspaces = \
            [theano.shared(numpy.zeros((n_dimensions,) + shp,
                                       dtype='float32'),
                           name='cfn{%s|%d}' % (str(param.name), i))
             for i, (shp, param) in enumerate(zip(model.params_shape,
                                                  model.params))]
        old_deltas = \
            [theano.shared(numpy.zeros(shp, dtype='float32'),
                           name='delta{%s|%d}' % (str(param.name), i))
             for i, (shp, param) in enumerate(zip(model.params_shape,
                                                  model.params))]
        self.gs = [theano.shared(numpy.zeros(shp,
                                             dtype=theano.config.floatX))
                   for shp in model.params_shape]
    else:
        cfn_subspaces = \
            [TT._shared(numpy.zeros((n_dimensions,) + shp, dtype='float32'),
                        name='cfn{%s|%d}' % (str(param.name), i))
             for i, (shp, param) in enumerate(zip(model.params_shape,
                                                  model.params))]
        old_deltas = \
            [TT._shared(numpy.zeros(shp, dtype='float32'),
                        name='delta{%s|%d}' % (str(param.name), i))
             for i, (shp, param) in enumerate(zip(model.params_shape,
                                                  model.params))]
        self.gs = [TT._shared(numpy.zeros(shp, dtype=theano.config.floatX))
                   for shp in model.params_shape]
    self.cfn_subspaces = cfn_subspaces
    self.old_deltas = old_deltas

    self.permg = self.rng.permutation(self.grad_batches)
    self.permr = self.rng.permutation(self.metric_batches)
    self.perme = self.rng.permutation(self.eval_batches)
    self.k = 0
    self.posg = 0
    self.posr = 0
    self.pose = 0

    # Step 1. Compile function for computing Euclidean gradients
    print 'Constructing grad function'
    loc_inputs = [x.type(name='locx') for x in model.inputs]

    def grad_step(*args):
        idx = TT.cast(args[0], 'int32')
        nw_inps = [x[idx * options['cbs']:(idx + 1) * options['cbs']]
                   for x in loc_inputs]
        replace = dict(zip(model.inputs, nw_inps))
        nw_cost = safe_clone(model.train_cost, replace=replace)
        gs = TT.grad(nw_cost, model.params)
        nw_gs = [op + np for op, np in zip(args[1:1 + n_params], gs)]
        return [args[0] + const(1)] + nw_gs

    ig = [TT.unbroadcast(TT.alloc(const(0), 1, *shp), 0)
          for shp in model.params_shape]
    idx0 = TT.unbroadcast(const([0]), 0)
    n_steps = options['gbs'] // options['cbs']
    rvals, updates = scan(grad_step,
                          states=[idx0] + ig,
                          n_steps=n_steps,
                          name='grad_loop',
                          mode=gpu_mode,
                          profile=options['profile'])
    nw_gs = [x[0] / const(n_steps) for x in rvals[1:1 + n_params]]
    updates.update(dict(zip(self.gs, nw_gs)))
    gdx = TT.iscalar('gdx')
    grad_inps = zip(loc_inputs,
                    [x[gdx * options['gbs']:(gdx + 1) * options['gbs']]
                     for x in shared_data])
    print 'Compiling grad function'
    self.compute_eucledian_gradients = theano.function(
        [gdx],
        [],
        updates=updates,
        givens=dict(grad_inps),
        name='compute_eucledian_gradients',
        mode=gpu_mode,
        profile=options['profile'])

    # Step 2. Compile function for computing Riemannian gradients
    if options['device'] == 'gpu':
        mode = gpu_mode

        def compute_Gv(*args):
            idx0 = const([0])
            ep = [TT.alloc(const(0), 1, *shp)
                  for shp in model.params_shape]

            def Gv_step(*gv_args):
                idx = TT.cast(gv_args[0], 'int32')
                nw_inps = [x[idx * options['cbs']:
                             (idx + 1) * options['cbs']]
                           for x in loc_inputs]
                replace = dict(zip(model.inputs, nw_inps))
                nw_cost, nw_preactiv_out = safe_clone(
                    [model.train_cost, model.preactiv_out], replace)
                nw_gvs = TT.Lop(nw_preactiv_out, model.params,
                                TT.Rop(TT.grad(nw_cost, nw_preactiv_out),
                                       model.params, args))
                Gvs = [ogv + ngv
                       for (ogv, ngv) in zip(gv_args[1:], nw_gvs)]
                return [gv_args[0] + const(1)] + Gvs

            states = [idx0] + ep
            n_steps = options['mbs'] // options['cbs']
            rvals, updates = scan(Gv_step,
                                  states=states,
                                  n_steps=n_steps,
                                  mode=theano.Mode(linker='cvm'),
                                  name='Gv_step',
                                  profile=options['profile'])
            final_Gvs = [x[0] / const(n_steps) for x in rvals[1:]]
            return final_Gvs, updates
    else:
        mode = cpu_mode

        def compute_Gv(*args):
            cgv = [theano.shared(numpy.zeros(shp,
                                             dtype=theano.config.floatX),
                                 name='cgv%d' % idx)
                   for idx, shp in enumerate(model.params_shape)]
            print_mem('allocated mem for cgv')
            idx0 = const([0])
            ep = [TT.alloc(const(0), 1, *shp)
                  for shp in model.params_shape]

            def Gv_step(*gv_args):
                idx = TT.cast(gv_args[0], 'int32')
                nw_inps = [x[idx * options['cbs']:
                             (idx + 1) * options['cbs']]
                           for x in loc_inputs]
                replace = dict(zip(model.inputs, nw_inps))
                nw_cost, nw_preactiv_out = safe_clone(
                    [model.train_cost, model.preactiv_out], replace)
                nw_gvs = TT.Lop(nw_preactiv_out, model.params,
                                TT.Rop(TT.grad(nw_cost, nw_preactiv_out),
                                       model.params, cgv))
                Gvs = [ogv + ngv
                       for (ogv, ngv) in zip(gv_args[1:], nw_gvs)]
                return [gv_args[0] + const(1)] + Gvs

            states = [idx0] + ep
            n_steps = options['mbs'] // options['cbs']
            rvals, updates = scan(Gv_step,
                                  states=states,
                                  n_steps=n_steps,
                                  mode=gpu_mode,
                                  name='Gv_step',
                                  profile=options['profile'])
            final_Gvs = [TT.as_tensor_variable(x[0]) / const(n_steps)
                         for x in rvals[1:]]
            grad_inps = zip(loc_inputs, shared_data)
            loc_fn = theano.function([],
                                     final_Gvs,
                                     updates=updates,
                                     givens=dict(grad_inps),
                                     on_unused_input='warn',
                                     mode=gpu_mode,
                                     name='loc_fn',
                                     profile=options['profile'])
            fake_op = FakeGPUShell(cgv, loc_fn, len(cgv))
            return fake_op(*args), {}

    rvals, updates = krylov_subspace(compute_Gv,
                                     self.gs,
                                     old_deltas,
                                     n_dimensions,
                                     model.params_shape,
                                     profile=options['profile'],
                                     device=options['device'])
    gdx = TT.iscalar('gdx')
    grad_inps = zip(loc_inputs,
                    [x[gdx * options['mbs']:(gdx + 1) * options['mbs']]
                     for x in shared_data])
    updates.update(dict(zip(cfn_subspaces, rvals)))
    self.update_krylov_subspace = theano.function(
        [gdx],
        [],
        updates=updates,
        givens=dict(grad_inps),
        profile=options['profile'],
        on_unused_input='warn',
        name='update_krylov_subspace',
        mode=mode)

    alphas = tensor.vector('alphas')
    deltas = []
    nw_params = []
    if options['device'] == 'gpu':
        params = model.params
    else:
        params = model.cpu_params
    for param, subspace in zip(params, cfn_subspaces):
        alpha_reshuffle = [0] + ['x'] * param.ndim
        delta = (alphas.dimshuffle(*alpha_reshuffle)
                 * subspace).sum(axis=0)
        nw_param = param + delta
        nw_params.append(nw_param)
        deltas.append(delta)

    print 'constructing evaluation function'
    ebdx = TT.iscalar('ebdx')
    updates_dict = dict(zip(model.params + old_deltas,
                            nw_params + deltas))
    if options['device'] != 'gpu':
        updates_dict.update(dict(zip(model.cpu_params, nw_params)))
    self.update_params = theano.function([alphas],
                                         updates=updates_dict,
                                         name='update_params',
                                         allow_input_downcast=True,
                                         mode=mode,
                                         profile=options['profile'])

    n_steps = options['ebs'] // options['cbs']

    def ls_cost_step(_idx, acc):
        idx = TT.cast(_idx, 'int32')
        nw_inps = [x[idx * options['cbs']:(idx + 1) * options['cbs']]
                   for x in loc_inputs]
        replace = dict(zip(model.inputs + model.params,
                           nw_inps + nw_params))
        nw_cost = safe_clone(model.train_cost, replace=replace)
        return [_idx + const(1), acc + nw_cost]

    states = [TT.constant(numpy.float32([0])),
              TT.constant(numpy.float32([0]))]
    rvals, _ = scan(ls_cost_step,
                    states=states,
                    n_steps=n_steps,
                    name='ls_cost_step',
                    mode=gpu_mode,
                    profile=options['profile'])
    fcost = rvals[1][0] / const(n_steps)

    def ls_grad_step(_idx, gws):
        idx = TT.cast(_idx, 'int32')
        nw_inps = [x[idx * options['cbs']:(idx + 1) * options['cbs']]
                   for x in loc_inputs]
        replace = dict(zip(model.inputs + model.params,
                           nw_inps + nw_params))
        nw_cost = safe_clone(model.train_cost, replace=replace)
        nw_gs = TT.grad(nw_cost, alphas)
        return _idx + numpy.float32(1), gws + nw_gs

    states = [TT.constant(numpy.float32([0])),
              TT.constant(numpy.zeros((1, n_dimensions),
                                      dtype='float32'))]
    rvals, _ = scan(ls_grad_step,
                    states=states,
                    n_steps=n_steps,
                    name='ls_grad_step',
                    mode=gpu_mode,
                    profile=options['profile'])
    fgrad = rvals[1][0] / const(n_steps)

    grad_inps = zip(loc_inputs,
                    [x[ebdx * options['ebs']:(ebdx + 1) * options['ebs']]
                     for x in shared_data])
    self.lbfgs_fn = theano.function(
        [alphas, ebdx],
        # theano.printing.Print('fcost')(fcost),
        fcost,
        givens=grad_inps,
        allow_input_downcast=True,
        on_unused_input='warn',
        name='lbfgs_fn',
        profile=options['profile'],
        mode=gpu_mode)
    self.lbfgs_grad = theano.function([alphas, ebdx],
                                      fgrad,
                                      givens=grad_inps,
                                      on_unused_input='warn',
                                      allow_input_downcast=True,
                                      name='lbfgs_grad',
                                      profile=options['profile'],
                                      mode=gpu_mode)

    n_steps = options['ebs'] // options['cbs']

    def ls_error(_idx, acc):
        idx = TT.cast(_idx, 'int32')
        nw_inps = [x[idx * options['cbs']:(idx + 1) * options['cbs']]
                   for x in loc_inputs]
        replace = dict(zip(model.inputs, nw_inps))
        nw_cost = TT.cast(safe_clone(model.err, replace=replace),
                          'float32')
        return [_idx + const(1), acc + nw_cost]

    states = [TT.constant(numpy.float32([0])),
              TT.constant(numpy.float32([0]))]
    rvals, _ = scan(ls_error,
                    states=states,
                    n_steps=n_steps,
                    name='ls_err_step',
                    mode=cpu_mode,
                    profile=options['profile'])
    ferr = rvals[1][0] / const(n_steps)
    self.compute_error = theano.function(
        [],
        ferr,
        givens=dict(zip(loc_inputs, shared_data)),
        name='compute_err',
        mode=gpu_mode,
        on_unused_input='warn',
        profile=options['profile'])
def shared(self, x, name=None):
    return tensor._shared(x, name)
import pynet.layer as layers

floatX = theano.config.floatX

parser = argparse.ArgumentParser(description='''Convert gpu pickle pynet
                                 model to cpu pickle pynet model''')
parser.add_argument('--gpu_model', metavar='Path', required=True,
                    help='the path to the gpu model pickle file')
parser.add_argument('--cpu_model', metavar='Path', required=True,
                    help='path to save the cpu model pickle file')
args = parser.parse_args()

print('loading gpu autoencoder..')
fin = open(args.gpu_model)
gpu_model = cPickle.load(fin)
ae = AutoEncoder(input_dim=gpu_model.input_dim)

for layer in gpu_model.encode_layers:
    layerW = T._shared(np.array(layer.W.get_value(), floatX),
                       name=layer.W.name, borrow=False)
    layerb = T._shared(np.array(layer.b.get_value(), floatX),
                       name=layer.b.name, borrow=False)
    encode_layer = getattr(layers, layer.__class__.__name__)(
        dim=layer.dim, name=layer.name, W=layerW, b=layerb)
    ae.add_encode_layer(encode_layer)
    print 'encode layer', encode_layer.name, encode_layer.dim

print 'encode layers', ae.encode_layers

for ae_layer, gpu_layer in zip(reversed(ae.encode_layers),
                               gpu_model.decode_layers):
    gpu_decode_layer_b = T._shared(np.array(gpu_layer.b.get_value(), floatX),
                                   name=gpu_layer.b.name, borrow=False)
    decode_layer = getattr(layers, gpu_layer.__class__.__name__)(
        name=gpu_layer.name, dim=gpu_layer.dim, W=ae_layer.W.T,
        b=gpu_decode_layer_b)
    ae.add_decode_layer(decode_layer)
    print 'decode layer', decode_layer.name, decode_layer.dim
def shared_dataset(data_xy):
    data_x, data_y = data_xy
    shared_x = T._shared(numpy.asarray(data_x, dtype=theano.config.floatX),
                         borrow=True)
    shared_y = theano.shared(numpy.asarray(data_y,
                                           dtype=theano.config.floatX),
                             borrow=True)
    # Labels are stored as floatX (GPU shared variables only hold floats)
    # and viewed as int32 through a symbolic cast.
    return shared_x, T.cast(shared_y, 'int32')
def load_label(labels):
    shared_y = T._shared(np.asarray(labels, dtype=theano.config.floatX),
                         borrow=True)
    return T.cast(shared_y, 'int32')
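Usage sketch: the underlying shared variable stores floats, and the returned value is a symbolic int32 view of it.

import numpy as np
import theano

y = load_label(np.array([0, 1, 2, 1]))
print(y.dtype)                    # 'int32'
print(theano.function([], y)())   # array([0, 1, 2, 1], dtype=int32)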
def to_err_tensor(self):
    '''Return the errors of points in the dataset as a Theano tensor'''
    arr = self.to_err_array()
    return tt._shared(arr)
import pynet.layer as layers

floatX = theano.config.floatX

parser = argparse.ArgumentParser(description='''Convert gpu pickle pynet
                                 model to cpu pickle pynet model''')
parser.add_argument('--gpu_model', metavar='Path', required=True,
                    help='the path to the gpu model pickle file')
parser.add_argument('--cpu_model', metavar='Path', required=True,
                    help='path to save the cpu model pickle file')
args = parser.parse_args()

print('loading gpu mlp..')
fin = open(args.gpu_model)
gpu_model = cPickle.load(fin)
mlp = MLP(input_dim=gpu_model.input_dim)

for layer in gpu_model.layers:
    layerW = T._shared(np.array(layer.W.get_value(), floatX),
                       name=layer.W.name, borrow=False)
    layerb = T._shared(np.array(layer.b.get_value(), floatX),
                       name=layer.b.name, borrow=False)
    mlp_layer = getattr(layers, layer.__class__.__name__)(
        dim=layer.dim, name=layer.name, W=layerW, b=layerb)
    mlp.add_layer(mlp_layer)
    print 'mlp layer', mlp_layer.name, mlp_layer.dim

print 'layers', mlp.layers

fout = open(args.cpu_model, 'wb')
cPickle.dump(mlp, fout)
print('Done!')
fin.close()
fout.close()
def shared(self, x):
    return tensor._shared(x)
def __call__(self, shape, name=None):
    return T._shared(np.ones(shape) * self.c, name=name)
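This __call__ reads like the body of a constant-fill weight initializer; a self-contained sketch of a plausible enclosing class (the name Constant and the default for c are assumptions):

import numpy as np
import theano.tensor as T

class Constant(object):
    # Initializer that fills a shared variable with a constant value.
    def __init__(self, c=0.0):
        self.c = c

    def __call__(self, shape, name=None):
        return T._shared(np.ones(shape) * self.c, name=name)

b = Constant(0.1)((5,), name='b')   # shared (5,) vector filled with 0.1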
def __init__(self, options, channel, data, model):
    """
    Parameters:
        options: Dictionary
            `options` is expected to contain the following keys:
                `cbs` -> int
                    Number of samples to consider at a time when computing
                    some property of the model
                `gbs` -> int
                    Number of samples over which to compute the gradients
                `mbs` -> int
                    Number of samples over which to compute the metric
                `ebs` -> int
                    Number of samples over which to evaluate the training
                    error
                `mreg` -> float
                    Regularization added to the metric
                `mrtol` -> float
                    Relative tolerance for inverting the metric
                `miters` -> int
                    Number of iterations
                `seed` -> int
                    Random number generator seed
                `profile` -> bool
                    Flag, if profiling should be on or not
                `verbose` -> int
                    Verbosity level
                `lr` -> float
                    Learning rate
        channel: jobman channel or None
        data: dictionary-like object returned by numpy.load containing the
            data
        model: model
    """
    n_params = len(model.params)
    self.data = data
    self.model = model
    if options['device'] == 'gpu':
        xdata = theano.shared(data['train_x'], name='xdata')
        print_mem('xdata')
        self.ydata = TT._shared(data['train_y'], name='ydata')
        self.xdata = xdata
        self.shared_data = [xdata, self.ydata]
        self.cpu_shared_data = []
    else:
        xdata = theano.shared(data['train_x'][:options['gbs']],
                              name='xdata')
        print_mem('xdata')
        self.ydata = TT._shared(data['train_y'][:options['gbs']],
                                name='ydata')
        self.xdata = xdata
        self.shared_data = [xdata, self.ydata]
        cxdata = TT._shared(data['train_x'], name='cpu_xdata', borrow=True)
        self.cydata = TT._shared(data['train_y'], name='cpu_ydata',
                                 borrow=True)
        cydata = TT.cast(self.cydata, 'int32')
        self.cxdata = cxdata
        self.cpu_shared_data = [cxdata, cydata]
    self.options = options
    self.rng = numpy.random.RandomState(options['seed'])
    n_samples = data['train_x'].shape[0]
    self.n_samples = n_samples
    self.grad_batches = n_samples // options['gbs']
    self.metric_batches = n_samples // options['mbs']
    self.eval_batches = n_samples // options['ebs']
    self.verbose = options['verbose']

    # Store Euclidean gradients
    cst = time.time()
    if options['device'] == 'gpu':
        self.gs = [theano.shared(numpy.zeros(shp,
                                             dtype=theano.config.floatX),
                                 name='g%d' % idx)
                   for idx, shp in enumerate(model.params_shape)]
        # Store Riemannian gradients
        self.rs = [theano.shared(numpy.zeros(shp,
                                             dtype=theano.config.floatX),
                                 name='r%d' % idx)
                   for idx, shp in enumerate(model.params_shape)]
        # Store Jacobi diagonal
        self.js = [theano.shared(numpy.zeros(shp,
                                             dtype=theano.config.floatX),
                                 name='j%d' % idx)
                   for idx, shp in enumerate(model.params_shape)]
    else:
        self.gs = [TT._shared(numpy.zeros(shp, dtype=theano.config.floatX),
                              name='g%d' % idx)
                   for idx, shp in enumerate(model.params_shape)]
        # Store Riemannian gradients
        self.rs = [TT._shared(numpy.zeros(shp, dtype=theano.config.floatX),
                              name='r%d' % idx)
                   for idx, shp in enumerate(model.params_shape)]
        # Store Jacobi diagonal
        self.js = [TT._shared(numpy.zeros(shp, dtype=theano.config.floatX),
                              name='j%d' % idx)
                   for idx, shp in enumerate(model.params_shape)]

    self.permg = self.rng.permutation(self.grad_batches)
    self.permr = self.rng.permutation(self.metric_batches)
    self.perme = self.rng.permutation(self.eval_batches)
    self.k = 0
    self.posg = 0
    self.posr = 0
    self.pose = 0
    self.device = options['device']
    if self.device == 'gpu':
        self.init_gpu(options, channel, data, model)
    else:
        self.init_cpu(options, channel, data, model)
    self.old_norm = 1
def __init__(self, options, channel, data):
    self.rng = numpy.random.RandomState(options['seed'])
    self.srng = RandomStreams(self.rng.randint(1e5))
    self.nin = data['train_x'].shape[2]
    self.in_shape = (options['cbs'], self.nin)
    self.options = options
    if isinstance(options['nhid'], str):
        self.nhid = eval(options['nhid'])
    else:
        self.nhid = options['nhid']
    self.nout = data['train_y'].shape[2]

    def gen_mat(nin, nout, name, device='cpu', scale=.01):
        # NOTE: assumes tanh
        self.rng = numpy.random.RandomState(123)
        vals = self.rng.uniform(size=(nin, nout), low=-scale,
                                high=scale).astype('float32')
        if device == 'gpu':
            var = theano.shared(vals, name=name)
            print_mem(name)
        else:
            var = TT._shared(vals, name=name)
        return var

    def gen_vec(n, name, device='cpu'):
        self.rng = numpy.random.RandomState(123)
        vals = self.rng.uniform(size=(n,), low=-.0005,
                                high=.0005).astype('float32')
        if device == 'gpu':
            var = theano.shared(vals, name=name)
            print_mem(name)
        else:
            var = TT._shared(vals, name=name)
        return var

    ##### PARAMS
    Wxx = gen_mat(self.nhid, self.nhid, name='Wxx', device='gpu')
    Wux = gen_mat(self.nin, self.nhid, name='Wux', device='gpu')
    Wxy = gen_mat(self.nhid, self.nout, name='Wxy', device='gpu')
    Wuy = gen_mat(self.nin, self.nout, name='Wuy', device='gpu')
    bx = gen_vec(self.nhid, name='bx', device='gpu')
    self.h0 = gen_mat(options['cbs'], self.nhid, name='h0', device='gpu',
                      scale=0)
    self.params = [Wxx, Wux, Wxy, Wuy, bx, self.h0]
    self.params_shape = [(self.nhid, self.nhid),
                         (self.nin, self.nhid),
                         (self.nhid, self.nout),
                         (self.nin, self.nout),
                         (self.nhid,),
                         (options['cbs'], self.nhid)]
    self.cparams = []
    self.x = TT.tensor3('X')
    self.y = TT.tensor3('y')
    self.inputs = [self.x, self.y]

    def step(u_t, h_tm1, Wxx, Wux, Wxy, Wuy):
        h_t = TT.tanh(TT.dot(u_t, Wux) + TT.dot(h_tm1, Wxx))
        y_t = TT.dot(h_t, Wxy) + TT.dot(u_t, Wuy)
        return h_t, y_t

    hid0 = TT.alloc(numpy.float32(0),
                    numpy.int32(options['seqlen'] + 1),
                    numpy.int32(options['cbs']),
                    numpy.int32(self.nhid))
    hid0 = TT.set_subtensor(hid0[0], self.h0)
    [H, Y], _ = scan(step,
                     self.x,
                     [hid0, None],
                     [Wxx, Wux, Wxy, Wuy],
                     n_steps=options['seqlen'])
    # TODO: compute 3D cost (`ferr` below is expected to come out of it) ...
    if options['device'] == 'cpu/gpu':
        self.cpu_params = [TT._shared(x.get_value(), name=x.name)
                           for x in self.params]
        self.err = safe_clone(self.err,
                              updates=zip(self.params, self.cpu_params))
        self.valid_xdata = TT._shared(data['valid_x'], name='valid_xdata',
                                      borrow=True)
        self.test_xdata = TT._shared(data['test_x'], name='test_xdata',
                                     borrow=True)
        mode = cpu_mode
    else:
        self.valid_xdata = theano.shared(data['valid_x'],
                                         name='valid_xdata', borrow=True)
        self.test_xdata = theano.shared(data['test_x'], name='test_xdata',
                                        borrow=True)
        mode = gpu_mode
    self.valid_ydata = TT.cast(TT._shared(data['valid_y'],
                                          name='valid_ydata', borrow=True),
                               'int32')
    self.test_ydata = TT.cast(TT._shared(data['test_y'],
                                         name='test_ydata', borrow=True),
                              'int32')

    givens = {}
    givens[self.x] = self.valid_xdata
    givens[self.y] = self.valid_ydata
    self.valid_eval_func = theano.function([],
                                           ferr,
                                           givens=givens,
                                           name='valid_eval_fn',
                                           profile=options['profile'],
                                           mode=mode)

    givens[self.x] = self.test_xdata
    givens[self.y] = self.test_ydata
    self.test_eval_func = theano.function([],
                                          ferr,
                                          givens=givens,
                                          name='test_fn',
                                          profile=options['profile'],
                                          mode=mode)
def __init__(self, options, channel, data, model):
    """
    Parameters:
        options: Dictionary
            `options` is expected to contain the following keys:
                `cbs` -> int
                    Number of samples to consider at a time when computing
                    some property of the model
                `gbs` -> int
                    Number of samples over which to compute the gradients
                `mbs` -> int
                    Number of samples over which to compute the metric
                `ebs` -> int
                    Number of samples over which to evaluate the training
                    error
                `mreg` -> float
                    Regularization added to the metric
                `mrtol` -> float
                    Relative tolerance for inverting the metric
                `miters` -> int
                    Number of iterations
                `seed` -> int
                    Random number generator seed
                `profile` -> bool
                    Flag, if profiling should be on or not
                `verbose` -> int
                    Verbosity level
                `lr` -> float
                    Learning rate
        channel: jobman channel or None
        data: dictionary-like object returned by numpy.load containing the
            data
        model: model
    """
    n_params = len(model.params)
    self.data = data
    if options['device'] != 'gpu':
        xdata = theano.shared(data['train_x'][:options['gbs']],
                              name='xdata')
        ydata = TT._shared(data['train_y'][:options['gbs']], name='ydata')
        self.xdata = xdata
        self.ydata = ydata
        shared_data = [xdata, ydata]
    else:
        self.cpu_shared_data = []
        xdata = theano.shared(data['train_x'], name='xdata')
        ydata = TT._shared(data['train_y'], name='ydata')
        self.xdata = xdata
        self.ydata = ydata
        shared_data = [xdata, ydata]
    self.rng = numpy.random.RandomState(options['seed'])
    n_samples = data['train_x'].shape[0]
    self.grad_batches = n_samples // options['gbs']
    self.metric_batches = n_samples // options['mbs']
    self.eval_batches = n_samples // options['ebs']
    self.verbose = options['verbose']
    if options['device'] != 'gpu':
        # Store Euclidean gradients
        self.gs = [TT._shared(numpy.zeros(shp, dtype=theano.config.floatX))
                   for shp in model.params_shape]
        # Store Riemannian gradients
        self.rs = [TT._shared(numpy.zeros(shp, dtype=theano.config.floatX))
                   for shp in model.params_shape]
    else:
        # Store Euclidean gradients
        self.gs = [theano.shared(numpy.zeros(shp,
                                             dtype=theano.config.floatX))
                   for shp in model.params_shape]
        # Store Riemannian gradients
        self.rs = [theano.shared(numpy.zeros(shp,
                                             dtype=theano.config.floatX))
                   for shp in model.params_shape]
    self.permg = self.rng.permutation(self.grad_batches)
    self.permr = self.rng.permutation(self.metric_batches)
    self.perme = self.rng.permutation(self.eval_batches)
    self.k = 0
    self.posg = 0
    self.posr = 0
    self.pose = 0

    # Step 1. Compile function for computing Euclidean gradients

    # inputs
    gbdx = TT.iscalar('grad_batch_idx')
    print 'Constructing grad function'
    srng = RandomStreams(numpy.random.randint(1e5))
    loc_inputs = [x.type() for x in model.inputs]

    def grad_step(*args):
        idx = TT.cast(args[0], 'int32')
        nw_inps = [x[idx * options['cbs']:(idx + 1) * options['cbs']]
                   for x in loc_inputs]
        replace = dict(zip(model.inputs, nw_inps))
        nw_cost = safe_clone(model.train_cost, replace=replace)
        gs = TT.grad(nw_cost, model.params)
        nw_gs = [op + np for op, np in zip(args[1:1 + n_params], gs)]
        return [args[0] + const(1)] + nw_gs

    ig = [TT.unbroadcast(TT.alloc(const(0), 1, *shp), 0)
          for shp in model.params_shape]
    idx0 = TT.unbroadcast(const([0]), 0)
    n_steps = options['gbs'] // options['cbs']
    rvals, updates = scan(grad_step,
                          states=[idx0] + ig,
                          n_steps=n_steps,
                          name='grad_loop',
                          profile=options['profile'])
    nw_gs = [x[0] / const(n_steps) for x in rvals[1:1 + n_params]]

    # updates
    updates.update(dict(zip(self.gs, nw_gs)))
    # givens
    if options['device'] == 'gpu':
        grad_inps = [(x, y[gbdx * options['gbs']:
                           (gbdx + 1) * options['gbs']])
                     for x, y in zip(loc_inputs, shared_data)]
    else:
        grad_inps = zip(loc_inputs, shared_data)
    print 'Compiling grad function'
    self.compute_eucledian_gradients = theano.function(
        [gbdx],
        [],
        updates=updates,
        givens=dict(grad_inps),
        name='compute_eucledian_gradients',
        mode=gpu_mode,
        on_unused_input='warn',
        profile=options['profile'])

    # Step 2. Compile function for computing Riemannian gradients
    rbdx = TT.iscalar('riemmanian_batch_idx')
    rbpos = rbdx * options['mbs']
    if options['device'] == 'gpu':
        mode = gpu_mode

        def compute_Gv(*args):
            idx0 = const([0])
            ep = [TT.alloc(const(0), 1, *shp)
                  for shp in model.params_shape]

            def Gv_step(*gv_args):
                idx = TT.cast(gv_args[0], 'int32')
                nw_inps = [x[idx * options['cbs']:
                             (idx + 1) * options['cbs']]
                           for x in loc_inputs]
                replace = dict(zip(model.inputs, nw_inps))
                nw_outs = safe_clone(model.outs, replace)
                final_results = dict(zip(model.params,
                                         [None] * len(model.params)))
                for nw_out, out_operator in zip(nw_outs,
                                                model.outs_operator):
                    loc_params = [x for x in model.params
                                  if x in theano.gof.graph.inputs([nw_out])]
                    loc_args = [x for x, y in zip(args, model.params)
                                if y in theano.gof.graph.inputs([nw_out])]
                    if out_operator == 'softmax':
                        factor = const(options['cbs']) * nw_out
                    elif out_operator == 'sigmoid':
                        factor = const(options['cbs']) * nw_out \
                            * (1 - nw_out)
                    else:
                        factor = const(options['cbs'])
                    loc_Gvs = TT.Lop(nw_out, loc_params,
                                     TT.Rop(nw_out, loc_params, loc_args)
                                     / factor)
                    for lp, lgv in zip(loc_params, loc_Gvs):
                        if final_results[lp] is None:
                            final_results[lp] = lgv
                        else:
                            final_results[lp] += lgv
                Gvs = [ogv + final_results[param]
                       for (ogv, param) in zip(gv_args[1:], model.params)]
                return [gv_args[0] + const(1)] + Gvs

            states = [idx0] + ep
            n_steps = options['mbs'] // options['cbs']
            rvals, updates = scan(Gv_step,
                                  states=states,
                                  n_steps=n_steps,
                                  mode=theano.Mode(linker='cvm'),
                                  name='Gv_step',
                                  profile=options['profile'])
            final_Gvs = [x[0] / const(n_steps) for x in rvals[1:]]
            return final_Gvs, updates
    else:
        mode = cpu_mode

        def compute_Gv(*args):
            cgv = [theano.shared(numpy.zeros(shp,
                                             dtype=theano.config.floatX),
                                 name='cgv%d' % idx)
                   for idx, shp in enumerate(model.params_shape)]
            print_mem('allocated mem for cgv')
            idx0 = const([0])
            ep = [TT.alloc(const(0), 1, *shp)
                  for shp in model.params_shape]

            def Gv_step(*gv_args):
                idx = TT.cast(gv_args[0], 'int32')
                nw_inps = [x[idx * options['cbs']:
                             (idx + 1) * options['cbs']]
                           for x in loc_inputs]
                replace = dict(zip(model.inputs, nw_inps))
                nw_outs = safe_clone(model.outs, replace)
                final_results = dict(zip(model.params,
                                         [None] * len(model.params)))
                for nw_out, out_operator in zip(nw_outs,
                                                model.outs_operator):
                    loc_params = [x for x in model.params
                                  if x in theano.gof.graph.inputs([nw_out])]
                    loc_args = [x for x, y in zip(cgv, model.params)
                                if y in theano.gof.graph.inputs([nw_out])]
                    if out_operator == 'softmax':
                        factor = const(options['cbs']) * nw_out
                    elif out_operator == 'sigmoid':
                        factor = const(options['cbs']) * nw_out \
                            * (1 - nw_out)
                    else:
                        factor = const(options['cbs'])
                    loc_Gvs = TT.Lop(nw_out, loc_params,
                                     TT.Rop(nw_out, loc_params, loc_args)
                                     / factor)
                    for lp, lgv in zip(loc_params, loc_Gvs):
                        if final_results[lp] is None:
                            final_results[lp] = lgv
                        else:
                            final_results[lp] += lgv
                Gvs = [ogv + final_results[param]
                       for (ogv, param) in zip(gv_args[1:], model.params)]
                return [gv_args[0] + const(1)] + Gvs

            states = [idx0] + ep
            n_steps = options['mbs'] // options['cbs']
            rvals, updates = scan(Gv_step,
                                  states=states,
                                  n_steps=n_steps,
                                  mode=gpu_mode,
                                  name='Gv_step',
                                  profile=options['profile'])
            final_Gvs = [TT.as_tensor_variable(x[0]) / const(n_steps)
                         for x in rvals[1:]]
            grad_inps = zip(loc_inputs, shared_data)
            loc_fn = theano.function([],
                                     final_Gvs,
                                     updates=updates,
                                     givens=dict(grad_inps),
                                     on_unused_input='warn',
                                     mode=gpu_mode,
                                     name='loc_fn',
                                     profile=options['profile'])
            fake_op = FakeGPUShell(cgv, loc_fn, len(cgv))
            return fake_op(*args), {}

    print 'Constructing riemannian gradient function'
    norm_grads = TT.sqrt(sum(TT.sum(x ** 2) for x in self.gs))
    rvals = minres.minres(compute_Gv,
                          [x / norm_grads for x in self.gs],
                          rtol=options['mrtol'],
                          shift=-options['mreg'],
                          maxit=options['miters'],
                          mode=mode,
                          profile=options['profile'])
    nw_rs = [x * norm_grads for x in rvals[0]]
    flag = rvals[1]
    niters = rvals[2]
    rel_residual = rvals[3]
    rel_Aresidual = rvals[4]
    Anorm = rvals[5]
    Acond = rvals[6]
    xnorm = rvals[7]
    Axnorm = rvals[8]
    updates = rvals[9]

    norm_ord0 = TT.max(abs(nw_rs[0]))
    for r in nw_rs[1:]:
        norm_ord0 = TT.maximum(norm_ord0, TT.max(abs(r)))

    updates.update(dict(zip(self.rs, nw_rs)))
    grad_inps = [(x, y[rbdx * options['mbs']:(rbdx + 1) * options['mbs']])
                 for x, y in zip(loc_inputs[:1], shared_data[:1])]
    print 'Compiling riemannian gradient function'
    self.compute_riemannian_gradients = theano.function(
        [rbdx],
        [flag, niters, rel_residual, rel_Aresidual, Anorm, Acond, xnorm,
         Axnorm, norm_grads, norm_ord0],
        updates=updates,
        givens=dict(grad_inps),
        name='compute_riemannian_gradients',
        on_unused_input='warn',
        mode=mode,
        profile=options['profile'])

    # Step 3. Compile function for evaluating cost and updating parameters
    print 'constructing evaluation function'
    lr = TT.scalar('lr')
    self.lr = numpy.float32(options['lr'])
    ebdx = TT.iscalar('eval_batch_idx')
    nw_ps = [p - lr * r for p, r in zip(model.params, self.rs)]

    def cost_step(_idx, acc):
        idx = TT.cast(_idx, 'int32')
        nw_inps = [x[idx * options['cbs']:(idx + 1) * options['cbs']]
                   for x in loc_inputs]
        replace = dict(zip(model.inputs + model.params, nw_inps + nw_ps))
        nw_cost = safe_clone(model.train_cost, replace=replace)
        return [_idx + const(1), acc + nw_cost]

    acc0 = const([0])
    idx0 = const([0])
    n_steps = options['ebs'] // options['cbs']
    rvals, updates = scan(cost_step,
                          states=[idx0, acc0],
                          n_steps=n_steps,
                          name='cost_loop',
                          mode=gpu_mode,
                          profile=options['profile'])
    final_cost = rvals[1] / const(n_steps)
    if options['device'] == 'gpu':
        grad_inps = [(x, y[ebdx * options['ebs']:
                           (ebdx + 1) * options['ebs']])
                     for x, y in zip(loc_inputs, shared_data)]
    else:
        grad_inps = zip(loc_inputs, shared_data)
    print 'compiling evaluation function'
    self.eval_fn = theano.function([ebdx, lr],
                                   final_cost,
                                   givens=dict(grad_inps),
                                   on_unused_input='warn',
                                   updates=updates,
                                   name='eval_fn',
                                   mode=gpu_mode,
                                   profile=options['profile'])

    update_dict = dict(zip(model.params, nw_ps))
    if options['device'] != 'gpu':
        update_dict.update(dict(zip(model.cparams, nw_ps)))
    self.update_params = theano.function([lr],
                                         [],
                                         updates=update_dict,
                                         name='update_params',
                                         on_unused_input='warn',
                                         mode=mode,
                                         profile=options['profile'])
    self.options = options
    self.old_cost = 1e6
    self.device = options['device']

    n_steps = options['ebs'] // options['cbs']

    def ls_error(_idx, acc):
        idx = TT.cast(_idx, 'int32')
        nw_inps = [x[idx * options['cbs']:(idx + 1) * options['cbs']]
                   for x in loc_inputs]
        replace = dict(zip(model.inputs, nw_inps))
        nw_cost = TT.cast(safe_clone(model.err, replace=replace),
                          'float32')
        return [_idx + const(1), acc + nw_cost]

    states = [TT.constant(numpy.float32([0])),
              TT.constant(numpy.float32([0]))]
    rvals, _ = scan(ls_error,
                    states=states,
                    n_steps=n_steps,
                    name='ls_err_step',
                    mode=cpu_mode,
                    profile=options['profile'])
    ferr = rvals[1][0] / const(n_steps)
    self.compute_error = theano.function([ebdx],
                                         ferr,
                                         givens=dict(grad_inps),
                                         name='compute_err',
                                         mode=gpu_mode,
                                         on_unused_input='warn',
                                         profile=options['profile'])
def _shared(val, borrow=True):
    return T._shared(array(val, dtype=floatX), borrow=borrow)
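Usage sketch, assuming the module did something like `from numpy import array` and `floatX = theano.config.floatX` (both names are used unqualified above):

import numpy
import theano
import theano.tensor as T

floatX = theano.config.floatX
array = numpy.array

W = _shared([[1.0, 2.0], [3.0, 4.0]])
print(W.get_value().dtype)   # floatX, e.g. 'float32'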
def main(ancestralfile, bamfile, treefile, maxgenotype=3):
    bamFiles = bamfile.split(',')
    maxGenotype = maxgenotype
    knownSites, knownNodes, knownMatrix = readAncestral(ancestralfile)
    branches = readTree(treefile, knownNodes)
    knownSamples = parseBAMs(bamFiles, knownSites)

    exists = (knownSites.T[1] >= 0.001) & np.any(knownSamples > 0, 1)
    knownSites = knownSites[exists]
    knownMatrix = knownMatrix[:, exists]
    knownSamples = knownSamples[exists]

    for br in branches:
        diff_sites = knownMatrix[br[0].astype(int)] != knownMatrix[br[1].astype(int)]
        snv = knownMatrix[br[1].astype(int), diff_sites]
        presence = knownSamples[diff_sites, snv]
        if np.sum(presence > 0) * 100 <= presence.shape[0]:
            knownMatrix[br[1].astype(int)] = 100
            branches[branches.T[0] == br[1], 0] = br[0]
            br[:2] = -1
    branches = branches[branches.T[0] > -1]

    exists = [len(set(np.unique(mat)) - {100}) > 1 for mat in knownMatrix.T]
    knownSites = knownSites[exists]
    knownMatrix = knownMatrix[:, exists].astype(np.int8)
    knownSamples = knownSamples[exists]

    weights = knownSites.T[1].astype(float)
    weights *= np.sum(knownSamples, 1) / 4
    knownSamples /= np.sum(knownSamples, 1)[:, np.newaxis]

    branches2 = t._shared(branches)
    weights2 = t._shared(weights)
    knownMatrix2 = t._shared(knownMatrix)
    knownSamples2 = t._shared(knownSamples)

    for nGenotype in np.arange(2, maxGenotype + 1):
        sys.stderr.write('\n----------\nRunning MCMC with assumption of {0} genotype(s) present in the sample.\n'.format(nGenotype))
        ng = np.max([1, nGenotype])
        genotypes = np.zeros([ng, knownMatrix.shape[1]], dtype=np.int8)
        genotypes2 = t._shared(genotypes)
        with pm.Model() as model:
            brs = pm.Flat('brs', shape=nGenotype if ng > 1 else ())
            props2 = pm.Dirichlet('props2', a=1. / np.ones(ng, dtype=float)) \
                if ng > 1 else pm.DiscreteUniform('props2', upper=1, lower=1)
            props = pm.Deterministic('props', props2 * (1 - 0.05 * ng) + 0.05)
            sigma = pm.Gamma('sigma', alpha=0.5, beta=2)
            lk = pm.Deterministic('lk', getGenotypesAndLK(
                genotypes2, knownMatrix2, branches2, weights2, knownSamples2,
                sigma, brs, props))
            pm.Potential('likelihood', lk)
            step_br = TreeWalker(brs, branches)
            step_others = pm.step_methods.Metropolis(vars=[sigma, props])
            trace = pm.sample(progressbar=True, draws=5000, tune=15000,
                              step=[step_br, step_others], chains=8, cores=8,
                              compute_convergence_checks=False)
        trace_logp = np.array([np.mean([t['likelihood'] for t in strace], 0)
                               for strace in trace._straces.values()])
        sys.stderr.write('Done.\n----------\n')

        # select traces
        trace_id = np.argmax(trace_logp.T[0])
        logp = trace_logp[trace_id]
        sys.stdout.write('----------\nNo. Genotypes:\t{0}\tlogp:\t{1}\thybrid_score:\t{2}\n'.format(
            nGenotype, logp[0], logp[1]))
        sigma = trace.get_values('sigma', chains=trace_id)
        sigma = np.sort(sigma)
        sys.stdout.write('Sigma\tMean:\t{0:.6E}\tCI95%:\t[ {1:.6E} - {2:.6E} ]\n'.format(
            np.mean(sigma), sigma[int(sigma.size * 0.025)],
            sigma[int(sigma.size * 0.975)]))
        if nGenotype < 2:
            br_locs = trace.get_values('brs', chains=trace_id)[:, np.newaxis]
            props = trace.get_values('props', chains=trace_id)[:, np.newaxis]
        else:
            br_locs = trace.get_values('brs', chains=trace_id)
            props = trace.get_values('props', chains=trace_id)
        props /= np.sum(props, 1)[:, np.newaxis]
        for id, (br_loc, prop) in enumerate(zip(br_locs.T, props.T)):
            prop = np.sort(prop)
            if nGenotype > 0:
                brs, locs = br_loc.astype(int), br_loc % 1
                brNames, brCounts = np.unique(brs, return_counts=True)
                brCounts = brCounts.astype(float) / np.sum(brCounts)
                idx = np.argsort(-brCounts)
                brNames, brCounts = brNames[idx], brCounts[idx]
                sys.stdout.write('\tGenotype {0}:\tMean proportion:\t{1:.4f}\tCI95%:\t[ {2:.4f} - {3:.4f} ]\n'.format(
                    id + 1, np.mean(prop), prop[int(prop.size * 0.025)],
                    prop[int(prop.size * 0.975)]))
                for br, cnt in zip(brNames, brCounts):
                    if cnt >= 0.01 or cnt >= 0.3 * brCounts[0]:
                        lc = np.sort(locs[brs == br])
                        sys.stdout.write('\t\t{0:.2f} %\t{1} - {2}\tLocation:\t{3:.4f}\tCI95%:\t[ {4:.4f} - {5:.4f} ]\n'.format(
                            cnt * 100,
                            knownNodes[int(branches[br, 0])],
                            knownNodes[int(branches[br, 1])],
                            np.mean(lc), lc[int(lc.size * 0.025)],
                            lc[int(lc.size * 0.975)]))
    sys.stderr.write('All DONE\n')
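For context, a minimal, self-contained sketch of the wrapping step used above: constant NumPy arrays (the branch table, per-site weights, and sample matrices) are exposed to the Theano graph once via `t._shared`, so the PyMC3 step methods can reuse them on every draw without re-copying. The arrays below are random stand-ins; `getGenotypesAndLK` and `TreeWalker` from the snippet are project-specific and not reproduced here.

import numpy as np
import theano.tensor as t

branches = np.random.rand(8, 3)      # stand-in for the real branch table
weights = np.random.rand(50)         # stand-in for per-site weights

branches2 = t._shared(branches)      # wrap once, reuse across all draws
weights2 = t._shared(weights)
expr = t.sum(weights2) * branches2[0, 2]   # symbolic use of the shared data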
def __call__(self, shape, name=None):
    return T._shared(np_rng.normal(loc=self.loc, scale=self.scale,
                                   size=shape), name=name)
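A hypothetical surrounding class for the `__call__` above, since the original class body is not shown; `np_rng` is assumed to be a NumPy `RandomState`. Note that `RandomState.normal` returns float64, so a cast with `.astype(theano.config.floatX)` would be needed if float32 weights were required.

import numpy as np
import theano.tensor as T

np_rng = np.random.RandomState(1234)   # assumed module-level RNG

class Normal(object):
    """Hypothetical initializer class wrapping the __call__ above."""
    def __init__(self, loc=0.0, scale=0.01):
        self.loc = loc
        self.scale = scale

    def __call__(self, shape, name=None):
        return T._shared(np_rng.normal(loc=self.loc, scale=self.scale,
                                       size=shape), name=name)

W = Normal(scale=0.02)((128, 64), name='W')   # CPU-resident weight matrix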
def __init__(self, options, channel, data, model):
    """
    Parameters:
        options: Dictionary
            `options` is expected to contain the following keys:
                `cbs` -> int
                    Number of samples to consider at a time when computing
                    some property of the model
                `gbs` -> int
                    Number of samples over which to compute the gradients
                `mbs` -> int
                    Number of samples over which to compute the metric
                `ebs` -> int
                    Number of samples over which to evaluate the training
                    error
                `mreg` -> float
                    Regularization added to the metric
                `mrtol` -> float
                    Relative tolerance for inverting the metric
                `miters` -> int
                    Number of iterations
                `seed` -> int
                    Random number generator seed
                `profile` -> bool
                    Flag, if profiling should be on or not
                `verbose` -> int
                    Verbosity level
                `lr` -> float
                    Learning rate
        channel: jobman channel or None
        data: dictionary-like object returned by numpy.load containing the
            data
        model: model
    """
    n_params = len(model.params)
    self.data = data

    if options['device'] != 'gpu':
        xdata = theano.shared(data['train_x'][:options['gbs']],
                              name='xdata')
        ydata = TT._shared(data['train_y'][:options['gbs']],
                           name='ydata')
        self.xdata = xdata
        self.ydata = ydata
        shared_data = [xdata, ydata]
    else:
        self.cpu_shared_data = []
        xdata = theano.shared(data['train_x'], name='xdata')
        ydata = TT._shared(data['train_y'], name='ydata')
        self.xdata = xdata
        self.ydata = ydata
        shared_data = [xdata, ydata]

    self.rng = numpy.random.RandomState(options['seed'])
    n_samples = data['train_x'].shape[0]
    self.grad_batches = n_samples // options['gbs']
    self.metric_batches = n_samples // options['mbs']
    self.eval_batches = n_samples // options['ebs']

    self.verbose = options['verbose']
    if options['device'] != 'gpu':
        # Store Euclidean gradients
        self.gs = [TT._shared(numpy.zeros(shp, dtype=theano.config.floatX))
                   for shp in model.params_shape]
        # Store Riemannian gradients
        self.rs = [TT._shared(numpy.zeros(shp, dtype=theano.config.floatX))
                   for shp in model.params_shape]
    else:
        # Store Euclidean gradients
        self.gs = [theano.shared(numpy.zeros(shp, dtype=theano.config.floatX))
                   for shp in model.params_shape]
        # Store Riemannian gradients
        self.rs = [theano.shared(numpy.zeros(shp, dtype=theano.config.floatX))
                   for shp in model.params_shape]

    self.permg = self.rng.permutation(self.grad_batches)
    self.permr = self.rng.permutation(self.metric_batches)
    self.perme = self.rng.permutation(self.eval_batches)
    self.k = 0
    self.posg = 0
    self.posr = 0
    self.pose = 0

    # Step 1. Compile function for computing Euclidean gradients

    # inputs
    gbdx = TT.iscalar('grad_batch_idx')
    print 'Constructing grad function'
    srng = RandomStreams(numpy.random.randint(1e5))
    loc_inputs = [x.type() for x in model.inputs]

    def grad_step(*args):
        idx = TT.cast(args[0], 'int32')
        nw_inps = [x[idx * options['cbs']:(idx + 1) * options['cbs']]
                   for x in loc_inputs]
        replace = dict(zip(model.inputs, nw_inps))
        nw_cost = safe_clone(model.train_cost, replace=replace)
        gs = TT.grad(nw_cost, model.params)
        nw_gs = [op + np for op, np in zip(args[1:1 + n_params], gs)]
        return [args[0] + const(1)] + nw_gs

    ig = [TT.unbroadcast(TT.alloc(const(0), 1, *shp), 0)
          for shp in model.params_shape]
    idx0 = TT.unbroadcast(const([0]), 0)
    n_steps = options['gbs'] // options['cbs']
    rvals, updates = scan(grad_step,
                          states=[idx0] + ig,
                          n_steps=n_steps,
                          name='grad_loop',
                          profile=options['profile'])
    nw_gs = [x[0] / const(n_steps) for x in rvals[1:1 + n_params]]

    # updates
    updates.update(dict(zip(self.gs, nw_gs)))
    # givens
    if options['device'] == 'gpu':
        grad_inps = [(x, y[gbdx * options['gbs']:(gbdx + 1) * options['gbs']])
                     for x, y in zip(loc_inputs, shared_data)]
    else:
        grad_inps = zip(loc_inputs, shared_data)

    print 'Compiling grad function'
    self.compute_eucledian_gradients = theano.function(
        [gbdx],
        [],
        updates=updates,
        givens=dict(grad_inps),
        name='compute_eucledian_gradients',
        mode=gpu_mode,
        on_unused_input='warn',
        profile=options['profile'])

    # Step 2. Compile function for computing Riemannian gradients
    rbdx = TT.iscalar('riemmanian_batch_idx')
    rbpos = rbdx * options['mbs']

    if options['device'] == 'gpu':
        mode = gpu_mode

        def compute_Gv(*args):
            idx0 = const([0])
            ep = [TT.alloc(const(0), 1, *shp)
                  for shp in model.params_shape]

            def Gv_step(*gv_args):
                idx = TT.cast(gv_args[0], 'int32')
                nw_inps = [x[idx * options['cbs']:(idx + 1) * options['cbs']]
                           for x in loc_inputs]
                replace = dict(zip(model.inputs, nw_inps))
                nw_outs = safe_clone(model.outs, replace)
                final_results = dict(zip(model.params,
                                         [None] * len(model.params)))
                for nw_out, out_operator in zip(nw_outs, model.outs_operator):
                    loc_params = [x for x in model.params
                                  if x in theano.gof.graph.inputs([nw_out])]
                    loc_args = [x for x, y in zip(args, model.params)
                                if y in theano.gof.graph.inputs([nw_out])]
                    if out_operator == 'softmax':
                        factor = const(options['cbs']) * nw_out
                    elif out_operator == 'sigmoid':
                        factor = const(options['cbs']) * nw_out * (1 - nw_out)
                    else:
                        factor = const(options['cbs'])
                    loc_Gvs = TT.Lop(nw_out, loc_params,
                                     TT.Rop(nw_out, loc_params,
                                            loc_args) / factor)
                    for lp, lgv in zip(loc_params, loc_Gvs):
                        if final_results[lp] is None:
                            final_results[lp] = lgv
                        else:
                            final_results[lp] += lgv
                Gvs = [ogv + final_results[param]
                       for (ogv, param) in zip(gv_args[1:], model.params)]
                return [gv_args[0] + const(1)] + Gvs
                # Note: the block below is unreachable (it follows the
                # return) and appears to be an alternative formulation of
                # the same product via the preactivation output, left in
                # the original source.
                nw_cost, nw_preactiv_out = safe_clone(
                    [model.train_cost, model.preactiv_out], replace)
                nw_gvs = TT.Lop(nw_preactiv_out, model.params,
                                TT.Rop(TT.grad(nw_cost, nw_preactiv_out),
                                       model.params, args))
                Gvs = [ogv + ngv
                       for (ogv, ngv) in zip(gv_args[1:], nw_gvs)]
                return [gv_args[0] + const(1)] + Gvs

            states = [idx0] + ep
            n_steps = options['mbs'] // options['cbs']
            rvals, updates = scan(Gv_step,
                                  states=states,
                                  n_steps=n_steps,
                                  mode=theano.Mode(linker='cvm'),
                                  name='Gv_step',
                                  profile=options['profile'])
            final_Gvs = [x[0] / const(n_steps) for x in rvals[1:]]
            return final_Gvs, updates
    else:
        mode = cpu_mode

        def compute_Gv(*args):
            cgv = [theano.shared(numpy.zeros(shp, dtype=theano.config.floatX),
                                 name='cgv%d' % idx)
                   for idx, shp in enumerate(model.params_shape)]
            print_mem('allocated mem for cgv')
            idx0 = const([0])
            ep = [TT.alloc(const(0), 1, *shp)
                  for shp in model.params_shape]

            def Gv_step(*gv_args):
                idx = TT.cast(gv_args[0], 'int32')
                nw_inps = [x[idx * options['cbs']:(idx + 1) * options['cbs']]
                           for x in loc_inputs]
                replace = dict(zip(model.inputs, nw_inps))
                nw_outs = safe_clone(model.outs, replace)
                final_results = dict(zip(model.params,
                                         [None] * len(model.params)))
                for nw_out, out_operator in zip(nw_outs, model.outs_operator):
                    loc_params = [x for x in model.params
                                  if x in theano.gof.graph.inputs([nw_out])]
                    loc_args = [x for x, y in zip(cgv, model.params)
                                if y in theano.gof.graph.inputs([nw_out])]
                    if out_operator == 'softmax':
                        factor = const(options['cbs']) * nw_out
                    elif out_operator == 'sigmoid':
                        factor = const(options['cbs']) * nw_out * (1 - nw_out)
                    else:
                        factor = const(options['cbs'])
                    loc_Gvs = TT.Lop(nw_out, loc_params,
                                     TT.Rop(nw_out, loc_params,
                                            loc_args) / factor)
                    for lp, lgv in zip(loc_params, loc_Gvs):
                        if final_results[lp] is None:
                            final_results[lp] = lgv
                        else:
                            final_results[lp] += lgv
                Gvs = [ogv + final_results[param]
                       for (ogv, param) in zip(gv_args[1:], model.params)]
                return [gv_args[0] + const(1)] + Gvs

            states = [idx0] + ep
            n_steps = options['mbs'] // options['cbs']
            rvals, updates = scan(Gv_step,
                                  states=states,
                                  n_steps=n_steps,
                                  mode=gpu_mode,
                                  name='Gv_step',
                                  profile=options['profile'])
            final_Gvs = [TT.as_tensor_variable(x[0]) / const(n_steps)
                         for x in rvals[1:]]
            grad_inps = zip(loc_inputs, shared_data)
            loc_fn = theano.function([],
                                     final_Gvs,
                                     updates=updates,
                                     givens=dict(grad_inps),
                                     on_unused_input='warn',
                                     mode=gpu_mode,
                                     name='loc_fn',
                                     profile=options['profile'])
            fake_op = FakeGPUShell(cgv, loc_fn, len(cgv))
            return fake_op(*args), {}

    print 'Constructing riemannian gradient function'
    norm_grads = TT.sqrt(sum(TT.sum(x ** 2) for x in self.gs))
    rvals = minres.minres(compute_Gv,
                          [x / norm_grads for x in self.gs],
                          rtol=options['mrtol'],
                          shift=-options['mreg'],
                          maxit=options['miters'],
                          mode=mode,
                          profile=options['profile'])
    nw_rs = [x * norm_grads for x in rvals[0]]
    flag = rvals[1]
    niters = rvals[2]
    rel_residual = rvals[3]
    rel_Aresidual = rvals[4]
    Anorm = rvals[5]
    Acond = rvals[6]
    xnorm = rvals[7]
    Axnorm = rvals[8]
    updates = rvals[9]

    norm_ord0 = TT.max(abs(nw_rs[0]))
    for r in nw_rs[1:]:
        norm_ord0 = TT.maximum(norm_ord0, TT.max(abs(r)))

    updates.update(dict(zip(self.rs, nw_rs)))
    grad_inps = [(x, y[rbdx * options['mbs']:(rbdx + 1) * options['mbs']])
                 for x, y in zip(loc_inputs[:1], shared_data[:1])]
    print 'Compiling riemannian gradient function'
    self.compute_riemannian_gradients = theano.function(
        [rbdx],
        [flag, niters, rel_residual, rel_Aresidual, Anorm, Acond,
         xnorm, Axnorm, norm_grads, norm_ord0],
        updates=updates,
        givens=dict(grad_inps),
        name='compute_riemannian_gradients',
        on_unused_input='warn',
        mode=mode,
        profile=options['profile'])

    # Step 3. Compile function for evaluating cost and updating parameters
    print 'constructing evaluation function'
    lr = TT.scalar('lr')
    self.lr = numpy.float32(options['lr'])
    ebdx = TT.iscalar('eval_batch_idx')
    nw_ps = [p - lr * r for p, r in zip(model.params, self.rs)]

    def cost_step(_idx, acc):
        idx = TT.cast(_idx, 'int32')
        nw_inps = [x[idx * options['cbs']:(idx + 1) * options['cbs']]
                   for x in loc_inputs]
        replace = dict(zip(model.inputs + model.params, nw_inps + nw_ps))
        nw_cost = safe_clone(model.train_cost, replace=replace)
        return [_idx + const(1), acc + nw_cost]

    acc0 = const([0])
    idx0 = const([0])
    n_steps = options['ebs'] // options['cbs']
    rvals, updates = scan(cost_step,
                          states=[idx0, acc0],
                          n_steps=n_steps,
                          name='cost_loop',
                          mode=gpu_mode,
                          profile=options['profile'])
    final_cost = rvals[1] / const(n_steps)
    if options['device'] == 'gpu':
        grad_inps = [(x, y[ebdx * options['ebs']:(ebdx + 1) * options['ebs']])
                     for x, y in zip(loc_inputs, shared_data)]
    else:
        grad_inps = zip(loc_inputs, shared_data)

    print 'compiling evaluation function'
    self.eval_fn = theano.function([ebdx, lr],
                                   final_cost,
                                   givens=dict(grad_inps),
                                   on_unused_input='warn',
                                   updates=updates,
                                   name='eval_fn',
                                   mode=gpu_mode,
                                   profile=options['profile'])

    update_dict = dict(zip(model.params, nw_ps))
    if options['device'] != 'gpu':
        update_dict.update(dict(zip(model.cparams, nw_ps)))
    self.update_params = theano.function([lr],
                                         [],
                                         updates=update_dict,
                                         name='update_params',
                                         on_unused_input='warn',
                                         mode=mode,
                                         profile=options['profile'])
    self.options = options
    self.old_cost = 1e6
    self.device = options['device']
    n_steps = options['ebs'] // options['cbs']

    def ls_error(_idx, acc):
        idx = TT.cast(_idx, 'int32')
        nw_inps = [x[idx * options['cbs']:(idx + 1) * options['cbs']]
                   for x in loc_inputs]
        replace = dict(zip(model.inputs, nw_inps))
        nw_cost = TT.cast(safe_clone(model.err, replace=replace), 'float32')
        return [_idx + const(1), acc + nw_cost]

    states = [TT.constant(numpy.float32([0])),
              TT.constant(numpy.float32([0]))]
    rvals, _ = scan(ls_error,
                    states=states,
                    n_steps=n_steps,
                    name='ls_err_step',
                    mode=cpu_mode,
                    profile=options['profile'])
    ferr = rvals[1][0] / const(n_steps)
    self.compute_error = theano.function([ebdx],
                                         ferr,
                                         givens=dict(grad_inps),
                                         name='compute_err',
                                         mode=gpu_mode,
                                         on_unused_input='warn',
                                         profile=options['profile'])
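The `__init__` above repeatedly chooses between `theano.shared` and `TT._shared` based on `options['device']`. A minimal sketch of that placement pattern, independent of the rest of the class: `theano.shared` may migrate a float32 array to the GPU when `device=gpu`, while `TT._shared` always constructs a host-memory shared variable.

import numpy
import theano
import theano.tensor as TT

vals = numpy.zeros((5, 5), dtype=theano.config.floatX)
on_active_device = theano.shared(vals)   # placed by the device setting
on_cpu_only = TT._shared(vals)           # pinned to host memory regardless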
def load_data(data):
    return T._shared(np.asarray(data, dtype=theano.config.floatX),
                     borrow=True)
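A usage sketch for `load_data`, with a made-up array standing in for a real dataset; `borrow=True` lets Theano adopt the freshly cast array without another copy.

import numpy as np
import theano
import theano.tensor as T

def load_data(data):
    return T._shared(np.asarray(data, dtype=theano.config.floatX),
                     borrow=True)

train_x = load_data(np.random.rand(100, 20))
print(train_x.get_value(borrow=True).dtype)   # -> floatX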
                    help="Pickled network to steal params from.")
parser.add_argument("dest", type=str,
                    help="File to place new network in.")
parser.add_argument("--cpu", "-c", dest="cpu", action='store_const',
                    const=True, default=False,
                    help="Convert network to run on a CPU.")
args = parser.parse_args()

print "loading model..."
f = file(args.source, 'rb')
old_network = cPickle.load(f)
f.close()
params = old_network.params

if args.cpu:
    print "converting gpu parameters..."
    new_params = []
    for param in params:
        param = T._shared(param.get_value())
        new_params.append(param)
    params = new_params

new_network = network(batch_size=None, params=params)

print "saving model..."
f = file(args.dest, 'wb')
cPickle.dump(new_network, f, protocol=cPickle.HIGHEST_PROTOCOL)
f.close()
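A minimal sketch of the conversion step this script performs: `get_value()` copies a parameter back to a host ndarray regardless of where it currently lives, and `T._shared` rewraps it as a CPU-only shared variable, so the repickled network no longer depends on a GPU.

import numpy as np
import theano
import theano.tensor as T

gpu_or_cpu_param = theano.shared(np.ones(3, dtype='float32'), name='w')
cpu_param = T._shared(gpu_or_cpu_param.get_value(),
                      name=gpu_or_cpu_param.name)   # host-memory copy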
    query_model = f(query_model)
    result = np.argsort(-np.dot(query_model, doc_model.T), axis=1)
    query_docs_ranking = {}
    ''' speedup '''
    for q_idx in range(len(query_list)):
        docs_ranking = []
        for doc_idx in result[q_idx]:
            docs_ranking.append(doc_list[doc_idx])
        query_docs_ranking[query_list[q_idx]] = docs_ranking
    '''
    query
    for query_key, query_vec in zip(query_list, query_model):
        print len(query_docs_ranking.keys())
        query_result = np.argsort(-(query_vec * doc_model).sum(axis=1))
        docs_ranking = []
        for doc_idx in query_result:
            docs_ranking.append(doc_list[doc_idx])
        query_docs_ranking[query_key] = docs_ranking
    mAP = eval.mean_average_precision(query_docs_ranking)
    print mAP, qry_lambda, rel_qry_lambda
    '''
    mAP = qry_eval.mean_average_precision(query_docs_ranking)
    mAP_list.append(mAP)
    return max(mAP_list)

if __name__ == "__main__":
    with open("relevance_model_RM.pkl", "rb") as file:
        rel_query_model = Pickle.load(file)[:720]
    theano_rel_query_model = _shared(rel_query_model)
    calculate(theano_rel_query_model, 720)
def n_star_inference(n_stars, iteration, elem_err=False, n_init=20000,
                     n_samples=1000, max_stars=100):
    ## Define which stars to use
    these_stars = np.arange(max_stars)[iteration * n_stars:(iteration + 1) * n_stars]

    ## Load in mock dataset
    mock_data = np.load(mock_data_file)  # dataset
    mu_times = mock_data.f.obs_time[these_stars]  # time of birth
    sigma_times = mock_data.f.obs_time_err[these_stars]  # error on age
    all_els = mock_data.f.elements
    full_abundances = mock_data.f.abundances[these_stars]  # chemical element abundances for data
    full_errors = mock_data.f.abundance_errs[these_stars]  # error on abundances

    # Filter out correct elements:
    els = ['C', 'Fe', 'He', 'Mg', 'N', 'Ne', 'O', 'Si']  # TNG elements
    n_els = len(els)
    el_indices = np.zeros(len(els), dtype=int)
    for e, el in enumerate(els):
        for j in range(len(all_els)):
            if els[e] == str(all_els[j]):
                el_indices[e] = j
                break
            if j == len(all_els) - 1:
                print("Failed to find element %s" % el)
    obs_abundances = full_abundances[:, el_indices]
    obs_errors = full_errors[:, el_indices]

    # Now standardize dataset
    norm_data = (obs_abundances - output_mean) / output_std
    norm_sd = obs_errors / output_std
    data_obs = norm_data.ravel()
    data_sd = np.asarray(norm_sd).ravel()
    std_times_mean = (mu_times - input_mean[-1]) / input_std[-1]
    std_times_width = sigma_times / input_std[-1]

    # Define stacked local priors
    Local_prior_mean = np.vstack([
        np.hstack([std_Theta_prior_mean, std_times_mean[i]])
        for i in range(n_stars)])
    Local_prior_sigma = np.vstack([
        np.hstack([std_Theta_prior_width, std_times_width[i]])
        for i in range(n_stars)])

    # Bound variables to ensure they don't exit the training parameter space
    lowBound = tt._shared(np.asarray([-5, std_log_SFR_crit, -5, std_min_time]))
    upBound = tt._shared(np.asarray([5, 5, 5, std_max_time]))

    # Create stacked mean and variances
    loc_mean = np.hstack([
        np.asarray(std_Theta_prior_mean).reshape(1, -1) * np.ones([n_stars, 1]),
        std_times_mean.reshape(-1, 1)])
    loc_std = np.hstack([
        np.asarray(std_Theta_prior_width).reshape(1, -1) * np.ones([n_stars, 1]),
        std_times_width.reshape(-1, 1)])

    # Share theano variables
    w0 = tt._shared(w_array_0)
    b0 = tt._shared(b_array_0)
    w1 = tt._shared(w_array_1)
    b1 = tt._shared(b_array_1)
    ones_tensor = tt.ones([n_stars, 1])
    b0_all = ma.matrix_dot(ones_tensor, b0)
    b1_all = ma.matrix_dot(ones_tensor, b1)

    # Define PyMC3 Model
    simple_model = pm.Model()
    with simple_model:
        # Define priors
        Lambda = pm.Normal('Std-Lambda', mu=std_Lambda_prior_mean,
                           sd=std_Lambda_prior_width,
                           shape=(1, len(std_Lambda_prior_mean)))
        Locals = pm.Normal(
            'Std-Local', mu=loc_mean, sd=loc_std,
            shape=loc_mean.shape,
            transform=pm.distributions.transforms.Interval(lowBound, upBound),
        )
        TimeSq = tt.reshape(Locals[:, -1] ** 2., (n_stars, 1))
        TruLa = pm.Deterministic('Lambda',
                                 Lambda * input_std[:2] + input_mean[:2])
        TruTh = pm.Deterministic('Thetas',
                                 Locals[:, :3] * input_std[2:5] + input_mean[2:5])
        TruTi = pm.Deterministic('Times',
                                 Locals[:, -1] * input_std[-1] + input_mean[-1])

        ## NEURAL NET
        Lambda_all = ma.matrix_dot(ones_tensor, Lambda)
        InputVariables = ma.concatenate([Lambda_all, Locals, TimeSq], axis=1)
        layer1 = ma.matrix_dot(InputVariables, w0) + b0_all
        output = ma.matrix_dot(ma.tanh(layer1), w1) + b1_all

        if elem_err:
            # ERRORS
            #element_error = pm.Normal('Element-Error', mu=-2, sd=1, shape=(1, n_els))
            element_error = pm.HalfCauchy('Std-Element-Error',
                                          beta=0.01 / output_std,
                                          shape=(1, n_els))
            TruErr = pm.Deterministic('Element-Error',
                                      element_error * output_std)
            stacked_error = ma.matrix_dot(ones_tensor, element_error)
            tot_error = ma.sqrt(stacked_error ** 2. + norm_sd ** 2.)  # NB this is all standardized by output_std here
        else:
            tot_error = norm_sd  # NB: all quantities are standardized here

        predictions = pm.Deterministic("Predicted-Abundances",
                                       output * output_std + output_mean)

        # Define likelihood function (unravelling output to make a multivariate gaussian)
        likelihood = pm.Normal('likelihood', mu=output.ravel(),
                               sd=tot_error.ravel(),
                               observed=norm_data.ravel())

    # Now sample
    init_time = ttime.time()
    with simple_model:
        samples = pm.sample(draws=n_samples, chains=chains, cores=cores,
                            tune=tune, nuts_kwargs={'target_accept': 0.9},
                            init='advi+adapt_diag', n_init=n_init)
    end_time = ttime.time() - init_time

    def construct_output(samples):
        Lambda = samples.get_values('Lambda')[:, 0, :]
        Thetas = samples.get_values('Thetas')[:, :, :]
        Times = samples.get_values('Times')[:, :]
        predictions = samples.get_values('Predicted-Abundances')[:, :, :]
        if elem_err:
            Errs = samples.get_values('Element-Error')[:, 0, :]
            return Lambda, Thetas, Times, Errs, predictions
        return Lambda, Thetas, Times, predictions

    print("Finished after %.2f seconds" % end_time)
    if elem_err:
        Lambda, Thetas, Times, Errs, predictions = construct_output(samples)
        return Lambda, Thetas, Times, end_time, Errs, predictions
    else:
        Lambda, Thetas, Times, predictions = construct_output(samples)
        return Lambda, Thetas, Times, end_time, predictions
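A minimal sketch of the surrogate-network pattern used above: trained weight arrays are wrapped once with `tt._shared` so the small feed-forward net can be evaluated symbolically inside the PyMC3 graph. The arrays below are random stand-ins for `w_array_0` / `b_array_0` etc., and plain `tt.dot` is used here in place of the snippet's `ma.matrix_dot` alias.

import numpy as np
import theano.tensor as tt

w0 = tt._shared(np.random.rand(6, 16))   # stand-in for trained weights
b0 = tt._shared(np.random.rand(16))      # stand-in for trained biases
x = tt.dmatrix('x')                      # symbolic model inputs
layer1 = tt.tanh(tt.dot(x, w0) + b0)     # fixed weights, symbolic inputs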