# Assumes the usual Theano test-module imports: theano.tensor as tensor,
# function, Mode, the vm module, and ifelse.
def test_callback_with_ifelse(self):
    a, b, c = tensor.scalars('abc')
    f = function([a, b, c], ifelse(a, 2 * b, 2 * c),
                 mode=Mode(optimizer=None,
                           linker=vm.VM_Linker(callback=self.callback)))
    f(1, 2, 3)
    assert self.n_callbacks['IfElse'] == 2
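# A sketch of the callback fixture the test above assumes. The real
# setUp/callback live elsewhere on the test class; keying the counter by op
# class name is an assumption chosen to match the assertion.
import unittest

class TestCallbacks(unittest.TestCase):
    def setUp(self):
        self.n_callbacks = {}

    def callback(self, node, thunk, storage_map, compute_map):
        # VM_Linker invokes the callback once per thunk execution. A lazy
        # IfElse thunk therefore fires twice: once to request the branch
        # selected by the condition, and once more to deliver its output
        # after that branch has been computed.
        key = node.op.__class__.__name__
        self.n_callbacks.setdefault(key, 0)
        self.n_callbacks[key] += 1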
def more_complex_test():
    notimpl = NotImplementedOp()
    ifelseifelseif = IfElseIfElseIf()

    x1 = T.scalar('x1')
    x2 = T.scalar('x2')
    c1 = T.scalar('c1')
    c2 = T.scalar('c2')
    t1 = ifelse(c1, x1, notimpl(x2))
    t1.name = 't1'
    t2 = t1 * 10
    t2.name = 't2'
    t3 = ifelse(c2, t2, x1 + t1)
    t3.name = 't3'
    t4 = ifelseifelseif(T.eq(x1, x2), x1,
                        T.eq(x1, 5), x2,
                        c2, t3, t3 + 0.5)
    t4.name = 't4'

    f = function([c1, c2, x1, x2], t4,
                 mode=Mode(linker='vm', optimizer='fast_run'))
    # c1=1 takes the x1 branch of t1; c2=0 makes t3 = x1 + t1 = 20; both
    # equality tests in t4 fail, so the final branch adds 0.5.
    print f(1, 0, numpy.array(10, dtype=x1.dtype), 0)
    assert f(1, 0, numpy.array(10, dtype=x1.dtype), 0) == 20.5
    print '... passed'
    # (An older variant of this test declared c1/c2 with generic() instead
    # of T.scalar().)
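# Both helper ops above are defined alongside these tests. Below is a sketch
# of what NotImplementedOp might look like (an assumption; the real
# definition lives in the test module). It raises on evaluation, so a test
# fails loudly if a branch that should stay lazy is actually computed.
# IfElseIfElseIf is a multi-branch lazy variant and is not sketched here.
from theano.gof import Apply, Op

class NotImplementedOp(Op):
    class E(Exception):
        pass

    def make_node(self, x):
        return Apply(self, [x], [x.type()])

    def perform(self, node, inputs, outputs):
        raise self.E('this branch should never be evaluated')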
def test_ifelse():
    a = T.scalar()
    b = generic()
    c = generic()
    notimpl = NotImplementedOp()
    f = function([a, b, c], ifelse(a, notimpl(b), c),
                 mode=Mode(linker='vm', optimizer='fast_run'))

    try:
        # Case 1: the condition selects the NotImplementedOp branch, so
        # evaluating it must raise.
        print 'case 1'
        f(1, 'a', 'b')
        assert False
    except NotImplementedOp.E:
        pass
    print '... passed'

    # Case 2: the lazy VM linker never evaluates the untaken branch, so
    # the NotImplementedOp in it is harmless.
    print 'case 2'
    print f(0, 'a', 'b')
    assert f(0, 'a', 'b') == 'b'
    print '... passed'
    # (An older variant of this test declared a = generic() and passed
    # True/False as the condition.)
def build_graph(x, depth=5):
    z = x
    for d in range(depth):
        z = ifelse(z > 0, -z, z)
    return z
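# A minimal usage sketch for build_graph (the names below are assumptions
# for illustration): compile the chained ifelse graph with the VM linker so
# each conditional evaluates only the branch it takes.
import theano
import theano.tensor as T
from theano.compile import Mode
from theano.ifelse import ifelse

x = T.scalar('x')
f = theano.function([x], build_graph(x, depth=5), mode=Mode(linker='vm'))
print f(3.0)   # the first ifelse flips the sign; later ones leave it negative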
import numpy
import theano
import theano.tensor as TT


def sgd_qn(parameters, cost=None, gradients=None, stop=None, updates=None,
           mylambda=1e-4, t0=1e5, skip=16, consider_constant=None,
           lazy=False, **kwargs):
    """Build SGD-QN-style updates: every `skip` steps the per-parameter
    learning rates `b` are rescaled by a secant estimate of the curvature
    (the clipped ratio of gradient change to parameter change)."""
    # Lazy condition. Needs mode = LazyLinker() when building the function.
    if lazy:
        from theano.lazycond import cond as ifelse
    else:
        from theano.tensor import switch as ifelse

    if not isinstance(parameters, list):
        parameters = [parameters]

    # We need a copy of the parameters:
    new_parameters = []
    replace_dict = {}
    for p in parameters:
        np = theano.shared(p.value.copy())
        new_parameters.append(np)
        replace_dict[p] = np

    # Clone the graph: new_cost is the variable in the cloned graph that
    # corresponds to `cost`, with each parameter replaced by its copy.
    new_cost = theano.clone(cost, replace=replace_dict)

    if gradients is None:
        # For RBM-like models, parts of the graph need to be in
        # "consider_constant".
        if consider_constant is not None:
            if not isinstance(consider_constant, list):
                consider_constant = [consider_constant]
            grads = TT.grad(cost, parameters,
                            consider_constant=consider_constant)
            new_param_grads = TT.grad(new_cost, new_parameters,
                                      consider_constant=consider_constant)
        else:
            grads = TT.grad(cost, parameters)
            new_param_grads = TT.grad(new_cost, new_parameters)
    else:
        # User-supplied gradients (the original code left this path
        # undefined); clone them onto the copied parameters.
        grads = gradients
        new_param_grads = [theano.clone(g, replace=replace_dict)
                           for g in gradients]

    # For graph readability & debugging.
    for p, np, gp, gnp in zip(parameters, new_parameters,
                              grads, new_param_grads):
        np.name = p.name + '_o'
        if gp.name is not None:
            gnp.name = gp.name + '_o'
        else:
            gp.name = 'g_' + p.name
            gnp.name = 'g_' + p.name + '_o'

    grad_diff = [g - ng for g, ng in zip(grads, new_param_grads)]
    param_diff = [p - np for p, np in zip(parameters, new_parameters)]

    # the_ratios = [TT.clip(gd / pd, mylambda, 100. * mylambda)
    #               for gd, pd in zip(grad_diff, param_diff)]
    from utils import true_div_special  # if 0/0, replace with mylambda
    div = TT.Elemwise(true_div_special)
    the_ratios = [TT.clip(div(gd, pd, mylambda), mylambda,
                          numpy.array(100., dtype=theano.config.floatX)
                          * mylambda)
                  for gd, pd in zip(grad_diff, param_diff)]

    # Allocate a B (the "learning rates") for each param.
    b_list = []
    for param in parameters:
        b_init = numpy.ones_like(param.value) / (mylambda * t0)
        b = theano.shared(value=b_init, name='b_' + param.name)
        b_list.append(b)

    updateB = theano.shared(numpy.array(0.0, dtype=theano.config.floatX),
                            name='updateB')
    count = theano.shared(numpy.array(skip, dtype=theano.config.floatX),
                          name='count')
    # Scalar constants for the conditional updates (the original referenced
    # my0/my1 without defining them).
    my0 = TT.constant(numpy.array(0.0, dtype=theano.config.floatX))
    my1 = TT.constant(numpy.array(1.0, dtype=theano.config.floatX))

    # Build the update dictionary.
    if updates is None:
        updates = {}
    myskip = theano.shared(numpy.array(skip, dtype=theano.config.floatX))

    # Updates for counters.
    updates[updateB] = ifelse(TT.eq(count, my1), my1, my0)
    updates[count] = ifelse(TT.le(count, my0), myskip, count - my1)
    for b, ratio in zip(b_list, the_ratios):
        updates[b] = ifelse(TT.eq(updateB, my1),
                            b / (my1 + skip * b * ratio), b)
    for new_param, param in zip(new_parameters, parameters):
        updates[new_param] = ifelse(TT.le(count, my0), param, new_param)
    for param, b, grad, new_grad in zip(parameters, b_list,
                                        grads, new_param_grads):
        scale = my1
        if 'scale' in kwargs:
            print 'scaling the lr'
            scale = kwargs['scale']
        updates[param] = ifelse(TT.le(count, my0),
                                param - scale * b * new_grad,
                                param - scale * b * grad)

    extras = [count, updateB, b_list, parameters, grads,
              new_parameters, new_param_grads]
    return updates, extras
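# A minimal usage sketch for sgd_qn (hypothetical names; assumes the old
# shared-variable `.value` API used above and the project-local
# utils.true_div_special helper are available):
x = TT.vector('x')
w = theano.shared(numpy.ones(3, dtype=theano.config.floatX), name='w')
cost = (TT.dot(x, w) - 1.) ** 2          # toy quadratic cost
updates, extras = sgd_qn(w, cost=cost, mylambda=1e-4, t0=1e5, skip=16)
train = theano.function([x], cost, updates=updates)
for i in range(100):
    train(numpy.random.rand(3).astype(theano.config.floatX))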