def _test_layer_stats(self, layer_output):
    """
    DESCRIPTION:
        Build the monitoring statistics of this layer for one batch of the
        test or valid set. The figure finally reported is the mean of these
        values over all the batches of an epoch.
    PARAM:
        layer_output: the output from the layer
    RETURN:
        A list of tuples of [('name_a', var_a), ('name_b', var_b)] whereby
        var is scalar
    """
    # Euclidean length of every column of W (the reduction runs over axis 0).
    col_norms = T.sqrt((self.W ** 2).sum(axis=0))

    stats = [
        ('max_col_length', T.max(col_norms)),
        ('mean_col_length', T.mean(col_norms)),
        ('min_col_length', T.min(col_norms)),
    ]
    stats.extend([
        ('output_max', T.max(layer_output)),
        ('output_mean', T.mean(layer_output)),
        ('output_min', T.min(layer_output)),
    ])
    stats.extend([
        ('max_W', T.max(self.W)),
        ('mean_W', T.mean(self.W)),
        ('min_W', T.min(self.W)),
    ])
    stats.extend([
        ('max_b', T.max(self.b)),
        ('mean_b', T.mean(self.b)),
        ('min_b', T.min(self.b)),
    ])
    return stats
def define_network(self, layers_info=None):
    """
    Builds Theano graph of the network.

    layers_info is an optional nested dict of stored parameters. When given,
    it is read at ['hidden_layers'][i]['LBNlayer'] (keys 'detLayer' ->
    'W' / 'b', and 'stochLayer') and at ['output_layer']['W']. When None,
    every layer receives None for those arguments.

    Sets self.hidden_layers, self.params, self.output_layer, self.output,
    self.log_likelihood and the compiled function self.predict.
    """
    self.hidden_layers = [None] * self.n_hidden.size
    self.params = []

    for i, h in enumerate(self.n_hidden):
        # Stored parameters of layer i, or None everywhere.
        if layers_info is None:
            stored_W = stored_b = stored_stoch = None
        else:
            stored = layers_info['hidden_layers'][i]['LBNlayer']
            stored_W = np.array(stored['detLayer']['W'])
            stored_b = np.array(stored['detLayer']['b'])
            stored_stoch = stored['stochLayer']

        if i == 0:
            # Only the first layer is handed m explicitly.
            layer = LBNHiddenLayer(
                self.rng, self.trng, self.x, self.n_in, h,
                self.det_activation[i], self.stoch_n_hidden,
                self.stoch_activation,
                det_activation_name=self.det_activation_names[i],
                stoch_activation_names=self.stoch_activation_names,
                m=self.m,
                det_W=stored_W, det_b=stored_b,
                stoch_mlp_info=stored_stoch)
        else:
            layer = LBNHiddenLayer(
                self.rng, self.trng, self.hidden_layers[i - 1].output,
                self.n_hidden[i - 1], h,
                self.det_activation[i], self.stoch_n_hidden,
                self.stoch_activation,
                det_activation_name=self.det_activation_names[i],
                stoch_activation_names=self.stoch_activation_names,
                det_W=stored_W, det_b=stored_b,
                stoch_mlp_info=stored_stoch)

        self.hidden_layers[i] = layer
        self.params.append(layer.params)

    if layers_info is None:
        stored_V = None
    else:
        stored_V = np.array(layers_info['output_layer']['W'])
    self.output_layer = OutputLayer(
        self.rng, self.hidden_layers[-1].output, self.n_hidden[-1],
        self.n_out, self.det_activation[-1], self.det_activation_names[-1],
        V_values=stored_V)
    self.params.append(self.output_layer.params)
    self.output = self.output_layer.output

    # -0.5 * squared error, reduced over axis 2 after y is given a leading
    # broadcast axis.
    exp_value = -0.5 * T.sum(
        (self.output - self.y.dimshuffle('x', 0, 1)) ** 2, axis=2)

    # Offset subtracted before exponentiating and added back afterwards.
    # NOTE(review): this is a single global offset, not one per column.
    largest = T.max(exp_value)
    smallest = T.min(exp_value)
    max_exp_value = theano.ifelse.ifelse(
        T.lt(largest, -1 * smallest), largest, smallest)

    log_sum = T.log(T.sum(T.exp(exp_value - max_exp_value), axis=0))
    normaliser = self.y.shape[0] * (
        T.log(self.m) + self.y.shape[1] / 2. * T.log(2 * np.pi))
    self.log_likelihood = T.sum(log_sum + max_exp_value) - normaliser

    self.predict = theano.function(inputs=[self.x, self.m],
                                   outputs=self.output)
def norm(x, ord=None):
    """
    Return a symbolic vector or matrix norm of x.

    x is converted with as_tensor_variable and must have 1 or 2 dimensions.

    ord selects the norm:
      * vectors (ndim == 1): None (2-norm), 'inf' (max of |x|), '-inf'
        (min of |x|), 0 (number of nonzero entries), or any other value p
        for which sum(|x**p|) ** (1/p) can be formed.
      * matrices (ndim == 2): None or 'fro', 'inf', '-inf', 1, -1.

    Raises ValueError for a 0-d input or an unsupported order, and
    NotImplementedError for inputs with more than 2 dimensions.
    """
    x = as_tensor_variable(x)
    ndim = x.ndim
    if ndim == 0:
        raise ValueError("'axis' entry is out of bounds.")
    elif ndim == 1:
        # Identity test: `== None` would dispatch to a custom __eq__.
        if ord is None:
            return tensor.sum(x**2)**0.5
        elif ord == 'inf':
            return tensor.max(abs(x))
        elif ord == '-inf':
            return tensor.min(abs(x))
        elif ord == 0:
            return x[x.nonzero()].shape[0]
        else:
            try:
                z = tensor.sum(abs(x**ord))**(1./ord)
            except TypeError:
                raise ValueError("Invalid norm order for vectors.")
            return z
    elif ndim == 2:
        if ord is None or ord == 'fro':
            return tensor.sum(abs(x**2))**(0.5)
        elif ord == 'inf':
            return tensor.max(tensor.sum(abs(x), 1))
        elif ord == '-inf':
            return tensor.min(tensor.sum(abs(x), 1))
        elif ord == 1:
            return tensor.max(tensor.sum(abs(x), 0))
        elif ord == -1:
            return tensor.min(tensor.sum(abs(x), 0))
        else:
            raise ValueError("Invalid norm order for matrices.")
    elif ndim > 2:
        raise NotImplementedError("We don't support norm with ndim > 2")
def _lowrnk_emp(N, D, K, mnp, masknp):
    """
    Build the symbolic pieces of a low-rank empirical covariance model.

    N, D   : sizes used for the (D, N) mask and the (D, D) covariance.
    K      : not used by this function.
    mnp    : array converted to the tensor m and multiplied by the mask.
    masknp : value loaded into the shared (D, N) mask variable.

    Returns the tuple
    (mask, m, y, zero_y, zero2, zero, st, scale, cov, w, eigval, wwT,
     z_y, z_k, z_eps).
    """
    def shared_ones(shape, name):
        return theano.shared(value=np.ones(shape, dtype=theano.config.floatX),
                             name=name, borrow=True)

    srng = T.shared_randomstreams.RandomStreams(seed=120)

    mask = shared_ones((D, N), 'mask')
    mask.set_value(masknp)
    m = T.as_tensor_variable(mnp)
    y = mask * m

    zero_y = T.as_tensor_variable(np.zeros((D, N)))
    zero2 = T.as_tensor_variable(np.zeros((D, D)))
    zero = T.as_tensor_variable(np.zeros(D))

    # Per-column count of entries of y that differ from zero.
    st = T.sum(T.neq(y, zero_y), axis=0)

    # dot(mask, mask.T) is (D, D), so the added ones must be (D, D) as well;
    # the previous (D, N) shape only matched when N == D.
    scale = 1 / ((T.dot(mask, mask.T)) + T.ones((D, D)))
    emp_cov = scale * (T.dot(y, y.T))

    # Keep only the singular directions whose singular value exceeds 0.2.
    [U, S, V] = T.nlinalg.svd(emp_cov)
    rk = T.sum(S > 0.2)
    cov = (U[:, 0:rk].dot(T.nlinalg.diag(S[0:rk]))).dot(V[0:rk, :])

    # Shift the diagonal by |min(smallest eigenvalue, 0)| + 0.1 before the
    # Cholesky factorisation.
    eigval = T.abs_(T.min([T.min(T.nlinalg.eig(cov)[0]), 0]))
    cov = cov + (eigval + 0.1) * T.eye(D)
    print('so far so good')
    w = theano.tensor.slinalg.cholesky(cov)
    print('w calculated')
    wwT = T.dot(w, w.T)

    # Define random variables for mVNscan component
    z_y = srng.normal([D])
    z_k = srng.normal([D])
    z_eps = srng.normal()

    return (mask, m, y, zero_y, zero2, zero, st, scale, cov, w, eigval, wwT,
            z_y, z_k, z_eps)
def plotUpdate(self, updates):
    '''
    Summarise the size of the pending update of every parameter.

    updates: dict mapping each parameter (the w and b of every entry of
    self.layers, plus self.classifier.w) to its updated expression.

    Returns [maxdict, mindict, meandict], three vectors of length
    2 * self.deep + 1. Slot 2*i holds the statistic for layer i's w, slot
    2*i + 1 the one for layer i's b, and the last slot the classifier's w.
    '''
    n_slots = self.deep * 2 + 1
    maxdict = T.zeros(shape=(n_slots, ))
    mindict = T.zeros(shape=(n_slots, ))
    meandict = T.zeros(shape=(n_slots, ))

    # (slot, parameter) pairs in the order the statistics are written.
    targets = []
    for i in xrange(self.deep):
        targets.append((2 * i, self.layers[i].w))
        targets.append((2 * i + 1, self.layers[i].b))
    targets.append((self.deep * 2, self.classifier.w))

    for slot, param in targets:
        delta = updates[param] - param
        maxdict = T.set_subtensor(maxdict[slot], T.max(delta))
        mindict = T.set_subtensor(mindict[slot], T.min(delta))
        meandict = T.set_subtensor(meandict[slot], T.mean(delta))

    return [maxdict, mindict, meandict]
def chamfer_distance(xyz1, xyz2):
    """
    Return the symbolic Chamfer loss between two point sets.

    Both inputs are converted with T.as_tensor and must both be 3-d or both
    be 2-d (the 2-d case is given a leading axis). The loss is the sum of
    the minima of the pairwise squared-distance tensor along axis 1 plus the
    sum of its minima along axis 2.

    Raises NotImplementedError for any other combination of ranks.
    """
    print('Using Chamfer distance loss')
    xyz1 = T.as_tensor(xyz1)
    xyz2 = T.as_tensor(xyz2)

    def _batch_pairwise_dist(x, y):
        both_3d = x.ndim == 3 and y.ndim == 3
        both_2d = x.ndim == 2 and y.ndim == 2
        if both_3d:
            gram_x = T.batched_dot(x, x.dimshuffle(0, 2, 1))
            gram_y = T.batched_dot(y, y.dimshuffle(0, 2, 1))
            cross = T.batched_dot(x, y.dimshuffle(0, 2, 1))
        elif both_2d:
            gram_x = T.shape_padleft(T.dot(x, x.T))
            gram_y = T.shape_padleft(T.dot(y, y.T))
            cross = T.shape_padleft(T.dot(x, y.T))
        else:
            raise NotImplementedError

        # The diagonals of the Gram matrices are the squared norms.
        diag_x = T.arange(0, gram_x.shape[1], dtype='int64')
        diag_y = T.arange(0, gram_y.shape[1], dtype='int64')
        sq_x = gram_x[:, diag_x, diag_x].dimshuffle(0, 1, 'x')
        sq_y = gram_y[:, diag_y, diag_y].dimshuffle(0, 'x', 1)

        tiled_x = T.tile(sq_x, (1, 1, cross.shape[2]))
        tiled_y = T.tile(sq_y, (1, cross.shape[1], 1))
        return tiled_x + tiled_y - 2. * cross

    P = _batch_pairwise_dist(xyz1, xyz2)
    loss_1 = T.sum(T.min(P, 1))
    loss_2 = T.sum(T.min(P, 2))
    return loss_1 + loss_2
def _layer_stats(self, state_below, layer_output):
    """
    DESCRIPTION:
        Build the monitoring statistics of this layer for one batch of the
        test or valid set. The figure finally reported is the mean of these
        values over all the batches of an epoch.
    PARAM:
        state_below: the input to the layer
        layer_output: the output from the layer
    RETURN:
        A list of tuples of [('name_a', var_a), ('name_b', var_b)] whereby
        var is scalar
    """
    # Euclidean length of every column of W (the reduction runs over axis 0).
    col_norms = T.sqrt((self.W**2).sum(axis=0))

    weight_stats = [
        ('max_W', T.max(self.W)),
        ('mean_W', T.mean(self.W)),
        ('min_W', T.min(self.W)),
        ('max_b', T.max(self.b)),
        ('mean_b', T.mean(self.b)),
        ('min_b', T.min(self.b)),
    ]
    # The two "mean" entries below average absolute values.
    output_stats = [
        ('max_layer_output', T.max(layer_output)),
        ('mean_layer_output', T.mean(T.abs_(layer_output))),
        ('min_layer_output', T.min(layer_output)),
    ]
    length_stats = [
        ('max_col_length', T.max(col_norms)),
        ('mean_col_length', T.mean(col_norms)),
        ('min_col_length', T.min(col_norms)),
    ]
    input_stats = [
        ('max_state_below', T.max(state_below)),
        ('mean_state_below', T.mean(T.abs_(state_below))),
        ('min_state_below', T.min(state_below)),
    ]
    return weight_stats + output_stats + length_stats + input_stats
def test_optimization_min(self):
    """
    Check the compiled graph of tensor.min over axes 0, 1 and -1, alone and
    combined with negation on the input, on the output, and on both.
    """
    data = np.asarray(np.random.rand(2, 3), dtype=config.floatX)
    n = tensor.matrix()

    def compile_and_sort(expression):
        compiled = function([n], expression, mode=self.mode)
        return compiled, compiled.maker.fgraph.toposort()

    for axis in [0, 1, -1]:
        f, topo = compile_and_sort(tensor.min(n, axis))
        assert len(topo) == 1
        assert isinstance(topo[0].op, CAReduce)
        f(data)

        # test variant with neg to make sure we optimize correctly
        f, topo = compile_and_sort(tensor.min(-n, axis))
        assert len(topo) == 2
        assert isinstance(topo[0].op, CAReduce)  # max
        assert isinstance(topo[1].op, Elemwise)
        assert isinstance(topo[1].op.scalar_op, scalar.Neg)
        f(data)

        f, topo = compile_and_sort(-tensor.min(n, axis))
        assert len(topo) == 2
        assert isinstance(topo[0].op, Elemwise)
        assert isinstance(topo[0].op.scalar_op, scalar.Neg)
        assert isinstance(topo[1].op, CAReduce)  # max
        f(data)

        f, topo = compile_and_sort(-tensor.min(-n, axis))
        assert len(topo) == 1
        assert isinstance(topo[0].op, CAReduce)  # max
        f(data)
def plotUpdate(self,updates):
    '''
    Collect the max / min / mean of the pending update of each parameter.

    updates: dict mapping each parameter (the w and b of every entry of
    self.layers, plus self.classifier.w) to its updated expression.

    Returns [maxdict, mindict, meandict]; each is a vector with
    2 * self.deep + 1 entries laid out as w0, b0, w1, b1, ..., classifier w.
    '''
    length = self.deep * 2 + 1
    maxdict = T.zeros(shape=(length,))
    mindict = T.zeros(shape=(length,))
    meandict = T.zeros(shape=(length,))

    def record(position, param, maxdict, mindict, meandict):
        # Write the three statistics of (new value - current value).
        change = updates[param] - param
        return (T.set_subtensor(maxdict[position], T.max(change)),
                T.set_subtensor(mindict[position], T.min(change)),
                T.set_subtensor(meandict[position], T.mean(change)))

    for i in xrange(self.deep):
        maxdict, mindict, meandict = record(
            2 * i, self.layers[i].w, maxdict, mindict, meandict)
        maxdict, mindict, meandict = record(
            2 * i + 1, self.layers[i].b, maxdict, mindict, meandict)

    maxdict, mindict, meandict = record(
        self.deep * 2, self.classifier.w, maxdict, mindict, meandict)
    return [maxdict, mindict, meandict]
def compute_S(idx, Sp1, zAA, zBB):
    """
    Return the value of S at step idx, given Sp1 (the value passed in from
    the neighbouring step).

    Uses nT, iia, iib, Tla and ifelse from the enclosing scope. When
    idx == nT - 2 the result is built from the last blocks only; otherwise
    it uses Sp1 and the blocks at index min(idx + 1, nT - 2).
    """
    # Branch taken when idx == nT - 2.
    last_step = T.dot(zBB[iib[-1]], Tla.matrix_inverse(zAA[iia[-1]]))

    # Clamp the look-ahead index so it never exceeds nT - 2.
    ahead = T.min([idx+1, nT-2])
    coupling = T.dot(Sp1, T.transpose(zBB[iib[ahead]]))
    general_step = T.dot(zBB[iib[idx]],
                         Tla.matrix_inverse(zAA[iia[ahead]] - coupling))

    return ifelse(T.eq(idx, nT-2), last_step, general_step)
def test_optimization_min(self):
    """
    Check the compiled graph of tensor.min over axis 0, alone and combined
    with negation on the input, on the output, and on both.
    """
    data = numpy.asarray(numpy.random.rand(2,3),dtype=config.floatX)
    n = tensor.matrix()

    def build(expression):
        compiled = function([n], expression, mode=self.mode)
        return compiled, compiled.maker.env.toposort()

    f, topo = build(tensor.min(n,0))
    assert len(topo)==1
    assert isinstance(topo[0].op,CAReduce)
    f(data)

    #test variant with neg to make sure we optimize correctly
    f, topo = build(tensor.min(-n,0))
    assert len(topo)==2
    assert isinstance(topo[0].op,CAReduce)#max
    assert isinstance(topo[1].op, Elemwise)
    assert isinstance(topo[1].op.scalar_op, scalar.Neg)
    f(data)

    f, topo = build(-tensor.min(n,0))
    assert len(topo)==2
    assert isinstance(topo[0].op, Elemwise)
    assert isinstance(topo[0].op.scalar_op, scalar.Neg)
    assert isinstance(topo[1].op,CAReduce)#max
    f(data)

    f, topo = build(-tensor.min(-n,0))
    assert len(topo)==1
    assert isinstance(topo[0].op,CAReduce)#max
    f(data)
def value_single(self, x, y, f):
    """
    Return the value for one example as a floatX scalar.

    The value is the mean of two terms, each capped at 1, built from
    (1 - y) and f[2]. It is used only when self.condition_single(x, f)
    equals 1; otherwise the result is 1.
    """
    float_type = theano.config.floatX
    complement = 1 - y

    forward = T.min([1. - complement + f[2], 1.])
    backward = T.min([1. - f[2] + complement, 1.])
    ret = T.cast(T.mean([forward, backward]), dtype=float_type)

    applies = T.eq(self.condition_single(x, f), 1.)
    return T.cast(ifelse(applies, ret, 1.), dtype=float_type)
def calc_min_max(p_n, p_p):
    """
    Return (hmax, hmin): the overall maximum and minimum over both p_n and
    p_p, as symbolic scalars. Note the order of the returned pair.
    """
    low_n, high_n = T.min(p_n), T.max(p_n)
    low_p, high_p = T.min(p_p), T.max(p_p)

    overall_low = ifelse(T.lt(low_p, low_n), low_p, low_n)
    overall_high = ifelse(T.lt(high_p, high_n), high_n, high_p)
    return overall_high, overall_low
def compute_S(idx, Sp1, zAA, zBB):
    """
    Return the value of S at step idx from Sp1, the value handed over by the
    neighbouring step.

    Relies on nT, iia, iib, Tla and ifelse from the enclosing scope. The
    idx == nT - 2 case uses only the last blocks; every other case uses Sp1
    and the blocks at index min(idx + 1, nT - 2).
    """
    is_last = T.eq(idx, nT - 2)
    at_last = T.dot(zBB[iib[-1]], Tla.matrix_inverse(zAA[iia[-1]]))

    # Look-ahead index, clamped to nT - 2.
    nxt = T.min([idx + 1, nT - 2])
    reduced = zAA[iia[nxt]] - T.dot(Sp1, T.transpose(zBB[iib[nxt]]))
    elsewhere = T.dot(zBB[iib[idx]], Tla.matrix_inverse(reduced))

    Sm = ifelse(is_last, at_last, elsewhere)
    return Sm
def eig_pos_barrier( theta = Th.dvector('theta'), M = Th.dmatrix('M') ,
                     STA = Th.dvector('STA'), STC = Th.dmatrix('STC'),
                     U = Th.dmatrix('U') , V1 = Th.dvector('V1'), **other):
    '''
    Barrier indicator built from the eigenvalues of I - (M + M.T)/2 and
    from V1.

    The visible part of the expression is 1 minus the product of three
    comparisons, so it is 0 only when all of them hold:
      * the sum of the logs of the eigenvalues is greater than -250,
      * the smallest eigenvalue is greater than 0,
      * the smallest entry of V1 is greater than 0.
    A nonzero result therefore means the barrier is violated.

    theta, STA, STC, U and **other are accepted but not used in the visible
    code.
    '''
    # Symmetrise M and subtract it from the identity.
    ImM = Th.identity_like(M)-(M+M.T)/2
    w,v = eig( ImM )
    # NOTE(review): the return expression ends with a line continuation and
    # its remainder is not visible here -- confirm against the full source.
    return 1-(Th.sum(Th.log(w))>-250)*(Th.min(w)>0)*(Th.min(V1.flatten())>0) \
def _get_hidden_layer_connectivity(self, layerIdx):
    """
    Sample a connectivity value for every unit of hidden layer layerIdx.

    The probabilities come from self._get_p applied to the minimum of the
    connectivity feeding this layer: self.layers_connectivity[0] for the
    first layer, self.layers_connectivity_updates[layerIdx - 1] otherwise.
    Returns one value per unit, computed as the row sum of the cumulative
    sum of a multinomial draw over the reversed probabilities.
    """
    layer_size = self._hidden_sizes[layerIdx]

    if layerIdx == 0:
        incoming = self.layers_connectivity[layerIdx]
    else:
        incoming = self.layers_connectivity_updates[layerIdx-1]
    p_vals = self._get_p(T.min(incoming))

    # Same reversed probability row for each of the layer_size units.
    pvals_per_unit = T.tile(p_vals[::-1][None, :], (layer_size, 1))
    draws = self._mrng.multinomial(pvals=pvals_per_unit,
                                   dtype=theano.config.floatX)
    return T.sum(T.cumsum(draws, axis=1), axis=1)
def _get_hidden_layer_connectivity(self, layerIdx):
    """
    Sample a connectivity value for every unit of hidden layer layerIdx.

    Probabilities are self._get_p of the minimum of the connectivity that
    feeds the layer (self.layers_connectivity[0] for the first layer,
    self.layers_connectivity_updates[layerIdx - 1] afterwards). The result
    has one entry per unit: the row sum of the cumulative sum of a
    multinomial draw over the reversed probabilities.
    """
    layer_size = self._hidden_sizes[layerIdx]
    lc = (self.layers_connectivity[layerIdx] if layerIdx == 0
          else self.layers_connectivity_updates[layerIdx-1])
    p_vals = self._get_p(T.min(lc))

    reversed_row = p_vals[::-1][None, :]
    sampled = self._mrng.multinomial(
        pvals=T.tile(reversed_row, (layer_size, 1)), dtype=floatX)
    cumulative = T.cumsum(sampled, axis=1)
    return T.sum(cumulative, axis=1)
def get_monitoring_channels(self, V):
    """
    Return a dict of monitoring expressions for the model parameters.

    self.get_params() is unpacked as (visible bias, hidden bias, weights).
    V is accepted but not used.
    """
    vb, hb, weights = self.get_params()
    norms = theano_norms(weights)

    channels = {}
    channels['W_min'] = tensor.min(weights)
    channels['W_max'] = tensor.max(weights)
    channels['W_norm_mean'] = tensor.mean(norms)

    channels['bias_hid_min'] = tensor.min(hb)
    channels['bias_hid_mean'] = tensor.mean(hb)
    channels['bias_hid_max'] = tensor.max(hb)

    channels['bias_vis_min'] = tensor.min(vb)
    channels['bias_vis_mean'] = tensor.mean(vb)
    channels['bias_vis_max'] = tensor.max(vb)
    return channels
def get_monitoring_channels(self, V):
    """
    Return a dict of monitoring expressions for the model parameters.

    The three values of self.get_params() are taken as visible bias, hidden
    bias and weights, in that order. V is accepted but not used.
    """
    vb, hb, weights = self.get_params()
    norms = theano_norms(weights)

    rval = {
        'W_min': tensor.min(weights),
        'W_max': tensor.max(weights),
        'W_norm_mean': tensor.mean(norms),
    }
    rval.update({
        'bias_hid_min': tensor.min(hb),
        'bias_hid_mean': tensor.mean(hb),
        'bias_hid_max': tensor.max(hb),
    })
    rval.update({
        'bias_vis_min': tensor.min(vb),
        'bias_vis_mean': tensor.mean(vb),
        'bias_vis_max': tensor.max(vb),
    })
    return rval
def attend(self, y_p):
    """
    Build the attention read-out over self.base for self.attrs['glimpse']
    glimpses.

    For every glimpse the entries of self.base are visited from the last
    to the first. The last entry is scored in full with self.distance; each
    earlier entry is scored only on a window of `ext` positions selected
    around `pos`, the argmax found on the previously visited entry. At
    entry 0 the softmax weights w_i are used to form `proto`, a weighted
    sum of B, which is appended to every list in self.glimpses.

    Returns (T.dot(proto, self.custom_vars['W_att_in_0']), updates), where
    updates comes from self.default_updates().
    """
    updates = self.default_updates()
    for g in range(self.attrs['glimpse']):
        for i in range(len(self.base)-1,-1,-1):
            # Entry 0 uses a plain factor of 1; the others read theirs from attrs.
            factor = T.constant(self.base[i].attrs['factor'][0], 'int32') if i > 0 else 1
            B, C, I, H, W_att_in, b_att_in = self.get(y_p, i, g)
            if i == len(self.base) - 1:
                z_i = self.distance(C, H)
            else:
                # `ext` and `pos` were set while visiting entry i + 1.
                length = T.cast(T.max(T.sum(I,axis=0))+1,'int32')
                # NOTE(review): `/` here relies on the division semantics in
                # effect for this file -- confirm integer division is intended.
                ext = T.cast(T.minimum(ext/factor,T.min(length)),'int32')
                def pick(i_t, ext):
                    # 0/1 mask with `ext` ones, shifted so the window stays
                    # within B.shape[0].
                    pad = T.minimum(i_t+ext, B.shape[0]) - ext
                    return T.concatenate([T.zeros((pad,), 'int8'), T.ones((ext,), 'int8'), T.zeros((B.shape[0]-pad-ext+1,), 'int8')], axis=0)
                idx, _ = theano.map(pick, sequences = [pos/factor], non_sequences = [ext])
                # Flat indices of the selected positions.
                idx = (idx.dimshuffle(1,0)[:-1].flatten() > 0).nonzero()
                C = C.reshape((C.shape[0]*C.shape[1],C.shape[2]))[idx].reshape((ext,C.shape[1],C.shape[2]))
                z_i = self.distance(C, H)
                I = I.reshape((I.shape[0]*I.shape[1],))[idx].reshape((ext,I.shape[1]))
            if i > 0:
                # Remember where to look, and how wide, for the next entry.
                pos = T.argmax(self.softmax(z_i,I),axis=0) * factor
                ext = factor
            else:
                w_i = self.softmax(z_i,I)
        # NOTE(review): `idx` is only assigned in the windowed branch, so this
        # appears to require len(self.base) > 1 -- confirm.
        B = B.reshape((B.shape[0]*B.shape[1],B.shape[2]))[idx].reshape((ext,B.shape[1],B.shape[2]))
        proto = T.sum(B * w_i.dimshuffle(0,1,'x').repeat(B.shape[2],axis=2),axis=0)
        for i in range(len(self.base)):
            self.glimpses[i].append(proto)
    return T.dot(proto, self.custom_vars['W_att_in_0']), updates
def get_stencil(self, t, r=None, texp=None):
    """
    Return the sorted contact times that fall within the span of t.

    When r or texp is None the function returns only
    tt.shape_padright(t). Otherwise it returns the pair (ts, flag): ts is
    the sorted concatenation of the four contact-time offsets repeated once
    per period between the first and last period touched by t, and flag is
    zero for the ecc-is-None case or the sum of the flags reported by
    self.contact_points_op.

    Note that the two paths return different kinds of value.
    """
    if r is None or texp is None:
        return tt.shape_padright(t)

    z = tt.zeros_like(self.a)
    r = tt.as_tensor_variable(r)
    R = self.r_star + z
    hp = 0.5 * self.period

    if self.ecc is None:
        # Equation 14 from Winn (2010)
        k = r / self.r_star
        arg1 = tt.square(1 + k) - tt.square(self.b)
        arg2 = tt.square(1 - k) - tt.square(self.b)
        factor = R / (self.a * self.sin_incl)
        hdur1 = hp * tt.arcsin(factor * tt.sqrt(arg1)) / np.pi
        hdur2 = hp * tt.arcsin(factor * tt.sqrt(arg2)) / np.pi
        ts = [-hdur1, -hdur2, hdur2, hdur1]
        flag = z
    else:
        def contact_points(separation):
            return self.contact_points_op(
                self.a, self.ecc, self.cos_omega, self.sin_omega,
                self.cos_incl + z, self.sin_incl + z, separation)

        def as_offset(value):
            # Map the op's output to a time wrapped into [-hp, hp).
            return tt.mod((value - self.M0) / self.n + hp, self.period) - hp

        outer = contact_points(R + r)
        inner = contact_points(R - r)
        flag = outer[2] + inner[2]
        ts = [as_offset(outer[0]), as_offset(inner[0]),
              as_offset(inner[1]), as_offset(outer[1])]

    # Period boundaries enclosing the requested times.
    start = self.period * tt.floor((tt.min(t) - self.t0) / self.period)
    end = self.period * (tt.ceil((tt.max(t) - self.t0) / self.period) + 1)
    start = start + self.t0
    end = end + self.t0

    scalar_case = z.ndim < 1
    tout = []
    for offset in ts:
        if scalar_case:
            tout.append(offset + tt.arange(start, end, self.period))
        else:
            repeated, _ = theano.scan(
                fn=lambda t0, s0, e0, p0: t0 + tt.arange(s0, e0, p0),
                sequences=[offset, start, end, self.period],
            )
            tout.append(repeated.flatten())

    ts = tt.sort(tt.concatenate(tout))
    return ts, flag
def _best_path_decode(activations):
    """Calculate the CTC best-path decoding for a given activation sequence.

    The most likely output is taken at every timestep, repeated outputs are
    collapsed, and blanks are removed. In the returned matrix, shorter
    sequences are padded with the module-level PADDING value.
    """
    # Highest-scoring output at each timestep.
    best = T.argmax(activations, axis=2)
    n_examples = best.shape[1]

    # shifted[time][example] == best[time - 1][example]
    blank_row = T.alloc(_BLANK, 1, n_examples)
    shifted = T.concatenate([blank_row, best], axis=0)[:-1]

    # Zero out every repetition (assumes blanks are already zero).
    collapsed = best * T.neq(best, shifted)

    # Number of zeros per sequence, relative to the sequence with fewest.
    zeros_per_seq = T.eq(collapsed, 0).sum(axis=0)
    fewest_zeros = T.min(zeros_per_seq, axis=0)
    longest = collapsed.shape[0] - fewest_zeros
    deficit = zeros_per_seq - fewest_zeros

    # Padding block: row r of a column is PADDING while r < that column's
    # deficit, and zero afterwards.
    most_padding = T.max(deficit, axis=0)
    deficit_grid = deficit.dimshuffle('x', 0).repeat(most_padding, axis=0)
    row_index = T.arange(most_padding).dimshuffle(0, 'x').repeat(
        n_examples, axis=1)
    padding = PADDING * T.lt(row_index, deficit_grid)

    padded = T.concatenate([collapsed, padding], axis=0)

    # Dropping the zeros leaves exactly `longest` values per sequence.
    kept = padded.T.nonzero_values()
    return T.reshape(kept, (padded.shape[1], longest)).T
def compute_D(idx, Dm1, zS, zAA, zBB):
    """
    Return the value of D at step idx, given Dm1 from the previous step.

    Uses nT, iia, iib, III, S, Tla and ifelse from the enclosing scope; the
    zS argument is accepted but the body reads S instead. Three cases are
    distinguished: idx == nT - 1, idx == 0, and every other step.
    """
    # Case idx == nT - 1.
    final_block = T.dot(
        Tla.matrix_inverse(zAA[iia[-1]]),
        III + T.dot(T.transpose(zBB[iib[idx-1]]), T.dot(Dm1, S[0])))

    # Case idx == 0.
    first_block = Tla.matrix_inverse(
        zAA[iia[0]] - T.dot(zBB[iib[0]], T.transpose(S[-1])))

    # Every other step; the indices are clamped so they stay in range.
    here = T.min([idx, nT-2])
    back = T.min([idx-1, nT-2])
    s_pos = T.max([-idx-1, -nT+1])
    middle_inverse = Tla.matrix_inverse(
        zAA[iia[idx]] - T.dot(zBB[iib[here]], T.transpose(S[s_pos])))
    middle_block = T.dot(
        middle_inverse,
        III + T.dot(T.transpose(zBB[iib[back]]), T.dot(Dm1, S[-idx])))

    return ifelse(T.eq(idx, nT-1),
                  final_block,
                  ifelse(T.eq(idx, 0), first_block, middle_block))
def step(self, t, s_p, c_p, X):
    """
    One recurrence step: read the input at position t and update the state.

    t   : position read along axis 1 of X.
    s_p : previous output state.
    c_p : previous cell state.
    X   : full input.

    Returns [o_t, s_t, c_t]; o_t is the same variable as s_t.
    """
    #x_t = X[:,t]
    #X = T.matrix()
    if len(self.input_shape) == 3:
        x_t = X[:, t]
    else:
        # Slice rather than index so that axis 1 is kept.
        x_t = X[:, t:t+1]
    # Scale by 1 + (range of the current input).
    # NOTE(review): placed after the if/else, i.e. applied on both paths --
    # confirm against the original layout.
    x_t = x_t/(1.0+(T.max(x_t)-T.min(x_t)))
    #x_t = X[:,t+self.input_shape[1]-self.hidden_dim+1:t+self.input_shape[1]+1]
    #x_t = x_t*self.E
    #test = T.dot(x_t, self.U)
    # All four pre-activations at once; they are split along axis 1 below.
    res_s = T.dot(x_t, self.U) + T.dot(s_p, self.W) + self.b#[index,channel,hidden_dim]
    i = T.nnet.hard_sigmoid(res_s[:, 0, :])# (index,hidden_dim)
    f = T.nnet.hard_sigmoid(res_s[:, 1, :])#(index,hidden_dim)
    o = T.nnet.hard_sigmoid(res_s[:, 2, :])#(index,hidden_dim)
    g = T.tanh(res_s[:, 3, :])#(index,hidden_dim)
    # i = T.nnet.hard_sigmoid(T.dot(x_t, self.U[0])+T.dot(s_p,self.W[0])+self.b[0])#(index,hidden_dim)
    # f = T.nnet.hard_sigmoid(T.dot(x_t, self.U[1])+T.dot(s_p,self.W[1])+self.b[1])#(index,hidden_dim)
    # o = T.nnet.hard_sigmoid(T.dot(x_t, self.U[2])+T.dot(s_p,self.W[2])+self.b[2])#(index,hidden_dim)
    # g = T.tanh(T.dot(x_t, self.U[3])+T.dot(s_p,self.W[3])+self.b[3])#(index,hidden_dim)
    # New cell state: previous cell scaled by f plus candidate g scaled by i.
    c_t = c_p*f + g*i#(index,hidden_dim)
    s_t = T.tanh(c_t)*o#(index,hidden_dim)
    # o_t = T.dot(s_t, self.V)#(index,1)
    # o_t = o_t+self.c[0]
    o_t = s_t
    #return o_t
    # o_t = T.cast(o_t,"float32")
    # s_t = T.cast(s_t,"float32")
    # c_t = T.cast(c_t, "float32")
    return [o_t, s_t, c_t]
def make_experiment(l_out, dataset, batch_size=1000, N_train=50000,
                    N_valid=10000, N_test=10000,
                    loss_function=lasagne.objectives.categorical_crossentropy,
                    extra_loss=0.0, limit_alpha=False):
    """
    Build a loop for training a model, evaluating loss on training,
    validation and test.

    Loss and accuracy channels are added for "train" (non-deterministic)
    and for "valid" and "test" (deterministic). With limit_alpha=True an
    extra train channel is added that replaces every parameter named
    "alpha" by min(alpha, 1.0) each time it is evaluated.

    Returns the holonets.run.EpochLoop wrapping the training object.
    """
    expressions = holonets.monitor.Expressions(
        l_out, dataset, batch_size=batch_size,
        update_rule=lasagne.updates.adam,
        loss_function=loss_function, loss_aggregate=T.mean,
        extra_loss=extra_loss, learning_rate=0.001, momentum=0.1)

    # only add channels for loss and accuracy
    splits = (("train", False), ("valid", True), ("test", True))
    for split_name, deterministic in splits:
        expressions.add_channel(**expressions.loss(split_name, deterministic))
        expressions.add_channel(
            **expressions.accuracy(split_name, deterministic))
    channels = expressions.build_channels()

    if limit_alpha:
        # then add channel to reset all alphas at 1.0
        alphas = [param for param in lasagne.layers.get_all_params(l_out)
                  if param.name == "alpha"]
        capped = OrderedDict([(a, T.min([a, 1.0])) for a in alphas])
        alpha_ceiling = theano.function([], alphas, updates=capped)
        alpha_channel = {
            'dataset': 'train',
            'eval': lambda x: alpha_ceiling(),
            'dimensions': ['Alpha']*len(alphas),
            'names': ['alpha {0}'.format(i) for i in range(len(alphas))],
        }
        channels.append(alpha_channel)

    batches_per_split = {'train': N_train//batch_size,
                         'valid': N_valid//batch_size,
                         'test': N_test//batch_size}
    train = holonets.train.Train(channels, n_batches=batches_per_split)
    loop = holonets.run.EpochLoop(train, dimensions=train.dimensions)
    return loop
def test_max_pool_2d_3D(self):
    """
    Compare max_pool_2d on a 3-d input with the numpy reference
    implementation, with and without ignore_border, and evaluate the shape
    and extrema of the gradient of the summed output.
    """
    rng = numpy.random.RandomState(utt.fetch_seed())
    maxpoolshps = [(1,2)]
    imval = rng.rand(2,3,4)
    images = tensor.dtensor3()

    cases = [(shp, border) for shp in maxpoolshps
             for border in [True, False]]
    for maxpoolshp, ignore_border in cases:
        expected = self.numpy_max_pool_2d(imval, maxpoolshp, ignore_border)
        output = max_pool_2d(images, maxpoolshp, ignore_border)
        output_val = function([images], output)(imval)
        assert numpy.all(output_val == expected)

        c = tensor.sum(output)
        c_val = function([images], c)(imval)

        g = tensor.grad(c, images)
        grad_summary = [g.shape,
                        tensor.min(g, axis=(0,1,2)),
                        tensor.max(g, axis=(0,1,2))]
        g_val = function([images], grad_summary)(imval)
def _define_model(self):
    """
    Create self.model, a mixture model with self.number_of_hidden_states
    components fitted to self.data.

    Variables registered on the model:
      * 'p'      : Dirichlet mixture weights with unit concentration.
      * 'means'  : Normal(0, 2) component means.
      * 'sd'     : HalfCauchy(beta=2) component standard deviations.
      * 'category': Categorical assignment, one per data point
                    (self.number_of_data).
      * 'obs'    : Normal likelihood of self.data.
    and two potentials that forbid any weight below 0.1 and any pair of
    consecutive means that is out of increasing order.

    The priors are sized from number_of_hidden_states instead of being
    fixed to three entries; for three states the model is unchanged.
    """
    n_states = self.number_of_hidden_states
    self.model = pm.Model()
    with self.model:
        p = pm.Dirichlet('p', a=np.ones(n_states), shape=n_states)
        # NOTE(review): with more than 10 states min(p) < 0.1 always holds,
        # so this potential rules out every point -- revisit the threshold.
        pm.Potential('p_min_potential',
                     tt.switch(tt.min(p) < .1, -np.inf, 0))

        means = pm.Normal('means', mu=np.zeros(n_states), sd=2.0,
                          shape=n_states)

        # break symmetry: every mean must not be smaller than the one
        # before it
        pm.Potential('order_means_potential',
                     tt.sum(tt.switch(means[1:] - means[:-1] < 0,
                                      -np.inf, 0)))

        sd = pm.HalfCauchy('sd', beta=2, shape=n_states)
        category = pm.Categorical('category', p=p,
                                  shape=self.number_of_data)

        pm.Normal('obs', mu=means[category], sd=sd[category],
                  observed=self.data)
def m_lh(self, x, z, v):
    """
    Compute likelihood term m_lh

    x, z, v are symbolic tensors; the loss is built from z against x, with
    v entering only the 'continuous' case. self.p.input_type selects the
    loss ('binary' or 'continuous') and self.p.input_noise its noise level.

    The loss is shifted by its minimum over axis 0 and normalised so that
    exp(-loss) sums to one over axis 0; that exponential is returned.

    Raises NotImplementedError for any other input_type.
    """
    # Evaluate bottom-up mask
    if self.p.input_type == 'binary':
        # self.p.input_noise since the structure might change
        z_tilde = z * np.float32(
            (1 - 2 * self.p.input_noise)) + np.float32(self.p.input_noise)
        loss = nn.soft_binary_crossentropy(z_tilde, x, 1e-4)
    elif self.p.input_type == 'continuous':
        noise_factor = np.float32(self.p.input_noise**2) + v**2
        # Represents negative log-p
        loss = np.float32(0.5) * T.log(noise_factor) + T.sqr(z - x) / (
            np.float32(2) * noise_factor)
    else:
        # NotImplemented is a comparison sentinel, not an exception;
        # raising it produced a TypeError instead of the intended error.
        raise NotImplementedError(
            "unsupported input_type: %r" % (self.p.input_type,))

    # normalize
    loss -= T.min(loss, axis=0, keepdims=True)
    normalizer = T.log(T.sum(T.exp(-loss), axis=0, keepdims=True))
    loss += normalizer
    assert loss.ndim in [3, 5]
    m_lh = T.exp(-loss)
    return m_lh
def downsample(source, axis, factor, method="average"):
    """
    Downsample source along axis by an integer factor.

    The axis is first trimmed to a multiple of factor. For "lstm" (axis
    must be 0) the trimmed tensor is returned as is. Otherwise the axis is
    split into (length / factor, factor) and the new factor axis is reduced
    by "average", "max" or "min", or rearranged by "concat" / "mlp" or
    "batch" (axis must be 0).

    Any other method fails an assertion.
    """
    assert factor == int(factor), "factor is expected to be an int"
    factor = int(factor)

    # make shape[axis] a multiple of factor
    src = source
    usable = (source.shape[axis] / factor) * factor
    source = source[slice_for_axis(axis=axis, s=slice(0, usable))]

    # Add a temporary dimension as the factor.
    dims = [source.shape[i] for i in range(source.ndim)]
    added_dim_shape = (dims[:axis]
                       + [source.shape[axis] / factor, factor]
                       + dims[axis + 1:])

    if method == "lstm":
        assert axis == 0
        return source

    source = T.reshape(source, added_dim_shape)

    reducers = {"average": T.mean, "max": T.max, "min": T.min}
    if method in reducers:
        return reducers[method](source, axis=axis + 1)

    if method == "concat" or method == 'mlp':
        # concatenates in last dimension
        swapped = source.swapaxes(axis + 1, src.ndim - 1)
        return swapped.reshape([source.shape[0], src.shape[1]]
                               + [factor * source.shape[3]])

    if method == "batch":
        assert axis == 0
        return source.dimshuffle(1, 0, 2, 3).reshape(
            (source.shape[1], source.shape[0] * source.shape[2],
             source.shape[3]))

    assert False, "unknown downsample method %r" % method
def get_monitoring_channels(self, V, Y=None):
    """
    Return a dict of monitoring expressions, every key prefixed with
    self.monitoring_channel_prefix.

    When self.monitor_params is set, each parameter contributes its min,
    mean and max, and each parameter whose name contains 'W' also
    contributes the min, mean and max of its norms. V and Y are accepted
    but not used.

    The model is switched with self.compile_mode() for the duration of the
    call and self.deploy_mode() is always called on the way out.
    """
    try:
        self.compile_mode()

        rval = {}
        if self.monitor_params:
            for param in self.get_params():
                base = param.name
                rval[base + '_min'] = full_min(param)
                rval[base + '_mean'] = T.mean(param)
                rval[base + '_max'] = full_max(param)

                if 'W' in base:
                    norms = theano_norms(param)
                    rval[base + '_norms_min'] = T.min(norms)
                    rval[base + '_norms_mean'] = T.mean(norms)
                    rval[base + '_norms_max'] = T.max(norms)

        return dict((self.monitoring_channel_prefix + key, rval[key])
                    for key in rval)
    finally:
        self.deploy_mode()
def mindist(translate, min_so_far, ro, rd):
    """
    Update the running per-ray depth with one more translated object.

    Shapes as annotated by the author:
      ro: 3
      translate: nbatch * 3
      min_so_far: nbatch * width * height
      rd: width * height * 3

    For each ray the two roots of a quadratic in the ray parameter are
    computed; the depth is the first positive root, or 10.0 when the ray
    does not intersect or both roots are non-positive. Returns the
    elementwise minimum of min_so_far and that depth.
    """
    eps = 1e-9
    background_dist = 10.0

    origin = ro + translate
    d_o = T.tensordot(rd, origin, axes=[2, 1])
    o_o = T.sum(origin**2, axis=1)

    # Quadratic coefficients (the leading coefficient is taken as 1).
    b = 2*d_o
    c = o_o - 0.001
    #FIXME, remove this squaring
    discriminant = b ** 2 - 4 * c
    misses = discriminant < 0.0

    # Clamp before the square root so it is never taken of a negative.
    sqrt_disc = T.sqrt(T.maximum(eps, discriminant))
    near_root = (-b - sqrt_disc)/2.0
    far_root = (-b + sqrt_disc)/2.0

    first_positive = T.switch(
        near_root > 0, near_root,
        T.switch(far_root > 0, far_root, background_dist))
    depth = T.switch(misses, background_dist, first_positive)
    return T.min([min_so_far, depth], axis=0)
def get_triplet_loss(self, data, deterministic=False):
    """
    Return the scalar triplet loss for a batch of (anchor, positive,
    negative) triplets.

    The tanh of the 'fc7' output is reshaped to (3, nb, 256), giving the
    three roles. The negative distance is the smaller of negative-anchor
    and negative-positive. The loss is the mean square of the softmax
    share of the positive distance, floored at self.alpha.
    """
    fc7 = get_output(self.net['fc7'], data, deterministic=deterministic)
    # (3, nb, 256)
    embedded = T.reshape(T.tanh(fc7), newshape=(3, -1, 256))
    anchor, positive, negative = embedded[0], embedded[1], embedded[2]

    def distance(a, b):
        # Euclidean distance per row, with eps inside the root; (nb, )
        return T.pow(T.sum(T.pow(a - b, 2.0), axis=1) + self.eps, 0.5)

    norm_pos = distance(positive, anchor)
    norm_neg1 = distance(negative, anchor)
    norm_neg2 = distance(negative, positive)
    norm_neg = T.min([norm_neg1, norm_neg2], axis=0)

    # Subtract the larger distance before exponentiating.
    max_norm = T.max([norm_pos, norm_neg], axis=0)
    exp_pos = T.exp(norm_pos - max_norm)
    exp_neg = T.exp(norm_neg - max_norm)
    d_pos = T.maximum(exp_pos / (exp_pos + exp_neg), self.alpha)

    loss = T.mean(d_pos**2)
    return loss
def unet_crossentropy_loss_sampled(y_true, y_pred):
    """
    Cross-entropy over an equal number of randomly chosen positive and
    negative positions.

    Both tensors are flattened and y_pred is clipped to [1e-4, 1 - 1e-4].
    The nonzero positions of y_true and of 1 - y_true are shuffled with the
    module-level srng and truncated to min(sum(y_true), sum(1 - y_true))
    entries each. The result is -mean(log p) over the kept positives minus
    mean(log(1 - p)) over the kept negatives.

    The two progress messages are printed with calls that work on both
    Python 2 and Python 3 and produce the same text as before.
    """
    print('unet_crossentropy_loss_sampled')
    epsilon = 1.0e-4
    y_pred_clipped = T.flatten(T.clip(y_pred, epsilon, 1.0-epsilon))
    y_true = T.flatten(y_true)

    # filter the right indices (T.nonzero returns a tuple, one per axis)
    indPos = T.nonzero(y_true)[0]
    indNeg = T.nonzero(1-y_true)[0]

    # shuffle
    n = indPos.shape[0]
    indPos = indPos[srng.permutation(n=n)]
    n = indNeg.shape[0]
    indNeg = indNeg[srng.permutation(n=n)]

    # take equal number of samples depending on which class has less
    # NOTE(review): the sums equal the class counts only for 0/1 labels.
    n_samples = T.cast(T.min([T.sum(y_true), T.sum(1-y_true)]), dtype='int64')
    indPos = indPos[:n_samples]
    indNeg = indNeg[:n_samples]

    loss_vector = (-T.mean(T.log(y_pred_clipped[indPos]))
                   - T.mean(T.log(1-y_pred_clipped[indNeg])))
    average_loss = T.mean(loss_vector)
    print('average_loss: %s' % (average_loss,))
    return average_loss
def compile_gpu_func(nan_is_error, inf_is_error, big_is_error):
    """
    compile utility function used by contains_nan and contains_inf

    Fills the module-level f_gpumin, f_gpumax and f_gpuabsmax, each only if
    it is still None and the corresponding flags ask for it. Does nothing
    when cuda is not available. After the first compilation failure no
    further function is compiled.
    """
    global f_gpumin, f_gpumax, f_gpuabsmax
    if not cuda.cuda_available:
        return

    guard_input = cuda.fvector("nan_guard")

    def try_compile(make_output):
        # RuntimeError can happen if cuda is available, but the device is
        # in exclusive mode and used by another process.
        try:
            return theano.function([guard_input], make_output(),
                                   mode="FAST_RUN")
        except RuntimeError:
            return None

    cuda_compile_failed = False

    if (nan_is_error or inf_is_error) and f_gpumin is None:
        f_gpumin = try_compile(lambda: T.min(guard_input))
        cuda_compile_failed = f_gpumin is None

    if inf_is_error and not cuda_compile_failed and f_gpumax is None:
        f_gpumax = try_compile(lambda: T.max(guard_input))
        cuda_compile_failed = f_gpumax is None

    if big_is_error and not cuda_compile_failed and f_gpuabsmax is None:
        f_gpuabsmax = try_compile(lambda: T.max(T.abs_(guard_input)))
def get_output_for(self, input, **kwargs):
    """
    Rescale every row of input so that it sums to self.norm_sum.

    When self.allow_negative is set, the row minimum is subtracted first so
    that the smallest entry of each row becomes zero.
    """
    shifted = input
    if self.allow_negative:
        row_min = T.min(input, axis=1).dimshuffle(0, 'x')
        shifted = input - row_min

    row_total = T.sum(shifted, axis=1).dimshuffle(0, 'x')
    return shifted / row_total * self.norm_sum
def _get_hidden_layer_connectivity(self, layerIdx):
    """
    Sample a connectivity value for every unit of hidden layer layerIdx.

    self._get_p is applied to the minimum of the connectivity feeding the
    layer: self.layers_connectivity[0] for the first layer and
    self.layers_connectivity_updates[layerIdx - 1] for the others. One
    value per unit is returned, obtained by summing, along each row, the
    cumulative sum of a multinomial draw over the reversed probabilities.
    """
    layer_size = self._hidden_sizes[layerIdx]

    upstream = (self.layers_connectivity[layerIdx] if layerIdx == 0
                else self.layers_connectivity_updates[layerIdx - 1])
    p_vals = self._get_p(T.min(upstream))

    # One copy of the reversed probability row per unit of the layer.
    tiled = T.tile(p_vals[::-1][None, :], (layer_size, 1))
    samples = self._mrng.multinomial(pvals=tiled,
                                     dtype=theano.config.floatX)
    running = T.cumsum(samples, axis=1)
    return T.sum(running, axis=1)
def attend(self, y_p):
    """
    Build the attention read-out over self.base for self.attrs['glimpse']
    glimpses.

    For every glimpse the entries of self.base are visited from the last
    to the first. The last entry is scored in full with self.distance; each
    earlier entry is scored only on a window of `ext` positions selected
    around `pos`, the argmax found on the previously visited entry. At
    entry 0 the softmax weights w_i are used to form `proto`, a weighted
    sum of B, which is appended to every list in self.glimpses.

    Returns (T.dot(proto, self.custom_vars['W_att_in_0']), updates), where
    updates comes from self.default_updates().
    """
    updates = self.default_updates()
    for g in range(self.attrs['glimpse']):
        for i in range(len(self.base)-1,-1,-1):
            # Entry 0 uses a plain factor of 1; the others read theirs from attrs.
            factor = T.constant(self.base[i].attrs['factor'][0], 'int32') if i > 0 else 1
            B, C, I, h_p, _ = self.get(y_p, i, g)
            if i == len(self.base) - 1:
                z_i = self.distance(C, h_p)
            else:
                # `ext` and `pos` were set while visiting entry i + 1.
                length = T.cast(T.max(T.sum(I,axis=0))+1,'int32')
                # NOTE(review): `/` here relies on the division semantics in
                # effect for this file -- confirm integer division is intended.
                ext = T.cast(T.minimum(ext/factor,T.min(length)),'int32')
                def pick(i_t, ext):
                    # 0/1 mask with `ext` ones, shifted so the window stays
                    # within B.shape[0].
                    pad = T.minimum(i_t+ext, B.shape[0]) - ext
                    return T.concatenate([T.zeros((pad,), 'int8'), T.ones((ext,), 'int8'), T.zeros((B.shape[0]-pad-ext+1,), 'int8')], axis=0)
                idx, _ = theano.map(pick, sequences = [pos/factor], non_sequences = [ext])
                # Flat indices of the selected positions.
                idx = (idx.dimshuffle(1,0)[:-1].flatten() > 0).nonzero()
                C = C.reshape((C.shape[0]*C.shape[1],C.shape[2]))[idx].reshape((ext,C.shape[1],C.shape[2]))
                z_i = self.distance(C, h_p)
                I = I.reshape((I.shape[0]*I.shape[1],))[idx].reshape((ext,I.shape[1]))
            if i > 0:
                # Remember where to look, and how wide, for the next entry.
                pos = T.argmax(self.softmax(z_i,I),axis=0) * factor
                ext = factor
            else:
                w_i = self.softmax(z_i,I)
        # NOTE(review): `idx` is only assigned in the windowed branch, so this
        # appears to require len(self.base) > 1 -- confirm.
        B = B.reshape((B.shape[0]*B.shape[1],B.shape[2]))[idx].reshape((ext,B.shape[1],B.shape[2]))
        proto = T.sum(B * w_i.dimshuffle(0,1,'x').repeat(B.shape[2],axis=2),axis=0)
        for i in range(len(self.base)):
            self.glimpses[i].append(proto)
    return T.dot(proto, self.custom_vars['W_att_in_0']), updates
def LQLEP_wBarrier( LQLEP = Th.dscalar(), ldet = Th.dscalar(), v1 = Th.dvector(),
                    N_spike = Th.dscalar(), ImM = Th.dmatrix(), U = Th.dmatrix(),
                    V2 = Th.dvector(), u = Th.dvector(), C = Th.dmatrix(),
                    **other):
    '''
    The actual Linear-Quadratic-Exponential-Poisson log-likelihood,
    as a function of theta and M, with a barrier on the log-det term and a prior.

    Adds to ``LQLEP``:
      * ``0.5 * N_spike`` times a term built from ``1/(ldet+250)**2`` and a
        small log penalty on ``1 - 4*sq_nonlinearity``;
      * a second-difference (smoothness) penalty on ``u`` and, when the
        keyword ``uc`` is supplied, the same penalty on ``other['uc']``;
      * a small quadratic penalty on ``v1``.

    ``barrier`` is 0 only when the sum of log-eigenvalues of ``ImM`` exceeds
    -250, its smallest eigenvalue is positive and ``max(4*sq_nonlinearity)``
    is below 1; otherwise it is 1.

    Returns ``named(**other)`` after every local of this function (arguments
    and intermediate results) has been merged into ``other``.

    NOTE: because of ``other.update(locals())`` the names of the local
    variables below are part of the output.  Do not rename them or add new
    ones.
    '''
    sq_nonlinearity = V2**2.*Th.sum( Th.dot(U,C)*U, axis=[1])
    nonlinearity = V2 * Th.sqrt( Th.sum( Th.dot(U,C)*U, axis=[1]))
    # `in` instead of dict.has_key(): has_key no longer exists on Python 3.
    if 'uc' in other:
        LQLEP_wPrior = (LQLEP
            + 0.5 * N_spike * ( 1./(ldet+250.)**2.
                - 0.000001 * Th.sum(Th.log(1.-4*sq_nonlinearity)))
            + 10. * Th.sum( (u[2:]+u[:-2]-2*u[1:-1])**2. )
            + 10. * Th.sum( (other['uc'][2:]+other['uc'][:-2]-2*other['uc'][1:-1])**2. )
            + 0.000000001 * Th.sum( v1**2. ))
    else:
        LQLEP_wPrior = (LQLEP
            + 0.5 * N_spike * ( 1./(ldet+250.)**2.
                - 0.000001 * Th.sum(Th.log(1.-4*sq_nonlinearity)))
            + 10. * Th.sum( (u[2:]+u[:-2]-2*u[1:-1])**2. )
            + 0.000000001 * Th.sum( v1**2. ))
    # The second output of eig() is only a placeholder here; `barrier` is
    # overwritten on the next statement.
    eigsImM,barrier = eig( ImM )
    barrier = 1-(Th.sum(Th.log(eigsImM))>-250) * \
                (Th.min(eigsImM)>0) * (Th.max(4*sq_nonlinearity)<1)
    other.update(locals())
    return named( **other )
def _match(self, sample):
    """Find the best matching unit of ``self.codebook`` for ``sample``.

    Builds ``|c|^2 + |s|^2 - 2 * c.s`` from the row-wise squared norms of
    the codebook and of the sample and their dot product.

    Returns ``(err, bmu)``: the smallest entry of that expression and its
    index in the flattened expression.
    """
    codebook_sq = T.sqr(self.codebook).sum(axis=1, keepdims=True)
    sample_sq = T.sqr(sample).sum(axis=1, keepdims=True)
    cross = T.dot(self.codebook, sample.T)

    distances = codebook_sq + sample_sq - 2 * cross
    return T.min(distances), T.argmin(distances)
def get_stats(input, stat=None):
    """
    Build symbolic summary statistics for a tensor.

    Available statistics: mean, var, std, min, max, l1, l2.

    Parameters
    ----------
    input : tensor
        Theano tensor to grab stats for.
    stat : optional
        Name or collection of names of the statistics to keep; it is
        normalised with ``raise_to_list``.  When that yields None every
        statistic is returned.  Entries that are not strings or not known
        names are ignored.

    Returns
    -------
    dict
        Dictionary of the statistics expressions {string_name: theano expression}
    """
    available = {
        'mean': T.mean(input),
        'var': T.var(input),
        'std': T.std(input),
        'min': T.min(input),
        'max': T.max(input),
        'l1': input.norm(L=1),
        'l2': input.norm(L=2),
    }

    requested = raise_to_list(stat)
    if requested is None:
        return available

    return {
        name: available[name]
        for name in requested
        if isinstance(name, string_types) and name in available
    }
def compute_probabilistic_matrix(self,X, y, num_cases, k=5):
    """Class probabilities for every case from its k nearest neighbours per class.

    ``X`` is projected with ``self.A``; pairwise distances in the projected
    space come from the squared output of ``dist2hy``.  For each class the
    score of a case is ``exp(-mean distance to its k closest members of that
    class)``, and the scores are normalised over classes.

    :param X: data matrix, multiplied on the right by ``self.A``.
    :param y: class label of every case.
    :param num_cases: number of cases (rows of the result).
    :param k: number of neighbours kept per class.
    :return: symbolic ``(num_cases, self.num_classes)`` matrix ``p``.
    """
    projected = T.dot(X, self.A)
    pairwise = T.sqr(dist2hy(projected, projected))

    # Push the diagonal above every other entry so a point is never its own
    # nearest neighbour, then shift each row by its nearest-neighbour value.
    pairwise = T.extra_ops.fill_diagonal(pairwise, T.max(pairwise)+1)
    nearest = T.min(pairwise, axis=1)
    pairwise = (pairwise.T - nearest).T
    shifted = T.extra_ops.fill_diagonal(pairwise, 0)

    class_scores = T.zeros((num_cases, self.num_classes))
    total_score = T.zeros((num_cases,))
    row_ids = T.arange(num_cases)
    for label in xrange(self.num_classes):
        # Every row of the mask marks the columns whose label equals `label`.
        members = T.eq(T.outer(T.ones_like(y), y), label)
        n_members = T.sum(members[0])
        to_members = T.reshape(shifted[members.nonzero()], (num_cases, n_members))

        # Column indices of the k smallest distances in every row.
        # NOTE(review): presumably every class has at least k members -- confirm.
        closest = T.argsort(to_members, axis=1)[:, 0:k]
        k_dists = T.zeros((num_cases, k))
        for col in xrange(k):
            k_dists = T.set_subtensor(k_dists[:, col],
                                      to_members[row_ids, closest[:, col]])

        score = T.exp(-T.mean(k_dists, axis=1))
        class_scores = T.set_subtensor(class_scores[:, label], score)
        total_score = total_score + score

    # Normalise the per-class scores of each case.
    return class_scores / total_score.dimshuffle(0, 'x')
def make_consensus(self, networks, axis=2):
    """Combine ``networks`` along ``axis`` as selected by ``self.attrs['consensus']``.

    Supported methods:
      * 'max', 'min', 'mean', 'sum', 'prod', 'var': the matching reduction
        over ``axis`` ('sum' accumulates in ``theano.config.floatX``);
      * 'flat': ``networks`` unchanged when ``self.depth == 1``, otherwise
        flattened to 3 dimensions for ``axis == 2`` and to 2 otherwise;
      * 'project': contraction of ``networks`` over ``axis`` with a newly
        added parameter (this registers a parameter via ``self.add_param``);
      * 'random': one index drawn from ``self.rng`` is selected along
        ``axis`` (axes 0..3 only).

    Any other method name, or a 'random' axis above 3, fails an assertion.
    """
    cns = self.attrs['consensus']

    if cns == 'max':
        return T.max(networks, axis=axis)
    if cns == 'min':
        return T.min(networks, axis=axis)
    if cns == 'mean':
        return T.mean(networks, axis=axis)

    if cns == 'flat':
        if self.depth == 1:
            return networks
        target_ndim = 3 if axis == 2 else 2
        return networks.flatten(ndim=target_ndim)

    if cns == 'sum':
        return T.sum(networks, axis=axis, acc_dtype=theano.config.floatX)
    if cns == 'prod':
        return T.prod(networks, axis=axis)
    if cns == 'var':
        return T.var(networks, axis=axis)

    if cns == 'project':
        n_out = self.attrs['n_out']
        weights = self.create_random_uniform_weights(n_out, 1, n_out + self.depth + 1)
        p = self.add_param(weights)
        return T.tensordot(p, networks, [[1], [axis]])

    if cns == 'random':
        # NOTE(review): if self.rng.random_integers treats `high` as
        # inclusive, idx can equal self.depth -- confirm that is in range.
        idx = self.rng.random_integers(size=(1,), low=0, high=self.depth)
        if axis == 0:
            return networks[idx]
        if axis == 1:
            return networks[:, idx]
        if axis == 2:
            return networks[:, :, idx]
        if axis == 3:
            return networks[:, :, :, idx]
        assert False, "axis too large"
    else:
        assert False, "consensus method unknown: " + cns
def get_cost_updates(self, learning_rate, beta=0.9):
    """Compute the cost and the parameter updates for one training step.

    The input is first rescaled to [0, 1] with its global min and max, and
    the rescaled expression is stored back in ``self.x``.  The cost is the
    squared reconstruction error, summed over each example and averaged over
    the minibatch.

    Each entry of ``self.mom`` is updated as an exponential moving average
    of the matching gradient (decay ``beta``); each parameter then moves
    against its momentum entry, scaled by ``learning_rate``.

    :return: tuple ``(cost, updates)`` where ``updates`` is a list of
        ``(shared_variable, new_expression)`` pairs, momentum pairs first.
    """
    upper = T.max(self.x)
    lower = T.min(self.x)
    # NOTE(review): this rebinds self.x to the rescaled expression, and
    # divides by zero for a constant input -- confirm both are intended.
    self.x = (self.x - lower) / (upper - lower)

    hidden = self.get_hidden_values(self.x)
    reconstruction = self.get_reconstructed_input(hidden)

    # One squared-error value per example, then the minibatch average.
    per_example = T.sum(T.square(reconstruction - self.x), axis=1)
    cost = T.mean(per_example)

    gradients = T.grad(cost, self.params)

    updates = [(m, m * beta + (1 - beta) * g)
               for m, g in zip(self.mom, gradients)]
    updates.extend((p, p - learning_rate * m)
                   for p, m in zip(self.params, self.mom))
    return (cost, updates)
def NRMSE(self, y):
    """Return the normalised root-mean-square error of ``self.y_pred``.

    The root-mean-square difference between ``y`` and ``self.y_pred`` is
    divided by the range of ``y`` (``max(y) - min(y)``).

    :type y: theano.tensor.TensorType
    :param y: target values, with the same number of dimensions as
        ``self.y_pred`` and a floating point dtype.
    :raises TypeError: if ``y`` and ``self.y_pred`` differ in ``ndim``.
    :raises NotImplementedError: if the dtype of ``y`` does not start with
        'flo'.
    """
    if y.ndim != self.y_pred.ndim:
        raise TypeError(
            'y should have the same shape as self.y_pred',
            ('y', y.type, 'y_pred', self.y_pred.type)
        )

    # Only floating point targets are supported.
    if not y.dtype.startswith('flo'):
        raise NotImplementedError()

    rmse = T.sqrt(T.mean(T.sqr(y - self.y_pred)))
    target_range = T.max(y) - T.min(y)
    return rmse / target_range
def get_stats(input, stat=None):
    """
    Build symbolic summary statistics for a tensor.

    Available statistics: mean, var, std, min, max, l1, l2.

    Parameters
    ----------
    input : tensor
        Theano tensor to grab stats for.
    stat : optional
        Name or collection of names of the statistics to keep; it is
        normalised with ``raise_to_list``.  When that yields None every
        statistic is returned.  Entries that are not strings or not known
        names are ignored.

    Returns
    -------
    dict
        Dictionary of the statistics expressions {string_name: theano expression}
    """
    available = {
        'mean': T.mean(input),
        'var': T.var(input),
        'std': T.std(input),
        'min': T.min(input),
        'max': T.max(input),
        'l1': input.norm(L=1),
        'l2': input.norm(L=2),
    }

    requested = raise_to_list(stat)
    if requested is None:
        return available

    return {
        name: available[name]
        for name in requested
        if isinstance(name, six.string_types) and name in available
    }
def get_output_for(self, input, init=False, **kwargs):
    """Append cross-example similarity features to ``input``.

    ``input`` (flattened to 2 dimensions if needed) is contracted with
    ``self.W``.  For every pair of examples the absolute differences of the
    resulting activations are summed over axis 2, and a feature is the sum
    over the other examples of ``exp(-difference)``.  A large constant on
    the example-with-itself entries removes each example's own
    contribution.

    With ``init=True`` the differences are rescaled by half their mean
    nearest-neighbour value, the features are centred, and the matching
    corrections for ``self.log_weight_scale`` and ``self.b`` are stored in
    ``self.init_updates``.  Otherwise ``self.b`` is added to the features.

    :return: ``input`` concatenated with the features along axis 1.
    """
    if input.ndim > 2:
        # Treat anything beyond a matrix as a batch of feature vectors.
        input = input.flatten(2)

    activation = T.tensordot(input, self.W, [[1], [0]])
    # The batch axis goes last on one side so the subtraction pairs every
    # example with every other example.
    pairwise = abs(activation.dimshuffle(0, 1, 2, 'x')
                   - activation.dimshuffle('x', 1, 2, 0))
    self_mask = 1e6 * T.eye(input.shape[0]).dimshuffle(0, 'x', 1)
    abs_dif = T.sum(pairwise, axis=2) + self_mask

    if not init:
        f = T.sum(T.exp(-abs_dif), axis=2) + self.b.dimshuffle('x', 0)
        return T.concatenate([input, f], axis=1)

    # Data-dependent initialisation.
    mean_min_abs_dif = 0.5 * T.mean(T.min(abs_dif, axis=2), axis=0)
    abs_dif = abs_dif / mean_min_abs_dif.dimshuffle('x', 0, 'x')
    self.init_updates = [
        (self.log_weight_scale,
         self.log_weight_scale - T.log(mean_min_abs_dif).dimshuffle(0, 'x')),
    ]

    f = T.sum(T.exp(-abs_dif), axis=2)
    mf = T.mean(f, axis=0)
    f = f - mf.dimshuffle('x', 0)
    self.init_updates.append((self.b, -mf))
    return T.concatenate([input, f], axis=1)
def unet_crossentropy_loss_sampled(y_true, y_pred):
    """Class-weighted pixel-wise cross-entropy loss.

    Predictions are clipped to ``[epsilon, 1 - epsilon]`` and both tensors
    are flattened.  Positive pixels (non-zero ``y_true``) contribute
    ``-alpha * log(p)``, negative pixels ``-(1 - alpha) * log(1 - p)``.  The
    sum is divided by the patch volume
    ``patchSize_out * patchSize_out * patchZ_out`` and finally by
    ``1 - alpha``.

    When the smaller class is empty only the negative term is used.
    All selected indices are used; they are not truncated to ``n_samples``.
    """
    alpha = 0.6
    epsilon = 1.0e-5

    y_pred_clipped = T.flatten(T.clip(y_pred, epsilon, 1.0 - epsilon))
    y_true = T.flatten(y_true)

    # nonzero() returns one index array per dimension; the data is flat, so
    # take the first.
    indPos = T.nonzero(y_true)[0]
    indNeg = T.nonzero(1 - y_true)[0]

    # Shuffle each index set (positives first, then negatives).
    indPos = indPos[srng.permutation(n=indPos.shape[0])]
    indNeg = indNeg[srng.permutation(n=indNeg.shape[0])]

    # Size of the smaller class.
    n_samples = T.cast(T.min([T.sum(y_true), T.sum(1 - y_true)]), dtype='int64')

    total = np.float64(patchSize_out * patchSize_out * patchZ_out)

    pos_log_sum = T.sum(T.log(y_pred_clipped[indPos]))
    weighted_neg = (1 - alpha) * T.sum(T.log(1 - y_pred_clipped[indNeg]))

    both_classes = (-alpha * pos_log_sum - weighted_neg) / total
    negatives_only = -weighted_neg / total
    loss_vector = ifelse(T.gt(n_samples, 0), both_classes, negatives_only)

    return T.mean(loss_vector) / (1 - alpha)
def test_max_pool_2d_3D(self):
    """Check max_pool_2d on a 3-D input against the numpy reference.

    Covers pool shape (1, 2) with both ``ignore_border`` settings and the
    modes 'max', 'sum', 'average_inc_pad' and 'average_exc_pad'.  For every
    combination the forward values must match exactly, and the sum of the
    output and its gradient must compile and run.
    """
    rng = numpy.random.RandomState(utt.fetch_seed())
    pool_shapes = [(1, 2)]
    border_flags = [True, False]
    modes = ['max', 'sum', 'average_inc_pad', 'average_exc_pad']

    imval = rng.rand(2, 3, 4)
    images = tensor.dtensor3()

    for maxpoolshp, ignore_border, mode in product(pool_shapes, border_flags, modes):
        numpy_output_val = self.numpy_max_pool_2d(imval, maxpoolshp,
                                                  ignore_border, mode)
        output = max_pool_2d(images, maxpoolshp, ignore_border, mode=mode)
        output_val = function([images], output)(imval)
        assert numpy.all(output_val == numpy_output_val), (
            "output_val is %s, numpy_output_val is %s"
            % (output_val, numpy_output_val))

        # The scalar sum and its gradient only need to evaluate without
        # error; their values are not compared.
        c = tensor.sum(output)
        c_val = function([images], c)(imval)
        g = tensor.grad(c, images)
        grad_summary = [g.shape,
                        tensor.min(g, axis=(0, 1, 2)),
                        tensor.max(g, axis=(0, 1, 2))]
        g_val = function([images], grad_summary)(imval)
def test_max_pool_2d_3D(self):
    """Check max_pool_2d on a 3-D input against the numpy reference.

    Covers pool shape (1, 2) with both ``ignore_border`` settings and the
    modes 'max', 'average_inc_pad' and 'average_exc_pad'.  For every
    combination the forward values must match exactly, and the sum of the
    output and its gradient must compile and run.
    """
    rng = numpy.random.RandomState(utt.fetch_seed())
    pool_shapes = [(1, 2)]
    border_flags = [True, False]
    modes = ['max', 'average_inc_pad', 'average_exc_pad']

    imval = rng.rand(2, 3, 4)
    images = tensor.dtensor3()

    for maxpoolshp, ignore_border, mode in product(pool_shapes, border_flags, modes):
        numpy_output_val = self.numpy_max_pool_2d(imval, maxpoolshp,
                                                  ignore_border, mode)
        output = max_pool_2d(images, maxpoolshp, ignore_border, mode=mode)
        output_val = function([images], output)(imval)
        assert numpy.all(output_val == numpy_output_val), (
            "output_val is %s, numpy_output_val is %s"
            % (output_val, numpy_output_val))

        # The scalar sum and its gradient only need to evaluate without
        # error; their values are not compared.
        c = tensor.sum(output)
        c_val = function([images], c)(imval)
        g = tensor.grad(c, images)
        grad_summary = [
            g.shape,
            tensor.min(g, axis=(0, 1, 2)),
            tensor.max(g, axis=(0, 1, 2)),
        ]
        g_val = function([images], grad_summary)(imval)
def apply_moving_average(params, avg_params, updates, steps, decay):
    """Extend ``updates`` with moving-average updates for ``avg_params``.

    Every averaged parameter ``a`` moves towards its live parameter ``p``::

        a <- a - (1 - weight) * (a - p)
        weight = min(decay, steps / (steps + 1))

    so early steps, where ``steps / (steps + 1)`` is small, track the live
    parameter more closely than ``decay`` alone would.

    :param params: live parameters.
    :param avg_params: averaged copies, in the same order as ``params``.
        The two sequences are paired with ``zip``, so extra entries of the
        longer one are silently ignored.
    :param updates: mapping ``{shared_variable: new_expression}`` holding
        the existing updates.
    :param steps: number of steps taken so far.
    :param decay: upper bound on the averaging weight.
    :return: list of ``(variable, expression)`` pairs: the items of
        ``updates`` followed by one pair per averaged parameter.
    """
    weight = T.min([decay, steps / (steps + 1.)]).astype(theano.config.floatX)
    avg_updates = [(a, a - (1. - weight) * (a - p))
                   for p, a in zip(params, avg_params)]
    # items() is a view on Python 3 and cannot be concatenated with a list;
    # materialise it first (a no-op copy on Python 2).
    return list(updates.items()) + avg_updates
def downsample(source, axis, factor, method="average"):
    """Reduce ``source`` along ``axis`` by an integer ``factor``.

    The axis is first truncated to a multiple of ``factor``.  Except for
    ``method == "lstm"``, it is then split into ``(length // factor, factor)``
    and the new ``factor`` axis (at position ``axis + 1``) is combined
    according to ``method``:

      * "average", "max", "min": the matching reduction over the new axis;
      * "concat" / "mlp": the new axis is swapped to the end and merged into
        the last dimension (the reshape is written for a 3-D ``source``);
      * "batch": the new axis is merged into the second dimension
        (``axis`` must be 0);
      * "lstm": the truncated ``source`` is returned as is (``axis`` must
        be 0).

    :param source: symbolic tensor to downsample.
    :param axis: axis to reduce.
    :param factor: integer-valued reduction factor.
    :param method: one of the names above; anything else fails an assertion.
    """
    assert factor == int(factor), "factor is expected to be an int"
    factor = int(factor)
    src = source

    # Floor division: the quotient is a length and a slice bound, so it must
    # stay an integer (plain `/` is true division on Python 3).
    n_groups = source.shape[axis] // factor

    # make shape[axis] a multiple of factor
    source = source[slice_for_axis(axis=axis, s=slice(0, n_groups * factor))]

    # Add a temporary dimension as the factor.
    added_dim_shape = [source.shape[i] for i in range(source.ndim)]
    added_dim_shape = (added_dim_shape[:axis]
                       + [source.shape[axis] // factor, factor]
                       + added_dim_shape[axis + 1:])

    if method == "lstm":
        assert axis == 0
        return source

    source = T.reshape(source, added_dim_shape)
    if method == "average":
        return T.mean(source, axis=axis + 1)
    elif method == "max":
        return T.max(source, axis=axis + 1)
    elif method == "min":
        return T.min(source, axis=axis + 1)
    elif method == "concat" or method == 'mlp':
        # concatenates in last dimension
        return source.swapaxes(axis+1,src.ndim-1).reshape([source.shape[0],src.shape[1]] + [factor * source.shape[3]])
    elif method == "batch":
        assert axis == 0
        return source.dimshuffle(1,0,2,3).reshape((source.shape[1],source.shape[0]*source.shape[2],source.shape[3]))
    else:
        assert False, "unknown downsample method %r" % method
def unet_crossentropy_loss_sampled(y_true, y_pred):
    """Class-balanced cross-entropy on an equal number of sampled pixels.

    Predictions are clipped to ``[epsilon, 1 - epsilon]`` and both tensors
    are flattened.  Positive and negative pixel indices are shuffled and
    truncated to the size of the smaller class.  The loss is
    ``-mean(log p[pos]) - mean(log(1 - p[neg]))``.

    Two debug lines are printed while the graph is built: the function name
    and the symbolic loss expression (not its numeric value).

    NOTE(review): when one class is absent both index sets are empty, so
    the means are taken over nothing -- confirm callers never hit that case.
    """
    # Single-argument print calls behave identically on Python 2 and 3.
    print('unet_crossentropy_loss_sampled')
    epsilon = 1.0e-4
    y_pred_clipped = T.flatten(T.clip(y_pred, epsilon, 1.0 - epsilon))
    y_true = T.flatten(y_true)

    # nonzero() returns one index array per dimension; the data is flat, so
    # take the first.
    indPos = T.nonzero(y_true)[0]
    indNeg = T.nonzero(1 - y_true)[0]

    # shuffle
    n = indPos.shape[0]
    indPos = indPos[srng.permutation(n=n)]
    n = indNeg.shape[0]
    indNeg = indNeg[srng.permutation(n=n)]

    # take equal number of samples depending on which class has less
    n_samples = T.cast(T.min([T.sum(y_true), T.sum(1 - y_true)]), dtype='int64')
    indPos = indPos[:n_samples]
    indNeg = indNeg[:n_samples]

    loss_vector = (-T.mean(T.log(y_pred_clipped[indPos]))
                   - T.mean(T.log(1 - y_pred_clipped[indNeg])))
    average_loss = T.mean(loss_vector)
    # Same text as the former `print 'average_loss:', average_loss`.
    print('average_loss: %s' % (average_loss,))
    return average_loss
def get_monitoring_channels(self, V):
    """Return the monitoring channels of this model as ``{name: expression}``.

    When ``self.monitor_params`` is set, every parameter contributes its
    min, mean and max; parameters whose name contains 'W' also contribute
    the min, mean and max of ``theano_norms(param)``.  Every channel name is
    prefixed with ``self.monitoring_channel_prefix``.

    The model is put in compile mode for the duration of the call and is
    always switched back to deploy mode, even on error.

    :param V: not used by this implementation.
    """
    try:
        self.compile_mode()

        channels = {}
        if self.monitor_params:
            for param in self.get_params():
                base = param.name
                channels[base + '_min'] = full_min(param)
                channels[base + '_mean'] = T.mean(param)
                channels[base + '_max'] = full_max(param)

                if 'W' in base:
                    norms = theano_norms(param)
                    channels[base + '_norms_min'] = T.min(norms)
                    channels[base + '_norms_mean'] = T.mean(norms)
                    channels[base + '_norms_max'] = T.max(norms)

        return {
            self.monitoring_channel_prefix + key: value
            for key, value in channels.items()
        }
    finally:
        self.deploy_mode()
def train_simple(self, X_train, y_train, n_epochs, batch_size, optimization_function, cost_function):
    """Train on minibatches for ``n_epochs`` epochs, reshuffling between epochs.

    The data is copied into shared variables (inputs as float32, targets as
    int32).  One compiled function runs an optimisation step on batch
    ``index`` and returns ``[cost, error]``, which is printed for every
    batch.  A second compiled function permutes both shared variables with
    the same random permutation.

    :param optimization_function: called as ``f(self.params, cost)``; its
        result is used as the ``updates`` of the training function.
    :param cost_function: called as ``f(self.y, self.out, self.params)``.

    NOTE(review): the reshuffle is placed after each epoch's batch loop;
    this nesting was reconstructed from a source whose indentation was
    lost -- confirm.
    """
    cost = cost_function(self.y, self.out, self.params)
    error = self.error()

    X_train_shared = shared(X_train.astype('float32'))
    y_train_shared = shared(y_train.astype('int32'))

    N = X_train.shape[0]
    # Ceiling division: one extra, shorter batch when N is not a multiple.
    full_batches, remainder = divmod(N, batch_size)
    n_batches = full_batches + (remainder != 0)

    index = T.iscalar()
    start = index * batch_size
    # Clamp so the last batch does not run past the data.
    stop = T.min(((index + 1) * batch_size, N))

    step_updates = optimization_function(self.params, cost)
    train_step = function(
        [index], [cost, error],
        givens=[(self.X, X_train_shared[start:stop]),
                (self.y, y_train_shared[start:stop])],
        updates=step_updates)

    order = T.ivector()
    reshuffle = function(
        [order],
        updates=[(X_train_shared, X_train_shared[order]),
                 (y_train_shared, y_train_shared[order])],
        allow_input_downcast=True)

    for _epoch in range(n_epochs):
        for batch in range(n_batches):
            print(train_step(batch))
        reshuffle(np.random.permutation(N))
def get_output_for(self, input, init=False, **kwargs):
    """Append cross-example similarity features to ``input``.

    ``input`` (flattened to 2 dimensions if needed) is contracted with
    ``self.W``.  For every pair of examples the absolute differences of the
    resulting activations are summed over axis 2, and a feature is the sum
    over the other examples of ``exp(-difference)``.  A large constant on
    the example-with-itself entries removes each example's own
    contribution.

    With ``init=True`` the differences are rescaled by half their mean
    nearest-neighbour value, the features are centred, and the matching
    corrections for ``self.log_weight_scale`` and ``self.b`` are stored in
    ``self.init_updates``.  Otherwise ``self.b`` is added to the features.

    :return: ``input`` concatenated with the features along axis 1.
    """
    if input.ndim > 2:
        # Treat anything beyond a matrix as a batch of feature vectors.
        input = input.flatten(2)

    activation = T.tensordot(input, self.W, [[1], [0]])
    # The batch axis goes last on one side so the subtraction pairs every
    # example with every other example.
    left = activation.dimshuffle(0, 1, 2, 'x')
    right = activation.dimshuffle('x', 1, 2, 0)
    self_mask = 1e6 * T.eye(input.shape[0]).dimshuffle(0, 'x', 1)
    abs_dif = T.sum(abs(left - right), axis=2) + self_mask

    if not init:
        f = T.sum(T.exp(-abs_dif), axis=2) + self.b.dimshuffle('x', 0)
        return T.concatenate([input, f], axis=1)

    # Data-dependent initialisation.
    mean_min_abs_dif = 0.5 * T.mean(T.min(abs_dif, axis=2), axis=0)
    abs_dif = abs_dif / mean_min_abs_dif.dimshuffle('x', 0, 'x')
    self.init_updates = [
        (self.log_weight_scale,
         self.log_weight_scale - T.log(mean_min_abs_dif).dimshuffle(0, 'x')),
    ]

    f = T.sum(T.exp(-abs_dif), axis=2)
    mf = T.mean(f, axis=0)
    f = f - mf.dimshuffle('x', 0)
    self.init_updates.append((self.b, -mf))
    return T.concatenate([input, f], axis=1)
def rank_objective(num_queries: int, num_w_samples: int, samples: tt.TensorVariable, features: tt.TensorVariable, beta_pref: float) -> float:
    """
    The ranking maximum volume removal objective function, using the
    Plackett-Luce model of human behavior.

    CANNOT BE USED WITH (INC_PREV_QUERY AND NO DEMPREF).

    For every possible ranking of the queries, the Plackett-Luce probability
    of that ranking is computed from the queries' expected rewards; the
    volume removed by the ranking is one minus that probability.  The
    objective is the smallest volume removed over all rankings.

    :param num_queries: number of queries being ranked.
    :param num_w_samples: not used by this implementation.
    :param samples: samples of w, used to approximate the objective
        (n_samples x feature_size).
    :param features: the feature values of each query
        (n_queries x feature_size).
    :param beta_pref: scale applied to reward differences.
    :return: the value of the objective function, evaluated on the given
        queries' features (a symbolic scalar).
    """
    # Expected reward of each query: its dot product with every sample of w,
    # averaged over the samples.
    exp_rewards = tt.sum(tt.dot(features, samples.T), axis=1) / samples.shape[0]

    volumes_removed = []
    # iterating over all possible rankings
    for rank in itertools.permutations(range(num_queries)):
        # exp_rewards_sorted[p] is the reward of the query ranked at place p.
        exp_rewards_sorted = [None] * len(rank)
        for query, place in enumerate(rank):
            exp_rewards_sorted[place] = exp_rewards[query]

        value = 1
        for i in range(len(rank) - 1):
            # Probability that item i is picked first among items i..end.
            # The sum runs to the LAST item inclusive: stopping one short
            # dropped the lowest-ranked item from every denominator and
            # made the objective identically 0 for two queries.
            exp_i = [
                beta_pref * (exp_rewards_sorted[j] - exp_rewards_sorted[i])
                for j in range(i, len(rank))
            ]
            value *= (1. / tt.sum(tt.exp(exp_i)))
        volumes_removed.append(1 - value)

    return tt.min(volumes_removed)
def unet_crossentropy_loss_sampled(y_true, y_pred):
    """Sampled loss over an equal number of positive and negative pixels.

    Predictions are clipped to ``[epsilon, 1 - epsilon]`` and both tensors
    are flattened.  Indices of pixels labelled 1 and 0 are shuffled and
    truncated to the size of the smaller class.  The loss is
    ``-mean(log p[pos]) - mean(log p[neg])``, clipped to
    ``[epsilon, 1 - epsilon]``.

    If that loss is NaN at run time, the mean clipped prediction over the
    positive samples is returned instead.

    NOTE(review): the negative term uses ``log(p)`` rather than
    ``log(1 - p)`` -- confirm that this is intended.
    """
    epsilon = 1.0e-4
    y_pred_clipped = T.flatten(T.clip(y_pred, epsilon, 1.0-epsilon))
    y_true = T.flatten(y_true)

    # filter the right indices
    classPos = 1
    classNeg = 0
    indPos = T.eq(y_true, classPos).nonzero()[0]
    indNeg = T.eq(y_true, classNeg).nonzero()[0]

    # shuffle
    n = indPos.shape[0]
    indPos = indPos[UNET.srng.permutation(n=n)]
    n = indNeg.shape[0]
    indNeg = indNeg[UNET.srng.permutation(n=n)]

    # take equal number of samples depending on which class has less
    n_samples = T.cast(T.min([indPos.shape[0], indNeg.shape[0]]), dtype='int64')
    indPos = indPos[:n_samples]
    indNeg = indNeg[:n_samples]

    loss_vector = (-T.mean(T.log(y_pred_clipped[indPos]))
                   - T.mean(T.log(y_pred_clipped[indNeg])))
    loss_vector = T.clip(loss_vector, epsilon, 1.0-epsilon)
    average_loss = T.mean(loss_vector)

    # The NaN test has to be part of the graph.  A Python `if` on the
    # symbolic result is decided once, while the graph is being built, and
    # never looks at the per-batch value.
    fallback = T.mean(y_pred_clipped[indPos])
    return T.switch(T.isnan(average_loss), fallback, average_loss)
def adjust_estimate(estimate_r_t, risk_pref_t):
    """Scale estimates around 0.5 by a risk preference and bound them to [0, 1].

    The adjusted value is ``(estimate_r_t - 0.5) * risk_pref_t + 0.5``
    (``risk_pref_t`` broadcasts against ``estimate_r_t``).  It is bounded
    below by 0 and above by 1 by stacking it with a constant tensor and
    reducing over the stacking axis.

    :param estimate_r_t: estimates; indexed as a 2-D tensor.
    :param risk_pref_t: multiplier applied to the distance from 0.5.
    :return: the bounded estimates, squeezed.
    """
    adjusted = (estimate_r_t - 0.5) * risk_pref_t + 0.5

    def _with_trailing_axis(tensor):
        # Reshape to (rows, cols, 1), using the shape of `adjusted`.
        return T.reshape(
            tensor,
            newshape=[adjusted.shape[0], adjusted.shape[1], 1])

    adjusted_3d = _with_trailing_axis(adjusted)
    zeros_3d = _with_trailing_axis(T.zeros_like(adjusted))

    # Lower bound: element-wise max with 0 via stack + reduce on axis 2.
    floored = T.max(T.stack([adjusted_3d, zeros_3d], axis=2), axis=2)

    # Upper bound: element-wise min with 1, same construction.
    capped = T.min(
        T.stack([floored, T.ones_like(floored)], axis=2),
        axis=2)

    return T.squeeze(capped)