def get_stencil(self, t, r=None, texp=None):
    if r is None or texp is None:
        return tt.shape_padright(t)

    z = tt.zeros_like(self.a)
    r = tt.as_tensor_variable(r)
    R = self.r_star + z
    hp = 0.5 * self.period

    if self.ecc is None:
        # Equation 14 from Winn (2010)
        k = r / self.r_star
        arg1 = tt.square(1 + k) - tt.square(self.b)
        arg2 = tt.square(1 - k) - tt.square(self.b)
        factor = R / (self.a * self.sin_incl)
        hdur1 = hp * tt.arcsin(factor * tt.sqrt(arg1)) / np.pi
        hdur2 = hp * tt.arcsin(factor * tt.sqrt(arg2)) / np.pi
        ts = [-hdur1, -hdur2, hdur2, hdur1]
        flag = z
    else:
        M_contact1 = self.contact_points_op(
            self.a, self.ecc, self.cos_omega, self.sin_omega,
            self.cos_incl + z, self.sin_incl + z, R + r)
        M_contact2 = self.contact_points_op(
            self.a, self.ecc, self.cos_omega, self.sin_omega,
            self.cos_incl + z, self.sin_incl + z, R - r)

        flag = M_contact1[2] + M_contact2[2]

        ts = [
            tt.mod((M_contact1[0] - self.M0) / self.n + hp, self.period) - hp,
            tt.mod((M_contact2[0] - self.M0) / self.n + hp, self.period) - hp,
            tt.mod((M_contact2[1] - self.M0) / self.n + hp, self.period) - hp,
            tt.mod((M_contact1[1] - self.M0) / self.n + hp, self.period) - hp,
        ]

    start = self.period * tt.floor((tt.min(t) - self.t0) / self.period)
    end = self.period * (tt.ceil((tt.max(t) - self.t0) / self.period) + 1)
    start += self.t0
    end += self.t0
    tout = []
    for i in range(4):
        if z.ndim < 1:
            tout.append(ts[i] + tt.arange(start, end, self.period))
        else:
            tout.append(theano.scan(
                fn=lambda t0, s0, e0, p0: t0 + tt.arange(s0, e0, p0),
                sequences=[ts[i], start, end, self.period],
            )[0].flatten())

    ts = tt.sort(tt.concatenate(tout))
    return ts, flag
def custom_svrg1(loss, params, m=100, learning_rate=0.01):
    grads = theano.grad(loss, params)

    updates = OrderedDict()
    it_num = theano.shared(np.cast['int16'](0.))
    it = it_num + 1

    for param, grad in zip(params, grads):
        value = param.get_value(borrow=True)

        # snapshot gradient at w_tilde, refreshed every m iterations
        mu = theano.shared(np.zeros(value.shape, dtype=value.dtype),
                           broadcastable=param.broadcastable)
        grad_w_tilde = theano.shared(np.zeros(value.shape, dtype=value.dtype),
                                     broadcastable=param.broadcastable)
        new_grad_w_tilde = theano.ifelse.ifelse(T.eq(it, m), grad, grad_w_tilde)

        # running (unnormalised) sum of gradients over the last m steps
        mu_acc = theano.shared(np.zeros(value.shape, dtype=value.dtype),
                               broadcastable=param.broadcastable)

        updates[param] = param - learning_rate * (grad - grad_w_tilde + mu)
        updates[grad_w_tilde] = new_grad_w_tilde
        updates[mu] = theano.ifelse.ifelse(T.eq(T.mod(it, m), 0), mu_acc, mu)
        updates[mu_acc] = theano.ifelse.ifelse(T.eq(T.mod(it, m), 0),
                                               0 * mu_acc, mu_acc + grad)
        # advance the counter, wrapping back to 1 every m iterations
        updates[it_num] = theano.ifelse.ifelse(T.eq(it, m),
                                               np.cast['int16'](1), it)

    return updates
def AdaMaxAvg2(ws, objective, alpha=.01, beta1=.1, beta2=.001, beta3=0.01, n_accum=1):
    if n_accum == 1:
        return AdaMaxAvg(ws, objective, alpha, beta1, beta2, beta3)
    print 'AdaMax_Avg2', 'alpha:', alpha, 'beta1:', beta1, 'beta2:', beta2, 'beta3:', beta3, 'n_accum:', n_accum

    gs = G.ndict.T_grad(objective.sum(), ws, disconnected_inputs='raise')

    new = OrderedDict()

    from theano.ifelse import ifelse
    it = G.sharedf(0.)
    new[it] = it + 1
    reset = T.eq(T.mod(it, n_accum), 0)
    update = T.eq(T.mod(it, n_accum), n_accum - 1)

    ws_avg = []
    for j in range(len(ws)):
        w_avg = {}
        for i in ws[j]:
            _w = ws[j][i]
            _g = gs[j][i]
            #_g = T.switch(T.isnan(_g),T.zeros_like(_g),_g) #remove NaN's
            mom1 = G.sharedf(_w.get_value() * 0.)
            _max = G.sharedf(_w.get_value() * 0.)
            w_avg[i] = G.sharedf(_w.get_value())
            g_sum = G.sharedf(_w.get_value() * 0.)

            new[g_sum] = ifelse(reset, _g, g_sum + _g)
            new[mom1] = ifelse(update, (1 - beta1) * mom1 + beta1 * new[g_sum], mom1)
            new[_max] = ifelse(update, T.maximum((1 - beta2) * _max, abs(new[g_sum]) + 1e-8), _max)
            new[_w] = ifelse(update, _w + alpha * new[mom1] / new[_max], _w)
            new[w_avg[i]] = ifelse(update, beta3 * new[_w] + (1. - beta3) * w_avg[i], w_avg[i])
        ws_avg += [w_avg]
    return new, ws_avg
def in_transit(self, t, r=0.0, texp=None):
    """Get a list of timestamps that are in transit

    Args:
        t (vector): A vector of timestamps to be evaluated.
        r (Optional): The radii of the planets.
        texp (Optional[float]): The exposure time.

    Returns:
        The indices of the timestamps that are in transit.

    """
    z = tt.zeros_like(self.a)
    r = tt.as_tensor_variable(r) + z
    R = self.r_star + z

    # Wrap the times into time since transit
    hp = 0.5 * self.period
    dt = tt.mod(self._warp_times(t) + hp, self.period) - hp

    if self.ecc is None:
        # Equation 14 from Winn (2010)
        k = r / R
        arg = tt.square(1 + k) - tt.square(self.b)
        factor = R / (self.a * self.sin_incl)
        hdur = hp * tt.arcsin(factor * tt.sqrt(arg)) / np.pi
        t_start = -hdur
        t_end = hdur
        flag = z
    else:
        M_contact = self.contact_points_op(
            self.a, self.ecc, self.cos_omega, self.sin_omega,
            self.cos_incl + z, self.sin_incl + z, R + r,
        )
        flag = M_contact[2]

        t_start = (M_contact[0] - self.M0) / self.n
        t_start = tt.mod(t_start + hp, self.period) - hp

        t_end = (M_contact[1] - self.M0) / self.n
        t_end = tt.mod(t_end + hp, self.period) - hp

    t_start = tt.switch(tt.gt(t_start, 0.0), t_start - self.period, t_start)
    t_end = tt.switch(tt.lt(t_end, 0.0), t_end + self.period, t_end)

    if texp is not None:
        t_start -= 0.5 * texp
        t_end += 0.5 * texp

    mask = tt.any(tt.and_(dt >= t_start, dt <= t_end), axis=-1)
    result = ifelse(tt.all(tt.eq(flag, 0)),
                    tt.arange(t.size)[mask],
                    tt.arange(t.size))

    return result
def AdaMax2(w, objective, alpha=.01, beta1=.1, beta2=.001, n_accum=2):
    print 'AdaMax2', 'alpha:', alpha, 'beta1:', beta1, 'beta2:', beta2, 'n_accum:', n_accum
    g = T.grad(objective.sum(), w, disconnected_inputs='warn')

    new = OrderedDict()

    from theano.ifelse import ifelse
    it = G.sharedf(0.)
    new[it] = it + 1
    reset = T.eq(T.mod(new[it], n_accum), 0)
    update = T.eq(T.mod(new[it], n_accum), n_accum - 1)

    for i in range(len(w)):
        mom1 = G.sharedf(w[i].get_value() * 0.)
        _max = G.sharedf(w[i].get_value() * 0.)
        g_sum = G.sharedf(w[i].get_value() * 0.)

        #gi = T.switch(T.isnan(gi),T.zeros_like(gi),gi) #remove NaN's
        new[g_sum] = ifelse(reset, g[i], g_sum + g[i])
        new[mom1] = ifelse(update, (1 - beta1) * mom1 + beta1 * new[g_sum], mom1)
        new[_max] = ifelse(update, T.maximum((1 - beta2) * _max, abs(new[g_sum]) + 1e-8), _max)
        new[w[i]] = ifelse(update, w[i] + alpha * new[mom1] / new[_max], w[i])

    return new
def ShiftConv(w_t_g, s_t, N):
    # Map the shift signal s_t in [0, 1] to a shift in [-1, 1),
    # then wrap it onto the ring of N positions.
    shift = 2. * s_t - 1.
    Z = T.mod(shift + N, N)
    simj = 1 - (Z - T.floor(Z))  # weight of the lower integer rotation
    imj = T.mod(T.arange(N) + T.iround(T.floor(Z)), N)  # shifted index map (unused)
    # Linearly interpolate between the two neighbouring integer rotations
    w_t_g_roll_1 = T.roll(w_t_g, -T.iround(T.floor(Z)))
    w_t_g_roll_2 = T.roll(w_t_g, -(T.iround(T.floor(Z)) + 1))
    w_t_s = w_t_g_roll_1 * simj + w_t_g_roll_2 * (1 - simj)
    return w_t_s
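# --- Usage sketch for ShiftConv (hedged: a toy check, not from the source;
# assumes Theano is installed and ShiftConv is defined as above). N=8 and the
# probe weighting are arbitrary. s_t=0.5 maps to a shift of 0, so the
# weighting should come back unchanged.
import numpy as np
import theano
import theano.tensor as T

w = T.vector('w')   # addressing weights, shape (N,)
s = T.scalar('s')   # shift signal in [0, 1]
f = theano.function([w, s], ShiftConv(w, s, 8))

w0 = np.zeros(8, dtype=theano.config.floatX)
w0[3] = 1.
print(f(w0, 0.5))   # peak stays at index 3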
def in_transit(self, t, r=0.0, texp=None):
    """Get a list of timestamps that are in transit

    Args:
        t (vector): A vector of timestamps to be evaluated.
        r (Optional): The radii of the planets.
        texp (Optional[float]): The exposure time.

    Returns:
        The indices of the timestamps that are in transit.

    """
    z = tt.zeros_like(self.a)
    r = tt.as_tensor_variable(r) + z
    R = self.r_star + z

    # Wrap the times into time since transit
    hp = 0.5 * self.period
    dt = tt.mod(self._warp_times(t) - self.t0 + hp, self.period) - hp

    if self.ecc is None:
        # Equation 14 from Winn (2010)
        k = r / R
        arg = tt.square(1 + k) - tt.square(self.b)
        factor = R / (self.a * self.sin_incl)
        hdur = hp * tt.arcsin(factor * tt.sqrt(arg)) / np.pi
        t_start = -hdur
        t_end = hdur
        flag = z
    else:
        M_contact = self.contact_points_op(
            self.a, self.ecc, self.cos_omega, self.sin_omega,
            self.cos_incl + z, self.sin_incl + z, R + r)
        flag = M_contact[2]

        t_start = (M_contact[0] - self.M0) / self.n
        t_start = tt.mod(t_start + hp, self.period) - hp

        t_end = (M_contact[1] - self.M0) / self.n
        t_end = tt.mod(t_end + hp, self.period) - hp

    t_start = tt.switch(tt.gt(t_start, 0.0), t_start - self.period, t_start)
    t_end = tt.switch(tt.lt(t_end, 0.0), t_end + self.period, t_end)

    if texp is not None:
        t_start -= 0.5 * texp
        t_end += 0.5 * texp

    mask = tt.any(tt.and_(dt >= t_start, dt <= t_end), axis=-1)
    result = ifelse(tt.all(tt.eq(flag, 0)),
                    tt.arange(t.size)[mask],
                    tt.arange(t.size))

    return result
def __init__(self, **kwargs):
    super(ConcatConv, self).__init__(**kwargs)

    inputs = T.concatenate([s.output for s in self.sources], axis=2)  # (time, batch, input-dim = row * features)
    time = inputs.shape[0]
    batch = inputs.shape[1]

    if self.status[0]:
        self.input = T.concatenate([s.Output for s in self.sources], axis=3)  # (batch, stack_size, row, time)
    else:
        inputs2 = inputs.reshape((time, batch, inputs.shape[2], self.filter_shape[1]))  # (time, batch, row, stack)
        self.input = inputs2.dimshuffle(1, 3, 2, 0)  # (batch, stack_size, row, time)
    self.input.name = "conv_layer_input_final"

    if self.pool_params[0][1] > 1:
        # zero-pad the time axis so that it is divisible by the pooling factor
        xp = T.constant(self.pool_params[0][1], 'int32')
        self.input = T.concatenate(
            [self.input,
             T.zeros((batch, self.filter_shape[1], self.input.shape[2],
                      xp - T.mod(self.input.shape[3], xp)), 'float32')],
            axis=3)
        self.index = T.concatenate(
            [self.index,
             T.zeros((xp - T.mod(self.index.shape[0], xp), batch), 'int8')],
            axis=0)

    if self.modes[0] == "valid":
        if self.filter_shape[3] > 1:
            idx = int(self.filter_shape[3] / 2)
            self.index = self.index[idx:-idx]

    self.Output = self.run_cnn(
        inputs=self.input,
        filter_shape=self.filter_shape,
        params=self.pool_params,
        modes=self.modes,
        others=self.other_params)

    if self.attrs['batch_norm']:
        self.Output = self.batch_norm(
            self.Output.dimshuffle(0, 2, 3, 1).reshape(
                (self.Output.shape[0] * self.Output.shape[2] * self.Output.shape[3],
                 self.Output.shape[1])),
            self.attrs['n_features']
        ).reshape((self.Output.shape[0], self.Output.shape[2],
                   self.Output.shape[3], self.Output.shape[1])).dimshuffle(0, 3, 1, 2)

    # our CRNN only accepts a 3D tensor (time, batch, dim),
    # so we have to convert the output back to a 3D tensor
    output2 = self.Output.dimshuffle(3, 0, 1, 2)  # (time, batch, features, out-row)
    self.output = output2.reshape((output2.shape[0], output2.shape[1],
                                   output2.shape[2] * output2.shape[3]))  # (time, batch, out-dim)
def input_row_from_variables(ori_ip, dest_ip, ori_lat, ori_long, dest_lat, dest_long, ori_type, dest_type, dist):
    '''Create an input row for the MLP from the inputs'''

    input_row = tensor.zeros([input_size])
    offset = 0

    ips = [ori_ip, dest_ip]
    for ip in ips:
        for _ in range(4):
            input_row = add_one_shot(input_row, offset, tensor.mod(ip, 256))
            ip = tensor.int_div(ip, 256)
            offset += 256

    for lat_, long_ in [(ori_lat, ori_long), (dest_lat, dest_long)]:
        translated_lat = tensor.iround((coordinate_size - 1) * (lat_ / 180 + 0.5))
        input_row = add_thermo(input_row, offset, translated_lat)
        offset += coordinate_size

        translated_long = tensor.iround((coordinate_size - 1) * (long_ / 360 + 0.5))
        input_row = add_thermo(input_row, offset, translated_long)
        offset += coordinate_size

    for type_ in [ori_type, dest_type]:
        input_row = add_one_shot(input_row, offset, type_ + 1)
        offset += type_size

    translated_dist = tensor.iround((dest_size - 1) * (tensor.minimum(1, dist / max_earth_distance)))
    input_row = add_thermo(input_row, offset, translated_dist)

    # could be useful if we want to add something
    offset += dest_size

    return input_row
def time_mask(update_freq, maxlen, batch_size):
    '''
    update_freq - after how many time steps the hiddens should be updated.
    maxlen - maximum length of the input sequence.
    batch_size - batch size for training!
    '''
    new_mask = tensor.alloc(1, maxlen)
    qw = tensor.extra_ops.cumsum(new_mask)
    qw2 = tensor.switch(tensor.eq(tensor.mod(qw, update_freq), 0), 1, 0)
    temp = qw2
    for i in range(batch_size - 1):
        qw2 = tensor.concatenate([qw2, temp], axis=0)
    qw2 = qw2.reshape([batch_size, maxlen])
    qw2 = qw2.T
    new_mask = qw2

    if update_freq == 1:
        return new_mask, None, None

    ones_array = numpy.ones([1, maxlen])
    cumsum = numpy.cumsum(ones_array)
    mod_array = [int(i % update_freq) for i in cumsum]
    mod_array = numpy.asarray(mod_array)
    alpha_mask = numpy.where(mod_array == 0)[0]

    interpolation_mask = []
    for i in reversed(range(update_freq)):
        interpolation_mask.append(((i + 1) * 1.0) / update_freq)

    return new_mask, alpha_mask, interpolation_mask
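# --- Hedged usage sketch for time_mask (assumes the module-level imports this
# snippet relies on: numpy, and theano.tensor as tensor). With update_freq=3,
# maxlen=9 and batch_size=2 the mask is 1 on steps 3, 6 and 9.
import numpy
import theano.tensor as tensor

mask, alpha_mask, interp = time_mask(update_freq=3, maxlen=9, batch_size=2)
print(mask.eval())   # shape (maxlen, batch); rows 2, 5 and 8 are ones
print(alpha_mask)    # indices of the update steps: [2 5 8]
print(interp)        # interpolation weights: [1.0, 2/3, 1/3]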
def init_train_updates(self):
    step = self.variables.step
    previous_delta = self.variables.prev_delta
    previous_gradient = self.variables.prev_gradient

    n_parameters = count_parameters(self.connection)
    parameters = parameter_values(self.connection)
    param_vector = T.concatenate([param.flatten() for param in parameters])

    gradients = T.grad(self.variables.error_func, wrt=parameters)
    full_gradient = T.concatenate([grad.flatten() for grad in gradients])

    beta = self.update_function(previous_gradient, full_gradient, previous_delta)

    # restart the conjugate direction every n_parameters epochs
    parameter_delta = ifelse(
        T.eq(T.mod(self.variables.epoch, n_parameters), 1),
        -full_gradient,
        -full_gradient + beta * previous_delta
    )
    updated_parameters = param_vector + step * parameter_delta

    updates = [
        (previous_gradient, full_gradient),
        (previous_delta, parameter_delta),
    ]
    parameter_updates = setup_parameter_updates(parameters, updated_parameters)
    updates.extend(parameter_updates)

    return updates
def apply(self, inputs, time_step, states, time_scale, time_offset, mask=None):
    """Apply the simple transition.

    Parameters
    ----------
    inputs : :class:`~tensor.TensorVariable`
        The 2D inputs, in the shape (batch, features).
    states : :class:`~tensor.TensorVariable`
        The 2D states, in the shape (batch, features).
    mask : :class:`~tensor.TensorVariable`
        A 1D binary array in the shape (batch,) which is 1 if there is
        data available, 0 if not. Assumed to be 1-s only if not given.

    """
    def activate_rnn(self, inputs, states, mask=None):
        next_states = inputs + tensor.dot(states, self.W)
        next_states = self.children[0].apply(next_states)
        if mask is not None:
            next_states = (mask[:, None] * next_states +
                           (1 - mask[:, None]) * states)
        return next_states

    def do_nothing(states):
        return states

    # only activate the transition when (time_step - time_offset) is a
    # multiple of time_scale; otherwise carry the states through unchanged
    result = ifelse(
        tensor.eq(tensor.mod(tensor.sub(time_step, time_offset), time_scale), 0),
        activate_rnn(self, inputs, states, mask),
        do_nothing(states))
    return result
def build_likelihood(X, Y):
    # cycle the batch rows through the parameter pool
    random_pool_selector = T.mod(T.arange(X.shape[0]), param_pool_size)
    # random_pool_selector = srng.choice(size=(X.shape[0],), a=param_pool_size)

    output = X
    mult_factor = X.shape[0] / param_pool_size + 1

    for i in xrange(len(_structure) - 1):
        if i == len(_structure) - 2:
            nonlin = lambda x: x
        else:
            nonlin = nonlinearity

        W = Ws_var[i][random_pool_selector]
        # W = T.tile(Ws_var[i], (mult_factor, 1, 1))[:X.shape[0]]  # [random_pool_selector]
        if bias:
            # b = T.tile(bs_var[i], (mult_factor, 1))[:X.shape[0]]
            b = bs_var[i][random_pool_selector]
            output = nonlin(T.batched_dot(output, W) + b)
        else:
            output = nonlin(T.batched_dot(output, W))

    if use_softmax:
        softmax_fn = output - logsumexp(output, 1)
        log_likelihood = T.sum(softmax_fn[T.arange(Y.shape[0]), Y])
    else:
        log_likelihood = (-0.5 * T.sum((output.T - Y) ** 2) -
                          np.log(2 * np.pi) *
                          (X.shape[0] / 2).astype(theano.config.floatX))

    return log_likelihood
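# --- Hedged sketch of the pool-selector indexing above: T.mod cycles row
# indices through a parameter pool so each batch row picks one weight set.
# The names here are illustrative, not from the source.
import numpy as np
import theano.tensor as T

n = T.iscalar('n')
pool_size = 3
selector = T.mod(T.arange(n), pool_size)
print(selector.eval({n: np.int32(7)}))   # [0 1 2 0 1 2 0]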
def init_train_updates(self):
    step = self.variables.step
    previous_delta = self.variables.prev_delta
    previous_gradient = self.variables.prev_gradient

    n_parameters = count_parameters(self)
    parameters = list(iter_parameters(self))
    param_vector = parameters2vector(self)

    gradients = T.grad(self.variables.error_func, wrt=parameters)
    full_gradient = T.concatenate([grad.flatten() for grad in gradients])

    beta = self.update_function(previous_gradient, full_gradient,
                                previous_delta)
    parameter_delta = ifelse(
        T.eq(T.mod(self.variables.epoch, n_parameters), 1),
        -full_gradient,
        -full_gradient + beta * previous_delta
    )
    updated_parameters = param_vector + step * parameter_delta

    updates = [
        (previous_gradient, full_gradient),
        (previous_delta, parameter_delta),
    ]
    parameter_updates = setup_parameter_updates(parameters, updated_parameters)
    updates.extend(parameter_updates)

    return updates
def step(x, h, c, x_pos, y_pos, H, C):
    print('x =', x)
    print('h =', h)
    print('c =', c)
    print('x_pos = ', x_pos)
    tmp = H[x_pos, y_pos]
    print('tmp=', tmp)
    h = x + tmp
    c = x + c
    x_pos = x_pos + 1
    y_pos = ifelse(x_pos >= height, y_pos + 1, y_pos)
    x_pos = tensor.mod(x_pos, height)
    y_pos = tensor.mod(y_pos, width)
    # Theano tensors do not support item assignment ("H[x_pos, y_pos] = h");
    # use set_subtensor instead, and return H (as the later variant of this
    # step function does) so the write persists across scan steps.
    H = tensor.set_subtensor(H[x_pos, y_pos], h)
    return h, c, x_pos, y_pos
def in_transit(self, t, r=None, texp=None, light_delay=False):
    """Get a list of timestamps that are in transit

    Args:
        t (vector): A vector of timestamps to be evaluated.
        r (Optional): The radii of the planets.
        texp (Optional[float]): The exposure time.

    Returns:
        The indices of the timestamps that are in transit.

    """
    if light_delay:
        raise NotImplementedError(
            "Light travel time delay is not implemented for simple orbits"
        )
    dt = tt.mod(tt.shape_padright(t) - self._ref_time, self.period)
    dt -= self._half_period
    if r is None:
        tol = 0.5 * self.duration
    else:
        x = (r + self.r_star) ** 2 - self._b_norm ** 2
        tol = tt.sqrt(x) / self.speed
    if texp is not None:
        tol += 0.5 * texp
    mask = tt.any(tt.abs_(dt) < tol, axis=-1)
    return tt.arange(t.size)[mask]
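# --- Hedged sketch of the phase-wrapping trick used above: fold times into
# [-P/2, P/2) around a reference transit, then flag |dt| < tol as in-transit.
# (In the source, _ref_time absorbs the half-period shift; period, t_ref and
# tol below are illustrative numbers.)
import numpy as np
import theano
import theano.tensor as T

t = T.vector('t')
period, t_ref, tol = 10.0, 2.5, 0.3
dt = T.mod(t - t_ref + 0.5 * period, period) - 0.5 * period
idx = T.arange(t.size)[T.abs_(dt) < tol]
f = theano.function([t], idx)
print(f(np.arange(0., 30., 0.25, dtype=theano.config.floatX)))
# picks out the samples near t = 2.5, 12.5 and 22.5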
def ShiftConv(w_t_g, s_t, N, num_shifts):
    # pad = (num_shifts//2, (num_shifts-1)//2)
    # w_t_g_pd_ = T.concatenate([w_t_g[(-pad[0]-1):-1], w_t_g, w_t_g[:(pad[1])]])
    # w_t_g_pd = w_t_g_pd_.dimshuffle('x', 'x', 'x', 0)
    # filter = s_t.dimshuffle('x', 'x', 'x', 0)
    # convolution = T.nnet.conv2d(w_t_g_pd, filter,
    #                             input_shape=(1, 1, 1, N + pad[0] + pad[1]),
    #                             filter_shape=(1, 1, 1, num_shifts),
    #                             subsample=(1, 1),
    #                             border_mode='valid')
    # w_t_s = convolution[0, 0, 0, :]
    shift = 2. * s_t - 1.
    Z = T.mod(shift + N, N)
    simj = 1 - (Z - T.floor(Z))
    imj = T.mod(T.arange(N) + T.iround(T.floor(Z)), N)
    w_t_g_roll_1 = T.roll(w_t_g, -T.iround(T.floor(Z)))
    w_t_g_roll_2 = T.roll(w_t_g, -(T.iround(T.floor(Z)) + 1))
    w_t_s = w_t_g_roll_1 * simj + w_t_g_roll_2 * (1 - simj)
    return w_t_s
def fprop(self, X):
    idx = X[0]
    X = X[1:]
    # only run the inner function on every N-th step; otherwise emit zeros
    z = theano.ifelse.ifelse(
        T.neq(T.mod(idx, self.N), 0),
        T.zeros((X[0].shape[0] * self.num_sample, self.nout), dtype=X[0].dtype),
        self.inner_fn(X))
    z.name = self.name
    return z
def step(x, h, c, x_pos, y_pos, H, C):
    print('x =', x)
    print('h =', h)
    print('c =', c)
    print('x_pos = ', x_pos)
    tmp = H[y_pos, x_pos]
    print('tmp=', tmp)
    h = x + tmp
    c = x + c
    H = tensor.set_subtensor(H[y_pos, x_pos], h)
    C = tensor.set_subtensor(C[y_pos, x_pos], c)
    x_pos = x_pos + 1
    y_pos = ifelse(x_pos >= width, y_pos + 1, y_pos)
    x_pos = tensor.mod(x_pos, width)
    y_pos = tensor.mod(y_pos, height)
    return h, c, x_pos, y_pos, H, C
def build_loss(pred_var, target_var, resp_dur, t_ind):
    if t_ind in (0, 1, 4):
        # angular error, folded into [0, pi)
        loss = T.mean(T.mod(T.abs_(pred_var[:, -resp_dur:, :] -
                                   target_var[:, -resp_dur:, :]), np.pi))
    elif t_ind in (2, 6, 8):
        loss = T.mean(binary_crossentropy(pred_var[:, -resp_dur:, -1],
                                          target_var[:, -resp_dur:, -1]))
    return loss
def pooling(self, inp, input_dim):
    from theano.ifelse import ifelse

    inp_shuffle = inp.dimshuffle(1, 0, 2)
    n_timestep = inp_shuffle.shape[1]
    output, _ = theano.scan(
        fn=lambda timestep: T.max(inp_shuffle[:, timestep:timestep + 1, :], axis=1),
        sequences=T.arange(0, T.floor(n_timestep / 2)) * 2)
    # n_timestep is symbolic, so the odd-length case must be handled with a
    # symbolic ifelse rather than a Python `if` on a tensor expression
    output = ifelse(T.eq(T.mod(n_timestep, 2), 0),
                    output,
                    T.concatenate([output, inp[-1:, :, :]], axis=0))
    return output
def attend(self, y_p):
    inp, updates = 0, {}
    for i in range(len(self.base)):
        for g in range(self.n_glm):
            B, C, I, H, W_att_in, b_att_in = self.get(y_p, i, g)
            z_i = self.distance(C, H)
            w_i = self.softmax(z_i, I)
            if self.attrs['momentum'] == 'conv2d':
                F = self.item('F', i)
                context = F.shape[3]
                padding = T.zeros((2, context / 2, C.shape[1]), 'float32')
                att = T.concatenate([padding, T.stack([self.item('att', i), w_i]), padding], axis=1)  # 2TB
                v_i = T.nnet.sigmoid(
                    T.dot(
                        T.nnet.conv2d(border_mode='valid',
                                      input=att.dimshuffle(2, 'x', 0, 1),  # B12T
                                      filters=F).dimshuffle(3, 0, 2, 1),
                        self.item('U', i)).reshape((C.shape[0], C.shape[1])))
                w_i *= v_i
                w_i = w_i / w_i.sum(axis=0, keepdims=True)
            elif self.attrs['momentum'] == 'mono':  # gating function
                idx = T.arange(z_i.shape[0], dtype='float32').dimshuffle(0, 'x').repeat(w_i.shape[1], axis=1)  # TB
                d_i = idx - T.sum(self.item('att', i) * idx, axis=0, keepdims=True)
                f_i = T.nnet.sigmoid(
                    T.dot(T.tanh(T.dot(d_i.dimshuffle(0, 1, 'x'), self.item('D_in', i))),
                          self.item("D_out", i)) + self.item('Db_out', i))[:, :, 0]
                w_i = T.exp(-z_i) * f_i * I
                w_i = w_i / w_i.sum(axis=0, keepdims=True)
            self.glimpses[i].append(T.sum(C * w_i.dimshuffle(0, 1, 'x').repeat(C.shape[2], axis=2), axis=0))
            if self.attrs['smooth']:
                updates[self.state_vars['datt_%d' % i]] = w_i - self.state_vars['att_%d' % i]
            if self.attrs['store']:
                updates[self.state_vars['att_%d' % i]] = theano.gradient.disconnected_grad(w_i)
            if self.attrs['memory'] > 0:
                M = self.item('M', i)
                z_r = self.distance(M, H)
                w_m = self.softmax(z_r, T.ones_like(M[0]))
                inp += T.dot(T.sum(w_m * M, axis=0), self.item('W_mem_in', i))
                v_m = T.nnet.sigmoid(T.dot(H, self.item('W_mem_write', i))).dimshuffle('x', 0, 1).repeat(M.shape[0], axis=0)
                mem = H.dimshuffle('x', 0, 1).repeat(self.attrs['memory'], axis=0)
                updates[self.state_vars['M_%d' % i]] = T.sum(
                    (numpy.float32(1) - v_m) * M.dimshuffle(0, 'x', 1).repeat(v_m.shape[1], axis=1) + v_m * mem,
                    axis=1)
            if self.attrs['accumulator'] == 'rnn':
                def rnn(x_t, w_t, c_p):
                    c = x_t * w_t + c_p * (numpy.float32(1.) - w_t)
                    return T.switch(T.ge(c, 0), c, T.exp(c) - 1)
                zT, _ = theano.scan(rnn,
                                    sequences=[B, w_i.dimshuffle(0, 1, 'x').repeat(B.shape[2], axis=2)],
                                    outputs_info=[T.zeros_like(B[0])])
                z = zT[-1]
            else:
                if self.attrs['nbest'] == 1:
                    z = B[T.argmax(w_i, axis=0), T.arange(w_i.shape[1])]
                else:
                    z = T.sum(B * w_i.dimshuffle(0, 1, 'x').repeat(B.shape[2], axis=2), axis=0)
            if self.attrs['loss']:
                updates[self.state_vars['catt_%d' % i]] = -T.sum(
                    T.log(w_i[T.cast(self.item('iatt', i), 'int32')[T.cast(self.n, 'int32')],
                              T.arange(w_i.shape[1], dtype='int32')]),
                    axis=0)
            inp += T.dot(z, W_att_in) + b_att_in
    # only feed the attended input on every ndec-th decoder step
    inp = ifelse(T.eq(T.mod(self.n[0], self.attrs['ndec']), 0), inp,
                 T.zeros((self.n.shape[0], self.layer.attrs['n_out'] * 4), 'float32'))
    return inp, updates
def train_givens(self, batch_index, batch_size):
    '''
    batch_index is a theano_variable.
    '''
    # compute the gpu batch index
    # these will all be theano variables
    solver_batches_per_gpu_batch = T.cast(T.int_div(self.num_GPU_store, batch_size), 'int32')
    real_batch_index = T.cast(T.mod(batch_index, solver_batches_per_gpu_batch), 'int32')

    givens = {self.X_batch_var:
              self.GPU_X_train[real_batch_index * batch_size:(real_batch_index + 1) * batch_size]}
    givens[self.y_batch_var] = \
        self.GPU_y_train[real_batch_index * batch_size:(real_batch_index + 1) * batch_size]
    return givens
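# --- Hedged illustration of the `givens` pattern this method feeds (names
# are illustrative): map a symbolic batch index to a slice of a shared,
# device-resident dataset, so no data is transferred per call.
import numpy as np
import theano
import theano.tensor as T

data = theano.shared(np.arange(100, dtype=theano.config.floatX).reshape(20, 5))
i = T.iscalar('i')
x = T.matrix('x')
f = theano.function([i], x.sum(), givens={x: data[i * 4:(i + 1) * 4]})
print(f(0))   # sum of rows 0..3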
def get_phase(states):
    v, w = states
    angle = T.switch(w > 0,
                     np.pi * v.clip(0, 1),
                     w * (np.pi / T.abs_(T.min(w))))
    mean = T.arctan2(T.sin(angle).mean(axis=-1), T.cos(angle).mean(axis=-1))

    ### calculate angles around the mean
    angle = T.mod(angle + (np.pi - mean[:, None]), 2 * np.pi) - np.pi
    std = T.sqrt((angle ** 2).mean(-1))

    return std
def fprop_step(state_below, index, state_before, W, U, b):
    state_now = state_before.copy()
    # number of leading modules whose period divides (index + 1),
    # i.e. that are scheduled to update at this time step
    index = self.num_modules - \
        tensor.nonzero(tensor.mod(index + 1, self.M))[0].shape[0]
    this_range = index * self.module_dim
    z = tensor.dot(state_below, W[:, :this_range]) + \
        tensor.dot(state_before, U[:, :this_range]) + \
        b[:this_range]
    z = tensor.tanh(z)
    state_now = tensor.set_subtensor(state_now[:, :this_range], z)

    return state_now
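# --- Hedged sketch of the module-count trick above: the number of leading
# modules to update at step `index` is num_modules minus the number of module
# periods that do NOT divide (index + 1). The periods below are illustrative.
import numpy as np
import theano.tensor as tensor

M = tensor.constant(np.array([1, 2, 4, 8]))   # per-module update periods
index = tensor.iscalar('index')
n_active = 4 - tensor.nonzero(tensor.mod(index + 1, M))[0].shape[0]
print(n_active.eval({index: np.int32(7)}))    # step 8: all periods divide -> 4
print(n_active.eval({index: np.int32(1)}))    # step 2: periods 1 and 2 -> 2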
def build_loss(pred_var, target_var, ExptDict):
    # Unpack necessary variables
    task = ExptDict["task"]["task_id"]
    resp_dur = ExptDict["resp_dur"]

    if task in ['DE1', 'DE2', 'GDE2', 'VDE1']:
        loss = T.mean(T.mod(T.abs_(pred_var[:, -resp_dur:, :] -
                                   target_var[:, -resp_dur:, :]), np.pi))
    elif task in ['CD1', 'CD2', 'Harvey2012', 'Harvey2012Dynamic', 'Harvey2016', 'COMP']:
        loss = T.mean(lasagne.objectives.binary_crossentropy(pred_var[:, -resp_dur:, -1],
                                                             target_var[:, -resp_dur:, -1]))
    elif task in ['SINE']:
        loss = T.mean(T.abs_(pred_var[:, -resp_dur:, :] - target_var[:, -resp_dur:, :]))

    return loss
def step(x, h, c, x_pos, y_pos):
    global H, C
    print('x =', x)
    print('h =', h)
    print('c =', c)
    print('x_pos = ', x_pos)
    tmp = H[y_pos, x_pos]
    print('tmp=', tmp)
    h = x + tmp
    c = x + c
    H = tensor.inc_subtensor(H[y_pos, x_pos], h, tolerate_inplace_aliasing=True)
    C = tensor.set_subtensor(C[y_pos, x_pos], c)
    x_pos = x_pos + 1
    y_pos = ifelse(x_pos >= width, y_pos + 1, y_pos)
    x_pos = tensor.mod(x_pos, width)
    y_pos = tensor.mod(y_pos, height)
    return h, c, x_pos, y_pos
def calc_time_gate(time_input_n):
    # Broadcast the time across all units
    t_broadcast = time_input_n.dimshuffle([0, 'x'])
    # Get the time within the period
    in_cycle_time = T.mod(t_broadcast + shift_broadcast, period_broadcast)
    # Find the phase
    is_up_phase = T.le(in_cycle_time, on_mid_broadcast)
    is_down_phase = T.gt(in_cycle_time, on_mid_broadcast) * T.le(in_cycle_time, on_end_broadcast)
    # Set the mask
    sleep_wake_mask = T.switch(
        is_up_phase, in_cycle_time / on_mid_broadcast,
        T.switch(is_down_phase,
                 (on_end_broadcast - in_cycle_time) / on_mid_broadcast,
                 off_slope * (in_cycle_time / period_broadcast)))

    return sleep_wake_mask
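# --- Standalone sketch of the phased time gate above (hedged: the
# *_broadcast closures are replaced by concrete scalars; period, r_on and
# off_slope are illustrative). The gate ramps up over the first half of the
# open phase, down over the second half, and leaks slightly elsewhere.
import numpy as np
import theano
import theano.tensor as T

t = T.vector('t')
period, shift, r_on, off_slope = 5., 0., 0.2, 1e-3
in_cycle = T.mod(t + shift, period)
on_mid, on_end = 0.5 * r_on * period, r_on * period
is_up = T.le(in_cycle, on_mid)
is_down = T.gt(in_cycle, on_mid) * T.le(in_cycle, on_end)
gate = T.switch(is_up, in_cycle / on_mid,
                T.switch(is_down, (on_end - in_cycle) / on_mid,
                         off_slope * (in_cycle / period)))
f = theano.function([t], gate)
print(f(np.linspace(0., 10., 11).astype(theano.config.floatX)))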
def in_transit(self, t, r=0.0, texp=None):
    """Get a list of timestamps that are in transit

    Args:
        t (vector): A vector of timestamps to be evaluated.
        r (Optional): The radii of the planets.
        texp (Optional[float]): The exposure time.

    Returns:
        The indices of the timestamps that are in transit.

    """
    z = tt.zeros_like(self.a)
    r = tt.as_tensor_variable(r) + z
    R = self.r_star + z

    if self.ecc is None:
        M_contact = self.contact_points_op(self.a, self.incl + z, r, R)
    else:
        M_contact = self.contact_points_op(
            self.a, self.ecc, self.omega, self.incl + z, r, R)

    # Wrap the times into time since transit
    hp = 0.5 * self.period
    t_start = (M_contact[0] - self.M0) / self.n
    t_start = tt.mod(t_start + hp, self.period) - hp
    t_end = (M_contact[3] - self.M0) / self.n
    t_end = tt.mod(t_end + hp, self.period) - hp
    dt = tt.mod(self._warp_times(t) - self.t0 + hp, self.period) - hp

    if texp is not None:
        t_start -= 0.5 * texp
        t_end += 0.5 * texp

    mask = tt.any(tt.and_(dt >= t_start, dt <= t_end), axis=-1)
    return tt.arange(t.size)[mask]
def step(input_step, previous_activation, time_step, W_in, W_self, biases):
    new_activation = previous_activation.copy()
    # groups whose label divides the current time step are updated now
    modzero = T.nonzero(T.eq(T.mod(time_step, self.group_labels), 0))[0]

    W_in_now = T.flatten(W_in[:, modzero, :], outdim=2)
    W_self_now = T.flatten(W_self[:, modzero, :], outdim=2)
    biases_now = T.flatten(biases[modzero, :])

    activation = T.dot(input_step, W_in_now)
    activation += T.dot(previous_activation, W_self_now)
    activation += biases_now
    activation = self.activation_function(activation)

    # scatter the new activations back into the units of the active groups
    modzero_activation_changes = (modzero * self.group_size) + (
        T.ones((modzero.shape[0], self.group_size), dtype='int32') *
        T.arange(self.group_size, dtype='int32')).T
    modzero_flatten = T.flatten(modzero_activation_changes).astype('int32')
    new_activation = T.set_subtensor(new_activation[:, modzero_flatten], activation)

    time_step += 1
    return new_activation, time_step
def __init__(self, base, momentum=0.1, oracle=False, msteps=100, esteps=200, **kwargs):
    kwargs['loss'] = 'ce'
    super(UnsupervisedOutputLayer, self).__init__(**kwargs)
    if base:
        self.set_attr('base', base[0].name)
    self.set_attr('momentum', momentum)
    self.set_attr('oracle', oracle)
    self.set_attr('msteps', msteps)
    self.set_attr('esteps', esteps)
    eps = T.constant(1e-30, 'float32')
    pc = theano.gradient.disconnected_grad(base[1].output)  # TBV
    pc = print_to_file('pc', pc)
    pcx = base[0].output  # TBV

    self.cnt = self.add_param(theano.shared(numpy.zeros((1,), 'float32'), 'cnt'),
                              custom_update=T.constant(1, 'float32'))
    domax = T.ge(T.mod(T.cast(self.cnt[0], 'int32'), numpy.int32(msteps + esteps)), esteps)

    hyp = T.mean(pcx, axis=1, keepdims=True)
    hyp = hyp / hyp.sum(axis=2, keepdims=True)

    self.hyp = self.add_param(
        theano.shared(numpy.ones((self.attrs['n_out'],), 'float32') / numpy.float32(self.attrs['n_out']), 'hyp'),
        'hyp',
        custom_update=T.mean(hyp[:, 0, :], axis=0),
        custom_update_condition=domax,
        custom_update_normalized=True,
        custom_update_exp_average=1. / (1. - momentum))
    hyp = (numpy.float32(1. - momentum) * hyp +
           numpy.float32(momentum) * self.hyp.dimshuffle('x', 'x', 0)
           .repeat(hyp.shape[1], axis=1).repeat(hyp.shape[0], axis=0))

    order = T.argsort(self.hyp)[::-1]
    # order = print_to_file('order', order)
    shyp = hyp[:, :, order]
    spcx = pcx[:, :, order]
    # spcx = print_to_file('pcx', spcx)
    # shyp = print_to_file('shyp', shyp)

    K = numpy.float32(1. / (1. - momentum)) * T.sum(T.sum(pc * T.log(pc / shyp), axis=2), axis=0)
    Q = -T.sum(T.sum(pcx * T.log(pcx), axis=2), axis=0)
    # K = print_to_file('K', K)
    # Q = print_to_file('Q', Q)

    self.L = T.sum(T.switch(domax, Q, K))
    self.y_m = spcx.reshape((spcx.shape[0] * spcx.shape[1], spcx.shape[2]))
def attend(self, y_p):
    inp, updates = 0, {}
    for i in range(len(self.base)):
        for g in range(self.n_glm):
            B, C, I, H, W_att_in, b_att_in = self.get(y_p, i, g)
            z_i = self.distance(C, H)
            w_i = self.softmax(z_i, I)
            if self.attrs['momentum'] == 'conv2d':
                F = self.item('F', i)
                context = F.shape[3]
                padding = T.zeros((2, context / 2, C.shape[1]), 'float32')
                att = T.concatenate([padding, T.stack([self.item('att', i), w_i]), padding], axis=1)  # 2TB
                v_i = T.nnet.sigmoid(
                    T.dot(
                        T.nnet.conv2d(border_mode='valid',
                                      input=att.dimshuffle(2, 'x', 0, 1),  # B12T
                                      filters=F).dimshuffle(3, 0, 2, 1),
                        self.item('U', i)).reshape((C.shape[0], C.shape[1])))
                w_i *= v_i
                w_i = w_i / w_i.sum(axis=0, keepdims=True)
            elif self.attrs['momentum'] == 'mono':  # gating function
                idx = T.arange(z_i.shape[0], dtype='float32').dimshuffle(0, 'x').repeat(w_i.shape[1], axis=1)  # TB
                d_i = idx - T.sum(self.item('att', i) * idx, axis=0, keepdims=True)
                f_i = T.nnet.sigmoid(
                    T.dot(T.tanh(T.dot(d_i.dimshuffle(0, 1, 'x'), self.item('D_in', i))),
                          self.item("D_out", i)) + self.item('Db_out', i))[:, :, 0]
                w_i = T.exp(-z_i) * f_i * I
                w_i = w_i / w_i.sum(axis=0, keepdims=True)
            self.glimpses[i].append(T.sum(C * w_i.dimshuffle(0, 1, 'x').repeat(C.shape[2], axis=2), axis=0))
            if self.attrs['smooth']:
                updates[self.state_vars['datt_%d' % i]] = w_i - self.state_vars['att_%d' % i]
            if self.attrs['store']:
                updates[self.state_vars['att_%d' % i]] = theano.gradient.disconnected_grad(w_i)
            if self.attrs['align']:
                Q, K = self.align(w_i, self.item("Q", i))
                updates[self.state_vars['Q_%d' % i]] = Q
                updates[self.state_vars['K_%d' % i]] = K
            if self.attrs['accumulator'] == 'rnn':
                def rnn(x_t, w_t, c_p):
                    c = x_t * w_t + c_p * (numpy.float32(1.) - w_t)
                    return T.switch(T.ge(c, 0), c, T.exp(c) - 1)
                zT, _ = theano.scan(rnn,
                                    sequences=[B, w_i.dimshuffle(0, 1, 'x').repeat(B.shape[2], axis=2)],
                                    outputs_info=[T.zeros_like(B[0])])
                z = zT[-1]
            else:
                if self.attrs['nbest'] == 1:
                    z = B[T.argmax(w_i, axis=0), T.arange(w_i.shape[1])]
                else:
                    z = T.sum(B * w_i.dimshuffle(0, 1, 'x').repeat(B.shape[2], axis=2), axis=0)
            inp += T.dot(z, W_att_in) + b_att_in
    # only feed the attended input on every ndec-th decoder step
    inp = ifelse(T.eq(T.mod(self.n[0], self.attrs['ndec']), 0), inp,
                 T.zeros((self.n.shape[0], self.layer.attrs['n_out'] * 4), 'float32'))
    return inp, updates
def get_relative_position(self, t):
    """The planets' positions relative to the star

    Args:
        t: The times where the position should be evaluated.

    Returns:
        The components of the position vector at ``t`` in units of
        ``R_sun``.

    """
    dt = tt.mod(tt.shape_padright(t) - self._ref_time, self.period)
    dt -= self._half_period
    x = tt.squeeze(self.speed * dt)
    y = tt.squeeze(self._b_norm + tt.zeros_like(dt))
    z = -tt.ones_like(x)
    return x, y, z
def input_row_from_variables(ori_ip, dest_ip, ori_lat, ori_long, dest_lat, dest_long,
                             ori_type, dest_type, dist, latency):
    '''Create an input row for the MLP from the inputs'''

    input_row = tensor.zeros([input_size])
    offset = 0

    ips = [ori_ip, dest_ip]
    for ip in ips:
        for _ in range(4):
            input_row = add_one_shot(input_row, offset, tensor.mod(ip, 256))
            ip = tensor.int_div(ip, 256)
            offset += 256

    for lat_, long_ in [(ori_lat, ori_long), (dest_lat, dest_long)]:
        translated_lat = tensor.iround((coordinate_size - 1) * (lat_ / 180 + 0.5))
        input_row = add_thermo(input_row, offset, translated_lat)
        offset += coordinate_size

        translated_long = tensor.iround((coordinate_size - 1) * (long_ / 360 + 0.5))
        input_row = add_thermo(input_row, offset, translated_long)
        offset += coordinate_size

    for type_ in [ori_type, dest_type]:
        input_row = add_one_shot(input_row, offset, type_ + 1)
        offset += type_size

    translated_dist = tensor.iround((dist_size - 1) * (tensor.minimum(1, dist / max_earth_distance)))
    input_row = add_thermo(input_row, offset, translated_dist)
    offset += dist_size

    translated_dist = tensor.iround((small_dist_size - 1) * (tensor.minimum(1, dist / max_earth_distance)))
    input_row = add_thermo(input_row, offset, translated_dist)

    # could be useful if we want to add something
    offset += small_dist_size

    return input_row
def update_odd_mu(X, P, Mu, W, V, U, b, b_L):
    l = Mu.shape[1]
    Mu_update = Mu
    # update the interior odd sites from their even neighbours
    Mu_update = T.set_subtensor(
        Mu_update[:, 1:-1:2],
        Tsigmoid(-T.tensordot(Mu[:, 0:-2:2], W, axes=(2, 0))
                 - T.tensordot(Mu[:, 2::2], W, axes=(2, 1))
                 - T.tensordot(X[:, 1:-1:2], V, axes=(2, 1))
                 - T.tensordot(P[:, 1:-1:2], U, axes=(2, 1))
                 - b))
    # the last site only has a left neighbour and its own bias b_L
    Mu_update_last = T.set_subtensor(
        Mu_update[:, -1],
        Tsigmoid(-T.tensordot(Mu[:, -2], W, axes=(1, 0))
                 - T.tensordot(X[:, -1], V, axes=(1, 1))
                 - T.tensordot(P[:, -1], U, axes=(1, 1))
                 - b_L))
    # for even chain length the last site is odd-indexed, so include it
    Mu_update = T.switch(T.eq(T.mod(l, 2), 0), Mu_update_last, Mu_update)
    return Mu_update[:, 1::2]
def getProposedStates(self, states, ntIdxs, s_rng, moves):
    """Sample proposed states given the current states described by the
    variables `states` and `ntIdxs`.

    Inputs:
        states - symbolic variable (nbInputs, 1, seqLen, 4)
        ntIdxs - symbolic variable (nbInputs, 1, seqLen) indexing the type of
                 nucleotide at each sequence position
        s_rng - theano.tensor.shared_randomstreams object
        moves - (4, 4) float tensor where moves[i] stores the 4 input unit
                activations encoding a nucleotide of type i

    Returns:
        states_proposed - symbolic variable (nbInputs, 1, seqLen, 4)
        ntIdxs_proposed - symbolic variable (nbInputs, 1, seqLen)
        ntCounts_incr - symbolic variable (nbInputs, 4) (elements are 1, -1, 0)
                        indicating the increments to the counts of each type
                        of nucleotide associated with the proposed mutation
    """
    # the sequence positions at which mutations are proposed
    idxs2Mutate = s_rng.random_integers(size=(states.shape[0],), low=0,
                                        high=states.shape[-2] - 1)
    # sample integers that will be used to permute the nucleotide content
    # at the positions selected for mutation
    ntPermInts = s_rng.random_integers(size=(states.shape[0],), low=1,
                                       high=states.shape[-1] - 1)

    # get the indices of the nucleotides before and after mutation
    mutatedNT_idxs_old = ntIdxs[T.arange(states.shape[0]),
                                T.arange(1, dtype="int64"), idxs2Mutate]
    mutatedNT_idxs_new = T.mod(ntPermInts + mutatedNT_idxs_old, 4)

    # update the description of the states
    ntIdxs_proposed = T.set_subtensor(
        ntIdxs[T.arange(states.shape[0], dtype="int64"),
               T.arange(1, dtype="int64"), idxs2Mutate],
        mutatedNT_idxs_new)
    states_proposed = T.set_subtensor(
        states[T.arange(states.shape[0], dtype="int64"),
               T.arange(1, dtype="int64"), idxs2Mutate, :],
        moves[mutatedNT_idxs_new])

    # update the counts of each nucleotide type for each input; the approach
    # is to convert the indices of mutated nucleotides to a one-hot encoding,
    # then use this to update the ntCounts matrix
    ntCounts_incr = (T.extra_ops.to_one_hot(mutatedNT_idxs_new, nb_class=4, dtype="int32") -
                     T.extra_ops.to_one_hot(mutatedNT_idxs_old, nb_class=4, dtype="int32"))

    return states_proposed, ntIdxs_proposed, ntCounts_incr
def get_phase(states):
    v, w, r = states

    ### convert to centered complex coordinates
    Vcenter = -0.22
    Wcenter = 0.6
    x = v - Vcenter
    y = w - Wcenter
    angle = T.arctan2(y, x)

    ### take the mean of unit vectors
    mag = T.sqrt(x ** 2 + y ** 2)
    x = x / mag
    y = y / mag
    mean = T.arctan2(y.mean(-1), x.mean(-1))

    ### calculate angles around the mean
    angle = T.mod(angle - mean[:, None] + np.pi, 2 * np.pi) - np.pi
    std = T.sqrt((angle ** 2).mean(-1))

    return std
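# --- Hedged numeric check of the wrap used above: T.mod(d + pi, 2*pi) - pi
# folds deviations into [-pi, pi), so the "std" computed from them is a
# circular one.
import numpy as np
import theano.tensor as T

a = T.vector('a')
wrapped = T.mod(a + np.pi, 2 * np.pi) - np.pi
print(wrapped.eval({a: np.array([0., 3.5, -3.5], dtype=a.dtype)}))
# -> [0., 3.5 - 2*pi, 2*pi - 3.5]: large deviations fold back near -+2.78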
def get_sfg_updates(self, X_sym, y_sym, params, cost, learning_rate, momentum):
    gparams = T.grad(cost, params)
    updates = OrderedDict()

    from sfg import SFG
    if not hasattr(self, "sfg_"):
        self.count_ = theano.shared(0)
        self.slow_freq_ = 20
        self.sfg_ = SFG(params, gparams)

    slow_updates, fast_updates = self.sfg_.updates(self.learning_rate,
                                                   self.momentum,
                                                   epsilon=0.0001,
                                                   momentum_clipping=None)
    for param in slow_updates.keys():
        updates[param] = theano.ifelse.ifelse(T.eq(self.count_, self.slow_freq_ - 1),
                                              slow_updates[param],
                                              fast_updates[param])
    # cyclic counter: apply the slow update once every slow_freq_ steps
    updates[self.count_] = T.mod(self.count_ + 1, self.slow_freq_)
    return updates
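# --- Hedged sketch of the cyclic counter driving the slow/fast switch above:
# a shared integer that wraps with T.mod, so the slow update fires once every
# slow_freq steps. slow_freq=5 is illustrative.
import theano
import theano.tensor as T

count = theano.shared(0)
tick = theano.function([], count, updates={count: T.mod(count + 1, 5)})
print([tick() for _ in range(7)])   # [0, 1, 2, 3, 4, 0, 1]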
def sample_out_step(c_t, o_tm1, x_tm1, v_h1_tm1):
    j_tm1 = tensor.concatenate((x_tm1, o_tm1), axis=1)
    vinp_h1_t, vgate_h1_t = inp_to_v_h1.proj(j_tm1)
    v_h1_t = v_cell1.step(vinp_h1_t, vgate_h1_t, v_h1_tm1)
    o = v_h1_t.dimshuffle('x', 0, 'x', 1)
    mu_mag, sigma_mag, coeff_mag = _slice_outs(o)
    mu_phase, sigma_phase, coeff_phase = _slice_outs(o)
    # Filthiest of the filthy hacks
    s = tensor.ge(switch, c_t)
    mu = s * mu_mag + (1 - s) * mu_phase
    sigma = s * sigma_mag + (1 - s) * sigma_phase
    coeff = s * coeff_mag + (1 - s) * coeff_phase
    mu = mu[0].dimshuffle(0, 'x', 1)
    sigma = sigma[0].dimshuffle(0, 'x', 1)
    coeff = coeff[0]
    samp_mag = sample_single_dimensional_gmms(mu, sigma, coeff, srng)
    samp_phase = sample_single_dimensional_gmms(mu, sigma, coeff, srng)
    # wrap the phase sample into [-pi, pi)
    samp_phase = tensor.mod(samp_phase + np.pi, 2 * np.pi) - np.pi
    samp = s * samp_mag + (1 - s) * samp_phase
    return samp, v_h1_t
def GMM_phase(y, mu, sig, coeff):
    """
    y : TensorVariable
    mu : FullyConnected (Linear)
    sig : FullyConnected (Softplus)
    coeff : FullyConnected (Softmax)
    """
    n_dim = y.ndim
    shape_y = y.shape
    print n_dim
    y = y.reshape((-1, shape_y[-1]))
    y = y.dimshuffle(0, 1, 'x')
    mu = mu.reshape((-1, mu.shape[-1] / coeff.shape[-1], coeff.shape[-1]))
    sig = sig.reshape((-1, sig.shape[-1] / coeff.shape[-1], coeff.shape[-1]))
    coeff = coeff.reshape((-1, coeff.shape[-1]))

    # circular distance |y - mu| folded into [0, pi]
    inner0 = np.pi - abs(T.mod(y - mu, 2 * np.pi) - np.pi)
    inner = -0.5 * T.sum(T.sqr(inner0) / sig ** 2 + 2 * T.log(sig) +
                         T.log(2 * np.pi), axis=-2)
    nll = -logsumexp(T.log(coeff) + inner, axis=-1)

    return nll.reshape(shape_y[:-1], ndim=n_dim - 1)
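# --- Hedged numeric check of the wrapped distance in GMM_phase:
# pi - |mod(d, 2*pi) - pi| is the circular distance |d| folded into [0, pi].
import numpy as np
import theano.tensor as T

d = T.vector('d')
circ = np.pi - abs(T.mod(d, 2 * np.pi) - np.pi)
print(circ.eval({d: np.array([0., np.pi, 3 * np.pi], dtype=d.dtype)}))
# -> [0., pi, pi]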
def chromagram_(self, img, y_mask=None):
    if y_mask is not None:
        img = img[:, :, :, y_mask]
    width = 4
    MAJOR = [5.0, 2.0, 3.5, 2.0, 4.5, 4.0, 2.0, 4.5, 2.0, 3.5, 1.5, 4.0]
    MINOR = [5.0, 2.0, 3.5, 4.5, 2.0, 4.0, 2.0, 4.5, 3.5, 2.0, 1.5, 4.0]
    filt = np.array([MAJOR, MINOR]).astype(fx).reshape(2, 1, 1, -1)
    filt = np.repeat(filt, width, axis=2)
    img_filt = theano.shared(filt, 'filt_key_profiles')
    conv_out = dnn_conv(img=img, kerns=img_filt, conv_mode='cross',
                        border_mode='full', subsample=(1, 1))
    # truncate the pitch axis to a multiple of 12; computing the kept length
    # explicitly avoids the empty slice [:-0] when it already is a multiple
    keep = conv_out.shape[3] - T.mod(conv_out.shape[3], 12)
    key_prof = conv_out[:, :, :, :keep]
    key_prof = T.reshape(key_prof, newshape=(conv_out.shape[0], conv_out.shape[1],
                                             conv_out.shape[2], -1, 12))
    key_prof = T.sum(key_prof, axis=3)
    key_prof = key_prof - T.min(key_prof, axis=3, keepdims=True)
    key_prof = key_prof / (T.max(key_prof, axis=3, keepdims=True) + 1e-6)
    return key_prof
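# --- Hedged check of the truncation above: computing the kept length
# explicitly never produces the empty slice that `[:-T.mod(n, 12)]` yields
# when n is already a multiple of 12.
import numpy as np
import theano.tensor as T

x = T.vector('x')
y = x[:x.shape[0] - T.mod(x.shape[0], 12)]
print(y.eval({x: np.zeros(25, dtype=x.dtype)}).shape)   # (24,)
print(y.eval({x: np.zeros(24, dtype=x.dtype)}).shape)   # (24,), not (0,)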
def get_relative_position(self, t, light_delay=False):
    """The planets' positions relative to the star

    Args:
        t: The times where the position should be evaluated.

    Returns:
        The components of the position vector at ``t`` in units of
        ``R_sun``.

    """
    if light_delay:
        raise NotImplementedError(
            "Light travel time delay is not implemented for simple orbits"
        )
    dt = tt.mod(tt.shape_padright(t) - self._ref_time, self.period)
    dt -= self._half_period
    x = tt.squeeze(self.speed * dt)
    y = tt.squeeze(self._b_norm + tt.zeros_like(dt))
    m = tt.abs_(dt) < 0.5 * self.duration
    z = tt.squeeze(m * 1.0 - (~m) * 1.0)
    return x, y, z
def anneal_learning_rate(lr, t, method='half-life', **kwargs):
    if not isinstance(lr, (T.sharedvar.ScalarSharedVariable,
                           T.sharedvar.TensorSharedVariable)):
        raise TypeError('lr must be a shared variable, got %s.' % type(lr))

    lr_ = lr.get_value()
    if method == 'half-life':
        num_iters = kwargs.pop('num_iters', None)
        decay = kwargs.pop('decay', .1)
        if num_iters is None:
            raise ValueError('num_iters must be provided.')
        cond = T.cast(T.or_(T.eq(t, num_iters // 2), T.eq(t, 3 * num_iters // 4)),
                      theano.config.floatX)
        lr.default_update = lr * decay * cond + (1. - cond) * lr
    elif method == 'step':
        step = kwargs.pop('step', None)
        decay = kwargs.pop('decay', .5)
        if step is None:
            raise ValueError('step must be provided.')
        cond = T.cast(T.eq(T.mod(t, step), 0), theano.config.floatX)
        lr.default_update = lr * decay * cond + (1. - cond) * lr
    elif method == 'exponential':
        decay = kwargs.pop('decay', 1e-4)
        t = T.cast(t, theano.config.floatX)
        lr.default_update = lr_ * T.exp(-decay * t)
    elif method == 'linear':
        num_iters = kwargs.pop('num_iters', None)
        if num_iters is None:
            raise ValueError('num_iters must be provided.')
        t = T.cast(t, theano.config.floatX)
        lr.default_update = lr_ * (1. - t / np.cast[theano.config.floatX](num_iters))
    elif method == 'inverse':
        decay = kwargs.pop('decay', .01)
        t = T.cast(t, theano.config.floatX)
        lr.default_update = lr_ / (1. + decay * t)
    else:
        raise ValueError('Unknown annealing method.')
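# --- Hedged usage sketch for anneal_learning_rate with the 'step' schedule
# (illustrative values). `default_update` is applied whenever `lr` is used in
# a compiled function, so pair it with an iteration counter `t` that the
# training function itself increments.
import numpy as np
import theano
import theano.tensor as T

lr = theano.shared(np.cast[theano.config.floatX](0.1), 'lr')
t = theano.shared(np.int32(0))
anneal_learning_rate(lr, t, method='step', step=1000, decay=.5)
# lr.default_update now encodes: halve lr whenever t is a multiple of 1000.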
def input_row_from_variables(ip_, lat_, long_, type_):
    '''Create an input row for the MLP from the inputs'''

    input_row = tensor.zeros([input_size])
    offset = 0

    for _ in range(4):
        input_row = add_one_shot(input_row, offset, tensor.mod(ip_, 256))
        ip_ = tensor.int_div(ip_, 256)
        offset += 256

    translated_lat = tensor.iround((coordinate_size - 1) * (lat_ / 180 + 0.5))
    input_row = add_thermo(input_row, offset, translated_lat)
    offset += coordinate_size

    translated_long = tensor.iround((coordinate_size - 1) * (long_ / 360 + 0.5))
    input_row = add_thermo(input_row, offset, translated_long)
    offset += coordinate_size

    input_row = add_one_shot(input_row, offset, type_ + 1)
    offset += type_size

    return input_row
def in_transit(self, t, r=None, texp=None):
    """Get a list of timestamps that are in transit

    Args:
        t (vector): A vector of timestamps to be evaluated.
        r (Optional): The radii of the planets.
        texp (Optional[float]): The exposure time.

    Returns:
        The indices of the timestamps that are in transit.

    """
    dt = tt.mod(tt.shape_padright(t) - self._ref_time, self.period)
    dt -= self._half_period
    if r is None:
        tol = 0.5 * self.duration
    else:
        x = (r + self.r_star) ** 2 - self._b_norm ** 2
        tol = tt.sqrt(x) / self.speed
    if texp is not None:
        tol += 0.5 * texp
    mask = tt.any(tt.abs_(dt) < tol, axis=-1)
    return tt.arange(t.size)[mask]
def fprop(self, z):
    # `idx` is assumed to be defined in the enclosing scope (e.g. a step
    # counter, as in the variant above that unpacks it from X[0]);
    # zero out the output except on every N-th step
    z = theano.ifelse.ifelse(T.neq(T.mod(idx, self.N), 0), T.zeros_like(z), z)
    z.name = self.name
    return z
def conv3d_fft(input, filters, image_shape=None, filter_shape=None,
               border_mode='valid', pad_last_dim=False):
    """
    Perform a convolution through fft.

    Only supports input whose shape is even on the last dimension. All other
    dimensions can be anything and the filters can have an even or odd last
    dimension.

    The semantics associated with the last three dimensions are not important
    as long as they are in the same order between the inputs and the filters.
    For example, when the convolution is done on a sequence of images, they
    could be either (duration, height, width) or (height, width, duration).

    If you must use input which has an odd width, you can either pad it or
    use the `pad_last_dim` argument which will do it for you and take care
    to strip the padding before returning. pad_last_dim checks that the last
    dimension is odd before the actual padding.

    On valid mode the filters must be smaller than the input.

    input: (b, ic, i0, i1, i2)
    filters: (oc, ic, f0, f1, f2)

    border_mode: 'valid' or 'full'

    pad_last_dim: Unconditionally pad the last dimension of the input to
        turn it from odd to even. Will strip the padding before returning
        the result.
    """
    # use symbolic shapes to compute shape info at runtime if not specified
    if image_shape is None:
        image_shape = input.shape
    if filter_shape is None:
        filter_shape = filters.shape

    # batch size, input channels, input dims 0, 1, 2
    b, ic, i0, i1, i2 = image_shape
    # output channels, input channels, filter dims 0, 1, 2
    oc, ic_, f0, f1, f2 = filter_shape

    # Check that the last dimension is odd
    is_odd = T.eq(T.mod(input.shape[4], 2), 1)

    # pad filters/image to output shape
    if border_mode == 'valid':
        o0 = i0
        o1 = i1
        o2 = i2
        input_padded = input
        if pad_last_dim:
            o2 = ifelse(is_odd, o2 + 1, o2)
            input_padded = T.zeros((b, ic, o0, o1, o2), dtype='float32')
            input_padded = T.set_subtensor(input_padded[:, :, :i0, :i1, :i2],
                                           input)
        filters_padded = T.zeros((oc, ic, o0, o1, o2), dtype='float32')
        filters_padded = T.set_subtensor(filters_padded[:, :, :f0, :f1, :f2],
                                         filters)
    elif border_mode == 'full':
        # In this particular case, the values of (o0, o1) represent
        # the dimensions of the work buffer more than the actual dimensions
        # of the desired output.
        o0 = i0 + 2 * (f0 - 1)
        o1 = i1 + 2 * (f1 - 1)
        o2 = i2 + 2 * (f2 - 1)

        if pad_last_dim:
            o2 = ifelse(is_odd, o2 + 1, o2)

        # We line up the filters and the images in a way
        # such that the filters are tightly placed against the
        # top-left of the array, and the images intersect with
        # them on one pixel. The top-left pixel of the images
        # is the bottom-right pixel of the filters when we
        # do the layout here.
        filters_padded = T.zeros((oc, ic, o0, o1, o2), dtype='float32')
        filters_padded = T.set_subtensor(filters_padded[:, :, :f0, :f1, :f2],
                                         filters)

        input_padded = T.zeros((b, ic, o0, o1, o2), dtype='float32')
        input_padded = T.set_subtensor(
            input_padded[:, :, (f0 - 1):(f0 - 1 + i0),
                         (f1 - 1):(f1 - 1 + i1),
                         (f2 - 1):(f2 - 1 + i2)],
            input)
    else:
        raise ValueError('invalid mode')

    # reshape for FFT
    input_flat = input_padded.reshape((b * ic, o0, o1, o2))
    filters_flat = filters_padded.reshape((oc * ic, o0, o1, o2))

    # perform FFT
    input_fft_flat = cufft(input_flat)      # (b * ic, o0, o1, o2//2 + 1, 2)
    filters_fft_flat = cufft(filters_flat)  # (oc * ic, o0, o1, o2//2 + 1, 2)

    # Unfold ic dimension.
    # We have to collapse two dimensions together
    # in order to reuse the same `mult_and_reduce`.
    # This explains the o0 * o1 instead of just keeping
    # the two dimensions intact.
    input_fft_v_shape = (b, ic, o0 * o1, o2 // 2 + 1, 2)
    filters_fft_v_shape = (oc, ic, o0 * o1, o2 // 2 + 1, 2)

    input_fft_v = input_fft_flat.reshape(input_fft_v_shape)
    filters_fft_v = filters_fft_flat.reshape(filters_fft_v_shape)

    # (b, oc, o0 * o1, o2//2 + 1, 2)
    output_fft_s = mult_and_reduce(input_fft_v, filters_fft_v,
                                   input_shape=input_fft_v_shape,
                                   filter_shape=filters_fft_v_shape)
    # output_fft_s = input_fft_v

    # reshape for IFFT
    output_fft_flat = output_fft_s.reshape((b * oc, o0, o1, o2 // 2 + 1, 2))

    # perform IFFT
    output_flat = cuifft(output_fft_flat)  # (b * oc, o0, o1, o2)

    # reshape
    output_circ = output_flat.reshape((b, oc, o0, o1, o2))  # circular!

    # Now we extract the region of interest.
    # We just cut it out from the output_circ
    # array that was used for the computation.
    # We do not need to handle pad_last_dim in a
    # special way because we specify explicitly here
    # how many values are expected.
    if border_mode == 'valid':
        output = output_circ[:, :,
                             (f0 - 1):(f0 - 1 + i0 - f0 + 1),
                             (f1 - 1):(f1 - 1 + i1 - f1 + 1),
                             (f2 - 1):(f2 - 1 + i2 - f2 + 1)]
    elif border_mode == 'full':
        output = output_circ[:, :,
                             (f0 - 1):(f0 - 1 + i0 + f0 - 1),
                             (f1 - 1):(f1 - 1 + i1 + f1 - 1),
                             (f2 - 1):(f2 - 1 + i2 + f2 - 1)]
    else:
        raise ValueError('invalid mode')
    # output = output_circ[:, :, :, :, :]

    # Rescale manually. This is just a factor that comes in during the
    # trip through FFT and inverse FFT.
    output = (1.0 / T.cast(o0 * o1 * o2, 'float32')) * output

    # output should now be the result of a batched valid convolution
    # of the input with the filters.
    return basic_ops.as_cuda_ndarray_variable(output)