def compute_overlap(mat1, mat2):
    s1 = mat1.shape[0]
    s2 = mat2.shape[0]
    area1 = (mat1[:, 2] - mat1[:, 0]) * (mat1[:, 3] - mat1[:, 1])
    if mat2.shape[1] == 5:
        area2 = mat2[:, 4]
    else:
        area2 = (mat2[:, 2] - mat2[:, 0]) * (mat2[:, 3] - mat2[:, 1])
    x1 = cartesian([mat1[:, 0], mat2[:, 0]])
    x1 = np.amax(x1, axis=1)
    x2 = cartesian([mat1[:, 2], mat2[:, 2]])
    x2 = np.amin(x2, axis=1)
    com_zero = np.zeros(x2.shape[0])
    w = x2 - x1
    w = w - 1
    w = np.maximum(com_zero, w)
    y1 = cartesian([mat1[:, 1], mat2[:, 1]])
    y1 = np.amax(y1, axis=1)
    y2 = cartesian([mat1[:, 3], mat2[:, 3]])
    y2 = np.amin(y2, axis=1)
    h = y2 - y1
    h = h - 1
    h = np.maximum(com_zero, h)
    oo = w * h
    aa = cartesian([area1[:], area2[:]])
    aa = np.sum(aa, axis=1)
    ooo = oo / (aa - oo)
    overlap = np.transpose(ooo.reshape(s1, s2), (1, 0))
    return overlap
def test_cartesian():
    # Check if cartesian product delivers the right results
    axes = (np.array([1, 2, 3]), np.array([4, 5]), np.array([6, 7]))
    true_out = np.array(
        [
            [1, 4, 6],
            [1, 4, 7],
            [1, 5, 6],
            [1, 5, 7],
            [2, 4, 6],
            [2, 4, 7],
            [2, 5, 6],
            [2, 5, 7],
            [3, 4, 6],
            [3, 4, 7],
            [3, 5, 6],
            [3, 5, 7],
        ]
    )
    out = cartesian(axes)
    assert_array_equal(true_out, out)

    # check single axis
    x = np.arange(3)
    assert_array_equal(x[:, np.newaxis], cartesian((x,)))
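# Note: the snippets in this collection rely on a `cartesian` helper, typically
# sklearn.utils.extmath.cartesian. As a point of reference, a minimal
# NumPy-only sketch (illustrative, not the sklearn implementation) that
# reproduces the ordering asserted in test_cartesian above is:
import numpy as np

def cartesian_sketch(arrays):
    """Cartesian product of 1-D input arrays; the last input varies fastest."""
    arrays = [np.asarray(a) for a in arrays]
    grids = np.meshgrid(*arrays, indexing='ij')
    return np.stack([g.ravel() for g in grids], axis=-1)

# cartesian_sketch((np.array([1, 2, 3]), np.array([4, 5]), np.array([6, 7])))
# returns the same 12 x 3 array as true_out in test_cartesian above.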
def _grid_find_neighbors(self, X, k=3):
    n_x = np.shape(X)[0]
    n_dims = len(self.grid_dims)
    n_neighbors = k**n_dims
    neighbors_per_dim_X, distances_per_dim = self._find_k_nearest_per_dim(
        X, k=k)
    distances_all = np.zeros((n_x, n_neighbors, n_dims))
    neighbor_coordinates_all = np.zeros((n_x * n_neighbors, n_dims))
    for ii in range(n_x):
        neighbor_coordinates_all[ii * n_neighbors:(ii + 1) * n_neighbors, :] = cartesian(
            neighbors_per_dim_X[ii, :, :])
        distances_all[ii, :, :] = cartesian(distances_per_dim[ii, :, :])
    indices_all = self._kron_grid_indices_to_matrix_indices(
        neighbor_coordinates_all.astype(int))
    distances_all = np.mean(np.square(distances_all), axis=2)
    return distances_all, indices_all
def predict(self, X, alpha=.05):
    n_sample = self.X_train.shape[0]
    self.n_iter = max(self.n_iter, int(np.sqrt(n_sample)))
    y_hat_b = np.zeros((self.n_iter, X.shape[0]))
    residuals_val = []

    # bootstrap
    for b in range(self.n_iter):
        idx_train = np.random.choice(np.arange(n_sample), n_sample,
                                     replace=True)
        idx_val = np.setdiff1d(np.arange(n_sample), idx_train)
        self.model.fit(self.X_train[idx_train], self.y_train[idx_train])
        y_hat_train_b = self.model.predict(self.X_train[idx_val])
        residuals_val.append(self.y_train[idx_val] - y_hat_train_b)
        y_hat_b[b] = self.model.predict(X)
    residuals_val = np.concatenate(residuals_val)

    # training residuals
    self.model.fit(self.X_train, self.y_train)
    y_hat_train = self.model.predict(self.X_train)
    residuals_train = self.y_train - y_hat_train

    # take percentiles to allow comparison between train and validation
    # residuals
    residuals_val = np.percentile(residuals_val, q=np.arange(100))
    residuals_train = np.percentile(residuals_train, q=np.arange(100))

    # compute weighted residuals to account for overfitting as we use
    # training residuals set to estimate predictions intervals
    if n_sample > self.max_samples:
        combs_idx = np.random.choice(np.arange(n_sample), self.max_samples)
        combs = cartesian((self.y_train[combs_idx], y_hat_train[combs_idx]))
    else:
        combs = cartesian((self.y_train, y_hat_train))
    no_info_err_rate = ((combs[:, 0] - combs[:, 1])**2).mean()
    relative_overfit_rate = (residuals_val.mean() - residuals_train.mean()) / (
        no_info_err_rate - residuals_train.mean())
    weight = .632 / (1 - .368 * relative_overfit_rate)
    residuals = (1 - weight) * residuals_train + weight * residuals_val

    # compute the estimate of the noise around the bootstrapped predictions
    # and take percentiles as prediction intervals
    C = np.array([[m + o for m in y_hat_b[:, i] for o in residuals]
                  for i in range(X.shape[0])])
    q = [100 * alpha / 2, 100 * (1 - alpha / 2)]
    percentiles = np.percentile(C, q, axis=1)
    y_hat = self.model.predict(X)
    return y_hat, percentiles
def worker(args):
    p, processors, partitions, dataFrame, H0, covariance_class, n, d = args
    list_bins = []
    list_means = []
    list_digitized = []
    for k in range(d):
        partition = partitions[k]
        data = dataFrame[k]
        # min, max, parts count
        dim_bins = linspace(partition[0], partition[1], partition[2] + 1)
        list_means.append((dim_bins[:-1] + dim_bins[1:]) / 2.)
        bin_dig = digitize(data, dim_bins)
        bin_dig[bin_dig == partition[2] + 1] = partition[2]
        list_digitized.append(bin_dig)
        list_bins.append(linspace(1, partition[2], partition[2]))
    digitized = vstack(list_digitized).T
    bins = cartesian(list_bins)
    bin_means = cartesian(list_means)
    selections = []
    H_s = []
    # calculate this processor's chunk of bins
    chunks = arange(len(bins)) % processors == p
    iu = triu_indices(d, 1)
    if covariance_class == 'H3':
        # Square covariance matrix
        h0 = mvn.unrollSigma(H0, iu)
    else:
        h0 = H0**0.5
    for bin, amean in zip(bins[chunks], bin_means[chunks]):
        selection = (digitized == bin).all(axis=1)
        if selection.any():
            res = optimize.minimize(getBinnedUnbiasedIMSE,
                                    x0=h0,
                                    args=(dataFrame, bin_means,
                                          pd.DataFrame(atleast_2d(amean)),
                                          covariance_class, d, iu, selection),
                                    method='BFGS',
                                    options={'gtol': 1e-4, 'eps': 1e-5})
            res = res.x
            if covariance_class == 'H3':
                H_s.append(mvn.rollSigma(res, d, iu))
            elif covariance_class == 'H2':
                H_s.append(res**2.)
            else:
                H_s.append(ones(d) * res**2.)
            selections.append(selection)
    return selections, H_s
def __init__(self, appliance_power_dict={}):
    self.power_list = appliance_power_dict
    self.index_to_status = cartesian(
        [i for i in range(len(self.power_list[app]))]
        for app in self.power_list)
    self.MODEL_NAME = "BNILM"
    self.compute_all_state()
def weighted_hausdorff_distance(w, h, alpha):
    all_img_locations = tf.convert_to_tensor(cartesian([np.arange(w),
                                                        np.arange(h)]),
                                             dtype=tf.float32)
    max_dist = math.sqrt(w ** 2 + h ** 2)

    def hausdorff_loss(y_true, y_pred):
        def loss(y_true, y_pred):
            eps = 1e-6
            y_true = K.reshape(y_true, [w, h])
            gt_points = K.cast(tf.where(y_true > 0.5), dtype=tf.float32)
            num_gt_points = tf.shape(gt_points)[0]
            y_pred = K.flatten(y_pred)
            p = y_pred
            p_replicated = tf.squeeze(K.repeat(tf.expand_dims(p, axis=-1),
                                               num_gt_points))
            d_matrix = cdist(all_img_locations, gt_points)
            num_est_pts = tf.reduce_sum(p)
            term_1 = (1 / (num_est_pts + eps)) * K.sum(p * K.min(d_matrix, 1))
            d_div_p = K.min((d_matrix + eps) / (p_replicated ** alpha + (eps / max_dist)), 0)
            d_div_p = K.clip(d_div_p, 0, max_dist)
            term_2 = K.mean(d_div_p, axis=0)
            return term_1 + term_2

        batched_losses = tf.map_fn(lambda x: loss(x[0], x[1]),
                                   (y_true, y_pred), dtype=tf.float32)
        return K.mean(tf.stack(batched_losses))

    return hausdorff_loss
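# In equation form, the per-image loss computed by `loss` above is (LaTeX;
# \Omega = all pixel locations, Y = ground-truth points, p_x = prediction at
# pixel x, d = Euclidean distance, d_max = the image diagonal; the second term
# is additionally clipped to [0, d_max] before averaging):
#
#   \mathcal{L} = \frac{1}{\sum_{x\in\Omega} p_x + \epsilon}
#                 \sum_{x\in\Omega} p_x \min_{y\in Y} d(x, y)
#               + \frac{1}{|Y|} \sum_{y\in Y} \min_{x\in\Omega}
#                 \frac{d(x, y) + \epsilon}{p_x^{\alpha} + \epsilon / d_{\max}}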
def createStatePoints(self):
    stateDimArrays = []
    for d in range(len(self.minList)):
        dimArray = np.linspace(self.minList[d], self.maxList[d],
                               NUM_POINTS_PER_DIM)
        stateDimArrays.append(dimArray)
    return cartesian(stateDimArrays)
def combine_args(**argarrs):
    # argarrs are [arg name]=[list of values]
    # Get all permutations of the arguments. Returns a pandas data frame with
    # the argument names as the columns and the cartesian product of all their
    # possible values.
    # Note that this can't handle None values (at least not yet)
    arg_keys = list(argarrs.keys())
    if len(arg_keys) == 0:
        raise ValueError("Must be at least one keyword argument (if you don't "
                         "want to train multiple models just use lists with "
                         "single entries)")
    arg_tup = ()
    str_lens = []
    type_list = []
    M = 1
    for key in arg_keys:
        str_vals = [str(entry) for entry in argarrs[key]]
        str_lens.extend([len(entry) for entry in str_vals])
        type_list.append(argarrs[key].dtype)
        M *= len(argarrs[key])
        arg_tup += (str_vals,)
    max_str_lens = max(str_lens)
    all_arg_combos = np.zeros((M, len(arg_keys)),
                              dtype='S{0:d}'.format(max_str_lens))
    all_arg_combos = pd.DataFrame(cartesian(arg_tup, all_arg_combos),
                                  columns=arg_keys)
    for i, currtype in enumerate(type_list):
        if currtype == np.bool:
            all_arg_combos[arg_keys[i]] = (all_arg_combos[arg_keys[i]] == 'True')
        else:
            all_arg_combos[arg_keys[i]] = all_arg_combos[arg_keys[i]].astype(currtype)
    return all_arg_combos
def gen_training_batch(n_input_dims, n_output_dims, n_possible_tasks, task_ids):
    n_input_units = n_input_dims*2
    n_output_units = n_output_dims*2
    n_inputs = 2**n_input_dims

    idx_list = []
    for i in range(n_input_dims):
        idx_list.append([i*2, i*2+1])
    idx_list = cartesian(idx_list)

    inputs_list = np.zeros((idx_list.shape[0], n_input_units))
    for i in range(idx_list.shape[0]):
        inputs_list[i, :][idx_list[i]] = 1
    inputs_list = np.tile(inputs_list, (len(task_ids), 1))

    task_list = np.zeros((n_inputs*len(task_ids), n_possible_tasks))
    for i in range(len(task_ids)):
        task_list[i*n_inputs:(i*n_inputs+n_inputs), task_ids[i]] = 1

    outputs_list = np.zeros((n_inputs*len(task_ids), n_output_units))
    for i in range(len(task_ids)):
        for j in range(len(task_ids[i])):
            input_dim, output_dim = get_task_dims(n_input_dims, n_output_dims,
                                                  task_ids[i][j])
            input_pattern = inputs_list[i*n_inputs:(i*n_inputs+n_inputs),
                                        input_dim*2:input_dim*2+2]
            outputs_list[i*n_inputs:(i*n_inputs+n_inputs),
                         output_dim*2:output_dim*2+2] = input_pattern

    return inputs_list, task_list, outputs_list
def __init__(self,
             resized_height,
             resized_width,
             p=-9,
             return_2_terms=False,
             device=torch.device('cpu')):
    """
    :param resized_height: Number of rows in the image.
    :param resized_width: Number of columns in the image.
    :param p: Exponent in the generalized mean. -inf makes it the minimum.
    :param return_2_terms: Whether to return the 2 terms of the WHD instead
                           of their sum. Default: False.
    :param device: Device where all Tensors will reside.
    """
    super(nn.Module, self).__init__()

    # Prepare all possible (row, col) locations in the image
    self.height, self.width = resized_height, resized_width
    self.resized_size = torch.tensor([resized_height, resized_width],
                                     dtype=torch.get_default_dtype(),
                                     device=device)
    self.max_dist = math.sqrt(resized_height**2 + resized_width**2)
    self.n_pixels = resized_height * resized_width
    self.all_img_locations = torch.from_numpy(
        cartesian([np.arange(resized_height), np.arange(resized_width)]))
    # Convert to appropriate type
    self.all_img_locations = self.all_img_locations.to(
        device=device, dtype=torch.get_default_dtype())

    self.return_2_terms = return_2_terms
    self.p = p
def _generate_sample(self, n, sample_type, generate_pars):
    # create the array using the spacing method of choice
    raw_sample = None
    if sample_type == "sobol":
        from sobol_seq import i4_sobol_generate
        raw_sample = i4_sobol_generate(len(generate_pars), n)
    elif sample_type == "saltelli":
        from SALib.sample import saltelli
        problem = {
            "names": generate_pars,
            "bounds": [[0, 1] for x in generate_pars],
            "num_vars": len(generate_pars),
        }
        raw_sample = saltelli.sample(problem, n, True)
    elif sample_type == "grid":
        from sklearn.utils.extmath import cartesian
        temp = np.linspace(0, 1, n)
        raw_sample = cartesian([temp for i in range(len(generate_pars))])
    elif sample_type == "random":
        raw_sample = np.random.random((n, len(generate_pars)))
    assert raw_sample is not None, "something went wrong - check that type is correct"
    print("expected shape is {}".format(raw_sample.shape))

    # map the raw array to bounds, adhering to log scaling rules
    scaled_sample = self.log_scale_matrix(raw_sample)
    return scaled_sample
def generate_logistic_parameters(features_num, step_size=0.2, min_val=-1.0,
                                 max_val=1.0, digits=1, items_as_np=True,
                                 include_zero=False):
    """Build a grid of parameter values from max_val down to min_val in steps
    of step_size (optionally excluding zero), one axis per feature."""
    feature_values = []
    i = round(max_val, digits)
    while i >= min_val:
        if include_zero or i != 0:
            feature_values.append(i)
        i -= step_size
        i = round(i, digits)

    feature_values = np.array(feature_values)
    features = (feature_values for _ in range(features_num))
    grid = cartesian(features)
    if items_as_np:
        grid = [np.array(i) for i in grid]

    return grid
def extract_distances_series(nodes_list: pd.Series,
                             shortest_paths_matrix: np.matrix,
                             nodes_mapping: Dict) -> pd.Series:
    """
    Given a list of two lists of nodes, return the shortest paths between all
    pairs of nodes drawn from the two lists.
    e.g. given [[node_1, node_2], [node_3]] this will output the shortest paths
    between 1 and 3 and between 2 and 3.
    :param nodes_list:
    :param shortest_paths_matrix: matrix generated by extract_shortest_paths_matrix
    :param nodes_mapping: dict { node_name found in node_list : id used to
        encode the node in the matrix } e.g. { node1: 1, node2: 2, node3: 3 }
    :return:
    """
    if isinstance(nodes_list, list) and len(nodes_list) == 2 \
            and nodes_list[0] and nodes_list[1]:
        mapped_nodes_1 = np.array([nodes_mapping[node] for node in nodes_list[0]])
        mapped_nodes_2 = np.array([nodes_mapping[node] for node in nodes_list[1]])
        c = cartesian((mapped_nodes_1, mapped_nodes_2))
        return shortest_paths_matrix[c[:, 0], c[:, 1]].tolist()[0]
def weights(dim, degree):
    # 1D sigma-points (x) and weights (w)
    x, w = hermegauss(degree)
    # hermegauss() provides weights that cause posdef errors
    w = factorial(degree) / (degree**2 * hermeval(x, [0] * (degree - 1) + [1])**2)
    return np.prod(cartesian([w] * dim), axis=1)
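# The re-derived weights above appear to follow the classical closed form for
# probabilists' Gauss-Hermite quadrature, with the sqrt(2*pi) factor dropped so
# that the weights sum to one (i.e. quadrature against the standard normal
# density rather than exp(-x^2/2)). For the n roots x_i of He_n (LaTeX):
#
#   w_i = \frac{n!}{n^2 \, [He_{n-1}(x_i)]^2}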
def compute_reward(grid_map, cell_list, passenger_list, rew):
    """
    Compute the reward matrix.

    Args:
        grid_map (list): list containing the grid structure;
        cell_list (list): list of non-wall cells;
        passenger_list (list): list of passenger cells;
        rew (tuple): rewards obtained in goal states.

    Returns:
        The reward matrix.

    """
    g = np.array(grid_map)
    c = np.array(cell_list)
    n_states = len(cell_list) * 2**len(passenger_list)
    r = np.zeros((n_states, 4, n_states))
    directions = [[-1, 0], [1, 0], [0, -1], [0, 1]]
    passenger_states = cartesian([[0, 1]] * len(passenger_list))

    for goal in np.argwhere(g == 'G'):
        for a in range(len(directions)):
            prev_state = goal - directions[a]
            if prev_state in c:
                for i in range(len(passenger_states)):
                    i_idx = np.where((c == prev_state).all(axis=1))[0] + len(
                        cell_list) * i
                    j_idx = np.where((c == goal).all(axis=1))[0] + len(
                        cell_list) * i
                    r[i_idx, a, j_idx] = rew[np.sum(passenger_states[i])]

    return r
def zrand_convolve(labelgrid, neighbors='edges'):
    """
    Calculates the avg and std z-Rand index using kernel over `labelgrid`

    Kernel is determined by `neighbors`, which can include all entries with
    touching edges (i.e., 4 neighbors) or corners (i.e., 8 neighbors).

    Parameters
    ----------
    grid : (S, K, N) array_like
        Array containing cluster labels for each `N` samples, where `S` is mu
        and `K` is K.
    neighbors : str, optional
        How many neighbors to consider when calculating Z-rand kernel. Must be
        in ['edges', 'corners']. Default: 'edges'

    Returns
    -------
    zrand_avg : (S, K) np.ndarray
        Array containing average of the z-Rand index calculated using provided
        neighbor kernel
    zrand_std : (S, K) np.ndarray
        Array containing standard deviation of the z-Rand index
    """
    inds = cartesian([range(labelgrid.shape[0]), range(labelgrid.shape[1])])
    zrand = np.empty(shape=labelgrid.shape[:-1] + (2,))
    for x, y in inds:
        ninds = get_neighbors(x, y, neighbors=neighbors, shape=labelgrid.shape)
        zrand[x, y] = zrand_partitions(labelgrid[ninds].T)

    return zrand[..., 0], zrand[..., 1]
def calc_cartesian_group_assignment_p(group_assignment_p_list):
    cart = np.array(
        [[np.prod(values) for values in cartesian(line)]
         for line in zip(*group_assignment_p_list)])
    return cart
def search(self):
    arg_keywords = np.array([key for key, _ in self.data.items()])
    combinations = cartesian([self.data[x] for x in arg_keywords])
    print(combinations)
    print(combinations.shape[0], 'combinations.')

    results = []
    dictionaries_list = []
    for parameter_combination in combinations:
        # Initializing Population
        optimization = Optimization(
            self.image_path,
            population_size=self.population_size,
            polygons_count=self.polygons_count
        )
        dict = {}
        for column, value in enumerate(parameter_combination):
            dict[arg_keywords[column]] = value
        dictionaries_list.append(dict)
        results.append(optimization.evolve_during(**dict)[1])

    # Ordering results
    order = sorted(range(len(results)), key=lambda k: results[k], reverse=True)
    dictionaries_list = [dictionaries_list[i] for i in order]
    results = [results[i] for i in order]

    for index, result in enumerate(results):
        print('Improvement ' + str(result),
              '\tParameters: ' + str(dictionaries_list[index]))
def rectangles_to_states(rectangles):
    states = []
    for rect in rectangles:
        rect_axes = [np.arange(rect_d[0], rect_d[1]) for rect_d in rect]
        states += list(cartesian(rect_axes))
    states = np.array(states)
    return states
def CRS(self, UpperLimit, LowerLimit, SampleNum, ParaMode=None):
    try:
        if len(SampleNum) == 1:
            raise ValueError
    except ValueError as e:
        print(e.args)
        print('The parameter sets have to be at least two dimensional.')
        exit()

    paratmp = np.zeros(len(UpperLimit), dtype=object)
    for itr, mode in enumerate(ParaMode):
        if mode == 'log scale':
            tmpfunc = np.logspace
        else:
            tmpfunc = np.linspace
        ParaSet = tmpfunc(UpperLimit[itr], LowerLimit[itr], SampleNum[itr])
        paratmp[itr] = ParaSet

    para = cartesian([paratmp[0], paratmp[1]])
    with open('CRS_Parameter.pickle', 'wb') as picklefile:
        cPickle.dump(para, picklefile, True)
    # ==========LHS_Parameter.pickle==============#
    # [[Prod, Deg, Bind, Diffu
    # ........................
    # ........................
    #  .......................]]
    print(para)

    return para
def factorial_design(n, d, plot_=False):
    # n is number of points in nth dimension
    # d is the number of factors
    # full factorial design is when the number of levels = number of factors
    otmp = d * [np.arange(n)]
    o = cartesian(otmp)
    D = (o - np.min(o)) / (np.max(o) - np.min(o))
    if plot_:
        plt.close()
        fig1 = plt.figure()
        ax = fig1.add_subplot(111)
        ax.scatter(D[:, 0].reshape((D.shape[0], 1)),
                   D[:, 1].reshape((D.shape[0], 1)))
        ax.set_title('dim-1,dim-2 full factorial design')
        ax.set_xlabel('dim-1')
        ax.set_ylabel('dim-2')
        return (D, fig1)
    return D
def _bspline_direct_elementwise(self):
    # important later for reshapes into elem colloc matrices
    self.B = [Bk.tocsr() for Bk in self.B]

    Bel = [None] * self.domain.dim
    nip_el = self.quadrature.deg
    gridshape = [self.domain.nelem(k) for k in range(0, self.domain.dim)]

    # get elementwise univariate colloc matrices
    for k in range(0, self.domain.dim):
        Bel[k] = [None] * self.domain.nelem(k)
        for el in range(0, self.domain.nelem(k)):
            Bel[k][el] = self.B[k][:, nip_el[k]*el:nip_el[k]*el+nip_el[k]].data.reshape(-1, nip_el[k])

    # get kronecker jacobian on each element as a view of self.J
    Jel = [None] * self.domain.nelem()
    for el in range(0, self.domain.nelem()):
        el_mulidx = np.unravel_index(el, gridshape)
        slices = tuple([slice(nip_el[k]*el_mulidx[k], nip_el[k]*el_mulidx[k]+nip_el[k])
                        for k in range(0, self.domain.dim)])
        Jel[el] = self.J[slices].view().reshape(-1)

    # allocate and initialize global matrices
    A = np.zeros((np.prod(self.domain.nbfuns), np.prod(self.domain.nbfuns)))
    B = np.zeros((np.prod(self.domain.nbfuns), np.prod(self.domain.nbfuns)))

    # compute tensorproduct bsplines collocation matrix at greville abs
    Bi = self.Bgp[0]
    for k in range(1, self.domain.dim):
        Bi = kron(Bi, self.Bgp[k])
    Bi = Bi.tocsr()

    # compute element matrices and add contribution to A
    for el in range(0, self.domain.nelem()):
        # get kronecker xips on each element
        el_mulidx = np.unravel_index(el, gridshape)
        slices = tuple([slice(nip_el[d]*el_mulidx[d], nip_el[d]*el_mulidx[d]+nip_el[d])
                        for d in range(0, self.domain.dim)])
        el_ip = [self.quadrature.ip[d][slices[d]] for d in range(0, self.domain.dim)]
        el_xip = [self.domain.eval(el_ip, d) for d in range(self.domain.dim)]

        # define index maps
        ldofs = [self.B[d][:, nip_el[d]*el_mulidx[d]].nonzero()[0]
                 for d in range(0, self.domain.dim)]
        gdofs = np.ravel_multi_index(cartesian(ldofs).transpose(), self.domain.nbfuns)

        # compute the kernel on the element
        Gel = self.kernel(_kern_pts_to_mulidx(self.gpp),
                          _kern_pts_to_mulidx(el_xip), self.data)

        # precompute element basis matrices
        Bj = Bel[0][el_mulidx[0]] * self.quadrature.weights[0][slices[0]]
        for k in range(1, self.domain.dim):
            Bj = np.kron(Bj, Bel[k][el_mulidx[k]] * self.quadrature.weights[k][slices[k]])
        Bj = Bj * Jel[el]

        # assemble A on the element
        Apart = Gel @ Bj.transpose()
        # I have to sum over the elements twice here!!!!
        A[:, gdofs] += Apart

    return A, Bi.transpose()
def _initialize_event_orders(self, timestamps, order_type):
    symb_matrix = mth.cartesian([np.array(timestamps),
                                 self.event_matrix.columns.values])
    symb_matrix = symb_matrix.reshape(len(timestamps),
                                      len(self.event_matrix.columns.values), 2)
    order_timestamps = symb_matrix[~np.isnan(self.event_matrix.values), 0]
    order_dataframe = pd.DataFrame(
        symb_matrix[~np.isnan(self.event_matrix.values), 1],
        columns=['Symbol'])
    order_dataframe['Buy'] = order_type
    return (order_dataframe, order_timestamps)
def get_representations(dataset, postprocess_dir, dataset_name):
    batch_size = 32
    module_path = os.path.join(postprocess_dir, "tfhub")
    reps = []
    with hub.eval_function_for_module(module_path) as f:

        def _representation_function(x):
            """Computes representation vector for input images."""
            output = f(dict(images=x), signature="representation",
                       as_dict=True)
            return np.array(output["default"])

        for index in range(0, len(dataset.images), batch_size):
            batch = dataset.images[
                index:min(index + batch_size, dataset.images.shape[0]), :]
            if dataset_name == "smallnorb":
                batch = np.expand_dims(batch, axis=3)
            rep = _representation_function(batch)
            reps.append(rep)
    reps = np.vstack(reps)

    # factors
    factors = cartesian(
        [np.array(list(range(i))) for i in dataset.factors_num_values])
    return factors, reps
def find_weights(df, max_depth, init_step=0.1):
    trmse_weights = []
    for i in range(max_depth):
        curr_step = init_step / (2.0 ** i)
        if i == 0:
            tbase_weights = [np.arange(0., 1, init_step)
                             for i in range(n_dfs - 1)]
        else:
            tbase_weights = [np.arange(max(0., trmse_weights[0][1][i] - curr_step),
                                       min(1., trmse_weights[0][1][i] + curr_step * 2),
                                       curr_step)
                             for i in range(n_dfs - 1)]
        tcartesian_w = cartesian(tbase_weights)
        tsummed_weights = np.sum(tcartesian_w, axis=1)
        tcartesian_w = tcartesian_w[tsummed_weights <= 1.0, :]
        tsummed_weights = tsummed_weights[tsummed_weights <= 1.0]
        tsummed_weights = tsummed_weights.reshape(-1, 1)
        tcartesian_w = np.hstack((tcartesian_w, 1. - tsummed_weights))
        print('Current depth:', str(i) + ';', tcartesian_w.shape[0],
              'weight combinations')
        for j in range(tcartesian_w.shape[0]):
            if j % 100 == 0 and j > 100:
                print(j)
            trmse = calc_rmse(tcartesian_w[j, :], full_df)
            trmse_weights.append((trmse, tcartesian_w[j, :]))
        trmse_weights.sort(key=lambda x: x[0])
    print('Best result:', trmse_weights[0][0], '\n')
    return trmse_weights
def minimum_cost_flow_problem_graph(X, C, D, size_min, size_max):
    # Setup minimum cost flow formulation graph
    # Vertices indexes:
    # X-nodes: [0, n(x)-1], C-nodes: [n(X), n(X)+n(C)-1],
    # C-dummy nodes: [n(X)+n(C), n(X)+2*n(C)-1],
    # Artificial node: [n(X)+2*n(C), n(X)+2*n(C)+1-1]

    # Create indices of nodes
    n_X = X.shape[0]
    n_C = C.shape[0]
    X_ix = np.arange(n_X)
    C_dummy_ix = np.arange(X_ix[-1] + 1, X_ix[-1] + 1 + n_C)
    C_ix = np.arange(C_dummy_ix[-1] + 1, C_dummy_ix[-1] + 1 + n_C)
    art_ix = C_ix[-1] + 1

    # Edges
    edges_X_C_dummy = cartesian(
        [X_ix, C_dummy_ix])  # All X's connect to all C dummy nodes (C')
    edges_C_dummy_C = np.stack(
        [C_dummy_ix, C_ix], axis=1)  # Each C' connects to a corresponding C (centroid)
    edges_C_art = np.stack([C_ix, art_ix * np.ones(n_C)],
                           axis=1)  # All C connect to artificial node
    edges = np.concatenate([edges_X_C_dummy, edges_C_dummy_C, edges_C_art])

    # Costs
    costs_X_C_dummy = D.reshape(D.size)
    costs = np.concatenate(
        [costs_X_C_dummy, np.zeros(edges.shape[0] - len(costs_X_C_dummy))])

    # Capacities - can set for max-k
    capacities_C_dummy_C = size_max * np.ones(n_C)
    cap_non = n_X  # The total supply and therefore won't restrict flow
    capacities = np.concatenate([
        np.ones(edges_X_C_dummy.shape[0]),
        capacities_C_dummy_C,
        cap_non * np.ones(n_C)
    ])

    # Sources and sinks
    supplies_X = np.ones(n_X)
    supplies_C = -1 * size_min * np.ones(n_C)  # Demand node
    supplies_art = -1 * (n_X - n_C * size_min)  # Demand node
    supplies = np.concatenate([
        supplies_X,
        np.zeros(n_C),  # C_dummies
        supplies_C,
        [supplies_art]
    ])

    # All arrays must be of int dtype for `SimpleMinCostFlow`
    edges = edges.astype('int32')
    costs = np.around(costs * 1000, 0).astype('int32')  # Times by 1000 to give extra precision
    capacities = capacities.astype('int32')
    supplies = supplies.astype('int32')

    return edges, costs, capacities, supplies, n_C, n_X
def __init__(self, W, H, alpha=2):
    self.W = W
    self.H = H
    self.alpha = alpha
    self.all_img_locations = tf.convert_to_tensor(
        cartesian([np.arange(W), np.arange(H)]), dtype=tf.float32)
    self.max_dist = math.sqrt(W**2 + H**2)
def _set_state_combinations_if_necessary(self):
    """Get centroids"""
    # If we import sklearn at the top of the file then auto doc fails.
    if (self.state_combinations is None or
            self.state_combinations.shape[1] != len(self.model)):
        from sklearn.utils.extmath import cartesian
        centroids = [model['states'] for model in self.model]
        self.state_combinations = cartesian(centroids)
def _grid_from_X(X, percentiles=(0.05, 0.95), grid_resolution=100): """Generate a grid of points based on the ``percentiles of ``X``. The grid is a cartesian product between the columns of Z. The ith column of Z consists in ``grid_resolution`` equally-spaced points between the percentiles of the ith column of X. If ``grid_resolution`` is bigger than the number of unique values in the ith column of X, then those unique values will be used instead. Parameters ---------- X : ndarray The data percentiles : tuple of floats The percentiles which are used to construct the extreme values of the grid. grid_resolution : int The number of equally spaced points to be placed on the grid for a given column. Returns ------- grid : ndarray, shape=(n_points, X.shape[1]) All data points on the grid. n_points is always ``<= grid_resolution ** X.shape[1]``. Z: list of ndarray The values with which the grid has been created. The ndarrays may be of different shape: either (grid_resolution,) or (n_unique_values,). """ try: assert len(percentiles) == 2 except (AssertionError, TypeError): raise ValueError('percentiles must be a sequence of 2 elements.') if not all(0. <= x <= 1. for x in percentiles): raise ValueError('percentiles values must be in [0, 1].') if percentiles[0] >= percentiles[1]: raise ValueError('percentiles[0] must be strictly less ' 'than percentiles[1].') if grid_resolution <= 1: raise ValueError('grid_resolution must be strictly greater than 1.') values = [] for feature in range(X.shape[1]): uniques = np.unique(X[:, feature]) if uniques.shape[0] < grid_resolution: # feature has low resolution use unique vals axis = uniques else: # create axis based on percentiles and grid resolution emp_percentiles = mquantiles(X, prob=percentiles, axis=0) if np.allclose(emp_percentiles[0, feature], emp_percentiles[1, feature]): raise ValueError('percentiles are too close to each other, ' 'unable to build the grid.') axis = np.linspace(emp_percentiles[0, feature], emp_percentiles[1, feature], num=grid_resolution, endpoint=True) values.append(axis) return cartesian(values), values
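# A quick illustration of the behaviour described in the docstring of
# _grid_from_X above (a hypothetical example, not part of the original code;
# it assumes _grid_from_X is in scope):
import numpy as np

X_demo = np.column_stack([np.random.rand(200),               # ~200 unique values
                          np.tile([0., 1., 2.], 67)[:200]])  # 3 unique values
grid_demo, values_demo = _grid_from_X(X_demo, percentiles=(0.05, 0.95),
                                      grid_resolution=10)
# values_demo[0] holds 10 equally spaced points between the 5th and 95th
# percentile of the first column; values_demo[1] falls back to the 3 unique
# values, so grid_demo has 10 * 3 = 30 rows.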
def create_bias_data(self, layer_index):
    is_weight = np.asarray([0])
    layer = np.asarray([layer_index])
    weight_row = np.asarray([0])
    weight_column = np.asarray([0])
    bias = np.arange(self.layer_sizes[layer_index][1])
    bias_data = cartesian([is_weight, layer, weight_row, weight_column, bias])
    bias_data = bias_data.astype(np.float32)
    return torch.autograd.Variable(torch.from_numpy(bias_data))
def generate_CIELab_space(rgb_space=aRGB, axis_stride=0.1):
    # 3 axes, equal strides along each
    axes = [np.arange(0, 1 + axis_stride, axis_stride)] * 3
    rgb_points = cartesian(axes)
    lab_points = []
    for row in range(len(rgb_points)):
        lab_points.append(RGB_to_Lab(rgb_space, rgb_points[row, :]))
    # dist is squared euclidean, so JND threshold is 0.23^2
    return np.array(lab_points)
def setUp(self): self.useLocal = False if self.useLocal: self.tempdir = tempdir = '.' else: self.tempdir = tempdir = mkdtemp(prefix='patty-analytics') self.drivemapLas = os.path.join(tempdir, 'testDriveMap.las') self.sourcelas = os.path.join(tempdir, 'testSource.las') self.footprint_csv = os.path.join(tempdir, 'testFootprint.csv') self.foutlas = os.path.join(tempdir, 'testOutput.las') self.min = -10 self.max = 10 self.num_rows = 1000 # Create plane with a pyramid dm_pct = 0.5 dm_rows = np.round(self.num_rows * dm_pct) dm_min = self.min * dm_pct dm_max = self.max * dm_pct delta = dm_max / dm_rows shape_side = dm_max - dm_min dm_offset = [0, 0, 0] self.dense_obj_offset = [3, 2, -(1 + shape_side / 2)] # make drivemap plane_row = np.linspace( start=self.min, stop=self.max, num=self.num_rows) plane_points = cartesian((plane_row, plane_row, [0])) shape_points, footprint = make_tri_pyramid_with_base( shape_side, delta, dm_offset) np.savetxt(self.footprint_csv, footprint, fmt='%.3f', delimiter=',') dm_points = np.vstack([plane_points, shape_points]) plane_grid = np.zeros((dm_points.shape[0], 6), dtype=np.float32) plane_grid[:, 0:3] = dm_points self.drivemap_pc = pcl.PointCloudXYZRGB(plane_grid) self.drivemap_pc = downsample_voxel(self.drivemap_pc, voxel_size=delta * 20) # utils.set_registration(self.drivemap_pc) utils.save(self.drivemap_pc, self.drivemapLas) # Create a simple pyramid dense_grid = np.zeros((shape_points.shape[0], 6), dtype=np.float32) dense_grid[:, 0:3] = shape_points + self.dense_obj_offset self.source_pc = pcl.PointCloudXYZRGB(dense_grid) self.source_pc = downsample_voxel(self.source_pc, voxel_size=delta * 5) utils.save(self.source_pc, self.sourcelas)
def interpolate_image(image_data, zoom_factor):
    X = np.arange(image_data.shape[0])
    Y = np.arange(image_data.shape[1])
    rgi = RegularGridInterpolator((X, Y), image_data)
    grid_x, grid_y = (np.linspace(0, len(X) - 1, zoom_factor * len(X)),
                      np.linspace(0, len(Y) - 1, zoom_factor * len(Y)))
    return rgi(cartesian([grid_x, grid_y])).reshape(grid_x.shape[0],
                                                    grid_y.shape[0])
def test_cgauss_likelihood():
    mu = np.array([0], dtype='float')
    sigma = np.array([2], dtype='float')
    x = np.linspace(-1, 2, 2)
    lapse = np.array([0], dtype='float')
    parameters = cartesian((mu, sigma, lapse, x))
    proportionMethod = PsiMarginal.pf(parameters, psyfun='cGauss')
    samples = np.random.normal(mu, sigma, (200000, 1))
    proportionSamples = np.empty([2, ])
    # cdf is p(X<=x), compute this through sampling to check likelihood
    proportionSamples[0] = np.mean(samples <= x[0])
    proportionSamples[1] = np.mean(samples <= x[1])
    np.testing.assert_almost_equal(proportionSamples, proportionMethod,
                                   decimal=2)
def voxel2voxels_in_volume(x, y, z, stepX, stepY, stepZ):
    """
    Returns a numpy array with all the voxels in the volume corresponding to
    representative (x, y, z).

    Here we assume that the representative is the upper, left, front pixel of
    a (stepX, stepY, stepZ) sized volume.
    """
    # This is what Andrew originally used. Probably not fully correct, but practical.
    # We could also just return slices and let numpy do its tiling magic...
    # This should be hidden in an up/down sampler object
    return cartesian((np.arange(x, x + stepX),
                      np.arange(y, y + stepY),
                      np.arange(z, z + stepZ)))
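# Small sanity check for voxel2voxels_in_volume (illustrative, not from the
# original code): expanding representative voxel (0, 0, 0) with steps
# (2, 2, 1) yields the four voxels of that 2 x 2 x 1 block.
#
#   voxel2voxels_in_volume(0, 0, 0, 2, 2, 1)
#   # -> array([[0, 0, 0],
#   #           [0, 1, 0],
#   #           [1, 0, 0],
#   #           [1, 1, 0]])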
def stateact_to_feature(self, state, act, onlyindex=True):
    zedaind = []
    for nm, xs in sorted(self.feature_tiles.items()):
        val = None
        if nm == 'speedx':
            val = state.getSpeedX()
        elif nm == 'trackpos':
            val = state.getTrackPos()
        elif nm == 'angle':
            val = state.getAngle()
        # print val, nm
        inds = []
        if not val == None:  # one of the above
            for i in range(len(xs) - 1):
                if xs[i][0] <= val < xs[i + 1][1]:
                    inds.append(i)
            zedaind.append(inds)
        elif nm == 'track':
            # remaining are track positions, let's get them
            tracks = np.array(state.getTrack()) / 200.
            sensors = []
            sensors.append(tracks[3])  # -40
            sensors.append((tracks[4] + tracks[5] + tracks[6]) / 3.)
            sensors.append((tracks[9] + tracks[8] + tracks[10]) / 3.)  # 0
            sensors.append((tracks[12] + tracks[13] + tracks[14]) / 3.)
            sensors.append(tracks[15])
            if self.arguments.show_sensors:
                print(sensors)
            for val in sensors:
                for i in range(len(xs) - 1):
                    if xs[i] <= val <= xs[i + 1]:
                        ind.append(i)
                        break
                else:
                    assert False
    zedaind.append([act])
    # print 'feature shape-', self.w.shape, 'index length-', len(ind)
    # print ind
    assert len(zedaind) == len(self.w.shape), 'ind %s, w %s' % (str(ind), str(self.w.shape))
    if onlyindex:
        return tuple(ind)
    else:
        ft = np.zeros_like(self.w)
        for tot in cartesian(zedaind):
            ft[tuple(tot)] = 1
        return ft
def optimize(self):
    best_sharpe_ratio = 0
    best_allocation = []
    num_symbols = len(self.portfolio.get_symbols())
    # int cast so the step count is a valid `num` for newer NumPy versions
    steps = numpy.linspace(0, 1, int(round(1 / self.stepsize)) + 1)
    allocations = cartesian([steps] * num_symbols)
    legal_allocations = allocations[numpy.where(allocations.sum(1) == 1)]
    for allocation in legal_allocations:
        sharpe = self.portfolio.simulate(allocation)[2]
        if sharpe > best_sharpe_ratio:
            best_sharpe_ratio = sharpe
            best_allocation = allocation
    return (best_allocation, best_sharpe_ratio)
def train(self, metergroup, num_states_dict={}, **load_kwargs):
    """Train using 1D CO. Places the learnt model in the `model` attribute.

    Parameters
    ----------
    metergroup : a nilmtk.MeterGroup object

    Notes
    -----
    * only uses first chunk for each meter (TODO: handle all chunks).
    """
    if self.model:
        raise RuntimeError(
            "This implementation of Combinatorial Optimisation"
            " does not support multiple calls to `train`.")

    num_meters = len(metergroup.meters)
    if num_meters > 12:
        max_num_clusters = 2
    else:
        max_num_clusters = 3

    for i, meter in enumerate(metergroup.submeters().meters):
        print("Training model for submeter '{}'".format(meter))
        for chunk in meter.power_series(**load_kwargs):
            num_total_states = num_states_dict.get(meter)
            if num_total_states is not None:
                num_on_states = num_total_states - 1
            else:
                num_on_states = None
            states = cluster(chunk, max_num_clusters, num_on_states)
            self.model.append({
                'states': states,
                'training_metadata': meter})
            break  # TODO handle multiple chunks per appliance

    # Get centroids
    # If we import sklearn at the top of the file then auto doc fails.
    from sklearn.utils.extmath import cartesian
    centroids = [model['states'] for model in self.model]
    self.state_combinations = cartesian(centroids)
    # self.state_combinations is a 2D array
    # each column is a chan
    # each row is a possible combination of power demand values e.g.
    # [[0, 0, 0, 0], [0, 0, 0, 100], [0, 0, 50, 0],
    #  [0, 0, 50, 100], ...]

    print("Done training!")
def spread_points_in_hypercube(point_count, dimension_count):  # TODO rename points_spread_in_hypercube
    """
    Place points in a unit hypercube such that the minimum distance between
    points is approximately maximal. Euclidean distance is used.

    .. note:: Current implementation simply puts the points in a hypergrid

    Parameters
    ----------
    point_count : int
        Number of points to pick
    dimension_count : int
        Number of dimensions of the hypercube

    Returns
    -------
    np.array(shape=(point_count, dimension_count))
        Points spread approximately optimally across the hypercube.

    Raises
    ------
    ValueError
        When ``point_count < 0 or dimension_count < 1``

    Notes
    -----
    The exact solution to this problem is known for only a few `n`.

    References
    ----------
    .. [1] http://stackoverflow.com/a/2723764/1031434
    """
    # Current implementation simply puts points in a grid
    if point_count < 0:
        raise ValueError("point_count must be at least 0")
    if dimension_count < 1:
        raise ValueError("dimension_count must be at least 1")
    if point_count == 0:
        return np.empty(shape=(0, dimension_count))
    side_count = int(np.ceil(point_count ** (1 / dimension_count)))  # number of points per side
    points = np.linspace(0, 1, side_count)
    points = cartesian([points] * dimension_count)
    return np.random.permutation(points)[:point_count]  # XXX permutation is unnecessary
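# Usage sketch for spread_points_in_hypercube (illustrative): with 5 points in
# 2 dimensions, side_count = ceil(5 ** 0.5) = 3, so the candidate grid is the
# 3 x 3 lattice {0, 0.5, 1}^2 and 5 of those 9 points are returned in random
# order.
#
#   pts = spread_points_in_hypercube(point_count=5, dimension_count=2)
#   assert pts.shape == (5, 2)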
def cartesian_prod_dicts_lists(the_dict):
    # takes a dictionary and produces a dictionary of the cartesian product
    # of the input
    if not type(the_dict) is type(ordDict()):
        warnings.warn('An ordered dict was not used. Thus if this function is '
                      'called again with the same dict it might not produce '
                      'the same results.')

    from sklearn.utils.extmath import cartesian

    stim_list = []
    stim_list = tuple([list(the_dict[key_name]) for key_name in the_dict])

    # cartesian has the last column change the fastest, thus is like c-indexing
    stim_cart_array = cartesian(stim_list)

    cart_dict = ordDict()
    # load up the vectors associated with keys to cart_dict
    for key_name, key_num in zip(the_dict, range(len(the_dict))):
        cart_dict[key_name] = stim_cart_array[:, key_num]

    return cart_dict
def generate_predictor_data(self):
    from sklearn.utils.extmath import cartesian
    ps = np.linspace(*self.train_p_range)
    Ts = np.linspace(*self.train_T_range)
    rhs = atanspace(*self.train_rh_range, scaling=2.5)
    data = cartesian([ps, Ts, rhs])
    # Remove some (for Innsbruck) unrealistic data
    remove = (
            # Lower atmosphere is rather warm
            ((data[:, 0] > 700) & (data[:, 1] < 230))
            # Middle atmosphere
            | ((data[:, 0] < 700) & (data[:, 0] > 400)
               & (data[:, 1] > 300) | (data[:, 1] < 200))
            # Upper atmosphere is rather cold
            | ((data[:, 0] < 400) & (data[:, 1] > 270))
            )
    data = data[~remove]
    # Calculate q
    data[:, 2] = data[:, 2] * qsat(p=data[:, 0], T=data[:, 1])
    return data
def __init__(self, T, N, eta, tau0, kappa, lambda_init=np.asarray([])):
    """
    Arguments:
    T: Length of SNP sequence
    N: Total number of people in the population.
    eta: Hyperparameter for prior on haplotype weights pi
    tau0: A (positive) learning parameter that downweights early iterations
    kappa: Learning rate: exponential decay rate---should be between
        (0.5, 1.0] to guarantee asymptotic convergence.

    Note that if you pass the same data in every time and set kappa=0 this
    class can also be used to do batch VB.
    """
    self._K = pow(2, T)
    self._T = T
    self._N = N

    # pi dist hyperparams
    self._eta = eta
    self._tau0 = tau0 + 1
    self._kappa = kappa

    # iteration counter, used for updating rho
    self._updatect = 0

    # Initialize the variational distribution q(pi|lambda)
    if (lambda_init.shape == (self._K,)):
        self._lambda = lambda_init
    else:
        # todo: not totally sure this is a sensible initialization
        self._lambda = np.random.gamma(10, 1. / 10, self._K)
    self._E_log_pi = dirichlet_expectation(self._lambda)
    self._exp_E_log_pi = np.exp(self._E_log_pi)

    # all theta values
    theta = cartesian(np.repeat(np.array([[0.01, 0.99]]), T, 0))
    self.logs_theta = np.zeros([self._K, self._T, 2])
    self.logs_theta[:, :, 0] = np.log(theta)
    self.logs_theta[:, :, 1] = np.log(1 - theta)
def generate_state_combinations_all(self):
    mains = self.loc.elec.mains()
    from sklearn.utils.extmath import cartesian
    centroids = [model['states'] for model in self.co.model]
    state_combinations = cartesian(centroids)

    baseline = self.vampire_power
    if baseline is None:
        vampire_power = mains.vampire_power()
    else:
        vampire_power = self.vampire_power

    n_rows = state_combinations.shape[0]
    vampire_power_array = np.zeros((n_rows, 1)) + vampire_power
    state_combinations = np.hstack((state_combinations, vampire_power_array))
    summed_power_of_each_combination = np.sum(state_combinations, axis=1)

    self.vampire_power = vampire_power
    self.state_combinations = state_combinations
    self.summed_power_of_each_combination = summed_power_of_each_combination

    return vampire_power, state_combinations, summed_power_of_each_combination
def constructTensor(med_file, diag_file):
    diag_med_comb = diag_cross_med(med_file, diag_file)
    # create index map for subject_id, icdcode, and med_name
    patDict = createIndexMap(diag_med_comb.subject_id)
    medDict = createIndexMap(np.hstack(diag_med_comb.med_name))
    diagDict = createIndexMap(np.hstack(diag_med_comb.code))
    tensorIdx = np.array([[0, 0, 0]])
    tensorVal = np.array([[0]])
    for i in xrange(diag_med_comb.shape[0]):
        curDiag = [diagDict[x] for x in diag_med_comb.iloc[i, 0]]
        curMed = [medDict[x] for x in diag_med_comb.iloc[i, 1]]
        curPatId = patDict[diag_med_comb.iloc[i, 2]]
        dmCombo = extmath.cartesian((curDiag, curMed))
        tensorIdx = np.append(tensorIdx,
                              np.column_stack((np.repeat(curPatId, dmCombo.shape[0]),
                                               dmCombo)),
                              axis=0)
        tensorVal = np.append(tensorVal,
                              np.ones((dmCombo.shape[0], 1), dtype=np.int),
                              axis=0)
    tensorIdx = np.delete(tensorIdx, (0), axis=0)
    tensorVal = np.delete(tensorVal, (0), axis=0)
    tenX = sptensor.sptensor(tensorIdx, tensorVal,
                             np.array([len(patDict), len(diagDict), len(medDict)]))
    axisDict = {0: patDict, 1: diagDict, 2: medDict}
    return tenX, axisDict
def symmetry_score(transformation, left, right, stepz=100, ignore_value=0):
    """Counts how many elements in reflected img2 are equal in img1."""
    sizex, sizey, sizez = left.shape
    score = 0
    for zstart in range(0, sizez, stepz):
        # Generate original coordinates
        coords = cartesian((np.arange(sizex),
                            np.arange(sizey),
                            np.arange(zstart, min(sizez, zstart + stepz))))
        # Reflect coordinates
        reflected_coords = transform_coords(transformation, coords)

        # Find valid transformations
        valid_coords = ((reflected_coords >= 0) &
                        (reflected_coords < (sizex, sizey, sizez))).all(axis=1)
        coords = coords[valid_coords]
        reflected_coords = reflected_coords[valid_coords]
        # print('There were %d of %d reflected points out of boundaries' %
        #       ((~valid_coords).sum(), len(valid_coords)))

        # Compute score
        equal = left[tuple(coords.T)] == right[tuple(reflected_coords.T)]
        valid = ((left[tuple(coords.T)] != ignore_value) &
                 (right[tuple(reflected_coords.T)] != ignore_value))
        score += np.sum(equal & valid)
    return score
def get_constrained_state_combinations(self, valid_locations,
                                       last_combination_appliances,
                                       loc, vampire_power):
    # This method constructs only the valid state combinations from the beginning.
    # TODO any or all
    appliances_in_valid_locations_temp = [
        app for app in loc.metadata.appliances_location
        if all(locs in loc.metadata.appliances_location[app]
               for locs in valid_locations)]
    appliances_in_valid_locations_temp.extend(last_combination_appliances)

    # Fridge may always start running
    # TODO append 5
    # TODO include always consuming appliances
    appliances_in_valid_locations_temp.append(5)
    appliances_in_valid_locations = list(set(appliances_in_valid_locations_temp))

    # Take care of REDD's tuple names (3,4) and (10,20)
    if loc.name == 'REDD':
        if 10 in appliances_in_valid_locations:
            appliances_in_valid_locations.remove(10)
            appliances_in_valid_locations.remove(20)
            appliances_in_valid_locations.append((10, 20))
        if 3 in appliances_in_valid_locations:
            appliances_in_valid_locations.remove(3)
            appliances_in_valid_locations.remove(4)
            appliances_in_valid_locations.append((3, 4))

    centroids = [model['states'] for model in self.model
                 if model['training_metadata'].instance()
                 in appliances_in_valid_locations]
    ordering = [model['training_metadata'].instance() for model in self.model
                if model['training_metadata'].instance()
                in appliances_in_valid_locations]

    from sklearn.utils.extmath import cartesian
    state_combinations = cartesian(centroids)

    n_rows = state_combinations.shape[0]
    vampire_power_array = np.zeros((n_rows, 1)) + vampire_power
    state_combinations = np.hstack((state_combinations, vampire_power_array))
    summed_power_of_each_combination = np.sum(state_combinations, axis=1)

    return state_combinations, summed_power_of_each_combination, ordering
def apc370models(nMeans=10, nSD=10, perc=5): #the parameters of the shapes mat = l.loadmat(top_dir + 'data/models/PC2001370Params.mat') s = mat['orcurv'][0] #adjustment for repeats [ 14, 15, 16,17, 318, 319, 320, 321] a = np.hstack((range(14), range(18,318))) a = np.hstack((a, range(322, 370))) s = s[a] nStim = np.size(s,0) angularPosition = [] curvature = [] paramLens = [] for shapeInd in range(nStim): angularPosition.append(s[shapeInd][:, 0]) curvature.append(s[shapeInd][:, 1]) paramLens.append(np.size(s[shapeInd],0)) angularPosition = np.array(list(itertools.chain.from_iterable(angularPosition))) angularPosition.shape = (np.size(angularPosition),1) curvature = np.array(list(itertools.chain.from_iterable(curvature))) curvature.shape = (np.size(curvature),1) #variable section length striding inds = np.empty((2,np.size(paramLens)),dtype = np.intp) inds[1,:] = np.cumsum(np.array(paramLens), dtype = np.intp) #ending index inds[0,:] = np.concatenate(([0,], inds[1,:-1])) #beginning index maxAngSD = np.deg2rad(171) minAngSD = np.deg2rad(23) maxCurSD = 0.98 minCurSD = 0.09 #make this into a pyramid based on d-prime orMeans = np.linspace(0, 2*pi-2*pi/nMeans, nMeans) orSDs = np.logspace(np.log10(minAngSD), np.log10(maxAngSD), nSD) curvMeans = np.linspace(-0.5, 1,nMeans) curvSDs = np.logspace(np.log10(minCurSD), np.log10(maxCurSD), nSD) modelParams = cartesian([orMeans,curvMeans,orSDs,curvSDs]) nModels = np.size( modelParams, 0) a = st.vonmises.pdf(angularPosition, kappa = modelParams[:,2]**-1 , loc = modelParams[:,0]) # b = st.norm.pdf(curvature, modelParams[:,1], modelParams[:,3]) temp = a * b models = np.empty(( 362, nModels )) for shapeInd in range(nStim): models[ shapeInd, : ] = np.max( temp[ inds[ 0, shapeInd ] : inds[ 1 , shapeInd ] , : ] , axis = 0 ) models = models - np.mean(models,axis = 0) magnitude = np.linalg.norm( models, axis = 0) magnitude.shape=(1,nModels) models = models / magnitude del a,b, temp return models, modelParams
def __init__(self, stimRange, Pfunction='cGauss', nTrials=50, threshold=None, thresholdPrior=('uniform', None), slope=None, slopePrior=('uniform', None), guessRate=None, guessPrior=('uniform', None), lapseRate=None, lapsePrior=('uniform', None), marginalize=True, thread=True): # Psychometric function parameters self.stimRange = stimRange # range of stimulus intensities self.version = 1.0 self.threshold = np.arange(-10, 10, 0.1) self.slope = np.arange(0.005, 20, 0.1) self.guessRate = np.arange(0.0, 0.11, 0.05) self.lapseRate = np.arange(0.0, 0.11, 0.05) self.marginalize = marginalize # marginalize out nuisance parameters gamma and lambda? self.psyfun = Pfunction self.thread = thread if threshold is not None: self.threshold = threshold if np.shape(self.threshold) == (): self.threshold = np.expand_dims(self.threshold, 0) if slope is not None: self.slope = slope if np.shape(self.slope) == (): self.slope = np.expand_dims(self.slope, 0) if guessRate is not None: self.guessRate = guessRate if np.shape(self.guessRate) == (): self.guessRate = np.expand_dims(self.guessRate, 0) if lapseRate is not None: self.lapseRate = lapseRate if np.shape(self.lapseRate) == (): self.lapseRate = np.expand_dims(self.lapseRate, 0) # Priors self.thresholdPrior = thresholdPrior self.slopePrior = slopePrior self.guessPrior = guessPrior self.lapsePrior = lapsePrior self.priorMu = self.__genprior(self.threshold, *thresholdPrior) self.priorSigma = self.__genprior(self.slope, *slopePrior) self.priorGamma = self.__genprior(self.guessRate, *guessPrior) self.priorLambda = self.__genprior(self.lapseRate, *lapsePrior) # if guess rate equals lapse rate, and they have equal priors, # then gamma can be left out, as the distributions will be the same self.gammaEQlambda = all((all(self.guessRate == self.lapseRate), all(self.priorGamma == self.priorLambda))) # likelihood: table of conditional probabilities p(response | alpha,sigma,gamma,lambda,x) # prior: prior probability over all parameters p_0(alpha,sigma,gamma,lambda) if self.gammaEQlambda: self.dimensions = (len(self.threshold), len(self.slope), len(self.lapseRate), len(self.stimRange)) self.likelihood = np.reshape( pf(cartesian((self.threshold, self.slope, self.lapseRate, self.stimRange)), psyfun=Pfunction), self.dimensions) # row-wise products of prior probabilities self.prior = np.reshape( np.prod(cartesian((self.priorMu, self.priorSigma, self.priorLambda)), axis=1), self.dimensions[:-1]) else: self.dimensions = (len(self.threshold), len(self.slope), len(self.guessRate), len(self.lapseRate), len(self.stimRange)) self.likelihood = np.reshape( pf(cartesian((self.threshold, self.slope, self.guessRate, self.lapseRate, self.stimRange)), psyfun=Pfunction), self.dimensions) # row-wise products of prior probabilities self.prior = np.reshape( np.prod(cartesian((self.priorMu, self.priorSigma, self.priorGamma, self.priorLambda)), axis=1), self.dimensions[:-1]) # normalize prior self.prior = self.prior / np.sum(self.prior) # Set probability density function to prior self.pdf = np.copy(self.prior) # settings self.iTrial = 0 self.nTrials = nTrials self.stop = 0 self.response = [] self.stim = [] # Generate the first stimulus intensity self.minEntropyStim()
def getStartingColors(self, hueFilters=[], lightnessRange=[25,85], onlyUseRGB=True): """Randomly select a starting color from a subset of CIE Lab space. This function returns a set of highly preferable colors within a subspace of the typical 8,325-color CIE Lab space that fall within the range of any hue filters. Rather than the normal every-5 interval, the subspace specifies an every-15 interval along L, a, and b axis starting at the origin. Args: hueFilters (np.array): an n by 2 nd.array specifying lower and upper hue filter bounds that fall within [0,360) degrees. lightnessRange (list): a two-element list that sets the lightness range for filtering for color space before sampling. onlyUseRGB (bool): whether color space should be restricted to RGB. Returns: startingColors (np.array): an n x 3 array of n highly preferable CIE Lab D65 starting colors. """ hueFilters = np.array(hueFilters) lIntervals = CIE_LAB_STARTING_SUBSPACE_INTERVALS["L"] aIntervals = CIE_LAB_STARTING_SUBSPACE_INTERVALS["a"] bIntervals = CIE_LAB_STARTING_SUBSPACE_INTERVALS["b"] isInterval = np.zeros((self.colorSpaces.shape[0], 3)) isInterval[:,0] = np.in1d(self.colorSpaces[:,0], lIntervals) isInterval[:,1] = np.in1d(self.colorSpaces[:,1], aIntervals) isInterval[:,2] = np.in1d(self.colorSpaces[:,2], bIntervals) isIntervalMask = np.all(isInterval, axis=1) startColors = self.colorSpaces[isIntervalMask] isRGB = np.logical_and(startColors[:,[6,7,8]] >= 0, startColors[:,[6,7,8]] <= 255) isRGB = np.all(isRGB, axis=1) if lightnessRange[0] <= 10: minLightness = 0 else: minLightness = lightnessRange[0] + 0.01 if lightnessRange[1] <= 15: maxLightness = 15 else: maxLightness = lightnessRange[1] inLightness = np.logical_not(np.logical_or(startColors[:,0] < minLightness, startColors[:,0] > maxLightness)) startColors = startColors[np.logical_and(isRGB, inLightness)] if hueFilters.size > 0: hueFilters = convert.convertHueRanges(hueFilters) okHue = [np.logical_and(startColors[:,3] >= low, startColors[:,3] <= high) for low,high in hueFilters] okHue = np.any(np.array(okHue), axis=0) startColors = startColors[okHue] # With the remaining subspace, enumerate all unique color pairs. # For efficiency, unique pairs are calculated via one of the triangles # of the cartesian product of all remaining colors. labs = startColors[:,:3] color_col_products = [cartesian((labs[:,i],labs[:,i])) for i in xrange(labs.shape[1])] productSize = (color_col_products[0].shape[0],2*len(color_col_products)) color_product = np.zeros(productSize) for i, d in enumerate(color_col_products): color_product[:,i] = d[:,0] color_product[:,i+len(color_col_products)] = d[:,0] idxs = np.transpose(np.array(np.triu_indices(len(labs),1))) colorPairs = np.ascontiguousarray(labs[idxs,].reshape((-1, 6))) colorPairPreferenceScores = npc.score(colorPairs)[:,2] # Penalize preference scores for colors that are ``ugly''. 
labs1 = np.ascontiguousarray(colorPairs[:,:3]) labs2 = np.ascontiguousarray(colorPairs[:,3:6]) penalties = np.minimum(npc.scorePenalty(labs1)[:,0], npc.scorePenalty(labs2)[:,0]) colorPairPreferenceScores = colorPairPreferenceScores * penalties maxPref = np.max(colorPairPreferenceScores) stdPref = np.std(colorPairPreferenceScores) prefThreshold = maxPref - 0.75*stdPref colorPairs = colorPairs[colorPairPreferenceScores > prefThreshold,] # Extract the unique colors from color combination list # http://stackoverflow.com/questions/16970982 def getUnique(a): a = colorPairs[:,:3] b = np.ascontiguousarray(a).view(np.dtype((np.void, a.dtype.itemsize * a.shape[1]))) _, idx = np.unique(b, return_index=True) return a[idx] uniq1 = getUnique(colorPairs[:,:3]) uniq2 = getUnique(colorPairs[:,3:]) startingColors = getUnique( np.vstack(( uniq1, uniq2 )) ) return startingColors
def compute_probabilities(grid_map, cell_list, passenger_list, prob): """ Compute the transition probability matrix. Args: grid_map (list): list containing the grid structure; cell_list (list): list of non-wall cells; passenger_list (list): list of passenger cells; prob (float): probability of success of an action. Returns: The transition probability matrix; """ g = np.array(grid_map) c = np.array(cell_list) n_states = len(cell_list) * 2**len(passenger_list) p = np.zeros((n_states, 4, n_states)) directions = [[-1, 0], [1, 0], [0, -1], [0, 1]] passenger_states = cartesian([[0, 1]] * len(passenger_list)) for i in range(n_states): idx = i // len(cell_list) collected_passengers = np.array( passenger_list)[np.argwhere(passenger_states[idx] == 1).ravel()] state = c[i % len(cell_list)] if g[tuple(state)] in ['.', 'S', 'F']: if g[tuple(state)] in ['F']\ and state.tolist() not in collected_passengers.tolist(): continue for a in range(len(directions)): new_state = state + directions[a] j = np.where((c == new_state).all(axis=1))[0] if j.size > 0: assert j.size == 1 if g[tuple(new_state)] == 'F' and new_state.tolist()\ not in collected_passengers.tolist(): current_passenger_state = np.zeros(len(passenger_list)) current_passenger_idx = np.where( (new_state == passenger_list).all(axis=1))[0] current_passenger_state[current_passenger_idx] = 1 new_passenger_state = passenger_states[ idx] + current_passenger_state new_idx = np.where(( passenger_states == new_passenger_state).all( axis=1))[0] j += len(cell_list) * new_idx else: j += len(cell_list) * idx else: j = i p[i, a, j] = prob for d in [1 - np.abs(directions[a]), np.abs(directions[a]) - 1]: slip_state = state + d k = np.where((c == slip_state).all(axis=1))[0] if k.size > 0: assert k.size == 1 if g[tuple(slip_state)] == 'F' and slip_state.tolist()\ not in collected_passengers.tolist(): current_passenger_state = np.zeros( len(passenger_list)) current_passenger_idx = np.where( (slip_state == passenger_list).all(axis=1))[0] current_passenger_state[current_passenger_idx] = 1 new_passenger_state = passenger_states[ idx] + current_passenger_state new_idx = np.where(( passenger_states == new_passenger_state).all( axis=1))[0] k += len(cell_list) * new_idx else: k += len(cell_list) * idx else: k = i p[i, a, k] += (1. - prob) * .5 return p
def design_matrix(sample_labels, interaction_indices=None): """ Parameters --------- sample_labels: a numpy matrix, for each sample a vector with the conditions which we would like to model. cols represent the type of conditions we want to model, row represent a combination of conditions that are represented by the row-variable. if we have a 2x3 design we build this matrix: [[0,0], [0,1], [0,2], [1,0], [1,1], [1,2]] Returns ------- X: the design matrix. factor_labels: the labels of the design-matrix columns factor_num : number of factors for each condition """ factor_num = [] n_factors = 0 for i in range(sample_labels.shape[1]): unique_labels = np.unique(sample_labels[:,i]) if len(unique_labels) == 1: label_factors = 0 else: label_factors = len(unique_labels) n_factors+=label_factors factor_num.append(label_factors) n_interactions = 0 if interaction_indices != None: interaction_factors = np.array(factor_num)[[interaction_indices]] n_interactions = np.prod(interaction_factors) Xint = np.zeros((sample_labels.shape[0], n_interactions)) X = np.zeros((sample_labels.shape[0], n_factors)) lb = LabelEncoder() factor_labels = [] offset = 0 for i, factor in enumerate(factor_num): if factor == 0: continue index = lb.fit_transform(sample_labels.T[i]) for j in range(sample_labels.shape[0]): X[j,index[j]+offset] = 1 factor_labels.append(lb.classes_) offset += factor if interaction_indices != None: interaction_product = [np.arange(v).tolist() for v in interaction_factors] interaction_gen = cartesian(interaction_product) # This is buggy!! Xint = np.zeros((sample_labels.shape[0], n_interactions)) offset = interaction_indices[0] * np.sum(factor_num[:interaction_indices[0]]) offset = np.int(offset) for i, int_indices in enumerate(interaction_gen): index1 = offset + int_indices[0] index2 = offset + int_indices[1] + factor_num[interaction_indices[0]] Xint[:,i] = X[:,index1] * X[:,index2] factor1 = interaction_indices[0] factor2 = interaction_indices[1] new_label = factor_labels[factor1][int_indices[0]] + "_" + \ factor_labels[factor2][int_indices[1]] factor_labels.append(new_label) X = np.hstack((X, Xint)) return X, np.hstack(factor_labels), factor_num
# Show confusion matrix in a separate window
plt.matshow(cm)
plt.title('Confusion matrix')
plt.colorbar()
plt.ylabel('True label')
plt.xlabel('Predicted label')
plt.show()

# For the plots, create arrays spanning the range of each continuous predictor
h = 50
balance_ = np.linspace(Xdf['balance'].min(), Xdf['balance'].max(), h)
h = 100
income_ = np.linspace(Xdf['income'].min(), Xdf['income'].max(), h)

# Create combinations of the predictors using the arrays above
combos = pd.DataFrame(cartesian([balance_, [0.0, 1.0], income_, [1.]]))
combos.columns = ['balance', 'student', 'income', 'intercept']

# Run the fitted model on all the predictor combinations to obtain
# predicted probabilities of default
combos['predict'] = result.predict(combos)

# Average over the income grid to get the predicted probability of default
# for each level of balance and student status
grouped = pd.pivot_table(combos, values=['predict'],
                         index=['balance', 'student'], aggfunc=np.mean)

# Select only data with 'student' = 1
plt.figure()
plt_data = grouped.loc[grouped.index.get_level_values('student') == 1]
# Plot predicted probability of default for 'student' = 1
plt.plot(plt_data.index.get_level_values(0), plt_data['predict'], color='b')

# Select only data with 'student' = 0
plt_data = grouped.loc[grouped.index.get_level_values('student') == 0]
# Plot predicted probability of default for 'student' = 0
plt.plot(plt_data.index.get_level_values(0), plt_data['predict'], color='r')
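# Small illustration (grid bounds assumed) of the prediction grid that
# cartesian builds above: one row per (balance, student, income, intercept)
# combination.
import numpy as np
from sklearn.utils.extmath import cartesian

balance_ = np.linspace(0, 2500, 3)
income_ = np.linspace(10000, 70000, 2)
grid = cartesian([balance_, [0.0, 1.0], income_, [1.0]])
print(grid.shape)   # (3 * 2 * 2 * 1, 4) == (12, 4)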
def disaggregate(self, mains, output_datastore, **load_kwargs):
    '''Disaggregate mains according to the model learnt previously.

    Parameters
    ----------
    mains : nilmtk.ElecMeter or nilmtk.MeterGroup
    output_datastore : instance of nilmtk.DataStore subclass
        For storing power predictions from disaggregation algorithm.
    output_name : string, optional
        The `name` to use in the metadata for the `output_datastore`.
        e.g. some sort of name for this experiment.
        Defaults to "NILMTK_CO_<date>"
    resample_seconds : number, optional
        The desired sample period in seconds.
    **load_kwargs : keyword arguments
        Passed to `mains.power_series(**kwargs)`
    '''
    MIN_CHUNK_LENGTH = 100

    if not self.model:
        raise RuntimeError("The model needs to be instantiated before"
                           " calling `disaggregate`. For example, the"
                           " model can be instantiated by running `train`.")

    # If we import sklearn at the top of the file then auto doc fails.
    from sklearn.utils.extmath import cartesian

    # sklearn produces lots of DeprecationWarnings with PyTables
    import warnings
    warnings.filterwarnings("ignore", category=DeprecationWarning)

    # Extract optional parameters from load_kwargs
    date_now = datetime.now().isoformat().split('.')[0]
    output_name = load_kwargs.pop('output_name', 'NILMTK_CO_' + date_now)
    resample_seconds = load_kwargs.pop('resample_seconds', 60)

    # Get centroids
    centroids = [model['states'] for model in self.model]
    state_combinations = cartesian(centroids)
    # state_combinations is a 2D array:
    # each column is a channel,
    # each row is a possible combination of power demand values, e.g.
    # [[0, 0, 0, 0], [0, 0, 0, 100], [0, 0, 50, 0], [0, 0, 50, 100], ...]

    # Add vampire power to the model
    vampire_power = mains.vampire_power()
    if printing:
        print("vampire_power = {} watts".format(vampire_power))
    n_rows = state_combinations.shape[0]
    vampire_power_array = np.zeros((n_rows, 1)) + vampire_power
    state_combinations = np.hstack((state_combinations, vampire_power_array))

    summed_power_of_each_combination = np.sum(state_combinations, axis=1)
    # summed_power_of_each_combination is now an array where each
    # value is the total power demand for each combination of states.
load_kwargs['sections'] = load_kwargs.pop('sections', mains.good_sections()) resample_rule = '{:d}S'.format(resample_seconds) timeframes = [] building_path = '/building{}'.format(mains.building()) mains_data_location = '{}/elec/meter1'.format(building_path) for chunk in mains.power_series(**load_kwargs): # Check that chunk is sensible size before resampling if len(chunk) < MIN_CHUNK_LENGTH: continue # Record metadata timeframes.append(chunk.timeframe) measurement = chunk.name chunk = chunk.resample(rule=resample_rule) # Check chunk size *again* after resampling if len(chunk) < MIN_CHUNK_LENGTH: continue # Start disaggregation indices_of_state_combinations, residual_power = find_nearest( summed_power_of_each_combination, chunk.values) for i, model in enumerate(self.model): if printing: print("Estimating power demand for '{}'".format(model['training_metadata'])) predicted_power = state_combinations[ indices_of_state_combinations, i].flatten() cols = pd.MultiIndex.from_tuples([chunk.name]) meter_instance = model['training_metadata'].instance() output_datastore.append('{}/elec/meter{}' .format(building_path, meter_instance), pd.DataFrame(predicted_power, index=chunk.index, columns=cols)) # Copy mains data to disag output output_datastore.append(key=mains_data_location, value=pd.DataFrame(chunk, columns=cols)) ################################## # Add metadata to output_datastore # TODO: `preprocessing_applied` for all meters # TODO: split this metadata code into a separate function # TODO: submeter measurement should probably be the mains # measurement we used to train on, not the mains measurement. # DataSet and MeterDevice metadata: #Add metadata for main meter mains_meter = mains.metadata['device_model'] if hasattr(mains, 'metadata') else 'mains' meter_devices = { 'CO': { 'model': 'CO', 'sample_period': resample_seconds, 'max_sample_period': resample_seconds, 'measurements': [{ 'physical_quantity': measurement[0], 'type': measurement[1] }] }, 'mains': { 'model': mains_meter, 'sample_period': resample_seconds, 'max_sample_period': resample_seconds, 'measurements': [{ 'physical_quantity': measurement[0], 'type': measurement[1] }] } } merged_timeframes = merge_timeframes(timeframes, gap=resample_seconds) total_timeframe = TimeFrame(merged_timeframes[0].start, merged_timeframes[-1].end) dataset_metadata = {'name': output_name, 'date': date_now, 'meter_devices': meter_devices, 'timeframe': total_timeframe.to_dict()} output_datastore.save_metadata('/', dataset_metadata) # Building metadata # Mains meter: elec_meters = { 1: { 'device_model': mains_meter, 'site_meter': True, 'data_location': mains_data_location, 'preprocessing_applied': {}, # TODO 'statistics': { 'timeframe': total_timeframe.to_dict(), 'good_sections': list_of_timeframe_dicts(merged_timeframes) } } } # Appliances and submeters: appliances = [] for model in self.model: meter = model['training_metadata'] meter_instance = meter.instance() for app in meter.appliances: meters = app.metadata['meters'] appliance = { 'meters': [meter_instance], 'type': app.identifier.type, 'instance': app.identifier.instance # TODO this `instance` will only be correct when the # model is trained on the same house as it is tested on. 
# https://github.com/nilmtk/nilmtk/issues/194 } appliances.append(appliance) elec_meters.update({ meter_instance: { 'device_model': 'CO', 'submeter_of': 1, 'data_location': ('{}/elec/meter{}' .format(building_path, meter_instance)), 'preprocessing_applied': {}, # TODO 'statistics': { 'timeframe': total_timeframe.to_dict(), 'good_sections': list_of_timeframe_dicts(merged_timeframes) } } }) building_metadata = { 'instance': mains.building(), 'elec_meters': elec_meters, 'appliances': appliances } output_datastore.save_metadata(building_path, building_metadata)
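# Minimal sketch of the combinatorial-optimisation core used in disaggregate
# above. The appliance state values are invented, and nilmtk's find_nearest is
# approximated here with a brute-force argmin.
import numpy as np
from sklearn.utils.extmath import cartesian

centroids = [np.array([0, 80]),        # e.g. fridge states (watts)
             np.array([0, 2000]),      # e.g. kettle states
             np.array([0, 150, 300])]  # e.g. washer states
state_combinations = cartesian(centroids)        # shape (2*2*3, 3)
summed_power = state_combinations.sum(axis=1)    # total power of each combination

mains_chunk = np.array([75.0, 2085.0, 310.0])    # three mains samples (watts)
indices = np.argmin(np.abs(summed_power[None, :] - mains_chunk[:, None]), axis=1)
per_appliance_estimate = state_combinations[indices]
print(per_appliance_estimate)   # nearest combination of appliance states per sample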
def calc_cartesian_alpha(alpha, index, n_groups_list): if index < 0: return np.array([alpha for _ in range(np.prod(n_groups_list))]) else: cart = cartesian([range(n_groups) for n_groups in n_groups_list]) return np.array([alpha[i] for i in cart[:, index]])
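# Quick check of calc_cartesian_alpha (values assumed; cartesian is expected to
# be imported where the function is defined). With group sizes [2, 3], index=1
# tiles the second grouping's alphas across all 2*3 cells, while index < 0
# broadcasts a scalar alpha.
import numpy as np

alpha = np.array([0.1, 0.2, 0.3])
print(calc_cartesian_alpha(alpha, index=1, n_groups_list=[2, 3]))
# -> [0.1 0.2 0.3 0.1 0.2 0.3]
print(calc_cartesian_alpha(0.5, index=-1, n_groups_list=[2, 3]))
# -> [0.5 0.5 0.5 0.5 0.5 0.5]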
def disaggregate(self, mains, output_datastore, location_data=None, mains_values=None, baseline=None, **load_kwargs): from sklearn.utils.extmath import cartesian import warnings warnings.filterwarnings("ignore", category=DeprecationWarning) # Get centroids centroids = [model['states'] for model in self.model] state_combinations = cartesian(centroids) try: timezone = location_data.dataset.metadata.get('timezone') except Exception: timezone = '' vampire_power = baseline if baseline is None: vampire_power = mains.vampire_power() #- correction n_rows = state_combinations.shape[0] vampire_power_array = np.zeros((n_rows, 1)) + vampire_power state_combinations = np.hstack((state_combinations, vampire_power_array)) print("vampire_power = {} watts".format(vampire_power)) summed_power_of_each_combination = np.sum(state_combinations, axis=1) self.vampire_power = vampire_power self.state_combinations_all = state_combinations self.summed_power_of_each_combination_all = summed_power_of_each_combination resample_seconds = load_kwargs.pop('resample_seconds', 60) load_kwargs.setdefault('resample', True) load_kwargs.setdefault('sample_period', resample_seconds) timeframes = [] building_path = '/building{}'.format(mains.building()) mains_data_location = '{}/elec/meter1'.format(building_path) if mains_values is None: load_kwargs['sections'] = load_kwargs.pop('sections', mains.good_sections()) mains_values = mains.power_series(**load_kwargs) using_series = False else: mains_values = [mains_values] using_series = True self.mains_used = mains_values self.location_used = 0 self.location_loop = 0 self.co_indices_original = [] self.co_indices_location = [] #No longer applies since indices constantly change after each iteration. We now return the combo self.co_residuals_original = [] self.co_residuals_location = [] self.co_combos_location = [] for chunk in mains_values: # Record metadata if using_series: timeframes.append(TimeFrame(start=chunk.index[0], end=chunk.index[-1])) measurement = ('power', 'apparent') else: timeframes.append(chunk.timeframe) measurement = chunk.name # Start disaggregation print('Calculating original indices of state combinations...') indices_of_state_combinations_original, residuals_power_original = find_nearest( summed_power_of_each_combination, chunk.values) self.co_indices_original.extend(indices_of_state_combinations_original) self.co_residuals_original.extend(residuals_power_original) print('Calculating indices of state combinations...') state_combinations_location, residuals_power_location = self.find_nearest( chunk, location_data, vampire_power, resample_seconds) self.co_combos_location.extend(state_combinations_location) self.co_residuals_location.extend(residuals_power_location) #Write results for i, model in enumerate(self.model): print("Estimating power demand for '{}'".format(model['training_metadata'])) predicted_power = state_combinations_location[:, i].flatten() cols = pd.MultiIndex.from_tuples([measurement]) meter_instance = model['training_metadata'].instance() output_datastore.append('{}/elec/meter{}' .format(building_path, meter_instance), pd.DataFrame(predicted_power, index=chunk.index, columns=cols)) # Copy mains data to disag output output_datastore.append(key=mains_data_location, value=pd.DataFrame(chunk, columns=cols)) ################################## # Add metadata to output_datastore self.add_metadata(output_datastore, measurement, timeframes, mains, timezone, load_kwargs)