def triangles_centroids_computation(vertexes_coord, triangle_vertexes):
    """Computes the centroids of the triangles."""
    triangles_centroids = take(vertexes_coord, triangle_vertexes[:, 0], axis=0)
    triangles_centroids += take(vertexes_coord, triangle_vertexes[:, 1], axis=0)
    triangles_centroids += take(vertexes_coord, triangle_vertexes[:, 2], axis=0)
    triangles_centroids /= 3.0
    return triangles_centroids
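# Example (sketch): centroids of two triangles sharing an edge. Assumes `take`
# is numpy's take, matching the star-import style this module relies on.
def _example_triangles_centroids():
    from numpy import array
    vertexes_coord = array([[0., 0., 0.],
                            [1., 0., 0.],
                            [0., 1., 0.],
                            [1., 1., 0.]])
    triangle_vertexes = array([[0, 1, 2], [1, 3, 2]])
    # each centroid is the mean of the triangle's three vertices:
    # -> [[1/3, 1/3, 0], [2/3, 2/3, 0]]
    return triangles_centroids_computation(vertexes_coord, triangle_vertexes)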
def getCurrentGuess(self):
    """
    OUTPUT:
    =======
    The vector x contains the current guess of the solution.

    DATA LAYOUT:
    ============
    The vector "x" has a part "u" that describes the current solution,
    "free" for the values of the free parameters, and a part "art"
    containing the values of the artificial parameters.

            [  u   ]
        x = [ ---- ]
            [ free ]
            [ art  ]
    """
    u = self.u
    lfree = scipy.take(self.lambd, self.param['free'])
    lart = scipy.take(self.lambd, self.param['artificial'])
    return scipy.r_[u, lfree, lart]
def rerun_dfa(chrom, xdata, mask, groups, names, DFs):
    """Run DFA in min app"""
    # extract vars from xdata
    slice = meancent(_slice(xdata, chrom))

    # split into training and test
    tr_slice, cv_slice, ts_slice, tr_grp, cv_grp, ts_grp, tr_nm, cv_nm, ts_nm = \
        _split(slice, groups, mask, names)

    # get indexes
    idx = scipy.arange(xdata.shape[0])[:, nA]
    tr_idx = scipy.take(idx, _index(mask, 0), 0)
    cv_idx = scipy.take(idx, _index(mask, 1), 0)
    ts_idx = scipy.take(idx, _index(mask, 2), 0)

    # model DFA on training samples
    u, v, eigs, dummy = cva(tr_slice, tr_grp, DFs)

    # project xval and test samples
    projUcv = scipy.dot(cv_slice, v)
    projUt = scipy.dot(ts_slice, v)

    uout = scipy.zeros((xdata.shape[0], DFs), 'd')
    _put(uout, scipy.reshape(tr_idx, (len(tr_idx),)).tolist(), u)
    _put(uout, scipy.reshape(cv_idx, (len(cv_idx),)).tolist(), projUcv)
    _put(uout, scipy.reshape(ts_idx, (len(ts_idx),)).tolist(), projUt)

    return uout, v, eigs
def V_EH_dipole_plane(J_dip, r_dip, list_of_edges_numbers, RWGNumber_CFIE_OK,
                      RWGNumber_signedTriangles, RWGNumber_edgeVertexes,
                      RWGNumber_oppVertexes, vertexes_coord, w, eps_r, mu_r):
    # observation point for the incoming field
    r_ref = zeros(3, 'd')  # sum(triangles_centroids, axis=0)/T
    R_hat = (r_dip - r_ref) / sqrt(dot(r_dip - r_ref, r_dip - r_ref))
    k_hat = -R_hat  # the propagation vector is indeed opposed to R_hat
    k = w * sqrt(eps_0*eps_r * mu_0*mu_r)  # the wavenumber
    G_EJ, G_HJ = G_EJ_G_HJ(r_dip, r_ref, eps_r, mu_r, k)
    E_0 = dot(G_EJ, J_dip).astype('D')
    # creation of the local V arrays
    E = list_of_edges_numbers.shape[0]
    V_EH = zeros((E, 4), 'D')
    V_FULL_PRECISION = 1
    # RWGNumber_vertexesCoord
    RWGNumber_vertexesCoord = zeros((E, 6), 'd')
    RWGNumber_vertexesCoord[:, 0:3] = take(vertexes_coord, RWGNumber_edgeVertexes[:, 0], axis=0).astype('d')
    RWGNumber_vertexesCoord[:, 3:6] = take(vertexes_coord, RWGNumber_edgeVertexes[:, 1], axis=0).astype('d')
    # RWGNumber_oppVertexesCoord
    RWGNumber_oppVertexesCoord = zeros((E, 6), 'd')
    RWGNumber_oppVertexesCoord[:, 0:3] = take(vertexes_coord, RWGNumber_oppVertexes[:, 0], axis=0).astype('d')
    RWGNumber_oppVertexesCoord[:, 3:6] = take(vertexes_coord, RWGNumber_oppVertexes[:, 1], axis=0).astype('d')
    wrapping_code = """
    blitz::Range all = blitz::Range::all();
    V_EJ_HJ_plane (V_EH(all, 0), V_EH(all, 1), V_EH(all, 2), V_EH(all, 3), E_0, k_hat, r_ref, list_of_edges_numbers, RWGNumber_CFIE_OK, RWGNumber_signedTriangles, RWGNumber_vertexesCoord, RWGNumber_oppVertexesCoord, w, eps_r, mu_r, V_FULL_PRECISION);
    """
    weave.inline(wrapping_code,
                 ['V_EH', 'E_0', 'k_hat', 'r_ref', 'list_of_edges_numbers',
                  'RWGNumber_CFIE_OK', 'RWGNumber_signedTriangles',
                  'RWGNumber_vertexesCoord', 'RWGNumber_oppVertexesCoord',
                  'w', 'eps_r', 'mu_r', 'V_FULL_PRECISION'],
                 type_converters=converters.blitz,
                 include_dirs=['./code/MoM/'],
                 library_dirs=['./code/MoM/'],
                 libraries=['MoM'],
                 headers=['<iostream>', '<complex>', '"V_E_V_H.h"'],
                 compiler='gcc',
                 extra_compile_args=['-O3', '-pthread', '-w'])
    return V_EH
def V_EH_plane(E_0, k_hat, r_ref, list_of_edges_numbers, RWGNumber_CFIE_OK,
               RWGNumber_signedTriangles, RWGNumber_edgeVertexes,
               RWGNumber_oppVertexes, vertexes_coord, w, eps_r, mu_r):
    # creation of the local V arrays
    E = list_of_edges_numbers.shape[0]
    V_EH = zeros((E, 4), 'D')
    V_FULL_PRECISION = 1
    # RWGNumber_vertexesCoord
    RWGNumber_vertexesCoord = zeros((E, 6), 'd')
    RWGNumber_vertexesCoord[:, 0:3] = take(vertexes_coord, RWGNumber_edgeVertexes[:, 0], axis=0).astype('d')
    RWGNumber_vertexesCoord[:, 3:6] = take(vertexes_coord, RWGNumber_edgeVertexes[:, 1], axis=0).astype('d')
    # RWGNumber_oppVertexesCoord
    RWGNumber_oppVertexesCoord = zeros((E, 6), 'd')
    RWGNumber_oppVertexesCoord[:, 0:3] = take(vertexes_coord, RWGNumber_oppVertexes[:, 0], axis=0).astype('d')
    RWGNumber_oppVertexesCoord[:, 3:6] = take(vertexes_coord, RWGNumber_oppVertexes[:, 1], axis=0).astype('d')
    wrapping_code = """
    blitz::Range all = blitz::Range::all();
    V_EJ_HJ_plane (V_EH(all, 0), V_EH(all, 1), V_EH(all, 2), V_EH(all, 3), E_0, k_hat, r_ref, list_of_edges_numbers, RWGNumber_CFIE_OK, RWGNumber_signedTriangles, RWGNumber_vertexesCoord, RWGNumber_oppVertexesCoord, w, eps_r, mu_r, V_FULL_PRECISION);
    """
    weave.inline(wrapping_code,
                 ['V_EH', 'E_0', 'k_hat', 'r_ref', 'list_of_edges_numbers',
                  'RWGNumber_CFIE_OK', 'RWGNumber_signedTriangles',
                  'RWGNumber_vertexesCoord', 'RWGNumber_oppVertexesCoord',
                  'w', 'eps_r', 'mu_r', 'V_FULL_PRECISION'],
                 type_converters=converters.blitz,
                 include_dirs=['./code/MoM/'],
                 library_dirs=['./code/MoM/'],
                 libraries=['MoM'],
                 headers=['<iostream>', '<complex>', '"V_E_V_H.h"'],
                 compiler='gcc',
                 extra_compile_args=['-O3', '-pthread', '-w'])
    return V_EH
def calcProfilV(self, xy):
    """Return the velocity values along a cross-section."""
    vxvy = self.getMfVitesse()
    grd = self.parent.aquifere.getFullGrid()
    x0, y0, dx, dy, nx, ny = (grd['x0'], grd['y0'], grd['dx'], grd['dy'],
                              grd['nx'], grd['ny'])
    x, y = zip(*xy)
    xl0, xl1 = x[:2]
    yl0, yl1 = y[:2]
    dd = min(dx, dy) * .95
    dxp, dyp = xl1 - xl0, yl1 - yl0
    ld = max(ceil(abs(dxp / dx)), ceil(abs(dyp / dy)))
    ld = int(ld + 1)
    ddx = dxp / ld
    ddy = dyp / ld
    xp2 = xl0 + arange(ld + 1) * ddx
    yp2 = yl0 + arange(ld + 1) * ddy
    ix = floor((xp2 - x0) / dx)
    ix = clip(ix.astype(int), 0, nx - 1)
    iy = floor((yp2 - y0) / dy)
    iy = clip(iy.astype(int), 0, ny - 1)
    vx = take(ravel(vxvy[0]), iy * nx + ix)
    vy = take(ravel(vxvy[1]), iy * nx + ix)
    V = sqrt(vx**2 + vy**2)
    cu = sqrt((xp2 - xp2[0])**2 + (yp2 - yp2[0])**2)
    return [cu, V]
def cubeIndex_RWGNumbers_computation(RWGNumber_cubeNumber,
                                     RWGNumber_cubeCentroidCoord):
    """Each finest-level cube must know which edges (RWGs) it contains.
    This function establishes this list for every cube. Only the cubes
    containing edges are retained. We also create a list of the cube
    centroids, ordered the same way as the cubes_lists_edges_numbers list."""
    E = RWGNumber_cubeNumber.shape[0]  # the number of RWGs involved
    ind_sorted_cubes_numbers = argsort(RWGNumber_cubeNumber, kind='mergesort')
    sorted_cubes_numbers = take(RWGNumber_cubeNumber, ind_sorted_cubes_numbers, axis=0)
    sorted_edges_numbers = take(arange(E), ind_sorted_cubes_numbers, axis=0)
    sorted_edges_numbers_cubes_centroids = take(RWGNumber_cubeCentroidCoord,
                                                ind_sorted_cubes_numbers, axis=0)
    cubes_lists_edges_numbers = {}  # the desired dictionary
    cube_list_edges_numbers_tmp = [sorted_edges_numbers[0]]  # temporary list, renewed for each cube
    cubes_centroids = [sorted_edges_numbers_cubes_centroids[0]]
    cubeIndex = 0
    # j stops at E-2, since (j+1) would otherwise reach E (an out-of-bounds index)
    for j in range(E - 1):
        if sorted_cubes_numbers[j + 1] == sorted_cubes_numbers[j]:
            # the next cube number is the same as the current one:
            # add the next element to the temporary list
            cube_list_edges_numbers_tmp.append(sorted_edges_numbers[j + 1])
        else:
            # otherwise, add the temporary "per-cube" list to the complete list
            cubes_lists_edges_numbers[cubeIndex] = array(cube_list_edges_numbers_tmp)
            cubes_centroids.append(sorted_edges_numbers_cubes_centroids[j + 1])
            # init of the temporary list for the next cube
            cube_list_edges_numbers_tmp = [sorted_edges_numbers[j + 1]]
            cubeIndex += 1
    # we must append the last temporary list
    if cubeIndex in cubes_lists_edges_numbers:
        cubes_lists_edges_numbers[cubeIndex + 1] = array(cube_list_edges_numbers_tmp)
    else:
        cubes_lists_edges_numbers[cubeIndex] = array(cube_list_edges_numbers_tmp)
    # transform "cubes_lists_edges_numbers" into a linear array, useful for the C++ code
    C = len(cubes_lists_edges_numbers)
    cubes_edges_numbers = zeros(E, 'i')
    cube_N_RWGs = zeros(C, 'i')
    startIndex = 0
    for j in range(C):
        length = cubes_lists_edges_numbers[j].shape[0]
        cube_N_RWGs[j] = length
        cubes_edges_numbers[startIndex:startIndex + length] = cubes_lists_edges_numbers[j]
        startIndex += length
    return (cubes_edges_numbers, cubes_lists_edges_numbers,
            cube_N_RWGs.astype('i'), (array(cubes_centroids)).astype('d'))
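# Example (sketch): three RWGs falling into two cubes. Assumes argsort, take,
# arange, array and zeros are star-imported at module level, as this code
# expects; the centroid values are illustrative only.
def _example_cubeIndex_RWGNumbers():
    from numpy import array
    RWGNumber_cubeNumber = array([9, 7, 9])  # cube number of each RWG
    RWGNumber_cubeCentroidCoord = array([[1., 1., 1.],
                                         [0., 0., 0.],
                                         [1., 1., 1.]])
    # -> cubes_edges_numbers [1, 0, 2], cube_N_RWGs [1, 2]
    return cubeIndex_RWGNumbers_computation(RWGNumber_cubeNumber,
                                            RWGNumber_cubeCentroidCoord)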
def V_EH_dipole(J_dip, r_dip, list_of_edges_numbers, RWGNumber_CFIE_OK,
                RWGNumber_signedTriangles, RWGNumber_edgeVertexes,
                RWGNumber_oppVertexes, vertexes_coord, w, eps_r, mu_r):
    """Computes the V_EH excitation arrays for an elementary electric dipole
    J_dip located at r_dip. We preferentially use 2-D triangles arrays in
    the C++ code."""
    # creation of the local V arrays
    E = list_of_edges_numbers.shape[0]
    V_EH = zeros((E, 4), 'D')
    V_FULL_PRECISION = 1
    # RWGNumber_vertexesCoord
    RWGNumber_vertexesCoord = zeros((E, 6), 'd')
    RWGNumber_vertexesCoord[:, 0:3] = take(vertexes_coord, RWGNumber_edgeVertexes[:, 0], axis=0).astype('d')
    RWGNumber_vertexesCoord[:, 3:6] = take(vertexes_coord, RWGNumber_edgeVertexes[:, 1], axis=0).astype('d')
    # RWGNumber_oppVertexesCoord
    RWGNumber_oppVertexesCoord = zeros((E, 6), 'd')
    RWGNumber_oppVertexesCoord[:, 0:3] = take(vertexes_coord, RWGNumber_oppVertexes[:, 0], axis=0).astype('d')
    RWGNumber_oppVertexesCoord[:, 3:6] = take(vertexes_coord, RWGNumber_oppVertexes[:, 1], axis=0).astype('d')
    wrapping_code = """
    std::vector<std::complex<double> > V_tE_J, V_tH_J, V_nE_J, V_nH_J;
    V_tE_J.resize(E);
    V_tH_J.resize(E);
    V_nE_J.resize(E);
    V_nH_J.resize(E);
    double rDip[3];
    std::complex<double> JDip[3];
    for (int i=0 ; i<3 ; ++i) rDip[i] = r_dip(i);
    for (int i=0 ; i<3 ; ++i) JDip[i] = J_dip(i);
    V_EJ_HJ_dipole (V_tE_J, V_tH_J, V_nE_J, V_nH_J, JDip, rDip, list_of_edges_numbers, RWGNumber_CFIE_OK, RWGNumber_signedTriangles, RWGNumber_vertexesCoord, RWGNumber_oppVertexesCoord, w, eps_r, mu_r, V_FULL_PRECISION);
    for (int i=0; i<E; i++) {
      V_EH(i, 0) = V_tE_J[i];
      V_EH(i, 1) = V_tH_J[i];
      V_EH(i, 2) = V_nE_J[i];
      V_EH(i, 3) = V_nH_J[i];
    }
    """
    weave.inline(wrapping_code,
                 ['V_EH', 'J_dip', 'r_dip', 'list_of_edges_numbers',
                  'RWGNumber_CFIE_OK', 'RWGNumber_signedTriangles',
                  'RWGNumber_vertexesCoord', 'RWGNumber_oppVertexesCoord',
                  'w', 'eps_r', 'mu_r', 'V_FULL_PRECISION', 'E'],
                 type_converters=converters.blitz,
                 include_dirs=['./code/MoM/'],
                 library_dirs=['./code/MoM/'],
                 libraries=['MoM'],
                 headers=['<iostream>', '<complex>', '"V_E_V_H.h"'],
                 compiler='gcc',
                 extra_compile_args=['-O3', '-pthread', '-w'])
    return V_EH
def eig(mat):
    """Return the eigenvalues and eigenvectors of mat, sorted by decreasing
    eigenvalue magnitude."""
    e, v = scipy.linalg.eig(mat)
    order = scipy.argsort(abs(e))[::-1]
    e = scipy.take(e, order)
    v = scipy.take(v, order, axis=1)
    return e, v
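# Example (sketch): eigendecomposition sorted by decreasing |eigenvalue|.
# Assumes the legacy scipy namespace (scipy.array, scipy.take, scipy.argsort)
# used above.
def _example_eig():
    import scipy
    mat = scipy.array([[2.0, 0.0], [0.0, -5.0]])
    e, v = eig(mat)
    # e -> [-5.+0.j, 2.+0.j]; the columns of v are the matching eigenvectors
    return e, v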
def compute_RWG_CFIE_OK(self):
    RWGNumber_CFIE_OK_tmp1 = take(self.triangles_surfaces,
                                  self.RWGNumber_signedTriangles, axis=0)
    RWGNumber_CFIE_OK_tmp2 = take(self.IS_CLOSED_SURFACE,
                                  RWGNumber_CFIE_OK_tmp1, axis=0)
    # following the Taskinen et al. paper in PIER:
    # we can have a CFIE on a junction straddling a dielectric and a metallic body
    self.RWGNumber_CFIE_OK = ((sum(RWGNumber_CFIE_OK_tmp2, axis=1) >= 1) * 1).astype('i')
    # we cannot have M on a junction between a dielectric and a metallic body!
    # The following expression ignores the fact that a surface can be metallic
    # or dielectric. If metallic, there is no M current, even if the surface
    # is closed.
    self.RWGNumber_M_CURRENT_OK = ((sum(RWGNumber_CFIE_OK_tmp2, axis=1) > 1) * 1).astype('i')
def sgrad(self, X, ndata=None):
    """Return a stochastic gradient at X: the gradient of a uniformly
    random summand for each row of X."""
    random_points = scipy.random.randint(low=0, high=X.shape[X.ndim - 1],
                                         size=X.shape[0])
    rows = numpy.arange(X.shape[0])
    ans = numpy.zeros_like(X)
    ans[rows, random_points] = (2 * self.alpha.shape[0]
                                * scipy.take(self.alpha, random_points)
                                * (X[rows, random_points]
                                   - scipy.take(self.center, random_points)))
    ans = self.boundgrad(ans, 1)
    return ans
def calc_grid_sum_xyvs(xyvs, bins=100):
    """Convert an XYVs object into a gridded and summed XYVs object.

    xyvs  a list of tuples (x, y, ...)
    bins  the number of required bins, either <int> or [<int>, <int>]

    Returns a tuple ((bin_x, bin_y), xyz_data) where xyz_data is a binned
    version of the input object and bin_? is the bin count for the ? axis.
    """
    # convert to numpy array
    xyvs = scipy.array(xyvs)
    # figure out number of value columns
    num_columns = len(xyvs[0]) - 2
    if num_columns < 1:
        msg = 'calc_grid_sum_xyvs: xyvs object has no data columns'
        raise RuntimeError(msg)
    # get histogram with value1 column
    xyv1 = scipy.take(xyvs, (0, 1, 2), axis=1)
    (result, xedges, yedges) = scipy.histogram2d(xyv1[:, 0], xyv1[:, 1],
                                                 bins=bins, normed=False,
                                                 weights=xyv1[:, 2])
    # create XYZ object
    result = make_xyz(result, xedges, yedges)
    # get number of bins for result
    # -1 since ?edges is the number of *edges*; we want bins
    bins_x = scipy.shape(xedges)[0] - 1
    bins_y = scipy.shape(yedges)[0] - 1
    # handle each extra value column separately
    for i in range(num_columns - 1):
        xyvn = scipy.take(xyvs, (0, 1, i + 3), axis=1)
        (binned_data, xedges, yedges) = scipy.histogram2d(xyvn[:, 0], xyvn[:, 1],
                                                          bins=bins, normed=False,
                                                          weights=xyvn[:, 2])
        binned_data = make_xyz(binned_data, xedges, yedges)
        v_col = scipy.take(binned_data, (2,), axis=1)
        result = scipy.hstack((result, v_col))
    return ((bins_x, bins_y), result)
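# Example (sketch): four points with one value column summed onto a 2x2 grid.
# Assumes the module's make_xyz helper and the legacy scipy.histogram2d
# (with the old `normed` keyword) used above.
def _example_calc_grid_sum_xyvs():
    xyvs = [(0.0, 0.0, 1.0), (0.1, 0.1, 2.0), (1.0, 1.0, 3.0), (0.9, 0.9, 4.0)]
    (bins_x, bins_y), xyz = calc_grid_sum_xyvs(xyvs, bins=2)
    # bins_x == bins_y == 2; the low-low cell sums to 3.0, the high-high to 7.0
    return xyz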
def call_pls(chrom, xdata, factors, mask, data):
    """Runs PLS on a subset of X-variables"""
    scores = []
    for i in range(chrom.shape[0]):
        if _remdup(chrom[i]) == 0:
            # extract vars from xdata
            slice = scipy.take(xdata, chrom[i, :].tolist(), 1)
            collate = 0
            for nF in range(mask.shape[1]):
                # split into training and test
                try:
                    pls_output = pls(slice, data['class'][:, 0][:, nA],
                                     mask[:, nF].tolist(), factors)
                    if min(pls_output['rmsec']) <= min(pls_output['rmsepc']):
                        collate += pls_output['RMSEPC']
                    else:
                        collate += 10.0**5
                except:
                    collate = 0
            if collate != 0:
                scores.append(collate / float(mask.shape[1]))
            else:
                scores.append(10.0**5)
        else:
            scores.append(10.0**5)
    return scipy.asarray(scores)[:, nA]
def find(x, v, next_largest=1, indices=None):
    """Returns the index into the 1D array x corresponding to the element
    of x that is either equal to v or the nearest to v. x is assumed to
    contain unique elements.

    If v is outside the range of values in x then the index of the
    smallest or largest element of x is returned.

    If next_largest == 1 then the nearest element taken is the next
    largest; if next_largest == 0 then the next smallest is taken.

    The optional argument indices speeds up multiple calls to this
    function if you pre-calculate indices=argsort(x).
    """
    if indices is None:
        indices = argsort(x)
    xs = take(x, indices)
    assert next_largest in [0, 1], "next_largest must be 0 or 1"
    eqmask = (xs == v).tolist()
    try:
        ix = eqmask.index(1)
    except ValueError:
        if next_largest:
            mask = (xs < v).tolist()
        else:
            mask = (xs > v).tolist()
        try:
            ix = min([max([0, mask.index(1 - next_largest) + next_largest - 1]),
                      len(mask) - 1])
        except ValueError:
            ix = 0 + next_largest - 1
    return indices[ix]
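# Example (sketch): nearest-index lookups with find; precomputing
# indices=argsort(x) amortises the sort over repeated queries. Assumes
# argsort and take are star-imported at module level, as this code expects.
def _example_find():
    from numpy import array, argsort
    x = array([3.0, 1.0, 2.0, 10.0])
    idx = argsort(x)
    i_up = find(x, 2.5, next_largest=1, indices=idx)    # -> 0, since x[0] == 3.0
    i_down = find(x, 2.5, next_largest=0, indices=idx)  # -> 2, since x[2] == 2.0
    return i_up, i_down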
def dfa_xval_raw(X, group, mask, nodfs):
    """Perform DFA with full cross validation

    >>> import scipy
    >>> X = scipy.array([[ 0.19343116, 0.49655245, 0.72711322, 0.79482108, 0.13651874],[ 0.68222322, 0.89976918, 0.30929016, 0.95684345, 0.01175669],[ 0.3027644 , 0.82162916, 0.83849604, 0.52259035, 0.89389797],[ 0.54167385, 0.64491038, 0.56807246, 0.88014221, 0.19913807],[ 0.15087298, 0.81797434, 0.37041356, 0.17295614, 0.29872301],[ 0.69789848, 0.66022756, 0.70273991, 0.9797469 , 0.66144258],[ 0.378373 , 0.34197062, 0.54657115, 0.27144726, 0.28440859],[ 0.8600116 , 0.2897259 , 0.4448802 , 0.25232935, 0.46922429],[ 0.85365513, 0.34119357, 0.69456724, 0.8757419 , 0.06478112],[ 0.59356291, 0.53407902, 0.62131013, 0.73730599, 0.98833494]])
    >>> group = scipy.array([[1],[1],[1],[1],[2],[2],[2],[3],[3],[3]])
    >>> mask = scipy.array([[0],[1],[0],[0],[0],[0],[1],[0],[0],[1]])
    >>> scores,loads,eigs = dfa_xval_raw(X,group,mask,2)
    """
    x1, x2, x3, y1, y2, y3, dummy1, dummy2, dummy3 = _split(
        X, scipy.array(group, 'i'), mask)
    # get indices
    idxn = scipy.arange(X.shape[0])[:, nA]
    tr_idx = scipy.take(idxn, _index(mask, 0), 0)
    cv_idx = scipy.take(idxn, _index(mask, 1), 0)
    # train
    trscores, loads, eigs, loads2 = DFA(x1, y1, nodfs)
    # cross validation
    cvscores = scipy.dot(x2, loads)
    # independent test
    if max(mask) > 1:
        ts_idx = scipy.take(idxn, _index(mask, 2), 0)
        tstscores = scipy.dot(x3, loads)
        scores = scipy.zeros((X.shape[0], nodfs), 'd')
        tr_idx = scipy.reshape(tr_idx, (len(tr_idx),)).tolist()
        cv_idx = scipy.reshape(cv_idx, (len(cv_idx),)).tolist()
        ts_idx = scipy.reshape(ts_idx, (len(ts_idx),)).tolist()
        _put(scores, tr_idx, trscores)
        _put(scores, cv_idx, cvscores)
        _put(scores, ts_idx, tstscores)
    else:
        scores = scipy.concatenate((trscores, cvscores), 0)
        tr_idx = scipy.reshape(tr_idx, (len(tr_idx),)).tolist()
        cv_idx = scipy.reshape(cv_idx, (len(cv_idx),)).tolist()
        _put(scores, tr_idx, trscores)
        _put(scores, cv_idx, cvscores)
    return scores, loads, eigs
def sfeval(self, x, ndata=100, avg=True):
    """Stochastic evaluation of the loss function on the parameters x over
    the training data set.

    ndata -- how many samples to take from the training data set
    avg   -- if True, average the loss function by the number of samples taken"""
    u = scipy.random.randint(0, x.shape[0], ndata)  # ndata random index values
    data_vals = scipy.take(self.ds[0], u, axis=0)   # values at the random indices
    data_cats = scipy.take(self.ds[1], u, axis=0)   # categories at the random indices
    data = (data_vals, data_cats)                   # put data into the correct format
    ans = funcEval(x, data)  # evaluate the function only on the sampled data
    if avg:
        ans = ans / (data[1].shape[0])  # average over the size of the data slice
    return ans
def dfa_xval_pls(plsscores, plsloads, nolvs, group, mask, nodfs):
    """Perform PLS-DFA with full cross validation"""
    rx1, rx2, rx3, ry1, ry2, ry3, dummy1, dummy2, dummy3 = _split(
        plsscores, scipy.array(group, 'i')[:, nA], mask[:, nA])
    # get indices
    idxn = scipy.arange(plsscores.shape[0])[:, nA]
    tr_idx = scipy.take(idxn, _index(mask, 0), 0)
    cv_idx = scipy.take(idxn, _index(mask, 1), 0)
    # train
    cvas, loads, eigs, dummy = cva(rx1[:, 0:nolvs], ry1, nodfs)
    # cross validation
    cvav = scipy.dot(rx2[:, 0:nolvs], loads)
    # independent test
    if max(mask) > 1:
        cvat = scipy.dot(rx3[:, 0:nolvs], loads)
        scores = scipy.zeros((plsscores.shape[0], nodfs), 'd')
        tr_idx = scipy.reshape(tr_idx, (len(tr_idx),)).tolist()
        cv_idx = scipy.reshape(cv_idx, (len(cv_idx),)).tolist()
        ts_idx = scipy.take(idxn, _index(mask, 2), 0)
        ts_idx = scipy.reshape(ts_idx, (len(ts_idx),)).tolist()
        _put(scores, tr_idx, cvas)
        _put(scores, cv_idx, cvav)
        _put(scores, ts_idx, cvat)
    else:
        scores = scipy.concatenate((cvas, cvav), 0)
        tr_idx = scipy.reshape(tr_idx, (len(tr_idx),)).tolist()
        cv_idx = scipy.reshape(cv_idx, (len(cv_idx),)).tolist()
        _put(scores, tr_idx, cvas)
        _put(scores, cv_idx, cvav)
    # get loadings for original variables
    loads = scipy.dot(plsloads[:, 0:nolvs], loads)
    return scores, loads, eigs
def decimate(x, y, tolerance):
    """Returns decimated x and y arrays.

    This is Douglas and Peucker's algorithm rewritten to use Numeric
    arrays. Tolerance is usually determined by the size that a single
    pixel represents in the units of x and y.

    Compression ratios for large seismic and well data sets can be
    significant.
    """
    # TODO: we could improve the aesthetics by scaling (normalizing) the x
    # and y arrays, e.g. in a well the curve varies by +/- 1 and the depths
    # by 0-10000. This affects the accuracy of the representation in
    # sloping regions.
    keep = zeros(len(x))
    _decimate(x, y, keep, 0, len(x) - 1, tolerance)
    ids = nonzero(keep)
    return take(x, ids), take(y, ids)
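# Example (sketch): decimating an exactly straight line keeps only its
# endpoints. Assumes the module's _decimate helper and the Numeric-era
# zeros/nonzero/take semantics this function was written against.
def _example_decimate():
    from numpy import linspace
    x = linspace(0.0, 1.0, 101)
    y = 2.0 * x  # straight line: all interior points fall within tolerance
    xd, yd = decimate(x, y, tolerance=0.01)
    return xd, yd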
def get_data(self, amp, eeg):
    """Given an Amp instance and an EEG instance, return the data for
    this EOI"""
    data = eeg.get_data()
    ind = self.to_data_indices(amp)
    return take(data, ind, 1)
def edgeNumber_triangles_indexes(list_of_edges_numbers, RWGNumber_signedTriangles):
    """Returns a 1-D array of the indexes of the triangles corresponding
    to a 1-D array of edges numbers. This function is important for
    creating lists of triangles that will participate in the MoM, given a
    particular criterion concerning the edges."""
    indexes_of_triangles_tmp1 = take(RWGNumber_signedTriangles,
                                     list_of_edges_numbers, axis=0).flat
    indexes_of_triangles_tmp2 = sort(indexes_of_triangles_tmp1, kind='mergesort')
    indexes_of_triangles_to_take = ones(indexes_of_triangles_tmp2.shape[0], 'i')
    indexes_of_triangles_to_take[1:] = (indexes_of_triangles_tmp2[1:]
                                        - indexes_of_triangles_tmp2[:-1])
    indexes_of_triangles = compress(indexes_of_triangles_to_take != 0,
                                    indexes_of_triangles_tmp2)
    return indexes_of_triangles.astype('i')
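# Example (sketch): the sort/diff/compress idiom above is a unique() over the
# triangles referenced by the given edges. Assumes take, sort, ones and
# compress are star-imported at module level, as this code expects.
def _example_edgeNumber_triangles_indexes():
    from numpy import array
    RWGNumber_signedTriangles = array([[0, 1], [1, 2], [2, 3]])
    edges = array([0, 1])
    # edges 0 and 1 touch triangles {0, 1} and {1, 2} -> unique array [0, 1, 2]
    return edgeNumber_triangles_indexes(edges, RWGNumber_signedTriangles)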
def PlotTrajectoriesForExperiments(model, experiments, params=None,
                                   with_data=True, plotPts=100, overlap=.1,
                                   skip=1, showLegend=True):
    # First find the maximum time in our data
    maxTime = 0
    chemsNeededByCalc = {}
    for exptName in experiments:
        dataByCalc = model.exptColl[exptName].GetData()
        for calc in dataByCalc:
            chemsNeededByCalc.setdefault(calc, [])
            for chem in dataByCalc[calc].keys():
                chemsNeededByCalc[calc].append(chem)
                thisMaxTime = max(dataByCalc[calc][chem].keys())
                if thisMaxTime > maxTime:
                    maxTime = thisMaxTime

    lines = []
    labels = []
    times = scipy.linspace(0, maxTime*(1 + overlap), plotPts)
    varsByCalc = {}
    for calc in chemsNeededByCalc:
        varsByCalc[calc] = {}
        for chem in chemsNeededByCalc[calc]:
            varsByCalc[calc][chem] = times

    model.GetCalculationCollection().Calculate(varsByCalc, params)
    calcVals = model.GetCalculationCollection().GetResults(varsByCalc)

    cW = ColorWheel()
    for exptName in experiments:
        expt = model.exptColl[exptName]
        dataByCalc = expt.GetData()
        for calc in dataByCalc:
            for chem in dataByCalc[calc]:
                color, sym, dash = cW.next()
                if with_data:
                    for time, (data, error) in dataByCalc[calc][chem].items()[::skip]:
                        errorbar(time, data, yerr=error, color=color, mfc=color,
                                 marker=sym, ecolor=color, capsize=6)
                predicted = scipy.array(calcVals[calc][chem].items())
                order = scipy.argsort(predicted[:, 0])
                predicted = scipy.take(predicted, order)
                predicted[:, 1] = predicted[:, 1] *\
                        model.GetScaleFactors()[exptName][chem]
                lines.append(plot(predicted[:, 0], predicted[:, 1],
                                  color=color, linestyle=dash, linewidth=3))
                labels.append(chem + ' in ' + str(calc))

    if showLegend:
        legend(lines, labels, loc=4)
def sgrad(self, x, ndata=100, bound=True, avg=True):
    """Return a stochastic gradient at x, evaluated at ndata samples from
    the training set.

    x     -- the parameters at which to evaluate the stochastic gradient
    ndata -- the number of samples from the training set to take
    bound -- whether to bound the gradient
    avg   -- whether to average the gradient by the number of samples taken"""
    u = scipy.random.randint(0, x.shape[0], ndata)  # ndata random index values
    data_vals = scipy.take(self.ds[0], u, axis=0)   # values at the random indices
    data_cats = scipy.take(self.ds[1], u, axis=0)   # categories at the random indices
    data = (data_vals, data_cats)                   # put data into the correct format
    ans = self.gradEval(x, data)  # evaluate the gradient only on the sampled data
    if avg:
        ans = ans / (data[1].shape[0])  # average over the size of the data slice
    if bound:
        ans = sgd.bound(ans)  # bound the gradient
    return ans
def calc_confidence_intervals(H):
    number_of_bins = len(H[1, :])
    sigma68 = sp.zeros((number_of_bins, 2))
    sigma95 = sp.zeros((number_of_bins, 2))
    sigma99 = sp.zeros((number_of_bins, 2))
    for b in range(0, number_of_bins):
        sl68, su68 = confidenslimit(sp.take(H, [b], axis=1), 0.683)
        sl95, su95 = confidenslimit(sp.take(H, [b], axis=1), 0.954)
        sl99, su99 = confidenslimit(sp.take(H, [b], axis=1), 0.997)
        # the first column holds all the lower limits, the second column
        # all the upper ones
        sigma68[b, :] = sl68, su68
        sigma95[b, :] = sl95, su95
        sigma99[b, :] = sl99, su99
    return (sigma68, sigma95, sigma99)
def _split(xdata, ydata, mask, labels=None):
    """Splits x and y inputs into training, cross validation (and
    independent test) groups for use with modelling algorithms.

    If max(mask) == 2, returns x1, x2, x3, y1, y2, y3, n1, n2, n3;
    if max(mask) == 1, the x3, y3 and n3 slots are returned as 0.
    """
    x1 = scipy.take(xdata, _index(mask, 0), 0)
    x2 = scipy.take(xdata, _index(mask, 1), 0)
    y1 = scipy.take(ydata, _index(mask, 0), 0)
    y2 = scipy.take(ydata, _index(mask, 1), 0)
    n1, n2 = [], []
    if labels is not None:
        for i in range(len(labels)):
            if mask[i] == 0:
                n1.append(labels[i])
            elif mask[i] == 1:
                n2.append(labels[i])
    if max(mask) == 1:
        return x1, x2, 0, y1, y2, 0, n1, n2, 0
    elif max(mask) == 2:
        x3 = scipy.take(xdata, _index(mask, 2), 0)
        y3 = scipy.take(ydata, _index(mask, 2), 0)
        n3 = []
        if labels is not None:
            for i in range(len(labels)):
                if mask[i] == 2:
                    n3.append(labels[i])
        return x1, x2, x3, y1, y2, y3, n1, n2, n3
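# Example (sketch): mask values 0/1/2 route rows to training, cross-validation
# and independent test. Assumes the module's _index helper (row indices where
# mask equals a given value) and the legacy scipy array namespace used above.
def _example_split():
    import scipy
    xdata = scipy.arange(12).reshape(6, 2)
    ydata = scipy.arange(6).reshape(6, 1)
    mask = scipy.array([0, 1, 0, 2, 0, 1])
    x1, x2, x3, y1, y2, y3, n1, n2, n3 = _split(xdata, ydata, mask,
                                                labels=list('abcdef'))
    # x1 holds rows 0, 2, 4; x2 rows 1, 5; x3 row 3
    return x1, x2, x3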
def reinsert(ch, selch, chsc, selsc):
    """Reinsert the evolved population into the original population,
    retaining the best individuals
    """
    newChrom = scipy.concatenate((ch, selch), 0)
    newScore = scipy.concatenate((chsc, selsc), 0)
    # select only unique chroms - can be removed
    uid = []
    for i in range(len(newChrom)):
        if len(scipy.unique(newChrom[i, :])) == ch.shape[1]:
            uid.append(i)
    newScore = scipy.take(newScore, uid, 0)
    newChrom = scipy.take(newChrom, uid, 0)
    idx = scipy.argsort(newScore, 0)[:, 0].tolist()
    idx = idx[0:ch.shape[0]]
    newChrom = scipy.take(newChrom, idx, 0)
    newScore = scipy.take(newScore, idx, 0)
    return newChrom, newScore
def _get_block_headers(fhead, chan_info):
    '''
    Returns a matrix containing the SON data block headers for a channel.
    'fhead' is a FileHeader instance, and 'chan_info' is a ChannelInfo
    instance.

    The returned header in memory contains, for each disk block, a column
    with rows 0-4 representing:
        Offset to start of block in file
        Start time in clock ticks
        End time in clock ticks
        Chan number
        Items
    '''
    from scipy import zeros, arange, take
    from numpy import fromfile
    if chan_info.firstblock == -1:
        raise ValueError('No data on channel ' + str(chan_info.chan))
    succ_block = 1
    # Pre-allocate memory for header data:
    header = zeros([6, chan_info.blocks], int)
    # Get first data block:
    fhead.fid.seek(chan_info.firstblock)
    # Last and next block pointers, start and end times in clock ticks
    # (read exactly once; a second read here would silently skip real data):
    header[0:4, 0] = fromfile(fhead.fid, 'l', 4)
    # Channel number and number of items in block:
    header[4:6, 0] = fromfile(fhead.fid, 'h', 2)
    # If only one block:
    if header[succ_block, 0] == -1:
        header[0, 0] = int(chan_info.firstblock)
    # Loop if more blocks:
    else:
        fhead.fid.seek(header[succ_block, 0])
        for i in arange(1, chan_info.blocks):
            header[0:4, i] = fromfile(fhead.fid, 'l', 4)
            header[4:6, i] = fromfile(fhead.fid, 'h', 2)
            if header[succ_block, i] > 0:
                fhead.fid.seek(header[succ_block, i])
            header[0, i-1] = header[0, i]
        # Replace pred_block for previous column:
        header[0, -1] = header[1, -2]
    # Delete succ_block data:
    header = take(header, (0, 2, 3, 4, 5), axis=0)
    return header
def _BW(X, group):
    """Generate B and W matrices for CVA

    Ref. Krzanowski
    """
    mx = scipy.mean(X, 0)[nA, :]
    tgrp = scipy.unique(group)
    for x in range(len(tgrp)):
        idx = _index(group, tgrp[x])
        L = len(idx)
        meani = scipy.mean(scipy.take(X, idx, 0), 0)
        meani = scipy.resize(meani, (len(idx), X.shape[1]))
        A = scipy.mean(scipy.take(X, idx, 0), 0) - mx
        C = scipy.take(X, idx, 0) - meani
        if x == 0:
            # initialise on the first group so every group contributes
            Bo = L * scipy.dot(scipy.transpose(A), A)
            Wo = scipy.dot(scipy.transpose(C), C)
        else:
            Bo = Bo + L * scipy.dot(scipy.transpose(A), A)
            Wo = Wo + scipy.dot(scipy.transpose(C), C)
    B = (1.0 / (len(tgrp) - 1)) * Bo
    W = (1.0 / (X.shape[0] - len(tgrp))) * Wo
    return B, W
def estimate(self, algorithm):
    self.results = []
    subds_len = len(self.dataset) / self.k
    for i in xrange(self.k):
        offset = i * subds_len
        # Build test dataset
        if i == (self.k - 1):
            testset = self.dataset[offset:len(self.dataset) - 1]
        else:
            testset = self.dataset[offset:offset + subds_len]
        # Build train dataset
        indexes = []
        # build data
        for index in xrange(len(self.dataset)):
            if index < offset or index >= (offset + subds_len):
                indexes.append(index)
        trainset_data = scipy.take(self.dataset.data(), indexes, axis=0)
        # build labels for data
        labels = self.dataset.labels()
        trainset_labels = []
        for index in indexes:
            trainset_labels.append(labels[index])
        trainset = Dataset(trainset_data, trainset_labels)
        # Test algorithm
        algorithm.load(trainset)
        algorithm.learn()
        correct = 0
        testset_data = testset.data()
        for test in xrange(len(testset)):
            label = algorithm.classify(testset_data[test])
            if testset.get_label(test) == label:
                correct += 1
        self.results.append((float(correct) / len(testset)) * 100)
    mean = 0
    for res in self.results:
        mean += res
    mean /= len(self.results)
    return mean
def _get_block_headers(fhead, chan_info):
    '''
    Returns a matrix containing the SON data block headers for a channel.
    'fhead' is a FileHeader instance, and 'chan_info' is a ChannelInfo
    instance.

    The returned header in memory contains, for each disk block, a column
    with rows 0-4 representing:
        Offset to start of block in file
        Start time in clock ticks
        End time in clock ticks
        Chan number
        Items
    '''
    from scipy import io, zeros, arange, take
    if chan_info.firstblock == -1:
        raise ValueError('No data on channel ' + str(chan_info.chan))
    succ_block = 1
    # Pre-allocate memory for header data:
    header = zeros([6, chan_info.blocks], int)
    # Get first data block:
    fhead.fid.seek(chan_info.firstblock)
    # Last and next block pointers, start and end times in clock ticks:
    header[0:4, 0] = io.fread(fhead.fid, 4, 'l')
    # Channel number and number of items in block:
    header[4:6, 0] = io.fread(fhead.fid, 2, 'h')
    # If only one block:
    if header[succ_block, 0] == -1:
        header[0, 0] = int(chan_info.firstblock)
    # Loop if more blocks:
    else:
        fhead.fid.seek(header[succ_block, 0])
        for i in arange(1, chan_info.blocks):
            header[0:4, i] = io.fread(fhead.fid, 4, 'l')
            header[4:6, i] = io.fread(fhead.fid, 2, 'h')
            if header[succ_block, i] > 0:
                fhead.fid.seek(header[succ_block, i])
            header[0, i - 1] = header[0, i]
        # Replace pred_block for previous column:
        header[0, -1] = header[1, -2]
    # Delete succ_block data:
    header = take(header, (0, 2, 3, 4, 5), axis=0)
    return header
def select(ranksc, chrom, N):
    """Stochastic universal sampling

    N is the generation gap (i.e. a real number between 0 and 1)
    """
    N = round(chrom.shape[0] * N)
    cumsum = scipy.cumsum(ranksc, 0)
    susrange = scipy.rand(N, 1) * max(max(cumsum))
    sel = []
    for each in susrange:
        qcount, q0 = 0, cumsum[0]
        for q in cumsum:
            if q0 < each < q:
                sel.append(qcount)
            qcount += 1
            q0 = q
    nchrom = scipy.take(chrom, sel, 0)
    return nchrom
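# Example (sketch): stochastic universal sampling over 4 chromosomes with a
# 50% generation gap. Assumes the legacy scipy namespace (scipy.rand,
# scipy.cumsum, scipy.take) used above; the scores are illustrative only.
def _example_select():
    import scipy
    chrom = scipy.arange(20).reshape(4, 5)              # 4 chromosomes, 5 genes
    ranksc = scipy.array([[0.4], [0.3], [0.2], [0.1]])  # rank-based scores
    return select(ranksc, chrom, 0.5)  # roughly 2 chromosomes survive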
def states2dict(states, nchannels, npoints=None, fractions=[1, 2, 4],
                shuffle=True):
    """Return dictionary with distribution over states for fractions of data.

    The distributions are *not* normalized, as required by other routines
    (e.g., the KL estimation routines). This function is intended to be
    used with the KL and entropy estimation functions, kl_estimation and
    h_estimation.

    Input arguments:
    states    -- array of states
    nchannels -- total number of channels (used to determine the maximum
                 number of states)
    npoints   -- number of data points
                 Default: None, meaning the full length of states
    fractions -- fractions of the data. For example, fractions=[1,2,4]
                 will create 3 entries in the dictionary, based on the
                 full data (N data points), half the data (2 x N/2 points),
                 and one quarter of the data (4 x N/4 points).
                 Default: [1,2,4]
    shuffle   -- if True, data points are shuffled before computing the
                 dictionaries, to avoid trends in the data

    Output:
    Dictionary distr[fraction][distr_nr]. Keys are fractions (as given by
    the input argument), values are lists of distributions.
    """
    if npoints is None:
        npoints = states.shape[0]
    if shuffle:
        states = states.copy()
        p = scipy.random.permutation(states.shape[0])
        states = scipy.take(states, p)
    distr = {}
    for d in fractions:
        distr[d] = [None] * d
        block_len = npoints // d
        for i in range(d):
            part_y = states[i * block_len:(i + 1) * block_len]
            distr[d][i] = states2distr(part_y, nchannels, normed=False)
    _check_dict_consistency(distr, npoints)
    return distr
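# Example (sketch): halving and quartering a shuffled state sequence. Assumes
# the module's states2distr and _check_dict_consistency helpers; nchannels=2
# is an illustrative value.
def _example_states2dict():
    import scipy
    states = scipy.random.randint(0, 4, 1000)  # 1000 samples of a 4-state signal
    distr = states2dict(states, nchannels=2, fractions=[1, 2, 4])
    return distr[4]  # four unnormalised distributions, one per quarter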
def triangles_unnormalized_normals_computation(vertexes_coord, triangle_vertexes, t):
    """Returns the non-normalized normals of each triangle.
    t is an array of the triangle indexes to be considered."""
    triangles_normals = zeros((t.shape[0], 3), 'd')
    stride = 10000
    startIndex, stopIndex = 0, min(stride, t.shape[0])
    # coded in strided blocks for memory optimization: this function is
    # really a memory hog otherwise!
    while startIndex < triangles_normals.shape[0]:
        indexes = t[startIndex:stopIndex]
        v0 = take(triangle_vertexes[:, 0], indexes, axis=0)
        v1 = take(triangle_vertexes[:, 1], indexes, axis=0)
        v2 = take(triangle_vertexes[:, 2], indexes, axis=0)
        r0 = take(vertexes_coord, v0, axis=0)  # first vertexes of all triangles
        r1_r0 = take(vertexes_coord, v1, axis=0) - r0
        r2_r0 = take(vertexes_coord, v2, axis=0) - r0
        # cross product (r1 - r0) x (r2 - r0), component by component
        triangles_normals[indexes, 0] = r1_r0[:, 1] * r2_r0[:, 2] - r1_r0[:, 2] * r2_r0[:, 1]
        triangles_normals[indexes, 1] = r1_r0[:, 2] * r2_r0[:, 0] - r1_r0[:, 0] * r2_r0[:, 2]
        triangles_normals[indexes, 2] = r1_r0[:, 0] * r2_r0[:, 1] - r1_r0[:, 1] * r2_r0[:, 0]
        startIndex = stopIndex
        stopIndex = min(stopIndex + stride, t.shape[0])
    return triangles_normals.astype('d')
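# Example (sketch): the unnormalised normal of a unit right triangle in the
# xy-plane is the +z cross product. Assumes zeros and take are star-imported
# at module level, as this code expects.
def _example_triangle_normals():
    from numpy import array, arange
    vertexes_coord = array([[0., 0., 0.], [1., 0., 0.], [0., 1., 0.]])
    triangle_vertexes = array([[0, 1, 2]])
    t = arange(1)
    # -> [[0., 0., 1.]]
    return triangles_unnormalized_normals_computation(vertexes_coord,
                                                      triangle_vertexes, t)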
def make_embed(self, *args):
    if self.scatterActor is not None:
        self.renderer.RemoveActor(self.scatterActor)

    selected = self.eegplot.get_selected()
    if selected is None:
        error_msg('You must first select an EEG channel by clicking on it',
                  parent=self)
        return
    torig, data, trode = selected
    gname, gnum = trode
    label = '%s %d' % (gname, gnum)

    Fs = self.eegplot.eeg.freq
    dt = 1.0 / Fs

    try:
        lag = int(self.entryLag.get_text())
    except ValueError:
        error_message('Lag must be an integer; found "%s"' % self.entryLag.get_text())
        return
    try:
        dim = int(self.entryDim.get_text())
    except ValueError:
        error_message('Dimension must be an integer; found "%s"' % self.entryDim.get_text())
        return

    # build the delay-embedded points: each point samples the data at
    # dim indexes spaced lag apart
    pnts = []
    ind = arange(dim) * lag
    while 1:
        if ind[-1] >= len(data):
            break
        pnts.append(take(data, ind)[:3])  # plot 3 dims
        ind += 1

    points = vtk.vtkPoints()
    for i, pnt in enumerate(pnts):
        x, y, z = pnt
        points.InsertPoint(i, x, y, z)

    polyData = vtk.vtkPolyData()
    polyData.SetPoints(points)

    sphere = vtk.vtkSphereSource()
    res = 5
    sphere.SetThetaResolution(res)
    sphere.SetPhiResolution(res)
    sphere.SetRadius(10)

    filter = vtk.vtkGlyph3D()
    filter.SetInput(polyData)
    filter.SetSource(0, sphere.GetOutput())

    mapper = vtk.vtkPolyDataMapper()
    mapper.SetInput(filter.GetOutput())

    actor = vtk.vtkActor()
    actor.SetMapper(mapper)
    actor.GetProperty().SetColor(1, 1, 0)

    self.scatterActor = actor
    self.renderer.AddActor(actor)
    self.interactor.Render()
def plot_ensemble_results(model, ensemble, expts=None, style='errorbars',
                          show_legend=True, loc='upper left',
                          plot_data=True, plot_trajectories=True):
    """
    Plot the fits to the given experiments over an ensemble.

    Note that this recalculates the cost for every member of the ensemble,
    so it may be very slow. Filtering correlated members from the ensemble
    is strongly recommended.

    Inputs:
     model: Model whose results to plot
     ensemble: Parameter ensemble
     expts: List of experiment IDs to plot; if None, all experiments are
            plotted
     style: Style of plot. Currently supported options are:
         'errorbars': Plots points and bars for each data point
         'lines': Plots a continuous line for the data
     show_legend: Boolean that controls whether or not to show the legend
     loc: Location of the legend. See help(Plotting.legend) for options.
     plot_data: Boolean that controls whether the data is plotted
     plot_trajectories: Boolean that controls whether the trajectories are
                        plotted
    """
    exptColl = model.get_expts()
    nets = model.get_calcs()

    if expts is None:
        expts = exptColl.keys()

    lines, labels = [], []
    cW = ColorWheel()

    Network_mod.Network.pretty_plotting()
    model.cost(ensemble[0])
    timepoints = {}
    for netId, net in nets.items():
        traj = getattr(net, 'trajectory', None)
        if traj is not None:
            net.times_to_add = scipy.linspace(traj.timepoints[0],
                                              traj.timepoints[-1], 1000)
    Network_mod.Network.full_speed()

    results = {}
    for params in ensemble:
        model.cost(params)
        for exptId in expts:
            expt = exptColl[exptId]
            results.setdefault(exptId, {})
            dataByCalc = expt.GetData()
            for netId in dataByCalc.keys():
                results[exptId].setdefault(netId, {})
                # Pull the trajectory from that calculation, defaulting to
                # None if it doesn't exist.
                net = nets.get(netId)
                traj = net.trajectory
                for dataId in dataByCalc[netId].keys():
                    results[exptId][netId].setdefault(dataId, [])
                    scaleFactor = model.GetScaleFactors()[exptId][dataId]
                    result = scaleFactor * traj.get_var_traj(dataId)
                    results[exptId][netId][dataId].append(result)

    for exptId in expts:
        expt = exptColl[exptId]
        dataByCalc = expt.GetData()
        # We sort the calculation names for easier comparison across plots
        sortedCalcIds = dataByCalc.keys()
        sortedCalcIds.sort()
        for netId in sortedCalcIds:
            for dataId, dataDict in dataByCalc[netId].items():
                color, sym, dash = cW.next()
                if plot_data:
                    # Pull the data out of the dictionary and into an array
                    d = scipy.array([[t, v, e] for (t, (v, e))
                                     in dataDict.items()])
                    if style == 'errorbars':
                        l = errorbar(d[:, 0], d[:, 1], yerr=d[:, 2], fmt='o',
                                     color=color, markerfacecolor=color,
                                     marker=sym, ecolor='k', capsize=6)[0]
                    elif style == 'lines':
                        # Make sure we order the data before plotting
                        order = scipy.argsort(d[:, 0], 0)
                        d = scipy.take(d, order, 0)
                        l = plot(d[:, 0], d[:, 1], color=color, linestyle=dash)
                    lines.append(l)
                if plot_trajectories:
                    times = model.get_calcs().get(netId).trajectory.get_times()
                    mean_vals = scipy.mean(results[exptId][netId][dataId], 0)
                    std_vals = scipy.std(results[exptId][netId][dataId], 0)
                    lower_vals = mean_vals - std_vals
                    upper_vals = mean_vals + std_vals
                    # Plot the polygon
                    xpts = scipy.concatenate((times, times[::-1]))
                    ypts = scipy.concatenate((lower_vals, upper_vals[::-1]))
                    fill(xpts, ypts, fc=color, alpha=0.4)
                # Let's print the pretty name for our variable if we can.
                name = net.get_component_name(dataId)
                labels.append('%s in %s for %s' % (name, netId, exptId))

    for netId, net in nets.items():
        del net.times_to_add

    if show_legend:
        legend(lines, labels, loc=loc)

    for net in nets.values():
        net.times_to_add = None

    return lines, labels
def _get_block_headers(fhead, chanInfo):
    '''
    Returns a matrix containing the Son data block headers for a channel.
    'fhead' is a FileHeader instance, and 'chanInfo' is a ChannelInfo
    instance.

    The returned header in memory contains, for each disk block, a column
    with rows 0-4 representing:
        Offset to start of block in file
        Start time in clock ticks
        End time in clock ticks
        Chan number
        Items
    '''
    from scipy import zeros, arange, take
    if chanInfo.firstblock == -1:
        raise ValueError('No data on channel %i' % chanInfo.chan)
    succBlock = 1
    # Pre-allocate memory for header data:
    header = zeros([6, chanInfo.blocks], int)
    # Get first data block:
    fhead.fid.seek(chanInfo.firstblock)
    # Last and next block pointers, start and end times in clock ticks:
    header[0:4, 0] = numpy.fromfile(fhead.fid, numpy.int32, count=4)
    # Channel number and number of items in block:
    header[4:6, 0] = numpy.fromfile(fhead.fid, numpy.int16, count=2)
    # If only one block:
    if header[succBlock, 0] == -1:
        header[0, 0] = int(chanInfo.firstblock)
    # Loop if more blocks:
    else:
        fhead.fid.seek(header[succBlock, 0])
        for i in arange(1, chanInfo.blocks):
            header[0:4, i] = numpy.fromfile(fhead.fid, numpy.int32, count=4)
            header[4:6, i] = numpy.fromfile(fhead.fid, numpy.int16, count=2)
            if header[succBlock, i] > 0:
                fhead.fid.seek(header[succBlock, i])
            header[0, i-1] = header[0, i]
        # Replace predBlock for previous column:
        header[0, -1] = header[1, -2]
    # Delete succBlock data:
    header = take(header, (0, 2, 3, 4, 5), axis=0)
    return header
def calc_annfatalities_deagg_grid(lat, lon, total_fatalities, event_activity,
                                  bins=100):
    """Calculate the annualised fatalities as a total value in the grid cell.

    Inputs:
    lat               latitude, dimensions(event)
    lon               longitude, dimensions(event)
    total_fatalities  fatalities, dimensions(event, site)
    event_activity    event activity, dimensions(event)
    bins              an integer or (int, int) describing how the extent of
                      the data in (lat, lon) is to be binned.
                      If one integer is supplied, the extent determined
                      from the point data (lat, lon) is binned that number
                      of times in the X and Y direction. If (M, N), the
                      number of X bins is M, etc.

    Returns:
    ann_fatalities  the annualised fatalities for each grid cell,
                    dimensions(grid cells)
    lat_lon         a tuple of (lat, lon) midpoints, giving the grid cell
                    locations, dimensions(grid cells)
    gnumx, gnumy    the number of bins in the X and Y directions
    """
    # Bin data
    cell_location = utilities.bin_extent(lat, lon, bins=bins)

    # get bin width and number of cells
    try:
        (gnumx, gnumy) = bins
    except TypeError:
        gnumx = gnumy = bins
    num_of_bins = gnumx * gnumy

    # Run annualised loss calc on binned data
    lat_lon = scipy.zeros((num_of_bins, 2))
    annualised_fatalities_in_cell = scipy.zeros(num_of_bins)
    i = 0
    for row in cell_location:
        for cell in row:
            # calc the annualised loss for this cell
            index = scipy.array(cell['index'])
            lat_lon[i, :] = cell['mid_lat_lon']
            if len(index) >= 1:
                total_fatalities_cell = scipy.take(total_fatalities, index,
                                                   axis=1)
                (annualised_fatalities_in_cell[i], _) = calc_annfatalities(
                    total_fatalities_cell, event_activity)
            else:
                annualised_fatalities_in_cell[i] = scipy.nan
            i += 1
    return annualised_fatalities_in_cell, lat_lon, gnumx, gnumy
def rerun_pls(chrom, xdata, groups, mask, factors):
    """rerun pls on a subset of X-variables"""
    slice = scipy.take(xdata, chrom, 1)
    return pls(slice, groups, mask, factors)
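# Usage sketch for the column selection above: scipy.take along axis 1 pulls
# out just the X-variables named in 'chrom', so pls() only ever sees the
# selected columns.
import scipy

xdata = scipy.ones((5, 10))
chrom = [0, 3, 7]
subset = scipy.take(xdata, chrom, 1)
assert subset.shape == (5, 3)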
def FhklDWBA5(x, y, z, h, k, l=None, occ=None, alphai=0.2, alphaf=None,
              substrate=None, wavelength=1.0, e_par=0., e_perp=1.0,
              gpu_name="CPU", use_fractionnal=True, verbose=False,
              language="OpenCL", cl_platform="", separate_paths=False):
    """
    WARNING: this code is still in development, and needs to be checked!

    Calculate the grazing-incidence X-ray scattered intensity taking into
    account 5 scattering paths, for a nanostructure object located above a
    given substrate. All atoms with z>0 are assumed to be above the surface,
    and their scattering is computed using the 4 DWBA paths. Atoms with z<=0
    are below the surface, and their scattering is computed using a single
    path, taking into account the refraction and the attenuation length.

    x,y,z: coordinates of the atoms in fractional coordinates (relative to
           the substrate unit cell)
    h,k,l: reciprocal space coordinates
    alphai, alphaf: incident and outgoing angles, in radians
    substrate: the substrate material, as a pynx.gid.Crystal object - this
               will be used to calculate the material refraction index.
    wavelength: in Angstroms
    e_par, e_perp: percentage of polarisation parallel and perpendicular to
                   the incident plane

    Note: Either l *OR* alphaf must be supplied - it is assumed that the
    lattice coordinates are such that the [001] direction is perpendicular
    to the surface.
    """
    nrj = W2E(wavelength)
    # Atoms above the surface
    tmpx = (x+(z+y)*0).ravel()
    tmpy = (y+(x+z)*0).ravel()
    tmpz = (z+(x+y)*0).ravel()
    idx = scipy.nonzero(tmpz > 0)
    if len(idx[0]) > 0:
        if occ is not None:
            tmpocc = take(occ, idx)
        else:
            tmpocc = None
        f1234 = FhklDWBA4(take(tmpx, idx), take(tmpy, idx), take(tmpz, idx),
                          h, k, l=l, occ=tmpocc, alphai=alphai, alphaf=alphaf,
                          substrate=substrate, wavelength=wavelength,
                          e_par=e_par, e_perp=e_perp, gpu_name=gpu_name,
                          use_fractionnal=use_fractionnal, language=language,
                          cl_platform=cl_platform,
                          separate_paths=separate_paths)
    else:
        f1234 = 0
    # Atoms below the surface
    idx = scipy.nonzero(tmpz <= 0)
    if len(idx[0]) > 0:
        if use_fractionnal:
            c = substrate.uc.parameters()[2]
            s_fact = 1.0
        else:
            c = 2*pi
            s_fact = 1/c
        if alphaf is None:
            # alphaf, computed from l and alphai
            alphaf = scipy.arcsin(l/c*wavelength - scipy.sin(alphai))
        else:
            tmpl = (scipy.sin(alphaf) + scipy.sin(alphai))/wavelength
            if verbose:
                print "From alphaf: l=%4.2f -> %4.2f" % (tmpl.min(), tmpl.max())
        wi = Wave(alphai, e_par, e_perp, nrj)
        dwi = DistortedWave(None, substrate, wi)
        # TODO For outgoing beam: check e_par and e_perp, signs for k real
        # and imag...
        wf = Wave(alphaf, e_par, e_perp, nrj)
        dwf = DistortedWave(None, substrate, wf)
        # kz, transmitted
        kz_real, kz_imag = (-dwf.ktz - dwi.ktz).real, (dwf.ktz + dwi.ktz).imag
        if verbose:
            print "wi.kz, dwi.ktz:", wi.kz, dwi.ktz
            print "kz_below real:", kz_real
            print "kz_below imag:", kz_imag
            print "kz_below mean:", kz_real.mean(), kz_imag.mean()
        # Compute scattering
        if occ is not None:
            tmpocc = take(occ, idx)
        else:
            tmpocc = None
        if use_fractionnal:
            l_real = c*kz_real/(2*pi)
            l_imag = c*kz_imag/(2*pi)
        else:
            l_real = kz_real/(2*pi)
            l_imag = kz_imag/(2*pi)
        f5 = gpu.Fhkl_thread(h*s_fact, k*s_fact, l_real,
                             take(tmpx, idx), take(tmpy, idx), take(tmpz, idx),
                             occ=tmpocc, gpu_name=gpu_name, sz_imag=l_imag,
                             language=language,
                             cl_platform=cl_platform)[0]*dwi.Tiy*(-dwf.Tiy)
    else:
        f5 = 0
    if separate_paths:
        return f1234[0], f1234[1], f1234[2], f1234[3], f5
    return f1234 + f5
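# A minimal sketch of the above/below-surface split used in FhklDWBA5:
# scipy.nonzero on the boolean test returns the index tuple that take() then
# uses to gather each group of atoms.
import scipy
from scipy import take

z = scipy.array([0.5, -0.1, 1.2, 0.0])
idx_above = scipy.nonzero(z > 0)    # atoms handled by the 4 DWBA paths
idx_below = scipy.nonzero(z <= 0)   # atoms handled by the single refracted path
z_above = take(z, idx_above)
z_below = take(z, idx_below)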
def dfa_xval_pca(X, pca, nopcs, group, mask, nodfs, ptype='covar'):
    """Perform PC-DFA with full cross validation

    >>> import scipy
    >>> X = scipy.array([[ 0.19343116, 0.49655245, 0.72711322, 0.79482108, 0.13651874],[ 0.68222322, 0.89976918, 0.30929016, 0.95684345, 0.01175669],[ 0.3027644 , 0.82162916, 0.83849604, 0.52259035, 0.89389797],[ 0.54167385, 0.64491038, 0.56807246, 0.88014221, 0.19913807],[ 0.15087298, 0.81797434, 0.37041356, 0.17295614, 0.29872301],[ 0.69789848, 0.66022756, 0.70273991, 0.9797469 , 0.66144258],[ 0.378373 , 0.34197062, 0.54657115, 0.27144726, 0.28440859],[ 0.8600116 , 0.2897259 , 0.4448802 , 0.25232935, 0.46922429],[ 0.85365513, 0.34119357, 0.69456724, 0.8757419 , 0.06478112],[ 0.59356291, 0.53407902, 0.62131013, 0.73730599, 0.98833494]])
    >>> group = scipy.array([[1],[1],[1],[1],[2],[2],[2],[3],[3],[3]])
    >>> mask = scipy.array([[0],[1],[0],[0],[0],[0],[1],[0],[0],[1]])
    >>> scores,loads,eigs = dfa_xval_pca(X,'NIPALS',3,group,mask,2,'covar')
    """
    rx1,rx2,rx3,ry1,ry2,ry3,dummy1,dummy2,dummy3 = \
        _split(X, scipy.array(group,'i')[:,nA], mask[:,nA])

    if pca == 'SVD':
        pcscores,pp,pr,pceigs = pca_svd(rx1, type=ptype)
    elif pca == 'NIPALS':
        pcscores,pp,pr,pceigs = pca_nipals(rx1, nopcs, type=ptype)

    # get indices
    idxn = scipy.arange(X.shape[0])[:,nA]
    tr_idx = scipy.take(idxn, _index(mask,0), 0)
    cv_idx = scipy.take(idxn, _index(mask,1), 0)

    # train
    trscores,loads,eigs,dummy = cva(pcscores[:,0:nopcs], ry1, nodfs)

    # cross validation
    # get projected pc scores
    if ptype in ['covar']:
        rx2 = rx2 - scipy.resize(scipy.mean(rx2,0), (len(rx2),rx1.shape[1]))
    else:
        rx2 = (rx2 - scipy.resize(scipy.mean(rx2,0), (len(rx2),rx1.shape[1]))) / \
              scipy.resize(scipy.std(rx2,0), (len(rx2),rx1.shape[1]))
    pcscores = scipy.dot(rx2, scipy.transpose(pp))
    cvscores = scipy.dot(pcscores[:,0:nopcs], loads)

    # independent test
    if max(mask) > 1:
        ts_idx = scipy.take(idxn, _index(mask,2), 0)
        if ptype in ['covar']:
            rx3 = rx3 - scipy.resize(scipy.mean(rx3,0), (len(rx3),rx1.shape[1]))
        else:
            rx3 = (rx3 - scipy.resize(scipy.mean(rx3,0), (len(rx3),rx1.shape[1]))) / \
                  scipy.resize(scipy.std(rx3,0), (len(rx3),rx1.shape[1]))
        pcscores = scipy.dot(rx3, scipy.transpose(pp))
        tstscores = scipy.dot(pcscores[:,0:nopcs], loads)

        scores = scipy.zeros((X.shape[0],nodfs),'d')
        tr_idx = scipy.reshape(tr_idx, (len(tr_idx),)).tolist()
        cv_idx = scipy.reshape(cv_idx, (len(cv_idx),)).tolist()
        ts_idx = scipy.reshape(ts_idx, (len(ts_idx),)).tolist()
        _put(scores, tr_idx, trscores)
        _put(scores, cv_idx, cvscores)
        _put(scores, ts_idx, tstscores)
    else:
        scores = scipy.concatenate((trscores,cvscores), 0)
        tr_idx = scipy.reshape(tr_idx, (len(tr_idx),)).tolist()
        cv_idx = scipy.reshape(cv_idx, (len(cv_idx),)).tolist()
        _put(scores, tr_idx, trscores)
        _put(scores, cv_idx, cvscores)

    # get loadings for original variables
    loads = scipy.dot(scipy.transpose(pp[0:nopcs,:]), loads)

    return scores, loads, eigs
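# A minimal sketch of the held-out projection used in dfa_xval_pca: the
# validation rows are centred (here, 'covar'-style) and pushed through the
# PCA loadings 'pp' and then the DFA loadings 'loads', mirroring the
# preprocessing applied to the training block.
import scipy

def project_held_out(rx, pp, loads, nopcs):
    rx = rx - scipy.resize(scipy.mean(rx, 0), rx.shape)
    pcscores = scipy.dot(rx, scipy.transpose(pp))
    return scipy.dot(pcscores[:, 0:nopcs], loads)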
def cva(X, group, nodfs, pcloads=None):
    """Canonical variates analysis

    Ref. Krzanowski; Manly, B.F.J., Multivariate Statistical Methods:
    A Primer, 2nd Ed, Chapman & Hall: New York, 1986

    >>> import scipy
    >>> X = scipy.array([[ 0.19343116, 0.49655245, 0.72711322, 0.79482108, 0.13651874],[ 0.68222322, 0.89976918, 0.30929016, 0.95684345, 0.01175669],[ 0.3027644 , 0.82162916, 0.83849604, 0.52259035, 0.89389797],[ 0.54167385, 0.64491038, 0.56807246, 0.88014221, 0.19913807],[ 0.15087298, 0.81797434, 0.37041356, 0.17295614, 0.29872301],[ 0.69789848, 0.66022756, 0.70273991, 0.9797469 , 0.66144258],[ 0.378373 , 0.34197062, 0.54657115, 0.27144726, 0.28440859],[ 0.8600116 , 0.2897259 , 0.4448802 , 0.25232935, 0.46922429],[ 0.85365513, 0.34119357, 0.69456724, 0.8757419 , 0.06478112],[ 0.59356291, 0.53407902, 0.62131013, 0.73730599, 0.98833494]])
    >>> group = scipy.array([[1],[1],[1],[1],[2],[2],[2],[3],[3],[3]])
    >>> B,W = _BW(X,group)
    >>> B
    array([[ 0.12756749, -0.10061061,  0.00366132, -0.00615551,  0.05378535],
           [-0.10061061,  0.09289765,  0.00469185,  0.03883801, -0.05465494],
           [ 0.00366132,  0.00469185,  0.0043456 ,  0.01883603, -0.00530158],
           [-0.00615551,  0.03883801,  0.01883603,  0.08554211, -0.0332867 ],
           [ 0.05378535, -0.05465494, -0.00530158, -0.0332867 ,  0.03372716]])
    >>> W
    array([[ 0.049357  ,  0.00105553, -0.00808075,  0.04037998, -0.02013773],
           [ 0.00105553,  0.03555862, -0.00982256,  0.00761902,  0.02439148],
           [-0.00808075, -0.00982256,  0.03519157,  0.01447587,  0.03438791],
           [ 0.04037998,  0.00761902,  0.01447587,  0.10132225, -0.01048251],
           [-0.02013773,  0.02439148,  0.03438791, -0.01048251,  0.1417496 ]])
    >>>
    >>> U,As_out,Ls_out,dummy = cva(X,group,5)
    >>>
    >>> U
    array([[-4.17688874, -4.00309392, -3.30364313, -4.17357019,  0.09912727],
           [-3.84164699, -4.48421541, -2.42156782, -4.9040549 ,  3.20454647],
           [-3.81085207, -3.81397856, -3.57914463, -7.41611306,  0.9193002 ],
           [-3.24935377, -4.45386899, -2.95147097, -4.88934464,  1.59185795],
           [-4.13154582, -2.09087065, -3.10069062, -5.44262709,  2.11303517],
           [-2.16978732, -4.9634328 , -2.48987133, -5.94427649,  1.31295895],
           [-1.5773928 , -2.78409584, -3.60130796, -4.65040852,  0.93512979],
           [ 0.99791536, -3.22594943, -3.54773184, -5.49732342,  2.13121685],
           [-1.37244426, -5.24757135, -4.44704409, -5.11090375,  1.55257506],
           [-0.69651359, -3.79497195, -1.19709398, -5.42908493,  0.677332  ]])
    >>>
    """
    # Get B,W
    B, W = _BW(X, group)

    # produce a diagonal matrix L of generalized eigenvalues and a full
    # matrix A whose columns are the corresponding eigenvectors so that
    # B*A = W*A*L.
    L, A = scipy.linalg.eig(B, W)

    # need to normalize A such that Aout'*W*Aout = I, introducing the
    # Cholesky decomposition K = T'T
    # (see Seber 1984 "Multivariate Observations" pp 270)
    # At the moment A'*W*A = K, so substituting the Cholesky decomposition:
    # A'*W*A = T'*T ; so, inv(T')*A'*W*A*inv(T) = I
    # & [inv(T)]'*A'*W*A*inv(T) = I thus, [A*inv(T)]'*W*[A*inv(T)] = I
    # thus Aout = A*inv(T)
    K = scipy.dot(scipy.transpose(A), scipy.dot(W, A))
    T = scipy.linalg.cholesky(K)
    Aout = scipy.dot(A, scipy.linalg.inv(T))

    # Sort eigenvectors w.r.t eigenvalues
    order = _flip(scipy.argsort(scipy.reshape(L.real, (len(L),))))
    Ls = _flip(scipy.sort(L.real))

    # extract & reduce to required size
    As_out = scipy.take(Aout, order[0:nodfs].tolist(), 1)
    Ls_out = Ls[0:nodfs][nA,:]

    # Create scores: U is the matrix of canonical variates
    U = scipy.dot(X, As_out)

    # convert pc-dfa loadings back to original variables if necessary
    if pcloads is not None:
        pcloads = scipy.dot(scipy.transpose(pcloads), As_out)

    return U, As_out, Ls_out, pcloads
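# A minimal sketch of the Cholesky normalisation used in cva(): once
# K = A'WA is factored as T'T, Aout = A*inv(T) satisfies Aout'*W*Aout = I.
# For simplicity this assumes a real A and a symmetric positive-definite W.
import scipy
import scipy.linalg

def normalise_eigenvectors(A, W):
    K = scipy.dot(scipy.transpose(A), scipy.dot(W, A))
    T = scipy.linalg.cholesky(K)          # upper triangular, so K = T'T
    Aout = scipy.dot(A, scipy.linalg.inv(T))
    # sanity check: this product should be (numerically) the identity
    check = scipy.dot(scipy.transpose(Aout), scipy.dot(W, Aout))
    return Aout, check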
def PlotTrajectoriesForExperiments(model, experiments, params=None,
                                   with_data=True, plotPts=100, overlap=.1,
                                   skip=1, showLegend=True):
    # First find the maximum time in our data
    maxTime = 0
    chemsNeededByCalc = {}
    for exptName in experiments:
        dataByCalc = model.exptColl[exptName].GetData()
        for calc in dataByCalc:
            chemsNeededByCalc.setdefault(calc, [])
            for chem in dataByCalc[calc].keys():
                chemsNeededByCalc[calc].append(chem)
                thisMaxTime = max(dataByCalc[calc][chem].keys())
                if thisMaxTime > maxTime:
                    maxTime = thisMaxTime

    lines = []
    labels = []
    times = scipy.linspace(0, maxTime * (1 + overlap), plotPts)
    varsByCalc = {}
    for calc in chemsNeededByCalc:
        varsByCalc[calc] = {}
        for chem in chemsNeededByCalc[calc]:
            varsByCalc[calc][chem] = times

    model.GetCalculationCollection().Calculate(varsByCalc, params)
    calcVals = model.GetCalculationCollection().GetResults(varsByCalc)
    cW = ColorWheel()
    for exptName in experiments:
        expt = model.exptColl[exptName]
        dataByCalc = expt.GetData()
        for calc in dataByCalc:
            for chem in dataByCalc[calc]:
                color, sym, dash = cW.next()
                if with_data:
                    for time, (data, error) \
                            in dataByCalc[calc][chem].items()[::skip]:
                        errorbar(time, data, yerr=error, color=color,
                                 mfc=color, marker=sym, ecolor=color,
                                 capsize=6)

                predicted = scipy.array(calcVals[calc][chem].items())
                # sort the (time, value) rows by time before plotting
                order = scipy.argsort(predicted[:, 0])
                predicted = scipy.take(predicted, order, 0)
                predicted[:,1] = predicted[:,1] *\
                        model.GetScaleFactors()[exptName][chem]
                lines.append(plot(predicted[:, 0], predicted[:, 1],
                                  color=color, linestyle=dash, linewidth=3))
                labels.append(chem + ' in ' + str(calc))
                # + ' for ' + str(exptName))

    if showLegend:
        legend(lines, labels, loc=4)
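# A short sketch of why the predicted rows are sorted before plotting:
# dict.items() returns the (time, value) pairs in arbitrary order, and
# plot() connects points in the order given, so unsorted rows would draw a
# zig-zag.  Note the axis argument to take(), which reorders whole rows.
import scipy

predicted = scipy.array([[3., 30.], [1., 10.], [2., 20.]])
order = scipy.argsort(predicted[:, 0])
predicted = scipy.take(predicted, order, 0)   # rows now time-ordered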
def plot_scen_loss_stats(input_dir, site_tag, output_dir,
                         # plot_file,
                         save_file=None, pre89=False, dollars89=False,
                         resonly=False):
    """Plot a scenario loss statistics graph from scenario data.

    input_dir   input directory
    site_tag    event descriptor string
    output_dir  general output directory
    ## plot_file  name of map output file to create in 'output_dir' directory
    save_file   name of map data output file to create in 'output_dir'
                directory
    pre89       if True consider buildings that existed before 1989 only
    dollars89   if True express dollar values in pre-1989 dollars
    resonly     if True consider only residential buildings
    """
    # read in raw data
    data = cpr.obsolete_convert_Py2Mat_Risk(site_tag)

    # filter data depending on various flags
    temp_ecloss = data.ecloss
    temp_bval2 = data.ecbval2
    if pre89:
        if resonly:
            take_array = [x[2] and x[11] == 'RES1' for x in data.structures]
        else:
            take_array = [x[2] for x in data.structures]
        # boolean masks select with compress(), not take()
        temp_ecloss = scipy.compress(take_array, data.ecloss)
        temp_bval2 = scipy.compress(take_array, data.ecbval2)
    else:
        if resonly:
            take_array = [x[11] == 'RES1' for x in data.structures]
            temp_ecloss = scipy.compress(take_array, data.ecloss)
            temp_bval2 = scipy.compress(take_array, data.ecbval2)

    # which dollars does user want
    doll_str = 'in 2002 dollars'
    cvt_doll = 1.0
    if dollars89:
        doll_str = 'in 1989 dollars'
        cvt_doll = 1/1.37

    f_ecloss = cvt_doll * temp_ecloss
    f_bval2 = cvt_doll * temp_bval2
    f_aggloss_bill = scipy.sum(f_bval2) / 1e9
    print('f_ecloss=%s' % str(f_ecloss))
    print('f_bval2=%s' % str(f_bval2))
    print('f_aggloss_bill=%s' % str(f_aggloss_bill))

    h_data = cxh.calc_xy_histogram(f_aggloss_bill, bins=10)
    plot_out = os.path.join(output_dir, 'test1.png')
    title = 'Newc89 agg loss, pre89, 1989 dollars'
    xlabel = '$ %s (x 1e+9)' % doll_str
    pb.plot_barchart(h_data, output_file=plot_out, title=title,
                     xlabel=xlabel, ylabel='frequency',
                     xrange=None, yrange=None,
                     grid=True, show_graph=True, annotate=[])
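# A short sketch of the boolean filtering above: scipy.compress keeps the
# elements where the condition is true, which is what the pre89/resonly
# masks need; scipy.take would instead treat True/False as indices 1/0 and
# silently select the wrong elements.
import scipy

ecloss = scipy.array([10., 20., 30., 40.])
is_res = [True, False, True, False]
assert scipy.compress(is_res, ecloss).tolist() == [10., 30.]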
def findCubeNeighbors(max_N_cubes_1D, big_cube_lower_coord, cubes_centroids, a):
    """For each cube, finds its neighbors.
    We use a code similar to Level::searchCubesNeighborsIndexes() from
    octtree.cpp
    """
    C = cubes_centroids.shape[0]
    # alternative code
    absoluteCartesianCoord = floor((cubes_centroids - big_cube_lower_coord)/a)
    CubesNumbers = (absoluteCartesianCoord[:, 0] * max_N_cubes_1D) * max_N_cubes_1D + absoluteCartesianCoord[:, 1] * max_N_cubes_1D + absoluteCartesianCoord[:, 2]
    CubesSortedNumbersToIndexes = zeros((C, 2), 'd')
    indSortedCubesNumbers = argsort(CubesNumbers, kind='mergesort')
    CubesSortedNumbersToIndexes[:, 0] = take(CubesNumbers, indSortedCubesNumbers, axis=0)
    CubesSortedNumbersToIndexes[:, 1] = take(arange(C), indSortedCubesNumbers, axis=0)
    cubesNeighborsIndexesTmp2 = zeros((C, 28), 'i') - 1
    wrapping_code2 = """
    int counter;
    for (int i=0 ; i<C ; ++i) {
      blitz::Array<double, 1> absCartCoord(3);
      absCartCoord = absoluteCartesianCoord(i, 0), absoluteCartesianCoord(i, 1), absoluteCartesianCoord(i, 2);
      counter = 1;
      cubesNeighborsIndexesTmp2(i, 0) = i; // we first consider the cube itself
      // we find the neighbors
      for (int x=-1 ; x<2 ; ++x) {
        for (int y=-1 ; y<2 ; ++y) {
          for (int z=-1 ; z<2 ; ++z) {
            int index = -1;
            blitz::Array<double, 1> CandidateAbsCartCoord(3);
            CandidateAbsCartCoord = absCartCoord(0) + x, absCartCoord(1) + y, absCartCoord(2) + z;
            /// no component of (absoluteCartesianCoord(i) + p) -- where i=0,1,2 and p = x,y,z -- can be:
            /// (1) negative or (2) greater than MaxNumberCubes1D.
            int condition = 1;
            for (int j=0 ; j<3 ; ++j) condition *= ( (CandidateAbsCartCoord(j) >= 0) && (CandidateAbsCartCoord(j) < max_N_cubes_1D) );
            // we also do not want to consider the cube itself
            condition *= !((x==0) && (y==0) && (z==0));
            if (condition>0) {
              double candidate_number = (CandidateAbsCartCoord(0) * max_N_cubes_1D)*max_N_cubes_1D + CandidateAbsCartCoord(1) * max_N_cubes_1D + CandidateAbsCartCoord(2);
              { // index search
                if ( (candidate_number < CubesSortedNumbersToIndexes(0, 0)) || (candidate_number > CubesSortedNumbersToIndexes(C-1, 0)) ) index = -1;
                else {
                  int ind_inf = 0, ind_sup = C-1, ind_mid;
                  while(ind_sup-ind_inf > 1) {
                    ind_mid = (ind_sup+ind_inf)/2;
                    if (candidate_number > CubesSortedNumbersToIndexes(ind_mid, 0)) ind_inf = ind_mid;
                    else ind_sup = ind_mid;
                  }
                  if (candidate_number == CubesSortedNumbersToIndexes(ind_inf, 0)) index = CubesSortedNumbersToIndexes(ind_inf, 1);
                  else if (candidate_number == CubesSortedNumbersToIndexes(ind_sup, 0)) index = CubesSortedNumbersToIndexes(ind_sup, 1);
                  else index = -1;
                }
              } // end of index search
            }
            if (index>-1) {cubesNeighborsIndexesTmp2(i, counter) = index; counter++;}
          } // z
        } // y
      } // x
    }
    """
    weave.inline(wrapping_code2,
                 ['C', 'CubesSortedNumbersToIndexes', 'cubesNeighborsIndexesTmp2',
                  'absoluteCartesianCoord', 'max_N_cubes_1D'],
                 type_converters = converters.blitz,
                 include_dirs = [],
                 library_dirs = [],
                 libraries = [],
                 headers = ['<iostream>'],
                 compiler = 'gcc',
                 extra_compile_args = ['-O3', '-pthread', '-w'])
    # construction of "cubesNeighborsIndexes"
    cubes_lists_NeighborsIndexes2 = {}
    N_total_neighbors = 0
    for i in range(C):
        cubes_lists_NeighborsIndexes2[i] = []
    for i in range(C):
        listTmp = []
        j = 0
        while cubesNeighborsIndexesTmp2[i, j] > -1:
            listTmp.append(cubesNeighborsIndexesTmp2[i, j])
            j += 1
        cubes_lists_NeighborsIndexes2[i] = listTmp
        N_total_neighbors += len(listTmp)
    # we also have to save the cubesNeighborsIndexes under a form easily
    # readable by C++ code
    cubes_neighborsIndexes = zeros(N_total_neighbors, 'i')
    cube_N_neighbors = zeros(C, 'i')
    startIndex = 0
    for j in range(C):
        length = len(cubes_lists_NeighborsIndexes2[j])
        cube_N_neighbors[j] = length
        cubes_neighborsIndexes[startIndex:startIndex + length] = cubes_lists_NeighborsIndexes2[j]
        startIndex += length
    return cubes_lists_NeighborsIndexes2, cubes_neighborsIndexes, cube_N_neighbors
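# A pure-Python sketch of the binary 'index search' embedded in the weave
# code above: given the (cube number, cube index) table sorted by number,
# it maps a candidate neighbor's cube number to its index, or -1 if absent.
import bisect

def find_cube_index(sorted_numbers, sorted_indexes, candidate):
    pos = bisect.bisect_left(sorted_numbers, candidate)
    if pos < len(sorted_numbers) and sorted_numbers[pos] == candidate:
        return sorted_indexes[pos]
    return -1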
def plot_model_results(model, expts = None, style='errorbars',
                       show_legend = True, loc = 'upper left',
                       plot_data = True, plot_trajectories = True,
                       data_to_plot=None):
    """
    Plot the fits to the given experiments for the last cost evaluation of
    the model.

    Note: You may need to run a Plotting.show() to display the plot.

    Inputs:
      model: Model whose results to plot
      expts: List of experiment IDs to plot. If None, all experiments are
             plotted.
      style: Style of plot. Currently supported options are:
              'errorbars': Plots points and bars for each data point
              'lines': Plots a continuous line for the data
      show_legend: Boolean that controls whether or not to show the legend
      loc: Location of the legend. See help(Plotting.legend) for options.
      plot_data: Boolean that controls whether the data is plotted
      plot_trajectories: Boolean that controls whether the trajectories are
                         plotted
      data_to_plot: If None, all data variables will be plotted. Otherwise,
                    pass a list of id's and only variables in that list will
                    be plotted.
    """
    exptColl = model.get_expts()
    calcColl = model.get_calcs()

    lines, labels = [], []
    cW = ColorWheel()

    if expts is None:
        expts = exptColl.keys()

    for exptId in expts:
        expt = exptColl[exptId]
        dataByCalc = expt.GetData()
        for ds in expt.scaled_extrema_data:
            dataByCalc.setdefault(ds['calcKey'], {})
            dataByCalc[ds['calcKey']].setdefault(ds['var'], {})
        # We sort the calculation names for easier comparison across plots
        sortedCalcIds = dataByCalc.keys()
        sortedCalcIds.sort()
        for calcId in sortedCalcIds:
            # Pull the trajectory from that calculation, defaulting to None
            # if it doesn't exist.
            net = calcColl.get(calcId)
            traj = getattr(net, 'trajectory', None)
            for dataId, dataDict in dataByCalc[calcId].items():
                # Skip this variable if it's not amongst our data_to_plot
                # list.
                if (data_to_plot is not None) and (dataId not in data_to_plot):
                    continue
                color, sym, dash = cW.next()
                if plot_trajectories:
                    if traj is None:
                        print 'No trajectory in calculation %s!' % calcId
                        print 'The cost must be evaluated before the results',
                        print 'can be plotted.'
                        return
                    scaleFactor = model.GetScaleFactors()[exptId][dataId]
                    result = scaleFactor*traj.getVariableTrajectory(dataId)
                    l = plot(traj.timepoints, result, color=color,
                             linestyle=dash, linewidth=3)
                    # We superimpose a dotted black line to distinguish
                    # theory from data in this case
                    if style == 'lines':
                        plot(traj.timepoints, result, 'k--', linewidth=3,
                             zorder=10)

                if plot_data and dataDict:
                    # Pull the data out of the dictionary and into an array
                    d = scipy.array([[t, v, e] for (t, (v, e))
                                     in dataDict.items()])
                    if style == 'errorbars':
                        l = errorbar(d[:,0], d[:,1], yerr=d[:,2], color=color,
                                     mfc=color, linestyle='', marker=sym,
                                     ecolor='k', capsize=6)[0]
                    elif style == 'lines':
                        # Make sure we order the data before plotting
                        order = scipy.argsort(d[:,0], 0)
                        d = scipy.take(d, order, 0)
                        l = plot(d[:,0], d[:,1], color=color, linestyle=dash)

                # Plot the extra data points.
                if plot_data:
                    for res in model.residuals:
                        if isinstance(res, Residuals.ScaledExtremum)\
                                and res.exptKey == exptId\
                                and res.calcKey == calcId\
                                and res.var == dataId:
                            t = res.last_time_result
                            val = res.yMeas
                            sigma = res.ySigma
                            errorbar([t], [val], [sigma], color=color,
                                     linestyle='', marker=sym, ecolor=color,
                                     capsize=6, mfc='w', mec=color,
                                     mew=2, ms=10)

                # Let's print the pretty name for our variable if we can.
                # (append the handle once per variable, paired with its label)
                lines.append(l)
                name = net.get_component_name(dataId)
                labels.append('%s in %s for %s' % (name, calcId, exptId))

    if show_legend:
        legend(lines, labels, loc=loc)

    return lines, labels