def pca(data, nPCs=-1):
    """Project standardized data onto its leading principal components.

    Every column of ``data`` is centred on its mean and divided by its
    standard deviation.  The eigen-decomposition of the resulting covariance
    matrix is then computed and the standardized data are projected onto the
    eigenvectors with the largest eigenvalues.

    The computation runs on a floating-point copy of ``data``, so the
    caller's array is never modified and integer input is accepted.

    Arguments:
      * data: 2-D array-like with one row per example and one column per
        attribute.
      * nPCs: Number of components to keep.  -1 (the default) keeps all of
        them; larger requests are clipped to the number of attributes.

    Returns:
      A tuple ``(projectedData, vectors, values)`` where ``vectors`` holds
      the selected eigenvectors as columns, ``values`` is the list of their
      eigenvalues in decreasing order and ``projectedData`` is the
      standardized data multiplied by ``vectors``.
    """
    # Work on a private float copy: in-place arithmetic on the argument would
    # alter the caller's data and raises a casting error for integer arrays.
    data = MA.array(data, dtype=float, copy=True)
    exampleCount = len(data)

    data -= data.sum(axis=0) / float(exampleCount)   # zero mean per column
    # NOTE(review): a constant column has zero standard deviation; numpy.ma
    # presumably masks the resulting entries rather than raising -- confirm
    # this is acceptable for callers.
    data /= MA.std(data, axis=0)                     # unit variance per column

    covMatrix = MA.dot(data.T, data) / exampleCount
    eigVals, eigVectors = linalg.eigh(covMatrix)
    eigVals = list(eigVals)

    if nPCs == -1:
        nPCs = len(eigVals)
    nPCs = min(nPCs, len(eigVals))

    # Largest eigenvalue first; equal eigenvalues are ordered by decreasing
    # position, exactly like sorting (value, index) pairs and reversing.
    order = sorted(range(len(eigVals)),
                   key=lambda k: (eigVals[k], k), reverse=True)
    indices = order[:nPCs]

    vectors = MA.take(eigVectors, indices, axis=1)
    values = [eigVals[k] for k in indices]
    projectedData = MA.dot(data, vectors)
    return projectedData, vectors, values
def pca(data, nPCs = -1):
    """Principal component analysis of column-standardized data.

    The columns of ``data`` are shifted to zero mean and scaled to unit
    standard deviation, the covariance matrix of the result is decomposed
    with ``linalg.eigh`` and the standardized data are projected onto the
    eigenvectors belonging to the largest eigenvalues.

    A floating-point copy of ``data`` is used internally: the argument is
    left unchanged and integer arrays are handled.

    Arguments:
      * data: 2-D array-like, rows are examples and columns are attributes.
      * nPCs: How many components to return.  -1 (default) means all;
        values above the attribute count are clipped.

    Returns:
      ``(projectedData, vectors, values)`` -- the projected data, the chosen
      eigenvectors (as columns) and the list of their eigenvalues, largest
      first.
    """
    # A private float copy avoids clobbering the caller's array and avoids
    # the casting error in-place float arithmetic raises on integer data.
    data = MA.array(data, dtype=float, copy=True)
    nExamples = len(data)

    data -= data.sum(axis=0) / float(nExamples)   # subtract the column means
    # NOTE(review): columns with zero standard deviation are presumably
    # masked by numpy.ma here rather than raising -- confirm.
    data /= MA.std(data, axis=0)

    covMatrix = MA.dot(data.T, data) / nExamples
    eigVals, eigVectors = linalg.eigh(covMatrix)
    eigVals = list(eigVals)

    if nPCs == -1:
        nPCs = len(eigVals)
    nPCs = min(nPCs, len(eigVals))

    # Descending by (eigenvalue, position): identical to sorting the
    # (value, index) pairs ascending and then reversing them.
    ranking = sorted(range(len(eigVals)),
                     key=lambda k: (eigVals[k], k), reverse=True)
    indices = ranking[:nPCs]   # indices of the wanted principal components

    vectors = MA.take(eigVectors, indices, axis = 1)
    values = [eigVals[k] for k in indices]
    projectedData = MA.dot(data, vectors)
    return projectedData, vectors, values
def compute(self):
    """Compute ``self.eigValues`` / ``self.eigVectors`` from ``self.data``.

    ``self.data`` may be an ``orange.ExampleTable`` (converted with
    ``toNumpyMA("a/c")``) or a ``(data, classes)`` tuple.  The data are
    passed through ``self.center`` and ``self.normalize`` and stored in
    ``self.normalizedData``.

    With exactly two distinct class values a single direction is derived
    from the pooled covariance and the difference of the two group
    averages.  Otherwise the within-class matrix ``Sw`` and the
    between-class matrix ``Sb`` are built and the eigen-decomposition of
    ``inv(Sw) * Sb`` is sorted with ``self.getSorted``.

    Returns None.  Nothing is done when ``self.data`` is None or when both
    results are already masked arrays.

    Raises:
      TypeError: if ``self.data`` is neither an ExampleTable nor a tuple.
    """
    # Identity test: ``== None`` would be evaluated element-wise on arrays.
    if self.data is None:
        return

    # NOTE(review): the branches below store a plain numpy array or whatever
    # self.getSorted returns in self.eigValues; if that is never a
    # MaskedArray this shortcut cannot trigger -- confirm.
    if (isinstance(self.eigVectors, MA.MaskedArray)
            and isinstance(self.eigValues, MA.MaskedArray)):
        return

    if isinstance(self.data, orange.ExampleTable):
        data, classes = self.data.toNumpyMA("a/c")
    elif isinstance(self.data, tuple):
        data, classes = self.data
    else:
        # Previously this fell through and failed later on an unbound name.
        raise TypeError("compute() needs an orange.ExampleTable or a "
                        "(data, classes) tuple, got %s"
                        % type(self.data).__name__)

    data = self.center(data)
    data = self.normalize(data)
    self.normalizedData = data
    exampleCount, attrCount = data.shape
    classValues = set(classes)

    # special case when we have two classes
    if len(classValues) == 2:
        data1 = MA.take(data, numpy.argwhere(classes == 0).flatten(), axis=0)
        data2 = MA.take(data, numpy.argwhere(classes != 0).flatten(), axis=0)
        # NOTE(review): axis=1 averages across attributes and gives one
        # value per example, so the subtraction only broadcasts when both
        # groups have compatible sizes -- confirm whether axis=0 was meant.
        miDiff = MA.average(data1, axis=1) - MA.average(data2, axis=1)
        covMatrix = (MA.dot(data1.T, data1) + MA.dot(data2.T, data2)) / exampleCount
        # NOTE(review): ``*`` is element-wise on arrays, not a matrix
        # product -- confirm this is intended.
        self.eigVectors = linalg.inv(covMatrix) * miDiff
        self.eigValues = numpy.array([1])
    else:
        # Accumulate the scatter of each class group around its own centre.
        Sw = MA.zeros([attrCount, attrCount])
        for v in classValues:
            d = MA.take(data, numpy.argwhere(classes == v).flatten(), axis=0)
            d = self.center(d)
            Sw += MA.dot(d.T, d)
        Sw /= exampleCount
        total = MA.dot(data.T, data) / float(exampleCount)
        Sb = total - Sw

        # NOTE(review): element-wise product, and eigh assumes a symmetric
        # matrix -- confirm both are intended.
        matrix = linalg.inv(Sw) * Sb
        eigVals, eigVectors = linalg.eigh(matrix)
        self.eigValues, self.eigVectors = self.getSorted(eigVals, eigVectors)
def getSorted(self, values, vectors):
    """Order values from largest to smallest and permute the vector columns
    the same way.

    Equal values are ordered by decreasing original position.

    Returns ``(newValues, newVectors)``: the list of reordered values and
    the result of taking the matching columns (axis 1) of ``vectors``.
    """
    ranked = sorted(((val, pos) for pos, val in enumerate(values)),
                    reverse=True)
    order = [pos for _, pos in ranked]
    newValues = [values[pos] for pos in order]
    newVectors = MA.take(vectors, order, axis=1)
    return newValues, newVectors
def test_testTakeTransposeInnerOuter(self):
    # Test of take, transpose, inner, outer products: each result on the
    # array holding a masked element is compared with the np.* result on the
    # array that has none.
    with_mask = arange(24)
    without_mask = np.arange(24)
    with_mask[5:6] = masked
    with_mask = with_mask.reshape(2, 3, 4)
    without_mask = without_mask.reshape(2, 3, 4)

    order = (2, 0, 1)
    assert_(eq(np.transpose(without_mask, order), transpose(with_mask, order)))
    assert_(eq(np.take(without_mask, order, 1), take(with_mask, order, 1)))

    # The reference products use the zero-filled versions of both operands.
    left = filled(with_mask, 0)
    right = filled(without_mask, 0)
    assert_(eq(np.inner(left, right), inner(with_mask, without_mask)))
    assert_(eq(np.outer(left, right), outer(with_mask, without_mask)))

    # take() on an object array that has one masked element.
    objects = array(["abc", 1, "def", 2, 3], object)
    objects[2] = masked
    picked = take(objects, [0, 3, 4])
    for position, wanted in enumerate(("abc", 2, 3)):
        assert_(picked[position] == wanted)
def getSorted(self, values, vectors):
    """Return the values in decreasing order together with the columns of
    ``vectors`` rearranged to match.

    Entries with equal values come out in decreasing order of their
    original position.

    Returns ``(newValues, newVectors)``: a list of values and the columns
    (axis 1) of ``vectors`` taken in the new order.
    """
    descending = sorted(((val, idx) for idx, val in enumerate(values)),
                        reverse=True)
    positions = [idx for _, idx in descending]
    newValues = [values[idx] for idx in positions]
    newVectors = MA.take(vectors, positions, axis = 1)
    return newValues, newVectors
def test_testTakeTransposeInnerOuter(self):
    # Test of take, transpose, inner, outer products: the array with one
    # masked element must agree with the np.* results on the unmasked twin.
    xm = arange(24)
    ref = np.arange(24)
    xm[5:6] = masked
    xm = xm.reshape(2, 3, 4)
    ref = ref.reshape(2, 3, 4)

    perm = (2, 0, 1)
    assert_(eq(np.transpose(ref, perm), transpose(xm, perm)))
    assert_(eq(np.take(ref, perm, 1), take(xm, perm, 1)))

    # Reference inner/outer products are built from zero-filled operands.
    xm_filled = filled(xm, 0)
    ref_filled = filled(ref, 0)
    assert_(eq(np.inner(xm_filled, ref_filled), inner(xm, ref)))
    assert_(eq(np.outer(xm_filled, ref_filled), outer(xm, ref)))

    # take() on an object array with one element masked out.
    mixed = array(['abc', 1, 'def', 2, 3], object)
    mixed[2] = masked
    chosen = take(mixed, [0, 3, 4])
    for slot, expected in enumerate(('abc', 2, 3)):
        assert_(chosen[slot] == expected)
def regrid(data_array):
    """Reorder the longitude axis of ``data_array`` to follow the re-centred
    ORCA grid.

    The first 50 rows of the ``lon`` and ``lat`` variables are read from the
    ORCA2.0 grid file.  Longitudes of 180 or more are shifted down by 360,
    and the columns are then rearranged so that those below the first
    longitude of row 0 come first, followed by the remaining ones.  The same
    column order is applied to every time step of ``data_array``.

    Arguments:
      * data_array: Array indexed as ``[time, row, column]``.

    Returns:
      ``(bm_array, lon, lat)`` -- the reordered data as a masked array and
      the reordered longitude and latitude grids.
    """
    nc_grid = netcdf_file('/home/nicholas/thesis/data/netcdf_files/ORCA2.0_grid.nc','r')
    try:
        # Copy while the file is open so the arrays do not depend on the
        # reader's buffers, and always release the handle, even on error.
        lon_min = nc_grid.variables['lon'][0:50,:].copy()
        lat = nc_grid.variables['lat'][0:50,:].copy()
    finally:
        nc_grid.close()

    # Wrap longitudes at or above 180 into the negative range.
    lon_min[lon_min >= 180.] -= 360.

    # Columns at or above the first longitude of row 0 (iw) and below it (ie).
    first_lon = lon_min[0][0]
    iw = np.where(lon_min[0,:] >= first_lon)[0]
    ie = np.where(lon_min[0,:] < first_lon)[0]

    def reorder_grid(grid):
        # ie columns (without their last one) followed by the iw columns,
        # then the final column dropped.
        joined = np.concatenate((np.take(grid, ie, axis=1)[:,:-1],
                                 np.take(grid, iw, axis=1)), axis=1)
        return joined[:,:-1]

    lon = reorder_grid(lon_min)
    lat = reorder_grid(lat)

    # NOTE(review): the data keep every ie column and lose only the final
    # column, so they end up one column wider than lon/lat -- confirm that
    # this difference is intended.
    timesteps = np.shape(data_array)[0]
    bm_array = ma.array([
        ma.concatenate((ma.take(data_array[i, :, :], ie, axis=1),
                        ma.take(data_array[i, :, :], iw, axis=1)), axis=1)[:,:-1]
        for i in range(timesteps)
    ])
    return bm_array, lon, lat
def sample_all_realizations(year_array, time_frequency, lon_frequency):
    '''
    Given a sampling frequency, sample all the grid realizations at that
    frequency.

    A realization keeps every ``time_frequency``-th time step and every
    ``lon_frequency``-th longitude of ``year_array`` (all latitudes).  The
    starting offsets are moved back by one index per iteration, so the
    negative indices wrap around to the end of the axis.  The mean of each
    realization is multiplied by the module-level ``unit_conversion``.

    Arguments:
      * year_array: Array indexed as [time, latitude, longitude].
      * time_frequency: Stride along the time axis.
      * lon_frequency: Stride along the longitude axis.

    Returns (mean, standard deviation) of the realization means.
    '''
    # Call form prints the same text under Python 2 and is valid Python 3.
    print("Sampling all the realisations...")

    time_steps, lat_steps, lon_steps = np.shape(year_array)

    list_of_means = []
    time_indices = np.arange(0, time_steps, time_frequency)
    for _ in np.arange(time_frequency):
        # The time selection does not change inside the longitude loop, so
        # it is taken once per time offset.  Selecting every latitude index
        # was an identity operation and has been dropped.
        time_slice = ma.take(year_array, time_indices, axis=0)

        lon_indices = np.arange(0, lon_steps, lon_frequency)
        for _ in np.arange(lon_frequency):
            grid = ma.take(time_slice, lon_indices, axis=2)
            list_of_means = np.append(list_of_means,
                                      np.mean(grid)*unit_conversion)
            lon_indices = lon_indices - 1
        time_indices = time_indices - 1

    return ma.mean(list_of_means), ma.std(list_of_means)
def _pfromz_MA(z, lapse_rate, P_bott, T_bott, z_bott):
    """Pressure at a given altitude inside a constant lapse rate layer.

    The dry gas constant is used wherever a gas constant is required.
    See the docstring for press2alt for references.

    Input Arguments:
    * z:  Geopotential altitude [m].
    * lapse_rate:  -dT/dz [K/m] over the layer.
    * P_bott:  Pressure [hPa] at the base of the layer.
    * T_bott:  Temperature [K] at the base of the layer.
    * z_bott:  Geopotential altitude [m] of the base of the layer.

    Output:
    * Pressure [hPa] for each element given in the input arguments.

    Every argument may be a scalar or an MA array; all MA array arguments
    share one size and shape.  Scalar-only input gives a scalar result, and
    any MA array input gives an MA array of that same size and shape.
    """
    import numpy as N          # formerly Numeric
    import numpy.ma as MA      # formerly the stand-alone MA package
    from atmconst import AtmConst

    const = AtmConst()

    if MA.size(lapse_rate) != 1:
        # Element-wise case: evaluate the general formula everywhere, then
        # overwrite the zero-lapse-rate elements with the isothermal result.
        exponent = const.g / (const.R_d * lapse_rate)
        P = P_bott * ((1.0 - (lapse_rate * (z-z_bott) / T_bott))**exponent)
        P_at_0 = P_bott * MA.exp(-const.g / (const.R_d*T_bott) * (z-z_bott))

        # 1 where the lapse rate is exactly zero, 0 elsewhere (masked -> 0).
        zero_flags = MA.filled(MA.where(lapse_rate == 0., 1, 0), 0)
        zero_positions = N.nonzero(N.ravel(zero_flags))

        P_flat = MA.ravel(P)
        isothermal_values = MA.take(MA.ravel(P_at_0), zero_positions)
        MA.put(P_flat, zero_positions, isothermal_values)
        return MA.reshape(P_flat, P.shape)

    # Single lapse-rate value: pick the matching closed form directly.
    if MA.array(lapse_rate) == 0.0:
        return P_bott * MA.exp(-const.g / (const.R_d*T_bott) * (z-z_bott))

    exponent = const.g / (const.R_d * lapse_rate)
    return P_bott * ((1.0 - (lapse_rate * (z-z_bott) / T_bott))**exponent)
def compute(self):
    """Fill ``self.eigValues`` and ``self.eigVectors`` from ``self.data``.

    ``self.data`` is either an ``orange.ExampleTable`` (converted with
    ``toNumpyMA("a/c")``) or a ``(data, classes)`` tuple.  The data go
    through ``self.center`` and ``self.normalize`` and are kept in
    ``self.normalizedData``.

    When there are exactly two distinct class values, one direction is
    computed from the pooled covariance and the difference of the two group
    averages.  In every other case the within-class matrix ``Sw`` and the
    between-class matrix ``Sb`` are formed, ``inv(Sw) * Sb`` is decomposed
    with ``linalg.eigh`` and the result is ordered by ``self.getSorted``.

    Returns None.  The method does nothing when ``self.data`` is None or
    when both results are already masked arrays.

    Raises:
      TypeError: if ``self.data`` is neither an ExampleTable nor a tuple.
    """
    # ``is None`` rather than ``== None``: the latter is element-wise on
    # array-like objects.
    if self.data is None:
        return

    # NOTE(review): self.eigValues is assigned a plain numpy array or the
    # value returned by self.getSorted below; if neither is a MaskedArray
    # this early exit never fires -- confirm.
    alreadyComputed = (isinstance(self.eigVectors, MA.MaskedArray)
                       and isinstance(self.eigValues, MA.MaskedArray))
    if alreadyComputed:
        return

    if isinstance(self.data, orange.ExampleTable):
        data, classes = self.data.toNumpyMA("a/c")
    elif isinstance(self.data, tuple):
        data, classes = self.data
    else:
        # Used to surface later as an unbound-variable error.
        raise TypeError("compute() needs an orange.ExampleTable or a "
                        "(data, classes) tuple, got %s"
                        % type(self.data).__name__)

    data = self.center(data)
    data = self.normalize(data)
    self.normalizedData = data
    exampleCount, attrCount = data.shape
    distinctClasses = set(classes)

    # special case when we have two classes
    if len(distinctClasses) == 2:
        data1 = MA.take(data, numpy.argwhere(classes == 0).flatten(), axis=0)
        data2 = MA.take(data, numpy.argwhere(classes != 0).flatten(), axis=0)
        # NOTE(review): with axis=1 the averages have one entry per example,
        # so this subtraction needs compatible group sizes -- confirm
        # whether axis=0 (one entry per attribute) was intended.
        miDiff = MA.average(data1, axis=1) - MA.average(data2, axis=1)
        covMatrix = (MA.dot(data1.T, data1) + MA.dot(data2.T, data2)) / exampleCount
        # NOTE(review): element-wise ``*``, not a matrix product -- confirm.
        self.eigVectors = linalg.inv(covMatrix) * miDiff
        self.eigValues = numpy.array([1])
    else:
        # Sum the scatter of every class group around its own centre.
        Sw = MA.zeros([attrCount, attrCount])
        for v in distinctClasses:
            d = MA.take(data, numpy.argwhere(classes == v).flatten(), axis=0)
            d = self.center(d)
            Sw += MA.dot(d.T, d)
        Sw /= exampleCount
        total = MA.dot(data.T, data)/float(exampleCount)
        Sb = total - Sw

        # NOTE(review): element-wise product fed to eigh, which assumes a
        # symmetric matrix -- confirm both are intended.
        matrix = linalg.inv(Sw)*Sb
        eigVals, eigVectors = linalg.eigh(matrix)
        self.eigValues, self.eigVectors = self.getSorted(eigVals, eigVectors)
def regrid_array(data=data_cflux):
    '''
    Regrid an array so it can be used with Basemap.

    Only works if the same latitudes and longitudes are selected from the
    netcdf file and the grid.  Uses the ORCA netcdf file found under
    NC_PATH.

    The ORCA longitudes start at 80 and run up to 440.  What is wanted is a
    grid that starts at 80, goes to 180, switches to -180 and continues up
    to 80, so longitudes of 180 or more are shifted down by 360 and the
    columns are reordered to run from -180 to 180.

    Arguments:
      * data: Array indexed as [time, row, column]; only the first 3650
        time steps are used.

    Returns the reordered data as a masked array.
    '''
    from Scientific.IO.NetCDF import NetCDFFile

    nc_grid = NetCDFFile(NC_PATH+ 'ORCA2.0_grid.nc','r')
    try:
        # Only the longitudes are needed to work out the column order.
        lon = nc_grid.variables['lon'][0:40,:]
    finally:
        # Release the file handle even if the read fails.
        nc_grid.close()

    lon_min = lon.copy()
    # Elements at or above 180 are moved into the negative range.
    lon_min[lon_min >= 180.] -= 360.

    # iw: columns at or above the first longitude of row 0;
    # ie: columns below it.
    first_lon = lon_min[0][0]
    iw = np.where(lon_min[0,:] >= first_lon)[0]
    ie = np.where(lon_min[0,:] < first_lon)[0]

    # The data that is to be plotted needs to be regridded: ie columns
    # first, then iw columns, with the extra final column removed.
    # NOTE(review): 3650 time steps are hard-coded (presumably ten years of
    # daily values) -- confirm before passing arrays of another length.
    bm_array = [
        ma.concatenate((ma.take(data[i, :, :],ie,axis=1),
                        ma.take(data[i, :, :],iw,axis=1)),axis=1)[:,:-1]
        for i in range(3650)
    ]
    return ma.array(bm_array)
def _zfromp_MA(P, lapse_rate, P_bott, T_bott, z_bott):
    """Altitude given pressure in a constant lapse rate layer.

    The dry gas constant is used in calculations requiring the gas
    constant.  See the docstring for press2alt for references.

    Input Arguments:
    * P:  Pressure [hPa].
    * lapse_rate:  -dT/dz [K/m] over the layer.
    * P_bott:  Pressure [hPa] at the base of the layer.
    * T_bott:  Temperature [K] at the base of the layer.
    * z_bott:  Geopotential altitude [m] of the base of the layer.

    Output:
    * Altitude [m] for each element given in the input arguments.

    All input arguments can be either a scalar or an MA array.  All
    arguments that are MA arrays, however, are of the same size and
    shape.  If every input argument is a scalar, the output is a scalar.
    If any of the input arguments is an MA array, the output is an MA
    array of the same size and shape.
    """
    import numpy as N        #jfp was import Numeric as N
    import numpy.ma as MA    #jfp was import MA
    from atmconst import AtmConst

    const = AtmConst()

    if MA.size(lapse_rate) == 1:
        # Compare the size-1 value as a whole, as _pfromz_MA does: indexing
        # with [0] raises IndexError when lapse_rate is a plain scalar,
        # because MA.array() of a scalar is zero-dimensional.
        if MA.array(lapse_rate) == 0.0:
            return ( (-const.R_d * T_bott / const.g) * MA.log(P/P_bott) ) + \
                   z_bott
        else:
            exponent = (const.R_d * lapse_rate) / const.g
            return ((T_bott / lapse_rate) * (1. - (P/P_bott)**exponent)) + \
                   z_bott
    else:
        # General formula everywhere; the zero-lapse-rate elements are
        # overwritten with the isothermal result below.
        exponent = (const.R_d * lapse_rate) / const.g
        z = ((T_bott / lapse_rate) * (1. - (P/P_bott)**exponent)) + z_bott
        z_at_0 = ( (-const.R_d * T_bott / const.g) * MA.log(P/P_bott) ) + \
                 z_bott

        # 1 where the lapse rate is exactly zero, 0 elsewhere (masked -> 0).
        zero_lapse_mask = MA.filled(MA.where(lapse_rate == 0., 1, 0), 0)
        zero_lapse_mask_indices_flat = N.nonzero(N.ravel(zero_lapse_mask))

        z_flat = MA.ravel(z)
        MA.put( z_flat, zero_lapse_mask_indices_flat \
              , MA.take(MA.ravel(z_at_0), zero_lapse_mask_indices_flat) )
        return MA.reshape(z_flat, z.shape)
def _zfromp_MA(P, lapse_rate, P_bott, T_bott, z_bott):
    """Altitude given pressure in a constant lapse rate layer.

    The dry gas constant is used in calculations requiring the gas
    constant.  See the docstring for press2alt for references.

    Input Arguments:
    * P:  Pressure [hPa].
    * lapse_rate:  -dT/dz [K/m] over the layer.
    * P_bott:  Pressure [hPa] at the base of the layer.
    * T_bott:  Temperature [K] at the base of the layer.
    * z_bott:  Geopotential altitude [m] of the base of the layer.

    Output:
    * Altitude [m] for each element given in the input arguments.

    All input arguments can be either a scalar or an MA array.  All
    arguments that are MA arrays, however, are of the same size and
    shape.  If every input argument is a scalar, the output is a scalar.
    If any of the input arguments is an MA array, the output is an MA
    array of the same size and shape.
    """
    import numpy as N        #jfp was import Numeric as N
    import numpy.ma as MA    #jfp was import MA
    from atmconst import AtmConst

    const = AtmConst()

    if MA.size(lapse_rate) == 1:
        # MA.array() of a plain scalar is zero-dimensional, so the former
        # ``[0]`` index raised IndexError; compare the whole size-1 value
        # instead, matching _pfromz_MA.
        if MA.array(lapse_rate) == 0.0:
            return ( (-const.R_d * T_bott / const.g) * MA.log(P/P_bott) ) + \
                   z_bott
        else:
            exponent = (const.R_d * lapse_rate) / const.g
            return ((T_bott / lapse_rate) * (1. - (P/P_bott)**exponent)) + \
                   z_bott
    else:
        # Apply the general formula to every element, then replace the
        # elements whose lapse rate is zero with the isothermal altitude.
        exponent = (const.R_d * lapse_rate) / const.g
        z = ((T_bott / lapse_rate) * (1. - (P / P_bott)**exponent)) + z_bott
        z_at_0 = ( (-const.R_d * T_bott / const.g) * MA.log(P/P_bott) ) + \
                 z_bott

        # Flag array: 1 for a zero lapse rate, 0 otherwise (masked -> 0).
        zero_lapse_mask = MA.filled(MA.where(lapse_rate == 0., 1, 0), 0)
        zero_lapse_mask_indices_flat = N.nonzero(N.ravel(zero_lapse_mask))

        z_flat = MA.ravel(z)
        MA.put( z_flat, zero_lapse_mask_indices_flat \
              , MA.take(MA.ravel(z_at_0), zero_lapse_mask_indices_flat) )
        return MA.reshape(z_flat, z.shape)
def test_testMaPut(self):
    # put() the values of zm into ym at the flagged positions and check
    # that take() reads the same values back from those positions.
    (_x, _y, _a10, _m1, _m2, _xm, ym, _z, zm, _xf, _s) = self.d
    flags = [1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1]
    positions = np.nonzero(flags)[0]
    put(ym, positions, zm)
    read_back = take(ym, positions, axis=0)
    assert_(all(read_back == zm))
def getEigVectors(self, nPCs = None, varianceExplained = None):
    """Return the leading eigenvector columns, or None without data.

    When ``self.data`` is falsy nothing is computed.  Otherwise
    ``self.compute()`` is run, the number of columns is obtained from
    ``self.getCount(nPCs, varianceExplained)`` and that many leading columns
    (axis 1) of ``self.eigVectors`` are returned.
    """
    if self.data:
        self.compute()
        count = self.getCount(nPCs, varianceExplained)
        leading = range(count)
        return MA.take(self.eigVectors, leading, axis=1)
    return None
def getEigVectors(self, nPCs=None, varianceExplained=None):
    """Give back the first eigenvector columns, or None if there is no data.

    A falsy ``self.data`` short-circuits to None.  Otherwise the method runs
    ``self.compute()``, asks ``self.getCount(nPCs, varianceExplained)`` how
    many columns to keep and takes that many leading columns (axis 1) of
    ``self.eigVectors``.
    """
    hasData = bool(self.data)
    if hasData:
        self.compute()
        wanted = self.getCount(nPCs, varianceExplained)
        return MA.take(self.eigVectors, range(wanted), axis=1)
    return None
# Move longitudes that are at or above 180 into the negative range by
# subtracting 360 from them.
lon_min = lon.copy()
i, j = np.where(lon_min >= 180.)
lon_min[i, j] -= 360.

### ==========================================================================
### Get rid of the funny extra lon and do the same for the lat array.
# iw: columns greater than or equal to the first element of row 0
# (i.e. 78.000038); ie: columns less than it.
first_lon = lon_min[0][0]
iw = np.where(lon_min[0, :] >= first_lon)[0]
ie = np.where(lon_min[0, :] < first_lon)[0]

### Put the lon in order from -180 to 180 and remove the extra 80 at the end.
lon = np.concatenate(
    (np.take(lon_min, ie, axis=1), np.take(lon_min, iw, axis=1)),
    axis=1,
)[:, :-1]
lat = np.concatenate(
    (np.take(lat, ie, axis=1), np.take(lat, iw, axis=1)),
    axis=1,
)[:, :-1]

# The data that is to be plotted needs to be regridded.
### Regrid cflux data: same column order, one time step at a time.
bm_array_cflux = ma.array([
    ma.concatenate(
        (ma.take(data_cflux_new[i, :, :], ie, axis=1),
         ma.take(data_cflux_new[i, :, :], iw, axis=1)),
        axis=1,
    )[:, :-1]
    for i in range(time_end)
])
#~ return bm_array
#~ self.regridded_array = ma.masked_values(bm_array, 1e+20)
#~ ### Regrid dpco2 data
#~ bm_array_dpco2 = [ma.concatenate((ma.take(data_dpco2[i, :, :],ie,axis=1),ma.take(data_dpco2[i, :, :],iw,axis=1)),axis=1)[:,:-1] for i in range(3650)]
#~ bm_array_dpco2 = ma.array(bm_array_dpco2)

# Split the time axis into 10 equal pieces; years runs from 1998 to 2007.
years = np.arange(1998, 2008, 1)
year_stack_bmcflux = ma.array(np.split(bm_array_cflux, 10, axis=0))
#~ year_stack_bmdpco2 = np.split(bm_array_dpco2, 10, axis=0)