示例#1
0
def pca(data, nPCs=-1):
    """Project ``data`` onto its leading principal components.

    Every column of ``data`` is centred on its mean and divided by its
    standard deviation, the covariance matrix of the standardised columns is
    decomposed with ``linalg.eigh``, and the rows are projected onto the
    eigenvectors with the largest eigenvalues.

    The standardisation builds new arrays, so the caller's ``data`` is not
    modified and integer input is accepted.

    Arguments:
    * data:  2-D array; statistics are taken along axis 0 (over the rows).
    * nPCs:  Number of components to keep.  -1 (the default) keeps all of
      them; larger values are clipped to the number of eigenvalues.

    Returns the tuple ``(projectedData, vectors, values)``: the projected
    rows, the selected eigenvectors as columns, and the matching eigenvalues
    as a list ordered from largest to smallest.
    """
    nExamples = float(len(data))
    mean = data.sum(axis=0) / nExamples
    # Use out-of-place arithmetic: "-=" and "/=" would overwrite the caller's
    # array and cannot store float results in an integer array.
    data = data - mean
    data = data / MA.std(data, axis=0)
    covMatrix = MA.dot(data.T, data) / nExamples

    eigVals, eigVectors = linalg.eigh(covMatrix)
    eigVals = list(eigVals)

    if nPCs == -1:
        nPCs = len(eigVals)
    nPCs = min(nPCs, len(eigVals))

    # Component indices by decreasing eigenvalue; ties are broken by the
    # higher index first.
    order = sorted(range(len(eigVals)),
                   key=lambda i: (eigVals[i], i),
                   reverse=True)
    indices = order[:nPCs]

    vectors = MA.take(eigVectors, indices, axis=1)
    values = [eigVals[i] for i in indices]
    projectedData = MA.dot(data, vectors)

    return projectedData, vectors, values
示例#2
0
def pca(data, nPCs = -1):
    """Project ``data`` onto its leading principal components.

    Every column of ``data`` is centred on its mean and divided by its
    standard deviation, the covariance matrix of the standardised columns is
    decomposed with ``linalg.eigh``, and the rows are projected onto the
    eigenvectors with the largest eigenvalues.

    The standardisation builds new arrays, so the caller's ``data`` is not
    modified and integer input is accepted.

    Arguments:
    * data:  2-D array; statistics are taken along axis 0 (over the rows).
    * nPCs:  Number of components to keep.  -1 (the default) keeps all of
      them; larger values are clipped to the number of eigenvalues.

    Returns the tuple ``(projectedData, vectors, values)``: the projected
    rows, the selected eigenvectors as columns, and the matching eigenvalues
    as a list ordered from largest to smallest.
    """
    nExamples = float(len(data))
    mean = data.sum(axis=0) / nExamples
    # Use out-of-place arithmetic: "-=" and "/=" would overwrite the caller's
    # array and cannot store float results in an integer array.
    data = data - mean
    data = data / MA.std(data, axis=0)
    covMatrix = MA.dot(data.T, data) / nExamples

    eigVals, eigVectors = linalg.eigh(covMatrix)
    eigVals = list(eigVals)

    if nPCs == -1:
        nPCs = len(eigVals)
    nPCs = min(nPCs, len(eigVals))

    # Component indices by decreasing eigenvalue; ties are broken by the
    # higher index first.
    order = sorted(range(len(eigVals)),
                   key=lambda i: (eigVals[i], i),
                   reverse=True)
    indices = order[:nPCs]

    vectors = MA.take(eigVectors, indices, axis = 1)
    values = [eigVals[i] for i in indices]
    projectedData = MA.dot(data, vectors)

    return projectedData, vectors, values
示例#3
0
    def compute(self):
        """Compute ``self.eigVectors`` and ``self.eigValues`` from ``self.data``.

        Returns immediately when ``self.data`` is None, or when
        ``self.eigVectors`` and ``self.eigValues`` are both masked arrays.
        Otherwise the attribute matrix is passed through ``self.center`` and
        ``self.normalize``, stored in ``self.normalizedData``, and the
        results are written to ``self.eigVectors`` / ``self.eigValues``.

        ``self.data`` is either an ``orange.ExampleTable`` or a
        ``(data, classes)`` tuple.

        Raises TypeError for any other kind of ``self.data``.
        """
        if self.data is None:
            return
        # NOTE(review): this shortcut needs both attributes to be masked
        # arrays; confirm that whatever getSorted() returns satisfies it.
        if (isinstance(self.eigVectors, MA.MaskedArray)
                and isinstance(self.eigValues, MA.MaskedArray)):
            return

        if isinstance(self.data, orange.ExampleTable):
            data, classes = self.data.toNumpyMA("a/c")
        elif isinstance(self.data, tuple):
            data, classes = self.data
        else:
            # Fail with a clear message instead of an unbound-variable error.
            raise TypeError("data must be an orange.ExampleTable or a "
                            "(data, classes) tuple, got %s"
                            % type(self.data).__name__)

        data = self.center(data)
        data = self.normalize(data)
        self.normalizedData = data
        exampleCount, attrCount = data.shape
        classValues = set(classes)
        # special case when we have two classes
        if len(classValues) == 2:
            # Rows of class 0 versus every other row.
            data1 = MA.take(data,
                            numpy.argwhere(classes == 0).flatten(),
                            axis=0)
            data2 = MA.take(data,
                            numpy.argwhere(classes != 0).flatten(),
                            axis=0)
            # NOTE(review): axis=1 averages across attributes (one value per
            # row), so the two operands only line up when both groups have
            # the same number of rows; a per-attribute mean would be axis=0.
            # Left unchanged pending confirmation.
            miDiff = MA.average(data1, axis=1) - MA.average(data2, axis=1)
            covMatrix = (MA.dot(data1.T, data1) +
                         MA.dot(data2.T, data2)) / exampleCount
            # NOTE(review): "*" is element-wise here, not a matrix product.
            self.eigVectors = linalg.inv(covMatrix) * miDiff
            self.eigValues = numpy.array([1])
        else:
            # Sw: sum over the class groups of d.T . d for the re-centred
            # rows of each group, divided by the number of examples.
            Sw = MA.zeros([attrCount, attrCount])
            for v in classValues:
                d = MA.take(data,
                            numpy.argwhere(classes == v).flatten(),
                            axis=0)
                d = self.center(d)
                Sw += MA.dot(d.T, d)
            Sw /= exampleCount
            total = MA.dot(data.T, data) / float(exampleCount)
            # Sb: what is left of the total once Sw is removed.
            Sb = total - Sw

            # NOTE(review): element-wise product again, and eigh() assumes a
            # symmetric matrix; confirm both are intended.
            matrix = linalg.inv(Sw) * Sb
            eigVals, eigVectors = linalg.eigh(matrix)
            self.eigValues, self.eigVectors = self.getSorted(
                eigVals, eigVectors)
示例#4
0
 def getSorted(self, values, vectors):
     """Reorder ``values`` and the columns of ``vectors`` by decreasing value.

     Returns ``(newValues, newVectors)``: the values as a list from largest
     to smallest (equal values: higher original index first) and the
     columns of ``vectors`` taken in that same order.
     """
     ascending = sorted((val, pos) for pos, val in enumerate(values))
     order = [pos for _, pos in reversed(ascending)]
     reorderedValues = [values[pos] for pos in order]
     reorderedVectors = MA.take(vectors, order, axis=1)
     return reorderedValues, reorderedVectors
示例#5
0
 def test_testTakeTransposeInnerOuter(self):
     # take / transpose / inner / outer on a masked array must agree with
     # the plain numpy results.
     shape = (2, 3, 4)
     order = (2, 0, 1)
     x = arange(24)
     x[5:6] = masked
     x = x.reshape(shape)
     y = np.arange(24).reshape(shape)
     assert_(eq(np.transpose(y, order), transpose(x, order)))
     assert_(eq(np.take(y, order, 1), take(x, order, 1)))
     x_filled = filled(x, 0)
     y_filled = filled(y, 0)
     assert_(eq(np.inner(x_filled, y_filled), inner(x, y)))
     assert_(eq(np.outer(filled(x, 0), filled(y, 0)), outer(x, y)))

     # take() on an object array with one masked entry returns the
     # unmasked items that were asked for.
     objects = array(["abc", 1, "def", 2, 3], object)
     objects[2] = masked
     picked = take(objects, [0, 3, 4])
     for position, expected in enumerate(("abc", 2, 3)):
         assert_(picked[position] == expected)
示例#6
0
 def getSorted(self, values, vectors):
     """Reorder ``values`` and the columns of ``vectors`` by decreasing value.

     Returns ``(newValues, newVectors)``: the values as a list from largest
     to smallest (equal values: higher original index first) and the
     columns of ``vectors`` taken in that same order.
     """
     ascending = sorted((val, pos) for pos, val in enumerate(values))
     order = [pos for _, pos in reversed(ascending)]
     reorderedValues = [values[pos] for pos in order]
     reorderedVectors = MA.take(vectors, order, axis = 1)
     return reorderedValues, reorderedVectors
示例#7
0
 def test_testTakeTransposeInnerOuter(self):
     # take / transpose / inner / outer on a masked array must agree with
     # the plain numpy results.
     shape = (2, 3, 4)
     order = (2, 0, 1)
     x = arange(24)
     x[5:6] = masked
     x = x.reshape(shape)
     y = np.arange(24).reshape(shape)
     assert_(eq(np.transpose(y, order), transpose(x, order)))
     assert_(eq(np.take(y, order, 1), take(x, order, 1)))
     x_filled = filled(x, 0)
     y_filled = filled(y, 0)
     assert_(eq(np.inner(x_filled, y_filled), inner(x, y)))
     assert_(eq(np.outer(filled(x, 0), filled(y, 0)), outer(x, y)))

     # take() on an object array with one masked entry returns the
     # unmasked items that were asked for.
     objects = array(['abc', 1, 'def', 2, 3], object)
     objects[2] = masked
     picked = take(objects, [0, 3, 4])
     for position, expected in enumerate(('abc', 2, 3)):
         assert_(picked[position] == expected)
def regrid(data_array, grid_path='/home/nicholas/thesis/data/netcdf_files/ORCA2.0_grid.nc'):
    """Reorder the longitude axis of ``data_array`` using the grid file.

    Longitudes of at least 180 are shifted down by 360.  The columns whose
    (shifted) longitude in the first row is below that row's first value are
    then moved in front of the remaining columns, and the last column of the
    result is dropped.

    Arguments:
    * data_array:  Array indexed as [time, lat, lon]; every time step is
      reordered along its last axis.
    * grid_path:  netCDF file providing the 2-D 'lon' and 'lat' variables
      (only their first 50 rows are read).  Defaults to the path that was
      previously hard-coded.

    Returns ``(bm_array, lon, lat)``: the reordered data as a masked array
    plus the reordered longitude and latitude grids.
    """
    nc_grid = netcdf_file(grid_path, 'r')
    try:
        # Copy the slices so they stay valid after the file is closed (the
        # reader may hand out views of a memory-mapped file).
        lon_min = np.array(nc_grid.variables['lon'][0:50, :])
        lat = np.array(nc_grid.variables['lat'][0:50, :])
    finally:
        # Release the file even when a variable is missing.
        nc_grid.close()

    i, j = np.where(lon_min >= 180.)
    lon_min[i, j] = lon_min[i, j] - 360.

    # Column groups, decided from the first row of the shifted longitudes.
    iw = np.where(lon_min[0, :] >= lon_min[0][0])[0]
    ie = np.where(lon_min[0, :] < lon_min[0][0])[0]

    # NOTE(review): lon/lat also drop the last column of the "ie" part, which
    # the data below does not, so they end up one column narrower than
    # bm_array.  Kept as in the original; confirm it is intended.
    lon = np.concatenate((np.take(lon_min, ie, axis=1)[:, :-1],
                          np.take(lon_min, iw, axis=1)), axis=1)[:, :-1]
    lat = np.concatenate((np.take(lat, ie, axis=1)[:, :-1],
                          np.take(lat, iw, axis=1)), axis=1)[:, :-1]

    timesteps = np.shape(data_array)[0]
    bm_array = [
        ma.concatenate((ma.take(data_array[step, :, :], ie, axis=1),
                        ma.take(data_array[step, :, :], iw, axis=1)),
                       axis=1)[:, :-1]
        for step in range(timesteps)
    ]
    bm_array = ma.array(bm_array)
    return bm_array, lon, lat
def sample_all_realizations(year_array, time_frequency, lon_frequency):
	'''
	Given a sampling frequency, sample all the grid realizations at that frequency.

	year_array is indexed as [time, lat, lon].  Every time_frequency-th time
	step and every lon_frequency-th longitude are selected (all latitudes
	are kept), once for each of the time_frequency * lon_frequency possible
	offsets.  Offsets are produced by shifting the index arrays down by one,
	so negative indices wrap around to the end of the axis.

	Each sample's mean is multiplied by the module-level unit_conversion.
	Returns the mean and the standard deviation of those sample means.
	'''
	# Parenthesised single-argument form prints the same text on Python 2
	# and is valid syntax on Python 3.
	print("Sampling all the realisations...")
	time_steps, lat_steps, lon_steps = np.shape(year_array)
	lat_indices = np.arange(0, lat_steps, 1)
	list_of_means = []
	time_indices = np.arange(0, time_steps, time_frequency)
	for time_count in np.arange(time_frequency):
		# Restart the longitude offsets for every time offset.
		lon_indices = np.arange(0, lon_steps, lon_frequency)
		for lon_count in np.arange(lon_frequency):
			grid = ma.take(ma.take(ma.take(year_array, \
						(lon_indices), axis=2), \
						(lat_indices), axis=1), \
						(time_indices),axis=0)
			list_of_means = np.append(list_of_means, np.mean(grid)*unit_conversion)
			lon_indices = lon_indices - 1
		time_indices = time_indices - 1
	return ma.mean(list_of_means), ma.std(list_of_means)
示例#10
0
def _pfromz_MA(z, lapse_rate, P_bott, T_bott, z_bott):
    """Return the pressure at altitude ``z`` in a constant lapse rate layer.

    Calculations that need a gas constant use the dry gas constant.  The
    docstring of press2alt lists the references.

    Input Arguments:
    * z:  Geopotential altitude [m].
    * lapse_rate:  -dT/dz [K/m] over the layer.
    * P_bott:  Pressure [hPa] at the base of the layer.
    * T_bott:  Temperature [K] at the base of the layer.
    * z_bott:  Geopotential altitude [m] of the base of the layer.

    Output:
    * Pressure [hPa] for each element given in the input arguments.

    Each argument is either a scalar or an MA array, and all MA array
    arguments share one size and shape.  Scalar-only input gives a scalar;
    otherwise the result is an MA array of that same size and shape.
    """
    # numpy and numpy.ma stand in for the former Numeric and MA modules.
    import numpy as N
    import numpy.ma as MA
    from atmconst import AtmConst

    const = AtmConst()

    def zero_lapse_pressure():
        # Formula used where the lapse rate is exactly zero.
        return P_bott * \
               MA.exp(-const.g / (const.R_d * T_bott) * (z - z_bott))

    def nonzero_lapse_pressure():
        # Formula used where the lapse rate is non-zero.
        power = const.g / (const.R_d * lapse_rate)
        return P_bott * \
               ((1.0 - (lapse_rate * (z - z_bott) / T_bott)) ** power)

    # A single lapse rate value: choose one formula for every element.
    if MA.size(lapse_rate) == 1:
        if MA.array(lapse_rate) == 0.0:
            return zero_lapse_pressure()
        return nonzero_lapse_pressure()

    # Several lapse rates: evaluate both formulas everywhere, then overwrite
    # the zero-lapse positions of the general result.
    general = nonzero_lapse_pressure()
    at_zero = zero_lapse_pressure()

    is_zero = MA.filled(MA.where(lapse_rate == 0., 1, 0), 0)
    zero_positions = N.nonzero(N.ravel(is_zero))
    flat = MA.ravel(general)
    MA.put(flat, zero_positions,
           MA.take(MA.ravel(at_zero), zero_positions))
    return MA.reshape(flat, general.shape)
示例#11
0
def _pfromz_MA(z, lapse_rate, P_bott, T_bott, z_bott):
    """Return the pressure at altitude ``z`` in a constant lapse rate layer.

    Calculations that need a gas constant use the dry gas constant.  The
    docstring of press2alt lists the references.

    Input Arguments:
    * z:  Geopotential altitude [m].
    * lapse_rate:  -dT/dz [K/m] over the layer.
    * P_bott:  Pressure [hPa] at the base of the layer.
    * T_bott:  Temperature [K] at the base of the layer.
    * z_bott:  Geopotential altitude [m] of the base of the layer.

    Output:
    * Pressure [hPa] for each element given in the input arguments.

    Each argument is either a scalar or an MA array, and all MA array
    arguments share one size and shape.  Scalar-only input gives a scalar;
    otherwise the result is an MA array of that same size and shape.
    """
    # numpy and numpy.ma stand in for the former Numeric and MA modules.
    import numpy as N
    import numpy.ma as MA
    from atmconst import AtmConst

    const = AtmConst()

    def zero_lapse_pressure():
        # Formula used where the lapse rate is exactly zero.
        return P_bott * \
               MA.exp(-const.g / (const.R_d * T_bott) * (z - z_bott))

    def nonzero_lapse_pressure():
        # Formula used where the lapse rate is non-zero.
        power = const.g / (const.R_d * lapse_rate)
        return P_bott * \
               ((1.0 - (lapse_rate * (z - z_bott) / T_bott)) ** power)

    # A single lapse rate value: choose one formula for every element.
    if MA.size(lapse_rate) == 1:
        if MA.array(lapse_rate) == 0.0:
            return zero_lapse_pressure()
        return nonzero_lapse_pressure()

    # Several lapse rates: evaluate both formulas everywhere, then overwrite
    # the zero-lapse positions of the general result.
    general = nonzero_lapse_pressure()
    at_zero = zero_lapse_pressure()

    is_zero = MA.filled(MA.where(lapse_rate == 0., 1, 0), 0)
    zero_positions = N.nonzero(N.ravel(is_zero))
    flat = MA.ravel(general)
    MA.put(flat, zero_positions,
           MA.take(MA.ravel(at_zero), zero_positions))
    return MA.reshape(flat, general.shape)
示例#12
0
    def compute(self):
        """Compute ``self.eigVectors`` and ``self.eigValues`` from ``self.data``.

        Returns immediately when ``self.data`` is None, or when
        ``self.eigVectors`` and ``self.eigValues`` are both masked arrays.
        Otherwise the attribute matrix is passed through ``self.center`` and
        ``self.normalize``, stored in ``self.normalizedData``, and the
        results are written to ``self.eigVectors`` / ``self.eigValues``.

        ``self.data`` is either an ``orange.ExampleTable`` or a
        ``(data, classes)`` tuple.

        Raises TypeError for any other kind of ``self.data``.
        """
        if self.data is None:
            return
        # NOTE(review): this shortcut needs both attributes to be masked
        # arrays; confirm that whatever getSorted() returns satisfies it.
        if (isinstance(self.eigVectors, MA.MaskedArray)
                and isinstance(self.eigValues, MA.MaskedArray)):
            return

        if isinstance(self.data, orange.ExampleTable):
            data, classes = self.data.toNumpyMA("a/c")
        elif isinstance(self.data, tuple):
            data, classes = self.data
        else:
            # Fail with a clear message instead of an unbound-variable error.
            raise TypeError("data must be an orange.ExampleTable or a "
                            "(data, classes) tuple, got %s"
                            % type(self.data).__name__)

        data = self.center(data)
        data = self.normalize(data)
        self.normalizedData = data
        exampleCount, attrCount = data.shape
        classValues = set(classes)
        # special case when we have two classes
        if len(classValues) == 2:
            # Rows of class 0 versus every other row.
            data1 = MA.take(data, numpy.argwhere(classes == 0).flatten(), axis=0)
            data2 = MA.take(data, numpy.argwhere(classes != 0).flatten(), axis=0)
            # NOTE(review): axis=1 averages across attributes (one value per
            # row), so the two operands only line up when both groups have
            # the same number of rows; a per-attribute mean would be axis=0.
            # Left unchanged pending confirmation.
            miDiff = MA.average(data1, axis=1) - MA.average(data2, axis=1)
            covMatrix = (MA.dot(data1.T, data1) + MA.dot(data2.T, data2)) / exampleCount
            # NOTE(review): "*" is element-wise here, not a matrix product.
            self.eigVectors = linalg.inv(covMatrix) * miDiff
            self.eigValues = numpy.array([1])
        else:
            # Sw: sum over the class groups of d.T . d for the re-centred
            # rows of each group, divided by the number of examples.
            Sw = MA.zeros([attrCount, attrCount])
            for v in classValues:
                d = MA.take(data, numpy.argwhere(classes == v).flatten(), axis=0)
                d = self.center(d)
                Sw += MA.dot(d.T, d)
            Sw /= exampleCount
            total = MA.dot(data.T, data)/float(exampleCount)
            # Sb: what is left of the total once Sw is removed.
            Sb = total - Sw

            # NOTE(review): element-wise product again, and eigh() assumes a
            # symmetric matrix; confirm both are intended.
            matrix = linalg.inv(Sw)*Sb
            eigVals, eigVectors = linalg.eigh(matrix)
            self.eigValues, self.eigVectors = self.getSorted(eigVals, eigVectors)
def regrid_array(data=data_cflux):
	'''
	Reorder the longitude axis of data so that it can be used with Basemap.

	Only works if the same latitudes and longitudes are selected from the
	netCDF data and from the grid.  The grid is read from the ORCA file
	NC_PATH + 'ORCA2.0_grid.nc' (first 40 rows of 'lon').

	Original author's description of the transformation: the ORCA grid
	starts at 80 and goes to 440; the wanted layout starts at 80, goes to
	180, then switches to -180 and goes to 80.

	data is indexed as [time, lat, lon].  Every time step along the first
	axis is reordered (the count is no longer fixed at 3650 steps).
	Returns the reordered data as a masked array.
	'''
	from Scientific.IO.NetCDF import NetCDFFile
	nc_grid = NetCDFFile(NC_PATH+ 'ORCA2.0_grid.nc','r')
	try:
		lon = nc_grid.variables['lon'][0:40,:]
	finally:
		# Release the file even when the variable cannot be read.
		nc_grid.close()

	lon_min = lon.copy()
	i,j = np.where(lon_min >= 180.) # elements of lon_min that are 180 or more
	lon_min[i,j] = lon_min[i,j] - 360. # shift those elements down by 360

	# Column groups, decided from the first row of the shifted longitudes:
	# iw holds the columns at or above the first element, ie the ones below it.
	iw = np.where(lon_min[0,:] >= lon_min[0][0])[0]
	ie = np.where(lon_min[0,:] < lon_min[0][0])[0]

	# Put the ie columns in front of the iw columns and drop the last column.
	timesteps = np.shape(data)[0]
	bm_array = [ma.concatenate((ma.take(data[step, :, :],ie,axis=1),ma.take(data[step, :, :],iw,axis=1)),axis=1)[:,:-1] for step in range(timesteps)]
	bm_array = ma.array(bm_array)
	return bm_array
示例#14
0
def _zfromp_MA(P, lapse_rate, P_bott, T_bott, z_bott):
    """Altitude given pressure in a constant lapse rate layer.

    The dry gas constant is used in calculations requiring the gas
    constant.  See the docstring for press2alt for references.

    Input Arguments:
    * P:  Pressure [hPa].
    * lapse_rate:  -dT/dz [K/m] over the layer.
    * P_bott:  Pressure [hPa] at the base of the layer.
    * T_bott:  Temperature [K] at the base of the layer.
    * z_bott:  Geopotential altitude [m] of the base of the layer.

    Output:
    * Altitude [m] for each element given in the input arguments.

    All input arguments can be either a scalar or an MA array.  All
    arguments that are MA arrays, however, are of the same size and
    shape.  If every input argument is a scalar, the output is a scalar.
    If any of the input arguments is an MA array, the output is an MA
    array of the same size and shape.
    """
    # numpy and numpy.ma stand in for the former Numeric and MA modules.
    import numpy as N
    import numpy.ma as MA
    from atmconst import AtmConst

    const = AtmConst()

    if MA.size(lapse_rate) == 1:
        # Compare the size-1 value as a whole rather than indexing it with
        # [0]: a true scalar becomes a 0-d array, which cannot be indexed.
        # This matches the test used in _pfromz_MA.
        if MA.array(lapse_rate) == 0.0:
            return ( (-const.R_d * T_bott / const.g) * MA.log(P/P_bott) ) + \
                   z_bott
        else:
            exponent = (const.R_d * lapse_rate) / const.g
            return ((T_bott / lapse_rate) * (1. - (P/P_bott)**exponent)) + \
                   z_bott
    else:
        # Evaluate both formulas for every element ...
        exponent = (const.R_d * lapse_rate) / const.g
        z = ((T_bott / lapse_rate) * (1. - (P/P_bott)**exponent)) + z_bott
        z_at_0 = ( (-const.R_d * T_bott / const.g) * MA.log(P/P_bott) ) + \
                 z_bott

        # ... then overwrite the positions whose lapse rate is exactly zero
        # with the zero-lapse result.
        zero_lapse_mask = MA.filled(MA.where(lapse_rate == 0., 1, 0), 0)
        zero_lapse_mask_indices_flat = N.nonzero(N.ravel(zero_lapse_mask))
        z_flat = MA.ravel(z)
        MA.put( z_flat, zero_lapse_mask_indices_flat \
              , MA.take(MA.ravel(z_at_0), zero_lapse_mask_indices_flat) )
        return MA.reshape(z_flat, z.shape)
示例#15
0
def _zfromp_MA(P, lapse_rate, P_bott, T_bott, z_bott):
    """Altitude given pressure in a constant lapse rate layer.

    The dry gas constant is used in calculations requiring the gas
    constant.  See the docstring for press2alt for references.

    Input Arguments:
    * P:  Pressure [hPa].
    * lapse_rate:  -dT/dz [K/m] over the layer.
    * P_bott:  Pressure [hPa] at the base of the layer.
    * T_bott:  Temperature [K] at the base of the layer.
    * z_bott:  Geopotential altitude [m] of the base of the layer.

    Output:
    * Altitude [m] for each element given in the input arguments.

    All input arguments can be either a scalar or an MA array.  All
    arguments that are MA arrays, however, are of the same size and
    shape.  If every input argument is a scalar, the output is a scalar.
    If any of the input arguments is an MA array, the output is an MA
    array of the same size and shape.
    """
    # numpy and numpy.ma stand in for the former Numeric and MA modules.
    import numpy as N
    import numpy.ma as MA
    from atmconst import AtmConst

    const = AtmConst()

    if MA.size(lapse_rate) == 1:
        # Compare the size-1 value as a whole rather than indexing it with
        # [0]: a true scalar becomes a 0-d array, which cannot be indexed.
        # This matches the test used in _pfromz_MA.
        if MA.array(lapse_rate) == 0.0:
            return ( (-const.R_d * T_bott / const.g) * MA.log(P/P_bott) ) + \
                   z_bott
        else:
            exponent = (const.R_d * lapse_rate) / const.g
            return ((T_bott / lapse_rate) * (1. - (P/P_bott)**exponent)) + \
                   z_bott
    else:
        # Evaluate both formulas for every element ...
        exponent = (const.R_d * lapse_rate) / const.g
        z = ((T_bott / lapse_rate) * (1. - (P / P_bott)**exponent)) + z_bott
        z_at_0 = ( (-const.R_d * T_bott / const.g) * MA.log(P/P_bott) ) + \
                 z_bott

        # ... then overwrite the positions whose lapse rate is exactly zero
        # with the zero-lapse result.
        zero_lapse_mask = MA.filled(MA.where(lapse_rate == 0., 1, 0), 0)
        zero_lapse_mask_indices_flat = N.nonzero(N.ravel(zero_lapse_mask))
        z_flat = MA.ravel(z)
        MA.put( z_flat, zero_lapse_mask_indices_flat \
              , MA.take(MA.ravel(z_at_0), zero_lapse_mask_indices_flat) )
        return MA.reshape(z_flat, z.shape)
示例#16
0
 def test_testMaPut(self):
     # Values written with put() at the flagged positions must be read back
     # unchanged by take().
     _, _, _, _, _, _, ym, _, zm, _, _ = self.d
     flags = [1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1]
     positions = np.nonzero(flags)[0]
     put(ym, positions, zm)
     read_back = take(ym, positions, axis=0)
     assert_(all(read_back == zm))
示例#17
0
 def test_testMaPut(self):
     # Values written with put() at the flagged positions must be read back
     # unchanged by take().
     _, _, _, _, _, _, ym, _, zm, _, _ = self.d
     flags = [1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1]
     positions = np.nonzero(flags)[0]
     put(ym, positions, zm)
     read_back = take(ym, positions, axis=0)
     assert_(all(read_back == zm))
示例#18
0
 def getEigVectors(self, nPCs = None, varianceExplained = None):
     """Return the first columns of ``self.eigVectors``.

     Returns None when ``self.data`` is falsy.  Otherwise ``compute()`` is
     run, ``getCount(nPCs, varianceExplained)`` decides how many columns to
     keep, and that many leading columns are returned.
     """
     if self.data:
         self.compute()
         count = self.getCount(nPCs, varianceExplained)
         leading = range(count)
         return MA.take(self.eigVectors, leading, axis=1)
     return None
示例#19
0
    def getEigVectors(self, nPCs=None, varianceExplained=None):
        """Return the first columns of ``self.eigVectors``.

        Returns None when ``self.data`` is falsy.  Otherwise ``compute()``
        is run, ``getCount(nPCs, varianceExplained)`` decides how many
        columns to keep, and that many leading columns are returned.
        """
        if self.data:
            self.compute()
            count = self.getCount(nPCs, varianceExplained)
            leading = range(count)
            return MA.take(self.eigVectors, leading, axis=1)
        return None
# Script section: reorder the longitude axis of the grid and of the cflux
# data, then split the reordered data into ten equal blocks.
# Relies on lon, lat, data_cflux_new and time_end being defined earlier.
lon_min = lon.copy()
i,j = np.where(lon_min >= 180.) # positions of the elements of lon_min that are 180 or more
lon_min[i,j] = lon_min[i,j] - 360. # shift those elements down by 360

### ==============================================================================================================
### Split the columns into two groups, using the first row of lon_min
### (original note: get rid of the funny extra lon and do the same for lat)
iw = np.where(lon_min[0,:] >= lon_min[0][0])[0] # columns at or above the first element (original note: i.e. 78.000038)
ie = np.where(lon_min[0,:] < lon_min[0][0])[0] # columns below the first element

### Put the ie columns in front of the iw columns and drop the last column
### (original note: orders lon from -180 to 180 and removes the extra 80 at the end)
lon = np.concatenate((np.take(lon_min,ie,axis=1),np.take(lon_min,iw,axis=1)),axis=1)[:,:-1]
lat = np.concatenate((np.take(lat,ie,axis=1),np.take(lat,iw,axis=1)),axis=1)[:,:-1]

# The data that is to be plotted gets the same column reordering,
# one time step at a time for the first time_end steps.
### Regrid cflux data
bm_array_cflux = [ma.concatenate((ma.take(data_cflux_new[i, :, :],ie,axis=1),ma.take(data_cflux_new[i, :, :],iw,axis=1)),axis=1)[:,:-1] for i in range(time_end)]
bm_array_cflux = ma.array(bm_array_cflux)
#~ return bm_array
#~ self.regridded_array = ma.masked_values(bm_array, 1e+20)

#~ ### Regrid dpco2 data
#~ bm_array_dpco2 = [ma.concatenate((ma.take(data_dpco2[i, :, :],ie,axis=1),ma.take(data_dpco2[i, :, :],iw,axis=1)),axis=1)[:,:-1] for i in range(3650)]
#~ bm_array_dpco2 = ma.array(bm_array_dpco2)

# Ten year labels, 1998 to 2007 inclusive.
years = np.arange(1998, 2008, 1)


# Split the time axis into 10 equal parts and stack them into one masked
# array.  NOTE(review): presumably one part per entry of years - confirm
# that time_end is a multiple of 10, since np.split requires an equal split.
year_stack_bmcflux = np.split(bm_array_cflux, 10, axis=0)
year_stack_bmcflux = ma.array(year_stack_bmcflux)

#~ year_stack_bmdpco2 = np.split(bm_array_dpco2, 10, axis=0)