Example #1
def XWrap(x,ifold,fill_value=0):
    """
    Extend and wrap array.
    
    Fold array every ifold indices.  There will typically be a hanging
    part of the array.  This is padded out.

    Parameters
    ----------

    x     : input
    ifold : Wrap array after ifold indices.

    Return
    ------

    xwrap : Wrapped array.

    """

    ncad = x.size # Number of cadences
    nrow = int(np.floor(ncad/ifold) + 1)
    nExtend = nrow * ifold - ncad # Pad out remainder of array with 0s.

    if type(x) is np.ma.core.MaskedArray:
        pad = ma.empty(nExtend)
        pad.mask = True
        x = ma.hstack( (x ,pad) )
    else:    
        pad = np.empty(nExtend) 
        pad[:] = fill_value
        x = np.hstack( (x ,pad) )
    xwrap = x.reshape( nrow,-1 )

    return xwrap
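A minimal sketch of the padding pattern this function relies on (the array sizes here are illustrative):

import numpy as np
import numpy.ma as ma

x = ma.masked_array(np.arange(10.0))      # 10 cadences
ifold = 4                                 # wrap every 4 elements
nrow = int(np.floor(x.size / ifold) + 1)  # 3 rows
pad = ma.empty(nrow * ifold - x.size)     # 2 hanging cells
pad.mask = True
xwrap = ma.hstack((x, pad)).reshape(nrow, -1)
print(xwrap.shape)                        # (3, 4); the two padded cells are masked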
Example #2
def compute_spec_fiducial(wcslist):
    """
    For a celestial footprint this is the center.
    For a spectral footprint, it is the beginning of the range.

    This function assumes all WCSs have the same output coordinate frame.

    Build-7 workaround.
    """
    output_frame = wcslist[0].output_frame
    axes_types = wcslist[0].output_frame.axes_type
    spatial_axes = np.array(axes_types) == 'SPATIAL'
    spectral_axes = np.array(axes_types) == 'SPECTRAL'
    footprints = ma.hstack(
        [spec_footprint(w, bounding_box=w.bounding_box) for w in wcslist])
    spatial_footprint = footprints[spatial_axes]
    spectral_footprint = footprints[spectral_axes]
    # Compute center of footprint
    fiducial = np.empty(len(axes_types))
    if (spatial_footprint).any():
        lon, lat = spatial_footprint
        lon, lat = np.deg2rad(lon), np.deg2rad(lat)
        x_mean = np.mean(np.cos(lat) * np.cos(lon))
        y_mean = np.mean(np.cos(lat) * np.sin(lon))
        z_mean = np.mean(np.sin(lat))
        lon_fiducial = np.rad2deg(np.arctan2(y_mean, x_mean)) % 360.0
        lat_fiducial = np.rad2deg(
            np.arctan2(z_mean, np.sqrt(x_mean**2 + y_mean**2)))
        fiducial[spatial_axes] = lon_fiducial, lat_fiducial
    if (spectral_footprint).any():
        fiducial[spectral_axes] = spectral_footprint.min()
    return ((fiducial[spatial_axes]), fiducial[spectral_axes])
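The spatial branch is a standard unit-vector mean on the sphere. A self-contained sketch with made-up footprint corners:

import numpy as np

lon = np.deg2rad(np.array([10.0, 10.2, 9.8]))
lat = np.deg2rad(np.array([-5.0, -5.1, -4.9]))
x_mean = np.mean(np.cos(lat) * np.cos(lon))
y_mean = np.mean(np.cos(lat) * np.sin(lon))
z_mean = np.mean(np.sin(lat))
lon_fid = np.rad2deg(np.arctan2(y_mean, x_mean)) % 360.0
lat_fid = np.rad2deg(np.arctan2(z_mean, np.hypot(x_mean, y_mean)))
print(lon_fid, lat_fid)   # approximately 10.0 and -5.0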
Example #3
def compute_spec_fiducial(wcslist, domain=None):
    """
    For a celestial footprint this is the center.
    For a spectral footprint, it is the beginning of the range.

    This function assumes all WCSs have the same output coordinate frame.

    Build-7 workaround.
    """
    output_frame = wcslist[0].output_frame
    axes_types = wcslist[0].output_frame.axes_type
    spatial_axes = np.array(axes_types) == 'SPATIAL'
    spectral_axes = np.array(axes_types) == 'SPECTRAL'
    footprints = ma.hstack([spec_footprint(w,
        domain=domain) for w in wcslist])
    spatial_footprint = footprints[spatial_axes]
    spectral_footprint = footprints[spectral_axes]
    # Compute center of footprint
    fiducial = np.empty(len(axes_types))
    if (spatial_footprint).any():
        lon, lat = spatial_footprint
        lon, lat = np.deg2rad(lon), np.deg2rad(lat)
        x_mean = np.mean(np.cos(lat) * np.cos(lon))
        y_mean = np.mean(np.cos(lat) * np.sin(lon))
        z_mean = np.mean(np.sin(lat))
        lon_fiducial = np.rad2deg(np.arctan2(y_mean, x_mean)) % 360.0
        lat_fiducial = np.rad2deg(np.arctan2(z_mean, np.sqrt(x_mean ** 2 +
            y_mean ** 2)))
        fiducial[spatial_axes] = lon_fiducial, lat_fiducial
    #    c = coord.SkyCoord(lon_fiducial, lat_fiducial, unit='deg')
    if (spectral_footprint).any():
        fiducial[spectral_axes] = spectral_footprint.min()
    return ((fiducial[spatial_axes]), fiducial[spectral_axes])
Example #4
 def _get_corr_arr(self):
     print('redrawing cross correl')
     # get the list of names and sort them alphabetically
     corr_data = ma.hstack(self.var_arr_list)
     # @kelidas: return small differences between ma and numpy corrcoef
     # return ma.corrcoef( corr_data, rowvar = False, allow_masked = True )
     return MatSpearman(corr_data)
Example #5
 def _angles(self, U, V, eps=0.001):
     xy = self.ax.transData.transform(self.XY)
     uv = ma.hstack((U[:, np.newaxis], V[:, np.newaxis])).filled(0)
     xyp = self.ax.transData.transform(self.XY + eps * uv)
     dxy = xyp - xy
     ang = ma.arctan2(dxy[:, 1], dxy[:, 0])
     return ang
Example #6
 def _get_corr_arr(self):
     print('redrawing cross correl')
     # get the list of names and sort them alphabetically
     corr_data = ma.hstack(self.var_arr_list)
     # @kelidas: return small differences between ma and numpy corrcoef
     # return ma.corrcoef( corr_data, rowvar = False, allow_masked = True )
     return MatSpearman(corr_data)
Example #7
 def _angles(self, U, V, eps=0.001):
     xy = self.ax.transData.transform(self.XY)
     uv = ma.hstack((U[:,np.newaxis], V[:,np.newaxis])).filled(0)
     xyp = self.ax.transData.transform(self.XY + eps * uv)
     dxy = xyp - xy
     ang = ma.arctan2(dxy[:,1], dxy[:,0])
     return ang
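The masked column-stack plus .filled(0) step can be reproduced outside matplotlib; a sketch with made-up U/V components:

import numpy as np
import numpy.ma as ma

U = ma.masked_invalid(np.array([1.0, np.nan, 0.5]))
V = ma.masked_invalid(np.array([0.0, 2.0, np.nan]))
uv = ma.hstack((U[:, np.newaxis], V[:, np.newaxis])).filled(0)
print(uv)   # shape (3, 2); masked components are replaced by 0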
Example #8
File: som.py  Project: Zekom/orange
 def __call__(self, data, weight_id=0, progress_callback=None):
     array, classes, w = data.toNumpyMA()
     domain = data.domain
     if isinstance(domain.class_var, Orange.feature.Discrete):
         # Discrete class (extend the data with class indicator matrix)
         nval = len(data.domain.class_var.values)
         ext = ma.zeros((len(array), nval))
         ext[([i for i, m in enumerate(classes.mask) if m],
              [int(c) for c, m in zip(classes, classes.mask) if m])] = 1.0
     elif isinstance(domain.class_var, Orange.feature.Continuous):
         # Continuous class, just add the one column (what about multitarget)
         nval = 1
         ext = ma.zeros((len(array), nval))
         ext[:,0] = classes
     elif domain.class_var is None:
         # No class var
         nval = 0
         ext = ma.zeros((len(array), nval))
     else:
         raise TypeError("Unsuported `class_var` %r" % domain.class_var) 
     array = ma.hstack((array, ext))
     
     map = Map(self.map_shape, topology=self.topology)
     if self.initialize == Map.InitializeLinear:
         map.initialize_map_linear(array)
     else:
         map.initialize_map_random(array)
     map = Solver(batch_train=self.batch_train, eps=self.eps, neighbourhood=self.neighbourhood,
                  radius_ini=self.radius_ini, radius_fin=self.radius_fin, learning_rate=self.learning_rate,
                  epoch=self.epochs)(array, map, progress_callback=progress_callback)
     # Remove class columns from the vectors 
     for node in map:
         node.vector = node.vector[:-nval]
     return SOMMap(map, data)
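A sketch of the class-indicator extension without the Orange data objects, assuming the intent is that rows with a known (unmasked) class receive the indicator:

import numpy as np
import numpy.ma as ma

data = ma.masked_array(np.random.rand(4, 2))
classes = ma.masked_array([0, 2, 1, 2], mask=[0, 0, 1, 0])
nval = 3
ext = ma.zeros((len(data), nval))
rows = np.where(~classes.mask)[0]                  # rows with a known class
ext[rows, classes.compressed().astype(int)] = 1.0
data = ma.hstack((data, ext))
print(data.shape)                                  # (4, 5)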
Example #9
def _build_crossvalidation_iterator(config, y_train, y_test=None):
    """
    Returns a crossvalidation iterator, which contains a list of
    (train_indices, test_indices) that can be used to slice
    a dataset to perform crossvalidation. Additionally,
    returns the original data that was passed in and a mask specifying what
    data points should be used for validation.

    The method of splitting for CV is determined by what is specified in the
    conf file. The splitting of data in train/test/validate set is not done
    in this function; here we only return a mask for the validation data
    and an iterator for the train/test data.
    The full text is provided as a parameter so that joblib can cache the
    call to this function.
    """
    cv_type = config['type']
    k = config['k']
    dataset_size = len(y_train)

    if y_test is not None:
        logging.warning('You have requested test set to be used for evaluation.')
        if cv_type != 'test_set' and cv_type != 'subsampled_test_set':
            raise ValueError('Wrong crossvalidation type. Only test_set '
                             'or subsampled_test_set are permitted with a test set')

        train_indices = range(dataset_size)
        test_indices = range(dataset_size, dataset_size + len(y_test))
        y_train = hstack([y_train, y_test])
        dataset_size += len(y_test)

    random_state = config['random_state']
    if k < 0:
        logging.warning('crossvalidation.k not specified, defaulting to 1')
        k = 1
    if cv_type == 'kfold':
        iterator = cross_validation.KFold(dataset_size, n_folds=int(k), random_state=random_state)
    elif cv_type == 'skfold':
        iterator = cross_validation.StratifiedKFold(y_train, n_folds=int(k), random_state=random_state)
    elif cv_type == 'oracle':
        iterator = LeaveNothingOut(dataset_size)
    elif cv_type == 'test_set' and y_test is not None:
        iterator = PredefinedIndicesIterator(train_indices, test_indices)
    elif cv_type == 'subsampled_test_set' and y_test is not None:
        iterator = SubsamplingPredefinedIndicesIterator(y_train,
                                                        train_indices,
                                                        test_indices, int(k),
                                                        config['sample_size'],
                                                        config['random_state'])
    else:
        raise ValueError('Unrecognised crossvalidation type %s. The supported types are '
                         'kfold, skfold, test_set, subsampled_test_set and oracle' % cv_type)

    return iterator, y_train
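The test-set branch just appends the test labels and records which index ranges belong to train and test; a toy sketch of that bookkeeping:

import numpy as np

y_train = np.array([0, 1, 0, 1])
y_test = np.array([1, 0])
train_indices = range(len(y_train))                             # 0..3
test_indices = range(len(y_train), len(y_train) + len(y_test))  # 4..5
y_all = np.hstack([y_train, y_test])
print(list(train_indices), list(test_indices), y_all)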
Example #10
    def join_full(self, dm_new):
        """
        Combines the content of two Datamats.
       
        If a parameter of the Datamats is not equal or does not exist
        in one, it is promoted to a field.
        
        If the two Datamats have different fields then the elements for the
        Datamats that did not have the field will be NaN.
        
        Parameters
        dm_new : instance of Datamat
            This Datamat is added to the current one.

        Capacity to use superset of fields added by rmuil 2012/01/30

        """
        # Check if parameters are equal. If not, promote them to fields.
        for (nm, val) in self._parameters.items():
            if nm in dm_new._parameters:
                if (val != dm_new._parameters[nm]):
                    self.parameter_to_field(nm)
                    dm_new.parameter_to_field(nm)
            else:
                self.parameter_to_field(nm)
        for (nm, val) in dm_new._parameters.items():
            if nm in self._parameters:
                if (val != self._parameters[nm]):
                    self.parameter_to_field(nm)
                    dm_new.parameter_to_field(nm)
            else:
                dm_new.parameter_to_field(nm)
        # Deal with mismatch in the fields
        # First those in self that do not exist in new...
        orig_fields = self._fields[:]
        for field in orig_fields:
            if not field in dm_new._fields:
                dm_new.add_field_like(field, self.field(field))
        # ... then those in the new that do not exist in self.
        orig_fields = dm_new._fields[:]
        for field in orig_fields:
            if not field in self._fields:
                self.add_field_like(field, dm_new.field(field))

        # Concatenate fields
        for field in self._fields:
            self.__dict__[field] = ma.hstack((self.__dict__[field], 
                dm_new.__dict__[field]))

        # Update _num_fix
        self._num_fix += dm_new._num_fix 
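The concatenation step itself preserves masks, which is what makes the field-wise join safe for missing data:

import numpy.ma as ma

a = ma.masked_array([1.0, 2.0], mask=[False, True])
b = ma.masked_array([3.0, 4.0])
print(ma.hstack((a, b)))   # [1.0 -- 3.0 4.0]; the mask travels with the data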
Example #11
    def clean_outliers(self):
        """
        Function to remove outliers.

        Parameters
        ----------
        self.outlier_perc : integer
            Percentile value for mstats.scoreatpercentile function. Mask all values greater than this value.
        """
        # Outliers using percentiles - num_rows * [min, max]
        outlier_all = ma.array([[mstats.scoreatpercentile(self.xs[i, :], 100 - self.outlier_perc),
               mstats.scoreatpercentile(self.xs[i, :], self.outlier_perc)] for i in xrange(self.rows_N)])
        self.xs = ma.array([ma.hstack((ma.masked_outside(self.xs[i, :-self.keep_n_values], outlier_all[i, 0],
            outlier_all[i, 1]), self.xs[i, -self.keep_n_values:])) for i in xrange(self.rows_N)])
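A sketch of the per-row recipe with a made-up row, outlier_perc of 95 and keep_n_values of 2, assuming scipy is available:

import numpy.ma as ma
from scipy.stats import mstats

row = ma.masked_array([1.0, 2.0, 3.0, 100.0, 4.0, 5.0])
lo = mstats.scoreatpercentile(row, 5)    # 100 - outlier_perc
hi = mstats.scoreatpercentile(row, 95)   # outlier_perc
keep_n = 2
cleaned = ma.hstack((ma.masked_outside(row[:-keep_n], lo, hi), row[-keep_n:]))
print(cleaned)   # extreme values in the head are masked; the tail passes through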
Example #12
def XWrap2(x,P0,fill_value=0,pow2=False):
    """
    Extend and wrap array.
    
    Fold array every P0 indices.  There will typically be a hanging
    part of the array.  This is padded out.

    Parameters
    ----------

    x     : input
    P0    : Base period, units of elements
    pow2  : If true, pad out nRows so that it's the next power of 2.

    Return
    ------

    xwrap : Wrapped array.

    """

    ncad = x.size # Number of cadences
    # for some reason np.ceil(ncad/P0) doesn't work!
    nrow = int( np.floor(ncad/P0) +1 )
    nExtend = nrow * P0 - ncad # Pad out remainder of array with 0s.

    if type(x) is np.ma.core.MaskedArray:
        pad = ma.empty(nExtend)
        pad.mask = True
        x = ma.hstack( (x ,pad) )
    else:    
        pad = np.empty(nExtend) 
        pad[:] = fill_value
        x = np.hstack( (x ,pad) )

    xwrap = x.reshape( nrow,-1 )

    if pow2:
        k = np.ceil(np.log2(nrow)).astype(int)
        nrow2 = 2**k
        fill    = ma.empty( (nrow2-nrow,P0) )
        fill[:] = fill_value
        fill.mask=True
        xwrap = ma.vstack([xwrap,fill])

    return xwrap
Example #13
def XWrap2(x, P0, fill_value=0, pow2=False):
    """
    Extend and wrap array.
    
    Fold array every P0 indices.  There will typically be a hanging
    part of the array.  This is padded out.

    Parameters
    ----------

    x     : input
    P0    : Base period, units of elements
    pow2  : If true, pad out nRows so that it's the next power of 2.

    Return
    ------

    xwrap : Wrapped array.

    """

    ncad = x.size  # Number of cadences
    # for some reason np.ceil(ncad/P0) doesn't work!
    nrow = int(np.floor(ncad / P0) + 1)
    nExtend = nrow * P0 - ncad  # Pad out remainder of array with 0s.

    if type(x) is np.ma.core.MaskedArray:
        pad = ma.empty(nExtend)
        pad.mask = True
        x = ma.hstack((x, pad))
    else:
        pad = np.empty(nExtend)
        pad[:] = fill_value
        x = np.hstack((x, pad))

    xwrap = x.reshape(nrow, -1)

    if pow2:
        k = np.ceil(np.log2(nrow)).astype(int)
        nrow2 = 2**k
        fill = ma.empty((nrow2 - nrow, P0))
        fill[:] = fill_value
        fill.mask = True
        xwrap = ma.vstack([xwrap, fill])

    return xwrap
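Example usage, assuming the XWrap2 definition above is in scope (the sizes are illustrative):

import numpy as np
import numpy.ma as ma

x = ma.masked_array(np.arange(10.0))
xw = XWrap2(x, 4, pow2=True)
print(xw.shape)   # (4, 4): three wrapped rows padded up to the next power of 2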
Example #14
    def join(self, fm_new):
        """
        Adds content of a new Datamat to this Datamat, assuming same fields.
       
        If a parameter of the Datamats is not equal or does not exist
        in one, it is promoted to a field.
        
        If the two Datamats have different fields, the mismatching fields will
        simply be deleted.
        
        Parameters
        fm_new : instance of Datamat
            This Datamat is added to the current one.
        """
        # Check if parameters are equal. If not, promote them to fields.
        '''
        for (nm, val) in fm_new._parameters.items():
            if self._parameters.has_key(nm):
                if (val != self._parameters[nm]):
                    self.parameter_to_field(nm)
                    fm_new.parameter_to_field(nm)
            else:
                fm_new.parameter_to_field(nm)
        '''
        # Deal with mismatch in the fields
        # First those in self that do not exist in new...
        orig_fields = self._fields[:]
        for field in orig_fields:
            
            if not field in fm_new._fields:
                self.rm_field(field)
                warn("field '%s' doesn't exist in target DataMat, removing." % field)
        # ... then those in the new that do not exist in self.
        orig_fields = fm_new._fields[:]
        for field in orig_fields:
            if not field in self._fields:
                fm_new.rm_field(field)
                warn("field '%s' doesn't exist in source DataMat, removing." % field)
        # Concatenate fields
        for field in self._fields:
            self.__dict__[field] = ma.hstack((self.__dict__[field], 
                fm_new.__dict__[field]))

        # Update _num_fix
        self._num_fix += fm_new._num_fix
Example #15
File: detrend.py  Project: mSedore/terra
def bin(lc):
    """
    Bin the light curve for faster computation of GP

    Compute the mean of every nbin measurements (padding the end if
    necessary). Return only the valid datapoints.
    """

    fm = ma.masked_invalid( lc['f'] )
    nbin = 8
    rem  = np.remainder(lc.size,nbin)
    if rem > 0: # if points don't divide evenly into bins, pad out the end
        npad = nbin - rem
        pad  = ma.masked_array(np.zeros(npad),True)
        fm = ma.hstack([fm,pad])

    y   = fm.reshape(-1,nbin).mean(axis=1)
    x   = lc['t'][::nbin]
    b   = ~y.mask
    return x[b],y.data[b]
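The pad-then-reshape trick on its own, with a made-up flux vector and nbin of 4:

import numpy as np
import numpy.ma as ma

f = ma.masked_invalid(np.array([1.0, 2.0, np.nan, 4.0, 5.0, 6.0]))
nbin = 4
rem = np.remainder(f.size, nbin)
if rem > 0:
    pad = ma.masked_array(np.zeros(nbin - rem), mask=True)
    f = ma.hstack([f, pad])
print(f.reshape(-1, nbin).mean(axis=1))   # bin means over unmasked points only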
Example #16
File: detrend.py  Project: mSedore/terra
def dt(t0):

    t = copy.deepcopy(t0)
    fm = ma.masked_array(t.f,mask=t.fmask)
    tm = ma.masked_array(t.TIME,mask=t.fmask)

    label = sepseg(tm)

    sL = ma.notmasked_contiguous(label)

    # If there is only one slice.
    if type(sL) == slice: 
        sL = [sL]

    id = sL2id(sL)

    tnd = fm.copy()
    temp = [spldtm(tm[s],fm[s]) for s in sL]
    temp = ma.hstack(temp)
    tnd[id] = temp
    return fm-tnd
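The per-segment detrend-and-stitch pattern, with a trivial stand-in for the spline fit spldtm:

import numpy as np
import numpy.ma as ma

fm = ma.masked_array(np.arange(8.0), mask=[0, 0, 0, 1, 1, 0, 0, 0])
sL = ma.notmasked_contiguous(fm)     # unmasked slices; older numpy may return one slice
if isinstance(sL, slice):
    sL = [sL]
pieces = [fm[s] * 0.5 for s in sL]   # stand-in for spldtm(tm[s], fm[s])
print(ma.hstack(pieces))             # segment results re-joined into one array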
Example #17
    def __call__(self, data, weight_id=0, progress_callback=None):
        array, classes, w = data.toNumpyMA()
        domain = data.domain
        if isinstance(domain.class_var, Orange.feature.Discrete):
            # Discrete class (extend the data with class indicator matrix)
            nval = len(data.domain.class_var.values)
            ext = ma.zeros((len(array), nval))
            ext[([i for i, m in enumerate(classes.mask) if m],
                 [int(c) for c, m in zip(classes, classes.mask) if m])] = 1.0
        elif isinstance(domain.class_var, Orange.feature.Continuous):
            # Continuous class, just add the one column (what about multitarget)
            nval = 1
            ext = ma.zeros((len(array), nval))
            ext[:, 0] = classes
        elif domain.class_var is None:
            # No class var
            nval = 0
            ext = ma.zeros((len(array), nval))
        else:
            raise TypeError("Unsuported `class_var` %r" % domain.class_var)
        array = ma.hstack((array, ext))

        map = Map(self.map_shape, topology=self.topology)
        if self.initialize == Map.InitializeLinear:
            map.initialize_map_linear(array)
        else:
            map.initialize_map_random(array)
        map = Solver(batch_train=self.batch_train,
                     eps=self.eps,
                     neighbourhood=self.neighbourhood,
                     radius_ini=self.radius_ini,
                     radius_fin=self.radius_fin,
                     learning_rate=self.learning_rate,
                     epoch=self.epochs)(array,
                                        map,
                                        progress_callback=progress_callback)
        # Remove class columns from the vectors
        for node in map:
            node.vector = node.vector[:-nval]
        return SOMMap(map, data)
Example #18
def estimate_cell_edges(x):
    """Convert one-dimensional vector x of size n into n + 1, where the input
    describes the centres of the cells, and the output is an estimate of the
    edges of the cell"""
    # centres (with extra centres padded at the ends by linear interpolation)
    dx = ma.diff(x)
    x_c = ma.hstack((x[0] - atleast_1d(dx[0]), x,
                     x[-1] + atleast_1d(dx[-1])))
    # _f is notation from MITgcm (implies faces)
    x_f = (x_c[1:] + x_c[:-1])/2
    dx_c = np.diff(x_c)

    # Catch nan or masked values and estimate edge using dx from previous or
    # next cell
    nan_before = ma.where(
        ma.logical_and(nan_or_masked(x_f[:-1]), ~nan_or_masked(x_f[1:])))[0]
    nan_after = ma.where(
        ma.logical_and(~nan_or_masked(x_f[:-1]), nan_or_masked(x_f[1:])))[0]

    x_f[nan_before] = x_f[nan_before + 1] - dx_c[nan_before + 1]
    x_f[nan_after + 1] = x_f[nan_after] + dx_c[nan_after]

    return x_f
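For well-behaved input the centre-to-edge estimate reduces to midpoints, with one linearly extrapolated centre padded at each end:

import numpy as np
import numpy.ma as ma

x = ma.masked_array([1.0, 2.0, 3.0, 4.0])   # cell centres
dx = ma.diff(x)
x_c = ma.hstack((x[0] - np.atleast_1d(dx[0]), x,
                 x[-1] + np.atleast_1d(dx[-1])))
x_f = (x_c[1:] + x_c[:-1]) / 2
print(x_f)                                  # [0.5 1.5 2.5 3.5 4.5]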
Example #19
 def __call__(self, examples, weightID=0, progressCallback=None):
     data, classes, w = examples.toNumpyMA()
     nval = len(examples.domain.classVar.values)
     ext = ma.zeros((len(data), nval))
     ext[([i for i, m in enumerate(classes.mask) if m],
          [int(c) for c, m in zip(classes, classes.mask) if m])] = 1.0
     data = ma.hstack((data, ext))
     map = Map(self.map_shape, topology=self.topology)
     if self.initialize == Map.InitializeLinear:
         map.initialize_map_linear(data)
     else:
         map.initialize_map_random(data)
     map = Solver(batch_train=self.batch_train,
                  eps=self.eps,
                  neighbourhood=self.neighbourhood,
                  radius_ini=self.radius_ini,
                  radius_fin=self.radius_fin,
                  learning_rate=self.learning_rate,
                  epoch=self.epochs)(data,
                                     map,
                                     progressCallback=progressCallback)
     for node in map:
         node.vector = node.vector[:-nval]
     return SOMMap(map, examples)
Example #20
def gaussianSmooothNormalisedCorrelation(obs, wrf, sigma=20, sigmaWRF=5, thres=15, showImage=True,
                                         saveImage=True,  outputFolder="", 
                                         outputType="correlation",
                                         *args, **kwargs):
    """
    to use normalised correlation to study the similarity between obs and wrf
    codes from
    armor.tests.gaussianSmoothNormalisedCorrelation2
    input:
        sigma = sigma for obs
        sigmaWRF    = sigma for wrf
    """
    if outputFolder =="":
        try:
            outputFolder = obs.imageFolder
        except AttributeError:
            outputFolder = pattern.defaultOutputFolderForImages
    if showImage:            
        import pylab
        pylab.ion()
    k = obs         # alias
    w = wrf

    matrix0   = copy.copy(k.matrix)
    k.getCentroid()
    k.setThreshold(thres)  #2014-05-30
    k.matrix = k.gaussianFilter(sigma).matrix
    #k.matrix = 100.* (k.matrix>=thres) 
    k.matrix.mask = np.zeros(k.matrix.shape)
    #k.makeImage(closeAll=True)
    #pylab.draw()
    #correlations = []

    w.getCentroid()
    w.setThreshold(thres)  #2014-05-30
    w1 = w.gaussianFilter(sigmaWRF)
    topRowName = w.name + ', gaussian(' + str(sigmaWRF) + ') and ' + k.name
    topRow = ma.hstack([w.matrix, w1.matrix, matrix0])
    #w1.matrix = 100.*(w1.matrix>=thres)
    w1.matrix.mask = np.zeros(w1.matrix.shape)
    try:
        ############################################
        #   key lines
        w2 = w1.momentNormalise(k)
        w3 = w1.momentNormalise(k, extraAngle=np.pi)
        if outputType=="correlation" or outputType=="corr":
            corr    = w2.corr(k)
            corr2   = w3.corr(k)
            if corr2 > corr:
                print('180 degree switch: ')
                print('   ', k.name, w.name, corr, corr2, '\n................................')
                corr = corr2
                w2 = w3
            returnValue= corr 
        #elif outputType=="regression" or outputType=="regress":
        else:
            x,  residuals   = w2.regress(k)
            x2, residuals2  = w3.regress(k)
            if residuals2 < residuals:
                print('180 degree switch: ')
                print('   ', k.name, w.name, residuals2, "<", residuals, '\n................................')
                x = x2
                w2 = w3 
            returnValue = x
                
        #
        #############################################

        #######        
        #   making the output image

        w2.matrix = ma.hstack([w1.matrix, w2.matrix, k.matrix])
        w2.name   = w.name + ', normalised, and ' + k.name + '\nnormalised '
        if outputType=="corr" or outputType=="correlation":
            w2.name += 'correlation:  ' + str(corr)
        w2.matrix = ma.vstack([w2.matrix, topRow])
        w2.name  = topRowName + '\n' + "bottom row:" + w2.name
        w2.imagePath = outputFolder + w.name + '_' + k.name + '_sigma' + str(sigma) + '_thres' + str(thres) + '.png'
        w2.vmin= -20.
        w2.vmax = 100.
        if saveImage:
            w2.saveImage()
        if showImage:
            w2.makeImage(closeAll=True)
            pylab.draw()

        #
        ############################################
    #except IndexError:
    except SyntaxError:
        returnValue = -999
    # restoring the matrix
    k.backupMatrix('gaussian smooth normalised correlations, sigma='+ str(sigma) + 'threshold=' + str(thres)) 
    k.matrix = matrix0

    return returnValue
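Stripped of the armor-specific objects, the image-mosaic step is plain horizontal and vertical stacking of masked 2-D fields:

import numpy as np
import numpy.ma as ma

left = ma.masked_array(np.zeros((2, 3)))
right = ma.masked_array(np.ones((2, 3)))
row = ma.hstack([left, right])    # 2 x 6 side-by-side panel
panel = ma.vstack([row, row])     # 4 x 6 two-row mosaic
print(panel.shape)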
Example #21
 def hstack(x):
     return ma.hstack(x)
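The wrapper adds nothing beyond ma.hstack itself, whose point is that the mask is concatenated along with the data:

import numpy.ma as ma

a = ma.masked_array([1, 2], mask=[0, 1])
b = ma.masked_array([3, 4], mask=[1, 0])
print(ma.hstack((a, b)))             # [1 -- -- 4]
print(ma.hstack((a, b)).filled(0))   # [1 0 0 4]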
Example #22
File: datamat.py  Project: nwilming/ocupy
    def join(self, fm_new, minimal_subset=True):
        """
        Adds content of a new Datamat to this Datamat.

        If a parameter of the Datamats is not equal or does not exist
        in one, it is promoted to a field.

        If the two Datamats have different fields then the elements for the
        Datamats that did not have the field will be NaN, unless
        'minimal_subset' is true, in which case the mismatching fields will
        simply be deleted.

        Parameters
        fm_new : instance of Datamat
            This Datamat is added to the current one.
        minimal_subset : if true, remove fields which don't exist in both,
            instead of using NaNs for missing elements (defaults to True)

        Capacity to use superset of fields added by rmuil 2012/01/30

        """
        # Check if parameters are equal. If not, promote them to fields.
        '''
        for (nm, val) in fm_new._parameters.items():
            if self._parameters.has_key(nm):
                if (val != self._parameters[nm]):
                    self.parameter_to_field(nm)
                    fm_new.parameter_to_field(nm)
            else:
                fm_new.parameter_to_field(nm)
        '''
        # Deal with mismatch in the fields
        # First those in self that do not exist in new...
        orig_fields = self._fields[:]
        for field in orig_fields:
            if not field in fm_new._fields:
                if minimal_subset:
                    self.rm_field(field)
                else:
                    warnings.warn("This option is deprecated. Clean and Filter your data before it is joined.", DeprecationWarning)
                    fm_new.add_field_like(field, self.field(field))
        # ... then those in the new that do not exist in self.
        orig_fields = fm_new._fields[:]
        for field in orig_fields:
            if not field in self._fields:
                if minimal_subset:
                    fm_new.rm_field(field)
                else:
                    warnings.warn("This option is deprecated. Clean and Filter your data before it is joined.", DeprecationWarning)
                    self.add_field_like(field, fm_new.field(field))

        if 'SUBJECTINDEX' in self._fields[:]:
            if fm_new.SUBJECTINDEX[0] in self.SUBJECTINDEX:
                fm_new.SUBJECTINDEX[:] = self.SUBJECTINDEX.max()+1
        # Concatenate fields
        for field in self._fields:
            self.__dict__[field] = ma.hstack((self.__dict__[field],
                fm_new.__dict__[field]))

        # Update _num_fix
        self._num_fix += fm_new._num_fix
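When minimal_subset is false, a field missing from one side is padded (via add_field_like) before the concatenation; a sketch of that alignment with masked NaNs:

import numpy as np
import numpy.ma as ma

mine = ma.masked_array([0.1, 0.2])
theirs = ma.masked_array(np.full(3, np.nan), mask=True)   # field absent in the newcomer
print(ma.hstack((mine, theirs)))                          # [0.1 0.2 -- -- --]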
Example #23
def moving_average(dat,
                   columns=None,
                   window='hanning',
                   size=5,
                   normalize=True,
                   **kwargs):
    """
    Calculates window-averaged time-series.

    Parameters
    ----------
    dat : array like, record array
        Array with data.
    columns : array like
    window : string, array like
        The window to apply. Can be either an array or a string. If a
        string, creates a window of length given by `size` parameter.
        Valid strings are: `boxcar`, `hanning`, `lanczos`.
    size : integer, optional
        Size of the window. Default window size is 5.
    normalize : bool, optional
        If `True` (default) normalizes window to have unit integral.
    kwargs : optional
        Additional arguments depending on the selected window function.

    Returns
    -------
        TODO

    """
    # Checks for data columns.
    ndim = None
    if columns is None:
        columns = dat.dtype.names
    if columns is None:
        ndim = dat.ndim
        if dat.ndim == 1:
            dat = ma.asarray([dat])
            dat = ma.masked_invalid(dat)
            columns = [0]
        else:
            columns = arange(dat.shape[1])
    # Initializes result array.
    Dat = copy(dat)

    # If window parameter is given as a string, calculate window array.
    if isinstance(window, basestring):
        # Makes sure that window size is odd and an integer
        size = 2 * (size - 1) // 2 + 1
        if window == 'hanning':
            window = hanning(size)
        elif window == 'boxcar':
            window = ones(size)
        elif window == 'lanczos':
            window = lanczos(size, **kwargs)
        else:
            raise ValueError('Invalid window `{}`.'.format(window))
    elif not isinstance(window, ndarray):
        raise ValueError('Invalid window.')

    # Normalize window to avoid input of variance.
    if normalize:
        window /= window.sum()

    # Walk through each column/variable in the data array.
    for col in columns:
        mean = dat[col].mean()
        y = dat[col] - mean
        mask = dat[col].mask
        y[mask] = 0
        # Mirror the edges to avoid edge effects.
        y = ma.hstack([y[:size][::-1], y, y[-size:][::-1]])
        # Calculates the windowed-average. In case that input values are
        # complex, calculates the complex window average.
        if iscomplex(y).any():
            Y = convolve(y.real, window, mode='same') + \
                1j * convolve(y.imag, window, mode='same')
        else:
            Y = convolve(y, window, mode='same')
        # Update result array with window-averaged values.
        Y = Y[size:-size]
        mask = mask | isnan(Y)
        Dat[col] = ma.masked_where(mask, Y) + mean
    #
    if ndim == 1:
        return Dat[0]
    else:
        return Dat
Example #24
 k.matrix = 100.* (k.matrix>=thres)
 k.matrix.mask = np.zeros(k.matrix.shape)
 #k.vmax=2
 #k.vmin=-2
 #k.makeImage(closeAll=True)
 #pylab.draw()
 correlations = []
 for w in wrf.listTemp:
     #try:
         # LOAD w, smooth by gaussian , and get threshold
         w.load()
         w.setThreshold(0)
         w.getCentroid()
         w1 = w.gaussianFilter(sigma)
         topRowName = w.name + ', gaussian(' + str(sigma) + ') and ' + k.name
         topRow = ma.hstack([w.matrix, w1.matrix, k.matrix0])
         #k.load()
         #k.setThreshold(0)
         #topRow = ma.hstack([w.matrix, w1.matrix, k.matrix])
         w1.matrix = 100.*(w1.matrix>=thres)
         w1.matrix.mask = np.zeros(w1.matrix.shape)
         #w1.vmax = 2
         #w1.vmin =-2
         #w.makeImage(closeAll=True)
         #pylab.draw()
         #print "w.matrix.shape, w.matrix.mask.shape", w.matrix.shape, w.matrix.mask.shape
         try:
             ############################################
             #   punchlines
             w2 = w1.momentNormalise(k)
             corr = w2.corr(k)
Example #25
def moving_average(dat, columns=None, window='hanning', size=5,
    normalize=True, **kwargs):
    """
    Calculates window-averaged time-series.

    Parameters
    ----------
    dat : array like, record array
        Array with data.
    columns : array like
    window : string, array like
        The window to apply. Can be either an array or a string. If a
        string, creates a window of length given by `size` parameter.
        Valid strings are: `boxcar`, `hanning`, `lanczos`.
    size : integer, optional
        Size of the window. Default window size is 5.
    normalize : bool, optional
        If `True` (default) normalizes window to have unit integral.
    kwargs : optional
        Additional arguments depending on the selected window function.

    Returns
    -------
        TODO

    """
    # Checks for data columns.
    ndim = None
    if columns is None:
        columns = dat.dtype.names
    if columns is None:
        ndim = dat.ndim
        if dat.ndim == 1:
            dat = ma.asarray([dat])
            dat = ma.masked_invalid(dat)
            columns = [0]
        else:
            columns = arange(dat.shape[1])
    # Initializes result array.
    Dat = copy(dat)

    # If window parameter is given as a string, calculate window array.
    if isinstance(window, basestring):
        # Makes sure that window size is odd and an integer
        size = 2 * (size - 1) // 2 + 1
        if window == 'hanning':
            window = hanning(size)
        elif window == 'boxcar':
            window = ones(size)
        elif window == 'lanczos':
            window = lanczos(size, **kwargs)
        else:
            raise ValueError('Invalid window `{}`.'.format(window))
    elif not isinstance(window, ndarray):
        raise ValueError('Invalid window.')

    # Normalize window to avoid input of variance.
    if normalize:
        window /= window.sum()

    # Walk through each column/variable in the data array.
    for col in columns:
        mean = dat[col].mean()
        y = dat[col] - mean
        mask = dat[col].mask
        y[mask] = 0
        # Mirror the edges to avoid edge effects.
        y = ma.hstack([y[:size][::-1], y, y[-size:][::-1]])
        # Calculates the windowed-average. In case that input values are
        # complex, calculates the complex window average.
        if iscomplex(y).any():
            Y = convolve(y.real, window, mode='same') + \
                1j * convolve(y.imag, window, mode='same')
        else:
            Y = convolve(y, window, mode='same')
        # Update result array with window-averaged values.
        Y = Y[size:-size]
        mask = mask | isnan(Y)
        Dat[col] = ma.masked_where(mask, Y) + mean
    #
    if ndim == 1:
        return Dat[0]
    else:
        return Dat
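The edge-mirroring step in isolation:

import numpy as np
import numpy.ma as ma

y = ma.masked_array(np.arange(6.0))
size = 3
y_ext = ma.hstack([y[:size][::-1], y, y[-size:][::-1]])
print(y_ext)   # [2 1 0 | 0 1 2 3 4 5 | 5 4 3], mirrored on both ends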
Example #26
    def __init__(self,conf,sysVars=None,stateVars=None):

        self.verbose_solver = False
        if conf.get('SolverVerbos',0) > 0:
            self.verbose_solver = True

        self.QmaxConst = deepcopy(self.QmaxConst_cf)

        UnControlledTank = [k for k in self.CSOT_cf.iterkeys() if self.CSOT_cf[k]['Status'] in ['m','uc']]
        self.ControlledTank = [k for k in self.CSOT_cf.iterkeys() if self.CSOT_cf[k]['Status']=='c']
        self.ConfigTanks = [x for x in self.CSOT_cf.iterkeys() if self.CSOT_cf[x]['Status'] in ['c','m','uc']]


        #-----------------------------------------------------------------------------#
        #-------------------------Parameters------------------------------------------#
        #-----------------------------------------------------------------------------#
        # TO DO : get default from config

        # control time period
        self.t_update = conf['ControlTimeperiod'] #[s]; ToDo: This should come from the caller somehow (can change over time)
        self.unitConv = {'l/s':float(self.t_update)/1000.} # "*" [l/s] -> [m3/control-period] ; "/" [m3/control-period] -> [l/s]
        self.unitConv['m3/h'] = float(self.t_update)/3600. # "*" [m3/h] -> [m3/control-period] ; "/" [m3/control-period] -> [m3/h]

        # extend CSOT_cf with calculated volume limits for switching controllable <-> controlled
        # Update CSOT_cf Qmaintenance with sysVars if existing.
        for ti in self.ConfigTanks:
            CSOTi_cf = self.CSOT_cf[ti]
            # 1. Update if existing Qmaintenance and hPon/off first because they are used in Cbl/C switching
            for opcVi,cfKi in [["Qmaintenance","Qmaintenance"],["hPon","h_Pon"],["hPoff","h_Poff"]]:
                #Check config key existence
                if not cfKi in CSOTi_cf:
                    continue
                vari = ':'.join((ti,opcVi))
                #Check opcVariable existence
                if vari in sysVars:
                    value = sysVars[vari].value
                    if value != None:
                        unitConv = 1.0
                        if cfKi in ("Qmaintenance",):
                            unitConv = self.unitConv['m3/h'] / self.unitConv['l/s']
                        CSOTi_cf[cfKi] = value * unitConv

            # Extend or (re)init volume and InEst limits for Cbl/C switching
            if ti in self.ControlledTank:
                self._init_CblC_Config(ti,CSOTi_cf)

        # Parameters used in the subgoals definition
        cOP = conf['ParamOptiProb']
        ArtOptOv = -10.0;
        self.Yref = cOP['YRef'] #[l/s]; ToDo: should come from system variables (parameter to be modified by user)
        self.Yref=self.Yref*self.unitConv['l/s'] #[m3/control-period]

        self.MaxWWTP= cOP['MaxWWTP'] #[l/s]; ToDo: should come from system variables (parameter to be modified by user)
        self.MaxWWTP=self.MaxWWTP*self.unitConv['l/s'] #[m3/control-period]


        self.NbRUB = len(self.ControlledTank+UnControlledTank)

        #self.InflowForecast = True #No Assumption
        self.InflowForecast = False #Assumption constant

        #-----------------------------------------------------------------------------#
        #-----------------------------------------------------------------------------#


        #-----------------------------------------------------------------------------#
        #-------------------------Parameters------------------------------------------#
        #-----------------------------------------------------------------------------#
        self.VolMax = np.array([self.CSOT_cf[x]['Volume'] for x in self.ConfigTanks])
        self.OutMax = np.array([self.CSOT_cf[x]['Qmax'] for x in self.ConfigTanks])
        self.OutMax = self.OutMax*self.unitConv['l/s']
        self.StepDelayed = np.array([self.CSOT_cf[x]['FlowTimeToSink'] for x in self.ConfigTanks])
        self.StepDelayed = (self.StepDelayed/(float(self.t_update)/60)).round()
        self.MaxLag = int(np.max(self.StepDelayed)) #  number of GPC iterations over the Control horizon
        self.NbXPart = self.NbRUB*self.MaxLag

        for k in self.QmaxConst:
            for ik,iv in k[0].iteritems():
                k[0][ik]=iv/((self.t_update)/60)

        OutMIN = 0;

        QMaint = np.array([self.CSOT_cf[x]['Qmaintenance'] for x in self.ConfigTanks])
        QMaint = QMaint*self.unitConv['l/s']
        self.QMaint_extend = np.ravel( np.tile(np.c_[QMaint],self.MaxLag))
        #-----------------------------------------------------------------------------#
        #-----------------------------------------------------------------------------#


        #-----------------------------------------------------------------------------#
        #-------------------------Variables Initialisation----------------------------#
        #-----------------------------------------------------------------------------#
        # TO DO :  Give some values (current volumes, few history values for outflows)
        #Init the default values for the algorithm history
        self._init_AlgHistory()

        self.RUB_inEST = np.zeros((self.NbRUB,1))
        self.Ov = np.zeros((self.NbRUB,1))
        self.Ov_Real = np.zeros((self.NbRUB,1))
        self.OutComm = np.zeros((self.NbRUB,1))
        if sysVars:
            self.updateAlgHistory(sysVars)
            self.OutComm = self.RUB_OUT_Real[[-1],:].T

        self.Volumes = self.Volumes_Real
        #-----------------------------------------------------------------------------#
        #-----------------------------------------------------------------------------#










        #-----------------------------------------------------------------------------#
        #-------------------------Problem Formulation---------------------------------#
        #-----------------------------------------------------------------------------#
        #--- unchanged matrices between 2 successive steps ---------------------------#
        #--- as long as the network structure or parameters does not change ----------#


        #--- Decision Variables-----------------------#
        # NbTanks*MaxLag first variables represent the water volume over the prediction horizon grouped by tanks
        # The following NbTanks*MaxLag variables represent the outflow volume over the prediction horizon grouped by tanks
        # The last NbTanks*MaxLag variables represent the overflow volume over the prediction horizon grouped by tanks
        self.x = cvxpy.Variable(3*self.NbXPart,1,name='x')




        #--- Matrix C (from the Equality Constraint : Cx = D )-----------------------#
        C_Vol=np.identity(self.NbXPart)-np.diag(np.ones(self.NbXPart-1),-1)
        k=np.arange(self.MaxLag+1,self.NbXPart,self.MaxLag)
        C_Vol[k-1,k-2]=0

        C_Out=np.identity(self.NbXPart)
        self.TkinCascade =[[ idx , self.ConfigTanks.index(self.CSOT_cf[val]['Sink']) ] for idx, val in enumerate(self.ConfigTanks) if self.CSOT_cf[val]['Sink'] not in 'KAHe' ]
        for i in range(len(self.TkinCascade)):
            tku = self.TkinCascade[i][0]
            tkd = self.TkinCascade[i][1]
            Ctemp = -np.eye(self.MaxLag)
            C_Out[tkd*self.MaxLag:(tkd+1)*self.MaxLag , tku*self.MaxLag:(tku+1)*self.MaxLag] \
            = np.hstack(( Ctemp[:,self.StepDelayed[tku]:] , np.zeros((self.MaxLag,self.StepDelayed[tku])) ))


        C_Ov=np.identity(self.NbXPart)

        self.C = np.hstack([C_Vol,C_Out,C_Ov])
        del(C_Vol,C_Out,C_Ov)


        self.EQCxD_L = self.C*self.x



        #--- CostFunction : min |Ax-B|-------------------------#
        Av = np.array([])
        for i in range(self.NbRUB):
                Av_temp = np.tile(-self.VolMax[i]*np.identity(self.MaxLag),(1,self.NbRUB)) #use positive coeff and multiply by (-1) only at the end ?
                Av = np.concatenate([x for x in [Av,Av_temp] if x.size > 0])
                del(Av_temp)

        self.Aout = np.array([])
        for i in range(self.NbRUB):
                if self.CSOT_cf[self.ConfigTanks[i]]['Sink'] == 'KAHe':
                    Atemp = np.identity(self.MaxLag)
                    if self.StepDelayed[i]>1:
                        Aout1 = np.zeros((self.StepDelayed[i]-1,self.MaxLag))
                        Aout2 = np.zeros((self.MaxLag-(self.StepDelayed[i]-1),self.MaxLag))
#                        Aout_temp = np.vstack([Aout1,Aout2])
                        Aout_temp = np.vstack([Aout1,np.identity(self.MaxLag),Aout2])
                    else:
                        Aout_temp = np.vstack([np.identity(self.MaxLag),np.zeros((self.MaxLag,self.MaxLag))])
                else :#tanks in cascade
                    Aout_temp = np.zeros((2*self.MaxLag,self.MaxLag))
                self.Aout = np.concatenate([x for x in [self.Aout,Aout_temp] if x.size > 0],1)
                del(Aout_temp)

        A_Ov = np.identity(self.NbXPart)

        Zsq = np.zeros((self.NbXPart,self.NbXPart))
        Zrect = np.zeros((2*self.MaxLag,self.NbXPart))
        self.A=np.vstack([np.hstack([Av,Zsq,Zsq]), np.hstack([Zrect,self.Aout,Zrect]), np.hstack([Zsq,Zsq,A_Ov])])
        del(Av,A_Ov,Zsq,Zrect)



        self.A0 = np.array([])
        for i in range(self.NbRUB):
            if self.CSOT_cf[self.ConfigTanks[i]]['Sink'] == 'KAHe':
                Atemp = np.identity(self.MaxLag)
            else :#tanks in cascade
                Atemp = np.zeros((self.MaxLag,self.MaxLag))
            if self.StepDelayed[i]:
                A01 = np.flipud(Atemp[:,(self.MaxLag+2-self.StepDelayed[i]-1):] )
            else :
                A01 = np.flipud(Atemp[:,(self.MaxLag+2-self.StepDelayed[i]-1+1):] )
            self.A0 = np.concatenate([x for x in [self.A0,A01] if x.size > 0],1)
            del(A01,Atemp)

        self.A0 = np.vstack((self.A0,np.zeros((self.MaxLag,self.A0.shape[1]))))


        self.Bv = np.zeros((self.NbXPart,1))
        self.B_Ov = ArtOptOv*np.ones((self.NbXPart,1))

        # weighting coefficients
        self._update_CFWeight(conf['CostFunctionWeights'])


        #-----------------------------------------------------------------------------#
        #-------------------------Constraints-----------------------------------------#

        #--- constraints on Vol, Out, Ov--------------------------------#
        #--- x >= l --------------------------------#
        l_Vol = np.zeros([self.NbXPart,1])
        l_Out = OutMIN*np.ones([self.NbXPart,1])
        l_Ov = np.zeros([self.NbXPart,1])
        LowerLimit = np.vstack([l_Vol,l_Out,l_Ov])
        LowerLimit.flatten() # DF it probably exists a simple way to do that
        del(l_Vol,l_Out,l_Ov)
        #--- x <= u --------------------------------#
        u_Vol = np.tile(self.VolMax,(self.MaxLag,1))
        u_Out = np.tile(self.OutMax,(self.MaxLag,1))
        UpperLimit = np.hstack([u_Vol,u_Out])
        UpperLimit = UpperLimit.T.reshape((self.NbXPart*2,1)) # DF it probably exists a simple way to do that
        del(u_Vol,u_Out)




        #--- Hard constraints: l<=x<=u ---
        self.constr_cf = ma.hstack([self.x[kk] >= LowerLimit[kk] for kk in range(3*self.NbXPart)])
        for kk in range(2*self.NbXPart) :
            self.constr_cf = ma.hstack((self.constr_cf, self.x[kk] <= UpperLimit[kk]))


        ### ----- Debug output ----
        #print 'Constraints block - 1'




        #--- Maximum InFlow to the WWTP over the prediction horizon ---
        #---'Constraints block - 2'
        TS2 = np.zeros((2*self.MaxLag,self.x.size[0]))
        TS2[:,self.NbXPart:2*self.NbXPart]=self.Aout
        for k in range(self.NbRUB):
            ColIdx = self.NbXPart-1+(k+1)*self.MaxLag
            RowIdx = np.where(TS2[:,ColIdx]==1)[0]
            if RowIdx.size:
                TS2[RowIdx[0]:-1,ColIdx] = 1
        self.LEQ2_L = TS2*self.x


        #--- Maximum Flow in pipes prediction horizon ---
        #---'Constraints block - 3.1'
        IND_U = np.reshape(np.c_[self.NbXPart:2*self.NbXPart],(self.MaxLag,self.NbRUB),order='F')
        NbQMaxConst = len(self.QmaxConst)
        self.TS31 = np.zeros((0,self.x.size[0]))
        self.TS32 = np.zeros((0,self.x.size[0]))
        self.Qmax31 = np.zeros((0,1))
        self.TanksName31 = []
        for k in xrange( NbQMaxConst ):
            U_Ind = np.array([self.ConfigTanks.index(kk) for kk in self.QmaxConst[k][0].keys() if kk in self.ConfigTanks])
            if U_Ind.size == 0: continue
            IND_Utmp = IND_U[:,U_Ind] #new addition
            U_Delay = np.array([self.QmaxConst[k][0][self.ConfigTanks[kk]] for kk in U_Ind])
            Qmax = self.QmaxConst[k][1]

            Ninputs = U_Ind.size

            IndNode = np.tile(IND_U[-1,U_Ind],(2*self.MaxLag,1))
            for kk in xrange(Ninputs):
                IndNode[U_Delay[kk]+np.arange(self.MaxLag),kk] = IND_Utmp[:,kk]
            IndNode = IndNode[U_Delay.max()+range(self.MaxLag),:]
            IndNode = IndNode.astype(int)

            for Ii in IndNode:
                if len(Ii) == 0:
                    continue
                ts = np.zeros((1,self.x.size[0]))
                ts[0,Ii] = 1
                self.TS31 = np.vstack((self.TS31, ts))
                self.Qmax31 = np.vstack( (self.Qmax31, Qmax) )

            self.TanksName31 = self.TanksName31 + [set(self.QmaxConst_cf[k][0].keys())]*IndNode.shape[0]

            #---'Constraints block - 3.2
            Maxgap = U_Delay.max() - U_Delay.min()
            U_Delay = U_Delay - U_Delay.min()
            if Maxgap > 0:
                LHS = np.tile(np.c_[1:Maxgap+1],(Ninputs,1)).T - np.tile(U_Delay,(Maxgap,1))
                for kk in xrange(Maxgap):
                    FL = LHS[kk,:] > 0
                    xidx = np.diag(IND_U[np.ix_(LHS[kk,FL]-1,U_Ind[FL])])
                    ts = np.zeros((1,self.x.size[0]))
                    ts[0,xidx] = 1
                    self.TS32 = np.vstack((self.TS32, ts))

                ### ----- Debug output ----
                #print 'Constraints block - 3.2 k=%s/%s %s' % (k,NbQMaxConst,NM)

        if self.TS32.shape[0] > 0 :
            self.LEQ32_L = self.TS32*self.x
        else :
            self.LEQ32_L = None


        for kk in range(self.Qmax31.size) :
            self.constr_cf = ma.hstack((   self.constr_cf , self.TS31[kk,:].reshape(1,self.NbXPart*3)*self.x <= self.Qmax31[kk]   ))

        self.updateStructOptPB()
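In this solver setup ma.hstack is used simply to accumulate a growing sequence of constraint objects rather than numeric data; with plain Python objects the same call just yields a flat concatenated array:

import numpy.ma as ma

items = ma.hstack([['a', 'b'], ['c']])
print(items)   # ['a' 'b' 'c']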
Example #27
    def join(self, fm_new, minimal_subset=True):
        """
        Adds content of a new Datamat to this Datamat.

        If a parameter of the Datamats is not equal or does not exist
        in one, it is promoted to a field.

        If the two Datamats have different fields then the elements for the
        Datamats that did not have the field will be NaN, unless
        'minimal_subset' is true, in which case the mismatching fields will
        simply be deleted.

        Parameters
        fm_new : instance of Datamat
            This Datamat is added to the current one.
        minimal_subset : if true, remove fields which don't exist in both,
            instead of using NaNs for missing elements (defaults to True)

        Capacity to use superset of fields added by rmuil 2012/01/30

        """
        # Check if parameters are equal. If not, promote them to fields.
        '''
        for (nm, val) in fm_new._parameters.items():
            if self._parameters.has_key(nm):
                if (val != self._parameters[nm]):
                    self.parameter_to_field(nm)
                    fm_new.parameter_to_field(nm)
            else:
                fm_new.parameter_to_field(nm)
        '''
        # Deal with mismatch in the fields
        # First those in self that do not exist in new...
        orig_fields = self._fields[:]
        for field in orig_fields:
            if not field in fm_new._fields:
                if minimal_subset:
                    self.rm_field(field)
                else:
                    warnings.warn(
                        "This option is deprecated. Clean and Filter your data before it is joined.",
                        DeprecationWarning)
                    fm_new.add_field_like(field, self.field(field))
        # ... then those in the new that do not exist in self.
        orig_fields = fm_new._fields[:]
        for field in orig_fields:
            if not field in self._fields:
                if minimal_subset:
                    fm_new.rm_field(field)
                else:
                    warnings.warn(
                        "This option is deprecated. Clean and Filter your data before it is joined.",
                        DeprecationWarning)
                    self.add_field_like(field, fm_new.field(field))

        if 'SUBJECTINDEX' in self._fields[:]:
            if fm_new.SUBJECTINDEX[0] in self.SUBJECTINDEX:
                fm_new.SUBJECTINDEX[:] = self.SUBJECTINDEX.max() + 1
        # Concatenate fields
        for field in self._fields:
            self.__dict__[field] = ma.hstack(
                (self.__dict__[field], fm_new.__dict__[field]))

        # Update _num_fix
        self._num_fix += fm_new._num_fix
Example #28
def _build_crossvalidation_iterator(config, x_vals, y_vals, x_test=None,
                                    y_test=None):
    """
    Returns a crossvalidation iterator, which contains a list of
    (train_indices, test_indices) that can be used to slice
    a dataset to perform crossvalidation. Additionally,
    returns the original data that was passed in and a mask specifying what
    data points should be used for validation.

    The method of splitting for CV is determined by what is specified in the
    conf file. The splitting of data in train/test/validate set is not done
    in this function; here we only return a mask for the validation data
    and an iterator for the train/test data.
    The full text is provided as a parameter so that joblib can cache the
    call to this function.
    """
    logging.info('Building crossvalidation iterator')
    cv_type = config['type']
    k = config['k']

    if (config['validation_slices'] != '' and
                config['validation_slices'] is not None):
        # the data should be treated as a stream, which means that it should
        # not
        # be reordered and it should be split into a seen portion and an unseen
        # portion separated by a virtual 'now' point in the stream
        validation_data = get_named_object(config['validation_slices'])
        validation_data = validation_data(x_vals, y_vals)
    else:
        validation_data = [(0, 0)]

    validation_indices = reduce(lambda l, ht: l + list(range(ht[0], ht[1])),
                                validation_data, [])

    if x_test is not None and y_test is not None:
        logging.warn('You have requested test set to be used for evaluation.')
        if cv_type != 'test_set' and cv_type != 'subsampled_test_set':
            logging.error('Wrong crossvalidation type. Only test_set '
                          'or subsampled_test_set are permitted with a test set')
            sys.exit(1)

        x_vals = list(x_vals)
        train_indices = range(len(x_vals))
        test_indices = range(len(x_vals), len(x_vals) + len(x_test))
        x_vals.extend(x_test)
        y_vals = hstack([y_vals, y_test])

    mask = np.zeros(y_vals.shape[0])  # we only mask the rows
    mask[validation_indices] = 1 # mask has 1 where the data point should be
    # used for validation and not for training/testing

    seen_data_mask = mask == 0
    dataset_size = np.sum(seen_data_mask)
    targets_seen = y_vals[seen_data_mask]
    if k < 0:
        logging.warn(
            'crossvalidation.k not specified, defaulting to 1')
        k = 1
    if cv_type == 'kfold':
        iterator = cross_validation.KFold(dataset_size, int(k))
    elif cv_type == 'skfold':
        iterator = cross_validation.StratifiedKFold(targets_seen, int(k))
    elif cv_type == 'loo':
        iterator = cross_validation.LeaveOneOut(dataset_size, int(k))
    elif cv_type == 'bootstrap':
        ratio = config['ratio']
        if ratio < 0:
            logging.warn(
                'crossvalidation.ratio not specified, defaulting to 0.8')
            ratio = 0.8
        iterator = cross_validation.Bootstrap(dataset_size,
                                              n_iter=int(k),
                                              train_size=ratio)
    elif cv_type == 'oracle':
        iterator = LeaveNothingOut(dataset_size)
    elif cv_type == 'test_set' and x_test is not None and y_test is not None:
        iterator = PredefinedIndicesIterator(train_indices, test_indices)
    elif cv_type == 'subsampled_test_set' and \
                    x_test is not None and y_test is not None:
        iterator = SubsamplingPredefinedIndicesIterator(y_vals,
                                                        train_indices,
                                                        test_indices, int(k),
                                                        config['sample_size'],
                                                        config['random_state'])
    else:
        raise ValueError(
            'Unrecognised crossvalidation type %r. The supported '
            'types are kfold, skfold, loo, bootstrap, '
            'test_set, subsampled_test_set and oracle' % cv_type)


    # Pick out the non-validation data from x_vals. This requires x_vals
    # to be cast to a format that supports slicing, such as the compressed
    # sparse row format (converting to that is also fast).
    seen_indices = range(targets_seen.shape[0])
    seen_indices = sorted(set(seen_indices) - set(validation_indices))
    x_vals = [x_vals[index] for index in seen_indices]
    # y_vals is a row vector, need to transpose it to get the same shape as
    # x_vals
    y_vals = y_vals[:, seen_indices].transpose()

    return iterator, validation_indices, x_vals, y_vals
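The validation-mask bookkeeping on its own, with toy labels and indices:

import numpy as np

y_vals = np.array([0, 1, 1, 0, 1])
validation_indices = [1, 2]
mask = np.zeros(y_vals.shape[0])
mask[validation_indices] = 1      # 1 marks rows held out for validation
seen_data_mask = mask == 0
print(y_vals[seen_data_mask])     # rows available for train/test splitting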