Exemplo n.º 1
0
    def update(self, arr):
        """Update the accumulators of the StatsCollector given a complete matrix;
        assume that all observations have the same weight.
        Properly handle missing values.

        'arr' must be two-dimensional, with one observation per row and as
        many columns as self.width().  NaN entries are treated as missing:
        they are counted in self.nnan and masked out of the per-column sum,
        sum of squares, min and max.  Rows containing at least one missing
        value are left out of sum_xxt, sum_nomi and nxxt.

        Nothing is returned; the accumulators n, nnan, nnonnan, sum, sum_ssq,
        sum_xxt, sum_nomi, nxxt, min, argmin, max and argmax are updated on
        self.
        """
        assert self.width() == arr.shape[1]

        ## Update number of elements counters
        (length, width) = arr.shape
        initial_n = self.n.copy()  # Keep old n for argmin/argmax
        n = zeros(width) + length
        missings = isnan(arr)
        nnan = sum(missings, 0)
        self.n += n
        self.nnan += nnan
        self.nnonnan += n - nnan

        ## Create masked version of arr and update accumulators
        ma = masked_array(arr, mask=missings)  # Here, mask missings only
        arr_nomissings = arr[~normal_sometrue(missings,
                                              1)]  # Here, strip missing rows
        self.sum = self.sum + sum(ma, 0)  # += does not work...
        self.sum_ssq = self.sum_ssq + sum(ma * ma, 0)  # += does not work...
        self.sum_xxt = self.sum_xxt + matrixmultiply(transpose(arr_nomissings),
                                                     arr_nomissings)
        self.sum_nomi = self.sum_nomi + sum(arr_nomissings, 0)
        self.nxxt += arr_nomissings.shape[0]

        ## Update (arg)min / make sure old argmin is kept if not updated
        ma_argmin = argmin(ma, 0)
        ma_min = ma[ma_argmin, range(width)]
        # Stack [old min, batch min]; an argmin of 1 along axis 0 flags the
        # columns where the batch minimum replaces the stored one.
        min_newpos = argmin(array([self.min, ma_min]), 0).astype('Bool')
        self.min[min_newpos] = ma_min[min_newpos]
        # XXX Argmin computation needs to be revised! Does not work, at least
        # when passing array of shape (1,1).
        # Row indices within this batch are shifted by the number of rows seen
        # before the call, so argmin refers to the whole stream.
        self.argmin[min_newpos] = ma_argmin[min_newpos] + initial_n[min_newpos]

        ## Update (arg)max / make sure old argmax is kept if not updated
        ma_argmax = argmax(ma, 0)
        ma_max = ma[ma_argmax, range(width)]
        max_newpos = argmax(array([self.max, ma_max]), 0).astype('Bool')
        self.max[max_newpos] = ma_max[max_newpos]
        # XXX Argmax computation needs to be revised! Does not work, at least
        # when passing array of shape (1,1).
        # The offset must be selected with max_newpos (not min_newpos) so that
        # it lines up with the columns whose max was actually replaced.
        self.argmax[max_newpos] = ma_argmax[max_newpos] + initial_n[max_newpos]
Exemplo n.º 2
0
    def printStats(self, os=sys.stdout, pretty=True):
        """Print a nice report with the statistics.

        If 'pretty' is set to False, the output will not be so nice, but at least
        will not cause test failures due to a zero test blank tolerance.

        Arguments:
          os     -- file-like object that receives the whole report, section
                    titles included.
          pretty -- passed through unchanged to _printMatrix.

        If at least one column has a zero non-missing count, a one-line
        notice is written to 'os' and nothing else is printed.
        """
        if len(nonzero(self.nnonnan)[0]) != len(self.nnonnan):
            print >> os, "One or more columns in StatsCollector does not contain any data"
            return  # Nothing accumulated yet

        stats = self.getStats()
        sk = [
            "N", "NMISSING", "NNONMISSING", "E", "V", "STDDEV", "STDERR",
            "SUM", "SUMSQ", "MIN", "ARGMIN", "MAX", "ARGMAX"
        ]

        # One row per statistic in 'sk', one column per field.
        m = array([[stats[k][i] for i in range(self.width())] for k in sk])

        _printMatrix(m, sk, self.fieldnames, os, pretty)

        # The section titles must go to 'os' like the matrices themselves;
        # a bare print would send them to sys.stdout and split the report.
        print >> os, "\nCovariance Matrix:"
        _printMatrix(stats["COV"], self.fieldnames, self.fieldnames, os,
                     pretty)
        print >> os, "\nCorrelation Matrix:"
        _printMatrix(stats["CORR"], self.fieldnames, self.fieldnames, os,
                     pretty)
Exemplo n.º 3
0
    def update(self, arr):
        """Update the accumulators of the StatsCollector given a complete matrix;
        assume that all observations have the same weight.
        Properly handle missing values.

        'arr' must be two-dimensional, with one observation per row and as
        many columns as self.width().  NaN entries are treated as missing:
        they are counted in self.nnan and masked out of the per-column sum,
        sum of squares, min and max.  Rows containing at least one missing
        value are left out of sum_xxt, sum_nomi and nxxt.

        Nothing is returned; the accumulators n, nnan, nnonnan, sum, sum_ssq,
        sum_xxt, sum_nomi, nxxt, min, argmin, max and argmax are updated on
        self.
        """
        assert self.width() == arr.shape[1]

        ## Update number of elements counters
        (length, width) = arr.shape
        initial_n     = self.n.copy()          # Keep old n for argmin/argmax
        n             = zeros(width) + length
        missings      = isnan(arr)
        nnan          = sum(missings, 0)
        self.n       += n
        self.nnan    += nnan
        self.nnonnan += n - nnan

        ## Create masked version of arr and update accumulators
        ma = masked_array(arr, mask=missings)        # Here, mask missings only
        arr_nomissings = arr[~normal_sometrue(missings, 1)]  # Here, strip missing rows
        self.sum      = self.sum + sum(ma, 0)           # += does not work...
        self.sum_ssq  = self.sum_ssq + sum(ma*ma, 0)    # += does not work...
        self.sum_xxt  = self.sum_xxt + matrixmultiply(transpose(arr_nomissings),
                                                      arr_nomissings)
        self.sum_nomi = self.sum_nomi + sum(arr_nomissings, 0)
        self.nxxt    += arr_nomissings.shape[0]

        ## Update (arg)min / make sure old argmin is kept if not updated
        ma_argmin  = argmin(ma, 0)
        ma_min     = ma[ma_argmin, range(width)]
        # Stack [old min, batch min]; an argmin of 1 along axis 0 flags the
        # columns where the batch minimum replaces the stored one.
        min_newpos = argmin(array([self.min, ma_min]), 0).astype('Bool')
        self.min[min_newpos]    = ma_min[min_newpos]
        # XXX Argmin computation needs to be revised! Does not work, at least
        # when passing array of shape (1,1).
        # Row indices within this batch are shifted by the number of rows seen
        # before the call, so argmin refers to the whole stream.
        self.argmin[min_newpos] = ma_argmin[min_newpos] + initial_n[min_newpos]

        ## Update (arg)max / make sure old argmax is kept if not updated
        ma_argmax  = argmax(ma, 0)
        ma_max     = ma[ma_argmax, range(width)]
        max_newpos = argmax(array([self.max, ma_max]), 0).astype('Bool')
        self.max[max_newpos]    = ma_max[max_newpos]
        # XXX Argmax computation needs to be revised! Does not work, at least
        # when passing array of shape (1,1).
        # The offset must be selected with max_newpos (not min_newpos) so that
        # it lines up with the columns whose max was actually replaced.
        self.argmax[max_newpos] = ma_argmax[max_newpos] + initial_n[max_newpos]
Exemplo n.º 4
0
    def printStats(self, os = sys.stdout, pretty = True):
        """Print a nice report with the statistics.

        If 'pretty' is set to False, the output will not be so nice, but at least
        will not cause test failures due to a zero test blank tolerance.

        Arguments:
          os     -- file-like object that receives the whole report, section
                    titles included.
          pretty -- passed through unchanged to _printMatrix.

        If at least one column has a zero non-missing count, a one-line
        notice is written to 'os' and nothing else is printed.
        """
        if len(nonzero(self.nnonnan)[0]) != len(self.nnonnan):
            print >>os, "One or more columns in StatsCollector does not contain any data"
            return                      # Nothing accumulated yet

        stats = self.getStats()
        sk = ["N"      , "NMISSING" , "NNONMISSING" , "E"   ,
              "V"      , "STDDEV"   , "STDERR"      , "SUM" ,
              "SUMSQ"  , "MIN"      , "ARGMIN"      , "MAX" ,  "ARGMAX" ]

        # One row per statistic in 'sk', one column per field.
        m = array([[stats[k][i] for i in range(self.width())] for k in sk])

        _printMatrix(m, sk, self.fieldnames, os, pretty)

        # The section titles must go to 'os' like the matrices themselves;
        # a bare print would send them to sys.stdout and split the report.
        print >>os, "\nCovariance Matrix:"
        _printMatrix(stats["COV"], self.fieldnames, self.fieldnames, os, pretty)
        print >>os, "\nCorrelation Matrix:"
        _printMatrix(stats["CORR"], self.fieldnames, self.fieldnames, os, pretty)