Exemplo n.º 1
0
    def getSumsMatrixAndNames(self, condvari, condvarj, summarize_min_prob=1e10):
        """Return the tuple (mat, rownames, colnames) of summed cube values.

        The cube is looked up under (condvari, condvarj); when it is only stored
        under (condvarj, condvari) its first two axes are swapped.

        Rows follow the string domain of condvari followed by its numerical
        domain as grouped by numberDomain(condvari, condvarj, summarize_min_prob);
        columns follow the ids given by var_domains[condvarj].domain_descr_id().

        mat has one extra leading row and one extra leading column:
          - mat[0, 1:] is the sum of the selected columns over the whole first axis
          - mat[0, 0] is the sum of that row over the columns
          - mat[1+i, 1:] is the sum of the slices whose ids belong to row group i
          - mat[1+i, 0] is the sum of that row over the columns
        Both name lists get '/*/' prepended as the label of the extra row/column.
        """
        try:
            cube = self.cubes[(condvari, condvarj)]
        except KeyError:
            # stored with the variables in the other order: swap the first two axes
            cube = transpose(self.cubes[(condvarj, condvari)],(1,0,2))

        condi_domain = self.stringDomain(condvari)+self.numberDomain(condvari, condvarj, summarize_min_prob)
        condi_descr = ['/*/'] + [ str(interval) for interval,ids in condi_domain ]

        condj_descr, condj_id = self.var_domains[condvarj].domain_descr_id()

        # keep only the columns listed for condvarj, in that order
        values = take(cube[:,:,:],condj_id,axis=1)
        _, ncols, depth = values.shape
        ngroups = len(condi_domain)
        mat = zeros((1+ngroups,1+ncols,depth),numarray.typefrom(values))

        column_totals = sum(values, 0)
        mat[0, 1:1+ncols, :] = column_totals
        mat[0, 0, :] = sum(column_totals)

        for offset,(interval,ids) in enumerate(condi_domain):
            target = 1+offset
            # slice of mat: the in-place additions below land in mat[target, 0, :]
            margin = mat[target,0,:]
            for index in ids:
                slab = values[index]
                mat[target, 1:, :] += slab
                margin += sum(slab)

        # the column names list returned by domain_descr_id() is extended in place
        condj_descr.insert(0,'/*/')
        return mat, condi_descr, condj_descr
Exemplo n.º 2
0
def to_diagonal(a):
    """Build a square matrix holding the elements of 'a' on its main diagonal.

    'a' must be one-dimensional (checked with an assert). The result is
    created with zeros(), has shape (len(a), len(a)) and uses the element
    type reported by numarray.typefrom(a).
    """
    assert len(a.shape)==1
    size = len(a)
    diagonal = zeros(shape=(size,size), type=numarray.typefrom(a))
    for position, entry in enumerate(a):
        diagonal[position,position] = entry
    return diagonal
Exemplo n.º 3
0
def to_diagonal(a):
    """Build a square matrix holding the elements of 'a' on its main diagonal.

    'a' must be one-dimensional (checked with an assert). The result is
    created with zeros(), has shape (len(a), len(a)) and uses the element
    type reported by numarray.typefrom(a).
    """
    assert len(a.shape) == 1
    size = len(a)
    diagonal = zeros(shape=(size, size), type=numarray.typefrom(a))
    for position, entry in enumerate(a):
        diagonal[position, position] = entry
    return diagonal
Exemplo n.º 4
0
 def _check_input(array):
     """Classify 'array' by its shape and element type.

     Returns RGB for a three-dimensional array whose last axis has length 3
     and whose type is n.UInt8. For a two-dimensional array, returns the
     entry of _inverse_typecodes registered for its type.

     Raises ValueError for any other shape/type combination.
     """
     shape = array.shape
     typecode = n.typefrom(array)
     if len(shape) == 3 and shape[2] == 3 and typecode == n.UInt8:
         return RGB
     # membership test with 'in' rather than the deprecated dict.has_key()
     if len(shape) == 2 and typecode in _inverse_typecodes:
         return _inverse_typecodes[typecode]
     raise ValueError('Array is not one of the acceptable types (UInt8 * 3, UInt8, UInt16, UInt32, Float64, Complex64)')
Exemplo n.º 5
0
 def _check_input(array):
     """Classify 'array' by its shape and element type.

     Returns RGB for a three-dimensional array whose last axis has length 3
     and whose type is n.UInt8. For a two-dimensional array, returns the
     entry of _inverse_typecodes registered for its type.

     Raises ValueError for any other shape/type combination.
     """
     shape = array.shape
     typecode = n.typefrom(array)
     if len(shape) == 3 and shape[2] == 3 and typecode == n.UInt8:
         return RGB
     # membership test with 'in' rather than the deprecated dict.has_key()
     if len(shape) == 2 and typecode in _inverse_typecodes:
         return _inverse_typecodes[typecode]
     raise ValueError('Array is not one of the acceptable types (UInt8 * 3, UInt8, UInt16, UInt32, Float64, Complex64)')
Exemplo n.º 6
0
    def numberDomain(self, varname, condvar, summarize_min_prob = -1.):
        """Group the numerical part of varname's domain into (interval, indexes) pairs.

        Returns an ordered list in which
          - interval is an instance of Interval
          - indexes is the list of cube-indexes (along the varname axis) whose
            individual intervals together make up interval, so the slices at
            those indexes must be summed to get the values for interval.

        With summarize_min_prob <= 0 every original interval is its own group.
        Otherwise consecutive intervals are merged, accumulating their
        conditional probabilities (counts divided by the per-column total of
        counts), until the largest accumulated probability reaches
        summarize_min_prob; the last group is emitted even if it has not
        reached that threshold.

        An empty numerical domain is returned unchanged.
        """
        numeric = self.var_domains[varname].number_domain()
        if not numeric: # empty list
            return numeric

        if summarize_min_prob<=0.:
            # no summarizing requested: one group per original interval
            return [ (interval, [index]) for interval,index in numeric ]

        try:
            cube = self.cubes[(varname, condvar)]
        except KeyError:
            # stored with the variables in the other order: swap the first two axes
            cube = transpose(self.cubes[(condvar, varname)],(1,0,2))

        numeric_ids = [ index for interval,index in numeric ]
        # only the "counts" (value index 0) are considered
        counts = cube[:,:,0]
        # 1e-6 is added so that a zero total cannot cause a division by zero
        totals = abs(sum(counts, 0))+1e-6
        # conditional probabilities, restricted to the numerical domain
        condprobs = take(counts/totals, numeric_ids)
        nrows,ncols = condprobs.shape

        groups = []
        merged = None
        accumulated = zeros(ncols, numarray.typefrom(condprobs))
        members = []
        for pos,(interval,index) in enumerate(numeric):
            # NOTE(review): this relies on the truth value of the Interval being
            # built; presumably any Interval is true -- confirm against Interval
            if merged:
                merged = Interval(merged.include_low, merged.low, interval.high, interval.include_high)
            else:
                merged = interval
            members.append(index)
            accumulated += condprobs[pos]
            if max(accumulated)>=summarize_min_prob or pos==nrows-1:
                # close the current group and start a fresh one
                groups.append( (merged, members) )
                merged = None
                accumulated[:] = 0.
                members = []

        return groups
Exemplo n.º 7
0
 def enlarge(self,idx):
     """Grow self.data so that position idx is a valid index.

     idx is an int (treated as a one-element list) or a sequence of ints, one
     per leading axis. Each of those axes is extended to at least idx[k]+1;
     axes that are already large enough keep their size. The old contents are
     copied into the low corner of a new array created with zeros(), which
     then replaces self.data.
     """
     if isinstance(idx,int):
         idx = [idx] # make it a list
     current = self.data.shape
     target = list(current)
     for axis, position in enumerate(idx):
         target[axis] = max(target[axis], position+1)
     grown = zeros(target, numarray.typefrom(self.data))
     # region of the new array that the old data occupies
     region = [ slice(0,extent) for extent in current ]
     grown[region] = self.data
     self.data = grown
Exemplo n.º 8
0
    def trimmedNumberDomain(self, varname, condvar, summarize_remove_n, summarize_min_prob):
        """Return the numerical part of varname's domain with the least probable intervals merged away.

        The result is an ordered list of (interval, indexes) pairs in which
          - interval is an instance of Interval
          - indexes is a tuple of cube-indexes (along the varname axis) whose
            individual intervals together make up interval, so the slices at
            those indexes must be summed to get the values for interval.

        Starting from one entry per original interval, the entry whose largest
        conditional probability (counts divided by the per-column total of
        counts) is smallest is repeatedly merged with a neighbour: the only
        neighbour for an entry at either end, otherwise the neighbour with the
        smaller largest probability (the previous one on ties).

        Merging stops as soon as any of these holds:
          - summarize_remove_n merges have been performed
          - the smallest of those probabilities has reached summarize_min_prob
          - only one entry is left
          - the union of the two entries is not an interval (their addition
            raises ValueError)

        If summarize_remove_n <= 0 and summarize_min_prob >= 1., or the
        numerical domain is empty, the unmerged list is returned directly.
        """

        num_domain = self.var_domains[varname].number_domain()
        summarized_domain = [ (interval, (index,) ) for interval,index in num_domain ]

        if (summarize_remove_n<=0 and summarize_min_prob>=1.) or len(summarized_domain)==0:
            return summarized_domain

        try:
            cube = self.cubes[(varname, condvar)]
        except KeyError:
            # stored with the variables in the other order: swap the first two axes
            cube = self.cubes[(condvar, varname)]
            cube = transpose(cube,(1,0,2))

        num_ids = [ index for interval,index in num_domain ]
        # we consider only the "counts" (value index 0)
        counts = cube[:,:,0]
        # sum the counts
        count_sums = abs(sum(counts, 0))+1e-6  # we add 1e-6 just to make sure we don't have zeros and divisions by zero
        condprobs = counts/count_sums
        # we keep only the numerical domain
        condprobs = take(condprobs,num_ids)

        l,w = condprobs.shape
        nremoved = 0

        # a merge needs two entries: with a single one left there is no
        # neighbour to merge with (indexing one would raise IndexError)
        while l>1:
            maxprobs = array([ max(p) for p in condprobs ])
            k = argmin(maxprobs)
            minprob = maxprobs[k]
            if nremoved>=summarize_remove_n or minprob>=summarize_min_prob:
                break # exit while loop
            # choose the adjacent pair (k_a, k_b) that absorbs entry k
            if k==0:
                k_a = 0
                k_b = 1
            elif k==l-1:
                k_a = l-2
                k_b = l-1
            elif maxprobs[k-1]<=maxprobs[k+1]:
                k_a = k-1
                k_b = k
            else:
                k_a = k
                k_b = k+1
            interval_a, ids_a = summarized_domain[k_a]
            interval_b, ids_b = summarized_domain[k_b]
            try:
                union_interval = interval_a + interval_b
            except ValueError: # union of the 2 intervals is not an interval!
                break
            summarized_domain[k_a] = (union_interval, ids_a+ids_b)
            del summarized_domain[k_b]
            # rebuild the probabilities with one row less: rows up to k_a are
            # copied, then rows from k_b on are added starting at k_a, so the
            # new row k_a holds the sum of the two merged rows
            newcondprobs = zeros((l-1,w),numarray.typefrom(condprobs))
            newcondprobs[0:k_b] += condprobs[0:k_b]
            newcondprobs[k_a:] += condprobs[k_b:]
            condprobs = newcondprobs
            l = l-1
            nremoved += 1

        return summarized_domain