Example #1
    def ricipolla(self, k, m):
        # Restart ("thick restart") step of the Lanczos iteration: keep the
        # k best Ritz pairs out of the m computed ones and rebuild the
        # basis vectors accordingly.
        self.alpha[0:k] = self.eval[0:k]

        self.beta[0:k] = self.beta[m - 1] * self.evect[0:k, m - 1]

        # form the k Ritz vectors as linear combinations of the m basis vectors
        a = self.class4vect(k, self.dim)
        self.class4vect.mat_mult(a, self.evect[0:k, 0:m], self.q)

        for i in range(k):
            a[i].normalizzaauto()
            self.class4vect.copy_to_a_from_b(self.q[i], a[i])

        a = None

        self.class4vect.copy_to_a_from_b(self.q[k], self.q[m])

        # update the omega bookkeeping used for re-orthogonalization
        o = self.omega[0:m, 0:m].copy()

        o = dotblas.dot(o, numpy.transpose(self.evect))

        for i in range(k):
            self.omega[i, k] = self.omega[k, i] = o[i, k]

        o = dotblas.dot(self.evect, o)

        self.omega[0:k, 0:k] = o[0:k, 0:k]
Example #2
    def Moltiplica(self, res, v):
        # res += M . v, where M is stored either directly (one factor)
        # or as a product of two factors
        if len(self.mR) == 1:
            res.vr[:] += dotblas.dot(self.mR[0], v.vr)
        else:
            res.vr[:] += dotblas.dot(self.mR[0], dotblas.dot(self.mR[1], v.vr))

        if self.shift != 0.0:
            res.add_from_vect_with_fact(v, self.shift)
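
Moltiplica is the (optionally shifted) matrix-vector product that drives the Lanczos iteration: with two factors in self.mR, the covariance product is evaluated as two thin multiplications instead of ever forming the full matrix. A minimal standalone sketch of the same idea in plain numpy (names are illustrative, not PyMca's API):

import numpy

def shifted_matvec(mR, v, shift=0.0):
    # one factor: M . v; two factors: M0 . (M1 . v), which avoids
    # materializing the (possibly huge) product M0 . M1
    if len(mR) == 1:
        res = numpy.dot(mR[0], v)
    else:
        res = numpy.dot(mR[0], numpy.dot(mR[1], v))
    if shift != 0.0:
        res += shift * v  # spectral shift, as add_from_vect_with_fact does
    return res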
Example #3
             a = images[i]
             #a.shape = r, c
             print("Eigenvalue %d = %f" % (i, eigenvalues[i]))
             fname = "Image%02d.edf" % (i+10)
             if os.path.exists(fname):
                 os.remove(fname)
             edf = EdfFile.EdfFile(fname, 'wb')
             edf.WriteImage({}, a)
             edf = None
 else:
     stack = EDFStack.EDFStack(inputfile, imagestack=False, dtype=numpy.float64)
     r, c, nChannels = stack.data.shape
     if 0:
         stack.data.shape = r * c, nChannels
         t0 = time.time()
         covMatrix0 = dotblas.dot(stack.data.T, stack.data)
         print("Standard Elapsed = ", time.time() - t0)
         print("Standard Shape = ", covMatrix0.shape)
         t0 = time.time()
         stack.data.shape = r, c, nChannels
         covMatrix1, sumSpectrum, nPixels = getCovarianceMatrix(stack,
                                                                index=-1,
                                                                dtype='float64',
                                                                force=True)
         print("Dynamic Elapsed = ", time.time() - t0)
         print("Dynamic Shape = ", covMatrix1.shape)
         print(covMatrix0.max(), covMatrix0.min(), "Reference  = ", covMatrix0[1300, 1350:1360])
         print(covMatrix1.max(), covMatrix1.min(), "Calculated = ", covMatrix1[1300, 1350:1360])
         delta = covMatrix1-covMatrix0
         maxDiff = delta.max()
         print("Max diff   = ", maxDiff)
Example #4
 def mat_mult(self, evect, q):
     # vr[:k] = evect . q[:m], i.e. build linear combinations of the
     # vectors stored in q
     self.vr[:evect.shape[0]] = dotblas.dot(evect.astype(self.tipo),
                                            q.vr[:evect.shape[1]])
Example #5
def lanczosPCA(stack, ncomponents=10, binning=None, **kw):
    if DEBUG:
        print("lanczosPCA")
    if binning is None:
        binning = 1

    if hasattr(stack, "info") and hasattr(stack, "data"):
        data = stack.data
    else:
        data = stack

    if not isinstance(data, numpy.ndarray):
        raise TypeError("lanczosPCA is only supported when using numpy arrays")

    # wrapmatrix = "double"
    wrapmatrix = "single"

    dtype = numpy.float64
    if wrapmatrix == "double":
        data = data.astype(dtype)

    if len(data.shape) == 3:
        r, c, N = data.shape
        data.shape = r * c, N
    else:
        r, N = data.shape
        c = 1

    npixels = r * c

    if binning > 1:
        # assigning to data.shape may fail with non-contiguous arrays,
        # so use reshape instead
        data = numpy.reshape(data,
                             [data.shape[0], data.shape[1] // binning, binning])
        data = numpy.sum(data, axis=-1)
        N //= binning

    if ncomponents > N:
        raise ValueError("Number of components too high.")

    avg = numpy.sum(data, 0) / (1.0 * npixels)
    numpy.subtract(data, avg, data)

    Lanczos.LanczosNumericMatrix.tipo = dtype
    Lanczos.LanczosNumericVector.tipo = dtype

    if wrapmatrix == "single":
        SM = [dotblas.dot(data.T, data).astype(dtype)]
        SM = Lanczos.LanczosNumericMatrix(SM)
    else:
        SM = Lanczos.LanczosNumericMatrix([data.T.astype(dtype), data.astype(dtype)])

    eigenvalues, eigenvectors = Lanczos.solveEigenSystem(SM, ncomponents, shift=0.0, tol=1.0e-15)
    SM = None
    numpy.add(data, avg, data)

    images = numpy.zeros((ncomponents, npixels), data.dtype)
    vectors = numpy.zeros((ncomponents, N), dtype)
    for i in range(ncomponents):
        vectors[i, :] = eigenvectors[i].vr
        images[i, :] = dotblas.dot(data, (eigenvectors[i].vr).astype(data.dtype))
    data = None
    images.shape = ncomponents, r, c
    return images, eigenvalues, vectors
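
Assuming lanczosPCA and its dependencies (numpy, dotblas, Lanczos) are importable as in the snippet above, a typical call on a (rows, columns, channels) stack would look like this sketch:

import numpy

# 20 x 30 pixel map with 128 spectral channels; random data only to
# exercise the function
stack = numpy.random.random((20, 30, 128))
images, eigenvalues, vectors = lanczosPCA(stack, ncomponents=5)
print(images.shape)   # (5, 20, 30): one score image per component
print(vectors.shape)  # (5, 128): one spectral eigenvector per component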
Example #6
def multipleArrayPCA(stackList, ncomponents=10, binning=None, **kw):
    """
    Given a list of arrays, calculate the requested principal components from
    the matrix resulting from their column concatenation. Therefore, all the
    input arrays must have the same number of rows.
    """
    stack = stackList[0]
    if hasattr(stack, "info") and hasattr(stack, "data"):
        data = stack.data
    else:
        data = stack

    if not isinstance(data, numpy.ndarray):
        raise TypeError("multipleArrayPCA is only supported when using numpy arrays")

    if len(data.shape) == 3:
        r, c = data.shape[:2]
        npixels = r * c
    else:
        c = None
        r = data.shape[0]
        npixels = r

    # reshape all the input data and subtract the mean
    shapeList = []
    avgList = []
    eigenvectorLength = 0
    for i in range(len(stackList)):
        shape = stackList[i].shape
        eigenvectorLength += shape[-1]
        shapeList.append(shape)
        stackList[i].shape = npixels, -1
        avg = numpy.sum(stackList[i], 0) / (1.0 * npixels)
        numpy.subtract(stackList[i], avg, stackList[i])
        avgList.append(avg)

    # create the needed storage space for the covariance matrix
    covMatrix = numpy.zeros((eigenvectorLength, eigenvectorLength), numpy.float32)

    rowOffset = 0
    indexDict = {}
    for i in range(len(stackList)):
        iVectorLength = shapeList[i][-1]
        colOffset = 0
        for j in range(len(stackList)):
            jVectorLength = shapeList[j][-1]
            if i <= j:
                covMatrix[
                    rowOffset : (rowOffset + iVectorLength), colOffset : (colOffset + jVectorLength)
                ] = dotblas.dot(stackList[i].T, stackList[j])
                if i < j:
                    key = "%02d%02d" % (i, j)
                    indexDict[key] = (rowOffset, rowOffset + iVectorLength, colOffset, colOffset + jVectorLength)
            else:
                key = "%02d%02d" % (j, i)
                rowMin, rowMax, colMin, colMax = indexDict[key]
                covMatrix[rowOffset : (rowOffset + iVectorLength), colOffset : (colOffset + jVectorLength)] = covMatrix[
                    rowMin:rowMax, colMin:colMax
                ].T
            colOffset += jVectorLength
        rowOffset += iVectorLength
    indexDict = None

    # I have the covariance matrix, calculate the eigenvectors and eigenvalues
    covMatrix = [covMatrix]
    covMatrix = Lanczos.LanczosNumericMatrix(covMatrix)
    eigenvalues, evectors = Lanczos.solveEigenSystem(covMatrix, ncomponents, shift=0.0, tol=1.0e-15)
    covMatrix = None

    images = numpy.zeros((ncomponents, npixels), numpy.float32)
    eigenvectors = numpy.zeros((ncomponents, eigenvectorLength), numpy.float32)
    for i in range(ncomponents):
        eigenvectors[i, :] = evectors[i].vr
        colOffset = 0
        for j in range(len(stackList)):
            jVectorLength = shapeList[j][-1]
            images[i, :] += dotblas.dot(stackList[j], eigenvectors[i, colOffset : (colOffset + jVectorLength)])
            colOffset += jVectorLength

    # restore shapes and values
    for i in range(len(stackList)):
        numpy.add(stackList[i], avgList[i], stackList[i])
        stackList[i].shape = shapeList[i]

    if c is None:
        images.shape = ncomponents, r, 1
    else:
        images.shape = ncomponents, r, c

    return images, eigenvalues, eigenvectors
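
The double loop above fills the covariance matrix block by block: each (i, j) block is the cross product of two input arrays, and the blocks below the diagonal are copied from their transposed counterparts. The result is the same as concatenating the arrays column-wise first, which this small check illustrates:

import numpy

a = numpy.random.random((100, 4))
b = numpy.random.random((100, 6))
# block-wise assembly, as in the function above
blocks = numpy.block([[numpy.dot(a.T, a), numpy.dot(a.T, b)],
                      [numpy.dot(b.T, a), numpy.dot(b.T, b)]])
# direct computation on the column concatenation
direct = numpy.dot(numpy.hstack([a, b]).T, numpy.hstack([a, b]))
assert numpy.allclose(blocks, direct)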
Example #7
def lanczosPCA2(stack, ncomponents=10, binning=None, **kw):
    """
    This is a fast method, but it may loose information
    """
    if hasattr(stack, "info") and hasattr(stack, "data"):
        data = stack.data
    else:
        data = stack

    # check we have received a numpy.ndarray and not an HDF5 group
    # or other type of dynamically loaded data
    if not isinstance(data, numpy.ndarray):
        raise TypeError(\
            "lanczosPCA2 is only supported when using numpy arrays")
    r, c, N = data.shape

    npixels = r * c  # number of pixels
    data.shape = r * c, N

    if npixels < 2000:
        BINNING = 2
    elif npixels < 5000:
        BINNING = 4
    elif npixels < 10000:
        BINNING = 8
    elif npixels < 20000:
        BINNING = 10
    elif npixels < 30000:
        BINNING = 15
    elif npixels < 60000:
        BINNING = 20
    else:
        BINNING = 30
    if BINNING is not None:
        dataorig = data
        remainder = npixels % BINNING
        if remainder:
            data = data[0:BINNING * int(npixels / BINNING), :]
        data.shape = data.shape[0] // BINNING, BINNING, data.shape[1]
        data = numpy.swapaxes(data, 1, 2)
        data = numpy.sum(data, axis=-1)
        rc = int(r * c / BINNING)

    tipo = numpy.float64
    neig = ncomponents + 5

    # "doppia": no covariance matrix is built, two multiplications per product;
    # "singola": the covariance matrix is built, one multiplication per product.
    # Only the second assignment takes effect here.
    rappmatrix = "doppia"
    rappmatrix = "singola"

    # compute the mean spectrum
    mediadata = numpy.sum(data, axis=0) / numpy.array([len(data)], data.dtype)

    numpy.subtract(data, mediadata, data)

    Lanczos.LanczosNumericMatrix.tipo = tipo
    Lanczos.LanczosNumericVector.tipo = tipo

    if rappmatrix == "singola":
        SM = [dotblas.dot(data.T, data).astype(tipo)]
        SM = Lanczos.LanczosNumericMatrix(SM)
    else:
        SM = Lanczos.LanczosNumericMatrix(
            [data.T.astype(tipo), data.astype(tipo)])

    # calculate eigenvalues and eigenvectors
    ev, eve = Lanczos.solveEigenSystem(SM, neig, shift=0.0, tol=1.0e-7)
    SM = None
    rc = rc * BINNING

    newmat = numpy.zeros((r * c, neig), numpy.float64)

    data = data.astype(tipo)

    # numpy in-place addition to make sure no intermediate copies are made
    numpy.add(data, mediadata, data)

    for i in range(neig):
        newmat[:, i] = dotblas.dot(dataorig,
                                   (eve[i].vr).astype(dataorig.dtype))

    newcov = dotblas.dot(newmat.T, newmat)
    evals, evects = numpy.linalg.eigh(newcov)

    nuovispettri = dotblas.dot(evects, eve.vr[:neig])
    images = numpy.zeros((ncomponents, npixels), data.dtype)
    vectors = numpy.zeros((ncomponents, N), tipo)
    for i in range(ncomponents):
        vectors[i, :] = nuovispettri[-1 - i, :]
        images[i, :] = dotblas.dot(newmat,
                                   evects[-1 - i].astype(dataorig.dtype))
    images.shape = ncomponents, r, c
    return images, evals, vectors
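
The speed of lanczosPCA2 comes from summing groups of BINNING consecutive pixel spectra before building the covariance matrix, which shrinks the pixel dimension by that factor (and is where information may be lost). The reshape/swapaxes/sum sequence used above boils down to this sketch:

import numpy

npixels, nchannels, BINNING = 12, 5, 4
data = numpy.random.random((npixels, nchannels))
# sum each group of BINNING consecutive spectra
binned = data.reshape(npixels // BINNING, BINNING, nchannels).sum(axis=1)
# same result as the swapaxes variant used in the function
reference = numpy.swapaxes(
    data.reshape(npixels // BINNING, BINNING, nchannels), 1, 2).sum(axis=-1)
assert numpy.allclose(binned, reference)
print(binned.shape)  # (3, 5)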
Example #8
def multipleArrayPCA(stackList0,
                     ncomponents=10,
                     binning=None,
                     legacy=True,
                     **kw):
    """
    Given a list of arrays, calculate the requested principal components from
    the matrix resulting from their column concatenation. Therefore, all the
    input arrays must have the same number of rows.
    """
    stackList = [None] * len(stackList0)
    for i, stack in enumerate(stackList0):
        if hasattr(stack, "info") and hasattr(stack, "data"):
            data = stack.data
        else:
            data = stack
        stackList[i] = data

    stack = stackList[0]
    if hasattr(stack, "info") and hasattr(stack, "data"):
        data = stack.data
    else:
        data = stack

    if not isinstance(data, numpy.ndarray):
        raise TypeError(\
            "multipleArrayPCA is only supported when using numpy arrays")

    if len(data.shape) == 3:
        r, c = data.shape[:2]
        npixels = r * c
    else:
        c = None
        r = data.shape[0]
        npixels = r

    # reshape all the input data and subtract the mean
    shapeList = []
    avgList = []
    eigenvectorLength = 0
    for i in range(len(stackList)):
        shape = stackList[i].shape
        eigenvectorLength += shape[-1]
        shapeList.append(shape)
        stackList[i].shape = npixels, -1
        avg = numpy.sum(stackList[i], 0) / (1.0 * npixels)
        numpy.subtract(stackList[i], avg, stackList[i])
        avgList.append(avg)

    #create the needed storage space for the covariance matrix
    covMatrix = numpy.zeros((eigenvectorLength, eigenvectorLength),
                            numpy.float32)

    rowOffset = 0
    indexDict = {}
    for i in range(len(stackList)):
        iVectorLength = shapeList[i][-1]
        colOffset = 0
        for j in range(len(stackList)):
            jVectorLength = shapeList[j][-1]
            if i <= j:
                covMatrix[rowOffset:(rowOffset + iVectorLength),
                          colOffset:(colOffset + jVectorLength)] =\
                          dotblas.dot(stackList[i].T, stackList[j])
                if i < j:
                    key = "%02d%02d" % (i, j)
                    indexDict[key] = (rowOffset, rowOffset + iVectorLength,
                                      colOffset, colOffset + jVectorLength)
            else:
                key = "%02d%02d" % (j, i)
                rowMin, rowMax, colMin, colMax = indexDict[key]
                covMatrix[rowOffset:(rowOffset + iVectorLength),
                          colOffset:(colOffset + jVectorLength)] =\
                          covMatrix[rowMin:rowMax, colMin:colMax].T
            colOffset += jVectorLength
        rowOffset += iVectorLength
    indexDict = None

    #I have the covariance matrix, calculate the eigenvectors and eigenvalues
    totalVariance = numpy.diag(covMatrix).sum()
    evalues, evectors = numpy.linalg.eigh(covMatrix)
    covMatrix = None
    print("Total Variance = ", totalVariance.sum())

    images = numpy.zeros((ncomponents, npixels), numpy.float32)
    eigenvectors = numpy.zeros((ncomponents, eigenvectorLength), numpy.float32)
    eigenvalues = numpy.zeros((ncomponents, ), numpy.float32)

    a = [(evalues[i], i) for i in range(len(evalues))]
    a.sort()
    a.reverse()
    totalExplainedVariance = 0.0
    for i0 in range(ncomponents):
        i = a[i0][1]
        eigenvalues[i0] = evalues[i]
        partialExplainedVariance = 100. * evalues[i] / \
                                   totalVariance
        print("PC%02d  Explained variance %.5f %% " %\
                                    (i0 + 1, partialExplainedVariance))
        totalExplainedVariance += partialExplainedVariance
        eigenvectors[i0, :] = evectors[:, i]
        #print("NORMA = ", numpy.dot(evectors[:, i].T, evectors[:, i]))
    print("Total explained variance = %.2f %% " % totalExplainedVariance)

    for i in range(ncomponents):
        colOffset = 0
        for j in range(len(stackList)):
            jVectorLength = shapeList[j][-1]
            images[i, :] +=\
                    dotblas.dot(stackList[j],
                                eigenvectors[i, colOffset:(colOffset + jVectorLength)])
            colOffset += jVectorLength

    #restore shapes and values
    for i in range(len(stackList)):
        numpy.add(stackList[i], avgList[i], stackList[i])
        stackList[i].shape = shapeList[i]

    if c is None:
        images.shape = ncomponents, r, 1
    else:
        images.shape = ncomponents, r, c

    if legacy:
        return images, eigenvalues, eigenvectors
    else:
        return {
            "scores": images,
            "eigenvalues": eigenvalues,
            "eigenvectors": eigenvectors,
            "average": avgList,
            "pixels": npixels,
            "variance": totalVariance
        }
Example #9
def multipleArrayPCA(stackList0,
                     ncomponents=10,
                     binning=None,
                     legacy=True,
                     scale=False,
                     **kw):
    """
    Given a list of arrays, calculate the requested principal components from
    the matrix resulting from their column concatenation. Therefore, all the
    input arrays must have the same number of rows.
    """
    stackList = [None] * len(stackList0)
    for i, stack in enumerate(stackList0):
        if hasattr(stack, "info") and hasattr(stack, "data"):
            data = stack.data
        else:
            data = stack
        stackList[i] = data

    stack = stackList[0]
    if hasattr(stack, "info") and hasattr(stack, "data"):
        data = stack.data
    else:
        data = stack

    if not isinstance(data, numpy.ndarray):
        raise TypeError(\
            "multipleArrayPCA is only supported when using numpy arrays")

    if len(data.shape) == 3:
        r, c = data.shape[:2]
        npixels = r * c
    else:
        c = None
        r = data.shape[0]
        npixels = r

    # reshape all the input data and subtract the mean
    shapeList = []
    avgList = []
    eigenvectorLength = 0
    for i in range(len(stackList)):
        shape = stackList[i].shape
        eigenvectorLength += shape[-1]
        shapeList.append(shape)
        stackList[i].shape = npixels, -1
        avg = numpy.sum(stackList[i], 0) / (1.0 * npixels)
        numpy.subtract(stackList[i], avg, stackList[i])
        avgList.append(avg)

    #create the needed storage space for the covariance matrix
    covMatrix = numpy.zeros((eigenvectorLength, eigenvectorLength),
                            numpy.float32)

    rowOffset = 0
    indexDict = {}
    for i in range(len(stackList)):
        iVectorLength = shapeList[i][-1]
        colOffset = 0
        for j in range(len(stackList)):
            jVectorLength = shapeList[j][-1]
            if i <= j:
                covMatrix[rowOffset:(rowOffset + iVectorLength),
                          colOffset:(colOffset + jVectorLength)] =\
                          dotblas.dot(stackList[i].T, stackList[j])/(npixels-1)
                if i < j:
                    key = "%02d%02d" % (i, j)
                    indexDict[key] = (rowOffset, rowOffset + iVectorLength,
                                      colOffset, colOffset + jVectorLength)
            else:
                key = "%02d%02d" % (j, i)
                rowMin, rowMax, colMin, colMax = indexDict[key]
                covMatrix[rowOffset:(rowOffset + iVectorLength),
                          colOffset:(colOffset + jVectorLength)] =\
                          covMatrix[rowMin:rowMax, colMin:colMax].T
            colOffset += jVectorLength
        rowOffset += iVectorLength
    indexDict = None

    #I have the covariance matrix, calculate the eigenvectors and eigenvalues
    totalVariance = numpy.array(numpy.diag(covMatrix), copy=True)
    # optionally normalize to unit standard deviation, i.e. work with the
    # correlation matrix instead of the covariance matrix
    normalizeToUnitStandardDeviation = scale
    if normalizeToUnitStandardDeviation:
        for i in range(covMatrix.shape[0]):
            if totalVariance[i] > 0:
                covMatrix[i, :] /= numpy.sqrt(totalVariance[i])
                covMatrix[:, i] /= numpy.sqrt(totalVariance[i])
    totalVariance = numpy.diag(covMatrix).sum()
    evalues, evectors = numpy.linalg.eigh(covMatrix)
    covMatrix = None
    _logger.info("Total Variance = %s", totalVariance)
    # The total variance should also be the sum of all the eigenvalues
    calculatedTotalVariance = evalues.sum()
    if abs(totalVariance - calculatedTotalVariance) > \
           (0.0001 * calculatedTotalVariance):
        _logger.warning("Discrepancy on total variance")
        _logger.warning("Variance from matrix = %s", totalVariance)
        _logger.warning("Variance from sum of eigenvalues = %s",
                        calculatedTotalVariance)

    images = numpy.zeros((ncomponents, npixels), numpy.float32)
    eigenvectors = numpy.zeros((ncomponents, eigenvectorLength), numpy.float32)
    eigenvalues = numpy.zeros((ncomponents, ), numpy.float32)

    a = [(evalues[i], i) for i in range(len(evalues))]
    a.sort()
    a.reverse()
    totalExplainedVariance = 0.0
    for i0 in range(ncomponents):
        i = a[i0][1]
        eigenvalues[i0] = evalues[i]
        partialExplainedVariance = 100. * evalues[i] / \
                                   calculatedTotalVariance
        _logger.info("PC%02d  Explained variance %.5f %% " %\
                                    (i0 + 1, partialExplainedVariance))
        totalExplainedVariance += partialExplainedVariance
        eigenvectors[i0, :] = evectors[:, i]
        #print("NORMA = ", numpy.dot(evectors[:, i].T, evectors[:, i]))
    _logger.info("Total explained variance = %.2f %% " %
                 totalExplainedVariance)

    # figure out if eigenvectors are to be multiplied by -1
    for i0 in range(ncomponents):
        if eigenvectors[i0].sum() < 0.0:
            _logger.info("PC%02d multiplied by -1" % i0)
            eigenvectors[i0] *= -1

    for i in range(ncomponents):
        colOffset = 0
        for j in range(len(stackList)):
            jVectorLength = shapeList[j][-1]
            images[i, :] +=\
                    dotblas.dot(stackList[j],
                                eigenvectors[i, colOffset:(colOffset + jVectorLength)])
            colOffset += jVectorLength

    #restore shapes and values
    for i in range(len(stackList)):
        numpy.add(stackList[i], avgList[i], stackList[i])
        stackList[i].shape = shapeList[i]

    if c is None:
        images.shape = ncomponents, r, 1
    else:
        images.shape = ncomponents, r, c

    if legacy:
        return images, eigenvalues, eigenvectors
    else:
        return {
            "scores": images,
            "eigenvalues": eigenvalues,
            "eigenvectors": eigenvectors,
            "average": avgList,
            "pixels": npixels,
            "variance": calculatedTotalVariance
        }
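
With scale=True the covariance matrix is divided symmetrically by the standard deviations, so the eigenproblem is solved on the correlation matrix instead; this keeps variables with very different dynamic ranges from dominating the decomposition. Assuming all variances are positive, the loop above is equivalent to this vectorized sketch:

import numpy

cov = numpy.cov(numpy.random.random((50, 4)), rowvar=False)
std = numpy.sqrt(numpy.diag(cov))
# dividing rows and columns by std is the same as this outer product
corr = cov / numpy.outer(std, std)
assert numpy.allclose(numpy.diag(corr), 1.0)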
Example #10
def getCovarianceMatrix(stack,
                        index=-1,
                        binning=None,
                        dtype=numpy.float64,
                        force=True,
                        center=True,
                        weights=None,
                        spatial_mask=None):
    # The 1D mask should correspond to the values, before or after
    # sampling? It could be handled as weights to be applied to the
    # spectra. That would allow two uses, as mask and as weights, at
    # the cost of a multiplication.

    # The spatial_mask accounts for the pixels to be considered. It allows
    # one to calculate the covariance matrix of a subset or to deal with
    # non-finite data (NaN, +inf, -inf, ...). The calling program
    # should set the mask.

    # recover the actual data to work with
    if hasattr(stack, "info") and hasattr(stack, "data"):
        #we are dealing with a PyMca data object
        data = stack.data
    else:
        data = stack

    oldShape = data.shape
    if index not in [0, -1, len(oldShape) - 1]:
        data = None
        raise IndexError("1D index must be one of 0, -1 or %d" %
                         (len(oldShape) - 1))

    if index < 0:
        actualIndex = len(oldShape) + index
    else:
        actualIndex = index

    #the number of spatial pixels
    nPixels = 1
    for i in range(len(oldShape)):
        if i != actualIndex:
            nPixels *= oldShape[i]

    #remove inf or nan
    #image_data = data.sum(axis=actualIndex)
    #spatial_mask = numpy.isfinite(image_data)
    #

    #the starting number of channels or of images
    N = oldShape[actualIndex]

    # our binning (better said sampling) is spectral, in order not to
    # affect the spatial resolution
    if binning is None:
        binning = 1

    if weights is None:
        weights = numpy.ones(N, numpy.float64)

    if spatial_mask is not None:
        cleanMask = spatial_mask[:].reshape(nPixels)
        usedPixels = cleanMask.sum()
        badMask = numpy.array(spatial_mask < 1, dtype=cleanMask.dtype)
        badMask.shape = nPixels
    else:
        cleanMask = None
        usedPixels = nPixels

    nChannels = int(N / binning)
    cleanWeights = weights[::binning]

    #end of checking part
    eigenvectorLength = nChannels

    if (not force) and isinstance(data, numpy.ndarray):
        if DEBUG:
            print("Memory consuming calculation")
        # make a direct calculation (memory consuming)
        # take a view of the data
        dataView = data[:]
        if index in [0]:
            #reshape the view to allow the matrix multiplication
            dataView.shape = -1, nPixels
            cleanWeights.shape = -1, 1
            dataView = dataView[::binning] * cleanWeights
            if cleanMask is not None:
                dataView[:, badMask] = 0
            sumSpectrum = dataView.sum(axis=1, dtype=numpy.float64)
            #and return the standard covariance matrix as a matrix product
            covMatrix = dotblas.dot(dataView, dataView.T)\
                / float(usedPixels - 1)
        else:
            #the last index
            dataView.shape = nPixels, -1
            cleanWeights.shape = 1, -1
            dataView = dataView[:, ::binning] * cleanWeights
            if cleanMask is not None:
                cleanMask.shape = -1
                dataView[badMask] = 0
            sumSpectrum = dataView.sum(axis=0, dtype=numpy.float64)
            #and return the standard covariance matrix as a matrix product
            covMatrix = dotblas.dot(dataView.T, dataView)\
                / float(usedPixels - 1)
        if center:
            averageMatrix = numpy.outer(sumSpectrum, sumSpectrum)\
                / (usedPixels * (usedPixels - 1))
            covMatrix -= averageMatrix
            averageMatrix = None
        return covMatrix, sumSpectrum / usedPixels, usedPixels

    #we are dealing with dynamically loaded data
    if DEBUG:
        print("DYNAMICALLY LOADED DATA")
    #create the needed storage space for the covariance matrix
    try:
        covMatrix = numpy.zeros((eigenvectorLength, eigenvectorLength),
                                dtype=dtype)
        sumSpectrum = numpy.zeros((eigenvectorLength,), numpy.float64)
    except:
        #make sure no reference to the original input data is kept
        cleanWeights = None
        covMatrix = None
        averageMatrix = None
        data = None
        raise

    #workaround a problem with h5py
    try:
        if actualIndex in [0]:
            testException = data[0:1]
        else:
            if len(data.shape) == 2:
                testException = data[0:1, -1]
            elif len(data.shape) == 3:
                testException = data[0:1, 0:1, -1]
    except AttributeError:
        txt = "%s" % type(data)
        if 'h5py' in txt:
            print("Implementing h5py workaround")
            import h5py
            data = h5py.Dataset(data.id)
        else:
            raise

    if actualIndex in [0]:
        #divider is used to decide the fraction of images to keep in memory
        #in order to limit file access on dynamically loaded data.
        #Since two chunks of the same size are used, the amount of memory
        #needed is twice the data size divided by the divider.
        #For instance, divider = 10 implies the data to be read 5.5 times
        #from disk while having a memory footprint of about one fifth of
        #the dataset size.
        step = 0
        divider = 10
        while step < 1:
            step = int(oldShape[index] / divider)
            divider -= 2
            if divider <= 0:
                step = oldShape[index]
                break
        if DEBUG:
            print("Reading chunks of %d images" % step)
        nImagesRead = 0
        if (binning == 1) and oldShape[index] >= step:
            chunk1 = numpy.zeros((step, nPixels), numpy.float64)
            chunk2 = numpy.zeros((nPixels, step), numpy.float64)
            if spatial_mask is not None:
                badMask.shape = -1
                cleanMask.shape = -1
            i = 0
            while i < N:
                iToRead = min(step, N - i)
                #get step images for the first chunk
                chunk1[0:iToRead] = data[i:i + iToRead].reshape(iToRead, -1)
                if spatial_mask is not None:
                    chunk1[0:iToRead, badMask] = 0
                sumSpectrum[i:i + iToRead] = chunk1[0:iToRead].sum(axis=1)
                if center:
                    average = sumSpectrum[i:i + iToRead] / usedPixels
                    average.shape = iToRead, 1
                    chunk1[0:iToRead] -= average
                if spatial_mask is not None:
                    chunk1[0:iToRead, badMask] = 0
                nImagesRead += iToRead
                j = 0
                while j <= i:
                    #get step images for the second chunk
                    if j == i:
                        jToRead = iToRead
                        chunk2[:, 0:jToRead] = chunk1[0:jToRead, :].T
                    else:
                        #get step images for the second chunk
                        jToRead = min(step, nChannels - j)

                        #with loop:
                        #for k in range(0, jToRead):
                        #    chunk2[:,k] = data[(j+k):(j+k+1)].reshape(1,-1)
                        #    if spatial_mask is not None:
                        #        chunk2[badMask[(j+k):(j+k+1),k]] = 0
                        #equivalent without loop:
                        chunk2[:, 0:jToRead] =\
                            data[j:(j + jToRead)].reshape(jToRead, -1).T
                        if spatial_mask is not None:
                            chunk2[badMask, 0:jToRead] = 0
                        nImagesRead += jToRead
                        if center:
                            average = \
                                chunk2[:, 0:jToRead].sum(axis=0) / usedPixels
                            average.shape = 1, jToRead
                            chunk2[:, 0:jToRead] -= average
                            if spatial_mask is not None:
                                chunk2[badMask, 0:jToRead] = 0

                    #dot product
                    if (iToRead != step) or (jToRead != step):
                        covMatrix[i: (i + iToRead), j: (j + jToRead)] =\
                                        dotblas.dot(chunk1[:iToRead, :nPixels],
                                                    chunk2[:nPixels, :jToRead])
                    else:
                        covMatrix[i: (i + iToRead), j: (j + jToRead)] =\
                                        dotblas.dot(chunk1, chunk2)

                    if i != j:
                        covMatrix[j: (j + jToRead), i: (i + iToRead)] =\
                                covMatrix[i: (i + iToRead), j: (j + jToRead)].T

                    #increment j
                    j += jToRead
                i += iToRead
            chunk1 = None
            chunk2 = None
            if DEBUG:
                print("totalImages Read = ", nImagesRead)
        elif (binning > 1) and (oldShape[index] >= step):
            chunk1 = numpy.zeros((step, nPixels), numpy.float64)
            chunk2 = numpy.zeros((nPixels, step), numpy.float64)
            #one by one reading till we fill the chunks
            imagesToRead = numpy.arange(0, oldShape[index], binning)
            i = int(imagesToRead[weights > 0][0])
            spectrumIndex = 0
            nImagesRead = 0
            while i < N:
                #fill chunk1
                jj = 0
                for iToRead in range(0, int(min(step * binning, N - i)),
                                     binning):
                    chunk1[jj] = data[i + iToRead].reshape(1, -1) * \
                                 weights[i + iToRead]
                    jj += 1
                sumSpectrum[spectrumIndex:(spectrumIndex + jj)] = \
                                                    chunk1[0:jj].sum(axis=1)
                if center:
                    average = \
                        sumSpectrum[spectrumIndex:(spectrumIndex + jj)] / nPixels
                    average.shape = jj, 1
                    chunk1[0:jj] -= average
                nImagesRead += jj
                iToRead = jj
                j = 0
                while j <= i:
                    #get step images for the second chunk
                    if j == i:
                        jToRead = iToRead
                        chunk2[:, 0:jToRead] = chunk1[0:jToRead, :].T
                    else:
                        #get step images for the second chunk
                        jj = 0
                        for jToRead in range(0,
                                             int(min(step * binning, N - j)),
                                             binning):
                            chunk2[:, jj] =\
                                data[j + jToRead].reshape(1, -1)\
                                * weights[j + jToRead]
                            jj += 1
                        nImagesRead += jj
                        if center:
                            average = chunk2[:, 0:jj].sum(axis=0) / nPixels
                            average.shape = 1, jj
                            chunk2 -= average
                        jToRead = jj
                    #dot product
                    if (iToRead != step) or (jToRead != step):
                        covMatrix[i:(i + iToRead), j:(j + jToRead)] =\
                                dotblas.dot(chunk1[:iToRead, :nPixels],
                                            chunk2[:nPixels, :jToRead])
                    else:
                        covMatrix[i:(i + iToRead), j:(j + jToRead)] =\
                                dotblas.dot(chunk1, chunk2)

                    if i != j:
                        covMatrix[j:(j + jToRead), i:(i + iToRead)] =\
                                covMatrix[i:(i + iToRead), j:(j + jToRead)].T

                    #increment j
                    j += jToRead * step
                i += iToRead * step
            chunk1 = None
            chunk2 = None
        else:
            raise ValueError("Unhandled case")

        # Should one divide by N or by N-1? If we use images, we assume
        # the observables are the images, not the spectra, so
        # covMatrix /= nChannels would be wrong and one has to use:
        covMatrix /= usedPixels
    else:
        #the data are already arranged as (nPixels, nChannels) and we
        #basically have to return data.T * data to get back the covariance
        #matrix as (nChannels, nChannels)
        #if someone had the bad idea to store the data in HDF5 with a chunk
        #size based on the pixels and not on the spectra a loop based on
        #reading spectrum per spectrum can be very slow
        step = 0
        divider = 10
        while step < 1:
            step = int(nPixels / divider)
            divider -= 1
            if divider <= 0:
                step = nPixels
                break
        step = nPixels  # note: this overrides the chunked step computed above
        if DEBUG:
            print("Reading chunks of %d spectra" % step)

        cleanWeights.shape = 1, -1
        if len(data.shape) == 2:
            if cleanMask is not None:
                badMask.shape = -1
            tmpData = numpy.zeros((step, nChannels), numpy.float64)
            k = 0
            while k < nPixels:
                kToRead = min(step, nPixels - k)
                tmpData[0:kToRead] = data[k: k + kToRead, ::binning]
                if cleanMask is not None:
                    tmpData[badMask[k: k + kToRead]] = 0
                a = tmpData[0:kToRead] * cleanWeights
                sumSpectrum += a.sum(axis=0)
                covMatrix += dotblas.dot(a.T, a)
                a = None
                k += kToRead
            tmpData = None
        elif len(data.shape) == 3:
            if oldShape[0] == 1:
                #close to the previous case
                tmpData = numpy.zeros((step, nChannels), numpy.float64)
                if cleanMask is not None:
                    badMask.shape = data.shape[0], data.shape[1]
                for i in range(oldShape[0]):
                    k = 0
                    while k < oldShape[1]:
                        kToRead = min(step, oldShape[1] - k)
                        tmpData[0:kToRead] = data[i, k:k + kToRead, ::binning]\
                                             * cleanWeights
                        if cleanMask is not None:
                            tmpData[0:kToRead][badMask[i, k: k + kToRead]] = 0
                        a = tmpData[0:kToRead]
                        sumSpectrum += a.sum(axis=0)
                        covMatrix += dotblas.dot(a.T, a)
                        a = None
                        k += kToRead
                tmpData = None
            elif oldShape[1] == 1:
                #almost identical to the previous case
                tmpData = numpy.zeros((step, nChannels), numpy.float64)
                if cleanMask is not None:
                    badMask.shape = data.shape[0], data.shape[1]
                for i in range(oldShape[1]):
                    k = 0
                    while k < oldShape[0]:
                        kToRead = min(step, oldShape[0] - k)
                        tmpData[0:kToRead] = data[k: k + kToRead, i, ::binning]\
                                             * cleanWeights
                        if cleanMask is not None:
                            tmpData[0:kToRead][badMask[k: k + kToRead, i]] = 0
                        a = tmpData[0:kToRead]
                        sumSpectrum += a.sum(axis=0)
                        covMatrix += dotblas.dot(a.T, a)
                        a = None
                        k += kToRead
                tmpData = None
            elif oldShape[0] < 21:
                if step > oldShape[1]:
                    step = oldShape[1]
                tmpData = numpy.zeros((step, nChannels), numpy.float64)
                if cleanMask is not None:
                    badMask.shape = data.shape[0], data.shape[1]
                for i in range(oldShape[0]):
                    k = 0
                    while k < oldShape[1]:
                        kToRead = min(step, oldShape[1] - k)
                        tmpData[0:kToRead] = data[i, k: k + kToRead, ::binning]\
                                             * cleanWeights
                        if cleanMask is not None:
                            tmpData[0:kToRead][badMask[i, k: k + kToRead]] = 0
                        a = tmpData[0:kToRead]
                        sumSpectrum += a.sum(axis=0)
                        covMatrix += dotblas.dot(a.T, a)
                        a = None
                        k += kToRead
                tmpData = None
            else:
                #I should choose the sizes in terms of the size
                #of the dataset
                if oldShape[0] < 41:
                    #divide by 10
                    deltaRow = 4
                elif oldShape[0] < 101:
                    #divide by 10
                    deltaRow = 10
                else:
                    #take pieces of one tenth
                    deltaRow = int(oldShape[0] / 10)
                deltaCol = oldShape[1]
                tmpData = numpy.zeros((deltaRow, deltaCol, nChannels),
                                      numpy.float64)
                if cleanMask is not None:
                    badMask.shape = data.shape[0], data.shape[1]
                i = 0
                while i < oldShape[0]:
                    iToRead = min(deltaRow, oldShape[0] - i)
                    kToRead = iToRead * oldShape[1]
                    tmpData[:iToRead] = data[i:(i + iToRead), :, ::binning]
                    if cleanMask is not None:
                        tmpData[0:kToRead][badMask[i:(i + iToRead), :]] = 0
                    a = tmpData[:iToRead]
                    a.shape = kToRead, nChannels
                    a *= cleanWeights
                    sumSpectrum += a.sum(axis=0)
                    covMatrix += dotblas.dot(a.T, a)
                    a = None
                    i += iToRead
        #should one divide by N or by N-1 ??
        covMatrix /= usedPixels - 1
        if center:
            #the n-1 appears again here
            averageMatrix = numpy.outer(sumSpectrum, sumSpectrum)\
                            / (usedPixels * (usedPixels - 1))
            covMatrix -= averageMatrix
            averageMatrix = None
    return covMatrix, sumSpectrum / usedPixels, usedPixels
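
For dynamically loaded data the routine accumulates chunk-wise dot products and subtracts the mean term only at the end, using the identity sum((x - mu)(x - mu)^T) = sum(x x^T) - n mu mu^T. The accumulation scheme can be checked on a small in-memory array (this sketch only illustrates the algebra, not the chunked file access):

import numpy

data = numpy.random.random((1000, 16))  # (pixels, channels)
n = data.shape[0]
covMatrix = numpy.zeros((16, 16))
sumSpectrum = numpy.zeros(16)
for k in range(0, n, 128):  # chunked accumulation, as in the dynamic path
    chunk = data[k:k + 128]
    sumSpectrum += chunk.sum(axis=0)
    covMatrix += numpy.dot(chunk.T, chunk)
covMatrix /= n - 1
covMatrix -= numpy.outer(sumSpectrum, sumSpectrum) / (n * (n - 1))
assert numpy.allclose(covMatrix, numpy.cov(data, rowvar=False))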
Example #11
def expectationMaximizationPCA(stack, ncomponents=10, binning=None, **kw):
    """
    This is a fast method when the number of components is small
    """
    if DEBUG:
        print("expectationMaximizationPCA")
    #This part is common to all ...
    if binning is None:
        binning = 1

    if hasattr(stack, "info") and hasattr(stack, "data"):
        data = stack.data
    else:
        data = stack
    if len(data.shape) == 3:
        r, c, N = data.shape
        data.shape = r * c, N
    else:
        r, N = data.shape
        c = 1

    if binning > 1:
        data = numpy.reshape(data,
                             [data.shape[0], data.shape[1] // binning, binning])
        data = numpy.sum(data, axis=-1)
        N //= binning
    if ncomponents > N:
        raise ValueError("Number of components too high.")
    #end of common part
    avg = numpy.sum(data, axis=0, dtype=numpy.float64) / (1.0 * r * c)
    numpy.subtract(data, avg, data)
    dataw = data * 1  # working copy that will be deflated in place
    images = numpy.zeros((ncomponents, r * c), data.dtype)
    eigenvalues = numpy.zeros((ncomponents, ), data.dtype)
    eigenvectors = numpy.zeros((ncomponents, N), data.dtype)
    for i in range(ncomponents):
        #generate a random vector
        p = numpy.random.random(N)
        #10 iterations seems to be fairly accurate, but it is
        #slow when reaching "noise" components.
        #A variation threshold of 1 % seems to be acceptable.
        tmod_old = 0
        tmod = 0.02
        j = 0
        max_iter = 7
        while ((abs(tmod - tmod_old) / tmod) > 0.01) and (j < max_iter):
            tmod_old = tmod
            t = 0.0
            for k in range(r * c):
                t += dotblas.dot(dataw[k, :], p.T) * dataw[k, :]
            tmod = numpy.sqrt(numpy.sum(t * t))
            p = t / tmod
            j += 1

        eigenvectors[i, :] = p
        #subtract the found component from the dataset
        for k in range(r * c):
            dataw[k, :] -= dotblas.dot(dataw[k, :], p.T) * p
    # calculate eigenvalues via the Rayleigh Quotients:
    # eigenvalue = \
    # (Eigenvector.T * Covariance * EigenVector)/ (Eigenvector.T * Eigenvector)
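    # since data has been centered, dotblas.dot(data, v) realizes X.v, so the
    # quotient below equals v.T (X.T X) v / (v.T v); the 1/(n-1) covariance
    # normalization is not applied here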
    for i in range(ncomponents):
        tmp = dotblas.dot(data, eigenvectors[i, :].T)
        eigenvalues[i] = \
            dotblas.dot(tmp.T, tmp) / dotblas.dot(eigenvectors[i, :].T,
                                                  eigenvectors[i, :])

    #Generate the eigenimages
    for i0 in range(ncomponents):
        images[i0, :] = dotblas.dot(data, eigenvectors[i0, :])

    #restore the original data
    numpy.add(data, avg, data)

    #reshape the images
    images.shape = ncomponents, r, c
    return images, eigenvalues, eigenvectors
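
# A minimal usage sketch for the function above (hypothetical shapes; a random
# stack stands in for real data and the module-level DEBUG flag is assumed):
#
#     stack = numpy.random.random((50, 40, 128))
#     images, evals, evects = expectationMaximizationPCA(stack, ncomponents=3)
#     # images.shape == (3, 50, 40); evects.shape == (3, 128)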
Example #19
0
def lanczosPCA(stack, ncomponents=10, binning=None, **kw):
    if DEBUG:
        print("lanczosPCA")
    if binning is None:
        binning = 1

    if hasattr(stack, "info") and hasattr(stack, "data"):
        data = stack.data
    else:
        data = stack

    if not isinstance(data, numpy.ndarray):
        raise TypeError(\
            "lanczosPCA is only supported when using numpy arrays")

    #wrapmatrix = "double"
    wrapmatrix = "single"

    dtype = numpy.float64
    if wrapmatrix == "double":
        data = data.astype(dtype)

    if len(data.shape) == 3:
        r, c, N = data.shape
        data.shape = r * c, N
    else:
        r, N = data.shape
        c = 1

    npixels = r * c

    if binning > 1:
        # assigning to data.shape may fail with non-contiguous arrays;
        # use reshape instead
        data = numpy.reshape(data,
                             [data.shape[0], data.shape[1] // binning, binning])
        data = numpy.sum(data, axis=-1)
        N //= binning

    if ncomponents > N:
        raise ValueError("Number of components too high.")

    avg = numpy.sum(data, 0) / (1.0 * npixels)
    numpy.subtract(data, avg, data)

    Lanczos.LanczosNumericMatrix.tipo = dtype
    Lanczos.LanczosNumericVector.tipo = dtype

    if wrapmatrix == "single":
        SM = [dotblas.dot(data.T, data).astype(dtype)]
        SM = Lanczos.LanczosNumericMatrix(SM)
    else:
        SM = Lanczos.LanczosNumericMatrix(
            [data.T.astype(dtype), data.astype(dtype)])

    eigenvalues, eigenvectors = Lanczos.solveEigenSystem(SM,
                                                         ncomponents,
                                                         shift=0.0,
                                                         tol=1.0e-15)
    SM = None
    numpy.add(data, avg, data)

    images = numpy.zeros((ncomponents, npixels), data.dtype)
    vectors = numpy.zeros((ncomponents, N), dtype)
    for i in range(ncomponents):
        vectors[i, :] = eigenvectors[i].vr
        images[i, :] = dotblas.dot(data,
                                   (eigenvectors[i].vr).astype(data.dtype))
    data = None
    images.shape = ncomponents, r, c
    return images, eigenvalues, vectors
Example #20
0
def getCovarianceMatrix(stack,
                        index=None,
                        binning=None,
                        dtype=numpy.float64,
                        force=True,
                        center=True,
                        weights=None,
                        spatial_mask=None):
    """
    Calculate the covariance matrix of the input data (stack) array. The input array is to be
    understood as a set of observables (spectra) taken at different instances (for example,
    at different spatial coordinates).
    
    :param stack: Array of data. Dimension greater than one.
    :type stack: Numpy ndarray.
    :param index: Integer specifying the array dimension containing the "observables". Only the
    first (index = 0) or the last dimension (index = -1 or index = ndimensions - 1) is supported.
    :type index: Integer (default is -1 to indicate it is the last dimension of the input array)
    :param binning: Current implementation corresponds to a sampling of the spectral data and not to
    an actual binning. This may change in future versions.
    :type binning: Positive integer (default 1)
    :param dtype: Keyword indicating the data type of the returned covariance matrix.
    :type dtype: A valid numpy data type (default numpy.float64)
    :param force: Indicate how to calculate the covariance matrix:

            - False : Perform the product data.T * data in one call 
            - True  : Perform the product data.T * data progressively (smaller memory footprint)

    :type force: Boolean (default True)
    :param center: Indicate if the mean is to be subtracted from the observables.
    :type center: Boolean (default True)
    :param weights: Weight to be applied to each observable. It can therefore be used as a spectral mask
    by setting the weight to 0 on the values to ignore.
    :type weights: Numpy ndarray of same size as the observables or None (default).
    :param spatial_mask: Array of size n where n is the number of measurement instances. In mapping
    experiments, n would be equal to the number of pixels.
    :type spatial_mask: Numpy array of unsigned bytes (numpy.uint8) or None (default).
    :returns: The covMatrix, the average spectrum and the number of used pixels.
    """
    #the 1D mask = weights should correspond to the values, before or after
    #sampling? It could be handled as weights to be applied to the
    #spectra. That would allow two uses, as mask and as weights, at
    #the cost of a multiplication.

    #the spatial_mask accounts for pixels to be considered. It allows
    #to calculate the covariance matrix of a subset or to deal with
    #non finite data (NaN, +inf, -inf, ...). The calling program
    #should set the mask.

    #recover the actual data to work with
    if hasattr(stack, "info") and hasattr(stack, "data"):
        #we are dealing with a PyMca data object
        data = stack.data
        if index is None:
            index = stack.info.get("McaWindex", -1)
    else:
        data = stack

    if index is None:
        index = -1

    oldShape = data.shape
    if index not in [0, -1, len(oldShape) - 1]:
        data = None
        raise IndexError("1D index must be one of 0, -1 or %d" %
                         (len(oldShape) - 1))

    if index < 0:
        actualIndex = len(oldShape) + index
    else:
        actualIndex = index

    #the number of spatial pixels
    nPixels = 1
    for i in range(len(oldShape)):
        if i != actualIndex:
            nPixels *= oldShape[i]

    #remove inf or nan
    #image_data = data.sum(axis=actualIndex)
    #spatial_mask = numpy.isfinite(image_data)
    #

    #the starting number of channels or of images
    N = oldShape[actualIndex]

    # our binning (better said sampling) is spectral, in order not to
    # affect the spatial resolution
    if binning is None:
        binning = 1

    if spatial_mask is not None:
        cleanMask = spatial_mask[:].reshape(nPixels)
        usedPixels = cleanMask.sum()
        badMask = numpy.array(spatial_mask < 1, dtype=cleanMask.dtype)
        badMask.shape = nPixels
    else:
        cleanMask = None
        usedPixels = nPixels

    nChannels = int(N / binning)

    if weights is None:
        weights = numpy.ones(N, numpy.float64)

    if weights.size == nChannels:
        # binning was taken into account
        cleanWeights = weights[:]
    else:
        cleanWeights = weights[::binning]

    #end of checking part
    eigenvectorLength = nChannels

    if (not force) and isinstance(data, numpy.ndarray):
        if DEBUG:
            print("Memory consuming calculation")
        #make a direct calculation (memory consuming)
        #take a view to the data
        dataView = data[:]
        if index in [0]:
            #reshape the view to allow the matrix multiplication
            dataView.shape = -1, nPixels
            cleanWeights.shape = -1, 1
            dataView = dataView[::binning] * cleanWeights
            if cleanMask is not None:
                dataView[:, badMask] = 0
            sumSpectrum = dataView.sum(axis=1, dtype=numpy.float64)
            #and return the standard covariance matrix as a matrix product
            covMatrix = dotblas.dot(dataView, dataView.T)\
                / float(usedPixels - 1)
        else:
            #the last index
            dataView.shape = nPixels, -1
            cleanWeights.shape = 1, -1
            dataView = dataView[:, ::binning] * cleanWeights
            if cleanMask is not None:
                cleanMask.shape = -1
                dataView[badMask] = 0
            sumSpectrum = dataView.sum(axis=0, dtype=numpy.float64)
            #and return the standard covariance matrix as a matrix product
            covMatrix = dotblas.dot(dataView.T, dataView)\
                / float(usedPixels - 1)
        if center:
            averageMatrix = numpy.outer(sumSpectrum, sumSpectrum)\
                / (usedPixels * (usedPixels - 1))
            covMatrix -= averageMatrix
            averageMatrix = None
        return covMatrix, sumSpectrum / usedPixels, usedPixels

    #we are dealing with dynamically loaded data
    if DEBUG:
        print("DYNAMICALLY LOADED DATA")
    #create the needed storage space for the covariance matrix
    try:
        covMatrix = numpy.zeros((eigenvectorLength, eigenvectorLength),
                                dtype=dtype)
        sumSpectrum = numpy.zeros((eigenvectorLength, ), numpy.float64)
    except:
        #make sure no reference to the original input data is kept
        cleanWeights = None
        covMatrix = None
        averageMatrix = None
        data = None
        raise

    #workaround a problem with h5py
    try:
        if actualIndex in [0]:
            testException = data[0:1]
        else:
            if len(data.shape) == 2:
                testException = data[0:1, -1]
            elif len(data.shape) == 3:
                testException = data[0:1, 0:1, -1]
    except AttributeError:
        txt = "%s" % type(data)
        if 'h5py' in txt:
            print("Implementing h5py workaround")
            import h5py
            data = h5py.Dataset(data.id)
        else:
            raise

    if actualIndex in [0]:
        #divider is used to decide the fraction of images to keep in memory
        #in order to limit file access on dynamically loaded data.
        #Since two chunks of the same size are used, the amount of memory
        #needed is twice the data size divided by the divider.
        #For instance, divider = 10 implies the data to be read 5.5 times
        #from disk while having a memory footprint of about one fifth of
        #the dataset size.
        step = 0
        divider = 10
        while step < 1:
            step = int(oldShape[index] / divider)
            divider -= 2
            if divider <= 0:
                step = oldShape[index]
                break
        if DEBUG:
            print("Reading chunks of %d images" % step)
        nImagesRead = 0
        if (binning == 1) and oldShape[index] >= step:
            chunk1 = numpy.zeros((step, nPixels), numpy.float64)
            chunk2 = numpy.zeros((nPixels, step), numpy.float64)
            if spatial_mask is not None:
                badMask.shape = -1
                cleanMask.shape = -1
            i = 0
            while i < N:
                iToRead = min(step, N - i)
                #get step images for the first chunk
                chunk1[0:iToRead] = data[i:i + iToRead].reshape(iToRead, -1)
                if spatial_mask is not None:
                    chunk1[0:iToRead, badMask] = 0
                sumSpectrum[i:i + iToRead] = chunk1[0:iToRead].sum(axis=1)
                if center:
                    average = sumSpectrum[i:i + iToRead] / usedPixels
                    average.shape = iToRead, 1
                    chunk1[0:iToRead] -= average
                if spatial_mask is not None:
                    chunk1[0:iToRead, badMask] = 0
                nImagesRead += iToRead
                j = 0
                while j <= i:
                    #get step images for the second chunk
                    if j == i:
                        jToRead = iToRead
                        chunk2[:, 0:jToRead] = chunk1[0:jToRead, :].T
                    else:
                        #get step images for the second chunk
                        jToRead = min(step, nChannels - j)

                        #with loop:
                        #for k in range(0, jToRead):
                        #    chunk2[:,k] = data[(j+k):(j+k+1)].reshape(1,-1)
                        #    if spatial_mask is not None:
                        #        chunk2[badMask[(j+k):(j+k+1),k]] = 0
                        #equivalent without loop:
                        chunk2[:, 0:jToRead] =\
                            data[j:(j + jToRead)].reshape(jToRead, -1).T
                        if spatial_mask is not None:
                            chunk2[badMask, 0:jToRead] = 0
                        nImagesRead += jToRead
                        if center:
                            average = \
                                chunk2[:, 0:jToRead].sum(axis=0) / usedPixels
                            average.shape = 1, jToRead
                            chunk2[:, 0:jToRead] -= average
                            if spatial_mask is not None:
                                chunk2[badMask, 0:jToRead] = 0

                    #dot product
                    if (iToRead != step) or (jToRead != step):
                        covMatrix[i: (i + iToRead), j: (j + jToRead)] =\
                                        dotblas.dot(chunk1[:iToRead, :nPixels],
                                                    chunk2[:nPixels, :jToRead])
                    else:
                        covMatrix[i: (i + iToRead), j: (j + jToRead)] =\
                                        dotblas.dot(chunk1, chunk2)

                    if i != j:
                        covMatrix[j: (j + jToRead), i: (i + iToRead)] =\
                                covMatrix[i: (i + iToRead), j: (j + jToRead)].T

                    #increment j
                    j += jToRead
                i += iToRead
            chunk1 = None
            chunk2 = None
            if DEBUG:
                print("totalImages Read = ", nImagesRead)
        elif (binning > 1) and (oldShape[index] >= step):
            chunk1 = numpy.zeros((step, nPixels), numpy.float64)
            chunk2 = numpy.zeros((nPixels, step), numpy.float64)
            #one by one reading till we fill the chunks
            imagesToRead = numpy.arange(0, oldShape[index], binning)
            i = int(imagesToRead[weights > 0][0])
            spectrumIndex = 0
            nImagesRead = 0
            while i < N:
                #fill chunk1
                jj = 0
                for iToRead in range(0, int(min(step * binning, N - i)),
                                     binning):
                    chunk1[jj] = data[i + iToRead].reshape(1, -1) * \
                                 weights[i + iToRead]
                    jj += 1
                sumSpectrum[spectrumIndex:(spectrumIndex + jj)] = \
                                                    chunk1[0:jj].sum(axis=1)
                if center:
                    average = \
                        sumSpectrum[spectrumIndex:(spectrumIndex + jj)] / nPixels
                    average.shape = jj, 1
                    chunk1[0:jj] -= average
                nImagesRead += jj
                iToRead = jj
                j = 0
                while j <= i:
                    #get step images for the second chunk
                    if j == i:
                        jToRead = iToRead
                        chunk2[:, 0:jToRead] = chunk1[0:jToRead, :].T
                    else:
                        #get step images for the second chunk
                        jj = 0
                        for jToRead in range(0, int(min(step * binning,
                                                        N - j)), binning):
                            chunk2[:, jj] =\
                                data[j + jToRead].reshape(1, -1)\
                                * weights[j + jToRead]
                            jj += 1
                        nImagesRead += jj
                        if center:
                            average = chunk2[:, 0:jj].sum(axis=0) / nPixels
                            average.shape = 1, jj
                            chunk2 -= average
                        jToRead = jj
                    #dot product
                    if (iToRead != step) or (jToRead != step):
                        covMatrix[i:(i + iToRead), j:(j + jToRead)] =\
                                dotblas.dot(chunk1[:iToRead, :nPixels],
                                            chunk2[:nPixels, :jToRead])
                    else:
                        covMatrix[i:(i + iToRead), j:(j + jToRead)] =\
                                dotblas.dot(chunk1, chunk2)

                    if i != j:
                        covMatrix[j:(j + jToRead), i:(i + iToRead)] =\
                                covMatrix[i:(i + iToRead), j:(j + jToRead)].T

                    #increment j
                    j += jToRead * binning
                i += iToRead * binning
            chunk1 = None
            chunk2 = None
        else:
            raise ValueError("PCATools.getCovarianceMatrix: Unhandled case")

        #should one divide by N or by N-1 ??  if we use images, we
        #assume the observables are the images, not the spectra!!!
        #so, covMatrix /= nChannels is wrong and one has to use:
        covMatrix /= usedPixels
    else:
        #the data are already arranged as (nPixels, nChannels) and we
        #basically have to return data.T * data to get back the covariance
        #matrix as (nChannels, nChannels)
        #if someone had the bad idea to store the data in HDF5 with a chunk
        #size based on the pixels and not on the spectra a loop based on
        #reading spectrum per spectrum can be very slow
        step = 0
        divider = 10
        while step < 1:
            step = int(nPixels / divider)
            divider -= 1
            if divider <= 0:
                step = nPixels
                break
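        #note: the chunked step computed above is currently overridden and all
        #the spectra are processed in a single pass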
        step = nPixels
        if DEBUG:
            print("Reading chunks of %d spectra" % step)

        cleanWeights.shape = 1, -1
        if len(data.shape) == 2:
            if cleanMask is not None:
                badMask.shape = -1
            tmpData = numpy.zeros((step, nChannels), numpy.float64)
            k = 0
            while k < nPixels:
                kToRead = min(step, nPixels - k)
                tmpData[0:kToRead] = data[k:k + kToRead, ::binning]
                if cleanMask is not None:
                    tmpData[0:kToRead][badMask[k:k + kToRead]] = 0
                a = tmpData[0:kToRead] * cleanWeights
                sumSpectrum += a.sum(axis=0)
                covMatrix += dotblas.dot(a.T, a)
                a = None
                k += kToRead
            tmpData = None
        elif len(data.shape) == 3:
            if oldShape[0] == 1:
                #close to the previous case
                tmpData = numpy.zeros((step, nChannels), numpy.float64)
                if cleanMask is not None:
                    badMask.shape = data.shape[0], data.shape[1]
                for i in range(oldShape[0]):
                    k = 0
                    while k < oldShape[1]:
                        kToRead = min(step, oldShape[1] - k)
                        tmpData[0:kToRead] = data[i, k:k + kToRead, ::binning]\
                                             * cleanWeights
                        if cleanMask is not None:
                            tmpData[0:kToRead][badMask[i, k:k + kToRead]] = 0
                        a = tmpData[0:kToRead]
                        sumSpectrum += a.sum(axis=0)
                        covMatrix += dotblas.dot(a.T, a)
                        a = None
                        k += kToRead
                tmpData = None
            elif oldShape[1] == 1:
                #almost identical to the previous case
                tmpData = numpy.zeros((step, nChannels), numpy.float64)
                if cleanMask is not None:
                    badMask.shape = data.shape[0], data.shape[1]
                for i in range(oldShape[1]):
                    k = 0
                    while k < oldShape[0]:
                        kToRead = min(step, oldShape[0] - k)
                        tmpData[0:kToRead] = data[k: k + kToRead, i, ::binning]\
                                             * cleanWeights
                        if cleanMask is not None:
                            tmpData[0:kToRead][badMask[k:k + kToRead, i]] = 0
                        a = tmpData[0:kToRead]
                        sumSpectrum += a.sum(axis=0)
                        covMatrix += dotblas.dot(a.T, a)
                        a = None
                        k += kToRead
                tmpData = None
            elif oldShape[0] < 21:
                if step > oldShape[1]:
                    step = oldShape[1]
                tmpData = numpy.zeros((step, nChannels), numpy.float64)
                if cleanMask is not None:
                    badMask.shape = data.shape[0], data.shape[1]
                for i in range(oldShape[0]):
                    k = 0
                    while k < oldShape[1]:
                        kToRead = min(step, oldShape[1] - k)
                        tmpData[0:kToRead] = data[i, k: k + kToRead, ::binning]\
                                             * cleanWeights
                        if cleanMask is not None:
                            tmpData[0:kToRead][badMask[i, k:k + kToRead]] = 0
                        a = tmpData[0:kToRead]
                        sumSpectrum += a.sum(axis=0)
                        covMatrix += dotblas.dot(a.T, a)
                        a = None
                        k += kToRead
                tmpData = None
            else:
                #I should choose the sizes in terms of the size
                #of the dataset
                if oldShape[0] < 41:
                    #divide by 10
                    deltaRow = 4
                elif oldShape[0] < 101:
                    #divide by 10
                    deltaRow = 10
                else:
                    #take pieces of one tenth
                    deltaRow = int(oldShape[0] / 10)
                deltaCol = oldShape[1]
                tmpData = numpy.zeros((deltaRow, deltaCol, nChannels),
                                      numpy.float64)
                if cleanMask is not None:
                    badMask.shape = data.shape[0], data.shape[1]
                i = 0
                while i < oldShape[0]:
                    iToRead = min(deltaRow, oldShape[0] - i)
                    kToRead = iToRead * oldShape[1]
                    tmpData[:iToRead] = data[i:(i + iToRead), :, ::binning]
                    if cleanMask is not None:
                        tmpData[:iToRead][badMask[i:(i + iToRead), :]] = 0
                    a = tmpData[:iToRead]
                    a.shape = kToRead, nChannels
                    a *= cleanWeights
                    #optionally each spectrum could be normalized here:
                    #    a /= a.sum(axis=1).reshape(-1, 1)
                    sumSpectrum += a.sum(axis=0)
                    covMatrix += dotblas.dot(a.T, a)
                    a = None
                    i += iToRead
        #should one divide by N or by N-1 ??
        covMatrix /= usedPixels - 1
        if center:
            #the n-1 appears again here
            averageMatrix = numpy.outer(sumSpectrum, sumSpectrum)\
                            / (usedPixels * (usedPixels - 1))
            covMatrix -= averageMatrix
            averageMatrix = None
    return covMatrix, sumSpectrum / usedPixels, usedPixels
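
# A minimal usage sketch for getCovarianceMatrix (hypothetical shapes; the
# spectral channels are on the last axis, and a plain numpy array with
# force=False selects the direct in-memory path; the module-level DEBUG flag
# is assumed defined):
#
#     stack = numpy.random.random((50, 40, 128))
#     covMatrix, avgSpectrum, nUsed = getCovarianceMatrix(stack, index=-1,
#                                                         force=False)
#     # covMatrix.shape == (128, 128); nUsed == 2000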
Example #21
0
    def mat_mult(self, evect, q):
        self.vr[:evect.shape[0]] = dotblas.dot(evect.astype(self.tipo),
                                               q.vr[:evect.shape[1]])