Example #1
def getLocalCorrelation(a, b, scope=(7,7), verbose=True):
    """
    (811401,) (811401,) (811401,)
    208.300295858
    54067.9823039
    localCorr.max() 1.0
    the mean and var for local corr: 0.18482584644 0.189520182231
    time spent: 19.9111940861
    >>> LC=dbz(matrix=lc)
    """
    #localCov    = getLocalCovariance(a=a, b=b, scope=scope)
    #aVar        = getLocalVariance(a=a, scope=scope)
    #bVar        = getLocalVariance(a=b, scope=scope)
    #localCorr   = localCov / (aVar * bVar)**0.5
    tic()
    aa = a.copy()
    bb = b.copy()
    height, width = aa.matrix.shape
    commonMask = aa.matrix.mask + bb.matrix.mask
    aa.matrix.mask = commonMask
    bb.matrix.mask = commonMask
    aa.matrix.unshare_mask()   # call the method so each copy gets its own mask
    bb.matrix.unshare_mask()
    
    aShifts = getMatrixShifts(aa, scope)
    bShifts = getMatrixShifts(bb, scope)
    aShifts    = [v.reshape(height*width) for v in aShifts]   #flatten before stacking
    aShifts    = ma.vstack(aShifts)    
    bShifts    = [v.reshape(height*width) for v in bShifts]   #flatten before stacking
    bShifts    = ma.vstack(bShifts) 
    #print "\n........................."
    #print aShifts.shape
    localProduct    = (aShifts * bShifts)
    localProduct    = localProduct.mean(axis=0)
    #print 'local product', localProduct.shape
    
    aVar            = aShifts.var(axis=0)
    bVar            = bShifts.var(axis=0)
    aMean           = aShifts.mean(axis=0)
    bMean           = bShifts.mean(axis=0)
    
    #print "........................."
    #print aVar.shape, aMean.shape, localProduct.shape
    #print (localProduct-aMean*bMean).max()
    #print (aVar*bVar).max()
    localCorr       = (localProduct - aMean*bMean) / (aVar * bVar)**.5
    #localCorr    = localCorr * (localCorr>=-1.0000001) * (localCorr<=1.0000001)    #cutting the pathologies
    localCorr.mask+= (localCorr<-1.0000001) + (localCorr>1.0000001)    #masking the pathologies

    #print "localCorr.max()", localCorr.max()
    if verbose:
        print "the mean and var for local corr:", localCorr.mean(), localCorr.var()
    localCorr    = localCorr.reshape(height,width)
    toc()
    return localCorr
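A minimal sketch of the same per-pixel estimator on made-up data (the project's getMatrixShifts, tic/toc and dbz helpers are not reproduced): each column of the stacks holds the scope-window samples of one pixel, and the correlation is (E[ab] - E[a]E[b]) / sqrt(var(a) var(b)), exactly as in the line above.

import numpy as np
import numpy.ma as ma

# Hypothetical stand-in for the stacked shifts: 9 window samples (3x3 scope)
# for each of the 25 pixels of a 5x5 field, with some values masked.
rng = np.random.default_rng(0)
aShifts = ma.masked_greater(rng.normal(size=(9, 25)), 2.0)
bShifts = aShifts + 0.1 * rng.normal(size=(9, 25))   # inherits aShifts' mask

num = (aShifts * bShifts).mean(axis=0) - aShifts.mean(axis=0) * bShifts.mean(axis=0)
den = (aShifts.var(axis=0) * bShifts.var(axis=0)) ** 0.5
localCorr = (num / den).reshape(5, 5)    # per-pixel correlation, approximately in [-1, 1]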
Example #2
def getLocalProduct(a, b, scope=(9,9)):
    """ to get the local "dot product" in a neighbourhood of every point of a 
    11 march 2013
    """ 
    height, width = a.matrix.shape
    aShifts = getMatrixShifts(a, scope)
    bShifts = getMatrixShifts(b, scope)
    aShifts    = [v.reshape(height*width) for v in aShifts]   #flatten before stacking
    aShifts    = ma.vstack(aShifts)    
    bShifts    = [v.reshape(height*width) for v in bShifts]   #flatten before stacking
    bShifts    = ma.vstack(bShifts) 
    localProduct    = (aShifts * bShifts)
    localProduct    = 1. * localProduct.mean(axis=0)
    localProduct    = localProduct.reshape(height,width)
    return localProduct
Example #3
def calculate_aic(eigenworms_matrix_path, shapes_file, coiled_modes_file,
                  num_modes):
    eigenworms_matrix = np.loadtxt(eigenworms_matrix_path,
                                   delimiter=",").astype(np.float32)

    # Load angle library
    f = scipy.io.loadmat(shapes_file)
    thetas_w = ma.array(f["theta_ensemble"])
    thetas_w[thetas_w == 0] = ma.masked
    thetas_library_raw = ma.compress_rows(ma.vstack(thetas_w))
    raw_samples = thetas_library_raw[::2]

    # Load coiled modes library
    with h5py.File(coiled_modes_file, "r") as mat:
        refs = list(mat["#refs#"].keys())[1:]
        tseries_w = [
            ma.masked_invalid(np.array(mat["#refs#"][ref]).T)[:, :num_modes]
            for ref in refs
        ]

    modes_library = ma.compress_rows(ma.vstack(tseries_w))
    # find indices with larger curvature (on the tail of the distribution of angles that can be solved)
    indices_curved = np.abs(modes_library[:, 2]) > np.percentile(
        raw_samples.dot(eigenworms_matrix[:, 2]), 95)
    curved_samples = modes_library[indices_curved].dot(
        eigenworms_matrix[:, :num_modes].T)

    # combine samples
    thetas_library_combined = np.vstack((curved_samples, raw_samples))

    # sample uniformly from various degrees of curvature
    indices = _uniform_samples(
        thetas_library_combined.dot(eigenworms_matrix[:, 2]))

    training_data = thetas_library_combined[indices]

    aic = []
    n_components_range = np.arange(150, 350, 10)
    for n_components in n_components_range:
        # Fit a Gaussian mixture with EM
        try:
            gmm = GaussianMixture(n_components=n_components)
            gmm.fit(training_data)
            aic.append(gmm.aic(training_data))
        except:
            aic.append(np.nan)

    return np.vstack((n_components_range, aic)).T
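The loop above is the usual scikit-learn model-selection pattern (the .aic() call suggests GaussianMixture is sklearn.mixture.GaussianMixture); a compact, self-contained sketch on synthetic data rather than the worm-shape libraries:

import numpy as np
from sklearn.mixture import GaussianMixture

# Fit mixtures over a range of component counts and keep the AIC curve;
# the preferred model is the one with the smallest AIC.
X = np.random.default_rng(0).normal(size=(300, 4))
n_range = np.arange(1, 6)
aic = [GaussianMixture(n_components=n, random_state=0).fit(X).aic(X) for n in n_range]
best_n = int(n_range[np.argmin(aic)])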
Example #4
def sum_running_stats():
    """Find avg per realisation and do a cumulative rolling mean.

    Memory consumption shall be very low.
    """

    for irel in range(NRUN):
        # load as Eclipse run; this will look for EGRID, INIT, UNRST

        print("Loading realization no {}".format(irel))

        srf = xtgeo.surface_from_file(EXPATH1)

        nnum = float(irel + 1)
        srf.values += irel * 1  # just to mimic variability

        if irel == 0:
            pcum = srf.values1d
        else:
            pavg = srf.values1d / nnum
            pcum = pcum * (nnum - 1) / nnum
            pcum = npma.vstack([pcum, pavg])
            pcum = pcum.sum(axis=0)

    # find the averages:
    print(pcum)
    print(pcum.mean())
    return pcum.mean()
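The update in the loop is the running-mean recurrence cum_n = cum_{n-1} * (n - 1)/n + x_n / n, with the two terms stacked and summed through vstack so masked cells stay masked. A stand-alone sketch with made-up masked vectors in place of xtgeo's values1d:

import numpy as np
import numpy.ma as ma

realisations = [ma.masked_invalid(np.array([1.0, np.nan, 3.0])) + i for i in range(4)]

for irel, values1d in enumerate(realisations):
    nnum = float(irel + 1)
    if irel == 0:
        pcum = values1d
    else:
        # old mean weighted by (n-1)/n plus the new realisation divided by n
        pcum = ma.vstack([pcum * (nnum - 1) / nnum, values1d / nnum]).sum(axis=0)

# pcum is now the element-wise mean of the realisations: [2.5, --, 4.5]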
Example #5
    def insert(self, key, record):

        if key in self.index_table:
            raise KeyError("key %s already exists in table" % str(key))

        # note: for a plain dict this lookup would raise KeyError; index_table
        # presumably assigns an index to unseen keys (e.g. a defaultdict over a counter)
        added_row_index = self.index_table[key]

        num_current_rows = len(self.index_table)
        if num_current_rows >= self.data_table.shape[0]:
            num_new_rows = int(self._data_table_growth_factor *
                               num_current_rows)
            new_rows = ma.masked_all((num_new_rows, len(self.column_table)),
                                     dtype=self._data_type)
            self.data_table = ma.vstack((self.data_table, new_rows))
            print "Table enlarged to %d rows" % self.data_table.shape[0]

        for key, in_value in record.items():

            if key not in self.column_table:
                raise KeyError(
                    'Variable "%s" is not registered as a table column' %
                    str(key))
            if key in self.hash_table:
                value = self.hash_table[key][in_value]
            else:
                value = in_value

            index = self.column_table[key]
            self.data_table[added_row_index, index] = value
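The growth step above relies on appending fully-masked spare rows that later inserts fill in; a tiny sketch of just that pattern, independent of the surrounding table class:

import numpy.ma as ma

table = ma.masked_all((4, 3), dtype=float)     # fully masked 4x3 table
spare = ma.masked_all((2, 3), dtype=float)     # freshly grown, still empty rows
table = ma.vstack((table, spare))              # now 6x3, everything masked
table[0, 1] = 2.5                              # assigning a value unmasks that cell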
Example #6
def sum_running_stats_bytestream():
    """Find avg per realisation and do a cumulative rolling mean.

    Memory consumption shall be very low.
    """

    for irel in range(NRUN):
        # load as Eclipse run; this will look for EGRID, INIT, UNRST

        print('Loading realization no {}'.format(irel))

        with open(EXPATH1, "rb") as myfile:
            stream = io.BytesIO(myfile.read())

        srf = xtgeo.RegularSurface(stream, fformat="irap_binary")

        nnum = float(irel + 1)
        srf.values += irel * 1  # just to mimic variability

        if irel == 0:
            pcum = srf.values1d
        else:
            pavg = srf.values1d / nnum
            pcum = pcum * (nnum - 1) / nnum
            pcum = npma.vstack([pcum, pavg])
            pcum = pcum.sum(axis=0)

    # find the averages:
    print(pcum)
    print(pcum.mean())
    return pcum.mean()
Example #7
 def _expected(self, transpose=False):
     data = self.data
     if transpose:
         data = self.data.T
     # Expected raster weights per target grid cell.
     # This is the (fractional) source cell contribution
     # to each target cell (out of 255)
     weights = np.array([[[63, 127, 127],   # top left hand cell (tlhc)
                          [127, 255, 255]],
                         [[127, 127, 63],   # top right hand cell (trhc)
                          [255, 255, 127]],
                         [[127, 255, 255],  # bottom left hand cell (blhc)
                          [63, 127, 127]],
                         [[255, 255, 127],  # bottom right hand cell (brhc)
                          [127, 127, 63]]], dtype=np.uint8)
     weights = weights / 255
     # Expected source points per target grid cell.
     tmp = data[1:-1, 1:-1]
     shape = (-1, 2, 3)
     cells = [tmp[slice(0, 2), slice(0, 3)].reshape(shape),       # tlhc
              tmp[slice(0, 2), slice(3, None)].reshape(shape),    # trhc
              tmp[slice(2, None), slice(0, 3)].reshape(shape),    # blhc
              tmp[slice(2, None), slice(3, None)].reshape(shape)] # brhc
     cells = ma.vstack(cells)
     # Expected fractional weighted result.
     num = (cells * weights).sum(axis=(1, 2))
     dom = weights.sum(axis=(1, 2))
     expected = num / dom
     expected = ma.asarray(expected.reshape(2, 2))
     if transpose:
         expected = expected.T
     return expected
Example #8
    def postProcessor_characteristic_collector(self):
        collected_characteristics_vector = self.collected_burst
        for key in self.converted_characteristic.keys():
            collected_characteristics_vector = vstack(
                (collected_characteristics_vector,
                 self.converted_characteristic[key]))

        return collected_characteristics_vector
Example #9
def generate(shapes_file, coiled_modes_file, eigenworms_matrix_path, out_file, num_gaussians):
    # Load angle library from Greg
    f = scipy.io.loadmat(shapes_file)
    thetas_w = ma.array(f["theta_ensemble"])
    thetas_w[thetas_w == 0] = ma.masked
    thetas_library_raw = ma.compress_rows(ma.vstack(thetas_w))

    # Load library from Onno
    mat = h5py.File(coiled_modes_file, "r")
    refs = list(mat["#refs#"].keys())[1:]
    tseries_w = [ma.masked_invalid(np.array(mat["#refs#"][ref]).T)[:, :5] for ref in refs]
    mat.close()
    modes_library = ma.compress_rows(ma.vstack(tseries_w))

    eigenworms_matrix = np.loadtxt(eigenworms_matrix_path, delimiter=",").astype(np.float32)

    # same number of samples from full theta
    # raw_samples = thetas_library_raw[np.random.choice(np.arange(len(thetas_library_raw)),np.sum(indices_curved),replace=False)]
    raw_samples = thetas_library_raw[::2]

    # find indices with larger curvature
    indices_curved = np.abs(modes_library[:, 2]) > np.percentile(raw_samples.dot(eigenworms_matrix[:, 2]), 97.5)
    # get same number of samples from raw angles and projected modes
    curved_samples = modes_library[indices_curved].dot(eigenworms_matrix[:, :5].T)

    thetas_library_combined = np.vstack((curved_samples, raw_samples))

    indices = uniform_samples(thetas_library_combined.dot(eigenworms_matrix[:, 2]))
    training_data = thetas_library_combined[indices]

    # fit gaussian mixture model
    gmm = GaussianMixture(n_components=num_gaussians)
    gmm.fit(training_data)

    # sort according to curvature
    sorting_indices = np.argsort(np.sum(np.abs(np.diff(gmm.means_, axis=1)), axis=1))
    means = gmm.means_[sorting_indices]
    covariances = gmm.covariances_[sorting_indices]
    weights = gmm.weights_[sorting_indices]

    with gzip.open(out_file, "wt") as f:
        json.dump({"means": means.tolist(), "covariances": covariances.tolist(), "weights": weights.tolist()}, f)
Example #10
def compute_master_theta(model_indices, all_models, windows_sim, tseries_sim):
    master_tseries = []
    for model in all_models[model_indices]:
        sim_idx, kw = model
        t0, tf = windows_sim[sim_idx][kw]
        ts = tseries_sim[sim_idx][t0:tf]
        master_tseries.append(ts)
        master_tseries.append([np.nan] * ts.shape[1])
    master_tseries = ma.masked_invalid(ma.vstack(master_tseries))
    master_theta, eps = lvarc.get_theta_masked(master_tseries)
    return master_theta
Example #11
def compute_master_theta(models, windows, tseries):
    master_tseries = []
    for window_idx in models:
        window = windows[window_idx]
        t0, tf = window
        ts = tseries[t0:tf]
        master_tseries.append(ts)
        master_tseries.append([np.nan] * ts.shape[1])
    master_tseries = ma.masked_invalid(ma.vstack(master_tseries))
    master_theta, eps = lvarc.get_theta_masked(master_tseries)
    return master_theta
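Both compute_master_theta variants use the same trick: append a row of NaNs after each window and mask it afterwards, so the stacked series keeps explicit gaps between windows. A self-contained sketch (lvarc.get_theta_masked is project-specific and not reproduced):

import numpy as np
import numpy.ma as ma

windows = [np.ones((3, 2)), 2.0 * np.ones((2, 2))]   # two toy time-series windows
parts = []
for ts in windows:
    parts.append(ts)
    parts.append([np.nan] * ts.shape[1])             # separator row after each window
master = ma.masked_invalid(ma.vstack(parts))          # shape (7, 2); rows 3 and 6 are masked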
Example #12
def getLocalMean(a, scope=(5,5), verbose=True):
    
    matrixShifts = getMatrixShifts(a, scope)       
    height, width   = a.matrix.shape
    matrixShifts    = [v.reshape(height*width) for v in matrixShifts]   #flatten before stacking
    matrixShifts    = ma.vstack(matrixShifts)

    localMean = matrixShifts.mean(axis=0)
    localMean = localMean.reshape(height,width)
    if verbose:
        print "the mean and var for localMean:", localMean.mean(), localMean.var()
    return localMean
Example #13
def test_steepest():
    data = mlclass.ex4()
    x = add_bias(StandardScaler().fit_transform(data['x']))
    y = data['y']
    theta = array([.01, .01, .01])
    c = lambda theta: logistic.model.cost(x, y, theta)
    g = lambda theta: logistic.model.grad(x, y, theta)
    assert_array_almost_equal([-0.0254469, 1.14114, 1.21333], steepest_gd(c, g, theta, max_iter=500)[0], decimal=1)

    y = vstack([data['y'], 1 - data['y']]).T
    theta = array([[.01, .01, .01]])
    c = lambda theta: maxent.model.cost(x, y, theta)
    g = lambda theta: maxent.model.grad(x, y, theta)
    assert_array_almost_equal([[-0.0254469, 1.14114, 1.21333]], steepest_gd(c, g, theta, max_iter=500)[0], decimal=1)
Example #14
    def test_corrcoef(self):

        r = ma.masked_equal(np.load("data/ml-1m/rating.npy"), 0)
        # sim = ma.corrcoef(r[0], r[2412])
        # print(sim)

        # print(np.corrcoef(r[0].filled(0), r[2412].filled(0)))

        sim2 = ma.corrcoef(ma.vstack([r[0], r[2412]]))
        print(sim2)

        print(ma.dot(r[0], r[2412])/math.sqrt(ma.dot(r[0],r[0]))/math.sqrt(ma.dot(r[2412],r[2412])))

        r0_m = r[0] - ma.mean(r[0])
        r1_m = r[2412] - ma.mean(r[2412])
        print(ma.dot(r0_m, r1_m)/math.sqrt(ma.dot(r0_m,r0_m))/math.sqrt(ma.dot(r1_m,r1_m)))
Example #15
def test_newton():
    data = mlclass.ex4()
    x = add_bias(data['x'])
    y = data['y']
    theta = array([.01, .01, .01])
    c = lambda theta: logistic.model.cost(x, y, theta, 0.)
    g = lambda theta: logistic.model.grad(x, y, theta, 0.)
    h = lambda theta: logistic.model.hessian(x, theta, 0.)
    assert_array_almost_equal([-16.3787, 0.1483, 0.1589], newton(c, g, h, theta)[0], decimal=3)

    y = vstack([data['y'], 1 - data['y']]).T
    theta = array([[.01, .01, .01]])
    c = lambda theta: maxent.model.cost(x, y, theta, 0.)
    g = lambda theta: maxent.model.grad(x, y, theta, 0.)
    h = lambda theta: maxent.model.hessian(x, theta, 0.)
    assert_array_almost_equal([[-16.3787, 0.1483, 0.1589]], newton(c, g, h, theta)[0], decimal=3)
Example #16
def XWrap2(x, P0, fill_value=0, pow2=False):
    """
    Extend and wrap array.
    
    Fold the array every P0 indices.  There will typically be a hanging
    part of the array.  This is padded out.

    Parameters
    ----------

    x     : input
    P0    : Base period, units of elements
    pow2  : If true, pad out nRows so that it's the next power of 2.

    Return
    ------

    xwrap : Wrapped array.

    """

    ncad = x.size  # Number of cadences
    # for some reason np.ceil(ncad/P0) doesn't work!
    nrow = int(np.floor(ncad / P0) + 1)
    nExtend = nrow * P0 - ncad  # Pad out remainder of array with 0s.

    if type(x) is np.ma.core.MaskedArray:
        pad = ma.empty(nExtend)
        pad.mask = True
        x = ma.hstack((x, pad))
    else:
        pad = np.empty(nExtend)
        pad[:] = fill_value
        x = np.hstack((x, pad))

    xwrap = x.reshape(nrow, -1)

    if pow2:
        k = np.ceil(np.log2(nrow)).astype(int)
        nrow2 = 2**k
        fill = ma.empty((nrow2 - nrow, P0))
        fill[:] = fill_value
        fill.mask = True
        xwrap = ma.vstack([xwrap, fill])

    return xwrap
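A usage sketch on made-up input: folding a 10-element masked series at period 4 leaves a hanging part of 2 elements, which XWrap2 pads with masked values so the result is rectangular.

import numpy as np
import numpy.ma as ma

x = ma.masked_invalid(np.arange(10, dtype=float))
xwrap = XWrap2(x, 4)                        # shape (3, 4)
print(xwrap.shape, int(xwrap.mask.sum()))   # (3, 4) 2  -- two masked padding cells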
Example #17
 def _expected(self, transpose=False):
     data = self.data
     if transpose:
         data = self.data.T
     # Expected raster weights per target grid cell.
     # This is the (fractional) source cell contribution
     # to each target cell (out of 255)
     weights = np.array(
         [
             [
                 [63, 127, 127],  # top left hand cell (tlhc)
                 [127, 255, 255]
             ],
             [
                 [127, 127, 63],  # top right hand cell (trhc)
                 [255, 255, 127]
             ],
             [
                 [127, 255, 255],  # bottom left hand cell (blhc)
                 [63, 127, 127]
             ],
             [
                 [255, 255, 127],  # bottom right hand cell (brhc)
                 [127, 127, 63]
             ]
         ],
         dtype=np.uint8)
     weights = weights / 255
     # Expected source points per target grid cell.
     tmp = data[1:-1, 1:-1]
     shape = (-1, 2, 3)
     cells = [
         tmp[slice(0, 2), slice(0, 3)].reshape(shape),  # tlhc
         tmp[slice(0, 2), slice(3, None)].reshape(shape),  # trhc
         tmp[slice(2, None), slice(0, 3)].reshape(shape),  # blhc
         tmp[slice(2, None), slice(3, None)].reshape(shape)
     ]  # brhc
     cells = ma.vstack(cells)
     # Expected fractional weighted result.
     num = (cells * weights).sum(axis=(1, 2))
     dom = weights.sum(axis=(1, 2))
     expected = num / dom
     expected = ma.asarray(expected.reshape(2, 2))
     if transpose:
         expected = expected.T
     return expected
Example #18
def XWrap2(x,P0,fill_value=0,pow2=False):
    """
    Extend and wrap array.
    
    Fold the array every P0 indices.  There will typically be a hanging
    part of the array.  This is padded out.

    Parameters
    ----------

    x     : input
    P0    : Base period, units of elements
    pow2  : If true, pad out nRows so that it's the next power of 2.

    Return
    ------

    xwrap : Wrapped array.

    """

    ncad = x.size # Number of cadences
    # for some reason np.ceil(ncad/P0) doesn't work!
    nrow = int( np.floor(ncad/P0) +1 )
    nExtend = nrow * P0 - ncad # Pad out remainder of array with 0s.

    if type(x) is np.ma.core.MaskedArray:
        pad = ma.empty(nExtend)
        pad.mask = True
        x = ma.hstack( (x ,pad) )
    else:    
        pad = np.empty(nExtend) 
        pad[:] = fill_value
        x = np.hstack( (x ,pad) )

    xwrap = x.reshape( nrow,-1 )

    if pow2:
        k = np.ceil(np.log2(nrow)).astype(int)
        nrow2 = 2**k
        fill    = ma.empty( (nrow2-nrow,P0) )
        fill[:] = fill_value
        fill.mask=True
        xwrap = ma.vstack([xwrap,fill])

    return xwrap
Example #19
def getLocalVariance(a, scope=(5,5), verbose=True):
    """ to get the local variance in a neighbourhood of every point of a
    variance = mean(X^2) - mean(X)^2
    """
    matrixShifts = getMatrixShifts(a, scope, verbose=verbose)        # matrixShifts[i+Nj]=a.shiftMatrix(i,j)
                                                    # where N=9 for the moment
    height, width   = a.matrix.shape
    matrixShifts    = [v.reshape(height*width) for v in matrixShifts]   #flatten before stacking
    matrixShifts    = ma.vstack(matrixShifts)

    #matrixCounts    = (1-matrixShifts.mask).sum(axis=0)         #count the valid entries at each position
    #matrixSums      = matrixShifts.sum(axis=0)
    #matrixSquares   = matrixShifts**2
    #matrixSquareSums= matrixSquares.sum(axis=0)
    #localVariance = matrixSquareSums*1./matrixCounts - (matrixSums*1./matrixCounts)**2
    localVariance   = matrixShifts.var(axis=0)
    localVariance   = localVariance.reshape(height, width)      # reform the matrix
    if verbose:
        print "local variance sum, var=", localVariance.sum(), localVariance.var()
    return localVariance
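The docstring and the commented-out block both use the identity var(X) = mean(X**2) - mean(X)**2, which is what .var(axis=0) (population variance, ddof=0) computes column-wise over the unmasked entries; a quick check on made-up data:

import numpy as np
import numpy.ma as ma

stack = ma.masked_greater(np.random.default_rng(1).normal(size=(9, 6)), 1.5)
lhs = stack.var(axis=0)
rhs = (stack ** 2).mean(axis=0) - stack.mean(axis=0) ** 2
assert ma.allclose(lhs, rhs)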
Example #20
def sum_stats():
    """Accumulate numpies for all realisations and then do stats.

    This will be quite memory intensive, and memory consumption will
    increase linearly.
    """

    propsd = {}

    for irel in range(NRUN):
        # load as Eclipse run; this will look for EGRID, INIT, UNRST

        print("Loading realization no {}".format(irel))
        grd = xtgeo.grid3d.Grid()
        grd.from_file(
            GRIDFILEROOT,
            fformat="eclipserun",
            initprops=INITPROPS,
            restartprops=RESTARTPROPS,
            restartdates=RDATES,
        )

        for prop in grd.props:
            if prop.name not in propsd:
                propsd[prop.name] = []
            if prop.name == "PORO":
                prop.values += irel * 0.001  # mimic variability aka ensembles
            else:
                prop.values += irel * 1  # just to mimic variability

            propsd[prop.name].append(prop.values1d)

    # find the averages:
    porovalues = npma.vstack(propsd["PORO"])
    poromeanarray = porovalues.mean(axis=0)
    porostdarray = porovalues.std(axis=0)
    print(poromeanarray)
    print(poromeanarray.mean())
    print(porostdarray)
    print(porostdarray.mean())
    return poromeanarray.mean()
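A stand-alone sketch of the statistics step, with made-up masked PORO vectors in place of the xtgeo grid properties: one 1-D vector per realisation, stacked so that axis 0 is the realisation axis.

import numpy as np
import numpy.ma as ma

poro_per_real = [ma.masked_invalid(np.array([0.20, np.nan, 0.25])) + 0.001 * i for i in range(5)]
porovalues = ma.vstack(poro_per_real)      # shape (5, 3)
poromeanarray = porovalues.mean(axis=0)    # per-cell mean; the fully masked cell stays masked
porostdarray = porovalues.std(axis=0)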
Example #21
def rotate_rects(rects, center, angle):
    """
    @param rects: n by 5 by 2 array of n rectangles.
        Each rectangle consists of five (x, y) coordinates. Any polygon would work, in fact.
    @type rects: ndarray
    @type center: tuple
    @type angle: float
    @rtype: ndarray
    """
    # 2 by 3 rotation matrix
    rot_mat = cv2.getRotationMatrix2D(center, angle, 1.0)

    n, p, d = rects.shape

    # 2 by n*5 array
    points = vstack((rects.T.reshape(d, n * p, order='F'), ones((1, n * p))))

    rotated_points = num.dot(rot_mat, points)
    rotated_rects = rotated_points.T.reshape(n, p, d)

    return rotated_rects
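For reference, cv2.getRotationMatrix2D returns a 2x3 affine rotation about `center`; a hypothetical pure-NumPy stand-in (standard counter-clockwise maths convention, so the sign of the angle may differ from OpenCV's image convention), applied to homogeneous columns the same way as above:

import numpy as np

def rotation_matrix_2x3(center, angle_deg):
    """2x3 affine matrix rotating points counter-clockwise about center."""
    cx, cy = center
    a = np.deg2rad(angle_deg)
    c, s = np.cos(a), np.sin(a)
    # R @ p + (center - R @ center), folded into a single 2x3 block
    return np.array([[c, -s, cx - c * cx + s * cy],
                     [s,  c, cy - s * cx - c * cy]])

square = np.array([[1.0, 0], [2, 0], [2, 1], [1, 1], [1, 0]])   # one 5-point rectangle
points = np.vstack((square.T, np.ones((1, len(square)))))        # 3 x 5 homogeneous coords
rotated = rotation_matrix_2x3((1.5, 0.5), 90.0) @ points         # 2 x 5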
Example #22
def sum_running_stats():
    """Find avg per realisation and do a cumulative rolling mean.

    Memory consumption shall be very low.
    """

    for irel in range(NRUN):
        # load as Eclipse run; this will look for EGRID, INIT, UNRST

        print("Loading realization no {}".format(irel))

        grd = xtgeo.grid3d.Grid()
        grd.from_file(
            GRIDFILEROOT,
            fformat="eclipserun",
            restartprops=RESTARTPROPS,
            restartdates=RDATES,
            initprops=INITPROPS,
        )

        nnum = float(irel + 1)
        for prop in grd.props:
            if prop.name == "PORO":
                prop.values += irel * 0.001  # mimic variability aka ensembles
            else:
                prop.values += irel * 1  # just to mimic variability

            if prop.name == "PORO":
                if irel == 0:
                    pcum = prop.values1d
                else:
                    pavg = prop.values1d / nnum
                    pcum = pcum * (nnum - 1) / nnum
                    pcum = npma.vstack([pcum, pavg])
                    pcum = pcum.sum(axis=0)

    # find the averages:
    print(pcum)
    print(pcum.mean())
    return pcum.mean()
Example #23
def probAdjustEquil(binProb,rates,uncert,threshold=0.0,fullCalcClust=False,fullCalcBins=False):
    """This function adjusts bin pops in binProb using rates and uncert matrices
    fullCalcBins --> True for weighted avg, False for simple calc
    fullCalcClust --> True for weighted avg, False for simple calc
    threshold --> minimum weight (relative to max) for another value to be averaged
            only matters if fullCalcBins == True (or later perhaps if fullCalcClust == True)
    """
    
    
    # Check that rate matrix is square
    Ni,Nj = rates.shape
    if Ni != Nj:
        print('\nWARNING: Not a square matrix!\n')

    zi = np.where(binProb == 0.0)[0]  # indices of bins with zero probability
    
    rates_uncert = UncertMath.UncertContainer(rates,rates - uncert,rates + uncert)
    
    # STEP 1a: Create matrix of ratios of probabilities based on DIRECT estimates
    # that is, ij element is p_i / p_j = k_ji / k_ij
    
    ratios_direct = rates_uncert.transpose() / rates_uncert  

    # STEP 1b: Create averaged matrix of ratios of probabilities based on both direct and indirect estimates
    # Indirect means '3rd bin' estimates: p_i / p_j = ( k_ki / k_ik ) ( k_jk / k_kj )
    # Turns out this is not helpful, so generally set fullCalcBins = 0 
    if fullCalcBins:
        # Calculate indirect ratios using Einstein Summation convention where
        # ratios_indirect_kij  = ( k_ki / k_ik ) ( k_jk / k_kj ) = ratios_direct_ik * ratios_direct_kj
        ri_vals = np.einsum('ik,kj->kij',ratios_direct.vals,ratios_direct.vals)
        ri_min = np.einsum('ik,kj->kij',ratios_direct.dmin,ratios_direct.dmin)
        ri_max = np.einsum('ik,kj->kij',ratios_direct.dmax,ratios_direct.dmax)
        ratios_indirect = UncertMath.UncertContainer(ri_vals,ri_min,ri_max,mask=ratios_direct.vals.mask)

        # Threshold indirect ratios 
        ti = ratios_indirect.wt < ratios_direct * threshold
        ratios_indirect.mask = ti
        ratios_indirect.update_mask()

        ratios_indirect.concatenate(ratios_direct,axis=0) 
        ratios_average = ratios_indirect.weighted_average(axis=0)
 
    else:
        ratios_average = ratios_direct.weighted_average(axis=0,expaxis=0)
    

    # STEP 2: Form clusters

    # STEP 2a: Sort probability ratios based on uncertainty
    # Sort uncertainties of ratios_average subject to the convention that p_i < p_j
    
    i,j = np.triu_indices(Ni,1) # indices of ij pairs where i != j

    # Remove pairs that include a bin that has zero probability
    nzi = (binProb[i] != 0.0) & (binProb[j] != 0.0)
    i = i[nzi]
    j = j[nzi]

    vals = ma.vstack((ratios_average.vals[i,j],ratios_average.vals[j,i]))
    ias = ma.argsort(vals,axis=0,fill_value=np.inf)
    
    ordered_ind = np.vstack((i,j))
    flip_ind = np.nonzero(ias[0,:]) # Find pairs in which to select ji rather than ij
    ordered_ind[:,flip_ind[0]] = ordered_ind[:,flip_ind[0]][::-1]
    
    iind = ordered_ind[0,:]
    jind = ordered_ind[1,:]
    uncertij = ratios_average.uncert[iind,jind] # Get the uncert for ij pairs

    count = uncertij.count() # Count of the unmasked uncertainties
    ias = ma.argsort(uncertij,fill_value=np.inf) # Get the indices that would sort uncertij
    iind = iind[ias[:count]] # Sort the indices excluding masked/undefined values
    jind = jind[ias[:count]]


    # STEP 2b: Create ClusterList object and cluster bins
    clusters = BinCluster.ClusterList(ratios_average,Ni)

    if fullCalcClust:
        clusters.join((iind,jind))
    else:
        clusters.join_simple((iind,jind))

    total_prob = 0.0  # total probability in all clusters
    for cid in clusters.cluster_contents:
        binlist = list(clusters.cluster_contents[cid])
        if len(binlist):
            prob_cluster = binProb[binlist].sum()
            total_prob += prob_cluster

            binProb[binlist] = prob_cluster * clusters.bin_data[binlist].vals

    binProb[zi] = 0.0 # re-zero bins that previously had zero prob
    #for bi,p in enumerate(binProb):
    #    print('bin: {} -- {}'.format(bi,p))
    print('.........Total Probability: {}'.format(binProb.sum()))
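The pair-ordering step above stacks the two ratio estimates of each (i, j) pair and argsorts along axis 0 with masked entries pushed to the end, so row 0 of the result names the smaller, well-defined member of each pair; a small self-contained illustration:

import numpy as np
import numpy.ma as ma

vals = ma.masked_invalid(np.array([[0.5, np.nan, 2.0],
                                   [1.5, 0.3,    0.8]]))
order = ma.argsort(vals, axis=0, fill_value=np.inf)
# order[0] == [0, 1, 1]: the index of the smaller unmasked value in each column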
Example #25
def gaussianSmooothNormalisedCorrelation(obs, wrf, sigma=20, sigmaWRF=5, thres=15, showImage=True,
                                         saveImage=True,  outputFolder="", 
                                         outputType="correlation",
                                         *args, **kwargs):
    """
    to use normalised correlation to study the similarity between obs and wrf
    codes from
    armor.tests.gaussianSmoothNormalisedCorrelation2
    input:
        sigma = sigma for obs
        sigmaWRF    = sigma for wrf
    """
    if outputFolder =="":
        try:
            outputFolder = obs.imageFolder
        except AttributeError:
            outputFolder = pattern.defaultOutputFolderForImages
    if showImage:            
        import pylab
        pylab.ion()
    k = obs         # alias
    w = wrf

    matrix0   = copy.copy(k.matrix)
    k.getCentroid()
    k.setThreshold(thres)  #2014-05-30
    k.matrix = k.gaussianFilter(sigma).matrix
    #k.matrix = 100.* (k.matrix>=thres) 
    k.matrix.mask = np.zeros(k.matrix.shape)
    #k.makeImage(closeAll=True)
    #pylab.draw()
    #correlations = []

    w.getCentroid()
    w.setThreshold(thres)  #2014-05-30
    w1 = w.gaussianFilter(sigmaWRF)
    topRowName = w.name + ', gaussian(' + str(sigmaWRF) + ') and ' + k.name
    topRow = ma.hstack([w.matrix, w1.matrix, matrix0])
    #w1.matrix = 100.*(w1.matrix>=thres)
    w1.matrix.mask = np.zeros(w1.matrix.shape)
    try:
        ############################################
        #   key lines
        w2 = w1.momentNormalise(k)
        w3 = w1.momentNormalise(k, extraAngle=np.pi)
        if outputType=="correlation" or outputType=="corr":
            corr    = w2.corr(k)
            corr2   = w3.corr(k)
            if  corr2 > corr:
                print '180 degree switch: '
                print '   ', k.name, w.name ,corr, corr2, '\n................................' 
                corr = corr2
                w2 = w3
            returnValue= corr 
        #elif outputType=="regression" or outputType=="regress":
        else:
            x,  residuals   = w2.regress(k)
            x2, residuals2  = w3.regress(k)
            if residuals2 < residuals:
                print '180 degree switch: '
                print '   ', k.name, w.name, residuals2, "<", residuals, '\n................................' 
                x = x2
                w2 = w3 
            returnValue = x
                
        #
        #############################################

        #######        
        #   making the output image

        w2.matrix = ma.hstack([w1.matrix, w2.matrix, k.matrix])
        w2.name   = w.name + ', normalised, and ' + k.name + '\nnormalised '
        if outputType=="corr" or outputType=="correlation":
            w2.name += 'correlation:  ' + str(corr)
        w2.matrix = ma.vstack([w2.matrix, topRow])
        w2.name  = topRowName + '\n' + "bottom row:" + w2.name
        w2.imagePath = outputFolder + w.name + '_' + k.name + '_sigma' + str(sigma) + '_thres' + str(thres) + '.png'
        w2.vmin= -20.
        w2.vmax = 100.
        if saveImage:
            w2.saveImage()
        if showImage:
            w2.makeImage(closeAll=True)
            pylab.draw()

        #
        ############################################
    #except IndexError:
    except SyntaxError:
        returnValue = -999
    # restoring the matrix
    k.backupMatrix('gaussian smooth normalised correlations, sigma='+ str(sigma) + 'threshold=' + str(thres)) 
    k.matrix = matrix0

    return returnValue
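The output image above is assembled purely by tiling masked fields: three panels per row with ma.hstack, then the two rows with ma.vstack. A minimal sketch of just that composition on made-up fields:

import numpy as np
import numpy.ma as ma

a, b, c = (ma.masked_less(np.random.default_rng(k).normal(size=(4, 4)), -1.0) for k in range(3))
bottom = ma.hstack([a, b, c])           # shape (4, 12): three panels side by side
top = ma.hstack([c, b, a])
composite = ma.vstack([bottom, top])    # shape (8, 12): two panel rows stacked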
Example #26
def update(i):

    offsets = ma.vstack((targets_array[i, :, :], centroids[i, :]))
    scat.set_offsets(offsets)
    scat.set_facecolors(['b'] * (np.shape(offsets)[0] - 1) + ['r'])
    return
Example #27
def fuzzyfy(features, cfg):
    """

        FIXME: Looks like skfuzzy.trapmf does not handle well masked values.
               I must think better what to do with masked input values. What
               to do when there is one feature, but the other features are
               masked?
    """

    features_list = list(cfg['features'].keys())

    N = features[features_list[0]].size

    # The fuzzy set are usually: low, medium, high
    # The membership of each fuzzy set are each feature scaled.
    membership = {}
    for f in cfg['output'].keys():
        membership[f] = {}

    for t in features_list:
        for m in membership:
            assert m in cfg['features'][t], \
                    "Missing %s in %s" % (m, cfg['features'][t])

            membership[m][t] = ma.masked_all_like(features[t])
            ind = ~ma.getmaskarray(features[t])
            if m == 'low':
                membership[m][t][ind] = zmf(
                    np.asanyarray(features[t])[ind], cfg['features'][t][m])
            elif m == 'high':
                membership[m][t][ind] = smf(
                    np.asanyarray(features[t])[ind], cfg['features'][t][m])
            else:
                membership[m][t][ind] = trapmf(
                    np.asanyarray(features[t])[ind], cfg['features'][t][m])

    # Rule Set
    rules = {}
    # Low: u_low = mean(S_l(spike), S_l(clim)...)
    #u_low = np.mean([weights['spike']['low'],
    #    weights['woa_relbias']['low']], axis=0)

    tmp = membership['low'][features_list[0]]
    for f in features_list[1:]:
        tmp = ma.vstack((tmp, membership['low'][f]))

    # FIXME: If there is only one feature, it will return 1 value
    #          instead of an array with N values.
    rules['low'] = ma.mean(tmp, axis=0)

    # IMPROVE IT: Morello2014 doesn't even use the medium uncertainty,
    #   so no reason to estimate it. In the generalize this once the
    #   membership combining rules are defined in the cfg, so I can
    #   decide to use mean or max.
    if 'medium' in membership:
        # Medium: u_medium = mean(S_l(spike), S_l(clim)...)
        #u_medium = np.mean([weights['spike']['medium'],
        #    weights['woa_relbias']['medium']], axis=0)

        tmp = membership['medium'][features_list[0]]
        for f in features_list[1:]:
            tmp = ma.vstack((tmp, membership['medium'][f]))

        rules['medium'] = ma.mean(tmp, axis=0)

    # High: u_high = max(S_l(spike), S_l(clim)...)
    #u_high = np.max([weights['spike']['high'],
    #    weights['woa_relbias']['high']], axis=0)

    tmp = membership['high'][features_list[0]]
    for f in features_list[1:]:
        tmp = ma.vstack((tmp, membership['high'][f]))

    rules['high'] = ma.max(tmp, axis=0)

    return rules
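The rule combination at the end boils down to stacking one membership row per feature and reducing across features (mean for 'low' and 'medium', max for 'high'); a self-contained sketch with made-up membership values for the two features named in the comments:

import numpy as np
import numpy.ma as ma

low_membership = {
    'spike':       ma.masked_invalid([0.1, 0.8, np.nan, 0.4]),
    'woa_relbias': ma.masked_invalid([0.2, 0.6, 0.9, np.nan]),
}
stacked = ma.vstack([low_membership[f] for f in low_membership])
u_low = ma.mean(stacked, axis=0)    # per-measurement mean; masked features are ignored
u_high_style = ma.max(stacked, axis=0)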
Example #28
                w1.matrix.mask = np.zeros(w1.matrix.shape)
                #w1.vmax = 2
                #w1.vmin =-2
                #w.makeImage(closeAll=True)
                #pylab.draw()
                #print "w.matrix.shape, w.matrix.mask.shape", w.matrix.shape, w.matrix.mask.shape
                try:
                    ############################################
                    #   punchlines
                    w2 = w1.momentNormalise(k)
                    corr = w2.corr(k)
                    #w2.vmax = 2
                    #w2.vmin =-2
                    w2.matrix = ma.hstack([w1.matrix, w2.matrix, k.matrix])
                    w2.name   = w.name + ', normalised, and ' + k.name + '\nnormalised correlation:  ' + str(corr)
                    w2.matrix = ma.vstack([w2.matrix, topRow])
                    w2.name  = topRowName + '\n' + "bottom row:" + w2.name
                    w2.imagePath = '/home/k/ARMOR/python/testing/' + w.name + '_' + k.name + '_sigma' + str(sigma) + '_thres' + str(thres) + '.png'
                    w2.vmin= -20.
                    w2.vmax = 100.
                    if saveImages:
                        w2.saveImage()
                    if makeImages:
                        w2.makeImage(closeAll=True)
                        pylab.draw()

                    #
                    ############################################
                #except IndexError:
                except SyntaxError:
                    corr = -999
Example #29
             linestyle='dotted')

ax[0].set_ylim(-0.006, 0.064)
ax[0].set_xlim(np.min(t_in), np.max(t_in))

ax[0].tick_params(axis='x', which='major', labelsize=14)
ax[1].tick_params(axis='x', which='major', labelsize=14)
ax[1].hlines(mlim,
             np.min(t_in),
             np.max(t_in),
             color='black',
             alpha=0.9,
             linestyle='dotted')

#we stack up the separate instruments
tstack = ma.vstack([tmabrite, tmaastep, tmabring])
astack = ma.vstack([amabrite, amaastep, amabring])
estack = ma.vstack([emabrite, emaastep, emabring])

# now find which instrument has the smallest error and make an array from that

min_amp_ind = ma.argmin(estack, axis=0)

# pull out the lowest values

min_amp = astack[min_amp_ind, np.arange(min_amp_ind.size)]
min_tim = tstack[min_amp_ind, np.arange(min_amp_ind.size)]
min_err = estack[min_amp_ind, np.arange(min_amp_ind.size)]

ax[1].errorbar(min_tim,
               min_amp,
Example #30
def choose_for_augmentation(X, Y_class, Y_loc, n_per_class):
    """A function to randomly select only some of the data, but in  fashion that ensures that the classes are balanced 
    (i.e. there are equal numbers of each class).  Particularly useful if working with real data, and the classes
    are usually very unbalanced (lots of no_def lables, normally).  
    Inputs:
        X           | rank 4 array | data.  
        Y_class     | rank 2 array | One hot encoding of class labels
        Y_loc       | rank 2 array | locations of deformation
        n_per_class | int | number of data per class. e.g. 3
    Returns:
        X_sample           | rank 4 array | data.  
        Y_class_sample     | rank 2 array | One hot encoding of class labels
        Y_loc_sample       | rank 2 array | locations of deformation
    History:
        2019/??/?? | MEG | Written
        2019/10/28 | MEG | Update to handle dicts
        2020/10/29 | MEG | Write the docs.  
        2020/10/30 | MEG | Fix bug that was causing Y_class and Y_loc to become masked arrays.  
        
    """
    import numpy as np
    import numpy.ma as ma

    n_classes = Y_class.shape[1]  # only works if one hot encoding is used
    X_sample = []
    Y_class_sample = []
    Y_loc_sample = []

    for i in range(n_classes):  # loop through each class
        args_class = np.ravel(np.argwhere(
            Y_class[:, i] != 0))  # get the args of the data of this label
        args_sample = args_class[np.random.randint(
            0, len(args_class), n_per_class
        )]  # choose n_per_class of these (ie so we always choose the same number from each label)
        X_sample.append(
            X[args_sample, :, :, :]
        )  # choose the data, and keep adding to a list (each item in the list is n_per_class_label x ny x nx n chanels)
        Y_class_sample.append(Y_class[args_sample, :])  # and class labels
        Y_loc_sample.append(Y_loc[args_sample, :])  # and location labels

    X_sample = ma.vstack(
        X_sample
    )  # maskd array, merge along the first axis, so now have n_class x n_per_class of data
    Y_class_sample = np.vstack(
        Y_class_sample
    )  # normal numpy array, note that these would be in order of the class (ie class 0 first, then class 1 etc.)
    Y_loc_sample = np.vstack(Y_loc_sample)  # also normal numpy array

    data_dict = {
        'X': X_sample,  # package the data and labels together into a dict
        'Y_class': Y_class_sample,
        'Y_loc': Y_loc_sample
    }

    data_dict_shuffled = shuffle_arrays(
        data_dict
    )  # shuffle (so that these aren't in the order of the class labels)

    X_sample = data_dict_shuffled[
        'X']  # and unpack as this function doesn't use dictionaries
    Y_class_sample = data_dict_shuffled['Y_class']
    Y_loc_sample = data_dict_shuffled['Y_loc']

    return X_sample, Y_class_sample, Y_loc_sample
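A sketch of the balanced selection on toy data: pick n_per_class indices per class from a one-hot label matrix, then merge the per-class blocks with ma.vstack for the data (the labels would go through np.vstack just as above):

import numpy as np
import numpy.ma as ma

rng = np.random.default_rng(0)
X = ma.masked_invalid(rng.normal(size=(20, 4, 4, 1)))    # rank 4 "data"
Y_class = np.eye(2)[rng.integers(0, 2, size=20)]          # one-hot labels, 2 classes
n_per_class = 3
blocks = []
for i in range(Y_class.shape[1]):
    args_class = np.ravel(np.argwhere(Y_class[:, i] != 0))
    picks = args_class[rng.integers(0, len(args_class), n_per_class)]
    blocks.append(X[picks])
X_sample = ma.vstack(blocks)    # shape (6, 4, 4, 1): three samples of each class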
Example #31
def fuzzyfy(features, cfg):
    """

        FIXME: Looks like skfuzzy.trapmf does not handle well masked values.
               I must think better what to do with masked input values. What
               to do when there is one feature, but the other features are
               masked?
    """

    features_list = list(cfg['features'].keys())

    N = features[features_list[0]].size

    # The fuzzy set are usually: low, medium, high
    # The membership of each fuzzy set are each feature scaled.
    membership = {}
    for f in cfg['output'].keys():
        membership[f] = {}

    for t in features_list:
        for m in membership:
            assert m in cfg['features'][t], \
                    "Missing %s in %s" % (m, cfg['features'][t])

            membership[m][t] = ma.masked_all_like(features[t])
            ind = ~ma.getmaskarray(features[t])
            if m == 'low':
                membership[m][t][ind] = zmf(
                        np.asanyarray(features[t])[ind], cfg['features'][t][m])
            elif m == 'high':
                membership[m][t][ind] = smf(
                        np.asanyarray(features[t])[ind],
                        cfg['features'][t][m])
            else:
                membership[m][t][ind] = trapmf(
                        np.asanyarray(features[t])[ind],
                        cfg['features'][t][m])

    # Rule Set
    rules = {}
    # Low: u_low = mean(S_l(spike), S_l(clim)...)
    #u_low = np.mean([weights['spike']['low'],
    #    weights['woa_relbias']['low']], axis=0)

    tmp = membership['low'][features_list[0]]
    for f in features_list[1:]:
        tmp = ma.vstack((tmp, membership['low'][f]))

    # FIXME: If there is only one feature, it will return 1 value
    #          instead of an array with N values.
    rules['low'] = ma.mean(tmp, axis=0)

    # IMPROVE IT: Morello2014 doesn't even use the medium uncertainty,
    #   so no reason to estimate it. In the generalize this once the
    #   membership combining rules are defined in the cfg, so I can
    #   decide to use mean or max.
    if 'medium' in membership:
        # Medium: u_medium = mean(S_l(spike), S_l(clim)...)
        #u_medium = np.mean([weights['spike']['medium'],
        #    weights['woa_relbias']['medium']], axis=0)

        tmp = membership['medium'][features_list[0]]
        for f in features_list[1:]:
            tmp = ma.vstack((tmp, membership['medium'][f]))

        rules['medium'] = ma.mean(tmp, axis=0)

    # High: u_high = max(S_l(spike), S_l(clim)...)
    #u_high = np.max([weights['spike']['high'],
    #    weights['woa_relbias']['high']], axis=0)

    tmp = membership['high'][features_list[0]]
    for f in features_list[1:]:
        tmp = ma.vstack((tmp, membership['high'][f]))

    rules['high'] = ma.max(tmp, axis=0)

    return rules
Example #32
def _do_combine(hdu_no: int, progress: float, progress_step: float,
                data_width: int, data_height: int,
                input_data: List[Union[pyfits.HDUList,
                                       Tuple[ndarray, pyfits.Header]]],
                mode: str = 'average', scaling: Optional[str] = None,
                rejection: Optional[str] = None, min_keep: int = 2,
                percentile: float = 50.0,
                lo: Optional[float] = None, hi: Optional[float] = None,
                max_mem_mb: float = 100.0,
                callback: Optional[callable] = None) \
        -> Tuple[Union[ndarray, ma.MaskedArray], float]:
    """
    Combine the given HDUs from all input images; used by :func:`combine` to
    get a stack of either all input images or, if lucky imaging is enabled,
    of their subset

    :return: image stack data and rejection percent
    """
    n = len(input_data)

    # Calculate scaling factors
    k_ref, k = None, []
    if scaling:
        for data_no, f in enumerate(input_data):
            if isinstance(f, pyfits.HDUList):
                data = f[hdu_no].data
            else:
                data = f[0]
            if scaling == 'average':
                k.append(data.mean())
            elif scaling == 'percentile':
                if percentile == 50:
                    k.append(
                        median(data) if not isinstance(data, ma.MaskedArray)
                        else ma.median(data))
                else:
                    k.append(
                        np_percentile(data, percentile)
                        if not isinstance(data, ma.MaskedArray)
                        else np_percentile(data.compressed(), percentile))
            elif scaling == 'mode':
                # Compute modal values from histograms; convert to integer
                # and assume 2 x 16-bit data range
                if isinstance(data, ma.MaskedArray):
                    data = data.compressed()
                else:
                    data = data.ravel()
                min_val = data.min(initial=0)
                k.append(
                    argmax(bincount(
                        (data - min_val).clip(0, 2*0x10000 - 1)
                        .astype(int32))) + min_val)
            else:
                raise ValueError(
                    'Unknown scaling mode "{}"'.format(scaling))
            if callback is not None:
                callback(progress + (data_no + 1)/n/2*progress_step)

        # Normalize to the first frame with non-zero average; keep images
        # with zero or same average as is
        k_ref = k[0]
        if not k_ref:
            for ki in k[1:]:
                if ki:
                    k_ref = ki
                    break

    # Process data in chunks to fit in the maximum amount of RAM allowed
    rowsize = 0
    for data in input_data:
        if isinstance(data, pyfits.HDUList):
            data = data[hdu_no].data
        else:
            data = data[0]
        rowsize += data[0].nbytes
        if rejection or isinstance(data, ma.MaskedArray):
            rowsize += data_width
    chunksize = min(max(int(max_mem_mb*(1 << 20)/rowsize), 1), data_height)
    while chunksize > 1:
        # Use as small chunks as possible but keep their total number
        if len(list(range(0, data_height, chunksize - 1))) > \
                len(list(range(0, data_height, chunksize))):
            break
        chunksize -= 1
    chunks = []
    rej_percent = 0
    for chunk in range(0, data_height, chunksize):
        datacube = [
            f[hdu_no].data[chunk:chunk + chunksize]
            if isinstance(f, pyfits.HDUList) else f[0][chunk:chunk + chunksize]
            for f in input_data
        ]
        if k_ref:
            # Scale data
            for data, ki in zip(datacube, k):
                if ki not in (0, k_ref):
                    data *= k_ref/ki

        # Reject outliers
        if rejection or any(isinstance(data, ma.MaskedArray)
                            for data in datacube):
            datacube = ma.masked_array(datacube)
            if not datacube.mask.shape:
                # No initially masked data, but we'll need an array instead
                # of mask=False to do slicing operations
                datacube.mask = full(datacube.shape, datacube.mask)
        else:
            datacube = array(datacube)

        if rejection == 'chauvenet':
            datacube.mask = chauvenet(datacube, min_vals=min_keep)
        elif rejection == 'iraf':
            if lo is None:
                lo = 1
            if hi is None:
                hi = 1
            if n - (lo + hi) < min_keep:
                raise ValueError(
                    'IRAF rejection with lo={}, hi={} would keep less than '
                    '{} values for a {}-image set'.format(lo, hi, min_keep, n))
            if lo or hi:
                # Mask "lo" smallest values and "hi" largest values along
                # the 0th axis
                order = datacube.argsort(0)
                mg = tuple(i.ravel() for i in indices(datacube.shape[1:]))
                for j in range(-hi, lo):
                    datacube.mask[(order[j].ravel(),) + mg] = True
                del order, mg
        elif rejection == 'minmax':
            if lo is not None and hi is not None:
                if lo > hi:
                    raise ValueError(
                        'lo={} > hi={} for minmax rejection'.format(lo, hi))
                datacube.mask[((datacube < lo) |
                               (datacube > hi)).nonzero()] = True
                if datacube.mask.all(0).any():
                    logging.warning(
                        '%d completely masked pixels left after minmax '
                        'rejection', datacube.mask.all(0).sum())
        elif rejection == 'sigclip':
            if lo is None:
                lo = 3
            if hi is None:
                hi = 3
            if lo < 0 or hi < 0:
                raise ValueError(
                    'Lower and upper limits for sigma clipping must be '
                    'positive, got lo={}, hi={}'.format(lo, hi))
            max_rej = n - min_keep
            while True:
                avg = datacube.mean(0)
                sigma = datacube.std(0)
                resid = datacube - avg
                outliers = (datacube.mask.sum(0) < max_rej) & \
                    (sigma > 0) & ((resid < -lo*sigma) | (resid > hi*sigma))
                if not outliers.any():
                    del avg, sigma, resid, outliers
                    break
                datacube.mask[outliers.nonzero()] = True
        elif rejection:
            raise ValueError(
                'Unknown rejection mode "{}"'.format(rejection))

        if isinstance(datacube, ma.MaskedArray):
            if datacube.mask is None or not datacube.mask.any():
                # Nothing was rejected
                datacube = datacube.data
            else:
                # Calculate the percentage of rejected pixels
                rej_percent += datacube.mask.sum()

        # Combine data
        if mode == 'average':
            res = datacube.mean(0)
        elif mode == 'sum':
            res = datacube.sum(0)
        elif mode == 'percentile':
            if percentile == 50:
                if isinstance(datacube, ma.MaskedArray):
                    res = ma.median(datacube, 0)
                else:
                    res = median(datacube, 0)
            else:
                if isinstance(datacube, ma.MaskedArray):
                    res = nanpercentile(
                        datacube.filled(nan), percentile, 0)
                else:
                    res = np_percentile(datacube, percentile, 0)
        else:
            raise ValueError('Unknown stacking mode "{}"'.format(mode))
        chunks.append(res)

        if callback is not None:
            callback(
                progress +
                ((0.5 if scaling else 0) +
                 min(chunk + chunksize, data_height)/data_height /
                 (2 if scaling else 1))*progress_step)

    if len(chunks) > 1:
        res = ma.vstack(chunks)
    else:
        res = chunks[0]
    if isinstance(res, ma.MaskedArray) and (
            res.mask is None or not res.mask.any()):
        res = res.data
    return res, rej_percent
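A minimal sketch of the chunked combine above with the FITS I/O, scaling and rejection stripped out: average a set of masked frames in row chunks to bound memory, then reassemble the partial results with ma.vstack exactly as in the final step.

import numpy as np
import numpy.ma as ma

frames = [ma.masked_invalid(np.random.default_rng(i).normal(size=(100, 50))) for i in range(5)]
frames[0][0, 0] = ma.masked              # pretend one pixel is bad in one frame
chunks = []
for row0 in range(0, 100, 32):           # chunksize of 32 rows
    cube = ma.masked_array([f[row0:row0 + 32] for f in frames])   # (5, <=32, 50)
    chunks.append(cube.mean(axis=0))     # mode == 'average'
res = ma.vstack(chunks) if len(chunks) > 1 else chunks[0]          # (100, 50)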