Пример #1
0
def subtract_unary(a, b):
    """Returns a-b with masked values only in places where both a and b are masked.
    """
    a = MA.asarray(a)
    b = MA.asarray(b)
    el = MA.subtract(a.filled(0), b.filled(0))
    mask = Numeric.logical_and(MA.getmaskarray(a), MA.getmaskarray(b))
    return MA.array(el, mask=mask)
Пример #2
0
def divide_unary(a, b):
    """Returns a*b with masked values only in places where both a and b are masked.
    """
    a = MA.asarray(a)
    b = MA.asarray(b)
    el = MA.divide(a.filled(1), b.filled(1))
    mask = Numeric.logical_and(MA.getmaskarray(a), MA.getmaskarray(b))
    return MA.array(el, mask=mask)
Пример #3
0
def dotMA(a, b):
    """Returns dot-product for MA arrays; fixed masked values.
    """
    a = MA.asarray(a)
    b = MA.asarray(b)
    ab = MA.dot(a,b)
    # fix masked values in ab (MA.dot returns 0 instead of MA.masked)
    nonMasked = Numeric.dot(1-MA.getmaskarray(a).astype(Numeric.Int), 1-MA.getmaskarray(b).astype(Numeric.Int))
    return MA.where(Numeric.equal(nonMasked,0), MA.masked, ab)
Пример #4
0
def distEuclidean(x, y):
    """normalized euclidean distance
    """
    x = MA.asarray(x)
    y = MA.asarray(y)
    assert MA.rank(x) == MA.rank(y) == 1
    sumWeights = MA.add.reduce(
        MA.logical_not(MA.logical_or(
            MA.getmaskarray(x), MA.getmaskarray(y))).astype(Numeric.Float))
    return MA.sqrt(MA.add.reduce((x - y)**2) / sumWeights)
Пример #5
0
def distManhattan(x, y):
    """normalized Manhattan distance
    """
    x = MA.asarray(x)
    y = MA.asarray(y)
    assert MA.rank(x) == MA.rank(y) == 1
    sumWeights = MA.add.reduce(
        MA.logical_not(MA.logical_or(
            MA.getmaskarray(x), MA.getmaskarray(y))).astype(Numeric.Float))
    return MA.add.reduce(MA.absolute(x - y)) / sumWeights
Пример #6
0
def distSpearman(x, y):
    """distance corresponding to 1 - spearman's correlation coefficient for arrays x,y
    returns distance: 1 - spearman_r
    """
    x = MA.asarray(x)
    y = MA.asarray(y)
    assert MA.rank(x) == MA.rank(y) == 1
    cond = MA.logical_not(MA.logical_or(MA.getmaskarray(x),
                                        MA.getmaskarray(y)))
    return 1 - statc.spearmanr(
        MA.compress(cond, x).tolist(),
        MA.compress(cond, y).tolist())[0]
Пример #7
0
def _distSpearmanW_MA(x, y, w):
    """if any of x,y,w is a MA array containing masked values
    """
    x = MA.asarray(x)
    y = MA.asarray(y)
    w = MA.asarray(w)
    assert MA.rank(x) == MA.rank(y) == MA.rank(w) == 1
    cond = MA.logical_not(
        MA.logical_or(MA.logical_or(MA.getmaskarray(x), MA.getmaskarray(y)),
                      MA.getmaskarray(w)))
    # with MA use compress before tolist() !
    rankx = Numeric.array(statc.rankdata(MA.compress(cond, x).tolist()))
    ranky = Numeric.array(statc.rankdata(MA.compress(cond, y).tolist()))
    return distPearsonW(rankx, ranky, MA.compress(cond, w))
Пример #8
0
def kNNimputeMA(arr2d, K=20, callback=None):
    """Returns a new 2D MA.array with missing values imputed from K nearest neighbours.
    Find K rows (axis 0) with the most similar values where similarity measure corresponds to weighted Euclidean distance.
    Imputed value = weighted average of the corresponding values of K nearest neighbours,
    where weights equal to tricubic distribution of distances to all rows.
    Impute missing rows by average over all rows.
    Version: 30.8.2005
    """
    arr2d = MA.asarray(arr2d)
    assert len(arr2d.shape) == 2, "2D array expected"
    # make a copy for imputation
    aImp2 = MA.array(arr2d)
    # leave out columns with 0 known values (columnInd: non-zero columns)
    columnCond = Numeric.greater(MA.count(arr2d, axis=0), 0)
    columnIndAll = Numeric.arange(arr2d.shape[1])
    columnInd = Numeric.compress(columnCond, columnIndAll)
    # impute the rows where 0 < #known_values < #non_zero_columns, i.e. exclude the rows with 0 and all (non-zero-column) values
    countByRows = MA.count(arr2d, axis=1)
    for rowIdx in Numeric.compress(Numeric.logical_and(Numeric.greater(countByRows, 0), Numeric.less(countByRows, columnInd.shape[0])), Numeric.arange(arr2d.shape[0])):
        rowResized = MA.resize(arr2d[rowIdx], arr2d.shape)
        diff = arr2d - rowResized
        distances = MA.sqrt(MA.add.reduce((diff)**2, 1) / MA.count(diff, axis=1))
        # nearest neighbours row indices (without the current row index)
        indSorted = MA.argsort(distances)[1:]
        distSorted = distances.take(indSorted)
        # number of distances different from MA.masked
        numNonMasked = distSorted.shape[0] - Numeric.add.reduce(Numeric.asarray(MA.getmaskarray(distSorted), Numeric.Int))
        # number of distances to account for (K or less)
        if numNonMasked > 1:
            weightsSorted = MA.power(1-MA.power(distSorted/distSorted[numNonMasked-1],3),3) # tricubic distribution of all weights
        else:
            weightsSorted = Numeric.ones(distSorted.shape[0])
        # compute average for each column separately in order to account for K non-masked values
        colInd4CurrRow = Numeric.compress(Numeric.logical_and(MA.getmaskarray(arr2d[rowIdx]), columnCond), columnIndAll)
        for colIdx in colInd4CurrRow:
            # column values sorted by distances
            columnVals = arr2d[:,colIdx].take(indSorted)
            # take only those weights where columnVals does not equal MA.masked
            weightsSortedCompressed = MA.compress(1-MA.getmaskarray(columnVals), weightsSorted)
            # impute from K (or possibly less) values
            aImp2[rowIdx,colIdx] = MA.average(columnVals.compressed()[:K], weights=weightsSortedCompressed[:K])
        if callback:
            callback()
    # impute the unknown rows with average profile
    avrgRow = MA.average(arr2d, 0)
    for rowIdx in Numeric.compress(Numeric.equal(countByRows, 0), Numeric.arange(arr2d.shape[0])):
        aImp2[rowIdx] = avrgRow
        if callback:
            callback()
    return aImp2
Пример #9
0
def triangularPut(m1d, upper=1, lower=0):
    """Returns 2D masked array with elements of the given 1D array in the strictly upper (lower) triangle.
    Elements of the 1D array should be ordered according to the upper triangular part of the 2D matrix.
    The lower triangular part (if requested) equals to the transposed upper triangular part.
    If upper == lower == 1 a symetric matrix is returned.
    """
    assert upper in [0,1] and lower in [0,1], "[0|1] expected for upper / lower"
    m1d = MA.asarray(m1d)
    assert MA.rank(m1d) == 1, "1D masked array expected"
    m2dShape0 = math.ceil(math.sqrt(2*m1d.shape[0]))
    assert m1d.shape[0] == m2dShape0*(m2dShape0-1)/2, "the length of m1d does not correspond to n(n-1)/2"
    if upper:
        if lower:
            mask = Numeric.fromfunction(lambda i,j: i==j, (m2dShape0, m2dShape0))
        else:
            mask = Numeric.fromfunction(lambda i,j: i>=j, (m2dShape0, m2dShape0))
    else:
        if lower:
            mask = Numeric.fromfunction(lambda i,j: i<=j, (m2dShape0, m2dShape0))
        else:
            mask = Numeric.ones((m2dShape0, m2dShape0))

    m2d = MA.ravel(MA.zeros((m2dShape0, m2dShape0), m1d.dtype.char))
    condUpperTriang = Numeric.fromfunction(lambda i,j: i<j, (m2dShape0, m2dShape0))
    putIndices = Numeric.compress(Numeric.ravel(condUpperTriang), Numeric.arange(0, m2dShape0**2, typecode=Numeric.Int))
    MA.put(m2d, putIndices, m1d)
    m2d = MA.reshape(m2d, (m2dShape0, m2dShape0))
    m2d = MA.where(condUpperTriang, m2d, MA.transpose(m2d))
    return MA.array(m2d, mask=Numeric.logical_or(mask, MA.getmaskarray(m2d)))
Пример #10
0
def loessMA(m, windowSize, axis=0, approxMasked=True, verbose=False, callback=None):
    """Returns a new array with values at the given axis smoothed by loess;
    if approxMasked==True: the masked values are approximated by loess;
    assumes equidistant spacing of points on the given axis.
    """
    assert 0 < windowSize <= m.shape[axis]+0.1, "0 < windowSize[%s] <= 1 OR windowSize in range(1.1,m.shape[axis]+1) expected, got %f" % ("%", windowSize)
    m = MA.asarray(m)
    if m.dtype.char <> Numeric.Float:
        m = m.astype(Numeric.Float)
    shp_other = list(m.shape)
    shp_other.pop(axis)
    # get a transposed and reshaped mask and data from m; if m.mask() == None, construct a new array of zeros
    mask = Numeric.reshape(Numeric.transpose(MA.getmaskarray(m), [axis] + range(0,axis) + range(axis+1,len(m.shape))), (m.shape[axis], Numeric.multiply.reduce(shp_other)))
    data = MA.reshape(MA.transpose(m, [axis] + range(0,axis) + range(axis+1,len(m.shape))), (m.shape[axis], Numeric.multiply.reduce(shp_other)))
    maskInv = -1*(mask-1)
    xall = Numeric.arange(data.shape[0])
    xallList = xall.tolist()
    for ii in Numeric.compress(Numeric.add.reduce(maskInv,0) > 1, range(data.shape[1])):    # run loess if the profile contains more than 2 values
        try:
            data[:,ii] = MA.array(statc.loess(zip(MA.compress(maskInv[:,ii], xall).tolist(), MA.compress(maskInv[:,ii], data[:,ii]).tolist()), xallList, windowSize))[:,1]
        except:
            if verbose:
                print "Warning: loessMA: could not loess axis %i index %i" % (axis, ii)
        if callback:
            callback()
    if not approxMasked:
        data = MA.array(data, mask=mask)
    return MA.transpose(MA.reshape(data, [m.shape[axis]] + shp_other), [axis] + range(0,axis) + range(axis+1,len(m.shape)))
Пример #11
0
def compressIndices(ma):
    """Returns 1D compressed Numeric array and the indices of the non-masked places.
    usage:  nu,ind = compressIndices(ma)
            nu = Numeric.elementwise_function(nu)
            ma = MA.put(ma, ind, nu)
    """
    ma = MA.asarray(ma)
    nonMaskedInd = Numeric.compress(1-Numeric.ravel(MA.getmaskarray(ma)), Numeric.arange(Numeric.multiply.reduce(ma.shape)))
    return MA.filled(ma.compressed()), nonMaskedInd
Пример #12
0
 def ttest_rsmplA(self, ma3d, callback):
     """conducts related samples t-test on individual examples wrt factor A (variables, ma3d axis 1);
     returns Numeric array of p-values in shape (1, numExamples).
     """
     ps = -1*Numeric.ones((ma3d.shape[0],), Numeric.Float)
     for eIdx in range(ma3d.shape[0]):
         a = ma3d[eIdx][0]
         b = ma3d[eIdx][1]
         cond = Numeric.logical_not(Numeric.logical_or(MA.getmaskarray(a), MA.getmaskarray(b)))
         a = Numeric.asarray(MA.compress(cond, a))
         b = Numeric.asarray(MA.compress(cond, b))
         if len(a) >= 2:
             try:
                 ps[eIdx] = scipy.stats.ttest_rel(a,b)[1]
             except Exception, inst:
                 print "Warning: %s" % str(inst)
                 print "Example %i:\n%s\n%s\n" % (eIdx, str(a), str(b))
                 ps[eIdx] = 1.0
         else:
             print "Warning: removing example %i:\n%s\n%s\n" % (eIdx, str(a), str(b))
             ps[eIdx] = 1.0
         callback()
Пример #13
0
def distPearsonW(x, y, w):
    """weighted distance corresponding to 1 - pearson's correlation coefficient for arrays x,y and weights w
    returns distance: 1 - pearson_r
    """
    #TINY = 1.0e-20
    # ones for non-masked places at x,y and w
    x = MA.asarray(x)
    y = MA.asarray(y)
    w = MA.asarray(w)
    assert MA.rank(x) == MA.rank(y) == MA.rank(w) == 1
    mask = MA.logical_or(MA.logical_or(MA.getmaskarray(x), MA.getmaskarray(y)),
                         MA.getmaskarray(w))
    # set mask to w that is equal to the mask from x, y and w
    w = MA.masked_array(w, mask=mask)
    n_w_mean = MA.add.reduce(w)  # n * mean(w)
    x_w = x * w  # x * w
    y_w = y * w  # y * w
    x_wmean = MA.divide(MA.add.reduce(x_w), n_w_mean)  # weighted_mean(x)
    y_wmean = MA.divide(MA.add.reduce(y_w), n_w_mean)  # weighted_mean(x)
    r_num = MA.add.reduce(x * y * w) - n_w_mean * x_wmean * y_wmean
    r_den = MA.sqrt((MA.add.reduce(x_w * x) - n_w_mean * x_wmean**2) *
                    (MA.add.reduce(y_w * y) - n_w_mean * y_wmean**2))
    return 1 - MA.divide(r_num, r_den)
Пример #14
0
def loessMA(m,
            windowSize,
            axis=0,
            approxMasked=True,
            verbose=False,
            callback=None):
    """Returns a new array with values at the given axis smoothed by loess;
    if approxMasked==True: the masked values are approximated by loess;
    assumes equidistant spacing of points on the given axis.
    """
    assert 0 < windowSize <= m.shape[
        axis] + 0.1, "0 < windowSize[%s] <= 1 OR windowSize in range(1.1,m.shape[axis]+1) expected, got %f" % (
            "%", windowSize)
    m = MA.asarray(m)
    if m.dtype.char <> Numeric.Float:
        m = m.astype(Numeric.Float)
    shp_other = list(m.shape)
    shp_other.pop(axis)
    # get a transposed and reshaped mask and data from m; if m.mask() == None, construct a new array of zeros
    mask = Numeric.reshape(
        Numeric.transpose(MA.getmaskarray(m), [axis] + range(0, axis) +
                          range(axis + 1, len(m.shape))),
        (m.shape[axis], Numeric.multiply.reduce(shp_other)))
    data = MA.reshape(
        MA.transpose(m,
                     [axis] + range(0, axis) + range(axis + 1, len(m.shape))),
        (m.shape[axis], Numeric.multiply.reduce(shp_other)))
    maskInv = -1 * (mask - 1)
    xall = Numeric.arange(data.shape[0])
    xallList = xall.tolist()
    for ii in Numeric.compress(
            Numeric.add.reduce(maskInv, 0) > 1, range(data.shape[1])
    ):  # run loess if the profile contains more than 2 values
        try:
            data[:, ii] = MA.array(
                statc.loess(
                    zip(
                        MA.compress(maskInv[:, ii], xall).tolist(),
                        MA.compress(maskInv[:, ii], data[:, ii]).tolist()),
                    xallList, windowSize))[:, 1]
        except:
            if verbose:
                print "Warning: loessMA: could not loess axis %i index %i" % (
                    axis, ii)
        if callback:
            callback()
    if not approxMasked:
        data = MA.array(data, mask=mask)
    return MA.transpose(MA.reshape(data, [m.shape[axis]] + shp_other), [axis] +
                        range(0, axis) + range(axis + 1, len(m.shape)))
Пример #15
0
def rankDataMA(m, inverse=False):
    """Returns ranks of 1D masked array; masked values ignored, range 1...#non-masked_values.
    """
    m = MA.asarray(m)
    assert MA.rank(m) == 1
    fill_val = m.fill_value()
    m.set_fill_value(MA.maximum(m) + 1)
    r = MA.zeros(m.shape[0], Numeric.Float)
    MA.put(r, MA.argsort(m), Numeric.arange(m.shape[0]))
    m.set_fill_value(fill_val)
    r = MA.array(r, mask=MA.getmaskarray(m))
    if inverse:
        return -1*r+MA.count(m)
    else:
        return r+1
Пример #16
0
def kNNimputeMA(arr2d, K=20, callback=None):
    """Returns a new 2D MA.array with missing values imputed from K nearest neighbours.
    Find K rows (axis 0) with the most similar values where similarity measure corresponds to weighted Euclidean distance.
    Imputed value = weighted average of the corresponding values of K nearest neighbours,
    where weights equal to tricubic distribution of distances to all rows.
    Impute missing rows by average over all rows.
    Version: 30.8.2005
    """
    arr2d = MA.asarray(arr2d)
    assert len(arr2d.shape) == 2, "2D array expected"
    # make a copy for imputation
    aImp2 = MA.array(arr2d)
    # leave out columns with 0 known values (columnInd: non-zero columns)
    columnCond = Numeric.greater(MA.count(arr2d, axis=0), 0)
    columnIndAll = Numeric.arange(arr2d.shape[1])
    columnInd = Numeric.compress(columnCond, columnIndAll)
    # impute the rows where 0 < #known_values < #non_zero_columns, i.e. exclude the rows with 0 and all (non-zero-column) values
    countByRows = MA.count(arr2d, axis=1)
    for rowIdx in Numeric.compress(
            Numeric.logical_and(Numeric.greater(countByRows, 0),
                                Numeric.less(countByRows, columnInd.shape[0])),
            Numeric.arange(arr2d.shape[0])):
        rowResized = MA.resize(arr2d[rowIdx], arr2d.shape)
        diff = arr2d - rowResized
        distances = MA.sqrt(
            MA.add.reduce((diff)**2, 1) / MA.count(diff, axis=1))
        # nearest neighbours row indices (without the current row index)
        indSorted = MA.argsort(distances)[1:]
        distSorted = distances.take(indSorted)
        # number of distances different from MA.masked
        numNonMasked = distSorted.shape[0] - Numeric.add.reduce(
            Numeric.asarray(MA.getmaskarray(distSorted), Numeric.Int))
        # number of distances to account for (K or less)
        if numNonMasked > 1:
            weightsSorted = MA.power(
                1 - MA.power(distSorted / distSorted[numNonMasked - 1], 3),
                3)  # tricubic distribution of all weights
        else:
            weightsSorted = Numeric.ones(distSorted.shape[0])
        # compute average for each column separately in order to account for K non-masked values
        colInd4CurrRow = Numeric.compress(
            Numeric.logical_and(MA.getmaskarray(arr2d[rowIdx]), columnCond),
            columnIndAll)
        for colIdx in colInd4CurrRow:
            # column values sorted by distances
            columnVals = arr2d[:, colIdx].take(indSorted)
            # take only those weights where columnVals does not equal MA.masked
            weightsSortedCompressed = MA.compress(
                1 - MA.getmaskarray(columnVals), weightsSorted)
            # impute from K (or possibly less) values
            aImp2[rowIdx,
                  colIdx] = MA.average(columnVals.compressed()[:K],
                                       weights=weightsSortedCompressed[:K])
        if callback:
            callback()
    # impute the unknown rows with average profile
    avrgRow = MA.average(arr2d, 0)
    for rowIdx in Numeric.compress(Numeric.equal(countByRows, 0),
                                   Numeric.arange(arr2d.shape[0])):
        aImp2[rowIdx] = avrgRow
        if callback:
            callback()
    return aImp2
Пример #17
0
def logical_unary_and(m1, m2):
    el = Numeric.logical_and(m1.filled(1), m2.filled(1))
    mask = Numeric.logical_and(MA.getmaskarray(m1), MA.getmaskarray(m2))
    return MA.array(el, mask=mask)