Exemplo n.º 1
0
def merge_replicas(aETStruct, type):
    """Merges the replicas of each strain into a single example table.

    aETStruct: list of tuples (strain, [orngET1, orngET2, ...]);
    type: "mean" | "median" | "min" | "max"; selects the function used to merge the replicas.

    Returns a list of tuples (strain, [merged_orngET]); an empty aETStruct yields [].
    Raises AttributeError if type is not one of the four names above.

    The number of examples and attributes is read from the first example table of the
    first strain and is reused for all strains.
    """
    # validate first, so that a wrong type is reported even when there is no data
    if type not in ("mean", "median", "min", "max"):
        raise AttributeError("type = ['mean' | 'median' | 'min' | 'max']")
    if len(aETStruct) == 0:
        return []
    if type == "mean":
        merge_func = MA.average
    elif type == "median":
        merge_func = numpyExtn.medianMA
    elif type == "min":
        merge_func = numpyExtn.minMA
    else:                                                               # "max"
        merge_func = numpyExtn.maxMA
    shape = [0,0,0]
    et0 = aETStruct[0][1][0]                                            # the first example table
    shape[0] = len(et0)                                                 # number of examples (genes)
    shape[1] = len(et0.domain.attributes)                               # number of attributes (time points)
    mergedETStruct = []
    for st, etList in aETStruct:
        shape[2] = len(etList)                                          # number of replicas of this strain
        ma3d = MA.zeros(shape, Numeric.Float)
        for idx, et in enumerate(etList):
            ma3d[:,:,idx] = orng2ma(et)
        # merge along the replica axis (2); the first replica is handed to ma2orng_keepClassMetas
        # (presumably as the template for class and meta attributes -- confirm in that helper)
        mergedETStruct.append((st, [ma2orng_keepClassMetas(merge_func(ma3d, 2), etList[0])]))
    return mergedETStruct
Exemplo n.º 2
0
def triangularPut(m1d, upper=1, lower=0):
    """Returns 2D masked array with elements of the given 1D array in the strictly upper (lower) triangle.

    m1d: 1D (masked) array of length n(n-1)/2; its elements should be ordered according to
        the upper triangular part of the resulting n x n matrix.
    upper, lower: 0 or 1; select which strict triangle(s) are left unmasked.

    The lower triangular part (if requested) equals to the transposed upper triangular part.
    If upper == lower == 1 a symmetric matrix is returned.
    The diagonal is masked in all cases; if upper == lower == 0 the whole matrix is masked.
    Raises AssertionError on invalid arguments.
    """
    assert upper in [0,1] and lower in [0,1], "[0|1] expected for upper / lower"
    m1d = MA.asarray(m1d)
    assert MA.rank(m1d) == 1, "1D masked array expected"
    # math.ceil returns a float under Python 2; array dimensions must be integers
    m2dShape0 = int(math.ceil(math.sqrt(2*m1d.shape[0])))
    # n(n-1) is always even, so the floor division is exact
    assert m1d.shape[0] == m2dShape0*(m2dShape0-1)//2, "the length of m1d does not correspond to n(n-1)/2"
    # mask is 1 where the result must be masked
    if upper:
        if lower:
            mask = Numeric.fromfunction(lambda i,j: i==j, (m2dShape0, m2dShape0))
        else:
            mask = Numeric.fromfunction(lambda i,j: i>=j, (m2dShape0, m2dShape0))
    else:
        if lower:
            mask = Numeric.fromfunction(lambda i,j: i<=j, (m2dShape0, m2dShape0))
        else:
            mask = Numeric.ones((m2dShape0, m2dShape0))

    # fill the strictly upper triangle of a flattened zero matrix with the elements of m1d
    m2d = MA.ravel(MA.zeros((m2dShape0, m2dShape0), m1d.dtype.char))
    condUpperTriang = Numeric.fromfunction(lambda i,j: i<j, (m2dShape0, m2dShape0))
    putIndices = Numeric.compress(Numeric.ravel(condUpperTriang), Numeric.arange(0, m2dShape0**2, typecode=Numeric.Int))
    MA.put(m2d, putIndices, m1d)
    m2d = MA.reshape(m2d, (m2dShape0, m2dShape0))
    # mirror the upper triangle to the lower one; the unwanted part is hidden by the mask below
    m2d = MA.where(condUpperTriang, m2d, MA.transpose(m2d))
    return MA.array(m2d, mask=Numeric.logical_or(mask, MA.getmaskarray(m2d)))
Exemplo n.º 3
0
def merge_replicas(aETStruct, type):
    """Merges the replicas of each strain into a single example table.

    aETStruct: list of tuples (strain, [orngET1, orngET2, ...]);
    type: "mean" | "median" | "min" | "max"; selects the function used to merge the replicas.

    Returns a list of tuples (strain, [merged_orngET]); an empty aETStruct yields [].
    Raises AttributeError if type is not one of the four names above.

    The number of examples and attributes is read from the first example table of the
    first strain and is reused for all strains.
    """
    # validate first, so that a wrong type is reported even when there is no data
    if type not in ("mean", "median", "min", "max"):
        raise AttributeError("type = ['mean' | 'median' | 'min' | 'max']")
    if len(aETStruct) == 0:
        return []
    if type == "mean":
        merge_func = MA.average
    elif type == "median":
        merge_func = numpyExtn.medianMA
    elif type == "min":
        merge_func = numpyExtn.minMA
    else:  # "max"
        merge_func = numpyExtn.maxMA
    shape = [0, 0, 0]
    et0 = aETStruct[0][1][0]  # the first example table
    shape[0] = len(et0)  # number of examples (genes)
    shape[1] = len(et0.domain.attributes)  # number of attributes (time points)
    mergedETStruct = []
    for st, etList in aETStruct:
        shape[2] = len(etList)  # number of replicas of this strain
        ma3d = MA.zeros(shape, Numeric.Float)
        for idx, et in enumerate(etList):
            ma3d[:, :, idx] = orng2ma(et)
        # merge along the replica axis (2); the first replica is handed to
        # ma2orng_keepClassMetas (presumably as the template for class and
        # meta attributes -- confirm in that helper)
        mergedETStruct.append(
            (st, [ma2orng_keepClassMetas(merge_func(ma3d, 2), etList[0])]))
    return mergedETStruct
Exemplo n.º 4
0
    def runANOVA(self):
        """Runs the test selected by self.anovaType and stores the p-values in self.ps.

        Converts structured data [(name, [orngET1, orngET2, ...]),...] to a 3D masked array
        with the following axes: 0: examples, 1: variables, 2: ExampleTables;
        self.ps is reset to ones of shape (3, numExamples) and the rows computed by the
        selected test are overwritten.

        Does nothing if self.dataStructure is empty or self.numExamples is 0.
        Raises Exception for OWHypTest.StLPE (not implemented) and RuntimeError if the data
        layout does not fit OWHypTest.StRST; the progress bar is closed in both cases.
        """
        if not (self.dataStructure and self.numExamples > 0):
            return
        numTables = sum(len(etList) for dsName, etList in self.dataStructure)
        # multiplying by MA.masked: start from an array that is masked everywhere
        ma3d = MA.zeros((self.numExamples, self.numVariables, numTables), Numeric.Float) * MA.masked
        groupLens = []
        etIdx = 0
        for dsName, etList in self.dataStructure:
            for et in etList:
                ma3d[:,:,etIdx] = et.toNumpyMA("ac")[0]
                etIdx += 1
            groupLens.append(len(etList))
        # run ANOVA
        self.infoc.setText('ANOVA computation started...')
        self.progressBarInit()
        pbStep = 100./self.numExamples
        advance = lambda: self.progressBarAdvance(pbStep)
        self.ps = Numeric.ones((3, self.numExamples), Numeric.Float)
        try:
            if self.anovaType == OWHypTest.St2AB or self.anovaType == OWHypTest.St2ABI:
                ps = self.anova2(ma3d, groupLens, addInteraction=self.anovaType==OWHypTest.St2ABI, repMeasuresOnA=False, callback=advance)
                # anova2 returns 2 or 3 rows; the remaining rows of self.ps stay 1
                for rIdx in range(ps.shape[0]):
                    self.ps[rIdx] = ps[rIdx]

            elif self.anovaType == OWHypTest.St1B:
                self.ps[1] = self.anova1B(ma3d, groupLens, repMeasures=False, callback=advance)

            elif self.anovaType == OWHypTest.St1A:
                self.ps[0] = self.anova1A(ma3d, repMeasures=False, callback=advance)

            elif self.anovaType == OWHypTest.StSST:
                try:
                    popMeanVal = float(self.popMean)
                except (ValueError, TypeError):
                    print("Warning: cannot convert %s to float, using 0" % str(self.popMean))
                    self.popMean = 0
                    popMeanVal = 0
                self.ps[0] = self.ttest_ssmpl(ma3d, popMeanVal, callback=advance)

            elif self.anovaType == OWHypTest.StLPE:
                # the dispatch to lpeA (2 variables -> self.ps[0]) / lpeB (1 variable -> self.ps[1])
                # stays disabled until LPE is implemented
                raise Exception("NOT IMPLEMENTED")

            elif self.anovaType == OWHypTest.StRST:
                if self.numVariables == 2 and len(groupLens) == 1:
                    self.ps[0] = self.ttest_rsmplA(ma3d, callback=advance)
                elif self.numVariables == 1 and len(groupLens) == 2 and groupLens[0] == groupLens[1]:
                    self.ps[1] = self.ttest_rsmplB(ma3d, groupLens, callback=advance)
                else:
                    raise RuntimeError("%s: expected 2 variables and 1 group, or 1 variable and 2 groups of equal length, got %s variables and %s groups of length %s" % (OWHypTest.StNames[self.anovaType], self.numVariables, len(groupLens), str(groupLens)))
        finally:
            # close the progress bar even if the selected test raised
            self.progressBarFinished()
Exemplo n.º 5
0
def etStruct2ma3d(aETStruct):
    """Stacks all example tables of aETStruct into one 3D masked array and returns it.

    aETStruct: list of tuples (strain, [orngET1, orngET2, ...]).
    Axes of the result: 0: examples (genes), 1: attributes (time points),
    2: example tables (replicas over all strains, in the order of aETStruct).
    The first two dimensions are taken from the first example table of the first strain.
    """
    firstET = aETStruct[0][1][0]
    numExamples = len(firstET)
    numAttributes = len(firstET.domain.attributes)
    numTables = sum([len(entry[1]) for entry in aETStruct])
    ma3d = MA.zeros([numExamples, numAttributes, numTables], Numeric.Float)
    tableIdx = 0
    for strain, tables in aETStruct:
        for table in tables:
            ma3d[:,:,tableIdx] = orng2ma(table)
            tableIdx += 1
    return ma3d
Exemplo n.º 6
0
def rankDataMA(m, inverse=False):
    """Ranks the elements of a 1D masked array; masked elements stay masked in the result.

    With inverse false the value returned is (position in MA.argsort order) + 1;
    with inverse true it is MA.count(m) - (position in MA.argsort order).
    Ties are not averaged.
    """
    m = MA.asarray(m)
    assert MA.rank(m) == 1
    numElements = m.shape[0]
    # temporarily use a fill value above the maximum
    # (presumably MA.argsort then places the masked elements last -- confirm against the MA version in use)
    savedFill = m.fill_value()
    m.set_fill_value(MA.maximum(m) + 1)
    zeroBasedRanks = MA.zeros(numElements, Numeric.Float)
    MA.put(zeroBasedRanks, MA.argsort(m), Numeric.arange(numElements))
    m.set_fill_value(savedFill)
    zeroBasedRanks = MA.array(zeroBasedRanks, mask=MA.getmaskarray(m))
    if not inverse:
        return zeroBasedRanks+1
    return -1*zeroBasedRanks+MA.count(m)
Exemplo n.º 7
0
def etStruct2ma3d(aETStruct):
    """Stacks all example tables of aETStruct into one 3D masked array and returns it.

    aETStruct: list of tuples (strain, [orngET1, orngET2, ...]).
    Axes of the result: 0: examples (genes), 1: attributes (time points),
    2: example tables (replicas over all strains, in the order of aETStruct).
    The first two dimensions are taken from the first example table of the
    first strain.
    """
    firstET = aETStruct[0][1][0]
    numExamples = len(firstET)
    numAttributes = len(firstET.domain.attributes)
    numTables = sum([len(entry[1]) for entry in aETStruct])
    ma3d = MA.zeros([numExamples, numAttributes, numTables], Numeric.Float)
    tableIdx = 0
    for strain, tables in aETStruct:
        for table in tables:
            ma3d[:, :, tableIdx] = orng2ma(table)
            tableIdx += 1
    return ma3d
Exemplo n.º 8
0
 def anova2(self, ma3d, groupLens, addInteraction, repMeasuresOnA, callback):
     """Conducts two-way ANOVA on individual examples.

     ma3d: 3D masked array; axis 0: examples, axis 1: levels of factor A, axis 2: replicas of
         all levels of factor B, partitioned consecutively according to groupLens.
     groupLens: number of replicas for each level of factor B.
     addInteraction: whether to test for interaction as well.
     repMeasuresOnA: if true Anova.AnovaRM12LR is used, otherwise Anova.Anova2wayLR.
     callback: called without arguments after each example.

     Returns a Numeric array of p-values in shape (2, numExamples) or (3, numExamples), depending whether we test for interaction.
     Note: levels of factors A and B that cause empty cells are removed prior to conducting ANOVA.
     """
     groupLens = Numeric.asarray(groupLens)
     # arrays to store p-vals
     if addInteraction:
         ps = Numeric.ones((3, ma3d.shape[0]), Numeric.Float)
     else:
         ps = Numeric.ones((2, ma3d.shape[0]), Numeric.Float)
     # decide between non-repeated / repeated measures ANOVA for factor time
     if repMeasuresOnA:
         fAnova = Anova.AnovaRM12LR
     else:
         fAnova = Anova.Anova2wayLR
     # check for empty cells for all genes at once and remove them
     ax2Ind = Numeric.concatenate(([0], Numeric.add.accumulate(groupLens)))
     tInd2rem = []
     tInd2keep = []
     for aIdx in range(ma3d.shape[1]):
         for rIdx in range(groupLens.shape[0]):
             # no non-masked value in this cell for any gene
             if Numeric.add.reduce(MA.count(ma3d[:,aIdx,ax2Ind[rIdx]:ax2Ind[rIdx+1]],1)) == 0:
                 tInd2rem.append(aIdx)
                 break
         else:
             tInd2keep.append(aIdx)
     if len(tInd2rem) > 0:
         print("Warning: removing time indices %s for all genes" % (str(tInd2rem)))
         ma3d = ma3d.take(tInd2keep, 1)
     # for each gene...
     for eIdx in range(ma3d.shape[0]):
         # faster check for empty cells for that gene -> remove time indices with empty cells
         ma2d = ma3d[eIdx]
         cellCount = MA.zeros((ma2d.shape[0], groupLens.shape[0]), Numeric.Int)
         for g,(i0,i1) in enumerate(zip(ax2Ind[:-1], ax2Ind[1:])):
             cellCount[:,g] = MA.count(ma2d[:,i0:i1], 1)
         ma2dTakeInd = Numeric.logical_not(Numeric.add.reduce(Numeric.equal(cellCount,0),1)) # 1 where to take, 0 where not to take
         if Numeric.add.reduce(ma2dTakeInd) != ma2dTakeInd.shape[0]:
             print("Warning: removing time indices %s for gene %i" % (str(Numeric.compress(ma2dTakeInd == 0, Numeric.arange(ma2dTakeInd.shape[0]))), eIdx))
             ma2d = MA.compress(ma2dTakeInd, ma2d, 0)
         an = fAnova(ma2d, groupLens, addInteraction, allowReductA=True, allowReductB=True)
         ps[:,eIdx] = an.ps
         callback()
     return ps
Exemplo n.º 9
0
    def runANOVA(self):
        """Runs the test selected by self.anovaType and stores the p-values in self.ps.

        Converts structured data [(name, [orngET1, orngET2, ...]),...] to a 3D masked array
        with the following axes: 0: examples, 1: variables, 2: ExampleTables;
        self.ps is reset to ones of shape (3, numExamples) and the rows computed by the
        selected test are overwritten.

        self.anovaType: 0: ttest_ssmpl against self.compareToValue, 1: anova1A, 2: anova1B,
        >= 3: anova2 (with interaction if 4).
        Does nothing if self.dataStructure is empty or self.numExamples is 0.
        The progress bar is closed even if the selected test raises.
        """
        if not (self.dataStructure and self.numExamples > 0):
            return
        numTables = sum(len(etList) for dsName, etList in self.dataStructure)
        # multiplying by MA.masked: start from an array that is masked everywhere
        ma3d = MA.zeros((self.numExamples, self.numVariables, numTables), Numeric.Float) * MA.masked
        groupLens = []
        etIdx = 0
        for dsName, etList in self.dataStructure:
            for et in etList:
                ma3d[:,:,etIdx] = et.toNumpyMA("ac")[0]
                etIdx += 1
            groupLens.append(len(etList))

        # run ANOVA
        self.infoc.setText('ANOVA computation started...')
        self.progressBarInit()
        pbStep = 100./self.numExamples
        advance = lambda: self.progressBarAdvance(pbStep)
        self.ps = Numeric.ones((3, self.numExamples), Numeric.Float)
        try:
            if self.anovaType >= 3:
                ps = self.anova2(ma3d, groupLens, self.anovaType==4, repMeasuresOnA=False, callback=advance)
                # anova2 returns 2 or 3 rows; the remaining rows of self.ps stay 1
                for rIdx in range(ps.shape[0]):
                    self.ps[rIdx] = ps[rIdx]
            elif self.anovaType == 2:
                self.ps[1] = self.anova1B(ma3d, groupLens, repMeasures=False, callback=advance)
            elif self.anovaType == 1:
                self.ps[0] = self.anova1A(ma3d, repMeasures=False, callback=advance)
            elif self.anovaType == 0:
                try:
                    compToVal = float(self.compareToValue)
                except (ValueError, TypeError):
                    print("Warning: cannot convert %s to float, using 0" % str(self.compareToValue))
                    self.compareToValue = 0
                    compToVal = 0
                self.ps[0] = self.ttest_ssmpl(ma3d, compToVal, callback=advance)
        finally:
            self.progressBarFinished()
Exemplo n.º 10
0
 def anova1B(self, ma3d, groupLens, repMeasures, callback):
     """Conducts one-way ANOVA on individual examples wrt factor B (data sets).

     ma3d: 3D masked array; axis 2 also contains replicas according to groupLens.
     groupLens: number of replicas for each data set, e.g. [2,3,4].
     repMeasures: if true Anova.AnovaRM12LR is used, otherwise Anova.Anova1wayLR_2D.
     callback: called without arguments after each example.

     Returns a 1D Numeric array of p-values of length numExamples (ma3d.shape[0]).
     WARNING: works slower than anova1A because it requires to copy 1D array to 2D array
              although we could use Anova1wayLR instead of Anova1wayLR_2D, but not for repeated measures
              additionally, Anova1wayLR_2D handles missing factor levels correctly, which is not the case for Anova1wayLR
     """
     numVars = ma3d.shape[1]
     ps = -1*Numeric.ones((ma3d.shape[0],), Numeric.Float)
     # NOTE(review): AnovaRM12LR is called below with the data matrix only -- confirm that this matches its signature
     if repMeasures:
         fAnova = Anova.AnovaRM12LR
     else:
         fAnova = Anova.Anova1wayLR_2D
     # groupLens [2,3,4] -> grpInd [[0,1],[2,3,4],[5,6,7,8]]
     grpLensAcc = Numeric.concatenate([[0],Numeric.add.accumulate(groupLens)])
     # materialised as a list of lists because it is iterated once per example
     grpInd = [list(range(i, j)) for i, j in zip(grpLensAcc[:-1], grpLensAcc[1:])]
     for eIdx in range(ma3d.shape[0]):
         m2 = MA.zeros((max(groupLens)*numVars, len(groupLens)), Numeric.Float) * MA.masked # axis0: replicas, axis1: factor B levels
         for groupIdx,takeInd in enumerate(grpInd):
             # shorter groups leave the tail of their column masked
             m2[:groupLens[groupIdx]*numVars, groupIdx] = MA.ravel(ma3d[eIdx].take(takeInd, 1))
         an = fAnova(m2)
         ps[eIdx] = an.Fprob
         callback()
     return ps