def merge_replicas(aETStruct, type):
    """Merges the replicas of every strain into one example table.

    aETStruct: list of tuples (strain, [orngET1, orngET2, ...]);
    type: "mean" | "median" | "min" | "max" -- the function applied across
        the replicas of a strain (the name shadows the builtin, but it is
        part of the public signature and therefore kept).

    Returns a list of tuples (strain, [merged_orngET]).
    Raises AttributeError if type is none of the four names above.
    """
    # dimensions are taken from the very first example table
    et0 = aETStruct[0][1][0]
    shape = [
        len(et0),                    # number of examples (genes)
        len(et0.domain.attributes),  # number of attributes (time points)
        0,                           # number of replicas, set per strain
    ]
    if type == "mean":
        merge_func = MA.average
    elif type == "median":
        merge_func = numpyExtn.medianMA
    elif type == "min":
        merge_func = numpyExtn.minMA
    elif type == "max":
        merge_func = numpyExtn.maxMA
    else:
        # call form of raise: valid in both Python 2 and Python 3
        raise AttributeError("type = ['mean' | 'median' | 'min' | 'max']")
    mergedETStruct = []
    for st, etList in aETStruct:
        # stack the replicas of this strain along axis 2 ...
        shape[2] = len(etList)
        ma3d = MA.zeros(shape, Numeric.Float)
        for idx, et in enumerate(etList):
            ma3d[:, :, idx] = orng2ma(et)
        # ... and collapse that axis with the selected function; the first
        # replica is handed to ma2orng_keepClassMetas together with the result
        merged = ma2orng_keepClassMetas(merge_func(ma3d, 2), etList[0])
        mergedETStruct.append((st, [merged]))
    return mergedETStruct
def triangularPut(m1d, upper=1, lower=0):
    """Returns a 2D masked array with the elements of the given 1D array in
    the strictly upper (lower) triangle.

    Elements of the 1D array should be ordered according to the upper
    triangular part of the 2D matrix. The lower triangular part (if
    requested) equals the transposed upper triangular part.
    If upper == lower == 1 a symmetric matrix is returned; positions that
    were not requested (and the diagonal) are masked.

    m1d: 1D (masked) array of length n(n-1)/2;
    upper, lower: 0 or 1, select which triangles are left unmasked.

    Raises AssertionError on invalid upper / lower, on non-1D input or when
    the length of m1d does not equal n(n-1)/2 for an integer n.
    """
    assert upper in [0,1] and lower in [0,1], "[0|1] expected for upper / lower"
    m1d = MA.asarray(m1d)
    assert MA.rank(m1d) == 1, "1D masked array expected"
    # math.ceil returns a float on Python 2; array dimensions must be integers
    m2dShape0 = int(math.ceil(math.sqrt(2*m1d.shape[0])))
    assert m1d.shape[0] == m2dShape0*(m2dShape0-1)//2, "the length of m1d does not correspond to n(n-1)/2"
    sqShape = (m2dShape0, m2dShape0)
    # mask is 1 on every position that must be hidden in the result
    if upper:
        if lower:
            mask = Numeric.fromfunction(lambda i,j: i==j, sqShape)
        else:
            mask = Numeric.fromfunction(lambda i,j: i>=j, sqShape)
    else:
        if lower:
            mask = Numeric.fromfunction(lambda i,j: i<=j, sqShape)
        else:
            mask = Numeric.ones(sqShape)
    # write m1d into the strictly upper triangle of a flattened square matrix
    m2d = MA.ravel(MA.zeros(sqShape, m1d.dtype.char))
    condUpperTriang = Numeric.fromfunction(lambda i,j: i<j, sqShape)
    putIndices = Numeric.compress(Numeric.ravel(condUpperTriang), Numeric.arange(0, m2dShape0**2, typecode=Numeric.Int))
    MA.put(m2d, putIndices, m1d)
    m2d = MA.reshape(m2d, sqShape)
    # mirror the upper triangle into the lower one
    m2d = MA.where(condUpperTriang, m2d, MA.transpose(m2d))
    return MA.array(m2d, mask=Numeric.logical_or(mask, MA.getmaskarray(m2d)))
def merge_replicas(aETStruct, type):
    """Returns a list of tuples (strain, [avrg_orngET]) where aETStruct
    corresponds to a list of tuples (strain, [orngET1, orngET2, ...]).

    type = ["mean" | "median" | "min" | "max"] names the function used to
    combine the replicas of one strain; any other value raises
    AttributeError. (The parameter name shadows the builtin `type`; it is
    kept because it belongs to the public signature.)
    """
    shape = [0, 0, 0]
    et0 = aETStruct[0][1][0]                # the first example table
    shape[0] = len(et0)                     # number of examples (genes)
    shape[1] = len(et0.domain.attributes)   # number of attributes (time points)

    if type == "mean":
        merge_func = MA.average
    elif type == "median":
        merge_func = numpyExtn.medianMA
    elif type == "min":
        merge_func = numpyExtn.minMA
    elif type == "max":
        merge_func = numpyExtn.maxMA
    else:
        # exception instantiated explicitly; the "raise E, msg" statement
        # form is a syntax error on Python 3
        raise AttributeError("type = ['mean' | 'median' | 'min' | 'max']")

    mergedETStruct = []
    for st, etList in aETStruct:
        # axis 2 holds the replicas of the current strain
        shape[2] = len(etList)
        ma3d = MA.zeros(shape, Numeric.Float)
        for idx, et in enumerate(etList):
            ma3d[:, :, idx] = orng2ma(et)
        # reduce over the replica axis and convert back, passing the
        # strain's first table to ma2orng_keepClassMetas
        mergedETStruct.append(
            (st, [ma2orng_keepClassMetas(merge_func(ma3d, 2), etList[0])]))
    return mergedETStruct
def runANOVA(self):
    """Converts structured data [(name, [orngET1, orngET2, ...]),...] to a
    3D masked array with the following axes: 0: examples, 1: variables,
    2: ExampleTables; runs the test selected by self.anovaType and sets
    self.ps.

    self.ps is a (3, numExamples) array initialised with ones; only the rows
    belonging to the selected test are overwritten. Nothing happens when
    there is no data or no examples.

    Raises Exception for OWHypTest.StLPE (not implemented) and RuntimeError
    when the data layout does not fit OWHypTest.StRST; the progress bar is
    finished in either case.
    """
    if not (self.dataStructure and self.numExamples > 0):
        return

    # stack every example table along axis 2; multiplying by MA.masked is
    # meant to leave all cells masked until they are filled below
    numTables = sum([len(ds[1]) for ds in self.dataStructure])
    ma3d = MA.zeros((self.numExamples, self.numVariables, numTables), Numeric.Float) * MA.masked
    groupLens = []
    etIdx = 0
    for dsName, etList in self.dataStructure:
        for et in etList:
            ma3d[:,:,etIdx] = et.toNumpyMA("ac")[0]
            etIdx += 1
        groupLens.append(len(etList))

    # run ANOVA
    self.infoc.setText('ANOVA computation started...')
    self.progressBarInit()
    pbStep = 100./self.numExamples

    def advance():
        # handed to the test routines as their callback
        self.progressBarAdvance(pbStep)

    try:
        self.ps = Numeric.ones((3, self.numExamples), Numeric.Float)
        if self.anovaType == OWHypTest.St2AB or self.anovaType == OWHypTest.St2ABI:
            ps = self.anova2(ma3d, groupLens, addInteraction=self.anovaType==OWHypTest.St2ABI, repMeasuresOnA=False, callback=advance)
            for rIdx in range(ps.shape[0]):
                self.ps[rIdx] = ps[rIdx]
        elif self.anovaType == OWHypTest.St1B:
            self.ps[1] = self.anova1B(ma3d, groupLens, repMeasures=False, callback=advance)
        elif self.anovaType == OWHypTest.St1A:
            self.ps[0] = self.anova1A(ma3d, repMeasures=False, callback=advance)
        elif self.anovaType == OWHypTest.StSST:
            try:
                popMeanVal = float(self.popMean)
            except ValueError:
                print("Warning: cannot convert %s to float, using 0" % str(self.popMean))
                self.popMean = 0
                popMeanVal = 0
            self.ps[0] = self.ttest_ssmpl(ma3d, popMeanVal, callback=advance)
        elif self.anovaType == OWHypTest.StLPE:
            # the statements that used to follow this raise could never run
            # and have been dropped
            raise Exception("NOT IMPLEMENTED")
        elif self.anovaType == OWHypTest.StRST:
            if self.numVariables == 2 and len(groupLens) == 1:
                self.ps[0] = self.ttest_rsmplA(ma3d, callback=advance)
            elif self.numVariables == 1 and len(groupLens) == 2 and groupLens[0] == groupLens[1]:
                self.ps[1] = self.ttest_rsmplB(ma3d, groupLens, callback=advance)
            else:
                raise RuntimeError("%s: expected 2 variables and 1 group, or 1 variable and 2 groups of equal length, got %s variables and %s groups of length %s" % (OWHypTest.StNames[self.anovaType], self.numVariables, len(groupLens), str(groupLens)))
    finally:
        # always close the progress bar, also when a test raised
        self.progressBarFinished()
def etStruct2ma3d(aETStruct):
    """Stacks all example tables of aETStruct into one 3D masked array.

    aETStruct is a list of tuples (strain, [orngET1, orngET2, ...]).
    The result has the examples (genes) on axis 0, the attributes (time
    points) on axis 1 and one slice per example table (replicas over all
    strains, in input order) on axis 2; the first two dimensions are taken
    from the very first example table.
    """
    firstTable = aETStruct[0][1][0]
    numExamples = len(firstTable)
    numAttributes = len(firstTable.domain.attributes)
    numTables = Numeric.add.reduce([len(entry[1]) for entry in aETStruct])
    ma3d = MA.zeros([numExamples, numAttributes, numTables], Numeric.Float)
    # walk through the tables strain by strain, filling consecutive slices
    allTables = [table for strain, tables in aETStruct for table in tables]
    for sliceIdx, table in enumerate(allTables):
        ma3d[:,:,sliceIdx] = orng2ma(table)
    return ma3d
def rankDataMA(m, inverse=False):
    """Ranks the non-masked values of a 1D masked array.

    Masked values are ignored (they stay masked in the result); ranks run
    from 1 to the number of non-masked values. With inverse=True the order
    of the ranks is reversed.
    """
    m = MA.asarray(m)
    assert MA.rank(m) == 1
    numValues = m.shape[0]
    # temporarily use a fill value above the maximum for the argsort call,
    # then put the original fill value back
    savedFill = m.fill_value()
    m.set_fill_value(MA.maximum(m) + 1)
    zeroBasedRanks = MA.zeros(numValues, Numeric.Float)
    MA.put(zeroBasedRanks, MA.argsort(m), Numeric.arange(numValues))
    m.set_fill_value(savedFill)
    zeroBasedRanks = MA.array(zeroBasedRanks, mask=MA.getmaskarray(m))
    if not inverse:
        return zeroBasedRanks+1
    return -1*zeroBasedRanks+MA.count(m)
def etStruct2ma3d(aETStruct):
    """Converts a list of tuples (strain, [orngET1, orngET2, ...]) to a 3D
    masked array and returns it.

    Axis 0: examples (genes), axis 1: attributes (time points), both sized
    after the first example table; axis 2: the example tables themselves
    (replicas over all strains), in the order they appear in aETStruct.
    """
    reference = aETStruct[0][1][0]      # the first example table
    dims = [
        len(reference),
        len(reference.domain.attributes),
    ]
    replicaCounts = [len(item[1]) for item in aETStruct]
    dims.append(Numeric.add.reduce(replicaCounts))
    ma3d = MA.zeros(dims, Numeric.Float)

    nextSlice = 0
    for strain, tables in aETStruct:
        for table in tables:
            ma3d[:, :, nextSlice] = orng2ma(table)
            nextSlice = nextSlice + 1
    return ma3d
def anova2(self, ma3d, groupLens, addInteraction, repMeasuresOnA, callback):
    """Runs a two-way ANOVA separately for every example.

    ma3d: 3D masked array, axis 0: examples, axis 1: levels of the first
        factor (called time indices below), axis 2: example tables grouped
        according to groupLens;
    groupLens: number of example tables in each group;
    addInteraction: whether the interaction is tested as well;
    repMeasuresOnA: selects Anova.AnovaRM12LR instead of Anova.Anova2wayLR;
    callback: called without arguments after each example.

    Returns a Numeric array of p-values in shape (2, numExamples), or
    (3, numExamples) when addInteraction is set.
    Note: time indices that cause empty cells are removed before the ANOVA
    is run, first for all examples at once and then per example.
    """
    groupLens = Numeric.asarray(groupLens)
    numGroups = groupLens.shape[0]

    # p-values default to 1
    if addInteraction:
        numRows = 3
    else:
        numRows = 2
    ps = Numeric.ones((numRows, ma3d.shape[0]), Numeric.Float)

    # non-repeated / repeated measures ANOVA
    if repMeasuresOnA:
        fAnova = Anova.AnovaRM12LR
    else:
        fAnova = Anova.Anova2wayLR

    # (start, stop) positions of every group along axis 2
    ax2Ind = Numeric.concatenate(([0], Numeric.add.accumulate(groupLens)))
    groupBounds = list(zip(ax2Ind[:-1], ax2Ind[1:]))

    # time indices with a cell that is empty for all examples
    tInd2rem = []
    for aIdx in range(ma3d.shape[1]):
        for i0, i1 in groupBounds:
            if Numeric.add.reduce(MA.count(ma3d[:, aIdx, i0:i1], 1)) == 0:
                tInd2rem.append(aIdx)
                break
    if len(tInd2rem) > 0:
        print("Warning: removing time indices %s for all genes" % (str(tInd2rem)))
        tInd2keep = [aIdx for aIdx in range(ma3d.shape[1]) if aIdx not in tInd2rem]
        ma3d = ma3d.take(tInd2keep, 1)

    for eIdx in range(ma3d.shape[0]):
        ma2d = ma3d[eIdx]
        # number of non-masked values per (time index, group) cell
        cellCount = MA.zeros((ma2d.shape[0], numGroups), Numeric.Int)
        for g, (i0, i1) in enumerate(groupBounds):
            cellCount[:, g] = MA.count(ma2d[:, i0:i1], 1)
        # 1 where to take, 0 where not to take
        emptyCells = Numeric.add.reduce(Numeric.equal(cellCount, 0), 1)
        ma2dTakeInd = Numeric.logical_not(emptyCells)
        if Numeric.add.reduce(ma2dTakeInd) != ma2dTakeInd.shape[0]:
            print("Warning: removing time indices %s for gene %i" % (str(Numeric.compress(ma2dTakeInd == 0, Numeric.arange(ma2dTakeInd.shape[0]))), eIdx))
            ma2d = MA.compress(ma2dTakeInd, ma2d, 0)
        an = fAnova(ma2d, groupLens, addInteraction, allowReductA=True, allowReductB=True)
        ps[:, eIdx] = an.ps
        callback()
    return ps
def runANOVA(self):
    """Converts structured data [(name, [orngET1, orngET2, ...]),...] to a
    3D masked array with the following axes: 0: examples, 1: variables,
    2: ExampleTables; runs the test selected by self.anovaType and sets
    self.ps.

    self.anovaType as dispatched below: 0 -> ttest_ssmpl against
    self.compareToValue, 1 -> anova1A, 2 -> anova1B, >= 3 -> anova2
    (4 additionally requests the interaction).
    self.ps is a (3, numExamples) array initialised with ones; only the rows
    belonging to the selected test are overwritten. Nothing happens when
    there is no data or no examples.
    """
    if not (self.dataStructure and self.numExamples > 0):
        return

    # stack every example table along axis 2; multiplying by MA.masked is
    # meant to leave all cells masked until they are filled below
    numTables = sum([len(ds[1]) for ds in self.dataStructure])
    ma3d = MA.zeros((self.numExamples, self.numVariables, numTables), Numeric.Float) * MA.masked
    groupLens = []
    etIdx = 0
    for dsName, etList in self.dataStructure:
        for et in etList:
            ma3d[:,:,etIdx] = et.toNumpyMA("ac")[0]
            etIdx += 1
        groupLens.append(len(etList))

    # run ANOVA
    self.infoc.setText('ANOVA computation started...')
    self.progressBarInit()
    pbStep = 100./self.numExamples

    def advance():
        # handed to the test routines as their callback
        self.progressBarAdvance(pbStep)

    try:
        self.ps = Numeric.ones((3, self.numExamples), Numeric.Float)
        if self.anovaType >= 3:
            ps = self.anova2(ma3d, groupLens, self.anovaType==4, repMeasuresOnA=False, callback=advance)
            for rIdx in range(ps.shape[0]):
                self.ps[rIdx] = ps[rIdx]
        elif self.anovaType == 2:
            self.ps[1] = self.anova1B(ma3d, groupLens, repMeasures=False, callback=advance)
        elif self.anovaType == 1:
            self.ps[0] = self.anova1A(ma3d, repMeasures=False, callback=advance)
        elif self.anovaType == 0:
            try:
                compToVal = float(self.compareToValue)
            except (TypeError, ValueError):
                # only conversion failures fall back to 0; anything else
                # (e.g. KeyboardInterrupt) is no longer swallowed
                print("Warning: cannot convert %s to float, using 0" % str(self.compareToValue))
                self.compareToValue = 0
                compToVal = 0
            self.ps[0] = self.ttest_ssmpl(ma3d, compToVal, callback=advance)
    finally:
        # always close the progress bar, also when a test raised
        self.progressBarFinished()
def anova1B(self, ma3d, groupLens, repMeasures, callback):
    """Runs a one-way ANOVA on individual examples wrt factor B (data sets).

    ma3d: 3D masked array; axis 2 holds the example tables of all groups,
        i.e. it also contains the replicas according to groupLens;
    groupLens: number of example tables in each group (factor B level);
    repMeasures: selects Anova.AnovaRM12LR instead of Anova.Anova1wayLR_2D;
    callback: called without arguments after each example.

    Returns a 1D Numeric array with one p-value (the Fprob of the fitted
    model) per example; entries are initialised to -1.

    Original author's notes: this works slower than anova1A because the
    data of each example has to be copied into a 2D array; Anova1wayLR
    could be used instead of Anova1wayLR_2D, but not for repeated measures,
    and Anova1wayLR_2D handles missing factor levels correctly, which is
    not the case for Anova1wayLR.
    """
    if repMeasures:
        fAnova = Anova.AnovaRM12LR
    else:
        fAnova = Anova.Anova1wayLR_2D

    # groupLens [2,3,4] -> grpInd [[0,1],[2,3,4],[5,6,7,8]]
    grpLensAcc = Numeric.concatenate([[0], Numeric.add.accumulate(groupLens)])
    grpInd = [range(start, stop) for start, stop in zip(grpLensAcc[:-1], grpLensAcc[1:])]

    numExamples = ma3d.shape[0]
    numVariables = ma3d.shape[1]
    ps = -1*Numeric.ones((numExamples,), Numeric.Float)
    for eIdx in range(numExamples):
        # axis0: replicas, axis1: factor B levels; cells beyond a group's
        # own values are left as created by the multiplication with MA.masked
        m2 = MA.zeros((max(groupLens)*numVariables, len(groupLens)), Numeric.Float) * MA.masked
        example2d = ma3d[eIdx]
        for groupIdx, takeInd in enumerate(grpInd):
            numValues = groupLens[groupIdx]*numVariables
            m2[:numValues, groupIdx] = MA.ravel(example2d.take(takeInd, 1))
        ps[eIdx] = fAnova(m2).Fprob
        callback()
    return ps