# Fragment of a summary script; its statements are collapsed onto one line and
# the enclosing context (any outer loop or function header) is not visible here.
#
# For every line of the tab-separated file at inFilePath, columns 1-5 are
# parsed as floats: column 1 is appended to `normalized`, column 2 (pProbe) to
# `probCodes`, column 3 to `raw`, column 4 (m1Probe) to `m1` and column 5
# (m2Probe) to `m2`.  The blend (1 - pProbe) * m1Probe + pProbe * m2Probe is
# appended to `percentTotalVariation`.
#
# The code then computes the mean of probCodes, the squared correlation of raw
# against m1 (r2BackgroundActual) and of raw against percentTotalVariation
# (r2PercentTotalVariation), derives several ratios from those two values, and
# writes one tab-separated row starting with fileName to outFile before
# flushing and closing it.
#
# NOTE(review): `normalized`, `probCodes`, `raw`, `outFile`, `fileName` and
# `inFilePath` are not defined in this fragment; presumably they are set up
# earlier -- confirm.
# NOTE(review): `file(...)` is a Python 2-only builtin and the handle it
# returns is never explicitly closed; consider `open(...)` in a `with` block.
# NOTE(review): the ratios divide by r2PercentTotalVariation,
# r2BackgroundActual, (1 - r2PercentTotalVariation) and preSignalNoise with no
# guard against zero.
m1 = [] m2 = [] percentTotalVariation = [] for line in file(inFilePath): lineItems = line.rstrip().split("\t") pProbe = float(lineItems[2]) m1Probe = float(lineItems[4]) m2Probe = float(lineItems[5]) normalized.append(float(lineItems[1])) probCodes.append(pProbe) raw.append(float(lineItems[3])) m1.append(m1Probe) m2.append(m2Probe) percentTotalVariation.append(((1 - pProbe) * m1Probe) + (pProbe * m2Probe)) meanProbCode = utilities.calculateMean(probCodes) r2BackgroundActual = math.pow(utilities.calculateCorrelationCoefficient(raw, m1), 2) r2PercentTotalVariation = math.pow(utilities.calculateCorrelationCoefficient(raw, percentTotalVariation), 2) percentVariationBackground = r2BackgroundActual / r2PercentTotalVariation preSignalNoise = (r2PercentTotalVariation - r2BackgroundActual) / r2BackgroundActual postSignalNoise = preSignalNoise / (1 - r2PercentTotalVariation) postPreSignalNoise = postSignalNoise / preSignalNoise outFile.write("\t".join([str(x) for x in [fileName, meanProbCode, r2BackgroundActual, r2PercentTotalVariation, percentVariationBackground, preSignalNoise, postSignalNoise, postPreSignalNoise]]) + "\n") outFile.flush() outFile.close()
def standardScore(x):
    """Return the standard score of every value in x.

    Each element is converted with float() exactly once, so x may be any
    iterable -- including a one-shot iterator or generator -- of numbers or
    numeric strings.

    Args:
        x: iterable of values accepted by float().

    Returns:
        A list, in input order, of (value - mean) / sd, where mean and sd
        come from utilities.calculateMean and
        utilities.calculateStandardDeviation over the converted values.
    """
    # Materialise once: the values are needed for the mean, the standard
    # deviation and the final pass, and an iterator could only be read once.
    values = [float(y) for y in x]
    avg = utilities.calculateMean(values)
    sd = utilities.calculateStandardDeviation(values)
    # NOTE(review): there is no guard for sd == 0 (e.g. constant input);
    # presumably the division raises ZeroDivisionError in that case.
    return [(value - avg) / sd for value in values]
def mean(x):
    """Return the mean of x wrapped in a single-element list.

    Every item of x is converted with float() before the values are handed
    to utilities.calculateMean.
    """
    numbers = []
    for item in x:
        numbers.append(float(item))
    average = utilities.calculateMean(numbers)
    return [average]
def calculateMean(values):
    """Delegate to utilities.calculateMean and return its result unchanged."""
    result = utilities.calculateMean(values)
    return result
# Fragment of a variant-summary script; its statements are collapsed onto one
# line and the fragment is cut off after the final `for` header.
#
# From the rows in `data` it prints, in order:
#   * the number of distinct values in column sampleIndex,
#   * the number of distinct values in column geneIndex,
#   * the number of distinct non-empty entries in the comma-separated column
#     pathwaysIndex,
#   * the mean number of distinct genes per sample,
#   * the mean number of distinct samples per gene,
#   * the mean length of sampleGeneVariantDict[sample][gene] over all
#     sample/gene pairs present in that dict.
#
# NOTE(review): the last statistic rebuilds numMutatedPerGenePerSample with
# the same expression used for numGenesMutatedPerSample, and the print that
# follows passes numGenesMutatedPerSample, so it repeats the earlier
# "genes with variant per sample" value -- confirm what was intended.
# NOTE(review): the per-sample and per-gene comprehensions rescan `data` once
# for every sample/gene.
# NOTE(review): `data`, `sampleIndex`, `geneIndex`, `pathwaysIndex` and
# `sampleGeneVariantDict` are not defined in this fragment.
# NOTE(review): the body of the trailing `for pathway in uniquePathways:` loop
# is not visible here.
uniqueSamples = set([x[sampleIndex] for x in data]) print "%i samples" % len(uniqueSamples) uniqueGenes = set([x[geneIndex] for x in data]) print "%i genes with at least one variant" % len(uniqueGenes) uniquePathways = set() for row in data: pathways = row[pathwaysIndex].split(",") for pathway in pathways: if pathway != "": uniquePathways.add(pathway) print "%i pathways with at least one variant" % len(uniquePathways) numGenesMutatedPerSample = [float(len(set([x[geneIndex] for x in data if x[sampleIndex]==sample]))) for sample in uniqueSamples] print "%.1f genes with variant per sample" % utilities.calculateMean(numGenesMutatedPerSample) numSamplesMutatedPerGene = [float(len(set([x[sampleIndex] for x in data if x[geneIndex]==gene]))) for gene in uniqueGenes] print "%.1f samples with variant per gene" % utilities.calculateMean(numSamplesMutatedPerGene) numMutatedPerGenePerSample = [] for x in sampleGeneVariantDict.keys(): for geneID in sampleGeneVariantDict[x]: numMutatedPerGenePerSample.append(float(len(sampleGeneVariantDict[x][geneID]))) print "%.3f variants per gene when sample had at least one variant in gene" % utilities.calculateMean(numMutatedPerGenePerSample) numMutatedPerGenePerSample = [float(len(set([x[geneIndex] for x in data if x[sampleIndex]==sample]))) for sample in uniqueSamples] print "%.1f variants per genes with variant per sample" % utilities.calculateMean(numGenesMutatedPerSample) numSamplesPerPathway = [] for pathway in uniquePathways:
# Fragment of a probe-to-gene summarisation script; its statements are
# collapsed onto one line and the code that sets sampleID / sampleSection
# (presumably an enclosing per-sample loop -- confirm) is not visible here.
#
# Prints sampleID, fetches rows with getData(sampleSection, "ID_REF") and
# discards the first row.  For every remaining row, row[0] is mapped through
# probeGeneDict to a gene ID and row[1] is collected under that gene unless
# the gene ID is the empty string.  Each gene's collected values are then
# converted to float and replaced by utilities.calculateMean of them, and the
# resulting dict is stored in allSamplesDataDict[sampleID].
#
# Finally a matrix is built: a header row ["Description"] + sorted sample IDs,
# then one row per gene ID (sorted) with str() of each sample's value, and it
# is written with utilities.writeMatrixToFile(outData, outFilePath).
#
# NOTE(review): the gene list is taken from the first sample only
# (allSamplesDataDict[sampleIDs[0]]); a gene missing from another sample's
# dict would presumably raise KeyError when the matrix row is built.
# NOTE(review): probeGeneDict[row[0]] is a plain lookup, so a probe absent
# from probeGeneDict would presumably raise KeyError.
print sampleID sampleDataDict = {} sampleData = getData(sampleSection, "ID_REF") sampleData.pop(0) for row in sampleData: geneID = probeGeneDict[row[0]] value = row[1] if geneID != '': sampleDataDict[geneID] = sampleDataDict.setdefault(geneID, []) + [value] for geneID in sampleDataDict.keys(): values = [float(x) for x in sampleDataDict[geneID]] sampleDataDict[geneID] = utilities.calculateMean(values) allSamplesDataDict[sampleID] = sampleDataDict sampleIDs = sorted(allSamplesDataDict.keys()) geneIDs = sorted(allSamplesDataDict[sampleIDs[0]]) outData = [] outData.append(["Description"] + sampleIDs) for geneID in geneIDs: outData.append([geneID] + [str(allSamplesDataDict[sampleID][geneID]) for sampleID in sampleIDs]) utilities.writeMatrixToFile(outData, outFilePath)
# Average the rows of a tab-separated matrix that share the same first column.
#
# Reads inFilePath (optionally skipping one header line when hasHeader is
# true), groups each row's float values under its first column in outDict,
# then writes one output row per key holding the column-wise means via
# utilities.writeMatrixToFile(outData, outFilePath).
#
# Relies on inFilePath, outFilePath, hasHeader, outDict and utilities being
# defined before this point.
header = None

# `with` guarantees the input handle is closed even if a value fails to parse.
with open(inFilePath) as inFile:
    if hasHeader:
        header = inFile.readline().rstrip()

    for line in inFile:
        lineItems = line.rstrip().split("\t")
        meta = lineItems[0]
        values = [float(x) for x in lineItems[1:]]
        # Append in place instead of rebuilding the whole list for every row.
        outDict.setdefault(meta, []).append(values)

outData = []
if header is not None:
    # The header is emitted as a single cell, exactly as it was read.
    outData.append([header])

for meta in outDict.keys():
    rows = outDict[meta]
    outValues = [meta]
    # The column count is taken from the first row stored for this key.
    for i in range(len(rows[0])):
        outValues.append(utilities.calculateMean([row[i] for row in rows]))
    outData.append(outValues)

utilities.writeMatrixToFile(outData, outFilePath)
# Fragment of a probe-to-gene summarisation script; its statements are
# collapsed onto one line and the code that sets sampleID / sampleSection
# (presumably an enclosing per-sample loop -- confirm) is not visible here.
#
# Fetches rows with getData(sampleSection, "ID_REF") and discards the first
# row.  For every remaining row, row[0] is mapped through probeGeneDict to a
# gene ID and row[1] is collected under that gene unless the gene ID is the
# empty string.  Each gene's collected values are then converted to float and
# replaced by utilities.calculateMean of them, and the resulting dict is
# stored in allSamplesDataDict[sampleID].
#
# Finally a matrix is built: a header row ["Description"] + sorted sample IDs,
# then one row per gene ID (sorted) with str() of each sample's value, and it
# is written with utilities.writeMatrixToFile(outData, outFilePath).
#
# NOTE(review): the gene list is taken from the first sample only
# (allSamplesDataDict[sampleIDs[0]]); a gene missing from another sample's
# dict would presumably raise KeyError when the matrix row is built.
# NOTE(review): probeGeneDict[row[0]] is a plain lookup, so a probe absent
# from probeGeneDict would presumably raise KeyError.
sampleDataDict = {} sampleData = getData(sampleSection, "ID_REF") sampleData.pop(0) for row in sampleData: geneID = probeGeneDict[row[0]] value = row[1] if geneID != '': sampleDataDict[geneID] = sampleDataDict.setdefault(geneID, []) + [value] for geneID in sampleDataDict.keys(): values = [float(x) for x in sampleDataDict[geneID]] sampleDataDict[geneID] = utilities.calculateMean(values) allSamplesDataDict[sampleID] = sampleDataDict sampleIDs = sorted(allSamplesDataDict.keys()) geneIDs = sorted(allSamplesDataDict[sampleIDs[0]]) outData = [] outData.append(["Description"] + sampleIDs) for geneID in geneIDs: outData.append( [geneID] + [str(allSamplesDataDict[sampleID][geneID]) for sampleID in sampleIDs]) utilities.writeMatrixToFile(outData, outFilePath)
def averageHighestGenes(values):
    """Return the mean of the entries of values that exceed the median.

    The median comes from utilities.calculateMedian(values); only entries
    strictly greater than it are passed to utilities.calculateMean.
    """
    cutoff = utilities.calculateMedian(values)
    aboveCutoff = []
    for value in values:
        if value > cutoff:
            aboveCutoff.append(value)
    return utilities.calculateMean(aboveCutoff)