Пример #1
0
    m1 = []
    m2 = []
    percentTotalVariation = []

    # Read one tab-separated record per line.  Columns 1-5 feed the
    # per-probe lists; `normalized`, `probCodes` and `raw` are expected to
    # have been initialised before this point (outside this excerpt).
    # The `with` block closes the input file even if a line fails to parse.
    with open(inFilePath) as inFile:
        for line in inFile:
            lineItems = line.rstrip().split("\t")

            pProbe = float(lineItems[2])
            m1Probe = float(lineItems[4])
            m2Probe = float(lineItems[5])

            normalized.append(float(lineItems[1]))
            probCodes.append(pProbe)
            raw.append(float(lineItems[3]))
            m1.append(m1Probe)
            m2.append(m2Probe)
            # Blend of the two model columns, weighted by pProbe.
            percentTotalVariation.append(((1 - pProbe) * m1Probe) + (pProbe * m2Probe))

    meanProbCode = utilities.calculateMean(probCodes)
    # Squared correlation of the raw values against m1 alone and against
    # the pProbe-weighted blend of m1 and m2.
    r2BackgroundActual = math.pow(utilities.calculateCorrelationCoefficient(raw, m1), 2)
    r2PercentTotalVariation = math.pow(utilities.calculateCorrelationCoefficient(raw, percentTotalVariation), 2)
    # NOTE(review): none of the divisions below is guarded; a squared
    # correlation of exactly 0 or 1 (or a zero preSignalNoise) raises
    # ZeroDivisionError -- confirm whether such inputs can occur.
    percentVariationBackground = r2BackgroundActual / r2PercentTotalVariation
    preSignalNoise = (r2PercentTotalVariation - r2BackgroundActual) / r2BackgroundActual
    postSignalNoise = preSignalNoise / (1 - r2PercentTotalVariation)
    postPreSignalNoise = postSignalNoise / preSignalNoise

    # One tab-separated summary row per input file; flushed immediately so
    # partial results are on disk if a later file fails.
    outFile.write("\t".join([str(x) for x in [fileName, meanProbCode, r2BackgroundActual, r2PercentTotalVariation, percentVariationBackground, preSignalNoise, postSignalNoise, postPreSignalNoise]]) + "\n")
    outFile.flush()

outFile.close()
Пример #2
0
def standardScore(x):
    """Return the standard score of every value in ``x``.

    Each item is converted with ``float`` exactly once, so ``x`` may be
    any iterable (including a one-shot generator) of numbers or numeric
    strings.  The mean and standard deviation come from
    ``utilities.calculateMean`` and ``utilities.calculateStandardDeviation``.

    Returns a list with one ``(value - mean) / sd`` entry per input item,
    in input order.

    NOTE(review): a standard deviation of zero is not guarded against;
    with plain floats the division raises ZeroDivisionError.
    """
    values = [float(y) for y in x]
    mean = utilities.calculateMean(values)
    sd = utilities.calculateStandardDeviation(values)
    return [(value - mean) / sd for value in values]
Пример #3
0
def mean(x):
    """Return a one-element list holding the mean of ``x``.

    Every item of ``x`` is converted with ``float`` before being passed to
    ``utilities.calculateMean``; the result is wrapped in a list.
    """
    numbers = []
    for item in x:
        numbers.append(float(item))
    average = utilities.calculateMean(numbers)
    return [average]
Пример #4
0
def calculateMean(values):
    """Return ``utilities.calculateMean(values)`` unchanged.

    Thin local alias: ``values`` is forwarded as-is and the helper's
    result is returned without modification.
    """
    result = utilities.calculateMean(values)
    return result
Пример #5
0
# Summary statistics over `data`: each row is indexed by sampleIndex,
# geneIndex and pathwaysIndex.  The single-argument print(...) form used
# below prints the same text under Python 2 and Python 3.
uniqueSamples = set([x[sampleIndex] for x in data])
print("%i samples" % len(uniqueSamples))

uniqueGenes = set([x[geneIndex] for x in data])
print("%i genes with at least one variant" % len(uniqueGenes))

# The pathways column is a comma-separated list; empty entries are ignored.
uniquePathways = set()
for row in data:
    pathways = row[pathwaysIndex].split(",")
    for pathway in pathways:
        if pathway != "":
            uniquePathways.add(pathway)
print("%i pathways with at least one variant" % len(uniquePathways))

# Group the rows once so the per-sample and per-gene counts below do not
# rescan all of `data` for every sample and every gene.
genesBySample = {}
samplesByGene = {}
for row in data:
    genesBySample.setdefault(row[sampleIndex], set()).add(row[geneIndex])
    samplesByGene.setdefault(row[geneIndex], set()).add(row[sampleIndex])

# Number of distinct genes seen for each sample.
numGenesMutatedPerSample = [float(len(genesBySample[sample])) for sample in uniqueSamples]
print("%.1f genes with variant per sample" % utilities.calculateMean(numGenesMutatedPerSample))

# Number of distinct samples seen for each gene.
numSamplesMutatedPerGene = [float(len(samplesByGene[gene])) for gene in uniqueGenes]
print("%.1f samples with variant per gene" % utilities.calculateMean(numSamplesMutatedPerGene))

# Number of entries stored for every (sample, gene) pair in
# sampleGeneVariantDict.
numMutatedPerGenePerSample = []
for x in sampleGeneVariantDict.keys():
    for geneID in sampleGeneVariantDict[x]:
        numMutatedPerGenePerSample.append(float(len(sampleGeneVariantDict[x][geneID])))
print("%.3f variants per gene when sample had at least one variant in gene" % utilities.calculateMean(numMutatedPerGenePerSample))

# NOTE(review): the original recomputed the per-sample gene counts here and
# then printed the mean of numGenesMutatedPerSample, so this line reports
# the same figure as "genes with variant per sample" under another label.
# Values and output are preserved as-is; confirm which statistic was meant.
numMutatedPerGenePerSample = list(numGenesMutatedPerSample)
print("%.1f variants per genes with variant per sample" % utilities.calculateMean(numGenesMutatedPerSample))

numSamplesPerPathway = []
for pathway in uniquePathways:
Пример #6
0
def calculateMean(values):
    """Forward ``values`` to ``utilities.calculateMean`` and return its result."""
    meanValue = utilities.calculateMean(values)
    return meanValue
Пример #7
0
    # Single-argument print(...) prints the same text on Python 2 and 3.
    print(sampleID)

    sampleDataDict = {}

    sampleData = getData(sampleSection, "ID_REF")
    # Discard the first row (presumably the column-header row -- confirm
    # against getData).
    sampleData.pop(0)

    # Collect the values of every probe under the gene it maps to; probes
    # mapped to an empty gene ID are skipped.
    for row in sampleData:
        geneID = probeGeneDict[row[0]]
        value = row[1]

        if geneID != '':
            # Append in place instead of copying the list for each probe.
            sampleDataDict.setdefault(geneID, []).append(value)

    # Replace each gene's list of raw values with the mean of those values.
    # Iterating over a snapshot of the keys keeps the reassignment safe.
    for geneID in list(sampleDataDict):
        values = [float(x) for x in sampleDataDict[geneID]]
        sampleDataDict[geneID] = utilities.calculateMean(values)

    allSamplesDataDict[sampleID] = sampleDataDict

# Build a gene-by-sample matrix: a header row followed by one row per gene,
# with samples as columns in sorted order.  The gene list is taken from the
# first sample only.
sampleIDs = sorted(allSamplesDataDict)
geneIDs = sorted(allSamplesDataDict[sampleIDs[0]])

outData = [["Description"] + sampleIDs]

for geneID in geneIDs:
    outRow = [geneID]
    for sampleID in sampleIDs:
        outRow.append(str(allSamplesDataDict[sampleID][geneID]))
    outData.append(outRow)

utilities.writeMatrixToFile(outData, outFilePath)
Пример #8
0
# Group the numeric rows of a tab-separated file by their first column and
# write one row per group holding the column-wise means.  `outDict` is
# expected to exist already (it is defined outside this excerpt).
header = None

# The `with` block closes the input file even if a line fails to parse.
with open(inFilePath) as inFile:
    if hasHeader:
        header = inFile.readline().rstrip()

    for line in inFile:
        lineItems = line.rstrip().split("\t")
        meta = lineItems[0]
        values = [float(x) for x in lineItems[1:]]

        # Append in place instead of copying the group's list for each row.
        outDict.setdefault(meta, []).append(values)

outData = []

# The header line is written back as a single, unsplit cell.
if header is not None:
    outData.append([header])

for meta in outDict:
    rows = outDict[meta]
    outValues = [meta]
    # The number of columns is taken from the group's first row.
    for i in range(len(rows[0])):
        iValues = [x[i] for x in rows]
        outValues.append(utilities.calculateMean(iValues))

    outData.append(outValues)

utilities.writeMatrixToFile(outData, outFilePath)
Пример #9
0
    sampleDataDict = {}

    sampleData = getData(sampleSection, "ID_REF")
    # Drop the first row (presumably the column-header row -- confirm
    # against getData).
    sampleData.pop(0)

    # Gather every probe value under its mapped gene ID, ignoring probes
    # whose gene ID is the empty string.
    for row in sampleData:
        geneID = probeGeneDict[row[0]]
        value = row[1]

        if geneID != '':
            # In-place append avoids rebuilding the list for every probe.
            sampleDataDict.setdefault(geneID, []).append(value)

    # Collapse each gene's list of values to its mean.  A snapshot of the
    # keys is iterated so that reassigning entries is safe.
    for geneID in list(sampleDataDict):
        values = [float(x) for x in sampleDataDict[geneID]]
        sampleDataDict[geneID] = utilities.calculateMean(values)

    allSamplesDataDict[sampleID] = sampleDataDict

# Write the per-sample gene means as a matrix: the first row lists the
# sorted sample IDs, each following row holds one gene's value per sample.
# Genes are those present in the first sample, in sorted order.
sampleIDs = sorted(allSamplesDataDict)
firstSampleData = allSamplesDataDict[sampleIDs[0]]
geneIDs = sorted(firstSampleData)

headerRow = ["Description"] + sampleIDs
outData = [headerRow]

for geneID in geneIDs:
    geneValues = [str(allSamplesDataDict[sampleID][geneID]) for sampleID in sampleIDs]
    outData.append([geneID] + geneValues)

utilities.writeMatrixToFile(outData, outFilePath)
Пример #10
0
def averageHighestGenes(values):
    """Return the mean of the entries of ``values`` above their median.

    The median comes from ``utilities.calculateMedian``; only entries
    strictly greater than it are passed to ``utilities.calculateMean``.
    """
    cutoff = utilities.calculateMedian(values)
    aboveCutoff = []
    for value in values:
        if value > cutoff:
            aboveCutoff.append(value)
    return utilities.calculateMean(aboveCutoff)