def computeSimilarity(dataArray):
    """Build the symmetric pairwise similarity matrix between all samples.

    For every pair of samples (i, j) with i <= j the score is
    sumOpInf(patternRatio, totalRatio), minus the absolute difference of
    the two samples' diversity coefficients when that difference is
    non-zero. The score is written to both matrix[i][j] and matrix[j][i].

    dataArray is read positionally:
      - dataArray[8]: the list of sample identifiers,
      - dataArray[7]: passed to compute / enumerate*Patterns,
      - dataArray[5]: passed to computeDiversityCoefficient (together with
        the whole dataArray).

    Returns an n x n numpy array, n being the number of samples.
    Side effect: prints the elapsed wall-clock time.
    """
    start = time()
    sampleIDList = dataArray[8]
    n = len(sampleIDList)
    matrix = np.zeros((n, n))

    # The diversity coefficient only depends on one sample, so it is computed
    # once per sample instead of once per pair.
    # NOTE(review): assumes computeDiversityCoefficient has no side effects
    # that the pairwise loop relied on -- confirm.
    diversity = []
    for sampleID in sampleIDList:
        coefficient, _ = computeDiversityCoefficient(dataArray[5], [sampleID], dataArray)
        diversity.append(coefficient)

    for i in range(n):
        sample1 = sampleIDList[i]
        # Only the upper triangle (diagonal included) is computed; the result
        # is mirrored below.
        for j in range(i, n):
            sample2 = sampleIDList[j]

            # Total ratio computation
            common, in1, in2, _, _, _, _, _ = compute(dataArray[7], [sample1], [sample2])
            commonA = countAssignmentsInCommon(common, [sample1], [sample2])
            numberA1 = countAssignments(in1, [sample1])
            numberA2 = countAssignments(in2, [sample2])
            tratio = totalRatio(commonA, numberA1, numberA2)

            # Pattern ratio computation
            commonPatternsList = enumerateCommonPatterns(dataArray[7], [sample1], [sample2])
            specificPatternsList1 = enumerateSpecificPatterns(dataArray[7], [sample1], [sample2])
            # Fix: the second list used to be produced by the very same call
            # as the first one, so it never described the second sample.
            # NOTE(review): assumes enumerateSpecificPatterns returns the
            # patterns specific to its first group of samples -- confirm.
            specificPatternsList2 = enumerateSpecificPatterns(dataArray[7], [sample2], [sample1])
            pRatio = patternRatio(commonPatternsList, specificPatternsList1, specificPatternsList2)

            # Diversity coefficient
            subdRatio = abs(diversity[i] - diversity[j])

            s = sumOpInf(pRatio, tratio)
            if subdRatio:
                s = s - subdRatio
            matrix[i][j] = s
            matrix[j][i] = s

    end = time()
    # Single parenthesised argument: same output under the Python 2 print
    # statement, and also valid if print is a function.
    print("TIME: %s" % (end - start))
    return matrix
# Example #2
# 0
def totalDiffRatioAct(dataArray):
    """Interactively compare two groups of samples with total/diff ratios.

    Two sample selections are obtained through createSampleNameList, then
    the total ratio, normalized total ratio, diff ratio and normalized diff
    ratio are computed from dataArray[7] and printed. The user is finally
    asked whether the results should be saved: on "Y" a text report is
    handed to writeFile, on "N" nothing happens, and any other answer only
    prints a reminder.

    Returns None.
    """
    print("First list of samples.")
    namesA, metadataA, lowerA, upperA = createSampleNameList(dataArray)
    print("Second list of samples.")
    namesB, metadataB, lowerB, upperB = createSampleNameList(dataArray)

    common, _, _, numberA1, numberA2, _, _, _ = compute(dataArray[7], namesA, namesB)
    commonA = countAssignmentsInCommon(common, namesA, namesB)
    tratio = totalRatio(commonA, numberA1, numberA2)
    ntRatio = totalRatioNormalized(commonA, numberA1, numberA2)
    dratio = diffRatio(commonA)
    ndRatio = diffRatioNormalized(commonA, numberA1, numberA2)

    # Text forms are shared by the console output and the saved report.
    totalText = str(tratio)
    normTotalText = str(ntRatio)
    diffText = str(dratio)
    normDiffText = str(ndRatio)

    print("\nTotal Ratio Distance is: " + totalText)
    print("normalized Total Ratio is: " + normTotalText + "\n[The more it is close to 1, the more the two groups are alike]\n")
    print("Diff Ratio Distance is: " + diffText)
    print("normalized Diff Ratio is: " + normDiffText + "\n[The more it is close to 0, the more the two groups are alike]\n")
    print("[If you have obtained +inf (resp. -inf), it could mean you have selected no sample.]\n")

    answer = raw_input("Save the results? Y/N\n")
    if answer == "N":
        return
    if answer != "Y":
        print("/!\ You should answer 'Y' or 'N'!")
        return

    def describeSelection(metadata, lowerBounds, upperBounds):
        # The metadata line only appears when the selection used metadata.
        if not metadata:
            return ""
        return "".join([
            "selected on metadata: ", str(metadata),
            " with extremum values: ", str(lowerBounds),
            " (lower bounds) and ", str(upperBounds),
            " (upper bounds) \n",
        ])

    report = "".join([
        "Total Ratio Results ****\n for ", str(namesA), "\n",
        describeSelection(metadataA, lowerA, upperA),
        " and ", str(namesB), "\n",
        describeSelection(metadataB, lowerB, upperB),
        "\nTotal Ratio Distance is: ", totalText,
        "\n normalized Total Ratio is: ", normTotalText,
        "\nDiff Ratio Distance is: ", diffText,
        "\n normalized Diff Ratio is: ", normDiffText,
        "\n\nEND OF FILE ****",
    ])
    writeFile(report, "", "text")