Пример #1
0
    def getAnalysisResult(self, mainClassPath):
        """Compute Pearson correlations for every sub-class of one main class.

        Steps, all relative to ``mainClassPath``:

        1. Pick the properties to analyse from ``self.mainClassDict`` (keyed
           by the last ``/``-separated path component), falling back to
           ``self.defaultSelectProperty``.
        2. Collect directory entries matching ``self.re_SubClass`` (the
           sub-classes) and ``self.re_Com_Pheno`` (exactly one workbook is
           required).
        3. For each sub-class, read ``pheno_<subClass>.csv`` and collect the
           lower-cased, non-empty values of the column selected by
           ``self.pheno_MappingColId`` (1-based index or header name).
        4. Call ``self.getSlnFiles`` per sub-class, dump the returned tables
           side by side into ``PearsonResult.csv``, then run ``pearsonr`` on
           columns 4 and 5 of every returned table.

        Sub-classes are processed in sorted order so that the CSV output and
        the returned ``OrderedDict`` are deterministic.

        :param mainClassPath: directory of the main class; the main-class
            name is taken as the text after the last ``/``.
        :return: ``OrderedDict`` mapping ``str(v_type)`` to an ``OrderedDict``
            that maps ``str(select_Type)`` to the ``pearsonr`` result.
        :raises Exception: if no property selection is configured, if the
            number of com_pheno files is not exactly one, if the mapping
            column name is not present in a pheno CSV header, or if two
            tables of one sub-class report the same ``select_Type``.
        """
        print("getAnalysisResult(" + mainClassPath + ")")
        pearsonResultPath = os.path.join(mainClassPath, "PearsonResult.csv").replace("\\","/")
        listdir = os.listdir(mainClassPath)

        mainClass = mainClassPath.split("/")[-1]
        print("mainClass = " + mainClass)

        mainClassDictValue = self.mainClassDict.get(mainClass, None)
        if mainClassDictValue is None:
            mainClassDictValue = self.defaultSelectProperty

        print("mainClassDictValue = ", mainClassDictValue)
        if mainClassDictValue is None:
            raise Exception("getSlnFiles(" + mainClass + "): 未正確設定欲做為皮爾森分析的屬性,請正確設定 self.mainClassDict 或 self.defaultSelectProperty !!")

        subClassFolderSet = set()
        com_phenoSet = set()
        for entry in listdir:
            if self.re_SubClass.match(entry):
                subClassFolderSet.add(entry)
            if self.re_Com_Pheno.match(entry):
                com_phenoSet.add(entry)

        # Fixed iteration order: a bare set/dict would make the output order
        # depend on hashing.
        subClassFolders = sorted(subClassFolderSet)

        print("subClassCount = " + str(len(subClassFolders)))
        if not com_phenoSet:
            # Zero matches used to be reported as "more than one".
            raise Exception("找不到 com_pheno 檔案,請確認 " + mainClassPath + " 路徑下存在一個符合規則的檔案(搜尋規則:com_pheno.*\\.[(xlsx)(xls)(csv)])!")
        if len(com_phenoSet) != 1:
            raise Exception("com_pheno檔案位於 " + mainClassPath + " 路徑下不只一個,請移除或重新命名非必要項目(搜尋規則:com_pheno.*\\.[(xlsx)(xls)(csv)])!")

        comPhenoPath = os.path.join(mainClassPath, list(com_phenoSet)[0]).replace("\\","/")
        print("Get Excel File: " + comPhenoPath)
        excel = Excel(comPhenoPath, self.blnShowExcel, self.blnForceRestartExcel)

        # Everything that needs the workbook runs inside try/finally so the
        # Excel handle is released even when a later step raises.
        try:
            self.getPhoneSheet(excel)

            dataRecoderCount = excel.sheetRowCounts - 1
            print("dataRecoderCount = " + str(dataRecoderCount))

            randomPickDict = {}
            for subClassFolder in subClassFolders:
                phone_v_Path = os.path.join(mainClassPath, "pheno_" + str(subClassFolder) + ".csv").replace("\\","/")
                print("Read phone_v_File(" + phone_v_Path + ")")
                csv = CSVFile(phone_v_Path, decoding="utf-8")
                list2D = csv.readTo2DList(",")

                # Resolve the mapping column: a 1-based integer index, or a
                # header name compared case-insensitively (last match wins).
                mappingColIdx = None
                if is_integer(self.pheno_MappingColId):
                    mappingColIdx = self.pheno_MappingColId - 1
                else:
                    wantedName = self.pheno_MappingColId.strip().lower()
                    for idx, colName in enumerate(list2D[0]):
                        if colName.strip().lower() == wantedName:
                            mappingColIdx = idx

                if mappingColIdx is None:
                    # Without this check the failure would be an opaque
                    # TypeError from indexing a row with None.
                    raise Exception("pheno 檔案 " + phone_v_Path + " 中找不到對應欄位 " + str(self.pheno_MappingColId) + ",請檢查 self.pheno_MappingColId 設定 !!")

                # Skip the header row; keep every non-empty mapping value.
                randomPickDict[subClassFolder] = [
                    line[mappingColIdx].lower()
                    for line in list2D[1:]
                    if line[mappingColIdx] != ""
                ]

            print ("randomPickDict = ", randomPickDict)
            for subClassFolder in subClassFolders:
                print (len(randomPickDict[subClassFolder]))

            colNamesString = list2str(excel.get_rowData(1))
            print("list2str=", colNamesString)

            subClassesResult = {}
            for subClass in subClassFolders:
                subClassesResult[subClass] = self.getSlnFiles(excel, mainClassDictValue, mainClassPath, subClass, randomPickDict[subClass])

            print(subClassesResult)
        finally:
            excel.close()
        print("++++++++++++++++++++++++++++++++++++++++++++++++")

        # Write the raw tables: one CSV line per table row, with the tables
        # of one sub-class placed side by side and separated by ",,".
        csv = CSVFile(pearsonResultPath, "utf-8")
        for subClassName in subClassFolders:
            slnFilesResult = subClassesResult[subClassName]
            # +1 accounts for the header row of each table.
            rowCount = len(randomPickDict[subClassName]) + 1

            for rowIdx in range(rowCount):
                strLine = ""
                for sectionIdx in range(len(mainClassDictValue)):
                    strLine = strLine + list2str(slnFilesResult[sectionIdx][rowIdx]) + ",,"
                csv.writeLine(strLine)

            csv.writeLine("")

        subClassResultDict = OrderedDict()
        for subClassName in subClassFolders:
            slnFilesResult = subClassesResult[subClassName]
            slnResultDict = OrderedDict()

            # Header cell of the first table with its first 9 characters
            # dropped (original note: the cell looks like "subClass_v1").
            v_type = slnFilesResult[0][0][2][9:]
            for slnClassNum, slnFile in enumerate(slnFilesResult, 1):
                # Header cell with its first 5 characters dropped
                # (original note: the cell looks like "type_9").
                select_Type = slnFile[0][5][5:]

                # Drop the header row in place, as the original code did.
                slnFile.pop(0)
                pearsonr_list1 = []
                pearsonr_list2 = []
                for lineData in slnFile:
                    print("lineData = ", lineData)
                    pearsonr_list1.append(lineData[4])
                    pearsonr_list2.append(lineData[5])

                tag = "(v_type=" + str(v_type) +  ", slnClassNum=" + str(slnClassNum) + ", select_Type=" + str(select_Type) + ") : "
                print("pearsonr_list1" + tag , pearsonr_list1)
                print("pearsonr_list2" + tag , pearsonr_list2)

                pearson_result = pearsonr(pearsonr_list1, pearsonr_list2)
                print("pearsonr_result" + tag + str(pearson_result))
                if str(select_Type) in slnResultDict:
                    raise Exception("select_Type 重複,請檢查 self.mainClassDict 或 self.defaultSelectProperty 是否有重複設定!!")
                slnResultDict[str(select_Type)] = pearson_result

            subClassResultDict[str(v_type)] = slnResultDict

        return subClassResultDict
Пример #2
0
    def getPhenoResult(self, mainClassPath):
        """Split the com_pheno workbook rows into per-sub-class CSV files.

        The sub-class list comes from ``self.mainClass_subClassMap`` (keyed by
        the last ``/``-separated component of ``mainClassPath``), falling back
        to ``self.defaultSubClassList``. The data rows of the single workbook
        matching ``self.re_Com_Pheno`` are partitioned by
        ``randomPickNGroup(rowCount, subClassCount)``. For the sub-class at
        position ``i`` two files are written into ``mainClassPath``:

        * ``pheno_<subClass>.csv`` - header plus the rows of group ``i``;
        * ``pheno_except_<subClass>.csv`` - header plus the rows of every
          other group.

        Cells in the columns listed in ``self.com_pheno_NotFloatCols``
        (1-based indices or header names) are converted with ``int()``; when
        that fails a warning is printed and ``str()`` is used instead.

        :param mainClassPath: directory of the main class.
        :return: None.
        :raises Exception: if the number of com_pheno files is not exactly
            one, or if a column name in ``self.com_pheno_NotFloatCols`` is
            not present in the workbook header row.
        """
        print("getPhenoResult(" + mainClassPath + ")")
        listdir = os.listdir(mainClassPath)

        mainClass = mainClassPath.split("/")[-1]
        print("mainClass = " + mainClass)

        com_phenoSet = set()
        for entry in listdir:
            if self.re_Com_Pheno.match(entry):
                com_phenoSet.add(entry)

        subClassFolderList = self.mainClass_subClassMap.get(mainClass, None)
        if subClassFolderList is None:
            subClassFolderList = self.defaultSubClassList

        subClassCount = len(subClassFolderList)
        print("subClassCount = " + str(subClassCount))
        if not com_phenoSet:
            # Zero matches used to be reported as "more than one".
            raise Exception("找不到 com_pheno 檔案,請確認 " + mainClassPath + " 路徑下存在一個符合規則的檔案(搜尋規則:com_pheno.*\\.[(xlsx)(xls)(csv)])!")
        if len(com_phenoSet) != 1:
            raise Exception("com_pheno檔案位於 " + mainClassPath + " 路徑下不只一個,請移除或重新命名非必要項目(搜尋規則:com_pheno.*\\.[(xlsx)(xls)(csv)])!")

        comPhenoPath = os.path.join(mainClassPath, list(com_phenoSet)[0])
        print("Get Excel File: " + comPhenoPath)
        excel = Excel(comPhenoPath.replace("\\","/"), self.blnShowExcel, self.blnForceRestartExcel)

        # try/finally guarantees the Excel handle is released even when a
        # later step raises.
        try:
            self.getPhoneSheet(excel)

            dataRecoderCount = excel.sheetRowCounts - 1
            print("dataRecoderCount = " + str(dataRecoderCount))

            randomPickLists = randomPickNGroup(dataRecoderCount, subClassCount)
            print ("randomPickList = ", randomPickLists)
            for randomList in randomPickLists:
                print (len(randomList))

            colNamesList = excel.get_rowData(1)
            colNamesString = list2str(colNamesList)
            print("list2str=", colNamesString)

            # Resolve the "not float" columns to 0-based indices once; the
            # result does not depend on the row being written.
            notFloatColIdxs = []
            for colNum in self.com_pheno_NotFloatCols:
                if is_integer(colNum):
                    colIdx = colNum - 1
                else:
                    colIdx = None
                    wantedName = str(colNum).strip().lower()
                    # Case-insensitive header match; the last match wins.
                    for idx, colName in enumerate(colNamesList):
                        if wantedName == colName.strip().lower():
                            colIdx = idx
                    if colIdx is None:
                        # Previously this surfaced as a TypeError raised from
                        # inside the conversion's except handler.
                        raise Exception("com_pheno 檔案中找不到 self.com_pheno_NotFloatCols 指定的欄位 " + str(colNum) + ",請檢查設定 !!")
                notFloatColIdxs.append(colIdx)

            def writeRows(csvPath, rowNums):
                # Write the header plus the selected workbook rows to csvPath.
                csv = CSVFile(csvPath, "utf-8")
                csv.writeLine(colNamesString)
                for num in rowNums:
                    # num + 1 skips the header row of the sheet.
                    rowData = list(excel.get_rowData(num+1))
                    for mappingColIdx in notFloatColIdxs:
                        try:
                            rowData[mappingColIdx] = int(rowData[mappingColIdx])
                        except (TypeError, ValueError, OverflowError):
                            print("Change Excel Value Warning(int(rowData[" + str(mappingColIdx) + "])): " + str(rowData[mappingColIdx]), "You can check self.com_pheno_NotFloatCols, ignore this warning  if expected !!")
                            rowData[mappingColIdx] = str(rowData[mappingColIdx])
                    csv.writeLine(list2str(rowData))

            for case, subClassFolder in enumerate(subClassFolderList):
                phenoPath =  os.path.join(mainClassPath, "pheno_" + str(subClassFolder) + ".csv").replace("\\","/")
                phenoExceptPath = os.path.join(mainClassPath, "pheno_except_" + str(subClassFolder) + ".csv").replace("\\","/")

                print("len(randomPickLists[case] = ", len(randomPickLists[case]))
                writeRows(phenoPath, randomPickLists[case])

                # Rows of every group except this sub-class's own group.
                differenceList = []
                for groupIdx, group in enumerate(randomPickLists):
                    if groupIdx != case:
                        differenceList.extend(group)

                print("differenceList", differenceList)
                print("differenceList len = " + str(len(differenceList)))
                writeRows(phenoExceptPath, differenceList)
        finally:
            excel.close()