def countInconsistencyFromFile(path, reduceData=False):
    """Load a data set from ``path`` and measure its inconsistency.

    The file is read with ``Loader.loadExtensionSensitive``. When
    ``reduceData`` is true, the loaded rows are first passed through
    ``Loader.reduceRepetitions``.

    Returns a tuple ``(inconsistencyCounter, inconsistencyRatio)``:
    the value returned by ``countInconsistency`` for the data, and that
    value divided by the number of rows (``0.0`` when there are no rows).
    """
    data = Loader.loadExtensionSensitive(path)
    if reduceData:
        data = Loader.reduceRepetitions(data)

    inconsistencyCounter = countInconsistency(data)

    # An empty data set previously crashed with ZeroDivisionError; report a
    # ratio of 0.0 instead, since there is nothing to be inconsistent.
    numberOfRows = len(data)
    if numberOfRows:
        inconsistencyRatio = float(inconsistencyCounter) / numberOfRows
    else:
        inconsistencyRatio = 0.0
    return inconsistencyCounter, inconsistencyRatio
def loadAndCount(pathToFile, reduceData=False):
    """Load a data set and compute its entropy statistics.

    The file is read with ``Loader.loadExtensionSensitive``. When
    ``reduceData`` is true, the rows are first passed through
    ``Loader.reduceRepetitions``.

    Returns a 7-tuple:
        ``sumEnt``, ``meanEnt``  -- the pair returned by
            ``countEntropyInData``;
        ``metricEnt``            -- ``sumEnt`` divided by the row count;
        ``bitsToSaveData``       -- ``sumEnt`` multiplied by the row count;
        ``numberOfInstances``    -- the row count;
        ``valuesOfAtts``, ``importantAtts`` -- the pair returned by
            ``countValuesOfAttributes``.

    Raises ``IndexError`` when no rows were loaded.
    """
    loadedData = Loader.loadExtensionSensitive(pathToFile)
    if reduceData:
        loadedData = Loader.reduceRepetitions(loadedData)

    rows = len(loadedData)
    if rows == 0:
        # The original hit this case through an unused ``loadedData[0]``
        # access; keep the same exception type but say what went wrong.
        raise IndexError("loadAndCount: no data rows loaded from %r"
                         % (pathToFile,))

    # Pipeline: occurrence counts -> probabilities -> entropies.
    dicts = countOccurency(loadedData)
    pDicts = countProbabilities(dicts)
    eDicts = countEntropyOfDicts(pDicts)
    sumEnt, meanEnt = countEntropyInData(eDicts)

    metricEnt = sumEnt / float(rows)
    bitsToSaveData = sumEnt * float(rows)
    valuesOfAtts, importantAtts = countValuesOfAttributes(dicts)

    return (sumEnt, meanEnt, metricEnt, bitsToSaveData, rows,
            valuesOfAtts, importantAtts)
meanEntropy=sumEntropy/len(entropyDict) return (sumEntropy,meanEntropy) def loadAndCount(pathToFile, reduceData=False): loadedData = Loader.loadExtensionSensitive(pathToFile) if reduceData: loadedData = Loader.reduceRepetitions(loadedData) rows = len(loadedData) cols = len(loadedData[0]) fields = rows*cols dicts=countOccurency(loadedData) pDicts = countProbabilities(dicts) eDicts = countEntropyOfDicts(pDicts) sumEnt, meanEnt = countEntropyInData(eDicts) metricEnt = sumEnt/(float)(rows) bitsToSaveData = sumEnt*(float)(rows) numberOfInstances = len(loadedData) valuesOfAtts,importantAtts = countValuesOfAttributes(dicts) return (sumEnt,meanEnt,metricEnt,bitsToSaveData,numberOfInstances,valuesOfAtts,importantAtts) if __name__ == "__main__": file = "C:\Users\CJank\Desktop\\tmp\\wineDscr.arff" loadedData = Loader.loadExtensionSensitive(file) dicks=countOccurency(loadedData) pDics = countProbabilities(dicks) eDicts = countEntropyOfDicts(pDics) sumEnt, meanEnt = countEntropyInData(eDicts) print(dicks) print (pDics) print (eDicts) print ("Sum: "+(str)(sumEnt) +" Mean: "+(str)(meanEnt)+"")
consistent = True for b in range(min(a+1,len(loadedData)-1),len(loadedData)): if(checkIfInconsistencyOccurs(loadedData[a],loadedData[b])): knownInconsistencies.add(a) knownInconsistencies.add(b) return len(knownInconsistencies) def checkIfInconsistencyOccurs(instanceA, instanceB): numberOfAtts = len(instanceA) inconsistency = False for col in range(numberOfAtts): if(col< numberOfAtts-1): if(instanceA[col]!=instanceB[col]): break else: if(instanceA[col]!=instanceB[col]): inconsistency=True return inconsistency def countInconsistencyFromFile(path, reduceData=False): data=Loader.loadExtensionSensitive(path) if (reduceData): data = Loader.reduceRepetitions(data) inconsistencyCounter=countInconsistency(data) inconsistencyRatio = float(inconsistencyCounter)/len(data) return inconsistencyCounter, inconsistencyRatio if __name__ == "__main__": data=Loader.loadExtensionSensitive("C:\Users\CJank\Desktop\Dyskretyzator\Results_\\australianDiscretizationResults_Reduced.txt") c=countInconsistency(data) print c