def evaluateImgPair(predictionImg, groundTruthImg, confMatrix, args):
    """Accumulate one prediction/ground-truth pair into *confMatrix*.

    Accepts PIL images or ndarrays (both are converted to numpy arrays and
    only the arrays are used afterwards).  Mutates confMatrix in place when
    the pure-python path runs; the cython path returns the updated matrix.
    Returns the number of pixels in the pair.
    """
    predictionNp = np.array(predictionImg)
    groundTruthNp = np.array(groundTruthImg)

    # Check for equal image sizes.  shape[0] is the row count (height) and
    # shape[1] the column count (width); the original messages had the two
    # swapped.
    if groundTruthNp.shape[0] != predictionNp.shape[0]:
        printError("Image heights are not equal.")
    if groundTruthNp.shape[1] != predictionNp.shape[1]:
        printError("Image widths are not equal.")
    if len(predictionNp.shape) != 2:
        printError("Predicted image has multiple channels.")

    imgHeight = predictionNp.shape[0]
    imgWidth = predictionNp.shape[1]
    nbPixels = imgWidth * imgHeight

    # Evaluate images.
    if CSUPPORT:
        # Fast cython implementation.
        confMatrix = addToConfusionMatrix.cEvaluatePair(
            predictionNp.astype(np.uint8), groundTruthNp.astype(np.uint8),
            confMatrix, args.evalLabels)
    else:
        # Slower pure-python fallback.  The original iterated
        # izip(...getdata()...): izip does not exist on Python 3 and
        # .getdata() only exists on PIL images, so the path could never run;
        # iterate the flattened numpy arrays instead.
        for gtPixel, predPixel in zip(groundTruthNp.flatten(),
                                      predictionNp.flatten()):
            if gtPixel not in args.evalLabels:
                printError("Unknown label with id {:}".format(gtPixel))
            confMatrix[gtPixel][predPixel] += 1

    return nbPixels
def evaluatePair(predictionImgFileName, groundTruthImgFileName, confMatrix,
                 instanceStats, perImageStats, args):
    """Evaluate a single prediction/ground-truth image pair.

    Accumulates the pixel-level confusion matrix (*confMatrix*), the
    instance-level statistics (*instanceStats*, when
    args.evalInstLevelScore) and the per-image pixel statistics
    (*perImageStats*, when args.evalPixelAccuracy) — all mutated in place.
    Returns the number of pixels in the image pair.
    """
    # Load both images; printError is expected to report and abort on failure.
    try:
        predictionImg = Image.open(predictionImgFileName)
        predictionNp = np.array(predictionImg)
    except Exception:
        printError("Unable to load " + predictionImgFileName)
    try:
        groundTruthImg = Image.open(groundTruthImgFileName)
        groundTruthNp = np.array(groundTruthImg)
    except Exception:
        printError("Unable to load " + groundTruthImgFileName)

    # Load ground-truth instances, if needed.
    if args.evalInstLevelScore:
        # str.replace silently returns the input unchanged when the pattern
        # is absent, so support both naming schemes ("..._labelTrainIds.png"
        # and "..._labelIds.png") and fail loudly if neither matched —
        # otherwise the label image itself would be opened as the instance
        # image and all instance-level scores would be wrong.
        groundTruthInstanceImgFileName = groundTruthImgFileName.replace(
            "labelTrainIds", "instanceIds").replace("labelIds", "instanceIds")
        if groundTruthInstanceImgFileName == groundTruthImgFileName:
            printError("Unable to derive instance file name from " +
                       groundTruthImgFileName)
        try:
            instanceImg = Image.open(groundTruthInstanceImgFileName)
            instanceNp = np.array(instanceImg)
        except Exception:
            printError("Unable to load " + groundTruthInstanceImgFileName)

    # Check for equal image sizes.
    if predictionImg.size[0] != groundTruthImg.size[0]:
        printError("Image widths of " + predictionImgFileName + " and " +
                   groundTruthImgFileName + " are not equal.")
    if predictionImg.size[1] != groundTruthImg.size[1]:
        printError("Image heights of " + predictionImgFileName + " and " +
                   groundTruthImgFileName + " are not equal.")
    if len(predictionNp.shape) != 2:
        printError("Predicted image has multiple channels.")

    imgWidth = predictionImg.size[0]
    imgHeight = predictionImg.size[1]
    nbPixels = imgWidth * imgHeight

    # Accumulate the confusion matrix.
    if CSUPPORT:
        # Fast cython implementation.
        confMatrix = addToConfusionMatrix.cEvaluatePair(
            predictionNp, groundTruthNp, confMatrix, args.evalLabels)
    else:
        # Slower numpy fallback: encode each (gt, pred) pixel pair into a
        # single integer, then count the unique codes in one pass.
        encoding_value = max(groundTruthNp.max(),
                             predictionNp.max()).astype(np.int32) + 1
        encoded = (groundTruthNp.astype(np.int32) * encoding_value) + predictionNp
        values, cnt = np.unique(encoded, return_counts=True)
        for value, c in zip(values, cnt):
            pred_id = value % encoding_value
            gt_id = int((value - pred_id) / encoding_value)
            if gt_id not in args.evalLabels:
                printError("Unknown label with id {:}".format(gt_id))
            # Rows index ground truth, columns index prediction.
            confMatrix[gt_id][pred_id] += c

    if args.evalInstLevelScore:
        # Per category, the mask of predicted pixels belonging to it.
        categoryMasks = {}
        for category in instanceStats["categories"]:
            categoryMasks[category] = np.in1d(
                predictionNp,
                instanceStats["categories"][category]["labelIds"]
            ).reshape(predictionNp.shape)

        # Instance ids > 1000 encode labelId * 1000 + running index.
        instList = np.unique(instanceNp[instanceNp > 1000])
        for instId in instList:
            labelId = int(instId / 1000)
            label = id2label[labelId]
            if label.ignoreInEval:
                continue
            mask = instanceNp == instId
            instSize = np.count_nonzero(mask)
            tp = np.count_nonzero(predictionNp[mask] == labelId)
            fn = instSize - tp
            # Weight each instance by the class' average instance size so
            # small instances contribute as much as large ones (iIoU).
            weight = args.avgClassSize[label.name] / float(instSize)
            tpWeighted = float(tp) * weight
            fnWeighted = float(fn) * weight
            instanceStats["classes"][label.name]["tp"] += tp
            instanceStats["classes"][label.name]["fn"] += fn
            instanceStats["classes"][label.name]["tpWeighted"] += tpWeighted
            instanceStats["classes"][label.name]["fnWeighted"] += fnWeighted
            category = label.category
            if category in instanceStats["categories"]:
                catTp = np.count_nonzero(
                    np.logical_and(mask, categoryMasks[category]))
                catFn = instSize - catTp
                catTpWeighted = float(catTp) * weight
                catFnWeighted = float(catFn) * weight
                instanceStats["categories"][category]["tp"] += catTp
                instanceStats["categories"][category]["fn"] += catFn
                instanceStats["categories"][category]["tpWeighted"] += catTpWeighted
                instanceStats["categories"][category]["fnWeighted"] += catFnWeighted

    if args.evalPixelAccuracy:
        notIgnoredLabels = [
            l for l in args.evalLabels if not id2label[l].ignoreInEval
        ]
        # NOTE(review): with invert=True this mask is True where the gt label
        # is NOT one of notIgnoredLabels, so the key names below look
        # inverted; kept byte-for-byte to match downstream consumers — verify.
        notIgnoredPixels = np.in1d(groundTruthNp, notIgnoredLabels,
                                   invert=True).reshape(groundTruthNp.shape)
        erroneousPixels = np.logical_and(notIgnoredPixels,
                                         (predictionNp != groundTruthNp))
        perImageStats[predictionImgFileName] = {}
        perImageStats[predictionImgFileName][
            "nbNotIgnoredPixels"] = np.count_nonzero(notIgnoredPixels)
        perImageStats[predictionImgFileName][
            "nbCorrectPixels"] = np.count_nonzero(erroneousPixels)

    return nbPixels
def evaluatePair(predictionImgFileName, groundTruthImgFileName, confMatrix,
                 instanceStats, perImageStats, args):
    """Evaluate a single prediction/ground-truth image pair.

    Mutates *confMatrix*, *instanceStats* (when args.evalInstLevelScore)
    and *perImageStats* (when args.evalPixelAccuracy) in place and returns
    the number of pixels in the pair.

    NOTE(review): this is a duplicate definition of evaluatePair; at import
    time the last definition in the file wins.
    """
    # Load both images; printError is expected to report and abort on failure.
    try:
        predictionImg = Image.open(predictionImgFileName)
        predictionNp = np.array(predictionImg)
    except Exception:
        printError("Unable to load " + predictionImgFileName)
    try:
        groundTruthImg = Image.open(groundTruthImgFileName)
        groundTruthNp = np.array(groundTruthImg)
    except Exception:
        printError("Unable to load " + groundTruthImgFileName)

    # Load ground-truth instances, if needed.
    if args.evalInstLevelScore:
        # str.replace is a silent no-op when the pattern is absent; try both
        # naming schemes and fail loudly if neither matched, instead of
        # opening the label image as the instance image.
        groundTruthInstanceImgFileName = groundTruthImgFileName.replace(
            "labelTrainIds", "instanceIds").replace("labelIds", "instanceIds")
        if groundTruthInstanceImgFileName == groundTruthImgFileName:
            printError("Unable to derive instance file name from " +
                       groundTruthImgFileName)
        try:
            instanceImg = Image.open(groundTruthInstanceImgFileName)
            instanceNp = np.array(instanceImg)
        except Exception:
            printError("Unable to load " + groundTruthInstanceImgFileName)

    # Check for equal image sizes.
    if predictionImg.size[0] != groundTruthImg.size[0]:
        printError("Image widths of " + predictionImgFileName + " and " +
                   groundTruthImgFileName + " are not equal.")
    if predictionImg.size[1] != groundTruthImg.size[1]:
        printError("Image heights of " + predictionImgFileName + " and " +
                   groundTruthImgFileName + " are not equal.")
    if len(predictionNp.shape) != 2:
        printError("Predicted image has multiple channels.")

    imgWidth = predictionImg.size[0]
    imgHeight = predictionImg.size[1]
    nbPixels = imgWidth * imgHeight

    # Accumulate the confusion matrix.
    if CSUPPORT:
        # Fast cython implementation.
        confMatrix = addToConfusionMatrix.cEvaluatePair(
            predictionNp, groundTruthNp, confMatrix, args.evalLabels)
    else:
        # Numpy fallback: encode (gt, pred) pairs as single integers and
        # count the unique codes in one pass.
        encoding_value = max(groundTruthNp.max(),
                             predictionNp.max()).astype(np.int32) + 1
        encoded = (groundTruthNp.astype(np.int32) * encoding_value) + predictionNp
        values, cnt = np.unique(encoded, return_counts=True)
        for value, c in zip(values, cnt):
            pred_id = value % encoding_value
            gt_id = int((value - pred_id) / encoding_value)
            if gt_id not in args.evalLabels:
                printError("Unknown label with id {:}".format(gt_id))
            confMatrix[gt_id][pred_id] += c

    if args.evalInstLevelScore:
        # Per category, the mask of predicted pixels belonging to it.
        categoryMasks = {}
        for category in instanceStats["categories"]:
            categoryMasks[category] = np.in1d(
                predictionNp,
                instanceStats["categories"][category]["labelIds"]
            ).reshape(predictionNp.shape)

        # Instance ids > 1000 encode labelId * 1000 + running index.
        instList = np.unique(instanceNp[instanceNp > 1000])
        for instId in instList:
            labelId = int(instId / 1000)
            label = id2label[labelId]
            if label.ignoreInEval:
                continue
            mask = instanceNp == instId
            instSize = np.count_nonzero(mask)
            tp = np.count_nonzero(predictionNp[mask] == labelId)
            fn = instSize - tp
            # iIoU weighting: normalise by the class' average instance size.
            weight = args.avgClassSize[label.name] / float(instSize)
            tpWeighted = float(tp) * weight
            fnWeighted = float(fn) * weight
            instanceStats["classes"][label.name]["tp"] += tp
            instanceStats["classes"][label.name]["fn"] += fn
            instanceStats["classes"][label.name]["tpWeighted"] += tpWeighted
            instanceStats["classes"][label.name]["fnWeighted"] += fnWeighted
            category = label.category
            if category in instanceStats["categories"]:
                catTp = np.count_nonzero(
                    np.logical_and(mask, categoryMasks[category]))
                catFn = instSize - catTp
                catTpWeighted = float(catTp) * weight
                catFnWeighted = float(catFn) * weight
                instanceStats["categories"][category]["tp"] += catTp
                instanceStats["categories"][category]["fn"] += catFn
                instanceStats["categories"][category]["tpWeighted"] += catTpWeighted
                instanceStats["categories"][category]["fnWeighted"] += catFnWeighted

    if args.evalPixelAccuracy:
        notIgnoredLabels = [
            l for l in args.evalLabels if not id2label[l].ignoreInEval
        ]
        # NOTE(review): invert=True selects pixels whose gt label is NOT in
        # notIgnoredLabels; the key names below look inverted but are kept
        # as-is for downstream consumers — verify.
        notIgnoredPixels = np.in1d(groundTruthNp, notIgnoredLabels,
                                   invert=True).reshape(groundTruthNp.shape)
        erroneousPixels = np.logical_and(notIgnoredPixels,
                                         (predictionNp != groundTruthNp))
        perImageStats[predictionImgFileName] = {}
        perImageStats[predictionImgFileName][
            "nbNotIgnoredPixels"] = np.count_nonzero(notIgnoredPixels)
        perImageStats[predictionImgFileName][
            "nbCorrectPixels"] = np.count_nonzero(erroneousPixels)

    return nbPixels
def evaluatePair(predictionImgFileName, groundTruthImgFileName, confMatrix,
                 instanceStats, perImageStats, args):
    """Evaluate a single prediction/ground-truth image pair.

    Mutates *confMatrix*, *instanceStats* (when args.evalInstLevelScore)
    and *perImageStats* (when args.evalPixelAccuracy) in place and returns
    the number of pixels in the pair.  This is the last (winning) of the
    file's duplicate evaluatePair definitions.
    """
    # Load both images; printError is expected to report and abort on failure.
    try:
        predictionImg = Image.open(predictionImgFileName)
        predictionNp = np.array(predictionImg)
    except Exception:
        printError("Unable to load " + predictionImgFileName)
    try:
        groundTruthImg = Image.open(groundTruthImgFileName)
        groundTruthNp = np.array(groundTruthImg)
    except Exception:
        printError("Unable to load " + groundTruthImgFileName)

    # Load ground-truth instances, if needed.
    if args.evalInstLevelScore:
        # (translated from the original Korean note) str.replace returns the
        # input unchanged when the pattern is absent, which previously made
        # "labelTrainIds" files open the label image as the instance image
        # and zeroed all thing-class nIoU scores.  Support both naming
        # schemes and fail loudly if neither matched.
        groundTruthInstanceImgFileName = groundTruthImgFileName.replace(
            "labelTrainIds", "instanceIds").replace("labelIds", "instanceIds")
        if groundTruthInstanceImgFileName == groundTruthImgFileName:
            printError("Unable to derive instance file name from " +
                       groundTruthImgFileName)
        try:
            instanceImg = Image.open(groundTruthInstanceImgFileName)
            instanceNp = np.array(instanceImg)
        except Exception:
            printError("Unable to load " + groundTruthInstanceImgFileName)

    # Check for equal image sizes.
    if predictionImg.size[0] != groundTruthImg.size[0]:
        printError("Image widths of " + predictionImgFileName + " and " +
                   groundTruthImgFileName + " are not equal.")
    if predictionImg.size[1] != groundTruthImg.size[1]:
        printError("Image heights of " + predictionImgFileName + " and " +
                   groundTruthImgFileName + " are not equal.")
    if len(predictionNp.shape) != 2:
        printError("Predicted image has multiple channels.")

    imgWidth = predictionImg.size[0]
    imgHeight = predictionImg.size[1]
    nbPixels = imgWidth * imgHeight

    # Accumulate the confusion matrix (rows = ground truth, cols = prediction).
    if CSUPPORT:
        # Fast cython implementation.
        confMatrix = addToConfusionMatrix.cEvaluatePair(
            predictionNp, groundTruthNp, confMatrix, args.evalLabels)
    else:
        # Numpy fallback: encode each (gt, pred) pixel pair as
        # gt * encoding_value + pred, then count the unique codes in one pass.
        encoding_value = max(groundTruthNp.max(),
                             predictionNp.max()).astype(np.int32) + 1
        encoded = (groundTruthNp.astype(np.int32) * encoding_value) + predictionNp
        values, cnt = np.unique(encoded, return_counts=True)
        for value, c in zip(values, cnt):
            pred_id = value % encoding_value            # remainder -> prediction id
            gt_id = int((value - pred_id) / encoding_value)  # quotient -> gt id
            if gt_id not in args.evalLabels:
                printError("Unknown label with id {:}".format(gt_id))
            confMatrix[gt_id][pred_id] += c

    if args.evalInstLevelScore:
        # Per category, the mask of *predicted* pixels belonging to it.
        predCategoryMasks = {}
        for category in instanceStats["categories"]:
            predCategoryMasks[category] = np.in1d(
                predictionNp,
                instanceStats["categories"][category]["labelIds"]
            ).reshape(predictionNp.shape)

        # Instance ids > 1000 encode labelId * 1000 + running index.
        # TODO(original author, translated): an instance id of exactly 1000
        # would be skipped here; arguably this should be > 999, though it
        # does not matter for Cityscapes-style data.
        instList = np.unique(instanceNp[instanceNp > 1000])
        for instId in instList:
            labelId = int(instId / 1000)
            label = id2label[labelId]
            if label.ignoreInEval:
                continue
            # instanceNp carries gt instance ids; predictionNp carries the
            # predicted class ids (semantic segmentation output only).
            gtInstMask = instanceNp == instId
            instSize = np.count_nonzero(gtInstMask)
            tp = np.count_nonzero(predictionNp[gtInstMask] == labelId)
            fn = instSize - tp
            # iIoU weighting: normalise by the class' average instance size,
            # so small instances count as much as large ones.  Per the
            # original author's note, the unweighted tp/fn accumulated below
            # are reportedly unused downstream (only the weighted values are
            # consumed) — verify before removing.
            weight = args.avgClassSize[label.name] / float(instSize)
            tpWeighted = float(tp) * weight
            fnWeighted = float(fn) * weight
            instanceStats["classes"][label.name]["tp"] += tp
            instanceStats["classes"][label.name]["fn"] += fn
            instanceStats["classes"][label.name]["tpWeighted"] += tpWeighted
            instanceStats["classes"][label.name]["fnWeighted"] += fnWeighted
            category = label.category
            if category in instanceStats["categories"]:
                catTp = np.count_nonzero(
                    np.logical_and(gtInstMask, predCategoryMasks[category]))
                catFn = instSize - catTp
                catTpWeighted = float(catTp) * weight
                catFnWeighted = float(catFn) * weight
                instanceStats["categories"][category]["tp"] += catTp
                instanceStats["categories"][category]["fn"] += catFn
                instanceStats["categories"][category]["tpWeighted"] += catTpWeighted
                instanceStats["categories"][category]["fnWeighted"] += catFnWeighted

    if args.evalPixelAccuracy:
        notIgnoredLabels = [
            l for l in args.evalLabels if not id2label[l].ignoreInEval
        ]
        # NOTE(review): invert=True selects pixels whose gt label is NOT in
        # notIgnoredLabels; key names below look inverted but are kept
        # as-is for downstream consumers — verify.
        notIgnoredPixels = np.in1d(groundTruthNp, notIgnoredLabels,
                                   invert=True).reshape(groundTruthNp.shape)
        erroneousPixels = np.logical_and(notIgnoredPixels,
                                         (predictionNp != groundTruthNp))
        perImageStats[predictionImgFileName] = {}
        perImageStats[predictionImgFileName][
            "nbNotIgnoredPixels"] = np.count_nonzero(notIgnoredPixels)
        perImageStats[predictionImgFileName][
            "nbCorrectPixels"] = np.count_nonzero(erroneousPixels)

    return nbPixels