def getPerformanceStatistics(self, project, entities_eval, entity_target, entityType='user',
                             threshold=0.5, goldenQuestionsOnly=True):
    '''
        Compares the accuracy of a list of users or model states with a
        target user.
        The following measures of accuracy are reported, depending on the
        annotation type:
        - image labels: overall accuracy
        - points:
            RMSE (distance to closest point with the same label; in pixels)
            overall accuracy (labels)
        - bounding boxes:
            IoU (max. with any target bounding box, regardless of label)
            overall accuracy (labels)
        - segmentation masks:
            TODO

        Value 'threshold' determines the geometric requirement for an
        annotation to be counted as correct (or incorrect) as follows:
        - points: maximum euclidean distance in pixels to closest target
        - bounding boxes: minimum IoU with best matching target

        If 'goldenQuestionsOnly' is True, only images with flag
        'isGoldenQuestion' = True will be considered for evaluation.
    '''
    entityType = entityType.lower()

    # get annotation and prediction types for project
    annoTypes = self.dbConnector.execute('''SELECT annotationType, predictionType
        FROM aide_admin.project WHERE shortname = %s;''',
        (project,), 1)
    annoType = annoTypes[0]['annotationtype']
    predType = annoTypes[0]['predictiontype']

    if entityType != 'user' and annoType != predType:
        # different combinations of annotation and prediction types are currently not supported
        raise Exception('Statistics for unequal annotation and AI model prediction types are currently not supported.')

    # for segmentation masks: get label classes and their ordinals
    #TODO: implement per-class statistics for all types
    labelClasses = {}
    lcDef = self.dbConnector.execute(sql.SQL('''
            SELECT id, name, idx, color FROM {id_lc};
        ''').format(id_lc=sql.Identifier(project, 'labelclass')),
        None, 'all')
    if lcDef is not None:
        for l in lcDef:
            labelClasses[str(l['id'])] = (l['idx'], l['name'], l['color'])
    else:
        # no label classes defined
        return {}

    # compose args list and complete query
    queryArgs = [entity_target, tuple(entities_eval)]
    if annoType == 'points' or annoType == 'boundingBoxes':
        queryArgs.append(threshold)
        if annoType == 'points':
            queryArgs.append(threshold)

    if goldenQuestionsOnly:
        sql_goldenQuestion = sql.SQL('''JOIN (
                SELECT id
                FROM {id_img}
                WHERE isGoldenQuestion = true
            ) AS qi
            ON qi.id = q2.image''').format(
            id_img=sql.Identifier(project, 'image')
        )
    else:
        sql_goldenQuestion = sql.SQL('')

    # result tokens
    tokens = {}
    tokens_normalize = []
    if annoType == 'labels':
        tokens = {
            'num_matches': 0,
            'correct': 0,
            'incorrect': 0,
            'overall_accuracy': 0.0
        }
        tokens_normalize = ['overall_accuracy']
    elif annoType == 'points':
        tokens = {
            'num_pred': 0,
            'num_target': 0,
            'tp': 0,
            'fp': 0,
            'fn': 0,
            'avg_dist': 0.0
        }
        tokens_normalize = ['avg_dist']
    elif annoType == 'boundingBoxes':
        tokens = {
            'num_pred': 0,
            'num_target': 0,
            'tp': 0,
            'fp': 0,
            'fn': 0,
            'avg_iou': 0.0
        }
        tokens_normalize = ['avg_iou']
    elif annoType == 'segmentationMasks':
        tokens = {
            'num_matches': 0,
            'overall_accuracy': 0.0,
            'per_class': {}
        }
        for clID in labelClasses.keys():
            tokens['per_class'][clID] = {
                'num_matches': 0,
                'prec': 0.0,
                'rec': 0.0,
                'f1': 0.0
            }
        tokens_normalize = []

    if entityType == 'user':
        queryStr = getattr(StatisticalFormulas_user, annoType).value
        queryStr = sql.SQL(queryStr).format(
            id_anno=sql.Identifier(project, 'annotation'),
            id_iu=sql.Identifier(project, 'image_user'),
            sql_goldenQuestion=sql_goldenQuestion
        )
    else:
        queryStr = getattr(StatisticalFormulas_model, annoType).value
        queryStr = sql.SQL(queryStr).format(
            id_anno=sql.Identifier(project, 'annotation'),
            id_iu=sql.Identifier(project, 'image_user'),
            id_pred=sql.Identifier(project, 'prediction'),
            sql_goldenQuestion=sql_goldenQuestion
        )

    #TODO: update points query (according to bboxes); re-write stats parsing below

    # get stats
    response = {}
    result = self.dbConnector.execute(queryStr, tuple(queryArgs), 'all')
    if result is not None and len(result):
        for b in result:
            if entityType == 'user':
                entity = b['username']
            else:
                entity = str(b['cnnstate'])

            if not entity in response:
                response[entity] = copy.deepcopy(tokens)
            if annoType in ('points', 'boundingBoxes'):
                response[entity]['num_matches'] = 1
                if b['num_target'] > 0:
                    response[entity]['num_matches'] += 1

            if annoType == 'segmentationMasks':
                # decode segmentation masks
                try:
                    mask_target = np.array(base64ToImage(b['q1segmask'], b['q1width'], b['q1height']))
                    mask_source = np.array(base64ToImage(b['q2segmask'], b['q2width'], b['q2height']))

                    if mask_target.shape == mask_source.shape and np.any(mask_target) and np.any(mask_source):

                        # calculate OA
                        intersection = (mask_target>0) * (mask_source>0)
                        if np.any(intersection):
                            oa = np.mean(mask_target[intersection] == mask_source[intersection])
                            response[entity]['overall_accuracy'] += oa
                            response[entity]['num_matches'] += 1

                        # calculate per-class precision and recall values
                        for clID in labelClasses.keys():
                            idx = labelClasses[clID][0]
                            tp = np.sum((mask_target==idx) * (mask_source==idx))
                            fp = np.sum((mask_target!=idx) * (mask_source==idx))
                            fn = np.sum((mask_target==idx) * (mask_source!=idx))
                            if (tp+fp+fn) > 0:
                                prec, rec, f1 = self._calc_geometric_stats(tp, fp, fn)
                                response[entity]['per_class'][clID]['num_matches'] += 1
                                response[entity]['per_class'][clID]['prec'] += prec
                                response[entity]['per_class'][clID]['rec'] += rec
                                response[entity]['per_class'][clID]['f1'] += f1

                except Exception as e:
                    print(f'TODO: error in segmentation mask statistics calculation ("{str(e)}").')

            else:
                for key in tokens.keys():
                    if key == 'correct' or key == 'incorrect':
                        # classification
                        correct = b['label_correct']
                        # ignore None
                        if correct is True:
                            response[entity]['correct'] += 1
                            response[entity]['num_matches'] += 1
                        elif correct is False:
                            response[entity]['incorrect'] += 1
                            response[entity]['num_matches'] += 1
                    elif key in b and b[key] is not None:
                        response[entity][key] += b[key]

    for entity in response.keys():
        for t in tokens_normalize:
            if t in response[entity]:
                if t == 'overall_accuracy':
                    response[entity][t] = float(response[entity]['correct']) / \
                        float(response[entity]['correct'] + response[entity]['incorrect'])
                elif annoType in ('points', 'boundingBoxes'):
                    response[entity][t] /= response[entity]['num_matches']

        if annoType == 'points' or annoType == 'boundingBoxes':
            prec, rec, f1 = self._calc_geometric_stats(
                response[entity]['tp'],
                response[entity]['fp'],
                response[entity]['fn']
            )
            response[entity]['prec'] = prec
            response[entity]['rec'] = rec
            response[entity]['f1'] = f1

        elif annoType == 'segmentationMasks':
            # normalize OA
            response[entity]['overall_accuracy'] /= response[entity]['num_matches']

            # normalize all label class values as well
            for lcID in labelClasses.keys():
                numMatches = response[entity]['per_class'][lcID]['num_matches']
                if numMatches > 0:
                    response[entity]['per_class'][lcID]['prec'] /= numMatches
                    response[entity]['per_class'][lcID]['rec'] /= numMatches
                    response[entity]['per_class'][lcID]['f1'] /= numMatches

    return {
        'label_classes': labelClasses,
        'per_entity': response
    }
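
# ---------------------------------------------------------------------------------------
# Note: `_calc_geometric_stats(tp, fp, fn)` is referenced above but not defined in this
# excerpt. The standalone sketch below illustrates what such a helper presumably computes
# (precision, recall and F1 score from raw true positive / false positive / false negative
# counts). It is an assumption for illustration, not the actual helper implementation.
# ---------------------------------------------------------------------------------------
def _calc_geometric_stats_sketch(tp, fp, fn):
    '''
        Illustrative only: derives precision, recall and F1 score from
        true positive, false positive and false negative counts, guarding
        against division by zero for empty result sets.
    '''
    prec = tp / (tp + fp) if (tp + fp) > 0 else 0.0
    rec = tp / (tp + fn) if (tp + fn) > 0 else 0.0
    f1 = (2 * prec * rec / (prec + rec)) if (prec + rec) > 0 else 0.0
    return prec, rec, f1

# Hypothetical usage of getPerformanceStatistics (the object name, project short name,
# user names and threshold are made-up values):
#
#   stats = middleware.getPerformanceStatistics(
#       project='my_project',
#       entities_eval=['annotator_a', 'annotator_b'],
#       entity_target='expert_user',
#       entityType='user',
#       threshold=0.75,         # min. IoU (bounding boxes) resp. max. distance (points)
#       goldenQuestionsOnly=True)
#   # stats['per_entity'] maps each user name (or model state UUID) to the accumulated
#   # tokens defined above, e.g. 'overall_accuracy', 'prec', 'rec' and 'f1'.
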
def prepareDataDownload(self, project, dataType='annotation', userList=None, dateRange=None,
                        extraFields=None, segmaskFilenameOptions=None, segmaskEncoding='rgb'):
    '''
        Polls the database for project data according to the specified
        restrictions:
        - dataType: "annotation" or "prediction"
        - userList: for type "annotation": None (all users) or an
                    iterable of user names
        - dateRange: None (all dates) or two values for a minimum and
                     maximum timestamp
        - extraFields: None (no field) or dict of keywords and bools for
                       additional fields (e.g. browser meta) to be queried.
        - segmaskFilenameOptions: customization parameters for
                                  segmentation mask images' file names.
        - segmaskEncoding: encoding of the segmentation mask pixel
                           values ("rgb" or "indexed")

        Creates a file in this machine's temporary directory and returns
        the file name.
        Note that in some cases (esp. for semantic segmentation), the
        number of queryable entries may be limited due to file size and
        free disk space restrictions. An upper ceiling is specified in
        the configuration *.ini file ('TODO').
    '''
    now = datetime.now(tz=pytz.utc)

    # argument check
    if userList is None:
        userList = []
    elif isinstance(userList, str):
        userList = [userList]

    if dateRange is None:
        dateRange = []
    elif len(dateRange) == 1:
        # only a minimum timestamp given; use the current time as maximum
        dateRange = [dateRange[0], now]

    if extraFields is None or not isinstance(extraFields, dict):
        extraFields = {'meta': False}
    else:
        if not 'meta' in extraFields or not isinstance(extraFields['meta'], bool):
            extraFields['meta'] = False

    if segmaskFilenameOptions is None:
        segmaskFilenameOptions = {
            'baseName': 'filename',
            'prefix': '',
            'suffix': ''
        }
    else:
        if not 'baseName' in segmaskFilenameOptions or \
                segmaskFilenameOptions['baseName'] not in ('filename', 'id'):
            segmaskFilenameOptions['baseName'] = 'filename'
        try:
            segmaskFilenameOptions['prefix'] = str(segmaskFilenameOptions['prefix'])
        except Exception:
            segmaskFilenameOptions['prefix'] = ''
        try:
            segmaskFilenameOptions['suffix'] = str(segmaskFilenameOptions['suffix'])
        except Exception:
            segmaskFilenameOptions['suffix'] = ''

        for char in self.FILENAMES_PROHIBITED_CHARS:
            segmaskFilenameOptions['prefix'] = segmaskFilenameOptions['prefix'].replace(char, '_')
            segmaskFilenameOptions['suffix'] = segmaskFilenameOptions['suffix'].replace(char, '_')

    # check metadata type: need to deal with segmentation masks separately
    if dataType == 'annotation':
        metaField = 'annotationtype'
    elif dataType == 'prediction':
        metaField = 'predictiontype'
    else:
        raise Exception('Invalid dataType specified ({})'.format(dataType))
    metaType = self.dbConnector.execute('''
            SELECT {} FROM aide_admin.project
            WHERE shortname = %s;
        '''.format(metaField), (project,), 1)[0][metaField]

    if metaType.lower() == 'segmentationmasks':
        is_segmentation = True
        fileExtension = '.zip'

        # create indexed color palette for segmentation masks
        if segmaskEncoding == 'indexed':
            try:
                indexedColors = []
                labelClasses = self.dbConnector.execute(sql.SQL('''
                        SELECT idx, color FROM {id_lc} ORDER BY idx ASC;
                    ''').format(id_lc=sql.Identifier(project, 'labelclass')),
                    None, 'all')
                currentIndex = 1
                for lc in labelClasses:
                    if lc['idx'] == 0:
                        # background class
                        continue
                    while currentIndex < lc['idx']:
                        # gaps in label classes; fill with zeros
                        indexedColors.extend([0, 0, 0])
                        currentIndex += 1
                    color = lc['color']
                    if color is None:
                        # no color specified; add from defaults
                        #TODO
                        indexedColors.extend([0, 0, 0])
                    else:
                        # convert to RGB format
                        indexedColors.extend(helpers.hexToRGB(color))

            except Exception:
                # an error occurred; don't convert segmentation mask to indexed colors
                indexedColors = None
        else:
            indexedColors = None

    else:
        is_segmentation = False
        fileExtension = '.txt'      #TODO: support JSON?

    # prepare output file
    filename = 'aide_query_{}'.format(now.strftime('%Y-%m-%d_%H-%M-%S')) + fileExtension
    destPath = os.path.join(self.tempDir, 'aide/downloadRequests', project)
    os.makedirs(destPath, exist_ok=True)
    destPath = os.path.join(destPath, filename)

    # generate query
    queryArgs = []

    tableID = sql.Identifier(project, dataType)
    userStr = sql.SQL('')
    iuStr = sql.SQL('')
    dateStr = sql.SQL('')
    queryFields = [
        'filename', 'isGoldenQuestion', 'date_image_added',
        'last_requested_image', 'image_corrupt'     # default image fields
    ]
    if dataType == 'annotation':
        iuStr = sql.SQL('''
            JOIN (SELECT image AS iu_image, username AS iu_username, viewcount,
                last_checked, last_time_required FROM {id_iu}) AS iu
            ON t.image = iu.iu_image AND t.username = iu.iu_username
        ''').format(id_iu=sql.Identifier(project, 'image_user'))
        if len(userList):
            userStr = sql.SQL('WHERE username IN %s')
            queryArgs.append(tuple(userList))

        queryFields.extend(getattr(QueryStrings_annotation, metaType).value)
        queryFields.extend(['username', 'viewcount', 'last_checked', 'last_time_required'])     #TODO: make customizable

    else:
        queryFields.extend(getattr(QueryStrings_prediction, metaType).value)

    if len(dateRange):
        if len(userStr.string):
            dateStr = sql.SQL(' AND timecreated >= to_timestamp(%s) AND timecreated <= to_timestamp(%s)')
        else:
            dateStr = sql.SQL('WHERE timecreated >= to_timestamp(%s) AND timecreated <= to_timestamp(%s)')
        queryArgs.extend(dateRange)

    if not is_segmentation:
        # join label classes
        lcStr = sql.SQL('''
            JOIN (SELECT id AS lcID, name AS labelclass_name, idx AS labelclass_index
                FROM {id_lc}
            ) AS lc
            ON label = lc.lcID
        ''').format(id_lc=sql.Identifier(project, 'labelclass'))
        queryFields.extend(['labelclass_name', 'labelclass_index'])
    else:
        lcStr = sql.SQL('')

    # remove redundant query fields
    queryFields = set(queryFields)
    for key in extraFields.keys():
        if not extraFields[key]:
            queryFields.remove(key)
    queryFields = list(queryFields)

    queryStr = sql.SQL('''
        SELECT * FROM {tableID} AS t
        JOIN (
            SELECT id AS imgID, filename, isGoldenQuestion, date_added AS date_image_added,
                last_requested AS last_requested_image, corrupt AS image_corrupt
            FROM {id_img}
        ) AS img ON t.image = img.imgID
        {lcStr}
        {iuStr}
        {userStr}
        {dateStr}
    ''').format(
        tableID=tableID,
        id_img=sql.Identifier(project, 'image'),
        lcStr=lcStr,
        iuStr=iuStr,
        userStr=userStr,
        dateStr=dateStr
    )

    # query and process data
    if is_segmentation:
        mainFile = zipfile.ZipFile(destPath, 'w', zipfile.ZIP_DEFLATED)
    else:
        mainFile = open(destPath, 'w')
    metaStr = '; '.join(queryFields) + '\n'

    with self.dbConnector.execute_cursor(queryStr, tuple(queryArgs)) as cursor:
        while True:
            b = cursor.fetchone()
            if b is None:
                break

            if is_segmentation:
                # convert and store segmentation mask separately
                segmask_filename = 'segmentation_masks/'

                if segmaskFilenameOptions['baseName'] == 'id':
                    innerFilename = b['image']
                    parent = ''
                else:
                    innerFilename = b['filename']
                    parent, innerFilename = os.path.split(innerFilename)

                finalFilename = os.path.join(parent,
                    segmaskFilenameOptions['prefix'] + innerFilename + segmaskFilenameOptions['suffix'] + '.tif')
                segmask_filename += finalFilename

                segmask = base64ToImage(b['segmentationmask'], b['width'], b['height'])

                if indexedColors is not None and len(indexedColors) > 0:
                    # convert to indexed color and add color palette from label classes
                    segmask = segmask.convert('RGB').convert('P', palette=Image.ADAPTIVE, colors=3)
                    segmask.putpalette(indexedColors)

                # save
                bio = io.BytesIO()
                segmask.save(bio, 'TIFF')
                mainFile.writestr(segmask_filename, bio.getvalue())

            # store metadata
            metaLine = ''
            for field in queryFields:
                if field.lower() == 'segmentationmask':
                    continue
                metaLine += '{}; '.format(b[field.lower()])
            metaStr += metaLine + '\n'

    if is_segmentation:
        mainFile.writestr('query.txt', metaStr)
    else:
        mainFile.write(metaStr)

    if is_segmentation:
        # append separate text file for label classes
        labelclassQuery = sql.SQL('''
            SELECT id, name, color, labelclassgroup, idx AS labelclass_index
            FROM {id_lc};
        ''').format(id_lc=sql.Identifier(project, 'labelclass'))
        result = self.dbConnector.execute(labelclassQuery, None, 'all')
        lcStr = 'id,name,color,labelclassgroup,labelclass_index\n'
        for r in result:
            lcStr += '{},{},{},{},{}\n'.format(
                r['id'], r['name'], r['color'], r['labelclassgroup'], r['labelclass_index'])
        mainFile.writestr('labelclasses.csv', lcStr)

    mainFile.close()

    return filename
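
# ---------------------------------------------------------------------------------------
# Note: `helpers.hexToRGB(color)` and `base64ToImage(...)` are imported helpers that are
# not part of this excerpt. The sketch below is an assumption, not the actual helper: it
# illustrates how a '#RRGGBB' label class color could be turned into an [R, G, B] list
# before being appended to the indexed color palette above.
# ---------------------------------------------------------------------------------------
def hex_to_rgb_sketch(hex_color):
    '''
        Illustrative only: parses a '#RRGGBB' (or 'RRGGBB') hex string
        into a list of three integer channel values in [0, 255].
    '''
    hex_color = hex_color.lstrip('#')
    return [int(hex_color[i:i+2], 16) for i in (0, 2, 4)]

# Hypothetical usage of prepareDataDownload (the object name, project short name and
# filter values are made-up):
#
#   fname = dataWorker.prepareDataDownload(
#       project='my_project',
#       dataType='annotation',
#       userList=['annotator_a'],
#       dateRange=None,
#       extraFields={'meta': True},
#       segmaskFilenameOptions={'baseName': 'filename', 'prefix': '', 'suffix': '_mask'},
#       segmaskEncoding='indexed')
#   # the returned file name refers to a file under
#   # <tempDir>/aide/downloadRequests/my_project/ ('.zip' for segmentation projects,
#   # '.txt' otherwise).
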