def _row_spans_from_votes(votes, row_positions):
    """Group consecutive positively-voted wordboxes of one row into index spans.

    Args:
        votes: per-wordbox truthy/falsy values; truthy means the wordbox was voted
            for the type being evaluated.
        row_positions: per-wordbox values aligned with ``votes``; a value equal to 0
            is treated as the first wordbox of a new row, which always terminates
            a running span.

    Returns:
        List of ``(first, last)`` wordbox index pairs, both ends inclusive.
    """
    spans = []
    span_start = None
    last_idx = -1
    for last_idx, (voted, row_pos) in enumerate(zip(votes, row_positions)):
        if span_start is not None and (not voted or row_pos == 0):
            # Hit something not voted, or a row start: close the running span.
            spans.append((span_start, last_idx - 1))
            span_start = None
        # Deliberately a separate `if` (not `elif`): a voted wordbox that starts a new
        # row must close the previous span AND open a new one, otherwise it would
        # belong to no span at all.
        if span_start is None and voted:
            span_start = last_idx
    if span_start is not None:
        # Flush the span that runs up to the last wordbox.
        spans.append((span_start, last_idx))
    return spans


def evaluating_ftypes_targets_separated(eval_ds, eval_size, model, cls_extract_types, verbose_progress=True,
                                        plots_prefix=None):
    """Evaluate per-wordbox and per-annotation statistics and print a summary.

    Draws ``eval_size`` batches from ``eval_ds``, runs ``model.predict_on_batch`` on the
    non-annotation inputs and accumulates, for every name in ``cls_extract_types``:

    * a 2x2 confusion matrix indexed ``[predicted, real]`` over wordboxes
      (predictions and truths are thresholded at 0.5),
    * 'good' / 'wrong' / 'miss' / 'extra' counters as returned by
      ``eval_match_annotations`` for the predicted row-wise spans versus the target
      annotations of that type.

    Args:
        eval_ds: iterator of batches; ``batch[0]`` is a dict of inputs (including the
            annotation columns), ``batch[1]`` the truths and ``batch[2]`` the weights.
        eval_size: number of batches to consume.
        model: object exposing ``predict_on_batch(x)``.
        cls_extract_types: sequence of type names; position ``ft`` in the sequence is
            used as channel ``ft`` of the predictions and truths.
        verbose_progress: when False the tqdm progress bar is disabled.
        plots_prefix: when not None, ``produce_drawings`` is called for every sample
            with this value as its last argument.

    Returns:
        The value of ``T_f1`` on the sum of all per-type confusion matrices.
    """
    # Confusion matrix per type name, indexed [predicted, real] per wordbox.
    ft_per_wb = defaultdict(lambda: np.zeros((2, 2), dtype=int))
    ft_per_annotation = defaultdict(lambda: {'good': 0, 'wrong': 0, 'miss': 0, 'extra': 0})
    annotation_columns = FtypesTrgtDocsTextsSqliteNearest.ANNOTATION_COLUMNS

    for istep in tqdm(range(eval_size), total=eval_size, disable=not verbose_progress):
        batch = six.next(eval_ds)
        # Batch structure: [0] - inputs x (plus annotation columns), [1] - y, [2] - weights.
        inputs = batch[0]
        annotations = {item: inputs[item] for item in annotation_columns if item in inputs}
        x = {item: inputs[item] for item in inputs if item not in annotation_columns}

        predicted_data = model.predict_on_batch(x)

        for b, (wb_poso, pred, truth, truth_weights, annotation) in enumerate(
                zip(x['wb-poso'], predicted_data, batch[1], batch[2], annotations['annotations'])):
            # First remove batch-padded items: the first 0-weight marks the real length
            # of the wordbox array. The `break` matters - without it the loop would end
            # on the LAST zero weight and keep almost all of the padding.
            # NOTE(review): assumes real wordboxes never carry a zero weight - confirm.
            this_count = truth.shape[0]
            for i in range(truth.shape[0]):
                if truth_weights[i] == 0:
                    this_count = i
                    break

            # The sample might be a single piece cut from a much longer page, which is
            # why some annotations might be missing.
            truth = truth[:this_count, ...]
            pred = pred[:this_count, ...]
            wb_poso = wb_poso[:this_count, :]

            # Target annotations that reference at least one wordbox.
            trgt_annots = [(ft, annot_ids)
                           for ft, annot_ids in zip(annotation['cls_extract_type'], annotation['ids'])
                           if len(annot_ids) > 0]

            if plots_prefix is not None:
                produce_drawings(
                    istep, b, cls_extract_types, truth, pred, x['wb-bbox'][b],
                    x['nearest-annotated'][b] if 'nearest-annotated' in x else None,
                    x['nearest-wb-bbox'][b] if 'nearest-wb-bbox' in x else None,
                    plots_prefix)

            for ft, ft_name in enumerate(cls_extract_types):
                # Per wordbox stats:
                voted_cls_extract_type = pred[:, ft] >= 0.5
                truth_cls_extract_type = truth[:, ft] >= 0.5

                conf = ft_per_wb[ft_name]
                conf[1, 1] += np.sum(voted_cls_extract_type & truth_cls_extract_type)  # tp
                conf[0, 0] += np.sum(~voted_cls_extract_type & ~truth_cls_extract_type)  # tn
                conf[1, 0] += np.sum(voted_cls_extract_type & ~truth_cls_extract_type)  # fp
                conf[0, 1] += np.sum(~voted_cls_extract_type & truth_cls_extract_type)  # fn

                # Grouping mechanism for evaluating per annotation: the wordboxes are in
                # reading order, so consecutive voted wordboxes of the same row are
                # concatenated into one predicted annotation. A row start is detected by
                # wb_poso[:, 1] == 0. This does not need to be the best algorithm.
                spans = _row_spans_from_votes(voted_cls_extract_type, wb_poso[:, 1])
                pred_annots = [list(range(first, last + 1)) for first, last in spans]
                trgt_fl_annots = [ids for name, ids in trgt_annots if name == ft_name]

                labels_eval = eval_match_annotations(repair_annotations(trgt_fl_annots), pred_annots)
                counters = ft_per_annotation[ft_name]
                for restype in counters:
                    counters[restype] += labels_eval[restype]

    print("micro nongb f1:")
    for ft in cls_extract_types:
        print("{}: {} (lbl {} -> {})".format(ft, T_f1(ft_per_wb[ft]), str(dict(ft_per_annotation[ft])),
                                             GWME_f1(ft_per_annotation[ft])))
        print("{}".format(ft_per_wb[ft]))
        print(" ")

    total_conf = sum([ft_per_wb[ft] for ft in ft_per_wb])
    totalmicrof1 = T_f1(total_conf)
    print("total micro conf matrix: (total micro nonbg f1: {})".format(totalmicrof1))
    print(total_conf)

    lbl_tot = GWME_sum(ft_per_annotation)
    print("total micro lbl: (f1: {})".format(GWME_f1(lbl_tot)))
    print(dict(lbl_tot))

    return totalmicrof1
def evaluating_ftypes_targets_reuse(eval_ds, eval_size, model, cls_extract_types, verbose_progress=True,
                                    plots_prefix=None):
    """Evaluate per-wordbox confusion matrices per type and print a summary.

    Draws ``eval_size`` batches from ``eval_ds``, runs ``model.predict_on_batch`` on the
    non-annotation inputs and accumulates a 2x2 confusion matrix, indexed
    ``[predicted, real]``, for every name in ``cls_extract_types`` (predictions and
    truths are thresholded at 0.5).

    Args:
        eval_ds: iterator of batches; ``batch[0]`` is a dict of inputs (including the
            annotation columns) and ``batch[1]`` holds the truths with the weight in
            the last channel.
        eval_size: number of batches to consume.
        model: object exposing ``predict_on_batch(x)``.
        cls_extract_types: sequence of type names; position ``ft`` in the sequence is
            used as channel ``ft`` of the predictions and truths.
        verbose_progress: when False the tqdm progress bar is disabled.
        plots_prefix: when not None, ``produce_drawings`` is called for every sample
            with this value as its last argument.

    Returns:
        The value of ``T_f1`` on the sum of all per-type confusion matrices.
    """
    # Confusion matrix per type name, indexed [predicted, real] per wordbox.
    ft_per_wb = defaultdict(lambda: np.zeros((2, 2), dtype=int))
    # Per type: [success (needed & provided), miss (needed & not provided),
    # extra (not needed & provided)] ... we do not care about extras.
    # NOTE(review): these counters are accumulated but never printed or returned here.
    ft_satisfication = defaultdict(lambda: [0, 0, 0])
    annotation_columns = FtypesTrgtDocsTextsSqliteNearest.ANNOTATION_COLUMNS

    for istep in tqdm(range(eval_size), total=eval_size, disable=not verbose_progress):
        batch = six.next(eval_ds)
        # Batch structure: [0] - inputs x (plus annotation columns), [1] - concatenated y and weights.
        inputs = batch[0]
        annotations = {item: inputs[item] for item in annotation_columns}
        x = {item: inputs[item] for item in inputs if item not in annotation_columns}

        predicted_data = model.predict_on_batch(x)

        for b, (wb_poso, pred, truth, annotation, nearest_annotation,
                nearest_cls_extract_type_to_ordered_ids) in enumerate(
                zip(x['wb-poso'], predicted_data, batch[1], annotations['annotations'],
                    annotations['nearest-annotations'],
                    annotations['nearest-cls_extract_type-to-ordered-ids'])):
            # First remove batch-padded items: the last channel is the weight and the
            # first 0-weight marks the real length of the wordbox array. The `break`
            # matters - without it the loop would end on the LAST zero weight and keep
            # almost all of the padding.
            # NOTE(review): assumes real wordboxes never carry a zero weight - confirm.
            this_count = truth.shape[0]
            for i in range(truth.shape[0]):
                if truth[i, -1] == 0:
                    this_count = i
                    break

            # The sample might be a single piece cut from a much longer page, which is
            # why some annotations might be missing.
            truth = truth[:this_count, ...]
            pred = pred[:this_count, ...]
            wb_poso = wb_poso[:this_count, :]

            if plots_prefix is not None:
                produce_drawings(istep, b, cls_extract_types, truth, pred, x['wb-bbox'][b],
                                 x['nearest-annotated'][b], x['nearest-wb-bbox'][b], plots_prefix)

            # Per wordbox stats:
            for ft, ft_name in enumerate(cls_extract_types):
                voted_cls_extract_type = pred[:, ft] >= 0.5
                truth_cls_extract_type = truth[:, ft] >= 0.5

                conf = ft_per_wb[ft_name]
                conf[1, 1] += np.sum(voted_cls_extract_type & truth_cls_extract_type)  # tp
                conf[0, 0] += np.sum(~voted_cls_extract_type & ~truth_cls_extract_type)  # tn
                conf[1, 0] += np.sum(voted_cls_extract_type & ~truth_cls_extract_type)  # fp
                conf[0, 1] += np.sum(~voted_cls_extract_type & truth_cls_extract_type)  # fn

            # Embedding - capacity stats: which types are needed by this sample and
            # which are provided by its nearest annotation (non-empty id lists only).
            needed_fts = {anot
                          for anot, ids in zip(annotation['cls_extract_type'], annotation['ids'])
                          if len(ids) > 0}
            provided_fts = {anot
                            for anot, ids in zip(nearest_annotation['cls_extract_type'],
                                                 nearest_annotation['ids'])
                            if len(ids) > 0}
            for ft in needed_fts | provided_fts:
                if ft in needed_fts:
                    # Index 0: needed & provided, index 1: needed & not provided.
                    ft_satisfication[ft][0 if ft in provided_fts else 1] += 1
                else:
                    # Members of the union that are not needed are necessarily provided.
                    ft_satisfication[ft][2] += 1

    print("micro nongb f1:")
    for ft in cls_extract_types:
        print("{}: {}".format(ft, T_f1(ft_per_wb[ft])))
        print("{}".format(ft_per_wb[ft]))
        print(" ")

    total_conf = sum([ft_per_wb[ft] for ft in ft_per_wb])
    print("total micro conf matrix:")
    print(total_conf)
    print("total micro nonbg f1:")
    totf = T_f1(total_conf)
    print("... {}".format(totf))

    return totf
def evaluating_f_reuse(eval_ds, eval_size, model, verbose_progress=True, plots_prefix=None):
    """Evaluate per-wordbox confusion matrices and nearest-page capacity, then print a summary.

    Draws ``eval_size`` batches from ``eval_ds``, runs ``model.predict_on_batch`` on the
    non-annotation inputs, converts predictions and truths to per-type values with
    ``are_wordboxes_in_cls_extract_type`` and accumulates a 2x2 confusion matrix,
    indexed ``[predicted, real]``, per type. It also counts how many needed wordboxes
    and needed types are covered by the nearest annotation ("capacity").

    Args:
        eval_ds: iterator of batches; ``batch[0]`` is a dict of inputs (including the
            annotation columns) and ``batch[1]`` holds the truths.
        eval_size: number of batches to consume.
        model: object exposing ``predict_on_batch(x)``.
        verbose_progress: when False the tqdm progress bar is disabled.
        plots_prefix: passed as the last argument to ``produce_drawings``.

    Returns:
        The value of ``T_f1`` on the sum of all per-type confusion matrices.
    """
    # Confusion matrix per type, indexed [predicted, real] per wordbox.
    ft_per_wb = defaultdict(lambda: np.zeros((2, 2), dtype=int))

    capacity_tp = 0  # needed wordboxes whose type is provided by the nearest annotation
    capacity_tp_ft = 0  # needed types provided by the nearest annotation
    capacity_fn = 0  # needed wordboxes whose type is not provided
    capacity_fn_ft = 0  # needed types not provided

    # Per type: [success (needed & provided), miss (needed & not provided),
    # extra (not needed & provided)] ... we do not care about extras.
    ft_satisfication = defaultdict(lambda: [0, 0, 0])
    annotation_columns = DocsTextsSqliteNearest.ANNOTATION_COLUMNS

    for istep in tqdm(range(eval_size), total=eval_size, disable=not verbose_progress):
        batch = six.next(eval_ds)
        # Batch structure: [0] - inputs x (plus annotation columns), [1] - concatenated y and weights.
        inputs = batch[0]
        annotations = {item: inputs[item] for item in annotation_columns}
        x = {item: inputs[item] for item in inputs if item not in annotation_columns}

        predicted_data = model.predict_on_batch(x)

        for b, (wb_poso, pred, truth, annotation, nearest_annotation,
                nearest_cls_extract_type_to_ordered_ids) in enumerate(
                zip(x['wb-poso'], predicted_data, batch[1], annotations['annotations'],
                    annotations['nearest-annotations'],
                    annotations['nearest-cls_extract_type-to-ordered-ids'])):
            # First remove batch-padded items.
            # Highest ordered id referenced by any type, plus one. Raises ValueError
            # when no id is referenced at all.
            nearest_count = max(sum(nearest_cls_extract_type_to_ordered_ids.values(), [])) + 1

            # The first 0-weight marks the real length of the wordbox array. The `break`
            # matters - without it the loop would end on the LAST zero weight and keep
            # almost all of the padding.
            # NOTE(review): assumes truth[i, 0, 1] is the weight and that real wordboxes
            # never carry a zero weight - confirm.
            this_count = truth.shape[0]
            for i in range(truth.shape[0]):
                if truth[i, 0, 1] == 0:
                    this_count = i
                    break

            # The sample might be a single piece cut from a much longer page, which is
            # why some annotations might be missing.
            truth = truth[:this_count, :nearest_count, ...]
            pred = pred[:this_count, :nearest_count, ...]
            wb_poso = wb_poso[:this_count, :]

            voted_cls_extract_type = are_wordboxes_in_cls_extract_type(
                pred, nearest_cls_extract_type_to_ordered_ids)
            truth_cls_extract_type = are_wordboxes_in_cls_extract_type(
                truth, nearest_cls_extract_type_to_ordered_ids)

            # The same information in a different format for drawing: one column per
            # type (sorted), one row per wordbox.
            ft_all_ordered = sorted(set(annotation['cls_extract_type'] + nearest_annotation['cls_extract_type']))
            cls_extract_type_order_now = {ft: i for i, ft in enumerate(ft_all_ordered)}
            truth_draw = np.zeros((len(wb_poso), len(ft_all_ordered)))
            pred_draw = np.zeros((len(wb_poso), len(ft_all_ordered)))
            for ft_bb_ids, ft in zip(annotation['ids'], annotation['cls_extract_type']):
                truth_draw[ft_bb_ids, cls_extract_type_order_now[ft]] = 1.0
            for ft, ft_i in cls_extract_type_order_now.items():
                # NOTE(review): only list-valued votes are drawn, while the statistics
                # below apply `&` / `~` to the same values - confirm what
                # are_wordboxes_in_cls_extract_type returns.
                if ft in voted_cls_extract_type and isinstance(voted_cls_extract_type[ft], list):
                    pred_draw[voted_cls_extract_type[ft], ft_i] = 1.0
            # NOTE(review): called even when plots_prefix is None - confirm that
            # produce_drawings handles that case.
            produce_drawings(istep, b, ft_all_ordered, truth_draw, pred_draw, x['wb-bbox'][b],
                             x['nearest-annotated'][b], x['nearest-wb-bbox'][b], plots_prefix)

            # Per wordbox stats:
            for ft in truth_cls_extract_type.keys():
                voted = voted_cls_extract_type[ft]
                real = truth_cls_extract_type[ft]
                conf = ft_per_wb[ft]
                conf[1, 1] += np.sum(voted & real)  # tp
                conf[0, 0] += np.sum(~voted & ~real)  # tn
                conf[1, 0] += np.sum(voted & ~real)  # fp
                conf[0, 1] += np.sum(~voted & real)  # fn

            # Embedding - capacity stats: number of wordbox ids per type that the sample
            # needs and that the nearest annotation provides (non-empty id lists only).
            needed_fts = {}
            for anot, ids in zip(annotation['cls_extract_type'], annotation['ids']):
                if len(ids) > 0:
                    needed_fts[anot] = needed_fts.get(anot, 0) + len(ids)
            provided_fts = {}
            for anot, ids in zip(nearest_annotation['cls_extract_type'], nearest_annotation['ids']):
                if len(ids) > 0:
                    provided_fts[anot] = provided_fts.get(anot, 0) + len(ids)

            for ft in set(needed_fts).union(provided_fts):
                if ft not in needed_fts:
                    # Members of the union that are not needed are necessarily provided.
                    ft_satisfication[ft][2] += 1
                elif ft in provided_fts:
                    ft_satisfication[ft][0] += 1
                    capacity_tp += needed_fts[ft]
                    capacity_tp_ft += 1
                else:
                    ft_satisfication[ft][1] += 1
                    capacity_fn += needed_fts[ft]
                    capacity_fn_ft += 1

    print("micro nonbg f1:")
    for ft in ft_per_wb:
        print("{}: {}".format(ft, T_f1(ft_per_wb[ft])))

    total_conf = sum([ft_per_wb[ft] for ft in ft_per_wb])
    print("total micro conf matrix:")
    print(total_conf)
    print("total micro nonbg f1:")
    totalmicrof1 = T_f1(total_conf)
    print("... {}".format(totalmicrof1))

    # Guard the ratios: when no needed type was seen the denominators are zero, and a
    # ZeroDivisionError here would discard the result of the whole evaluation.
    needed_wordboxes = capacity_fn + capacity_tp
    needed_types = capacity_fn_ft + capacity_tp_ft
    print("from nearest capacity acc:")
    print("... {}".format(capacity_tp / needed_wordboxes if needed_wordboxes else float('nan')))
    print("from nearest capacity in types acc:")
    print("... {}".format(capacity_tp_ft / needed_types if needed_types else float('nan')))

    return totalmicrof1