def _only_one_stem_per_notehead(self, cropobjects, edges):
    """Keep at most one notehead-to-stem edge for every notehead.

    When a notehead (any class listed in ``_CONST.NOTEHEAD_CLSNAMES``) has
    edges to several ``stem`` objects, only the edge to the stem with the
    smallest ``cropobject_distance`` from the notehead survives. Edges
    that do not start in such a notehead, or that do not end in a stem
    linked from some notehead, pass through untouched.

    :param cropobjects: Objects the edges refer to; each one needs
        ``objid`` and ``clsname`` attributes.
    :param edges: ``(from_objid, to_objid)`` pairs. Iterated twice.
    :returns: A new list with the surviving edges, in their input order.
    """
    by_objid = {c.objid: c for c in cropobjects}

    # Group the stem targets under the notehead they are attached to.
    stems_of = {}
    linked_stem_objids = set()
    for src_objid, dst_objid in edges:
        src = by_objid[src_objid]
        dst = by_objid[dst_objid]
        if src.clsname not in _CONST.NOTEHEAD_CLSNAMES:
            continue
        if dst.clsname != 'stem':
            continue
        stems_of.setdefault(src_objid, []).append(dst_objid)
        linked_stem_objids.add(dst_objid)

    # For every such notehead, remember which stem lies nearest to it.
    winner_of = {}
    for head_objid, candidate_objids in stems_of.items():
        head = by_objid[head_objid]
        candidates = [by_objid[objid] for objid in candidate_objids]
        nearest = min(candidates,
                      key=lambda stem: cropobject_distance(head, stem))
        winner_of[head_objid] = nearest.objid

    # Drop exactly the notehead-to-stem edges that lost to a nearer stem.
    kept = []
    for src_objid, dst_objid in edges:
        lost_to_nearer_stem = (src_objid in winner_of
                               and dst_objid in linked_stem_objids
                               and winner_of[src_objid] != dst_objid)
        if not lost_to_nearer_stem:
            kept.append((src_objid, dst_objid))
    return kept
def get_features_distance_relative_bbox_and_clsname(self, c_from, c_to):
    """Build the feature dictionary for an ordered pair of CropObjects.

    The class names are kept as strings (they are not mapped to
    integers); names starting with ``letter`` or ``numeral`` are
    shortened to just that prefix.

    Keys of the result:

    * ``dist``: ``cropobject_distance(c_from, c_to)``
    * ``dt``, ``dl``, ``db``, ``dr``: ``c_to`` minus ``c_from`` for the
      ``top``, ``left``, ``bottom`` and ``right`` attributes
    * ``cls_from``, ``cls_to``: the (possibly shortened) class names
    * ``target``: 1 if both objects have equal ``doc`` and
      ``c_to.objid`` is among ``c_from.outlinks``, otherwise 0

    :returns: A dict with the keys listed above.
    """
    def _collapse(clsname):
        # Fold the per-character classes into their common prefix.
        for prefix in ('letter', 'numeral'):
            if clsname.startswith(prefix):
                return prefix
        return clsname

    is_linked = (c_from.doc == c_to.doc
                 and c_to.objid in c_from.outlinks)
    label = 1 if is_linked else 0

    separation = cropobject_distance(c_from, c_to)
    delta_top = c_to.top - c_from.top
    delta_left = c_to.left - c_from.left
    delta_bottom = c_to.bottom - c_from.bottom
    delta_right = c_to.right - c_from.right

    return {
        'dist': separation,
        'dt': delta_top,
        'dl': delta_left,
        'db': delta_bottom,
        'dr': delta_right,
        'cls_from': _collapse(c_from.clsname),
        'cls_to': _collapse(c_to.clsname),
        'target': label,
    }
def _every_full_notehead_has_a_stem(self, cropobjects, edges):
    """Add a notehead-to-stem edge for full noteheads that lack one.

    Every ``notehead-full`` object with no edge to a ``stem`` is paired
    with the closest stem that is not yet the target of an edge from a
    full notehead. The pair is only added when the two objects are closer
    than a fixed threshold of 80 (as measured by
    ``cropobject_distance``). Each notehead picks independently, so
    several noteheads may receive the same stem.

    If there is no free stem to choose from, nothing is added.

    :param cropobjects: Objects the edges refer to; each one needs
        ``objid`` and ``clsname`` attributes.
    :param edges: List of ``(from_objid, to_objid)`` pairs.
    :returns: A new list: the input edges followed by the added
        ``(notehead_objid, stem_objid)`` pairs.
    """
    _cdict = {c.objid: c for c in cropobjects}

    notehead_objids = {c.objid for c in cropobjects
                       if c.clsname == 'notehead-full'}
    stem_objids = {c.objid for c in cropobjects if c.clsname == 'stem'}

    # Find out which full noteheads and which stems are already connected.
    noteheads_with_stem_objids = set()
    stems_with_notehead_objids = set()
    for f, t in edges:
        if _cdict[f].clsname == 'notehead-full' \
                and _cdict[t].clsname == 'stem':
            noteheads_with_stem_objids.add(f)
            stems_with_notehead_objids.add(t)

    stemless_notehead_objids = [n for n in notehead_objids
                                if n not in noteheads_with_stem_objids]
    free_stem_objids = [s for s in stem_objids
                        if s not in stems_with_notehead_objids]

    closest_stem_threshold_distance = 80
    new_edges = []
    # min() raises ValueError on an empty sequence, so only look for a
    # partner when at least one free stem exists.
    if free_stem_objids:
        for n_objid in stemless_notehead_objids:
            notehead = _cdict[n_objid]
            s_objid = min(
                free_stem_objids,
                key=lambda x, n=notehead: cropobject_distance(_cdict[x], n))
            # Skip pairings that are too far apart to be plausible.
            if cropobject_distance(notehead, _cdict[s_objid]) \
                    < closest_stem_threshold_distance:
                new_edges.append((n_objid, s_objid))

    return edges + new_edges
def extract_all_pairs(self, cropobjects):
    """Collect every ordered pair of nearby objects and its features.

    Two objects form a pair when their ``objid`` values differ and
    ``cropobject_distance`` puts them strictly closer than
    ``self.MAXIMUM_DISTANCE_THRESHOLD``. Both orderings of a pair are
    visited, so ``(u, v)`` and ``(v, u)`` are tested separately.

    :param cropobjects: The objects to pair up.
    :returns: ``(pairs, features)``: ``pairs`` is a list of ``(u, v)``
        tuples and ``features`` is a numpy array built from the results
        of ``self.extractor(u, v)``, one entry per pair in the same order.
    """
    nearby_pairs = []
    feature_rows = []
    for source in cropobjects:
        for candidate in cropobjects:
            # An object is never paired with itself.
            if source.objid == candidate.objid:
                continue
            separation = cropobject_distance(source, candidate)
            if separation < self.MAXIMUM_DISTANCE_THRESHOLD:
                nearby_pairs.append((source, candidate))
                feature_rows.append(self.extractor(source, candidate))

    return nearby_pairs, numpy.array(feature_rows)
def symbol_distances(cropobjects):
    """Compute the distance between every pair of the given cropobjects.

    Each pair is measured once with ``cropobject_distance`` and the
    result is stored in both directions, so ``distances[c][d]`` and
    ``distances[d][c]`` always hold the same value. An object's distance
    to itself is included as well. The elapsed time is printed to
    standard output.

    :param cropobjects: A sized collection of objects. They are used as
        dictionary keys, so they have to be hashable.
    :returns: A dict of dicts keyed by the cropobjects themselves (not by
        their objids): ``distances[c][d]`` is the distance from ``c`` to
        ``d``.
    """
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # replacement intended for measuring elapsed time.
    _start_time = time.perf_counter()

    distances = {}
    for c in cropobjects:
        # Reuse the row if earlier iterations already mirrored values into
        # it; starting from a fresh dict would throw them away and force
        # every pair to be measured a second time.
        row = distances.setdefault(c, {})
        for d in cropobjects:
            if d in row:
                continue
            # NOTE(review): storing one value for both directions relies
            # on cropobject_distance being symmetric -- confirm.
            delta = cropobject_distance(c, d)
            row[d] = delta
            distances.setdefault(d, {})[c] = delta

    print('Distances for {0} cropobjects took {1:.3f} seconds'
          ''.format(len(cropobjects), time.perf_counter() - _start_time))
    return distances
def get_closest_objects(self, cropobjects: List[CropObject], threshold) -> Dict[CropObject, List[CropObject]]:
    """Map every cropobject to the cropobjects that lie close to it.

    Two objects count as close when ``cropobject_distance`` between them
    is strictly below ``threshold``. Every object is also compared with
    itself, so it appears in its own list whenever its distance to itself
    is below the threshold.

    :param cropobjects: The objects to compare; they serve as dictionary
        keys of the result.
    :param threshold: Exclusive upper bound on the distance.
    :returns: A dict that maps each cropobject to a duplicate-free list
        of its close objects, in the order they were first found.
    """
    neighbours = {obj: [] for obj in cropobjects}

    for first in cropobjects:
        for second in cropobjects:
            if cropobject_distance(first, second) < threshold:
                # Record the relation in both directions.
                neighbours[first].append(second)
                neighbours[second].append(first)

    # dict.fromkeys drops repeated entries and keeps first-seen order.
    return {obj: list(dict.fromkeys(found))
            for obj, found in neighbours.items()}