def __load_mung(filename: str, exclude_classes: List[str]) -> NotationGraph:
    """Build a notation graph from a file, leaving out unwanted classes.

    :param filename: Path handed to ``read_nodes_from_file``.
    :param exclude_classes: Class names whose nodes are removed from the
        graph through ``remove_vertex``.
    :return: The ``NotationGraph`` built from the file's nodes, after the
        excluded nodes have been removed.
    """
    loaded_nodes = read_nodes_from_file(filename)
    graph = NotationGraph(loaded_nodes)

    # Decide what to drop before touching the graph, then remove one by one.
    unwanted = list(
        filter(lambda node: node.class_name in exclude_classes, loaded_nodes))
    for node in unwanted:
        graph.remove_vertex(node.id)

    return graph
def count_nodes_and_relationships(annot_file: str) -> Tuple[int, int]:
    """Count the nodes of an annotation file and their inlinks.

    :param annot_file: Path handed to ``read_nodes_from_file``.
    :return: ``(number of nodes, total number of inlinks)``; nodes whose
        ``inlinks`` attribute is ``None`` contribute nothing to the total.
    """
    parsed = read_nodes_from_file(annot_file)
    total_inlinks = sum(
        len(item.inlinks) for item in parsed if item.inlinks is not None)
    return len(parsed), total_inlinks
def main(args):
    """Add staff relationships to an annotation file and export the result.

    Reads ``args.annot``, passes the nodes through
    ``add_staff_relationships`` and writes the exported node list to
    ``args.export`` (or prints it when ``args.export`` is ``None``).

    :param args: Parsed command-line arguments; the attributes read here are
        ``annot``, ``notehead_staffspace_threshold`` and ``export``.
    :raises ValueError: If ``args.annot`` is not an existing file.
    """
    logging.info('Starting main...')
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # documented replacement for measuring elapsed time.
    _start_time = time.perf_counter()

    ##########################################################################
    logging.info('Import the Node list')
    if not os.path.isfile(args.annot):
        raise ValueError('Annotation file {0} not found!'
                         ''.format(args.annot))
    nodes = read_nodes_from_file(args.annot)

    output_nodes = add_staff_relationships(
        nodes,
        notehead_staffspace_threshold=args.notehead_staffspace_threshold)

    ##########################################################################
    logging.info('Export the combined list.')
    nodes_string = export_node_list(output_nodes)

    if args.export is not None:
        with open(args.export, 'w') as hdl:
            hdl.write(nodes_string)
    else:
        print(nodes_string)

    _end_time = time.perf_counter()
    logging.info('add_staff_relationships.py done in {0:.3f} s'
                 ''.format(_end_time - _start_time))
def test_read_nodes_from_file_with_data(self):
    """The ``data`` dict of the first node in 01_basic_binary.xml is parsed."""
    package_root = os.path.dirname(os.path.dirname(__file__))
    annotation_path = os.path.join(package_root, 'test', 'test_data',
                                   '01_basic_binary.xml')

    first_node_data = read_nodes_from_file(annotation_path)[0].data

    self.assertEqual("G", first_node_data['pitch_step'])
    self.assertEqual(79, first_node_data['midi_pitch_code'])
    self.assertEqual([8, 17], first_node_data['precedence_outlinks'])
def main(args):
    """Infer pitches, durations and onsets and store them in the nodes' data.

    The nodes read from ``args.annot`` are annotated in place
    (``midi_pitch_code``, ``normalized_pitch_step``, ``pitch_octave``,
    ``duration_beats``, ``onset_beats``), then exported to ``args.export``
    (or printed when it is ``None``). When ``args.midi`` is given, a MIDI
    file built from the inferred values is written there as well.

    :param args: Parsed command-line arguments; the attributes read here are
        ``annot``, ``export`` and ``midi``.
    :raises ValueError: If ``args.annot`` is not an existing file.
    """
    logging.info('Starting main...')
    # time.clock() was removed in Python 3.8.
    _start_time = time.perf_counter()

    if not os.path.isfile(args.annot):
        raise ValueError('Annotation file {0} not found!'
                         ''.format(args.annot))
    nodes = read_nodes_from_file(args.annot)

    pitch_inference_engine = PitchInferenceEngine()
    time_inference_engine = OnsetsInferenceEngine(nodes=nodes)

    def _store(node, key, value):
        # A node that received no pitch may still have data=None when its
        # duration or onset is written, so every write goes through here.
        if node.data is None:
            node.data = dict()
        node.data[key] = value

    logging.info('Running pitch inference.')
    pitches, pitch_names = pitch_inference_engine.infer_pitches(
        nodes, with_names=True)

    # Export
    logging.info('Adding pitch information to <Data> attributes.')
    for node in nodes:
        if node.id in pitches:
            midi_pitch_code = pitches[node.id]
            pitch_step, pitch_octave = pitch_names[node.id]
            _store(node, 'midi_pitch_code', midi_pitch_code)
            _store(node, 'normalized_pitch_step', pitch_step)
            _store(node, 'pitch_octave', pitch_octave)

    logging.info('Adding duration info to <Data> attributes.')
    durations = time_inference_engine.durations(nodes)
    logging.info('Total durations: {0}'.format(len(durations)))
    for node in nodes:
        if node.id in durations:
            _store(node, 'duration_beats', durations[node.id])

    logging.info('Some durations: {0}'.format(sorted(durations.items())[:10]))

    logging.info('Adding onset info to <Data> attributes.')
    onsets = time_inference_engine.onsets(nodes)
    logging.info('Total onsets: {0}'.format(len(onsets)))
    for node in nodes:
        if node.id in onsets:
            _store(node, 'onset_beats', onsets[node.id])

    exported_nodes = export_node_list(nodes)
    if args.export is not None:
        with open(args.export, 'w') as hdl:
            hdl.write(exported_nodes)
            hdl.write('\n')
    else:
        print(exported_nodes)

    if args.midi is not None:
        mf = build_midi(pitches, durations, onsets)
        with open(args.midi, 'wb') as hdl:
            mf.writeFile(hdl)

    _end_time = time.perf_counter()
    logging.info('infer_pitches.py done in {0:.3f} s'.format(_end_time -
                                                             _start_time))
def prepare_annotations(muscima_pp_dataset_directory: str,
                        exported_annotations_file_path: str,
                        annotations_path: str):
    """Create Pascal-VOC annotations for every XML file of the dataset.

    Any previous export (``exported_annotations_file_path`` and the
    ``annotations_path`` directory) is deleted first. Each XML file found
    below ``<dataset>/v2.0/data/annotations`` is then paired with the first
    PNG in ``<dataset>/v2.0/data/images`` whose path contains the document
    name of the file's first node.

    :param muscima_pp_dataset_directory: Root directory of the dataset.
    :param exported_annotations_file_path: Stale export file to remove.
    :param annotations_path: Directory the annotations are generated into.
    :raises FileNotFoundError: If no image path contains the document name
        of an annotation file.
    """
    data_directory = os.path.join(muscima_pp_dataset_directory, "v2.0", "data")
    image_paths = glob(os.path.join(data_directory, "images", "*.png"))

    xml_annotations_directory = os.path.join(data_directory, "annotations")
    all_xml_files = [
        y for x in os.walk(xml_annotations_directory)
        for y in glob(os.path.join(x[0], '*.xml'))
    ]

    if os.path.exists(exported_annotations_file_path):
        os.remove(exported_annotations_file_path)
    shutil.rmtree(annotations_path, ignore_errors=True)

    for xml_file in tqdm(all_xml_files, desc='Parsing annotation files'):
        nodes = read_nodes_from_file(xml_file)
        doc = nodes[0].document

        image_path = next((path for path in image_paths if doc in path), None)
        if image_path is None:
            # Previously this fell through to Image.open(None) and failed
            # with an unrelated-looking error.
            raise FileNotFoundError(
                'No image found for document {0} (annotation file {1})'.format(
                    doc, xml_file))

        # Only the dimensions are needed; the context manager releases the
        # file handle right away instead of leaking one per annotation file.
        with Image.open(image_path, "r") as image:  # type: Image.Image
            image_width = image.width
            image_height = image.height

        create_annotations_in_pascal_voc_format_from_nodes(
            annotations_path, os.path.basename(image_path), nodes,
            image_width, image_height, 3)
def load_nodes_from_xml_files(self, xml_files: List[str]) -> List[Node]:
    """Read every given XML file and return all of their nodes in one list.

    :param xml_files: Paths handed to ``read_nodes_from_file`` one by one.
    :return: The nodes of all files, concatenated in input order.
    """
    collected = []  # type: List[Node]
    progress = tqdm(xml_files, desc="Loading nodes from xml-files",
                    smoothing=0.1)
    for path in progress:
        collected += read_nodes_from_file(path)

    print("Loaded {0} nodes".format(len(collected)))
    return collected
def main(args):
    """Strip staff-class nodes from an annotation file and export the rest.

    Nodes whose class is in ``_CONST.STAFF_CLASS_NAMES`` are dropped; every
    remaining node is deep-copied with the dropped ids filtered out of its
    ``inlinks`` and ``outlinks``. The result is written to ``args.export``
    (or printed when it is ``None``).

    :param args: Parsed command-line arguments; the attributes read here are
        ``annot`` and ``export``.
    :raises ValueError: If ``args.annot`` is not an existing file.
    """
    logging.info('Starting main...')
    # time.clock() was removed in Python 3.8.
    _start_time = time.perf_counter()

    ##########################################################################
    logging.info('Import the Node list')
    if not os.path.isfile(args.annot):
        raise ValueError('Annotation file {0} not found!'
                         ''.format(args.annot))
    nodes = read_nodes_from_file(args.annot)

    ##########################################################################
    staff_id_to_node_mapping = {
        node.id: node
        for node in nodes if node.class_name in _CONST.STAFF_CLASS_NAMES
    }

    output_nodes = []
    for node in nodes:
        if node.id in staff_id_to_node_mapping:
            continue
        # Work on a copy so the nodes that were read stay untouched.
        new_c = copy.deepcopy(node)
        new_c.inlinks = [
            i for i in node.inlinks if i not in staff_id_to_node_mapping
        ]
        new_c.outlinks = [
            o for o in node.outlinks if o not in staff_id_to_node_mapping
        ]
        output_nodes.append(new_c)

    ##########################################################################
    logging.info('Export the stripped list.')
    nodes_string = export_node_list(output_nodes)

    if args.export is not None:
        with open(args.export, 'w') as hdl:
            hdl.write(nodes_string)
    else:
        print(nodes_string)

    _end_time = time.perf_counter()
    logging.info(
        'strip_staffline_symbols.py done in {0:.3f} s'.format(_end_time -
                                                              _start_time))
def render_node_masks(self, raw_data_directory: str, destination_directory: str):
    """
    Extracts all symbols from the raw XML documents and generates individual symbols from the masks

    :param raw_data_directory: The directory, that contains the xml-files and matching images
    :param destination_directory: The directory, in which the symbols should be generated into. Per file, one mask
                                  will be generated.
    """
    print("Extracting Masks from Muscima++ Dataset...")

    file_paths = self.__get_all_file_paths(raw_data_directory)
    for xml_file, png_file in tqdm(file_paths, desc="Generating mask images"):
        # Only the dimensions of the original image are needed; the context
        # manager closes the file instead of leaking one handle per page.
        with Image.open(png_file) as original_image:  # type: Image.Image
            image_width = original_image.width
            image_height = original_image.height

        nodes = read_nodes_from_file(xml_file)
        destination_filename = os.path.basename(xml_file).replace(".xml", ".png")
        self.__render_masks_of_staff_blob_for_instance_segmentation(
            nodes, destination_directory, destination_filename,
            image_width, image_height)
def render_node_masks(self, raw_data_directory: str,
                      destination_directory: str, mask_type: MaskType):
    """
    Extracts all symbols from the raw XML documents and generates individual symbols from the masks

    :param raw_data_directory: The directory, that contains the xml-files and matching images
    :param destination_directory: The directory, in which the symbols should be generated into. Per file, one mask
                                  will be generated.
    :param mask_type: The type of masks that you want to generate, e.g., masks for each node or staff lines only.
    """
    print("Extracting Masks from Muscima++ Dataset...")

    # Each class gets a colour index starting at 1, in specification order.
    node_classes = parse_node_classes(
        os.path.join(raw_data_directory, "v2.0", "specifications",
                     "mff-muscima-mlclasses-annot.xml"))
    for index, node_class in enumerate(node_classes):
        self.class_to_color_mapping[node_class.name] = index + 1

    file_paths = self.__get_all_file_paths(raw_data_directory)
    for xml_file, png_file in tqdm(file_paths, desc="Generating mask images"):
        # Only the dimensions are needed. The context manager also closes
        # the image when parsing or rendering below raises, which the
        # trailing close() call did not.
        with Image.open(png_file) as original_image:  # type: Image.Image
            image_width = original_image.width
            image_height = original_image.height

        nodes = read_nodes_from_file(xml_file)
        destination_filename = os.path.basename(xml_file).replace(
            ".xml", ".png")

        if mask_type == MaskType.NODES_SEMANTIC_SEGMENTATION:
            self.__render_masks_of_nodes_for_semantic_segmentation(
                nodes, destination_directory, destination_filename,
                image_width, image_height)
        elif mask_type == MaskType.STAFF_LINES_INSTANCE_SEGMENTATION:
            self.__render_masks_of_staff_lines_for_instance_segmentation(
                nodes, destination_directory, destination_filename,
                image_width, image_height)
        elif mask_type == MaskType.STAFF_BLOBS_INSTANCE_SEGMENTATION:
            self.__render_masks_of_staff_blob_for_instance_segmentation(
                nodes, destination_directory, destination_filename,
                image_width, image_height)
def main(args):
    """Convert a MuNG file to MIDI, either as one file or one file per staff.

    With ``args.per_staff`` set, one ``<basename>.staff-<index>.mid`` file is
    written into ``args.output_dir`` for every staff that has noteheads.
    Otherwise a single file is written to ``args.output_midi``, or to
    ``<args.output_dir>/<basename>.mid`` when ``args.output_dir`` is given.

    :param args: Parsed command-line arguments; the attributes read here are
        ``input_mung``, ``per_staff``, ``output_dir`` and ``output_midi``.
    :raises ValueError: If per-staff export is requested without an output
        directory, or if no output location is given at all.
    """
    logging.info('Starting main...')
    _start_time = time.time()

    nodes = read_nodes_from_file(args.input_mung)
    graph = NotationGraph(nodes)

    ######################################################################
    # export MIDI for each staff separately
    if args.per_staff:
        if not args.output_dir:
            raise ValueError('When exporting per-staff MIDI, the output'
                             ' directory must be specified.')
        basename = os.path.splitext(os.path.basename(args.input_mung))[0]

        # All per-staff files go directly into args.output_dir (no
        # per-document sub-directory), with the basename as a common prefix.
        # The directory is created up front so that open() cannot fail on a
        # missing directory.
        os.makedirs(args.output_dir, exist_ok=True)
        _output_base = os.path.join(args.output_dir, basename)

        # Collect all staffs, sorted top-down.
        staffs = sorted([c for c in nodes if c.class_name == 'staff'],
                        key=lambda x: x.top)

        # For each staff, collect its noteheads
        noteheads_per_staff = {
            s.id: graph.parents(
                s,
                class_filter=constants.InferenceEngineConstants.
                NOTEHEAD_CLASS_NAMES)
            for s in staffs
        }

        for staff_idx, s in enumerate(staffs):
            noteheads = noteheads_per_staff[s.id]
            if len(noteheads) == 0:
                continue
            logging.info('Processing staff: {0}, noteheads: {1}'
                         ''.format(s.id, len(noteheads)))
            mf = build_midi(nodes, selected_nodes=noteheads)

            output_path = _output_base + '.staff-{0}'.format(
                staff_idx) + '.mid'
            with open(output_path, 'wb') as stream_out:
                mf.writeFile(stream_out)

    ###################################################################
    # straightforward single-file processing
    else:
        output_path = args.output_midi
        if args.output_dir:
            basename = os.path.splitext(os.path.basename(args.input_mung))[0]
            output_path = os.path.join(args.output_dir, basename + '.mid')
        if not output_path:
            # Fail with a clear message instead of a TypeError from os.path.
            raise ValueError('Either the output MIDI file or the output'
                             ' directory must be specified.')

        mf = build_midi(nodes=nodes)

        # A bare file name has an empty dirname, and os.makedirs('') raises.
        output_directory = os.path.dirname(output_path)
        if output_directory:
            os.makedirs(output_directory, exist_ok=True)
        with open(output_path, 'wb') as stream_out:
            mf.writeFile(stream_out)

    _end_time = time.time()
    logging.info('mung2midi_builder.py done in {0:.3f} s'.format(_end_time -
                                                                 _start_time))
def main(args):
    """Align predicted nodes to ground-truth nodes and report agreement.

    The algorithm:

    - compute recall/precision/f-score matrices for all node pairs,
    - align truth to prediction and prediction to truth using the f-score,
    - keep only pairs present in both alignments whose class names match,
    - compute total recall, precision and f-score given that alignment.

    Results are printed; the optional reports are controlled by
    ``args.log_alignment``, ``args.analyze_alignment`` and
    ``args.analyze_classnames``. With ``args.print_fscore_only`` only the
    f-score is printed and the function returns right after.

    :param args: Parsed command-line arguments; the attributes read here are
        ``true``, ``prediction``, ``no_strict_classnames``,
        ``print_fscore_only``, ``log_alignment``, ``analyze_alignment`` and
        ``analyze_classnames``.
    """
    logging.info('Starting main...')
    # time.clock() was removed in Python 3.8.
    _start_time = time.perf_counter()

    ground_truth_nodes = read_nodes_from_file(args.true)
    predicted_nodes = read_nodes_from_file(args.prediction)

    _parse_time = time.perf_counter()
    logging.info('Parsing {0} true and {1} prediction Nodes took {2:.2f} s'
                 ''.format(len(ground_truth_nodes), len(predicted_nodes),
                           _parse_time - _start_time))

    recall, precision, fscore = compute_recall_precision_fscore(
        ground_truth_nodes, predicted_nodes)

    _rpf_time = time.perf_counter()
    logging.info('Computing {0} entries of r/p/f matrices took {1:.2f} s'
                 ''.format(
                     len(ground_truth_nodes) * len(predicted_nodes),
                     _rpf_time - _parse_time))

    alignment_tp = align_nodes(ground_truth_nodes,
                               predicted_nodes,
                               fscore=fscore)
    alignment_pt = align_nodes(predicted_nodes,
                               ground_truth_nodes,
                               fscore=fscore.T)

    # Intersect alignments
    _aln_tp_set = frozenset(alignment_tp)
    alignment_tp_symmetric = [
        (t, p) for p, t in alignment_pt if (t, p) in _aln_tp_set and (
            ground_truth_nodes[t].class_name == predicted_nodes[p].class_name)
    ]
    # Set lookups keep the two "not aligned" scans linear instead of
    # quadratic in the number of aligned pairs.
    _symmetric_set = frozenset(alignment_tp_symmetric)
    truth_not_aligned = [
        t for p, t in alignment_pt if (t, p) not in _symmetric_set
    ]
    n_truth_not_aligned = len(truth_not_aligned)
    preds_not_aligned = [
        p for t, p in alignment_tp if (t, p) not in _symmetric_set
    ]
    n_preds_not_aligned = len(preds_not_aligned)
    n_not_aligned = n_truth_not_aligned + n_preds_not_aligned

    _aln_time = time.perf_counter()
    logging.info('Computing alignment took {0:.2f} s'
                 ''.format(_aln_time - _rpf_time))

    # Now compute agreement: precision and recall on pixels
    # of the aligned Nodes.

    # We apply strict classnames only here, after the Nodes have been
    # aligned to each other using pixel metrics.
    strict_classnames = (not args.no_strict_classnames)
    total_r, total_p, total_f = compute_recall_precision_fscore_given_an_alignment(
        alignment_tp_symmetric,
        recall,
        precision,
        n_not_aligned=n_not_aligned,
        strict_classnames=strict_classnames,
        truths=ground_truth_nodes,
        predictions=predicted_nodes)

    if not args.print_fscore_only:
        print('Truth objs.:\t{0}'.format(len(ground_truth_nodes)))
        print('Pred. objs.:\t{0}'.format(len(predicted_nodes)))
        print('Aligned objs.:\t{0}'.format(len(alignment_tp_symmetric)))
        print('==============================================')
        print('Recall:\t\t{0:.3f}\nPrecision:\t{1:.3f}\nF-score:\t{2:.3f}'
              ''.format(total_r, total_p, total_f))
        print('')
    else:
        print('{0:.3f}'.format(total_f))
        return

    if args.log_alignment:
        print('==============================================')
        print('Alignments:\n{0}'.format('\n'.join([
            '({0}: {1}) -- ({2}: {3})'.format(ground_truth_nodes[t].id,
                                              ground_truth_nodes[t].class_name,
                                              predicted_nodes[p].id,
                                              predicted_nodes[p].class_name)
            for t, p in alignment_tp_symmetric
        ])))
        print('Truth, not aligned:\n{0}'.format('\n'.join([
            '({0}: {1})'.format(ground_truth_nodes[t].id,
                                ground_truth_nodes[t].class_name)
            for t in truth_not_aligned
        ])))
        print('Preds, not aligned:\n{0}'.format('\n'.join([
            '({0}: {1})'.format(predicted_nodes[p].id,
                                predicted_nodes[p].class_name)
            for p in preds_not_aligned
        ])))

    ##########################################################################
    # Check if the alignment is a pairing -- find truth objects
    # with more than one prediction aligned to them.
    if args.analyze_alignment:
        t_aln_dict = collections.defaultdict(list)
        for i, j in alignment_tp_symmetric:
            t_aln_dict[i].append(predicted_nodes[j])

        multiple_truths_aln_dict = {
            t: t_aln_dict[t]
            for t in t_aln_dict if len(t_aln_dict[t]) > 1
        }

        print('Truth multi-aligned Node classes:\n{0}'
              ''.format(
                  pprint.pformat({(ground_truth_nodes[t].id,
                                   ground_truth_nodes[t].class_name):
                                  [(p.id, p.class_name) for p in t_aln_dict[t]]
                                  for t in multiple_truths_aln_dict})))

    ##########################################################################
    # Check if the aligned objects have the same classes
    if args.analyze_classnames:
        different_classnames_pairs = []
        for i, j in alignment_tp_symmetric:
            if ground_truth_nodes[i].class_name != predicted_nodes[
                    j].class_name:
                different_classnames_pairs.append(
                    (ground_truth_nodes[i], predicted_nodes[j]))
        print('Aligned pairs with different class_names:\n{0}'
              ''.format('\n'.join([
                  '{0}.{1}\t{2}.{3}'
                  ''.format(t.id, t.class_name, p.id, p.class_name)
                  for t, p in different_classnames_pairs
              ])))

    _end_time = time.perf_counter()
    logging.info('analyze_agreement.py done in {0:.3f} s'.format(_end_time -
                                                                 _start_time))
def test_read_nodes_from_file(self):
    """Reading 01_basic.xml from the test data yields 48 nodes."""
    package_root = os.path.dirname(os.path.dirname(__file__))
    annotation_path = os.path.join(package_root, 'test', 'test_data',
                                   '01_basic.xml')

    parsed = read_nodes_from_file(annotation_path)

    self.assertEqual(len(parsed), 48)
# logging.basicConfig() does nothing once the root logger has a handler, so
# the more detailed level must be configured first; otherwise --debug would
# be silently ignored whenever --verbose is given as well.
if args.debug:
    logging.basicConfig(format='%(levelname)s: %(message)s',
                        level=logging.DEBUG)
elif args.verbose:
    logging.basicConfig(format='%(levelname)s: %(message)s',
                        level=logging.INFO)

logging.info('Starting main...')
# time.clock() was removed in Python 3.8.
_start_time = time.perf_counter()

# Parse individual Node lists.
node_lists = []
number_of_parsed_nodes = 0
for i, f in enumerate(args.input):
    node_list = read_nodes_from_file(f)
    node_lists.append(node_list)

    # Logging progress (every tenth file, skipping the very first one)
    number_of_parsed_nodes += len(node_list)
    if i % 10 == 0 and i > 0:
        _time_parsing = time.perf_counter() - _start_time
        nodes_per_second = number_of_parsed_nodes / _time_parsing
        logging.info('Parsed {0} Nodes in {1:.2f} s ({2:.2f} objs/s)'
                     ''.format(number_of_parsed_nodes, _time_parsing,
                               nodes_per_second))

# Merge the Node lists into one.
# This is done so that the resulting object graph can be manipulated
# at once, without id clashes.
merged_node_list = merge_node_lists_from_multiple_documents(node_lists)
# Skeleton of the output document; the "images" and "annotations" lists are
# filled by the loop below.
coco_output = {
    "info": INFO,
    "licenses": LICENSES,
    "categories": CATEGORIES,
    "images": [],
    "annotations": []
}

# Running ids for the image and annotation entries, both starting at 1.
image_id = 1
annotation_id = 1

annotation_file_paths = glob(ANNOTATION_DIR + "/*.xml")
for annotation_file_path in tqdm(annotation_file_paths,
                                 desc="Parsing annotations"):
    nodes = read_nodes_from_file(annotation_file_path)
    # The image is looked up in IMAGE_DIR under the annotation file's base
    # name with a .png extension.
    image_name = os.path.splitext(
        os.path.basename(annotation_file_path))[0] + ".png"
    # NOTE(review): the image is opened only for its dimensions and is not
    # closed in the visible part of the loop.
    image = Image.open(os.path.join(IMAGE_DIR,
                                    image_name))  # type: Image.Image
    image_info = create_image_info(image_id, image_name,
                                   (image.width, image.height))
    coco_output["images"].append(image_info)

    # One annotation entry per node, each with its own annotation id.
    for node in nodes:
        annotation_info = create_annotation_info(
            annotation_id, image_id,
            class_name_to_category_id_mapping[node.class_name], node)
        coco_output["annotations"].append(annotation_info)
        annotation_id = annotation_id + 1
    # NOTE(review): image_id is not advanced anywhere in this excerpt;
    # presumably that happens in code following it -- confirm.
if __name__ == '__main__': import os from mung.io import read_nodes_from_file from mung.graph import NotationGraph, NotationGraph, NotationGraph, NotationGraph test_data_root = os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'test', 'test_data', 'mungmatcher') gt_root = os.path.join(test_data_root, 'gt') de_root = os.path.join(test_data_root, 'detected') wc_root = os.path.join(test_data_root, 'without_contained') names = ['minifull.xml', 'mini2full.xml'] gt = NotationGraph( read_nodes_from_file(os.path.join(test_data_root, names[0]))) wc = NotationGraph( read_nodes_from_file(os.path.join(test_data_root, names[1]))) matcher = MungMatcher() aln = matcher.run(gt, gt) print('Matched GT against GT: {} gt, {} gt, {} matched' ''.format(len(gt), len(gt), len(aln))) aln = matcher.run(gt, wc) print('Matched GT against WC: {} gt, {} wc, {} matched' ''.format(len(gt), len(wc), len(aln))) aln = matcher.run(gt, gt) print('Matched WC against WC: {} wc, {} wc, {} matched'
def main(args):
    """Derive staffline, staff and staffspace Nodes from a staff-only image.

    Steps, in order:

    1. Load the staffline image (``args.staff_imfile``, or the image that
       ``CvcMuscimaDataset`` resolves from ``args.root``, ``args.number``
       and ``args.writer``) and binarize it.
    2. Group its connected components into stafflines by row overlap and
       merge each group into one bounding box and mask.
    3. Group the stafflines top-down by five into staffs.
    4. Create the staffline Nodes and, unless ``args.stafflines_only`` is
       set, the staff and staffspace Nodes with their inlinks/outlinks.
    5. Export the Nodes read from ``args.annot`` (if given) together with
       the new ones to ``args.export``, or print them when it is ``None``.

    :param args: Parsed command-line arguments; the attributes read here are
        ``staff_imfile``, ``root``, ``number``, ``writer``, ``annot``,
        ``stafflines_only`` and ``export``.
    :raises ValueError: If the number of detected stafflines is not
        divisible by 5, or if ``args.annot`` is given but is not a file.
    """
    logging.info('Starting main...')
    # time.clock() was removed in Python 3.8.
    _start_time = time.perf_counter()

    ########################################################
    # Load gt image.
    logging.info('Loading staffline image.')
    #  - Initialize Dataset. This checks for the root.

    if args.staff_imfile is None:
        cvc_dataset = CvcMuscimaDataset(root=args.root)
        args.staff_imfile = cvc_dataset.imfile(page=args.number,
                                               writer=args.writer,
                                               distortion='ideal',
                                               mode='staff_only')

    #  - Load the image.
    gt = (imread(args.staff_imfile, as_grey=True) * 255).astype('uint8')

    #  - Cast as binary mask.
    gt[gt > 0] = 1

    ########################################################
    # Locate stafflines in gt image.
    logging.info('Getting staffline connected components.')

    #  - Get connected components in gt image.
    connected_components, labels, bboxes = compute_connected_components(gt)

    #  - Use vertical dimension of CCs to determine which ones belong together
    #    to form stafflines. (Criterion: row overlap.)
    n_rows, n_cols = gt.shape
    # For each row: which CCs have pxs on that row?
    intervals = [[] for _ in range(n_rows)]
    for label, (t, l, b, r) in list(bboxes.items()):
        if label == 0:
            continue
        # Ignore very short staffline segments that can easily be artifacts
        # and should not affect the vertical range of the staffline anyway.
        if (r - l) < 8:
            continue
        for row in range(t, b):
            intervals[row].append(label)

    logging.info('Grouping staffline connected components into stafflines.')
    # For each staffline, we collect the CCs that it is made of
    staffline_components = []
    _in_staffline = False
    _current_staffline_components = []
    for r_labels in intervals:
        if not _in_staffline:
            # Last row did not contain staffline components.
            if len(r_labels) == 0:
                # No staffline component on current row
                continue
            else:
                _in_staffline = True
                _current_staffline_components += r_labels
        else:
            # Last row contained staffline components.
            if len(r_labels) == 0:
                # Current staffline has no more rows.
                staffline_components.append(set(_current_staffline_components))
                _current_staffline_components = []
                _in_staffline = False
                continue
            else:
                # Current row contains staffline components: the current
                # staffline continues.
                _current_staffline_components += r_labels

    # A staffline that reaches the last image row is never followed by an
    # empty row, so it has to be flushed explicitly or it would be lost.
    if _in_staffline:
        staffline_components.append(set(_current_staffline_components))

    logging.info('No. of stafflines, with component groups: {0}'
                 ''.format(len(staffline_components)))

    # Now: merge the staffline components into one bbox/mask.
    logging.info(
        'Merging staffline components into staffline bboxes and masks.')
    staffline_bboxes = []
    staffline_masks = []
    for sc in sorted(staffline_components,
                     key=lambda c: min([bboxes[cc][0]
                                        for cc in c])):  # Sorted top-down
        st, sl, sb, sr = n_rows, n_cols, 0, 0
        for component in sc:
            t, l, b, r = bboxes[component]
            st, sl, sb, sr = min(t, st), min(l, sl), max(b, sb), max(r, sr)
        _sm = gt[st:sb, sl:sr]
        staffline_bboxes.append((st, sl, sb, sr))
        staffline_masks.append(_sm)

    # Check if n. of stafflines is divisible by 5
    n_stafflines = len(staffline_bboxes)
    logging.info('\tTotal stafflines: {0}'.format(n_stafflines))
    if n_stafflines % 5 != 0:
        # Show the detected stafflines before failing, as a debugging aid.
        import matplotlib.pyplot as plt
        stafllines_mask_image = numpy.zeros(gt.shape)
        for i, (_sb, _sm) in enumerate(zip(staffline_bboxes, staffline_masks)):
            t, l, b, r = _sb
            stafllines_mask_image[t:b, l:r] = min(255, (i * 333) % 255 + 40)
        plt.imshow(stafllines_mask_image, cmap='jet', interpolation='nearest')
        plt.show()
        raise ValueError('No. of stafflines is not divisible by 5!')

    logging.info('Creating staff bboxes and masks.')

    #  - Go top-down and group the stafflines by five to get staves.
    #    (The staffline bboxes are already sorted top-down.)
    staff_bboxes = []
    staff_masks = []

    for i in range(n_stafflines // 5):
        _sbb = staffline_bboxes[5 * i:5 * (i + 1)]
        _st = min([bb[0] for bb in _sbb])
        _sl = min([bb[1] for bb in _sbb])
        _sb = max([bb[2] for bb in _sbb])
        _sr = max([bb[3] for bb in _sbb])
        staff_bboxes.append((_st, _sl, _sb, _sr))
        staff_masks.append(gt[_st:_sb, _sl:_sr])

    logging.info('Total staffs: {0}'.format(len(staff_bboxes)))

    ##################################################################
    # (Optionally fill in missing pixels, based on full image.)
    logging.info('SKIP: fill in missing pixels based on full image.')
    #  - Load full image
    #  - Find gap regions
    #  - Obtain gap region masks from full image
    #  - Add gap region mask to staffline mask.

    # Create the Nodes for stafflines and staffs:
    #  - Load corresponding annotation, to which the stafflines and
    #    staves should be added. (This is needed to correctly set docname
    #    and node_ids.)
    if not args.annot:
        nodes = []
        next_node_id = 0
        dataset = 'FCNOMR'
        document = os.path.splitext(os.path.basename(args.staff_imfile))[0]
    else:
        if not os.path.isfile(args.annot):
            raise ValueError('Annotation file {0} does not exist!'.format(
                args.annot))

        logging.info('Creating Nodes...')
        nodes = read_nodes_from_file(args.annot)
        logging.info('Non-staffline Nodes: {0}'.format(len(nodes)))
        next_node_id = max([c.id for c in nodes]) + 1
        dataset = nodes[0].dataset
        document = nodes[0].document

    #  - Create the staffline Nodes
    stafflines = []
    for sl_bb, sl_m in zip(staffline_bboxes, staffline_masks):
        t, l, b, r = sl_bb
        c = Node(id_=next_node_id,
                 class_name=_CONST.STAFFLINE_CLASS_NAME,
                 top=t,
                 left=l,
                 height=b - t,
                 width=r - l,
                 mask=sl_m,
                 dataset=dataset,
                 document=document)
        stafflines.append(c)
        next_node_id += 1

    if not args.stafflines_only:

        #  - Create the staff Nodes
        staffs = []
        for s_bb, s_m in zip(staff_bboxes, staff_masks):
            t, l, b, r = s_bb
            c = Node(id_=next_node_id,
                     class_name=_CONST.STAFF_CLASS_NAME,
                     top=t,
                     left=l,
                     height=b - t,
                     width=r - l,
                     mask=s_m,
                     dataset=dataset,
                     document=document)
            staffs.append(c)
            next_node_id += 1

        #  - Add the inlinks/outlinks
        for i, sc in enumerate(staffs):
            sl_from = 5 * i
            sl_to = 5 * (i + 1)
            for sl in stafflines[sl_from:sl_to]:
                sl.inlinks.append(sc.id)
                sc.outlinks.append(sl.id)

        # Add the staffspaces.
        staffspaces = []
        for i, staff in enumerate(staffs):
            current_stafflines = [
                sc for sc in stafflines if sc.id in staff.outlinks
            ]
            sorted_stafflines = sorted(current_stafflines, key=lambda x: x.top)

            current_staffspaces = []

            # Percussion single-line staves do not have staffspaces.
            if len(sorted_stafflines) == 1:
                continue

            # Internal staffspace
            for s1, s2 in zip(sorted_stafflines[:-1], sorted_stafflines[1:]):
                # s1 is the UPPER staffline, s2 is the LOWER staffline
                # Left and right limits: to simplify things, we take the column
                # *intersection* of (s1, s2). This gives the invariant that
                # the staffspace is limited from top and bottom in each of its columns.
                l = max(s1.left, s2.left)
                r = min(s1.right, s2.right)

                # Shift s1, s2 to the right by this much to have the cols. align
                # All of these are non-negative.
                dl1, dl2 = l - s1.left, l - s2.left
                dr1, dr2 = s1.right - r, s2.right - r

                # The stafflines are not necessarily straight,
                # so top is given for the *topmost bottom edge* of the top staffline + 1

                # First create mask
                canvas = numpy.zeros((s2.bottom - s1.top, r - l),
                                     dtype='uint8')

                # Paste masks into canvas.
                # This assumes that the top of the bottom staffline is below
                # the top of the top staffline... and that the bottom
                # of the top staffline is above the bottom of the bottom
                # staffline. This may not hold in very weird situations,
                # but it's good for now.

                # logging treats its first argument as a %-format string, so
                # the values are passed as arguments of an explicit format.
                logging.debug('%s %s', s1.bounding_box, s1.mask.shape)
                logging.debug('%s %s', s2.bounding_box, s2.mask.shape)
                logging.debug('%s', canvas.shape)
                logging.debug(
                    'l={0}, dl1={1}, dl2={2}, r={3}, dr1={4}, dr2={5}'
                    ''.format(l, dl1, dl2, r, dr1, dr2))

                # We have to deal with staffline interruptions.
                # One way to do this
                # is watershed fill: put markers along the bottom and top
                # edge, use mask * 10000 as elevation

                s1_above, s1_below = staffline_surroundings_mask(s1)
                s2_above, s2_below = staffline_surroundings_mask(s2)

                # Get bounding boxes of the individual stafflines' masks
                # that intersect with the staffspace bounding box, in terms
                # of the staffline bounding box.
                s1_t, s1_l, s1_b, s1_r = 0, dl1, \
                                         s1.height, s1.width - dr1
                s1_h, s1_w = s1_b - s1_t, s1_r - s1_l
                s2_t, s2_l, s2_b, s2_r = canvas.shape[0] - s2.height, dl2, \
                                         canvas.shape[0], s2.width - dr2
                s2_h, s2_w = s2_b - s2_t, s2_r - s2_l

                logging.debug('%s %s %s %s %s', s1_t, s1_l, s1_b, s1_r,
                              (s1_h, s1_w))

                # We now take the intersection of s1_below and s2_above.
                # If there is empty space in the middle, we fill it in.
                staffspace_mask = numpy.ones(canvas.shape)
                staffspace_mask[s1_t:s1_b, :] -= (
                    1 - s1_below[:, dl1:s1.width - dr1])
                staffspace_mask[s2_t:s2_b, :] -= (
                    1 - s2_above[:, dl2:s2.width - dr2])

                ss_top = s1.top
                ss_bottom = s2.bottom
                ss_left = l
                ss_right = r

                staffspace = Node(next_node_id,
                                  _CONST.STAFFSPACE_CLASS_NAME,
                                  top=ss_top,
                                  left=ss_left,
                                  height=ss_bottom - ss_top,
                                  width=ss_right - ss_left,
                                  mask=staffspace_mask,
                                  dataset=dataset,
                                  document=document)

                staffspace.inlinks.append(staff.id)
                staff.outlinks.append(staffspace.id)

                current_staffspaces.append(staffspace)

                next_node_id += 1

            # Add top and bottom staffspace.
            # These outer staffspaces take their horizontal extent from the
            # neighbouring internal staffspace, and their height is derived
            # from that neighbour as well.
            # This is quite approximate, but it should do.

            # Both neighbours are picked BEFORE anything else is appended:
            # once the upper staffspace is in the list, [-1] would refer to
            # it instead of the lowest internal staffspace.
            tss = current_staffspaces[0]
            bss = current_staffspaces[-1]

            # Upper staffspace
            tss_heights = tss.mask.sum(axis=0)

            uss_top = max(0, tss.top - max(tss_heights))
            uss_left = tss.left
            uss_width = tss.width
            # The height is divided by 1.2, so that large noteheads
            # do not "hang out" of the staffspace.
            uss_height = int(tss.height / 1.2)
            # Shift because of height downscaling:
            uss_top += tss.height - uss_height
            uss_mask = tss.mask[:uss_height, :] * 1

            staffspace = Node(next_node_id,
                              _CONST.STAFFSPACE_CLASS_NAME,
                              top=uss_top,
                              left=uss_left,
                              height=uss_height,
                              width=uss_width,
                              mask=uss_mask,
                              dataset=dataset,
                              document=document)
            current_staffspaces.append(staffspace)
            staff.outlinks.append(staffspace.id)
            staffspace.inlinks.append(staff.id)
            next_node_id += 1

            # Lower staffspace: starts where the lowest internal one ends.
            lss_top = bss.bottom
            lss_left = bss.left
            lss_width = bss.width
            lss_height = int(bss.height / 1.2)
            lss_mask = bss.mask[:lss_height, :] * 1

            staffspace = Node(next_node_id,
                              _CONST.STAFFSPACE_CLASS_NAME,
                              top=lss_top,
                              left=lss_left,
                              height=lss_height,
                              width=lss_width,
                              mask=lss_mask,
                              dataset=dataset,
                              document=document)
            current_staffspaces.append(staffspace)
            staff.outlinks.append(staffspace.id)
            staffspace.inlinks.append(staff.id)
            next_node_id += 1

            # ################ End of dealing with upper/lower staffspace ######

            # Add to current list
            staffspaces += current_staffspaces

        #  - Join the lists together
        nodes_with_staffs = nodes \
                            + stafflines \
                            + staffspaces \
                            + staffs

    else:
        nodes_with_staffs = nodes + stafflines

    logging.info('Exporting the new Node list: {0} objects'
                 ''.format(len(nodes_with_staffs)))
    #  - Export the combined list.
    nodes_string = export_node_list(nodes_with_staffs)
    if args.export is not None:
        with open(args.export, 'w') as hdl:
            hdl.write(nodes_string)
    else:
        print(nodes_string)

    _end_time = time.perf_counter()
    logging.info('add_staffline_symbols.py done in {0:.3f} s'
                 ''.format(_end_time - _start_time))
def main(args):
    """Run the baseline symbol-processing pipeline on one MuNG file.

    Steps, in order: load the pickled vectorizer and classifier and build
    the parser; read ``args.input_mung``; drop very small Nodes; parse;
    optionally remove contained Nodes; infer stafflines and staffs;
    optionally add key signatures; remove edges the grammar rejects; add
    precedence edges; check that MIDI can be built; write the result to
    ``args.output_mung``.

    :param args: Parsed command-line arguments; the attributes read here are
        ``vectorizer``, ``parser``, ``mlclasses``, ``grammar``,
        ``input_mung``, ``filter_contained``, ``add_key_signatures`` and
        ``output_mung``.
    """
    logging.info('Starting main...')
    # time.clock() was removed in Python 3.8.
    _start_time = time.perf_counter()

    ###############################################################
    # Preparation: loading the parsing apparatus

    # Pickles are binary data and must be opened in 'rb' mode on Python 3.
    # NOTE(review): pickle.load can execute arbitrary code -- only point
    # these arguments at trusted files.
    with open(args.vectorizer, 'rb') as hdl:
        vectorizer = pickle.load(hdl)
    feature_extractor = PairwiseClassificationFeatureExtractor(
        vectorizer=vectorizer)

    with open(args.parser, 'rb') as hdl:
        classifier = pickle.load(hdl)

    mlclass_list = parse_node_classes(args.mlclasses)
    mlclasses = {m.name for m in mlclass_list}

    grammar = DependencyGrammar(grammar_filename=args.grammar,
                                alphabet=mlclasses)

    parser = PairwiseClassificationParser(grammar=grammar,
                                          classifier=classifier,
                                          feature_extractor=feature_extractor)

    #################################################################
    logging.info('Load graph')
    nodes = read_nodes_from_file(args.input_mung)

    logging.info('Filter very small')
    very_small_nodes = find_very_small_nodes(nodes,
                                             bbox_threshold=40,
                                             mask_threshold=35)
    very_small_nodes = set(very_small_nodes)
    nodes = [c for c in nodes if c not in very_small_nodes]

    logging.info('Parsing')
    nodes = do_parse(nodes, parser=parser)

    # Filter contained here.
    if args.filter_contained:
        logging.info('Finding contained Nodes...')
        contained = find_contained_nodes(nodes, mask_threshold=0.95)
        NEVER_DISCARD_CLASSES = ['key_signature', 'time_signature']
        contained = [
            c for c in contained if c.class_name not in NEVER_DISCARD_CLASSES
        ]

        _contained_counts = collections.Counter(
            c.class_name for c in contained)
        logging.info('Found {} contained Nodes'.format(len(contained)))
        logging.info('Contained counts:\n{0}'.format(
            pprint.pformat(dict(_contained_counts))))
        nodes = remove_contained_nodes(nodes, contained)
        logging.info('Removed contained Nodes: {}...'.format(
            [m.id for m in contained]))

    logging.info('Inferring staffline & staff objects, staff relationships')
    nodes = process_stafflines(nodes)

    if args.add_key_signatures:
        nodes = add_key_signatures(nodes)

    logging.info('Filter invalid edges')
    graph = NotationGraph(nodes)
    # Operating on the graph changes the Nodes
    # -- the graph only keeps a pointer
    wrong_edges = find_wrong_edges(nodes, grammar)
    for f, t in wrong_edges:
        graph.remove_edge(f, t)

    logging.info('Add precedence relationships, factored only by staff')
    prec_edges = infer_precedence_edges(nodes)
    nodes = add_precedence_edges(nodes, prec_edges)

    logging.info('Ensuring MIDI can be built')
    # The MIDI object itself is not used: the call is made before the
    # output is written, and its result is discarded.
    build_midi(nodes,
               retain_pitches=True,
               retain_durations=True,
               retain_onsets=True,
               tempo=180)

    logging.info('Save output')
    docname = os.path.splitext(os.path.basename(args.output_mung))[0]
    xml = export_node_list(nodes, document=docname, dataset='FNOMR_results')
    with open(args.output_mung, 'w') as out_stream:
        out_stream.write(xml)
        out_stream.write('\n')

    _end_time = time.perf_counter()
    logging.info(
        'baseline_process_symbols.py done in {0:.3f} s'.format(_end_time -
                                                               _start_time))
def count_nodes(annot_file):
    """Return how many nodes ``read_nodes_from_file`` yields for the file."""
    parsed = read_nodes_from_file(annot_file)
    node_total = len(parsed)
    return node_total
:param soundfont: A *.sf2 soundfont for FluidSynth to load. """ tmp_midi_path = os.path.join(tmp_dir, 'play_' + str(uuid.uuid4())[:8] + '.mid') with open(tmp_midi_path, 'wb') as hdl: midi.writeFile(hdl) if not os.path.isfile(tmp_midi_path): logging.warning('Could not write MIDI data to temp file {0}!'.format( tmp_midi_path)) return play_midi_file_from_disk(tmp_midi_path, soundfont) # Here's hoping it's a blocking call. Otherwise, just leave the MIDI; # MUSCIMarker cleans its tmp dir whenever it exits. if cleanup: os.unlink(tmp_midi_path) if __name__ == '__main__': # play_midi_file_from_disk() # exit() # sample_mung = "mung2midi/sample/CVC-MUSCIMA_W-01_N-10_D-ideal.xml" # sample_mung = "test/test_data/01_basic_binary_2.0.xml" sample_mung = "/Users/elona/Documents/GitHub/mung2midi/mung/test/test_data/CVC-MUSCIMA_W-01_N-10_D-ideal.pdo.xml" nodes = read_nodes_from_file(sample_mung) midi_file = convert_mung_to_midi(nodes) play_midi_file(midi_file)