def main(filepath):
    """Recognise the music in a sheet-music image and export it as MusicXML.

    Pipeline: read the image as grayscale, denoise and binarise it (Otsu),
    estimate staff-line thickness/spacing, locate every staff, template-match
    clef, time signature and primitives (accidentals, notes, rests, flags,
    bar lines) on each staff, fix up durations and pitches, group the
    primitives into bars and finally write the score to disk.

    References:
        Noise removal: https://docs.opencv.org/3.3.1/d5/d69/tutorial_py_non_local_means.html
        Otsu binarisation: https://docs.opencv.org/3.3.1/d7/d4d/tutorial_py_thresholding.html
        Deskewing (currently disabled):
            https://www.pyimagesearch.com/2017/02/20/text-skew-correction-opencv-python/

    Args:
        filepath: Path of the image file to process.

    Returns:
        The path of the MusicXML file that was written.

    Raises:
        ValueError: If the image cannot be read or no staff is detected.
    """
    # ---- Image preprocessing ----
    img = cv2.imread(filepath, 0)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise ValueError("Could not read image: {}".format(filepath))
    img = cv2.fastNlMeansDenoising(img, None, 10, 7, 21)
    _, img = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # ---- Reference lengths ----
    # Staff-line thickness and the gap between lines give the basic scale
    # for every later size comparison.
    line_width, line_spacing = get_ref_lengths(img)

    # ---- Staff detection ----
    staff_rows = find_staffline_rows(img, line_width, line_spacing)
    if len(staff_rows) == 0:
        raise ValueError("No staffs detected in image: {}".format(filepath))
    staff_cols = find_staffline_columns(img, staff_rows, line_width, line_spacing)
    staffs = _build_staffs(img, staff_rows, staff_cols, line_width, line_spacing)

    # ---- Clef and time signature (all staffs first, so that a staff can
    # inherit the time signature of the one before it) ----
    staff_imgs_color = []
    for i, staff in enumerate(staffs):
        # Colour copy of the staff used only for debug annotations.
        canvas = cv2.cvtColor(staff.getImage().copy(), cv2.COLOR_GRAY2RGB)
        _detect_clef_and_time(staffs, i, canvas)
        staff_imgs_color.append(canvas)

    # ---- Primitives, duration/pitch corrections and bar assembly ----
    # The accidental counts of the LAST processed staff name the output key.
    num_sharps = 0
    num_flats = 0
    for staff, canvas in zip(staffs, staff_imgs_color):
        primitives = _find_primitives(staff, canvas)
        _assign_eighth_flags(primitives)
        _mark_beamed_eighths(primitives, staff)
        primitives, num_sharps, num_flats = _apply_key_signature(primitives)
        _apply_accidentals(primitives)
        _assemble_bars(staff, primitives)

    # ---- Output ----
    return _write_musicxml(staffs, num_sharps, num_flats)


def _build_staffs(img, staff_rows, staff_cols, line_width, line_spacing):
    """Crop every detected staff out of *img* and wrap it in a Staff object.

    Args:
        img: Binarised page image.
        staff_rows: Per staff, five lists of row indices (one list per line).
        staff_cols: Per staff, the (first, last) column of the staff.
        line_width: Staff-line thickness in pixels.
        line_spacing: Gap between neighbouring staff lines in pixels.

    Returns:
        List of Staff objects in the order of *staff_rows*.
    """
    if len(staff_rows) > 1:
        # Half the gap between the bottom of staff 0 and the top of staff 1.
        margin = (staff_rows[1][0][0] - staff_rows[0][4][line_width - 1]) // 2
    else:
        # Single staff: there is no neighbour to measure against.
        # NOTE(review): heuristic fallback of roughly half a staff height;
        # confirm it leaves enough room for ledger-line notes.
        margin = 2 * (line_width + line_spacing)

    staffs = []
    for i, rows in enumerate(staff_rows):
        # Bounding box of the five staff lines.
        x = staff_cols[i][0]
        y = rows[0][0]
        width = staff_cols[i][1] - x
        height = rows[4][line_width - 1] - y
        staff_box = BoundingBox(x, y, width, height)

        # Cropped staff image including the margin above and below.
        top = max(0, y - margin)
        bottom = min(y + height + margin, img.shape[0] - 1)
        staff_img = img[top:bottom, x:x + width]

        # Staff-line rows relative to the crop. Starting at (y - top) rather
        # than at the margin keeps them correct when the crop was clamped
        # at the top of the page.
        pixel = y - top
        normalized_rows = []
        for _ in range(5):
            normalized_rows.append(list(range(pixel, pixel + line_width)))
            pixel += line_width + line_spacing + 1

        staffs.append(
            Staff(normalized_rows, staff_box, line_width, line_spacing, staff_img))
    return staffs


def _detect_clef_and_time(staffs, i, canvas):
    """Set clef and time signature on staffs[i]; annotate matches on *canvas*.

    A clef/time template counts as found when exactly one merged match
    remains. For every staff but the first, the first time template that
    yields no match at all makes the staff inherit the time signature of
    the previous staff.
    """
    red = (0, 0, 255)
    box_thickness = 2
    font = cv2.FONT_HERSHEY_DUPLEX
    staff = staffs[i]
    staff_img = staff.getImage()

    # ------- Clef -------
    for clef in clef_imgs:
        clef_boxes = locate_templates(
            staff_img, clef_imgs[clef], clef_lower, clef_upper, clef_thresh)
        clef_boxes = merge_boxes([b for group in clef_boxes for b in group], 0.5)
        if len(clef_boxes) == 1:
            staff.setClef(clef)
            for box in clef_boxes:
                box.draw(canvas, red, box_thickness)
                x = int(box.getCorner()[0] + (box.getWidth() // 2))
                y = int(box.getCorner()[1] + box.getHeight() + 10)
                cv2.putText(canvas, "{} clef".format(clef), (x, y), font, 0.9, red)
            break

    # ------- Time -------
    for time_name in time_imgs:
        time_boxes = locate_templates(
            staff_img, time_imgs[time_name], time_lower, time_upper, time_thresh)
        time_boxes = merge_boxes([b for group in time_boxes for b in group], 0.5)
        if len(time_boxes) == 1:
            staff.setTimeSignature(time_name)
            for box in time_boxes:
                box.draw(canvas, red, box_thickness)
                x = int(box.getCorner()[0] - (box.getWidth() // 2))
                y = int(box.getCorner()[1] + box.getHeight() + 20)
                cv2.putText(canvas, "{} time".format(time_name), (x, y), font, 0.9, red)
            break
        elif len(time_boxes) == 0 and i > 0:
            # Take the time signature of the previous staff.
            staff.setTimeSignature(staffs[i - 1].getTimeSignature())
            break


def _label_box(canvas, box, text):
    """Draw *box* on *canvas* with *text* written just below it."""
    red = (0, 0, 255)
    font = cv2.FONT_HERSHEY_DUPLEX
    box.draw(canvas, red, 2)
    text_width = cv2.getTextSize(text, font, fontScale=0.7, thickness=1)[0][0]
    x = int(box.getCorner()[0] - (text_width // 2))
    y = int(box.getCorner()[1] + box.getHeight() + 20)
    cv2.putText(canvas, text, (x, y), font, fontScale=0.7, color=red, thickness=1)


def _find_primitives(staff, canvas):
    """Template-match all primitive kinds on *staff*, sorted left to right.

    Returns:
        List of Primitive objects ordered by the centre of their box.
    """
    staff_img = staff.getImage()
    # (templates, lower, upper, threshold, label, primitive name, duration,
    #  has pitch) -- built here because the template tables are module
    # globals resolved at call time.
    specs = [
        (sharp_imgs, sharp_lower, sharp_upper, sharp_thresh,
         "sharp", "sharp", 0, False),
        (flat_imgs, flat_lower, flat_upper, flat_thresh,
         "flat", "flat", 0, False),
        (quarter_note_imgs, quarter_note_lower, quarter_note_upper,
         quarter_note_thresh, "1/4 note", "note", 1, True),
        (half_note_imgs, half_note_lower, half_note_upper,
         half_note_thresh, "1/2 note", "note", 2, True),
        (whole_note_imgs, whole_note_lower, whole_note_upper,
         whole_note_thresh, "1 note", "note", 4, True),
        (eighth_rest_imgs, eighth_rest_lower, eighth_rest_upper,
         eighth_rest_thresh, "1/8 rest", "rest", 0.5, False),
        (quarter_rest_imgs, quarter_rest_lower, quarter_rest_upper,
         quarter_rest_thresh, "1/4 rest", "rest", 1, False),
        (half_rest_imgs, half_rest_lower, half_rest_upper,
         half_rest_thresh, "1/2 rest", "rest", 2, False),
        (whole_rest_imgs, whole_rest_lower, whole_rest_upper,
         whole_rest_thresh, "1 rest", "rest", 4, False),
        (eighth_flag_imgs, eighth_flag_lower, eighth_flag_upper,
         eighth_flag_thresh, "1/8 flag", "eighth_flag", 0, False),
        (bar_imgs, bar_lower, bar_upper, bar_thresh,
         "line", "line", 0, False),
    ]

    primitives = []
    for templates, lower, upper, thresh, label, name, duration, pitched in specs:
        boxes = locate_templates(staff_img, templates, lower, upper, thresh)
        boxes = merge_boxes([b for group in boxes for b in group], 0.5)
        for box in boxes:
            _label_box(canvas, box, label)
            if pitched:
                # Pitch is derived from the vertical centre of the note head.
                pitch = staff.getPitch(round(box.getCenter()[1]))
                primitives.append(Primitive(name, duration, box, pitch))
            else:
                primitives.append(Primitive(name, duration, box))

    primitives.sort(key=lambda primitive: primitive.getBox().getCenter())
    return primitives


def _assign_eighth_flags(primitives):
    """Turn the note nearest to each eighth flag into an eighth; drop the flag.

    Only the direct left/right neighbours that are notes are considered; on
    a tie the left neighbour wins. Flags are handled right to left so that
    deleting one never shifts the index of a flag still to be processed.
    Mutates *primitives* in place.
    """
    flag_indices = [j for j, primitive in enumerate(primitives)
                    if primitive.getPrimitive() == "eighth_flag"]
    for j in reversed(flag_indices):
        flag_box = primitives[j].getBox()
        neighbours = [primitives[k] for k in (j - 1, j + 1)
                      if 0 <= k < len(primitives)
                      and primitives[k].getPrimitive() == "note"]
        if neighbours:
            closest = min(
                neighbours,
                key=lambda primitive: flag_box.distance(primitive.getBox()))
            closest.setDuration(0.5)
        del primitives[j]


def _mark_beamed_eighths(primitives, staff):
    """Detect beamed note pairs and set both notes to eighth length.

    Two neighbouring notes count as beamed when the pixel column halfway
    between them holds more black pixels than the five staff lines alone
    would account for.
    """
    staff_img = staff.getImage()
    # Regular number of black pixels in a staff column.
    staff_line_pixels = 5 * staff.getLineWidth()
    for left, right in zip(primitives, primitives[1:]):
        if (left.getPrimitive() == "note"
                and right.getPrimitive() == "note"
                and left.getDuration() in (1, 0.5)
                and right.getDuration() == 1):
            left_x = left.getBox().getCenter()[0]
            right_x = right.getBox().getCenter()[0]
            mid_col = staff_img[:, int(left_x + (right_x - left_x) // 2)]
            if np.count_nonzero(mid_col == 0) > staff_line_pixels:
                left.setDuration(0.5)
                right.setDuration(0.5)


def _apply_key_signature(primitives):
    """Apply the leading run of sharps/flats as a key signature.

    The last leading accidental is not counted when its centre lies within
    one box width of the following primitive (it then belongs to that
    primitive rather than to the key signature).

    Returns:
        Tuple ``(primitives, num_sharps, num_flats)`` where *primitives* is
        the list with the key-signature accidentals removed.
    """
    num_sharps = 0
    num_flats = 0
    j = 0
    # Stop at the first primitive that is not an accidental (or at the end of
    # the list); other zero-duration primitives such as bar lines must not be
    # scanned as part of the key signature.
    while j < len(primitives) and primitives[j].getPrimitive() in ("sharp", "flat"):
        if primitives[j].getPrimitive() == "sharp":
            num_sharps += 1
        else:
            num_flats += 1
        j += 1

    if j == 0:
        return primitives, num_sharps, num_flats

    accidental_type = primitives[j - 1].getPrimitive()
    if j < len(primitives):
        following_box = primitives[j].getBox()
        max_accidental_offset_x = (
            following_box.getCenter()[0] - following_box.getWidth())
        accidental_center_x = primitives[j - 1].getBox().getCenter()[0]
        if accidental_center_x > max_accidental_offset_x:
            if accidental_type == "sharp":
                num_sharps -= 1
            else:
                num_flats -= 1

    # Look up the affected note letters and drop the key-signature symbols.
    if accidental_type == "sharp":
        notes_to_modify = key_signature_changes[accidental_type][num_sharps]
        primitives = primitives[num_sharps:]
        semitone_shift = 1
    else:
        notes_to_modify = key_signature_changes[accidental_type][num_flats]
        primitives = primitives[num_flats:]
        semitone_shift = -1

    for primitive in primitives:
        if primitive.getPrimitive() != "note":
            continue
        pitch = primitive.getPitch()
        if pitch[0] in notes_to_modify:
            primitive.setPitch(MIDI_to_pitch[pitch_to_MIDI[pitch] + semitone_shift])

    return primitives, num_sharps, num_flats


def _apply_accidentals(primitives):
    """Apply each sharp/flat to the note directly after it; drop it if used.

    An accidental is applied when the next primitive is a note and the
    accidental's centre lies within one box width of that note. Mutates
    *primitives* in place.
    """
    applied_indices = []
    # The last primitive has no successor, so it can never be applied.
    for j in range(len(primitives) - 1):
        accidental_type = primitives[j].getPrimitive()
        if accidental_type not in ("flat", "sharp"):
            continue
        target = primitives[j + 1]
        max_accidental_offset_x = (
            target.getBox().getCenter()[0] - target.getBox().getWidth())
        accidental_center_x = primitives[j].getBox().getCenter()[0]
        if (accidental_center_x > max_accidental_offset_x
                and target.getPrimitive() == "note"):
            semitone_shift = 1 if accidental_type == "sharp" else -1
            pitch = target.getPitch()
            target.setPitch(MIDI_to_pitch[pitch_to_MIDI[pitch] + semitone_shift])
            applied_indices.append(j)

    # Delete from the back so earlier indices stay valid.
    for j in reversed(applied_indices):
        del primitives[j]


def _assemble_bars(staff, primitives):
    """Split *primitives* at bar lines and add the resulting bars to *staff*."""
    bar = Bar()
    for primitive in primitives:
        if primitive.getPrimitive() != "line":
            bar.addPrimitive(primitive)
        else:
            staff.addBar(bar)
            bar = Bar()
    # Add the final bar of the staff.
    staff.addBar(bar)


def _write_musicxml(staffs, num_sharps, num_flats):
    """Build the score from *staffs*, write it to disk and return its path.

    Clef and time signature are taken from the first staff; every measure of
    the first staff carries the time signature. The accidental counts only
    select the key name used in the output file name.
    """
    score = Score()
    part = Part("Piano")
    score.append(part)

    # NOTE(review): presumably a two-digit code such as 44 (numerator,
    # denominator) -- confirm against the keys of time_imgs.
    time_code = int(staffs[0].getTimeSignature())
    clef = staffs[0].getClef()

    key_signature = "C"
    if num_sharps != 0:
        key_signature = key[num_sharps - 1]
    if num_flats != 0:
        key_signature = key[len(key) - num_flats]

    measures = []
    for i, staff in enumerate(staffs):
        for bar in staff.getBars():
            m = Measure(
                clef=clef,
                time_signature=(time_code // 10, time_code % 10) if i == 0 else None)
            for primitive in bar.getPrimitives():
                duration = primitive.getDuration()
                if primitive.getPrimitive() == "note":
                    m.append(Note(primitive.getPitch(), duration))
                elif primitive.getPrimitive() == "rest":
                    m.append(Rest(duration))
            measures.append(m)
    part.extend(measures)

    # ------- Write to disk -------
    print("[INFO] Writing MusicXml to disk")
    output_path = "/sdcard/Documents/" + key_signature + "sheet.xml"
    score.export_to_file(output_path)
    return output_path
sheetLine.append(currentNote) print(currentNote) else: print(staff_primitives[j].getPrimitive(), end=", ") print("\n") # ------- Assemble Staff ------- print("[INFO] Assembling current staff") bar = Bar() while (len(staff_primitives) > 0): primitive = staff_primitives.pop(0) if (primitive.getPrimitive() != "line"): bar.addPrimitive(primitive) else: staffs[i].addBar(bar) bar = Bar() # Add final bar in staff staffs[i].addBar(bar) # ------- Convert extracted notes to harmonica ------- # C harmonica is C4 to C7 harmonicaHoles = { 'C4': '1', 'D4': '-1', 'E4': '2', 'F4': '2*',