def main(): usage = "%prog [options] <in-file>" parser = OptionParser(usage=usage) parser.add_option("-p", "--partitions", dest="partitions", action="store", type="int", default=DEFAULT_PARTITIONS, help="the number of partitions to use (default: %d)" % DEFAULT_PARTITIONS) parser.add_option("--ids", dest="ids", action="store_true", help="don't output any files - just print out a list of the ids of the sequences in each partition") options, arguments = parser.parse_args() if len(arguments) == 0: print >>sys.stderr, "You must specify an input data file" sys.exit(1) filename = os.path.abspath(arguments[0]) # Read in the data file seqs = SequenceIndex.from_file(filename) part_pattern = "%s.part%%d" % filename heldout_pattern = "%s.heldout_part%%d" % filename # Divide the data up into partitions, with their complements parts = zip(partition(seqs.sequences, options.partitions), holdout_partition(seqs.sequences, options.partitions)) # Save each partition and its complement for i,(part,heldout) in enumerate(parts): if options.ids: # Just print out a list of the ids in the partition print " ".join(["%d" % s.id for s in part]) else: save_sequences(part_pattern % i, part) save_sequences(heldout_pattern % i, heldout) print >>sys.stderr, "Wrote partition %d to %s and %s" % (i,part_pattern % i,heldout_pattern % i)
def main(): usage = "%prog [options] <seq-file> <index>" description = "Displays a tree for the annotated derivation of a chord "\ "sequence in the gold standard" parser = OptionParser(usage=usage, description=description) options, arguments = parser.parse_args() if len(arguments) < 2: print "You must specify a sequence file and index" sys.exit(1) index = int(arguments[1]) # Get the chord sequence sequence = SequenceIndex.from_file(arguments[0]).sequence_by_index(index) try: # Show the song name print "Tree for '%s'" % sequence.string_name tree = build_tree_for_sequence(sequence) # Output the linear textual form of the tree print tree # Display the tree using NLTK ntree = tree_to_nltk(tree) ntree.draw() except TreeBuildError, err: print >> sys.stderr, "Error parsing: %s" % err sys.exit(1)
def test_from_sequence(self):
    """
    Smoke test: build a DbInput from the first sequence in the test
    sequence index file. There is no assertion; the test passes as long
    as loading and conversion do not raise.

    """
    # Load the index and take its first sequence
    first_sequence = SequenceIndex.from_file(DB_SEQUENCES_FILE).sequences[0]
    # Conversion is the operation under test
    DbInput.from_sequence(first_sequence)
def main():
    """
    Command-line entry point: write the gold-standard tags of every
    sequence in a sequence index file to a tags file.

    Expects two positional arguments, the input sequence index file and
    the output tags file. Exits with status 1 if either is missing.

    """
    description = ("Reads a sequence index file and produces a tag sequence "
                   "file containing the gold standard tags for every sequence")
    parser = OptionParser(usage="%prog [options] <in-file> <out-file>",
                          description=description)
    options, arguments = parser.parse_args()

    if len(arguments) < 2:
        print >> sys.stderr, "You must specify input and output data files"
        sys.exit(1)
    source_path = os.path.abspath(arguments[0])
    target_path = os.path.abspath(arguments[1])

    # Map each sequence id to the list of categories of its chords
    index = SequenceIndex.from_file(source_path)
    tags_by_id = dict(
        (sequence.id, [chord.category for chord in sequence])
        for sequence in index.sequences)

    # Serialize the mapping
    TagsFile(tags_by_id).to_file(target_path)

    print >> sys.stderr, "Wrote tags data to %s" % target_path
def main():
    """
    Command-line entry point: convert a sequence index file into C&C
    supertagger training data.

    Expects two positional arguments, the input sequence index file and
    the output file. Exits with status 1 if either is missing.

    """
    usage = "%prog [options] <in-file> <out-file>"
    parser = OptionParser(usage=usage)
    options, arguments = parser.parse_args()

    if len(arguments) < 2:
        print >> sys.stderr, "You must specify input and output data files"
        sys.exit(1)
    in_filename = os.path.abspath(arguments[0])
    out_filename = os.path.abspath(arguments[1])

    # Read in the data file
    seqs = SequenceIndex.from_file(in_filename)

    # Convert each sequence to C&C supertagger training data
    output = [sequence_to_candc_chord_super(seq) for seq in seqs.sequences]

    # Output the results to a file. The context manager closes the file
    # even if the write fails part-way through.
    with open(out_filename, 'w') as outfile:
        outfile.write("".join(output))

    print >> sys.stderr, "Wrote C&C supertagger training data to %s" % out_filename
def prepare_db_input():
    """
    Loads the test sequence index file, takes its first sequence and
    prepares that sequence for use as input to the parser. Tests may use
    this to get hold of example input data.

    @note: Don't rely on the size of the returned tuple staying the same.
        More items may be appended in the future, so pick out the items
        currently returned by index instead of unpacking the whole tuple.

    @rtype: tuple
    @return: (sequence index, sequence, DbInput instance)

    """
    from jazzparser.data.db_mirrors import SequenceIndex
    from jazzparser.data.input import DbInput
    from jazzparser.settings import TEST as settings

    index = SequenceIndex.from_file(settings.SEQUENCE_DATA)
    first_sequence = index.sequences[0]
    return index, first_sequence, DbInput.from_sequence(first_sequence)
def main():
    """
    Command-line entry point: convert a sequence index file into C&C
    supertagger training data.

    Takes two positional arguments: the sequence index file to read and
    the file to write the training data to. Exits with status 1 when
    fewer than two arguments are supplied.

    """
    usage = "%prog [options] <in-file> <out-file>"
    parser = OptionParser(usage=usage)
    options, arguments = parser.parse_args()

    if len(arguments) < 2:
        print >>sys.stderr, "You must specify input and output data files"
        sys.exit(1)
    in_filename = os.path.abspath(arguments[0])
    out_filename = os.path.abspath(arguments[1])

    # Read in the data file
    seqs = SequenceIndex.from_file(in_filename)

    # One chunk of C&C supertagger training data per sequence
    output = [sequence_to_candc_chord_super(seq) for seq in seqs.sequences]

    # Write everything in one go. Using "with" guarantees the handle is
    # released even when the write raises.
    with open(out_filename, 'w') as outfile:
        outfile.write("".join(output))

    print >>sys.stderr, "Wrote C&C supertagger training data to %s" % out_filename
def main():
    """
    Command-line entry point: dump gold-standard tags to a tags file.

    Reads the sequence index file given as the first argument, collects
    the category of every chord of every sequence, and writes the result,
    keyed by sequence id, to the file given as the second argument.
    Exits with status 1 if fewer than two arguments are given.

    """
    parser = OptionParser(
        usage="%prog [options] <in-file> <out-file>",
        description="Reads a sequence index file and produces a tag sequence "
                    "file containing the gold standard tags for every sequence",
    )
    options, arguments = parser.parse_args()

    if len(arguments) < 2:
        print >>sys.stderr, "You must specify input and output data files"
        sys.exit(1)
    in_path, out_path = [os.path.abspath(name) for name in arguments[:2]]

    # Load the sequences and turn each one into a list of tags
    loaded = SequenceIndex.from_file(in_path)
    tags = {}
    for sequence in loaded.sequences:
        categories = []
        for chord in sequence:
            categories.append(chord.category)
        tags[sequence.id] = categories

    # Store the tags
    writer = TagsFile(tags)
    writer.to_file(out_path)

    print >>sys.stderr, "Wrote tags data to %s" % out_path
def main(): usage = "%prog [options] <seq-file> <out-file>" description = "Outputs a full corpus from a sequence index file to a text "\ "file that can more easily be read by other people. If <out-file> is "\ "omitted, data is output to stdout" parser = OptionParser(usage=usage, description=description) options, arguments = parser.parse_args() if len(arguments) < 1: print "You must specify a sequence file" sys.exit(1) # Get the chord sequence seqindex = SequenceIndex.from_file(arguments[0]) if len(arguments) > 1: # Open a file to write to outfile = open(arguments[1], 'w') else: # Output to stdout outfile = sys.stdout try: output_sequence_index(seqindex, outfile) finally: outfile.close()
def count_categories(options, arguments): # Read in the sequence data from the file filename = os.path.abspath(arguments[0]) seqs = SequenceIndex.from_file(filename) category_counts = {} total = 0 # Count up how many times each category is used for seq in seqs.sequences: for chord in seq.iterator(): total += 1 if chord.category not in category_counts: category_counts[chord.category] = 1 else: category_counts[chord.category] += 1 table_header = [['Category','Count','%']] table_data = [] for cat,count in category_counts.items(): category = cat or "No category" percent = float(count) / float(total) * 100.0 table_data.append([category, count, percent]) # Sort the rows by the count table_data = reversed(sorted(table_data, key=lambda d: d[1])) # Now format the numbers table_data = [[row[0], "%s" % row[1], "%.02f" % row[2]] for row in table_data] # Add the header on the top table_data = table_header + table_data if options.csv: print "\n".join([",".join([v for v in row]) for row in table_data]) else: pprint_table(sys.stdout, table_data, [True,False,False], "|") print "Total chords: %s" % total return 0
def main(): usage = "%prog [options] <seq-file> <index>" description = "Displays a tree for the annotated derivation of a chord "\ "sequence in the gold standard" parser = OptionParser(usage=usage, description=description) options, arguments = parser.parse_args() if len(arguments) < 2: print "You must specify a sequence file and index" sys.exit(1) index = int(arguments[1]) # Get the chord sequence sequence = SequenceIndex.from_file(arguments[0]).sequence_by_index(index) try: # Show the song name print "Tree for '%s'" % sequence.string_name tree = build_tree_for_sequence(sequence) # Output the linear textual form of the tree print tree # Display the tree using NLTK ntree = tree_to_nltk(tree) ntree.draw() except TreeBuildError, err: print >>sys.stderr, "Error parsing: %s" % err sys.exit(1)
def main(): usage = "%prog <in-file> <part>/<parts>" description = "Takes a sequence data file, partitions it into the "\ "number of partitions given and prints out the indices of the "\ "sequences the appear in the requested partition. Specify the "\ "partition number (from 0) and total number of partitions in the "\ "form <partition-num>/<total-parts>." parser = OptionParser(usage=usage, description=description) options, arguments = parser.parse_args() if len(arguments) == 0: print >>sys.stderr, "You must specify an input data file" sys.exit(1) elif len(arguments) == 1: print >>sys.stderr, "You must give a partition specifier: <part>/<parts>" filename = os.path.abspath(arguments[0]) part, parts = arguments[1].split("/") part, parts = int(part), int(parts) # Read in the data file seqs = SequenceIndex.from_file(filename) # Partition the sequences indices = range(len(seqs)) # Use the partition function to ensure this partitioning is consistent # with all other places the sequences get partitioned all_parts = partition(indices, parts) print " ".join(["%d" % i for i in all_parts[part]])
def from_file(filename, options={}):
    """
    Builds an AnnotatedDbInput from one sequence of a sequence index file.

    @param filename: path of the sequence index file to load
    @param options: must contain the key C{"index"}, the index of the
        sequence to pick out of the file
    @raise InputReadError: if the file has no sequence at that index
    @return: the result of AnnotatedDbInput.from_sequence for the
        selected sequence

    """
    # Load the whole index, then look up the requested sequence
    index_file = SequenceIndex.from_file(filename)
    wanted = options["index"]
    sequence = index_file.sequence_by_index(wanted)
    if sequence is not None:
        return AnnotatedDbInput.from_sequence(sequence)
    raise InputReadError("%d is not a valid sequence index in %s" %
                         (wanted, filename))
def from_file(filename, options={}):
    """
    Reads a sequence index file and wraps a single sequence from it as an
    AnnotatedDbInput.

    @param filename: name of the sequence index file
    @param options: dictionary holding at least C{'index'}, which selects
        the sequence to use
    @raise InputReadError: if the lookup by index comes back empty
    @return: AnnotatedDbInput.from_sequence applied to the chosen sequence

    """
    loaded = SequenceIndex.from_file(filename)
    # The lookup signals a bad index by returning None
    position = options['index']
    chosen = loaded.sequence_by_index(position)
    if chosen is None:
        message = "%d is not a valid sequence index in %s" % (position, filename)
        raise InputReadError(message)
    return AnnotatedDbInput.from_sequence(chosen)
def train_model_on_sequence_data(model, data_filename, *args, **kwargs):
    """
    Variant of train_model that starts from a db_mirrors sequence data
    file instead of a C&C training data file.

    The sequence data is converted to a C&C training file first and
    train_model is then run on that file. Any extra positional and
    keyword arguments are handed on to train_model unchanged.

    @param model: passed straight through to train_model
    @param data_filename: path of the sequence data file

    """
    sequence_index = SequenceIndex.from_file(data_filename)
    # Hold on to the file object, not just its name, for as long as
    # training runs
    training_file = sequence_index_to_training_file(sequence_index)
    train_model(model, training_file.name, *args, **kwargs)
def run(self, args, state): from jazzparser.data.db_mirrors import SequenceIndex from .shell import ShellError if len(args) < 1: raise ShellError, "Please specify a file to load" filename = args[0] # Load the data file si = SequenceIndex.from_file(filename) # Store it in the state state.gs_sequences = si print "Loaded %d gold standard sequences from %s" % (len(si), filename)
def run(self, args, state): from jazzparser.data.db_mirrors import SequenceIndex from .shell import ShellError if len(args) < 1: raise ShellError, "Please specify a file to load" filename = args[0] # Load the data file si = SequenceIndex.from_file(filename) # Store it in the state state.gs_sequences = si print "Loaded %d gold standard sequences from %s" % (len(si),filename)
def main(): usage = "%prog [options] <seq-file>" description = "Outputs the details of all chord sequences from a "\ "sequence index file to stdout. This is for getting a "\ "(relatively) human-readable form of the data" parser = OptionParser(usage=usage, description=description) parser.add_option("--categories", "-c", dest="categories", action="store_true", help="include category annotations") parser.add_option("--coordinations", "-o", dest="coordinations", action="store_true", help="include coordination annotations") parser.add_option("--meta", "-m", dest="meta", action="store_true", help="output sequence meta data") parser.add_option("--no-map", "-n", dest="no_map", action="store_true", help="don't apply a mapping from the names in the corpus to those used in the paper") parser.add_option("--all", "-a", dest="all", action="store_true", help="output everything") options, arguments = parser.parse_args() if len(arguments) < 1: print "You must specify a sequence file" sys.exit(1) # Get the chord sequence seqs = SequenceIndex.from_file(arguments[0]) # Show the song name for seq in seqs: print "Chords for '%s'" % seq.string_name if options.meta or options.all: print "Main key: %s" % seq.key print "Bar length: %d" % seq.bar_length # Put together a table of chords plus annotations (if requested) data = [[ str(chord) for chord in seq ], [ str(chord.duration) for chord in seq ]] if options.categories or options.all: if options.no_map: # Don't apply any mapping to the category names data.append([ chord.category for chord in seq ]) else: # Map the names to those used in the paper/thesis data.append([ annotation_to_lexicon_name(chord.category) for chord in seq ]) if options.coordinations or options.all: coords = [] for chord in seq: ti = chord.treeinfo if ti.coord_resolved and ti.coord_unresolved: coords.append(")(") elif ti.coord_resolved: coords.append(")") elif ti.coord_unresolved: coords.append("(") else: coords.append("") data.append(coords) pprint_table(sys.stdout, 
data, default_just=True) print
def main(): usage = "%prog [options] <seq-file>:<index> <midi-file> <midi-out>" description = "Aligns a chord sequence with a MIDI file and inserts "\ "marker events into the MIDI data to mark where chord changes "\ "are. Alignment parameters will be loaded from a file (not "\ "implemented yet), but can be overridden using the script's "\ "options." parser = OptionParser(usage=usage, description=description) parser.add_option("--mbpb", "--midi-beats-per-beat", dest="beats_per_beat", type="int", help="number of midi beats to align with a single sequence beat (see SequenceMidiAlignment.midi_beats_per_beat)") parser.add_option("--ss", "--sequence-start", dest="sequence_start", type="int", help="number of midi ticks after the first note-on event when the chord sequence begins (see SequenceMidiAlignment.sequence_start)") parser.add_option("--repeats", dest="repeats", help="repeat spans, in the form 'start_chord,end_chord,count', separated by semicolons (see SequenceMidiAlignment.repeat_spans)") parser.add_option("--lyrics", dest="lyrics", action="store_true", help="use lyrics events instead of marker events to mark the chords") options, arguments = parser.parse_args() if len(arguments) < 3: print "You must specify a sequence file, midi file and output midi filename" sys.exit(1) # Get the chord sequence filename,__,index = arguments[0].partition(":") index = int(index) seq = SequenceIndex.from_file(filename).sequence_by_index(index) # Load the input midi data mid = read_midifile(arguments[1]) outfile = arguments[2] # For now, just create a new default alignment # TODO: load the alignment parameters from a file or from the # sequence data itself alignment = SequenceMidiAlignment() # Override alignment parameters if options are given if options.beats_per_beat is not None: alignment.midi_beats_per_beat = options.beats_per_beat if options.sequence_start is not None: alignment.sequence_start = options.sequence_start if options.repeats is not None: repeats = [] try: for 
string_triple in options.repeats.split(":"): start,end,count = string_triple.split(",") start,end,count = int(start), int(end), int(count) repeats.append((start,end,count)) except: print "Error parsing repeat spans:" raise alignment.repeat_spans = repeats alignment.align(seq, mid, lyrics=options.lyrics) write_midifile(mid, outfile)
def main(): parser = OptionParser() usage = "%prog [options] [<seq-db-file>]" description = "Measure the degree of ambiguity (average cats per chord) "\ "for a grammar over a particular dataset" parser.add_option('-g', '--grammar', dest='grammar', action='store', help='Speficy a grammar by name') options, arguments = parser.parse_args() if len(arguments) < 1: print "No sequence index file given: grammar stats only" seq_file = None else: seq_file = arguments[0] # Load the grammar grammar = get_grammar(options.grammar) # Some stats about ambiguity in the grammar table = [] class_cats = [] for class_name, chord_class in grammar.chord_classes.items(): if class_name not in EXCLUDE_CLASSES: cats = grammar.get_signs_for_word(str(chord_class.words[0])) table.append([str(class_name), str(len(cats))]) class_cats.append(len(cats)) table.append(["Mean", "%.2f" % (float(sum(class_cats)) / len(class_cats))]) table.append(["Std dev", "%.2f" % (std(class_cats))]) print "Cats for each chord class:" pprint_table(sys.stdout, table, justs=[True, True]) # Ambiguity stats on the dataset if seq_file is not None: seqs = SequenceIndex.from_file(arguments[0]) counts = [] for seq in seqs: for chord in seq: cats = grammar.get_signs_for_word(chord) counts.append(len(cats)) table = [] table.append(["Chords", str(len(counts))]) table.append( ["Cats per chord", "%.2f" % (float(sum(counts)) / len(counts))]) table.append(["Std dev", "%.2f" % (std(counts))]) print pprint_table(sys.stdout, table, justs=[True, True])
def main(): usage = "%prog [options] <seq-file> <index>" description = "Outputs the details of a chord sequence from a "\ "sequence index file to stdout." parser = OptionParser(usage=usage, description=description) parser.add_option("--categories", "-c", dest="categories", action="store_true", help="include category annotations") parser.add_option("--coordinations", "-o", dest="coordinations", action="store_true", help="include coordination annotations") parser.add_option("--meta", "-m", dest="meta", action="store_true", help="output sequence meta data") options, arguments = parser.parse_args() if len(arguments) < 2: print "You must specify a sequence file and index" sys.exit(1) index = int(arguments[1]) # Get the chord sequence seq = SequenceIndex.from_file(arguments[0]).sequence_by_index(index) # Show the song name print "Chords for '%s'" % seq.string_name if options.meta: print "Main key: %s" % seq.key print "Bar length: %d" % seq.bar_length print "Notes:\n%s\n\n" % seq.notes for i, chord in enumerate(seq.iterator()): output = "%d\t%s\t%d" % (i, chord, chord.duration) if options.categories: output += "\t%s" % chord.category if options.coordinations: ti = chord.treeinfo if ti.coord_resolved and ti.coord_unresolved: output += "\t)(" elif ti.coord_resolved: output += "\t)" elif ti.coord_unresolved: output += "\t(" print output
def main(): usage = "%prog [options] <seq-file> <index>" description = "Outputs the key associated with each chord of a sequence "\ "from an annotated corpus" parser = OptionParser(usage=usage, description=description) options, arguments = parser.parse_args() if len(arguments) < 2: print "You must specify a sequence file and index" sys.exit(1) index = int(arguments[1]) # Get the chord sequence seq = SequenceIndex.from_file(arguments[0]).sequence_by_index(index) print keys_for_sequence(seq)
def main(): parser = OptionParser() usage = "%prog [options] [<seq-db-file>]" description = "Measure the degree of ambiguity (average cats per chord) "\ "for a grammar over a particular dataset" parser.add_option('-g', '--grammar', dest='grammar', action='store', help='Speficy a grammar by name') options, arguments = parser.parse_args() if len(arguments) < 1: print "No sequence index file given: grammar stats only" seq_file = None else: seq_file = arguments[0] # Load the grammar grammar = get_grammar(options.grammar) # Some stats about ambiguity in the grammar table = [] class_cats = [] for class_name,chord_class in grammar.chord_classes.items(): if class_name not in EXCLUDE_CLASSES: cats = grammar.get_signs_for_word(str(chord_class.words[0])) table.append([str(class_name), str(len(cats))]) class_cats.append(len(cats)) table.append(["Mean", "%.2f" % (float(sum(class_cats))/len(class_cats))]) table.append(["Std dev", "%.2f" % (std(class_cats))]) print "Cats for each chord class:" pprint_table(sys.stdout, table, justs=[True, True]) # Ambiguity stats on the dataset if seq_file is not None: seqs = SequenceIndex.from_file(arguments[0]) counts = [] for seq in seqs: for chord in seq: cats = grammar.get_signs_for_word(chord) counts.append(len(cats)) table = [] table.append(["Chords", str(len(counts))]) table.append(["Cats per chord", "%.2f" % (float(sum(counts)) / len(counts))]) table.append(["Std dev", "%.2f" % (std(counts))]) print pprint_table(sys.stdout, table, justs=[True, True])
def main():
    """
    Command-line entry point: drop every sequence that is not fully
    annotated from a sequence data file.

    The filtered sequences are saved back to the same file that was read,
    so the input file is overwritten. The number of sequences removed is
    reported on stderr. Exits with status 1 if no input file is given.

    """
    description = ("Filter a sequence data file to remove any sequences "
                   "that are not fully annotated and write the result back to the file.")
    parser = OptionParser(usage="%prog [options] <in-file>",
                          description=description)
    options, arguments = parser.parse_args()

    if not arguments:
        print >>sys.stderr, "You must specify an input data file"
        sys.exit(1)
    path = os.path.abspath(arguments[0])

    # Load everything, keep only the fully annotated sequences
    index = SequenceIndex.from_file(path)
    kept = []
    for sequence in index.sequences:
        if sequence.fully_annotated:
            kept.append(sequence)

    # Overwrite the input with the survivors
    save_sequences(path, kept)

    removed = len(index.sequences) - len(kept)
    print >>sys.stderr, "Removed %d sequences" % removed
def main(): usage = "%prog [options] <in-file>" parser = OptionParser(usage=usage) parser.add_option("-p", "--partitions", dest="partitions", action="store", type="int", default=DEFAULT_PARTITIONS, help="the number of partitions to use (default: %d)" % DEFAULT_PARTITIONS) parser.add_option( "--ids", dest="ids", action="store_true", help= "don't output any files - just print out a list of the ids of the sequences in each partition" ) options, arguments = parser.parse_args() if len(arguments) == 0: print >> sys.stderr, "You must specify an input data file" sys.exit(1) filename = os.path.abspath(arguments[0]) # Read in the data file seqs = SequenceIndex.from_file(filename) part_pattern = "%s.part%%d" % filename heldout_pattern = "%s.heldout_part%%d" % filename # Divide the data up into partitions, with their complements parts = zip(partition(seqs.sequences, options.partitions), holdout_partition(seqs.sequences, options.partitions)) # Save each partition and its complement for i, (part, heldout) in enumerate(parts): if options.ids: # Just print out a list of the ids in the partition print " ".join(["%d" % s.id for s in part]) else: save_sequences(part_pattern % i, part) save_sequences(heldout_pattern % i, heldout) print >> sys.stderr, "Wrote partition %d to %s and %s" % ( i, part_pattern % i, heldout_pattern % i)
def main(): usage = "%prog <in-file> <command>" parser = OptionParser(usage=usage) parser.add_option("-c", "--commands", dest="commands", action="store_true", help="show a list of available commands") options, arguments = parser.parse_args() commands = { 'ids': "output a space-separated list of the ids of all sequences", 'count': "output the total number of sequences", 'help': "show this help", } if options.commands: print "Available commands:\n%s" % \ "\n".join(["%s %s" % (format(cmd, " >10s"), help) for cmd,help in commands.items()]) sys.exit(0) if len(arguments) == 0: print >> sys.stderr, "You must specify an input data file" sys.exit(1) filename = os.path.abspath(arguments[0]) # Read in the data file seqs = SequenceIndex.from_file(filename) if len(arguments) > 1: command = arguments[1].lower() if command not in commands: print >> sys.stderr, "%s is not a valid command. Use -c for a list of available commands." elif command == "ids": # Output a list of the ids of sequences print " ".join(["%s" % id for id in seqs.ids]) elif command == "count": print len(seqs) else: print >> sys.stderr, "Oops, I've not defined this command" else: print "Successfully read in sequences"
def main():
    """
    Command-line entry point: filter a sequence data file in place.

    Reads the file named by the first argument, keeps the sequences whose
    C{fully_annotated} attribute is true and saves them under the same
    filename. Prints how many sequences were removed to stderr. Exits
    with status 1 when no file is named.

    """
    usage = "%prog [options] <in-file>"
    description = ("Filter a sequence data file to remove any sequences "
                   "that are not fully annotated and write the result back to the file.")
    parser = OptionParser(usage=usage, description=description)
    options, arguments = parser.parse_args()

    if len(arguments) < 1:
        print >> sys.stderr, "You must specify an input data file"
        sys.exit(1)
    target = os.path.abspath(arguments[0])

    # Read in the data file
    loaded = SequenceIndex.from_file(target)

    annotated = [candidate for candidate in loaded.sequences
                 if candidate.fully_annotated]
    # The same path is used for reading and writing
    save_sequences(target, annotated)

    dropped = len(loaded.sequences) - len(annotated)
    print >> sys.stderr, "Removed %d sequences" % (dropped)
def main(): usage = "%prog [options] <seq-file> <index>" description = "Outputs the details of a chord sequence from a "\ "sequence index file to stdout." parser = OptionParser(usage=usage, description=description) parser.add_option("--categories", "-c", dest="categories", action="store_true", help="include category annotations") parser.add_option("--coordinations", "-o", dest="coordinations", action="store_true", help="include coordination annotations") parser.add_option("--meta", "-m", dest="meta", action="store_true", help="output sequence meta data") options, arguments = parser.parse_args() if len(arguments) < 2: print "You must specify a sequence file and index" sys.exit(1) index = int(arguments[1]) # Get the chord sequence seq = SequenceIndex.from_file(arguments[0]).sequence_by_index(index) # Show the song name print "Chords for '%s'" % seq.string_name if options.meta: print "Main key: %s" % seq.key print "Bar length: %d" % seq.bar_length print "Notes:\n%s\n\n" % seq.notes for i,chord in enumerate(seq.iterator()): output = "%d\t%s\t%d" % (i,chord,chord.duration) if options.categories: output += "\t%s" % chord.category if options.coordinations: ti = chord.treeinfo if ti.coord_resolved and ti.coord_unresolved: output += "\t)(" elif ti.coord_resolved: output += "\t)" elif ti.coord_unresolved: output += "\t(" print output
def main(): usage = "%prog [options] <seq-file>" description = "Outputs some statistics about a chord sequence corpus file" parser = OptionParser(usage=usage, description=description) options, arguments = parser.parse_args() if len(arguments) < 1: print "You must specify a sequence file" sys.exit(1) # Get the chord sequence seqindex = SequenceIndex.from_file(arguments[0]) print "Sequences: %d" % len(seqindex) # Get the sequence lengths lengths = [len(seq) for seq in seqindex] # Count up chords print "Chords: %d" % sum(lengths) print "Min length: %d" % min(lengths) print "Max length: %d" % max(lengths) print "Mean length: %f" % (float(sum(lengths)) / len(lengths))
def main(): usage = "%prog <in-file> <command>" parser = OptionParser(usage=usage) parser.add_option("-c", "--commands", dest="commands", action="store_true", help="show a list of available commands") options, arguments = parser.parse_args() commands = { 'ids' : "output a space-separated list of the ids of all sequences", 'count' : "output the total number of sequences", 'help' : "show this help", } if options.commands: print "Available commands:\n%s" % \ "\n".join(["%s %s" % (format(cmd, " >10s"), help) for cmd,help in commands.items()]) sys.exit(0) if len(arguments) == 0: print >>sys.stderr, "You must specify an input data file" sys.exit(1) filename = os.path.abspath(arguments[0]) # Read in the data file seqs = SequenceIndex.from_file(filename) if len(arguments) > 1: command = arguments[1].lower() if command not in commands: print >>sys.stderr, "%s is not a valid command. Use -c for a list of available commands." elif command == "ids": # Output a list of the ids of sequences print " ".join(["%s" % id for id in seqs.ids]) elif command == "count": print len(seqs) else: print >>sys.stderr, "Oops, I've not defined this command" else: print "Successfully read in sequences"
def main():
    """
    Command-line entry point of the dependency tree printer.

    As far as this function goes, it parses the options, chooses the
    parser name and feature parameter file from the results filename,
    looks up the input sequence for the song named by that file and
    loads the parse results from it.

    Exits with status 1 if no results file is given or it cannot be
    loaded.

    """
    description = "Prints a dependency tree for a parse result"
    parser = OptionParser(usage="%prog [options] <results-files>",
                          description=description)
    parser.add_option("-t", "--times", dest="times", action="store_true",
                      help="show timings of nodes")
    parser.add_option("-l", "--latex", dest="latex", action="store_true",
                      help="output Latex for the graphs using tikz-dependency")
    parser.add_option("--la", "--latex-align", dest="latex_align", action="store_true",
                      help="show node alignments in Latex output")
    parser.add_option("--align-time", dest="align_time", action="store_true",
                      help="show the graph of common dependencies when the two graphs are aligned by node times")
    parser.add_option("--align-max", dest="align_max", action="store_true",
                      help="show the graph of common dependencies when the two graphs are aligned to maximize the dependency recovery")
    options, arguments = parser.parse_args()

    if not arguments:
        print >>sys.stderr, "Specify a file to read the results from"
        sys.exit(1)
    filename = arguments[0]

    # Switch between PCCG and St+PCCG according to the filename
    if "stpcfg" in filename:
        PARSER = "St+PCCG"
        FEATURE_PARAMS = "../xuanhong/params_2_stpcfg.txt"
    else:
        PARSER = "PCCG"
        FEATURE_PARAMS = "../xuanhong/params_2_pcfg.txt"

    # Input sequence: the song list is indexed by the base name of the
    # results file and its value selects the sequence
    list_songs = read_list_songs("../xuanhong/list_songs.txt")
    song_name = os.path.basename(filename)
    seqs = SequenceIndex.from_file(settings.SEQUENCE_DATA)
    seq = seqs.sequences[list_songs[song_name]]
    input_sequence = DbInput.from_sequence(seq)

    try:
        pres = ParseResults.from_file(filename)
    except ParseResults.LoadError as err:
        print >>sys.stderr, "Error loading file: %s" % (err)
        sys.exit(1)
def from_file(filename, options={}):
    """
    Builds an AnnotatedDbBulkInput from all sequences of a sequence index
    file.

    @param filename: path of the sequence index file
    @param options: accepted but not used here
    @return: AnnotatedDbBulkInput wrapping one AnnotatedDbInput per
        sequence in the file, in file order

    """
    inputs = []
    # Iterating over the index yields its sequences
    for sequence in SequenceIndex.from_file(filename):
        inputs.append(AnnotatedDbInput.from_sequence(sequence))
    return AnnotatedDbBulkInput(inputs)
def main(): usage = "%prog [options] <seq-file>" description = "Outputs the details of all chord sequences from a "\ "sequence index file to stdout. This is for getting a "\ "(relatively) human-readable form of the data" parser = OptionParser(usage=usage, description=description) parser.add_option("--categories", "-c", dest="categories", action="store_true", help="include category annotations") parser.add_option("--coordinations", "-o", dest="coordinations", action="store_true", help="include coordination annotations") parser.add_option("--meta", "-m", dest="meta", action="store_true", help="output sequence meta data") parser.add_option( "--no-map", "-n", dest="no_map", action="store_true", help= "don't apply a mapping from the names in the corpus to those used in the paper" ) parser.add_option("--all", "-a", dest="all", action="store_true", help="output everything") options, arguments = parser.parse_args() if len(arguments) < 1: print "You must specify a sequence file" sys.exit(1) # Get the chord sequence seqs = SequenceIndex.from_file(arguments[0]) # Show the song name for seq in seqs: print "Chords for '%s'" % seq.string_name if options.meta or options.all: print "Main key: %s" % seq.key print "Bar length: %d" % seq.bar_length # Put together a table of chords plus annotations (if requested) data = [[str(chord) for chord in seq], [str(chord.duration) for chord in seq]] if options.categories or options.all: if options.no_map: # Don't apply any mapping to the category names data.append([chord.category for chord in seq]) else: # Map the names to those used in the paper/thesis data.append([ annotation_to_lexicon_name(chord.category) for chord in seq ]) if options.coordinations or options.all: coords = [] for chord in seq: ti = chord.treeinfo if ti.coord_resolved and ti.coord_unresolved: coords.append(")(") elif ti.coord_resolved: coords.append(")") elif ti.coord_unresolved: coords.append("(") else: coords.append("") data.append(coords) pprint_table(sys.stdout, data, 
default_just=True) print
def from_file(filename, options={}):
    """
    Load a sequence index file and wrap every sequence in it as a
    C{DbInput}, returning them together as a C{DbBulkInput}.

    C{options} is accepted but not used here.

    """
    index = SequenceIndex.from_file(filename)
    inputs = []
    for sequence in index:
        inputs.append(DbInput.from_sequence(sequence))
    return DbBulkInput(inputs)
def main(): usage = "%prog [options] <in-file> [<index1> [<index2> ...]]" description = ( "Print the names of sequences in a sequence input " "file. Optionally specify indices of sequences. If no index " "is given, displays all sequences." ) parser = OptionParser(usage=usage, description=description) parser.add_option( "--sa", "-a", "--sort-alpha", "--alpha", dest="alphabetical", action="store_true", help="order sequences alphabetically by name", ) parser.add_option( "--sl", "--sort-length", dest="sort_length", action="store_true", help="order sequences by length" ) parser.add_option( "-i", "--index", dest="index", action="store_true", help="also display the indices in the sequence file of each sequence, in the column before the ids", ) parser.add_option("-l", "--lengths", dest="lengths", action="store_true", help="output lengths of the sequences") options, arguments = parser.parse_args() if len(arguments) < 1: print "You must specify an input file" sys.exit(1) seqs = SequenceIndex.from_file(arguments[0]) indices = [int(ind) for ind in arguments[1:]] if len(indices) == 0: sequences = seqs.sequences else: sequences = [seqs.sequence_by_index(index) for index in indices] if options.alphabetical: # Sort by string_name sequences.sort(key=lambda s: s.string_name) elif options.sort_length: # Sort by sequence length sequences.sort(key=lambda s: len(s)) header = ["Song name", "Id"] justs = [True, False] if options.lengths: header.append("Length") justs.append(False) if options.index: header.append("Index") justs.append(False) rows = [header] for seq in sequences: row = [seq.string_name, str(seq.id)] if options.lengths: row.append(str(len(seq))) if options.index: row.append(str(seqs.index_for_id(seq.id))) rows.append(row) pprint_table(sys.stdout, rows, justs=justs)
def main(): usage = "%prog [<options>] <model-name> <training-input>" description = "Training of PCFG models." parser = OptionParser(usage=usage, description=description) parser.add_option("-p", "--partitions", dest="partitions", action="store", type="int", \ help="Number of partitions to divide the data into. "\ "For train, divides the input file, trains a model on each "\ "partition's complement and appends partition number to "\ "the model names. For del, appends partition numbers to model "\ "names and deletes all the models. Recache does similarly. "\ "Has no effect for parse.") parser.add_option('--opts', dest="training_opts", action="store", help="options to pass to the model trainer. Type '--opts help' for a list of options") parser.add_option("--debug", dest="debug", action="store_true", help="Output verbose logging information to stderr") parser.add_option("-g", "--grammar", dest="grammar", action="store", help="use the named grammar instead of the default.") options, arguments = parse_args_with_config(parser) if options.debug: log_level = logging.DEBUG else: log_level = logging.WARN # Create a logger for training logger = create_logger(log_level = log_level, name = "training", stderr = True) # Load a grammar grammar = get_grammar(options.grammar) # Get the pcfg model class for the formalism PcfgModel = grammar.formalism.PcfgModel # Parse the option string if options.training_opts is None: opts = {} elif options.training_opts.lower() == "help": print options_help_text(PcfgModel.TRAINING_OPTIONS, intro="Training options for PCFGs") sys.exit(0) else: opts = ModuleOption.process_option_dict( ModuleOption.process_option_string(options.training_opts), PcfgModel.TRAINING_OPTIONS) if len(arguments) == 0: print >>sys.stderr, "Specify a model name" models = PcfgModel.list_models() print >>sys.stderr, "Available models: %s" % ", ".join(models) sys.exit(1) model_name = arguments[0] print "Model base name:", model_name if options.partitions is not None: parts = [(i, 
"%s%d" % (model_name, i)) for i in range(options.partitions)] else: parts = [(None, model_name)] if len(arguments) < 2: print >>sys.stderr, "Specify an input file to read sequence data from" sys.exit(1) # Read in the training data from the given file seqs = SequenceIndex.from_file(arguments[1]) if options.partitions is not None: # Prepare each training partition datasets = holdout_partition(seqs.sequences, options.partitions) else: datasets = [seqs.sequences] for dataset,(parti,part_model) in zip(datasets,parts): # Train the named model on the sequence data model = PcfgModel.train(part_model, dataset, opts, grammar=grammar, logger=logger) model.save() print "Trained model", part_model
def main(): usage = "%prog [<options>] <model-name> <training-input>" description = "Training of PCFG models." parser = OptionParser(usage=usage, description=description) parser.add_option("-p", "--partitions", dest="partitions", action="store", type="int", \ help="Number of partitions to divide the data into. "\ "For train, divides the input file, trains a model on each "\ "partition's complement and appends partition number to "\ "the model names. For del, appends partition numbers to model "\ "names and deletes all the models. Recache does similarly. "\ "Has no effect for parse.") parser.add_option( '--opts', dest="training_opts", action="store", help= "options to pass to the model trainer. Type '--opts help' for a list of options" ) parser.add_option("--debug", dest="debug", action="store_true", help="Output verbose logging information to stderr") parser.add_option("-g", "--grammar", dest="grammar", action="store", help="use the named grammar instead of the default.") options, arguments = parse_args_with_config(parser) if options.debug: log_level = logging.DEBUG else: log_level = logging.WARN # Create a logger for training logger = create_logger(log_level=log_level, name="training", stderr=True) # Load a grammar grammar = get_grammar(options.grammar) # Get the pcfg model class for the formalism PcfgModel = grammar.formalism.PcfgModel # Parse the option string if options.training_opts is None: opts = {} elif options.training_opts.lower() == "help": print options_help_text(PcfgModel.TRAINING_OPTIONS, intro="Training options for PCFGs") sys.exit(0) else: opts = ModuleOption.process_option_dict( ModuleOption.process_option_string(options.training_opts), PcfgModel.TRAINING_OPTIONS) if len(arguments) == 0: print >> sys.stderr, "Specify a model name" models = PcfgModel.list_models() print >> sys.stderr, "Available models: %s" % ", ".join(models) sys.exit(1) model_name = arguments[0] print "Model base name:", model_name if options.partitions is not None: parts = [(i, 
"%s%d" % (model_name, i)) for i in range(options.partitions)] else: parts = [(None, model_name)] if len(arguments) < 2: print >> sys.stderr, "Specify an input file to read sequence data from" sys.exit(1) # Read in the training data from the given file seqs = SequenceIndex.from_file(arguments[1]) if options.partitions is not None: # Prepare each training partition datasets = holdout_partition(seqs.sequences, options.partitions) else: datasets = [seqs.sequences] for dataset, (parti, part_model) in zip(datasets, parts): # Train the named model on the sequence data model = PcfgModel.train(part_model, dataset, opts, grammar=grammar, logger=logger) model.save() print "Trained model", part_model
def main(): usage = "%prog [options] <in-file> [<index1> [<index2> ...]]" description = "Print the names of sequences in a sequence input "\ "file. Optionally specify indices of sequences. If no index "\ "is given, displays all sequences." parser = OptionParser(usage=usage, description=description) parser.add_option("--sa", "-a", "--sort-alpha", "--alpha", dest="alphabetical", action="store_true", help="order sequences alphabetically by name") parser.add_option("--sl", "--sort-length", dest="sort_length", action="store_true", help="order sequences by length") parser.add_option( "-i", "--index", dest="index", action="store_true", help= "also display the indices in the sequence file of each sequence, in the column before the ids" ) parser.add_option("-l", "--lengths", dest="lengths", action="store_true", help="output lengths of the sequences") options, arguments = parser.parse_args() if len(arguments) < 1: print "You must specify an input file" sys.exit(1) seqs = SequenceIndex.from_file(arguments[0]) indices = [int(ind) for ind in arguments[1:]] if len(indices) == 0: sequences = seqs.sequences else: sequences = [seqs.sequence_by_index(index) for index in indices] if options.alphabetical: # Sort by string_name sequences.sort(key=lambda s: s.string_name) elif options.sort_length: # Sort by sequence length sequences.sort(key=lambda s: len(s)) header = ["Song name", "Id"] justs = [True, False] if options.lengths: header.append("Length") justs.append(False) if options.index: header.append("Index") justs.append(False) rows = [header] for seq in sequences: row = [seq.string_name, str(seq.id)] if options.lengths: row.append(str(len(seq))) if options.index: row.append(str(seqs.index_for_id(seq.id))) rows.append(row) pprint_table(sys.stdout, rows, justs=justs)
def prepare_evaluation_options(usage=None, description=None,
        optparse_options=[], check_args=None, optparse_groups=[]):
    """
    Various tasks common to the initial part of the evaluation routine
    scripts (C{models/eval.py}).

    Builds an option parser with the standard input/partitioning options
    plus any extra options and groups requested, parses the command line,
    loads the sequence index named by C{check_args} and divides the
    sequences up according to the partitioning options.

    @todo: This is not used any more. Remove it, after checking it's
        definitely not used.

    @note: The shape of the second return value depends on the option
        used. With C{--partitions} it is a list of lists of ids; with
        C{--partition} it is a single flat list of ids; with C{--sequence}
        it is a list containing one id; with none of them it is C{[None]}.
        This is kept as it was, in case any remaining caller relies on it.

    @param usage: the optparse usage string
    @param description: the optparse description string
    @type optparse_options: list of tuples
    @param optparse_options: (args,kwargs) pairs to add additional
        options to the optparse parser. Only read, never modified.
    @type check_args: function
    @param check_args: function to take the command-line arguments and
        check them. Must return a tuple of (1) the model name (or model
        basename) that will be used in the partition model names and
        (2) the input filename to get sequences from. It is called
        straight after the command line has been parsed.
    @type optparse_groups: list of pairs
    @param optparse_groups: specifications for option groups to add to the
        optparse option parser. The first of each pair is a tuple of
        args to C{OptionGroup}'s init (excluding the first).
        The second is a list of options each formatted as
        C{optparse_options}. If a group with the same title already
        exists, the options are added to that group. Only read, never
        modified.

    @raise ValueError: if C{check_args} is None

    @rtype: tuple
    @return: (1) list of (sequences,model_name,partition_index) tuples
        for each partition; (2) list of lists containing the sequence
        ids for each partition (but see the note above); (3) optparse
        options; (4) optparse arguments.

    """
    import sys
    # Imported here like every other dependency of this function: the
    # --debug branch needs logging.DEBUG
    import logging
    from optparse import OptionParser, OptionGroup
    from jazzparser.utils.config import parse_args_with_config
    from jazzparser.utils.loggers import init_logging
    from jazzparser.data.db_mirrors import SequenceIndex
    from jazzparser.utils.data import partition

    parser = OptionParser(usage=usage, description=description)
    group = OptionGroup(parser, "Input", "Input data and partitioning for evaluation")
    group.add_option("-s", "--sequence", dest="sequence", action="store", help="limit the evaluation to just one sequence, with the given index in the input file")
    group.add_option("--partition", dest="partition", action="store", help="restrict to only one partition of the data. Specify as i/n, where i is the partition number and n the total number of partitions.")
    group.add_option("-p", "--partitions", dest="partitions", type="int", action="store", help="test on all n partitions of the data, using a different model for each. Will look for a model <NAME>i, where <NAME> is the given model name and i the partition number.")
    parser.add_option_group(group)

    parser.add_option("--debug", dest="debug", action="store_true", help="show debugging output")

    # Add the options according to their specs
    for args, kwargs in optparse_options:
        parser.add_option(*args, **kwargs)

    # Add groups and their options
    for group_args, options in optparse_groups:
        # Check whether the group already exists
        same_titles = [g for g in parser.option_groups if g.title == group_args[0]]
        if same_titles:
            group = same_titles[0]
        else:
            group = OptionGroup(parser, *group_args)
            parser.add_option_group(group)
        # Add options to this group
        for args, kwargs in options:
            group.add_option(*args, **kwargs)
    options, arguments = parse_args_with_config(parser)

    if check_args is None:
        raise ValueError("could not check arguments and get model "
                         "name. check_args must not be None")
    model_name, input_filename = check_args(arguments)

    if options.debug:
        # Set the log level to debug and do the standard logging init
        init_logging(logging.DEBUG)
    else:
        init_logging()

    # Load up sequences
    seqs = SequenceIndex.from_file(input_filename)

    def _get_seq_by_index(index):
        # Exit with an error message if there is no sequence at this index
        seq = seqs.sequence_by_index(index)
        if seq is None:
            print >>sys.stderr, "There are only %d sequences" % len(seqs)
            sys.exit(1)
        return seq

    ################ Data partitioning ####################
    if options.partitions is not None:
        # Divide the data up into n partitions and use a different model name for each
        total_parts = options.partitions
        print >>sys.stderr, "Cross validation: dividing test data into %d partitions" % total_parts
        partitions = [(part, "%s%d" % (model_name, i), i) for i, part in enumerate(partition(seqs.sequences, total_parts))]
        part_ids = partition(seqs.ids, total_parts)
    elif options.partition is not None:
        # Just select one partition
        # Split up the argument to get two integers
        parti, total_parts = options.partition.split("/")
        parti, total_parts = int(parti), int(total_parts)
        print >>sys.stderr, "Restricting sequences to %d-way partition %d" % (total_parts, parti)
        # Get a list of sequence indices to restrict our set to
        part_ids = partition(seqs.ids, total_parts)[parti]
        partitions = [[(part, "%s%d" % (model_name, i), i) for i, part in enumerate(partition(seqs.sequences, total_parts))][parti]]
    elif options.sequence is not None:
        # Just select one sequence
        seq = _get_seq_by_index(int(options.sequence))
        partitions = [([seq], model_name, 0)]
        part_ids = [seq.id]
    else:
        # Don't partition the sequences
        partitions = [(seqs.sequences, model_name, 0)]
        part_ids = [None]

    return partitions, part_ids, options, arguments
def main(): usage = "%prog [options] <seq-file>" description = "Parses a sequence from a sequence index file using the "\ "annotations stored in the same file." parser = OptionParser(usage=usage, description=description) parser.add_option( "--popt", "--parser-options", dest="popts", action="append", help= "specify options for the parser. Type '--popt help' to get a list of options (we use a DirectedCkyParser)" ) parser.add_option("--derivations", "--deriv", dest="derivations", action="store_true", help="print out derivation traces of all the results") parser.add_option("--index", "-i", dest="index", action="store", type="int", help="parse just the sequence with this index") parser.add_option("--quiet", "-q", dest="quiet", action="store_true", help="show only errors in the output") parser.add_option( "--tonal-space", "--ts", dest="tonal_space", action="store_true", help="show the tonal space path (with -q, shows only paths)") parser.add_option( "--output-set", "-o", dest="output_set", action="store", help="store the analyses to a tonal space analysis set with this name") parser.add_option( "--trace-parse", "-t", dest="trace_parse", action="store_true", help= "output a trace of the shift-reduce parser's operations in producing the full interpretation from the annotations" ) options, arguments = parser.parse_args() if len(arguments) < 1: print "You must specify a sequence file" sys.exit(1) if options.popts is not None: poptstr = options.popts if "help" in [s.strip().lower() for s in poptstr]: # Output this tagger's option help print options_help_text( DirectedCkyParser.PARSER_OPTIONS, intro="Available options for the directed parser") return 0 else: poptstr = "" popts = ModuleOption.process_option_string(poptstr) grammar = get_grammar() if options.quiet: logger = create_plain_stderr_logger(log_level=logging.ERROR) else: logger = create_plain_stderr_logger() if options.trace_parse: parse_logger = logger else: parse_logger = None seq_index = 
SequenceIndex.from_file(arguments[0]) # Get the chord sequence(s) if options.index is None: seqs = seq_index.sequences else: seqs = [seq_index.sequence_by_index(options.index)] logger.info("%d sequences\n" % len(seqs)) full_analyses = [] stats = { 'full': 0, 'partial': 0, 'fail': 0, } # Try parsing every sequence for seq in seqs: logger.info("====== Sequence %s =======" % seq.string_name) try: results = parse_sequence_with_annotations( seq, grammar, logger=logger, parse_logger=parse_logger) except ParseError, err: logger.error("Error parsing: %s" % err) stats['fail'] += 1 else: # This may have resulted in multiple partial parses logger.info("%d partial parses" % len(results)) if len(results) == 1: stats['full'] += 1 else: stats['partial'] += 1 if options.derivations: # Output the derivation trace for each partial parse for result in results: print print result.derivation_trace if options.tonal_space: # Output the tonal space coordinates path = grammar.formalism.sign_to_coordinates(results[0]) for i, point in enumerate(path): print "%d, %d: %s" % (seq.id, i, point) # Only include a result in the output analyses if it was a full parse if len(results) == 1: full_analyses.append((seq.string_name, results[0].semantics)) else: logger.warn("%s was not included in the output analyses, "\ "since it was not fully parsed" % seq.string_name)
def main(): usage = "%prog [options] <seq-file>:<index> <midi-file> <midi-out>" description = "Aligns a chord sequence with a MIDI file and inserts "\ "marker events into the MIDI data to mark where chord changes "\ "are. Alignment parameters will be loaded from a file (not "\ "implemented yet), but can be overridden using the script's "\ "options." parser = OptionParser(usage=usage, description=description) parser.add_option( "--mbpb", "--midi-beats-per-beat", dest="beats_per_beat", type="int", help= "number of midi beats to align with a single sequence beat (see SequenceMidiAlignment.midi_beats_per_beat)" ) parser.add_option( "--ss", "--sequence-start", dest="sequence_start", type="int", help= "number of midi ticks after the first note-on event when the chord sequence begins (see SequenceMidiAlignment.sequence_start)" ) parser.add_option( "--repeats", dest="repeats", help= "repeat spans, in the form 'start_chord,end_chord,count', separated by semicolons (see SequenceMidiAlignment.repeat_spans)" ) parser.add_option( "--lyrics", dest="lyrics", action="store_true", help="use lyrics events instead of marker events to mark the chords") options, arguments = parser.parse_args() if len(arguments) < 3: print "You must specify a sequence file, midi file and output midi filename" sys.exit(1) # Get the chord sequence filename, __, index = arguments[0].partition(":") index = int(index) seq = SequenceIndex.from_file(filename).sequence_by_index(index) # Load the input midi data mid = read_midifile(arguments[1]) outfile = arguments[2] # For now, just create a new default alignment # TODO: load the alignment parameters from a file or from the # sequence data itself alignment = SequenceMidiAlignment() # Override alignment parameters if options are given if options.beats_per_beat is not None: alignment.midi_beats_per_beat = options.beats_per_beat if options.sequence_start is not None: alignment.sequence_start = options.sequence_start if options.repeats is not None: repeats = [] 
try: for string_triple in options.repeats.split(":"): start, end, count = string_triple.split(",") start, end, count = int(start), int(end), int(count) repeats.append((start, end, count)) except: print "Error parsing repeat spans:" raise alignment.repeat_spans = repeats alignment.align(seq, mid, lyrics=options.lyrics) write_midifile(mid, outfile)
def main(): usage = "%prog [options] <names-index> <seq-index>" description = "Loads the MIDI downloaded files in the names index "\ "and the sequence index with the chord sequences in "\ "it and performs operations on the MIDI files. "\ "By default, counts the files for each sequence." parser = OptionParser(usage=usage, description=description) parser.add_option("-z", "--zeroes", dest="zeroes", action="store_true", help="display the names of sequences with no midi files") parser.add_option( "-f", "--few", dest="few", action="store", type="int", help= "display the names of sequences with few midi files, below the given threshold" ) parser.add_option( "--names", dest="names", action="store_true", help= "only show the names in the output, not the numbers (only applies to --zeroes or --few)" ) parser.add_option( "-d", "--diff", dest="diff", action="store_true", help= "check every pair of files for each sequence and report the similarity of the midi notes" ) parser.add_option( "--min-diff", dest="min_diff", action="store", type="float", help= "the minimum similarity the report when diffing files (see --diff). By default, all are reported (i.e. 
0)", default=0.0) options, arguments = parser.parse_args() if len(arguments) == 0: print >> sys.stderr, "You must specify a names index file" sys.exit(1) if len(arguments) == 1: print >> sys.stderr, "You must specify a sequence index file" sys.exit(1) names_filename = os.path.abspath(arguments[0]) # Use this directory to get midi files from midi_base_dir = os.path.dirname(names_filename) names_file = open(names_filename, 'r') names = UnicodeCsvReader(names_file) lines = list(names) # Load the sequence index file seq_filename = arguments[1] sequences = SequenceIndex.from_file(seq_filename) # Index the entries in the names index by the sequence id midi_seqs = {} for row in lines[1:]: # Col 0: filename # Col 1: name from web page midi_seqs.setdefault(int(row[2]), []).append((row[0], row[1])) # Filter out the ones that don't exist def _exists(filename): return os.path.exists(os.path.join(midi_base_dir, filename)) existing_seqs = dict([(seq_id, list( set([(filename, name) for (filename, name) in files if _exists(filename)]))) for (seq_id, files) in midi_seqs.items()]) def _load_midi(filename): return read_midifile(open(os.path.join(midi_base_dir, filename), 'r')) if options.zeroes or options.few is not None: # Look for sequences with few (or no) midi files if options.zeroes: threshold = 1 else: threshold = options.few seq_counts = [ (seq, 0 if seq.id not in existing_seqs else len(existing_seqs[seq.id])) for seq in sequences ] few_seqs = [(seq, count) for (seq, count) in seq_counts if count < threshold] if options.names: print "\n".join([seq.string_name for (seq, count) in few_seqs]) else: print "\n".join([ "%s (%d)" % (seq.string_name, count) for (seq, count) in few_seqs ]) elif options.diff: # Measure the similarity between each pair of files for seq_id, files in existing_seqs.items(): seq = sequences.sequence_by_id(seq_id) print "%s (%d)" % (seq.string_name, len(files)) # Compare every pair for i, (filename0, __) in enumerate(files): mid0 = _load_midi(filename0) for 
(filename1, __) in files[:i]: mid1 = _load_midi(filename1) similarity0, similarity1 = note_on_similarity(mid0, mid1) if similarity0 >= options.min_diff: print " %s, %s: %f" % (filename0, filename1, similarity0) if similarity1 >= options.min_diff: print " %s, %s: %f" % (filename1, filename0, similarity1) else: # By default, count the midi files found for each sequence for seq in sequences: files = existing_seqs.get(seq.id, []) print "%s\t%d" % (seq.string_name, len(files))
def main(): usage = "%prog [options] <in-file>" description = "Reads in a sequence index file and tries to find "\ "midi files of each song by looking up the name online. Writes "\ "them all to the given directory." parser = OptionParser(usage=usage, description=description) parser.add_option("-i", "--index", dest="index", action="store", type="int", help="select a single sequence by index from the file and just get files for that sequence") parser.add_option("-n", "--name", dest="name", action="store_true", help="interpret the arguments as a song name to look up directly instead of fetching the name of a sequence from a file") parser.add_option("-v", "--verbose", dest="verbose", action="store_true", help="verbose output") parser.add_option("-s", "--source", dest="sources", action="append", help="sources to get midi files from (use option multiple times for multiple sources). Possible values: %s. Default: all sources." % ", ".join(SOURCES)) parser.add_option("-r", "--resume", dest="resume", action="store", type="int", help="resume lookup at the given sequence index. Sequences before this index will be skipped at the names entries will be appended to an existing file.") parser.add_option("-d", "--dir", dest="dir", action="store", help="directory to output files to. 
By default, outputs to the current directory") options, arguments = parser.parse_args() if options.dir is not None: outdir = os.path.abspath(options.dir) else: outdir = os.path.abspath(os.getcwd()) if not os.path.isdir(outdir): print >>sys.stderr, "%s is not a directory" % outdir if options.name is not None: sequences = [(" ".join(arguments),None)] else: if len(arguments) == 0: print >>sys.stderr, "You must specify an input sequence index file" sys.exit(1) filename = os.path.abspath(arguments[0]) # Read in the data file seqs = SequenceIndex.from_file(filename) if options.index is not None: seq = seqs.sequence_by_index(options.index) sequences = [(seq.name,seq.id)] elif options.resume is not None: sequences = [(seq.name,seq.id) for seq in seqs.sequences[options.resume:]] else: sequences = [(s.name,s.id) for s in seqs.sequences] if options.verbose: verbose_out = sys.stderr out_prefix = ">>> " else: verbose_out = None out_prefix = "" # Output a name list if options.resume is None: namefile = open(os.path.join(outdir, "NAMES"), 'w') else: # Append data to the old file namefile = open(os.path.join(outdir, "NAMES"), 'a') try: names = UnicodeCsvWriter(namefile) if options.resume is None: # Add a header if we're not appending to an old file names.writerow(['Filename','Reported song name','Database id']) for seq_name,seq_id in sequences: print "%sLooking up %s" % (out_prefix, seq_name) files = find_midi_files(seq_name, sources=options.sources, verbose_out=verbose_out) print "%s Found %d files" % (out_prefix, len(files)) # Create a suitable base filename base_filename = "_".join(\ seq_name.encode('ascii', 'ignore').translate(string.maketrans("",""), string.punctuation).lower().split()) for i,(data,name) in enumerate(files): filename = u"%s-%d.mid" % (base_filename,i) full_filename = os.path.join(outdir, filename) # Write each midi file out individually f = open(full_filename, 'w') f.write(data) f.close() # Keep a list of the name reported for each file 
names.writerow([filename,name,seq_id]) namefile.flush() finally: namefile.close()