Пример #1
0
def main():
    usage = "%prog [options] <in-file>"
    parser = OptionParser(usage=usage)
    parser.add_option("-p", "--partitions", dest="partitions", action="store", type="int", default=DEFAULT_PARTITIONS, help="the number of partitions to use (default: %d)" % DEFAULT_PARTITIONS)
    parser.add_option("--ids", dest="ids", action="store_true", help="don't output any files - just print out a list of the ids of the sequences in each partition")
    options, arguments = parser.parse_args()
        
    if len(arguments) == 0:
        print >>sys.stderr, "You must specify an input data file"
        sys.exit(1)
    filename = os.path.abspath(arguments[0])
    
    # Read in the data file
    seqs = SequenceIndex.from_file(filename)
    
    part_pattern = "%s.part%%d" % filename
    heldout_pattern = "%s.heldout_part%%d" % filename
    # Divide the data up into partitions, with their complements
    parts = zip(partition(seqs.sequences, options.partitions), holdout_partition(seqs.sequences, options.partitions))
    # Save each partition and its complement
    for i,(part,heldout) in enumerate(parts):
        if options.ids:
            # Just print out a list of the ids in the partition
            print " ".join(["%d" % s.id for s in part])
        else:
            save_sequences(part_pattern % i, part)
            save_sequences(heldout_pattern % i, heldout)
            print >>sys.stderr, "Wrote partition %d to %s and %s" % (i,part_pattern % i,heldout_pattern % i)
Пример #2
0
def main():
    usage = "%prog [options] <seq-file> <index>"
    description = "Displays a tree for the annotated derivation of a chord "\
        "sequence in the gold standard"
    parser = OptionParser(usage=usage, description=description)
    options, arguments = parser.parse_args()

    if len(arguments) < 2:
        print "You must specify a sequence file and index"
        sys.exit(1)

    index = int(arguments[1])
    # Get the chord sequence
    sequence = SequenceIndex.from_file(arguments[0]).sequence_by_index(index)

    try:
        # Show the song name
        print "Tree for '%s'" % sequence.string_name
        tree = build_tree_for_sequence(sequence)
        # Output the linear textual form of the tree
        print tree
        # Display the tree using NLTK
        ntree = tree_to_nltk(tree)
        ntree.draw()
    except TreeBuildError, err:
        print >> sys.stderr, "Error parsing: %s" % err
        sys.exit(1)
Пример #3
0
 def test_from_sequence(self):
     """Builds a DbInput from the first sequence of the sequence index file."""
     # Read the index file and take its first sequence
     first_seq = SequenceIndex.from_file(DB_SEQUENCES_FILE).sequences[0]
     # The test only requires that the construction does not raise
     DbInput.from_sequence(first_seq)
Пример #4
0
def main():
    """
    Command-line entry point: reads a sequence index file and writes the
    gold standard tags of every sequence to a tag sequence file.

    """
    parser = OptionParser(
        usage="%prog [options] <in-file> <out-file>",
        description="Reads a sequence index file and produces a tag sequence "
                    "file containing the gold standard tags for every sequence")
    options, arguments = parser.parse_args()

    if len(arguments) < 2:
        print >> sys.stderr, "You must specify input and output data files"
        sys.exit(1)
    in_filename, out_filename = [os.path.abspath(arg) for arg in arguments[:2]]

    # Load the sequences and map each sequence id to its list of tags
    index = SequenceIndex.from_file(in_filename)
    tags = dict(
        (seq.id, [chord.category for chord in seq]) for seq in index.sequences)

    # Store the tags in the output file
    TagsFile(tags).to_file(out_filename)

    print >> sys.stderr, "Wrote tags data to %s" % out_filename
Пример #5
0
def main():
    """
    Command-line entry point: converts every sequence of a sequence index
    file to C&C supertagger training data and writes it to a file.

    Expects two positional arguments: the input data file and the output
    file. Exits with status 1 if either is missing.

    """
    usage = "%prog [options] <in-file> <out-file>"
    parser = OptionParser(usage=usage)
    options, arguments = parser.parse_args()

    if len(arguments) < 2:
        print >> sys.stderr, "You must specify input and output data files"
        sys.exit(1)
    in_filename = os.path.abspath(arguments[0])
    out_filename = os.path.abspath(arguments[1])

    # Read in the data file
    seqs = SequenceIndex.from_file(in_filename)

    # Convert each sequence to C&C supertagger training data. This is done
    # before the output file is opened, so a failed conversion does not
    # leave a truncated file behind.
    output = [sequence_to_candc_chord_super(seq) for seq in seqs.sequences]

    # Output the results to a file; the with-block closes it even if the
    # write fails
    with open(out_filename, 'w') as outfile:
        outfile.write("".join(output))

    print >> sys.stderr, "Wrote C&C supertagger training data to %s" % out_filename
Пример #6
0
def prepare_db_input():
    """
    Loads the sequence index file named by the test settings and builds 
    a parser input from its first sequence.
    
    Tests may use this to get hold of some example input data.
    
    @note: More items may be added to the returned tuple in the future, 
    so pick out the ones you need by index instead of relying on the 
    tuple's size.
    
    @rtype: tuple
    @return: (sequence index, sequence, DbInput instance)
    
    """
    from jazzparser.data.db_mirrors import SequenceIndex
    from jazzparser.data.input import DbInput
    from jazzparser.settings import TEST as settings

    index = SequenceIndex.from_file(settings.SEQUENCE_DATA)
    first = index.sequences[0]
    return index, first, DbInput.from_sequence(first)
Пример #7
0
def main():
    """
    Command-line entry point: converts every sequence of a sequence index
    file to C&C supertagger training data and writes it to a file.

    Expects two positional arguments: the input data file and the output
    file. Exits with status 1 if either is missing.

    """
    usage = "%prog [options] <in-file> <out-file>"
    parser = OptionParser(usage=usage)
    options, arguments = parser.parse_args()
        
    if len(arguments) < 2:
        print >>sys.stderr, "You must specify input and output data files"
        sys.exit(1)
    in_filename = os.path.abspath(arguments[0])
    out_filename = os.path.abspath(arguments[1])
    
    # Read in the data file
    seqs = SequenceIndex.from_file(in_filename)
    
    # Convert each sequence to C&C supertagger training data. This is done
    # before the output file is opened, so a failed conversion does not
    # leave a truncated file behind.
    output = [sequence_to_candc_chord_super(seq) for seq in seqs.sequences]
    
    # Output the results to a file; the with-block closes it even if the
    # write fails
    with open(out_filename, 'w') as outfile:
        outfile.write("".join(output))
    
    print >>sys.stderr, "Wrote C&C supertagger training data to %s" % out_filename
def main():
    """
    Command-line entry point: produces a tag sequence file holding the
    gold standard tags of every sequence in a sequence index file.

    """
    parser = OptionParser(
        usage="%prog [options] <in-file> <out-file>",
        description="Reads a sequence index file and produces a tag sequence "
                    "file containing the gold standard tags for every sequence")
    options, arguments = parser.parse_args()

    if len(arguments) < 2:
        print >>sys.stderr, "You must specify input and output data files"
        sys.exit(1)
    source_path = os.path.abspath(arguments[0])
    target_path = os.path.abspath(arguments[1])

    # Load the sequences from the input file
    index = SequenceIndex.from_file(source_path)

    # Map the id of each sequence to the list of its tags
    tags_by_id = dict(
        (seq.id, [chord.category for chord in seq]) for seq in index.sequences)

    # Write the tags to the output file
    TagsFile(tags_by_id).to_file(target_path)

    print >>sys.stderr, "Wrote tags data to %s" % target_path
Пример #9
0
def prepare_db_input():
    """
    Reads the sequence index file given by the test settings, takes its 
    first sequence and prepares that sequence as input to the parser.
    
    This is meant for tests that need example input data.
    
    @note: The number of returned items is not guaranteed to stay the 
    same, since more may be added later. Access the items returned at 
    present by their index.
    
    @rtype: tuple
    @return: (sequence index, sequence, DbInput instance)
    
    """
    from jazzparser.data.db_mirrors import SequenceIndex
    from jazzparser.data.input import DbInput
    from jazzparser.settings import TEST as settings
    
    sequence_index = SequenceIndex.from_file(settings.SEQUENCE_DATA)
    example = sequence_index.sequences[0]
    return sequence_index, example, DbInput.from_sequence(example)
Пример #10
0
def main():
    usage = "%prog [options] <seq-file> <out-file>"
    description = "Outputs a full corpus from a sequence index file to a text "\
        "file that can more easily be read by other people. If <out-file> is "\
        "omitted, data is output to stdout"
    parser = OptionParser(usage=usage, description=description)
    options, arguments = parser.parse_args()
    
    if len(arguments) < 1:
        print "You must specify a sequence file"
        sys.exit(1)
        
    # Get the chord sequence
    seqindex = SequenceIndex.from_file(arguments[0])
    if len(arguments) > 1:
        # Open a file to write to
        outfile = open(arguments[1], 'w')
    else:
        # Output to stdout
        outfile = sys.stdout
    
    try:
        output_sequence_index(seqindex, outfile)
    finally:
        outfile.close()
Пример #11
0
 def test_from_sequence(self):
     """Constructs a DbInput from a sequence taken out of the index file."""
     sequence_index = SequenceIndex.from_file(DB_SEQUENCES_FILE)
     # Use the first sequence in the file; the construction just has to
     # complete without an error
     DbInput.from_sequence(sequence_index.sequences[0])
Пример #12
0
def count_categories(options, arguments):
    # Read in the sequence data from the file
    filename = os.path.abspath(arguments[0])
    seqs = SequenceIndex.from_file(filename)
    
    category_counts = {}
    total = 0
    # Count up how many times each category is used
    for seq in seqs.sequences:
        for chord in seq.iterator():
            total += 1
            if chord.category not in category_counts:
                category_counts[chord.category] = 1
            else:
                category_counts[chord.category] += 1
    table_header = [['Category','Count','%']]
    table_data = []
    for cat,count in category_counts.items():
        category = cat or "No category"
        percent = float(count) / float(total) * 100.0
        table_data.append([category, count, percent])
    # Sort the rows by the count
    table_data = reversed(sorted(table_data, key=lambda d: d[1]))
    # Now format the numbers
    table_data = [[row[0], "%s" % row[1], "%.02f" % row[2]] for row in table_data]
    # Add the header on the top
    table_data = table_header + table_data
    if options.csv:
        print "\n".join([",".join([v for v in row]) for row in table_data])
    else:
        pprint_table(sys.stdout, table_data, [True,False,False], "|")
        print "Total chords: %s" % total
    return 0
Пример #13
0
def main():
    usage = "%prog [options] <seq-file> <index>"
    description = "Displays a tree for the annotated derivation of a chord "\
        "sequence in the gold standard"
    parser = OptionParser(usage=usage, description=description)
    options, arguments = parser.parse_args()
    
    if len(arguments) < 2:
        print "You must specify a sequence file and index"
        sys.exit(1)
        
    index = int(arguments[1])
    # Get the chord sequence
    sequence = SequenceIndex.from_file(arguments[0]).sequence_by_index(index)
    
    try:
        # Show the song name
        print "Tree for '%s'" % sequence.string_name
        tree = build_tree_for_sequence(sequence)
        # Output the linear textual form of the tree
        print tree
        # Display the tree using NLTK
        ntree = tree_to_nltk(tree)
        ntree.draw()
    except TreeBuildError, err:
        print >>sys.stderr, "Error parsing: %s" % err
        sys.exit(1)
Пример #14
0
def main():
    usage = "%prog <in-file> <part>/<parts>"
    description = "Takes a sequence data file, partitions it into the "\
        "number of partitions given and prints out the indices of the "\
        "sequences the appear in the requested partition. Specify the "\
        "partition number (from 0) and total number of partitions in the "\
        "form <partition-num>/<total-parts>."
    parser = OptionParser(usage=usage, description=description)
    options, arguments = parser.parse_args()
        
    if len(arguments) == 0:
        print >>sys.stderr, "You must specify an input data file"
        sys.exit(1)
    elif len(arguments) == 1:
        print >>sys.stderr, "You must give a partition specifier: <part>/<parts>"
    filename = os.path.abspath(arguments[0])
    part, parts = arguments[1].split("/")
    part, parts = int(part), int(parts)
    
    # Read in the data file
    seqs = SequenceIndex.from_file(filename)
    
    # Partition the sequences
    indices = range(len(seqs))
    # Use the partition function to ensure this partitioning is consistent
    #  with all other places the sequences get partitioned
    all_parts = partition(indices, parts)
    print " ".join(["%d" % i for i in all_parts[part]])
Пример #15
0
def main():
    usage = "%prog [options] <seq-file> <out-file>"
    description = "Outputs a full corpus from a sequence index file to a text "\
        "file that can more easily be read by other people. If <out-file> is "\
        "omitted, data is output to stdout"
    parser = OptionParser(usage=usage, description=description)
    options, arguments = parser.parse_args()

    if len(arguments) < 1:
        print "You must specify a sequence file"
        sys.exit(1)

    # Get the chord sequence
    seqindex = SequenceIndex.from_file(arguments[0])
    if len(arguments) > 1:
        # Open a file to write to
        outfile = open(arguments[1], 'w')
    else:
        # Output to stdout
        outfile = sys.stdout

    try:
        output_sequence_index(seqindex, outfile)
    finally:
        outfile.close()
 def from_file(filename, options={}):
     """
     Reads a sequence index file and builds an AnnotatedDbInput from the 
     sequence selected by options["index"].
     
     Raises InputReadError if no sequence is found for that index.
     
     """
     # Load the whole sequence index from the named file
     seq_index = SequenceIndex.from_file(filename)
     # Look up the requested sequence
     wanted = options["index"]
     found = seq_index.sequence_by_index(wanted)
     if found is not None:
         return AnnotatedDbInput.from_sequence(found)
     raise InputReadError("%d is not a valid sequence index in %s" % (wanted, filename))
Пример #17
0
 def from_file(filename, options={}):
     """
     Builds an AnnotatedDbInput from one sequence of a sequence index 
     file. The sequence is chosen by options['index'].
     
     Raises InputReadError if the index does not identify a sequence.
     
     """
     # Read the sequence index stored in the file
     loaded = SequenceIndex.from_file(filename)
     # Pick out the sequence with the requested index
     seq_number = options['index']
     chosen = loaded.sequence_by_index(seq_number)
     if chosen is not None:
         return AnnotatedDbInput.from_sequence(chosen)
     message = "%d is not a valid sequence index in %s" % (seq_number, filename)
     raise InputReadError(message)
Пример #18
0
def train_model_on_sequence_data(model, data_filename, *args, **kwargs):
    """
    Trains a model from a db_mirrors sequence data file.
    
    Works like train_model, except that the input is a sequence data 
    file instead of a C&C training data file. Any further arguments are 
    handed on to train_model.
    
    """
    # Load the training sequences
    sequence_index = SequenceIndex.from_file(data_filename)
    # Write them out as C&C training data to a temporary file. The file 
    # object stays referenced here for as long as the training runs.
    training_file = sequence_index_to_training_file(sequence_index)
    train_model(model, training_file.name, *args, **kwargs)
Пример #19
0
 def run(self, args, state):
     from jazzparser.data.db_mirrors import SequenceIndex
     from .shell import ShellError
     if len(args) < 1:
         raise ShellError, "Please specify a file to load"
     filename = args[0]
     # Load the data file
     si = SequenceIndex.from_file(filename)
     # Store it in the state
     state.gs_sequences = si
     print "Loaded %d gold standard sequences from %s" % (len(si), filename)
Пример #20
0
def train_model_on_sequence_data(model, data_filename, *args, **kwargs):
    """
    Variant of train_model whose input is a db_mirrors sequence data 
    file rather than a C&C training data file.
    
    All additional arguments are passed through to train_model.
    
    """
    # Get the sequences to train on
    sequences = SequenceIndex.from_file(data_filename)
    # Produce a temporary file of C&C training data from them. Holding 
    # the file object in a local keeps it referenced during training.
    temp_data = sequence_index_to_training_file(sequences)
    train_model(model, temp_data.name, *args, **kwargs)
Пример #21
0
 def run(self, args, state):
     from jazzparser.data.db_mirrors import SequenceIndex
     from .shell import ShellError
     if len(args) < 1:
         raise ShellError, "Please specify a file to load"
     filename = args[0]
     # Load the data file
     si = SequenceIndex.from_file(filename)
     # Store it in the state
     state.gs_sequences = si
     print "Loaded %d gold standard sequences from %s" % (len(si),filename)
Пример #22
0
def main():
    usage = "%prog [options] <seq-file>"
    description = "Outputs the details of all chord sequences from a "\
        "sequence index file to stdout. This is for getting a "\
        "(relatively) human-readable form of the data"
    parser = OptionParser(usage=usage, description=description)
    parser.add_option("--categories", "-c", dest="categories", action="store_true", help="include category annotations")
    parser.add_option("--coordinations", "-o", dest="coordinations", action="store_true", help="include coordination annotations")
    parser.add_option("--meta", "-m", dest="meta", action="store_true", help="output sequence meta data")
    parser.add_option("--no-map", "-n", dest="no_map", action="store_true", help="don't apply a mapping from the names in the corpus to those used in the paper")
    parser.add_option("--all", "-a", dest="all", action="store_true", help="output everything")
    options, arguments = parser.parse_args()
    
    if len(arguments) < 1:
        print "You must specify a sequence file"
        sys.exit(1)
       
    # Get the chord sequence
    seqs = SequenceIndex.from_file(arguments[0])
    
    # Show the song name
    for seq in seqs:
        print "Chords for '%s'" % seq.string_name
        
        if options.meta or options.all:
            print "Main key:    %s" % seq.key
            print "Bar length:  %d" % seq.bar_length
        
        # Put together a table of chords plus annotations (if requested)
        data = [[ str(chord) for chord in seq ], 
                [ str(chord.duration) for chord in seq ]]
        if options.categories or options.all:
            if options.no_map:
                # Don't apply any mapping to the category names
                data.append([ chord.category for chord in seq ])
            else:
                # Map the names to those used in the paper/thesis
                data.append([ annotation_to_lexicon_name(chord.category) for chord in seq ])
        if options.coordinations or options.all:
            coords = []
            for chord in seq:
                ti = chord.treeinfo
                if ti.coord_resolved and ti.coord_unresolved:
                    coords.append(")(")
                elif ti.coord_resolved:
                    coords.append(")")
                elif ti.coord_unresolved:
                    coords.append("(")
                else:
                    coords.append("")
            data.append(coords)
        pprint_table(sys.stdout, data, default_just=True)
        print
Пример #23
0
def main():
    usage = "%prog [options] <seq-file>:<index> <midi-file> <midi-out>"
    description = "Aligns a chord sequence with a MIDI file and inserts "\
        "marker events into the MIDI data to mark where chord changes "\
        "are. Alignment parameters will be loaded from a file (not "\
        "implemented yet), but can be overridden using the script's "\
        "options."
    parser = OptionParser(usage=usage, description=description)
    parser.add_option("--mbpb", "--midi-beats-per-beat", dest="beats_per_beat", type="int", help="number of midi beats to align with a single sequence beat (see SequenceMidiAlignment.midi_beats_per_beat)")
    parser.add_option("--ss", "--sequence-start", dest="sequence_start", type="int", help="number of midi ticks after the first note-on event when the chord sequence begins (see SequenceMidiAlignment.sequence_start)")
    parser.add_option("--repeats", dest="repeats", help="repeat spans, in the form 'start_chord,end_chord,count', separated by semicolons (see SequenceMidiAlignment.repeat_spans)")
    parser.add_option("--lyrics", dest="lyrics", action="store_true", help="use lyrics events instead of marker events to mark the chords")
    options, arguments = parser.parse_args()
    
    if len(arguments) < 3:
        print "You must specify a sequence file, midi file and output midi filename"
        sys.exit(1)
        
    # Get the chord sequence
    filename,__,index = arguments[0].partition(":")
    index = int(index)
    seq = SequenceIndex.from_file(filename).sequence_by_index(index)
    
    # Load the input midi data
    mid = read_midifile(arguments[1])
    
    outfile = arguments[2]
    
    # For now, just create a new default alignment
    # TODO: load the alignment parameters from a file or from the 
    #  sequence data itself
    alignment = SequenceMidiAlignment()
    
    # Override alignment parameters if options are given
    if options.beats_per_beat is not None:
        alignment.midi_beats_per_beat = options.beats_per_beat
    if options.sequence_start is not None:
        alignment.sequence_start = options.sequence_start
    if options.repeats is not None:
        repeats = []
        try:
            for string_triple in options.repeats.split(":"):
                start,end,count = string_triple.split(",")
                start,end,count = int(start), int(end), int(count)
                repeats.append((start,end,count))
        except:
            print "Error parsing repeat spans:"
            raise
        alignment.repeat_spans = repeats
    
    alignment.align(seq, mid, lyrics=options.lyrics)
    
    write_midifile(mid, outfile)
Пример #24
0
def main():
    parser = OptionParser()
    usage = "%prog [options] [<seq-db-file>]"
    description = "Measure the degree of ambiguity (average cats per chord) "\
        "for a grammar over a particular dataset"
    parser.add_option('-g',
                      '--grammar',
                      dest='grammar',
                      action='store',
                      help='Speficy a grammar by name')
    options, arguments = parser.parse_args()

    if len(arguments) < 1:
        print "No sequence index file given: grammar stats only"
        seq_file = None
    else:
        seq_file = arguments[0]
    # Load the grammar
    grammar = get_grammar(options.grammar)

    # Some stats about ambiguity in the grammar
    table = []
    class_cats = []
    for class_name, chord_class in grammar.chord_classes.items():
        if class_name not in EXCLUDE_CLASSES:
            cats = grammar.get_signs_for_word(str(chord_class.words[0]))
            table.append([str(class_name), str(len(cats))])
            class_cats.append(len(cats))

    table.append(["Mean", "%.2f" % (float(sum(class_cats)) / len(class_cats))])
    table.append(["Std dev", "%.2f" % (std(class_cats))])
    print "Cats for each chord class:"
    pprint_table(sys.stdout, table, justs=[True, True])

    # Ambiguity stats on the dataset
    if seq_file is not None:
        seqs = SequenceIndex.from_file(arguments[0])

        counts = []
        for seq in seqs:
            for chord in seq:
                cats = grammar.get_signs_for_word(chord)
                counts.append(len(cats))

        table = []
        table.append(["Chords", str(len(counts))])
        table.append(
            ["Cats per chord",
             "%.2f" % (float(sum(counts)) / len(counts))])
        table.append(["Std dev", "%.2f" % (std(counts))])

        print
        pprint_table(sys.stdout, table, justs=[True, True])
Пример #25
0
def main():
    usage = "%prog [options] <seq-file> <index>"
    description = "Outputs the details of a chord sequence from a "\
        "sequence index file to stdout."
    parser = OptionParser(usage=usage, description=description)
    parser.add_option("--categories",
                      "-c",
                      dest="categories",
                      action="store_true",
                      help="include category annotations")
    parser.add_option("--coordinations",
                      "-o",
                      dest="coordinations",
                      action="store_true",
                      help="include coordination annotations")
    parser.add_option("--meta",
                      "-m",
                      dest="meta",
                      action="store_true",
                      help="output sequence meta data")
    options, arguments = parser.parse_args()

    if len(arguments) < 2:
        print "You must specify a sequence file and index"
        sys.exit(1)

    index = int(arguments[1])
    # Get the chord sequence
    seq = SequenceIndex.from_file(arguments[0]).sequence_by_index(index)

    # Show the song name
    print "Chords for '%s'" % seq.string_name

    if options.meta:
        print "Main key:    %s" % seq.key
        print "Bar length:  %d" % seq.bar_length
        print "Notes:\n%s\n\n" % seq.notes

    for i, chord in enumerate(seq.iterator()):
        output = "%d\t%s\t%d" % (i, chord, chord.duration)
        if options.categories:
            output += "\t%s" % chord.category
        if options.coordinations:
            ti = chord.treeinfo
            if ti.coord_resolved and ti.coord_unresolved:
                output += "\t)("
            elif ti.coord_resolved:
                output += "\t)"
            elif ti.coord_unresolved:
                output += "\t("
        print output
Пример #26
0
def main():
    usage = "%prog [options] <seq-file> <index>"
    description = "Outputs the key associated with each chord of a sequence "\
        "from an annotated corpus"
    parser = OptionParser(usage=usage, description=description)
    options, arguments = parser.parse_args()
    
    if len(arguments) < 2:
        print "You must specify a sequence file and index"
        sys.exit(1)
        
    index = int(arguments[1])
    # Get the chord sequence
    seq = SequenceIndex.from_file(arguments[0]).sequence_by_index(index)
    
    print keys_for_sequence(seq)
Пример #27
0
def main():
    parser = OptionParser()
    usage = "%prog [options] [<seq-db-file>]"
    description = "Measure the degree of ambiguity (average cats per chord) "\
        "for a grammar over a particular dataset"
    parser.add_option('-g', '--grammar', dest='grammar', action='store', help='Speficy a grammar by name')
    options, arguments = parser.parse_args()
    
    if len(arguments) < 1:
        print "No sequence index file given: grammar stats only"
        seq_file = None
    else:
        seq_file = arguments[0]
    # Load the grammar
    grammar = get_grammar(options.grammar)
    
    # Some stats about ambiguity in the grammar
    table = []
    class_cats = []
    for class_name,chord_class in grammar.chord_classes.items():
        if class_name not in EXCLUDE_CLASSES:
            cats = grammar.get_signs_for_word(str(chord_class.words[0]))
            table.append([str(class_name), str(len(cats))])
            class_cats.append(len(cats))
    
    table.append(["Mean", "%.2f" % (float(sum(class_cats))/len(class_cats))])
    table.append(["Std dev", "%.2f" % (std(class_cats))])
    print "Cats for each chord class:"
    pprint_table(sys.stdout, table, justs=[True, True])
    
    # Ambiguity stats on the dataset
    if seq_file is not None:
        seqs = SequenceIndex.from_file(arguments[0])
        
        counts = []
        for seq in seqs:
            for chord in seq:
                cats = grammar.get_signs_for_word(chord)
                counts.append(len(cats))
        
        table = []
        table.append(["Chords", str(len(counts))])
        table.append(["Cats per chord", "%.2f" % (float(sum(counts)) / len(counts))])
        table.append(["Std dev", "%.2f" % (std(counts))])
        
        print
        pprint_table(sys.stdout, table, justs=[True, True])
Пример #28
0
def main():
    """
    Command-line entry point: strips every sequence that is not fully 
    annotated out of a sequence data file, overwriting the file.
    
    """
    parser = OptionParser(
        usage="%prog [options] <in-file>",
        description="Filter a sequence data file to remove any sequences "
                    "that are not fully annotated and write the result back to the file.")
    options, arguments = parser.parse_args()
    
    if not arguments:
        print >>sys.stderr, "You must specify an input data file"
        sys.exit(1)
    path = os.path.abspath(arguments[0])
    
    # Load all of the sequences
    index = SequenceIndex.from_file(path)
    
    # Keep the fully annotated ones and save them over the input file
    kept = [seq for seq in index.sequences if seq.fully_annotated]
    save_sequences(path, kept)
    
    removed = len(index.sequences) - len(kept)
    print >>sys.stderr, "Removed %d sequences" % removed
Пример #29
0
def main():
    usage = "%prog [options] <in-file>"
    parser = OptionParser(usage=usage)
    parser.add_option("-p",
                      "--partitions",
                      dest="partitions",
                      action="store",
                      type="int",
                      default=DEFAULT_PARTITIONS,
                      help="the number of partitions to use (default: %d)" %
                      DEFAULT_PARTITIONS)
    parser.add_option(
        "--ids",
        dest="ids",
        action="store_true",
        help=
        "don't output any files - just print out a list of the ids of the sequences in each partition"
    )
    options, arguments = parser.parse_args()

    if len(arguments) == 0:
        print >> sys.stderr, "You must specify an input data file"
        sys.exit(1)
    filename = os.path.abspath(arguments[0])

    # Read in the data file
    seqs = SequenceIndex.from_file(filename)

    part_pattern = "%s.part%%d" % filename
    heldout_pattern = "%s.heldout_part%%d" % filename
    # Divide the data up into partitions, with their complements
    parts = zip(partition(seqs.sequences, options.partitions),
                holdout_partition(seqs.sequences, options.partitions))
    # Save each partition and its complement
    for i, (part, heldout) in enumerate(parts):
        if options.ids:
            # Just print out a list of the ids in the partition
            print " ".join(["%d" % s.id for s in part])
        else:
            save_sequences(part_pattern % i, part)
            save_sequences(heldout_pattern % i, heldout)
            print >> sys.stderr, "Wrote partition %d to %s and %s" % (
                i, part_pattern % i, heldout_pattern % i)
Пример #30
0
def main():
    usage = "%prog <in-file> <command>"
    parser = OptionParser(usage=usage)
    parser.add_option("-c",
                      "--commands",
                      dest="commands",
                      action="store_true",
                      help="show a list of available commands")
    options, arguments = parser.parse_args()

    commands = {
        'ids': "output a space-separated list of the ids of all sequences",
        'count': "output the total number of sequences",
        'help': "show this help",
    }

    if options.commands:
        print "Available commands:\n%s" % \
            "\n".join(["%s  %s" % (format(cmd, " >10s"), help) for cmd,help in commands.items()])
        sys.exit(0)

    if len(arguments) == 0:
        print >> sys.stderr, "You must specify an input data file"
        sys.exit(1)
    filename = os.path.abspath(arguments[0])

    # Read in the data file
    seqs = SequenceIndex.from_file(filename)

    if len(arguments) > 1:
        command = arguments[1].lower()
        if command not in commands:
            print >> sys.stderr, "%s is not a valid command. Use -c for a list of available commands."
        elif command == "ids":
            # Output a list of the ids of sequences
            print " ".join(["%s" % id for id in seqs.ids])
        elif command == "count":
            print len(seqs)
        else:
            print >> sys.stderr, "Oops, I've not defined this command"
    else:
        print "Successfully read in sequences"
Пример #31
0
def main():
    """
    Strip every sequence that is not fully annotated out of a sequence
    data file, overwriting the file with the sequences that remain.

    The number of sequences that were dropped is reported on stderr.
    Exits with status 1 if no input file is given.

    """
    parser = OptionParser(
        usage="%prog [options] <in-file>",
        description="Filter a sequence data file to remove any sequences "
                    "that are not fully annotated and write the result back to the file.")
    options, arguments = parser.parse_args()

    if not arguments:
        print >>sys.stderr, "You must specify an input data file"
        sys.exit(1)
    path = os.path.abspath(arguments[0])

    # Load the whole index from the file
    index = SequenceIndex.from_file(path)

    # Collect the fully annotated sequences, in their original order
    annotated = []
    for candidate in index.sequences:
        if candidate.fully_annotated:
            annotated.append(candidate)
    # The filtered data is written over the input file
    save_sequences(path, annotated)

    removed = len(index.sequences) - len(annotated)
    print >>sys.stderr, "Removed %d sequences" % removed
Пример #32
0
def main():
    usage = "%prog [options] <seq-file> <index>"
    description = "Outputs the details of a chord sequence from a "\
        "sequence index file to stdout."
    parser = OptionParser(usage=usage, description=description)
    parser.add_option("--categories", "-c", dest="categories", action="store_true", help="include category annotations")
    parser.add_option("--coordinations", "-o", dest="coordinations", action="store_true", help="include coordination annotations")
    parser.add_option("--meta", "-m", dest="meta", action="store_true", help="output sequence meta data")
    options, arguments = parser.parse_args()
    
    if len(arguments) < 2:
        print "You must specify a sequence file and index"
        sys.exit(1)
        
    index = int(arguments[1])
    # Get the chord sequence
    seq = SequenceIndex.from_file(arguments[0]).sequence_by_index(index)
    
    # Show the song name
    print "Chords for '%s'" % seq.string_name
    
    if options.meta:
        print "Main key:    %s" % seq.key
        print "Bar length:  %d" % seq.bar_length
        print "Notes:\n%s\n\n" % seq.notes
    
    for i,chord in enumerate(seq.iterator()):
        output = "%d\t%s\t%d" % (i,chord,chord.duration)
        if options.categories:
            output += "\t%s" % chord.category
        if options.coordinations:
            ti = chord.treeinfo
            if ti.coord_resolved and ti.coord_unresolved:
                output += "\t)("
            elif ti.coord_resolved:
                output += "\t)"
            elif ti.coord_unresolved:
                output += "\t("
        print output
Пример #33
0
def main():
    usage = "%prog [options] <seq-file>"
    description = "Outputs some statistics about a chord sequence corpus file"
    parser = OptionParser(usage=usage, description=description)
    options, arguments = parser.parse_args()
    
    if len(arguments) < 1:
        print "You must specify a sequence file"
        sys.exit(1)
        
    # Get the chord sequence
    seqindex = SequenceIndex.from_file(arguments[0])
    print "Sequences:   %d" % len(seqindex)
    
    # Get the sequence lengths
    lengths = [len(seq) for seq in seqindex]
    
    # Count up chords
    print "Chords:      %d" % sum(lengths)
    print "Min length:  %d" % min(lengths)
    print "Max length:  %d" % max(lengths)
    print "Mean length: %f" % (float(sum(lengths)) / len(lengths))
def main():
    usage = "%prog [options] <seq-file>"
    description = "Outputs some statistics about a chord sequence corpus file"
    parser = OptionParser(usage=usage, description=description)
    options, arguments = parser.parse_args()

    if len(arguments) < 1:
        print "You must specify a sequence file"
        sys.exit(1)

    # Get the chord sequence
    seqindex = SequenceIndex.from_file(arguments[0])
    print "Sequences:   %d" % len(seqindex)

    # Get the sequence lengths
    lengths = [len(seq) for seq in seqindex]

    # Count up chords
    print "Chords:      %d" % sum(lengths)
    print "Min length:  %d" % min(lengths)
    print "Max length:  %d" % max(lengths)
    print "Mean length: %f" % (float(sum(lengths)) / len(lengths))
Пример #35
0
def main():
    usage = "%prog <in-file> <command>"
    parser = OptionParser(usage=usage)
    parser.add_option("-c", "--commands", dest="commands", action="store_true", help="show a list of available commands")
    options, arguments = parser.parse_args()
    
    commands = {
        'ids' : "output a space-separated list of the ids of all sequences",
        'count' : "output the total number of sequences",
        'help' : "show this help",
    }
    
    if options.commands:
        print "Available commands:\n%s" % \
            "\n".join(["%s  %s" % (format(cmd, " >10s"), help) for cmd,help in commands.items()])
        sys.exit(0)
        
    if len(arguments) == 0:
        print >>sys.stderr, "You must specify an input data file"
        sys.exit(1)
    filename = os.path.abspath(arguments[0])
    
    # Read in the data file
    seqs = SequenceIndex.from_file(filename)
    
    if len(arguments) > 1:
        command = arguments[1].lower()
        if command not in commands:
            print >>sys.stderr, "%s is not a valid command. Use -c for a list of available commands."
        elif command == "ids":
            # Output a list of the ids of sequences
            print " ".join(["%s" % id for id in seqs.ids])
        elif command == "count":
            print len(seqs)
        else:
            print >>sys.stderr, "Oops, I've not defined this command"
    else:
        print "Successfully read in sequences"
Пример #36
0
def main():    

    usage = "%prog [options] <results-files>"
    description = "Prints a dependency tree for a parse result"
    parser = OptionParser(usage=usage, description=description)
    parser.add_option("-t", "--times", dest="times", action="store_true", help="show timings of nodes")
    parser.add_option("-l", "--latex", dest="latex", action="store_true", help="output Latex for the graphs using tikz-dependency")
    parser.add_option("--la", "--latex-align", dest="latex_align", action="store_true", help="show node alignments in Latex output")
    parser.add_option("--align-time", dest="align_time", action="store_true", help="show the graph of common dependencies when the two graphs are aligned by node times")
    parser.add_option("--align-max", dest="align_max", action="store_true", help="show the graph of common dependencies when the two graphs are aligned to maximize the dependency recovery")
    options, arguments = parser.parse_args()
        
    
    if len(arguments) == 0:
        print >>sys.stderr, "Specify a file to read the results from"
        sys.exit(1)
    filename = arguments[0]

    # Swith PCCG/St+PCCG
    PARSER = "PCCG"
    FEATURE_PARAMS = "../xuanhong/params_2_pcfg.txt"
    if filename.find("stpcfg") != -1:
        PARSER = "St+PCCG"
        FEATURE_PARAMS = "../xuanhong/params_2_stpcfg.txt"        


    # Input sequence
    list_songs = read_list_songs("../xuanhong/list_songs.txt")
    song_name = os.path.basename(filename)
    seqs = SequenceIndex.from_file(settings.SEQUENCE_DATA)
    seq = seqs.sequences[list_songs[song_name]]
    input_sequence = DbInput.from_sequence(seq)
    
    try:
        pres = ParseResults.from_file(filename)
    except ParseResults.LoadError, err:
        print >>sys.stderr, "Error loading file: %s" % (err)
        sys.exit(1)
 def from_file(filename, options={}):
     """
     Build an annotated bulk input from a sequence index file.

     Every sequence in the file is converted with
     C{AnnotatedDbInput.from_sequence} and the results are wrapped, in
     file order, in an C{AnnotatedDbBulkInput}.

     @param filename: path of the sequence index file to read
     @param options: accepted but not used by this implementation

     """
     seq_index = SequenceIndex.from_file(filename)
     converted = []
     for sequence in seq_index:
         converted.append(AnnotatedDbInput.from_sequence(sequence))
     return AnnotatedDbBulkInput(converted)
Пример #38
0
def main():
    usage = "%prog [options] <seq-file>"
    description = "Outputs the details of all chord sequences from a "\
        "sequence index file to stdout. This is for getting a "\
        "(relatively) human-readable form of the data"
    parser = OptionParser(usage=usage, description=description)
    parser.add_option("--categories",
                      "-c",
                      dest="categories",
                      action="store_true",
                      help="include category annotations")
    parser.add_option("--coordinations",
                      "-o",
                      dest="coordinations",
                      action="store_true",
                      help="include coordination annotations")
    parser.add_option("--meta",
                      "-m",
                      dest="meta",
                      action="store_true",
                      help="output sequence meta data")
    parser.add_option(
        "--no-map",
        "-n",
        dest="no_map",
        action="store_true",
        help=
        "don't apply a mapping from the names in the corpus to those used in the paper"
    )
    parser.add_option("--all",
                      "-a",
                      dest="all",
                      action="store_true",
                      help="output everything")
    options, arguments = parser.parse_args()

    if len(arguments) < 1:
        print "You must specify a sequence file"
        sys.exit(1)

    # Get the chord sequence
    seqs = SequenceIndex.from_file(arguments[0])

    # Show the song name
    for seq in seqs:
        print "Chords for '%s'" % seq.string_name

        if options.meta or options.all:
            print "Main key:    %s" % seq.key
            print "Bar length:  %d" % seq.bar_length

        # Put together a table of chords plus annotations (if requested)
        data = [[str(chord) for chord in seq],
                [str(chord.duration) for chord in seq]]
        if options.categories or options.all:
            if options.no_map:
                # Don't apply any mapping to the category names
                data.append([chord.category for chord in seq])
            else:
                # Map the names to those used in the paper/thesis
                data.append([
                    annotation_to_lexicon_name(chord.category) for chord in seq
                ])
        if options.coordinations or options.all:
            coords = []
            for chord in seq:
                ti = chord.treeinfo
                if ti.coord_resolved and ti.coord_unresolved:
                    coords.append(")(")
                elif ti.coord_resolved:
                    coords.append(")")
                elif ti.coord_unresolved:
                    coords.append("(")
                else:
                    coords.append("")
            data.append(coords)
        pprint_table(sys.stdout, data, default_just=True)
        print
Пример #39
0
 def from_file(filename, options={}):
     """
     Build a bulk input from a sequence index file.

     Every sequence in the file is converted with
     C{DbInput.from_sequence} and the results are wrapped, in file order,
     in a C{DbBulkInput}.

     @param filename: path of the sequence index file to read
     @param options: accepted but not used by this implementation

     """
     seq_index = SequenceIndex.from_file(filename)
     converted = []
     for sequence in seq_index:
         converted.append(DbInput.from_sequence(sequence))
     return DbBulkInput(converted)
Пример #40
0
def main():
    usage = "%prog [options] <in-file> [<index1> [<index2> ...]]"
    description = (
        "Print the names of sequences in a sequence input "
        "file. Optionally specify indices of sequences. If no index "
        "is given, displays all sequences."
    )
    parser = OptionParser(usage=usage, description=description)
    parser.add_option(
        "--sa",
        "-a",
        "--sort-alpha",
        "--alpha",
        dest="alphabetical",
        action="store_true",
        help="order sequences alphabetically by name",
    )
    parser.add_option(
        "--sl", "--sort-length", dest="sort_length", action="store_true", help="order sequences by length"
    )
    parser.add_option(
        "-i",
        "--index",
        dest="index",
        action="store_true",
        help="also display the indices in the sequence file of each sequence, in the column before the ids",
    )
    parser.add_option("-l", "--lengths", dest="lengths", action="store_true", help="output lengths of the sequences")
    options, arguments = parser.parse_args()

    if len(arguments) < 1:
        print "You must specify an input file"
        sys.exit(1)
    seqs = SequenceIndex.from_file(arguments[0])

    indices = [int(ind) for ind in arguments[1:]]
    if len(indices) == 0:
        sequences = seqs.sequences
    else:
        sequences = [seqs.sequence_by_index(index) for index in indices]

    if options.alphabetical:
        # Sort by string_name
        sequences.sort(key=lambda s: s.string_name)
    elif options.sort_length:
        # Sort by sequence length
        sequences.sort(key=lambda s: len(s))

    header = ["Song name", "Id"]
    justs = [True, False]
    if options.lengths:
        header.append("Length")
        justs.append(False)
    if options.index:
        header.append("Index")
        justs.append(False)
    rows = [header]

    for seq in sequences:
        row = [seq.string_name, str(seq.id)]
        if options.lengths:
            row.append(str(len(seq)))
        if options.index:
            row.append(str(seqs.index_for_id(seq.id)))
        rows.append(row)
    pprint_table(sys.stdout, rows, justs=justs)
Пример #41
0
def main():
    usage = "%prog [<options>] <model-name> <training-input>"
    description = "Training of PCFG models."
    parser = OptionParser(usage=usage, description=description)
    parser.add_option("-p", "--partitions", dest="partitions", action="store", type="int", \
        help="Number of partitions to divide the data into. "\
            "For train, divides the input file, trains a model on each "\
            "partition's complement and appends partition number to "\
            "the model names. For del, appends partition numbers to model "\
            "names and deletes all the models. Recache does similarly. "\
            "Has no effect for parse.")
    parser.add_option('--opts', dest="training_opts", action="store", help="options to pass to the model trainer. Type '--opts help' for a list of options")
    parser.add_option("--debug", dest="debug", action="store_true", help="Output verbose logging information to stderr")
    parser.add_option("-g", "--grammar", dest="grammar", action="store", help="use the named grammar instead of the default.")
    options, arguments = parse_args_with_config(parser)
    
    if options.debug:
        log_level = logging.DEBUG
    else:
        log_level = logging.WARN
    # Create a logger for training
    logger = create_logger(log_level = log_level,
                  name = "training",
                  stderr = True)
    
    # Load a grammar
    grammar = get_grammar(options.grammar)
    # Get the pcfg model class for the formalism
    PcfgModel = grammar.formalism.PcfgModel
        
    # Parse the option string
    if options.training_opts is None:
        opts = {}
    elif options.training_opts.lower() == "help":
        print options_help_text(PcfgModel.TRAINING_OPTIONS, 
                                            intro="Training options for PCFGs")
        sys.exit(0)
    else:
        opts = ModuleOption.process_option_dict(
                    ModuleOption.process_option_string(options.training_opts),
                    PcfgModel.TRAINING_OPTIONS)
    
    if len(arguments) == 0:
        print >>sys.stderr, "Specify a model name"
        models = PcfgModel.list_models()
        print >>sys.stderr, "Available models: %s" % ", ".join(models)
        sys.exit(1)
    model_name = arguments[0]
    print "Model base name:", model_name
    
    if options.partitions is not None:
        parts = [(i, "%s%d" % (model_name, i)) for i in range(options.partitions)]
    else:
        parts = [(None, model_name)]
    
    if len(arguments) < 2:
        print >>sys.stderr, "Specify an input file to read sequence data from"
        sys.exit(1)
    # Read in the training data from the given file
    seqs = SequenceIndex.from_file(arguments[1])
    
    if options.partitions is not None:
        # Prepare each training partition
        datasets = holdout_partition(seqs.sequences, options.partitions)
    else:
        datasets = [seqs.sequences]
        
    for dataset,(parti,part_model) in zip(datasets,parts):
        # Train the named model on the sequence data
        model = PcfgModel.train(part_model, dataset, opts, grammar=grammar, 
                                logger=logger)
        model.save()
        print "Trained model", part_model
Пример #42
0
def main():
    usage = "%prog [<options>] <model-name> <training-input>"
    description = "Training of PCFG models."
    parser = OptionParser(usage=usage, description=description)
    parser.add_option("-p", "--partitions", dest="partitions", action="store", type="int", \
        help="Number of partitions to divide the data into. "\
            "For train, divides the input file, trains a model on each "\
            "partition's complement and appends partition number to "\
            "the model names. For del, appends partition numbers to model "\
            "names and deletes all the models. Recache does similarly. "\
            "Has no effect for parse.")
    parser.add_option(
        '--opts',
        dest="training_opts",
        action="store",
        help=
        "options to pass to the model trainer. Type '--opts help' for a list of options"
    )
    parser.add_option("--debug",
                      dest="debug",
                      action="store_true",
                      help="Output verbose logging information to stderr")
    parser.add_option("-g",
                      "--grammar",
                      dest="grammar",
                      action="store",
                      help="use the named grammar instead of the default.")
    options, arguments = parse_args_with_config(parser)

    if options.debug:
        log_level = logging.DEBUG
    else:
        log_level = logging.WARN
    # Create a logger for training
    logger = create_logger(log_level=log_level, name="training", stderr=True)

    # Load a grammar
    grammar = get_grammar(options.grammar)
    # Get the pcfg model class for the formalism
    PcfgModel = grammar.formalism.PcfgModel

    # Parse the option string
    if options.training_opts is None:
        opts = {}
    elif options.training_opts.lower() == "help":
        print options_help_text(PcfgModel.TRAINING_OPTIONS,
                                intro="Training options for PCFGs")
        sys.exit(0)
    else:
        opts = ModuleOption.process_option_dict(
            ModuleOption.process_option_string(options.training_opts),
            PcfgModel.TRAINING_OPTIONS)

    if len(arguments) == 0:
        print >> sys.stderr, "Specify a model name"
        models = PcfgModel.list_models()
        print >> sys.stderr, "Available models: %s" % ", ".join(models)
        sys.exit(1)
    model_name = arguments[0]
    print "Model base name:", model_name

    if options.partitions is not None:
        parts = [(i, "%s%d" % (model_name, i))
                 for i in range(options.partitions)]
    else:
        parts = [(None, model_name)]

    if len(arguments) < 2:
        print >> sys.stderr, "Specify an input file to read sequence data from"
        sys.exit(1)
    # Read in the training data from the given file
    seqs = SequenceIndex.from_file(arguments[1])

    if options.partitions is not None:
        # Prepare each training partition
        datasets = holdout_partition(seqs.sequences, options.partitions)
    else:
        datasets = [seqs.sequences]

    for dataset, (parti, part_model) in zip(datasets, parts):
        # Train the named model on the sequence data
        model = PcfgModel.train(part_model,
                                dataset,
                                opts,
                                grammar=grammar,
                                logger=logger)
        model.save()
        print "Trained model", part_model
Пример #43
0
def main():
    usage = "%prog [options] <in-file> [<index1> [<index2> ...]]"
    description = "Print the names of sequences in a sequence input "\
            "file. Optionally specify indices of sequences. If no index "\
            "is given, displays all sequences."
    parser = OptionParser(usage=usage, description=description)
    parser.add_option("--sa",
                      "-a",
                      "--sort-alpha",
                      "--alpha",
                      dest="alphabetical",
                      action="store_true",
                      help="order sequences alphabetically by name")
    parser.add_option("--sl",
                      "--sort-length",
                      dest="sort_length",
                      action="store_true",
                      help="order sequences by length")
    parser.add_option(
        "-i",
        "--index",
        dest="index",
        action="store_true",
        help=
        "also display the indices in the sequence file of each sequence, in the column before the ids"
    )
    parser.add_option("-l",
                      "--lengths",
                      dest="lengths",
                      action="store_true",
                      help="output lengths of the sequences")
    options, arguments = parser.parse_args()

    if len(arguments) < 1:
        print "You must specify an input file"
        sys.exit(1)
    seqs = SequenceIndex.from_file(arguments[0])

    indices = [int(ind) for ind in arguments[1:]]
    if len(indices) == 0:
        sequences = seqs.sequences
    else:
        sequences = [seqs.sequence_by_index(index) for index in indices]

    if options.alphabetical:
        # Sort by string_name
        sequences.sort(key=lambda s: s.string_name)
    elif options.sort_length:
        # Sort by sequence length
        sequences.sort(key=lambda s: len(s))

    header = ["Song name", "Id"]
    justs = [True, False]
    if options.lengths:
        header.append("Length")
        justs.append(False)
    if options.index:
        header.append("Index")
        justs.append(False)
    rows = [header]

    for seq in sequences:
        row = [seq.string_name, str(seq.id)]
        if options.lengths:
            row.append(str(len(seq)))
        if options.index:
            row.append(str(seqs.index_for_id(seq.id)))
        rows.append(row)
    pprint_table(sys.stdout, rows, justs=justs)
Пример #44
0
def prepare_evaluation_options(usage=None, description=None,
        optparse_options=[], check_args=None, optparse_groups=[]):
    """
    Various tasks common to the initial part of the evaluation routine
    scripts (C{models/eval.py}): builds the option parser, parses the
    command line, initializes logging, loads the input sequences and
    divides them up according to the partitioning options.

    @todo: This is not used any more. Remove it, after checking it's definitely
        not used.

    @param usage: the optparse usage string
    @param description: the optparse description string
    @type optparse_options: list of tuples
    @param optparse_options: (args,kwargs) pairs to add additional
        options to the optparse parser.
    @type check_args: function
    @param check_args: function to take the command-line arguments and
        check them. This will be called early in the script. Must
        return a tuple of (1) the model name (or model basename) that
        will be used in the partition model names and (2) the input
        filename to get sequences from.
    @type optparse_groups: list of pairs
    @param optparse_groups: specifications for option groups to add to the
        optparse option parser. The first of each pair is a tuple of
        args to C{OptionGroup}'s init (excluding the first).
        The second is a list of options
        each formatted as C{optparse_options}.

    @raise ValueError: if C{check_args} is None

    @rtype: tuple
    @return: (1) list of (sequences,model_name,partition_index) tuples
        for each partition; (2) the sequence ids: with C{--partitions}
        the result of partitioning all the ids, with C{--partition} just
        the selected part of that result, with C{--sequence} a list
        containing the one sequence's id, and otherwise C{[None]};
        (3) optparse options; (4) optparse arguments.

    """
    import sys
    # logging is needed for the --debug log level. It is imported here like
    # everything else this function uses, so that the function does not
    # rely on the enclosing module having imported it.
    import logging
    from optparse import OptionParser, OptionGroup
    from jazzparser.utils.config import parse_args_with_config
    from jazzparser.utils.loggers import init_logging
    from jazzparser.data.db_mirrors import SequenceIndex
    from jazzparser.utils.data import partition

    parser = OptionParser(usage=usage, description=description)
    group = OptionGroup(parser, "Input", "Input data and partitioning for evaluation")
    group.add_option("-s", "--sequence", dest="sequence", action="store", help="limit the evaluation to just one sequence, with the given index in the input file")
    group.add_option("--partition", dest="partition", action="store", help="restrict to only one partition of the data. Specify as i/n, where i is the partition number and n the total number of partitions.")
    group.add_option("-p", "--partitions", dest="partitions", type="int", action="store", help="test on all n partitions of the data, using a different model for each. Will look for a model <NAME>i, where <NAME> is the given model name and i the partition number.")
    parser.add_option_group(group)

    parser.add_option("--debug", dest="debug", action="store_true", help="show debugging output")

    # Add the options according to their specs
    for args,kwargs in optparse_options:
        parser.add_option(*args, **kwargs)

    # Add groups and their options
    for group_args,options in optparse_groups:
        # Check whether the group already exists
        same_titles = [g for g in parser.option_groups if g.title == group_args[0]]
        if same_titles:
            group = same_titles[0]
        else:
            group = OptionGroup(parser, *group_args)
            parser.add_option_group(group)
        # Add options to this group
        for args,kwargs in options:
            group.add_option(*args, **kwargs)
    options, arguments = parse_args_with_config(parser)

    if check_args is None:
        raise ValueError("could not check arguments and get model "
            "name. check_args must not be None")
    model_name,input_filename = check_args(arguments)

    if options.debug:
        # Set the log level to debug and do the standard logging init
        init_logging(logging.DEBUG)
    else:
        init_logging()

    # Load up sequences
    seqs = SequenceIndex.from_file(input_filename)

    def _get_seq_by_index(index):
        # Look up a sequence, exiting with an error if there isn't one
        seq = seqs.sequence_by_index(index)
        if seq is None:
            print >>sys.stderr, "There are only %d sequences" % len(seqs)
            sys.exit(1)
        return seq

    ################ Data partitioning ####################
    # The partitioning options are checked in order of precedence:
    # --partitions, then --partition, then --sequence
    if options.partitions is not None:
        # Divide the data up into n partitions and use a different model name for each
        total_parts = options.partitions
        print >>sys.stderr, "Cross validation: dividing test data into %d partitions" % total_parts
        partitions = [(part,"%s%d" % (model_name,i), i) for i,part in enumerate(partition(seqs.sequences, total_parts))]
        part_ids = partition(seqs.ids, total_parts)
    elif options.partition is not None:
        # Just select one partition
        # Split up the argument to get two integers
        try:
            parti,total_parts = [int(val) for val in options.partition.split("/")]
        except ValueError:
            # Raised both for a wrong number of "/"-separated fields and
            # for fields that aren't integers. parser.error() prints the
            # message with the usage string and exits.
            parser.error("--partition must be specified as i/n (got '%s')" % options.partition)
        print >>sys.stderr, "Restricting sequences to %d-way partition %d" % (total_parts,parti)
        # Get a list of sequence indices to restrict our set to
        part_ids = partition(seqs.ids, total_parts)[parti]
        all_parts = [(part,"%s%d" % (model_name,i), i) for i,part in enumerate(partition(seqs.sequences, total_parts))]
        partitions = [ all_parts[parti] ]
    elif options.sequence is not None:
        # Just select one sequence
        seq = _get_seq_by_index(int(options.sequence))
        partitions = [( [seq], model_name, 0 )]
        part_ids = [seq.id]
    else:
        # Don't partition the sequences
        partitions = [(seqs.sequences, model_name,0)]
        part_ids = [None]

    return partitions,part_ids,options,arguments
Пример #45
0
def main():
    """
    Parse the sequences of a sequence index file using the annotations
    stored in the file, reporting on each parse as it goes.

    Every sequence in the file is parsed, or only the one selected with
    C{--index}. For each sequence the number of partial parses is logged
    and, if requested, the derivation traces (C{--derivations}) and the
    tonal space path of the first result (C{--tonal-space}) are printed
    to stdout. A sequence counts as fully parsed if it yields exactly one
    result.

    Returns 0 after printing the parser option help for C{--popt help}.
    Exits with status 1 if no sequence file is given.

    """
    usage = "%prog [options] <seq-file>"
    description = "Parses a sequence from a sequence index file using the "\
        "annotations stored in the same file."
    parser = OptionParser(usage=usage, description=description)
    parser.add_option(
        "--popt",
        "--parser-options",
        dest="popts",
        action="append",
        help=
        "specify options for the parser. Type '--popt help' to get a list of options (we use a DirectedCkyParser)"
    )
    parser.add_option("--derivations",
                      "--deriv",
                      dest="derivations",
                      action="store_true",
                      help="print out derivation traces of all the results")
    parser.add_option("--index",
                      "-i",
                      dest="index",
                      action="store",
                      type="int",
                      help="parse just the sequence with this index")
    parser.add_option("--quiet",
                      "-q",
                      dest="quiet",
                      action="store_true",
                      help="show only errors in the output")
    parser.add_option(
        "--tonal-space",
        "--ts",
        dest="tonal_space",
        action="store_true",
        help="show the tonal space path (with -q, shows only paths)")
    parser.add_option(
        "--output-set",
        "-o",
        dest="output_set",
        action="store",
        help="store the analyses to a tonal space analysis set with this name")
    parser.add_option(
        "--trace-parse",
        "-t",
        dest="trace_parse",
        action="store_true",
        help=
        "output a trace of the shift-reduce parser's operations in producing the full interpretation from the annotations"
    )
    options, arguments = parser.parse_args()

    if len(arguments) < 1:
        print "You must specify a sequence file"
        sys.exit(1)

    # --popt uses the "append" action, so options.popts is a list with one
    # string per use of the option, or None if it was never given
    if options.popts is not None:
        poptstr = options.popts
        if "help" in [s.strip().lower() for s in poptstr]:
            # Output the parser's option help
            print options_help_text(
                DirectedCkyParser.PARSER_OPTIONS,
                intro="Available options for the directed parser")
            return 0
    else:
        poptstr = ""
    # NOTE(review): popts is not read again in the visible part of this
    # function
    popts = ModuleOption.process_option_string(poptstr)

    grammar = get_grammar()
    # With --quiet only errors are logged
    if options.quiet:
        logger = create_plain_stderr_logger(log_level=logging.ERROR)
    else:
        logger = create_plain_stderr_logger()

    # The parse trace goes to the main logger, if it was requested at all
    if options.trace_parse:
        parse_logger = logger
    else:
        parse_logger = None

    seq_index = SequenceIndex.from_file(arguments[0])
    # Get the chord sequence(s)
    if options.index is None:
        seqs = seq_index.sequences
    else:
        # NOTE(review): sequence_by_index may return None for an index it
        # cannot resolve (another caller in this file checks for that);
        # this is not handled here
        seqs = [seq_index.sequence_by_index(options.index)]
    logger.info("%d sequences\n" % len(seqs))

    # (name, semantics) of each fully parsed sequence, and counts of the
    # outcomes. NOTE(review): neither is read again in the visible part of
    # this function.
    full_analyses = []
    stats = {
        'full': 0,
        'partial': 0,
        'fail': 0,
    }
    # Try parsing every sequence
    for seq in seqs:
        logger.info("====== Sequence %s =======" % seq.string_name)
        try:
            results = parse_sequence_with_annotations(
                seq, grammar, logger=logger, parse_logger=parse_logger)
        except ParseError, err:
            # A failed parse is logged and counted; the loop carries on
            # with the next sequence
            logger.error("Error parsing: %s" % err)
            stats['fail'] += 1
        else:
            # This may have resulted in multiple partial parses
            logger.info("%d partial parses" % len(results))

            # Exactly one result is counted as a full parse, anything else
            # as partial
            if len(results) == 1:
                stats['full'] += 1
            else:
                stats['partial'] += 1

            if options.derivations:
                # Output the derivation trace for each partial parse
                for result in results:
                    print
                    print result.derivation_trace

            if options.tonal_space:
                # Output the tonal space coordinates
                # Only the first result is used for the path
                path = grammar.formalism.sign_to_coordinates(results[0])
                for i, point in enumerate(path):
                    print "%d, %d: %s" % (seq.id, i, point)

            # Only include a result in the output analyses if it was a full parse
            if len(results) == 1:
                full_analyses.append((seq.string_name, results[0].semantics))
            else:
                logger.warn("%s was not included in the output analyses, "\
                    "since it was not fully parsed" % seq.string_name)
Пример #46
0
def main():
    usage = "%prog [options] <seq-file>:<index> <midi-file> <midi-out>"
    description = "Aligns a chord sequence with a MIDI file and inserts "\
        "marker events into the MIDI data to mark where chord changes "\
        "are. Alignment parameters will be loaded from a file (not "\
        "implemented yet), but can be overridden using the script's "\
        "options."
    parser = OptionParser(usage=usage, description=description)
    parser.add_option(
        "--mbpb",
        "--midi-beats-per-beat",
        dest="beats_per_beat",
        type="int",
        help=
        "number of midi beats to align with a single sequence beat (see SequenceMidiAlignment.midi_beats_per_beat)"
    )
    parser.add_option(
        "--ss",
        "--sequence-start",
        dest="sequence_start",
        type="int",
        help=
        "number of midi ticks after the first note-on event when the chord sequence begins (see SequenceMidiAlignment.sequence_start)"
    )
    parser.add_option(
        "--repeats",
        dest="repeats",
        help=
        "repeat spans, in the form 'start_chord,end_chord,count', separated by semicolons (see SequenceMidiAlignment.repeat_spans)"
    )
    parser.add_option(
        "--lyrics",
        dest="lyrics",
        action="store_true",
        help="use lyrics events instead of marker events to mark the chords")
    options, arguments = parser.parse_args()

    if len(arguments) < 3:
        print "You must specify a sequence file, midi file and output midi filename"
        sys.exit(1)

    # Get the chord sequence
    filename, __, index = arguments[0].partition(":")
    index = int(index)
    seq = SequenceIndex.from_file(filename).sequence_by_index(index)

    # Load the input midi data
    mid = read_midifile(arguments[1])

    outfile = arguments[2]

    # For now, just create a new default alignment
    # TODO: load the alignment parameters from a file or from the
    #  sequence data itself
    alignment = SequenceMidiAlignment()

    # Override alignment parameters if options are given
    if options.beats_per_beat is not None:
        alignment.midi_beats_per_beat = options.beats_per_beat
    if options.sequence_start is not None:
        alignment.sequence_start = options.sequence_start
    if options.repeats is not None:
        repeats = []
        try:
            for string_triple in options.repeats.split(":"):
                start, end, count = string_triple.split(",")
                start, end, count = int(start), int(end), int(count)
                repeats.append((start, end, count))
        except:
            print "Error parsing repeat spans:"
            raise
        alignment.repeat_spans = repeats

    alignment.align(seq, mid, lyrics=options.lyrics)

    write_midifile(mid, outfile)
Пример #47
0
def main():
    usage = "%prog [options] <names-index> <seq-index>"
    description = "Loads the MIDI downloaded files in the names index "\
                    "and the sequence index with the chord sequences in "\
                    "it and performs operations on the MIDI files. "\
                    "By default, counts the files for each sequence."
    parser = OptionParser(usage=usage, description=description)
    parser.add_option("-z",
                      "--zeroes",
                      dest="zeroes",
                      action="store_true",
                      help="display the names of sequences with no midi files")
    parser.add_option(
        "-f",
        "--few",
        dest="few",
        action="store",
        type="int",
        help=
        "display the names of sequences with few midi files, below the given threshold"
    )
    parser.add_option(
        "--names",
        dest="names",
        action="store_true",
        help=
        "only show the names in the output, not the numbers (only applies to --zeroes or --few)"
    )
    parser.add_option(
        "-d",
        "--diff",
        dest="diff",
        action="store_true",
        help=
        "check every pair of files for each sequence and report the similarity of the midi notes"
    )
    parser.add_option(
        "--min-diff",
        dest="min_diff",
        action="store",
        type="float",
        help=
        "the minimum similarity the report when diffing files (see --diff). By default, all are reported (i.e. 0)",
        default=0.0)
    options, arguments = parser.parse_args()

    if len(arguments) == 0:
        print >> sys.stderr, "You must specify a names index file"
        sys.exit(1)
    if len(arguments) == 1:
        print >> sys.stderr, "You must specify a sequence index file"
        sys.exit(1)
    names_filename = os.path.abspath(arguments[0])
    # Use this directory to get midi files from
    midi_base_dir = os.path.dirname(names_filename)
    names_file = open(names_filename, 'r')
    names = UnicodeCsvReader(names_file)
    lines = list(names)

    # Load the sequence index file
    seq_filename = arguments[1]
    sequences = SequenceIndex.from_file(seq_filename)

    # Index the entries in the names index by the sequence id
    midi_seqs = {}
    for row in lines[1:]:
        # Col 0: filename
        # Col 1: name from web page
        midi_seqs.setdefault(int(row[2]), []).append((row[0], row[1]))
    # Filter out the ones that don't exist
    def _exists(filename):
        return os.path.exists(os.path.join(midi_base_dir, filename))

    existing_seqs = dict([(seq_id,
                           list(
                               set([(filename, name)
                                    for (filename, name) in files
                                    if _exists(filename)])))
                          for (seq_id, files) in midi_seqs.items()])

    def _load_midi(filename):
        return read_midifile(open(os.path.join(midi_base_dir, filename), 'r'))

    if options.zeroes or options.few is not None:
        # Look for sequences with few (or no) midi files
        if options.zeroes:
            threshold = 1
        else:
            threshold = options.few
        seq_counts = [
            (seq,
             0 if seq.id not in existing_seqs else len(existing_seqs[seq.id]))
            for seq in sequences
        ]
        few_seqs = [(seq, count) for (seq, count) in seq_counts
                    if count < threshold]
        if options.names:
            print "\n".join([seq.string_name for (seq, count) in few_seqs])
        else:
            print "\n".join([
                "%s (%d)" % (seq.string_name, count)
                for (seq, count) in few_seqs
            ])
    elif options.diff:
        # Measure the similarity between each pair of files
        for seq_id, files in existing_seqs.items():
            seq = sequences.sequence_by_id(seq_id)
            print "%s (%d)" % (seq.string_name, len(files))
            # Compare every pair
            for i, (filename0, __) in enumerate(files):
                mid0 = _load_midi(filename0)
                for (filename1, __) in files[:i]:
                    mid1 = _load_midi(filename1)
                    similarity0, similarity1 = note_on_similarity(mid0, mid1)
                    if similarity0 >= options.min_diff:
                        print "  %s, %s: %f" % (filename0, filename1,
                                                similarity0)
                    if similarity1 >= options.min_diff:
                        print "  %s, %s: %f" % (filename1, filename0,
                                                similarity1)
    else:
        # By default, count the midi files found for each sequence
        for seq in sequences:
            files = existing_seqs.get(seq.id, [])
            print "%s\t%d" % (seq.string_name, len(files))
Пример #48
0
def main():
    usage = "%prog [options] <in-file>"
    description = "Reads in a sequence index file and tries to find "\
        "midi files of each song by looking up the name online. Writes "\
        "them all to the given directory."
    parser = OptionParser(usage=usage, description=description)
    parser.add_option("-i", "--index", dest="index", action="store", type="int", help="select a single sequence by index from the file and just get files for that sequence")
    parser.add_option("-n", "--name", dest="name", action="store_true", help="interpret the arguments as a song name to look up directly instead of fetching the name of a sequence from a file")
    parser.add_option("-v", "--verbose", dest="verbose", action="store_true", help="verbose output")
    parser.add_option("-s", "--source", dest="sources", action="append", help="sources to get midi files from (use option multiple times for multiple sources). Possible values: %s. Default: all sources." % ", ".join(SOURCES))
    parser.add_option("-r", "--resume", dest="resume", action="store", type="int", help="resume lookup at the given sequence index. Sequences before this index will be skipped at the names entries will be appended to an existing file.")
    parser.add_option("-d", "--dir", dest="dir", action="store", help="directory to output files to. By default, outputs to the current directory")
    options, arguments = parser.parse_args()
        
    if options.dir is not None:
        outdir = os.path.abspath(options.dir)
    else:
        outdir = os.path.abspath(os.getcwd())
        
    if not os.path.isdir(outdir):
        print >>sys.stderr, "%s is not a directory" % outdir
    
    if options.name is not None:
        sequences = [(" ".join(arguments),None)]
    else:
        if len(arguments) == 0:
            print >>sys.stderr, "You must specify an input sequence index file"
            sys.exit(1)
        filename = os.path.abspath(arguments[0])
        
        # Read in the data file
        seqs = SequenceIndex.from_file(filename)
        if options.index is not None:
            seq = seqs.sequence_by_index(options.index)
            sequences = [(seq.name,seq.id)]
        elif options.resume is not None:
            sequences = [(seq.name,seq.id) for seq in seqs.sequences[options.resume:]]
        else:
            sequences = [(s.name,s.id) for s in seqs.sequences]
    
    if options.verbose:
        verbose_out = sys.stderr
        out_prefix = ">>> "
    else:
        verbose_out = None
        out_prefix = ""
    
    # Output a name list
    if options.resume is None:
        namefile = open(os.path.join(outdir, "NAMES"), 'w')
    else:
        # Append data to the old file
        namefile = open(os.path.join(outdir, "NAMES"), 'a')
    try:
        names = UnicodeCsvWriter(namefile)
        if options.resume is None:
            # Add a header if we're not appending to an old file
            names.writerow(['Filename','Reported song name','Database id'])
        
        for seq_name,seq_id in sequences:
            print "%sLooking up %s" % (out_prefix, seq_name)
            files = find_midi_files(seq_name, sources=options.sources, verbose_out=verbose_out)
            print "%s  Found %d files" % (out_prefix, len(files))
            # Create a suitable base filename
            base_filename = "_".join(\
                seq_name.encode('ascii', 'ignore').translate(string.maketrans("",""), string.punctuation).lower().split())
            for i,(data,name) in enumerate(files):
                filename = u"%s-%d.mid" % (base_filename,i)
                full_filename = os.path.join(outdir, filename)
                # Write each midi file out individually
                f = open(full_filename, 'w')
                f.write(data)
                f.close()
                # Keep a list of the name reported for each file
                names.writerow([filename,name,seq_id])
            namefile.flush()
    finally:
        namefile.close()