def main():
    """
    Command-line entry point for training backoff builder models.

    Positional arguments are a model type, a model name and an input data
    file. The data is loaded with ``command_line_input`` (only the
    ``bulk-db`` and ``bulk-db-annotated`` file types are accepted). Without
    ``--partitions`` a single model is trained on all of the data; with it,
    the data is split by ``holdout_partition`` and one model is trained per
    split, named using the builder class's ``partition_model_name``.
    Every trained model is saved.

    Exits with status 1 if fewer than three arguments are given and with
    status 0 after printing the option list for ``--opts help``.

    """
    parser = OptionParser(
        usage="%prog [options] <model-type> <model_name> <in-file>",
        description="Trains a backoff builder model using the given "
                    "input data. Specify a model type (ngram, etc) and a "
                    "name to identify it. The data file should be a stored "
                    "SequenceIndex file.")
    parser.add_option(
        '-p', '--partitions', dest="partitions", action="store", type="int",
        help="train a number of partitions of the given data. Trains a "
             "model on the complement of each partition, so it can be "
             "tested on the partition. The models will be named <NAME>n, "
             "where <NAME> is the model name and n the partition number.")
    parser.add_option(
        '--opts', dest="training_opts", action="store",
        help="options to pass to the model trainer. Type '--opts help' for "
             "a list of options for a particular model type.")
    # File input options
    parser.add_option(
        "--filetype", "--ft", dest="filetype", action="store",
        help="select the file type for the input file. Same filetypes as "
             "jazzparser",
        default='bulk-db')
    parser.add_option(
        "--file-options", "--fopt", dest="file_options", action="store",
        help="options for the input file. Type '--fopt help', using "
             "'--ft <type>' to select file type, for a list of available "
             "options.")
    options, arguments = parse_args_with_config(parser)

    if len(arguments) < 3:
        sys.stderr.write("You must specify a model type, a model name and "
                         "an input data file as arguments\n")
        sys.exit(1)
    model_type, model_name, data_path = arguments[:3]
    filename = os.path.abspath(data_path)

    builder_cls = get_backoff_builder(model_type)
    model_cls = builder_cls.MODEL_CLASS

    # Load the sequence data from a dbinput file
    input_data = command_line_input(
        filename=filename,
        filetype=options.filetype,
        options=options.file_options,
        allowed_types=['bulk-db', 'bulk-db-annotated'])

    # Work out the training options: either show the help and stop, or
    # turn the option string into a validated dictionary
    raw_opts = options.training_opts
    if raw_opts is not None and raw_opts.lower() == "help":
        print(options_help_text(
            model_cls.TRAINING_OPTIONS,
            intro="Training options for %s" % model_cls.__name__))
        sys.exit(0)
    if raw_opts is None:
        training_opts = {}
    else:
        parsed_opts = ModuleOption.process_option_string(raw_opts)
        training_opts = ModuleOption.process_option_dict(
            parsed_opts, model_cls.TRAINING_OPTIONS)

    # Build the list of (model name, training data) pairs to work through
    if options.partitions is None:
        models = [(model_name, input_data)]
    else:
        models = []
        parts = holdout_partition(input_data, options.partitions)
        for num, seqs in enumerate(parts):
            part_model_name = builder_cls.partition_model_name(model_name, num)
            models.append((part_model_name, seqs))

    for part_name, seqs in models:
        # Each name gets its own freshly created model
        model = model_cls(part_name, options=training_opts)
        # Train on this model's share of the data and store the result
        model.train(seqs)
        model.save()
        print("Trained model %s" % (part_name))
def main():
    """
    Command-line entry point for debugging a PCFG model.

    Expects two positional arguments: the name of a trained model and an
    input file. The model class is taken from the formalism of the grammar
    selected with ``--grammar`` (or the default grammar), the model is
    loaded by name, the input file is read as ``db`` input and a tree is
    built from the input sequence with ``build_tree_for_sequence``.

    Exits with status 1 if either positional argument is missing.

    Raises:
        ModelTrainingError: if no tree could be built for the sequence.

    """
    # The input file is compulsory, so it is included in the usage line
    usage = "%prog [options] <model-name> <in-file>"
    description = "Debug a PCFG model"
    parser = OptionParser(usage=usage, description=description)
    parser.add_option(
        "-g", "--grammar", dest="grammar", action="store",
        help="use the named grammar instead of the default.")
    parser.add_option(
        "-d", "--debug", dest="debug", action="store_true",
        help="output debugging information during generation")
    parser.add_option(
        "--file-options", "--fopt", dest="file_options", action="store",
        help="options for the input file "
             "(--file). Type '--fopt help' for a list of available "
             "options.")
    options, arguments = parse_args_with_config(parser)

    if len(arguments) < 1:
        print("Specify a model name")
        sys.exit(1)
    model_name = arguments[0]
    if len(arguments) < 2:
        print("Specify an input file")
        # Stop here: carrying on would fail with an IndexError on
        # arguments[1] further down
        sys.exit(1)

    grammar = get_grammar(options.grammar)
    PcfgModel = grammar.formalism.PcfgModel
    # Load the trained model
    model = PcfgModel.load_model(model_name)

    # Read the input file given on the command line
    input_data = command_line_input(filename=arguments[1],
                                    filetype="db",
                                    options=options.file_options)

    # Prepare the input and annotations
    # NOTE(review): categories, str_inputs and tree are not used by the code
    # visible here; they are kept in case their evaluation is relied on
    sequence = input_data.sequence
    categories = [chord.category for chord in sequence.iterator()]
    str_inputs = input_data.inputs
    # Build the implicit normal-form tree from the annotations
    try:
        tree = build_tree_for_sequence(sequence)
    except TreeBuildError as err:
        raise ModelTrainingError("could not build a tree for '%s': %s" %
                                 (sequence.string_name, err))
def main():
    """
    Command-line entry point for training supertagging models.

    Positional arguments are a model type (one of ``TRAINABLE_MODELS``), a
    model name and an input data file. The model type is validated first so
    that ``--opts help`` can list the training options of that type without
    the remaining arguments. The input must be one of the bulk input types.
    If ``--partitions`` is greater than 1, the data is partitioned and one
    model is trained per partition set, named with the tagger class's
    ``partition_model_name``; otherwise a single model is trained on all of
    the data. Each trained model is saved.

    With ``--log``, training output for each model goes to the file
    ``<log><modelname>.log``.

    Exits with status 1 on missing or invalid arguments and with status 0
    after printing the training option help.

    """
    usage = "%prog [options] <model-type> <model_name> <in-file>"
    description = "Trains a supertagging model using the given "\
        "input data. Specify a model type (baseline1, etc) and a name to "\
        "identify it. The data file may be a stored SequenceIndex file, or "\
        "any other type of bulk data file. "\
        "This can only be used with the follow types of models: %s" % \
        ", ".join(TRAINABLE_MODELS)
    parser = OptionParser(usage=usage, description=description)
    parser.add_option(
        '-p', '--partitions', dest="partitions", action="store", type="int",
        help="train a number of partitions of the given data. Trains a "
             "model on the complement of each partition, so it can be "
             "tested on the partition. The models will be named <NAME>n, "
             "where <NAME> is the model name and n the partition number.")
    parser.add_option(
        '--opts', dest="training_opts", action="store",
        help="options to pass to the model trainer. Type '--opts help' for "
             "a list of options for a particular model type.")
    # File input options
    parser.add_option(
        "--filetype", "--ft", dest="filetype", action="store",
        help="select the file type for the input file. Same filetypes as "
             "jazzparser",
        default='bulk-db')
    parser.add_option(
        "--file-options", "--fopt", dest="file_options", action="store",
        help="options for the input file. Type '--fopt help', using "
             "'--ft <type>' to select file type, for a list of available "
             "options.")
    # Logging output
    parser.add_option(
        '--log', dest="log", action="store",
        help="file to output training logs to. Specify a base filename; "
             "<modelname>.log will be added to the end")
    options, arguments = parse_args_with_config(parser)

    # NOTE(review): the grammar object is not used below; the call is kept
    # in case instantiating it is relied on for its side effects
    grammar = Grammar()

    # Get the model type first: we might not need the other args
    if len(arguments) == 0:
        sys.stderr.write("You must specify a model type, a model name and "
                         "an input data file as arguments\n")
        # Without this exit the arguments[0] lookup below raises IndexError
        sys.exit(1)
    model_type = arguments[0]
    if model_type not in TRAINABLE_MODELS:
        sys.stderr.write(
            "'%s' is not a valid model type. Available taggers are: %s\n" %
            (model_type, ", ".join(TRAINABLE_MODELS)))
        sys.exit(1)
    if model_type not in TAGGERS:
        sys.stderr.write(
            "'%s' isn't a registered model type. Check that "
            "the name in TRAINABLE_MODELS is correct\n" % model_type)
        sys.exit(1)

    tagger_cls = get_tagger(model_type)
    if not issubclass(tagger_cls, ModelTagger):
        sys.stderr.write(
            "'%s' tagger cannot be trained with this script. Only model "
            "taggers can be.\n" % (tagger_cls.__name__))
        sys.exit(1)
    model_cls = tagger_cls.MODEL_CLASS

    # Handle any training options that were given on the command line
    if options.training_opts is None:
        training_opts = {}
    elif options.training_opts.lower() == "help":
        print(options_help_text(
            model_cls.TRAINING_OPTIONS,
            intro="Training options for %s" % model_cls.__name__))
        sys.exit(0)
    else:
        training_opts = ModuleOption.process_option_dict(
            ModuleOption.process_option_string(options.training_opts),
            model_cls.TRAINING_OPTIONS)

    # Get the rest of the args
    if len(arguments) < 3:
        sys.stderr.write("You must specify a model type, a model name and "
                         "an input data file as arguments\n")
        sys.exit(1)
    filename = os.path.abspath(arguments[2])
    model_name = arguments[1]

    # Load the sequence data
    # Only allow bulk types
    input_data = command_line_input(
        filename=filename,
        filetype=options.filetype,
        options=options.file_options,
        allowed_types=get_input_type_names(single=False, bulk=True))

    if options.partitions is not None and options.partitions > 1:
        # Element 1 of the get_partitions() result supplies the training
        # data for each partition's model
        parts = input_data.get_partitions(options.partitions)[1]
        models = [(tagger_cls.partition_model_name(model_name, num), seqs)
                  for num, seqs in enumerate(parts)]
    else:
        models = [(model_name, input_data)]

    for part_name, seqs in models:
        # Instantiate a fresh model with this name
        model = model_cls(part_name, options=training_opts)
        if options.log is not None:
            # Prepare a logger
            logfile = "%s%s.log" % (options.log, part_name)
            print("Logging output to file %s" % logfile)
            logger = create_logger(filename=logfile)
        else:
            logger = None

        # Train the model with the loaded data
        model.train(seqs, logger=logger)
        model.save()
        print("Trained model %s" % (part_name))
def main():
    """
    Command-line entry point for training HP chord labeling models.

    Positional arguments are a model name and an input data file, which
    must be one of the bulk input types. ``--opts`` may be given several
    times; if any of the values is ``help`` the training options are listed
    and the script exits with status 0.

    ``--partitions`` is only accepted when the input is a
    ``MidiTaggerTrainingBulkInput`` that carries chord data: the chord data
    is then partitioned and one model, named ``<model_name><n>``, is trained
    per partition set. Otherwise a single model is trained with no separate
    chord data. With ``--log``, each model logs to ``<log><modelname>.log``.

    Exits with status 1 on missing arguments or an impossible partitioning
    request.

    """
    parser = OptionParser(
        usage="%prog [options] <model_name> <in-file>",
        description="Trains a chord labeling model using the given "
                    "input data. The data file may be a stored "
                    "SequenceIndex file, or "
                    "any other type of bulk data file.")
    parser.add_option(
        '-p', '--partitions', dest="partitions", action="store", type="int",
        help="train a number of partitions of the given data. Trains a "
             "model on the complement of each partition, so it can be "
             "tested on the partition. The models will be named <NAME>n, "
             "where <NAME> is the model name and n the partition number.")
    parser.add_option(
        '--opts', dest="training_opts", action="append",
        help="options to pass to the model trainer. Type '--opts help' for "
             "a list of options for a particular model type.")
    # File input options
    parser.add_option(
        "--filetype", "--ft", dest="filetype", action="store",
        help="select the file type for the input file. Same filetypes as "
             "jazzparser",
        default='bulk-db')
    parser.add_option(
        "--file-options", "--fopt", dest="file_options", action="store",
        help="options for the input file. Type '--fopt help', using "
             "'--ft <type>' to select file type, for a list of available "
             "options.")
    # Logging output
    parser.add_option(
        '--log', dest="log", action="store",
        help="file to output training logs to. Specify a base filename; "
             "<modelname>.log will be added to the end")
    options, arguments = parse_args_with_config(parser)

    grammar = Grammar()

    # Sort out the training options before touching the other arguments,
    # so that '--opts help' works on its own
    opt_strings = options.training_opts
    if opt_strings is not None and \
            any(opt.lower() == "help" for opt in opt_strings):
        print(options_help_text(HPChordLabeler.TRAINING_OPTIONS,
                                intro="Training options:"))
        sys.exit(0)
    if opt_strings is None:
        training_opts = {}
    else:
        training_opts = ModuleOption.process_option_string(opt_strings)

    if len(arguments) < 2:
        sys.stderr.write("You must specify a model name and an input data "
                         "file as arguments\n")
        sys.exit(1)
    model_name, data_path = arguments[:2]
    filename = os.path.abspath(data_path)

    # Load the sequence data, accepting bulk input types only
    bulk_types = get_input_type_names(single=False, bulk=True)
    input_data = command_line_input(filename=filename,
                                    filetype=options.filetype,
                                    options=options.file_options,
                                    allowed_types=bulk_types)

    # Partitioning applies to the chord data, not the MIDI data, so it
    # needs an input that actually has chord data attached
    if options.partitions is not None and (
            not isinstance(input_data, MidiTaggerTrainingBulkInput)
            or input_data.chords is None):
        sys.stderr.write("Can only partition chord data and no chord data "
                         "was supplied\n")
        sys.exit(1)

    if not options.partitions:
        models = [(model_name, None)]
    else:
        # The input includes chord training data
        models = []
        parts = input_data.chords.get_partitions(options.partitions)[1]
        for num, part_chords in enumerate(parts):
            models.append(("%s%d" % (model_name, num), part_chords))

    for part_name, chord_data in models:
        logger = None
        if options.log is not None:
            # Send this model's training output to its own log file
            logfile = "%s%s.log" % (options.log, part_name)
            print("Logging output to file %s" % logfile)
            logger = create_logger(filename=logfile)

        # Train a new model under this name
        model = HPChordLabeler.train(input_data,
                                     part_name,
                                     logger=logger,
                                     options=training_opts,
                                     chord_data=chord_data)
        print("Trained model %s" % (part_name))
def main():
    """
    Command-line entry point: print the dependency graph of a sequence.

    Positional arguments are an input file and the index of the sequence to
    load from it. The sequence is read as ``db`` input (the index is passed
    on through the file options), parsed using its annotations, and the
    semantics of the first result are converted to a dependency graph.
    Chords that fall on the same dependency node are joined together and
    their categories are used as the words of the graph.

    Without ``--latex`` the graph is simply printed. With ``--latex`` a
    complete Latex document (using tikz-dependency) is printed and the
    script exits with status 0 if a graph was output, 1 otherwise.

    Exits with status 1 if an argument is missing or the index is not an
    integer.

    """
    usage = "%prog [options] <results-files> <index>"
    description = "Prints a dependency tree for a parse result"
    parser = OptionParser(usage=usage, description=description)
    parser.add_option(
        "-l", "--latex", dest="latex", action="store_true",
        help="output Latex for the graphs using tikz-dependency")
    parser.add_option(
        "--file-options", "--fopt", dest="file_options", action="store",
        help="options for the input file (--file). Type '--fopt help' for "
             "a list of available options.")
    options, arguments = parser.parse_args()

    if len(arguments) < 1:
        sys.stderr.write("Specify a file to read the results from\n")
        sys.exit(1)
    filename = arguments[0]
    if len(arguments) < 2:
        sys.stderr.write("Specify an index of the sequence to load\n")
        sys.exit(1)
    try:
        index = int(arguments[1])
    except ValueError:
        # Report a bad index like the other argument errors, rather than
        # with a traceback
        sys.stderr.write("The sequence index must be an integer, not "
                         "'%s'\n" % arguments[1])
        sys.exit(1)

    grammar = get_grammar()

    # We always need an index, so this is given as an argument
    # Put it in the options list for loading the file
    fopts = options.file_options
    if fopts:
        fopts += ":index=%d" % index
    else:
        fopts = "index=%d" % index
    # Load the sequence index file
    dbinput = command_line_input(filename=filename, filetype="db",
                                 options=fopts)

    name = dbinput.name
    anal = parse_sequence_with_annotations(dbinput, grammar)[0]
    graph, time_map = semantics_to_dependency_graph(anal.semantics)

    # Join together chords that are on the same dependency node
    times = iter(sorted(time_map.values()))
    try:
        dep_time = next(times)
        finished = False
    except StopIteration:
        # No dependency nodes at all: every chord ends up in one group
        dep_time = None
        finished = True
    current_chord = []
    joined_chords = []
    for chord_time, chord in sorted(dbinput.sequence.time_map.items()):
        if not finished and chord_time >= dep_time:
            # This chord starts a new node: close off the previous group
            if len(current_chord):
                joined_chords.append(current_chord)
            current_chord = [chord]
            try:
                dep_time = next(times)
            except StopIteration:
                # Past the last node: remaining chords join this group
                finished = True
        else:
            current_chord.append(chord)
    joined_chords.append(current_chord)

    chords = [" ".join(filter_latex(str(crd)) for crd in item)
              for item in joined_chords]
    annotations = [" ".join(filter_latex(crd.category) for crd in item)
                   for item in joined_chords]
    graph.words = annotations

    if options.latex:
        # Exit with status 1 if we don't output anything
        exit_status = 1

        # Output a full Latex document in one go
        if name is not None:
            title = r"""\title{%s}
\author{}
\date{}""" % name.capitalize()
            maketitle = r"\maketitle\thispagestyle{empty}\vspace{-20pt}"
        else:
            title = ""
            maketitle = ""

        # Print the header
        # The line breaks matter: the Latex comment (%%) must end at a
        # newline or it would swallow the rest of the preamble
        print(r"""\documentclass[a4paper]{article}
\usepackage{tikz-dependency}
%% You may need to set paperheight (for width) and paperwidth (for height) to get things to fit
\usepackage[landscape,margin=1cm,paperheight=50cm]{geometry}
\pagestyle{empty}

%(title)s

\begin{document}
%(maketitle)s

\tikzstyle{every picture}+=[remember picture]
\centering

""" % {'title': title, 'maketitle': maketitle})

        if graph is not None:
            exit_status = 0
            print(dependency_graph_to_latex(graph,
                                            fmt_lab=_fmt_label,
                                            extra_rows=[chords]))
            print("\n\\vspace{15pt}")

        # Finish off the document
        print(r"""
\end{document}
""")
        sys.exit(exit_status)
    else:
        # Not outputting Latex
        print(graph)
def main():
    """
    Command-line entry point: play segmented midi files chunk by chunk.

    The single positional argument is a ``bulk-segmidi`` input file. Each
    midi input in it is split into its chunks, which are played one after
    another with ``play_stream``, optionally printing the events (``-p``)
    or just the note-on events (``--pno``) of each chunk and waiting
    ``--gap`` seconds between chunks. A keyboard interrupt stops playback
    of the current song.

    After each song the user is prompted: <enter> moves to the next song,
    ``p`` (or any unrecognised command) plays the same song again and ``x``
    exits. The script exits with status 0 once there are no more songs and
    with status 1 if no input file is given or the file contains no songs.

    """
    usage = "%prog [options] <input>"
    description = "Divides midi files into chunks, with size and offset, "\
        "given in the input file, and plays "\
        "the chunks consecutively. Input is a segmented bulk midi input file."
    parser = OptionParser(usage=usage, description=description)
    parser.add_option(
        '-g', '--gap', dest="gap", action="store", type="float",
        help="time to wait between playing each chunk in seconds "
             "(potentially float). It will take some time to load the chunk "
             "and the sequencer usually pauses before reporting it's done: "
             "this is not included in this value",
        default=0.0)
    parser.add_option(
        '-p', '--print', dest="print_events", action="store_true",
        help="print out all events for each chunk")
    parser.add_option(
        '--pno', '--print-note-ons', dest="print_note_ons",
        action="store_true",
        help="print only note-on events")
    parser.add_option(
        '--fopt', dest="file_options", action="store",
        help="options for file loading. Use '--fopt help' to see available "
             "options")
    options, arguments = parse_args_with_config(parser)

    if len(arguments) < 1:
        # Report the missing argument instead of failing on arguments[0]
        sys.stderr.write("You must specify an input file\n")
        sys.exit(1)
    filename = arguments[0]
    # Try getting a file from the command-line options
    input_data = command_line_input(filename=filename,
                                    filetype='bulk-segmidi',
                                    options=options.file_options)

    # Play each input in turn
    input_getter = iter(input_data)
    try:
        segmidi = next(input_getter)
    except StopIteration:
        sys.stderr.write("No midi inputs found in %s\n" % filename)
        sys.exit(1)

    while True:
        print("###############################")
        print("Playing '%s'" % segmidi.name)
        print("%s-beat chunks with a %d-tick offset\n" %
              (segmidi.time_unit, segmidi.tick_offset))
        slices = list(segmidi)

        try:
            for i, strm in enumerate(slices):
                print("Playing chunk %d: %d events" % (i, len(strm.trackpool)))
                if options.print_events:
                    print("\n".join(" %s" % ev
                                    for ev in sorted(strm.trackpool)))
                elif options.print_note_ons:
                    print("\n".join(" %s" % ev
                                    for ev in sorted(strm.trackpool)
                                    if type(ev) is NoteOnEvent))

                # Play this midi chunk
                play_stream(strm, block=True)
                # Leave a gap before continuing
                if options.gap > 0.0:
                    time.sleep(options.gap)
        except KeyboardInterrupt:
            # Deliberate: an interrupt only abandons the current song and
            # drops through to the prompt
            pass

        print("Continue to next song (<enter>); exit (x); play again (p)")
        command = raw_input(">> ").lower()
        if command == "x":
            sys.exit(0)
        elif command == "p":
            # Play again: loop round with the same song
            pass
        elif command == "":
            # Move to next
            try:
                segmidi = next(input_getter)
            except StopIteration:
                # That was the last song: finish cleanly
                print("No more songs")
                sys.exit(0)
        else:
            print("Unknown command: %s" % command)
            print("Playing again...")
def main():
    """
    Command-line entry point: play segmented midi files chunk by chunk.

    The single positional argument is a ``bulk-segmidi`` input file. Each
    midi input in it is split into its chunks, which are played one after
    another with ``play_stream``, optionally printing the events (``-p``)
    or just the note-on events (``--pno``) of each chunk and waiting
    ``--gap`` seconds between chunks. A keyboard interrupt stops playback
    of the current song.

    After each song the user is prompted: <enter> moves to the next song,
    ``p`` (or any unrecognised command) plays the same song again and ``x``
    exits. The script exits with status 0 once there are no more songs and
    with status 1 if no input file is given or the file contains no songs.

    """
    usage = "%prog [options] <input>"
    description = "Divides midi files into chunks, with size and offset, "\
        "given in the input file, and plays "\
        "the chunks consecutively. Input is a segmented bulk midi input file."
    parser = OptionParser(usage=usage, description=description)
    parser.add_option(
        '-g', '--gap', dest="gap", action="store", type="float",
        help="time to wait between playing each chunk in seconds "
             "(potentially float). It will take some time to load the chunk "
             "and the sequencer usually pauses before reporting it's done: "
             "this is not included in this value",
        default=0.0)
    parser.add_option(
        '-p', '--print', dest="print_events", action="store_true",
        help="print out all events for each chunk")
    parser.add_option(
        '--pno', '--print-note-ons', dest="print_note_ons",
        action="store_true",
        help="print only note-on events")
    parser.add_option(
        '--fopt', dest="file_options", action="store",
        help="options for file loading. Use '--fopt help' to see available "
             "options")
    options, arguments = parse_args_with_config(parser)

    if len(arguments) < 1:
        # Report the missing argument instead of failing on arguments[0]
        sys.stderr.write("You must specify an input file\n")
        sys.exit(1)
    filename = arguments[0]
    # Try getting a file from the command-line options
    input_data = command_line_input(filename=filename,
                                    filetype='bulk-segmidi',
                                    options=options.file_options)

    # Play each input in turn
    input_getter = iter(input_data)
    try:
        segmidi = next(input_getter)
    except StopIteration:
        sys.stderr.write("No midi inputs found in %s\n" % filename)
        sys.exit(1)

    while True:
        print("###############################")
        print("Playing '%s'" % segmidi.name)
        print("%s-beat chunks with a %d-tick offset\n" %
              (segmidi.time_unit, segmidi.tick_offset))
        slices = list(segmidi)

        try:
            for i, strm in enumerate(slices):
                print("Playing chunk %d: %d events" % (i, len(strm.trackpool)))
                if options.print_events:
                    print("\n".join(" %s" % ev
                                    for ev in sorted(strm.trackpool)))
                elif options.print_note_ons:
                    print("\n".join(" %s" % ev
                                    for ev in sorted(strm.trackpool)
                                    if type(ev) is NoteOnEvent))

                # Play this midi chunk
                play_stream(strm, block=True)
                # Leave a gap before continuing
                if options.gap > 0.0:
                    time.sleep(options.gap)
        except KeyboardInterrupt:
            # Deliberate: an interrupt only abandons the current song and
            # drops through to the prompt
            pass

        print("Continue to next song (<enter>); exit (x); play again (p)")
        command = raw_input(">> ").lower()
        if command == "x":
            sys.exit(0)
        elif command == "p":
            # Play again: loop round with the same song
            pass
        elif command == "":
            # Move to next
            try:
                segmidi = next(input_getter)
            except StopIteration:
                # That was the last song: finish cleanly
                print("No more songs")
                sys.exit(0)
        else:
            print("Unknown command: %s" % command)
            print("Playing again...")
def main():
    """
    Command-line entry point: print the dependency graph of a sequence.

    Positional arguments are an input file and the index of the sequence to
    load from it. The sequence is read as ``db`` input (the index is passed
    on through the file options), parsed using its annotations, and the
    semantics of the first result are converted to a dependency graph.
    Chords that fall on the same dependency node are joined together and
    their categories are used as the words of the graph.

    Without ``--latex`` the graph is simply printed. With ``--latex`` a
    complete Latex document (using tikz-dependency) is printed and the
    script exits with status 0 if a graph was output, 1 otherwise.

    Exits with status 1 if an argument is missing or the index is not an
    integer.

    """
    usage = "%prog [options] <results-files> <index>"
    description = "Prints a dependency tree for a parse result"
    parser = OptionParser(usage=usage, description=description)
    parser.add_option(
        "-l", "--latex", dest="latex", action="store_true",
        help="output Latex for the graphs using tikz-dependency")
    parser.add_option(
        "--file-options", "--fopt", dest="file_options", action="store",
        help="options for the input file (--file). Type '--fopt help' for "
             "a list of available options.")
    options, arguments = parser.parse_args()

    if len(arguments) < 1:
        sys.stderr.write("Specify a file to read the results from\n")
        sys.exit(1)
    filename = arguments[0]
    if len(arguments) < 2:
        sys.stderr.write("Specify an index of the sequence to load\n")
        sys.exit(1)
    try:
        index = int(arguments[1])
    except ValueError:
        # Report a bad index like the other argument errors, rather than
        # with a traceback
        sys.stderr.write("The sequence index must be an integer, not "
                         "'%s'\n" % arguments[1])
        sys.exit(1)

    grammar = get_grammar()

    # We always need an index, so this is given as an argument
    # Put it in the options list for loading the file
    fopts = options.file_options
    if fopts:
        fopts += ":index=%d" % index
    else:
        fopts = "index=%d" % index
    # Load the sequence index file
    dbinput = command_line_input(filename=filename, filetype="db",
                                 options=fopts)

    name = dbinput.name
    anal = parse_sequence_with_annotations(dbinput, grammar)[0]
    graph, time_map = semantics_to_dependency_graph(anal.semantics)

    # Join together chords that are on the same dependency node
    times = iter(sorted(time_map.values()))
    try:
        dep_time = next(times)
        finished = False
    except StopIteration:
        # No dependency nodes at all: every chord ends up in one group
        dep_time = None
        finished = True
    current_chord = []
    joined_chords = []
    for chord_time, chord in sorted(dbinput.sequence.time_map.items()):
        if not finished and chord_time >= dep_time:
            # This chord starts a new node: close off the previous group
            if len(current_chord):
                joined_chords.append(current_chord)
            current_chord = [chord]
            try:
                dep_time = next(times)
            except StopIteration:
                # Past the last node: remaining chords join this group
                finished = True
        else:
            current_chord.append(chord)
    joined_chords.append(current_chord)

    chords = [" ".join(filter_latex(str(crd)) for crd in item)
              for item in joined_chords]
    annotations = [" ".join(filter_latex(crd.category) for crd in item)
                   for item in joined_chords]
    graph.words = annotations

    if options.latex:
        # Exit with status 1 if we don't output anything
        exit_status = 1

        # Output a full Latex document in one go
        if name is not None:
            title = r"""\title{%s}
\author{}
\date{}""" % name.capitalize()
            maketitle = r"\maketitle\thispagestyle{empty}\vspace{-20pt}"
        else:
            title = ""
            maketitle = ""

        # Print the header
        # The line breaks matter: the Latex comment (%%) must end at a
        # newline or it would swallow the rest of the preamble
        print(r"""\documentclass[a4paper]{article}
\usepackage{tikz-dependency}
%% You may need to set paperheight (for width) and paperwidth (for height) to get things to fit
\usepackage[landscape,margin=1cm,paperheight=50cm]{geometry}
\pagestyle{empty}

%(title)s

\begin{document}
%(maketitle)s

\tikzstyle{every picture}+=[remember picture]
\centering

""" % {'title': title, 'maketitle': maketitle})

        if graph is not None:
            exit_status = 0
            print(dependency_graph_to_latex(graph,
                                            fmt_lab=_fmt_label,
                                            extra_rows=[chords]))
            print("\n\\vspace{15pt}")

        # Finish off the document
        print(r"""
\end{document}
""")
        sys.exit(exit_status)
    else:
        # Not outputting Latex
        print(graph)
def main():
    """
    Command-line entry point: label midi data with a chord labeling model.

    Positional arguments are the name of a trained ``HPChordLabeler`` model
    and an input file of type ``segmidi`` or ``bulk-segmidi``. For each
    midi input the model is loaded, the chord labels are computed and
    printed: either one label per time segment (``--single``) or every
    label with its probability for each timestep.

    With ``--realize`` the best label sequence is also written out as a
    midi file, overlaid on the input unless ``--chords-only`` is given.
    For bulk input the output files are named ``<realize>-<n>``, one per
    input; for a single input the name is used as given.

    Exits with status 0 after printing the labeler option help
    (``--lopt help``) and with status 1 if arguments are missing.

    """
    usage = "%prog [options] <model_name> <in-file>"
    description = "Loads a chord labeling model and uses it to assign chord "\
        "labels to the given MIDI file."
    parser = OptionParser(usage=usage, description=description)
    # File input options
    parser.add_option(
        "--filetype", "--ft", dest="filetype", action="store",
        help="select the file type for the input file. Same filetypes as "
             "jazzparser",
        default='segmidi')
    parser.add_option(
        "--file-options", "--fopt", dest="file_options", action="store",
        help="options for the input file. Type '--fopt help', using "
             "'--ft <type>' to select file type, for a list of available "
             "options.")
    # Labeling options
    parser.add_option(
        "--labeler-options", "--lopt", dest="labeler_options",
        action="append",
        help="options for the labeler. Type '--lopt help' for a list of "
             "available options.")
    parser.add_option(
        "--no-key", "--nk", dest="no_key", action="store_true",
        help="merge together labels with the same key (same as --lopt nokey)")
    # Output options
    parser.add_option(
        "--single", "-1", dest="single", action="store_true",
        help="show only one chord per time segment (same as --lopt n=1, but "
             "formats the output in a simpler way)")
    parser.add_option(
        '-r', '--realize', dest="realize", action="store",
        help="realize the chord sequence as a midi file, overlaid on the "
             "input")
    parser.add_option(
        '--chords-only', dest="chords_only", action="store_true",
        help="only realize the chords: don't overlay on the input midi (only "
             "works with -r)")
    options, arguments = parse_args_with_config(parser)

    if options.labeler_options is not None and \
            "help" in options.labeler_options:
        print(options_help_text(HPChordLabeler.LABELING_OPTIONS,
                                intro="Options for HP chord labeler"))
        sys.exit(0)

    if len(arguments) < 2:
        sys.stderr.write("You must specify a model name and an input "
                         "(MIDI) data file as arguments\n")
        sys.exit(1)
    filename = os.path.abspath(arguments[1])
    model_name = arguments[0]

    # Process the labeler options
    lopt_dict = ModuleOption.process_option_string(options.labeler_options)
    if options.single:
        # No point in getting more than one label, since we only display one
        lopt_dict['n'] = 1
    if options.no_key:
        # Just set the nokey option
        lopt_dict['nokey'] = True
    # Check they're valid before doing anything else
    HPChordLabeler.process_labeling_options(lopt_dict)

    input_data = command_line_input(filename,
                                    filetype=options.filetype,
                                    options=options.file_options,
                                    allowed_types=['segmidi', 'bulk-segmidi'])
    # A single input is wrapped in a list so that both cases can be
    # handled by the same loop
    bulk = is_bulk_type(type(input_data))
    if not bulk:
        input_data = [input_data]

    for i, data in enumerate(input_data):
        input_stream = data.stream
        print("Read midi data in %d segments" % len(data))

        # Load the model
        model = HPChordLabeler.load_model(model_name)
        # Perform labeling
        labels = model.label(data, options=lopt_dict)
        # Try labeling as it will be passed to the tagger: the result is
        # not needed here, the call just checks that it works
        model.label_lattice(data, options=lopt_dict)

        if options.single:
            # Special output for single label output
            print(", ".join(["%s" % timelabs[0][0] for timelabs in labels]))
        else:
            # Print out the labels for each timestep
            for time, timelabs in enumerate(labels):
                print("%d: %s" % (time, ", ".join(
                    ["%s (%.2e)" % (label, prob)
                     for (label, prob) in timelabs])))

        if options.realize is not None:
            # Get the single best chord label for each time
            best_labels = [timelabs[0][0] for timelabs in labels]
            # Realize as a midi file
            print("Realizing output chord sequence")
            real = ChordSequenceRealizer(best_labels,
                                         model.chord_vocab,
                                         resolution=input_stream.resolution,
                                         chord_length=data.time_unit,
                                         text_events=True)
            if options.chords_only:
                # Don't overlay
                stream = real.generate(offset=data.tick_offset)
            else:
                stream = real.generate(overlay=input_stream,
                                       offset=data.tick_offset)

            if bulk:
                # Number the outputs so that each input gets its own file
                # instead of every one overwriting the last
                out_filename = "%s-%d" % (options.realize, i)
            else:
                out_filename = options.realize
            write_midifile(stream, out_filename)
def main():
    """
    Command-line entry point for training HP chord labeling models.

    Positional arguments are a model name and an input data file, which
    must be one of the bulk input types. ``--opts`` may be given several
    times; if any of the values is ``help`` the training options are listed
    and the script exits with status 0.

    ``--partitions`` is only accepted when the input is a
    ``MidiTaggerTrainingBulkInput`` that carries chord data: the chord data
    is then partitioned and one model, named ``<model_name><n>``, is trained
    per partition set. Otherwise a single model is trained with no separate
    chord data. With ``--log``, each model logs to ``<log><modelname>.log``.

    Exits with status 1 on missing arguments or an impossible partitioning
    request.

    """
    parser = OptionParser(
        usage="%prog [options] <model_name> <in-file>",
        description="Trains a chord labeling model using the given "
                    "input data. The data file may be a stored "
                    "SequenceIndex file, or "
                    "any other type of bulk data file.")
    parser.add_option(
        '-p', '--partitions', dest="partitions", action="store", type="int",
        help="train a number of partitions of the given data. Trains a "
             "model on the complement of each partition, so it can be "
             "tested on the partition. The models will be named <NAME>n, "
             "where <NAME> is the model name and n the partition number.")
    parser.add_option(
        '--opts', dest="training_opts", action="append",
        help="options to pass to the model trainer. Type '--opts help' for "
             "a list of options for a particular model type.")
    # File input options
    parser.add_option(
        "--filetype", "--ft", dest="filetype", action="store",
        help="select the file type for the input file. Same filetypes as "
             "jazzparser",
        default='bulk-db')
    parser.add_option(
        "--file-options", "--fopt", dest="file_options", action="store",
        help="options for the input file. Type '--fopt help', using "
             "'--ft <type>' to select file type, for a list of available "
             "options.")
    # Logging output
    parser.add_option(
        '--log', dest="log", action="store",
        help="file to output training logs to. Specify a base filename; "
             "<modelname>.log will be added to the end")
    options, arguments = parse_args_with_config(parser)

    grammar = Grammar()

    # Sort out the training options before touching the other arguments,
    # so that '--opts help' works on its own
    opt_strings = options.training_opts
    if opt_strings is not None and \
            any(opt.lower() == "help" for opt in opt_strings):
        print(options_help_text(HPChordLabeler.TRAINING_OPTIONS,
                                intro="Training options:"))
        sys.exit(0)
    if opt_strings is None:
        training_opts = {}
    else:
        training_opts = ModuleOption.process_option_string(opt_strings)

    if len(arguments) < 2:
        sys.stderr.write("You must specify a model name and an input data "
                         "file as arguments\n")
        sys.exit(1)
    model_name, data_path = arguments[:2]
    filename = os.path.abspath(data_path)

    # Load the sequence data, accepting bulk input types only
    bulk_types = get_input_type_names(single=False, bulk=True)
    input_data = command_line_input(filename=filename,
                                    filetype=options.filetype,
                                    options=options.file_options,
                                    allowed_types=bulk_types)

    # Partitioning applies to the chord data, not the MIDI data, so it
    # needs an input that actually has chord data attached
    if options.partitions is not None and (
            not isinstance(input_data, MidiTaggerTrainingBulkInput)
            or input_data.chords is None):
        sys.stderr.write("Can only partition chord data and no chord data "
                         "was supplied\n")
        sys.exit(1)

    if not options.partitions:
        models = [(model_name, None)]
    else:
        # The input includes chord training data
        models = []
        parts = input_data.chords.get_partitions(options.partitions)[1]
        for num, part_chords in enumerate(parts):
            models.append(("%s%d" % (model_name, num), part_chords))

    for part_name, chord_data in models:
        logger = None
        if options.log is not None:
            # Send this model's training output to its own log file
            logfile = "%s%s.log" % (options.log, part_name)
            print("Logging output to file %s" % logfile)
            logger = create_logger(filename=logfile)

        # Train a new model under this name
        model = HPChordLabeler.train(input_data,
                                     part_name,
                                     logger=logger,
                                     options=training_opts,
                                     chord_data=chord_data)
        print("Trained model %s" % (part_name))
def main():
    """
    Command-line entry point: label midi data with a chord labeling model.

    Positional arguments are the name of a trained ``HPChordLabeler`` model
    and an input file of type ``segmidi`` or ``bulk-segmidi``. For each
    midi input the model is loaded, the chord labels are computed and
    printed: either one label per time segment (``--single``) or every
    label with its probability for each timestep.

    With ``--realize`` the best label sequence is also written out as a
    midi file, overlaid on the input unless ``--chords-only`` is given.
    For bulk input the output files are named ``<realize>-<n>``, one per
    input; for a single input the name is used as given.

    Exits with status 0 after printing the labeler option help
    (``--lopt help``) and with status 1 if arguments are missing.

    """
    usage = "%prog [options] <model_name> <in-file>"
    description = "Loads a chord labeling model and uses it to assign chord "\
        "labels to the given MIDI file."
    parser = OptionParser(usage=usage, description=description)
    # File input options
    parser.add_option(
        "--filetype", "--ft", dest="filetype", action="store",
        help="select the file type for the input file. Same filetypes as "
             "jazzparser",
        default='segmidi')
    parser.add_option(
        "--file-options", "--fopt", dest="file_options", action="store",
        help="options for the input file. Type '--fopt help', using "
             "'--ft <type>' to select file type, for a list of available "
             "options.")
    # Labeling options
    parser.add_option(
        "--labeler-options", "--lopt", dest="labeler_options",
        action="append",
        help="options for the labeler. Type '--lopt help' for a list of "
             "available options.")
    parser.add_option(
        "--no-key", "--nk", dest="no_key", action="store_true",
        help="merge together labels with the same key (same as --lopt nokey)")
    # Output options
    parser.add_option(
        "--single", "-1", dest="single", action="store_true",
        help="show only one chord per time segment (same as --lopt n=1, but "
             "formats the output in a simpler way)")
    parser.add_option(
        '-r', '--realize', dest="realize", action="store",
        help="realize the chord sequence as a midi file, overlaid on the "
             "input")
    parser.add_option(
        '--chords-only', dest="chords_only", action="store_true",
        help="only realize the chords: don't overlay on the input midi (only "
             "works with -r)")
    options, arguments = parse_args_with_config(parser)

    if options.labeler_options is not None and \
            "help" in options.labeler_options:
        print(options_help_text(HPChordLabeler.LABELING_OPTIONS,
                                intro="Options for HP chord labeler"))
        sys.exit(0)

    if len(arguments) < 2:
        sys.stderr.write("You must specify a model name and an input "
                         "(MIDI) data file as arguments\n")
        sys.exit(1)
    filename = os.path.abspath(arguments[1])
    model_name = arguments[0]

    # Process the labeler options
    lopt_dict = ModuleOption.process_option_string(options.labeler_options)
    if options.single:
        # No point in getting more than one label, since we only display one
        lopt_dict['n'] = 1
    if options.no_key:
        # Just set the nokey option
        lopt_dict['nokey'] = True
    # Check they're valid before doing anything else
    HPChordLabeler.process_labeling_options(lopt_dict)

    input_data = command_line_input(filename,
                                    filetype=options.filetype,
                                    options=options.file_options,
                                    allowed_types=['segmidi', 'bulk-segmidi'])
    # A single input is wrapped in a list so that both cases can be
    # handled by the same loop
    bulk = is_bulk_type(type(input_data))
    if not bulk:
        input_data = [input_data]

    for i, data in enumerate(input_data):
        input_stream = data.stream
        print("Read midi data in %d segments" % len(data))

        # Load the model
        model = HPChordLabeler.load_model(model_name)
        # Perform labeling
        labels = model.label(data, options=lopt_dict)
        # Try labeling as it will be passed to the tagger: the result is
        # not needed here, the call just checks that it works
        model.label_lattice(data, options=lopt_dict)

        if options.single:
            # Special output for single label output
            print(", ".join(["%s" % timelabs[0][0] for timelabs in labels]))
        else:
            # Print out the labels for each timestep
            for time, timelabs in enumerate(labels):
                print("%d: %s" % (time, ", ".join(
                    ["%s (%.2e)" % (label, prob)
                     for (label, prob) in timelabs])))

        if options.realize is not None:
            # Get the single best chord label for each time
            best_labels = [timelabs[0][0] for timelabs in labels]
            # Realize as a midi file
            print("Realizing output chord sequence")
            real = ChordSequenceRealizer(best_labels,
                                         model.chord_vocab,
                                         resolution=input_stream.resolution,
                                         chord_length=data.time_unit,
                                         text_events=True)
            if options.chords_only:
                # Don't overlay
                stream = real.generate(offset=data.tick_offset)
            else:
                stream = real.generate(overlay=input_stream,
                                       offset=data.tick_offset)

            if bulk:
                # Number the outputs so that each input gets its own file
                # instead of every one overwriting the last
                out_filename = "%s-%d" % (options.realize, i)
            else:
                out_filename = options.realize
            write_midifile(stream, out_filename)
def main(): usage = "%prog [options] <model-type> <model_name> <in-file>" description = "Trains a supertagging model using the given "\ "input data. Specify a model type (baseline1, etc) and a name to "\ "identify it. The data file may be a stored SequenceIndex file, or "\ "any other type of bulk data file. "\ "This can only be used with the follow types of models: %s" % ", ".join(TRAINABLE_MODELS) parser = OptionParser(usage=usage, description=description) parser.add_option( '-p', '--partitions', dest="partitions", action="store", type="int", help= "train a number of partitions of the given data. Trains a model on the complement of each partition, so it can be tested on the partition. The models will be named <NAME>n, where <NAME> is the model name and n the partition number." ) parser.add_option( '--opts', dest="training_opts", action="store", help= "options to pass to the model trainer. Type '--opts help' for a list of options for a particular model type." ) # File input options parser.add_option( "--filetype", "--ft", dest="filetype", action="store", help= "select the file type for the input file. Same filetypes as jazzparser", default='bulk-db') parser.add_option( "--file-options", "--fopt", dest="file_options", action="store", help= "options for the input file. Type '--fopt help', using '--ft <type>' to select file type, for a list of available options." ) # Logging output parser.add_option( '--log', dest="log", action="store", help= "file to output training logs to. Specify a base filename; <modelname>.log will be added to the end" ) options, arguments = parse_args_with_config(parser) grammar = Grammar() # Get the model type first: we might not need the other args if len(arguments) == 0: print >> sys.stderr, "You must specify a model type, a model name and an input data file as arguments" model_type = arguments[0] if model_type not in TRAINABLE_MODELS: print >>sys.stderr, "'%s' is not a valid model type. 
Available taggers are: %s" % \ (model_type, ", ".join(TRAINABLE_MODELS)) sys.exit(1) if model_type not in TAGGERS: print >>sys.stderr, "'%s' isn't a registered model type. Check that "\ "the name in TRAINABLE_MODELS is correct" % model_type sys.exit(1) tagger_cls = get_tagger(model_type) if not issubclass(tagger_cls, ModelTagger): print >> sys.stderr, "'%s' tagger cannot be trained with this script. Only model taggers can be." % ( tagger_cls.__name__) sys.exit(1) model_cls = tagger_cls.MODEL_CLASS # Handle any training options that were given on the command line if options.training_opts is None: training_opts = {} elif options.training_opts.lower() == "help": print options_help_text(model_cls.TRAINING_OPTIONS, intro="Training options for %s" % model_cls.__name__) sys.exit(0) else: training_opts = ModuleOption.process_option_dict( ModuleOption.process_option_string(options.training_opts), model_cls.TRAINING_OPTIONS) # Get the rest of the args if len(arguments) < 3: print >> sys.stderr, "You must specify a model type, a model name and an input data file as arguments" sys.exit(1) filename = os.path.abspath(arguments[2]) model_name = arguments[1] # Load the sequence data # Only allow bulk types input_data = command_line_input(filename=filename, filetype=options.filetype, options=options.file_options, allowed_types=get_input_type_names( single=False, bulk=True)) if options.partitions is not None and options.partitions > 1: parts = input_data.get_partitions(options.partitions)[1] models = [(tagger_cls.partition_model_name(model_name,num),seqs) for \ num,seqs in enumerate(parts)] else: models = [(model_name, input_data)] for part_name, seqs in models: # Instantiate a fresh model with this name model = model_cls(part_name, options=training_opts) if options.log is not None: # Prepare a logger logfile = "%s%s.log" % (options.log, part_name) print "Logging output to file %s" % logfile logger = create_logger(filename=logfile) else: logger = None # Train the model with the 
loaded data model.train(seqs, logger=logger) model.save() print "Trained model %s" % (part_name)
toptstr = ":".join(toptstr) else: toptstr = "" topts = ModuleOption.process_option_string(toptstr) # Check that the options are valid try: tagger_cls.check_options(topts) except ModuleOptionError, err: print "Problem with tagger options (--topt): %s" % err return 1 ############################ Input processing ##################### stdinput = False # Try getting a file from the command-line options input_data = command_line_input(filename=options.file, filetype=options.filetype, options=options.file_options) # Record progress in this for helpful output if input_data is None: # No input file: process command line input input_string = " ".join(clinput) input_list = [input_string] name_getter = iter(["commandline"]) # Take input from stdin if nothing else is given if len(input_string) == 0: stdinput = True # Use integers to identify each input name_getter = count() num_inputs = None else: num_inputs = 1
try: DirectedCkyParser.check_options(popts) except ModuleOptionError, err: logger.error("Problem with parser options (--popt): %s" % err) sys.exit(1) if len(arguments) < 3: print >>sys.stderr, "Specify a song corpus name, a chord labeling "\ "model name, and a file to read midi data from" sys.exit(1) # First argument is an TonalSpaceAnalysisSet corpus_filename = arguments[0] # Load the corpus file corpus = command_line_input(corpus_filename, filetype='bulk-db', options="") # The rest of the args are midi files to analyze filename = arguments[2] input_data = command_line_input(filename, filetype=options.filetype, options=options.file_options, allowed_types=['segmidi', 'bulk-segmidi']) if isinstance(input_data, SegmentedMidiInput): # Single input input_data = [input_data] # Work out how many results to print out if options.print_results == -1:
def main(): usage = "%prog [options] <model-type> <model_name> <in-file>" description = "Trains a backoff builder model using the given "\ "input data. Specify a model type (ngram, etc) and a name to "\ "identify it. The data file should be a stored SequenceIndex file." parser = OptionParser(usage=usage, description=description) parser.add_option( '-p', '--partitions', dest="partitions", action="store", type="int", help= "train a number of partitions of the given data. Trains a model on the complement of each partition, so it can be tested on the partition. The models will be named <NAME>n, where <NAME> is the model name and n the partition number." ) parser.add_option( '--opts', dest="training_opts", action="store", help= "options to pass to the model trainer. Type '--opts help' for a list of options for a particular model type." ) # File input options parser.add_option( "--filetype", "--ft", dest="filetype", action="store", help= "select the file type for the input file. Same filetypes as jazzparser", default='bulk-db') parser.add_option( "--file-options", "--fopt", dest="file_options", action="store", help= "options for the input file. Type '--fopt help', using '--ft <type>' to select file type, for a list of available options." 
) options, arguments = parse_args_with_config(parser) if len(arguments) < 3: print >> sys.stderr, "You must specify a model type, a model name and an input data file as arguments" sys.exit(1) filename = os.path.abspath(arguments[2]) model_type = arguments[0] model_name = arguments[1] builder_cls = get_backoff_builder(model_type) model_cls = builder_cls.MODEL_CLASS # Load the sequence data from a dbinput file input_data = command_line_input( filename=filename, filetype=options.filetype, options=options.file_options, allowed_types=['bulk-db', 'bulk-db-annotated']) # Handle any training options that were given on the command line if options.training_opts is None: training_opts = {} elif options.training_opts.lower() == "help": print options_help_text(model_cls.TRAINING_OPTIONS, intro="Training options for %s" % model_cls.__name__) sys.exit(0) else: training_opts = ModuleOption.process_option_dict( ModuleOption.process_option_string(options.training_opts), model_cls.TRAINING_OPTIONS) if options.partitions is not None: parts = holdout_partition(input_data, options.partitions) models = [(builder_cls.partition_model_name(model_name,num),seqs) for \ num,seqs in enumerate(parts)] else: models = [(model_name, input_data)] for part_name, seqs in models: # Instantiate a fresh model with this name model = model_cls(part_name, options=training_opts) # Train it with the loaded data model.train(seqs) model.save() print "Trained model %s" % (part_name)