def get_gold_semantics(self):
    """
    Tries to return a gold standard semantics.

    In some cases this is stored along with the results in C{gold_parse}.
    In others this is not available, but a gold annotated chord sequence
    is (C{gold_sequence}): then we can get the gold semantics by parsing
    the annotations. Note that this might take a little bit of time.

    In other cases neither is available. Then C{None} will be returned.

    @return: C{gold_parse} if it is set; otherwise the semantics of the
        single parse of C{gold_sequence}'s annotations. C{None} if
        neither attribute is set, if parsing raises C{ParseError}, or if
        the parser does not return exactly one result.

    """
    # Cheapest source first: a gold result stored with the results
    if self.gold_parse is not None:
        return self.gold_parse
    # No annotated sequence either: nothing we can do
    if self.gold_sequence is None:
        return None

    # Only load the parsing machinery once we know we have to parse
    from jazzparser.evaluation.parsing import parse_sequence_with_annotations

    try:
        gold_parses = parse_sequence_with_annotations(
            self.gold_sequence,
            grammar=get_grammar(),
            allow_subparses=False)
    except ParseError:
        # Could not parse annotated sequence
        return None

    if len(gold_parses) != 1:
        # This shouldn't happen, since allow_subparses was False
        return None
    # Got a result: return its semantics
    return gold_parses[0].semantics
def get_gold_semantics(self):
    """
    Tries to return a gold standard semantics.

    In some cases this is stored along with the results in C{gold_parse}.
    In others this is not available, but a gold annotated chord sequence
    is (C{gold_sequence}): then we can get the gold semantics by parsing
    the annotations. Note that this might take a little bit of time.

    In other cases neither is available. Then C{None} will be returned.

    @return: C{gold_parse} if it is set; otherwise the semantics of the
        single parse of C{gold_sequence}'s annotations. C{None} if
        neither attribute is set, if parsing raises C{ParseError}, or if
        the parser does not return exactly one result.

    """
    stored = self.gold_parse
    if stored is not None:
        # A gold result was saved with the results: no parsing needed
        return stored

    annotated = self.gold_sequence
    if annotated is None:
        # Neither a stored result nor annotations to parse
        return None

    # Deferred so that the two cheap paths above never load the parser
    from jazzparser.evaluation.parsing import parse_sequence_with_annotations

    try:
        gold_parses = parse_sequence_with_annotations(
            annotated, grammar=get_grammar(), allow_subparses=False)
    except ParseError:
        # Could not parse annotated sequence
        return None

    # Anything but a single result shouldn't happen, since
    # allow_subparses was False
    if len(gold_parses) == 1:
        return gold_parses[0].semantics
    return None
def get_gold_analysis(self):
    """
    Parses the annotations to get a gold analysis.

    The input itself is handed to C{parse_sequence_with_annotations}
    together with the grammar from C{get_grammar()}, with sub-parses
    disallowed. Nothing raised by the parser is caught here.

    @return: the semantics of the first parse returned.

    """
    from jazzparser.evaluation.parsing import parse_sequence_with_annotations
    from jazzparser.grammar import get_grammar

    grammar = get_grammar()
    results = parse_sequence_with_annotations(
        self, grammar, allow_subparses=False)
    gold = results[0]
    return gold.semantics
def keys_for_sequence(sequence, grammar=None):
    """
    Takes a chord sequence from the chord corpus and parses using its
    annotations. Pairs each chord with its key (as a pitch class
    integer).

    This is simply worked out, once the parse is done. Every chord in a
    cadence has the same key as the resolution of the cadence, which can
    be read off by taking the equal temperament pitch class for the tonal
    space point of the resolution.

    @param sequence: annotated chord sequence. Its C{chords} are walked in
        order and their C{duration}s summed to get each chord's onset.
    @param grammar: grammar to parse with; C{get_grammar()} is used if
        none is given.
    @return: list of C{(chord, key)} pairs, one per chord in
        C{sequence.chords}.
    @raise ParseError: if the annotations do not yield exactly one (full)
        parse.

    """
    from jazzparser.evaluation.parsing import parse_sequence_with_annotations

    if grammar is None:
        grammar = get_grammar()
    # Try parsing the sequence according to the tree in the database
    sub_parses = parse_sequence_with_annotations(sequence, grammar)
    if len(sub_parses) != 1:
        # We can only continue if we got a full parse
        raise ParseError("could not fully parse the sequence %s." %
                         sequence.string_name)
    sems = sub_parses[0].semantics

    # Get the keys for this LF, and the times when they start
    keys = grammar.formalism.semantics_to_keys(sems)
    key_roots, change_times = zip(*keys)

    # Walk the chords once, moving on to a later key whenever the chord's
    # onset has reached that key's start time. The first start time is
    # never consulted, as it should be 0. Once the last key is reached it
    # simply stays in force for all remaining chords.
    last = len(key_roots) - 1
    current = 0
    elapsed = 0
    chord_keys = []
    for chord in sequence.chords:
        # A loop, not a single step, so that a chord long enough to span
        # several key changes still gets the key in force at its onset
        while current < last and elapsed >= change_times[current + 1]:
            current += 1
        chord_keys.append((chord, key_roots[current]))
        elapsed += chord.duration
    return chord_keys
def keys_for_sequence(sequence, grammar=None):
    """
    Takes a chord sequence from the chord corpus and parses using its
    annotations. Pairs each chord with its key (as a pitch class
    integer).

    This is simply worked out, once the parse is done. Every chord in a
    cadence has the same key as the resolution of the cadence, which can
    be read off by taking the equal temperament pitch class for the tonal
    space point of the resolution.

    @param sequence: annotated chord sequence. Its C{chords} are walked in
        order and their C{duration}s summed to get each chord's onset.
    @param grammar: grammar to parse with; C{get_grammar()} is used if
        none is given.
    @return: list of C{(chord, key)} pairs, one per chord in
        C{sequence.chords}.
    @raise ParseError: if the annotations do not yield exactly one (full)
        parse.

    """
    from jazzparser.evaluation.parsing import parse_sequence_with_annotations

    if grammar is None:
        grammar = get_grammar()
    # Try parsing the sequence according to the tree in the database
    sub_parses = parse_sequence_with_annotations(sequence, grammar)
    if len(sub_parses) != 1:
        # We can only continue if we got a full parse
        raise ParseError("could not fully parse the sequence %s." %
                         sequence.string_name)
    sems = sub_parses[0].semantics

    # Get the keys for this LF, and the times when they start
    keys = grammar.formalism.semantics_to_keys(sems)
    key_roots, change_times = zip(*keys)

    # Walk the chords once, moving on to a later key whenever the chord's
    # onset has reached that key's start time. The first start time is
    # never consulted, as it should be 0. Once the last key is reached it
    # simply stays in force for all remaining chords.
    last = len(key_roots) - 1
    current = 0
    elapsed = 0
    chord_keys = []
    for chord in sequence.chords:
        # A loop, not a single step, so that a chord long enough to span
        # several key changes still gets the key in force at its onset
        while current < last and elapsed >= change_times[current + 1]:
            current += 1
        chord_keys.append((chord, key_roots[current]))
        elapsed += chord.duration
    return chord_keys
def _parse_seq(seq):
    """
    Parses the annotations of C{seq} to get a semantics.

    C{seq} is wrapped with C{DbInput.from_sequence} and parsed, without
    allowing sub-parses, using the C{grammar} taken from the enclosing
    scope.

    @return: the semantics of the first parse, or C{None} if the
        annotated sequence raised a C{ParseError} (in which case a
        message naming the sequence is written to stderr).

    """
    try:
        gold_parses = parse_sequence_with_annotations(
            DbInput.from_sequence(seq),
            grammar=grammar,
            allow_subparses=False)
    except ParseError as err:
        # Could not parse annotated sequence
        sys.stderr.write("Could not parse sequence '%s': %s\n" %
                         (seq.string_name, err))
        return None
    # Got a result: return its semantics
    return gold_parses[0].semantics
def get_gold_analysis(self):
    """
    Parses the annotations, if present, to get a gold analysis.

    Unlike L{AnnotatedDbInput}, this input type cannot be assumed to
    have annotations. A C{ParseError} from the annotation parser is
    therefore not propagated: C{None} is returned instead.

    @return: the semantics of the first parse, or C{None} if parsing the
        annotations raised a C{ParseError}.

    """
    from jazzparser.evaluation.parsing import parse_sequence_with_annotations
    from jazzparser.grammar import get_grammar
    from jazzparser.parsers import ParseError

    try:
        results = parse_sequence_with_annotations(
            self, get_grammar(), allow_subparses=False)
    except ParseError:
        return None
    return results[0].semantics
def main():
    """
    Command-line entry point: prints a dependency tree for a parse result.

    Expects two positional arguments: the input file and the integer
    index of the sequence to load from it. The sequence's annotations are
    parsed and the semantics of the first parse is converted to a
    dependency graph, which is printed either as it is or, with
    C{--latex}, as a complete LaTeX document using tikz-dependency.

    Exits with status 1 if an argument is missing or the index is not an
    integer. In LaTeX mode the process always ends through C{sys.exit}.

    """
    def _emit(text=""):
        # Writes text plus a newline to stdout, like the print statement,
        # but valid on both Python 2 and 3
        sys.stdout.write("%s\n" % (text,))

    usage = "%prog [options] <results-files> <index>"
    description = "Prints a dependency tree for a parse result"
    parser = OptionParser(usage=usage, description=description)
    parser.add_option("-l", "--latex", dest="latex", action="store_true",
                      help="output Latex for the graphs using tikz-dependency")
    parser.add_option("--file-options", "--fopt", dest="file_options",
                      action="store",
                      help="options for the input file (--file). Type "
                           "'--fopt help' for a list of available options.")
    options, arguments = parser.parse_args()

    if len(arguments) < 1:
        sys.stderr.write("Specify a file to read the results from\n")
        sys.exit(1)
    filename = arguments[0]
    if len(arguments) < 2:
        sys.stderr.write("Specify an index of the sequence to load\n")
        sys.exit(1)
    try:
        index = int(arguments[1])
    except ValueError:
        sys.stderr.write("The sequence index must be an integer, not '%s'\n"
                         % arguments[1])
        sys.exit(1)

    grammar = get_grammar()

    # We always need an index, so this is given as an argument
    # Put it in the options list for loading the file
    fopts = options.file_options
    if fopts:
        fopts += ":index=%d" % index
    else:
        fopts = "index=%d" % index
    # Load the sequence index file
    dbinput = command_line_input(filename=filename, filetype="db",
                                 options=fopts)

    name = dbinput.name

    anal = parse_sequence_with_annotations(dbinput, grammar)[0]
    graph, time_map = semantics_to_dependency_graph(anal.semantics)

    # Join together chords that are on the same dependency node
    times = iter(sorted(time_map.values()))
    dep_time = next(times)
    current_chord = []
    joined_chords = []
    finished = False
    for chord_time, chord in sorted(dbinput.sequence.time_map.items()):
        if chord_time >= dep_time and not finished:
            # This chord starts a new dependency node
            if len(current_chord):
                joined_chords.append(current_chord)
            current_chord = [chord]
            try:
                dep_time = next(times)
            except StopIteration:
                # No more nodes: everything left joins the last group
                finished = True
        else:
            current_chord.append(chord)
    joined_chords.append(current_chord)

    chords = [" ".join(filter_latex(str(crd)) for crd in item)
              for item in joined_chords]
    annotations = [" ".join(filter_latex(crd.category) for crd in item)
                   for item in joined_chords]
    graph.words = annotations

    if options.latex:
        # Exit with status 1 if we don't output anything
        exit_status = 1

        # Output a full Latex document in one go
        # NOTE(review): the line breaks inside the LaTeX templates below
        # were restored by hand; at the very least the "%%" comment line
        # must stand on a line of its own. Confirm against the intended
        # layout.
        if name is not None:
            title = r"""\title{%s}
\author{}
\date{}""" % name.capitalize()
            maketitle = r"\maketitle\thispagestyle{empty}\vspace{-20pt}"
        else:
            title = ""
            maketitle = ""

        # Print the header
        _emit(r"""\documentclass[a4paper]{article}
\usepackage{tikz-dependency}
%% You may need to set paperheight (for width) and paperwidth (for height) to get things to fit
\usepackage[landscape,margin=1cm,paperheight=50cm]{geometry}
\pagestyle{empty}

%(title)s

\begin{document}
%(maketitle)s

\tikzstyle{every picture}+=[remember picture]
\centering

""" % {'title': title, 'maketitle': maketitle})

        if graph is not None:
            exit_status = 0
            _emit(dependency_graph_to_latex(graph,
                                            fmt_lab=_fmt_label,
                                            extra_rows=[chords]))
            _emit("\n\\vspace{15pt}")

        # Finish off the document
        _emit(r"""
\end{document}
""")

        sys.exit(exit_status)
    else:
        # Not outputing Latex
        _emit(graph)
def main():
    """
    Command-line entry point: parses sequences from a sequence index file
    using the annotations stored in the same file.

    Every sequence in the file (or just the one selected with C{--index})
    is parsed from its annotations. Parse failures are logged and counted
    and do not stop the run. Derivation traces and tonal space paths are
    printed on request.

    Exits with status 1 if no sequence file is given.

    @return: 0 if only the parser option help was requested; otherwise
        nothing.

    """
    def _emit(text=""):
        # Writes text plus a newline to stdout, like the print statement,
        # but valid on both Python 2 and 3
        sys.stdout.write("%s\n" % (text,))

    usage = "%prog [options] <seq-file>"
    description = "Parses a sequence from a sequence index file using the "\
        "annotations stored in the same file."
    parser = OptionParser(usage=usage, description=description)
    parser.add_option("--popt", "--parser-options", dest="popts",
                      action="append",
                      help="specify options for the parser. Type '--popt "
                           "help' to get a list of options (we use a "
                           "DirectedCkyParser)")
    parser.add_option("--derivations", "--deriv", dest="derivations",
                      action="store_true",
                      help="print out derivation traces of all the results")
    parser.add_option("--index", "-i", dest="index", action="store",
                      type="int",
                      help="parse just the sequence with this index")
    parser.add_option("--quiet", "-q", dest="quiet", action="store_true",
                      help="show only errors in the output")
    parser.add_option("--tonal-space", "--ts", dest="tonal_space",
                      action="store_true",
                      help="show the tonal space path (with -q, shows only "
                           "paths)")
    parser.add_option("--output-set", "-o", dest="output_set",
                      action="store",
                      help="store the analyses to a tonal space analysis "
                           "set with this name")
    parser.add_option("--trace-parse", "-t", dest="trace_parse",
                      action="store_true",
                      help="output a trace of the shift-reduce parser's "
                           "operations in producing the full interpretation "
                           "from the annotations")
    options, arguments = parser.parse_args()

    if len(arguments) < 1:
        # This is a usage error, so it belongs on stderr
        sys.stderr.write("You must specify a sequence file\n")
        sys.exit(1)

    if options.popts is not None:
        poptstr = options.popts
        if "help" in [s.strip().lower() for s in poptstr]:
            # Output this tagger's option help
            _emit(options_help_text(
                DirectedCkyParser.PARSER_OPTIONS,
                intro="Available options for the directed parser"))
            return 0
    else:
        poptstr = ""
    # NOTE(review): popts, stats and full_analyses are built but never
    # consumed in this function as it stands, and --output-set is never
    # read. Presumably reporting/saving code is meant to follow the
    # loop; confirm.
    popts = ModuleOption.process_option_string(poptstr)

    grammar = get_grammar()
    if options.quiet:
        logger = create_plain_stderr_logger(log_level=logging.ERROR)
    else:
        logger = create_plain_stderr_logger()

    # The parse trace goes to the same logger, but only if requested
    if options.trace_parse:
        parse_logger = logger
    else:
        parse_logger = None

    seq_index = SequenceIndex.from_file(arguments[0])
    # Get the chord sequence(s)
    if options.index is None:
        seqs = seq_index.sequences
    else:
        seqs = [seq_index.sequence_by_index(options.index)]
    logger.info("%d sequences\n" % len(seqs))

    full_analyses = []
    stats = {
        'full': 0,
        'partial': 0,
        'fail': 0,
    }
    # Try parsing every sequence
    for seq in seqs:
        logger.info("====== Sequence %s =======" % seq.string_name)
        try:
            results = parse_sequence_with_annotations(
                seq, grammar, logger=logger, parse_logger=parse_logger)
        except ParseError as err:
            logger.error("Error parsing: %s" % err)
            stats['fail'] += 1
        else:
            # This may have resulted in multiple partial parses
            logger.info("%d partial parses" % len(results))

            if len(results) == 1:
                stats['full'] += 1
            else:
                stats['partial'] += 1

            if options.derivations:
                # Output the derivation trace for each partial parse
                for result in results:
                    _emit()
                    _emit(result.derivation_trace)

            if options.tonal_space:
                # Output the tonal space coordinates
                path = grammar.formalism.sign_to_coordinates(results[0])
                for i, point in enumerate(path):
                    _emit("%d, %d: %s" % (seq.id, i, point))

            # Only include a result in the output analyses if it was a
            # full parse
            if len(results) == 1:
                full_analyses.append((seq.string_name, results[0].semantics))
            else:
                logger.warning("%s was not included in the output analyses, "
                               "since it was not fully parsed" %
                               seq.string_name)
def main():
    """
    Command-line entry point: prints a dependency tree for a parse result.

    Expects two positional arguments: the input file and the integer
    index of the sequence to load from it. The sequence's annotations are
    parsed and the semantics of the first parse is converted to a
    dependency graph, which is printed either as it is or, with
    C{--latex}, as a complete LaTeX document using tikz-dependency.

    Exits with status 1 if an argument is missing or the index is not an
    integer. In LaTeX mode the process always ends through C{sys.exit}.

    """
    def _emit(text=""):
        # Writes text plus a newline to stdout, like the print statement,
        # but valid on both Python 2 and 3
        sys.stdout.write("%s\n" % (text,))

    usage = "%prog [options] <results-files> <index>"
    description = "Prints a dependency tree for a parse result"
    parser = OptionParser(usage=usage, description=description)
    parser.add_option("-l", "--latex", dest="latex", action="store_true",
                      help="output Latex for the graphs using tikz-dependency")
    parser.add_option("--file-options", "--fopt", dest="file_options",
                      action="store",
                      help="options for the input file (--file). Type "
                           "'--fopt help' for a list of available options.")
    options, arguments = parser.parse_args()

    if len(arguments) < 1:
        sys.stderr.write("Specify a file to read the results from\n")
        sys.exit(1)
    filename = arguments[0]
    if len(arguments) < 2:
        sys.stderr.write("Specify an index of the sequence to load\n")
        sys.exit(1)
    try:
        index = int(arguments[1])
    except ValueError:
        sys.stderr.write("The sequence index must be an integer, not '%s'\n"
                         % arguments[1])
        sys.exit(1)

    grammar = get_grammar()

    # We always need an index, so this is given as an argument
    # Put it in the options list for loading the file
    fopts = options.file_options
    if fopts:
        fopts += ":index=%d" % index
    else:
        fopts = "index=%d" % index
    # Load the sequence index file
    dbinput = command_line_input(filename=filename, filetype="db",
                                 options=fopts)

    name = dbinput.name

    anal = parse_sequence_with_annotations(dbinput, grammar)[0]
    graph, time_map = semantics_to_dependency_graph(anal.semantics)

    # Join together chords that are on the same dependency node
    times = iter(sorted(time_map.values()))
    dep_time = next(times)
    current_chord = []
    joined_chords = []
    finished = False
    for chord_time, chord in sorted(dbinput.sequence.time_map.items()):
        if chord_time >= dep_time and not finished:
            # This chord starts a new dependency node
            if len(current_chord):
                joined_chords.append(current_chord)
            current_chord = [chord]
            try:
                dep_time = next(times)
            except StopIteration:
                # No more nodes: everything left joins the last group
                finished = True
        else:
            current_chord.append(chord)
    joined_chords.append(current_chord)

    chords = [" ".join(filter_latex(str(crd)) for crd in item)
              for item in joined_chords]
    annotations = [" ".join(filter_latex(crd.category) for crd in item)
                   for item in joined_chords]
    graph.words = annotations

    if options.latex:
        # Exit with status 1 if we don't output anything
        exit_status = 1

        # Output a full Latex document in one go
        # NOTE(review): the line breaks inside the LaTeX templates below
        # were restored by hand; at the very least the "%%" comment line
        # must stand on a line of its own. Confirm against the intended
        # layout.
        if name is not None:
            title = r"""\title{%s}
\author{}
\date{}""" % name.capitalize()
            maketitle = r"\maketitle\thispagestyle{empty}\vspace{-20pt}"
        else:
            title = ""
            maketitle = ""

        # Print the header
        _emit(r"""\documentclass[a4paper]{article}
\usepackage{tikz-dependency}
%% You may need to set paperheight (for width) and paperwidth (for height) to get things to fit
\usepackage[landscape,margin=1cm,paperheight=50cm]{geometry}
\pagestyle{empty}

%(title)s

\begin{document}
%(maketitle)s

\tikzstyle{every picture}+=[remember picture]
\centering

""" % {'title': title, 'maketitle': maketitle})

        if graph is not None:
            exit_status = 0
            _emit(dependency_graph_to_latex(graph,
                                            fmt_lab=_fmt_label,
                                            extra_rows=[chords]))
            _emit("\n\\vspace{15pt}")

        # Finish off the document
        _emit(r"""
\end{document}
""")

        sys.exit(exit_status)
    else:
        # Not outputing Latex
        _emit(graph)
def train(data, estimator, grammar, cutoff=0, logger=None, chord_map=None,
          order=2, backoff_orders=0, backoff_kwargs=None):
    """
    Initializes and trains an HMM in a supervised fashion using the given
    training data. Training data should be chord sequence data (input
    type C{bulk-db} or C{bulk-db-annotated}).

    The code below prepares the training sequences: each input is parsed
    from its annotations to get a gold standard analysis, and every chord
    is paired with the relative tonal space point and the function of the
    analysis point it falls under. Inputs that cannot be parsed are
    logged and skipped.

    NOTE(review): C{estimator}, C{cutoff}, C{order}, C{backoff_orders}
    and C{backoff_kwargs} are not used by the code below, and the
    collected C{training_data} is neither returned nor passed on.
    Presumably the model-building step is meant to follow; confirm.

    @param data: iterable of chord sequence inputs. Each must have
        C{chords}, whose items have C{root}, C{type} and C{duration}.
    @param grammar: grammar used to parse the annotations and to read
        coordinates and functions off the resulting semantics.
    @param logger: logger for progress and errors; a dummy logger is
        created if none is given.
    @param chord_map: optional mapping applied to each chord's type.
    @param backoff_kwargs: C{None} (the default) is treated as an empty
        dictionary.

    """
    # A fresh dict per call, rather than one shared mutable default
    if backoff_kwargs is None:
        backoff_kwargs = {}
    # Prepare a dummy logger if none was given
    if logger is None:
        logger = create_dummy_logger()
    logger.info(">>> Beginning training of ngram backoff model")

    training_data = []
    # Generate the gold standard data by parsing the annotations
    for dbinput in data:
        # Get a gold standard tonal space sequence
        try:
            parses = parse_sequence_with_annotations(dbinput, grammar,
                                                     allow_subparses=False)
        except ParseError as err:
            # Just skip this sequence
            logger.error('Could not get a GS parse of %s: %s' %
                         (dbinput, err))
            continue
        # There should only be one of these now
        parse = parses[0]
        if parse is None:
            logger.error('Could not get a GS parse of %s' % (dbinput,))
            continue

        # Get the form of the analysis we need for the training
        if chord_map is None:
            chords = [(c.root, c.type) for c in dbinput.chords]
        else:
            chords = [(c.root, chord_map[c.type]) for c in dbinput.chords]

        # Only the points are needed here: the times used for
        # synchronization come with the functions below
        points, __ = zip(
            *grammar.formalism.semantics_to_coordinates(parse.semantics))
        # Run through the sequence, transforming absolute points into
        # the condensed relative representation
        ec0 = EnharmonicCoordinate.from_harmonic_coord(points[0])
        # The first point is relative to the origin and always in the
        # (0,0) enharmonic space
        rel_points = [(0, 0, ec0.x, ec0.y)]
        for point in points[1:]:
            ec1 = EnharmonicCoordinate.from_harmonic_coord(point)
            # Find the nearest enharmonic instance of this point to the last
            nearest = ec0.nearest((ec1.x, ec1.y))
            # Work out how much we have to shift this by to get the point
            dX = ec1.X - nearest.X
            dY = ec1.Y - nearest.Y
            rel_points.append((dX, dY, ec1.x, ec1.y))
            ec0 = ec1
        funs, times = zip(
            *grammar.formalism.semantics_to_functions(parse.semantics))

        ### Synchronize the chords with the points and functions
        # We may need to repeat chords to match up with analysis
        # points that span multiple chords
        analysis = iter(zip(rel_points, funs, times))
        rel_point, fun, __ = next(analysis)
        try:
            next_rel_point, next_fun, next_anal_time = next(analysis)
        except StopIteration:
            # Only one analysis point: it covers every chord
            reached_end = True
        else:
            reached_end = False
        # Keep track of how much time has elapsed
        time = 0
        training_seq = []
        for crd_pair, chord in zip(chords, dbinput.chords):
            # reached_end is tested first: once it is set there may be no
            # next time to compare against
            if not reached_end and time >= next_anal_time:
                # Move on to the next analysis point
                rel_point, fun = next_rel_point, next_fun
                try:
                    next_rel_point, next_fun, next_anal_time = next(analysis)
                except StopIteration:
                    # No more points: keep using the same to the end
                    reached_end = True
            training_seq.append((crd_pair, (rel_point, fun)))
            time += chord.duration
        training_data.append(training_seq)
def train(data, estimator, grammar, cutoff=0, logger=None, chord_map=None,
          order=2, backoff_orders=0, backoff_kwargs=None):
    """
    Initializes and trains an HMM in a supervised fashion using the given
    training data. Training data should be chord sequence data (input
    type C{bulk-db} or C{bulk-db-annotated}).

    The code below prepares the training sequences: each input is parsed
    from its annotations to get a gold standard analysis, and every chord
    is paired with the relative tonal space point and the function of the
    analysis point it falls under. Inputs that cannot be parsed are
    logged and skipped.

    NOTE(review): C{estimator}, C{cutoff}, C{order}, C{backoff_orders}
    and C{backoff_kwargs} are not used by the code below, and the
    collected C{training_data} is neither returned nor passed on.
    Presumably the model-building step is meant to follow; confirm.

    @param data: iterable of chord sequence inputs. Each must have
        C{chords}, whose items have C{root}, C{type} and C{duration}.
    @param grammar: grammar used to parse the annotations and to read
        coordinates and functions off the resulting semantics.
    @param logger: logger for progress and errors; a dummy logger is
        created if none is given.
    @param chord_map: optional mapping applied to each chord's type.
    @param backoff_kwargs: C{None} (the default) is treated as an empty
        dictionary.

    """
    # A fresh dict per call, rather than one shared mutable default
    if backoff_kwargs is None:
        backoff_kwargs = {}
    # Prepare a dummy logger if none was given
    if logger is None:
        logger = create_dummy_logger()
    logger.info(">>> Beginning training of ngram backoff model")

    training_data = []
    # Generate the gold standard data by parsing the annotations
    for dbinput in data:
        # Get a gold standard tonal space sequence
        try:
            parses = parse_sequence_with_annotations(dbinput, grammar,
                                                     allow_subparses=False)
        except ParseError as err:
            # Just skip this sequence
            logger.error('Could not get a GS parse of %s: %s' %
                         (dbinput, err))
            continue
        # There should only be one of these now
        parse = parses[0]
        if parse is None:
            logger.error('Could not get a GS parse of %s' % (dbinput,))
            continue

        # Get the form of the analysis we need for the training
        if chord_map is None:
            chords = [(c.root, c.type) for c in dbinput.chords]
        else:
            chords = [(c.root, chord_map[c.type]) for c in dbinput.chords]

        # Only the points are needed here: the times used for
        # synchronization come with the functions below
        points, __ = zip(
            *grammar.formalism.semantics_to_coordinates(parse.semantics))
        # Run through the sequence, transforming absolute points into
        # the condensed relative representation
        ec0 = EnharmonicCoordinate.from_harmonic_coord(points[0])
        # The first point is relative to the origin and always in the
        # (0,0) enharmonic space
        rel_points = [(0, 0, ec0.x, ec0.y)]
        for point in points[1:]:
            ec1 = EnharmonicCoordinate.from_harmonic_coord(point)
            # Find the nearest enharmonic instance of this point to the last
            nearest = ec0.nearest((ec1.x, ec1.y))
            # Work out how much we have to shift this by to get the point
            dX = ec1.X - nearest.X
            dY = ec1.Y - nearest.Y
            rel_points.append((dX, dY, ec1.x, ec1.y))
            ec0 = ec1
        funs, times = zip(
            *grammar.formalism.semantics_to_functions(parse.semantics))

        ### Synchronize the chords with the points and functions
        # We may need to repeat chords to match up with analysis
        # points that span multiple chords
        analysis = iter(zip(rel_points, funs, times))
        rel_point, fun, __ = next(analysis)
        try:
            next_rel_point, next_fun, next_anal_time = next(analysis)
        except StopIteration:
            # Only one analysis point: it covers every chord
            reached_end = True
        else:
            reached_end = False
        # Keep track of how much time has elapsed
        time = 0
        training_seq = []
        for crd_pair, chord in zip(chords, dbinput.chords):
            # reached_end is tested first: once it is set there may be no
            # next time to compare against
            if not reached_end and time >= next_anal_time:
                # Move on to the next analysis point
                rel_point, fun = next_rel_point, next_fun
                try:
                    next_rel_point, next_fun, next_anal_time = next(analysis)
                except StopIteration:
                    # No more points: keep using the same to the end
                    reached_end = True
            training_seq.append((crd_pair, (rel_point, fun)))
            time += chord.duration
        training_data.append(training_seq)