def tonal_space_alignment(sem1, sem2, distance=False):
    """
    Runs the same alignment algorithm as L{tonal_space_distance} and
    L{tonal_space_alignment_costs}, but reports the edit operations that
    make up the optimal alignment rather than a score or counts.

    Each entry of the operation list is one of:
     - "I": insertion;
     - "D": deletion;
     - "A": alignment;
     - "S": full substitution;
     - another string starting with "S": partial substitution
       ("Sf" for substitution type "fun", "Sr" for type "root").

    @param sem1: first logical form to compare
    @param sem2: second logical form to compare
    @param distance: if true, the distance metric is appended to the
        returned tuple. Off by default for backward compatibility.
    @return: (operations, steps1, steps2), or
        (operations, steps1, steps2, dist) if C{distance} is true,
        where steps1 and steps2 are the step sequences that were compared.

    """
    from jazzparser.utils.distance import levenshtein_distance_with_pointers

    # First convert both logical forms to (coord,fun) pairs, then turn
    # those paths of points into paths of steps and functions
    coord_funs = [_lf_to_coord_funs(sem) for sem in (sem1, sem2)]
    steps1, steps2 = [_steps_list(seq) for seq in coord_funs]

    dists, pointers = levenshtein_distance_with_pointers(
        steps1,
        steps2,
        delins_cost=2,
        subst_cost_fun=_subst_cost)

    # Operation code for each kind of substitution; any other
    # substitution type is recorded as a plain alignment ("A")
    subst_codes = {
        "both": "S",
        "fun": "Sf",
        "root": "Sr",
    }

    # Follow the pointers back from the last cell of the matrix (final
    # row, final column) until the origin cell (0, 0) is reached
    row = len(dists) - 1
    col = len(dists[0]) - 1
    ops = []
    while (row, col) != (0, 0):
        pointer = pointers[row][col]
        if pointer == "I":
            ops.append("I")
            col -= 1
            continue
        if pointer == "D":
            ops.append("D")
            row -= 1
            continue
        # Anything else is a diagonal move: work out what kind
        kind = _subst_type(steps1[row - 1], steps2[col - 1])
        ops.append(subst_codes.get(kind, "A"))
        col -= 1
        row -= 1

    # Operations were gathered end-to-start: put them in forward order
    ops.reverse()

    if not distance:
        return ops, steps1, steps2
    # The distance is the value in the final cell of the matrix, halved
    dist = float(dists[-1][-1]) / 2.0
    return ops, steps1, steps2, dist
def tonal_space_alignment(sem1, sem2, distance=False):
    """
    Computes the optimal alignment of two logical forms with the same
    algorithm as L{tonal_space_distance} and
    L{tonal_space_alignment_costs}, returning the list of operations
    that produces it.

    Operations are encoded as strings: "I" for an insertion, "D" for a
    deletion, "A" for an alignment, "S" for a full substitution and any
    other string beginning with "S" for a partial substitution.

    @param sem1: first logical form
    @param sem2: second logical form
    @param distance: also return the distance metric as a fourth
        element. Disabled by default for backward compatibility.
    @return: tuple of the operation list and the two step sequences
        that were compared, followed by the distance if it was
        requested.

    """
    from jazzparser.utils.distance import levenshtein_distance_with_pointers

    def _operation_for(kind):
        # Translate a substitution type into its operation code
        if kind == "both":
            return "S"
        if kind == "fun":
            return "Sf"
        if kind == "root":
            return "Sr"
        return "A"

    # (coord,fun) pairs for each of the logical forms
    path1 = _lf_to_coord_funs(sem1)
    path2 = _lf_to_coord_funs(sem2)
    # The same paths expressed as steps and functions instead of
    # points and functions
    steps1 = _steps_list(path1)
    steps2 = _steps_list(path2)

    dists, pointers = levenshtein_distance_with_pointers(
        steps1, steps2,
        delins_cost=2,
        subst_cost_fun=_subst_cost)

    # Walk the pointer matrix backwards, beginning in the cell at the
    # last row and last column and stopping at cell (0, 0)
    i = len(dists) - 1
    j = len(dists[0]) - 1
    backwards_ops = []
    while i != 0 or j != 0:
        direction = pointers[i][j]
        if direction == "I":
            backwards_ops.append("I")
            j -= 1
        elif direction == "D":
            backwards_ops.append("D")
            i -= 1
        else:
            # Diagonal move: classify the pair of steps it matched up
            kind = _subst_type(steps1[i - 1], steps2[j - 1])
            backwards_ops.append(_operation_for(kind))
            j -= 1
            i -= 1

    # The trace ran from the end to the start, so flip it round
    oplist = backwards_ops[::-1]

    result = (oplist, steps1, steps2)
    if distance:
        # Final cell of the cost matrix, divided by two
        result = result + (float(dists[-1][-1]) / 2.0,)
    return result
def tonal_space_alignment_costs(sem1, sem2):
    """
    Runs the same algorithm as tonal_space_distance, but rather than
    the score it returns how many of each kind of operation the optimal
    alignment used: deletions, insertions, root (only) substitutions,
    function (only) substitutions, full substitutions and alignments.

    @param sem1: first logical form to compare
    @param sem2: second logical form to compare
    @rtype: dict
    @return: the counts under the keys 'deletions', 'insertions',
        'root_subs', 'function_subs', 'full_subs' and 'alignments',
        plus 'substitutions' (the sum of the three substitution counts)
        and the compared step sequences under 'steps1' and 'steps2'.

    """
    from jazzparser.utils.distance import levenshtein_distance_with_pointers

    # First convert both logical forms to (coord,fun) pairs, then turn
    # those paths of points into paths of steps and functions
    coord_funs = [_lf_to_coord_funs(sem) for sem in (sem1, sem2)]
    steps1, steps2 = [_steps_list(seq) for seq in coord_funs]

    dists, pointers = levenshtein_distance_with_pointers(
        steps1,
        steps2,
        delins_cost=2,
        subst_cost_fun=_subst_cost)

    # One counter per kind of operation in the optimal alignment
    tally = {
        'deletions': 0,
        'insertions': 0,
        'root_subs': 0,
        'function_subs': 0,
        'full_subs': 0,
        'alignments': 0,
    }
    # Counter to bump for each substitution type; any other type is
    # counted as an alignment
    subst_counters = {
        "both": 'full_subs',
        "fun": 'function_subs',
        "root": 'root_subs',
    }

    # Follow the pointers back from the last cell of the matrix (final
    # row, final column) until the origin cell (0, 0) is reached
    row = len(dists) - 1
    col = len(dists[0]) - 1
    while (row, col) != (0, 0):
        pointer = pointers[row][col]
        if pointer == "I":
            tally['insertions'] += 1
            col -= 1
            continue
        if pointer == "D":
            tally['deletions'] += 1
            row -= 1
            continue
        # Anything else is a diagonal move: work out what kind
        kind = _subst_type(steps1[row - 1], steps2[col - 1])
        tally[subst_counters.get(kind, 'alignments')] += 1
        col -= 1
        row -= 1

    all_subs = tally['root_subs'] + tally['function_subs'] + tally['full_subs']
    return {
        'deletions': tally['deletions'],
        'insertions': tally['insertions'],
        'root_subs': tally['root_subs'],
        'function_subs': tally['function_subs'],
        'full_subs': tally['full_subs'],
        'substitutions': all_subs,
        'alignments': tally['alignments'],
        'steps1': steps1,
        'steps2': steps2,
    }
def tonal_space_alignment_costs(sem1, sem2):
    """
    Uses the same algorithm as tonal_space_distance, but reports a
    breakdown of the optimal alignment instead of its score: the number
    of deletions, insertions, root (only) substitutions, function
    (only) substitutions, full substitutions and alignments.

    @param sem1: first logical form
    @param sem2: second logical form
    @rtype: dict
    @return: counts keyed by 'deletions', 'insertions', 'root_subs',
        'function_subs', 'full_subs', 'substitutions' (total of the
        three substitution counts) and 'alignments', together with the
        two step sequences compared, as 'steps1' and 'steps2'.

    """
    from jazzparser.utils.distance import levenshtein_distance_with_pointers

    # (coord,fun) pairs for each of the logical forms
    path1 = _lf_to_coord_funs(sem1)
    path2 = _lf_to_coord_funs(sem2)
    # The same paths expressed as steps and functions instead of
    # points and functions
    steps1 = _steps_list(path1)
    steps2 = _steps_list(path2)

    dists, pointers = levenshtein_distance_with_pointers(
        steps1, steps2,
        delins_cost=2,
        subst_cost_fun=_subst_cost)

    # Record one label per operation while walking the pointer matrix
    # backwards, beginning in the cell at the last row and last column
    # and stopping at cell (0, 0). The labels are counted afterwards.
    events = []
    i = len(dists) - 1
    j = len(dists[0]) - 1
    while i != 0 or j != 0:
        direction = pointers[i][j]
        if direction == "I":
            events.append('insertions')
            j -= 1
        elif direction == "D":
            events.append('deletions')
            i -= 1
        else:
            # Diagonal move: classify the pair of steps it matched up
            kind = _subst_type(steps1[i - 1], steps2[j - 1])
            if kind == "both":
                events.append('full_subs')
            elif kind == "fun":
                events.append('function_subs')
            elif kind == "root":
                events.append('root_subs')
            else:
                events.append('alignments')
            j -= 1
            i -= 1

    root_only = events.count('root_subs')
    function_only = events.count('function_subs')
    full = events.count('full_subs')

    costs = {
        'deletions': events.count('deletions'),
        'insertions': events.count('insertions'),
        'root_subs': root_only,
        'function_subs': function_only,
        'full_subs': full,
        'substitutions': root_only + function_only + full,
        'alignments': events.count('alignments'),
        'steps1': steps1,
        'steps2': steps2,
    }
    return costs