def run(sentence, tagset, hmm_prob):
    """Run two penalised Viterbi taggers against each other for up to 200 rounds.

    Each tagger gets its own dual-decomposition penalty table (``u1`` and
    ``u2``), indexed first by token position and then by tag, with every
    entry starting at 0.

    Returns:
        ``(k, tags1, tags2)`` where ``k`` is the zero-based round in which
        ``disagree(tags1, tags2)`` was truthy, or ``-1`` when that never
        happened within the iteration budget.
    """
    max_iterations = 200
    step_size = 15
    n = len(sentence)

    # dual decomposition parameters, one table per tagger
    u1 = defaultdict()
    u2 = defaultdict()
    for position in range(n):
        row1, row2 = defaultdict(), defaultdict()
        for tag in tagset:
            row1[tag] = 0
            row2[tag] = 0
        u1[position] = row1
        u2[position] = row2

    for k in range(max_iterations):
        tags1 = viterbi.run(sentence, tagset, hmm_prob, u1)
        tags2 = viterbi.run(sentence, tagset, hmm_prob, u2)
        if k == 0:
            print("initial tags:")
            print("%s %s" % (tags1, ":tagger1"))
            print("%s %s" % (tags2, ":tagger2"))
        # NOTE(review): the early exit fires when `disagree` is truthy, yet the
        # original comment describes it as convergence -- confirm the helper's
        # polarity against its definition.
        if disagree(tags1, tags2):
            return k, tags1, tags2
        update(tags1, tags2, u1, u2, step_size)

    # iteration budget exhausted
    return -1, tags1, tags2
def iterate_sentences_viterbi(outfile):
    """Parse every stored sentence of at most 15 tokens and write its tree.

    Walks the module-level ``sentences`` collection, announcing the 1-based
    position of each one. For sentences short enough, a chart is built with
    ``cky_parsing_most``, ``viterbi.run`` is invoked over the full span from
    ``start_node``, and ``viterbi.build_tree`` is given ``outfile``.
    """
    for line_no, s in enumerate(sentences, 1):
        print("%s %s" % ("Sentence in line ", line_no))
        if len(s) > 15:
            # longer sentences are announced but not parsed
            continue
        chart = cky_parsing_most(s)
        viterbi.run(chart, 0, len(s), start_node, s, 0)
        viterbi.build_tree(s, outfile)
def run(sentence, tagset, hmm, k_best_list):
    """Iterate a penalised Viterbi tagger against an FST search until they agree.

    Args:
        sentence: the token sequence; only its length and the value passed on
            to ``viterbi.run`` are used here.
        tagset: the tags, forwarded to every helper.
        hmm: model object forwarded to ``viterbi.run``.
        k_best_list: candidate list forwarded to ``fst_search.run``; its
            length also sets the iteration budget (200 rounds per entry).

    Returns:
        ``(seq1, iteration)`` with the 1-based round in which both searches
        produced the same sequence, or ``(seq1, -1)`` when the budget ran out.
        With an empty ``k_best_list`` the budget is zero, no search is run and
        ``(None, -1)`` is returned.
    """
    max_iter = len(k_best_list) * 200
    n = len(sentence)

    u = {}  # dual decomposition parameter
    init_dd_param(u, n, tagset)

    # Bound up front so the fall-through return is valid even when the loop
    # body never executes (empty k_best_list => max_iter == 0).
    seq1 = None
    iteration = 1
    while iteration <= max_iter:
        # diminishing step size
        step_size = 1.0 / math.sqrt(iteration)

        seq1, _score1, _score2 = viterbi.run(sentence, tagset, hmm, u)
        y = compute_indicators(seq1, tagset)

        seq2, _fst_score = fst_search.run(k_best_list, u, tagset)
        z = compute_indicators(seq2, tagset)

        # check for agreement
        if seq1 != seq2:
            update(y, z, u, step_size)
        else:
            return seq1, iteration
        iteration += 1

    return seq1, -1
def run(sentence, tagset, preterms, start, hmm_prob, pcfg, pcfg_u):
    """Alternate a penalised tagger and a penalised parser for up to 20 rounds.

    Every round runs ``viterbi.run`` and ``fast_cky.run`` with the shared
    penalty table ``u``, extracts the parser's tags from its parse, prints
    the round number and both tag sequences, and stops as soon as
    ``agree(parse_tags, tags)`` holds.

    Returns:
        ``(k, tags, parse)`` with the zero-based round of agreement, or
        ``(-1, tags, parse)`` from the last round when no agreement was
        reached.
    """
    max_iterations = 20
    n = len(sentence)

    u = defaultdict()  # dual decomposition parameter
    init_dd_param(u, n, tagset)

    for k in range(max_iterations):
        tags = viterbi.run(sentence, tagset, hmm_prob, u)
        parse = fast_cky.run(sentence, preterms, start, pcfg, pcfg_u, u)
        _terms, parse_tags = utils.get_terminals_tags(utils.make_parse_list(parse))

        print(k)
        print("%s %s" % (tags, "tagger"))
        print("%s %s" % (parse_tags, "parser"))

        if agree(parse_tags, tags):
            return k, tags, parse

        y = compute_indicators(tags, tagset)
        z = compute_indicators(parse_tags, tagset)
        # the step shrinks as 1, 1/2, 1/3, ... over successive rounds
        step_size = 1.0 / (k + 1)
        update(y, z, u, step_size)

    return -1, tags, parse
def run(sentence, tagset, hmm_prob):
    """Iterate a penalised Viterbi tagger against an FST search for up to 200 rounds.

    The Viterbi output of the first round is remembered as ``best_tags`` and
    is what ``fst_search.run`` receives in every round. Between rounds the
    penalty table ``u`` is updated from the indicator encodings of both tag
    sequences with step size ``1 / sqrt(k)``.

    Returns:
        ``(best_tags, k, tags1, tags2)`` with the 1-based round in which
        ``agree(tags1, tags2)`` held, or ``k == -1`` when the budget ran out.
    """
    max_iterations = 200
    n = len(sentence)

    u = {}  # dual decomposition parameter
    init_dd_param(u, n, tagset)

    for k in range(1, max_iterations + 1):
        tags1, aug_hmm_score, hmm_score = viterbi.run(sentence, tagset, hmm_prob, u)
        if k == 1:
            best_tags = tags1

        tags2, fst_score = fst_search.run(best_tags, u, tagset)
        if agree(tags1, tags2):
            return best_tags, k, tags1, tags2

        y = compute_indicators(tags1, tagset)
        z = compute_indicators(tags2, tagset)
        update(y, z, u, 1.0 / math.sqrt(k))

    return best_tags, -1, tags1, tags2
def run(sentence, tagset, hmm, k_best_list):
    """Iterate one Viterbi tagger against one bigram FST search per k-best entry.

    Args:
        sentence: the token sequence; its length sizes the penalty tables.
        tagset: the tags, forwarded to every helper.
        hmm: model object forwarded to ``viterbi.run``.
        k_best_list: one entry per FST search; its length also sets the
            iteration budget (200 rounds per entry).

    Returns:
        ``(seq1, iteration)`` with the 1-based round in which every FST
        sequence equalled the Viterbi sequence, or ``(seq1, -1)`` when the
        budget ran out. With an empty ``k_best_list`` the budget is zero, no
        search is run and ``(None, -1)`` is returned.
    """
    max_iter = len(k_best_list) * 200
    n = len(sentence)
    k = len(k_best_list)

    # dd parameter list: u[0] belongs to the tagger, u[j + 1] to FST search j
    u = []
    for _ in range(k + 1):
        u_j = {}
        init_dd_param(u_j, n, tagset)
        u.append(u_j)
    w = {}
    init_dd_param(w, n, tagset)
    ku = {}
    init_dd_param(ku, n, tagset)

    # Bound up front so the fall-through return is valid even when the loop
    # body never executes (empty k_best_list => max_iter == 0).
    seq1 = None
    iteration = 1
    while iteration <= max_iter:
        step_size = 21.0 / math.sqrt(iteration)
        seqs = []
        indicators = []

        # the tagger is run with the negated copy of u[0]
        for i in u[0]:
            for t in u[0][i]:
                ku[i][t] = -1 * u[0][i][t]
        seq1, _score1, _score2 = viterbi.run(sentence, tagset, hmm, ku)
        seqs.append(seq1)
        indicators.append(compute_indicators(seq1, tagset))

        for j in range(k):
            seq, _fst_score = bigram_fst_search.run(k_best_list[j], u[j + 1], tagset)
            seqs.append(seq)
            indicators.append(compute_indicators(seq, tagset))

        # check for agreement
        if any(other != seq1 for other in seqs[1:]):
            update(indicators, u, w, step_size)
        else:
            return seq1, iteration
        iteration += 1

    return seq1, -1
def execute(treebank, dev):
    """Learn a PCFG and an HMM from a treebank, then run both over the dev data.

    Args:
        treebank: passed to ``utils.read_parses_no_indent``.
        dev: passed to ``utils.get_sentences``.

    The per-sentence results of ``cky.run`` and ``viterbi.run`` are computed
    but not used or returned in the code visible here.
    """
    print("reading treebank...")
    parses = utils.read_parses_no_indent(treebank)
    parse_lists = [utils.make_parse_list(parse) for parse in parses]

    print("learning pcfg...")
    nonterms, terms, start, prob = grammar.learn(parse_lists)

    print("learning hmm...")
    emission, transition = sequnece_labeler.learn(parse_lists)

    print("reading dev data...")
    dev_sentences = utils.get_sentences(dev)
    # Sample print of one sentence; guarded so that a dev set with fewer than
    # 101 sentences no longer aborts the run with an IndexError.
    if len(dev_sentences) > 100:
        print(dev_sentences[100])

    for sentence in dev_sentences:
        parse = cky.run(sentence, nonterms, start, prob)
        sequnece = viterbi.run(sentence, emission, transition)
def run(obs, states_list, start_p, trans_p, emit_p):
    """Iteratively re-estimate transition/emission matrices, then run Viterbi.

    The inputs are published through module-level globals (``states``,
    ``start_probs``, ``transition``, ``emission``, ``test_sequence``,
    ``sequence_syms``, ``end_probs``); the argument-less helpers
    ``forward_probs()`` / ``backward_probs()`` presumably read them.

    Up to 2000 rounds are performed. Each round builds a new transition
    matrix ``a`` (states x states) and emission matrix ``b`` (states x unique
    observation symbols), installs them as the globals ``transition`` and
    ``emission``, and stops once the forward value changes by less than 1e-7.
    Finally the matrices are converted to nested dicts and handed to
    ``viterbi.run``.

    Args:
        obs: list of observations (strings of angular velocities).
        states_list: the state names.
        start_p, trans_p, emit_p: initial parameters, stored in the globals.

    NOTE(review): nothing is returned, although a caller elsewhere in this
    code base unpacks two values from ``baum_welch.run(...)`` -- confirm
    whether ``a, b`` should be returned.
    """
    # define global variables
    global states, start_probs, transition, emission, test_sequence, sequence_syms, end_probs
    states, start_probs, transition, emission = states_list, start_p, trans_p, emit_p
    test_sequence = obs  # list of strings of observations (angular velocities)

    # probabilities of going to end state <-- What is this?
    end_probs = [0.1, 0.1]

    # Column index of every distinct observation symbol in `b`.
    # should these be a range of all possible theoretical observations or a
    # list of all observations in the data?
    sequence = np.unique(test_sequence)
    sequence_syms = {symbol: column for column, symbol in enumerate(sequence)}

    n_states = len(states)
    n_steps = len(test_sequence) - 1

    # Positions at which each symbol occurs; constant across rounds, so it is
    # computed once instead of once per state per round.
    symbol_positions = [
        [idx for idx, val in enumerate(test_sequence) if val == symbol]
        for symbol in sequence
    ]

    # performing iterations until convergence
    for iteration in range(2000):
        print('\nIteration No: ', iteration + 1)

        # Calling probability functions to calculate all probabilities
        fwd_probs, fwd_val = forward_probs()
        bwd_probs, bwd_val = backward_probs()
        si_probabilities = si_probs(fwd_probs, bwd_probs, fwd_val)
        gamma_probabilities = gamma_probs(fwd_probs, bwd_probs, fwd_val)

        # calculating 'a' and 'b' matrices
        a = np.zeros((n_states, n_states))
        b = np.zeros((n_states, len(sequence_syms)))

        # 'a' matrix
        for j in range(n_states):
            # the normaliser depends on the source state only
            denominator_a = sum(
                si_probabilities[j, t_x, i_x]
                for t_x in range(n_steps)
                for i_x in range(n_states)
            )
            for i in range(n_states):
                for step in range(n_steps):
                    a[j, i] += si_probabilities[j, step, i]
                if denominator_a == 0:
                    a[j, i] = 0
                else:
                    a[j, i] = a[j, i] / denominator_a

        # 'b' matrix
        for j in range(n_states):
            denominator_b = sum(gamma_probabilities[j, :])
            for i, positions in enumerate(symbol_positions):
                if denominator_b == 0:
                    b[j, i] = 0
                else:
                    b[j, i] = sum(gamma_probabilities[j, positions]) / denominator_b

        print('\nMatrix a:\n')
        print(np.array(a.round(decimals=4)))
        print('\nMatrix b:\n')
        print(np.array(b.round(decimals=4)))

        transition = a
        emission = b

        new_fwd_temp, new_fwd_temp_val = forward_probs()
        print('New forward probability: ', new_fwd_temp_val)
        diff = np.abs(fwd_val - new_fwd_temp_val)
        print('Difference in forward probability: ', diff)
        if diff < 0.0000001:
            break

    # update params with a, b to run viterbi
    # Uniform start distribution keyed by the actual state names (this equals
    # {"Sac": 0.5, "Fix": 0.5} for states ['Sac', 'Fix']).
    start_p = {state: 1.0 / n_states for state in states}

    # update transition and emission from matrices to nested dicts
    trans_dict = {
        states[i]: {states[j]: a[i, j] for j in range(n_states)}
        for i in range(n_states)
    }
    # Columns of `b` are indexed by unique symbol, not by position in `obs`,
    # so each observation is mapped through `sequence_syms`.
    emit_dict = {
        states[i]: {o: b[i, sequence_syms[o]] for o in obs}
        for i in range(n_states)
    }

    viterbi.run(obs, states, start_p, trans_dict, emit_dict)
# Resolve the requested melody source, then parse it.
if args.melody == 'little_happiness':
    melody_source = A_LITTLE_HAPPINESS
elif args.melody == 'jj_lin':
    melody_source = JJ_LIN_MELODY
else:
    print('Unrecognized melody: should be jj_lin or little_happiness')
    sys.exit(1)
melody = converter.parse(melody_source)

# Only the two known series names are accepted.
if args.series != 'major' and args.series != 'minor':
    print('Unrecognized series: should be major or minor')
    sys.exit(1)

melody.insert(0, MetronomeMark(number=95))

# Pick algorithm
if args.algorithm == 'basic':
    harmonize = chord_search.run
elif args.algorithm == 'hmm':
    harmonize = viterbi.run
else:
    print('Unrecognized algorithm: should be basic or hmm')
    sys.exit(1)
harmonize(chords, melody, args.series)

# Combine two parts: both are inserted at offset 0 of a fresh stream
song = Stream()
for part in (melody, chords):
    song.insert(0, part)
# song.show('midi')
song.show()
# Tail of the chord-series selection chain; its opening `if` lies above this span.
elif args.series == 'a_jazz':
    chords = A_JAZZ
elif args.series == 'f_sharp_jazz':
    chords = F_SHARP_JAZZ
else:
    # Any other series name ends the program with exit status 1.
    print('Unrecognized series')
    sys.exit(1)

# Init chord stream
chords_output = converter.parse("""tinynotation: 4/4""")

# Pick algorithm: both entry points receive the chord series, the melody,
# the series name and the output stream created above.
if args.algorithm == 'basic':
    chord_search.run(chords, melody, args.series, chords_output)
elif args.algorithm == 'markov':
    viterbi.run(chords, melody, args.series, chords_output)
else:
    print('Unrecognized algorithm: should be basic or markov')
    sys.exit(1)

# Combine two parts: the melody and the chord output each get their own Part,
# and both Parts are inserted at offset 0 of one Score.
song = stream.Score(id='mainScore')
part0 = stream.Part(id='melody')
part0.append(melody)
part1 = stream.Part(id='chords')
part1.append(chords_output)
song.insert(0, part0)
song.insert(0, part1)
song.makeNotation(inPlace=True)
song.show()
def _emission_matrix(obs, params):
    """Return one row per ``(mean, std)`` pair holding ``gaussian(mean, std, float(o))`` for each observation."""
    values = [float(o) for o in obs]
    return np.array(
        [[gaussian(mean, std, x) for x in values] for mean, std in params]
    )


def main():
    """Classify eye-tracking samples in two passes with Viterbi and Baum-Welch.

    Step 1 separates saccades from fixations; step 2 drops the samples
    labelled ``'Sac'`` and separates smooth pursuits from fixations. In each
    step the samples are first split with a fixed threshold of 0.02 on
    ``ang_vel``, the per-class mean and standard deviation are used to build
    Gaussian emission values, Viterbi is run, the parameters are re-estimated
    with ``baum_welch.run``, Viterbi is run again and the result is passed to
    ``clean_sequence``.

    Reads a hard-coded CSV path and produces plots and console output;
    nothing is returned.
    """
    # files
    df = pd.read_csv(
        '/Users/ischoning/PycharmProjects/GitHub/data/participant08_preprocessed172.csv'
    )

    # shorten dataset for better visuals and quicker results
    df = df[100:int(len(df) / 500)]
    df.reset_index(drop=True, inplace=True)

    # assign relevant data
    lx = df['left_forward_x']
    ly = df['left_forward_y']
    lz = df['left_forward_z']
    rx = df['right_forward_x']
    ry = df['right_forward_y']
    rz = df['right_forward_z']

    # compute angular values
    df['Ax_left'] = np.rad2deg(np.arctan2(lx, lz))
    df['Ay_left'] = np.rad2deg(np.arctan2(ly, lz))
    df['Ax_right'] = np.rad2deg(np.arctan2(rx, rz))
    df['Ay_right'] = np.rad2deg(np.arctan2(ry, rz))
    df['Avg_angular_x'] = df[['Ax_left', 'Ax_right']].mean(axis=1)
    df['Avg_angular_y'] = df[['Ay_left', 'Ay_right']].mean(axis=1)

    # Sample-to-sample differences; the last row is removed so the columns
    # have the same length as the differences.
    dx = np.diff(df['Avg_angular_x'])
    dy = np.diff(df['Avg_angular_y'])
    df.drop(df.tail(1).index, inplace=True)
    t = df['timestamp_milis']

    # combined angular velocity
    # NOTE(review): this is the magnitude of the per-sample angular change; it
    # is not divided by the inter-sample interval -- confirm that is intended.
    df['ang_vel'] = np.sqrt(np.square(dx) + np.square(dy))
    ang_vel = df['ang_vel']
    plt.scatter(range(len(ang_vel)), ang_vel)
    plt.scatter(ang_vel, ang_vel)

    # angular acceleration, on a copy one row shorter than df
    dt = np.diff(t)  # aka isi
    dv = np.diff(df['ang_vel'])
    df_ = df.copy()
    df_.drop(df_.tail(1).index, inplace=True)
    df_['ang_acc'] = dv / dt

    # show histogram of angular velocity
    make_hist(ang_vel, 'Histogram of Angular Velocity', 'angular velocity',
              'number of occurrences')

    # make pmf
    pmf(ang_vel, 'PMF of Angular Velocity', 'angular velocity', 'probability')

    # Initial labelling: samples at or below the fixed 0.02 threshold are
    # fixations (1), the rest saccades (0).
    states = ['Saccade', 'Fixation']
    df['fix1 sac0'] = np.where(ang_vel <= 0.02, 1, 0)
    event = df['fix1 sac0']
    print_events(t, event=event, states=states)

    print('=============== STEP 1: Filter Saccades ===============')

    # estimate priors (sample means)
    mean_fix = np.mean(df[event == 1]['ang_vel'])
    mean_sac = np.mean(df[event == 0]['ang_vel'])
    std_fix = np.std(df[event == 1]['ang_vel'])
    std_sac = np.std(df[event == 0]['ang_vel'])
    print("Fixation: mean =", mean_fix, "standard deviation =", std_fix)
    print("Saccade: mean =", mean_sac, "standard deviation =", std_sac)

    print('\n============== BEGIN VITERBI ==============')
    # first run EM to get best match params (priors, trans, emission probabilities)
    # then run Viterbi HMM algorithm to output the most likely sequence given
    # the params calculated in EM
    obs = ang_vel.astype(str)
    obs = obs.tolist()
    states = ['Sac', 'Fix']
    start_p = [0.5, 0.5]
    trans_p = np.array([[0.5, 0.5], [0.5, 0.5]])
    # One emission value per observation and state. The earlier
    # `if o not in Sac` guards compared a string with the stored gaussian
    # values, so (assuming gaussian returns a number) they never skipped
    # anything and have been dropped.
    emit_p = _emission_matrix(obs, [(mean_sac, std_sac), (mean_fix, std_fix)])

    df['hidden_state'] = viterbi.run(obs, states, start_p, trans_p, emit_p)
    print(df['hidden_state'].value_counts())
    print('=============== END VITERBI ===============')

    print('\n============== BEGIN BAUM-WELCH ==============')
    trans_p, emit_p = baum_welch.run(obs, states, start_p, trans_p, emit_p)
    print('============== END BAUM-WELCH ==============')

    print('\n============== BEGIN UPDATED VITERBI ==============')
    df['hidden_state'] = viterbi.run(obs, states, start_p, trans_p, emit_p)
    print('=============== END UPDATED VITERBI ===============')

    df = clean_sequence(df)

    print(
        '\n=============== STEP 2: Classify Fixations and Smooth Pursuits ==============='
    )

    # filter out Saccades
    df = df[df.hidden_state != 'Sac']
    df.reset_index(drop=True, inplace=True)
    ang_vel = df['ang_vel']
    # Refresh the timestamps as well: the previous `t` still describes the
    # rows from before the saccades were removed.
    t = df['timestamp_milis']

    states = ['Smooth Pursuit', 'Fixation']
    plt.plot(df.timestamp_milis, ang_vel)
    plt.show()

    df['fix1 smp0'] = np.where(ang_vel <= 0.02, 1, 0)
    event = df['fix1 smp0']
    print_events(t, event=event, states=states)

    # estimate priors (sample means)
    mean_fix = np.mean(df[event == 1]['ang_vel'])
    mean_smp = np.mean(df[event == 0]['ang_vel'])
    std_fix = np.std(df[event == 1]['ang_vel'])
    std_smp = np.std(df[event == 0]['ang_vel'])
    print("Fixation: mean =", mean_fix, "standard deviation =", std_fix)
    print("Smooth Pursuit: mean =", mean_smp, "standard deviation =", std_smp)

    print('\n============== BEGIN VITERBI ==============')
    obs = ang_vel.astype(str)
    obs = obs.tolist()
    states = ['SmP', 'Fix']
    start_p = [0.5, 0.5]
    trans_p = np.array([[0.5, 0.5], [0.5, 0.5]])
    # Note: not possible to have two contiguous saccades without a fixation
    # (or smooth pursuit) in between
    # The smooth-pursuit row uses the smooth-pursuit statistics estimated just
    # above (it previously reused the saccade statistics from step 1).
    emit_p = _emission_matrix(obs, [(mean_smp, std_smp), (mean_fix, std_fix)])

    df['hidden_state'] = viterbi.run(obs, states, start_p, trans_p, emit_p)
    print(df['hidden_state'].value_counts())
    print('=============== END VITERBI ===============')

    print('\n============== BEGIN BAUM-WELCH ==============')
    trans_p, emit_p = baum_welch.run(obs, states, start_p, trans_p, emit_p)
    print('============== END BAUM-WELCH ==============')

    print('\n============== BEGIN UPDATED VITERBI ==============')
    df['hidden_state'] = viterbi.run(obs, states, start_p, trans_p, emit_p)
    print(df['hidden_state'].value_counts())
    print('=============== END UPDATED VITERBI ===============')

    df = clean_sequence(df)