hmm.add_transition(s33, s3, 0.01) hmm.add_transition(s33, s33, 0.92) hmm.add_transition(s33, s333, 0.01) hmm.add_transition(s333, s1, 0.01) hmm.add_transition(s333, s11, 0.01) hmm.add_transition(s333, s111, 0.01) hmm.add_transition(s333, s2, 0.01) hmm.add_transition(s333, s22, 0.01) hmm.add_transition(s333, s222, 0.01) hmm.add_transition(s333, s3, 0.01) hmm.add_transition(s333, s33, 0.01) hmm.add_transition(s333, s333, 0.92) hmm.bake() hmm.fit(X) # , weights=w) hmm does not support weights in pomegranate preds = hmm.predict(X) probs = hmm.predict_proba(X) data_thr['preds'] = pd.Series(preds).astype("category") color_key = ["red", "blue", "yellow", "grey", "black", "purple", "pink", "brown", "green", "orange"] # Spectral9 color_key = color_key[:len(set(preds))+2] covs = np.array([np.array(hmm.states[m].distribution.parameters[1]) for m in range(9)]) means = np.array([np.array(hmm.states[m].distribution.parameters[0]) for m in range(9)]) # transform cov for non-standardizeed data:
def build_reference_repeat_finder_hmm(patterns, copies=1): pattern = patterns[0] model = Model(name="HMM Model") insert_distribution = DiscreteDistribution({ 'A': 0.25, 'C': 0.25, 'G': 0.25, 'T': 0.25 }) last_end = None start_random_matches = State(insert_distribution, name='start_random_matches') end_random_matches = State(insert_distribution, name='end_random_matches') model.add_states([start_random_matches, end_random_matches]) for repeat in range(copies): insert_states = [] match_states = [] delete_states = [] for i in range(len(pattern) + 1): insert_states.append( State(insert_distribution, name='I%s_%s' % (i, repeat))) for i in range(len(pattern)): distribution_map = dict({ 'A': 0.01, 'C': 0.01, 'G': 0.01, 'T': 0.01 }) distribution_map[pattern[i]] = 0.97 match_states.append( State(DiscreteDistribution(distribution_map), name='M%s_%s' % (str(i + 1), repeat))) for i in range(len(pattern)): delete_states.append( State(None, name='D%s_%s' % (str(i + 1), repeat))) unit_start = State(None, name='unit_start_%s' % repeat) unit_end = State(None, name='unit_end_%s' % repeat) model.add_states(insert_states + match_states + delete_states + [unit_start, unit_end]) last = len(delete_states) - 1 if repeat > 0: model.add_transition(last_end, unit_start, 0.5) else: model.add_transition(model.start, unit_start, 0.5) model.add_transition(model.start, start_random_matches, 0.5) model.add_transition(start_random_matches, unit_start, 0.5) model.add_transition(start_random_matches, start_random_matches, 0.5) model.add_transition(unit_end, end_random_matches, 0.5) if repeat == copies - 1: model.add_transition(unit_end, model.end, 0.5) model.add_transition(end_random_matches, end_random_matches, 0.5) model.add_transition(end_random_matches, model.end, 0.5) model.add_transition(unit_start, match_states[0], 0.98) model.add_transition(unit_start, delete_states[0], 0.01) model.add_transition(unit_start, insert_states[0], 0.01) model.add_transition(insert_states[0], insert_states[0], 0.01) model.add_transition(insert_states[0], delete_states[0], 0.01) model.add_transition(insert_states[0], match_states[0], 0.98) model.add_transition(delete_states[last], unit_end, 0.99) model.add_transition(delete_states[last], insert_states[last + 1], 0.01) model.add_transition(match_states[last], unit_end, 0.99) model.add_transition(match_states[last], insert_states[last + 1], 0.01) model.add_transition(insert_states[last + 1], insert_states[last + 1], 0.01) model.add_transition(insert_states[last + 1], unit_end, 0.99) for i in range(0, len(pattern)): model.add_transition(match_states[i], insert_states[i + 1], 0.01) model.add_transition(delete_states[i], insert_states[i + 1], 0.01) model.add_transition(insert_states[i + 1], insert_states[i + 1], 0.01) if i < len(pattern) - 1: model.add_transition(insert_states[i + 1], match_states[i + 1], 0.98) model.add_transition(insert_states[i + 1], delete_states[i + 1], 0.01) model.add_transition(match_states[i], match_states[i + 1], 0.98) model.add_transition(match_states[i], delete_states[i + 1], 0.01) model.add_transition(delete_states[i], delete_states[i + 1], 0.01) model.add_transition(delete_states[i], match_states[i + 1], 0.98) last_end = unit_end model.bake() if len(patterns) > 1: # model.fit(patterns, algorithm='baum-welch', transition_pseudocount=1, use_pseudocount=True) fit_patterns = [pattern * copies for pattern in patterns] model.fit(fit_patterns, algorithm='viterbi', transition_pseudocount=1, use_pseudocount=True) return model
def main(): rospy.init_node('hmm_trainer') phase_pub = rospy.Publisher('/phase', Int32, queue_size=10) rospack = rospkg.RosPack() packpath = rospack.get_path('exo_control') datapath = packpath + "/log/mat_files/" verbose = rospy.get_param('~verbose', False) """Print console output into text file""" sys.stdout = open(packpath + "/log/results/leave-one-out_cross_validation_cov.txt", "w") """Data loading""" n_trials = 3 n_sub = 9 healthy_subs = ["daniel", "erika", "felipe", "jonathan", "luis", "nathalia", "paula", "pedro", "tatiana"] patients = ["andres", "carlos", "carmen", "carolina", "catalina", "claudia", "emmanuel", "fabian", "gustavo"] study_subs = [healthy_subs, patients] dataset = [{} for x in range(len(study_subs))] for i in range(len(study_subs)): for sub in study_subs[i]: dataset[i][sub] = {"gyro_y": [[] for x in range(n_trials)], "fder_gyro_y": [[] for x in range(n_trials)], "time": [[] for x in range(n_trials)], "labels": [[] for x in range(n_trials)], "Fs_fsr": 0.0} for group in dataset: for sub,data in group.iteritems(): for trial in range(n_trials): mat_file = scio.loadmat(datapath + sub + "_proc_data" + str(trial+1) + ".mat") for signal in data: if signal not in ["pathol","fder_gyro_y"]: if signal == "Fs_fsr": data[signal] = mat_file[signal][0][0] else: data[signal][trial] = mat_file[signal][0] del mat_file """Feature extraction""" """First derivative""" for group in dataset: for sub,data in group.iteritems(): for trial in range(n_trials): der = [] gyro_y = data["gyro_y"][trial] der.append(gyro_y[0]) for i in range(1,len(gyro_y)-1): der.append((gyro_y[i+1]-gyro_y[i-1])/2) der.append(gyro_y[-1]) data["fder_gyro_y"][trial] = der del der, sub, data """Global variables of cHMM""" startprob = [0.25, 0.25, 0.25, 0.25] state_names = ['hs', 'ff', 'ho', 'sw'] n_classes = 4 n_signals = 2 tol = 6e-2 # Tolerance window of 60 ms # pathology = 0 for pathology in range(len(dataset)): if pathology == 0: rospy.logwarn("**Leave-one-out cross validation with HEALTHY subjects**") print "**Leave-one-out cross validation with HEALTHY subjects**" else: rospy.logwarn("**Leave-one-out cross validation with PATIENTS**") print "**Leave-one-out cross validation with PATIENTS**" # if True: for lou_sub,lou_data in dataset[pathology].iteritems(): # Iterate through leave-one-out subject's data rospy.logwarn("Leave " + lou_sub + " out:") print "Leave " + lou_sub + " out:" t = np.zeros((4, 4)) # Transition matrix prev = -1 for trial in range(n_trials): for label in lou_data["labels"][trial]: if prev == -1: prev = label t[prev][label] += 1.0 prev = label t = normalize(t, axis=1, norm='l1') if verbose: rospy.logwarn("TRANSITION MATRIX\n" + str(t)) class_data = [[] for x in range(n_classes)] # full_lou_data = [] # full_lou_labels = [] for trial in range(n_trials): for sample in range(len(lou_data["gyro_y"][trial])): d = [lou_data["gyro_y"][trial][sample], lou_data["fder_gyro_y"][trial][sample]] l = lou_data["labels"][trial][sample] # full_lou_data.append(d) # full_lou_labels.append(l) class_data[l].append(d) """Multivariate Gaussian Distributions for each hidden state""" class_means = [[[] for x in range(n_signals)] for i in range(n_classes)] class_vars = [[[] for x in range(n_signals)] for i in range(n_classes)] class_std = [[[] for x in range(n_signals)] for i in range(n_classes)] class_cov = [] for state in range(n_classes): cov = np.ma.cov(np.array(class_data[state]), rowvar=False) class_cov.append(cov) for signal in range(n_signals): class_means[state][signal] = np.array(class_data[state][:])[:, [signal]].mean(axis=0) class_vars[state][signal] = np.array(class_data[state][:])[:, [signal]].var(axis=0) class_std[state][signal] = np.array(class_data[state][:])[:, [signal]].std(axis=0) # lou_trial = 1 # if True: for lou_trial in range(n_trials): rospy.logwarn("Trial {}".format(lou_trial+1)) print("Trial {}".format(lou_trial+1)) """Classifier initialization""" # distros = [] hmm_states = [] for state in range(n_classes): dis = MGD\ (np.array(class_means[state]).flatten(), np.array(class_cov[state])) st = State(dis, name=state_names[state]) # distros.append(dis) hmm_states.append(st) model = HMM(name="Gait") model.add_states(hmm_states) """Initial transitions""" for state in range(n_classes): model.add_transition(model.start, hmm_states[state], startprob[state]) """Left-right model""" for i in range(n_classes): for j in range(n_classes): model.add_transition(hmm_states[i], hmm_states[j], t[i][j]) model.bake() """Create training and test data""" x_train = [] x_test = [] test_gyro_y = lou_data["gyro_y"][lou_trial] test_fder_gyro_y = lou_data["fder_gyro_y"][lou_trial] """Create test data with n-th trial of leave-one-out subject""" for sample in range(len(test_gyro_y)): x_test.append([test_gyro_y[sample], test_fder_gyro_y[sample]]) """Create training data with n-1 trials of the rest of subjects (healthy group)""" for train_sub,train_data in dataset[0].iteritems(): count_trials = 0 if lou_sub != train_sub: # if train_sub == "daniel": for trial in range(n_trials): if trial != lou_trial and count_trials < 1: # rospy.logwarn(trial) train_gyro_y = train_data["gyro_y"][trial] train_fder_gyro_y = train_data["fder_gyro_y"][trial] for sample in range(len(train_gyro_y)): x_train.append([train_gyro_y[sample], train_fder_gyro_y[sample]]) count_trials += 1 rospy.logwarn(len(x_train)) x_train = list([x_train]) """Training""" rospy.logwarn("Training HMM...") model.fit(x_train, algorithm='baum-welch', verbose=True) # model.fit(x_train, algorithm='viterbi', verbose='True') """Find most-likely sequence""" rospy.logwarn("Finding most-likely sequence...") logp, path = model.viterbi(x_test) # rospy.logwarn(len(path)) # rospy.logwarn(len(lou_data["labels"][lou_trial])) class_labels = [] for i in range(len(lou_data["labels"][lou_trial])): path_phase = path[i][1].name for state in range(n_classes): if path_phase == state_names[state]: class_labels.append(state) '''Saving classifier labels into csv file''' # np.savetxt(packpath+"/log/inter_labels/"+lou_sub+"_labels.csv", class_labels, delimiter=",", fmt='%s') # rospy.logwarn("csv file with classifier labels was saved.") # lou_data["labels"][lou_trial] = lou_data["labels"][lou_trial][1:] """Calculate mean time (MT) of stride and each gait phase and Coefficient of Variation (CoV)""" rospy.logwarn("Mean time (MT) and Coefficient of Variance (CoV)") print "Mean time (MT) and Coefficient of Variance (CoV)" curr_label = -1 count = 0 n_phases = 0 stride_samples = 0 phases_time = [[] for x in range(n_classes)] stride_time = [] for label in class_labels: if curr_label != label: n_phases += 1 stride_samples += count if label == 0: # Gait start: HS if n_phases == 4: # If a whole gait cycle has past stride_time.append(stride_samples/lou_data["Fs_fsr"]) n_phases = 0 stride_samples = 0 phases_time[label-1].append(count/lou_data["Fs_fsr"]) curr_label = label count = 1 else: count += 1.0 for phase in range(n_classes): mean_time = np.mean(phases_time[phase]) phase_std = np.std(phases_time[phase]) rospy.logwarn("(" + state_names[phase] + ")") print "(" + state_names[phase] + ")" rospy.logwarn("Mean time: " + str(mean_time) + " + " + str(phase_std)) print "Mean time: " + str(mean_time) + " + " + str(phase_std) rospy.logwarn("CoV: " + str(phase_std/mean_time*100.0)) print("CoV: " + str(phase_std/mean_time*100.0)) mean_time = np.mean(stride_time) phase_std = np.std(stride_time) rospy.logwarn("(Stride)") print "(Stride)" rospy.logwarn("Mean time: " + str(mean_time) + " + " + str(phase_std)) print "Mean time: " + str(mean_time) + " + " + str(phase_std) rospy.logwarn("CoV: " + str(phase_std/mean_time*100.0)) print("CoV: " + str(phase_std/mean_time*100.0))
seq2 = test_data if seq2 == [] or test_data == []: rospy.logerr("Empty testing sequence") continue print(np.array(seq).shape) print(np.array(seq[0]).shape) # print np.array(seq) print(np.array(seq2).shape) print(np.array(seq2[0]).shape) # print np.array(seq2) # seq = np.array(seq) seq2 = np.array(seq2) # rospy.logwarn(seq) cl.fit(seq, algorithm='baum-welch', verbose='True') # print(model) rospy.logwarn("Start Viterbi") log, path = cl.viterbi(seq) rospy.logwarn("Viterbi Done") # rospy.logwarn(len(path)) sum_ = 0.0 if (len(path) - 1) != len(test_data): print(len(path)) print(path[0][1].name) print(path[len(path) - 1][1].name) print(len(test_data)) exit()
def build_dis_classifier(self): skf = StratifiedKFold(self.full_labels, n_folds=self.folds) classifier_array = [] stats_array = [] num_class = len(self.full_data[0]) print (num_class) for cl in range(0, num_class): lel = -1 tp_total = 0.0 tn_total = 0.0 fp_total = 0.0 fn_total = 0.0 tests = 0 for train_index, test_index in skf: if lel > 0: lel -= 1 continue stats = [] distros = [] hmm_states = [] state_names = ['swing', 'stance'] swings = 0 stances = 0 for i in range(0, 2): dis = MGD.from_samples(self.class_data[i]) st = State(dis, name=state_names[i]) distros.append(dis) hmm_states.append(st) model = HMM() print(model.states) model.add_states(hmm_states) model.add_transition(model.start, hmm_states[0], 0.5) model.add_transition(model.start, hmm_states[1], 0.5) model.add_transition(hmm_states[1], model.end, 0.000000000000000001) model.add_transition(hmm_states[0], model.end, 0.000000000000000001) for i in range(0, 2): for j in range(0, 2): model.add_transition(hmm_states[i], hmm_states[j], self.t[i][j]) model.bake() tp = 0.0 tn = 0.0 fp = 0.0 fn = 0.0 train_data = self.full_data[train_index, cl] train_class = self.full_labels[train_index, cl] test_data = self.full_data[test_index] test_class = self.full_labels[test_index] print(np.isfinite(train_data).all()) print(np.isfinite(test_data).all()) print(np.isnan(train_data.any())) print(np.isinf(train_data.any())) print(np.isnan(test_data.any())) print(np.isinf(test_data.any())) if (not np.isfinite(train_data.any())) or (not np.isfinite(test_data.any())) \ or (not np.isfinite(train_class.any())) or (not np.isfinite(test_data.any())): rospy.logerr("NaN or Inf Detected") exit() try: rospy.logwarn("Training model #"+str(cl)+", fold #" + str(tests)) seq = np.array(train_data) model.fit(seq, algorithm='baum-welch', verbose='True', n_jobs=8, max_iterations=150) except ValueError: rospy.logwarn("Something went wrong, exiting") rospy.shutdown() exit() seq = [] if self.batch_test == 1: s = 0 # for s in range(0, len(test_data)): while s < len(test_data): k = 0 seq_entry = [] while k < 20 and s < len(test_data): seq_entry.append(test_data[s]) k += 1 s += 1 seq.append(seq_entry) else: seq = np.array(test_data) if seq == [] or test_data == []: rospy.logerr("Empty testing sequence") continue log, path = model.viterbi(test_data) if (len(path) - 2) != len(test_data): rospy.logerr(len(path)) rospy.logerr(path[0][1].name) rospy.logerr(path[len(path) - 1][1].name) rospy.logerr(len(test_data)) exit() tests += 1 for i in range(0, len(path) - 2): if path[i + 1][1].name != 'Gait-start' and path[i + 1][1].name != 'Gait-end': if path[i + 1][1].name == 'swing': # prediction is 0 swings += 1 if test_class[i] == 0: # class is 0 tn += 1.0 elif test_class[i] == 1: fn += 1.0 # class is 1 elif path[i + 1][1].name == 'stance': # prediction is 1 stances += 1 if test_class[i] == 1: # class is 1 tp += 1.0 elif test_class[i] == 0: # class is 0 fp += 1.0 print (swings) print (stances) if (tp + fn) != 0.0: rospy.logwarn("Sensitivity : " + str(tp / (tp + fn))) # sensitivity = tp / (tp + fn) else: rospy.logwarn("Sensitivity : 0.0") # sensitivity = 0.0 if (tn + fp) != 0.0: rospy.logwarn("Specificity : " + str(tn / (tn + fp))) # specificity = tn_total / (tn_total + fp_total) else: rospy.logwarn("Specificity : 0.0") # specificity = 0.0 if (tn + tp + fn + fp) != 0.0: rospy.logwarn("Accuracy : " + str((tn + tp) / (tn + tp + fn + fp))) # accuracy = (tn + tp) / (tn + tp + fn + fp) else: rospy.logwarn("Accuracy : 0.0") # accuracy = 0.0 tn_total += tn tp_total += tp fn_total += fn fp_total += fp tp_total /= tests tn_total /= tests fp_total /= tests fn_total /= tests rospy.logerr("TP :" + str(tp_total)) rospy.logerr("TN :" + str(tn_total)) rospy.logerr("FP :" + str(fp_total)) rospy.logerr("FN :" + str(fn_total)) rospy.logerr("Tests :" + str(tests)) if (tp_total + fn_total) != 0.0: sensitivity = tp_total / (tp_total + fn_total) else: sensitivity = 0.0 if (tn_total + fp_total) != 0.0: specificity = tn_total / (tn_total + fp_total) else: specificity = 0.0 if (tn_total + tp_total + fn_total + fp_total) != 0.0: accuracy = (tn_total + tp_total) / (tn_total + tp_total + fn_total + fp_total) else: accuracy = 0.0 rospy.logwarn("----------------------------------------------------------") rospy.logerr("Total accuracy: " + str(accuracy)) rospy.logerr("Total sensitivity: " + str(sensitivity)) rospy.logerr("Total specificity: " + str(specificity)) stats = [tn_total * tests, fn_total * tests, fp_total * tests, fn_total * tests, tests, accuracy, sensitivity, specificity] rospy.logwarn("-------------------DONE-------------------------") classifier_array.append(model) stats_array.append(stats) pickle.dump(classifier_array, open(datafile + "distributed_classifiers.p", 'wb')) pickle.dump(stats_array, open(datafile + "distributed_stats.p", 'wb')) scio.savemat(datafile + "distributed_stats.mat", {'stats': stats_array})
total.append(no_p_line) converted_total = [converter_to(x, 2) for x in total] #print(converted_total) def test(model): with open('new_intron_acceptor.txt') as test_file: cont_ok = 0 cont_not_ok = 0 for line in test_file: test_line = line.lower().replace('\n', '') logp, path = model.viterbi(converter_to(test_line, 2)) if (path[-40][1].name == 'acceptor015'): cont_ok += 1 else: cont_not_ok += 1 print(cont_ok / (cont_not_ok + cont_ok)) test(model) model.fit(converted_total, transition_pseudocount=1, emission_pseudocount=1, verbose=True) test(model) with open('partial_model_intron_acceptor_model.json', 'w') as out: out.write(model.to_json())
def _segment(self, arr, components=2): nonzero = arr[arr > 0] idx = self.hampel_filter(np.log2(nonzero)) filtered = nonzero[idx] log_gmm = self.get_states(np.log2(filtered)) log_means, log_probs = log_gmm.means_.ravel(), log_gmm.weights_ ln_gmm = self.get_states(filtered) # to improve the sensitivity ln_means, ln_probs = ln_gmm.means_.ravel(), ln_gmm.weights_ if (len(log_means) == 1): means, probs = ln_means, ln_probs scale = 'linear' else: means, probs = log_means, log_probs scale = 'log' logger.info('Estimated HMM state number: {0} ({1} scale)'.format(len(means), scale)) model = HiddenMarkovModel() # GMM emissions dists = [] for m in means: tmp = [] for i in range(components): e = m + (-1)**i * ((i+1)//2) * 0.5 s = 0.5 tmp.append(NormalDistribution(e, s)) mixture = State(GeneralMixtureModel(tmp), name=str(m)) dists.append(mixture) model.add_states(*tuple(dists)) # transition matrix for i in range(len(means)): for j in range(len(means)): if i==j: model.add_transition(dists[i], dists[j], 0.8) else: model.add_transition(dists[i], dists[j], 0.2/(len(means)-1)) # starts and ends for i in range(len(means)): model.add_transition(model.start, dists[i], probs[i]) model.bake() # training sequences tmp = np.zeros(nonzero.size) tmp[idx] = filtered newarr = np.zeros(arr.size) newarr[arr > 0] = tmp if len(means) > 1: model.fit(self.pieces(newarr, scale=scale), algorithm='baum-welch', n_jobs=self.n_jobs, max_iterations=5000, stop_threshold=2e-4) queue = newarr[newarr > 0] if scale=='log': seq = np.r_[[s.name for i, s in model.viterbi(np.log2(queue))[1][1:]]] else: seq = np.r_[[s.name for i, s in model.viterbi(queue)[1][1:]]] seg = self.assign_cnv(queue, seq) predicted = np.zeros(newarr.size) predicted[newarr > 0] = seg seg = self.call_intervals(predicted) else: seg = [(0, newarr.size)] return newarr, seg, scale
def train_and_test(): with open('../data extractors/exons_start_1.txt') as in_file: total = [] for line in in_file: no_p_line = line.replace('P', '').lower().replace('\n', '') total.append(no_p_line) converted_total = [converter_to(x, 2) for x in total] matrixDonor0 = numpy.array( matrix_from_exa('../data extractors/new_donor1.exa')) c0, c1, c2 = calculator.calculate_proba2('../data extractors/new_cuts.txt') print(c0.p, c1.p, c2.p) coding_state0 = State(DiscreteDistribution(c0.p), 'coding state 0') coding_state1 = State(DiscreteDistribution(c1.p), 'coding state 1') coding_state2 = State(DiscreteDistribution(c2.p), 'coding state 2') donor0_data = classify(matrixDonor0, 2) donor0_states = sequence_state_factory(donor0_data, 'donor0') post = State(DiscreteDistribution(equal_distribution), name='post') model = HiddenMarkovModel('coding to donor') model.add_state(coding_state0) model.add_state(coding_state1) model.add_state(coding_state2) add_sequence(model, donor0_states) model.add_state(post) model.add_transition(model.start, coding_state0, 1) model.add_transition(coding_state0, coding_state1, 0.6) model.add_transition(coding_state0, donor0_states[0], 0.4) model.add_transition(coding_state1, coding_state2, 0.6) model.add_transition(coding_state1, donor0_states[0], 0.4) model.add_transition(coding_state2, coding_state0, 0.6) model.add_transition(coding_state2, donor0_states[0], 0.4) model.add_transition(donor0_states[-1], post, 1) model.add_transition(post, post, 0.9) model.add_transition(post, model.end, 0.1) model.bake() test_model(model) model.fit(converted_total, transition_pseudocount=1, emission_pseudocount=1, verbose=True) test_model(model) with open('partial_model_coding_to_donor_model0.json', 'w') as out: out.write(model.to_json())
def main(): rospy.init_node('hmm_trainer') param_vec = [] rospack = rospkg.RosPack() if (len(sys.argv) < 2): print("Missing the prefix argument.") exit() else: prefix = sys.argv[1] use_measurements = np.zeros(3) # patient = rospy.get_param('~patient', 'None') # if prefix == 'None': # rospy.logerr("No filename given ,exiting") # exit() phase_pub = rospy.Publisher('/phase', Int32, queue_size=10) packpath = rospack.get_path('exo_gait_phase_det') datapath = packpath + "/log/mat_files/" rospy.logwarn("Patient: {}".format(prefix)) print("Patient: {}".format(prefix)) verbose = rospy.get_param('~verbose', False) """Print console output into text file""" # sys.stdout = open(packpath + "/log/results/intra-sub_" + prefix + ".txt", "w") """Data loading""" n_trials = 3 data = [[] for x in range(0, n_trials)] for i in range(0, n_trials): data[i] = scio.loadmat(datapath + prefix + "_proc_data" + str(i + 1) + ".mat") accel_x = [[] for x in range(0, n_trials)] accel_y = [[] for x in range(0, n_trials)] accel_z = [[] for x in range(0, n_trials)] gyro_x = [[] for x in range(0, n_trials)] gyro_y = [[] for x in range(0, n_trials)] gyro_z = [[] for x in range(0, n_trials)] time_array = [[] for x in range(0, n_trials)] labels = [[] for x in range(0, n_trials)] fs_fsr = [] for i in range(0, n_trials): # accel_x[i] = data[i]["accel_x"][0] # accel_y[i] = data[i]["accel_y"][0] # accel_z[i] = data[i]["accel_z"][0] gyro_x[i] = data[i]["gyro_x"][0] gyro_y[i] = data[i]["gyro_y"][0] gyro_z[i] = data[i]["gyro_z"][0] time_array[i] = data[i]["time"][0] labels[i] = data[i]["labels"][0] fs_fsr.append(data[i]["Fs_fsr"][0][0]) """Feature extraction""" """First derivative""" # fder_gyro_x = [] # for i in range(n_trials): # der = [] # der.append(gyro_x[i][0]) # for j in range(1,len(gyro_x[i])-1): # der.append((gyro_x[i][j+1]-gyro_x[i][j-1])/2) # der.append(gyro_x[i][-1]) # fder_gyro_x.append(der) fder_gyro_y = [] for i in range(n_trials): der = [] der.append(gyro_y[i][0]) for j in range(1, len(gyro_y[i]) - 1): der.append((gyro_y[i][j + 1] - gyro_y[i][j - 1]) / 2) der.append(gyro_y[i][-1]) fder_gyro_y.append(der) # fder_gyro_z = [] # for i in range(n_trials): # der = [] # der.append(gyro_z[i][0]) # for j in range(1,len(gyro_z[i])-1): # der.append((gyro_z[i][j+1]-gyro_z[i][j-1])/2) # der.append(gyro_z[i][-1]) # fder_gyro_z.append(der) """Second derivative""" # sder_gyro_x = [] # for i in range(n_trials): # der = [] # der.append(fder_gyro_x[i][0]) # for j in range(1,len(fder_gyro_x[i])-1): # der.append((fder_gyro_x[i][j+1]-fder_gyro_x[i][j-1])/2) # der.append(fder_gyro_x[i][-1]) # sder_gyro_x.append(der) # # sder_gyro_y = [] # for i in range(n_trials): # der = [] # der.append(fder_gyro_y[i][0]) # for j in range(1,len(fder_gyro_y[i])-1): # der.append((fder_gyro_y[i][j+1]-fder_gyro_y[i][j-1])/2) # der.append(fder_gyro_y[i][-1]) # sder_gyro_y.append(der) # # sder_gyro_z = [] # for i in range(n_trials): # der = [] # der.append(fder_gyro_z[i][0]) # for j in range(1,len(fder_gyro_z[i])-1): # der.append((fder_gyro_z[i][j+1]-fder_gyro_z[i][j-1])/2) # der.append(fder_gyro_z[i][-1]) # sder_gyro_z.append(der) """Peak detector""" # window_wid = 15 # Window width should be odd # search_ratio = window_wid/2 # pdet_gyro_x = [] # for i in range(n_trials): # pdet = [] # for j in range(len(gyro_x[i])): # if j <= search_ratio: # win = gyro_x[i][:j+search_ratio+1] # elif j >= len(gyro_x[i])-search_ratio-1: # win = gyro_x[i][j-search_ratio:] # else: # win = gyro_x[i][j-search_ratio:j+search_ratio+1] # pdet.append(gyro_x[i][j]/max(win)) # pdet_gyro_x.append(pdet) # print len(gyro_x) # print len(pdet_gyro_x) # for i in range(3): # print len(gyro_x[i]) # print len(pdet_gyro_x[i]) # pdet_gyro_y = [] # for i in range(n_trials): # pdet = [] # for j in range(len(gyro_y[i])): # if j <= search_ratio: # win = gyro_y[i][:j+search_ratio+1] # elif j >= len(gyro_y[i])-search_ratio-1: # win = gyro_y[i][j-search_ratio:] # else: # win = gyro_y[i][j-search_ratio:j+search_ratio+1] # pdet.append(gyro_y[i][j]/max(win)) # pdet_gyro_y.append(pdet) # # pdet_gyro_z = [] # for i in range(n_trials): # pdet = [] # for j in range(len(gyro_z[i])): # if j <= search_ratio: # win = gyro_z[i][:j+search_ratio+1] # elif j >= len(gyro_z[i])-search_ratio-1: # win = gyro_z[i][j-search_ratio:] # else: # win = gyro_z[i][j-search_ratio:j+search_ratio+1] # pdet.append(gyro_z[i][j]/max(win)) # pdet_gyro_z.append(pdet) """Create training and test data""" ff = [[] for x in range(0, n_trials)] for j in range(0, n_trials): for k in range(0, len(time_array[j])): f_ = [] # f_.append(accel_x[j][k]) # f_.append(accel_y[j][k]) # f_.append(accel_z[j][k]) # f_.append(gyro_x[j][k]) # f_.append(fder_gyro_x[j][k]) # f_.append(sder_gyro_x[j][k]) # f_.append(pdet_gyro_x[j][k]) f_.append(gyro_y[j][k]) f_.append(fder_gyro_y[j][k]) # f_.append(sder_gyro_y[j][k]) # f_.append(pdet_gyro_y[j][k]) # f_.append(gyro_z[j][k]) # f_.append(fder_gyro_z[j][k]) # f_.append(sder_gyro_z[j][k]) # f_.append(pdet_gyro_z[j][k]) ff[j].append(f_) n_signals = len(ff[0][0]) """cHMM""" startprob = [0.25, 0.25, 0.25, 0.25] state_names = ['hs', 'ff', 'ho', 'sw'] rospy.logwarn("""Intra-subject training""") print("""Intra-subject training""") # for leave_one_out in range(0, n_trials): for leave_one_out in range(1, 2): rospy.logwarn("-------TRIAL {}-------".format(leave_one_out + 1)) print("-------TRIAL {}-------".format(leave_one_out + 1)) """Transition matrix""" t = np.zeros((4, 4)) # Transition matrix prev = -1 for i in range(0, len(labels[leave_one_out])): # data[i]._replace(label = correct_mapping[data[i].label]) if prev == -1: prev = labels[leave_one_out][i] t[prev][labels[leave_one_out][i]] += 1.0 prev = labels[leave_one_out][i] t = normalize(t, axis=1, norm='l1') if verbose: rospy.logwarn("TRANSITION MATRIX\n" + str(t)) n_classes = 4 class_data = [[] for x in range(n_classes)] full_data = [] full_labels = [] for i in range(len(ff[leave_one_out])): full_data.append(ff[leave_one_out][i]) full_labels.append(labels[leave_one_out][i]) # print full_data == ff[leave_one_out] # print full_labels == labels[leave_one_out] # print len(full_data) == len(full_labels) # for i in range(0,len(ff[leave_one_out-1])): # full_data.append(ff[leave_one_out-1][i]) # full_labels.append(labels[leave_one_out-1][i]) # for i in range(0,len(ff[(leave_one_out+1) % n_trials])): # full_data.append(ff[(leave_one_out+1) % n_trials][i]) # full_labels.append(labels[(leave_one_out+1) % n_trials][i]) # print len(full_data) == (len(ff[leave_one_out]) + len(ff[leave_one_out-1]) + len(ff[(leave_one_out+1) % n_trials])) # print full_data # print len(full_data) # print full_labels # print len(full_labels) for i in range(0, len(full_data)): class_data[full_labels[i]].append(full_data[i]) """Multivariate Gaussian Distributions for each hidden state""" class_means = [[[] for x in range(n_signals)] for i in range(n_classes)] class_vars = [[[] for x in range(n_signals)] for i in range(n_classes)] class_std = [[[] for x in range(n_signals)] for i in range(n_classes)] class_cov = [] classifiers = [] for i in range(0, n_classes): # cov = np.ma.cov(np.array(class_data[i]), rowvar=False) cov = np.cov(np.array(class_data[i]), rowvar=False) class_cov.append(cov) for j in range(0, n_signals): class_means[i][j] = np.array( class_data[i][:])[:, [j]].mean(axis=0) class_vars[i][j] = np.array(class_data[i][:])[:, [j]].var(axis=0) class_std[i][j] = np.array(class_data[i][:])[:, [j]].std(axis=0) print "\n" + str(class_cov) + "\n" """Classifier initialization""" distros = [] hmm_states = [] for i in range(n_classes): dis = MGD\ (np.array(class_means[i]).flatten(), np.array(class_cov[i])) st = State(dis, name=state_names[i]) distros.append(dis) hmm_states.append(st) model = HMM(name="Gait") model.add_states(hmm_states) """Initial transitions""" for i in range(0, n_classes): model.add_transition(model.start, hmm_states[i], startprob[i]) """Left-right model""" for i in range(0, n_classes): for j in range(0, n_classes): model.add_transition(hmm_states[i], hmm_states[j], t[i][j]) model.bake() # print (model.name) # rospy.logwarn("N. observations: " + str(model.d)) # print (model.edges) # rospy.logwarn("N. hidden states: " + str(model.silent_start)) # print model """Training""" # limit = int(len(ff1)*(8/10.0)) # 80% of data to test, 20% to train # x_train = list([ff1[:limit]]) # x_train = list([ff1,ff2]) # x_train = list([ff2]) x_train = [] for i in range(0, len(ff[leave_one_out - 1])): x_train.append(ff[leave_one_out - 1][i]) for i in range(0, len(ff[(leave_one_out + 1) % n_trials])): x_train.append(ff[(leave_one_out + 1) % n_trials][i]) x_train = list([x_train]) rospy.logwarn("Training...") model.fit(x_train, algorithm='baum-welch', verbose=verbose) # model.fit(list([ff[leave_one_out-1]]), algorithm='baum-welch', verbose=verbose) # model.fit(list([ff[(leave_one_out+1) % n_trials]]), algorithm='baum-welch', verbose=verbose) # model.fit(seq, algorithm='viterbi', verbose='True') """Find most-likely sequence""" # logp, path = model.viterbi(ff[limit:]) logp, path = model.viterbi(ff[leave_one_out]) # print logp # print path class_labels = [] for i in range(len(labels[leave_one_out])): path_phase = path[i][1].name for state in range(n_classes): if path_phase == state_names[state]: class_labels.append(state) labels[leave_one_out] = list(labels[leave_one_out][1:]) # Saving classifier labels into csv file # np.savetxt(packpath+"/log/intra_labels/"+prefix+"_labels"+str(leave_one_out+1)+".csv", class_labels, delimiter=",", fmt='%s') # rospy.logwarn("csv file with classifier labels was saved.") sum = 0.0 true_pos = 0.0 false_pos = 0.0 true_neg = 0.0 false_neg = 0.0 tol = 6e-2 # Tolerance window of 60 ms tol_window = int((tol / 2) / (1 / float(fs_fsr[leave_one_out]))) print "FSR freq: " + str(fs_fsr[leave_one_out]) print "Tolerance win: " + str(tol_window) # print tol_window # # print type(tol_window) # for i in range(0, len(labels[leave_one_out])): # """Tolerance window""" # if i > tol_window+1 and i < len(labels[leave_one_out])-tol_window: # # curr_tol = time_array[leave_one_out][i+tol_window]-time_array[leave_one_out][i-tol_window] # # print curr_tol # win = [] # for j in range(i-tol_window,i+tol_window+1): # win.append(state_names[labels[leave_one_out][j]]) # if path[i][1].name in win: # sum += 1.0 # else: # if path[i][1].name == labels[leave_one_out][i]: # sum += 1.0 """Performance Evaluation""" rospy.logwarn("Calculating results...") time_error = [[] for x in range(n_classes)] for phase in range(n_classes): for i in range(len(labels[leave_one_out])): """Tolerance window""" if i >= tol_window and i < len( labels[leave_one_out]) - tol_window: # curr_tol = time_array[leave_one_out][i+tol_window]-time_array[leave_one_out][i-tol_window] # print curr_tol win = [] for j in range(i - tol_window, i + tol_window + 1): win.append(labels[leave_one_out][j]) """Calculate time error with true positives""" if class_labels[i] == phase: if class_labels[i] in win: for k in range(len(win)): if win[k] == phase: time_error[phase].append( (k - tol_window) / fs_fsr[leave_one_out]) break true_pos += 1.0 if verbose: print phase + ", " + state_names[labels[ leave_one_out][i]] + ", " + class_labels[ i] + ", true_pos" else: false_pos += 1.0 if verbose: print phase + ", " + state_names[labels[ leave_one_out][i]] + ", " + class_labels[ i] + ", false_pos" else: if phase != labels[leave_one_out][i]: # if phase not in win: true_neg += 1.0 if verbose: print phase + ", " + state_names[labels[ leave_one_out][i]] + ", " + class_labels[ i] + ", true_neg" else: false_neg += 1.0 if verbose: print phase + ", " + state_names[labels[ leave_one_out][i]] + ", " + class_labels[ i] + ", false_neg" else: if class_labels[i] == phase: if class_labels[i] == labels[leave_one_out][i]: true_pos += 1.0 else: false_pos += 1.0 else: if phase != labels[leave_one_out][i]: true_neg += 1.0 else: false_neg += 1.0 rospy.logwarn("Timing error") print("Timing error") for phase in range(n_classes): rospy.logwarn("(" + state_names[phase] + ")") print "(" + state_names[phase] + ")" if len(time_error[phase]) > 0: rospy.logwarn( str(np.mean(time_error[phase])) + " + " + str(np.std(time_error[phase]))) print str(np.mean(time_error[phase])) + " + " + str( np.std(time_error[phase])) else: rospy.logwarn("0.06 + 0") print "0.06 + 0" """Calculate mean time (MT) of stride and each gait phase and Coefficient of Variation (CoV)""" rospy.logwarn("Mean time (MT) and Coefficient of Variance (CoV)") print("Mean time (MT) and Coefficient of Variance (CoV)") n_group = 0 for label_group in [class_labels, labels[leave_one_out]]: if n_group == 0: rospy.logwarn("Results for HMM:") print("Results for HMM:") else: rospy.logwarn("Results for FSR:") print("Results for FSR:") curr_label = -1 count = 0 n_phases = 0 stride_samples = 0 phases_time = [[] for x in range(n_classes)] stride_time = [] for label in label_group: # for label in class_labels: if curr_label != label: n_phases += 1 stride_samples += count if label == 0: # Gait start: HS if n_phases == 4: # If a whole gait cycle has past stride_time.append(stride_samples / fs_fsr[leave_one_out]) n_phases = 0 stride_samples = 0 phases_time[label - 1].append(count / fs_fsr[leave_one_out]) curr_label = label count = 1 else: count += 1.0 for phase in range(n_classes): mean_time = np.mean(phases_time[phase]) phase_std = np.std(phases_time[phase]) rospy.logwarn("(" + state_names[phase] + ")") print "(" + state_names[phase] + ")" rospy.logwarn("Mean time: " + str(mean_time) + " + " + str(phase_std)) print "Mean time: " + str(mean_time) + " + " + str(phase_std) rospy.logwarn("CoV: " + str(phase_std / mean_time * 100.0)) print("CoV: " + str(phase_std / mean_time * 100.0)) mean_time = np.mean(stride_time) phase_std = np.std(stride_time) rospy.logwarn("(Stride)") print "(Stride)" rospy.logwarn("Mean time: " + str(mean_time) + " + " + str(phase_std)) print "Mean time: " + str(mean_time) + " + " + str(phase_std) rospy.logwarn("CoV: " + str(phase_std / mean_time * 100.0)) print("CoV: " + str(phase_std / mean_time * 100.0)) n_group += 1 """Accuracy""" # acc = sum/len(labels[leave_one_out]) if (true_neg + true_pos + false_neg + false_pos) != 0.0: acc = (true_neg + true_pos) / (true_neg + true_pos + false_neg + false_pos) else: acc = 0.0 """Sensitivity or True Positive Rate""" if true_pos + false_neg != 0: tpr = true_pos / (true_pos + false_neg) else: tpr = 0.0 """Specificity or True Negative Rate""" if false_pos + true_neg != 0: tnr = true_neg / (false_pos + true_neg) else: tnr = 0.0 # rospy.logwarn("Accuracy: {}%".format(acc*100)) rospy.logwarn("Accuracy: {}%".format(acc * 100.0)) # print("Accuracy: {}%".format(acc*100.0)) rospy.logwarn("Sensitivity: {}%".format(tpr * 100.0)) # print("Sensitivity: {}%".format(tpr*100.0)) rospy.logwarn("Specificity: {}%".format(tnr * 100.0)) # print("Specificity: {}%".format(tnr*100.0)) """Goodness index""" G = np.sqrt((1 - tpr)**2 + (1 - tnr)**2) if G <= 0.25: rospy.logwarn("Optimum classifier (G = {} <= 0.25)".format(G)) # print("Optimum classifier (G = {} <= 0.25)".format(G)) elif G > 0.25 and G <= 0.7: rospy.logwarn("Good classifier (0.25 < G = {} <= 0.7)".format(G)) # print("Good classifier (0.25 < G = {} <= 0.7)".format(G)) elif G == 0.7: rospy.logwarn("Random classifier (G = 0.7)") # print("Random classifier (G = 0.7)") else: rospy.logwarn("Bad classifier (G = {} > 0.7)".format(G))
for j in range(0, n_classes): model.add_transition(hmm_states[i], hmm_states[j], t[i][j]) print(hmm_states[i].name + "(" + str(i) + ")-> " + hmm_states[j].name + "(" + str(j) + ") : " + str(t[i][j])) model.bake() # seq = list([full_features[:limit]]) # print skf[2] for train_index, test_index in skf: X_train = full_features[train_index] Y_train = full_labels[train_index] X_test = full_features[test_index] Y_test = full_labels[test_index] model.fit(list([X_train]), algorithm='viterbi', verbose='False') if batch == 0: logp, path = model.viterbi(list(X_test)) sum_ = 0.0 path = path[1:] for i in range(0, len(path)): # print path[i][1].name + " " + phase_labels[Y_test[i]] if path[i][1].name == phase_labels[Y_test[i]]: sum_ += 1.0 print(str(sum_) + "/" + str(len(Y_test))) print(sum_ / float(len(Y_test))) print('------------------------------------') else: b = 0 while b < len(X_test):
hmmodel.add_transition(back_state, back_state, 0.9) hmmodel.add_transition(back_state, fixed_state, 0.1) hmmodel.add_transition(fixed_state, fixed_state, 0.9) hmmodel.add_transition(fixed_state, back_state, 0.1) hmmodel.bake() seq = list('acgtacgtaaaaccccaaa') lopg, path = hmmodel.viterbi(seq) print([x[1].name for x in path]) print(hmmodel.to_json()) to_fit1 = list('acgtacacacacacacac') to_fit2 = list('acgtacgtacgtacgtacgtacgtacgtcgt') to_fit3 = list('aaaaacccccaaacc') to_fit4 = list('aaaaaccgcccaaaccacgtacgtacgtacgtactacgggggg') lopg, path = hmmodel.viterbi(to_fit4) print([x[1].name for x in path]) hmmodel.fit([to_fit1, to_fit2, to_fit3, to_fit4]) lopg, path = hmmodel.viterbi(seq) print([x[1].name for x in path]) print(hmmodel.to_json())
model.bake() # print (model.name) rospy.logwarn("N. observations: " + str(model.d)) # print (model.edges) rospy.logwarn("N. hidden states: " + str(model.silent_start)) # print model """Training""" limit = int(len(ff) * (8 / 10.0)) # 80% of data to test, 20% to train # seq = list([ff[:limit]]) x_train = list([ff[limit:]]) print len(x_train) rospy.logwarn("Training...") # time_start = time.clock() model.fit(x_train, algorithm='baum-welch', verbose='True') # model.fit(x_train, algorithm='viterbi', verbose='True') # time_elapsed = time.clock()- time_start # rospy.logwarn("Training time: " + str(time_elapsed) + "s") """Find most-likely sequence""" # logp, path = model.viterbi(ff[limit:]) x_test = ff[:limit] logp, path = model.viterbi(x_test) sum_ = 0.0 for i in range(0, len(path)): if path[i][1].name != 'Gait-start': # if path[i][1].name == state_names[labels[i+limit - 1]]: if path[i][1].name == state_names[labels[i - 1]]: # print path[i][1].name + " = " + state_names[labels[i+limit - 1]] sum_ += 1.0 # else:
def main(): rospy.init_node('hmm_trainer') phase_pub = rospy.Publisher('/phase', Int32, queue_size=10) rospack = rospkg.RosPack() packpath = rospack.get_path('exo_control') datapath = packpath + "/log/mat_files/" verbose = rospy.get_param('~verbose', False) """Print console output into text file""" sys.stdout = open(packpath + "/log/results/leave-one-out_cross_validation.txt", "w") """Data loading""" n_trials = 3 n_sub = 9 healthy_subs = ["daniel", "erika", "felipe", "jonathan", "luis", "nathalia", "paula", "pedro", "tatiana"] patients = ["andres", "carlos", "carmen", "carolina", "catalina", "claudia", "emmanuel", "fabian", "gustavo"] study_subs = [healthy_subs, patients] dataset = [{} for x in range(len(study_subs))] for i in range(len(study_subs)): for sub in study_subs[i]: dataset[i][sub] = {"gyro_y": [[] for x in range(n_trials)], "fder_gyro_y": [[] for x in range(n_trials)], "time": [[] for x in range(n_trials)], "labels": [[] for x in range(n_trials)], "Fs_fsr": 0.0} for group in dataset: for sub,data in group.iteritems(): for trial in range(n_trials): mat_file = scio.loadmat(datapath + sub + "_proc_data" + str(trial+1) + ".mat") for signal in data: if signal not in ["pathol","fder_gyro_y"]: if signal == "Fs_fsr": data[signal] = mat_file[signal][0][0] else: data[signal][trial] = mat_file[signal][0] del mat_file """Feature extraction""" """First derivative""" for group in dataset: for sub,data in group.iteritems(): for trial in range(n_trials): der = [] gyro_y = data["gyro_y"][trial] der.append(gyro_y[0]) for i in range(1,len(gyro_y)-1): der.append((gyro_y[i+1]-gyro_y[i-1])/2) der.append(gyro_y[-1]) data["fder_gyro_y"][trial] = der del der, sub, data """Global variables of cHMM""" startprob = [0.25, 0.25, 0.25, 0.25] state_names = ['hs', 'ff', 'ho', 'sw'] n_classes = 4 n_signals = 2 tol = 6e-2 # Tolerance window of 60 ms # for pathology in range(len(dataset)): # if pathology == 0: # rospy.logwarn("**Leave-one-out cross validation with HEALTHY subjects**") # print "**Leave-one-out cross validation with HEALTHY subjects**" # else: # rospy.logwarn("**Leave-one-out cross validation with PATIENTS**") # print "**Leave-one-out cross validation with PATIENTS**" if True: # for lou_sub,lou_data in dataset[pathology].iteritems(): # Iterate through leave-one-out subject's data for lou_sub,lou_data in dataset[0].iteritems(): # Iterate through leave-one-out subject's data rospy.logwarn("Leave " + lou_sub + " out:") print "Leave " + lou_sub + " out:" t = np.zeros((4, 4)) # Transition matrix prev = -1 for trial in range(n_trials): for label in lou_data["labels"][trial]: if prev == -1: prev = label t[prev][label] += 1.0 prev = label t = normalize(t, axis=1, norm='l1') if verbose: rospy.logwarn("TRANSITION MATRIX\n" + str(t)) class_data = [[] for x in range(n_classes)] full_lou_data = [] full_lou_labels = [] for trial in range(n_trials): for sample in range(len(lou_data["gyro_y"][trial])): d = [lou_data["gyro_y"][trial][sample], lou_data["fder_gyro_y"][trial][sample]] l = lou_data["labels"][trial][sample] full_lou_data.append(d) full_lou_labels.append(l) class_data[l].append(d) """Multivariate Gaussian Distributions for each hidden state""" class_means = [[[] for x in range(n_signals)] for i in range(n_classes)] class_vars = [[[] for x in range(n_signals)] for i in range(n_classes)] class_std = [[[] for x in range(n_signals)] for i in range(n_classes)] class_cov = [] classifiers = [] for state in range(n_classes): cov = np.ma.cov(np.array(class_data[state]), rowvar=False) class_cov.append(cov) for signal in range(n_signals): class_means[state][signal] = np.array(class_data[state][:])[:, [signal]].mean(axis=0) class_vars[state][signal] = np.array(class_data[state][:])[:, [signal]].var(axis=0) class_std[state][signal] = np.array(class_data[state][:])[:, [signal]].std(axis=0) """Classifier initialization""" distros = [] hmm_states = [] for state in range(n_classes): dis = MGD\ (np.array(class_means[state]).flatten(), np.array(class_cov[state])) st = State(dis, name=state_names[state]) distros.append(dis) hmm_states.append(st) model = HMM(name="Gait") model.add_states(hmm_states) """Initial transitions""" for state in range(n_classes): model.add_transition(model.start, hmm_states[state], startprob[state]) """Left-right model""" for i in range(n_classes): for j in range(n_classes): model.add_transition(hmm_states[i], hmm_states[j], t[i][j]) model.bake() """Create training and test data""" x_train = [] x_test = [] test_gyro_y = lou_data["gyro_y"][-1] test_fder_gyro_y = lou_data["fder_gyro_y"][-1] """Create test data with n-th trial of leave-one-out subject""" for sample in range(len(test_gyro_y)): x_test.append([test_gyro_y[sample], test_fder_gyro_y[sample]]) """Create training data with n-1 trials of all subjects (patients group)""" # if pathology == 1: # for trial in range(n_trials-1): # train_gyro_y = lou_data["gyro_y"][trial] # train_fder_gyro_y = lou_data["fder_gyro_y"][trial] # for sample in range(len(train_gyro_y)): # x_train.append([train_gyro_y[sample], train_fder_gyro_y[sample]]) """Create training data with n-1 trials of the rest of subjects (healthy group)""" # for train_sub,train_data in dataset[pathology].iteritems(): for train_sub,train_data in dataset[0].iteritems(): # if lou_sub != train_sub: if True: for trial in range(n_trials-1): train_gyro_y = train_data["gyro_y"][trial] train_fder_gyro_y = train_data["fder_gyro_y"][trial] for sample in range(len(train_gyro_y)): x_train.append([train_gyro_y[sample], train_fder_gyro_y[sample]]) x_train = list([x_train]) """Training""" rospy.logwarn("Training HMM...") model.fit(x_train, algorithm='baum-welch', verbose=True) # model.fit(x_train, algorithm='viterbi', verbose='True') """Find most-likely sequence""" logp, path = model.viterbi(x_test) class_labels = [] for i in range(len(lou_data["labels"][-1])): path_phase = path[i][1].name for state in range(n_classes): if path_phase == state_names[state]: class_labels.append(state) # Saving classifier labels into csv file np.savetxt(packpath+"/log/inter_labels/"+lou_sub+"_labels.csv", class_labels, delimiter=",", fmt='%s') rospy.logwarn("csv file with classifier labels was saved.") lou_data["labels"][-1] = lou_data["labels"][-1][1:] """Results""" sum = 0.0 true_pos = 0.0 false_pos = 0.0 true_neg = 0.0 false_neg = 0.0 tol_window = int((tol/2) / (1/float(lou_data["Fs_fsr"]))) rospy.logwarn("Calculating results...") time_error = [[] for x in range(n_classes)] for phase in range(n_classes): for i in range(len(lou_data["labels"][-1])): """Tolerance window""" if i >= tol_window and i < len(lou_data["labels"][-1])-tol_window: win = [] for win_label in lou_data["labels"][-1][i-tol_window:i+tol_window+1]: win.append(win_label) if class_labels[i] == phase: if class_labels[i] in win: for k in range(len(win)): if win[k] == phase: time_error[phase].append((k-tol_window)/lou_data["Fs_fsr"]) break true_pos += 1.0 if verbose: print phase + ", " + lou_data["labels"][-1][i] + ", " + class_labels[i] + ", true_pos" else: false_pos += 1.0 if verbose: print phase + ", " + lou_data["labels"][-1][i] + ", " + class_labels[i] + ", false_pos" else: if phase != lou_data["labels"][-1][i]: # if phase not in win: true_neg += 1.0 if verbose: print phase + ", " + lou_data["labels"][-1][i] + ", " + class_labels[i] + ", true_neg" else: false_neg += 1.0 if verbose: print phase + ", " + lou_data["labels"][-1][i] + ", " + class_labels[i] + ", false_neg" else: if class_labels[i] == phase: if class_labels[i] == lou_data["labels"][-1][i]: true_pos += 1.0 else: false_pos += 1.0 else: if phase != lou_data["labels"][-1][i]: true_neg += 1.0 else: false_neg += 1.0 rospy.logwarn("Timing error") print "Timing error" for phase in range(n_classes): rospy.logwarn("(" + state_names[phase] + ")") print "(" + state_names[phase] + ")" if len(time_error[phase]) > 0: rospy.logwarn(str(np.mean(time_error[phase])) + " + " + str(np.std(time_error[phase]))) print str(np.mean(time_error[phase])) + " + " + str(np.std(time_error[phase])) else: rospy.logwarn("0.06 + 0") print "0.06 + 0" """Calculate mean time (MT) of stride and each gait phase and Coefficient of Variation (CoV)""" rospy.logwarn("Mean time (MT) and Coefficient of Variance (CoV)") print "Mean time (MT) and Coefficient of Variance (CoV)" n_group = 0 for label_group in [class_labels, lou_data["labels"][-1]]: if n_group == 0: rospy.logwarn("Results for HMM:") print "Results for HMM:" else: rospy.logwarn("Results for FSR:") print "Results for FSR:" curr_label = -1 count = 0 n_phases = 0 stride_samples = 0 phases_time = [[] for x in range(n_classes)] stride_time = [] for label in label_group: # for label in class_labels: if curr_label != label: n_phases += 1 stride_samples += count if label == 0: # Gait start: HS if n_phases == 4: # If a whole gait cycle has past stride_time.append(stride_samples/lou_data["Fs_fsr"]) n_phases = 0 stride_samples = 0 phases_time[label-1].append(count/lou_data["Fs_fsr"]) curr_label = label count = 1 else: count += 1.0 for phase in range(n_classes): mean_time = np.mean(phases_time[phase]) phase_std = np.std(phases_time[phase]) rospy.logwarn("(" + state_names[phase] + ")") print "(" + state_names[phase] + ")" rospy.logwarn("Mean time: " + str(mean_time) + " + " + str(phase_std)) print "Mean time: " + str(mean_time) + " + " + str(phase_std) rospy.logwarn("CoV: " + str(phase_std/mean_time*100.0)) print("CoV: " + str(phase_std/mean_time*100.0)) mean_time = np.mean(stride_time) phase_std = np.std(stride_time) rospy.logwarn("(Stride)") print "(Stride)" rospy.logwarn("Mean time: " + str(mean_time) + " + " + str(phase_std)) print "Mean time: " + str(mean_time) + " + " + str(phase_std) rospy.logwarn("CoV: " + str(phase_std/mean_time*100.0)) print("CoV: " + str(phase_std/mean_time*100.0)) n_group += 1 """Accuracy""" if (true_neg+true_pos+false_neg+false_pos) != 0.0: acc = (true_neg + true_pos)/(true_neg + true_pos + false_neg + false_pos) else: acc = 0.0 """Sensitivity or True Positive Rate""" if true_pos+false_neg != 0: tpr = true_pos / (true_pos+false_neg) else: tpr = 0.0 """Specificity or True Negative Rate""" if false_pos+true_neg != 0: tnr = true_neg / (false_pos+true_neg) else: tnr = 0.0 rospy.logwarn("Accuracy: {}%".format(acc*100.0)) print("Accuracy: {}%".format(acc*100.0)) rospy.logwarn("Sensitivity: {}%".format(tpr*100.0)) print("Sensitivity: {}%".format(tpr*100.0)) rospy.logwarn("Specificity: {}%".format(tnr*100.0)) print("Specificity: {}%".format(tnr*100.0)) """Goodness index""" G = np.sqrt((1-tpr)**2 + (1-tnr)**2) if G <= 0.25: rospy.logwarn("Optimum classifier (G = {} <= 0.25)".format(G)) print("Optimum classifier (G = {} <= 0.25)".format(G)) elif G > 0.25 and G <= 0.7: rospy.logwarn("Good classifier (0.25 < G = {} <= 0.7)".format(G)) print("Good classifier (0.25 < G = {} <= 0.7)".format(G)) elif G == 0.7: rospy.logwarn("Random classifier (G = 0.7)") print("Random classifier (G = 0.7)") else: rospy.logwarn("Bad classifier (G = {} > 0.7)".format(G)) print("Bad classifier (G = {} > 0.7)".format(G)) del test_gyro_y, test_fder_gyro_y, train_gyro_y, train_fder_gyro_y, d, l
class RealTimeHMM(): def __init__(self, n_trials=3, leave_one_out=1): """Variable initialization""" self.patient = rospy.get_param("gait_phase_det/patient") self.verbose = rospy.get_param("gait_phase_det/verbose") self.n_trials = n_trials self.n_features = 2 # Raw data and 1st-derivative self.leave_one_out = leave_one_out self.rec_data = 0.0 # Number of recorded IMU data self.proc_data = 0.0 # Number of extracted features self.win_size = 3 self.raw_win = [None] * self.win_size # self.fder_win = [0] * self.win_size self.ff = [[] for x in range(self.n_trials)] # Training and test dataset self.labels = [[] for x in range(self.n_trials)] # Reference labels from local data self.first_eval = True self.model_loaded = False algorithm = rospy.get_param("gait_phase_det/algorithm") rospy.loginfo('Decoding algorithm: {}'.format(algorithm)) if algorithm not in DECODER_ALGORITHMS: raise ValueError("Unknown decoder {!r}".format(algorithm)) self.decode = { "fov": self._run_fov, "bvsw": self._run_bvsw }[algorithm] self.imu_callback = { "fov": self._fov_callback, "bvsw": self._bvsw_callback }[algorithm] """HMM variables""" ''' State list: s1: Heel Strike (HS) s2: Flat Foot (FF) s3: Heel Off (HO) s4: Swing Phase (SP)''' self.model_name = "Gait" self.has_model = False self.must_train = False self.states = ['s1', 's2', 's3', 's4'] self.n_states = len(self.states) self.state2phase = {"s1": "hs", "s2": "ff", "s3": "ho", "s4": "sp"} self.train_data = [] self.mgds = {} self.dis_means = [[] for x in range(self.n_states)] self.dis_covars = [[] for x in range(self.n_states)] self.start_prob = [1.0/self.n_states]*self.n_states self.trans_mat = np.array([(0.9, 0.1, 0, 0), (0, 0.9, 0.1, 0), (0, 0, 0.9, 0.1), (0.1, 0, 0, 0.9)]) # Left-right model # self.trans_mat = np.array([0.8, 0.1, 0, 0.1], [0.1, 0.8, 0.1, 0], [0, 0.1, 0.8, 0.1], [0.1, 0, 0.1, 0.8]) # Left-right-left model self.log_startprob = [] self.log_transmat = np.empty((self.n_states, self.n_states)) self.max_win_len = 11 # ms (120 ms: mean IC duration for healthy subjects walking at comfortable speed) self.viterbi_path = np.empty((self.max_win_len+1, self.n_states)) self.backtrack = [[None for x in range(self.n_states)] for y in range(self.max_win_len+1)] self.global_path = [] self.work_buffer = np.empty(self.n_states) self.boundary = 1 self.buff_len = 0 self.states_pos = {} for i in range(len(self.states)): self.states_pos[self.states[i]] = i self.last_state = -1 self.curr_state = -1 self.conv_point = 0 self.conv_found = False self.smp_freq = 100.0 # Hz self.fp_thresh = 1/self.smp_freq*4 # Threshold corresponds to 8 samples self.time_passed = 0.0 self.obs = [[None for x in range(self.n_features)] for y in range(self.max_win_len)] self.model = HMM(name=self.model_name) """ROS init""" rospy.init_node('real_time_HMM', anonymous=True) rospack = rospkg.RosPack() self.packpath = rospack.get_path('hmm_gait_phase_classifier') self.init_subs() self.init_pubs() """HMM-training (if no model exists)""" try: '''HMM-model loading''' with open(self.packpath+'/log/HMM_models/'+self.patient+'.txt') as infile: json_model = json.load(infile) self.model = HMM.from_json(json_model) rospy.logwarn(self.patient + "'s HMM model was loaded.") self.has_model = True except IOError: if os.path.isfile(self.packpath + "/log/mat_files/" + self.patient + "_proc_data1.mat"): """Training with data collected with FSR-based reference system""" self.data_ext = 'mat' self.must_train = True elif os.path.isfile(self.packpath + "/log/IMU_data/" + self.patient + "_labels.csv"): """Training with data collected with offline threshold-based gait phase detection method""" self.data_ext = 'csv' self.must_train = True else: rospy.logerr("Please collect data for training ({})!".format(self.patient)) if self.must_train: rospy.logwarn("HMM model not trained yet for {}!".format(self.patient)) rospy.logwarn("Training HMM with local data...") self.load_data() self.init_hmm() self.train_hmm() self.has_model = True if self.has_model: try: '''MGDs loading if model exists''' for st in self.states: with open(self.packpath+'/log/HMM_models/'+self.patient+'_'+self.state2phase[st]+'.txt') as infile: yaml_dis = yaml.safe_load(infile) dis = MGD.from_yaml(yaml_dis) self.mgds[st] = dis rospy.logwarn(self.patient +"'s " + self.state2phase[st] + " MGC was loaded.") '''Loading means and covariance matrix''' self.dis_means[self.states_pos[st]] = self.mgds[st].parameters[0] self.dis_covars[self.states_pos[st]] = self.mgds[st].parameters[1] except yaml.YAMLError as exc: rospy.logwarn("Not able to load distributions: " + exc) """Transition and initial (log) probabilities matrices upon training""" trans_mat = self.model.dense_transition_matrix()[:self.n_states,:self.n_states] if self.verbose: print '**TRANSITION MATRIX (post-training)**\n'+ str(trans_mat) for i in range(self.n_states): self.log_startprob.append(ln(self.start_prob[i])) for j in range(self.n_states): self.log_transmat[i,j] = ln(trans_mat[i][j]) self.model_loaded = True """Init ROS publishers""" def init_pubs(self): self.phase_pub = rospy.Publisher('/gait_phase', Int8, queue_size=100) """Init ROS subcribers""" def init_subs(self): rospy.Subscriber('/imu_data', Imu, self.imu_callback) """Callback function upon arrival of IMU data for forward-only decoding""" def _fov_callback(self, data): self.rec_data += 1.0 self.raw_win.append(data.angular_velocity.y) self.raw_win.pop(0) # Drop first element if self.rec_data >= self.win_size and self.model_loaded: # At least one previous and one subsequent data should have been received """Extract feature and append it to test dataset""" fder = (self.raw_win[self.win_size/2 + 1] - self.raw_win[self.win_size/2 - 1])/2 # peak_detector = self.raw_win[self.win_size/2]/max(self.raw_win) # self.fder_win.append(fder) # self.fder_win.pop(0) # sder = (self.fder_win[2] - self.fder_win[0])/2 # test_set = [self.raw_win[self.win_size/2], self.raw_win[self.win_size/2 - 2], self.raw_win[self.win_size/2 - 1], self.raw_win[self.win_size/2 + 1], self.raw_win[self.win_size/2 + 2]] # Temporally proximal features test_set = [self.raw_win[self.win_size/2], fder] # Temporally proximal features '''Forward-only decoding approach''' state = self.decode(test_set) # rospy.loginfo("Decoded phase: {}".format(state)) self.time_passed += 1/self.smp_freq if self.last_state == state: if (self.time_passed >= self.fp_thresh) and (self.curr_state == 3 and state == 0) or (state == self.curr_state + 1): self.curr_state = state self.phase_pub.publish(state) else: # rospy.loginfo("Current phase: {}".format(self.state2phase[self.states[self.curr_state]])) # self.phase_pub.publish((self.curr_state-1)%4) self.phase_pub.publish(self.curr_state) else: self.last_state = state self.time_passed = 1/self.smp_freq # rospy.logwarn("Detected phase: {}".format(self.state2phase[self.states[self.last_state]])) """Callback function upon arrival of IMU data for BVSW""" def _bvsw_callback(self, data): self.rec_data += 1.0 self.raw_win.append(data.angular_velocity.y) self.raw_win.pop(0) # Drop first element if self.rec_data >= 3 and self.model_loaded: # At least one previous and one subsequent data should have been received """Extract feature and append it to test dataset""" test_set = [self.raw_win[1], (self.raw_win[0]+self.raw_win[2])/2] # First-derivate of angular velocity '''Bounded sliding window decoding approach''' self.obs.append(test_set) self.obs.pop(0) # This way, -1 element corresponds to last received features states = self.decode(test_set) if len(states) != 0: for st in states: self.phase_pub.publish(st) if self.curr_state != st: rospy.logwarn("Detected phase: {}".format(self.state2phase[self.states[st]])) self.curr_state = st self.proc_data += 1.0 # One gyro data has been processed """Local data loading from mat file and feature extraction""" def load_data(self): """Data loading""" '''Load mat file (Data processed offline in Matlab)''' if self.data_ext == 'mat': rospy.logwarn("Initializing parameters via FSR-based reference system") # subs = ['daniel', 'erika', 'felipe', 'jonathan', 'luis', 'nathalia', 'paula', 'pedro', 'tatiana'] # Healthy subjects # subs = ['carmen', 'carolina', 'catalina', 'claudia', 'emmanuel', 'fabian', 'gustavo'] # Pathological subjects # subs = ['daniel'] # Single subject datapath = self.packpath + "/log/mat_files/" gyro_y = [[] for x in range(self.n_trials)] time_array = [[] for x in range(self.n_trials)] # for patient in subs: for i in range(self.n_trials): data = scio.loadmat(datapath + self.patient + "_proc_data" + str(i+1) + ".mat") # data = scio.loadmat(datapath + patient + "_proc_data" + str(i+1) + ".mat") gyro_y[i] = data["gyro_y"][0] # gyro_y[i] += list(data["gyro_y"][0]) time_array[i] = data["time"][0] # time_array[i] += list(data["time"][0]) self.labels[i] = data["labels"][0] # self.labels[i] += list(data["labels"][0]) """Feature extraction""" '''First derivative''' fder_gyro_y = [] for i in range(self.n_trials): der = [] der.append(gyro_y[i][0]) for j in range(1,len(gyro_y[i])-1): der.append((gyro_y[i][j+1]-gyro_y[i][j-1])/2) der.append(gyro_y[i][-1]) fder_gyro_y.append(der) '''Second derivative''' # sder_gyro_y = [] # for i in range(self.n_trials): # der = [] # der.append(fder_gyro_y[i][0]) # for j in range(1,len(fder_gyro_y[i])-1): # der.append((fder_gyro_y[i][j+1]-fder_gyro_y[i][j-1])/2) # der.append(fder_gyro_y[i][-1]) # sder_gyro_y.append(der) '''Peak detector''' # peak_detector = [] # for i in range(self.n_trials): # win = [] # for j in range(len(gyro_y[i])): # if (j - self.win_size/2) < 0: # win.append(gyro_y[i][j] / self._max(gyro_y[i][0:j + self.win_size/2])) # elif (j + self.win_size/2) > (len(gyro_y[i])-1): # win.append(gyro_y[i][j] / self._max(gyro_y[i][j - self.win_size/2:len(gyro_y[i])-1])) # else: # win.append(gyro_y[i][j] / self._max(gyro_y[i][j - self.win_size/2:j + self.win_size/2])) # peak_detector.append(win) """Create training data""" for j in range(self.n_trials): # for k in range(self.win_size/2,len(gyro_y[j])-1-self.win_size/2): for k in range(len(gyro_y[j])): f_ = [] f_.append(gyro_y[j][k]) '''Approximate differentials''' f_.append(fder_gyro_y[j][k]) # f_.append(sder_gyro_y[j][k]) '''Temporally proximal feature''' # f_.append(gyro_y[j][k-1]) # f_.append(gyro_y[j][k-2]) # f_.append(gyro_y[j][k+1]) # f_.append(gyro_y[j][k+2]) '''Peak detector''' # f_.append(peak_detector[j][k]) self.ff[j].append(f_) self.ff = np.array(self.ff) self.n_features = len(self.ff[0][0]) for i in range(len(self.ff[self.leave_one_out-1])): self.train_data.append(self.ff[self.leave_one_out-1][i]) for i in range(len(self.ff[(self.leave_one_out+1) % self.n_trials])): self.train_data.append(self.ff[(self.leave_one_out+1) % self.n_trials][i]) self.train_data = [self.train_data] # Keep this line or training won't work """Parameter initialization""" '''Kmeans clustering''' # n_components = 4 # No. components of MGD # init = 'kmeans++' # "kmeans||", "first-k", "random" # n_init = 1 # weights = None # max_kmeans_iterations = 1 # batch_size = None # batches_per_epoch = None # # X_concat = x_train # # # X_concat = numpy.concatenate(x_train) # # if X_concat.ndim == 1: # # X_concat = X_concat.reshape(X_concat.shape[0], 1) # # n, d = X_concat.shape # # rospy.logwarn("K-means clustering...") # clf = Kmeans(n_components, init=init, n_init=n_init) # clf.fit(X_concat, weights, max_iterations=max_kmeans_iterations, # batch_size=batch_size, batches_per_epoch=batches_per_epoch) # y = clf.predict(X_concat) # # rospy.logwarn("Creating distributions...") # class_data = [[] for x in range(self.n_states)] # for i in range(len(x_train)): # class_data[y[i]].append(x_train[i]) # if self.verbose: # sum = 0 # print "Kmeans clusters:" # for i in range(self.n_states): # temp = len(class_data[i]) # sum += temp # print temp # print sum, len(x_train) """Offline threshold-based classification""" if self.data_ext == 'csv': rospy.logwarn("Initializing parameters via threshold-based gait phase detection method") self.leave_one_out = 0 gyro_y = [] datapath = self.packpath + '/log/IMU_data/' with open(datapath+'{}.csv'.format(self.patient), 'r') as imu_file: reader = csv.DictReader(imu_file) for row in reader: gyro_y.append(float(dict(row)['gyro_y'])) '''Feature extraction: 1st derivative''' for i in range(1, len(gyro_y)-1): feature_vect = [gyro_y[i], (gyro_y[i+1]-gyro_y[i-1])/2] self.train_data.append(feature_vect) self.ff[self.leave_one_out].append(feature_vect) '''Reference labels''' with open(datapath+'{}_labels.csv'.format(self.patient), 'r') as labels_file: reader = csv.reader(labels_file) for row in reader: self.labels[self.leave_one_out].append(int(row[0])) self.train_data = [self.train_data] """Init HMM if no previous training""" def init_hmm(self): if self.data_ext == 'mat': rospy.logwarn("-------Leaving trial {} out-------".format(self.leave_one_out+1)) """Transition matrix (A)""" '''Transition matrix from reference labels''' # prev = -1 # for i in range(len(self.labels[self.leave_one_out])): # if prev == -1: # prev = self.labels[self.leave_one_out][i] # self.trans_mat[prev][self.labels[self.leave_one_out][i]] += 1.0 # prev = self.labels[self.leave_one_out][i] # self.trans_mat = normalize(self.trans_mat, axis=1, norm='l1') if self.verbose: rospy.logwarn("**TRANSITION MATRIX (pre-training)**\n" + str(self.trans_mat) + '\nMatrix type: {}'.format(type(self.trans_mat))) class_data = [[] for x in range(self.n_states)] for i in range(len(self.ff[self.leave_one_out])): class_data[self.labels[self.leave_one_out][i]].append(self.ff[self.leave_one_out][i]) """Multivariate Gaussian Distributions for each hidden state""" class_means = [[[] for x in range(self.n_features)] for i in range(self.n_states)] # class_vars = [[[] for x in range(self.n_features)] for i in range(self.n_states)] # class_std = [[[] for x in range(self.n_features)] for i in range(self.n_states)] class_cov = [] for i in range(self.n_states): cov = np.ma.cov(np.array(class_data[i]), rowvar=False) class_cov.append(cov) for j in range(self.n_features): class_means[i][j] = np.array(class_data[i][:])[:, [j]].mean(axis=0) # class_vars[i][j] = np.array(class_data[i][:])[:, [j]].var(axis=0) # class_std[i][j] = np.array(class_data[i][:])[:, [j]].std(axis=0) """Classifier initialization""" distros = [] hmm_states = [] for i in range(self.n_states): dis = MGD(np.array(class_means[i]).flatten(), np.array(class_cov[i])) st = State(dis, name=self.states[i]) distros.append(dis) hmm_states.append(st) self.model.add_states(hmm_states) '''Initial transitions''' for i in range(self.n_states): self.model.add_transition(self.model.start, hmm_states[i], self.start_prob[i]) '''Left-right model''' for i in range(self.n_states): for j in range(self.n_states): self.model.add_transition(hmm_states[i], hmm_states[j], self.trans_mat[i][j]) '''Finish model setup''' self.model.bake() """Train initialized model""" def train_hmm(self): rospy.logwarn("Training initialized model...") self.model.fit(self.train_data, algorithm='baum-welch', verbose=self.verbose) self.model.freeze_distributions() # Freeze all model distributions, preventing update from ocurring if self.verbose: print "**HMM model:\n{}**".format(self.model) """Save Multivariate Gaussian Distributions into yaml file""" for st in self.model.states: if st.name != self.model_name+"-start" and st.name != self.model_name+"-end": dis = st.distribution dis_yaml = dis.to_yaml() try: with open(self.packpath+'/log/HMM_models/'+self.patient+'_'+self.state2phase[st.name]+'.txt', 'w') as outfile: yaml.dump(dis_yaml, outfile, default_flow_style=False) rospy.logwarn(self.patient+"'s "+self.state2phase[st.name]+" distribution was saved.") except IOError: rospy.logwarn('It was not possible to write GMM distribution.') """Save model (json script) into txt file""" model_json = self.model.to_json() try: with open(self.packpath+'/log/HMM_models/'+self.patient+'.txt', 'w') as outfile: json.dump(model_json, outfile) rospy.logwarn(self.patient+"'s HMM model was saved.") except IOError: rospy.logwarn('It was not possible to write HMM model.') """Compute the log probability under a multivariate Gaussian distribution. Parameters ---------- X : array_like, shape (n_samples, n_features) List of n_features-dimensional data points. Each row corresponds to a single data point. means : array_like, shape (n_components, n_features) List of n_features-dimensional mean vectors for n_components Gaussians. Each row corresponds to a single mean vector. covars : array_like List of n_components covariance parameters for each Gaussian. The shape is (n_components, n_features, n_features) if 'full' Returns ---------- lpr : array_like, shape (n_samples, n_components) Array containing the log probabilities of each data point in X under each of the n_components multivariate Gaussian distributions.""" def log_multivariate_normal_density(self, X, min_covar=1.e-7): """Log probability for full covariance matrices.""" n_samples, n_dim = X.shape nmix = len(self.dis_means) log_prob = np.empty((n_samples, nmix)) for c, (mu, cv) in enumerate(zip(self.dis_means, self.dis_covars)): try: cv_chol = linalg.cholesky(cv, lower=True) except linalg.LinAlgError: # The model is most probably stuck in a component with too # few observations, we need to reinitialize this components try: cv_chol = linalg.cholesky(cv + min_covar * np.eye(n_dim), lower=True) except linalg.LinAlgError: raise ValueError("'covars' must be symmetric, " "positive-definite") cv_log_det = 2 * np.sum(np.log(np.diagonal(cv_chol))) cv_sol = linalg.solve_triangular(cv_chol, (X - mu).T, lower=True).T log_prob[:, c] = - .5 * (np.sum(cv_sol ** 2, axis=1) + n_dim * np.log(2 * np.pi) + cv_log_det) return log_prob """Find argument (pos) that has the maximum value in array-like object""" def _argmax(self, X): X_max = float("-inf") pos = 0 for i in range(X.shape[0]): if X[i] > X_max: X_max = X[i] pos = i return pos """Find max value in array-like object""" def _max(self, X): return X[self._argmax(X)] """Backtracking process to decode most-likely state sequence""" def _optim_backtrack(self, k): opt = [] self.last_state = where_from = self._argmax(self.viterbi_path[k]) opt.append(where_from) for lp in range(k-1, -1, -1): opt.insert(0, self.backtrack[lp + 1][where_from]) where_from = self.backtrack[lp + 1][where_from] self.global_path.extend(opt) return opt '''Forward-only decoding approach''' def _run_fov(self, test_set): # Probability distribution of state given observation framelogprob = self.log_multivariate_normal_density(np.array([test_set])) if self.first_eval: for i in range(self.n_states): self.viterbi_path[0, i] = self.log_startprob[i] + framelogprob[0, i] self.first_eval = False return self._argmax(self.viterbi_path[0]) else: # Recursion for i in range(self.n_states): for j in range(self.n_states): self.work_buffer[j] = (self.log_transmat[j, i] + self.viterbi_path[0, j]) self.viterbi_path[1, i] = self._max(self.work_buffer) + framelogprob[0, i] self.viterbi_path[0] = self.viterbi_path[1] # Prepare for next feature vector return self._argmax(self.viterbi_path[1]) '''Bounded sliding variable window approach''' def _run_bvsw(self, test_set): framelogprob = self.log_multivariate_normal_density(np.array([test_set])) if self.first_eval: for i in range(self.n_states): self.viterbi_path[0,i] = self.log_startprob[i] + framelogprob[0,i] self.backtrack[0][i] = None self.first_eval = False return [] else: '''Find likelihood probability and backpointer''' for j in range(self.n_states): for i in range(self.n_states): self.work_buffer[i] = self.viterbi_path[self.boundary - 1][i] + self.log_transmat[i, j] self.viterbi_path[self.boundary][j] = self._max(self.work_buffer) + framelogprob[0][j] self.backtrack[self.boundary][j] = self._argmax(self.work_buffer) '''Backtracking local paths''' local_paths = [[] for x in range(self.n_states)] for j in range(self.n_states): where_from = j for smp in range(self.boundary-1, -1, -1): local_paths[j].insert(0, self.backtrack[smp+1][where_from]) where_from = self.backtrack[smp+1][where_from] # if self.verbose: # print "\n{}, {}".format(t, b) # for path in local_paths: # print path '''Given all local paths, find fusion point''' tmp = [None] * self.n_states for k in range(len(local_paths[0])-1, 0, -1): for st in range(self.n_states): tmp[st] = local_paths[st][k] if tmp.count(tmp[0]) == len(tmp): # All local paths point to only one state? self.conv_found = True self.conv_point = k # if self.verbose: print "Found, {}".format(k) break '''Find local path if fusion point was found''' if self.boundary < self.max_win_len and self.conv_found: self.buff_len += self.conv_point opt = self._optim_backtrack(self.conv_point) self.conv_found = False # if self.verbose: print "\nOpt1: " + str(opt) + ", {}".format(len(self.global_path)) '''Reinitialize local variables''' for i in range(self.n_states): if i == self.last_state: self.log_startprob[i] = ln(1.0) else: self.log_startprob[i] = ln(0.0) self.viterbi_path[0][i] = self.log_startprob[i] + framelogprob[0][i] self.backtrack[0][i] = None for smp in range(1, self.boundary-self.conv_point+1): for j in range(self.n_states): for i in range(self.n_states): self.work_buffer[i] = self.viterbi_path[smp - 1][i] + self.log_transmat[i, j] self.viterbi_path[smp][j] = self._max(self.work_buffer) + self.log_multivariate_normal_density(np.array([self.obs[self.conv_point-self.boundary+smp-1]]))[0,j] self.backtrack[smp][j] = self._argmax(self.work_buffer) self.boundary -= self.conv_point-1 return opt elif self.boundary >= self.max_win_len: '''Bounding threshold was exceeded''' self.buff_len += self.max_win_len opt = self._optim_backtrack(self.boundary-1) # if self.verbose: print "\nOpt2: " + str(opt) + ", {}".format(len(self.global_path)) '''Reinitialize local variables''' self.boundary = 1 for i in range(self.n_states): if i == self.last_state: self.log_startprob[i] = ln(1.0) else: self.log_startprob[i] = ln(0.0) self.viterbi_path[0][i] = self.log_startprob[i] + framelogprob[0][i] self.backtrack[0][i] = None return opt else: self.boundary += 1 return []
model = HiddenMarkovModel() model.add_state(back) model.add_state(back2) add_sequence(model, start_states) model.add_transition(model.start, back, 1) model.add_transition(back, back, 0.55) model.add_transition(back, start_states[0], 0.45) model.add_transition(start_states[-1], back2, 1) model.add_transition(back2, back2, 0.5) model.bake() test(model) lines = [] with open('/home/zippyttech/Projects/personal/PerKalk/train_start2.exa') as fi: for line in fi: lines.append(converter_to(line.replace('\n', ''))) model.fit(lines, transition_pseudocount=1, emission_pseudocount=1, verbose=True) test(model) with open('partial_model_start_model.json', 'w') as out: out.write(model.to_json())