def train_model(s, H, K, d, n_iter):
    """Fit an HMM on the training sequences of the requested activities.

    Args:
        s: iterable of activity labels whose sequences are trained on.
        H: number of hidden states to include in the model.
        K: length of the activity sequences (forwarded to the
            forward-backward routine).
        d: list of the features (column indices) used for training.
        n_iter: number of forward-backward update passes to run.

    Returns:
        The tuple ``(A, B_mean, B_var, pi)`` produced by the last call to
        ``log_FB_seq.forward_backward_algorithm``; when ``n_iter`` is 0 the
        initial parameters are returned unchanged.
    """
    # Only the training features and labels are used in this function.
    (x_train, y_train, _s_train,
     _x_test, _y_test, _s_test) = initialize_hmm.load_data()

    # Standardize the features (z-scores) and locate each activity sequence.
    standardized = initialize_hmm.standardize_data(x_train)
    boundaries = initialize_hmm.segment_data(y_train)

    # Gather the sequences of every requested activity, in the order of `s`.
    sequences = []
    for label in s:
        sequences = sequences + hmm.all_sequences(
            standardized, label, boundaries)

    # Initial transition matrix, prior probabilities and Gaussian emissions.
    A, pi = initialize_hmm.init_par(H)
    pi = np.asarray(pi)
    _kmeans, B_mean, B_var = hmm.initialize_GMM(sequences, H)
    # Keep only the emission parameters of the selected features.
    B_mean, B_var = B_mean[:, d], B_var[:, d]

    for iteration in range(n_iter):
        print("Iteration {}".format(iteration))
        A, B_mean, B_var, pi = log_FB_seq.forward_backward_algorithm(
            sequences, A, B_mean, B_var, pi, H, K, d)

    return A, B_mean, B_var, pi
def compute_error_stage1(y_pred):
    """Compute the stage-1 (moving vs. stationary) error rate of ``y_pred``.

    The reference labels are rebuilt from the training split: for each
    activity 1..6 (in that order) one label is emitted per sequence returned
    by ``hmm.all_sequences``. Activities 1, 2 and 3 map to class 1, all
    others to class 0.

    Args:
        y_pred: indexable collection of predicted classes (0 or 1), one per
            sequence, in the same order as the labels built here.

    Returns:
        ``(error, y_labels)`` where ``error`` is the fraction of mismatches
        and ``y_labels`` is the list of per-sequence activity labels
        (as floats).
    """
    (x_train, y_train, _s_train,
     _x_test, _y_test, _s_test) = initialize_hmm.load_data()
    boundaries = initialize_hmm.segment_data(y_train)

    # One (float) activity label per sequence, grouped by activity.
    y_labels = []
    for activity in range(1, 7):
        n_sequences = len(hmm.all_sequences(x_train, activity, boundaries))
        y_labels = y_labels + [float(activity)] * n_sequences

    E = len(y_labels)
    mismatches = 0.0
    for idx, label in enumerate(y_labels):
        expected = 1 if label in (1, 2, 3) else 0
        if y_pred[idx] != expected:
            mismatches += 1
    error = mismatches / E

    # NOTE(review): the message says "Testing" although the labels above are
    # derived from the training split -- confirm the intended wording.
    print("Testing error rate for Stage 1 is {}.".format(error))
    return error, y_labels
def get_data(features, ACT1, ACT2):
    """Return standardized training data restricted to two activity sets.

    Loads the dataset, standardizes the training features and delegates the
    selection to ``get_data_for_these_activities``.

    Args:
        features: feature selection forwarded to the helper.
        ACT1: first set of activities forwarded to the helper.
        ACT2: second set of activities forwarded to the helper.

    Returns:
        The ``(x, y)`` pair produced by ``get_data_for_these_activities``.
    """
    (x_train, y_train, _s_train,
     _x_test, _y_test, _s_test) = initialize_hmm.load_data()
    standardized = initialize_hmm.standardize_data(x_train)
    selected_x, selected_y = get_data_for_these_activities(
        standardized, y_train, ACT1, ACT2, features)
    return selected_x, selected_y
def visualize_features(ACT1, ACT2):
    """Plot every training feature over time for two groups of activities.

    For each feature column of the standardized training data one figure is
    drawn: every segment whose activity is in ``ACT1`` is plotted in red and
    every segment whose activity is in ``ACT2`` in blue. Each figure is saved
    as ``feature{f}.png`` (``f`` is the zero-based feature index) in the
    current working directory and then closed.

    Args:
        ACT1: collection of activity labels forming the first (red) group.
        ACT2: collection of activity labels forming the second (blue) group.
    """
    # Original author's note: x is (7352, 561) and y is (7352,).
    (x_train, y_train, _s_train,
     _x_test, _y_test, _s_test) = initialize_hmm.load_data()
    # Standardize it (get z-scores).
    x_train = initialize_hmm.standardize_data(x_train)
    # Rows of `segments` are indexed below as (activity, start, end),
    # e.g. "activity 5 from 0 to 27".
    segments = initialize_hmm.segment_data(y_train)
    n_feature = x_train.shape[1]

    # Which segments belong to which group does not depend on the feature,
    # so resolve the row ranges once instead of once per figure.
    act1_ranges = []
    act2_ranges = []
    for i in range(len(segments)):
        activity = segments[i, 0]
        bounds = (segments[i, 1], segments[i, 2])
        if activity in ACT1:
            act1_ranges.append(bounds)
        if activity in ACT2:
            act2_ranges.append(bounds)

    alpha = 0.5
    groups = (
        (act1_ranges, 'r-', ','.join([str(a) for a in ACT1])),
        (act2_ranges, 'b-', ','.join([str(a) for a in ACT2])),
    )

    # One figure per feature.
    for f in range(n_feature):
        fig = plt.figure()
        try:
            for ranges, line_style, legend_label in groups:
                for i, (start, end) in enumerate(ranges):
                    seq = x_train[start:end, f].ravel()
                    # Only the first line of a group carries the legend
                    # label, so the legend shows one entry per group.
                    extra = {'label': legend_label} if i == 0 else {}
                    plt.plot(range(len(seq)), seq, line_style,
                             alpha=alpha, **extra)

            plt.title('Feature {}/{}'.format(f + 1, n_feature))
            plt.xlabel('Time Frame')
            plt.ylabel('Feature Value')
            plt.legend()
            # plt.show()
            plt.savefig('feature{}.png'.format(f))
        finally:
            # pyplot keeps every figure alive until it is closed; without
            # this, one figure per feature would accumulate in memory.
            plt.close(fig)
def predict_stage1(A_stat, B_mean_stat, B_var_stat, pi_stat, A_mov,
                   B_mean_mov, B_var_mov, pi_mov, d, H, K):
    """Generate first-stage (moving or stationary) predictions.

    Every training sequence of activities 1..6 is scored under the
    stationary model and under the moving model; the sequence is labelled 0
    when the stationary score is strictly larger and 1 otherwise.

    Args:
        A_stat, B_mean_stat, B_var_stat, pi_stat: parameters of the
            stationary model.
        A_mov, B_mean_mov, B_var_mov, pi_mov: parameters of the moving model.
        d: feature (column) selection applied to each sequence.
        H: number of hidden states.
        K: sequence length forwarded to the helpers.

    Returns:
        List with one prediction (0 or 1) per sequence, ordered by activity
        1..6 and, within an activity, in the order given by
        ``hmm.all_sequences``.
    """
    (x_train, y_train, _s_train,
     _x_test, _y_test, _s_test) = initialize_hmm.load_data()
    # Standardize the features (z-scores) and locate each activity sequence.
    standardized = initialize_hmm.standardize_data(x_train)
    boundaries = initialize_hmm.segment_data(y_train)

    sequences = []
    for activity in range(1, 7):
        sequences = sequences + hmm.all_sequences(
            standardized, activity, boundaries)

    y_pred = []
    for seq in sequences:
        B_stat = hmm.cal_b_matrix(seq[:, d], B_mean_stat, B_var_stat, H, K)
        B_mov = hmm.cal_b_matrix(seq[:, d], B_mean_mov, B_var_mov, H, K)
        alpha_stat = log_FB_seq.forward_step(A_stat, B_stat, pi_stat, H, K)
        alpha_mov = log_FB_seq.forward_step(A_mov, B_mov, pi_mov, H, K)

        # Score = sum of the last column of the forward variables.
        # NOTE(review): if forward_step returns log-domain values, a plain
        # sum is not a log-likelihood -- confirm against log_FB_seq.
        score_stat = np.sum(alpha_stat[:, -1])
        score_mov = np.sum(alpha_mov[:, -1])

        # Ties go to the moving class (1).
        y_pred.append(0 if score_stat > score_mov else 1)

    return y_pred
def hmm_train(K, H, n_iterations, model_type):
    """Train a Gaussian-emission HMM for the stationary or moving activities.

    Args:
        K: number of observations in a sequence; forwarded to ``scale_prob``.
        H: number of hidden states.
        n_iterations: number of Baum-Welch passes to run.
        model_type: ``"stationary"`` (activities 4, 5, 6) or ``"moving"``
            (activities 1, 2, 3).

    Returns:
        ``(A, B_mean, B_var, pi, alpha, beta)``. ``alpha`` and ``beta`` are
        the scaled forward/backward values of the last pass, or ``None``
        when ``n_iterations`` is 0.

    Raises:
        ValueError: if ``model_type`` is neither ``"stationary"`` nor
            ``"moving"``.
    """
    # Validate before any data is loaded so a bad argument fails fast with
    # a clear message instead of an unbound-variable error later on.
    if model_type == "stationary":
        activities = (4, 5, 6)
    elif model_type == "moving":
        activities = (1, 2, 3)
    else:
        raise ValueError(
            "model_type must be 'stationary' or 'moving', got {!r}".format(
                model_type))

    # Load the data.
    (x_train, y_train, _s_train,
     _x_test, _y_test, _s_test) = initialize_hmm.load_data()
    # Standardize it (get z-scores).
    x_train = initialize_hmm.standardize_data(x_train)
    # Use only the first ten features (debugging subset).
    x_train = x_train[:, 0:10]
    # Get the indices of each activity sequence.
    # activity_train has three columns: activity, start, end.
    activity_train = initialize_hmm.segment_data(y_train)

    # Collect only the sequences of the activities this model is built for.
    segments = []
    for activity in activities:
        segments = segments + all_sequences(x_train, activity, activity_train)
    x_train = segments

    # Initialize the model parameters.
    # NOTE(review): train_model in this file calls init_par(H) with a single
    # argument -- confirm which signature is current. The original comment
    # states x_train and y_train are not used inside init_par.
    A, pi = initialize_hmm.init_par(x_train, y_train, H)
    _kmeans, B_mean, B_var = initialize_GMM(x_train, H)

    # Baum-Welch algorithm:
    #   1. The Gaussians were initialized above.
    #   2. Compute forward and backward probabilities for all states/times.
    #   3. Scale them per sequence and re-estimate the parameters.
    alpha, beta = None, None  # stays None when n_iterations == 0
    for i in range(n_iterations):
        alpha, beta = forward_backward(x_train, A, B_mean, B_var, pi.T, H)
        # Scale alpha and beta for every sequence.
        for n in range(len(x_train)):
            alpha[n], beta[n] = scale_prob(alpha[n], beta[n], H, K)
            print("This is the {}-th iteration, the {}-th scaling".format(
                i, n))
        A, B_mean, B_var, pi = update_GMM(x_train, alpha, beta, H, A,
                                          B_mean, B_var, pi)
        # NOTE(review): the source formatting was lost; these progress
        # prints are assumed to run once per iteration -- confirm.
        print(A)
        print(pi)

    return A, B_mean, B_var, pi, alpha, beta