def compute_statistics_samples(ids, boolean_classes=True, attr=0):
    '''
    Collect and plot statistics on how long a class (or attribute) value
    stays unchanged in the annotated recordings.

    For every label file of the subjects in ids, the samples of each class
    are located in column attr and, for every uninterrupted run, the number
    of consecutive steps inside the run (run length minus one) is stored.
    Per class, the mean, standard deviation, minimum, median and box-plot
    statistics of these counts are plotted and printed, followed by the
    absolute and relative number of samples per class.

    @param ids: ids of the subjects whose recordings are analysed
    @param boolean_classes: True evaluates 8 classes, False evaluates a
                            binary attribute (2 values)
    @param attr: column of the label file that is analysed
    '''

    recordings = ['R{:02d}'.format(r) for r in range(1, 31)]

    num_classes = 8 if boolean_classes else 2

    counter_list_class = {cl: [] for cl in range(num_classes)}
    hist_classes_all = np.zeros((num_classes))

    for P in persons:
        if P not in ids:
            continue

        for r, R in enumerate(recordings):
            # NOTE(review): SCENARIO is indexed with the position r here, not
            # with the recording name R -- confirm this matches SCENARIO.
            if P in ["S01", "S02", "S03", "S04", "S05", "S06"]:
                S = "L01"
            else:
                S = SCENARIO[r]

            for N in repetition:
                # Default annotator of the subject; the checks below override
                # it for single scenarios/recordings (later checks win).
                annotator_file = annotator[P]
                if P == 'S07' and SCENARIO[r] == 'L01':
                    annotator_file = "A03"
                if P == 'S14' and SCENARIO[r] == 'L03':
                    annotator_file = "A19"
                if P == 'S11' and SCENARIO[r] == 'L01':
                    annotator_file = "A03"
                if P == 'S11' and R in ['R04', 'R08', 'R09', 'R10',
                                        'R11', 'R12', 'R13', 'R15']:
                    annotator_file = "A02"
                if P == 'S13' and R in ['R28']:
                    annotator_file = "A01"
                if P == 'S13' and R in ['R29', 'R30']:
                    annotator_file = "A11"
                if P == 'S09' and R in ['R28', 'R29']:
                    annotator_file = "A01"
                if P == 'S09' and R in ['R21', 'R22', 'R23', 'R24', 'R25']:
                    annotator_file = "A11"

                file_name_label = "{}/{}_{}_{}_{}_{}_labels.csv".format(
                    P, S, P, R, annotator_file, N)

                try:
                    data = csv_reader.reader_labels(FOLDER_PATH +
                                                    file_name_label)
                    labels = data[:, attr]
                    print("Files loaded")

                    for c in range(num_classes):
                        # indexes of the samples belonging to class c
                        idxs = np.where(labels == c)[0]
                        last_pair = idxs.shape[0] - 1
                        counter = 0

                        # counting consecutive indexes; a gap closes the run
                        for idx in range(last_pair):
                            if idxs[idx + 1] - idxs[idx] == 1:
                                counter += 1
                            else:
                                counter_list_class[c].append(counter)
                                counter = 0

                            # the run that is still open at the end
                            if (idx + 1) == last_pair:
                                counter_list_class[c].append(counter)
                                counter = 0

                    # Statistics
                    hist_classes = np.bincount(labels.astype(int),
                                               minlength=num_classes)
                    hist_classes_all += hist_classes
                except Exception:
                    # Only regular errors are reported; Ctrl+C still aborts.
                    print("No file {}".format(FOLDER_PATH + file_name_label))

    # One subplot per class for the distributions, one shared axis for all
    # classes and one subplot per class for vertical box plots.
    fig = plt.figure()
    axis_list = [fig.add_subplot(4, 2, pos) for pos in range(1, 9)]

    fig2 = plt.figure()
    axis_list_2 = [fig2.add_subplot(111)]

    fig3 = plt.figure()
    axis_list_3 = [fig3.add_subplot(4, 2, pos) for pos in range(1, 9)]

    colours = {
        0: 'b',
        1: 'g',
        2: 'r',
        3: 'c',
        4: 'm',
        5: 'y',
        6: 'k',
        7: 'greenyellow'
    }

    mins = []
    mus = []
    sigmas = []
    min_1_data = []
    min_2_data = []
    min_3_data = []
    medians = []
    lower_whiskers = []
    Q1s = []

    for cl in range(num_classes):
        # NOTE: np.min raises ValueError for a class without any stored run.
        counts = np.array(counter_list_class[cl])
        mu = np.mean(counts)
        sigma = np.std(counts)

        mus.append(mu)
        sigmas.append(sigma)
        min_1_data.append(-1 * sigma + mu)
        min_2_data.append(-2 * sigma + mu)
        min_3_data.append(-3 * sigma + mu)
        mins.append(np.min(counts))
        medians.append(np.median(counts))

        # Normal pdf over +-3 sigma, scaled so that its peak equals 1
        x = np.linspace(-3 * sigma + mu, 3 * sigma + mu, 100)
        pdf = norm.pdf(x, mu, sigma)
        pdf_scaled = pdf / float(np.max(pdf))
        curve_label = 'mean:{}_std:{}'.format(mu, sigma)
        ones = np.ones(len(counter_list_class[cl]))

        axis_list[cl].plot(x, pdf_scaled, '-b', label=curve_label)
        axis_list[cl].plot(counter_list_class[cl], ones, 'ro')
        result_box = axis_list[cl].boxplot(counter_list_class[cl],
                                           vert=False)

        # The first whisker line connects Q1 with the lower whisker end. The
        # whisker end can never exceed Q1, so min/max identify both values
        # independently of the order in which the line stores them.
        whisker_x = result_box['whiskers'][0].get_data()[0]
        lower_whiskers.append(np.min(whisker_x))
        Q1s.append(np.max(whisker_x))

        axis_list_2[0].plot(x, pdf_scaled, '-b', label=curve_label,
                            color=colours[cl])
        axis_list_2[0].plot(counter_list_class[cl], ones, 'ro')

        axis_list_3[cl].boxplot(counter_list_class[cl])

    axis_list_2[0].relim()
    axis_list_2[0].autoscale_view()
    axis_list_2[0].legend(loc='best')

    fig.canvas.draw()
    fig2.canvas.draw()
    plt.pause(2.0)

    print("Mins {} Min {} Argmin {}".format(mins, np.min(mins),
                                            np.argmin(mins)))
    print("Means {} Min {} Argmin {}".format(mus, np.min(mus),
                                             np.argmin(mus)))
    print("Stds {} Min {}".format(sigmas, sigmas[np.argmin(mus)]))
    print("Medians {} Min {} Argmin {}".format(medians, np.min(medians),
                                               np.argmin(medians)))
    print("Lower Whiskers {} Min {} Argmin {}".format(
        lower_whiskers, np.min(lower_whiskers), np.argmin(lower_whiskers)))
    print("Q1s {} Min {} Argmin {}".format(Q1s, np.min(Q1s), np.argmin(Q1s)))

    print("1sigma from mu {}".format(min_1_data))
    print("2sigma from mu {}".format(min_2_data))
    print("3sigma from mu {}".format(min_3_data))

    print("Min 1sigma from mu {}".format(np.min(min_1_data)))
    print("Min 2sigma from mu {}".format(np.min(min_2_data)))
    print("Min 3sigma from mu {}".format(np.min(min_3_data)))

    print("Number of samples per class {}".format(hist_classes_all))
    print("Number of samples per class {}".format(
        hist_classes_all / float(np.sum(hist_classes_all)) * 100))

    return
def generate_data(ids, sliding_window_length, sliding_window_step,
                  data_dir=None, half=False, identity_bool=False,
                  usage_modus='train'):
    '''
    creates one pickle file for each of the sequences, which are extracted
    from the recordings following a sliding window approach

    Nothing is returned; the sequences are stored in data_dir as
    seq_<id>.pkl files holding the window ("data"), its label ("label"),
    the labels of all its samples ("labels") and the subject ("identity").

    @param ids: ids for train, val or test
    @param sliding_window_length: length of window for segmentation
    @param sliding_window_step: step between windows for segmentation
    @param data_dir: path to dir where files will be stored
    @param half: if True, only every second sample of a recording is used
    @param identity_bool: if True, the recordings of each subject are split
                          into train, val and test (identity experiment)
    @param usage_modus: 'train', 'val' or 'test'; selects the recordings
                        when identity_bool is True
    '''

    # (first, last + 1) recording number of each split in the identity
    # experiment. S11 and S12 use their own boundaries.
    default_splits = {'train': (1, 21), 'val': (21, 26), 'test': (26, 31)}
    person_splits = {
        'S11': {'train': (1, 10), 'val': (10, 12), 'test': (12, 15)},
        'S12': {'train': (1, 25), 'val': (25, 28), 'test': (28, 31)}
    }

    if identity_bool and usage_modus not in default_splits:
        # Without this check no recordings would be selected and the loop
        # below would fail with a NameError.
        raise ValueError(
            "usage_modus must be 'train', 'val' or 'test' when "
            "identity_bool is set, got {!r}".format(usage_modus))

    counter_seq = 0
    hist_classes_all = np.zeros(NUM_CLASSES)

    for P in persons:
        if P not in ids:
            print("\nNo Person in expected IDS {}".format(P))
            continue

        if identity_bool:
            first, stop = person_splits.get(P, default_splits)[usage_modus]
        else:
            first, stop = 1, 31
        recordings = ['R{:02d}'.format(r) for r in range(first, stop)]

        for R in recordings:
            if P in ["S01", "S02", "S03", "S04", "S05", "S06"]:
                S = "L01"
            else:
                S = SCENARIO[R]

            for N in repetition:
                # Default annotator of the subject; the checks below override
                # it for single scenarios/recordings (later checks win).
                annotator_file = annotator[P]
                if P == 'S07' and SCENARIO[R] == 'L01':
                    annotator_file = "A03"
                if P == 'S14' and SCENARIO[R] == 'L03':
                    annotator_file = "A19"
                if P == 'S11' and SCENARIO[R] == 'L01':
                    annotator_file = "A03"
                if P == 'S11' and R in ['R04', 'R08', 'R09', 'R10',
                                        'R11', 'R12', 'R13', 'R15']:
                    annotator_file = "A02"
                if P == 'S13' and R in ['R28']:
                    annotator_file = "A01"
                if P == 'S13' and R in ['R29', 'R30']:
                    annotator_file = "A11"
                if P == 'S09' and R in ['R28', 'R29']:
                    annotator_file = "A01"
                if P == 'S09' and R in ['R21', 'R22', 'R23', 'R24', 'R25']:
                    annotator_file = "A11"

                file_name_norm = "{}/{}_{}_{}_{}_{}_norm_data.csv".format(
                    P, S, P, R, annotator_file, N)
                file_name_label = "{}/{}_{}_{}_{}_{}_labels.csv".format(
                    P, S, P, R, annotator_file, N)

                try:
                    # getting data
                    data = csv_reader.reader_data(FOLDER_PATH +
                                                  file_name_norm)
                    print("\nFiles loaded in modus {}\n{}".format(
                        usage_modus, file_name_norm))
                    data = select_columns_opp(data)
                    print("Columns selected")
                except Exception:
                    print("\n In generating data, No file {}".format(
                        FOLDER_PATH + file_name_norm))
                    continue

                try:
                    # Getting labels and attributes
                    labels = csv_reader.reader_labels(FOLDER_PATH +
                                                      file_name_label)
                    class_labels = np.where(labels[:, 0] == 7)[0]

                    # Deleting rows containing the "none" class
                    data = np.delete(data, class_labels, 0)
                    labels = np.delete(labels, class_labels, 0)

                    if half:
                        # keeping every second sample only
                        downsampling = range(0, data.shape[0], 2)
                        data = data[downsampling]
                        labels = labels[downsampling]

                    data_t, data_x, data_y = divide_x_y(data)
                    del data_t
                except Exception:
                    print("\n In generating data, Error getting the data {}".
                          format(FOLDER_PATH + file_name_norm))
                    continue

                try:
                    data_x = normalize(data_x)

                    # checking if annotations are consistent
                    if np.sum(data_y == labels[:, 0]) == data_y.shape[0]:
                        # Sliding window approach
                        print("Starting sliding window")
                        X, y, y_all = opp_sliding_window(
                            data_x, labels.astype(int),
                            sliding_window_length, sliding_window_step,
                            label_pos_end=False)
                        print("Windows are extracted")

                        # Statistics
                        hist_classes = np.bincount(y[:, 0],
                                                   minlength=NUM_CLASSES)
                        hist_classes_all += hist_classes
                        print("Number of seq per class {}".format(
                            hist_classes_all))

                        for seq_idx in range(X.shape[0]):
                            try:
                                sys.stdout.write(
                                    '\r' + 'Creating sequence file '
                                    'number {} with id {}'.format(
                                        seq_idx, counter_seq))
                                sys.stdout.flush()

                                seq = np.reshape(
                                    X[seq_idx],
                                    (1, X.shape[1], X.shape[2]))
                                seq = np.require(seq, dtype=float)

                                obj = {
                                    "data": seq,
                                    "label": y[seq_idx],
                                    "labels": y_all[seq_idx],
                                    "identity": labels_persons[P]
                                }
                                seq_path = os.path.join(
                                    data_dir,
                                    'seq_{0:06}.pkl'.format(counter_seq))
                                # "with" closes the file even if dump fails
                                with open(seq_path, 'wb') as seq_file:
                                    pickle.dump(
                                        obj, seq_file,
                                        protocol=pickle.HIGHEST_PROTOCOL)

                                counter_seq += 1
                            except Exception:
                                # Raising a real exception keeps the cause
                                # visible; a plain string cannot be raised.
                                raise RuntimeError('\nError adding the seq')

                        print("\nCorrect data extraction from {}".format(
                            FOLDER_PATH + file_name_norm))

                        # releasing the arrays before the next file is loaded
                        del data, data_x, data_y, X, labels, class_labels
                    else:
                        print("\nNot consisting annotation in {}".format(
                            file_name_norm))
                except Exception:
                    print("\n In generating data, No file {}".format(
                        FOLDER_PATH + file_name_norm))

    return
def generate_data(ids, sliding_window_length, sliding_window_step,
                  data_dir=None, identity_bool=False, usage_modus='train'):
    '''
    creates one pickle file for each of the sequences, which are extracted
    from the recordings following a sliding window approach

    Nothing is returned; the sequences are stored in data_dir as
    seq_<id>.pkl files holding the window ("data"), its label ("label"),
    the labels of all its samples ("labels") and the subject ("identity").

    @param ids: ids for train, val or test
    @param sliding_window_length: length of window for segmentation
    @param sliding_window_step: step between windows for segmentation
    @param data_dir: path to dir where files will be stored
    @param identity_bool: selecting for identity experiment
    @param usage_modus: selecting Train, Val or testing
    '''

    persons = [
        "S01", "S02", "S03", "S04", "S05", "S06", "S07", "S08", "S09", "S10",
        "S11", "S12", "S13", "S14"
    ]

    # Exclusive upper recording number of the train, val and test split per
    # subject. NOTE(review): S01-S06 have no entry, so the identity
    # experiment raises a KeyError for them -- confirm this is intended.
    idx_train = {
        "S07": 21, "S08": 21, "S09": 21, "S10": 11,
        "S11": 11, "S12": 23, "S13": 21, "S14": 21
    }
    idx_val = {
        "S07": 26, "S08": 26, "S09": 26, "S10": 16,
        "S11": 13, "S12": 26, "S13": 26, "S14": 26
    }
    idx_test = {
        "S07": 31, "S08": 31, "S09": 31, "S10": 24,
        "S11": 16, "S12": 31, "S13": 31, "S14": 31
    }

    if identity_bool and usage_modus not in ('train', 'val', 'test'):
        # Without this check no recordings would be selected and the code
        # below would fail with a NameError.
        raise ValueError(
            "usage_modus must be 'train', 'val' or 'test' when "
            "identity_bool is set, got {!r}".format(usage_modus))

    counter_seq = 0
    hist_classes_all = np.zeros((NUM_CLASSES))

    for P in persons:
        if P not in ids:
            print("\n6 No Person in expected IDS {}".format(P))
            continue

        if identity_bool:
            # Selecting the proportions of the train, val or testing
            # according to the quantity of recordings per subject, as there
            # are not equal number of recordings per subject
            if usage_modus == 'train':
                first, stop = 1, idx_train[P]
            elif usage_modus == 'val':
                first, stop = idx_train[P], idx_val[P]
            else:
                first, stop = idx_val[P], idx_test[P]
        else:
            first, stop = 1, 31
        recordings = ['R{:02d}'.format(rec) for rec in range(first, stop)]

        print("\nModus {} \n{}".format(usage_modus, recordings))

        for R in recordings:
            try:
                S = SCENARIO[R]
                file_name_data = "{}/{}_{}_{}.csv".format(P, S, P, R)
                file_name_label = "{}/{}_{}_{}_labels.csv".format(P, S, P, R)
                print("\n{}\n{}".format(file_name_data, file_name_label))

                try:
                    # getting data
                    data = reader_data(FOLDER_PATH + file_name_data)
                    print("\nFiles loaded in modus {}\n{}".format(
                        usage_modus, file_name_data))
                    data_x = data["data"]
                    print("\nFiles loaded")
                except Exception:
                    print("\n1 In loading data,  in file {}".format(
                        FOLDER_PATH + file_name_data))
                    continue

                try:
                    # Getting labels and attributes
                    labels = csv_reader.reader_labels(FOLDER_PATH +
                                                      file_name_label)
                    class_labels = np.where(labels[:, 0] == 7)[0]

                    # Deleting rows containing the "none" class
                    data_x = np.delete(data_x, class_labels, 0)
                    labels = np.delete(labels, class_labels, 0)
                except Exception:
                    print("2 In generating data, Error getting the data {}".
                          format(FOLDER_PATH + file_name_data))
                    continue

                try:
                    data_x = norm_mbientlab(data_x)
                except Exception:
                    print("\n3  In generating data, Plotting {}".format(
                        FOLDER_PATH + file_name_data))
                    continue

                try:
                    # checking if annotations are consistent: one label row
                    # per data row
                    if data_x.shape[0] == labels.shape[0]:
                        # Sliding window approach
                        print("\nStarting sliding window")
                        X, y, y_all = opp_sliding_window(
                            data_x, labels.astype(int),
                            sliding_window_length, sliding_window_step,
                            label_pos_end=False)
                        print("\nWindows are extracted")

                        # Statistics
                        hist_classes = np.bincount(y[:, 0],
                                                   minlength=NUM_CLASSES)
                        hist_classes_all += hist_classes
                        print("\nNumber of seq per class {}".format(
                            hist_classes_all))

                        for seq_idx in range(X.shape[0]):
                            try:
                                sys.stdout.write(
                                    '\r' +
                                    'Creating sequence file number {} with id {}'
                                    .format(seq_idx, counter_seq))
                                sys.stdout.flush()

                                seq = np.reshape(
                                    X[seq_idx],
                                    (1, X.shape[1], X.shape[2]))
                                seq = np.require(seq, dtype=float)

                                obj = {
                                    "data": seq,
                                    "label": y[seq_idx],
                                    "labels": y_all[seq_idx],
                                    "identity": labels_persons[P]
                                }
                                seq_path = os.path.join(
                                    data_dir,
                                    'seq_{0:06}.pkl'.format(counter_seq))
                                # "with" closes the file even if dump fails
                                with open(seq_path, 'wb') as seq_file:
                                    pickle.dump(
                                        obj, seq_file,
                                        protocol=pickle.HIGHEST_PROTOCOL)

                                counter_seq += 1
                            except Exception:
                                # Raising a real exception keeps the cause
                                # visible; a plain string cannot be raised.
                                raise RuntimeError('\nError adding the seq')

                        print("\nCorrect data extraction from {}".format(
                            FOLDER_PATH + file_name_data))

                        # releasing the arrays before the next file is loaded
                        del data, data_x, X, labels, class_labels
                    else:
                        print("\n4 Not consisting annotation in  {}".format(
                            file_name_data))
                except Exception:
                    print("\n5 In generating data, No created file {}".
                          format(FOLDER_PATH + file_name_data))
                print("-----------------\n{}\n{}\n-----------------".format(
                    file_name_data, file_name_label))
            except KeyboardInterrupt:
                # The inner handlers only catch Exception, so Ctrl+C arrives
                # here and stops the generation.
                print('\nYou cancelled the operation.')
                return

    return
def compute_min_num_samples(ids, boolean_classes=True, attr=0):
    '''
    Compute the minimal duration of a sequence with the same class or
    attribute value. This value helps selecting the sliding window size.

    The duration is counted as the number of consecutive steps inside an
    uninterrupted run of a value, i.e. run length minus one. A duration of 0
    (value absent in a file, or only isolated samples) is reported as
    infinity.

    @param ids: ids of the subjects whose recordings are analysed
    @param boolean_classes: True evaluates 8 classes, False evaluates a
                            binary attribute (2 values)
    @param attr: column of the label file that is analysed
    @return: array with the minimal duration per class over all loaded
             files; infinity where no duration was found
    '''

    recordings = ['R{:02d}'.format(r) for r in range(1, 31)]

    num_classes = 8 if boolean_classes else 2

    # one row per loaded label file
    min_durations = np.empty((0, num_classes))
    hist_classes_all = np.zeros((num_classes))

    for P in persons:
        if P not in ids:
            continue

        for r, R in enumerate(recordings):
            # NOTE(review): SCENARIO is indexed with the position r here, not
            # with the recording name R -- confirm this matches SCENARIO.
            if P in ["S01", "S02", "S03", "S04", "S05", "S06"]:
                S = "L01"
            else:
                S = SCENARIO[r]

            for N in repetition:
                # Default annotator of the subject; the checks below override
                # it for single scenarios/recordings (later checks win).
                annotator_file = annotator[P]
                if P == 'S07' and SCENARIO[r] == 'L01':
                    annotator_file = "A03"
                if P == 'S14' and SCENARIO[r] == 'L03':
                    annotator_file = "A19"
                if P == 'S11' and SCENARIO[r] == 'L01':
                    annotator_file = "A03"
                if P == 'S11' and R in ['R04', 'R08', 'R09', 'R10',
                                        'R11', 'R12', 'R13', 'R15']:
                    annotator_file = "A02"
                if P == 'S13' and R in ['R28']:
                    annotator_file = "A01"
                if P == 'S13' and R in ['R29', 'R30']:
                    annotator_file = "A11"
                if P == 'S09' and R in ['R28', 'R29']:
                    annotator_file = "A01"
                if P == 'S09' and R in ['R21', 'R22', 'R23', 'R24', 'R25']:
                    annotator_file = "A11"

                file_name_label = "{}/{}_{}_{}_{}_{}_labels.csv".format(
                    P, S, P, R, annotator_file, N)

                try:
                    data = csv_reader.reader_labels(FOLDER_PATH +
                                                    file_name_label)
                    labels = data[:, attr]
                    print("Files loaded")

                    min_duration = np.zeros((1, num_classes))
                    for c in range(num_classes):
                        # indexes of the samples belonging to class c
                        idxs = np.where(labels == c)[0]
                        counter = 0
                        min_counter = np.inf

                        # counting consecutive indexes; every gap closes the
                        # current run and restarts the counter
                        for idx in range(idxs.shape[0] - 1):
                            if idxs[idx + 1] - idxs[idx] == 1:
                                counter += 1
                            else:
                                if counter < min_counter:
                                    min_counter = counter
                                counter = 0

                        # the run that is still open at the end
                        if counter < min_counter:
                            min_counter = counter

                        min_duration[0, c] = min_counter
                        print("class {} counter size {}".format(
                            c, min_counter))

                    min_durations = np.append(min_durations, min_duration,
                                              axis=0)

                    # Statistics
                    hist_classes = np.bincount(labels.astype(int),
                                               minlength=num_classes)
                    hist_classes_all += hist_classes
                except Exception:
                    # Only regular errors are reported; Ctrl+C still aborts.
                    print("No file {}".format(FOLDER_PATH + file_name_label))

    # a duration of 0 carries no information for the window size
    min_durations[min_durations == 0] = np.inf

    print("Minimal duration per class \n{}".format(min_durations))

    print("Number of samples per class {}".format(hist_classes_all))
    print("Number of samples per class {}".format(
        hist_classes_all / float(np.sum(hist_classes_all)) * 100))

    # "initial" keeps the reduction defined when no file could be loaded
    return np.min(min_durations, axis=0, initial=np.inf)
def generate_data(ids, sliding_window_length, sliding_window_step,
                  data_dir=None, identity_bool=False, usage_modus='train'):
    '''
    creates one pickle file for each of the sequences extracted from the
    recordings following a sliding window approach

    Nothing is returned; the sequences are stored in data_dir as
    seq_<id>.pkl files holding the window ("data"), its label ("label"),
    the labels of all its samples ("labels"), the subject ("identity") and a
    running number of the recording it was taken from ("label_file").

    @param ids: ids for train, val or test
    @param sliding_window_length: length of window for segmentation
    @param sliding_window_step: step between windows for segmentation
    @param data_dir: path to dir where files will be stored
    @param identity_bool: if True, the recordings of each subject are split
                          into train, val and test (identity experiment)
    @param usage_modus: 'train', 'val' or 'test'; selects the recordings
                        when identity_bool is True
    '''

    dataset_path_imu = "/vol/actrec/DFG_Project/2019/LARa_dataset/Motionminers/LARa_dataset_mbientlab/"

    persons = [
        "S01", "S02", "S03", "S04", "S05", "S06", "S07", "S08", "S09", "S10",
        "S11", "S12", "S13", "S14"
    ]

    # Exclusive upper recording number of the train, val and test split per
    # subject. NOTE(review): S01-S06 have no entry, so the identity
    # experiment raises a KeyError for them -- confirm this is intended.
    idx_train = {
        "S07": 21, "S08": 21, "S09": 21, "S10": 11,
        "S11": 11, "S12": 23, "S13": 21, "S14": 21
    }
    idx_val = {
        "S07": 26, "S08": 26, "S09": 26, "S10": 16,
        "S11": 13, "S12": 26, "S13": 26, "S14": 26
    }
    idx_test = {
        "S07": 31, "S08": 31, "S09": 31, "S10": 24,
        "S11": 16, "S12": 31, "S13": 31, "S14": 31
    }

    if identity_bool and usage_modus not in ('train', 'val', 'test'):
        # Without this check no recordings would be selected and the code
        # below would fail with a NameError.
        raise ValueError(
            "usage_modus must be 'train', 'val' or 'test' when "
            "identity_bool is set, got {!r}".format(usage_modus))

    counter_seq = 0
    hist_classes_all = np.zeros((NUM_CLASSES))
    # running number of the recordings that produced windows
    counter_file_label = -1

    for P in persons:
        if P not in ids:
            print("\n6 No Person in expected IDS {}".format(P))
            continue

        if identity_bool:
            if usage_modus == 'train':
                first, stop = 1, idx_train[P]
            elif usage_modus == 'val':
                first, stop = idx_train[P], idx_val[P]
            else:
                first, stop = idx_val[P], idx_test[P]
        else:
            first, stop = 1, 31
        recordings = ['R{:02d}'.format(rec) for rec in range(first, stop)]

        print("\nModus {} \n{}".format(usage_modus, recordings))

        for R in recordings:
            try:
                S = SCENARIO[R]
                file_name_data = "{}/{}_{}_{}.csv".format(P, S, P, R)
                file_name_label = "{}/{}_{}_{}_labels.csv".format(P, S, P, R)
                print("\n{}\n{}".format(file_name_data, file_name_label))

                try:
                    # getting data
                    data = read_extracted_data(dataset_path_imu +
                                               file_name_data, skiprows=1)
                    print("\nFiles loaded in modus {}\n{}".format(
                        usage_modus, file_name_data))
                    # the first two columns are not part of the features
                    data_x = data[:, 2:]
                    print("\nFiles loaded")
                except Exception:
                    print("\n1 Error In loading data,  in file {}".format(
                        dataset_path_imu + file_name_data))
                    continue

                try:
                    # Getting labels and attributes
                    labels = csv_reader.reader_labels(dataset_path_imu +
                                                      file_name_label)
                    class_labels = np.where(labels[:, 0] == 7)[0]

                    # Deleting rows containing the "none" class
                    data_x = np.delete(data_x, class_labels, 0)
                    labels = np.delete(labels, class_labels, 0)
                except Exception:
                    print("2 In generating data, Error getting the data {}".
                          format(dataset_path_imu + file_name_data))
                    continue

                try:
                    data_x = norm_mbientlab(data_x)
                except Exception:
                    print("\n3  In generating data, Plotting {}".format(
                        dataset_path_imu + file_name_data))
                    continue

                try:
                    # checking if annotations are consistent
                    if data_x.shape[0] == labels.shape[0]:
                        # Sliding window approach
                        print("\nStarting sliding window")
                        X, y, y_all = opp_sliding_window(
                            data_x, labels.astype(int),
                            sliding_window_length, sliding_window_step,
                            label_pos_end=False)
                        print("\nWindows are extracted")

                        # Statistics
                        hist_classes = np.bincount(y[:, 0],
                                                   minlength=NUM_CLASSES)
                        hist_classes_all += hist_classes
                        print("\nNumber of seq per class {}".format(
                            hist_classes_all))

                        counter_file_label += 1

                        for seq_idx in range(X.shape[0]):
                            try:
                                seq = np.reshape(
                                    X[seq_idx],
                                    (1, X.shape[1], X.shape[2]))
                                seq = np.require(seq, dtype=float)

                                obj = {
                                    "data": seq,
                                    "label": y[seq_idx],
                                    "labels": y_all[seq_idx],
                                    "identity": labels_persons[P],
                                    "label_file": counter_file_label
                                }
                                seq_path = os.path.join(
                                    data_dir,
                                    'seq_{0:07}.pkl'.format(counter_seq))
                                # "with" closes the file even if dump fails
                                with open(seq_path, 'wb') as seq_file:
                                    pickle.dump(
                                        obj, seq_file,
                                        protocol=pickle.HIGHEST_PROTOCOL)

                                counter_seq += 1
                                sys.stdout.write(
                                    '\r' +
                                    'Creating sequence file number {} with id {}'
                                    .format(seq_idx, counter_seq))
                                sys.stdout.flush()
                            except Exception:
                                # Raising a real exception keeps the cause
                                # visible; a plain string cannot be raised.
                                raise RuntimeError(
                                    '\nError adding the seq {} from {} \n'.
                                    format(seq_idx, X.shape[0]))

                        print("\nCorrect data extraction from {}".format(
                            dataset_path_imu + file_name_data))

                        # releasing the arrays before the next file is loaded
                        del data, data_x, X, labels, class_labels
                    else:
                        print("\n4 Not consisting annotation in  {}".format(
                            file_name_data))
                except Exception:
                    print("\n5 In generating data, No created file {}".
                          format(dataset_path_imu + file_name_data))
                print("-----------------\n{}\n{}\n-----------------".format(
                    file_name_data, file_name_label))
            except KeyboardInterrupt:
                # The inner handlers only catch Exception, so Ctrl+C arrives
                # here and stops the generation.
                print('\nYou cancelled the operation.')
                return

    return
def create_labels_imus_from_mocap():
    '''
    Check, for every subject, recording and annotation run, that the MoCap
    data file, the MoCap label file and the corresponding IMU recording
    exist, and load the MoCap labels of the recordings that are complete.

    NOTE: storing the loaded labels next to the IMU recording (formerly done
    with csv_save.save_attr_csv) is disabled, so the function currently only
    reports files that are missing or cannot be read.
    '''

    dir_dataset = "/vol/actrec/DFG_Project/2019/LARa_dataset/MoCap/recordings_2019/15_Annotated_Dataset_Corrected/"
    dataset_path_imu_sequences = "/vol/actrec/DFG_Project/2019/LARa_dataset/Motionminers/2019/flw_recordings_annotated_revised/"

    annotator = {
        "S01": "A17", "S02": "A03", "S03": "A08", "S04": "A06",
        "S05": "A12", "S06": "A13", "S07": "A05", "S08": "A17",
        "S09": "A03", "S10": "A18", "S11": "A08", "S12": "A11",
        "S13": "A08", "S14": "A06"
    }

    persons = [
        "S01", "S02", "S03", "S04", "S05", "S06", "S07", "S08", "S09", "S10",
        "S11", "S12", "S13", "S14"
    ]
    repetition = ["N01", "N02"]
    recordings = ['R{:02d}'.format(r) for r in range(1, 31)]

    separator = "\n\n------------------------------------------------------------------"

    for P in persons:
        for r in recordings:
            try:
                for N in repetition:
                    print(separator)

                    # Default annotator of the subject; the checks below
                    # override it for single scenarios/recordings.
                    annotator_file = annotator[P]
                    if P == 'S07' and SCENARIO[r] == 'L01':
                        annotator_file = "A03"
                    if P == 'S09' and r in ['R28', 'R29']:
                        annotator_file = "A01"
                    if P == 'S09' and r in ['R21', 'R22', 'R23', 'R24',
                                            'R25']:
                        annotator_file = "A11"
                    if P == 'S11' and SCENARIO[r] == 'L01':
                        annotator_file = "A03"
                    if P == 'S11' and r in ['R04', 'R08', 'R09', 'R10',
                                            'R11', 'R12', 'R13', 'R15']:
                        annotator_file = "A02"
                    if P == 'S13' and r in ['R28']:
                        annotator_file = "A01"
                    if P == 'S13' and r in ['R29', 'R30']:
                        annotator_file = "A11"
                    if P == 'S14' and SCENARIO[r] == 'L03':
                        annotator_file = "A19"

                    scenario = SCENARIO[r]
                    file_name_imu = '{}/{}_{}_{}.csv'.format(
                        P, scenario, P, r)
                    file_name_mocap_data = "{}/{}_{}_{}_{}_{}_norm_data.csv".format(
                        P, scenario, P, r, annotator_file, N)
                    file_name_mocap_attr = "{}/{}_{}_{}_{}_{}_labels.csv".format(
                        P, scenario, P, r, annotator_file, N)

                    print("\n{}\n".format(file_name_imu))

                    # All three files are needed; the first missing one is
                    # reported and the annotation run is skipped.
                    required_files = [
                        dir_dataset + file_name_mocap_data,
                        dir_dataset + file_name_mocap_attr,
                        dataset_path_imu_sequences + file_name_imu
                    ]
                    missing = [path for path in required_files
                               if not os.path.exists(path)]
                    if missing:
                        print("1 - No file in {}".format(missing[0]))
                        continue

                    try:
                        # Loaded to verify the labels are readable; the
                        # result is not kept while saving is disabled.
                        csv_reader.reader_labels(dir_dataset +
                                                 file_name_mocap_attr)
                    except Exception:
                        print(
                            "3 - Error getting annotated labels in {}".format(
                                dir_dataset + file_name_mocap_attr))
                        continue

                    print(separator)
            except KeyboardInterrupt:
                # Ctrl+C stops the whole run instead of only the recording
                print('You cancelled the operation.')
                return

    return
def create_annotated_sequences():
    '''
    Create annotated IMU sequences by attaching the MoCap class labels to
    the IMU recordings.

    For every subject, recording and annotation run, the MoCap labels are
    cropped at the start index of the recording and every second label is
    kept. The IMU recording is cropped at its own start index to the same
    number of rows, its first column is shifted to start at zero, and the
    class label (first label column) is inserted as second column. The
    result is stored with save_data in the folder of annotated sequences.
    Files that are missing or cannot be processed are reported and skipped.
    '''

    dir_dataset_mocap = "/vol/actrec/DFG_Project/2019/LARa_dataset/MoCap/recordings_2019/15_Annotated_Dataset_Corrected/"
    dataset_path_imu = "/vol/actrec/DFG_Project/2019/LARa_dataset/Motionminers/2019/flw_data_recordings/"
    dataset_path_imu_sequences = "/vol/actrec/DFG_Project/2019/LARa_dataset/Motionminers/2019/flw_recordings_annotated_revised/"

    # Restricted to a subset of the subjects; the list previously used here
    # covered S07-S14.
    persons = ["S12", "S13", "S14"]

    annotator = {
        "S01": "A17", "S02": "A03", "S03": "A08", "S04": "A06",
        "S05": "A12", "S06": "A13", "S07": "A05", "S08": "A17",
        "S09": "A03", "S10": "A18", "S11": "A08", "S12": "A11",
        "S13": "A08", "S14": "A06"
    }

    # Start row of the annotated part, one entry per recording R01..R30.
    # The same table is applied to every subject in persons.
    start_sequences_imus = [
        535, 525, 544, 0, 535, 520, 563, 550, 556, 539, 540, 535, 545, 565,
        545, 776, 558, 536, 540, 570, 553, 543, 528, 523, 520, 558, 558, 514,
        534, 544
    ]
    start_sequences_mocap = [
        351, 448, 484, 0, 468, 324, 410, 392, 442, 445, 450, 371, 315, 388,
        452, 274, 525, 422, 387, 389, 332, 378, 240, 515, 357, 430, 239, 349,
        400, 260
    ]

    repetition = ["N01", "N02"]
    recordings = ['R{:02d}'.format(r) for r in range(1, 31)]

    for P in persons:
        for counter, r in enumerate(recordings):
            for N in repetition:
                print(
                    "\n\n------------------------------------------------------------------"
                )

                # Default annotator of the subject; the checks below
                # override it for single scenarios/recordings.
                annotator_file = annotator[P]
                if P == 'S07' and SCENARIO[r] == 'L01':
                    annotator_file = "A03"
                if P == 'S09' and r in ['R28', 'R29']:
                    annotator_file = "A01"
                if P == 'S09' and r in ['R21', 'R22', 'R23', 'R24', 'R25']:
                    annotator_file = "A11"
                if P == 'S11' and SCENARIO[r] == 'L01':
                    annotator_file = "A03"
                if P == 'S11' and r in ['R04', 'R08', 'R09', 'R10',
                                        'R11', 'R12', 'R13', 'R15']:
                    annotator_file = "A02"
                if P == 'S13' and r in ['R28']:
                    annotator_file = "A01"
                if P == 'S13' and r in ['R29', 'R30']:
                    annotator_file = "A11"
                if P == 'S14' and SCENARIO[r] == 'L03':
                    annotator_file = "A19"

                scenario = SCENARIO[r]
                file_name_imu = '{}/{}_{}_{}.csv'.format(P, scenario, P, r)
                file_name_imu_attr = '{}/{}_{}_{}_labels.csv'.format(
                    P, scenario, P, r)
                file_name_mocap_attr = "{}/{}_{}_{}_{}_{}_labels.csv".format(
                    P, scenario, P, r, annotator_file, N)

                print("\n{}\n".format(file_name_imu))

                if not os.path.exists(dir_dataset_mocap +
                                      file_name_mocap_attr):
                    print("1 - No file in {}".format(dir_dataset_mocap +
                                                     file_name_mocap_attr))
                    continue

                try:
                    data_labels = csv_reader.reader_labels(
                        dir_dataset_mocap + file_name_mocap_attr)
                    data_labels = data_labels[start_sequences_mocap[counter]:]
                except Exception:
                    print("2 - Error getting annotated labels in {}".format(
                        dir_dataset_mocap + file_name_mocap_attr))
                    continue

                try:
                    data_imu = read_extracted_data(dataset_path_imu +
                                                   file_name_imu, skiprows=1)
                    print("\nFiles loaded\n")
                except Exception:
                    print("3 - Error getting annotated labels in {}".format(
                        dataset_path_imu + file_name_imu))
                    continue

                try:
                    # keeping every second label
                    data_labels = data_labels[::2]

                    start = start_sequences_imus[counter]
                    sequence = data_imu[start:start + data_labels.shape[0]]
                    # first column restarts at zero
                    sequence[:, 0] = sequence[:, 0] - sequence[0, 0]

                    # columns: first IMU column, class label, remaining IMU
                    # columns
                    annotated_sequence = np.zeros(
                        (sequence.shape[0], sequence.shape[1] + 1))
                    annotated_sequence[:, 0] = sequence[:, 0]
                    annotated_sequence[:, 1] = data_labels[:, 0]
                    annotated_sequence[:, 2:] = sequence[:, 1:]
                except Exception:
                    print("4 - Error getting annotated labels in {}".format(
                        dataset_path_imu + file_name_imu))
                    continue

                try:
                    if annotated_sequence.shape[0] == data_labels.shape[0]:
                        save_data(annotated_sequence,
                                  dataset_path_imu_sequences + file_name_imu,
                                  headers_bool=True,
                                  seq_annotated=True)
                except Exception:
                    print("4 - Error saving atts in {}".format(
                        dataset_path_imu_sequences + file_name_imu_attr))

    return
def _min_adjacent_run(idxs):
    '''
    Return the smallest run counter over the runs of consecutive values in idxs.

    A run of k consecutive indexes contributes the counter k - 1 (the number
    of adjacent pairs inside the run). With fewer than two indexes the result
    is 0.

    @param idxs: 1-D increasing integer array of sample positions
    '''
    if idxs.shape[0] < 2:
        return 0
    # Positions where the next index is not the direct successor end a run.
    breaks = np.flatnonzero(np.diff(idxs) != 1)
    edges = np.concatenate(([-1], breaks, [idxs.shape[0] - 1]))
    return int(np.min(np.diff(edges) - 1))


def compute_min_num_samples(ids, boolean_classes=True, attr=0):
    '''
    Compute the minimum duration of a sequences with the same classes or attribute

    This value will help selecting the best sliding window size

    The duration of a run is counted as its number of adjacent sample pairs
    (run length minus one). A per-file minimum of 0 (label absent, or only
    isolated samples) is replaced by infinity before the overall minimum is
    taken.

    @param ids: ids for train
    @param boolean_classes: selecting between classes or attributes
    @param attr: ids for attribute
    @return: array with the minimal duration per class over all loaded files.
             np.min raises ValueError if no label file could be loaded.
    '''

    recordings = ['R{:02d}'.format(r) for r in range(1, 31)]

    NUM_CLASSES = 8 if boolean_classes else 2

    # One row per successfully loaded label file.
    min_durations = np.empty((0, NUM_CLASSES))
    hist_classes_all = np.zeros((NUM_CLASSES))

    for P in persons:
        if P not in ids:
            continue
        for r, R in enumerate(recordings):
            # All of these if-cases are coming due to the naming of the recordings in the data.
            # Not all the subjects have the same
            # annotated recordings, nor annotators, nor annotations runs, nor scenarios.
            # these will include all of the recordings for the subjects
            if P in ["S01", "S02", "S03", "S04", "S05", "S06"]:
                S = "L01"
            else:
                # NOTE(review): SCENARIO is indexed by position here but by
                # recording name elsewhere in this file - confirm its type.
                S = SCENARIO[r]

            for N in repetition:
                # The scenario-dependent overrides only concern subjects for
                # which S was taken from SCENARIO above.
                annotator_file = annotator[P]
                if P == 'S07' and S == 'L01':
                    annotator_file = "A03"
                if P == 'S14' and S == 'L03':
                    annotator_file = "A19"
                if P == 'S11' and S == 'L01':
                    annotator_file = "A03"
                if P == 'S11' and R in ['R04', 'R08', 'R09', 'R10', 'R11', 'R12', 'R13', 'R15']:
                    annotator_file = "A02"
                if P == 'S13' and R in ['R28']:
                    annotator_file = "A01"
                if P == 'S13' and R in ['R29', 'R30']:
                    annotator_file = "A11"
                if P == 'S09' and R in ['R28', 'R29']:
                    annotator_file = "A01"
                if P == 'S09' and R in ['R21', 'R22', 'R23', 'R24', 'R25']:
                    annotator_file = "A11"

                file_name_label = "{}/{}_{}_{}_{}_{}_labels.csv".format(P, S, P, R, annotator_file, N)

                try:
                    data = csv_reader.reader_labels(FOLDER_PATH + file_name_label)
                    labels = data[:, attr]
                    print("Files loaded")

                    min_duration = np.zeros((1, NUM_CLASSES))
                    for c in range(NUM_CLASSES):
                        # indexes per class
                        idxs = np.where(labels == c)[0]
                        min_counter = _min_adjacent_run(idxs)
                        min_duration[0, c] = min_counter
                        print("class {} counter size {}".format(c, min_counter))

                    min_durations = np.append(min_durations, min_duration, axis=0)

                    # Statistics
                    hist_classes = np.bincount(labels.astype(int), minlength=NUM_CLASSES)
                    hist_classes_all += hist_classes
                except Exception:
                    print("No file {}".format(FOLDER_PATH + file_name_label))

    # Zeros would dominate the minimum, so they are excluded.
    min_durations[min_durations == 0] = np.inf
    print("Minimal duration per class \n{}".format(min_durations))

    print("Number of samples per class {}".format(hist_classes_all))
    print("Number of samples per class {}".format(hist_classes_all / float(np.sum(hist_classes_all)) * 100))

    return np.min(min_durations, axis=0)
def generate_data(ids, sliding_window_length, sliding_window_step, data_dir=None):
    '''
    creates files for each of the sequences extracted from a file
    following a sliding window approach

    Each window is pickled to data_dir as 'seq_XXXXXX.pkl' holding a dict with
    the keys "data", "label" and "labels". Nothing is returned. Files that
    cannot be read or processed are reported on stdout and skipped.

    @param ids: ids for train, val or test
    @param sliding_window_length: length of window for segmentation
    @param sliding_window_step: step between windows for segmentation
    @param data_dir: path to dir where files will be stored. Required in
                     practice: with the None default no sequence can be
                     written.
    '''

    FOLDER_PATH = '/path_to_LARa_Mocap_for_annotations/'
    folder_derivative = "/path_to_LARa_Mocap_for_annotations/"

    # Recording names, refer to the naming of the files in LARa dataset
    recordings = ['R{:02d}'.format(r) for r in range(1, 31)]

    counter_seq = 0
    hist_classes_all = np.zeros(NUM_CLASSES)

    for P in persons:
        if P not in ids:
            print("\nNo Person in expected IDS {}".format(P))
            continue

        for R in recordings:
            # Selecting the proportions of the train, val or testing according to the quentity of
            # recordings per subject, as there are not equal number of recordings per subject
            # see dataset for checking the recording files per subject
            if P in ["S01", "S02", "S03", "S04", "S05", "S06"]:
                S = "L01"
            else:
                # NOTE(review): assumes SCENARIO is keyed by recording name
                # (e.g. 'R01') - confirm against its definition.
                S = SCENARIO[R]

            for N in repetition:
                # The scenario-dependent overrides reuse S so that SCENARIO is
                # indexed in one consistent way; they only concern subjects
                # for which S was taken from SCENARIO above.
                annotator_file = annotator[P]
                if P == 'S07' and S == 'L01':
                    annotator_file = "A03"
                if P == 'S14' and S == 'L03':
                    annotator_file = "A19"
                if P == 'S11' and S == 'L01':
                    annotator_file = "A03"
                if P == 'S11' and R in ['R04', 'R08', 'R09', 'R10', 'R11', 'R12', 'R13', 'R15']:
                    annotator_file = "A02"
                if P == 'S13' and R in ['R28']:
                    annotator_file = "A01"
                if P == 'S13' and R in ['R29', 'R30']:
                    annotator_file = "A11"
                if P == 'S09' and R in ['R28', 'R29']:
                    annotator_file = "A01"
                if P == 'S09' and R in ['R21', 'R22', 'R23', 'R24', 'R25']:
                    annotator_file = "A11"

                file_name_norm = "{}/{}_{}_{}_{}_{}_der_data.csv".format(P, S, P, R, annotator_file, N)
                file_name_label = "{}/{}_{}_{}_{}_{}_labels.csv".format(P, S, P, R, annotator_file, N)

                try:
                    # getting data
                    data = csv_reader.reader_data(folder_derivative + file_name_norm)
                    print("\nFiles loaded")
                except Exception:
                    print("\n In generating data, No file {}".format(folder_derivative + file_name_norm))
                    continue

                try:
                    # Getting labels and attributes
                    labels = csv_reader.reader_labels(FOLDER_PATH + file_name_label)
                    class_labels = np.where(labels[:, 0] == 7)[0]
                    print("\nGet labels")

                    # Deleting rows containing the "none" class
                    data = np.delete(data, class_labels, 0)
                    labels = np.delete(labels, class_labels, 0)
                    print("\nDeleting none rows")

                    # halving the frequency, as Mbientlab or MotionMiners sensors use 100Hz
                    downsampling = range(0, data.shape[0], 2)
                    data = data[downsampling]
                    labels = labels[downsampling]

                    data_t, data_x, data_y = divide_x_y(data)
                    del data_t
                    print("\nDownsampling")
                except Exception:
                    print("\n In generating data, Error getting the data {}".format(FOLDER_PATH + file_name_norm))
                    continue

                try:
                    data_x = norm_mean_std(data_x)

                    # checking if annotations are consistent
                    if np.sum(data_y == labels[:, 0]) == data_y.shape[0]:
                        # Sliding window approach
                        print("Starting sliding window")
                        X, y, y_all = opp_sliding_window(data_x, labels.astype(int),
                                                         sliding_window_length,
                                                         sliding_window_step,
                                                         label_pos_end=False)
                        print("Windows are extracted")

                        # Statistics
                        hist_classes = np.bincount(y[:, 0], minlength=NUM_CLASSES)
                        hist_classes_all += hist_classes
                        print("Number of seq per class {}".format(hist_classes_all))

                        for seq_idx in range(X.shape[0]):
                            try:
                                sys.stdout.write(
                                    '\r' + 'Creating sequence file number {} with id {}'.format(seq_idx, counter_seq))
                                sys.stdout.flush()

                                seq = np.reshape(X[seq_idx], (1, X.shape[1], X.shape[2]))
                                seq = np.require(seq, dtype=float)

                                # Storing the sequences
                                obj = {"data": seq, "label": y[seq_idx], "labels": y_all[seq_idx]}
                                seq_path = os.path.join(data_dir, 'seq_{0:06}.pkl'.format(counter_seq))
                                with open(seq_path, 'wb') as seq_file:
                                    pickle.dump(obj, seq_file, protocol=pickle.HIGHEST_PROTOCOL)

                                counter_seq += 1
                            except Exception as exc:
                                # Abort the remaining windows of this file; the
                                # handler below reports it and moves on.
                                raise RuntimeError('\nError adding the seq: {}'.format(exc)) from exc

                        print("\nCorrect data extraction from {}".format(FOLDER_PATH + file_name_norm))

                        # Release the large arrays before loading the next file.
                        del data, data_x, data_y, X, labels, class_labels
                    else:
                        print("\nNot consisting annotation in {}".format(file_name_norm))
                        continue
                except Exception as exc:
                    print("\n In generating data, Error processing {}: {}".format(FOLDER_PATH + file_name_norm, exc))

    return