if any(check_for_matching_word(words, back_keywords)):
    files["backCSV"] = sub_files_and_dirs
elif any(check_for_matching_word(words, thigh_keywords)):
    files["thighCSV"] = sub_files_and_dirs
elif any(check_for_matching_word(words, label_keywords)):
    files["labelCSV"] = sub_files_and_dirs

subjects[subject] = files
# print(subjects)

merged_df = None
dh = DataHandler()
dh_stacker = DataHandler()

for idx, root_dir in enumerate(subjects):
    subject = subjects[root_dir]
    print("SUBJECT: \n", subject)

    master = os.path.join(root_dir, subject['backCSV'])
    slave = os.path.join(root_dir, subject['thighCSV'])
    label = os.path.join(root_dir, subject['labelCSV'])

    # dh = DataHandler()
    dh.merge_csvs_on_first_time_overlap(
        master, slave,
        out_path=None,
        merge_column=None,
        master_columns=['bx', 'by', 'bz'],
import sys, os

# Make the project root importable when this script is run directly
try:
    sys.path.append(
        os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
except Exception:
    print("Could not append the project root to sys.path")

from src.pipeline.Pipeline import Pipeline
from src.pipeline.DataHandler import DataHandler
from src import models
import pickle

################################
# First we need to get the training data!
################################

print('CREATING datahandlerS')
dh1 = DataHandler()
dh2 = DataHandler()
print('CREATED datahandlerS')

# header=0 because the csv has the first row indicating column names; let pandas
# know that the first row is a header row
dh1.load_dataframe_from_csv(
    input_directory_path='/app/data/temp/testSNTAtle.7z/testSNTAtle/',
    filename='P1_atle_B_TEMP_SYNCHED_BT.csv',
    header=0,
    columns=[
        'timestamp', 'back_x', 'back_y', 'back_z',
        'thigh_x', 'thigh_y', 'thigh_z', 'btemp', 'ttemp'
    ])
dh1.convert_column_from_str_to_datetime(column_name='timestamp')
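# (Added sketch, not in the original script.) A quick sanity check that the
# timestamp column parsed into datetimes before it is used as an index later.
# Assumes get_dataframe_iterator() returns the loaded pandas DataFrame, as it
# does elsewhere in this repo.
_df_check = dh1.get_dataframe_iterator()
print(_df_check.dtypes)
print(_df_check.head(3))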
res = res[0]  # there will always be an array with one element
print("RES: ", res)
print("")

indx_of_most_conf = res.argmax(axis=0)
print("CLASS", " --> ", "CONFIDENCE")
print(indx_of_most_conf, " --> ", res[indx_of_most_conf])

answr = input("\nONE WINDOW CLASSIFICATION DONE\n Continue or quit [y | n]")
if answr != "y":
    os._exit(1)

#################
# CLASSIFY W/ MODEL
#################

datahandler = DataHandler()

# csv has column names as first row
datahandler.load_dataframe_from_csv(
    '../data/temp/4000181.7z/4000181/',
    '4000181-34566_2017-09-19_B_TEMP_SYNCHED_BT.csv',
    whole_days=True,
    chunk_size=20000,
    max_days=6)

# cols = time,bx,by,bz,tx,ty,tz,btemp,ttemp
predictions = model.inference(
    dataframe_iterator=datahandler.get_dataframe_iterator(),
    batch_size=512,
    sequence_length=250,
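    # (Added note.) sequence_length=250 windows the signal into 250-sample
    # sequences; if the sensors are sampled at 50 Hz (an assumption, the rate
    # is not stated here), each classified window covers 5 seconds.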
]
timestamps = [[
    ["2019-04-01 20:00:00", "2019-04-01 20:00:05"],
    ["2019-04-01 22:00:00", "2019-04-01 22:00:05"],
    ["2019-04-01 22:16:00", "2019-04-01 22:16:05"],
]]

dataframes = pipObj.create_large_dataframe_from_multiple_input_directories(
    train_list_with_subjects,
    merge_column=None,
    save=False,
    added_columns_name=['labels'],
    list=True)

dh = DataHandler()
for idx, df in enumerate(dataframes):
    for tidx, times in enumerate(timestamps[idx]):
        start = times[0]
        end = times[1]
        # print(start, end)
        # input("...")
        res = df.loc[start:end, ['ttemp']]
        vals = res['ttemp'].values
        # print(vals, type(vals))
        # input("...")
        # NB: these assignments shadow the built-in min/max functions
        min = np.amin(vals) - 1
        max = np.amax(vals) + 1
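        # (Added sketch.) The string slice df.loc[start:end] only works because
        # the dataframe carries a DatetimeIndex; pandas parses the strings and
        # returns the inclusive time range. The +/- 1 padding on min/max is
        # presumably headroom for plot axis limits.
        print("ttemp in [{}, {}]: padded range ({}, {})".format(start, end, min, max))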
    list_with_subjects,
    back_keywords=['Back', "b"],
    thigh_keywords=['Thigh', "t"],
    label_keywords=['GoPro', "Labels"],
    out_path=None,
    merge_column=None,
    master_columns=['bx', 'by', 'bz'],
    slave_columns=['tx', 'ty', 'tz'],
    rearrange_columns_to=None,
    save=False,
    added_columns_name=["label"],
    verbose=False
)

# Assuming split_rate is the held-out fraction: 80% train, then the remaining
# 20% split evenly into validation and test (roughly 80/10/10 overall)
train, validation = DataHandler.split_df_into_training_and_test(dataframe, split_rate=.2, shuffle=False)
validation, test = DataHandler.split_df_into_training_and_test(validation, split_rate=.5, shuffle=False)

p.train_lstm_model(
    training_dataframe=train,
    back_cols=['bx', 'by', 'bz'],
    # back_cols=None,
    thigh_cols=['tx', 'ty', 'tz'],
    # thigh_cols=None,
    config_path='../params/config.yml',
    # config_path='../params/one_sensor_config.yml',
    label_col='label',
    validation_dataframe=validation,
    save_to_path="trained_models/both_sensors_11_03",
    save_weights=True,
    shuffle=False
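    # (Added note.) shuffle=False is kept throughout: the splits above and the
    # training call both preserve temporal order, since shuffling rows would
    # break the time continuity the LSTM's sequence windows depend on.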
#########################################################
##
# Disabling Tensorflow GPU information
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
##
#########################################################

# #########################################################
# ##
# # TESTING
# ##
# #########################################################

# GET DATA
dh3 = DataHandler()
dh3.load_dataframe_from_csv(
    input_directory_path='/app/data/temp/4000181.7z/4000181/',
    filename='4000181-34566_2017-09-19_B_TEMP_SYNCHED_BT.csv',
    header=0,
    columns=[
        'timestamp', 'back_x', 'back_y', 'back_z',
        'thigh_x', 'thigh_y', 'thigh_z', 'btemp', 'ttemp'
    ])

dh3.convert_column_from_str_to_datetime(column_name='timestamp')
dh3.set_column_as_index("timestamp")
dh3.add_new_column()
dh3.add_labels_file_based_on_intervals(
    intervals={
# # first unzip and synch .7z folder
# datahandler = pipeline.unzipNsynch(os.path.join(input_dir_rel_path, data_name), save=True)  # returns datahandler
# unzipped_path = datahandler.get_unzipped_path()
#
# pipeline.addLables(intervals="../data/temp/{}/{}/{}".format(
#     data_name, data_name.split(".")[0], label_file), column_name="label")
# dataframe = pipeline.dh.get_dataframe_iterator()
# print(dataframe.head(10))

###########
#
# IF data is csv file
#
##########

dh = DataHandler()
sub_name = data_name.split(".")[0]
input_dir = "../data/temp/xxx_x.7z/xxx_x/"
filename = "xxx_xB_xxx_x_T_timesync_output_TEMP_BT.csv"

dh.load_dataframe_from_csv(
    input_directory_path=input_dir,
    filename=filename,
    header=0,
    columns=['time', 'bx', 'by', 'bz', 'tx', 'ty', 'tz', 'btemp', 'ttemp'])

dh.convert_column_from_str_to_datetime(column_name='time')
dh.set_column_as_index("time")

# # add labels
dh.add_new_column("label")
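# (Added sketch, not in the original script.) Verify the frame now has a
# DatetimeIndex and an (empty) label column before intervals are written into
# it; assumes get_dataframe_iterator() returns the underlying pandas DataFrame,
# as it does elsewhere in this repo.
_df = dh.get_dataframe_iterator()
print(_df.index.dtype, list(_df.columns))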
batch_size = config.TRAINING['args']['batch_size']
sequence_length = config.TRAINING['args']['sequence_length']
callbacks = config.TRAINING['args']['callbacks'] or None

back_cols = ['bx', 'by', 'bz']
thigh_cols = ['tx', 'ty', 'tz']
label_col = 'label'

#######
# LEAVE ONE OUT CONFIGURATION
#######

X = np.array(train_list_with_subjects)
loo = LeaveOneOut()
datahandler = DataHandler()
RUNS_HISTORY = {}

for train_index, test_index in loo.split(X):
    print("TRAIN:", train_index, "TEST:", test_index)

    trainingset = []
    testset = trainDataframes[test_index[0]]
    for idx in train_index:
        trainingset.append(trainDataframes[idx])

    model_history = model.train(
        train_data=trainingset,
        valid_data=None,
        epochs=config.TRAINING['args']['epochs'],
        batch_size=batch_size,  # gets this from config file when init model
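        # (Added note.) LeaveOneOut yields one split per subject, so each
        # subject is held out exactly once; e.g. with three subjects the loop
        # sees TRAIN [1 2] TEST [0], TRAIN [0 2] TEST [1], TRAIN [0 1] TEST [2].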