# NOTE: this fragment presumably runs inside a loop over recordings (hence idx / continue).
# dh = DataHandler()
dh.merge_csvs_on_first_time_overlap(
    master,
    slave,
    out_path=None,
    merge_column=None,
    master_columns=['bx', 'by', 'bz'],
    slave_columns=['tx', 'ty', 'tz'],
    rearrange_columns_to=None,
    save=False,
    left_index=True,
    right_index=True)

dh.add_columns_based_on_csv(label, columns_name=["label"], join_type="inner")

if idx == 0:
    merged_df = dh.get_dataframe_iterator()
    continue

merged_old_shape = merged_df.shape

# Vertically stack the dataframes, i.e. append the rows of the new dataframe
# to the merged dataframe built so far (see the sketch after this block).
merged_df = dh_stacker.vertical_stack_dataframes(
    merged_df, dh.get_dataframe_iterator(), set_as_current_df=False)

print("shape merged df: ", merged_df.shape,
      "should be ", dh.get_dataframe_iterator().shape,
      " more than old ", merged_old_shape)

print("Final merge form: ", merged_df.shape)
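# A minimal sketch (an assumption, not the project's implementation) of what
# vertical_stack_dataframes is expected to do: append the rows of the second
# dataframe below the first with pandas.concat, keeping each recording's
# datetime index so that interval-based labelling still works afterwards.
import pandas as pd

def _sketch_vertical_stack(df_top, df_bottom):
    # Row-wise concatenation; both frames are assumed to share the same columns.
    return pd.concat([df_top, df_bottom], axis=0)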
#################
# CLASSIFY W/ MODEL
#################

datahandler = DataHandler()

# csv has column names as first row
# (a sketch of the chunked reading is given after this block)
datahandler.load_dataframe_from_csv(
    '../data/temp/4000181.7z/4000181/',
    '4000181-34566_2017-09-19_B_TEMP_SYNCHED_BT.csv',
    whole_days=True,
    chunk_size=20000,
    max_days=6)

# cols = time,bx,by,bz,tx,ty,tz,btemp,ttemp
predictions = model.inference(
    dataframe_iterator=datahandler.get_dataframe_iterator(),
    batch_size=512,
    sequence_length=250,
    weights_path=config.WEIGHTS_PATH,
    timestamp_col="time",
    back_cols=['bx', 'by', 'bz'],
    thigh_cols=['tx', 'ty', 'tz'])

answer = input(
    "\nENTIRE DATASET CLASSIFICATION DONE\nContinue or quit [y | n] ")
if answer != "y":
    os._exit(1)
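# Sketch (assumption): load_dataframe_from_csv with chunk_size presumably maps
# onto pandas' chunked reader, which yields DataFrames of at most chunk_size
# rows instead of loading a whole multi-day recording into memory at once.
# The whole_days / max_days handling of the real DataHandler is not modelled here.
import os
import pandas as pd

def _sketch_chunked_reader(directory, filename, chunk_size=20000):
    # read_csv(..., chunksize=N) returns an iterator over DataFrame chunks.
    return pd.read_csv(os.path.join(directory, filename), chunksize=chunk_size)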
dh3.add_new_column()
dh3.add_labels_file_based_on_intervals(
    intervals={
        '1': [['2017-09-19', '18:31:09', '23:59:59'],
              ['2017-09-20', '00:00:00', '08:23:08'],
              ['2017-09-20', '08:35:13', '16:03:58'],
              ['2017-09-20', '16:20:21', '23:59:59'],
              ['2017-09-21', '00:00:00', '09:23:07'],
              ['2017-09-21', '09:35:40', '23:59:59'],
              ['2017-09-22', '00:00:00', '09:54:29']],
        '3': [['2017-09-20', '08:23:09', '08:35:12'],
              ['2017-09-20', '16:03:59', '16:20:20'],
              ['2017-09-21', '09:23:08', '09:35:39']]
    })

dataframe_test = dh3.get_dataframe_iterator()
dataframe_test.dropna(subset=['label'], inplace=True)

###############
# RUN PIPELINE PARALLEL CODE
# build queues for model classification and activity classification
###############

# Do some magic numbering: how many temperature samples fall inside one window
# (a worked example follows after this block)
sampling_frequency = 50
window_length = 120
temperature_reading_rate = 120
samples_pr_second = 1 / (temperature_reading_rate / sampling_frequency)
samples_pr_window = int(window_length * samples_pr_second)

RFC = models.get("RFC", {})
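# Worked example of the numbers above (plain arithmetic): with a sampling
# frequency of 50 Hz and, presumably, one temperature reading per 120
# accelerometer samples, samples_pr_second = 1 / (120 / 50) ≈ 0.4167
# temperature samples per second, so a 120 s window holds
# int(120 * 0.4167) = 50 temperature samples.
assert samples_pr_window == 50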
              ['2018-04-27', '10:03:39', '11:09:00']],
        '2': [['2018-04-27', '11:09:01', '12:19:00']],
        '3': [['2018-04-27', '12:19:01', '14:28:00']]
    })

dh2.add_new_column()
dh2.add_labels_file_based_on_intervals(
    intervals={
        '1': [['2018-04-24', '12:09:00', '13:08:00']],
        '2': [['2018-04-24', '13:08:01', '14:08:00']],
        '3': [['2018-04-24', '14:08:01', '15:08:00']]
    })

###################################### remove rows that do not have a label ###########################
df1 = dh1.get_dataframe_iterator()
df2 = dh2.get_dataframe_iterator()
print(df1.shape, df2.shape)

df1.dropna(subset=['label'], inplace=True)
df2.dropna(subset=['label'], inplace=True)
print(df1.shape, df2.shape)

############################## THEN COMBINE INTO ONE BIG TRAINING SET AKA VERTICAL STACKING #############
dataframe = dh1.vertical_stack_dataframes(df1, df2, set_as_current_df=False)
# dataframe = dh1.vertical_stack_dataframes(dataframe, df3, set_as_current_df=False)
print("DATAFRAME\n", dataframe.head(5), dataframe.shape)

############################## THEN WE MUST EXTRACT FEATURES AND LABELS ######################################
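# Sketch (assumption, illustrative names only): the banner above suggests the
# stacked dataframe is split into sensor features and the 'label' target. A
# minimal split using the column names from this file could look like this;
# the real pipeline may instead extract windowed features (incl. temperature).
sketch_features = dataframe[['bx', 'by', 'bz', 'tx', 'ty', 'tz']].values
sketch_labels = dataframe['label'].values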
dh.load_dataframe_from_csv(
    input_directory_path=input_dir,
    filename=filename,
    header=0,
    columns=['time', 'bx', 'by', 'bz', 'tx', 'ty', 'tz', 'btemp', 'ttemp'])

dh.convert_column_from_str_to_datetime(column_name='time')
dh.set_column_as_index("time")

# add labels
# (see the sketch of the expected JSON / interval handling at the end of this block)
dh.add_new_column("label")
intervals = dh.read_labels_from_json(
    filepath="../data/temp/xxx_x.7z/xxx_x/xxx_x intervals.json")
dh.add_labels_file_based_on_intervals(intervals=intervals)

df = dh.get_dataframe_iterator()
print(df.head(10))
print()
print(df.dtypes)

# ########
# DATA INPUT FORMAT SPECIFIC DONE
# ########

# Do some magic numbering: how many temperature samples fall inside one window
sampling_frequency = 50
window_length = 120
temperature_reading_rate = 120
samples_pr_second = 1 / (temperature_reading_rate / sampling_frequency)
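# Sketch (assumptions, not the project's implementation): read_labels_from_json
# presumably returns the same structure that is written inline elsewhere in
# this file, i.e. a dict mapping each label to a list of
# [date, start_time, end_time] entries, and add_labels_file_based_on_intervals
# presumably writes that label into the 'label' column of every row whose
# datetime index falls inside one of the intervals.
import json

def _sketch_read_labels_from_json(filepath):
    with open(filepath) as f:
        return json.load(f)

def _sketch_label_intervals(df, intervals):
    for label_value, spans in intervals.items():
        for date, start, end in spans:
            # .loc slicing on a DatetimeIndex is inclusive of both endpoints.
            df.loc[f"{date} {start}":f"{date} {end}", 'label'] = label_value
    return df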