import sys, os try: sys.path.append( os.path.abspath( os.path.join( os.path.dirname( __file__), '..'))) except: print("SAdsadsadhsa;hkldasjkd") from src.pipeline.Pipeline import Pipeline from src import models pipObj = Pipeline() # Create a TRAINING dataframe train = [ '../data/temp/xxx_x.7z/xxx_x', ] trainDataframe = pipObj.create_large_dataframe_from_multiple_input_directories( train, merge_column='time', master_columns=['time', 'bx', 'by', 'bz', 'tx', 'ty', 'tz'], slave_columns=['time', 'bx1', 'by1', 'bz1', 'btemp'], slave2_columns=['time', 'tx1', 'ty1', 'tz1', 'ttemp'], rearrange_columns_to=[ 'time', 'bx', 'by', 'bz', 'tx', 'ty', 'tz', 'btemp', 'ttemp'
# BUG FIX: `os` is used below (os.cpu_count()) but was never imported.
import os
import math
import pickle
import time

import pandas as pd

from src.pipeline.Pipeline import Pipeline
from src.pipeline.DataHandler import DataHandler
from src import models

######################################
#
# CONFIGURE THE PIPELINE
#
######################################

# Create pipeline object
pipObj = Pipeline()

# Define how many CPUs we can parallelize meta classification on.
cpus = os.cpu_count()
# cpus = 1

######################################
#
# CONFIGURE THE DATA
#
######################################

# Zipped subject recordings to classify.
list_with_subjects_to_classify = [
    '../data/input/4003601.7z',
]
import sys
import os

# Make the project root importable when this script is run directly.
# BUG FIX: replaced the original bare `except:` + junk print with a narrow
# NameError handler (__file__ undefined in interactive sessions).
try:
    sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
except NameError:
    print("WARNING: could not add project root to sys.path (__file__ undefined)")

from src.pipeline.Pipeline import Pipeline
from src.pipeline.resampler import main as resampler
import pandas as pd

pipObj = Pipeline()

# list_with_subjects = [
#     '../data/input/4000181.7z'
#     # '../data/input/training_data/006'
# ]
#
# ### unzip all data
# unzipped_paths = pipObj.unzip_multiple_directories(list_with_subjects, zip_to="../data/temp/")
# print(unzipped_paths)

subject = "022"

# resample = [
#     '../data/input/training_data/' + subject
# ]
#
# trainDataframe = pipObj.create_large_dataframe_from_multiple_input_directories(
#     resample,
#     merge_column='time',
import sys
import os

# Make the project root importable when this script is run directly.
# BUG FIX: replaced the original bare `except:` + junk print with a narrow
# NameError handler (__file__ undefined in interactive sessions).
try:
    sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
except NameError:
    print("WARNING: could not add project root to sys.path (__file__ undefined)")

import datetime

import numpy as np
from matplotlib import pyplot as plt

from src.pipeline.DataHandler import DataHandler
from src.pipeline.Pipeline import Pipeline

now = datetime.datetime.now()

pipObj = Pipeline()

list_with_subjects = [
    '../data/input/xxx_x.7z',
    '../data/input/xxx_x.7z',
]

# ### unzip all data
# unzipped_paths = pipObj.unzip_multiple_directories(list_with_subjects, zip_to="../data/temp/")

unzipped_paths = [
    '../data/temp/xxx_x.7z/xxx_x',
    '../data/temp/xxx_x.7z/xxx_x',
]

# No need to downsample: the data was recorded at 50 Hz.
""" from pyspark import SparkContext, SparkConf, SQLContext from pyspark.sql import SparkSession from src.pipeline.Pipeline import Pipeline from src.config.param_config.param_config import param_dict from src.utils.arg_parse import pipeline_arg_parse ##################################必须传入的参数########################################### ####读入常变参数 params = pipeline_arg_parse() final_param_dict = param_dict print(params) print(param_dict) ###启动spark环境 try: sc.stop() conf = SparkConf() conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer") conf.set("spark.memory.fraction", 0.8) sc = SparkContext(conf).getOrCreate() # 添加参数启动 except: conf = SparkConf() conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer") conf.set("spark.memory.fraction", 0.8) sc = SparkContext().getOrCreate() spark = SparkSession.builder.enableHiveSupport().getOrCreate() Pp = Pipeline(sc, final_param_dict, spark=spark) result = Pp.run_feature()
import sys, os try: sys.path.append( os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) except: print("SAdsadsadhsa;hkldasjkd") import numpy as np import pandas as pd from src.pipeline.Pipeline import Pipeline pipObj = Pipeline() # list_with_subjects = [ # '../data/input/xxx_x.7z', # '../data/input/xxx_x.7z # ] # data = pipObj.unzip_multiple_directories(list_with_subjects, zip_to="../data/temp/") # print(unzipped_paths) data = [ # '../data/temp/xxx_x.7z/xxx_x', # '../data/temp/xxx_x.7z/xxx_x' ] dataframe = pipObj.create_large_dataframe_from_multiple_input_directories( data, merge_column='time', master_columns=['time', 'bx', 'by', 'bz', 'tx', 'ty', 'tz'], slave_columns=['time', 'bx1', 'by1', 'bz1', 'btemp'],
import sys, os try: sys.path.append( os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) except: print("SAdsadsadhsa;hkldasjkd") from src.pipeline.Pipeline import Pipeline from src import models pipObj = Pipeline() # list_with_subjects = [ # '../data/input/xxx_x.7z', # '../data/input/xxx_x.7z' # ] # # ###unzip all data # unzipped_paths = pipObj.unzip_multiple_directories(list_with_subjects, zip_to="../data/temp/") # print(unzipped_paths) train = [ # '../data/temp/xxx_x.7z/xxx_x' ] test = ['../data/temp/xxx_x.7z/xxx_x', '../data/temp/xxx_x.7z/xxx_x'] trainDataframe = pipObj.create_large_dataframe_from_multiple_input_directories( train, merge_column='time',
import sys, os try: sys.path.append( os.path.abspath( os.path.join( os.path.dirname( __file__), '..'))) except: print("SAdsadsadhsa;hkldasjkd") import datetime import numpy as np from matplotlib import pyplot as plt from src.pipeline.DataHandler import DataHandler from src.pipeline.Pipeline import Pipeline now = datetime.datetime.now() pipObj = Pipeline() train_list_with_subjects = [ '../data/input/training_data/006', '../data/input/training_data/008', '../data/input/training_data/009', '../data/input/training_data/010', '../data/input/training_data/011', '../data/input/training_data/012', '../data/input/training_data/013', '../data/input/training_data/014', '../data/input/training_data/015', '../data/input/training_data/016', '../data/input/training_data/017', '../data/input/training_data/018', '../data/input/training_data/019', '../data/input/training_data/020', '../data/input/training_data/021',
# NOTE(review): this fragment depends on dh1, dh2, df1, models and Pipeline
# being defined earlier in the file (not visible here).
df2 = dh2.get_dataframe_iterator()

print(df1.shape, df2.shape)
# Drop rows with no activity label; only labelled rows can be used for training.
df1.dropna(subset=['label'], inplace=True)
df2.dropna(subset=['label'], inplace=True)
print(df1.shape, df2.shape)

############################## THEN COMBINE INTO ONE BIG TRAINING SET AKA VERTICAL STACKING #############
dataframe = dh1.vertical_stack_dataframes(df1, df2, set_as_current_df=False)
# dataframe = dh1.vertical_stack_dataframes(dataframe, df3, set_as_current_df=False)
print("DATAFRAME\n", dataframe.head(5), dataframe.shape)

############################## THEN WE MUST EXTRACT FEATURES N LABELS ######################################
pipeObj = Pipeline()
back_feat_train, thigh_feat_train, label_train = pipeObj.get_features_and_labels_as_np_array(
    dataframe)

############################## THEN WE MUST TRAIN THE CLASSIFIER ######################################
RFC = models.get("RFC", {})

##############
# MODEL ARGUMENTS
##############

# Do some magic numbering
# presumably samples per second of the accelerometer recording — TODO confirm
sampling_frequency = 50
window_length = 120
# NOTE(review): name contains a typo ("tempearture"); kept as-is because it may
# be referenced later in the file.
tempearture_reading_rate = 120
import sys
import os

# Make the project root importable when this script is run directly.
# BUG FIX: replaced the original bare `except:` + junk print with a narrow
# NameError handler (__file__ undefined in interactive sessions).
try:
    sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
except NameError:
    print("WARNING: could not add project root to sys.path (__file__ undefined)")

from src.pipeline.Pipeline import Pipeline

pipObj = Pipeline()

# 1. First unzip if .7z
# 2. then concat / merge two sensors and synch with time and temperature
#    for instance: pipelineObject.create_large_dataframe_from_multiple_input_directories does all of this!
# For each dataset:
#   extract_temperature using cwa_converter.convert_cwas_to_csv_with_temp
#   merge_multiple_csvs
#   concat_dataframes
#   optionally add_labels
#   save to a specific output path

# outpath, res_df = pipObj.downsampleData(
#     input_csv_path="../data/temp/merged/res006.csv",
#     out_csv_path="../data/temp/merged/resampled006.csv",
#     discrete_columns=['label']
# )

# Have a function to read a csv into a dataframe, which then becomes the
# training or testing dataframe, equal to the return of
# pipObj.create_large_dataframe_from_multiple_input_directories
# dataframe = datahandler.load_dataframe_from_csv()
import sys
import os

# Make the project root importable when this script is run directly.
# BUG FIX: replaced the original bare `except:` + junk print with a narrow
# NameError handler (__file__ undefined in interactive sessions).
try:
    sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
except NameError:
    print("WARNING: could not add project root to sys.path (__file__ undefined)")

import datetime

import numpy as np
from matplotlib import pyplot as plt

from src.pipeline.DataHandler import DataHandler
from src.pipeline.Pipeline import Pipeline

pipObj = Pipeline()

train_list_with_subjects = [
    '../data/temp/xxx_x.7z/xxx_x/',
]

# One list of [start, end] interval pairs per subject directory above.
timestamps = [[
    ["2019-04-01 20:00:00", "2019-04-01 20:00:05"],
    ["2019-04-01 22:00:00", "2019-04-01 22:00:05"],
    ["2019-04-01 22:16:00", "2019-04-01 22:16:05"],
]]

dataframes = pipObj.create_large_dataframe_from_multiple_input_directories(
    train_list_with_subjects,
    merge_column=None,
    save=False,
    added_columns_name=['labels'],
    list=True)
import sys, os try: sys.path.append( os.path.abspath( os.path.join( os.path.dirname( __file__), '..'))) except: pass import pandas as pd from src.pipeline.DataHandler import DataHandler from src.pipeline.Pipeline import Pipeline p = Pipeline() list_with_subjects = [ '../data/input/006', '../data/input/008', '../data/input/009', '../data/input/training_data/small_set' ] dataframe = p.create_large_dataframe_from_multiple_input_directories( list_with_subjects, back_keywords=['Back', "b"], thigh_keywords=['Thigh', "t"], label_keywords=['GoPro', "Labels"], out_path=None, merge_column=None, master_columns=['bx', 'by', 'bz'], slave_columns=['tx', 'ty', 'tz'], rearrange_columns_to=None,
# NOTE(review): this fragment starts inside a dict literal whose opening (and
# entries "1"/"2") lie earlier in the file; presumably it maps model ids to
# LSTM config/weight paths — verify against the surrounding code.
        "saved_model": "trained_models/test_model_thigh_sensor.h5",
        "weights": "trained_models/test_model_thigh_sensor_weights.h5"
    },
    "3": {
        "config": "../params/one_sensor_config.yml",
        "saved_model": "trained_models/test_model_back_sensor.h5",
        "weights": "trained_models/test_model_back_sensor_weights.h5"
    }
}

# Split the CPUs between model evaluation and classification.
# (math.floor is redundant after integer division //, but kept byte-identical.)
model_cpus = math.floor(os.cpu_count() // 2)
class_cpus = math.floor(os.cpu_count() // 2)
# Guarantee at least one CPU for each role on single-core machines.
if model_cpus == 0 or class_cpus == 0:
    model_cpus, class_cpus = 1, 1

p = Pipeline()

# Maps logical roles to the dataframe's column names.
dataframe_columns = {
    'back_features': ['back_x', 'back_y', 'back_z'],
    'thigh_features': ['thigh_x', 'thigh_y', 'thigh_z'],
    'back_temp': ['btemp'],
    'thigh_temp': ['ttemp'],
    'label_column': ['label'],
    'time': []
}

# NOTE(review): dataframe_test, rfc_model_path and lstm_models_path are
# defined earlier in the file (not visible here).
a, t, b, r = p.parallel_pipeline_classification_run(
    dataframe=dataframe_test,
    dataframe_columns=dataframe_columns,
    rfc_model_path=rfc_model_path,
    lstm_models_paths=lstm_models_path,
import sys
import os

# Make the project root importable when this script is run directly.
# BUG FIX: replaced the original bare `except:` + junk print with a narrow
# NameError handler (__file__ undefined in interactive sessions).
try:
    sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
except NameError:
    print("WARNING: could not add project root to sys.path (__file__ undefined)")

from src.pipeline.Pipeline import Pipeline
from src.pipeline.DataHandler import DataHandler
from src import models

input_dir_rel_path = "/app/data/input"
data_name = "xxx_x.7z"
label_file = "xxx_x intervals.json"

pipeline = Pipeline()

###########
#
# IF first time running script on data, else it is saved in ../data/temp/name
#
##########

# if there already is a temp folder with the same name
# TODO get this in the unzip N Synch method, path is unzip_path + filename.7z
# if os.path.exists("../data/temp/{}".format(data_name)):
#     print("REMVOING OLD TEMP FOLDER")
#     os.system("rm -rf ../data/temp/{}".format(data_name))
#
#
# # first unzip and synch .7z folder
from src.pipeline.Pipeline import Pipeline


def main():
    """Build a Pipeline and run its example end-to-end flow."""
    # TODO: invoke this via the web API instead of as a script
    # (previously: example_pipeline(config.data.path_to_video)).
    Pipeline().example_pipeline()


if __name__ == "__main__":
    main()