        if any(check_for_matching_word(words, back_keywords)):
            files["backCSV"] = sub_files_and_dirs

        elif any(check_for_matching_word(words, thigh_keywords)):
            files["thighCSV"] = sub_files_and_dirs

        elif any(check_for_matching_word(words, label_keywords)):
            files["labelCSV"] = sub_files_and_dirs

    subjects[subject] = files

# print(subjects)

merged_df = None
dh = DataHandler()
dh_stacker = DataHandler()
for idx, root_dir in enumerate(subjects):
    subject = subjects[root_dir]
    print("SUBJECT: \n", subject)

    master = os.path.join(root_dir, subject['backCSV'])
    slave = os.path.join(root_dir, subject['thighCSV'])
    label = os.path.join(root_dir, subject['labelCSV'])

    # dh = DataHandler()
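    # merge the back (master) and thigh (slave) sensor CSVs on the first
    # timestamp where both recordings overlap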
    dh.merge_csvs_on_first_time_overlap(master,
                                        slave,
                                        out_path=None,
                                        merge_column=None,
                                        master_columns=['bx', 'by', 'bz'],
                                        slave_columns=['tx', 'ty', 'tz'])  # closing argument assumed from the matching call further down; truncated in source

Example #2
import sys, os
try:
    sys.path.append(
        os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
except Exception as err:
    print("Could not append project root to sys.path:", err)

from src.pipeline.Pipeline import Pipeline
from src.pipeline.DataHandler import DataHandler
from src import models
import pickle

################################ First we need to get the training data! #################################

print('CREATING datahandlerS')
dh1 = DataHandler()
dh2 = DataHandler()
print('CREATED datahandlerS')

# header=0 tells pandas that the first row of the csv contains the column
# names, i.e. to treat that row as a header row
dh1.load_dataframe_from_csv(
    input_directory_path='/app/data/temp/testSNTAtle.7z/testSNTAtle/',
    filename='P1_atle_B_TEMP_SYNCHED_BT.csv',
    header=0,
    columns=[
        'timestamp', 'back_x', 'back_y', 'back_z', 'thigh_x', 'thigh_y',
        'thigh_z', 'btemp', 'ttemp'
    ])

dh1.convert_column_from_str_to_datetime(column_name='timestamp')
Example #3
res = res[0]  # there will always be an array with one element
print("RES: ", res)
print("")
indx_of_most_conf = res.argmax(axis=0)
print("CLASS", " --> ", "CONFIDENCE")
print(indx_of_most_conf, " --> ", res[indx_of_most_conf])

answr = input("\nONE WINDOW CLASSIFICATION DONE\n Continue or quit [y | n]")
if answr != "y":
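    # os._exit terminates the process immediately, without running interpreter cleanup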
    os._exit(1)

#################
# CLASSIFY W/ MODEL
#################

datahandler = DataHandler()

# csv has column names as first row
datahandler.load_dataframe_from_csv(
    '../data/temp/4000181.7z/4000181/',
    '4000181-34566_2017-09-19_B_TEMP_SYNCHED_BT.csv',
    whole_days=True,
    chunk_size=20000,
    max_days=6)
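# whole_days=True aligns the loaded data to whole days; chunk_size sets how many
# rows each chunk of the dataframe iterator holds, and max_days caps the range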

# cols = time, bx, by, bz, tx, ty, tz, btemp, ttemp

predictions = model.inference(
    dataframe_iterator=datahandler.get_dataframe_iterator(),
    batch_size=512,
    sequence_length=250,
)

timestamps = [[
    ["2019-04-01 20:00:00", "2019-04-01 20:00:05"],
    ["2019-04-01 22:00:00", "2019-04-01 22:00:05"],
    ["2019-04-01 22:16:00", "2019-04-01 22:16:05"],
]]
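# each inner [start, end] pair marks a five-second window to slice out of the
# corresponding time-indexed dataframe below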

dataframes = pipObj.create_large_dataframe_from_multiple_input_directories(
    train_list_with_subjects,
    merge_column=None,
    save=False,
    added_columns_name=['labels'],
    list=True)

dh = DataHandler()
for idx, df in enumerate(dataframes):
    for tidx, times in enumerate(timestamps[idx]):

        start = times[0]
        end = times[1]

        # print(start, end)
        # input("...")

        res = df.loc[start:end, ['ttemp']]
        vals = res['ttemp'].values
        # print(vals, type(vals))
        # input("...")
        vmin = np.amin(vals) - 1  # renamed from min/max to avoid shadowing the builtins
        vmax = np.amax(vals) + 1

# NOTE: the head of this call was lost in the source; judging by the keyword
# arguments, the call below is assumed to be the same
# create_large_dataframe_from_multiple_input_directories used earlier,
# assigned to `dataframe` (which is used further down)
dataframe = pipObj.create_large_dataframe_from_multiple_input_directories(
    list_with_subjects,
    back_keywords=['Back', "b"],
    thigh_keywords=['Thigh', "t"],
    label_keywords=['GoPro', "Labels"],
    out_path=None,
    merge_column=None,
    master_columns=['bx', 'by', 'bz'],
    slave_columns=['tx', 'ty', 'tz'],
    rearrange_columns_to=None,
    save=False,
    added_columns_name=["label"],
    verbose=False
)


train, validation = DataHandler.split_df_into_training_and_test(dataframe, split_rate=.2, shuffle=False)
validation, test = DataHandler.split_df_into_training_and_test(validation, split_rate=.5, shuffle=False)
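# assuming split_rate is the held-out fraction: the first split leaves 80% for
# training, and halving the remaining 20% yields an 80/10/10 train/validation/test split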

p.train_lstm_model(
    training_dataframe=train,
    back_cols=['bx','by','bz'],
    # back_cols=None,
    thigh_cols=['tx','ty','tz'],
    # thigh_cols=None,
    config_path='../params/config.yml',
    # config_path='../params/one_sensor_config.yml',
    label_col='label',
    validation_dataframe=validation,
    save_to_path="trained_models/both_sensors_11_03",
    save_weights=True,
    shuffle=False
)

#########################################################
##
# Disabling TensorFlow GPU information
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
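# level '2' filters out TensorFlow's INFO and WARNING messages; only errors are printed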
##
#########################################################

# #########################################################
# ##
# # TESTING
# ##
# #########################################################
#
# GET DATA
dh3 = DataHandler()
dh3.load_dataframe_from_csv(
    input_directory_path='/app/data/temp/4000181.7z/4000181/',
    filename='4000181-34566_2017-09-19_B_TEMP_SYNCHED_BT.csv',
    header=0,
    columns=[
        'timestamp', 'back_x', 'back_y', 'back_z', 'thigh_x', 'thigh_y',
        'thigh_z', 'btemp', 'ttemp'
    ])

dh3.convert_column_from_str_to_datetime(column_name='timestamp')
dh3.set_column_as_index("timestamp")

dh3.add_new_column()
dh3.add_labels_file_based_on_intervals(
    intervals={
        # ... intervals dict truncated in source ...
    })

Example #7
# # first unzip and synch .7z folder
# datahandler = pipeline.unzipNsynch(os.path.join(input_dir_rel_path, data_name), save=True) # returns datahandler
# unzipped_path = datahandler.get_unzipped_path()
#
# pipeline.addLables(intervals="../data/temp/{}/{}/{}".format(
#     data_name, data_name.split(".")[0], label_file), column_name="label")
# dataframe = pipeline.dh.get_dataframe_iterator()
# print(dataframe.head(10))

###########
#
# IF data is csv file
#
##########

dh = DataHandler()
sub_name = data_name.split(".")[0]
input_dir = "../data/temp/xxx_x.7z/xxx_x/"
filename = "xxx_xB_xxx_x_T_timesync_output_TEMP_BT.csv"

dh.load_dataframe_from_csv(
    input_directory_path=input_dir,
    filename=filename,
    header=0,
    columns=['time', 'bx', 'by', 'bz', 'tx', 'ty', 'tz', 'btemp', 'ttemp'])

dh.convert_column_from_str_to_datetime(column_name='time')
dh.set_column_as_index("time")
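# with the time column as a DatetimeIndex, rows can be sliced by timestamp
# ranges (e.g. df.loc[start:end]), as in the window-classification example above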

# add labels
dh.add_new_column("label")
Example #8
batch_size = config.TRAINING['args']['batch_size']
sequence_length = config.TRAINING['args']['sequence_length']
callbacks = config.TRAINING['args']['callbacks'] or None
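# `or None` falls back to None when the config specifies no callbacks (an empty list is falsy)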

back_cols = ['bx', 'by', 'bz']
thigh_cols = ['tx', 'ty', 'tz']
label_col = 'label'

#######
# LEAVE ONE OUT CONFIGURATION
#######

X = np.array(train_list_with_subjects)

loo = LeaveOneOut()
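# LeaveOneOut produces one fold per subject: train on all other subjects, test on the held-out one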
datahandler = DataHandler()

RUNS_HISTORY = {}

for train_index, test_index in loo.split(X):
    print("TRAIN:", train_index, "TEST:", test_index)
    trainingset = []
    testset = trainDataframes[test_index[0]]
    for idx in train_index:
        trainingset.append(trainDataframes[idx])

    model_history = model.train(
        train_data=trainingset,
        valid_data=None,
        epochs=config.TRAINING['args']['epochs'],
        batch_size=batch_size,  # gets this from config file when init model