Example #1
from src.pipeline.Pipeline import Pipeline
from src.pipeline.DataHandler import DataHandler
from src import models
import os
import pickle, math
import pandas as pd
import time

######                              ######
#                                        #
#         CONFIGURE THE PIPELINE         #
#                                        #
######                              ######

# Create pipeline object
pipObj = Pipeline()
# Define how many CPUs we can parallelize meta classification across
cpus = os.cpu_count()
# cpus = 1
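# Note: os.cpu_count() can return None on some platforms; a defensive
# fallback (a sketch, not part of the original script):
# cpus = os.cpu_count() or 1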

######                              ######
#                                        #
#           CONFIGURE THE DATA           #
#                                        #
######                              ######

# Define the data to classify
list_with_subjects_to_classify = [
    '../data/input/4003601.7z',
]
Example #2
"""
from pyspark import SparkContext, SparkConf, SQLContext
from pyspark.sql import SparkSession
from src.pipeline.Pipeline import Pipeline
from src.config.param_config.param_config import param_dict
from src.utils.arg_parse import pipeline_arg_parse

################################## Required parameters ###########################################
#### Read in the frequently-changed parameters
params = pipeline_arg_parse()
final_param_dict = param_dict
print(params)
print(param_dict)

### Start the Spark environment
try:
    # Stop any existing SparkContext before reconfiguring
    sc.stop()
except NameError:
    pass  # no context yet

conf = SparkConf()
conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
conf.set("spark.memory.fraction", "0.8")
sc = SparkContext.getOrCreate(conf)  # start with the given config

spark = SparkSession.builder.enableHiveSupport().getOrCreate()
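# A more idiomatic variant (a sketch, not the repo's code): route the same
# SparkConf through the SparkSession builder and take the context from it.
# spark = SparkSession.builder.config(conf=conf).enableHiveSupport().getOrCreate()
# sc = spark.sparkContext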
Pp = Pipeline(sc, final_param_dict, spark=spark)
result = Pp.run_feature()
Example #3
import sys, os
# Make the repository root importable when the script runs from a subdirectory
try:
    sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
except NameError:
    pass  # __file__ is undefined in interactive sessions


import pandas as pd

from src.pipeline.DataHandler import DataHandler
from src.pipeline.Pipeline import Pipeline



p = Pipeline()

list_with_subjects = [
    '../data/input/006',
    '../data/input/008',
    '../data/input/009',
    '../data/input/training_data/small_set'
]

dataframe = p.create_large_dataframe_from_multiple_input_directories(
    list_with_subjects,
    back_keywords=['Back', "b"],
    thigh_keywords=['Thigh', "t"],
    label_keywords=['GoPro', "Labels"],
    out_path=None,
    merge_column=None,
    master_columns=['bx', 'by', 'bz'],
    slave_columns=['tx', 'ty', 'tz'],
    rearrange_columns_to=None,
)
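# A quick sanity check on the merged frame (a sketch; the column names follow
# the master_columns/slave_columns arguments above):
# print(dataframe[['bx', 'by', 'bz', 'tx', 'ty', 'tz']].head())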
Example #4
df2 = dh2.get_dataframe_iterator()

print(df1.shape, df2.shape)
df1.dropna(subset=['label'], inplace=True)
df2.dropna(subset=['label'], inplace=True)
print(df1.shape, df2.shape)

############################## THEN COMBINE INTO ONE BIG TRAINING SET AKA VERTICAL STACKING #############

dataframe = dh1.vertical_stack_dataframes(df1, df2, set_as_current_df=False)
# dataframe = dh1.vertical_stack_dataframes(dataframe, df3, set_as_current_df=False)
print("DATAFRAME\n", dataframe.head(5), dataframe.shape)

############################## THEN WE MUST EXTRACT FEATURES N LABELS ######################################

pipeObj = Pipeline()
back_feat_train, thigh_feat_train, label_train = pipeObj.get_features_and_labels_as_np_array(
    dataframe)
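# Sanity check (a sketch): features and labels should agree on sample count.
# assert len(back_feat_train) == len(thigh_feat_train) == len(label_train)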

############################## THEN WE MUST TRAIN THE CLASSIFIER ######################################

RFC = models.get("RFC", {})

##############
# MODEL ARGUMENTS
##############

# Windowing / sampling parameters
sampling_frequency = 50
window_length = 120
temperature_reading_rate = 120
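# With these numbers, one window presumably spans
# sampling_frequency * window_length = 50 * 120 = 6000 sensor readings
# (an assumption: window_length is in seconds at a 50 Hz sampling rate).
# samples_per_window = sampling_frequency * window_length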
Example #5
import sys, os
# Make the repository root importable when the script runs from a subdirectory
try:
    sys.path.append(
        os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
except NameError:
    pass  # __file__ is undefined in interactive sessions

from src.pipeline.Pipeline import Pipeline
from src.pipeline.DataHandler import DataHandler
from src import models

input_dir_rel_path = "/app/data/input"
data_name = "xxx_x.7z"
label_file = "xxx_x intervals.json"

pipeline = Pipeline()

###########
#
# Run this block the first time the script processes the data;
# on later runs the result is already saved in ../data/temp/<name>
#
##########

# If there already is a temp folder with the same name
# TODO move this into the unzip-and-synch method; path is unzip_path + filename.7z
# if os.path.exists("../data/temp/{}".format(data_name)):
#     print("REMOVING OLD TEMP FOLDER")
#     os.system("rm -rf ../data/temp/{}".format(data_name))
#
#
# # first unzip and synch .7z folder
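# A more portable cleanup than shelling out to rm -rf (a sketch using only the
# standard library; same path layout as the commented-out cleanup above):
# import shutil
# temp_path = "../data/temp/{}".format(data_name)
# if os.path.exists(temp_path):
#     shutil.rmtree(temp_path)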