def configureAll(self, feature, scenario):
    vs = MockViewSpace(MockCanvas())
    fm = FeatureManager(vs)
    feat = feature(fm, vs)
    fm.features.append(feat)
    self.configure(fm.machine, scenario['machine_config'])
    self.configure(fm.work_piece, scenario['work_piece_config'])
    self.configure(feat, scenario['config'])
    if hasattr(feat, 'is_composed'):
        feat.addChildByClass()
        for child in feat.features:
            self.configure(child, scenario['child_features'][child.__class__])
    return fm
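# --- Hedged usage sketch (not part of the original source) ---
# configureAll() expects the scenario dict to carry the keys it reads above;
# the feature classes and config payloads below are hypothetical placeholders,
# only the key names come from the helper itself.
#
# scenario = {
#     'machine_config': {...},
#     'work_piece_config': {...},
#     'config': {...},
#     'child_features': {SomeChildFeature: {...}},
# }
# fm = self.configureAll(SomeComposedFeature, scenario)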
import logging

import numpy as np
import pandas as pd

# NOTE: the module path for FeatureManager is assumed here; adjust it to
# wherever the feature extraction class lives in this project.
from FeatureManager import FeatureManager


class BaseRegressor(object):

    def __init__(self, patientId, dbConnection, plotFeatureImportance=False):
        logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s',
                            datefmt='%d.%m.%Y %I:%M:%S %p', level=logging.DEBUG)
        self.log = logging.getLogger("BaseRegressor")
        self.patient_id = patientId
        self.con = dbConnection
        self.glucose_data = list()
        self.insulin_data = list()
        self.carb_data = list()
        self.activity_data = list()
        # load data necessary for ARIMA
        self.discretized_data = list()

        ###### LOAD DATA ######
        ###### do not change line order ###
        self.load_all_data()

        ###### LOAD Feature Extraction ####
        self.Features = FeatureManager(self.glucose_data, self.insulin_data,
                                       self.carb_data, self.activity_data,
                                       self.patient_id)

        # tuning option for RF; set here as a common parameter for all models
        self.tune = False
        # parameters
        self.split_ratio = .66
        self.look_back = 8
        self._plotFeatureImportance = plotFeatureImportance
        self._plotLearnedModel = False
        # customize feature set option
        # TODO: set from outside
        self._customizeFeatureSet = False
        self._allFeatureDesp = list()

    def load_all_data(self):
        ###### LOAD DATA ######
        self.load_glucose_data()
        self.load_insulin_data()
        self.load_carb_data()
        self.load_activity_data()
        # load data necessary for ARIMA
        self.load_discretized_data()

    def load_glucose_data(self):
        """ Retrieve glucose (ground truth) data from database """
        self.log.info("Loading Glucose data for patient {}".format(self.patient_id))
        with self.con:
            cur = self.con.cursor()
            query = "SELECT date as 'time', `gt-value` as 'value', pos as 'index' FROM BG_Instance " \
                    "WHERE patientID = {patientId} and date > '2017-02-25'".format(patientId=self.patient_id)
            self.log.debug("load_glucose_data() query: '" + query + "'")
            cur.execute(query)
            logging.debug("{} rows returned".format(cur.rowcount))
            rows = cur.fetchall()
            if not rows:
                self.log.error("No Glucose data was returned!")
                return
            for row in rows:
                self.glucose_data.append(row)
        logging.debug("{} glucose measurements returned".format(len(self.glucose_data)))

    def load_insulin_data(self, ignoreBasal=False):
        """ Retrieve insulin data """
        self.log.info("Loading insulin data for patient {}".format(self.patient_id))
        if ignoreBasal:
            with self.con:
                cur = self.con.cursor()
                query = "SELECT date as 'time', value, type FROM BG_Insulin " \
                        "WHERE patientID = {patientId} and date > '2017-02-25' and type='rapid'".format(
                            patientId=self.patient_id)
                self.log.debug("load_insulin_data() query: '" + query + "'")
                cur.execute(query)
                logging.debug("{} rows returned".format(cur.rowcount))
                rows = cur.fetchall()
                if not rows:
                    self.log.error("No insulin data was returned!")
                    return
                for row in rows:
                    self.insulin_data.append(row)
        else:
            with self.con:
                cur = self.con.cursor()
                query = "SELECT date as 'time', value, type FROM BG_Insulin " \
                        "WHERE patientID = {patientId} and date > '2017-02-25'".format(patientId=self.patient_id)
                self.log.debug("load_insulin_data() query: '" + query + "'")
                cur.execute(query)
                logging.debug("{} rows returned".format(cur.rowcount))
                rows = cur.fetchall()
                if not rows:
                    self.log.error("No insulin data was returned!")
                    return
                for row in rows:
                    self.insulin_data.append(row)

    def load_carb_data(self):
        """ Retrieve carbohydrate data """
        self.log.info("Loading carbohydrate data for patient {}".format(self.patient_id))
        with self.con:
            cur = self.con.cursor()
            query = "SELECT date as 'time', value FROM BG_carbohydrate " \
                    "WHERE patientID = {patientId} and date > '2017-02-25'".format(patientId=self.patient_id)
            self.log.debug("load_carb_data() query: '" + query + "'")
            cur.execute(query)
            logging.debug("{} rows returned".format(cur.rowcount))
            rows = cur.fetchall()
            if not rows:
                self.log.error("No carb data was returned!")
                return
            for row in rows:
                self.carb_data.append(row)

    def load_activity_data(self):
        """ Retrieve activity data """
        # FIXED: import steps and use them in place of Akcal
        self.log.info("Loading activity data for patient {}".format(self.patient_id))
        with self.con:
            cur = self.con.cursor()
            query = "SELECT date as 'time', value FROM BG_steps " \
                    "WHERE patientID = {patientId} and date > '2017-02-25'".format(patientId=self.patient_id)
            self.log.debug("load_activity_data() query: '" + query + "'")
            cur.execute(query)
            logging.debug("{} rows returned".format(cur.rowcount))
            rows = cur.fetchall()
            if not rows:
                self.log.error("No activity data was returned!")
                return
            for row in rows:
                self.activity_data.append(row)

    def load_discretized_data(self):
        self.log.info("Loading discretized glucose data for patient {}".format(self.patient_id))
        with self.con:
            query = "SELECT date, bloodglucose as 'bg' FROM BG_Timeseries " \
                    "WHERE patientID = {patientId} and date > '2017-02-25' ".format(patientId=self.patient_id)
            df = pd.read_sql(query, self.con)
            logging.debug("{} rows returned".format(len(df)))
            df['date'] = pd.to_datetime(df['date'])
            df = df.set_index('date')
            # IMPORTANT: sort by date
            df = df.sort_index()
            # select the time only from 2017-02-25 on,
            # where we have stable measurement data
            df = df["2017-02-25":]
            min_date = min(df['bg'].index)
            max_date = max(df['bg'].index)
            df = df[min_date:max_date]
            self.discretized_data = df.values

    def load_timestamps(self, con, patientId):
        """ Load the raw timestamps for blood glucose data """
        with con:
            query = "SELECT date, pos FROM BG_Instance " \
                    "WHERE patientID = {patientId} ".format(patientId=patientId)
            df = pd.read_sql(query, con)
            df['date'] = pd.to_datetime(df['date'])
            df = df.set_index('date')
            # IMPORTANT: sort by date
            df = df.sort_index()
            return df

    def predict(self):
        raise NotImplementedError()

    def save_base_params(self):
        return ";".join(("tune: " + str(self.tune),
                         "look_back: " + str(self.look_back),
                         "split_ratio: " + str(self.split_ratio)))

    def save_params(self):
        raise NotImplementedError()

    def extract_features(self, customizeFeatureSet=False, customGroup=None):
        X, Y = self.Features.build_feature_matrix(self.look_back)
        if customGroup is not None:
            return self.Features.custom_feature_group(X, customGroup), Y
        if not customizeFeatureSet:
            return X, Y
        else:
            new_X, desp = self.Features.customFeatureGroupSubset(X)
            return new_X, Y, desp

    def to_result(self, test_glucoseData, predictions, test_y, timestamps):
        """
        :param test_glucoseData:
        :param predictions:
        :param test_y:
        :param timestamps:
        :return:
        """
        # TODO: return ground truth (test set) and predictions (as a dict)
        results = dict()
        return results

    def categorized_y(self, y, threshold):
        """
        Labeling Y
        :param y:
        :param threshold:
        :return:
        """
        if len(threshold) == 1:
            if y >= threshold[0]:
                return 1
            else:
                return 0
        elif len(threshold) == 2:
            if y <= threshold[0]:
                return 0
            elif threshold[0] <= y <= threshold[1]:
                return 1
            else:
                return 2

    def select_k_importance(self, model, X, k=10):
        """
        Only for Decision Tree-based models
        :param model:
        :param X:
        :param k:
        :return:
        """
        return X[:, model.feature_importances_.argsort()[::-1][:k]]

    def plot_feature_importance(self, model, X):
        """
        Only for Decision Tree-based models
        :param model:
        :param X:
        :return:
        """
        if not self._plotFeatureImportance:
            return
        # import matplotlib only when necessary
        import matplotlib.pyplot as plt
        from matplotlib import rcParams
        rcParams.update({'figure.autolayout': True, 'font.size': 10})

        # do the plotting
        importances = model.feature_importances_
        std = np.std([tree.feature_importances_ for tree in model.estimators_], axis=0)
        indices = np.argsort(importances)

        # Plot the feature importances of the forest
        plt.figure()
        plt.title("Feature importances")
        plt.barh(range(X.shape[1]), importances[indices],
                 color="r", xerr=std[indices], align="center")
        # If you want to define your own labels,
        # change indices to a list of labels on the following line.
        plt.yticks(range(X.shape[1]), self.Features.featureNames)
        plt.ylim([-1, X.shape[1]])
        plt.savefig("prediction/tmp/feature_importance{}.png".format(self.patient_id))
        plt.close()

    def plot_learned_model(self, test, sample, timestamp):
        """
        :param test:
        :param sample:
        :param timestamp:
        :return:
        """
        if not self._plotLearnedModel:
            return
        # import matplotlib only when necessary
        import matplotlib.pyplot as plt
        import matplotlib.dates as md

        assert len(test) == len(sample)
        xfmt = md.DateFormatter('%Y-%m-%d %H')
        plt.figure()
        plt.title("Prediction Variance")
        ax = plt.gca()
        ax.set_xticks(timestamp)
        ax.xaxis.set_major_formatter(xfmt)
        p = plt.plot(timestamp, test, label="prediction")
        s = plt.plot(timestamp, sample, label="sample")
        plt.legend()
        plt.savefig("prediction/tmp/prediction_time{}.png".format(self.patient_id))
        plt.close()
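# --- Hedged usage sketch (not part of the original source) ---
# BaseRegressor leaves predict() and save_params() abstract. A concrete model
# might subclass it roughly as below; RandomForestRegressorModel and its body
# are hypothetical, only extract_features(), split_ratio, save_base_params()
# and plot_feature_importance() come from the base class above.
from sklearn.ensemble import RandomForestRegressor


class RandomForestRegressorModel(BaseRegressor):

    def predict(self):
        # lagged feature matrix and targets provided by FeatureManager
        X, Y = self.extract_features()
        split = int(len(X) * self.split_ratio)
        train_X, test_X = X[:split], X[split:]
        train_y, test_y = Y[:split], Y[split:]

        model = RandomForestRegressor(n_estimators=100)
        model.fit(train_X, train_y)
        predictions = model.predict(test_X)

        # no-op unless plotFeatureImportance was requested in the constructor
        self.plot_feature_importance(model, train_X)
        return predictions, test_y

    def save_params(self):
        # persist the shared base parameters; add model-specific ones as needed
        return self.save_base_params()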
import numpy as np
# assumes the standalone Keras package is installed
from keras.models import model_from_json

from feature_manager import FeatureManager

# CONFIGURATIONS
path_to_datasets = "./Data/Bitfinex/"
epochs = 10
batch_size = 128
look_back = 50
look_forward = 10

pairs_info = {
    "BTCUSD": "./Data/Bitfinex/BTCUSD/all_in_one.csv",
    "ETHUSD": "./Data/Bitfinex/ETHUSD/all_in_one.csv",
    "LTCUSD": "./Data/Bitfinex/LTCUSD/all_in_one.csv"
}

feature_manager = FeatureManager.merge_pairs(pairs_info)
feature_names = ["open", "high", "low", "close"]

(X_train, Y_train), (X_test, Y_test) = \
    feature_manager.load_splitted_datasets(file_path=path_to_datasets,
                                           prefix="bitf_polo_30_",
                                           split_ratio=0.9)

np.random.seed(202)

# restore the trained model architecture and weights
json_file = open("./Data/StateLESS_LSTM.json", "r")
loaded_model_json = json_file.read()
json_file.close()
model = model_from_json(loaded_model_json)
model.load_weights("./Data/StateLESS_LSTM.h5")
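# --- Hedged continuation sketch (not part of the original source) ---
# The architecture restored from JSON carries no training configuration, so a
# typical next step is to compile it and score it on the held-out split loaded
# above. The loss/optimizer choice and the evaluation call are assumptions,
# not taken from the original script.
model.compile(loss="mean_squared_error", optimizer="adam")
test_loss = model.evaluate(X_test, Y_test, batch_size=batch_size)
print("Test loss (MSE): {}".format(test_loss))
predictions = model.predict(X_test, batch_size=batch_size)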
import utility
import csv
from feature_manager import FeatureManager

directory_path = "C:/code/features/output/video/structure-analysis-video-dev/"
feature_files = utility.get_file_name_list(directory_path)

feature_vector_list = []
iteration = 1
max_value = len(feature_files)
for file_name in feature_files:
    fm = FeatureManager(directory_path, file_name)
    feature_vector_list = feature_vector_list + fm.get_feature_vector_list()
    utility.print_progress_bar(iteration, max_value)
    iteration += 1

print "start csv"
with open('complete_video_features.csv', 'wb') as f:
    the_writer = csv.writer(f)
    headers = [
        "video",
        "shot_number",
        "category",
        "interactions_number_speakers_2",
        "interactions_number_speakers_3",
        "interactions_number_speakers_4",
        "interactions_number_speakers_4+",
        "intervention_short",
        "intervention_long",
        "speakers_type_ponctuel",
        "speakers_type_localise",
        "speakers_type_present",
        "speakers_type_regulier",
        "speakers_type_important",
        "speaker_distribution",
        "mean_number_of_faces",
        "std_number_of_faces",
        "inter_intensity_variation1",
        "inter_intensity_variation2",
        "inter_intensity_variation3",
        "inter_intensity_variation4",
        "inter_intensity_variation5",
        "inter_intensity_variation6",
        "inter_intensity_variation7",
        "inter_intensity_variation8",
        "inter_intensity_variation9",
        "intra_intensity_variation1",
        "intra_intensity_variation2",
        "intra_intensity_variation3",
import numpy as np

from data_sampler import DataSampler
from feature_manager import FeatureManager

basic_path = "./Data/Poloniex/"

# resample the raw trade files of each pair onto a fixed 30-second grid
for pair in ["BTCETH", "BTCGAS", "BTCXRP"]:
    print("Extracting " + pair)
    DataSampler.sample_files_from_dir(
        target_dir=basic_path + pair + "/",
        save_to_dir=basic_path + pair + "/",
        time_delta=np.timedelta64(30, "s"),
        file_type="csv",
        start_date=np.datetime64("2018-01-21T00:00:00"),
        end_date=np.datetime64("2018-02-06T14:21:00"),
        name=pair + "_30")

# enrich each resampled pair with technical indicators and write it back out
for pair in ["BTCETH", "BTCGAS", "BTCXRP"]:
    feature_manager = FeatureManager()
    feature_manager.read_dataset_from_csv("{}{}/{}_30.csv".format(
        basic_path, pair, pair))
    print(feature_manager.get_trades_df().head())
    feature_manager.add_indicators()
    feature_manager.save_dataset_to_csv("{}{}/all_in_one_30.csv".format(
        basic_path, pair))
def main(output_column):
    # pairs_info = {"BTCUSD": "./Data/Bitfinex/BTCUSD/all_in_one.csv",
    #               "ETHUSD": "./Data/Bitfinex/ETHUSD/all_in_one.csv",
    #               "LTCUSD": "./Data/Bitfinex/LTCUSD/all_in_one.csv",
    #               "XRPUSD": "./Data/Bitfinex/XRPUSD/all_in_one.csv",
    #               "BCHUSD": "./Data/Bitfinex/BCHUSD/all_in_one.csv",
    #               "ETCUSD": "./Data/Bitfinex/ETCUSD/all_in_one.csv",
    #               "ZECUSD": "./Data/Bitfinex/ZECUSD/all_in_one.csv",
    #               "XMRUSD": "./Data/Bitfinex/XMRUSD/all_in_one.csv",
    #               "NEOUSD": "./Data/Bitfinex/NEOUSD/all_in_one.csv",
    #               "BTCETH": "./Data/Poloniex/BTCETH/all_in_one.csv",
    #               "BTCXRP": "./Data/Poloniex/BTCXRP/all_in_one.csv"}
    # feature_manager = FeatureManager.merge_pairs(pairs_info)
    # feature_manager.add_total_market_cap(path="./Data/cmc_5s.csv")

    feature_manager = FeatureManager()
    feature_manager.read_dataset_from_csv("Data/BTC_data.csv")
    # feature_manager.save_dataset_to_csv("./Data/BTC-ETH-ETC-LTC-XRP-XMR-BCH-ZEC-NEO.csv")

    trades_features = [
        "BTCUSD_open",
        "BTCUSD_high",
        "BTCUSD_low",
        "BTCUSD_close",
        "BTCUSD_total_volume",
        "BTCUSD_total_amount",
    ]
    ind = ['MACD', 'CCI', 'ATR', 'BOLL', 'BOOL20', 'EMA20', 'MA5', 'MA10',
           'MTM6', 'MTM12', 'ROC', 'SMI', 'WVAD']
    other_features = [
        'time_since_extrema',
        'grow',
        # 'growth_decrease'
    ]
    all_features = trades_features + other_features + ind

    feature_manager.extract_features(all_features, fill_nan=True, scale=True)

    # CONFIGURATIONS
    path_to_datasets = "./Data/"
    # input_columns = trades_features + ['extremas', 'growth_decrease']
    input_columns = all_features
    prefix = "couple_30_"

    trainer = Trainer(feature_manager=feature_manager,
                      prefix=prefix,
                      path_to_datasets=path_to_datasets,
                      output_column=output_column,
                      input_columns=input_columns,
                      is_saved_dataset=False)
    trainer.run()
    trainer.save_model()

    prediction, verification = trainer.build_prediction()
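    # --- Hedged follow-up sketch (not part of the original source) ---
    # build_prediction() appears to return the model output alongside the
    # matching ground-truth series; scoring them with a plain RMSE is an
    # assumption here, not part of the original Trainer API.
    import numpy as np

    prediction = np.asarray(prediction, dtype=float).ravel()
    verification = np.asarray(verification, dtype=float).ravel()
    rmse = np.sqrt(np.mean((prediction - verification) ** 2))
    print("RMSE for {}: {:.6f}".format(output_column, rmse))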