Example #1
    def configureAll(self, feature, scenario):
        vs = MockViewSpace(MockCanvas())
        fm = FeatureManager(vs)
        feat = feature(fm, vs)
        fm.features.append(feat)
        self.configure(fm.machine, scenario['machine_config'])
        self.configure(fm.work_piece, scenario['work_piece_config'])
        self.configure(feat, scenario['config'])
        if hasattr(feat, 'is_composed'):
            feat.addChildByClass()
            for child in feat.features:
                self.configure(child,
                               scenario['child_features'][child.__class__])
        return fm
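
A minimal usage sketch for the helper above, assuming it lives in a unittest.TestCase subclass; the feature class name, the empty config dicts, and the assertion are illustrative assumptions, only the scenario keys come from configureAll() itself.

    def test_configure_all(self):
        # Hypothetical scenario; only the keys mirror what configureAll() reads.
        scenario = {
            'machine_config': {},
            'work_piece_config': {},
            'config': {},
            'child_features': {},
        }
        fm = self.configureAll(SomeFeature, scenario)  # SomeFeature is hypothetical
        self.assertEqual(len(fm.features), 1)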
Example #2
    def __init__(self, patientId, dbConnection, plotFeatureImportance=False):
        logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s',
                            datefmt='%d.%m.%Y %I:%M:%S %p', level=logging.DEBUG)
        self.log = logging.getLogger("BaseClassifier")
        self.patient_id = patientId
        self.con = dbConnection

        self.glucose_data = list()
        self.insulin_data = list()
        self.carb_data = list()
        self.activity_data = list()
        # load data necessary for ARIMA
        self.discretized_data = list()

        ###### LOAD DATA ######
        ###### do not change line order ###
        self.load_all_data()

        ###### LOAD Feature Extraction ####
        self.Features = FeatureManager(self.glucose_data, self.insulin_data, self.carb_data,
                                       self.activity_data, self.patient_id)
        # tuning option for RF
        # set it now as a common parameter
        # for all models
        self.tune = False
        # parameters
        self.split_ratio = .66
        self.look_back = 8
        self._plotFeatureImportance = plotFeatureImportance
        self._plotLearnedModel = False

        # customize feature set option
        # TODO: set from outside
        self._customizeFeatureSet = False

        self._allFeatureDesp = list()
Example #3
import logging

import numpy as np
import pandas as pd

# FeatureManager (feature extraction) is provided elsewhere in this project


class BaseRegressor(object):

    def __init__(self, patientId, dbConnection, plotFeatureImportance=False):
        logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s',
                            datefmt='%d.%m.%Y %I:%M:%S %p', level=logging.DEBUG)
        self.log = logging.getLogger("BaseRegressor")
        self.patient_id = patientId
        self.con = dbConnection

        self.glucose_data = list()
        self.insulin_data = list()
        self.carb_data = list()
        self.activity_data = list()
        # load data necessary for ARIMA
        self.discretized_data = list()

        ###### LOAD DATA ######
        ###### do not change line order ###
        self.load_all_data()

        ###### LOAD Feature Extraction ####
        self.Features = FeatureManager(self.glucose_data, self.insulin_data, self.carb_data,
                                       self.activity_data, self.patient_id)
        # tuning option for RF
        # set it now as a common parameter
        # for all models
        self.tune = False
        # parameters
        self.split_ratio = .66
        self.look_back = 8
        self._plotFeatureImportance = plotFeatureImportance
        self._plotLearnedModel = False

        # customize feature set option
        # TODO: set from outside
        self._customizeFeatureSet = False

        self._allFeatureDesp = list()

    def load_all_data(self):
        ###### LOAD DATA ######
        self.load_glucose_data()
        self.load_insulin_data()
        self.load_carb_data()
        self.load_activity_data()
        # load data necessary for ARIMA
        self.load_discretized_data()

    def load_glucose_data(self):
        """
        Retrieve glucose (ground truth) data from database
        """
        self.log.info("Loading Glucose data for patient {}".format(self.patient_id))
        with self.con:
            cur = self.con.cursor()
            query = "SELECT date as 'time', `gt-value` as 'value', pos as 'index' FROM BG_Instance " \
                    "WHERE patientID = {patientId} and date > '2017-02-25'".format(patientId=self.patient_id)
            self.log.debug("load_glucose_data() query: '" + query + "'")
            cur.execute(query)
            logging.debug("{} rows returned".format(cur.rowcount))
            rows = cur.fetchall()
            if not rows:
                self.log.error("No Glucose data was returned!")
                return
            for row in rows:
                self.glucose_data.append(row)

        logging.debug("{} glucose measurements returned".format(len(self.glucose_data)))

    def load_insulin_data(self, ignoreBasal=False):
        """
        Retrieve insulin data; if ignoreBasal is True, only rapid insulin is loaded
        """
        self.log.info("Loading insulin data for patient {}".format(self.patient_id))
        with self.con:
            cur = self.con.cursor()
            query = "SELECT date as 'time', value, type FROM BG_Insulin " \
                    "WHERE patientID = {patientId} and date > '2017-02-25'".format(patientId=self.patient_id)
            if ignoreBasal:
                query += " and type='rapid'"
            self.log.debug("load_insulin_data() query: '" + query + "'")
            cur.execute(query)
            logging.debug("{} rows returned".format(cur.rowcount))
            rows = cur.fetchall()
            if not rows:
                self.log.error("No insulin data was returned!")
                return
            for row in rows:
                self.insulin_data.append(row)

    def load_carb_data(self):
        """
        Retrieve carbohydrate data
        """
        self.log.info("Loading carbohydrate data for patient {}".format(self.patient_id))
        with self.con:
            cur = self.con.cursor()
            query = "SELECT date as 'time', value FROM BG_carbohydrate " \
                    "WHERE patientID = {patientId} and date > '2017-02-25'".format(patientId=self.patient_id)
            self.log.debug("loadCarbohydrateData() query: '" + query + "'")
            cur.execute(query)
            logging.debug("{} rows returned".format(cur.rowcount))
            rows = cur.fetchall()
            if not rows:
                self.log.error("No carb data was returned!")
                return
            for row in rows:
                self.carb_data.append(row)

    def load_activity_data(self):
        """
        Retrieve activity data
        """
        # FIXED: import steps and use them in place of Akcal
        self.log.info("Loading activity data for patient {}".format(self.patient_id))
        with self.con:
            cur = self.con.cursor()
            query = "SELECT date as 'time', value FROM BG_steps " \
                    "WHERE patientID = {patientId} and date > '2017-02-25'".format(patientId=self.patient_id)
            self.log.debug("load_activity_data() query: '" + query + "'")
            cur.execute(query)
            logging.debug("{} rows returned".format(cur.rowcount))
            rows = cur.fetchall()
            if not rows:
                self.log.error("No activity data was returned!")
                return
            for row in rows:
                self.activity_data.append(row)

    def load_discretized_data(self):
        self.log.info("Loading discretized glucose data for patient {}".format(self.patient_id))
        with self.con:
            query = "SELECT date, bloodglucose as 'bg' FROM BG_Timeseries " \
                    "WHERE patientID = {patientId} and date > '2017-02-25' ".format(patientId=self.patient_id)
            df = pd.read_sql(query, self.con)
            logging.debug("{} rows returned".format(len(df)))
        df['date'] = pd.to_datetime(df['date'])
        df = df.set_index('date')
        # IMPORTANT: sort by date
        df = df.sort_index()

        # keep only data from 2017-02-25 onward,
        # where we have stable measurement data
        df = df["2017-02-25":]
        self.discretized_data = df.values

    def load_timestamps(self, con, patientId):
        """
        Load the raw timestamps for blood glucose data
        """
        with con:
            cur = con.cursor()
            query = "SELECT date, pos FROM BG_Instance " \
                    "WHERE patientID = {patientId} ".format(patientId=patientId)
            df = pd.read_sql(query, con)
        df['date'] = pd.to_datetime(df['date'])
        df = df.set_index('date')
        # IMPORTANT: sort by date
        df = df.sort_index()

        return df

    def predict(self):
        raise NotImplementedError()

    def save_base_params(self):
        return ";".join(
            ("tune: " + str(self.tune), "look_back: " + str(self.look_back), "split_ratio: " + str(self.split_ratio)))

    def save_params(self):
        raise NotImplementedError()

    def extract_features(self, customizeFeatureSet=False, customGroup=None):
        X, Y = self.Features.build_feature_matrix(self.look_back)
        if customGroup is not None:
            return self.Features.custom_feature_group(X, customGroup), Y
        if not customizeFeatureSet:
            return X, Y
        else:
            new_X, desp = self.Features.customFeatureGroupSubset(X)
            return new_X, Y, desp

    def to_result(self, test_glucoseData, predictions, test_y, timestamps):
        """
        Bundle the ground truth (test set) and the predictions into a result dict
        :param test_glucoseData:
        :param predictions:
        :param test_y:
        :param timestamps:
        :return: dict of results
        """
        # TODO: return ground truth (test set) and predictions (as a dict)
        results = dict()
        return results

    def categorized_y(self, y, threshold):
        '''
        Labeling Y
        :param y:
        :param threshold:
        :return:
        '''
        if len(threshold) == 1:
            if y >= threshold[0]:
                return 1
            else:
                return 0

        elif len(threshold) == 2:
            if y <= threshold[0]:
                return 0
            elif y >= threshold[0] and y <= threshold[1]:
                return 1
            else:
                return 2

    def select_k_importance(self, model, X, k=10):
        """
        Only for Decision Tree-based models
        :param model:
        :param X:
        :param k:
        :return:
        """
        return X[:, model.feature_importances_.argsort()[::-1][:k]]

    def plot_feature_importance(self, model, X):
        """
        Only for Decision Tree-based models
        :param model:
        :param X:
        :return:
        """
        if not self._plotFeatureImportance:
            return
        # import matplotlib only when necessary
        import matplotlib.pyplot as plt
        from matplotlib import rcParams
        rcParams.update({'figure.autolayout': True, 'font.size': 10})
        # do the plotting
        importances = model.feature_importances_
        std = np.std([tree.feature_importances_ for tree in model.estimators_],
                     axis=0)
        indices = np.argsort(importances)

        # Plot the feature importances of the forest
        plt.figure()
        plt.title("Feature importances")
        plt.barh(range(X.shape[1]), importances[indices],
                 color="r", xerr=std[indices], align="center")
        # If you want to define your own labels,
        # change indices to a list of labels on the following line.
        plt.yticks(range(X.shape[1]),
                   [self.Features.featureNames[i] for i in indices])
        plt.ylim([-1, X.shape[1]])
        plt.savefig("prediction/tmp/feature_importance{}.png".format(self.patient_id))
        plt.close()

    def plot_learned_model(self, test, sample, timestamp):
        '''
        Plot predictions against samples over time
        :param test:
        :param sample:
        :param timestamp:
        :return:
        '''
        if not self._plotLearnedModel:
            return
        # import matplotlib only when necessary
        import matplotlib.pyplot as plt
        import matplotlib.dates as md

        assert len(test) == len(sample)
        xfmt = md.DateFormatter('%Y-%m-%d %H')

        plt.figure()
        plt.title("Prediction Variance")
        ax = plt.gca()
        ax.set_xticks(timestamp)
        ax.xaxis.set_major_formatter(xfmt)
        p = plt.plot(timestamp, test, label="prediction")
        s = plt.plot(timestamp, sample, label="sample")
        plt.legend()
        plt.savefig("prediction/tmp/prediction_time{}.png".format(self.patient_id))
        plt.close()
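
BaseRegressor leaves predict() and save_params() abstract. Below is a minimal sketch of a concrete subclass, assuming scikit-learn's RandomForestRegressor as the model; the estimator choice and the chronological split are illustrative assumptions, only the BaseRegressor API used here (extract_features, split_ratio, save_base_params, plot_feature_importance) comes from the class above.

from sklearn.ensemble import RandomForestRegressor


class RandomForestBGRegressor(BaseRegressor):

    def predict(self):
        # Build the feature matrix and split it chronologically (assumed strategy).
        X, Y = self.extract_features()
        split = int(len(X) * self.split_ratio)
        train_x, test_x = X[:split], X[split:]
        train_y, test_y = Y[:split], Y[split:]

        model = RandomForestRegressor(n_estimators=100)
        model.fit(train_x, train_y)
        self.plot_feature_importance(model, train_x)
        return model.predict(test_x), test_y

    def save_params(self):
        return self.save_base_params() + ";n_estimators: 100"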
Example #4
import numpy as np
from keras.models import model_from_json  # assuming standalone Keras

from feature_manager import FeatureManager

# CONFIGURATIONS
path_to_datasets = "./Data/Bitfinex/"

epochs = 10
batch_size = 128
look_back = 50
look_forward = 10

pairs_info = {
    "BTCUSD": "./Data/Bitfinex/BTCUSD/all_in_one.csv",
    "ETHUSD": "./Data/Bitfinex/ETHUSD/all_in_one.csv",
    "LTCUSD": "./Data/Bitfinex/LTCUSD/all_in_one.csv"
}

feature_manager = FeatureManager.merge_pairs(pairs_info)

feature_names = ["open", "high", "low", "close"]

(X_train, Y_train), (X_test, Y_test) = \
    feature_manager.load_splitted_datasets(file_path=path_to_datasets,
                                           prefix="bitf_polo_30_",
                                           split_ratio=0.9)
np.random.seed(202)

with open("./Data/StateLESS_LSTM.json", "r") as json_file:
    loaded_model_json = json_file.read()

model = model_from_json(loaded_model_json)
model.load_weights("./Data/StateLESS_LSTM.h5")
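
A hedged follow-up: once the architecture and weights are restored, the model still has to be compiled before it can be evaluated. The loss/optimizer below are assumptions; only model_from_json/load_weights and the (X_test, Y_test) split come from the snippet above.

# Assumed compile settings; the original snippet does not show them.
model.compile(loss="mean_squared_error", optimizer="adam")

predictions = model.predict(X_test, batch_size=batch_size)
test_loss = model.evaluate(X_test, Y_test, batch_size=batch_size)
print("test loss: {:.6f}".format(test_loss))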
Example #5
import utility
import csv
from feature_manager import FeatureManager

directory_path = "C:/code/features/output/video/structure-analysis-video-dev/"
feature_files = utility.get_file_name_list(directory_path)
feature_vector_list = []
iteration = 1
max_value = len(feature_files)
for file_name in feature_files:
    fm = FeatureManager(directory_path, file_name)
    feature_vector_list = feature_vector_list + fm.get_feature_vector_list()
    utility.print_progress_bar(iteration, max_value)
    iteration += 1
print "start csv"
with open('complete_video_features.csv', 'wb') as f:
    the_writer = csv.writer(f)
    headers = [
        "video", "shot_number", "category", "interactions_number_speakers_2",
        "interactions_number_speakers_3", "interactions_number_speakers_4",
        "interactions_number_speakers_4+", "intervention_short",
        "intervention_long", "speakers_type_ponctuel",
        "speakers_type_localise", "speakers_type_present",
        "speakers_type_regulier", "speakers_type_important",
        "speaker_distribution", "mean_number_of_faces", "std_number_of_faces",
        "inter_intensity_variation1", "inter_intensity_variation2",
        "inter_intensity_variation3", "inter_intensity_variation4",
        "inter_intensity_variation5", "inter_intensity_variation6",
        "inter_intensity_variation7", "inter_intensity_variation8",
        "inter_intensity_variation9", "intra_intensity_variation1",
        "intra_intensity_variation2", "intra_intensity_variation3",
Example #6
import numpy as np
from data_sampler import DataSampler
from feature_manager import FeatureManager

basic_path = "./Data/Poloniex/"

for pair in ["BTCETH", "BTCGAS", "BTCXRP"]:
    print("Extracting " + pair)
    DataSampler.sample_files_from_dir(
        target_dir=basic_path + pair + "/",
        save_to_dir=basic_path + pair + "/",
        time_delta=np.timedelta64(30, "s"),
        file_type="csv",
        start_date=np.datetime64("2018-01-21T00:00:00"),
        end_date=np.datetime64("2018-02-06T14:21:00"),
        name=pair + "_30")

for pair in ["BTCETH", "BTCGAS", "BTCXRP"]:
    feature_manager = FeatureManager()
    feature_manager.read_dataset_from_csv("{}{}/{}_30.csv".format(
        basic_path, pair, pair))
    print(feature_manager.get_trades_df().head())

    feature_manager.add_indicators()
    feature_manager.save_dataset_to_csv("{}{}/all_in_one_30.csv".format(
        basic_path, pair))
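
The per-pair all_in_one_30.csv files written above can then be merged the same way Example #4 merges the Bitfinex pairs; a minimal sketch, assuming FeatureManager.merge_pairs accepts the same pair-to-path mapping shown there.

# Hedged follow-up: merge the freshly written Poloniex datasets into one FeatureManager.
pairs_info = {
    "BTCETH": basic_path + "BTCETH/all_in_one_30.csv",
    "BTCGAS": basic_path + "BTCGAS/all_in_one_30.csv",
    "BTCXRP": basic_path + "BTCXRP/all_in_one_30.csv",
}
merged_manager = FeatureManager.merge_pairs(pairs_info)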
Example #7
def main(output_column):
    # pairs_info = {"BTCUSD": "./Data/Bitfinex/BTCUSD/all_in_one.csv",
    #               "ETHUSD": "./Data/Bitfinex/ETHUSD/all_in_one.csv",
    #               "LTCUSD": "./Data/Bitfinex/LTCUSD/all_in_one.csv",
    #               "XRPUSD": "./Data/Bitfinex/XRPUSD/all_in_one.csv",
    #               "BCHUSD": "./Data/Bitfinex/BCHUSD/all_in_one.csv",
    #               "ETCUSD": "./Data/Bitfinex/ETCUSD/all_in_one.csv",
    #               "ZECUSD": "./Data/Bitfinex/ZECUSD/all_in_one.csv",
    #               "XMRUSD": "./Data/Bitfinex/XMRUSD/all_in_one.csv",
    #               "NEOUSD": "./Data/Bitfinex/NEOUSD/all_in_one.csv",
    #
    #               "BTCETH": "./Data/Poloniex/BTCETH/all_in_one.csv",
    #               "BTCXRP": "./Data/Poloniex/BTCXRP/all_in_one.csv"}

    # feature_manager = FeatureManager.merge_pairs(pairs_info)

    # feature_manager.add_total_market_cap(path="./Data/cmc_5s.csv")

    feature_manager = FeatureManager()
    feature_manager.read_dataset_from_csv("Data/BTC_data.csv")

    # feature_manager.save_dataset_to_csv("./Data/BTC-ETH-ETC-LTC-XRP-XMR-BCH-ZEC-NEO.csv")

    trades_features = [
        "BTCUSD_open",
        "BTCUSD_high",
        "BTCUSD_low",
        "BTCUSD_close",
        "BTCUSD_total_volume",
        "BTCUSD_total_amount",
    ]
    ind = ['MACD',
           'CCI',
           'ATR',
           'BOLL',
           'BOOL20',
           'EMA20',
           'MA5',
           'MA10',
           'MTM6',
           'MTM12',
           'ROC',
           'SMI',
           'WVAD']

    other_features = [
        'time_since_extrema',
        'grow',
        #'growth_decrease'
    ]

    all_features = trades_features + other_features + ind

    feature_manager.extract_features(all_features, fill_nan=True, scale=True)

    # CONFIGURATIONS
    path_to_datasets = "./Data/"

    # input_columns = trades_features + ['extremas', 'growth_decrease']
    input_columns = all_features
    prefix = "couple_30_"

    trainer = Trainer(feature_manager=feature_manager,
                      prefix=prefix,
                      path_to_datasets=path_to_datasets,
                      output_column=output_column,
                      input_columns=input_columns,
                      is_saved_dataset=False)

    trainer.run()
    trainer.save_model()

    prediction, verification = trainer.build_prediction()
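
A hedged entry point for the snippet above; "BTCUSD_close" is an assumed target column, any column produced by extract_features() would work.

if __name__ == "__main__":
    main("BTCUSD_close")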