Example #1
def main(argv):
    if argv[1] == 'train_process':
        get_data()
        data_preparation = DataPreparation()
        data_preparation.generate_data_for_model()
        train_model = Train()
        train_model.compute_locations_models()
        prediction = Prediction()
        prediction.get_models()
        create_dashboard(prediction)
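A minimal entry-point sketch for the dispatcher above, assuming it lives in main.py as the docstrings in Example #5 suggest:

import sys

if __name__ == '__main__':
    # e.g. invoked as: python main.py train_process
    main(sys.argv)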
Example #2
def __init__(self, model_deciding=None):
    get_time()
    # 'all' starts from the raw data path; anything else reuses the engineered feature data
    self.data_path = data_path if model_deciding == 'all' else features_data_path
    self.data = get_data(path=self.data_path, is_for_model=False)
    self.columns = list(self.data.columns)
    self.features = decide_feature_name(feature_path)
    self.model_deciding = model_deciding
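Judging by Example #5 below, this constructor appears to belong to the feature-engineering class; a hypothetical usage sketch (the class name CreateFeatures is taken from that example):

# 'all' rebuilds every feature listed in features.json;
# a single feature name (e.g. 'c_m_ratios') computes just that feature
create_feature = CreateFeatures(model_deciding='all')
create_feature.compute_features()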
Example #3
def plot_subject(subject, session, options):
    """Load, Process and plot data for one subject/session pair.

       Possible options: ('ecg','gsr','blocks','trials','plot_beats','show','save')
    """

    # transform all keys to lower case
    options = {key.lower():value for key, value in options.items()}

    # add missing keys (defaulting to False) and transform to namedtuple;
    # 'plot_beats', 'show' and 'save' are included so the attribute lookups
    # further down cannot raise AttributeError when the caller omits them
    option_keys = ('do_ecg', 'do_gsr', 'do_blocks', 'do_trials',
                   'plot_beats', 'show', 'save')
    default_opts = dict.fromkeys(option_keys, False)
    default_opts.update(options)

    if 'figsize' not in default_opts:
        default_opts['figsize'] = (15,10)

    options = namedtuple('Options', default_opts.keys())(**default_opts)

    physio_data, trials, time_range = da.get_data(subject, session)
    results = dpp.process_data(physio_data, trials, subject, session, options)
    names,figs = plot_results(results, options)

    start_time = datetime.fromtimestamp(time_range[0]).strftime('%Y-%m-%d %H:%M:%S')
    end_time = datetime.fromtimestamp(time_range[1]).strftime('%Y-%m-%d %H:%M:%S')

    print('start_time:', start_time, ' end_time:', end_time)

    if options.save:
        for name,fig in zip(names,figs):
            filename = subject + '_' + session + '_' + name
            save_plot(filename, fig)

    if options.show:
        plt.show()
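A hedged call sketch for the function above; the option keys follow the option_keys tuple, and the subject/session strings are illustrative (the table-writing examples below pass them as strings):

options = {
    'do_ecg': True,   # process and plot the ECG channel
    'do_gsr': True,   # process and plot the GSR channel
    'show': True,     # display the figures
    'save': False,    # do not write figures to disk
}
plot_subject('312', '1', options)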
Example #4
def __init__(self,
             hyper_parameters=None,
             model_deciding=None,
             last_day_predictor=None,
             params=None):
    get_time()
    self.data = get_data(main_data_path + params['args']['data'],
                         True)  # data created by the feature-engineering step
    self.features = list(
        decide_feature_name(main_data_path +
                            params['args']['feature_set']).keys())
    self.params = hyper_parameters  # hyperparameters for the model: hyper_parameters.json
    self.model_params = params
    self.train, self.test = None, None
    self.X = None
    self.optimum_cluster_centroids = None
    self.centroids = None
    self.po_data = None  # possible-outlier transactions data
    self.model_dbscan = None
    self.m_s, self.eps = [], []
    self.o_min_sample = None
    self.o_epsilon = None
    self.o_devision = None
    self.last_day_predictor = last_day_predictor  # data-splitting indicator
    self.uids = None
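The m_s/eps lists and the o_min_sample/o_epsilon slots suggest this class grid-searches DBSCAN's two main hyperparameters. A rough standalone sketch of that idea with scikit-learn; the data and parameter grids here are made up for illustration:

import numpy as np
from sklearn.cluster import DBSCAN
from sklearn.metrics import silhouette_score

X = np.random.RandomState(0).rand(200, 2)  # stand-in feature matrix

best = (None, None, -1.0)
for eps in (0.05, 0.1, 0.2):
    for min_samples in (3, 5, 10):
        labels = DBSCAN(eps=eps, min_samples=min_samples).fit_predict(X)
        if len(set(labels) - {-1}) < 2:  # silhouette needs at least 2 clusters
            continue
        score = silhouette_score(X, labels)
        if score > best[2]:
            best = (eps, min_samples, score)

print('best eps=%s, min_samples=%s (silhouette %.3f)' % best)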
Example #5
def main(args):
    logger.get_time()
    if is_local_run:
        args = sample_args
    sys.stdout = logger.Logger()
    print("*"*3, " args :", args)
    if len(args) != 0:
        if (args[1]) == 'feature_engineering':
            """
            run from terminal: python main.py feature_engineering all
            all: create all features which are at features.json
            Ex: 'python main.py feature_engineering c_m_ratios' create only 'c_m_ratios' adds to features set.
            """
            create_feature = CreateFeatures(model_deciding=args[2])
            create_feature.compute_features()

        if args[1] == 'train_process':
            # TODO: description must be updated
            """
            run from terminal: python main.py train_process 0
            0: test data is split by date
            1: test data is the last day of each customer
            Models: Isolation Forest and AutoEncoder, for multivariate and univariate models
            """
            train = trainModel(args=args)
            train.process()

        if args[1] == 'prediction':
            # TODO: description must be updated
            """
            run from terminal: python main.py prediction 0
            0: test data is split by date
            1: test data is the last day of each customer
            Creates a prediction value for each transaction and adds it to the raw data set
            """
            prediction = trainModel(args=args, is_prediction=True)
            prediction.process()

        if args[1] == 'dashboard':
            # TODO: description must be updated
            """
            run from terminal: python main.py dashboard 0 # 10.20.10.196:3030
            run from terminal: python main.py dashboard 0 uni # 10.20.10.196:3031
            0: test data is split by date
            1: test data is the last day of each customer
            uni: create dashboards only for the univariate models; in order to run the multivariate dashboard, assign null
            Dashboards for the multivariate and univariate models are created
            """
            # TODO: get prediction data from predicted .csv file
            model = ModelTrainIsolationForest(last_day_predictor=int(args[2]))
            model.train_test_split()
            create_dahboard(model.train, get_data(pred_data_path, True))
        logger.get_time()
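The sys.stdout = logger.Logger() line above suggests a tee-style logger that mirrors console output to a file. The real logger module is not shown; a minimal sketch of the pattern, with the log path assumed:

import sys

class Logger(object):
    """Tee every write to both the terminal and a log file."""

    def __init__(self, path='main.log'):  # path is an assumption
        self.terminal = sys.__stdout__
        self.log = open(path, 'a')

    def write(self, message):
        self.terminal.write(message)
        self.log.write(message)

    def flush(self):
        self.terminal.flush()
        self.log.flush()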
Example #6
def __init__(self,
             hyper_parameters=None,
             last_day_predictor=None,
             params=None):
    get_time()
    self.data = get_data(features_data_path,
                         True)  # data created by the feature-engineering step
    # TODO: get specific feature from specific model.
    self.features = list(decide_feature_name(feature_path).keys())
    self.params = hyper_parameters  # hyperparameters for the model: hyper_parameters.json
    self.model_params = params
    self.train, self.test = None, None
    self.X = None
    self.model_e_iso_f = None
    self.last_day_predictor = last_day_predictor  # data-splitting indicator
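model_e_iso_f presumably ends up holding a fitted isolation forest. A minimal scikit-learn sketch of that step; the data and hyperparameters are stand-ins (the real values would come from hyper_parameters.json):

import numpy as np
from sklearn.ensemble import IsolationForest

X_train = np.random.RandomState(1).rand(500, 4)  # stand-in for self.X

model = IsolationForest(n_estimators=100, contamination=0.01, random_state=1)
model.fit(X_train)

scores = model.decision_function(X_train)  # lower scores = more anomalous
labels = model.predict(X_train)            # -1 = outlier, 1 = inlier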
Example #7
def __init__(self,
             hyper_parameters=None,
             last_day_predictor=None,
             params=None):
    get_time()
    self.data = get_data(features_data_path, True)
    self.features = list(decide_feature_name(feature_path).keys())
    self.params = hyper_parameters
    self.last_day_predictor = last_day_predictor
    self.model_params = params
    self.train, self.test = None, None
    self.X, self.y_pred, self.y = None, None, None
    self.input, self.fr_output = None, None
    self.model_ae, self.model_ae_l, self.model_u = None, None, None
    # collect the available GPUs only when GPU execution is enabled
    self.gpu_devices = [
        d for d in device_lib.list_local_devices()
        if d.device_type == "GPU"
    ] if run_gpu else []
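device_lib here is presumably TensorFlow's internal device lister. The same GPU check can be run standalone as below; on TF 2.x, tf.config.list_physical_devices('GPU') is the public alternative:

from tensorflow.python.client import device_lib

gpu_devices = [d for d in device_lib.list_local_devices()
               if d.device_type == 'GPU']
print('GPUs visible to TensorFlow:', [d.name for d in gpu_devices])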
Example #9
            """
            run from terminal: python main.py train_process 0
            0/1: 0; test data splits from date
                 1: test data is last day of each customer
            Models: isolation forest & AutoEncoder & DBScan 
            """
            train = trainModel(args=sys.argv)
            train.process()

        if sys.argv[1] == 'prediction':
            """
            run from terminal: python main.py prediction 0
            0: test data is split by date
            1: test data is the last day of each customer
            Creates a prediction value for each transaction and adds it to the raw data set
            """
            prediction = trainModel(args=sys.argv, is_prediction=True)
            prediction.process()

        if sys.argv[1] == 'dashboard':
            """
            run from terminal: python main.py dashboard 0
            0: test data is split by date
            1: test data is the last day of each customer
            """
            model = ModelTrainIsolationForest(
                last_day_predictor=int(sys.argv[2]))
            model.train_test_split()
            create_dahboard(model.train, get_data(pred_data_path, True))
        logger.get_time()
Example #10
def save_raw_table():

    #subjects = [403, 416, 421, 424, 430, 433, 434, 437, 419, 420, 425, 426, 428, 429, 432, 436]
    
    #subjects = [314, 319,321,323,325,326,327,328,332,333]

    subjects = [312, 314, 315, 317, 320, 322,
                329, 330, 332, 403, 416, 419,
                420, 421, 424, 425, 426, 428,
                430, 432, 433, 436, 437]
    
    path = da.config['PATH']['physio_path']
    version_major = 5
    version_minor = 0
    path = os.path.join(path, 'gsr_to_gamedata_table_v' + str(version_major) + '.' + str(version_minor)  + '.csv')

    options = {
        'do_gsr': True,
        'do_trials': True,
        'only_success': False,
        'silent': True,
        'overwrite': True,
    }

    # transform all keys to lower case
    options = {key.lower(): value for key, value in options.items()}

    # add missing keys to options and transform to namedtuple
    option_keys = ('do_ecg', 'do_gsr', 'do_blocks', 'do_trials',
                   'only_success', 'silent', 'overwrite')
    default_opts = dict.fromkeys(option_keys, False)
    default_opts.update(options)

    # convert to named tuple for easier access
    options = namedtuple('Options', default_opts.keys())(**default_opts)

    # if the file already exists, cancel everything
    if not options.overwrite and os.path.exists(path):
        print(path, 'file already exists')
        return

    # write column names to csv file
    head = 'subject,session,physio_time,raw_gsr,condition,trial_id,success\n'
    with open(path, 'w') as f:
        f.write(head)

    # pair each subject with both sessions (1 and 2) for easy iteration
    sessions = [1, 2] * len(subjects)
    subjects = itertools.chain(*zip(subjects, subjects))

    for subject, session in zip(subjects, sessions):

        print('Processing subject %s session %s' % (subject, session))

        subject = str(subject)
        session = str(session)

        # skip this subject/session if its data cannot be loaded
        try:
            physio_data, trials, time_range = da.get_data(subject, session, options.only_success, options.silent)
        except da.DataAccessError as e:
            print('Skip subject %s session %s: %s' % (subject, session, e))
            continue

        time_scale = np.array(physio_data['time'])

        #print len(time_range), len(time_scale)

        if len(time_scale) == 0:
            raise Exception('no physio data')

        results = dpp.process_data(physio_data, trials, subject, session, options)

        raw_gsr = physio_data['gsr']
        cond_for_physio = results.conditions_for_physio
        trial_for_physio = results.trial_ids_for_physio
        success_for_physio = results.success_for_physio

        lines = [','.join([subject, session, str(t), str(gsr), str(cond), str(tid), str(sfp) + '\n'])
                 for t, gsr, cond, tid, sfp in zip(time_scale, raw_gsr, cond_for_physio,
                                                   trial_for_physio, success_for_physio)]

        with open(path, 'a') as f:
            f.writelines(lines)
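The zip/chain pairing above is compact but cryptic; a tiny standalone demonstration of what it produces:

import itertools

subjects = [312, 314]
sessions = [1, 2] * len(subjects)                     # [1, 2, 1, 2]
subjects = itertools.chain(*zip(subjects, subjects))  # 312, 312, 314, 314

print(list(zip(subjects, sessions)))
# -> [(312, 1), (312, 2), (314, 1), (314, 2)]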
Example #11
def save_mean_table():

    subjects = [403, 416, 421, 424, 430, 433, 434, 437,
                419, 420, 425, 426, 428, 429, 432, 436]

    path = da.config['PATH']['physio_path']
    path = os.path.join(path, 'gsr_results_table.csv')

    options = {
        'do_gsr': True,
        'do_trials': True,
        'only_success': False,
        'silent': True,
    }

    # transform all keys to lower case
    options = {key.lower(): value for key, value in options.items()}

    # add missing keys to options and transform to namedtuple
    option_keys = ('do_ecg', 'do_gsr', 'do_blocks', 'do_trials', 'silent')
    default_opts = dict.fromkeys(option_keys, False)
    default_opts.update(options)

    # convert to named tuple for easier access
    options = namedtuple('Options', default_opts.keys())(**default_opts)

    # if the file already exists, cancel everything
    if os.path.exists(path):
        print(path, 'file already exists')
        return

    # write column names to csv file
    head = 'subject,session,trial_id,condition,mean_gsr\n'
    with open(path, 'w') as f:
        f.write(head)

    # pair each subject with both sessions (1 and 2) for easy iteration
    sessions = [1, 2] * len(subjects)
    subjects = itertools.chain(*zip(subjects, subjects))

    for subject, session in zip(subjects, sessions):

        print('Processing subject %s session %s' % (subject, session))

        subject = str(subject)
        session = str(session)

        # try to load data
        try:
            physio_data, trials, time_range = da.get_data(subject, session, options.only_success)
        except da.DataAccessError as e:
            print('Skip subject %s session %s: %s' % (subject, session, e))
            continue 

        time_scale = np.array(physio_data['time'])

        #print len(time_range), len(time_scale)

        if len(time_scale) == 0:
            raise Exception('no physio data')

        results = dpp.process_data(physio_data, trials, subject, session, options)

        condition = trials[2]
        trial_id = trials[3]
        trails = trials[:-1]  # NOTE: unused, and likely a typo for 'trials'
        gsr_mean = results.mean_gsr_for_trials

        trials = zip(*trials)  # NOTE: result unused

        lines = [','.join([subject, session, str(tid), cond, str(gsr) + '\n'])
                 for tid, cond, gsr in zip(trial_id, condition, gsr_mean)]

        with open(path, 'a') as f:
            f.writelines(lines)
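Both tables are plain CSVs with header rows, so they read straight back in for analysis. A sketch with pandas (not used by the snippets themselves; the column names come from the header written above):

import pandas as pd

df = pd.read_csv('gsr_results_table.csv')

# mean GSR per condition, averaged over all subjects and sessions
print(df.groupby('condition')['mean_gsr'].mean())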