Example #1

Binary classification on the football dataset: exec_analysis targets the 'HomeWin' column in FAST_PARANOIAC mode, optimizing 'combined_accuracy'; the execution tree is then rebuilt on 'test_accuracy' and the stored models pruned with EACH_BEST.
if __name__ == "__main__":

    from gdayf.core.controller import Controller
    from gdayf.common.constants import *
    from pandas import set_option
    from gdayf.common.dataload import DataLoad

    #Analysis
    controller = Controller()
    if controller.config_checks():
        data_train, data_test = DataLoad().footset()
        status, recomendations = controller.exec_analysis(
            datapath=data_train,
            objective_column='HomeWin',
            amode=FAST_PARANOIAC,
            metric='combined_accuracy',
            deep_impact=3)

        controller.reconstruct_execution_tree(metric='test_accuracy',
                                              store=True)
        controller.remove_models(arlist=recomendations, mode=EACH_BEST)

        set_option('display.max_rows', 500)
        set_option('display.max_columns', 50)
        set_option('display.max_colwidth', 100)
        set_option('display.precision', 4)
        set_option('display.width', 1024)

        #Prediction
        print('Starting Prediction\'s Phase')
        print(recomendations[0]['load_path'][0]['value'])
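Example #1 ends just after printing the stored model path. A minimal sketch of how its prediction phase could continue, reusing data_test from the DataLoad() call and mirroring the exec_prediction pattern in the workflow methods below; whether exec_prediction accepts the in-memory test frame here is an assumption:

        # Hedged sketch, not part of the original example: score the held-out
        # split with the best recommended model, then release the controller.
        prediction_frame = controller.exec_prediction(
            datapath=data_test,
            model_file=recomendations[0]['load_path'][0]['value'])
        if prediction_frame is not None:
            print(prediction_frame)
        controller.clean_handlers()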
Example #2

Regression on the dataset loaded by DataLoad().dm(): exec_analysis targets the 'Weather_Temperature' column in POC mode, optimizing 'test_rmse'.
if __name__ == "__main__":

    from gdayf.core.controller import Controller
    from gdayf.common.constants import *
    from pandas import set_option
    from gdayf.common.dataload import DataLoad

    #Analysis
    controller = Controller()
    if controller.config_checks():
        data_train, data_test = DataLoad().dm()
        status, recomendations = controller.exec_analysis(
            datapath=data_train,
            objective_column='Weather_Temperature',
            amode=POC,
            metric='test_rmse',
            deep_impact=5)

        controller.reconstruct_execution_tree(arlist=None,
                                              metric='test_rmse',
                                              store=True)
        controller.remove_models(recomendations, mode=EACH_BEST)

        set_option('display.max_rows', 500)
        set_option('display.max_columns', 50)
        set_option('display.max_colwidth', 100)
        set_option('display.precision', 4)
        set_option('display.width', 1024)

        #Prediction
        print('Starting Prediction\'s Phase')
if __name__ == "__main__":

    from gdayf.core.controller import Controller
    from gdayf.common.constants import *
    from pandas import set_option
    from gdayf.common.dataload import DataLoad

    #Analysis
    controller = Controller()
    if controller.config_checks():
        data_train, data_test = DataLoad().dm()
        status, recomendations = controller.exec_analysis(
            datapath=data_train,
            objective_column=None,
            amode=CLUSTERING,
            metric='cdistance',
            deep_impact=4,
            k=8,
            estimate_k=False)

        controller.reconstruct_execution_tree(recomendations,
                                              metric='cdistance')
        controller.remove_models(recomendations, mode=EACH_BEST)

        set_option('display.max_rows', 500)
        set_option('display.max_columns', 50)
        set_option('display.max_colwidth', 100)
        set_option('display.precision', 4)
        set_option('display.width', 1024)

        #Prediction
Example #4

Anomaly detection: exec_analysis runs with no objective column in ANOMALIES mode, optimizing 'train_rmse'.
if __name__ == "__main__":

    from gdayf.core.controller import Controller
    from gdayf.common.constants import *
    from pandas import set_option
    from gdayf.common.dataload import DataLoad

    #Analysis

    controller = Controller()
    if controller.config_checks():
        data_train, data_test = DataLoad().dm()
        status, recomendations = controller.exec_analysis(
            datapath=data_train,
            objective_column=None,
            amode=ANOMALIES,
            metric='train_rmse',
            deep_impact=5)

        controller.reconstruct_execution_tree(recomendations,
                                              metric='train_rmse')
        controller.remove_models(recomendations, mode=EACH_BEST)

        set_option('display.max_rows', 500)
        set_option('display.max_columns', 50)
        set_option('display.max_colwidth', 100)
        set_option('display.precision', 4)
        set_option('display.width', 1024)

        #Prediction
        print('Starting Prediction\'s Phase')
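Between pruning and prediction, the recommendation list can also be rendered as a comparison table. A hedged sketch based on the table_model_list call inside train_workflow (Example #6); passing the metric as a plain string is an assumption, since the workflow code evaluates the metric name into a constant first:

        # Hedged sketch: tabulate the recommended models by the chosen metric.
        table_model_list = controller.table_model_list(ar_list=recomendations,
                                                       metric='train_rmse')
        print(table_model_list.to_string(justify="left"))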
Example #5

The predict_workflow method: it loads a workflow definition (a JSON file path or a dict), optionally drops filtered columns, splits the dataset per value of a 'for_each' column, runs exec_prediction on each slice with the mapped model, writes Excel/JSON summaries, and recurses into the workflow's 'Next' step.
    def predict_workflow(self,
                         datapath,
                         wkey,
                         workflow,
                         prefix='main',
                         workflow_id='default',
                         remove_models=EACH_BEST):
        set_option('display.max_rows', 500)
        set_option('display.max_columns', 500)
        set_option('display.width', 1000)

        error, dataset = self.check_path(datapath)
        if dataset is None:
            return error

        if isinstance(workflow, str):
            with open(workflow, 'r') as wf_file:
                wf = load(wf_file, object_hook=OrderedDict)
        else:
            wf = workflow
        pfix = xstr(prefix)
        controller = Controller(e_c=self._ec)
        if controller.config_checks():
            variables = dataset.columns.tolist()

            #for wkey, wvalue in wf.items():
            if wf["model"] is not None and \
                    (isinstance(wf["model"], str) or isinstance(wf["model"], dict)):

                if wf["data"]["filtered_columns"] is not None:
                    for delete in wf["data"]["filtered_columns"]:
                        try:
                            variables.remove(delete)
                        except Exception:
                            self._logging.log_info('gDayF', "Workflow",
                                                   self._labels["failed_var"],
                                                   delete)

                self._logging.log_info('gDayF', "Workflow",
                                       self._labels["variables_desc"],
                                       variables)

                if wf["data"]["for_each"] is not None:
                    fe_column = wf["data"]["for_each"]
                    fe_data_exclusions = wf["data"]["for_each_exclusions"]
                    fe_filtered_data = wf["data"]["filtered_data"]
                    fe_next = wf["Next"]

                    for each in dataset[fe_column].unique():
                        if fe_data_exclusions is None or each not in fe_data_exclusions:
                            aux_dataset = dataset[dataset[fe_column] == each]
                            pfix = xstr(prefix + '_' + str(each))

                            if fe_filtered_data is not None:
                                qcolumn = fe_filtered_data["column"]
                                quantile = aux_dataset[qcolumn].quantile(
                                    q=fe_filtered_data["quantile"])
                                aux_dataset = aux_dataset.loc[
                                    aux_dataset[qcolumn] <= quantile]
                                pfix = xstr(pfix + '_' +
                                            str(fe_filtered_data["quantile"]))

                            prediction_frame = controller.exec_prediction(
                                datapath=aux_dataset,
                                model_file=wf["model"][str(each)])
                            try:
                                if 'predict' in prediction_frame.columns.values:
                                    prediction_frame.rename(
                                        columns={"predict": wkey},
                                        inplace=True)
                                elif 'prediction' in prediction_frame.columns.values:
                                    prediction_frame.rename(
                                        columns={"prediction": wkey},
                                        inplace=True)
                            except AttributeError:
                                self._logging.log_info(
                                    'gDayF', "Workflow",
                                    self._labels["anomalies_operation"])

                            self._logging.log_info(
                                'gDayF', 'workflow',
                                self._labels["results"] + '\n',
                                prediction_frame.to_string(index_names=False,
                                                           justify="left"))

                            try:
                                if isinstance(prediction_frame, DataFrame) \
                                        and self._config['common']['workflow_summary_enabled']:
                                    '''filename = self.storage_path('predict', wkey + '_'
                                                        + str(pfix) + '_' + 'prediction', 'xls')'''
                                    filename = self.storage_path(
                                        'predict',
                                        str(pfix) + '_' + str(self.timestamp) +
                                        '_' + 'prediction', 'xls')
                                    prediction_frame.to_excel(
                                        filename,
                                        index=False,
                                        sheet_name="prediction")
                                    self.replicate_file('predict',
                                                        filename=filename)
                                elif self._config['common'][
                                        'workflow_summary_enabled']:
                                    for ikey, ivalue in prediction_frame[
                                            'columns'].items():
                                        ppDF = decode_ordered_dict_to_dataframe(
                                            ivalue)
                                        if isinstance(ppDF, DataFrame):
                                            '''filename = self.storage_path('predict', wkey + '_'
                                                          + str(pfix) + '_' + 'prediction_' + ikey, 'xls')'''
                                            filename = self.storage_path(
                                                'predict',
                                                str(pfix) + '_' +
                                                str(self.timestamp) + '_' +
                                                'prediction_' + ikey, 'xls')
                                            ppDF.to_excel(
                                                filename,
                                                index=False,
                                                sheet_name="prediction")
                                            self.replicate_file(
                                                'predict', filename=filename)

                                    filename = self.storage_path(
                                        'predict',
                                        str(pfix) + '_' + str(self.timestamp) +
                                        '_' + 'prediction', 'json')
                                    with open(filename, 'w') as f:
                                        f.write(
                                            dumps(
                                                prediction_frame['global_mse'])
                                        )
                                    self.replicate_file('predict',
                                                        filename=filename)
                            except AttributeError:
                                self._logging.log_info(
                                    'gDayF', "Workflow",
                                    self._labels["anomalies_operation"],
                                    prediction_frame)

                            try:
                                if fe_next is not None and prediction_frame is not None:
                                    self.workflow(prediction_frame,
                                                  fe_next,
                                                  pfix,
                                                  remove_models=remove_models)
                            except Exception as oexecution_error:
                                self._logging.log_critical(
                                    'gDayF', "Workflow",
                                    self._labels["failed_wf"], str(fe_next))
                                self._logging.log_critical(
                                    'gDayF', "Workflow",
                                    self._labels["failed_wf"],
                                    repr(oexecution_error))
                else:
                    aux_dataset = dataset

                    prediction_frame = controller.exec_prediction(
                        datapath=aux_dataset, model_file=wf["model"])
                    if 'predict' in prediction_frame.columns.values:
                        prediction_frame.rename(columns={"predict": wkey},
                                                inplace=True)
                    elif 'prediction' in prediction_frame.columns.values:
                        prediction_frame.rename(columns={"prediction": wkey},
                                                inplace=True)

                    self._logging.log_info(
                        'gDayF', 'workflow', self._labels["results"] + '\n',
                        prediction_frame.to_string(index_names=False,
                                                   justify="left"))
                    if isinstance(
                            prediction_frame, DataFrame
                    ) and self._config['common']['workflow_summary_enabled']:
                        filename = self.storage_path(
                            'predict',
                            str(pfix) + '_' + str(self.timestamp) + '_' +
                            'prediction', 'xls')
                        prediction_frame.to_excel(filename,
                                                  index=False,
                                                  sheet_name="prediction")
                        self.replicate_file('predict', filename=filename)
                    elif self._config['common']['workflow_summary_enabled']:
                        for ikey, ivalue in prediction_frame['columns'].items():
                            ppDF = decode_ordered_dict_to_dataframe(ivalue)
                            if isinstance(ppDF, DataFrame):
                                filename = self.storage_path(
                                    'predict',
                                    str(pfix) + '_' + str(self.timestamp) +
                                    '_' + 'prediction_' + ikey, 'xls')
                                ppDF.to_excel(filename,
                                              index=False,
                                              sheet_name="prediction")
                                self.replicate_file('predict',
                                                    filename=filename)

                        filename = self.storage_path(
                            'predict',
                            str(pfix) + '_' + str(self.timestamp) + '_' +
                            'prediction', 'json')
                        with open(filename, 'w') as f:
                            f.write(dumps(prediction_frame))
                        self.replicate_file('predict', filename=filename)

                    if wf['Next'] is not None and prediction_frame is not None:
                        try:
                            self.workflow(datapath=prediction_frame,
                                          workflow=wf['Next'],
                                          prefix=pfix,
                                          remove_models=remove_models)
                        except Exception as oexecution_error:
                            self._logging.log_critical(
                                'gDayF', "Workflow", self._labels["failed_wf"],
                                str(wf['Next']))
                            self._logging.log_critical(
                                'gDayF', "Workflow", self._labels["failed_wf"],
                                repr(oexecution_error))

        controller.clean_handlers()
        del controller
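A hedged usage sketch for predict_workflow: the instance name wf_runner, the dataset path, and the model path are hypothetical, while the dict keys mirror exactly the fields the method reads above ("model", "data", "Next"):

from collections import OrderedDict

# Hedged sketch (hypothetical paths and instance name); the keys follow
# the fields predict_workflow reads: wf["model"], wf["data"], wf["Next"].
workflow = OrderedDict([
    ("model", "/Data/models/best_model.json"),   # hypothetical stored model
    ("data", OrderedDict([
        ("filtered_columns", None),   # columns dropped before predicting
        ("for_each", None),           # split column; None = whole dataset
        ("for_each_exclusions", None),
        ("filtered_data", None),      # optional quantile filter
    ])),
    ("Next", None),                   # no chained workflow step
])
wf_runner.predict_workflow(datapath='/Data/test.csv',   # hypothetical dataset
                           wkey='prediction_1',
                           workflow=workflow)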
Example #6

The train_workflow method: the 'parameters' block from the workflow definition is assembled into a controller.exec_analysis call, the recommended models are pruned and tabulated, the best model is used for an immediate prediction, and control passes to the workflow's 'Next' step.
    def train_workflow(self,
                       datapath,
                       wkey,
                       workflow,
                       prefix='main',
                       remove_models=EACH_BEST):
        set_option('display.max_rows', 500)
        set_option('display.max_columns', 500)
        set_option('display.width', 1000)

        wf = workflow
        pfix = prefix

        error, dataset = self.check_path(datapath)
        if dataset is None:
            return error

        controller = Controller(e_c=self._ec)
        if controller.config_checks():
            variables = dataset.columns.tolist()

            #for wkey, wvalue in wf.items():
            if wf["data"]["filtered_columns"] is not None:
                for delete in wf["data"]["filtered_columns"]:
                    try:
                        variables.remove(delete)
                    except Exception:
                        self._logging.log_info('gDayF', "Workflow",
                                               self._labels["failed_var"],
                                               delete)
            self._logging.log_info('gDayF', "Workflow",
                                   self._labels["variables_desc"], variables)
            if wf["data"]["for_each"] is not None:
                fe_column = wf["data"]["for_each"]
                fe_data_exclusions = wf["data"]["for_each_exclusions"]
                fe_filtered_data = wf["data"]["filtered_data"]
                fe_parameters = wf["parameters"]
                fe_next = wf["Next"]

                for each in dataset[fe_column].unique():
                    if fe_data_exclusions is None or each not in fe_data_exclusions:
                        aux_dataset = dataset[dataset[fe_column] == each]
                        pfix = xstr(prefix + '_' + str(each))

                        if fe_filtered_data is not None:
                            qcolumn = fe_filtered_data["column"]
                            quantile = aux_dataset[qcolumn].quantile(
                                q=fe_filtered_data["quantile"])
                            aux_dataset = aux_dataset.loc[
                                aux_dataset[qcolumn] <= quantile]
                            pfix = xstr(pfix + '_' +
                                        str(fe_filtered_data["quantile"]))

                        if fe_parameters is not None:
                            source_parameters = list()
                            source_parameters.append(
                                'controller.exec_analysis(')
                            source_parameters.append(
                                'datapath=aux_dataset.loc[:, variables]')
                            for ikey, ivalue in fe_parameters.items():
                                source_parameters.append(',')
                                source_parameters.append(ikey)
                                source_parameters.append('=')
                                if isinstance(ivalue, str) and ikey != "amode":
                                    source_parameters.append('\'')
                                    source_parameters.append(ivalue)
                                    source_parameters.append('\'')
                                else:
                                    source_parameters.append(str(ivalue))
                            source_parameters.append(')')

                            self._logging.log_info(
                                'gDayF', "Workflow",
                                self._labels["desc_operation"],
                                ''.join(source_parameters))
                            status, recomendations = eval(
                                ''.join(source_parameters))
                            controller.remove_models(recomendations,
                                                     mode=remove_models)
                            controller.reconstruct_execution_tree(
                                recomendations,
                                metric=fe_parameters['metric'],
                                store=True)

                            #model_id = recomendations[0]['model_id']
                            table_model_list = controller.table_model_list(
                                ar_list=recomendations,
                                metric=eval(fe_parameters['metric']))
                            self._logging.log_info(
                                'gDayF', 'workflow',
                                self._labels["results"] + '\n',
                                table_model_list.to_string(justify="left"))

                            #filename = self.storage_path('train', wkey + '_' + str(pfix) + '_' + 'train_performance'
                            if self._config['common'][
                                    'workflow_summary_enabled']:
                                filename = self.storage_path(
                                    'train',
                                    str(pfix) + '_' + 'train_performance',
                                    'xls')
                                table_model_list.to_excel(
                                    filename,
                                    index=False,
                                    sheet_name='performance')
                                self.replicate_file('train', filename=filename)

                            prediction_frame = controller.exec_prediction(
                                datapath=aux_dataset,
                                model_file=recomendations[0]['json_path'][0]
                                ['value'])
                            try:
                                if 'predict' in prediction_frame.columns.values:
                                    prediction_frame.rename(
                                        columns={"predict": wkey},
                                        inplace=True)
                                elif 'prediction' in prediction_frame.columns.values:
                                    prediction_frame.rename(
                                        columns={"prediction": wkey},
                                        inplace=True)

                                self._logging.log_info(
                                    'gDayF', 'workflow',
                                    self._labels["results"] + '\n',
                                    prediction_frame.to_string(
                                        index_names=False, justify="left"))
                                '''filename = self.storage_path('train', wkey + '_'
                                                             + str(pfix) + '_' + 'prediction', 'xls')'''
                                if self._config['common'][
                                        'workflow_summary_enabled']:
                                    filename = self.storage_path(
                                        'train',
                                        str(pfix) + '_' + 'prediction', 'xls')
                                    prediction_frame.to_excel(
                                        filename,
                                        index=False,
                                        sheet_name='train_prediction')
                                    self.replicate_file('train',
                                                        filename=filename)

                            except AttributeError as oexecution_error:
                                self._logging.log_info(
                                    'gDayF', "Workflow",
                                    self._labels["failed_model"],
                                    str(repr(oexecution_error)))

                            try:
                                if fe_next is not None and prediction_frame is not None:
                                    self.workflow(prediction_frame,
                                                  fe_next,
                                                  pfix,
                                                  remove_models=remove_models)
                            except Exception as oexecution_error:
                                self._logging.log_critical(
                                    'gDayF', "Workflow",
                                    self._labels["failed_wf"], str(fe_next))
            else:
                aux_dataset = dataset

                if wf["data"]["filtered_data"] is not None:
                    qcolumn = wf["data"]["filtered_data"]["column"]
                    quantile = aux_dataset[qcolumn].quantile(
                        q=wf["data"]["filtered_data"]["quantile"])
                    aux_dataset = aux_dataset.query('%s <= %s' %
                                                    (qcolumn, quantile))

                if wf['parameters'] is not None:
                    source_parameters = list()
                    source_parameters.append('controller.exec_analysis(')
                    source_parameters.append(
                        'datapath=aux_dataset.loc[:, variables]')
                    for ikey, ivalue in wf['parameters'].items():
                        source_parameters.append(',')
                        source_parameters.append(ikey)
                        source_parameters.append('=')
                        if isinstance(ivalue, str) and ikey != "amode":
                            source_parameters.append('\'')
                            source_parameters.append(ivalue)
                            source_parameters.append('\'')
                        else:
                            source_parameters.append(str(ivalue))
                    source_parameters.append(')')
                    self._logging.log_info('gDayF', "Workflow",
                                           self._labels["desc_operation"],
                                           ''.join(source_parameters))
                    status, recomendations = eval(''.join(source_parameters))
                    controller.remove_models(recomendations,
                                             mode=remove_models)
                    controller.reconstruct_execution_tree(
                        recomendations,
                        metric=wf['parameters']['metric'],
                        store=True)

                    model_id = recomendations[0]['model_id']
                    table_model_list = controller.table_model_list(
                        ar_list=recomendations,
                        metric=eval(wf['parameters']['metric']))
                    self._logging.log_info(
                        'gDayF', 'workflow', self._labels["results"] + '\n',
                        table_model_list.to_string(justify="left"))

                    if self._config['common']['workflow_summary_enabled']:
                        '''filename = self.storage_path('train', wkey + '_' + str(pfix) + '_'
                                                     + 'train_performance', 'xls')'''
                        filename = self.storage_path(
                            'train',
                            str(pfix) + '_' + 'train_performance', 'xls')
                        table_model_list.to_excel(filename,
                                                  index=False,
                                                  sheet_name="performace")
                        self.replicate_file('train', filename=filename)

                    prediction_frame = controller.exec_prediction(
                        datapath=aux_dataset,
                        model_file=recomendations[0]['json_path'][0]['value'])
                    try:
                        if 'predict' in prediction_frame.columns.values:
                            prediction_frame.rename(columns={"predict": wkey},
                                                    inplace=True)
                        elif 'prediction' in prediction_frame.columns.values:
                            prediction_frame.rename(
                                columns={"prediction": wkey}, inplace=True)

                        self._logging.log_info(
                            'gDayF', 'workflow',
                            self._labels["results"] + '\n',
                            prediction_frame.to_string(index_names=False,
                                                       justify="left"))
                        '''filename = self.storage_path('train', wkey + '_' + str(pfix) + '_'
                                                     + 'prediction', 'xls')'''
                        if self._config['common']['workflow_summary_enabled']:
                            filename = self.storage_path(
                                'train',
                                str(pfix) + '_' + 'prediction', 'xls')
                            prediction_frame.to_excel(
                                filename,
                                index=False,
                                sheet_name="train_prediction")
                            self.replicate_file('train', filename=filename)

                    except AttributeError as oexecution_error:
                        self._logging.log_info('gDayF', "Workflow",
                                               self._labels["failed_model"],
                                               str(repr(oexecution_error)))

                    if wf['Next'] is not None and prediction_frame is not None:
                        try:
                            self.workflow(datapath=prediction_frame,
                                          workflow=wf['Next'],
                                          prefix=pfix,
                                          remove_models=remove_models)
                        except Exception as oexecution_error:
                            self._logging.log_critical(
                                'gDayF', "Workflow", self._labels["failed_wf"],
                                str(wf['Next']))
                            self._logging.log_critical(
                                'gDayF', "Workflow", self._labels["failed_wf"],
                                repr(oexecution_error))

        controller.clean_handlers()
        del controller
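And a matching hedged sketch for train_workflow: the 'parameters' block is forwarded verbatim to controller.exec_analysis, so its keys follow the exec_analysis signature from Examples #1-#4; the paths and instance name are hypothetical:

from collections import OrderedDict

# Hedged sketch (hypothetical paths, instance name, and parameter values).
workflow = OrderedDict([
    ("data", OrderedDict([
        ("filtered_columns", None),
        ("for_each", None),
        ("for_each_exclusions", None),
        ("filtered_data", None),
    ])),
    # Forwarded to controller.exec_analysis, as assembled in the method above.
    ("parameters", OrderedDict([
        ("objective_column", "Weather_Temperature"),
        ("amode", "POC"),        # kept unquoted by the builder, so it evaluates to the constant
        ("metric", "test_rmse"),
        ("deep_impact", 3),
    ])),
    ("Next", None),
])
wf_runner.train_workflow(datapath='/Data/train.csv',    # hypothetical dataset
                         wkey='model_1',
                         workflow=workflow)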
Example #7

Model comparison on the Crulogic dataset: two stored H2O models (a random forest and a gradient boosting estimator) each score a previously exported prediction file, and their outputs are collected side by side.

if __name__ == "__main__":

    from gdayf.core.controller import Controller
    from collections import OrderedDict
    from pandas import read_excel

    source_1_data = list()
    source_1_data.append(
        '/Data/gdayf-v1/experiments/Crulogic-r2/CRULOGIC-avg-fuel-speed-predicted_1537911231.396293/'
    )
    source_1_data.append('summary/predict/')
    source_1_data.append('Avg-speed_a2_p_prediction.xls')
    model_1_data = read_excel(io=''.join(source_1_data))

    source_2_data = list()
    source_2_data.append(
        '/Data/gdayf-v1/experiments/Crulogic-r2/CRULOGIC-avg-fuel-speed-predicted_1537911231.396293/'
    )
    source_2_data.append('summary/predict/')
    source_2_data.append('Avg-speed_a4_p_prediction.xls')
    model_2_data = read_excel(io=''.join(source_2_data))

    #Analysis
    controller = Controller(user_id='Crulogic-r2')
    if controller.config_checks():

        model_1 = '/Data/gdayf-v1/experiments/Crulogic-r2/CRULOGIC-avg-fuel-speed-predicted_1537911231.396293/Crulogic-r2_Dataframe_77256_6438_12_1537911811.7863178/h2o/train/1537911811.7874267/json/H2ORandomForestEstimator_1537912248.5985134.json'
        model_2 = '/Data/gdayf-v1/experiments/Crulogic-r2/CRULOGIC-avg-fuel-speed-predicted_1537911231.396293/Crulogic-r2_Dataframe_77256_6438_12_1537913921.5431554/h2o/train/1537913921.544499/json/H2OGradientBoostingEstimator_1537914152.3058493.json'

        dataframe_dict = OrderedDict()
        objective_column = 'avg-fuel'
        dataframe_dict[objective_column] = model_data[objective_column]
        prediction_frame = controller.exec_prediction(datapath=model_1_data,
                                                      model_file=model_1)
        model_data['model_1'] = prediction_frame['predict']
        prediction_frame = controller.exec_prediction(datapath=model_2_data,
                                                      model_file=model_2)
        model_data['model_2'] = prediction_frame['predict']
        columns = model_data.columns.values.tolist()