Example #1
0
 def unregister_predictor(self, name):
     for integration in self._get_integrations():
         if integration.check_connection():
             integration.unregister_predictor(name)
         else:
             logger.warning(
                 f"There is no connection to {integration.name}. predictor wouldn't be unregistred"
             )
Example #2
0
 def setup_integration(self, db_alias):
     try:
         # If this is the name of an integration
         integration = self._get_integration(db_alias)
         if integration is not True:
             integration.setup()
     except Exception as e:
         logger.warning(f'Failed to integrate with database {db_alias}, error: {e}')
Example #3
0
 def register_predictors(self, model_data_arr):
     for integration in self._get_integrations():
         if integration.check_connection():
             integration.register_predictors(model_data_arr)
         else:
             logger.warning(
                 f"There is no connection to {integration.name}. predictor wouldn't be registred."
             )
Example #4
0
 def _get_integration(self, db_alias):
     integration = self.integration_controller.get(db_alias)
     if integration:
         db_type = integration['type']
         if db_type in self.known_dbs:
             return self.known_dbs[db_type](self.config, db_alias, integration)
         logger.warning(f'Unknown integration type: {db_type} for database called: {db_alias}')
         return False
     return True
Example #5
0
 def register_predictors(self, model_data_arr):
     for integration in self._get_integrations():
         if integration.check_connection():
             try:
                 integration.register_predictors(model_data_arr)
             except Exception as e:
                 logger.warning(f"Error {e} when trying to register predictor to {integration.name}. Predictor wouldn't be registred.")
         else:
             logger.warning(f"There is no connection to {integration.name}. Predictor wouldn't be registred.")
Example #6
0
 def _get_integration(self, db_alias):
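     # returns a db handler when the integration is published, False for an unknown type,
     # and True when the integration isn't published (i.e. nothing to set up)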
     if self.config['integrations'][db_alias]['publish']:
         db_type = self.config['integrations'][db_alias]['type']
         if db_type in self.known_dbs:
             return self.known_dbs[db_type](self.config, db_alias)
         logger.warning(
             f'Unknown integration type: {db_type} for database called: {db_alias}'
         )
         return False
     return True
Example #7
0
 def setup_integration(self, db_alias):
     try:
         # If this is the name of an integration
         integration = self._get_integration(db_alias)
         if integration is False:
             raise Exception(f'Unknown database integration type for: {db_alias}')
         if integration is not True:
             integration.setup()
     except Exception as e:
         logger.warning(f'Failed to integrate with database {db_alias}, error: {e}')
Example #8
0
 def unregister_predictor(self, name):
     for integration in self._get_integrations(publish=True):
         # FIXME
         # !!! Integrations from config.json are added to the db on each start !!!
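         # strip the company-id prefix from names namespaced as '<company_id>@@@@@<name>'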
         if '@@@@@' in name:
             sn = name.split('@@@@@')
             assert len(sn) < 3  # security
             name = sn[1]
         if integration.check_connection():
             integration.unregister_predictor(name)
         else:
             logger.warning(f"There is no connection to {integration.name}. predictor wouldn't be unregistred")
Example #9
0
    def register_predictors(self, model_data_arr, setup=True):
        it = self._get_integrations()
        for integration in it:
            register = True
            if setup:
                register = self._setup_integration(integration)
            if register:
                if integration.check_connection():
                    integration.register_predictors(model_data_arr)
                else:
                    logger.warning(
                        f"There is no connection to {integration.name}. predictor wouldn't be registred."
                    )

Example #10
0
    def register_predictors(self, model_data_arr, integration_name=None):
        if integration_name is None:
            integrations = self._get_integrations(publish=True)
        else:
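            # _get_integration returns a bool (True/False) when there is no handler to publish to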
            integration = self._get_integration(integration_name)
            integrations = [] if isinstance(integration, bool) else [integration]

        for integration in integrations:
            if integration.check_connection():
                try:
                    integration.register_predictors(model_data_arr)
                except Exception as e:
                    logger.warning(f"Error {e} when trying to register predictor to {integration.name}. Predictor wouldn't be registred.")
            else:
                logger.warning(f"There is no connection to {integration.name}. Predictor wouldn't be registred.")
Example #11
0
 def _get_integration(self, db_alias):
     if self.config['integrations'][db_alias]['publish']:
         db_type = self.config['integrations'][db_alias]['type']
         if db_type == 'clickhouse':
             return Clickhouse(self.config, db_alias)
         elif db_type == 'mariadb':
             return Mariadb(self.config, db_alias)
         elif db_type == 'mysql':
             return MySQL(self.config, db_alias)
         elif db_type == 'postgres':
             return PostgreSQL(self.config, db_alias)
         elif db_type == 'mssql':
             return MSSQL(self.config, db_alias)
         elif db_type == 'mongodb':
             return MongoDB(self.config, db_alias)
         else:
             logger.warning(f'Unknown integration type: {db_type} for database called: {db_alias}')
         return False
     return True
Example #12
0
    def predict(self, name: str, when_data: Union[dict, list, pd.DataFrame],
                pred_format: str, company_id: int):
        original_name = name
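        # the cache key is namespaced by company id, using '@@@@@' as the separator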
        name = f'{company_id}@@@@@{name}'

        predictor_record = db.session.query(db.Predictor).filter_by(
            company_id=company_id, name=original_name).first()
        assert predictor_record is not None
        predictor_data = self.get_model_data(name, company_id)
        fs_name = f'predictor_{company_id}_{predictor_record.id}'

        # drop the cached predictor if it has been retrained since it was cached
        if (name in self.predictor_cache
                and self.predictor_cache[name]['updated_at'] != predictor_record.updated_at):
            del self.predictor_cache[name]

        if name not in self.predictor_cache:
            # Clear the cache entirely if we have less than 1.2 GB left
            if psutil.virtual_memory().available < 1.2 * pow(10, 9):
                self.predictor_cache = {}

            if predictor_data['status'] == 'complete':
                self.fs_store.get(fs_name, fs_name, self.config['paths']['predictors'])
                self.predictor_cache[name] = {
                    'predictor': lightwood.predictor_from_state(
                        os.path.join(self.config['paths']['predictors'], fs_name),
                        predictor_record.code
                    ),
                    'updated_at': predictor_record.updated_at,
                    'created': datetime.datetime.now(),
                    'code': predictor_record.code,
                    'pickle': str(os.path.join(self.config['paths']['predictors'], fs_name))
                }
            else:
                raise Exception(
                    f'Trying to predict using predictor {original_name} with status: {predictor_data["status"]}. Error is: {predictor_data.get("error", "unknown")}'
                )

        # when_data may describe a mindsdb_datasources class ('class', 'args', 'kwargs')
        # or contain raw rows (dict, list of dicts, or DataFrame)
        if isinstance(when_data, dict) and 'kwargs' in when_data and 'args' in when_data:
            ds_cls = getattr(mindsdb_datasources, when_data['class'])
            df = ds_cls(*when_data['args'], **when_data['kwargs']).df
        else:
            if isinstance(when_data, dict):
                when_data = [when_data]
            df = pd.DataFrame(when_data)

        predictions = self.predictor_cache[name]['predictor'].predict(df)
        predictions = predictions.to_dict(orient='records')
        # Below is useful for debugging caching and storage issues
        # del self.predictor_cache[name]

        target = predictor_record.to_predict[0]
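        # 'explain' returns per-row confidence/bounds/anomaly info, 'dict' returns {target: values},
        # 'dict&explain' returns both; any other format returns the raw prediction records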
        if pred_format in ('explain', 'dict', 'dict&explain'):
            explain_arr = []
            dict_arr = []
            for i, row in enumerate(predictions):
                explain_arr.append({
                    target: {
                        'predicted_value': row['prediction'],
                        'confidence': row.get('confidence', None),
                        'confidence_lower_bound': row.get('lower', None),
                        'confidence_upper_bound': row.get('upper', None),
                        'anomaly': row.get('anomaly', None),
                        'truth': row.get('truth', None)
                    }
                })

                td = {'predicted_value': row['prediction']}
                for col in df.columns:
                    if col in row:
                        td[col] = row[col]
                    elif f'order_{col}' in row:
                        td[col] = row[f'order_{col}']
                    elif f'group_{col}' in row:
                        td[col] = row[f'group_{col}']
                    else:
                        original_index = row.get('original_index')
                        if original_index is None:
                            log.warning('original_index is None')
                            original_index = i
                        td[col] = df.iloc[original_index][col]
                dict_arr.append({target: td})
            if pred_format == 'explain':
                return explain_arr
            elif pred_format == 'dict':
                return dict_arr
            elif pred_format == 'dict&explain':
                return dict_arr, explain_arr
        # New format -- try switching to this in 2-3 months for speed; for now the above is ok
        else:
            return predictions