def _handle_ok(self, response, response_json):
    """Normalizes a successful HTTP response into a FireflyResponse."""
    # Empty body: return an empty response carrying only headers and status.
    if not response_json:
        return FireflyResponse(headers=response.headers, status_code=response.status_code)
    # Wrap bare payloads so the payload is always found under a 'result' key.
    if 'result' not in response_json:
        response_json = {'result': response_json}
    response_type = type(response_json['result'])
    if response_type == dict:
        # Dict results are unwrapped and exposed directly as the data.
        result = FireflyResponse(data=response_json.get('result', response_json),
                                 headers=response.headers, status_code=response.status_code)
    elif response_type == bool:
        result = FireflyResponse(data=response_json,
                                 headers=response.headers, status_code=response.status_code)
    elif response_type == int:
        # Integer results are treated as entity IDs and exposed under 'id'.
        result = FireflyResponse(data={'id': response_json['result']},
                                 headers=response.headers, status_code=response.status_code)
    else:
        result = FireflyResponse(data=response_json,
                                 headers=response.headers, status_code=response.status_code)
    return result
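# Illustration (hypothetical payloads, not captured server responses) of how
# the normalization above maps raw JSON bodies to FireflyResponse data:
#   {'result': 42}           -> data == {'id': 42}
#   {'result': {'state': 1}} -> data == {'state': 1}
#   {'result': True}         -> data == {'result': True}
#   {'state': 1}             -> wrapped to {'result': {'state': 1}} first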
def authenticate(username: str, password: str) -> FireflyResponse:
    """
    Authenticates user and stores temporary token in `fireflyai.token`.

    Other modules automatically detect if a token exists and use it, unless a user specifically provides a
    token for a specific request. The token is valid for a 24-hour period, after which this method needs to
    be called again in order to generate a new token.

    Args:
        username (str): Username.
        password (str): Password.

    Returns:
        FireflyResponse: Empty FireflyResponse if successful, raises FireflyError otherwise.
    """
    url = 'login'
    requestor = APIRequestor()
    response = requestor.post(url, body={'username': username, 'password': password, 'tnc': None}, api_key="")
    fireflyai.token = response['token']
    return FireflyResponse(status_code=response.status_code, headers=response.headers)
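# Usage sketch (hypothetical credentials): authenticate once per 24-hour
# window; subsequent SDK calls pick up `fireflyai.token` automatically.
import fireflyai

fireflyai.authenticate(username='jane@example.com', password='s3cret')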
def impersonate(cls, user_id: str = None, admin_token: str = None) -> FireflyResponse:
    """
    Impersonates a user and stores a temporary token in `fireflyai.token`.

    Args:
        user_id (str): ID of the user to impersonate.
        admin_token (str): Admin user token.

    Returns:
        FireflyResponse: Empty FireflyResponse if successful, raises FireflyError otherwise.
    """
    logging.info(' '.join(['impersonate to user_ID:', str(user_id)]))
    url = ''.join(['users/login_as/', str(user_id)])
    requester = APIRequestor()
    try:
        response = requester.post(url, api_key=admin_token)
        fireflyai.token = response['result']
        my_token = UserToken(fireflyai.token)
        logging.info(' '.join(['user ID:', str(user_id),
                               '- Login successful with Account ID: ' + str(my_token.get_account_id())]))
    except Exception as ex:
        logging.warning(' '.join(['user ID:', str(user_id), '- Login Failed']))
        raise ex
    return FireflyResponse(status_code=response.status_code, headers=response.headers)
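# Usage sketch: `impersonate` is a classmethod whose owning class is not shown
# here, so `Client` below is a placeholder; the admin token is assumed to come
# from a prior admin login.
#   Client.impersonate(user_id='1234', admin_token=admin_token)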
def _get_available_configuration_options(cls, id: int, inter_level: InterpretabilityLevel = None,
                                         api_key: str = None) -> FireflyResponse:
    """Fetches the valid configuration options (estimators, metrics, strategies, pipeline) for a dataset."""
    inter_level = inter_level.value if inter_level is not None else None
    requestor = APIRequestor()
    url = "tasks/configuration/options"
    response = requestor.get(url=url, params={'dataset_id': id, 'interpretable': inter_level}, api_key=api_key)
    # Convert the raw string values returned by the server into SDK enums.
    new_data = {
        'estimators': [Estimator(e) for e in response['estimators']],
        'target_metric': [TargetMetric(e) for e in response['target_metric']],
        'splitting_strategy': [SplittingStrategy(e) for e in response['splitting_strategy']],
        'pipeline': [Pipeline(e) for e in response['pipeline']],
    }
    return FireflyResponse(data=new_data)
def refit(cls, id: int, datasource_id: int, wait: bool = False, api_key: str = None) -> FireflyResponse:
    """
    Refits the chosen Ensemble of a Task on a specific Datasource.

    A refit trains the chosen Ensemble's models with the data of the given Datasource. The model training
    is done from scratch and uses all the given data. A new Ensemble is created that is made of all the
    refitted models of the chosen Ensemble and their original combination.

    Args:
        id (int): Task ID.
        datasource_id (int): Datasource ID.
        wait (Optional[bool]): Should the call be synchronous or not.
        api_key (Optional[str]): Explicit api_key, not required if `fireflyai.authenticate` was run prior.

    Returns:
        FireflyResponse: Ensemble ID, if successful and wait=False or Ensemble if successful and wait=True;
        raises FireflyError otherwise.
    """
    data = {
        "datasource_id": datasource_id,
    }
    ensemble_id = cls.get(id=id, api_key=api_key).get('ensemble_id', None)
    if not ensemble_id:
        raise InvalidRequestError(message="No ensemble exists for this Task.")

    requestor = APIRequestor()
    url = "ensembles/{ensemble_id}/refit".format(ensemble_id=ensemble_id)
    response = requestor.post(url=url, body=data, api_key=api_key)
    new_ens_id = response.get('ensemble_id')
    if wait:
        utils.wait_for_finite_state(fireflyai.Ensemble.get, new_ens_id, api_key=api_key)
        response = fireflyai.Ensemble.get(new_ens_id, api_key=api_key)
    else:
        response = FireflyResponse(data={'id': new_ens_id}, headers=response.headers,
                                   status_code=response.status_code)
    return response
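# Usage sketch (hypothetical IDs; `fireflyai.Task` as the owning class is an
# assumption): refit a Task's chosen ensemble on fresh data and block until
# the new Ensemble is ready.
new_ensemble = fireflyai.Task.refit(id=42, datasource_id=7, wait=True)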
def create_from_dataframe(cls, df, data_source_name: str, na_values: List[str] = None, wait: bool = False,
                          skip_if_exists: bool = False, api_key: str = None) -> FireflyResponse:
    """
    Creates a Datasource from a pandas DataFrame.

    Args:
        df (pandas.DataFrame): DataFrame object to upload to server.
        data_source_name (str): Name of the Datasource.
        na_values (Optional[List[str]]): List of user-specific Null values.
        wait (Optional[bool]): Should the call be synchronous or not.
        skip_if_exists (Optional[bool]): Check if a Datasource with the same name exists and skip if true.
        api_key (Optional[str]): Explicit api_key, not required if `fireflyai.authenticate` was run prior.

    Returns:
        FireflyResponse: Datasource ID, if successful and wait=False or Datasource if successful and
        wait=True; raises FireflyError otherwise.
    """
    data_source_name = data_source_name if data_source_name.endswith('.csv') else data_source_name + ".csv"
    existing_ds = cls.list(filter_={'name': [data_source_name]}, api_key=api_key)
    if existing_ds and existing_ds['total'] > 0:
        if skip_if_exists:
            return FireflyResponse(data=existing_ds['hits'][0])
        else:
            raise APIError("Datasource with that name exists")

    # Serialize the DataFrame to an in-memory CSV and stream it to S3.
    csv_buffer = io.StringIO()
    df.to_csv(csv_buffer, index=False)
    aws_credentials = cls.__get_upload_details(api_key=api_key)
    utils.s3_upload_stream(csv_buffer, data_source_name, aws_credentials)
    return cls._create(data_source_name, na_values=na_values, wait=wait, api_key=api_key)
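# Usage sketch (toy data; `fireflyai.Datasource` as the owning class is an
# assumption): upload an in-memory DataFrame and wait for preparation.
import pandas as pd

df = pd.DataFrame({'sepal_length': [5.1, 4.9], 'species': ['setosa', 'setosa']})
ds = fireflyai.Datasource.create_from_dataframe(df, data_source_name='iris', wait=True)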
def login(cls, email: str, password: str) -> FireflyResponse:
    """
    Authenticates user and stores temporary token in `fireflyai.token`.

    Other modules automatically detect if a token exists and use it, unless a user specifically provides a
    token for a specific request. The token is valid for a 24-hour period, after which this method needs to
    be called again in order to generate a new token.

    Args:
        email (str): Email.
        password (str): Password.

    Returns:
        FireflyResponse: Empty FireflyResponse if successful, raises FireflyError otherwise.
    """
    logging.info(' '.join(['login with user:', str(email)]))
    url = 'login'
    requestor = APIRequestor()
    try:
        response = requestor.post(url, body={'username': email, 'password': password, 'tnc': None},
                                  api_key="")
        fireflyai.token = response['token']
        my_token = UserToken(fireflyai.token)
        logging.info(' '.join(['user mail:', str(email), '- Login successful, User ID:',
                               str(my_token.get_user_id()),
                               'Account ID: ' + str(my_token.get_account_id())]))
    except Exception as ex:
        logging.warning(' '.join(['user mail:', str(email), '- Login FAILED']))
        raise ex
    return FireflyResponse(status_code=response.status_code, headers=response.headers)
def get_by_name(cls, name: str, api_key: str = None) -> FireflyResponse:
    """
    Gets information on a specific Dataset identified by its name.

    Information includes the state of the Dataset and other attributes.
    Similar to calling `fireflyai.Dataset.list(filter_={'name': [NAME]})`.

    Args:
        name (str): Dataset name.
        api_key (Optional[str]): Explicit api_key, not required if `fireflyai.authenticate` was run prior.

    Returns:
        FireflyResponse: Information about the Dataset.
    """
    resp = cls.list(filter_={'name': [name]}, api_key=api_key)
    if resp and 'total' in resp and resp['total'] > 0:
        ds = resp['hits'][0]
        return FireflyResponse(data=ds)
    else:
        raise APIError("Dataset with that name does not exist")
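# Usage sketch (hypothetical name): resolve a prepared Dataset by name to
# recover its ID for later calls.
dataset = fireflyai.Dataset.get_by_name('churn_train')
dataset_id = dataset['id']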
def get_feature_importance_report(cls, id: int, api_key: str = None) -> FireflyResponse:
    """
    Gets feature importance report for Ensemble.

    Args:
        id (int): Ensemble ID.
        api_key (Optional[str]): Explicit api_key, not required if `fireflyai.authenticate` was run prior.

    Returns:
        FireflyResponse: Contains mapping of feature importance for the ensemble_id.
    """
    requestor = APIRequestor()
    url = "reports/{prefix}/{id}/feature_importance".format(prefix=cls._CLASS_PREFIX, id=id)
    response = requestor.get(url=url, api_key=api_key)
    result = response.to_dict()
    cls.__cleanup_report(result)
    return FireflyResponse(data=result)
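# Usage sketch (hypothetical ID; `fireflyai.Ensemble` as the owning class and
# the exact report layout are assumptions): fetch and inspect the report.
report = fireflyai.Ensemble.get_feature_importance_report(id=101)
print(report.to_dict())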
def create(cls, ensemble_id: int, data_id: int = None, file_path: str = None, download_details: Dict = None,
           remove_header: bool = False, data_name: str = None, header: List = None, wait: bool = None,
           api_key: str = None) -> FireflyResponse:
    """
    Creates a prediction from a given Ensemble and prediction Datasource.

    The prediction Datasource should include all of the original features, without the target column
    (unless the Ensemble belongs to a timeseries Task). The prediction uses the Ensemble to produce the
    prediction's results file.

    Args:
        ensemble_id (int): Ensemble to use for the prediction.
        data_id (int): Datasource to run the prediction on.
        wait (Optional[bool]): Should the call be synchronous or not.
        api_key (Optional[str]): Explicit api_key, not required if `fireflyai.authenticate` was run prior.

    Returns:
        FireflyResponse: Prediction ID, if successful and wait=False or Prediction if successful and
        wait=True; raises FireflyError otherwise.
    """
    # Default the data name to the uploaded file's basename; parentheses keep a
    # user-supplied `data_name` from being discarded when `file_path` is None.
    data_name = data_name or (os.path.basename(file_path) if file_path else None)
    data = {
        "ensemble_id": ensemble_id,
        "datasource_id": data_id,
        "header": header,
        "data_name": data_name,
        "file_path": file_path,
        "remove_header": remove_header,
    }
    if download_details:
        data['download_details'] = download_details
    requestor = APIRequestor()
    response = requestor.post(url=cls._CLASS_PREFIX, body=data, api_key=api_key)
    id = response['id']
    if wait:
        utils.wait_for_finite_state(cls.get, id, state_field='stage', api_key=api_key)
        response = cls.get(id, api_key=api_key)
    else:
        response = FireflyResponse(data={'id': id})
    return response
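# Usage sketch (hypothetical IDs; `fireflyai.Prediction` as the owning class
# is an assumption): predict with a trained ensemble on an uploaded
# Datasource and wait for the results file to be produced.
prediction = fireflyai.Prediction.create(ensemble_id=101, data_id=7, wait=True)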
def get_model_sensitivity_report(cls, id: int, api_key: str = None) -> FireflyResponse:
    """
    Gets sensitivity report for Ensemble.

    Contains each feature's sensitivity score for missing values and feature values.

    Args:
        id (int): Ensemble ID.
        api_key (Optional[str]): Explicit api_key, not required if `fireflyai.authenticate` was run prior.

    Returns:
        FireflyResponse: Score for each feature in every sensitivity test.
    """
    requestor = APIRequestor()
    url = "reports/{prefix}/{id}/sensitivity".format(prefix=cls._CLASS_PREFIX, id=id)
    response = requestor.get(url=url, api_key=api_key)
    result = response.to_dict()
    cls.__cleanup_report(result)
    return FireflyResponse(data=result)
def create(cls, filename: str, na_values: List[str] = None, wait: bool = False, skip_if_exists: bool = False,
           api_key: str = None) -> FireflyResponse:
    """
    Uploads a file to the server to create a new Datasource.

    Args:
        filename (str): File to be uploaded.
        na_values (Optional[List[str]]): List of user-specific Null values.
        wait (Optional[bool]): Should the call be synchronous or not.
        skip_if_exists (Optional[bool]): Check if a Datasource with the same name exists and skip if true.
        api_key (Optional[str]): Explicit api_key, not required if `fireflyai.authenticate` was run prior.

    Returns:
        FireflyResponse: Datasource ID, if successful and wait=False or Datasource if successful and
        wait=True; raises FireflyError otherwise.
    """
    data_source_name = os.path.basename(filename)
    existing_ds = cls.list(filter_={'name': [data_source_name]}, api_key=api_key)
    if existing_ds and existing_ds['total'] > 0:
        if skip_if_exists:
            return FireflyResponse(data=existing_ds['hits'][0])
        else:
            raise InvalidRequestError("Datasource with that name already exists")

    aws_credentials = cls.__get_upload_details(api_key=api_key)
    utils.s3_upload(data_source_name, filename, aws_credentials.to_dict())
    return cls._create(data_source_name, na_values=na_values, wait=wait, api_key=api_key)
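# Usage sketch (hypothetical path; `fireflyai.Datasource` as the owning class
# is an assumption): upload a local CSV as a Datasource, skipping the upload
# if one with the same name already exists.
ds = fireflyai.Datasource.create('data/churn.csv', wait=True, skip_if_exists=True)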
def create(cls, datasource_id: int, dataset_name: str, target: str, problem_type: ProblemType,
           header: bool = True, na_values: List[str] = None, retype_columns: Dict[str, FeatureType] = None,
           rename_columns: List[str] = None, datetime_format: str = None, time_axis: str = None,
           block_id: List[str] = None, sample_id: List[str] = None, subdataset_id: List[str] = None,
           sample_weight: List[str] = None, not_used: List[str] = None, hidden: List[str] = None,
           wait: bool = False, skip_if_exists: bool = False, api_key: str = None) -> FireflyResponse:
    """
    Creates and prepares a Dataset.

    While creating a Dataset, the feature roles are labeled and the feature types can be set by the user.
    Data analysis is done in order to optimize the model training and search process.

    Args:
        datasource_id (int): Datasource ID.
        dataset_name (str): The name of the Dataset.
        target (str): The name of the target feature, or its column index if header=False.
        problem_type (ProblemType): The problem type.
        header (bool): Does the file include a header row or not.
        na_values (Optional[List[str]]): List of user-specific Null values.
        retype_columns (Dict[str, FeatureType]): Change the types of certain columns.
        rename_columns (Optional[List[str]]): ??? #TODO
        datetime_format (Optional[str]): The datetime format used in the data.
        time_axis (Optional[str]): In timeseries problems, the feature that is the time axis.
        block_id (Optional[List[str]]): To avoid data leakage, data can be split into blocks. Rows with
            the same `block_id` must all be in the train set or the test set. Requires at least 50 unique
            values in the data.
        sample_id (Optional[List[str]]): Row identifier.
        subdataset_id (Optional[List[str]]): Features which specify a subdataset ID in the data.
        sample_weight (Optional[List[str]]): ??? #TODO
        not_used (Optional[List[str]]): List of features to ignore.
        hidden (Optional[List[str]]): List of features to mark as hidden.
        wait (Optional[bool]): Should the call be synchronous or not.
        skip_if_exists (Optional[bool]): Check if a Dataset with the same name exists and skip if true.
        api_key (Optional[str]): Explicit api_key, not required if `fireflyai.authenticate` was run prior.

    Returns:
        FireflyResponse: Dataset ID, if successful and wait=False or Dataset if successful and wait=True;
        raises FireflyError otherwise.
    """
    existing_ds = cls.list(filter_={'name': [dataset_name]}, api_key=api_key)
    if existing_ds and existing_ds['total'] > 0:
        if skip_if_exists:
            return FireflyResponse(data=existing_ds['hits'][0])
        else:
            raise InvalidRequestError("Dataset with that name already exists")

    data = {
        "name": dataset_name,
        "data_id": datasource_id,
        "header": header,
        "problem_type": problem_type.value if problem_type is not None else None,
        "hidden": hidden,
        "na_values": na_values,
        "retype_columns": {key: retype_columns[key].value for key in retype_columns}
                          if retype_columns is not None else None,
        "datetime_format": datetime_format,
        "target": target,
        "time_axis": time_axis,
        "block_id": block_id,
        "sample_id": sample_id,
        "subdataset_id": subdataset_id,
        "sample_weight": sample_weight,
        "not_used": not_used,
        "rename_columns": rename_columns,
    }
    requestor = APIRequestor()
    response = requestor.post(url=cls._CLASS_PREFIX, body=data, api_key=api_key)

    if wait:
        id = response['id']
        utils.wait_for_finite_state(cls.get, id, api_key=api_key)
        response = cls.get(id, api_key=api_key)

    return response
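# Usage sketch (hypothetical names/IDs; the exact ProblemType member is an
# assumption): prepare a classification Dataset from an uploaded Datasource.
dataset = fireflyai.Dataset.create(
    datasource_id=7,
    dataset_name='churn_train',
    target='churned',
    problem_type=ProblemType.CLASSIFICATION,
    wait=True,
)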
def create(cls, name: str, dataset_id: int, estimators: List[Estimator] = None,
           target_metric: TargetMetric = None, splitting_strategy: SplittingStrategy = None,
           notes: str = None, ensemble_size: int = None, max_models_num: int = None,
           single_model_timeout: int = None, pipeline: List[Pipeline] = None, prediction_latency: int = None,
           interpretability_level: InterpretabilityLevel = None, timeout: int = 7200,
           cost_matrix_weights: List[List[str]] = None, train_size: float = None, test_size: float = None,
           validation_size: float = None, fold_size: int = None, n_folds: int = None, horizon: int = None,
           validation_strategy: ValidationStrategy = None, cv_strategy: CVStrategy = None,
           forecast_horizon: int = None, model_life_time: int = None, refit_on_all: bool = None,
           wait: bool = False, skip_if_exists: bool = False, leaky_features: List[str] = None,
           api_key: str = None) -> FireflyResponse:
    """
    Creates and runs a training task.

    A task is responsible for searching for hyper-parameters that would maximize the model scores. The
    task constructs ensembles made of selected models; seeking ways to combine different models allows
    for smarter decision making.

    Args:
        name (str): Task's name.
        dataset_id (int): Dataset ID of the training data.
        estimators (List[Estimator]): Estimators to use in the train task.
        target_metric (TargetMetric): The target metric is the metric the model hyperparameter search
            process attempts to optimize.
        splitting_strategy (SplittingStrategy): Splitting strategy of the data.
        notes (Optional[str]): Notes of the task.
        ensemble_size (Optional[int]): Maximum number of models in an ensemble.
        max_models_num (Optional[int]): Maximum number of models to train.
        single_model_timeout (Optional[int]): Maximum time for training one model.
        pipeline (Optional[List[Pipeline]]): Possible pipeline steps.
        prediction_latency (Optional[int]): Maximum number of seconds ensemble prediction should take.
        interpretability_level (Optional[InterpretabilityLevel]): Determines how interpretable your
            ensemble is. A higher level of interpretability leads to more interpretable ensembles.
        timeout (Optional[int]): Timeout in seconds for the search process (default: 2 hours).
        cost_matrix_weights (Optional[List[List[str]]]): For classification and anomaly detection
            problems, the weights allow determining a custom cost metric, which assigns different weights
            to the entries of the confusion matrix.
        train_size (Optional[float]): The ratio of data taken for the train set of the model.
        test_size (Optional[float]): The ratio of data taken for the test set of the model.
        validation_size (Optional[float]): The ratio of data taken for the validation set of the model.
        fold_size (Optional[int]): Fold size when performing cross-validation splitting.
        n_folds (Optional[int]): Number of folds when performing cross-validation splitting.
        validation_strategy (Optional[ValidationStrategy]): Validation strategy used for the train task.
        cv_strategy (Optional[CVStrategy]): Cross-validation strategy to use for the train task.
        horizon (Optional[int]): DEPRECATED. Please use `forecast_horizon` and `model_life_time`.
        forecast_horizon (Optional[int]): Something related to time-series models.
        model_life_time (Optional[int]): Something related to time-series models.
        refit_on_all (Optional[bool]): Determines if the final ensemble will be refit on all data after
            the search process is done.
        leaky_features (Optional[List[str]]): Features to mark as leaky.
        wait (Optional[bool]): Should the call be synchronous or not.
        skip_if_exists (Optional[bool]): Check if a Task with the same name exists and skip if true.
        api_key (Optional[str]): Explicit api_key, not required if `fireflyai.authenticate` was run prior.

    Returns:
        FireflyResponse: Task ID, if successful and wait=False or Task if successful and wait=True;
        raises FireflyError otherwise.
    """
    if horizon is not None:
        fireflyai.logger.warning(
            "Parameter `horizon` is DEPRECATED. Please use `forecast_horizon` and `model_life_time`.")

    existing_ds = cls.list(filter_={'name': [name]}, api_key=api_key)
    if existing_ds and existing_ds['total'] > 0:
        if skip_if_exists:
            return FireflyResponse(data=existing_ds['hits'][0])
        else:
            raise InvalidRequestError("Task with that name already exists")

    dataset = fireflyai.Dataset.get(id=dataset_id, api_key=api_key)
    problem_type = ProblemType(dataset['problem_type'])
    # Start from server-side defaults for this dataset/problem type, then
    # overlay whatever the user explicitly provided.
    task_config = cls._get_config_defaults(dataset_id=dataset_id, problem_type=problem_type,
                                           inter_level=interpretability_level)
    user_config = {
        'dataset_id': dataset_id,
        'name': name,
        'estimators': [e.value for e in estimators] if estimators is not None else None,
        'target_metric': target_metric.value if target_metric is not None else None,
        'splitting_strategy': splitting_strategy.value if splitting_strategy is not None else None,
        'ensemble_size': ensemble_size,
        'max_models_num': max_models_num,
        'single_model_timeout': single_model_timeout,
        'pipeline': [p.value for p in pipeline] if pipeline is not None else None,
        'prediction_latency': prediction_latency,
        'interpretability_level': interpretability_level.value
                                  if interpretability_level is not None else None,
        'timeout': timeout,
        'cost_matrix_weights': cost_matrix_weights,
        'train_size': train_size,
        'test_size': test_size,
        'validation_size': validation_size,
        'cv_strategy': cv_strategy.value if cv_strategy is not None else None,
        'n_folds': n_folds,
        'forecast_horizon': forecast_horizon,
        'model_life_time': model_life_time,
        'fold_size': fold_size,
        'validation_strategy': validation_strategy.value if validation_strategy is not None else None,
        'notes': notes,
        'leaky_features': leaky_features,
        'refit_on_all': refit_on_all,
    }
    task_config.update({k: v for k, v in user_config.items() if v is not None})

    requestor = APIRequestor()
    response = requestor.post(url=cls._CLASS_PREFIX, body=task_config, api_key=api_key)
    id = response['task_id']
    if wait:
        utils.wait_for_finite_state(cls.get, id, max_time=timeout, api_key=api_key)
        response = cls.get(id, api_key=api_key)
    else:
        response = FireflyResponse(data={'id': id})
    return response
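# Usage sketch (hypothetical names/IDs; `fireflyai.Task` as the owning class
# is an assumption): launch a search with a one-hour budget and poll later.
task = fireflyai.Task.create(name='churn_baseline', dataset_id=12,
                             timeout=3600, wait=False)
task_id = task['id']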