def get(self, dependency):
    '''Install an optional dependency group via pip'''
    if dependency == 'snowflake':
        dependency = ['snowflake-connector-python[pandas]', 'asn1crypto==1.3.0']
    elif dependency == 'athena':
        dependency = ['PyAthena >= 2.0.0']
    elif dependency == 'google':
        dependency = ['google-cloud-storage', 'google-auth']
    elif dependency == 's3':
        dependency = ['boto3 >= 1.9.0']
    elif dependency == 'lightgbm_gpu':
        dependency = ['lightgbm', '--install-option=--gpu', '--upgrade']
    elif dependency == 'mssql':
        dependency = ['pymssql >= 2.1.4']
    elif dependency == 'cassandra':
        dependency = ['cassandra-driver']
    elif dependency == 'scylladb':
        dependency = ['scylla-driver']
    else:
        return f'Unknown dependency: {dependency}', 400

    outs = b''
    errs = b''
    try:
        sp = subprocess.Popen(
            [sys.executable, '-m', 'pip', 'install', *dependency],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE
        )
        code = sp.wait()
        outs, errs = sp.communicate(timeout=1)
    except Exception as e:
        return http_error(500, 'Failed to install dependency', str(e))

    if code != 0:
        output = ''
        if isinstance(outs, bytes) and len(outs) > 0:
            output = output + 'Output: ' + outs.decode()
        if isinstance(errs, bytes) and len(errs) > 0:
            if len(output) > 0:
                output = output + '\n'
            output = output + 'Errors: ' + errs.decode()
        return http_error(500, 'Failed to install dependency', output)

    return 'Installed', 200
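# --- illustrative sketch only, not part of the handler above ---
# A minimal, standalone approximation of the install step for the 'snowflake' branch,
# run outside the HTTP server; it uses subprocess.run instead of Popen for brevity.
import subprocess
import sys

result = subprocess.run(
    [sys.executable, '-m', 'pip', 'install',
     'snowflake-connector-python[pandas]', 'asn1crypto==1.3.0'],
    stdout=subprocess.PIPE, stderr=subprocess.PIPE
)
if result.returncode != 0:
    # mirror the handler's behaviour of surfacing pip's stderr on failure
    print(result.stderr.decode())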
def delete(self, name):
    '''delete datasource'''
    try:
        request.default_store.delete_datasource(name)
    except Exception as e:
        log.error(e)
        return http_error(
            400, "Error deleting datasource",
            f"There was an error while trying to delete datasource with name '{name}'"
        )
    return '', 200
def put(self, name):
    '''Learning new predictor'''
    data = request.json
    to_predict = data.get('to_predict')

    try:
        kwargs = data.get('kwargs')
    except Exception:
        kwargs = None

    if isinstance(kwargs, dict) is False:
        kwargs = {}

    if 'equal_accuracy_for_all_output_categories' not in kwargs:
        kwargs['equal_accuracy_for_all_output_categories'] = True

    if 'advanced_args' not in kwargs:
        kwargs['advanced_args'] = {}

    if 'use_selfaware_model' not in kwargs['advanced_args']:
        kwargs['advanced_args']['use_selfaware_model'] = False

    try:
        retrain = data.get('retrain')
        if retrain in ('true', 'True'):
            retrain = True
        else:
            retrain = False
    except Exception:
        retrain = None

    ds_name = data.get('data_source_name')
    from_ds = data.get('from')
    delete_ds_on_fail = False

    if ds_name is not None:
        ds = request.default_store.get_datasource_obj(ds_name, raw=True)
        if ds is None:
            return http_error(
                400, 'Datasource does not exist',
                f'Cannot find datasource: {ds_name}'
            )
    elif isinstance(from_ds, dict):
        if 'datasource' not in from_ds or 'query' not in from_ds:
            return http_error(
                400, 'Wrong arguments',
                "'from' must contain 'datasource' and 'query'"
            )
        delete_ds_on_fail = True
        ds_name = request.default_store.get_vacant_name(name)

        if request.integration_controller.get(from_ds['datasource']) is None:
            return http_error(
                400, 'Datasource does not exist',
                f"Datasource does not exist: {from_ds['datasource']}"
            )
        ds = request.default_store.save_datasource(ds_name, from_ds['datasource'], {'query': from_ds['query']})
    else:
        return http_error(
            400, 'Wrong arguments',
            "query must contain 'data_source_name' or 'from'"
        )

    if retrain is True:
        original_name = name
        name = name + '_retrained'

    model_names = [x['name'] for x in request.model_interface.get_models()]
    if name in model_names:
        return http_error(
            409, f"Predictor '{name}' already exists",
            f"Predictor with name '{name}' already exists. Each predictor must have a unique name."
        )

    request.model_interface.learn(
        name, ds, to_predict,
        request.default_store.get_datasource(ds_name)['id'],
        kwargs=kwargs,
        delete_ds_on_fail=delete_ds_on_fail
    )

    if retrain is True:
        try:
            request.model_interface.delete_model(original_name)
            request.model_interface.rename_model(name, original_name)
        except Exception:
            pass

    return '', 200
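# --- illustrative sketch only, not part of the handler above ---
# An example (hypothetical names) of the JSON body this PUT handler expects:
# either an existing 'data_source_name', or a 'from' dict with 'datasource' and 'query'.
example_learn_request = {
    'to_predict': 'target_column',            # hypothetical target column
    'data_source_name': 'my_datasource',      # hypothetical existing datasource
    # alternatively:
    # 'from': {'datasource': 'my_db', 'query': 'SELECT * FROM my_table'},
    'retrain': 'false',
    'kwargs': {
        'advanced_args': {'use_selfaware_model': False}
    }
}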
def put(self, name):
    '''add new datasource'''
    data = {}

    def on_field(field):
        name = field.field_name.decode()
        value = field.value.decode()
        data[name] = value

    file_object = None

    def on_file(file):
        nonlocal file_object
        data['file'] = file.file_name.decode()
        file_object = file.file_object

    temp_dir_path = tempfile.mkdtemp(prefix='datasource_file_')

    if request.headers['Content-Type'].startswith('multipart/form-data'):
        parser = multipart.create_form_parser(
            headers=request.headers,
            on_field=on_field,
            on_file=on_file,
            config={
                'UPLOAD_DIR': temp_dir_path.encode(),  # bytes required
                'UPLOAD_KEEP_FILENAME': True,
                'UPLOAD_KEEP_EXTENSIONS': True,
                'MAX_MEMORY_FILE_SIZE': 0
            }
        )

        while True:
            chunk = request.stream.read(8192)
            if not chunk:
                break
            parser.write(chunk)
        parser.finalize()
        parser.close()

        if file_object is not None and not file_object.closed:
            file_object.close()
    else:
        data = request.json

    if 'query' in data:
        integration_id = request.json['integration_id']
        integration = get_db_integration(integration_id, request.company_id)
        if integration is None:
            abort(400, f"{integration_id} integration doesn't exist")

        if integration['type'] == 'mongodb':
            data['find'] = data['query']

        request.default_store.save_datasource(name, integration_id, data)
        os.rmdir(temp_dir_path)
        return request.default_store.get_datasource(name)

    ds_name = data['name'] if 'name' in data else name
    source = data['source'] if 'source' in data else name
    source_type = data['source_type']

    if source_type == 'file':
        file_path = os.path.join(temp_dir_path, data['file'])
        lp = file_path.lower()
        if lp.endswith(('.zip', '.tar.gz')):
            if lp.endswith('.zip'):
                with zipfile.ZipFile(file_path) as f:
                    f.extractall(temp_dir_path)
            elif lp.endswith('.tar.gz'):
                with tarfile.open(file_path) as f:
                    f.extractall(temp_dir_path)
            os.remove(file_path)
            files = os.listdir(temp_dir_path)
            if len(files) != 1:
                os.rmdir(temp_dir_path)
                return http_error(
                    400, 'Wrong content.',
                    'Archive must contain only one data file.'
                )
            file_path = os.path.join(temp_dir_path, files[0])
            source = files[0]
            if not os.path.isfile(file_path):
                os.rmdir(temp_dir_path)
                return http_error(
                    400, 'Wrong content.',
                    'Archive must contain the data file in its root.'
                )
    else:
        file_path = None

    request.default_store.save_datasource(ds_name, source_type, source, file_path)
    os.rmdir(temp_dir_path)

    return request.default_store.get_datasource(ds_name)
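# --- illustrative sketch only, not part of the handler above ---
# An example (hypothetical names) of the JSON body handled by the non-multipart branch:
# a datasource backed by an integration query. For the multipart branch, the form
# fields mirror the keys read from `data` above ('name', 'source_type', 'file', ...).
example_datasource_request = {
    'integration_id': 'my_postgres',      # hypothetical integration name
    'query': 'SELECT * FROM my_table'     # copied to 'find' for mongodb integrations
}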
def put(self, name):
    '''Learning new predictor'''
    data = request.json
    to_predict = data.get('to_predict')

    try:
        kwargs = data.get('kwargs')
    except Exception:
        kwargs = None

    if isinstance(kwargs, dict) is False:
        kwargs = {}

    if 'equal_accuracy_for_all_output_categories' not in kwargs:
        kwargs['equal_accuracy_for_all_output_categories'] = True

    if 'advanced_args' not in kwargs:
        kwargs['advanced_args'] = {}

    if 'use_selfaware_model' not in kwargs['advanced_args']:
        kwargs['advanced_args']['use_selfaware_model'] = False

    try:
        retrain = data.get('retrain')
        if retrain in ('true', 'True'):
            retrain = True
        else:
            retrain = False
    except Exception:
        retrain = None

    ds_name = data.get('data_source_name') if data.get('data_source_name') is not None else data.get('from_data')
    from_data = request.default_store.get_datasource_obj(ds_name, raw=True)

    if from_data is None:
        return {'message': f'Cannot find datasource: {ds_name}'}, 400

    if retrain is True:
        original_name = name
        name = name + '_retrained'

    model_names = [x['name'] for x in request.model_interface.get_models()]
    if name in model_names:
        return http_error(
            409, f"Predictor '{name}' already exists",
            f"Predictor with name '{name}' already exists. Each predictor must have a unique name."
        )

    request.model_interface.learn(
        name, from_data, to_predict,
        request.default_store.get_datasource(ds_name)['id'],
        kwargs=kwargs
    )

    for i in range(20):
        try:
            # Dirty hack, we should use a messaging queue between the predictor process and this bit of the code
            request.model_interface.get_model_data(name)
            break
        except Exception:
            time.sleep(1)

    if retrain is True:
        try:
            request.model_interface.delete_model(original_name)
            request.model_interface.rename_model(name, original_name)
        except Exception:
            pass

    return '', 200
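# --- illustrative sketch only, not part of the handler above ---
# An example (hypothetical names) of the JSON body for this older variant, which falls
# back to the legacy 'from_data' key when 'data_source_name' is not provided.
example_learn_request_legacy = {
    'to_predict': 'target_column',     # hypothetical target column
    'from_data': 'my_datasource',      # used because 'data_source_name' is absent
    'retrain': 'false'
}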