Example #1
    def get(self, dependency):
        if dependency == 'snowflake':
            dependency = [
                'snowflake-connector-python[pandas]', 'asn1crypto==1.3.0'
            ]
        elif dependency == 'athena':
            dependency = ['PyAthena >= 2.0.0']
        elif dependency == 'google':
            dependency = ['google-cloud-storage', 'google-auth']
        elif dependency == 's3':
            dependency = ['boto3 >= 1.9.0']
        elif dependency == 'lightgbm_gpu':
            dependency = ['lightgbm', '--install-option=--gpu', '--upgrade']
        elif dependency == 'mssql':
            dependency = ['pymssql >= 2.1.4']
        elif dependency == 'cassandra':
            dependency = ['cassandra-driver']
        elif dependency == 'scylladb':
            dependency = ['scylla-driver']
        else:
            return f'Unknown dependency: {dependency}', 400

        outs = b''
        errs = b''
        try:
            sp = subprocess.Popen(
                [sys.executable, '-m', 'pip', 'install', *dependency],
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE)
            # communicate() drains both pipes and waits for the process to
            # exit; calling wait() first can deadlock once a pipe buffer fills
            outs, errs = sp.communicate()
            code = sp.returncode
        except Exception as e:
            return http_error(500, 'Failed to install dependency', str(e))

        if code != 0:
            output = ''
            if isinstance(outs, bytes) and len(outs) > 0:
                output = output + 'Output: ' + outs.decode()
            if isinstance(errs, bytes) and len(errs) > 0:
                if len(output) > 0:
                    output = output + '\n'
                output = output + 'Errors: ' + errs.decode()
            return http_error(500, 'Failed to install dependency', output)

        return 'Installed', 200
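
A minimal client sketch for this handler. The route (/api/util/install/<dependency>) and port are assumptions for illustration; they are not shown in the snippet:

    import requests

    # ask the server to pip-install the 's3' dependency bundle (boto3)
    resp = requests.get('http://127.0.0.1:47334/api/util/install/s3')
    print(resp.status_code, resp.text)  # 200 and 'Installed' on success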
Example #2
    def delete(self, name):
        '''delete datasource'''

        try:
            request.default_store.delete_datasource(name)
        except Exception as e:
            log.error(e)
            return http_error(
                400, "Error deleting datasource",
                f"There was an error while tring to delete datasource with name '{name}'"
            )
        return '', 200
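
A usage sketch, assuming a hypothetical route of /api/datasources/<name>:

    import requests

    # delete the datasource named 'my_ds'; the handler returns '' with 200
    resp = requests.delete('http://127.0.0.1:47334/api/datasources/my_ds')
    assert resp.status_code == 200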
Example #3
    def put(self, name):
        '''Train a new predictor'''
        data = request.json
        to_predict = data.get('to_predict')

        kwargs = data.get('kwargs')
        if not isinstance(kwargs, dict):
            kwargs = {}

        if 'equal_accuracy_for_all_output_categories' not in kwargs:
            kwargs['equal_accuracy_for_all_output_categories'] = True

        if 'advanced_args' not in kwargs:
            kwargs['advanced_args'] = {}

        if 'use_selfaware_model' not in kwargs['advanced_args']:
            kwargs['advanced_args']['use_selfaware_model'] = False

        # 'retrain' arrives as a string flag in the request body
        retrain = data.get('retrain') in ('true', 'True')

        ds_name = data.get('data_source_name')
        from_ds = data.get('from')
        delete_ds_on_fail = False
        if ds_name is not None:
            ds = request.default_store.get_datasource_obj(ds_name, raw=True)
            if ds is None:
                return http_error(
                    400,
                    'Datasource does not exist',
                    f'Cannot find datasource: {ds_name}'
                )
        elif isinstance(from_ds, dict):
            if 'datasource' not in from_ds or 'query' not in from_ds:
                return http_error(
                    400,
                    'Wrong arguments',
                    "'from' must contain 'datasource' and 'query'"
                )
            delete_ds_on_fail = True
            ds_name = request.default_store.get_vacant_name(name)

            if request.integration_controller.get(from_ds['datasource']) is None:
                return http_error(
                    400,
                    'Datasource does not exist',
                    f"Datasource does not exist: {from_ds['datasource']}"
                )

            ds = request.default_store.save_datasource(ds_name, from_ds['datasource'], {'query': from_ds['query']})
        else:
            return http_error(
                400,
                'Wrong arguments',
                "query must contain 'data_source_name' or 'from'"
            )

        if retrain is True:
            original_name = name
            name = name + '_retrained'

        model_names = [x['name'] for x in request.model_interface.get_models()]
        if name in model_names:
            return http_error(
                409,
                f"Predictor '{name}' already exists",
                f"Predictor with name '{name}' already exists. Each predictor must have unique name."
            )

        request.model_interface.learn(
            name, ds, to_predict, request.default_store.get_datasource(ds_name)['id'],
            kwargs=kwargs, delete_ds_on_fail=delete_ds_on_fail
        )

        if retrain is True:
            try:
                request.model_interface.delete_model(original_name)
                request.model_interface.rename_model(name, original_name)
            except Exception:
                pass

        return '', 200
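
A sketch of the JSON body this handler expects. The route (/api/predictors/<name>) and host are assumptions; the field names come from the data.get() calls above:

    import requests

    payload = {
        'to_predict': 'rental_price',        # target column to learn
        'data_source_name': 'home_rentals',  # must name an existing datasource
        'kwargs': {},                        # optional learn parameters
    }
    resp = requests.put(
        'http://127.0.0.1:47334/api/predictors/home_rentals_model',
        json=payload,
    )
    print(resp.status_code)  # 200, or 409 if the predictor name is taken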
Example #4
    def put(self, name):
        '''add new datasource'''
        data = {}

        def on_field(field):
            name = field.field_name.decode()
            value = field.value.decode()
            data[name] = value

        file_object = None

        def on_file(file):
            nonlocal file_object
            data['file'] = file.file_name.decode()
            file_object = file.file_object

        temp_dir_path = tempfile.mkdtemp(prefix='datasource_file_')

        if request.headers['Content-Type'].startswith('multipart/form-data'):
            parser = multipart.create_form_parser(
                headers=request.headers,
                on_field=on_field,
                on_file=on_file,
                config={
                    'UPLOAD_DIR': temp_dir_path.encode(),  # bytes required
                    'UPLOAD_KEEP_FILENAME': True,
                    'UPLOAD_KEEP_EXTENSIONS': True,
                    'MAX_MEMORY_FILE_SIZE': 0
                })

            while True:
                chunk = request.stream.read(8192)
                if not chunk:
                    break
                parser.write(chunk)
            parser.finalize()
            parser.close()

            if file_object is not None and not file_object.closed:
                file_object.close()
        else:
            data = request.json

        if 'query' in data:
            # read from the parsed form data so multipart requests work too
            integration_id = data['integration_id']
            integration = get_db_integration(integration_id,
                                             request.company_id)
            if integration is None:
                abort(400, f"{integration_id} integration doesn't exist")

            if integration['type'] == 'mongodb':
                data['find'] = data['query']

            request.default_store.save_datasource(name, integration_id, data)
            shutil.rmtree(temp_dir_path)  # rmtree: the dir may still hold an upload
            return request.default_store.get_datasource(name)

        ds_name = data.get('name', name)
        source = data.get('source', name)
        source_type = data['source_type']

        if source_type == 'file':
            file_path = os.path.join(temp_dir_path, data['file'])
            lp = file_path.lower()
            if lp.endswith(('.zip', '.tar.gz')):
                if lp.endswith('.zip'):
                    with zipfile.ZipFile(file_path) as f:
                        f.extractall(temp_dir_path)
                elif lp.endswith('.tar.gz'):
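                    # note: extractall() on an untrusted tar archive permits
                    # path traversal; member names should be validated first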
                    with tarfile.open(file_path) as f:
                        f.extractall(temp_dir_path)
                os.remove(file_path)
                files = os.listdir(temp_dir_path)
                if len(files) != 1:
                    shutil.rmtree(temp_dir_path)
                    return http_error(
                        400, 'Wrong content.',
                        'Archive must contain only one data file.')
                file_path = os.path.join(temp_dir_path, files[0])
                source = files[0]
                if not os.path.isfile(file_path):
                    shutil.rmtree(temp_dir_path)
                    return http_error(
                        400, 'Wrong content.',
                        'Archive must contain data file in root.')
        else:
            file_path = None

        request.default_store.save_datasource(ds_name, source_type, source,
                                              file_path)
        shutil.rmtree(temp_dir_path)

        return request.default_store.get_datasource(ds_name)
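
A sketch of a multipart upload this handler would parse. The route is assumed, as above; the field names ('name', 'source_type', 'source', 'file') match what on_field and on_file record into data:

    import requests

    with open('data.csv', 'rb') as f:
        resp = requests.put(
            'http://127.0.0.1:47334/api/datasources/my_csv',
            data={'name': 'my_csv', 'source_type': 'file', 'source': 'data.csv'},
            files={'file': ('data.csv', f)},  # written under UPLOAD_DIR
        )
    print(resp.status_code)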
Example #5
    def put(self, name):
        '''Train a new predictor'''
        data = request.json
        to_predict = data.get('to_predict')

        kwargs = data.get('kwargs')
        if not isinstance(kwargs, dict):
            kwargs = {}

        if 'equal_accuracy_for_all_output_categories' not in kwargs:
            kwargs['equal_accuracy_for_all_output_categories'] = True

        if 'advanced_args' not in kwargs:
            kwargs['advanced_args'] = {}

        if 'use_selfaware_model' not in kwargs['advanced_args']:
            kwargs['advanced_args']['use_selfaware_model'] = False

        # 'retrain' arrives as a string flag in the request body
        retrain = data.get('retrain') in ('true', 'True')

        # fall back to 'from_data' when 'data_source_name' is not provided
        ds_name = data.get('data_source_name')
        if ds_name is None:
            ds_name = data.get('from_data')
        from_data = request.default_store.get_datasource_obj(ds_name, raw=True)

        if from_data is None:
            return {'message': f'Cannot find datasource: {ds_name}'}, 400

        if retrain is True:
            original_name = name
            name = name + '_retrained'

        model_names = [x['name'] for x in request.model_interface.get_models()]
        if name in model_names:
            return http_error(
                409, f"Predictor '{name}' already exists",
                f"Predictor with name '{name}' already exists. Each predictor must have unique name."
            )

        request.model_interface.learn(
            name,
            from_data,
            to_predict,
            request.default_store.get_datasource(ds_name)['id'],
            kwargs=kwargs)
        for _ in range(20):
            try:
                # dirty hack: a message queue between the predictor process
                # and this code would be the proper mechanism
                request.model_interface.get_model_data(name)
                break
            except Exception:
                time.sleep(1)

        if retrain is True:
            try:
                request.model_interface.delete_model(original_name)
                request.model_interface.rename_model(name, original_name)
            except Exception:
                pass

        return '', 200
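
This variant differs from Example #3 mainly in how it resolves the datasource ('data_source_name' with a 'from_data' fallback) and in the readiness polling loop. A sketch of triggering a retrain through it, with the same route assumption; note that 'retrain' is a string flag:

    import requests

    resp = requests.put(
        'http://127.0.0.1:47334/api/predictors/home_rentals_model',
        json={
            'to_predict': 'rental_price',
            'data_source_name': 'home_rentals',
            'retrain': 'true',  # string, not boolean, per the handler above
        },
    )
    print(resp.status_code)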