def _put_update_xml_action(self, **kwargs):
    """
    Replaces the import handler's XML definition.

    Deletes every child object derived from the previous XML
    (entities, datasources, input parameters and scripts) and stores
    the new XML payload on the handler. Returns a 405 error response
    when the handler can't be modified, or a 400 error response when
    deleting/updating fails.
    """
    handler = self._get_details_query(None, **kwargs)
    if not handler._can_modify():
        return odesk_error_response(405, ERR_INVALID_METHOD,
                                    handler.reason_msg)
    form = XmlImportHandlerUpdateXmlForm(obj={})
    if not form.is_valid():
        return self._render({'error': form.error_messages})
    try:
        # Drop every object generated from the old XML; they are
        # superseded by the new document stored below.
        for e in XmlEntity.query.filter_by(import_handler=handler).all():
            e.delete()
        for ds in XmlDataSource.query.filter_by(
                import_handler=handler).all():
            ds.delete()
        for ip in XmlInputParameter.query.filter_by(
                import_handler=handler).all():
            ip.delete()
        for s in XmlScript.query.filter_by(import_handler=handler).all():
            s.delete()
        handler.data = form.cleaned_data['data']
    except Exception as exc:  # was Py2-only "except Exception, exc"
        return odesk_error_response(400, ERR_INVALID_DATA, str(exc), exc)
    # NOTE(review): nothing is returned or saved on success here —
    # presumably the caller commits/renders; confirm.
def post(self, action=None, **kwargs):
    """
    Handles the auth-related POST actions.

    Supported actions:
      * get_auth_url  -- starts the OAuth dance and stores the token;
      * authenticate  -- finishes the OAuth dance with the verifier;
      * get_user      -- returns the currently authenticated user.
    Any other action results in a 404.
    """
    if action == 'get_auth_url':
        auth_url, oauth_token, oauth_token_secret = \
            User.get_auth_url()
        # TODO: Use redis?
        # app.db['auth_tokens'].insert({
        #     'oauth_token': oauth_token,
        #     'oauth_token_secret': oauth_token_secret,
        # })
        AuthToken(oauth_token, oauth_token_secret).save()
        logging.debug("User Auth: oauth token %s added", oauth_token)
        return self._render({'auth_url': auth_url})

    if action == 'authenticate':
        parser = reqparse.RequestParser()
        parser.add_argument('oauth_token', type=str)
        parser.add_argument('oauth_verifier', type=str)
        args = parser.parse_args()
        oauth_token = args.get('oauth_token')
        oauth_verifier = args.get('oauth_verifier')
        logging.debug("User Auth: trying to authenticate with token %s",
                      oauth_token)
        # TODO: Use redis?
        stored_auth = AuthToken.get_auth(oauth_token)
        if not stored_auth:
            logging.error('User Auth: token %s not found', oauth_token)
            return odesk_error_response(
                500, 500, 'Wrong token: {0!s}'.format(oauth_token))
        auth_token, user = User.authenticate(
            oauth_token,
            stored_auth.get('oauth_token_secret'),
            oauth_verifier)
        logging.debug('User Auth: Removing token %s', oauth_token)
        AuthToken.delete(stored_auth.get('oauth_token'))
        return self._render({'auth_token': auth_token, 'user': user})

    if action == 'get_user':
        current_user = getattr(request, 'user', None)
        if not current_user:
            return odesk_error_response(401, 401, 'Unauthorized')
        return self._render({'user': current_user})

    logging.error('User Auth: invalid action %s', action)
    raise NotFound('Action not found')
def delete(self, action=None, **kwargs):
    """
    Hides the stored object: sets the 'hide' metadata flag on it and
    schedules a server-side refresh. Returns 204 on success, a 404
    error response when the S3 object is missing and a 500 error
    response on other client errors.
    """
    server = self._get_server(kwargs)
    uid = self._get_uid(kwargs)
    folder = self._get_folder(kwargs)
    try:
        server.set_key_metadata(uid, folder, 'hide', 'True')
        from .tasks import update_at_server
        update_at_server.delay('{0}/{1}'.format(folder, uid), server.id)
    except AmazonS3ObjectNotFound as err:
        return odesk_error_response(404, 1001, str(err), err)
    except ClientError as err:
        return odesk_error_response(500, 1006, str(err), err)
    return '', 204
def _post_clone_action(self, **kwargs):
    """
    Clones the import handler under a timestamped name.

    When the requester cannot modify the original handler, secret
    credentials (amazon tokens, db passwords) are blanked out in the
    cloned XML; otherwise the XML is copied verbatim.
    """
    from datetime import datetime
    handler = self._get_details_query(None, **kwargs)
    name = "{0} clone: {1}".format(
        handler.name, datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
    new_handler = XmlImportHandler(name=name)
    try:
        import xml.etree.ElementTree as ET
        # NOTE(review): secrets are stripped only when _can_modify()
        # returns False -- confirm this inversion is intended.
        if not handler._can_modify():
            data = handler.data
            e = ET.fromstring(data)
            datasources = e.find('datasources')
            # `is not None` is required: an Element with no children
            # is falsy, so a plain truth test would skip it (and
            # Element truth-testing is deprecated by ElementTree).
            if datasources is not None:
                for ds in datasources.iter('*'):
                    if ds.tag == 'pig':
                        ds.set('amazon_access_token', '')
                        ds.set('amazon_token_secret', '')
                    if ds.tag == 'db':
                        ds.set('password', '')
                data = ET.tostring(e)
            new_handler.data = data
        else:
            new_handler.data = handler.data
    except Exception as exc:  # was Py2-only "except Exception, exc"
        return odesk_error_response(400, ERR_INVALID_DATA, str(exc), exc)
    # NOTE(review): new_handler is neither saved nor returned on
    # success here -- presumably handled by the caller; confirm.
def _modify(self, mtd, msg, action=None, **kwargs):
    """
    Delegates the call to the parent resource method `mtd` unless the
    owning import handler is locked for editing, in which case a 405
    error response prefixed with `msg` is returned.
    """
    handler_id = kwargs.get('import_handler_id', None)
    # .one() raises if the handler does not exist.
    handler = XmlImportHandler.query.filter_by(id=handler_id).one()
    if handler and not handler.can_edit:
        return odesk_error_response(
            405, ERR_INVALID_METHOD,
            '{0} {1}'.format(msg, handler.reason_msg))
    parent_method = getattr(super(XmlImportHandlerPartResource, self), mtd)
    return parent_method(action, **kwargs)
def _put_run_sql_action(self, **kwargs):
    """
    Run sql query for testing.

    Substitutes `#{param}` placeholders with form-supplied parameters,
    validates the query, caps it with the requested LIMIT, executes it
    against the chosen datasource and returns rows plus column names.
    """
    from api.import_handlers.forms import QueryTestForm
    model = self._get_details_query({}, **kwargs)
    if model is None:
        raise NotFound(self.MESSAGE404 % kwargs)
    form = QueryTestForm(obj={})
    if not form.is_valid():
        return self._render({'error': form.error_messages})
    sql = form.cleaned_data['sql']
    limit = form.cleaned_data['limit']
    params = form.cleaned_data.get('params', {})
    datasource_name = form.cleaned_data['datasource']
    try:
        # Translate "#{name}" placeholders into "%(name)s" and fill
        # them in. Raw strings: "\w" / "\1" are regex escapes.
        sql = re.sub(r'#{(\w+)}', r'%(\1)s', sql)
        sql = sql % params
    except (KeyError, ValueError) as e:
        return odesk_error_response(400, ERR_INVALID_DATA,
                                    'Wrong query parameters', e)
    try:
        model.check_sql(sql)
    except Exception as e:
        return odesk_error_response(400, ERR_INVALID_DATA, str(e), e)
    # Change query LIMIT
    sql = model.build_query(sql, limit=limit)
    try:
        data = list(model.execute_sql_iter(sql, datasource_name))
    except DatabaseError as e:
        return odesk_error_response(400, ERR_INVALID_DATA, str(e), e)
    columns = data[0].keys() if data else []
    return self._render({'data': data, 'columns': columns, 'sql': sql})
def _get_script_string_action(self, **kwargs):
    """Returns the script id together with its rendered script string."""
    script = self._get_details_query({}, **kwargs)
    if script is None:
        raise NotFound(self.MESSAGE404 % kwargs)
    try:
        payload = {
            self.OBJECT_NAME: script.id,
            'script_string': script.script_string,
        }
        return self._render(payload)
    except Exception as e:
        return odesk_error_response(400, ERR_INVALID_DATA, str(e), e)
def _put_generate_visualization_action(self, **kwargs):
    """
    Regenerates the model's visualization data.

    Returns a 405 error response for deployed/locked models and an
    error payload when the submitted options are invalid.
    """
    model = self._get_details_query(None, **kwargs)
    if not app.config['MODIFY_DEPLOYED_MODEL'] and model.locked:
        return odesk_error_response(
            405, ERR_INVALID_METHOD, 'Forbidden to change visualization '
                                     'data. Model is deployed and blocked '
                                     'for modifications.')
    form = VisualizationOptionsForm(obj=model)
    if not form.is_valid():
        # Previously invalid input was silently ignored and the model
        # was rendered anyway; report errors like sibling PUT actions.
        return self._render({'error': form.error_messages})
    form.process()
    return self._render({self.OBJECT_NAME: model})
def put(self, action=None, **kwargs):
    """
    Updates whitelisted metadata keys on the stored object from the
    request form and schedules a server-side refresh. If `action` is
    given, dispatches to the matching `_put_<action>_action` handler
    instead.
    """
    if action:
        return self._apply_action(action, method='PUT', **kwargs)
    server = self._get_server(kwargs)
    uid = self._get_uid(kwargs)
    folder = self._get_folder(kwargs)
    try:
        # .items() instead of Py2-only .iteritems(): same iteration,
        # also works on Python 3; the form is small so the cost is nil.
        for key, val in request.form.items():
            # Only whitelisted metadata keys may be set by clients.
            if key in self.ALLOWED_METADATA_KEY_NAMES:
                server.set_key_metadata(uid, folder, key, val)
        from .tasks import update_at_server
        file_name = '{0}/{1}'.format(folder, uid)
        update_at_server.delay(file_name, server.id)
    except ValueError as err:
        return odesk_error_response(400, 1006, str(err), err)
    except AmazonS3ObjectNotFound as err:
        return odesk_error_response(404, 1006, str(err), err)
    return self._render({self.OBJECT_NAME: {'id': uid}})
def _put_reimport_action(self, **kwargs):
    """
    Re-triggers data import for the dataset. No-op while an import or
    upload is already in progress; refused for locked datasets.
    """
    from api.import_handlers.tasks import import_data
    dataset = self._get_details_query({}, **kwargs)
    if dataset.locked:
        return odesk_error_response(
            405, 1006, 'Data set is locked for modifications.'
                       ' Some trained/tested models use it.')
    busy_statuses = (DataSet.STATUS_IMPORTING, DataSet.STATUS_UPLOADING)
    if dataset.status not in busy_statuses:
        dataset.status = DataSet.STATUS_IMPORTING
        dataset.save()
        import_data.delay(dataset_id=dataset.id)
    return self._render({self.OBJECT_NAME: dataset})
def get(self, action=None):
    """
    Returns the application version together with the release notes
    read from the repository-level changelog.rst.
    """
    try:
        basedir = os.path.abspath(
            os.path.join(os.path.dirname(__file__), '../../'))
        # `with` closes the file on exit; the old explicit fh.close()
        # inside the block was redundant.
        with open(os.path.join(basedir, 'changelog.rst')) as fh:
            res = fh.read()
        return self._render({
            'about': {
                'version': __version__,
                'releasenotes': res.replace('.. _changelog:', '').strip(),
            }
        })
    except Exception as e:
        # str(e) instead of Py2-only e.message, which is absent on
        # many exception types.
        return odesk_error_response(500, ERR_INVALID_DATA, str(e), e)
def _put_pig_fields_action(self, **kwargs):
    """
    Schedules asynchronous generation of pig fields for the sqoop
    object; the resulting link appears in the sqoop section later.
    """
    sqoop = self._get_details_query({}, **kwargs)
    if sqoop is None:
        raise NotFound(self.MESSAGE404 % kwargs)
    from ..forms import LoadPigFieldsForm
    form = LoadPigFieldsForm(obj={})
    if not form.is_valid():
        return odesk_error_response(400, 400, 'Parameters are invalid')
    from api.import_handlers.tasks import load_pig_fields
    load_pig_fields.delay(sqoop.id, form.cleaned_data.get('params'))
    return self._render({
        'result': "Generating pig fields delayed "
                  "(link will appear in sqoop section)"
    })
def _put_dataset_download_action(self, **kwargs):
    """
    Schedules transforming the chosen dataset with this (trained)
    model so it can be downloaded later.
    """
    model = self._get_details_query(None, **kwargs)
    if model is None:
        raise NotFound('Model not found')
    if model.status != Model.STATUS_TRAINED:
        return odesk_error_response(400, ERR_INVALID_DATA,
                                    'Model is not trained')
    form = TransformDataSetForm(obj=model)
    if not form.is_valid():
        # Previously returned None (an empty 200); report validation
        # errors like the other PUT actions do.
        return self._render({'error': form.error_messages})
    dataset = form.cleaned_data['dataset']
    from api.ml_models.tasks import transform_dataset_for_download
    transform_dataset_for_download.delay(model.id, dataset.id)
    return self._render({})
def _put_csv_task_action(self, model_id, test_result_id):
    """ Schedules a task to generate examples in CSV format """
    test = TestResult.query.get(test_result_id)
    if not test:
        raise NotFound('Test not found')
    form = SelectFieldsForCSVForm(obj=test)
    if form.is_valid():
        chosen_fields = form.cleaned_data['fields']
        # A non-empty list of field names is required.
        if isinstance(chosen_fields, list) and chosen_fields:
            from tasks import get_csv_results
            logging.info('Download examples in csv')
            get_csv_results.delay(test.model_id, test.id, chosen_fields)
            return self._render({})
    return odesk_error_response(400, ERR_INVALID_DATA,
                                'Fields of the CSV export is required')
def _put_upload_to_server_action(self, **kwargs):
    """
    Queues uploading the trained model to the chosen server, then a
    refresh on that server (celery chain).
    """
    from api.servers.tasks import upload_model_to_server, update_at_server
    from api.servers.forms import ChooseServerForm
    model = self._get_details_query(None, **kwargs)
    if model.status != Model.STATUS_TRAINED:
        return odesk_error_response(400, ERR_INVALID_DATA,
                                    'Model is not yet trained')
    form = ChooseServerForm(obj=model)
    if not form.is_valid():
        # Previously returned None (an empty 200) on invalid input;
        # report validation errors like sibling PUT actions.
        return self._render({'error': form.error_messages})
    server = form.cleaned_data['server']
    # Upload first, then notify the server (chained celery tasks).
    (upload_model_to_server.s(server.id, model.id, request.user.id)
     | update_at_server.s(server.id)).apply_async()
    return self._render({
        self.OBJECT_NAME: model,
        'status': 'Model "{0}" will be uploaded to server'.format(
            model.name)
    })
def _put_db_task_action(self, model_id, test_result_id):
    """ Schedules a task to export examples to the specified DB """
    test = TestResult.query.get(test_result_id)
    if not test:
        raise NotFound('Test not found')
    form = ExportToDbForm(obj=test)
    if form.is_valid():
        cleaned = form.cleaned_data
        export_fields = cleaned['fields']
        target_datasource = cleaned['datasource']
        target_table = cleaned['tablename']
        # A non-empty list of field names is required.
        if isinstance(export_fields, list) and export_fields:
            from tasks import export_results_to_db
            logging.info('Export examples to db')
            export_results_to_db.delay(
                test.model_id, test.id, target_datasource.id,
                target_table, export_fields)
            return self._render({})
    return odesk_error_response(400, ERR_INVALID_DATA,
                                'Fields of the DB export is required')
def _put_transformers_download_action(self, **kwargs):
    """
    Queues uploading of the chosen segment's feature transformers in
    the requested data format. Requires a trained model and an
    existing segment.
    """
    model = self._get_details_query(None, **kwargs)
    if model is None:
        raise NotFound('Model not found')
    if model.status != Model.STATUS_TRAINED:
        return odesk_error_response(405, ERR_INVALID_METHOD,
                                    'Model is not trained')
    form = TransformersDownloadForm(obj=model)
    if not form.is_valid():
        # Previously returned None (an empty 200) on invalid input;
        # report validation errors like sibling PUT actions.
        return self._render({'error': form.error_messages})
    segment = form.cleaned_data['segment']
    segment_obj = Segment.query.filter(Segment.model_id == model.id)\
        .filter(Segment.name == segment).all()
    if not segment_obj:  # idiomatic emptiness test (was `not len(...)`)
        raise NotFound('Segment not found in trained model')
    data_format = form.cleaned_data['data_format']
    from api.ml_models.tasks import upload_segment_features_transformers
    upload_segment_features_transformers.delay(model.id,
                                               segment_obj[0].id,
                                               data_format)
    return self._render({})
def _put_import_features_from_xml_ih_action(self, **kwargs):
    """
    Populates the model's feature set from the fields declared by its
    XML import handler. Allowed only for brand-new models with an XML
    trainer and zero existing features; otherwise a 405 error response
    is returned.
    """
    model = self._get_details_query(None, **kwargs)
    error_response = odesk_error_response(
        405, ERR_INVALID_METHOD,
        'Only new models with 0 features and Xml import handler as '
        'trainer is allowed for this feature')
    if model.status != Model.STATUS_NEW:
        return error_response
    if model.train_import_handler_type.lower() != 'xml':
        return error_response
    # Count features already attached to this model's feature set.
    existing_count = Feature.query.join(
        FeatureSet, FeatureSet.id == Feature.feature_set_id).join(
        Model, Model.features_set_id == FeatureSet.id).filter(
        Model.id == model.id).with_entities(func.count(
            Feature.id)).scalar()
    if existing_count > 0:
        return error_response
    created = []
    for field in model.train_import_handler.list_fields():
        new_feature = Feature()
        new_feature.name = field.name
        new_feature.type = Feature.field_type_to_feature_type(field.type)
        new_feature.feature_set_id = model.features_set_id
        new_feature.save(commit=False)
        created.append(new_feature)
    # Single commit for all created features.
    app.sql_db.session.commit()
    return self._render({
        self.OBJECT_NAME: model.id,
        'features': [f.to_dict() for f in created]
    })
def _get_groupped_action(self, **kwargs):
    """
    Groups data by `group_by_field` field and calculates
    mean average precision.

    Note: `group_by_field` should be specified in request
    parameters.
    """
    from ml_metrics import apk
    import numpy as np
    from operator import itemgetter
    import sklearn.metrics as sk_metrics

    logging.info('Start request for calculating MAP')
    group_by_field, count = self._parse_map_params()
    if not group_by_field:
        return odesk_error_response(400, ERR_INVALID_DATA,
                                    'field parameter is required')
    res = []
    avps = []
    groups = TestExample.get_grouped(
        field=group_by_field,
        model_id=kwargs.get('model_id'),
        test_result_id=kwargs.get('test_result_id')
    )
    if len(groups) < 1:
        logging.error('Can not group')
        return odesk_error_response(400, ERR_INVALID_DATA, 'Can not group')
    first_example = groups[0]['list'][0]
    if 'prob' not in first_example:
        logging.error('Examples do not contain probabilities')
        return odesk_error_response(400, ERR_INVALID_DATA,
                                    'Examples do not contain probabilities')
    if not isinstance(first_example['prob'], list):
        logging.error('Examples do not contain probabilities')
        return odesk_error_response(400, ERR_INVALID_DATA,
                                    'Examples do not contain probabilities')
    # Pick a label-to-int mapping based on the first example's label.
    if first_example['label'] in ("True", "False"):
        def transform(x):
            # was int(bool(x)), which mapped the string "False" to 1
            # because any non-empty string is truthy
            return int(x in (True, "True"))
    elif first_example['label'] in ("0", "1"):
        def transform(x):
            return int(x)
    else:
        logging.error('Type of labels do not support')
        return odesk_error_response(400, ERR_INVALID_DATA,
                                    'Type of labels do not support')

    logging.info('Calculating avps for groups')
    calc_average = True
    for group in groups:
        group_list = group['list']
        labels = [transform(item['label']) for item in group_list]
        pred_labels = [transform(item['pred']) for item in group_list]
        # Probability of the positive class.
        probs = [item['prob'][1] for item in group_list]
        if len(labels) > 1:
            labels = np.array(labels)
            probs = np.array(probs)
            try:
                precision, recall, thresholds = \
                    sk_metrics.precision_recall_curve(labels, probs)
                avp = sk_metrics.auc(recall[:count], precision[:count])
            except Exception:  # was a bare except; fall back to apk
                avp = apk(labels, pred_labels, count)
        else:
            avp = apk(labels, pred_labels, count)
        if math.isnan(avp):
            calc_average = False
            avp = "Can't be calculated"
        avps.append(avp)
        res.append({'group_by_field': group[group_by_field],
                    'count': len(group_list),
                    'avp': avp})
    # Keep only the 100 largest groups, largest first.
    res = sorted(res, key=itemgetter("count"), reverse=True)[:100]
    logging.info('Calculating map')
    mavp = np.mean(avps) if calc_average else "N/A"
    context = {self.list_key: {'items': res},
               'field_name': group_by_field,
               'mavp': mavp}
    logging.info('End request for calculating MAP')
    return self._render(context)
def _put_train_action(self, **kwargs):
    """
    Starts (re-)training of the entity.

    Builds a celery chain: optionally imports a fresh dataset, then
    either requests a spot instance (models only) followed by the
    training callback, or queues the training task directly on the
    selected existing instance. Refused for deployed/locked models and
    for models with tests still in progress.
    """
    from api.import_handlers.tasks import import_data
    from api.instances.tasks import request_spot_instance, \
        get_request_instance
    from celery import chain
    obj = self._get_details_query(None, **kwargs)

    # check if model is deployed
    if not app.config['MODIFY_DEPLOYED_MODEL'] and \
            self.ENTITY_TYPE == 'model' and obj.locked:
        return odesk_error_response(
            405, ERR_INVALID_METHOD, 'Re-train is forbidden. Model is '
                                     'deployed and blocked for '
                                     'modifications.')

    # check if some model tests are in progress
    from api.model_tests.models import TestResult
    tests_in_progress = TestResult.query.\
        filter(TestResult.model_id == obj.id)\
        .filter(TestResult.status.in_(TestResult.TEST_STATUSES)).count()
    if tests_in_progress:
        return odesk_error_response(
            405, ERR_INVALID_METHOD, 'There are some tests of this model '
                                     'in progress. Please, wait for a '
                                     'moment before re-training model.')

    # start train model
    delete_metadata = obj.status != obj.STATUS_NEW
    form = self.train_form(obj=obj, **kwargs)
    if form.is_valid():
        entity = form.save()  # set status to queued
        entity_key = '{0}_id'.format(self.ENTITY_TYPE)
        new_dataset_selected = form.cleaned_data.get(
            'new_dataset_selected')
        existing_instance_selected = form.cleaned_data.get(
            'existing_instance_selected')
        instance = form.cleaned_data.get('aws_instance', None)
        spot_instance_type = form.cleaned_data.get('spot_instance_type',
                                                   None)
        tasks_list = []
        if new_dataset_selected:
            # Import a fresh dataset as the first task in the chain.
            import_handler = entity.train_import_handler
            params = form.cleaned_data.get('parameters', None)
            dataset = import_handler.create_dataset(
                params,
                data_format=form.cleaned_data.get('format',
                                                  DataSet.FORMAT_JSON))
            opts = {'dataset_id': dataset.id, entity_key: entity.id}
            tasks_list.append(import_data.s(**opts))
            dataset = [dataset]
        else:
            dataset = form.cleaned_data.get('dataset', None)
        dataset_ids = [ds.id for ds in dataset]

        if not existing_instance_selected:
            # request spot instance
            if self.ENTITY_TYPE != 'model':
                # NotImplementedError, not the NotImplemented constant:
                # the latter is not an exception and raising it was a
                # TypeError.
                raise NotImplementedError()
            tasks_list.append(
                request_spot_instance.s(instance_type=spot_instance_type,
                                        model_id=entity.id))
            tasks_list.append(
                get_request_instance.subtask(
                    (),
                    {
                        'callback': 'train',
                        'dataset_ids': dataset_ids,
                        'model_id': entity.id,
                        'user_id': request.user.id,
                    },
                    retry=True,
                    countdown=10,
                    retry_policy={
                        'max_retries': 3,
                        'interval_start': 5,
                        'interval_step': 5,
                        'interval_max': 10
                    }))
        else:
            opts = {
                entity_key: entity.id,
                'user_id': request.user.id,
                'delete_metadata': delete_metadata
            }
            if not new_dataset_selected:
                opts['dataset_ids'] = dataset_ids
            tasks_list.append(
                self.train_entity_task.subtask(None, opts,
                                               queue=instance.name))
        chain(tasks_list).apply_async()
        ret_obj = {
            'id': entity.id,
            'status': entity.status,
            'training_in_progress': entity.training_in_progress
        }
        if new_dataset_selected:
            ret_obj['new_dataset'] = dataset[0].id
        return self._render({self.OBJECT_NAME: ret_obj})