class FeatureSetForm(BaseForm):
    """
    Edits a feature set: schema name, group-by features and target variable.
    """
    schema_name = CharField()
    group_by = JsonField()
    target_variable = CharField()

    # Feature resolved by ``clean_target_variable``; flagged as the target
    # variable in ``save``.
    target_feature = None

    def clean_group_by(self, value, field):
        """
        Resolve the submitted list of ``{'id': ...}`` items to Feature rows.

        Returns None when no value was submitted.
        """
        if value is None:
            return None
        ids = [feature['id'] for feature in value]
        return Feature.query.filter(Feature.id.in_(ids)).all()

    def clean_target_variable(self, value, field):
        """
        Look up the feature named ``value`` in this feature set.

        Raises ValidationError when no such feature exists.
        """
        if value:
            # ``first()`` returns None when nothing matches; ``one()`` raised
            # instead, which made the "not found" branch below unreachable.
            self.target_feature = Feature.query.filter_by(
                name=value, feature_set_id=self.id).first()
            if self.target_feature is None:
                raise ValidationError('Feature not found')
        return value

    def save(self):
        """
        Mark the feature set as modified, flag the resolved target feature
        (if any) as required target variable and save the form.
        """
        self.cleaned_data['modified'] = True
        target = self.target_feature
        if target:
            target.is_target_variable = True
            target.required = True
            # Not committed here; the parent ``save`` is called right after.
            target.save(commit=False)
        return super(FeatureSetForm, self).save()
class DataSetAddForm(BaseForm):
    """
    Creates a dataset for an import handler and schedules the data import.
    """
    required_fields = ('format', )

    format = ChoiceField(choices=DataSet.FORMATS)
    import_params = JsonField()

    def before_clean(self):
        """Load the import handler referenced by ``import_handler_id``."""
        handler_id = self.import_handler_id
        self.importhandler = ImportHandler.query.get(handler_id)

    def clean_import_params(self, value, field):
        """
        Check that ``value`` is a dict containing every parameter the
        import handler declares.
        """
        if not isinstance(value, dict):
            raise ValidationError('Should be a dict')
        expected_params = self.importhandler.import_params
        for param in expected_params:
            if param in value:
                continue
            raise ValidationError(
                '{0!s} parameter is required'.format(param))
        return value

    def save(self, commit=True):
        """Create and save the dataset, then queue the import task."""
        from api.import_handlers.tasks import import_data
        handler = self.importhandler
        dataset = handler.create_dataset(
            params=self.cleaned_data['import_params'],
            data_format=self.cleaned_data['format'],
            compress=True)
        dataset.save()
        import_data.delay(dataset.id)
        return dataset
class SelectFieldsForCSVForm(BaseForm):
    """
    Form with a single required json entry, ``fields``: an array of the
    fields to use when generating the test examples csv in
    _put_csv_task_action.
    """
    fields = JsonField()

    required_fields = ('fields', )
class BaseChooseInstanceAndDataset(BaseForm):
    """
    Base form for choosing a dataset (new or existing) and an instance
    (existing or spot) to run a task on.
    """
    # Prefix of the model attribute holding the import handler, i.e.
    # ``<HANDLER_TYPE>_import_handler``.
    HANDLER_TYPE = 'train'
    TYPE_CHOICES = ('m3.xlarge', 'm3.2xlarge', 'cc2.8xlarge', 'cr1.8xlarge',
                    'hi1.4xlarge', 'hs1.8xlarge')

    new_dataset_selected = BooleanField()
    existing_instance_selected = BooleanField()
    aws_instance = ModelField(model=Instance, return_model=True)
    dataset = ModelField(model=DataSet, return_model=True)
    parameters = JsonField()
    spot_instance_type = ChoiceField(choices=TYPE_CHOICES)
    format = ChoiceField(choices=DataSet.FORMATS)

    @property
    def import_handler(self):
        """
        Import handler of the model selected by ``HANDLER_TYPE``.

        Returns None when the form has no model or no handler type.
        """
        if self.model is not None and self.HANDLER_TYPE is not None:
            return getattr(self.model, '%s_import_handler' % self.HANDLER_TYPE)

    def clean_parameters(self, params, field):
        """
        Normalise missing parameters to an empty dict and make sure the
        parameters are a dict when the form has a model.
        """
        params = params or {}
        if self.model is None:
            return params
        if not isinstance(params, dict):
            raise ValidationError('Invalid parameters format')
        return params

    def validate_data(self):
        """Cross-field validation of the dataset and instance choices."""
        self._validate_dataset_tab()
        self._validate_instance_tab()

    def _validate_dataset_tab(self):
        """Check the fields related to the dataset choice."""
        data = self.cleaned_data
        if not data.get('new_dataset_selected'):
            if not data.get('dataset'):
                self.add_error("dataset", "Please select Data Set")
            return

        if not data.get('format'):
            self.add_error("format", "Please select format of the Data Set")

        # The handler is None when the form has no model; nothing to
        # check against in that case.
        handler = self.import_handler
        parameter_names = handler.import_params if handler is not None \
            else None
        if not parameter_names:
            # No params for this import handler
            return

        # ``parameters`` is absent when the field was not submitted or
        # failed its own validation.
        parameters = data.get('parameters') or {}
        missed_params = set(parameter_names) - set(parameters)
        if missed_params:
            # Sorted so the message does not depend on set ordering.
            self.add_error(
                "parameters",
                "Some parameters are missing: %s" %
                ', '.join(sorted(missed_params)))

    def _validate_instance_tab(self):
        """Check the fields related to the instance choice."""
        data = self.cleaned_data
        if data.get('existing_instance_selected'):
            if not data.get('aws_instance'):
                self.add_error("aws_instance",
                               "Please select instance with a worker")
        elif not data.get('spot_instance_type'):
            self.add_error("spot_instance_type",
                           "Please select Spot instance type")
class ServerModelVerificationForm(BaseForm):
    """
    Creates a server model verification and queues the verification task.
    """
    required_fields = ('server_id', 'model_id', 'test_result_id')

    # references to other models
    server_id = ModelField(model=Server)
    model_id = ModelField(model=Model)
    import_handler_id = ModelField(model=XmlImportHandler)
    test_result_id = ModelField(model=TestResult)

    # verification settings
    description = JsonField()
    params_map = JsonField()
    count = IntegerField()
    clazz = CharField()

    def save(self, *args, **kwargs):
        """
        Save the verification and pass its id together with the submitted
        ``count`` to the ``verify_model`` task.
        """
        parent = super(ServerModelVerificationForm, self)
        verification = parent.save(*args, **kwargs)
        from tasks import verify_model
        count = self.cleaned_data['count']
        verify_model.delay(verification.id, count)
        return verification
class FeatureTransformerForm(BaseForm, ParametersConvertorMixin):
    """
    Form for adding or editing the transformer of a feature.
    """
    group_chooser = 'predefined_selected'
    REQUIRED_FORM = ['type']
    REQUIRED_PRETRAINED = ['transformer']
    required_fields_groups = {
        'true': REQUIRED_PRETRAINED,
        'false': REQUIRED_FORM,
        None: REQUIRED_FORM
    }

    predefined_selected = BooleanField()
    feature_id = ModelField(model=Feature, return_model=True)
    type_field = CharField(name='type')
    params = JsonField()
    transformer = ModelField(model=Transformer, return_model=True)

    def validate_data(self):
        """
        Reject an unknown type (unless a pretrained transformer is chosen)
        and convert the submitted params.
        """
        data = self.cleaned_data
        type_ = data.get('type')
        uses_pretrained = data.get('predefined_selected')
        if type_ and not uses_pretrained \
                and type_ not in Transformer.TYPES_LIST:
            self.add_error('type', 'type is invalid')
            return

        params = data.get('params')
        self.convert_params(type_, params, configuration=TRANSFORMERS)

    def save(self, commit=True, save=True):
        """
        Build the transformer dict and, when a feature was given, store it
        on the feature and save the feature.

        Returns the transformer dict.
        """
        data = self.cleaned_data
        feature = data.get('feature_id', None)

        if data.get('predefined_selected', False):
            pretrained = data.get('transformer')
            transformer = {
                'type': pretrained.name,
                'id': pretrained.id
            }
        else:
            transformer = {
                'id': -1,
                "type": data.get('type'),
                "params": data.get('params')
            }

        if feature is None:
            return transformer

        feature.transformer = transformer
        feature.save()
        return transformer
class ModelEditForm(BaseForm):
    """
    Edits a model: name, import handlers, example fields, tags and features.
    """
    NO_REQUIRED_FOR_EDIT = True
    required_fields = ('name', )

    name = CharField()
    train_import_handler = ImportHandlerField()
    test_import_handler = ImportHandlerField()
    example_id = CharField()
    example_label = CharField()
    tags = JsonField()
    features = FeaturesField()

    def save(self, commit=True):
        """
        Save the model, then synchronise its tags and features with the
        submitted data.

        Raises DBException when updating the features fails.
        """
        # Snapshot of the tags taken before the parent ``save`` runs.
        candidate_tags = list(self.obj.tags)
        known_texts = [tag.text for tag in candidate_tags]

        model = super(ModelEditForm, self).save()

        tags = self.cleaned_data.get('tags', None)
        if tags:
            for tag_text in tags:
                if tag_text in known_texts:
                    continue
                new_tag = Tag.query.filter_by(text=tag_text).first()
                if new_tag is None:
                    new_tag = Tag()
                    new_tag.text = tag_text
                    new_tag.save()
                candidate_tags.append(new_tag)
                # Remember the text so a repeated entry in the request
                # does not create or attach the same tag twice.
                known_texts.append(tag_text)

            model.tags = [tag for tag in candidate_tags if tag.text in tags]
            model.save()
            # Counters are refreshed for removed tags as well.
            for tag in candidate_tags:
                tag.update_counter()

        features = self.cleaned_data.get('features', None)
        if features:
            try:
                # Existing features are dropped and rebuilt from the dict.
                Feature.query.filter_by(
                    feature_set_id=model.features_set_id).delete()
                model.classifier = features['classifier'] or {}
                model.features_set.from_dict(features, commit=False)
            except Exception as e:
                db.session.rollback()
                raise DBException(
                    "Error occurred while updating features: "
                    "{0}".format(e), e)
            else:
                db.session.commit()
        return model
class GridSearchForm(BaseForm):
    """
    Form for starting a grid search over the parameters of a model's
    classifier.
    """
    parameters = JsonField()
    scoring = CharField()
    train_dataset = ModelField(model=DataSet, return_model=True)
    test_dataset = ModelField(model=DataSet, return_model=True)

    def __init__(self, *args, **kwargs):
        # The model is needed by ``clean_parameters``, so it is stored
        # before the parent constructor runs.
        self.model = kwargs.get('model', None)
        super(GridSearchForm, self).__init__(*args, **kwargs)

    def clean_parameters(self, grid_params, field):
        """
        Convert comma-separated parameter strings into typed value lists.

        Only parameters declared in the classifier configuration are kept;
        empty values are skipped.

        Raises ValidationError when the parameters are not a dict, a value
        can not be converted to the declared type, or a value is not one
        of the declared choices.
        """
        params = {}
        if not grid_params:
            return params
        if not isinstance(grid_params, dict):
            raise ValidationError('Parameters should be a dict')

        config = CLASSIFIERS[self.model.classifier['type']]
        for pconfig in config['parameters']:
            name = pconfig['name']
            raw_value = grid_params.get(name)
            if not raw_value:
                continue

            type_ = pconfig.get('type', 'string')
            values = self._convert_values(name, raw_value.split(','), type_)

            choices = pconfig.get('choices')
            if choices:
                for item in values:
                    if item not in choices:
                        raise ValidationError(
                            'Invalid {0}: should be one of {1}'.format(
                                name, ','.join(str(c) for c in choices)))
            params[name] = values
        return params

    @staticmethod
    def _convert_values(name, items, type_):
        """Convert the string ``items`` of parameter ``name`` to ``type_``."""
        try:
            if type_ == 'integer':
                return [int(item) for item in items]
            if type_ == 'float':
                return [float(item) for item in items]
        except ValueError:
            # Report bad user input as a validation error instead of
            # letting the ValueError escape.
            raise ValidationError(
                'Invalid {0}: values should be of type {1}'.format(
                    name, type_))
        if type_ == 'boolean':
            return [item == 'true' for item in items]
        return items

    def save(self, *args, **kwargs):
        """Create the object, attach the model to it and save it."""
        obj = super(GridSearchForm, self).save(commit=False)
        obj.model = self.model
        obj.save()
        return obj
class ScalerForm(BasePredefinedForm):
    """
    Scaler form: either defines a scaler by type and params or refers to
    a predefined one.
    """
    OBJECT_NAME = 'scaler'
    DOC = PredefinedScaler

    # Required fields are grouped by the value of ``predefined_selected``.
    group_chooser = 'predefined_selected'
    required_fields_groups = {
        'true': ('scaler', ),
        'false': ('type', ),
        None: ('type', ),
    }

    name = CharField()
    type_field = ChoiceField(
        choices=PredefinedScaler.TYPES_LIST, name='type')
    params = JsonField()

    # whether the feature scaler fields should be copied from
    # a predefined scaler
    predefined_selected = BooleanField()

    # whether a predefined item (not feature related) should be created
    scaler = DocumentField(
        doc=PredefinedScaler, by_name=True, return_doc=True)
    feature_id = DocumentField(
        doc=Feature, by_name=False, return_doc=False)
class ModelAddForm(BaseForm):
    """
    Adds new model.

    Note: If import handler and import handler file would be specified,
    new model will use import handler from file.
    """
    NO_REQUIRED_FOR_EDIT = True
    # Either ``import_handler`` or ``import_handler_file`` is expected
    # along with the name.
    required_fields = ('name', ('import_handler', 'import_handler_file'))

    name = UniqueNameField(Model=Model)
    import_handler = ImportHandlerField()
    import_handler_file = ImportHandlerFileField()
    test_import_handler = ImportHandlerField()
    test_import_handler_file = ImportHandlerFileField()
    features = JsonField()
    trainer = CharField()

    def clean_import_handler(self, value, field):
        """Expose the handler as ``train_import_handler`` as well."""
        self.cleaned_data['train_import_handler'] = value
        return value

    def clean_import_handler_file(self, value, field):
        """Store the import params read by the file field."""
        self.cleaned_data['train_import_params'] = field.import_params
        return value

    def clean_test_import_handler_file(self, value, field):
        """Store the import params read by the test file field."""
        self.cleaned_data['test_import_params'] = field.import_params
        return value

    def clean_features(self, value, field):
        """
        Build a trainer from the features json and store it in
        ``cleaned_data['trainer']``.

        Raises ValidationError when the features json is invalid.
        """
        if value:
            from cloudml.trainer.trainer import Trainer
            from cloudml.trainer.config import FeatureModel, SchemaException
            try:
                # TODO: add support of json dict to FeatureModel
                feature_model = FeatureModel(json.dumps(value), is_file=False)
                self.cleaned_data['trainer'] = Trainer(feature_model)
            except SchemaException as exc:
                raise ValidationError(
                    'Features JSON file is invalid: %s' % exc, exc)
        return value
class VisualizationOptionsForm(BaseForm):
    """
    Form used for updating Trained model visualization.

    Note: Now it support only `tree_deep` type for Decision Tree and
    Random Forest classifiers.
    """
    UPDATE_TREE_DEEP = 'tree_deep'
    TYPES = [UPDATE_TREE_DEEP, ]
    # Parameters expected for each visualization type.
    PARAMS_BY_TYPE = {UPDATE_TREE_DEEP: [{'name': 'deep', 'type': 'int'}]}

    parameters = JsonField()
    type_ = CharField(name="type")

    def clean_type(self, value, field):
        """Reject a type that is not listed in ``TYPES``."""
        if value and value not in self.TYPES:
            raise ValidationError('invalid type')
        return value

    def validate_data(self):
        """
        Check that every parameter configured for the chosen type is
        present and convert ``int`` parameters in place.
        """
        type_ = self.cleaned_data.get('type')
        config = self.PARAMS_BY_TYPE.get(type_)
        if config is None:
            # Missing or rejected type: report it unless an error was
            # already recorded, instead of failing with KeyError.
            if not self.errors:
                self.add_error('type', 'invalid type')
            return

        # ``parameters`` may be absent from the cleaned data.
        parameters = self.cleaned_data.get('parameters')
        if not isinstance(parameters, dict):
            parameters = {}

        for item in config:
            name = item['name']
            val = parameters.get(name)
            if not val:
                self.add_error('parameters',
                               'Parameter %s is required' % name)
                # Nothing to parse; avoids a second, misleading error.
                continue
            if item['type'] == 'int':
                try:
                    parameters[name] = int(val)
                except (TypeError, ValueError) as exc:
                    self.add_error(
                        'parameters',
                        "Can't parse parameter %s: %s" % (name, exc), exc)
class PredefinedDataSourceForm(BaseForm):
    """
    Form for adding and editing a predefined DataSource.
    """
    NO_REQUIRED_FOR_EDIT = True
    required_fields = ('name', 'type')

    name = CharField()
    type_field = ChoiceField(
        choices=PredefinedDataSource.TYPES_LIST, name='type')
    db = JsonField()

    def clean_name(self, value, field):
        """
        Make sure no other predefined datasource uses the same name.

        Raises ValidationError when the name is already taken.
        """
        same_name = PredefinedDataSource.query.filter_by(name=value)
        if self.obj.id:
            # The edited object itself does not count as a duplicate.
            same_name = same_name.filter(
                PredefinedDataSource.id != self.obj.id)
        if same_name.count():
            raise ValidationError("DataSource with name \"%s\" already exist. "
                                  "Please choose another one." % value)
        return value
class NamedFeatureTypeForm(BaseForm, FeatureParamsMixin):
    """
    Form for adding and editing a named feature type.
    """
    required_fields = ('name', 'type')

    name = UniqueNameField(Model=NamedFeatureType)
    type_field = ChoiceField(choices=NamedFeatureType.TYPES_LIST, name='type')
    input_format = CharField()
    params = JsonField()

    def validate_data(self):
        """
        Check that an instance of the chosen feature type can be created
        from the submitted params and input format.

        Skipped when field errors were already recorded.
        """
        if self.errors:
            return

        # Trying to make instance of the type
        type_ = self.cleaned_data.get('type')
        type_factory = FEATURE_TYPE_FACTORIES.get(type_)
        if type_factory is None:
            self.add_error("type", 'Cannot create instance of '
                           'feature type: {0}'.format(type_))
            return

        params = self.cleaned_data.get('params') or {}
        # Read from ``input_format``; the original read ``params`` here,
        # so the submitted input format was never used.
        input_format = self.cleaned_data.get('input_format') or 'plain'
        try:
            type_factory.get_instance(params, input_format)
        except InvalidFeatureTypeException as exc:
            self.add_error("type", 'Cannot create instance of '
                           'feature type: {0}'.format(exc), exc)
class XmlDataSourceForm(ParametersConvertorMixin, BaseForm):
    """
    Form for adding and editing a datasource of an XML import handler.
    """
    XML_PARAMETERS = True
    PARAMETERS_CONFIGURATION = ExtractionPlan.get_datasources_config()

    required_fields = ('name', 'type', 'import_handler_id')
    NO_REQUIRED_FOR_EDIT = True

    name = CharField()
    type_field = ChoiceField(choices=_get_ds_types(), name='type')
    params = JsonField()
    import_handler_id = DocumentField(doc=XmlImportHandler,
                                      by_name=False, return_doc=False)

    def clean_name(self, value, field):
        """
        Require a name (unless editing) and make sure it is unique within
        the import handler.

        Raises ValidationError when the name is missing or already used.
        """
        is_edit = bool(self.obj.id)
        if not ((self.NO_REQUIRED_FOR_EDIT and is_edit) or value):
            raise ValidationError('name is required field')

        if is_edit:
            import_handler_id = self.obj.import_handler_id
        else:
            # ``get`` so a request without the handler id does not fail
            # here with KeyError.
            import_handler_id = self.data.get('import_handler_id')

        query = XmlDataSource.query.filter_by(
            name=value, import_handler_id=import_handler_id)
        if is_edit:
            query = query.filter(XmlDataSource.id != self.obj.id)
        if query.count():
            # Adjacent literals instead of a backslash continuation,
            # which embedded the source indentation in the message.
            raise ValidationError(
                'Data Source with name "%s" already exist. '
                'Please choose another one.' % value)
        return value

    def validate_data(self):
        """Convert the params according to the datasource type."""
        type_ = self.cleaned_data.get('type')
        self.convert_params(type_, self.cleaned_data.get('params'),
                            configuration=self.PARAMETERS_CONFIGURATION)
class ClassifierForm(BasePredefinedForm, ParametersConvertorMixin):
    """
    Form for one of these cases (depending on the parameters):
        1. adding/editing a predefined classifier
        2. editing a specific model classifier
        3. copying the classifier config from a predefined one
           to the model's classifier.
    """
    OBJECT_NAME = 'classifier'
    DOC = PredefinedClassifier

    group_chooser = 'predefined_selected'
    required_fields_groups = {
        'true': ('classifier', ),
        'false': ('type', ),
        None: ('type', ),
    }

    name = CharField()
    type_field = ChoiceField(
        choices=PredefinedClassifier.TYPES_LIST, name='type')
    params = JsonField()

    # whether the model classifier fields should be copied from
    # a predefined classifier
    predefined_selected = BooleanField()

    # whether a predefined item (not model-related) should be created
    classifier = DocumentField(
        doc=PredefinedClassifier, by_name=False, return_doc=True)
    model_id = DocumentField(doc=Model, by_name=False, return_doc=False)

    def validate_data(self):
        """
        Run the parent validation, then convert the params (when given)
        using the classifiers configuration.
        """
        super(ClassifierForm, self).validate_data()
        params = self.cleaned_data.get('params')
        if not params:
            return
        from config import CLASSIFIERS
        classifier_type = self.cleaned_data['type']
        self.convert_params(classifier_type, params,
                            configuration=CLASSIFIERS)
class QueryTestForm(BaseForm):
    """
    Form with the data for testing a query: sql, params, limit and
    datasource.
    """
    sql = CharField()
    params = JsonField()
    limit = IntegerField()
    datasource = CharField()

    # ``params`` is the only optional field
    required_fields = ('sql', 'limit', 'datasource')
class LoadPigFieldsForm(BaseForm):
    """
    Form with a single json entry called ``params``.
    """
    params = JsonField()
class TransformerForm(BaseForm, ParametersConvertorMixin):
    """
    Adds/Edits Pretrained transformer form
    """
    NO_REQUIRED_FOR_EDIT = True
    REQUIRED_FIELDS = ['train_import_handler']
    FORM_REQUIRED_FIELDS = REQUIRED_FIELDS + \
        ['name', 'type', 'feature_type', 'field_name']

    # Required fields are grouped by the value of ``json_selected``.
    group_chooser = 'json_selected'
    required_fields_groups = {
        'true': REQUIRED_FIELDS + ['json'],
        'false': FORM_REQUIRED_FIELDS,
        None: FORM_REQUIRED_FIELDS
    }

    name = CharField()
    feature_type = CharField()
    field_name = CharField()
    type_field = ChoiceField(choices=Transformer.TYPES_LIST, name='type')
    params = JsonField()
    json = JsonField()
    json_selected = BooleanField()
    train_import_handler = ImportHandlerField()

    def validate_data(self):
        """
        Check that the transformer name is free and convert the params,
        taking both from the json definition when ``json_selected`` is set.
        """
        if self.cleaned_data.get('json_selected'):
            definition = self.cleaned_data.get('json')
            try:
                name = definition['transformer-name']
                transformer_config = definition['transformer']
                params = transformer_config.get('params')
                type_ = transformer_config.get('type')
            except (TypeError, KeyError, AttributeError):
                # Missing or malformed json is a validation problem,
                # not a server error.
                self.add_error(
                    'json',
                    'json should contain "transformer-name" and '
                    '"transformer" items')
                return
            self.is_name_available(name, field_name='json')
        else:
            name = self.cleaned_data.get('name')
            self.is_name_available(name)
            params = self.cleaned_data.get('params')
            type_ = self.cleaned_data.get('type')

        self.convert_params(type_, params, configuration=TRANSFORMERS)

    def save(self, commit=True):
        """
        Create the transformer from the json definition when
        ``json_selected`` is set, otherwise save the form as usual.
        """
        if not self.cleaned_data.get('json_selected'):
            return super(TransformerForm, self).save(commit)

        definition = self.cleaned_data['json']
        transformer = Transformer()
        transformer.load_from_json(definition)
        transformer.train_import_handler = \
            self.cleaned_data['train_import_handler']
        transformer.save(commit=commit)
        return transformer

    def is_name_available(self, name, field_name='name'):
        """
        Add an error to ``field_name`` when a transformer named ``name``
        already exists. Always passes when editing an existing object.

        Returns True when the name can be used, False otherwise.
        """
        if self.obj and self.obj.id:
            return True  # edit

        if Transformer.query.filter_by(name=name).count():
            # Adjacent literals instead of a backslash continuation,
            # which embedded the source indentation in the message.
            self.add_error(
                field_name,
                'Transformer with name {0} '
                'already exist'.format(name))
            return False
        return True
class FeatureForm(BaseForm, FeatureParamsMixin): """ Feature add/edit form. """ # we could edit only one feature field. # no need to fill all of them for edit NO_REQUIRED_FOR_EDIT = True required_fields = ('name', 'type', 'feature_set_id') name = CharField() type_field = CharField(name='type') input_format = CharField() params = JsonField() required = BooleanField() default = CharField() is_target_variable = BooleanField() feature_set_id = DocumentField(doc=FeatureSet, by_name=False, return_doc=False) disabled = BooleanField() transformer = FeatureTransformerForm( Model=Transformer, prefix='transformer-', data_from_request=False) remove_transformer = BooleanField() scaler = ScalerForm(Model=PredefinedScaler, prefix='scaler-', data_from_request=False) remove_scaler = BooleanField() def validate_data(self): from numpy import nan feature_set_id = self.cleaned_data.get('feature_set_id') name = self.cleaned_data.get('name') query = Feature.query.filter_by( name=name, feature_set_id=feature_set_id) if self.obj.id: query = query.filter(Feature.id != self.obj.id) count = query.count() if count: self.add_error('name', 'Feature with name "%s" already \ exist. Please choose another one.' % name) return # Validating feature type and parameters def get_field_value(name): value = self.cleaned_data.get(name) if value is None and self.is_edit: return getattr(self.obj, name) return value feature_type = get_field_value('type') type_factory = FEATURE_TYPE_FACTORIES.get(feature_type) if type_factory: # inline type try: params = get_field_value('params') input_format = get_field_value('input_format') or 'plain' type_ = type_factory.get_instance(params, input_format) default = self.cleaned_data.get('default', None) if default: self.cleaned_data['default'] = type_.transform(default) if self.cleaned_data['default'] is nan: self.add_error( "default", "Incorrect default value {0} for type {1}. 
" .format(default, feature_type)) except InvalidFeatureTypeException, exc: self.add_error("type", 'Cannot create instance of ' 'feature type: {0}'.format(exc), exc) else: