Пример #1
0
    def __init__(self, parameters, named_inputs, named_outputs):
        """Configure the operation from its L1/L2 regularization factors.

        :param parameters: task settings. Must contain ``L1_PARAM`` and
            ``L2_PARAM``; ``'task'`` (a mapping with a ``'name'`` entry) and
            the optional ``'my_ports'`` entry are read as well.
        :param named_inputs: input ports, forwarded to ``Operation``.
        :param named_outputs: output ports; ``'output data'`` overrides the
            generated output name.
        :raises ValueError: when either factor is absent from ``parameters``.
        """
        Operation.__init__(self, parameters, named_inputs, named_outputs)
        fallback_output = 'out_task_{}'.format(self.order)
        self.output = named_outputs.get('output data', fallback_output)

        required = (self.L1_PARAM, self.L2_PARAM)
        if any(key not in parameters for key in required):
            template = gettext('Parameters {l1} and {l2} are required.')
            raise ValueError(
                template.format(l1=self.L1_PARAM, l2=self.L2_PARAM))

        self.l1 = float(parameters.get(self.L1_PARAM, 0.0))
        self.l2 = float(parameters.get(self.L2_PARAM, 0.0))

        task_info = self.parameters.get('task')
        self.task_name = task_info.get('name')
        self.has_code = True
        self.parent = ""
        self.var_name = ""

        # Plain attributes are assigned ahead of the helper calls below,
        # which may read them (their bodies are not visible here).
        self.parents_by_port = parameters.get('my_ports', [])
        self.python_code_to_remove = self.remove_python_code_parent()
        self.treatment()

        # Import descriptor: only the layer entry is filled in.
        self.import_code = {
            'layer': 'ActivityRegularization',
            'callbacks': [],
            'model': None,
            'preprocessing_image': None,
            'others': None,
        }
Пример #2
0
    def __init__(self, parameters, named_inputs, named_outputs):
        """Read the support, attribute and confidence settings.

        :param parameters: task settings; ``MIN_SUPPORT_PARAM`` and a
            non-empty ``ATTRIBUTE_PARAM`` are mandatory, ``CONFIDENCE_PARAM``
            defaults to 0.9.
        :param named_inputs: input ports; code is generated only when there
            is exactly one.
        :param named_outputs: output ports (``'output data'`` and
            ``'rules output'`` override the generated names).
        :raises ValueError: if the support is missing or outside
            [0.0001, 1.0], or if no attribute was selected.
        """
        Operation.__init__(self, parameters, named_inputs, named_outputs)

        if self.MIN_SUPPORT_PARAM not in parameters:
            template = _('Support must be informed for classifier {}')
            raise ValueError(template.format(self.__class__))

        support = float(parameters.get(self.MIN_SUPPORT_PARAM))
        self.min_support = support
        if support > 1.0 or support < .0001:
            raise ValueError('Support must be greater or equal '
                             'to 0.0001 and smaller than 1.0')

        items_default = 'freq_items_{}'.format(self.order)
        self.output = self.named_outputs.get('output data', items_default)

        rules_default = 'rules_{}'.format(self.order)
        self.rules = self.named_outputs.get('rules output', rules_default)

        selected = parameters.get(self.ATTRIBUTE_PARAM)
        self.attribute = selected
        if not selected:
            raise ValueError(
                'Missing parameter {}'.format(self.ATTRIBUTE_PARAM))
        # Only the first selected attribute is used.
        self.attribute = selected[0]

        input_count = len(self.named_inputs)
        self.has_code = input_count == 1

        self.confidence = float(parameters.get(self.CONFIDENCE_PARAM, 0.9))
Пример #3
0
    def __init__(self, parameters, named_inputs, named_outputs):
        """Read the rule-generation settings.

        Code is produced only when there is exactly one input and either
        ``contains_results()`` is truthy or at least one output is connected;
        otherwise nothing beyond ``has_code`` is configured here.

        :param parameters: task settings (confidence, support/itemset
            attribute names, maximum rule count), all optional.
        :param named_inputs: input ports.
        :param named_outputs: output ports.
        :raises ValueError: if the confidence lies outside [0.0001, 1.0].
        """
        Operation.__init__(self, parameters, named_inputs, named_outputs)

        single_input = len(named_inputs) == 1
        wanted = self.contains_results() or len(named_outputs) > 0
        self.has_code = all((single_input, wanted))
        if not self.has_code:
            return

        fallback_output = 'output_data_{}'.format(self.order)
        self.output = named_outputs.get('output data', fallback_output)

        confidence = float(parameters.get(self.CONFIDENCE_PARAM, 0.5))
        self.confidence = confidence
        if confidence > 1.0 or confidence < .0001:
            raise ValueError('Confidence must be greater or equal '
                             'to 0.0001 and smaller than 1.0')

        self.has_import = (
            "from juicer.scikit_learn.library.rules_generator "
            "import RulesGenerator\n")

        # Attribute parameters arrive as lists; the first entry is used.
        support_default = [self.SUPPORT_ATTR_PARAM_VALUE]
        support_choice = parameters.get(self.SUPPORT_ATTR_PARAM,
                                        support_default)
        self.support_col = support_choice[0]
        self.items_col = parameters.get(self.ITEMSET_ATTR_PARAM, [''])[0]

        # A falsy count (missing, 0, empty) is stored as -1.
        self.max_rules = parameters.get(self.MAX_COUNT_PARAM, -1) or -1
Пример #4
0
    def __init__(self, parameters, named_inputs, named_outputs):
        """Read the scaler flags, attribute and alias.

        Nothing beyond ``has_code`` is configured unless there is at least
        one input and either an output is connected or
        ``contains_results()`` is truthy.

        :param parameters: task settings; ``ATTRIBUTE_PARAM`` is mandatory.
        :param named_inputs: input ports.
        :param named_outputs: output ports (``'output data'`` and
            ``'transformation model'`` override the generated names).
        :raises ValueError: if ``ATTRIBUTE_PARAM`` is missing.
        """
        Operation.__init__(self, parameters, named_inputs, named_outputs)

        has_inputs = len(named_inputs) > 0
        self.has_code = has_inputs and any(
            (len(self.named_outputs) > 0, self.contains_results()))
        if not self.has_code:
            return

        # Values accepted as "enabled" for the boolean flags.
        enabled = ('1', 1, True)
        self.with_mean = parameters.get(self.WITH_MEAN_PARAM, False) in enabled
        self.with_std = parameters.get(self.WITH_STD_PARAM, True) in enabled

        output_default = 'output_data_{}'.format(self.order)
        self.output = self.named_outputs.get('output data', output_default)
        model_default = 'model_{}'.format(self.order)
        self.model = named_outputs.get('transformation model', model_default)

        if self.ATTRIBUTE_PARAM not in self.parameters:
            template = _("Parameters '{}' must be informed for task {}")
            raise ValueError(
                template.format(self.ATTRIBUTE_PARAM,
                                self.__class__.__name__))
        self.attribute = parameters[self.ATTRIBUTE_PARAM]

        alias_default = 'scaled_{}'.format(self.order)
        self.alias = parameters.get(self.ALIAS_PARAM, alias_default)

        self.has_import = "from sklearn.preprocessing import StandardScaler\n"
Пример #5
0
    def __init__(self, parameters, named_inputs, named_outputs):
        """Read the numeric search settings and the mandatory label.

        Every numeric setting is optional and is coerced with ``int`` or
        ``float`` from the value in ``parameters`` (or its default).

        NOTE(review): the snippet is cut off after ``input_treatment()``;
        the ``has_import`` assignment that follows is not visible in full.

        :param parameters: task settings; ``LABEL_PARAM`` is mandatory.
        :param named_inputs: input ports, forwarded to ``Operation``.
        :param named_outputs: output ports (``'model'`` and ``'output data'``
            override the generated names).
        :raises ValueError: if ``LABEL_PARAM`` is missing.
        """
        Operation.__init__(self, parameters, named_inputs, named_outputs)

        # Code is always generated for this operation.
        self.has_code = True

        self.number_neighbors = int(
            parameters.get(self.NUMBER_NEIGHBORS_ATTRIBUTE_PARAM, 5))
        self.n_estimators = int(
            parameters.get(self.N_ESTIMATORS_ATTRIBUTE_PARAM, 10))
        self.min_hash_match = int(
            parameters.get(self.MIN_HASH_MATCH_ATTRIBUTE_PARAM, 4))
        self.n_candidates = int(parameters.get(self.N_CANDIDATES, 10))
        self.random_state = int(
            parameters.get(self.RANDOM_STATE_ATTRIBUTE_PARAM, 0))
        self.radius = float(parameters.get(self.RADIUS_ATTRIBUTE_PARAM, 1.0))
        self.radius_cutoff_ratio = float(
            parameters.get(self.RADIUS_CUTOFF_RATIO_ATTRIBUTE_PARAM, 0.9))

        # Single-element all(): effectively a plain presence test on the
        # label parameter.
        if not all([self.LABEL_PARAM in parameters]):
            msg = _("Parameters '{}' must be informed for task {}")
            raise ValueError(
                msg.format(self.LABEL_PARAM, self.__class__.__name__))

        self.label = parameters[self.LABEL_PARAM]
        self.model = self.named_outputs.get('model',
                                            'model_{}'.format(self.order))
        self.output = self.named_outputs.get('output data',
                                             'out_task_{}'.format(self.order))

        # Helper defined elsewhere; runs after all settings above are stored.
        self.input_treatment()

        self.has_import = \
            """
Пример #6
0
    def __init__(self, parameters, named_inputs, named_outputs):
        """Read the label/features columns and output names.

        :param parameters: task settings. ``'label'`` and ``'features'`` are
            mandatory lists whose first entry is used; ``'prediction'``
            defaults to ``'prediction'``.
        :param named_inputs: input ports; exactly two are required.
        :param named_outputs: output ports (``'model'`` and
            ``'output data'``).
        :raises ValueError: if ``'label'`` or ``'features'`` is missing, or
            if the operation does not have exactly two inputs.
        """
        Operation.__init__(self, parameters, named_inputs, named_outputs)

        # Both entries are indexed right below, so a single missing one must
        # be reported here instead of surfacing later as a KeyError.
        if 'label' not in parameters or 'features' not in parameters:
            raise ValueError(
                _("Parameters '{}' and '{}' must be informed for task {}").
                format('label', 'features', self.__class__))

        self.label = parameters['label'][0]
        self.features = parameters['features'][0]
        self.predCol = parameters.get('prediction', 'prediction')
        self.has_code = len(self.named_inputs) == 2
        self.has_import = ""
        if not self.has_code:
            raise ValueError(
                _("Parameters '{}' and '{}' must be informed for task {}").
                format('train input data', 'algorithm', self.__class__))

        self.model = self.named_outputs.get('model',
                                            'model_tmp{}'.format(self.order))

        # The transformation is applied only when the data output is wired.
        self.perform_transformation = 'output data' in self.named_outputs
        if not self.perform_transformation:
            self.output = 'task_{}'.format(self.order)
        else:
            self.output = self.named_outputs['output data']
            self.prediction = self.parameters.get('prediction', 'prediction')
Пример #7
0
    def __init__(self, parameters, named_inputs, named_outputs):
        """Read the neighbour-based outlier settings.

        All settings except ``'features'`` are optional and fall back to the
        defaults shown below; numeric ones are coerced with ``int``/``float``.

        NOTE(review): the snippet is cut off after ``input_treatment()``;
        the ``has_import`` assignment that follows is not visible in full.

        :param parameters: task settings; ``'features'`` is read with plain
            indexing, so a missing entry raises ``KeyError``.
        :param named_inputs: input ports, forwarded to ``Operation``.
        :param named_outputs: output ports (``'output data'`` overrides the
            generated name).
        """
        Operation.__init__(self, parameters, named_inputs, named_outputs)

        # Code is always generated for this operation.
        self.has_code = True

        self.output = self.named_outputs.get(
            'output data', 'output_data_{}'.format(self.order))

        self.number_neighbors = int(
            parameters.get(self.NUMBER_NEIGHBORS_ATTRIBUTE_PARAM, 20))
        self.algorithm = parameters.get(self.ALGORITHM_ATTRIBUTE_PARAM, "auto")
        self.leaf_size = int(parameters.get(self.LEAF_SIZE_ATTRIBUTE_PARAM,
                                            30))
        self.metric = parameters.get(self.METRIC_ATTRIBUTE_PARAM, "minkowski")
        self.contamination = float(
            parameters.get(self.CONTAMINATION_ATTRIBUTE_PARAM, 0.22))
        self.p = int(parameters.get(self.P_ATTRIBUTE_PARAM, 2))
        self.metric_params = parameters.get(self.METRIC_PARAMS_ATTRIBUTE_PARAM,
                                            None)
        # Stored as integers (0 by default), not booleans.
        self.novelty = int(parameters.get(self.NOVELTY_ATTRIBUTE_PARAM, 0))
        self.n_jobs = int(parameters.get(self.N_JOBS_ATTRIBUTE_PARAM, 0))
        # No presence check: a missing 'features' entry raises KeyError.
        self.features = parameters['features']
        self.outlier = self.parameters.get(self.OUTLIER_PARAM, 'outlier')

        # Helper defined elsewhere; runs after all settings above are stored.
        self.input_treatment()
        self.has_import = \
            """
Пример #8
0
    def __init__(self, parameters, named_inputs, named_outputs):
        """Read the column names and thresholds for the clustering step.

        Nothing beyond ``has_code`` is configured unless there is exactly one
        input and either ``contains_results()`` is truthy or an output is
        connected.

        :param parameters: task settings; ``LAT_PARAM``, ``LON_PARAM`` and
            ``DATETIME_PARAM`` are mandatory lists (first entry used).
        :param named_inputs: input ports.
        :param named_outputs: output ports.
        :raises ValueError: if any of the three column parameters is missing.
        """
        Operation.__init__(self, parameters, named_inputs, named_outputs)

        single_input = len(named_inputs) == 1
        self.has_code = single_input and any(
            (self.contains_results(), len(named_outputs) > 0))
        if not self.has_code:
            return

        column_params = (self.LAT_PARAM, self.LON_PARAM, self.DATETIME_PARAM)
        if not all(key in parameters for key in column_params):
            template = _(
                'Parameters {}, {} and {} must be informed for task {}.')
            raise ValueError(
                template.format('Latitude', 'Longitude', 'Datetime',
                                self.__class__))

        fallback_output = 'output_data_{}'.format(self.order)
        self.output = self.named_outputs.get('output data', fallback_output)

        self.lat_col = parameters.get(self.LAT_PARAM)[0]
        self.lon_col = parameters.get(self.LON_PARAM)[0]
        self.datetime_col = parameters.get(self.DATETIME_PARAM)[0]

        self.alias = parameters.get(self.ALIAS_PARAM, 'cluster')

        # Falsy values fall back to the defaults; the sign is then dropped.
        raw_min_pts = parameters.get(self.MIN_SAMPLE_PARAM, 15) or 15
        raw_spatial = parameters.get(self.SPA_THRESHOLD_PARAM, 500) or 500
        raw_temporal = parameters.get(self.TMP_THRESHOLD_PARAM, 60) or 60
        self.min_pts = abs(int(raw_min_pts))
        self.spatial_thr = abs(float(raw_spatial))
        self.temporal_thr = abs(int(raw_temporal))

        self.has_import = ("from juicer.scikit_learn.library.stdbscan "
                           "import STDBSCAN\n")
Пример #9
0
    def __init__(self, parameters, named_inputs, named_outputs):
        """Read the coordinate columns and source/destination projections.

        Nothing beyond ``has_code`` is configured unless there is exactly one
        input and either ``contains_results()`` is truthy or an output is
        connected.

        :param parameters: task settings; the two projection parameters and
            the latitude/longitude column parameters are mandatory.
        :param named_inputs: input ports.
        :param named_outputs: output ports.
        :raises ValueError: naming the first mandatory parameter missing.
        """
        Operation.__init__(self, parameters, named_inputs, named_outputs)

        single_input = len(named_inputs) == 1
        self.has_code = single_input and any(
            (self.contains_results(), len(named_outputs) > 0))
        if not self.has_code:
            return

        mandatory = (self.SRC_PROJ_PARAM, self.DST_PROJ_PARAM,
                     self.LAT_PARAM, self.LON_PARAM)
        absent = [key for key in mandatory if key not in self.parameters]
        if absent:
            template = _('Parameters {} must be informed for task {}.')
            raise ValueError(template.format(absent[0], self.__class__))

        fallback_output = 'output_data_{}'.format(self.order)
        self.output = self.named_outputs.get('output data', fallback_output)

        # Column parameters arrive as lists; the first entry is used.
        self.lat_col = parameters.get(self.LAT_PARAM)[0]
        self.lon_col = parameters.get(self.LON_PARAM)[0]

        # Without an alias the converted values reuse the source column name.
        self.lat_alias = parameters.get(self.LAT_ALIAS_PARAM, self.lat_col)
        self.lon_alias = parameters.get(self.LON_ALIAS_PARAM, self.lon_col)

        self.src_prj = parameters.get(self.SRC_PROJ_PARAM)
        self.dst_prj = parameters.get(self.DST_PROJ_PARAM)
Пример #10
0
    def __init__(self, parameters, named_inputs, named_outputs):
        """Read the attributes to change, their new names and the new type.

        :param parameters: task settings. ``'attributes'`` is mandatory;
            ``'new_name'`` is an optional comma-separated list of aliases and
            ``'new_data_type'`` defaults to ``'keep'``.
        :param named_inputs: input ports; code is generated only when there
            is exactly one.
        :param named_outputs: output ports (``'output data'`` overrides the
            generated name).
        :raises ValueError: if ``'attributes'`` is missing.
        """
        Operation.__init__(self, parameters, named_inputs, named_outputs)

        if 'attributes' not in self.parameters:
            self.has_code = False
            raise ValueError(
                _("Parameter '{}' must be informed for task {}").format(
                    'attributes', self.__class__))

        self.output = named_outputs.get('output data',
                                        'output_data_{}'.format(self.order))

        self.attributes = parameters['attributes']

        # An explicit None is treated like an absent 'new_name'.
        raw_names = self.parameters.get('new_name') or ''
        aliases = [s.strip() for s in raw_names.split(',')]

        # Produce exactly one alias per attribute: surplus aliases are
        # dropped and a missing or blank alias becomes '<attribute>_new'.
        # str.split never returns an empty list, so no emptiness check is
        # needed before pairing.
        size = len(self.attributes)
        self.name = [
            alias or '{}_new'.format(attribute)
            for attribute, alias in zip_longest(self.attributes,
                                                aliases[:size])
        ]

        self.new_type = parameters.get('new_data_type', 'keep')
        self.has_code = len(named_inputs) == 1

        if self.has_code:
            self.has_import = "from functions.data.attributes_changer " \
                              "import AttributesChangerOperation\n"
Пример #11
0
    def __init__(self, parameters, named_inputs, named_outputs):
        """Read the evaluation attributes and metric and validate them.

        :param parameters: task settings. The prediction attribute, the
            label attribute and the metric are all mandatory; the metric must
            be a key of ``METRIC_TO_EVALUATOR``.
        :param named_inputs: input ports (``'model'``, ``'input data'``).
        :param named_outputs: output ports (``'evaluated model'``,
            ``'evaluator'``).
        :raises ValueError: on a missing/invalid setting, or when the
            evaluated model is requested but no code can be generated.
        """
        Operation.__init__(self, parameters, named_inputs, named_outputs)

        # Attribute parameters arrive as lists; missing or empty ones are
        # normalised to an empty string.
        prediction_choice = parameters.get(
            self.PREDICTION_ATTRIBUTE_PARAM) or ['']
        self.prediction_attribute = prediction_choice[0]
        label_choice = parameters.get(self.LABEL_ATTRIBUTE_PARAM) or ['']
        self.label_attribute = label_choice[0]
        self.type_model = parameters.get(self.METRIC_PARAM) or ''

        if self.prediction_attribute == '' or self.type_model == '':
            template = \
                _("Parameters '{}' and '{}' must be informed for task {}")
            raise ValueError(template.format(
                self.PREDICTION_ATTRIBUTE_PARAM,
                self.METRIC_PARAM, self.__class__))

        if self.type_model not in self.METRIC_TO_EVALUATOR:
            raise ValueError(_('Invalid metric value {}').format(
                self.type_model))

        # A clustering-specific branch based on a feature attribute used to
        # sit here and is disabled; the label is required for every metric.
        if self.label_attribute == '':
            template = _("Parameters '{}' must be informed for task {}")
            raise ValueError(template.format(
                self.LABEL_ATTRIBUTE_PARAM, self.__class__))

        wired = (
            (len(self.named_inputs) > 0 and len(self.named_outputs) > 0)
            or self.named_outputs.get('evaluator') is not None
            or 'input data' in self.named_inputs
        )
        self.has_code = any([wired, self.contains_results()])

        model_default = 'model_{}'.format(self.order)
        self.model = self.named_inputs.get('model', model_default)

        model_out_default = 'model_task_{}'.format(self.order)
        self.model_out = self.named_outputs.get('evaluated model',
                                                model_out_default)

        evaluator_default = 'evaluator_task_{}'.format(self.order)
        self.evaluator_out = self.named_outputs.get('evaluator',
                                                    evaluator_default)

        model_requested = \
            self.named_outputs.get('evaluated model') is not None
        if not self.has_code and model_requested:
            raise ValueError(
                _('Model is being used, but at least one input is missing'))

        self.supports_cache = False
        self.has_import = "from sklearn.metrics import * \n"
Пример #12
0
    def __init__(self, parameters, named_inputs, named_outputs):
        """Read the file location and parsing options.

        :param parameters: task settings; ``FILE`` is mandatory, every other
            option has a default.
        :param named_inputs: input ports, forwarded to ``Operation``.
        :param named_outputs: output ports; code is generated only when at
            least one is connected.
        :raises ValueError: if ``FILE`` is missing.
        """
        Operation.__init__(self, parameters, named_inputs, named_outputs)

        if self.FILE not in parameters:
            template = _("Parameter '{}' must be informed for task {}")
            raise ValueError(template.format(self.FILE, self.__class__))

        # The configured name is stored with a leading slash.
        self.name_file = "/" + parameters[self.FILE]
        self.separator = parameters.get(self.SEPARATOR, ',')
        self.header = parameters.get('header', False) in (1, '1', True)
        self.schema = parameters.get(self.SCHEMA, "FROM_VALUES")
        self.mode = parameters.get(self.MODE_PARAM, 'FAILFAST')
        raw_nulls = parameters.get(self.NULL_VALUES_PARAM, '')
        self.format = parameters.get('format', 'csv')

        # Comma-separated null markers, stripped and de-duplicated.
        self.null_values = (
            [] if raw_nulls == ''
            else list(set(token.strip() for token in raw_nulls.split(","))))

        output_count = len(named_outputs)
        self.has_code = output_count > 0
        self.has_import = ("from functions.data.read_data import "
                           "ReadOperationHDFS\n")
Пример #13
0
    def __init__(self, parameters, named_inputs, named_outputs):
        """Read the destination name, format, path and write options.

        :param parameters: task settings; ``NAME_PARAM``, ``FORMAT_PARAM``
            and ``PATH_PARAM`` are mandatory.
        :param named_inputs: input ports; code is generated only when there
            is exactly one.
        :param named_outputs: output ports (``'output data'`` overrides the
            generated name).
        :raises ValueError: naming the first mandatory parameter missing.
        """
        Operation.__init__(self, parameters, named_inputs, named_outputs)

        mandatory = (self.NAME_PARAM, self.FORMAT_PARAM, self.PATH_PARAM)
        absent = [key for key in mandatory if key not in parameters]
        if absent:
            template = _("Parameter '{}' must be informed for task {}")
            raise ValueError(template.format(absent[0], self.__class__))

        self.name = parameters.get(self.NAME_PARAM)
        self.format = parameters.get(self.FORMAT_PARAM, self.FORMAT_CSV)
        self.path = parameters.get(self.PATH_PARAM, '.')
        self.mode = parameters.get(self.OVERWRITE_MODE_PARAM, self.MODE_ERROR)
        self.storage = parameters.get(self.STORAGE_ID_PARAM, 'hdfs')
        self.header = parameters.get('header', False) in (1, '1', True)

        # The import line is only registered for a single connected input.
        if len(named_inputs) == 1:
            self.has_import = \
                "from functions.data.save_data import SaveOperation\n"

        fallback_output = 'output_data_{}'.format(self.order)
        self.output = self.named_outputs.get('output data', fallback_output)

        # An empty path places the file directly under the root.
        if len(self.path) > 0:
            self.filename = '/' + self.path + '/' + self.name
        else:
            self.filename = '/' + self.name

        self.has_code = len(self.named_inputs) == 1
Пример #14
0
    def __init__(self, parameters, named_inputs, named_outputs):
        """Configure the Reshape layer operation.

        :param parameters: task settings. ``TARGET_SHAPE_PARAM`` is mandatory
            and must not be ``None``; ``'task'`` (a mapping with ``'name'``
            and ``'order'`` entries) and the optional ``'my_ports'`` entry
            are read as well.
        :param named_inputs: input ports, forwarded to ``Operation``.
        :param named_outputs: output ports; ``'output data'`` overrides the
            generated output name.
        :raises ValueError: if the target shape is missing or ``None``.
        """
        Operation.__init__(self, parameters, named_inputs, named_outputs)
        self.output = named_outputs.get('output data',
                                        'out_task_{}'.format(self.order))

        # Validate the supplied value, not the parameter name: a missing
        # entry and an explicit None are both rejected.
        raw_target_shape = parameters.get(self.TARGET_SHAPE_PARAM)
        if raw_target_shape is None:
            raise ValueError(
                gettext('Parameter {} is required.').format(
                    self.TARGET_SHAPE_PARAM))

        # Raw value as received; target_shape starts as None and is
        # presumably filled in by treatment() below.
        self._target_shape = raw_target_shape
        self.target_shape = None

        self.task_name = self.parameters.get('task').get('name')
        self.task_workflow_order = self.parameters.get('task').get('order')
        self.has_code = True

        self.parent = ""
        self.var_name = ""

        self.parents_by_port = parameters.get('my_ports', [])
        self.python_code_to_remove = self.remove_python_code_parent()
        self.treatment()

        # Import descriptor: only the layer entry is filled in.
        self.import_code = {
            'layer': 'Reshape',
            'callbacks': [],
            'model': None,
            'preprocessing_image': None,
            'others': None
        }
Пример #15
0
    def __init__(self, parameters, named_inputs, named_outputs):
        """Read the label/prediction columns and pick the evaluator type.

        :param parameters: task settings; ``'label_attribute'``,
            ``'prediction_attribute'`` and ``'metric'`` are mandatory.
        :param named_inputs: input ports; exactly two are required.
        :param named_outputs: output ports (``'evaluated model'`` and
            ``'evaluator'`` override the generated names).
        :raises ValueError: naming the first mandatory parameter missing, or
            when the operation does not have exactly two inputs.
        """
        Operation.__init__(self, parameters, named_inputs, named_outputs)

        mandatory = ('label_attribute', 'prediction_attribute', 'metric')
        absent = [key for key in mandatory if key not in parameters]
        if absent:
            template = _("Parameter '{}' must be informed for task {}")
            raise ValueError(template.format(absent[0], self.__class__))

        # Column parameters arrive as lists; the first entry is used.
        self.true_col = self.parameters['label_attribute'][0]
        self.pred_col = self.parameters['prediction_attribute'][0]
        self.metric = self.parameters['metric']

        input_count = len(self.named_inputs)
        self.has_code = input_count == 2
        if not self.has_code:
            template = \
                _("Parameters '{}' and '{}' must be informed for task {}")
            raise ValueError(
                template.format('input data', 'model', self.__class__))

        # rmse/mse/mae select the regression evaluator, anything else the
        # classification one.
        is_regression = self.metric in ['rmse', 'mse', 'mae']
        if not is_regression:
            self.modeltype = 'ClassificationModelEvaluation'
            self.has_import = \
                "from functions.ml.metrics.ClassificationModelEvaluation" \
                " import *\n"
        else:
            self.modeltype = 'RegressionModelEvaluation'
            self.has_import = "from functions.ml.metrics." \
                              "RegressionModelEvaluation import *\n"

        evaluated_default = 'evaluated_model{}'.format(self.order)
        self.evaluated_out = self.named_outputs.get('evaluated model',
                                                    evaluated_default)
        evaluator_default = 'evaluator{}'.format(self.order)
        self.evaluator = self.named_outputs.get("evaluator",
                                                evaluator_default)
Пример #16
0
    def __init__(self, parameters, named_inputs, named_outputs):
        """Read the target coordinate columns and polygon settings.

        Nothing is configured here unless exactly two inputs are connected.

        :param parameters: task settings; the target latitude/longitude and
            polygon-points column parameters are mandatory.
        :param named_inputs: input ports.
        :param named_outputs: output ports (``'output data'`` overrides the
            generated name).
        :raises ValueError: naming the first mandatory parameter missing.
        """
        Operation.__init__(self, parameters, named_inputs, named_outputs)

        if len(named_inputs) != 2:
            return

        self.has_code = True

        mandatory = (self.TARGET_LAT_COLUMN_PARAM,
                     self.TARGET_LON_COLUMN_PARAM,
                     self.POLYGON_POINTS_COLUMN_PARAM)
        absent = [key for key in mandatory if key not in parameters]
        if absent:
            template = _("Parameter '{}' must be informed for task {}")
            raise ValueError(template.format(absent[0], self.__class__))

        self.lat_column = parameters[self.TARGET_LAT_COLUMN_PARAM]
        self.lon_column = parameters[self.TARGET_LON_COLUMN_PARAM]
        self.polygon_column = parameters.get(
            self.POLYGON_POINTS_COLUMN_PARAM, 'points')

        # Any empty selection is stored as an empty list.
        selected = parameters.get(self.POLYGON_ATTR_COLUMN_PARAM, [])
        self.attributes = selected
        if len(selected) == 0:
            self.attributes = []

        self.alias = parameters.get('alias', '_shp')

        fallback_output = 'output_data_{}'.format(self.order)
        self.output = self.named_outputs.get('output data', fallback_output)
        self.has_import = (
            "from juicer.scikit_learn.library.geo_within "
            "import GeoWithinOperation\n")
Пример #17
0
    def __init__(self, parameters, named_inputs, named_outputs):
        """Configure the subtract layer operation.

        :param parameters: task settings. ``INPUTS_PARAM``, ``KWARGS_PARAM``
            and ``ADVANCED_OPTIONS_PARAM`` are optional; ``'task'`` (a
            mapping with a ``'name'`` entry), ``'my_ports'`` and
            ``'parents_slug'`` are read as well.
        :param named_inputs: input ports, forwarded to ``Operation``.
        :param named_outputs: output ports; ``'output data'`` overrides the
            generated output name.
        """
        Operation.__init__(self, parameters, named_inputs, named_outputs)
        fallback_output = 'out_task_{}'.format(self.order)
        self.output = named_outputs.get('output data', fallback_output)

        # Raw values as received; the public counterparts start as None and
        # are presumably filled in by treatment() below.
        self._inputs = parameters.get(self.INPUTS_PARAM)
        self._kwargs = parameters.get(self.KWARGS_PARAM)
        self._advanced_options = parameters.get(self.ADVANCED_OPTIONS_PARAM, 0)
        self.inputs = None
        self.kwargs = None
        self.advanced_options = None

        task_info = self.parameters.get('task')
        self.task_name = task_info.get('name')
        self.has_code = True
        self.parents = ""
        self.var_name = ""
        self.add_functions_required = ""

        self.parents_by_port = parameters.get('my_ports', [])
        self.parents_slug = parameters.get('parents_slug', [])
        self.python_code_to_remove = self.remove_python_code_parent()
        self.treatment()

        # Import descriptor: only the layer entry is filled in.
        self.import_code = {
            'layer': 'subtract',
            'callbacks': [],
            'model': None,
            'preprocessing_image': None,
            'others': None,
        }
Пример #18
0
    def __init__(self, parameters, named_inputs, named_outputs):
        """Store the title and the fixed icon/cache settings.

        :param parameters: task settings; ``TITLE_PARAM`` defaults to ``''``.
        :param named_inputs: input ports; code is generated only when there
            is exactly one.
        :param named_outputs: output ports, forwarded to ``Operation``.
        """
        Operation.__init__(self, parameters, named_inputs, named_outputs)

        self.icon = 'fa-question'
        self.supports_cache = False
        self.title = parameters.get(self.TITLE_PARAM, '')

        input_count = len(self.named_inputs)
        self.has_code = input_count == 1
Пример #19
0
    def __init__(self, parameters, named_inputs, named_outputs):
        """Read the stop-word removal settings.

        :param parameters: task settings; ``ATTRIBUTES_PARAM`` is mandatory,
            the stop-word options and alias have defaults.
        :param named_inputs: input ports; code is generated only when
            ``'input data'`` is connected. ``'stop words'`` is optional.
        :param named_outputs: output ports (``'output data'`` overrides the
            generated name).
        :raises ValueError: if ``ATTRIBUTES_PARAM`` is missing.
        """
        Operation.__init__(self, parameters, named_inputs, named_outputs)

        if self.ATTRIBUTES_PARAM not in parameters:
            template = _("Parameter '{}' must be informed for task {}")
            raise ValueError(
                template.format(self.ATTRIBUTES_PARAM, self.__class__))
        self.attributes = parameters.get(self.ATTRIBUTES_PARAM)

        self.stop_word_attribute = self.parameters.get(
            self.STOP_WORD_ATTRIBUTE_PARAM, "")

        # Comma-separated list; each entry is stripped of whitespace.
        raw_stop_words = self.parameters.get(self.STOP_WORD_LIST_PARAM, '')
        self.stop_word_list = [
            word.strip() for word in raw_stop_words.split(',')]

        self.alias = parameters.get(self.ALIAS_PARAM, 'tokenized_rm')

        # The default here is the string 'False', not the boolean.
        self.sw_case_sensitive = self.parameters.get(
            self.STOP_WORD_CASE_SENSITIVE_PARAM, 'False')

        self.stopwords_input = self.named_inputs.get('stop words', [])

        fallback_output = 'output_data_{}'.format(self.order)
        self.output = self.named_outputs.get('output data', fallback_output)

        self.has_code = 'input data' in self.named_inputs
        if self.has_code:
            self.has_import = ("from functions.text.remove_stopwords "
                               "import RemoveStopWordsOperation\n")
Пример #20
0
    def __init__(self, parameters, named_inputs, named_outputs):
        """Read the destination name, format, path and bookkeeping settings.

        :param parameters: task settings; ``NAME_PARAM``, ``FORMAT_PARAM``
            and ``PATH_PARAM`` are mandatory.
        :param named_inputs: input ports; code is generated only when there
            is exactly one.
        :param named_outputs: output ports (``'output data'`` overrides the
            generated name).
        :raises ValueError: naming the first mandatory parameter missing.
        """
        Operation.__init__(self, parameters, named_inputs, named_outputs)

        mandatory = (self.NAME_PARAM, self.FORMAT_PARAM, self.PATH_PARAM)
        absent = [key for key in mandatory if key not in parameters]
        if absent:
            template = _("Parameter '{}' must be informed for task {}")
            raise ValueError(template.format(absent[0], self.__class__))

        self.name = parameters.get(self.NAME_PARAM)
        self.tags = parameters.get(self.TAGS_PARAM)
        self.format = parameters.get(self.FORMAT_PARAM, self.FORMAT_CSV)
        self.path = parameters.get(self.PATH_PARAM, '.')
        self.mode = parameters.get(self.OVERWRITE_MODE_PARAM, self.MODE_ERROR)
        self.storage_id = parameters.get(self.STORAGE_ID_PARAM)
        self.user = parameters.get(self.USER_PARAM)
        self.workflow_id = parameters.get(self.WORKFLOW_ID_PARAM)

        header_flag = parameters.get(self.HEADER_PARAM, False)
        self.header = header_flag in (1, '1', True)

        fallback_output = 'output_data_{}'.format(self.order)
        self.output = self.named_outputs.get('output data', fallback_output)

        # The file name is the configured name as-is (no path prefix).
        self.filename = self.name
        input_count = len(self.named_inputs)
        self.has_code = input_count == 1
Пример #21
0
    def __init__(self, parameters, named_inputs, named_outputs):
        """Configure the user-supplied code operation.

        :param parameters: task settings. ``CODE_PARAM`` must be present;
            ``OUT_CODE_PARAM`` defaults to 0; ``'task'`` (a mapping with a
            ``'name'`` entry) and ``'my_ports'`` are read as well.
        :param named_inputs: input ports, forwarded to ``Operation``.
        :param named_outputs: output ports; ``'output data'`` overrides the
            generated output name.
        :raises ValueError: if ``CODE_PARAM`` is absent from ``parameters``.
        """
        Operation.__init__(self, parameters, named_inputs, named_outputs)
        fallback_output = 'out_task_{}'.format(self.order)
        self.output = named_outputs.get('output data', fallback_output)

        # Empty or otherwise falsy code is normalised to None.
        self.code = parameters.get(self.CODE_PARAM) or None
        self._out_code = int(parameters.get(self.OUT_CODE_PARAM, 0))

        task_info = self.parameters.get('task')
        self.task_name = task_info.get('name')

        if self.CODE_PARAM not in parameters:
            template = gettext('Parameter {} is required')
            raise ValueError(template.format(self.CODE_PARAM))

        self.parents_by_port = parameters.get('my_ports', [])
        self.python_code_to_remove = self.remove_python_code_parent()
        # Reset before treatment(), which presumably derives the final value.
        self.out_code = False
        self.treatment()

        self.import_code = {
            'layer': None,
            'callbacks': [],
            'model': None,
            'preprocessing_image': None,
            'others': None,
        }

        # The two flags are opposites, both taken from out_code.
        self.has_external_python_code_operation = self.out_code
        self.has_code = not self.out_code
Пример #22
0
    def __init__(self, parameters, named_inputs, named_outputs):
        """Load the data source settings and decide whether caching applies.

        Nothing beyond ``has_code`` and ``header`` is configured unless an
        output is connected or ``contains_results()`` is truthy.

        :param parameters: task settings; ``DATA_SOURCE_ID_PARAM`` is
            mandatory when code is generated. ``'execution_date'`` is
            optional and compared against the data source's update time.
        :param named_inputs: input ports, forwarded to ``Operation``.
        :param named_outputs: output ports; ``'output data'`` overrides the
            generated output name.
        :raises ValueError: if ``DATA_SOURCE_ID_PARAM`` is missing.
        """
        Operation.__init__(self, parameters, named_inputs, named_outputs)

        self.has_code = any(
            [len(self.named_outputs) > 0,
             self.contains_results()])

        self.header = False
        if not self.has_code:
            return

        if self.DATA_SOURCE_ID_PARAM not in parameters:
            raise ValueError(
                _("Parameter '{}' must be informed for task {}").format(
                    self.DATA_SOURCE_ID_PARAM, self.__class__))

        # Called once; self.metadata is read right after, so it is
        # presumably populated by this helper.
        self._set_data_source_parameters(parameters)

        # Cache is valid only if the data source is known to be unchanged
        # since the last execution.
        data_source_updated = self.metadata.get('updated')
        if data_source_updated:
            data_source_updated = datetime.datetime.strptime(
                data_source_updated[0:19], '%Y-%m-%dT%H:%M:%S')

        execution_date = parameters.get('execution_date')
        if data_source_updated and execution_date is not None:
            self.supports_cache = data_source_updated < execution_date
        else:
            # Without both timestamps freshness cannot be established;
            # comparing None with a datetime would raise TypeError.
            self.supports_cache = False

        self.output = named_outputs.get('output data',
                                        'out_task_{}'.format(self.order))
Пример #23
0
    def __init__(self, parameters, named_inputs, named_outputs):
        """Read the quantile transformation settings.

        Nothing beyond ``has_code`` is configured unless exactly one input
        is connected.

        :param parameters: task settings; ``ATTRIBUTE_PARAM`` is mandatory.
        :param named_inputs: input ports.
        :param named_outputs: output ports (``'output data'`` and ``'model'``
            override the generated names).
        :raises ValueError: if ``ATTRIBUTE_PARAM`` is missing or the number
            of quantiles is not positive.
        """
        Operation.__init__(self, parameters, named_inputs, named_outputs)

        self.has_code = len(self.named_inputs) == 1
        if not self.has_code:
            return

        if self.ATTRIBUTE_PARAM not in parameters:
            template = _("Parameters '{}' must be informed for task {}")
            raise ValueError(template.format('attributes', self.__class__))

        output_default = 'output_data_{}'.format(self.order)
        self.output = self.named_outputs.get('output data', output_default)
        model_default = 'model_{}'.format(self.order)
        self.model = self.named_outputs.get('model', model_default)

        self.attribute = parameters[self.ATTRIBUTE_PARAM]
        alias_default = 'quantiledisc_{}'.format(self.order)
        self.alias = parameters.get(self.ALIAS_PARAM, alias_default)

        # Falsy values (missing, 0, empty) fall back to the defaults.
        self.n_quantiles = parameters.get(self.N_QUANTILES_PARAM, 1000) or 1000
        uniform = self.DISTRIBUITION_PARAM_UNIFORM
        self.output_distribution = parameters.get(
            self.DISTRIBUITION_PARAM, uniform) or uniform
        self.seed = parameters.get(self.SEED_PARAM, 'None') or 'None'

        # The stored value is left as received; only the check converts it.
        if int(self.n_quantiles) <= 0:
            template = _("Parameter '{}' must be x>0 for task {}")
            raise ValueError(
                template.format(self.N_QUANTILES_PARAM, self.__class__))
Пример #24
0
    def __init__(self, parameters, named_inputs, named_outputs):
        """Read the stop-word removal settings.

        Nothing beyond ``has_code`` is configured unless ``'input data'`` is
        connected and either ``contains_results()`` is truthy or an output
        is connected.

        :param parameters: task settings; ``ATTRIBUTES_PARAM`` is a mandatory
            list whose first entry is used.
        :param named_inputs: input ports (``'stop words'`` is optional).
        :param named_outputs: output ports (``'output data'`` overrides the
            generated name).
        :raises ValueError: if ``ATTRIBUTES_PARAM`` is missing.
        """
        Operation.__init__(self, parameters, named_inputs, named_outputs)

        data_connected = 'input data' in self.named_inputs
        self.has_code = data_connected and any(
            (self.contains_results(), len(named_outputs) > 0))
        if not self.has_code:
            return

        fallback_output = 'output_data_{}'.format(self.order)
        self.output = self.named_outputs.get('output data', fallback_output)

        if self.ATTRIBUTES_PARAM not in parameters:
            template = _("Parameter '{}' must be informed for task {}")
            raise ValueError(
                template.format(self.ATTRIBUTES_PARAM, self.__class__))
        self.attributes = parameters.get(self.ATTRIBUTES_PARAM)[0]

        self.sw_case_sensitive = self.parameters.get(
            self.STOP_WORD_CASE_SENSITIVE_PARAM, False)

        # Comma-separated list; each entry is stripped of whitespace.
        raw_stop_words = self.parameters.get(self.STOP_WORD_LIST_PARAM, '')
        self.stop_word_list = [
            word.strip() for word in raw_stop_words.split(',')]

        self.alias = parameters.get(self.ALIAS_PARAM, 'tokenized_rm')
        self.stopwords_input = self.named_inputs.get('stop words')
        attribute_choice = self.parameters.get(
            self.STOP_WORD_ATTRIBUTE_PARAM, [''])
        self.stop_word_attribute = attribute_choice[0]
        self.lang = self.parameters.get(self.LANG_PARAM, '') or ''

        self.has_import = ("import nltk\n"
                           "nltk.download('stopwords')\n"
                           "from nltk.corpus import stopwords\n")
Пример #25
0
    def __init__(self, parameters, named_inputs, named_outputs):
        """Read the scaling settings of this task.

        ``has_code`` is true only when exactly one named input is present.
        When it is false, nothing else is configured.

        :param parameters: task parameters, indexed by the ``*_PARAM`` keys.
        :param named_inputs: mapping of input port name to variable name.
        :param named_outputs: mapping of output port name to variable name.
        :raises ValueError: if ``ATTRIBUTE_PARAM`` is missing from
            ``parameters`` while ``has_code`` is true.
        """
        Operation.__init__(self, parameters, named_inputs, named_outputs)

        self.has_code = len(self.named_inputs) == 1
        if self.has_code:

            # Single presence check (the original validated the same key
            # twice with two differently formatted messages).
            if self.ATTRIBUTE_PARAM not in parameters:
                raise ValueError(
                    _("Parameters '{}' must be informed for task {}").format(
                        self.ATTRIBUTE_PARAM, self.__class__))
            self.attribute = parameters[self.ATTRIBUTE_PARAM]

            self.output = self.named_outputs.get(
                'output data', 'output_data_{}'.format(self.order))
            self.model = named_outputs.get('transformation model',
                                           'model_{}'.format(self.order))

            self.alias = parameters.get(self.ALIAS_PARAM,
                                        'scaled_{}'.format(self.order))

            # Falsy values (None, '', 0) fall back to the defaults 0 and 1.
            self.min = parameters.get(self.MIN_PARAM, 0) or 0
            self.max = parameters.get(self.MAX_PARAM, 1) or 1

            self.has_import = \
                "from sklearn.preprocessing import MinMaxScaler\n"
Пример #26
0
    def __init__(self, parameters, named_inputs, named_outputs):
        """Read the n-gram settings of this task.

        ``has_code`` is true only when an ``'input data'`` port is present
        and either ``contains_results()`` is true or there is at least one
        named output. When it is false, nothing else is configured.

        :param parameters: task parameters, indexed by the ``*_PARAM`` keys.
        :param named_inputs: mapping of input port name to variable name.
        :param named_outputs: mapping of output port name to variable name.
        :raises ValueError: if ``N_PARAM`` or ``ATTRIBUTES_PARAM`` is missing
            from ``parameters`` while ``has_code`` is true.
        """
        Operation.__init__(self, parameters, named_inputs, named_outputs)

        has_input = 'input data' in self.named_inputs
        self.has_code = has_input and any(
            (self.contains_results(), len(named_outputs) > 0))
        if not self.has_code:
            return

        default_output = 'out_{}'.format(self.order)
        self.output = self.named_outputs.get('output data', default_output)

        missing_message = "Parameter '{}' must be informed for task {}"

        if self.N_PARAM not in parameters:
            raise ValueError(
                _(missing_message).format(self.N_PARAM, self.__class__))
        # The sign of n is discarded.
        raw_n = self.parameters.get(self.N_PARAM, 2)
        self.n = abs(int(raw_n))

        if self.ATTRIBUTES_PARAM not in parameters:
            raise ValueError(
                _(missing_message).format(
                    self.ATTRIBUTES_PARAM, self.__class__))
        # Only the first entry of the attributes parameter is kept.
        self.attributes = parameters.get(self.ATTRIBUTES_PARAM)[0]

        default_alias = '{}_ngram'.format(self.attributes)
        self.alias = parameters.get(self.ALIAS_PARAM, default_alias)

        self.has_import = "from nltk.util import ngrams\n"
Пример #27
0
    def __init__(self, parameters, named_inputs, named_outputs):
        """Read the support/confidence settings of this task.

        ``has_code`` is true only when there is exactly one named input and
        either ``contains_results()`` is true or there is at least one named
        output. When it is false, nothing else is configured.

        :param parameters: task parameters, indexed by the ``*_PARAM`` keys.
        :param named_inputs: mapping of input port name to variable name.
        :param named_outputs: mapping of output port name to variable name.
        :raises ValueError: if ``MIN_SUPPORT_PARAM`` is missing, or if the
            support is below 0.0001 or above 1.0.
        """
        Operation.__init__(self, parameters, named_inputs, named_outputs)

        single_input = len(named_inputs) == 1
        produces_something = self.contains_results() or len(named_outputs) > 0
        self.has_code = all((single_input, produces_something))
        if not self.has_code:
            return

        if self.MIN_SUPPORT_PARAM not in parameters:
            message = _("Parameter '{}' must be informed for task {}")
            raise ValueError(
                message.format(self.MIN_SUPPORT_PARAM, self.__class__))

        # Only the first entry of the attribute parameter is kept.
        selected = parameters.get(self.ATTRIBUTE_PARAM, [''])
        self.column = selected[0]

        raw_confidence = parameters.get(self.CONFIDENCE_PARAM, 0.9)
        self.confidence = float(raw_confidence)

        raw_support = parameters.get(self.MIN_SUPPORT_PARAM)
        self.min_support = float(raw_support)
        too_small = self.min_support < .0001
        too_large = self.min_support > 1.0
        if too_small or too_large:
            raise ValueError('Support must be greater or equal '
                             'to 0.0001 and smaller than 1.0')

        order = self.order
        self.output = named_outputs.get(
            'output data', 'output_data_{}'.format(order))
        self.rules_output = named_outputs.get(
            'rules output', 'rules_{}'.format(order))

        self.has_import = "import pyfpgrowth\n"
Пример #28
0
    def __init__(self, parameters, named_inputs, named_outputs):
        """Read the attribute, alias and split settings of this task.

        :param parameters: task parameters, indexed by the ``*_PARAM`` keys.
        :param named_inputs: mapping of input port name to variable name.
        :param named_outputs: mapping of output port name to variable name.
        :raises ValueError: if ``ATTRIBUTES_PARAM`` is missing from
            ``parameters``, or if ``HANDLE_INVALID_PARAM`` is given and is
            not one of ``'skip'``, ``'keep'`` or ``'error'``.
        """
        Operation.__init__(self, parameters, named_inputs,
                           named_outputs)

        if self.ATTRIBUTES_PARAM in parameters:
            self.attributes = parameters.get(self.ATTRIBUTES_PARAM)
        else:
            raise ValueError(
                _("Parameter '{}' must be informed for task {}").format(
                    self.ATTRIBUTES_PARAM, self.__class__))

        self.handle_invalid = self.parameters.get(self.HANDLE_INVALID_PARAM)
        if self.handle_invalid is not None:
            if self.handle_invalid not in ['skip', 'keep', 'error']:
                # NOTE(review): the message names the parameter as 'type'
                # rather than HANDLE_INVALID_PARAM - confirm this is intended.
                raise ValueError(
                    _('Parameter {} must be one of these: {}').format(
                        _('type'), ','.join([_('keep'), _('skip'), _('error')])
                    )
                )

        # An explicit None for the aliases is treated like a missing key, so
        # that .split() is always called on a string.
        raw_aliases = parameters.get(self.ALIASES_PARAM) or ''
        self.aliases = self._get_aliases(
            self.attributes, raw_aliases.split(','), 'bucketed')

        self.splits = self._get_splits(parameters)
        self.model = self.named_outputs.get(
            'model', 'model_task_{}'.format(self.order))

        self.output = self.named_outputs.get('output data',
                                             'out_task_{}'.format(self.order))

        # The original stored the two-element list itself, which is always
        # truthy; reduce it to a real boolean instead.
        # NOTE(review): any() is the choice closest to the previous
        # always-true behaviour - confirm whether all() was intended.
        self.has_code = any(
            [len(self.named_inputs) > 0, self.contains_results()])
Пример #29
0
    def __init__(self, parameters, named_inputs, named_outputs):
        """Read the support and maximum-length settings of this task.

        ``has_code`` is true only when there is exactly one named input and
        either ``contains_results()`` is true or there is at least one named
        output. When it is false, nothing else is configured.

        :param parameters: task parameters, indexed by the ``*_PARAM`` keys.
        :param named_inputs: mapping of input port name to variable name.
        :param named_outputs: mapping of output port name to variable name.
        :raises ValueError: if ``MIN_SUPPORT_PARAM`` is missing, or if the
            support is below 0.0001 or above 1.0.
        """
        Operation.__init__(self, parameters, named_inputs, named_outputs)

        single_input = len(named_inputs) == 1
        produces_something = self.contains_results() or len(named_outputs) > 0
        self.has_code = all((single_input, produces_something))
        if not self.has_code:
            return

        if self.MIN_SUPPORT_PARAM not in parameters:
            message = _("Parameter '{}' must be informed for task {}")
            raise ValueError(
                message.format(self.MIN_SUPPORT_PARAM, self.__class__))

        # Only the first entry of the attribute parameter is kept.
        selected = parameters.get(self.ATTRIBUTE_PARAM, [''])
        self.column = selected[0]

        default_output = 'output_data_{}'.format(self.order)
        self.output = self.named_outputs.get('output data', default_output)

        raw_support = parameters.get(self.MIN_SUPPORT_PARAM)
        self.min_support = float(raw_support)
        too_small = self.min_support < .0001
        too_large = self.min_support > 1.0
        if too_small or too_large:
            raise ValueError('Support must be greater or equal '
                             'to 0.0001 and smaller than 1.0')

        # The sign is discarded and a resulting 0 falls back to 10.
        raw_max_length = parameters.get(self.MAX_LENGTH_PARAM, 10)
        self.max_length = abs(int(raw_max_length)) or 10

        self.has_import = (
            "from juicer.scikit_learn.library."
            "prefix_span import PrefixSpan\n"
        )
Пример #30
0
    def __init__(self, parameters, named_inputs, named_outputs):
        """Read the mask value and set up code-generation state.

        :param parameters: task parameters; must contain ``MASK_VALUE_PARAM``
            and a ``'task'`` mapping with a ``'name'`` entry.
        :param named_inputs: mapping of input port name to variable name.
        :param named_outputs: mapping of output port name to variable name.
        :raises ValueError: if ``MASK_VALUE_PARAM`` is missing from
            ``parameters``.
        """
        Operation.__init__(self, parameters, named_inputs, named_outputs)
        self.output = named_outputs.get('output data',
                                        'out_task_{}'.format(self.order))

        # Only the presence of the key is required. The original also tested
        # ``self.MASK_VALUE_PARAM is None``, which inspects the key name, not
        # the supplied value, and so never rejected anything on its own.
        if self.MASK_VALUE_PARAM not in parameters:
            raise ValueError(
                gettext('Parameter {} are required.').format(
                    self.MASK_VALUE_PARAM))

        # A falsy value (None, '', 0) falls back to 0.0.
        self.mask_value = parameters.get(self.MASK_VALUE_PARAM, 0.0) or 0.0
        self.task_name = self.parameters.get('task').get('name')
        self.parent = ""
        self.var_name = ""
        self.has_code = True

        self.parents_by_port = parameters.get('my_ports', [])
        self.python_code_to_remove = self.remove_python_code_parent()
        self.treatment()

        self.import_code = {
            'layer': 'Masking',
            'callbacks': [],
            'model': None,
            'preprocessing_image': None,
            'others': None
        }