def test_whole(self):
        # End-to-end smoke test: transform the 'fare' column, wire the
        # Numerical input nub straight into its output nub, and make sure
        # the resulting model compiles with the suggested loss.
        variable = 'fare'
        numerical = Numerical()

        # Raw observations to run through the default pipeline
        titanic_df = lib.load_titanic()

        # Fit and apply the default transformation for the single variable
        pipeline = numerical.default_transformation_pipeline
        fare_mapper = DataFrameMapper([([variable], pipeline)], df_out=True)
        fare_df = fare_mapper.fit_transform(titanic_df)

        # Build both ends of the network from the transformed data
        layers, nub_in = numerical.input_nub_generator(variable, fare_df)
        make_output = numerical.output_nub_generator(variable, fare_df)

        # Input nub feeds directly into the output nub
        network = Model(layers, make_output(nub_in))
        suggested_loss = numerical.output_suggested_loss()
        network.compile(optimizer='adam', loss=suggested_loss)
 def test_init(self):
     # A default-constructed Numerical handler must report output support
     self.assertTrue(Numerical().supports_output)
Exemplo n.º 3
0
    def __init__(self,
                 data_type_dict=None,
                 output_var=None,
                 datatype_handlers=None):
        """
        Set up the variable lists, datatype handlers, mappers and bookkeeping attributes.

        :param data_type_dict: A dictionary, in the format {'datatype': ['variable_name_1', 'variable_name_2']}.
            Defaults to an empty dictionary.
        :type data_type_dict: {str:[str]}
        :param output_var: The name of the response variable
        :type output_var: str
        :param datatype_handlers: Any custom or external datatype handlers, in the format {'datatype': DataTypeClass}.
            Defaults to an empty dictionary. Entries override the built-in handlers of the same name.
        :type datatype_handlers: {str:class}
        :raises ValueError: If a variable is listed under a datatype that has no registered handler
        """

        # None sentinels instead of `dict()` defaults: a default dict is created once and would be
        # shared (and stored on the instance) by every object built without these arguments
        if data_type_dict is None:
            data_type_dict = dict()
        if datatype_handlers is None:
            datatype_handlers = dict()

        # Dictionary of the format {'datatype': ['variable_name_1', 'variable_name_2']}
        self.datatype_variable_dict = data_type_dict

        # Set up a list of all input variables. Flattening always builds a new list, so the
        # caller's lists are never mutated below, and an empty dictionary yields an empty list
        # (reduce() without an initial value raises TypeError on an empty sequence)
        self.input_vars = [
            variable
            for variable_list in self.datatype_variable_dict.values()
            for variable in variable_list
        ]

        # If there's an output_var, remove it from input_vars
        if (output_var is not None) and (output_var in self.input_vars):
            self.input_vars.remove(output_var)

        # Set up output
        self.output_var = output_var
        self.supervised = self.output_var is not None

        # Set up datatype handlers
        self.datatype_handlers = {
            'numerical': Numerical(),
            'categorical': Categorical(),
            'boolean': Boolean(),
            'timeseries': TimeSeries(),
            'text': Text()
        }

        # Add user-supplied datatype handlers
        self.datatype_handlers.update(datatype_handlers)

        # Dictionary of the format {'variable_name_1': DataTypeClass}
        self.variable_datatype_dict = dict()
        for datatype_name, variable_list in self.datatype_variable_dict.items():
            for variable in variable_list:
                # Checked per variable, so a datatype with no variables is never validated
                if datatype_name not in self.datatype_handlers:
                    raise ValueError(
                        'Unknown datatype: {}'.format(datatype_name))

                handler = self.datatype_handlers[datatype_name]
                self.variable_datatype_dict[variable] = handler
                logging.info(
                    'Providing variable: {} with datatype handler: {}'.
                    format(variable, handler))

        # Set up mappers
        self.input_mapper = self._create_mapper(self.input_vars)
        if self.supervised:
            self.output_mapper = self._create_mapper([self.output_var])
        else:
            self.output_mapper = None

        # Attributes
        self.fitted = False
        self.input_layers = None
        self.input_nub = None
        self.output_nub = None

        # Exit checks
        self._valid_configurations_check()
    def test_init(self):
        # Build the handler with its defaults
        numerical = Numerical()

        # The handler is expected to report that it supports output
        supports_output = numerical.supports_output
        self.assertTrue(supports_output)
 def test_datatype_signature(self):
     numerical = Numerical()

     # Run the shared datatype validity check before inspecting attributes
     lib.check_valid_datatype(numerical)

     # The handler is expected to report that it supports output
     supports_output = numerical.supports_output
     self.assertTrue(supports_output)
Exemplo n.º 6
0
    def __init__(self,
                 data_type_dict=None,
                 output_var=None,
                 datatype_handlers=None):
        """
        An Automater object, allows users to rapidly build and iterate on deep learning models.

        This class supports building and iterating on deep learning models by providing:

         - A cleaned, transformed and correctly formatted X and y (good for keras, sklearn or any other ML platform)
         - An `input_nub`, without the hassle of worrying about input shapes or data types
         - An `output_nub`, correctly formatted for the kind of response variable provided

        :param data_type_dict: A dictionary, in the format {'datatype': ['variable_name_1', 'variable_name_2']}.
            Defaults to an empty dictionary.
        :type data_type_dict: {str:[str]}
        :param output_var: The name of the response variable
        :type output_var: str
        :param datatype_handlers: Any custom or external datatype handlers, in the format {'datatype': DataTypeClass}.
            Defaults to an empty dictionary. Entries override the built-in handlers of the same name.
        :type datatype_handlers: {str:class}
        :raises ValueError: If a variable is listed under a datatype that has no registered handler
        """

        # None sentinels instead of `dict()` defaults: a default dict is created once and would be
        # shared (and stored on the instance) by every object built without these arguments
        if data_type_dict is None:
            data_type_dict = dict()
        if datatype_handlers is None:
            datatype_handlers = dict()

        # Dictionary of the format {'datatype': ['variable_name_1', 'variable_name_2']}
        self.datatype_variable_dict = data_type_dict

        # Set up a list of all input variables. Flattening always builds a new list, so the
        # caller's lists are never mutated below, and an empty dictionary yields an empty list
        # (reduce() without an initial value raises TypeError on an empty sequence)
        self.input_vars = [
            variable
            for variable_list in self.datatype_variable_dict.values()
            for variable in variable_list
        ]

        # If there's an output_var, remove it from input_vars
        if (output_var is not None) and (output_var in self.input_vars):
            self.input_vars.remove(output_var)

        # Set up output
        self.output_var = output_var
        self.supervised = self.output_var is not None

        # Set up datatype handlers
        self.datatype_handlers = {
            'numerical': Numerical(),
            'categorical': Categorical(),
            'boolean': Categorical(),
            'timeseries': TimeSeries(),
            'text': Text()
        }

        # Add user-supplied datatype handlers
        self.datatype_handlers.update(datatype_handlers)

        # Dictionary of the format {'variable_name_1': DataTypeClass}
        self.variable_datatype_dict = dict()
        for datatype_name, variable_list in self.datatype_variable_dict.items():
            for variable in variable_list:
                # Checked per variable, so a datatype with no variables is never validated
                if datatype_name not in self.datatype_handlers:
                    raise ValueError(
                        'Unknown datatype: {}'.format(datatype_name))

                handler = self.datatype_handlers[datatype_name]
                self.variable_datatype_dict[variable] = handler
                logging.info(
                    'Providing variable: {} with datatype handler: {}'.
                    format(variable, handler))

        # Set up mappers
        self.input_mapper = self._create_mapper(self.input_vars)
        if self.supervised:
            self.output_mapper = self._create_mapper([self.output_var])
        else:
            self.output_mapper = None

        # Attributes
        self.fitted = False
        self.input_layers = None
        self.input_nub = None
        self.output_nub = None

        # Exit checks
        self._valid_configurations_check()