示例#1
0
    def update_schema(self, tablename, mappings):
        """
        Change the datatypes of columns in a table and store the regenerated metadata.

        tablename: name of the table whose schema is being updated.
        mappings: dict of column name to 'continuous', 'multinomial', 'ignore', or 'key'.
            Column names are lowercased before being looked up.

        Returns the result of self.get_metadata_full(tablename), re-read after the update.

        Raises utils.BayesDBError if a column does not exist, if a datatype is not one of the
        valid datatypes, if the column is currently the table key, or if the requested
        datatype is 'key'.

        TODO: can we get rid of cctypes?
        """
        metadata_full = self.get_metadata_full(tablename)
        cctypes_full = metadata_full['cctypes_full']
        M_c_full = metadata_full['M_c_full']
        raw_T_full = metadata_full['raw_T_full']
        colnames_full = utils.get_all_column_names_in_original_order(M_c_full)
        name_to_idx = M_c_full['name_to_idx']

        # Now, update cctypes_full (cctypes updated later, after removing ignores).
        mapping_set = 'continuous', 'multinomial', 'ignore', 'key'
        for col, mapping in mappings.items():
            col_lower = col.lower()
            if col_lower not in name_to_idx:
                raise utils.BayesDBError('Error: column %s does not exist.' % col)
            elif mapping not in mapping_set:
                raise utils.BayesDBError('Error: datatype %s is not one of the valid datatypes: %s.' % (mapping, str(mapping_set)))

            cidx = name_to_idx[col_lower]

            # If the column's current type is key, don't allow the change.
            if cctypes_full[cidx] == 'key':
                raise utils.BayesDBError('Error: %s is already set as the table key. To change its type, reload the table using CREATE BTABLE and choose a different key column.' % col_lower)
            # If the user tries to change a column to key, it's easier to reload the table, since at this point
            # there aren't models anyways. Eventually we can build this in if it's desirable.
            elif mapping == 'key':
                raise utils.BayesDBError('Error: key column already exists. To choose a different key, reload the table using CREATE BTABLE')

            cctypes_full[cidx] = mapping

        # Make sure there is exactly one key. The count uses a generator rather than
        # len(filter(...)) because filter() returns an iterator (no len()) on Python 3.
        num_keys = sum(1 for cctype in cctypes_full if cctype == 'key')
        assert num_keys == 1

        T_full, M_r_full, M_c_full, _ = data_utils.gen_T_and_metadata(colnames_full, raw_T_full, cctypes=cctypes_full)

        # Variables without "_full" don't include ignored columns.
        raw_T, cctypes, colnames = data_utils.remove_ignore_cols(raw_T_full, cctypes_full, colnames_full)
        T, M_r, M_c, _ = data_utils.gen_T_and_metadata(colnames, raw_T, cctypes=cctypes)

        # Now, put cctypes, T, M_c, and M_r back into the DB
        self.update_metadata(tablename, M_r, M_c, T, cctypes)
        self.update_metadata_full(tablename, M_r_full, M_c_full, T_full, cctypes_full)

        return self.get_metadata_full(tablename)
示例#2
0
    def update_schema(self, tablename, mappings):
        """
        Change the datatypes (and parameters) of columns in a table and store the regenerated
        metadata.

        tablename: name of the table whose schema is being updated.
        mappings: dict of column name to a dict with two keys:
            'cctype': one of 'cyclic', 'numerical', 'categorical', 'ignore', or 'key'.
            'parameters': the parameters to record for that column.
            Column names are lowercased before being looked up.

        Returns the result of self.get_metadata_full(tablename), re-read after the update.

        Raises utils.BayesDBError if a column does not exist, if a cctype is not one of the
        valid datatypes, if the column is currently the table key, or if the requested cctype
        is 'key'.

        TODO: can we get rid of cctypes?
        """
        metadata_full = self.get_metadata_full(tablename)
        cctypes_full = metadata_full['cctypes_full']
        M_c_full = metadata_full['M_c_full']
        raw_T_full = metadata_full['raw_T_full']
        colnames_full = utils.get_all_column_names_in_original_order(M_c_full)
        name_to_idx = M_c_full['name_to_idx']
        try:
            parameters_full = [x['parameters'] for x in M_c_full['column_metadata']]
        except KeyError:
            print('WARNING: resetting parameters to defaults. Please check these values with '
                  'DESCRIBE and adjust them manually if necessary.')
            # NOTE(review): this branch used to build per-modeltype defaults (cardinality for
            # dirichlet columns, [0, 2*pi] for vonmises columns) and then immediately replace
            # them with an all-None list. Only the all-None result ever took effect, so that is
            # what is kept here; confirm whether explicit defaults were actually intended.
            parameters_full = [None] * len(M_c_full['column_metadata'])

        # Now, update cctypes_full (cctypes updated later, after removing ignores).
        mapping_set = 'numerical', 'categorical', 'ignore', 'key', 'cyclic'

        for colname, mapping in mappings.items():
            cctype = mapping['cctype']
            parameters = mapping['parameters']
            colname_lower = colname.lower()

            if colname_lower not in name_to_idx:
                raise utils.BayesDBError('Error: column %s does not exist.' % colname)
            elif cctype not in mapping_set:
                # Report the offending datatype itself, not the whole mapping dict.
                raise utils.BayesDBError('Error: datatype %s is not one of the valid datatypes: %s.'
                                         % (cctype, str(mapping_set)))

            cidx = name_to_idx[colname_lower]

            # If the column's current type is key, don't allow the change.
            if cctypes_full[cidx] == 'key':
                raise utils.BayesDBError('Error: %s is already set as the table key. To change its '
                                         'type, reload the table using CREATE BTABLE and choose a '
                                         'different key column.' % colname_lower)
            # If the user tries to change a column to key, it's easier to reload the table, since at
            # this point there aren't models anyways. Eventually we can build this in if it's
            # desirable.
            elif cctype == 'key':
                raise utils.BayesDBError('Error: key column already exists. To choose a different '
                                         'key, reload the table using CREATE BTABLE')

            cctypes_full[cidx] = cctype
            parameters_full[cidx] = parameters

        # Make sure there is exactly one key. The count uses a generator rather than
        # len(filter(...)) because filter() returns an iterator (no len()) on Python 3.
        num_keys = sum(1 for existing in cctypes_full if existing == 'key')
        assert num_keys == 1

        T_full, M_r_full, M_c_full, _ = data_utils.gen_T_and_metadata(colnames_full, raw_T_full,
                                                                      cctypes=cctypes_full,
                                                                      parameters=parameters_full)

        # Variables without "_full" don't include ignored columns.
        raw_T, cctypes, colnames, parameters = data_utils.remove_ignore_cols(raw_T_full,
                                                                             cctypes_full,
                                                                             colnames_full,
                                                                             parameters_full)
        T, M_r, M_c, _ = data_utils.gen_T_and_metadata(colnames, raw_T, cctypes=cctypes,
                                                       parameters=parameters)

        # Now, put cctypes, T, M_c, and M_r back into the DB
        self.update_metadata(tablename, M_r, M_c, T, cctypes)
        self.update_metadata_full(tablename, M_r_full, M_c_full, T_full, cctypes_full)

        return self.get_metadata_full(tablename)
示例#3
0
    def update_schema(self, tablename, mappings):
        """
        Change the datatypes (and parameters) of columns in a table and store the regenerated
        metadata.

        tablename: name of the table whose schema is being updated.
        mappings: dict of column name to a dict with two keys:
            'cctype': one of 'cyclic', 'numerical', 'categorical', 'ignore', or 'key'.
            'parameters': the parameters to record for that column.
            Column names are lowercased before being looked up.

        Returns the result of self.get_metadata_full(tablename), re-read after the update.

        Raises utils.BayesDBError if a column does not exist, if a cctype is not one of the
        valid datatypes, if the column is currently the table key, or if the requested cctype
        is 'key'.

        TODO: can we get rid of cctypes?
        """
        metadata_full = self.get_metadata_full(tablename)
        cctypes_full = metadata_full['cctypes_full']
        M_c_full = metadata_full['M_c_full']
        raw_T_full = metadata_full['raw_T_full']
        colnames_full = utils.get_all_column_names_in_original_order(M_c_full)
        name_to_idx = M_c_full['name_to_idx']
        try:
            parameters_full = [x['parameters'] for x in M_c_full['column_metadata']]
        except KeyError:
            print('WARNING: resetting parameters to defaults. Please check these values with '
                  'DESCRIBE and adjust them manually if necessary.')
            # NOTE(review): this branch used to build per-modeltype defaults (cardinality for
            # dirichlet columns, [0, 2*pi] for vonmises columns) and then immediately replace
            # them with an all-None list. Only the all-None result ever took effect, so that is
            # what is kept here; confirm whether explicit defaults were actually intended.
            parameters_full = [None] * len(M_c_full['column_metadata'])

        # Now, update cctypes_full (cctypes updated later, after removing ignores).
        mapping_set = 'numerical', 'categorical', 'ignore', 'key', 'cyclic'

        for colname, mapping in mappings.items():
            cctype = mapping['cctype']
            parameters = mapping['parameters']
            colname_lower = colname.lower()

            if colname_lower not in name_to_idx:
                raise utils.BayesDBError('Error: column %s does not exist.' % colname)
            elif cctype not in mapping_set:
                # Report the offending datatype itself, not the whole mapping dict.
                raise utils.BayesDBError('Error: datatype %s is not one of the valid datatypes: %s.'
                                         % (cctype, str(mapping_set)))

            cidx = name_to_idx[colname_lower]

            # If the column's current type is key, don't allow the change.
            if cctypes_full[cidx] == 'key':
                raise utils.BayesDBError('Error: %s is already set as the table key. To change its '
                                         'type, reload the table using CREATE BTABLE and choose a '
                                         'different key column.' % colname_lower)
            # If the user tries to change a column to key, it's easier to reload the table, since at
            # this point there aren't models anyways. Eventually we can build this in if it's
            # desirable.
            elif cctype == 'key':
                raise utils.BayesDBError('Error: key column already exists. To choose a different '
                                         'key, reload the table using CREATE BTABLE')

            cctypes_full[cidx] = cctype
            parameters_full[cidx] = parameters

        # Make sure there is exactly one key. The count uses a generator rather than
        # len(filter(...)) because filter() returns an iterator (no len()) on Python 3.
        num_keys = sum(1 for existing in cctypes_full if existing == 'key')
        assert num_keys == 1

        T_full, M_r_full, M_c_full, _ = data_utils.gen_T_and_metadata(colnames_full, raw_T_full,
                                                                      cctypes=cctypes_full,
                                                                      parameters=parameters_full)

        # Variables without "_full" don't include ignored columns.
        raw_T, cctypes, colnames, parameters = data_utils.remove_ignore_cols(raw_T_full,
                                                                             cctypes_full,
                                                                             colnames_full,
                                                                             parameters_full)
        T, M_r, M_c, _ = data_utils.gen_T_and_metadata(colnames, raw_T, cctypes=cctypes,
                                                       parameters=parameters)

        # Now, put cctypes, T, M_c, and M_r back into the DB
        self.update_metadata(tablename, M_r, M_c, T, cctypes)
        self.update_metadata_full(tablename, M_r_full, M_c_full, T_full, cctypes_full)

        return self.get_metadata_full(tablename)