Exemplo n.º 1
0
    def _process_frame(df_source, config):
        """
        Build the result frame for `df_source`.

        The result gets a ``PATID`` column copied from the ``COL_PATID``
        column of the source, plus one column per rule in ``rulz``.  Each
        rule column is named after the rule and holds, for every source
        row, the value returned by ``HashGenerator._process_row_series``.

        .. seealso::
            `_process_row_series`
            `generate`

        :param df_source: source DataFrame; must contain ``COL_PATID``
        :param config: handed unchanged to ``_process_row_series``
        :return: the newly built DataFrame
        """
        result = pd.DataFrame()
        result['PATID'] = df_source[COL_PATID]

        for rule, rule_data in rulz.items():
            # Positional arguments that follow the row in every call
            row_args = (rule, rule_data['pattern'],
                        rule_data['required_attr'], config)
            # axis=1 makes apply() hand over one row (a Series) at a time
            result[rule] = df_source.apply(
                HashGenerator._process_row_series, axis=1, args=row_args)

        return result
Exemplo n.º 2
0
    def _process_frame(cls, df_source, config):
        """
        Build the result frame for `df_source`.

        The result gets a ``PATID`` column copied from the ``patid`` column
        of the source, plus one column per rule in ``rulz``.  Each rule
        column is named after the rule and holds, for every source row,
        the value returned by ``cls._process_row_series``.

        :param df_source: source DataFrame; must contain a ``patid`` column
        :param config: handed unchanged to ``_process_row_series``
        :return: the newly built DataFrame
        """
        frame = pd.DataFrame()
        frame['PATID'] = df_source['patid']

        for index, (rule, rule_data) in enumerate(rulz.items()):
            cls.log.debug("Applying rule {}: {}".format(index, rule))
            pattern = rule_data['pattern']
            required_attr = rule_data['required_attr']

            # Called right away by apply() below, so reading the loop
            # variables from the enclosing scope is safe here.
            def hash_row(row):
                return cls._process_row_series(
                    row, rule, pattern, required_attr, config)

            # axis=1 makes apply() hand over one row (a Series) at a time
            frame[rule] = df_source.apply(hash_row, axis=1)

        cls.log.debug("Processed frame: \n{}".format(frame))
        return frame
Exemplo n.º 3
0
    def _validate_config(cls, config):
        """
        Check `config` and fail early on invalid settings.

        The checks run in this order:

        - ``COLUMN_MAP`` must be present (not ``None``),
        - every column in ``REQUIRED_COLS`` must be a key of ``COLUMN_MAP``,
        - ``ENABLED_RULES`` must be present and non-empty,
        - every code in ``ENABLED_RULES`` must be contained in ``rulz``.

        :param config: object offering a dict-like ``get(key, default)``
        :raises ConfigErr: on the first check that fails
        """
        column_map = config.get('COLUMN_MAP', None)

        if column_map is None:
            raise ConfigErr('Please verify that the config specifies'
                            ' the `COLUMN_MAP` parameter!')

        # Verify that every required column is mapped
        configured_cols = list(column_map.keys())
        configured_lookup = set(configured_cols)

        # Collect the missing columns in the order REQUIRED_COLS declares
        # them (each one once), so the error message is the same on every
        # run instead of following arbitrary set ordering.
        missing_cols = []
        for col in REQUIRED_COLS:
            if col not in configured_lookup and col not in missing_cols:
                missing_cols.append(col)

        cls.log.info('REQUIRED_COLS: {}'.format(REQUIRED_COLS))
        cls.log.info('CONFIGURED_COLS: {}'.format(configured_cols))

        if missing_cols:
            raise ConfigErr(
                'Please verify that the config specifies every column'
                ' in the `COLUMN_MAP` parameter! Missing: {}'
                .format(missing_cols))

        enabled_rules = config.get('ENABLED_RULES', None)

        if not enabled_rules:
            raise ConfigErr('Please verify that the config specifies'
                            ' the `ENABLED_RULES` parameter!')

        for rule_code in enabled_rules:
            if rule_code not in rulz:
                # list(rulz) prints as a plain list; the keys() view would
                # show up as "dict_keys([...])" under Python 3
                raise ConfigErr(
                    'Invalid rule: [{}]! Available codes are: {}'.format(
                        rule_code, list(rulz)))
Exemplo n.º 4
0
    def _validate_config(cls, config):
        """
        Check the rule settings of `config` and fail early when invalid.

        ``ENABLED_RULES`` must be present and non-empty, and every code in
        it must be contained in ``rulz``.

        :param config: object offering a dict-like ``get(key, default)``
        :raises ConfigErr: if ``ENABLED_RULES`` is missing or empty, or if
            it holds a code that is not in ``rulz``
        """
        enabled_rules = config.get('ENABLED_RULES', None)

        if not enabled_rules:
            raise ConfigErr('Please verify that the config specifies'
                            ' the `ENABLED_RULES` parameter!')

        for rule_code in enabled_rules:
            if rule_code not in rulz:
                # list(rulz) prints as a plain list; the keys() view would
                # show up as "dict_keys([...])" under Python 3
                raise ConfigErr('Invalid rule code: [{}]!'
                                ' Available codes are: {}'.format(
                                    rule_code, list(rulz)))