def main(raw_args=None):
    """
    Run the clean rules reporter.

    Arguments are obtained from ``parse_args``: pass a list to have that list
    parsed, or leave ``raw_args`` as None to parse the command line.  Accepting
    an explicit list lets other python modules call this entry point directly.

    When the unspecified data stage value is among the requested stages, the
    request is replaced by the values of every other ``DataStage`` member
    before the CSV report is written.

    :param raw_args: The list of arguments to parse.  Defaults to parsing the
        command line.
    """
    args = parse_args(raw_args)
    engine.add_console_logging(args.console_log)

    unspecified = cdr_consts.DataStage.UNSPECIFIED
    if unspecified.value in args.data_stage:
        # Expand the request to every stage except the UNSPECIFIED placeholder.
        every_stage = []
        for member in cdr_consts.DataStage:
            if member is not unspecified:
                every_stage.append(member.value)
        args.data_stage = every_stage

        LOGGER.info(
            f"Data stage was {unspecified.value}, so all stages "
            f"will be reported on: {args.data_stage}")

    write_csv_report(args.output_filepath, args.data_stage, args.fields)

    LOGGER.info("Finished the reporting module")
def main(args=None):
    """
    Run, or only list the queries of, the cleaning rules for a data stage.

    The arguments are split by ``fetch_args_kwargs`` into the parsed arguments
    and extra keyword parameters.  The rule set is looked up in
    ``DATA_STAGE_RULES_MAPPING`` by the data stage value, and both the rules
    and the extra parameters are handed to ``validate_custom_params`` before
    the clean engine is used.

    With ``list_queries`` set, every query returned by
    ``clean_engine.get_query_list`` is logged and ``clean_dataset`` is not
    called.  Otherwise ``clean_engine.clean_dataset`` is called for the rules.

    :param args: list of all the arguments to apply the cleaning rules
    :return: None
    """
    parsed_args, custom_params = fetch_args_kwargs(args)
    stage_rules = DATA_STAGE_RULES_MAPPING[parsed_args.data_stage.value]
    validate_custom_params(stage_rules, **custom_params)

    if not parsed_args.list_queries:
        # Normal mode: honour the console logging flag and clean the dataset.
        clean_engine.add_console_logging(parsed_args.console_log)
        clean_engine.clean_dataset(
            project_id=parsed_args.project_id,
            dataset_id=parsed_args.dataset_id,
            sandbox_dataset_id=parsed_args.sandbox_dataset_id,
            rules=stage_rules,
            **custom_params)
        return

    # Listing mode: console logging is enabled with the engine's default
    # argument, then each query is emitted through the logger.
    clean_engine.add_console_logging()
    queries = clean_engine.get_query_list(
        project_id=parsed_args.project_id,
        dataset_id=parsed_args.dataset_id,
        sandbox_dataset_id=parsed_args.sandbox_dataset_id,
        rules=stage_rules,
        **custom_params)
    for statement in queries:
        LOGGER.info(statement)
parser.REQUIRED: True }, { parser.SHORT_ARGUMENT: '-v', parser.LONG_ARGUMENT: '--validation_dataset_id', parser.ACTION: 'store', parser.DEST: 'validation_dataset_id', parser.HELP: 'validation_dataset_id', parser.REQUIRED: True }] args = parser.default_parse_args(additional_arguments) return args if __name__ == '__main__': import cdr_cleaner.clean_cdr_engine as clean_engine ARGS = parse_args() if ARGS.list_queries: clean_engine.add_console_logging() query_list = clean_engine.get_query_list( ARGS.project_id, ARGS.dataset_id, ARGS.sandbox_dataset_id, [(delete_records_for_non_matching_participants, )]) for query in query_list: LOGGER.info(query) else: clean_engine.add_console_logging(ARGS.console_log) clean_engine.clean_dataset( ARGS.project_id, ARGS.dataset_id, ARGS.sandbox_dataset_id, [(delete_records_for_non_matching_participants, )])
import argparse parser = argparse.ArgumentParser( formatter_class=argparse.RawDescriptionHelpFormatter) parser.add_argument('-d', '--data_stage', required=True, dest='data_stage', action='store', type=stage, choices=list( [s for s in stage if s is not stage.UNSPECIFIED]), help='Specify the dataset') parser.add_argument('-s', action='store_true', help='Send logs to console') args = parser.parse_args() clean_engine.add_console_logging(args.s) if args.data_stage == stage.EHR: clean_ehr_dataset() elif args.data_stage == stage.UNIONED: clean_unioned_ehr_dataset() elif args.data_stage == stage.RDR: clean_rdr_dataset() elif args.data_stage == stage.COMBINED: clean_combined_dataset() elif args.data_stage == stage.DEID_BASE: clean_combined_de_identified_dataset() elif args.data_stage == stage.DEID_CLEAN: clean_combined_de_identified_clean_dataset() else: raise EnvironmentError( f'Dataset selection should be from [{stage.EHR}, {stage.UNIONED}, {stage.RDR}, {stage.COMBINED},'