def parse_args():
    """
    Parse the command line arguments for this script.

    Extends the default cdr_cleaner.args_parser argument list with two
    required options: the sandbox dataset id and the path to a csv file.

    :return: the result of cdr_cleaner.args_parser.default_parse_args called
        with the two additional argument definitions
    """
    import cdr_cleaner.args_parser as parser

    def required_store_option(short_flag, long_flag, destination, help_message):
        # Both extra options are mandatory and use the 'store' action.
        return {
            parser.SHORT_ARGUMENT: short_flag,
            parser.LONG_ARGUMENT: long_flag,
            parser.ACTION: 'store',
            parser.DEST: destination,
            parser.HELP: help_message,
            parser.REQUIRED: True
        }

    sandbox_option = required_store_option(
        '-n', '--sandbox_dataset_id', 'sandbox_dataset_id',
        'Please specify the sandbox_dataset_id')
    file_option = required_store_option(
        '-f', '--file_path', 'file_path',
        'path to csv file (with header row) containing pids whose observation records are to be removed'
    )

    return parser.default_parse_args([sandbox_option, file_option])
def parse_args():
    """
    Parse the command line arguments for this script.

    Extends the default cdr_cleaner.args_parser argument list with two
    required options: ehr_dataset_id and validation_dataset_id.

    :return: the result of cdr_cleaner.args_parser.default_parse_args called
        with the two additional argument definitions
    """
    import cdr_cleaner.args_parser as parser

    # (short flag, name) pairs; the name doubles as the long flag suffix,
    # the destination attribute and the help text.
    dataset_options = (
        ('-e', 'ehr_dataset_id'),
        ('-v', 'validation_dataset_id'),
    )

    extra_arguments = [{
        parser.SHORT_ARGUMENT: short_flag,
        parser.LONG_ARGUMENT: f'--{name}',
        parser.ACTION: 'store',
        parser.DEST: name,
        parser.HELP: name,
        parser.REQUIRED: True
    } for short_flag, name in dataset_options]

    return parser.default_parse_args(extra_arguments)
def parse_args():
    """
    Parse the command line arguments for this script.

    Extends the default cdr_cleaner.args_parser argument list with one
    required option, snapshot_dataset_id.

    :return: the result of cdr_cleaner.args_parser.default_parse_args called
        with the additional argument definition
    """
    import cdr_cleaner.args_parser as parser

    # Assemble the definition key by key, in the same order as the
    # other scripts' argument definitions.
    snapshot_option = {}
    snapshot_option[parser.SHORT_ARGUMENT] = '-n'
    snapshot_option[parser.LONG_ARGUMENT] = '--snapshot_dataset_id'
    snapshot_option[parser.ACTION] = 'store'
    snapshot_option[parser.DEST] = 'snapshot_dataset_id'
    snapshot_option[parser.HELP] = 'Create a snapshot of the dataset'
    snapshot_option[parser.REQUIRED] = True

    return parser.default_parse_args([snapshot_option])
def parse_args():
    """
    Parse the command line arguments for this script.

    Extends the default cdr_cleaner.args_parser argument list with one
    required option, sandbox_dataset_id.

    :return: the result of cdr_cleaner.args_parser.default_parse_args called
        with the additional argument definition
    """
    import cdr_cleaner.args_parser as parser

    # The single extra option is defined inline and handed straight to the
    # default parser.
    return parser.default_parse_args([{
        parser.SHORT_ARGUMENT: '-n',
        parser.LONG_ARGUMENT: '--sandbox_dataset_id',
        parser.ACTION: 'store',
        parser.DEST: 'sandbox_dataset_id',
        parser.HELP: 'Please specify the sandbox_dataset_id',
        parser.REQUIRED: True
    }])
if __name__ == '__main__': import cdr_cleaner.args_parser as parser import cdr_cleaner.clean_cdr_engine as clean_engine combined_dataset_arg = { parser.SHORT_ARGUMENT: '-c', parser.LONG_ARGUMENT: '--combined_dataset_id', parser.ACTION: 'store', parser.DEST: 'combined_dataset_id', parser.HELP: 'Identifies the combined dataset', parser.REQUIRED: True } ARGS = parser.default_parse_args([combined_dataset_arg]) if ARGS.list_queries: clean_engine.add_console_logging() query_list = clean_engine.get_query_list( ARGS.project_id, ARGS.dataset_id, ARGS.sandbox_dataset_id, [(RemoveFitbitDataIfMaxAgeExceeded, )], combined_dataset_id=ARGS.combined_dataset_id) for query in query_list: LOGGER.info(query) else: clean_engine.add_console_logging(ARGS.console_log) clean_engine.clean_dataset( ARGS.project_id, ARGS.dataset_id,
project=self.project_id, dataset=self.dataset_id, domain_table=table, string_fields=string_fields) result = client.query(validation_query).result() if result.total_rows > 0: raise RuntimeError( f'{table} has {result.total_rows} records that have non-null string values' ) if __name__ == '__main__': import cdr_cleaner.args_parser as parser import cdr_cleaner.clean_cdr_engine as clean_engine ARGS = parser.default_parse_args() if ARGS.list_queries: clean_engine.add_console_logging() query_list = clean_engine.get_query_list(ARGS.project_id, ARGS.dataset_id, ARGS.sandbox_dataset_id, [(StringFieldsSuppression, )]) for query in query_list: LOGGER.info(query) else: clean_engine.add_console_logging(ARGS.console_log) clean_engine.clean_dataset(ARGS.project_id, ARGS.dataset_id, ARGS.sandbox_dataset_id, [(StringFieldsSuppression, )])
parser.ACTION: 'store', parser.DEST: 'mapping_dataset_id', parser.HELP: 'Identifies the dataset containing pid-rid map table', parser.REQUIRED: True } mapping_table_arg = { parser.SHORT_ARGUMENT: '-t', parser.LONG_ARGUMENT: '--mapping_table_id', parser.ACTION: 'store', parser.DEST: 'mapping_table_id', parser.HELP: 'Identifies the pid-rid map table, typically _deid_map', parser.REQUIRED: True } ARGS = parser.default_parse_args([mapping_dataset_arg, mapping_table_arg]) if ARGS.list_queries: clean_engine.add_console_logging() query_list = clean_engine.get_query_list( ARGS.project_id, ARGS.dataset_id, ARGS.sandbox_dataset_id, [(PIDtoRID, )], mapping_dataset_id=ARGS.mapping_dataset_id, mapping_table_id=ARGS.mapping_table_id) for query in query_list: LOGGER.info(query) else: clean_engine.add_console_logging(ARGS.console_log) clean_engine.clean_dataset(ARGS.project_id, ARGS.dataset_id,