def test_load_mpf_tsv(self):
    sample_data = [
        {'collection_cd': 'MSTCK',
         'lbry_entity_cd': 'BC-BC',
         'collection_name':
             'Media Stacks / Request to Pick-up at Home Library',
         'db_operation_cd': 'U',
         'lbry_staff_lms_user_id': 'hhanson',
         'db_operation_effective_date': '2019-08-19'}
    ]
    reader = ListReader(sample_data)
    writer = ListWriter()
    job_info = JobInfo(-1, 'test_user', '1', '1')
    logger = None
    step = LoadMpfTsv(reader, writer, job_info, logger)
    step.execute()
    results = writer.list
    self.assertEqual(len(sample_data), len(results))
    expected_keys = sorted([
        'collection_cd', 'collection_name', 'lbry_entity_cd',
        'db_operation_cd', 'usmai_mbr_lbry_cd', 'lbry_staff_lms_user_id',
        'db_operation_effective_date', 'em_create_dw_prcsng_cycle_id',
        'em_create_dw_job_exectn_id', 'em_create_dw_job_name',
        'em_create_dw_job_version_no', 'em_create_user_id',
        'em_create_tmstmp'
    ])
    self.assertEqual(expected_keys, sorted(list(results[0].keys())))
    self.assertEqual("BC", results[0]['usmai_mbr_lbry_cd'])

def test_load_aleph_tsv(self):
    sample_data = [{
        'rec_type_cd': 'D',
        'db_operation_cd': 'U',
        'rec_trigger_key': '000007520'
    }, {
        'rec_type_cd': 'D',
        'db_operation_cd': 'U',
        'rec_trigger_key': '000147967'
    }]
    reader = ListReader(sample_data)
    writer = ListWriter()
    job_info = JobInfo(-1, 'test_user', '1', '1')
    logger = None
    step = LoadAlephTsv(reader, writer, job_info, logger)
    step.execute()
    results = writer.list
    self.assertEqual(len(sample_data), len(results))
    self.assertEqual('000007520', results[0]['rec_trigger_key'])
    self.assertEqual('000147967', results[1]['rec_trigger_key'])
    expected_keys = sorted([
        'rec_type_cd', 'db_operation_cd', 'rec_trigger_key',
        'em_create_dw_prcsng_cycle_id', 'em_create_dw_job_exectn_id',
        'em_create_dw_job_name', 'em_create_dw_job_version_no',
        'em_create_user_id', 'em_create_tmstmp'
    ])
    self.assertEqual(expected_keys, sorted(list(results[0].keys())))

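# Hedged sketch, not the project's implementation: the load steps above keep
# every source column and add the em_create_* audit columns that appear in
# expected_keys. The helper name and the dict-shaped job metadata (mirroring
# the job_info dict in test_identity_processor below) are illustrative
# assumptions; the real JobInfo object may expose these values differently.
def _stamp_audit_columns_sketch(row, job_metadata):
    stamped = dict(row)
    # e.g. em_create_dw_prcsng_cycle_id, em_create_user_id, em_create_tmstmp
    stamped.update(job_metadata)
    return stamped
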
def test_load_z00_field_tsv(self):
    sample_data = [{
        'z00_doc_number': '000025252',
        'z00_marc_rec_field_cd': 'FMT',
        'UNUSED': 'L',
        'z00_marc_rec_field_txt': 'BK',
    }]
    reader = ListReader(sample_data)
    writer = ListWriter()
    job_info = JobInfo(-1, 'test_user', '1', '1')
    logger = None
    step = LoadZ00FieldTsv(reader, writer, job_info, logger)
    step.execute()
    results = writer.list
    self.assertEqual(len(sample_data), len(results))
    expected_keys = sorted([
        'rec_type_cd', 'db_operation_cd', 'rec_trigger_key',
        'z00_doc_number', 'dw_stg_1_marc_rec_field_seq_no',
        'z00_marc_rec_field_cd', 'z00_marc_rec_field_txt',
        'em_create_dw_prcsng_cycle_id', 'em_create_dw_job_exectn_id',
        'em_create_dw_job_name', 'em_create_dw_job_version_no',
        'em_create_user_id', 'em_create_tmstmp'
    ])
    self.assertEqual(expected_keys, sorted(list(results[0].keys())))
    self.assertEqual(1, results[0]['dw_stg_1_marc_rec_field_seq_no'])

def test_process_item(self):
    writer = ListWriter()
    job_info = JobInfo(-1, 'test_user', '1', '1')
    reader = ListReader(self.sample_data)
    ezproxy_fact_processor = EzproxyFactProcessor(
        reader, writer, job_info, self.logger,
        self.max_ezp_sessns_snap_fact_key)
    ezproxy_fact_processor.execute()
    results = ezproxy_fact_processor.writer.list
    expected_keys = sorted([
        'em_create_dw_prcsng_cycle_id', 'in_ezp_sessns_snap_tmstmp',
        'in_mbr_lbry_cd', 'em_create_dw_job_exectn_id',
        'em_create_dw_job_name', 'em_create_dw_job_version_no',
        'em_create_user_id', 'em_create_tmstmp',
        'ezp_sessns_snap_fact_key'
    ])
    self.assertEqual(expected_keys, sorted(list(results[0].keys())))

def test_identity_processor(self):
    sample_data = [{
        'rec_type_cd': 'D',
        'db_operation_cd': 'U',
        'rec_trigger_key': '000007520'
    }, {
        'rec_type_cd': 'D',
        'db_operation_cd': 'U',
        'rec_trigger_key': '000147967'
    }]
    reader = ListReader(sample_data)
    writer = ListWriter()
    # Use a negative processing_cycle_id so real data in the tables
    # won't interfere with the tests.
    processing_cycle_id = -1
    job_info = {
        'em_create_dw_prcsng_cycle_id': processing_cycle_id,
        'em_create_dw_job_exectn_id': 1,
        'em_create_dw_job_name': 'TEST',
        'em_create_dw_job_version_no': '0.0',
        'em_create_user_id': 'test_user',
        'em_create_tmstmp': datetime.datetime.now()
    }
    logger = None
    step = IdentityProcessor(reader, writer, job_info, logger)
    step.execute()
    results = writer.list
    self.assertEqual(len(sample_data), len(results))
    self.assertEqual('000007520', results[0]['rec_trigger_key'])
    self.assertEqual('000147967', results[1]['rec_trigger_key'])
    # The job_info keys are not expected, because IdentityProcessor only
    # passes data unchanged from reader to writer.
    expected_keys = ['rec_type_cd', 'db_operation_cd', 'rec_trigger_key']
    self.assertEqual(expected_keys, list(results[0].keys()))

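# Hedged sketch, for illustration only: the assertions above hold if the step
# copies each row from the reader to the writer without attaching job_info
# metadata. The class name, and the assumption that the reader is iterable and
# the writer exposes write_row(), are illustrative rather than the real
# IdentityProcessor interface.
class _PassThroughStepSketch:
    def __init__(self, reader, writer, job_info, logger):
        self.reader = reader
        self.writer = writer

    def execute(self):
        # Forward rows unchanged; no em_create_* keys are added.
        for row in self.reader:
            self.writer.write_row(row)
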
def test_marc_rec_field_seq_no(self):
    """
    Tests that the sequence number increments when the same
    z00_doc_number appears again, and resets to 1 for a new
    z00_doc_number.
    """
    sample_data = [
        {
            'z00_doc_number': '000025252',
            'z00_marc_rec_field_cd': 'FMT',
            'UNUSED': 'L',
            'z00_marc_rec_field_txt': 'BK',
        },
        {
            'z00_doc_number': '000025252',
            'z00_marc_rec_field_cd': 'LDR',
            'UNUSED': 'L',
            'z00_marc_rec_field_txt': '^^^^^cam^^2200493^^^4500',
        },
        {
            'z00_doc_number': '000090849',
            'z00_marc_rec_field_cd': 'FMT',
            'UNUSED': 'L',
            'z00_marc_rec_field_txt': 'BK',
        },
    ]
    reader = ListReader(sample_data)
    writer = ListWriter()
    job_info = JobInfo(-1, 'test_user', '1', '1')
    logger = None
    step = LoadZ00FieldTsv(reader, writer, job_info, logger)
    step.execute()
    results = writer.list
    self.assertEqual(len(sample_data), len(results))
    self.assertEqual(1, results[0]['dw_stg_1_marc_rec_field_seq_no'])
    self.assertEqual(2, results[1]['dw_stg_1_marc_rec_field_seq_no'])
    self.assertEqual(1, results[2]['dw_stg_1_marc_rec_field_seq_no'])

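# Hedged sketch of the sequencing rule exercised above: the counter increments
# while consecutive rows share a z00_doc_number and resets to 1 when the key
# changes. This is an illustration of that rule, not the actual
# LoadZ00FieldTsv implementation.
def _assign_field_seq_no_sketch(rows):
    prev_key, seq_no = None, 0
    for row in rows:
        seq_no = seq_no + 1 if row['z00_doc_number'] == prev_key else 1
        prev_key = row['z00_doc_number']
        row['dw_stg_1_marc_rec_field_seq_no'] = seq_no
    return rows
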
def test_end_to_end(self):
    writer = ListWriter()
    job_info = JobInfo(-1, 'test_user', '1', '1')
    reader = ListReader(self.sample_data)
    ezproxy_reporting_processor = EzproxyReportingFactProcessor(
        reader, writer, job_info, self.logger)
    ezproxy_reporting_processor.execute()
    results = ezproxy_reporting_processor.writer.list
    expected_keys = sorted([
        'em_create_dw_job_exectn_id', 'em_create_dw_job_name',
        'em_create_dw_job_version_no', 'em_create_dw_prcsng_cycle_id',
        'em_create_tmstmp', 'em_create_user_id',
        'em_update_dw_job_exectn_id', 'em_update_dw_job_name',
        'em_update_dw_job_version_no', 'em_update_dw_prcsng_cycle_id',
        'em_update_reason_txt', 'em_update_tmstmp', 'em_update_user_id',
        'ezp_sessns_snap_actv_sessns_cnt',
        'ezp_sessns_snap_clndr_dt_dim_key', 'ezp_sessns_snap_fact_key',
        'ezp_sessns_snap_mbr_lbry_dim_key',
        'ezp_sessns_snap_time_of_day_dim_key', 'ezp_sessns_snap_tmstmp',
        'rm_current_rec_flag', 'rm_rec_effective_from_dt',
        'rm_rec_effective_to_dt', 'rm_rec_type_cd', 'rm_rec_type_desc',
        'rm_rec_version_no'
    ])
    self.assertEqual(None, results[0]['em_update_dw_job_exectn_id'])
    self.assertEqual(None, results[0]['em_update_dw_job_name'])
    self.assertEqual(None, results[0]['em_update_dw_job_version_no'])
    self.assertEqual(None, results[0]['em_update_dw_prcsng_cycle_id'])
    self.assertEqual(None, results[0]['em_update_reason_txt'])
    self.assertEqual(None, results[0]['em_update_tmstmp'])
    self.assertEqual(None, results[0]['em_update_user_id'])
    self.assertEqual('EzproxyReportingFactProcessor',
                     results[0]['em_create_dw_job_name'])
    self.assertEqual(expected_keys, sorted(list(results[0].keys())))

def test_end_to_end(self):
    writer = ListWriter()
    job_info = JobInfo(-1, 'test_user', '1', '1')
    reader = ListReader(self.sample_data)
    ezproxy_processor = EzproxyProcessor(reader, writer, job_info,
                                         self.logger)
    ezproxy_processor.execute()
    results = ezproxy_processor.writer.list
    expected_keys = sorted([
        't1_ezp_sessns_snap_actv_sessns_cnt',
        't1_ezp_sessns_snap_tmstmp__ezp_sessns_snap_clndr_dt_dim_key',
        't1_ezp_sessns_virtual_hosts_cnt',
        't1_mbr_lbry_cd__ezp_sessns_snap_mbr_lbry_dim_key',
        't2_ezp_sessns_snap_tmstmp__ezp_sessns_snap_tmstmp',
        't3_ezp_sessns_snap_tmstmp__ezp_sessns_snap_time_of_day_dim_key',
        'em_create_user_id', 'em_create_dw_prcsng_cycle_id',
        'em_create_dw_job_exectn_id', 'em_create_dw_job_version_no',
        'em_create_dw_job_name', 'em_create_tmstmp',
        'in_ezp_sessns_snap_tmstmp', 'in_mbr_lbry_cd'
    ])
    self.assertEqual(expected_keys, sorted(list(results[0].keys())))

def setUp(self):
    self.writer = ListWriter()
    self.job_info = JobInfoFactory.create_from_prcsng_cycle_id(-1)
    self.logger = None

def test_bib_rec_preprocess(self):
    """
    Tests the case where the input values have no leading or trailing
    whitespace.
    """
    sample_data = [{
        # pk data
        'db_operation_cd': 'U',
        'dw_stg_2_aleph_lbry_name': 'mai60',
        'em_create_dw_prcsng_cycle_id': '-1',
        # z00 fields don't have trims
        'in_z00_doc_number': '000019087',
        'in_z00_no_lines': '0011',
        'in_z00_data_len': '000400',
        # z13 fields have trims
        'in_z13_title': 'A literary history of America',
        'in_z13_author': 'Wendell, Barrett, 1855-1921',
        'in_z13_imprint': 'New York, Haskell House Publishers, 1968'
    }]
    reader = ListReader(sample_data)
    writer = ListWriter()
    job_info = JobInfo(-1, 'test_user', '1', '1')
    logger = None
    sample_json_config = {
        'z00_doc_number': {
            "preprocessing_info": {
                "pre_or_post_dq": "N/A",
                "pre_action": "N/A",
                "pre_detailed_instructions": "N/A"
            }
        },
        'z00_no_lines': {
            "preprocessing_info": {
                "pre_or_post_dq": "N/A",
                "pre_action": "N/A",
                "pre_detailed_instructions": "N/A"
            }
        },
        'z00_data_len': {
            "preprocessing_info": {
                "pre_or_post_dq": "N/A",
                "pre_action": "N/A",
                "pre_detailed_instructions": "N/A"
            }
        },
        'z13_title': {
            "preprocessing_info": {
                "pre_or_post_dq": "N/A",
                "pre_action": "Trim",
                "pre_detailed_instructions":
                    "Remove leading and trailing spaces"
            }
        },
        'z13_author': {
            "preprocessing_info": {
                "pre_or_post_dq": "N/A",
                "pre_action": "Trim",
                "pre_detailed_instructions":
                    "Remove leading and trailing spaces"
            }
        },
        'z13_imprint': {
            "preprocessing_info": {
                "pre_or_post_dq": "N/A",
                "pre_action": "Trim",
                "pre_detailed_instructions":
                    "Remove leading and trailing spaces"
            }
        }
    }
    pk_list = [
        'db_operation_cd', 'dw_stg_2_aleph_lbry_name',
        'in_z00_doc_number', 'em_create_dw_prcsng_cycle_id'
    ]
    step = Preprocess(reader, writer, job_info, logger,
                      sample_json_config, pk_list)
    step.execute()
    results = step.writer.list
    expected_keys = sorted([
        'in_z00_doc_number', 'pp_z00_doc_number',
        'dw_stg_2_aleph_lbry_name', 'db_operation_cd', 'pp_z00_no_lines',
        'pp_z13_title', 'pp_z13_author', 'pp_z00_data_len',
        'pp_z13_imprint', 'em_update_dw_prcsng_cycle_id',
        'em_update_dw_job_exectn_id', 'em_update_dw_job_name',
        'em_update_dw_job_version_no', 'em_update_user_id',
        'em_update_tmstmp', 'em_create_dw_prcsng_cycle_id'
    ])
    self.assertEqual(expected_keys, sorted(list(results[0].keys())))
    self.assertEqual("000019087", results[0]['pp_z00_doc_number'])
    self.assertEqual('0011', results[0]['pp_z00_no_lines'])
    self.assertEqual('000400', results[0]['pp_z00_data_len'])

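# Hedged sketch of the config-driven behaviour these Preprocess tests rely on:
# fields whose "pre_action" is "Trim" have leading/trailing whitespace removed,
# and each in_<field> value is emitted under a pp_<field> key. The function
# name and key handling are illustrative assumptions, not the Preprocess API.
def _apply_preprocessing_sketch(row, json_config):
    out = {}
    for field, cfg in json_config.items():
        value = row.get('in_' + field)
        if value is None:
            continue
        if cfg['preprocessing_info']['pre_action'] == 'Trim':
            value = value.strip()
        out['pp_' + field] = value
    return out
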
def test_z00_pp(self):
    sample_data = [{
        'db_operation_cd': 'U',
        'in_z00_data': '',
        'in_z00_data_len': '001726',
        'in_z00_doc_number': '000181506',
        'in_z00_no_lines': '0038',
        'dw_stg_2_aleph_lbry_name': 'mai01',
        'em_create_dw_prcsng_cycle_id': '-1',
    }]
    reader = ListReader(sample_data)
    writer = ListWriter()
    job_info = JobInfo(-1, 'test_user', '1', '1')
    logger = None
    sample_json_config = {
        'z00_doc_number': {
            "preprocessing_info": {
                "pre_or_post_dq": "N/A",
                "pre_action": "N/A",
                "pre_detailed_instructions": "N/A"
            }
        },
        'z00_no_lines': {
            "preprocessing_info": {
                "pre_or_post_dq": "N/A",
                "pre_action": "N/A",
                "pre_detailed_instructions": "N/A"
            }
        },
        'z00_data_len': {
            "preprocessing_info": {
                "pre_or_post_dq": "N/A",
                "pre_action": "N/A",
                "pre_detailed_instructions": "N/A"
            }
        }
    }
    pk_list = [
        'db_operation_cd', 'dw_stg_2_aleph_lbry_name',
        'in_z00_doc_number', 'em_create_dw_prcsng_cycle_id'
    ]
    step = Preprocess(reader, writer, job_info, logger,
                      sample_json_config, pk_list)
    step.execute()
    results = step.writer.list
    expected_keys = sorted([
        'in_z00_doc_number', 'pp_z00_doc_number',
        'dw_stg_2_aleph_lbry_name', 'db_operation_cd', 'pp_z00_no_lines',
        'pp_z00_data_len', 'pp_z00_data', 'em_update_dw_prcsng_cycle_id',
        'em_update_dw_job_exectn_id', 'em_update_dw_job_name',
        'em_update_dw_job_version_no', 'em_update_user_id',
        'em_update_tmstmp', 'em_create_dw_prcsng_cycle_id'
    ])
    self.assertEqual(False,
                     Preprocess.need_preprocess(sample_json_config, ''))
    self.assertEqual(expected_keys, sorted(list(results[0].keys())))
    self.assertEqual("000181506", results[0]['pp_z00_doc_number'])
    self.assertEqual("0038", results[0]['pp_z00_no_lines'])
    self.assertEqual("001726", results[0]['pp_z00_data_len'])
    self.assertEqual("", results[0]['pp_z00_data'])

def test_dataquality_bib_rec(self):
    writer = ListWriter()
    job_info = JobInfo(-1, 'test_user', '1', '1')
    # z00
    json_config = self.bib_rec_json_config
    reader = ListReader(self.bib_record_dimension_sample_data_z00)
    z00_pk_list = [
        'db_operation_cd', 'dw_stg_2_aleph_lbry_name',
        'in_z00_doc_number', 'em_create_dw_prcsng_cycle_id'
    ]
    z13_pk_list = [
        'db_operation_cd', 'dw_stg_2_aleph_lbry_name', 'in_z13_rec_key',
        'em_create_dw_prcsng_cycle_id'
    ]
    data_quality_processor = DataQualityProcessor(
        reader, writer, job_info, self.logger, json_config, z00_pk_list)
    data_quality_processor.execute()
    z00_results = data_quality_processor.writer.list
    # z13
    reader = ListReader(self.bib_record_dimension_sample_data_z00)
    data_quality_processor = DataQualityProcessor(
        reader, writer, job_info, self.logger, json_config, z13_pk_list)
    data_quality_processor.execute()
    z13_results = data_quality_processor.writer.list
    z00_expected_keys = sorted([
        'db_operation_cd', 'dq_z00_data', 'dq_z00_data_len',
        'dq_z00_doc_number', 'dq_z00_no_lines',
        'dw_stg_2_aleph_lbry_name', 'em_update_dw_job_exectn_id',
        'em_update_dw_job_name', 'em_update_dw_job_version_no',
        'em_update_dw_prcsng_cycle_id', 'em_update_tmstmp',
        'em_update_user_id', 'in_z00_doc_number',
        'rm_dq_check_excptn_cnt', 'rm_suspend_rec_flag',
        'rm_suspend_rec_reason_cd'
    ])
    z13_expected_keys = sorted([
        'db_operation_cd', 'dw_stg_2_aleph_lbry_name', 'in_z13_rec_key',
        'dq_z13_year', 'dq_z13_open_date', 'dq_z13_update_date',
        'dq_z13_author', 'dq_z13_title', 'em_update_dw_prcsng_cycle_id',
        'em_update_user_id', 'em_update_dw_job_exectn_id',
        'em_update_dw_job_version_no', 'em_update_dw_job_name',
        'em_update_tmstmp', 'rm_dq_check_excptn_cnt',
        'rm_suspend_rec_flag', 'rm_suspend_rec_reason_cd'
    ])
    self.assertEqual(z00_expected_keys,
                     sorted(list(z00_results[0].keys())))
    self.assertEqual(z00_expected_keys,
                     sorted(list(z00_results[1].keys())))
    self.assertEqual(z13_expected_keys,
                     sorted(list(z13_results[3].keys())))
    self.assertEqual(z13_expected_keys,
                     sorted(list(z13_results[5].keys())))
    self.assertEqual("SUS", z00_results[0]['dq_z00_doc_number'])
    self.assertEqual(1, z00_results[0]['rm_dq_check_excptn_cnt'])
    self.assertEqual("MIS", z00_results[0]['rm_suspend_rec_reason_cd'])
    self.assertEqual(None, z13_results[3]['dq_z13_open_date'])
    self.assertEqual(1, z13_results[3]['rm_dq_check_excptn_cnt'])
    self.assertEqual("MIS", z00_results[0]['rm_suspend_rec_reason_cd'])
    self.assertEqual(None, z13_results[4]['dq_z13_open_date'])
    self.assertEqual(1, z13_results[4]['rm_dq_check_excptn_cnt'])
    self.assertEqual("LEN", z00_results[1]['rm_suspend_rec_reason_cd'])
    self.assertEqual('0049', z00_results[0]['dq_z00_no_lines'])
    self.assertEqual('001970', z00_results[0]['dq_z00_data_len'])
    self.assertEqual('20130225', z13_results[5]['dq_z13_update_date'])
    self.assertEqual('1969', z13_results[5]['dq_z13_year'])
    self.assertEqual('20021124', z13_results[5]['dq_z13_open_date'])

def test_single_write(self):
    writer = ListWriter()
    writer.write_row('Line 1')
    self.assertEqual(['Line 1'], writer.list)

def test_initialization(self):
    writer = ListWriter()
    self.assertEqual([], writer.list)

def test_multiple_writes(self):
    writer = ListWriter()
    writer.write_row('Line 1')
    writer.write_row('Line 2')
    self.assertEqual(['Line 1', 'Line 2'], writer.list)

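# Hedged sketch of a writer consistent with the three tests above: it starts
# with an empty list and write_row() appends each row. This mirrors only the
# behaviour the tests observe; the real ListWriter may do more.
class _ListWriterSketch:
    def __init__(self):
        self.list = []

    def write_row(self, row):
        self.list.append(row)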