示例#1
0
    def test_load_mpf_tsv(self):
        """Run LoadMpfTsv over a single MPF row and inspect what it writes.

        Asserts that one output row is written per input row, that the
        first output row carries exactly the expected column names, and
        that its ``usmai_mbr_lbry_cd`` value is ``"BC"``.
        """
        input_rows = [{
            'collection_cd': 'MSTCK',
            'lbry_entity_cd': 'BC-BC',
            'collection_name': 'Media Stacks / Request to Pick-up at Home Library',
            'db_operation_cd': 'U',
            'lbry_staff_lms_user_id': 'hhanson',
            'db_operation_effective_date': '2019-08-19',
        }]

        list_reader = ListReader(input_rows)
        list_writer = ListWriter()
        info = JobInfo(-1, 'test_user', '1', '1')

        # No logger is supplied to the step in this test.
        load_step = LoadMpfTsv(list_reader, list_writer, info, None)
        load_step.execute()

        output_rows = list_writer.list
        self.assertEqual(len(input_rows), len(output_rows))

        wanted_columns = [
            'collection_cd', 'collection_name', 'lbry_entity_cd',
            'db_operation_cd', 'usmai_mbr_lbry_cd',
            'lbry_staff_lms_user_id', 'db_operation_effective_date',
            'em_create_dw_prcsng_cycle_id', 'em_create_dw_job_exectn_id',
            'em_create_dw_job_name', 'em_create_dw_job_version_no',
            'em_create_user_id', 'em_create_tmstmp',
        ]
        wanted_columns.sort()

        first_row = output_rows[0]
        self.assertEqual(wanted_columns, sorted(first_row.keys()))
        self.assertEqual("BC", first_row['usmai_mbr_lbry_cd'])
示例#2
0
    def test_load_aleph_tsv(self):
        """Feed two Aleph rows through LoadAlephTsv and verify the output.

        Asserts that every input row is written, that the
        ``rec_trigger_key`` values come out in input order, and that the
        first output row has exactly the expected column names.
        """
        input_rows = [
            {
                'rec_type_cd': 'D',
                'db_operation_cd': 'U',
                'rec_trigger_key': '000007520',
            },
            {
                'rec_type_cd': 'D',
                'db_operation_cd': 'U',
                'rec_trigger_key': '000147967',
            },
        ]

        list_reader = ListReader(input_rows)
        list_writer = ListWriter()
        info = JobInfo(-1, 'test_user', '1', '1')

        # No logger is supplied to the step in this test.
        load_step = LoadAlephTsv(list_reader, list_writer, info, None)
        load_step.execute()

        output_rows = list_writer.list
        self.assertEqual(len(input_rows), len(output_rows))

        # The row count was checked above, so comparing the whole column is
        # the same as checking rows 0 and 1 individually.
        trigger_keys = [row['rec_trigger_key'] for row in output_rows]
        self.assertEqual(['000007520', '000147967'], trigger_keys)

        wanted_columns = [
            'rec_type_cd', 'db_operation_cd', 'rec_trigger_key',
            'em_create_dw_prcsng_cycle_id', 'em_create_dw_job_exectn_id',
            'em_create_dw_job_name', 'em_create_dw_job_version_no',
            'em_create_user_id', 'em_create_tmstmp',
        ]
        wanted_columns.sort()
        self.assertEqual(wanted_columns, sorted(output_rows[0].keys()))
示例#3
0
    def test_load_z00_field_tsv(self):
        """Run LoadZ00FieldTsv over one z00 field row and check its output.

        Asserts that one row is written, that the first output row has
        exactly the expected column names (the input's ``UNUSED`` column is
        not among them), and that its
        ``dw_stg_1_marc_rec_field_seq_no`` is 1.
        """
        input_rows = [
            {
                'z00_doc_number': '000025252',
                'z00_marc_rec_field_cd': 'FMT',
                'UNUSED': 'L',
                'z00_marc_rec_field_txt': 'BK',
            },
        ]

        list_reader = ListReader(input_rows)
        list_writer = ListWriter()
        info = JobInfo(-1, 'test_user', '1', '1')

        # No logger is supplied to the step in this test.
        load_step = LoadZ00FieldTsv(list_reader, list_writer, info, None)
        load_step.execute()

        output_rows = list_writer.list
        self.assertEqual(len(input_rows), len(output_rows))

        wanted_columns = [
            'rec_type_cd', 'db_operation_cd', 'rec_trigger_key',
            'z00_doc_number', 'dw_stg_1_marc_rec_field_seq_no',
            'z00_marc_rec_field_cd', 'z00_marc_rec_field_txt',
            'em_create_dw_prcsng_cycle_id', 'em_create_dw_job_exectn_id',
            'em_create_dw_job_name', 'em_create_dw_job_version_no',
            'em_create_user_id', 'em_create_tmstmp',
        ]
        wanted_columns.sort()

        first_row = output_rows[0]
        self.assertEqual(wanted_columns, sorted(first_row.keys()))
        self.assertEqual(1, first_row['dw_stg_1_marc_rec_field_seq_no'])
示例#4
0
    def test_process_item(self):
        """Execute EzproxyFactProcessor on the fixture data and check columns.

        Asserts that the first row written by the processor has exactly
        the expected column names.
        """
        list_writer = ListWriter()
        info = JobInfo(-1, 'test_user', '1', '1')
        list_reader = ListReader(self.sample_data)

        processor = EzproxyFactProcessor(
            list_reader,
            list_writer,
            info,
            self.logger,
            self.max_ezp_sessns_snap_fact_key,
        )
        processor.execute()

        # Read the rows back through the processor's own writer reference.
        output_rows = processor.writer.list

        wanted_columns = [
            'em_create_dw_prcsng_cycle_id', 'in_ezp_sessns_snap_tmstmp',
            'in_mbr_lbry_cd', 'em_create_dw_job_exectn_id',
            'em_create_dw_job_name', 'em_create_dw_job_version_no',
            'em_create_user_id', 'em_create_tmstmp',
            'ezp_sessns_snap_fact_key',
        ]
        wanted_columns.sort()

        self.assertEqual(wanted_columns, sorted(output_rows[0].keys()))
示例#5
0
    def test_identity_processor(self):
        """Check that IdentityProcessor writes rows without adding columns.

        Asserts that both input rows are written in order and that the
        first output row has only the three input columns, in input order.
        """
        input_rows = [
            {
                'rec_type_cd': 'D',
                'db_operation_cd': 'U',
                'rec_trigger_key': '000007520',
            },
            {
                'rec_type_cd': 'D',
                'db_operation_cd': 'U',
                'rec_trigger_key': '000147967',
            },
        ]

        list_reader = ListReader(input_rows)
        list_writer = ListWriter()

        # A negative processing cycle id keeps real data in the tables from
        # interfering with the tests.
        cycle_id = -1

        # Here job_info is a plain dict, not a JobInfo instance.
        info = {
            'em_create_dw_prcsng_cycle_id': cycle_id,
            'em_create_dw_job_exectn_id': 1,
            'em_create_dw_job_name': 'TEST',
            'em_create_dw_job_version_no': '0.0',
            'em_create_user_id': 'test_user',
            'em_create_tmstmp': datetime.datetime.now(),
        }

        # No logger is supplied to the step in this test.
        identity_step = IdentityProcessor(list_reader, list_writer, info, None)
        identity_step.execute()

        output_rows = list_writer.list
        self.assertEqual(len(input_rows), len(output_rows))

        # The row count was checked above, so comparing the whole column is
        # the same as checking rows 0 and 1 individually.
        trigger_keys = [row['rec_trigger_key'] for row in output_rows]
        self.assertEqual(['000007520', '000147967'], trigger_keys)

        # The job_info keys are not expected in the output because
        # IdentityProcessor only passes data unchanged from reader to writer.
        # This comparison is deliberately order-sensitive (no sorting).
        wanted_columns = ['rec_type_cd', 'db_operation_cd', 'rec_trigger_key']
        self.assertEqual(wanted_columns, [*output_rows[0].keys()])
示例#6
0
    def test_marc_rec_field_seq_no(self):
        """Verify how LoadZ00FieldTsv numbers fields within a document.

        The sequence number should increment while the same
        ``z00_doc_number`` keeps arriving and reset to 1 when a new
        ``z00_doc_number`` appears.
        """
        def field_row(doc_number, field_cd, field_txt):
            # Build one input row; key order matches the TSV column order
            # used elsewhere in these tests.
            return {
                'z00_doc_number': doc_number,
                'z00_marc_rec_field_cd': field_cd,
                'UNUSED': 'L',
                'z00_marc_rec_field_txt': field_txt,
            }

        input_rows = [
            field_row('000025252', 'FMT', 'BK'),
            field_row('000025252', 'LDR', '^^^^^cam^^2200493^^^4500'),
            field_row('000090849', 'FMT', 'BK'),
        ]

        list_reader = ListReader(input_rows)
        list_writer = ListWriter()
        info = JobInfo(-1, 'test_user', '1', '1')

        # No logger is supplied to the step in this test.
        load_step = LoadZ00FieldTsv(list_reader, list_writer, info, None)
        load_step.execute()

        output_rows = list_writer.list
        self.assertEqual(len(input_rows), len(output_rows))

        # Two fields for the first document, then one for the second.
        seq_numbers = [
            row['dw_stg_1_marc_rec_field_seq_no'] for row in output_rows
        ]
        self.assertEqual([1, 2, 1], seq_numbers)
    def test_end_to_end(self):
        """Execute EzproxyReportingFactProcessor and inspect the first row.

        Asserts that every ``em_update_*`` column of the first output row
        equals None, that ``em_create_dw_job_name`` is the processor's
        class name, and that the row has exactly the expected columns.
        """
        list_writer = ListWriter()
        info = JobInfo(-1, 'test_user', '1', '1')
        list_reader = ListReader(self.sample_data)

        processor = EzproxyReportingFactProcessor(
            list_reader, list_writer, info, self.logger)
        processor.execute()

        # Read the rows back through the processor's own writer reference.
        output_rows = processor.writer.list

        wanted_columns = [
            'em_create_dw_job_exectn_id', 'em_create_dw_job_name',
            'em_create_dw_job_version_no', 'em_create_dw_prcsng_cycle_id',
            'em_create_tmstmp', 'em_create_user_id',
            'em_update_dw_job_exectn_id', 'em_update_dw_job_name',
            'em_update_dw_job_version_no', 'em_update_dw_prcsng_cycle_id',
            'em_update_reason_txt', 'em_update_tmstmp', 'em_update_user_id',
            'ezp_sessns_snap_actv_sessns_cnt',
            'ezp_sessns_snap_clndr_dt_dim_key', 'ezp_sessns_snap_fact_key',
            'ezp_sessns_snap_mbr_lbry_dim_key',
            'ezp_sessns_snap_time_of_day_dim_key', 'ezp_sessns_snap_tmstmp',
            'rm_current_rec_flag', 'rm_rec_effective_from_dt',
            'rm_rec_effective_to_dt', 'rm_rec_type_cd', 'rm_rec_type_desc',
            'rm_rec_version_no',
        ]
        wanted_columns.sort()

        # Update-metadata columns must be present but unset on this row.
        unset_columns = (
            'em_update_dw_job_exectn_id',
            'em_update_dw_job_name',
            'em_update_dw_job_version_no',
            'em_update_dw_prcsng_cycle_id',
            'em_update_reason_txt',
            'em_update_tmstmp',
            'em_update_user_id',
        )
        for column in unset_columns:
            self.assertEqual(None, output_rows[0][column])

        self.assertEqual('EzproxyReportingFactProcessor',
                         output_rows[0]['em_create_dw_job_name'])

        self.assertEqual(wanted_columns, sorted(output_rows[0].keys()))
示例#8
0
    def test_end_to_end(self):
        """Execute EzproxyProcessor on the fixture data and check columns.

        Asserts that the first row written by the processor has exactly
        the expected column names.
        """
        list_writer = ListWriter()
        info = JobInfo(-1, 'test_user', '1', '1')
        list_reader = ListReader(self.sample_data)

        processor = EzproxyProcessor(
            list_reader, list_writer, info, self.logger)
        processor.execute()

        # Read the rows back through the processor's own writer reference.
        output_rows = processor.writer.list

        wanted_columns = [
            't1_ezp_sessns_snap_actv_sessns_cnt',
            't1_ezp_sessns_snap_tmstmp__ezp_sessns_snap_clndr_dt_dim_key',
            't1_ezp_sessns_virtual_hosts_cnt',
            't1_mbr_lbry_cd__ezp_sessns_snap_mbr_lbry_dim_key',
            't2_ezp_sessns_snap_tmstmp__ezp_sessns_snap_tmstmp',
            't3_ezp_sessns_snap_tmstmp__ezp_sessns_snap_time_of_day_dim_key',
            'em_create_user_id', 'em_create_dw_prcsng_cycle_id',
            'em_create_dw_job_exectn_id', 'em_create_dw_job_version_no',
            'em_create_dw_job_name', 'em_create_tmstmp',
            'in_ezp_sessns_snap_tmstmp', 'in_mbr_lbry_cd',
        ]
        wanted_columns.sort()

        self.assertEqual(wanted_columns, sorted(output_rows[0].keys()))
 def setUp(self):
     """Prepare the writer, job info and logger attributes used by the tests."""
     # The tests run without a logger.
     self.logger = None
     self.writer = ListWriter()
     # Job info is built from processing cycle id -1.
     self.job_info = JobInfoFactory.create_from_prcsng_cycle_id(-1)
示例#10
0
    def test_bib_rec_preprocess(self):
        """Run Preprocess on a bib record whose values have no whitespace.

        Asserts the exact set of output columns on the first row and that
        the ``pp_z00_*`` values equal the corresponding ``in_z00_*`` inputs.
        """
        def no_action():
            # Config entry for a field with no preprocessing action.
            return {
                "preprocessing_info": {
                    "pre_or_post_dq": "N/A",
                    "pre_action": "N/A",
                    "pre_detailed_instructions": "N/A",
                }
            }

        def trim_action():
            # Config entry for a field configured with the "Trim" action.
            return {
                "preprocessing_info": {
                    "pre_or_post_dq": "N/A",
                    "pre_action": "Trim",
                    "pre_detailed_instructions":
                        "Remove leading and trailing spaces",
                }
            }

        input_rows = [{
            # primary-key data
            'db_operation_cd': 'U',
            'dw_stg_2_aleph_lbry_name': 'mai60',
            'em_create_dw_prcsng_cycle_id': '-1',
            # z00 fields are not trimmed
            'in_z00_doc_number': '000019087',
            'in_z00_no_lines': '0011',
            'in_z00_data_len': '000400',
            # z13 fields are trimmed
            'in_z13_title': 'A literary history of America',
            'in_z13_author': 'Wendell, Barrett, 1855-1921',
            'in_z13_imprint': 'New York, Haskell House Publishers, 1968',
        }]

        list_reader = ListReader(input_rows)
        list_writer = ListWriter()
        info = JobInfo(-1, 'test_user', '1', '1')

        # NOTE(review): 'in_z00_data_len' carries an 'in_' prefix, unlike the
        # other keys in this config -- confirm whether that is intentional.
        field_config = {
            'z00_doc_number': no_action(),
            'z00_no_lines': no_action(),
            'in_z00_data_len': no_action(),
            'z13_title': trim_action(),
            'z13_author': trim_action(),
            'z13_imprint': trim_action(),
        }

        primary_keys = [
            'db_operation_cd',
            'dw_stg_2_aleph_lbry_name',
            'in_z00_doc_number',
            'em_create_dw_prcsng_cycle_id',
        ]

        # No logger is supplied to the step in this test.
        preprocess_step = Preprocess(list_reader, list_writer, info, None,
                                     field_config, primary_keys)
        preprocess_step.execute()
        output_rows = preprocess_step.writer.list

        wanted_columns = [
            'in_z00_doc_number', 'pp_z00_doc_number',
            'dw_stg_2_aleph_lbry_name', 'db_operation_cd', 'pp_z00_no_lines',
            'pp_z13_title', 'pp_z13_author', 'pp_z00_data_len',
            'pp_z13_imprint', 'em_update_dw_prcsng_cycle_id',
            'em_update_dw_job_exectn_id', 'em_update_dw_job_name',
            'em_update_dw_job_version_no', 'em_update_user_id',
            'em_update_tmstmp', 'em_create_dw_prcsng_cycle_id',
        ]
        wanted_columns.sort()

        first_row = output_rows[0]
        self.assertEqual(wanted_columns, sorted(first_row.keys()))
        self.assertEqual("000019087", first_row['pp_z00_doc_number'])
        self.assertEqual('0011', first_row['pp_z00_no_lines'])
        self.assertEqual('000400', first_row['pp_z00_data_len'])
示例#11
0
    def test_z00_pp(self):
        """Run Preprocess on a z00 row that includes an empty ``in_z00_data``.

        Asserts that ``Preprocess.need_preprocess`` is False for an empty
        key, that the first output row has exactly the expected columns,
        and that each ``pp_z00_*`` value equals its ``in_z00_*`` input.
        """
        def no_action():
            # Config entry for a field with no preprocessing action.
            return {
                "preprocessing_info": {
                    "pre_or_post_dq": "N/A",
                    "pre_action": "N/A",
                    "pre_detailed_instructions": "N/A",
                }
            }

        input_rows = [
            {
                'db_operation_cd': 'U',
                'in_z00_data': '',
                'in_z00_data_len': '001726',
                'in_z00_doc_number': '000181506',
                'in_z00_no_lines': '0038',
                'dw_stg_2_aleph_lbry_name': 'mai01',
                'em_create_dw_prcsng_cycle_id': '-1',
            },
        ]

        list_reader = ListReader(input_rows)
        list_writer = ListWriter()
        info = JobInfo(-1, 'test_user', '1', '1')

        # Note that 'z00_data' has no entry in this config.
        field_config = {
            'z00_doc_number': no_action(),
            'z00_no_lines': no_action(),
            'z00_data_len': no_action(),
        }

        primary_keys = [
            'db_operation_cd',
            'dw_stg_2_aleph_lbry_name',
            'in_z00_doc_number',
            'em_create_dw_prcsng_cycle_id',
        ]

        # No logger is supplied to the step in this test.
        preprocess_step = Preprocess(list_reader, list_writer, info, None,
                                     field_config, primary_keys)
        preprocess_step.execute()
        output_rows = preprocess_step.writer.list

        wanted_columns = [
            'in_z00_doc_number', 'pp_z00_doc_number',
            'dw_stg_2_aleph_lbry_name', 'db_operation_cd', 'pp_z00_no_lines',
            'pp_z00_data_len', 'pp_z00_data', 'em_update_dw_prcsng_cycle_id',
            'em_update_dw_job_exectn_id', 'em_update_dw_job_name',
            'em_update_dw_job_version_no', 'em_update_user_id',
            'em_update_tmstmp', 'em_create_dw_prcsng_cycle_id',
        ]
        wanted_columns.sort()

        needs_preprocess = Preprocess.need_preprocess(field_config, '')
        self.assertEqual(False, needs_preprocess)

        first_row = output_rows[0]
        self.assertEqual(wanted_columns, sorted(first_row.keys()))
        self.assertEqual("000181506", first_row['pp_z00_doc_number'])
        self.assertEqual("0038", first_row['pp_z00_no_lines'])
        self.assertEqual("001726", first_row['pp_z00_data_len'])
        self.assertEqual("", first_row['pp_z00_data'])
示例#12
0
    def test_dataquality_bib_rec(self):
        """Run DataQualityProcessor with the z00 and z13 key lists.

        Two processors are executed against the same ListWriter, first with
        the z00 primary-key list and then with the z13 one. The test then
        checks the column names of selected output rows and a handful of
        data-quality values and suspend codes.

        The value assertions index a single ``results`` list using row
        numbers 0-5; this presumably relies on the shared writer holding the
        rows of the first run followed by those of the second -- confirm
        against ListWriter and DataQualityProcessor.
        """
        writer = ListWriter()
        job_info = JobInfo(-1, 'test_user', '1', '1')
        json_config = self.bib_rec_json_config

        z00_pk_list = [
            'db_operation_cd', 'dw_stg_2_aleph_lbry_name', 'in_z00_doc_number',
            'em_create_dw_prcsng_cycle_id'
        ]
        z13_pk_list = [
            'db_operation_cd', 'dw_stg_2_aleph_lbry_name', 'in_z13_rec_key',
            'em_create_dw_prcsng_cycle_id'
        ]

        # z00
        reader = ListReader(self.bib_record_dimension_sample_data_z00)
        data_quality_processor = DataQualityProcessor(reader, writer, job_info,
                                                      self.logger, json_config,
                                                      z00_pk_list)
        data_quality_processor.execute()
        z00_results = data_quality_processor.writer.list

        # z13
        # NOTE(review): this run re-reads the z00 sample data although it uses
        # the z13 key list and z13 columns are asserted below -- confirm
        # whether a z13 fixture was intended here.
        reader = ListReader(self.bib_record_dimension_sample_data_z00)
        data_quality_processor = DataQualityProcessor(reader, writer, job_info,
                                                      self.logger, json_config,
                                                      z13_pk_list)
        data_quality_processor.execute()
        z13_results = data_quality_processor.writer.list

        # The original code referenced an undefined name `results`; bind it
        # to the shared writer's rows, which both runs wrote to.
        results = writer.list

        z00_expected_keys = sorted([
            'db_operation_cd', 'dq_z00_data', 'dq_z00_data_len',
            'dq_z00_doc_number', 'dq_z00_no_lines', 'dw_stg_2_aleph_lbry_name',
            'em_update_dw_job_exectn_id', 'em_update_dw_job_name',
            'em_update_dw_job_version_no', 'em_update_dw_prcsng_cycle_id',
            'em_update_tmstmp', 'em_update_user_id', 'in_z00_doc_number',
            'rm_dq_check_excptn_cnt', 'rm_suspend_rec_flag',
            'rm_suspend_rec_reason_cd'
        ])
        z13_expected_keys = sorted([
            'db_operation_cd', 'dw_stg_2_aleph_lbry_name', 'in_z13_rec_key',
            'dq_z13_year', 'dq_z13_open_date', 'dq_z13_update_date',
            'dq_z13_author', 'dq_z13_title', 'em_update_dw_prcsng_cycle_id',
            'em_update_user_id', 'em_update_dw_job_exectn_id',
            'em_update_dw_job_version_no', 'em_update_dw_job_name',
            'em_update_tmstmp', 'rm_dq_check_excptn_cnt',
            'rm_suspend_rec_flag', 'rm_suspend_rec_reason_cd'
        ])

        self.assertEqual(z00_expected_keys, sorted(z00_results[0].keys()))
        self.assertEqual(z00_expected_keys, sorted(z00_results[1].keys()))
        self.assertEqual(z13_expected_keys, sorted(z13_results[3].keys()))
        self.assertEqual(z13_expected_keys, sorted(z13_results[5].keys()))

        self.assertEqual("SUS", results[0]['dq_z00_doc_number'])
        self.assertEqual(1, results[0]['rm_dq_check_excptn_cnt'])
        self.assertEqual("MIS", results[0]['rm_suspend_rec_reason_cd'])

        self.assertEqual(None, results[3]['dq_z13_open_date'])
        self.assertEqual(1, results[3]['rm_dq_check_excptn_cnt'])
        # NOTE(review): rows 0 and 1 are re-checked inside the row-3 and
        # row-4 groups -- confirm the intended row indices.
        self.assertEqual("MIS", results[0]['rm_suspend_rec_reason_cd'])

        self.assertEqual(None, results[4]['dq_z13_open_date'])
        self.assertEqual(1, results[4]['rm_dq_check_excptn_cnt'])
        self.assertEqual("LEN", results[1]['rm_suspend_rec_reason_cd'])

        self.assertEqual('0049', results[0]['dq_z00_no_lines'])
        self.assertEqual('001970', results[0]['dq_z00_data_len'])
        self.assertEqual('20130225', results[5]['dq_z13_update_date'])
        self.assertEqual('1969', results[5]['dq_z13_year'])

        # Row 5 is asserted above to have exactly z13_expected_keys, which
        # contains 'dq_z13_open_date' and no 'pp_z13_open_date', so the
        # data-quality column is the one that can be read here.
        self.assertEqual('20021124', results[5]['dq_z13_open_date'])
示例#13
0
 def test_single_write(self):
     """One write_row call leaves exactly that row in the writer's list."""
     list_writer = ListWriter()
     list_writer.write_row('Line 1')

     expected_rows = ['Line 1']
     self.assertEqual(expected_rows, list_writer.list)
示例#14
0
 def test_initialization(self):
     """A freshly constructed ListWriter exposes an empty list."""
     fresh_writer = ListWriter()

     expected_rows = []
     self.assertEqual(expected_rows, fresh_writer.list)
示例#15
0
 def test_multiple_writes(self):
     """Successive write_row calls are kept in the order they were made."""
     list_writer = ListWriter()
     for line in ('Line 1', 'Line 2'):
         list_writer.write_row(line)

     expected_rows = ['Line 1', 'Line 2']
     self.assertEqual(expected_rows, list_writer.list)