示例#1
0
 def test_pub_place_success(self):
     # A code that is present in the lookup table is expected to be stored
     # on the instance as the value it maps to.
     lookup = {'tst': 'test'}
     record = HathiRecord({})
     record.parsePubPlace('tst', lookup)
     storedPlace = record.instance.pub_place
     self.assertEqual(storedPlace, 'test')
示例#2
0
 def test_pub_place_failure(self):
     # A code that is absent from the lookup table is expected to be stored
     # on the instance unchanged.
     lookup = {'tst': 'test'}
     record = HathiRecord({})
     record.parsePubPlace('mis', lookup)
     storedPlace = record.instance.pub_place
     self.assertEqual(storedPlace, 'mis')
示例#3
0
 def test_parse_bad_identifiers(self):
     # The source row only carries a 'badTests' column, so requesting the
     # 'tests' column is expected to leave the work with no identifiers.
     record = HathiRecord({'badTests': '1,2,3'})
     record.parseIdentifiers(record.work, 'test', 'tests')
     identifierCount = len(record.work.identifiers)
     self.assertEqual(identifierCount, 0)
示例#4
0
 def test_parse_identifers(self):
     # A comma-delimited 'tests' column is expected to yield Identifier
     # objects, the third of which carries the value '3'.
     record = HathiRecord({'tests': '1,2,3'})
     record.parseIdentifiers(record.work, 'test', 'tests')
     parsed = record.work.identifiers
     self.assertIsInstance(parsed[0], Identifier)
     self.assertEqual(parsed[2].identifier, '3')
示例#5
0
 def test_parse_author_single_date(self):
     # An author string ending in "b. 1900" is expected to produce an Agent
     # named without the date and holding a single birth date.
     record = HathiRecord({})
     record.parseAuthor('Tester, Test, b. 1900')
     author = record.work.agents[0]
     self.assertIsInstance(author, Agent)
     self.assertEqual(author.name, 'Tester, Test')
     self.assertEqual(len(author.dates), 1)
     # Only look at the first date once the count has been verified
     birth = author.dates[0]
     self.assertEqual(birth.date_type, 'birth_date')
     self.assertEqual(birth.display_date, '1900')
示例#6
0
def rowParser(row, columns, countryCodes):
    """Parse a single HathiTrust item row and send the work to Kinesis.

    This function is a manager around a HathiRecord object. The row is paired
    with its column names to form a source dict, a HathiRecord is created
    from that dict and built out via buildDataModel, and the resulting work
    is written to the stream named by the OUTPUT_STREAM environment variable.

    Arguments:
    row -- list of fields from the HathiTrust source CSV file
    columns -- list of columns that corresponds to the source row
    countryCodes -- dict of country code and name translations

    Returns:
    tuple -- ('success', message), where message identifies the item by the
    'htid' value of the source dict

    Raises:
    ProcessingError -- if building the data model raises DataError, or if
    writing to Kinesis raises KinesisError. The triggering exception is
    attached as the cause of the ProcessingError.
    """

    logger.info('Reading entry for HathiTrust item {}'.format(row[0]))

    logger.debug('Generating source dict from row and column names')
    # This quickly builds a dictionary with column names that can be used to
    # retrieve specific values
    hathiDict = dict(zip(columns, row))
    # Generate a hathi record object with the source dict
    hathiRec = HathiRecord(hathiDict)

    try:
        # Generate an SFR-compliant object
        hathiRec.buildDataModel(countryCodes)
    except DataError as err:
        logger.error('Unable to process record {}'.format(
            hathiRec.ingest['htid']))
        logger.debug(err.message)
        # Chain explicitly so the traceback marks the DataError as the cause
        raise ProcessingError('DataError', err.message) from err

    try:
        logger.debug('Writing hathi record {} to kinesis for ingest'.format(
            hathiRec.work.primary_identifier.identifier))
        KinesisOutput.putRecord(
            {
                'status': 200,
                'type': 'work',
                'method': 'insert',
                'data': hathiRec.work
            }, os.environ['OUTPUT_STREAM'])
    except KinesisError as err:
        logger.error('Unable to output record {} to Kinesis'.format(
            hathiRec.ingest['htid']))
        logger.debug(err.message)
        # Chain explicitly so the traceback marks the KinesisError as the cause
        raise ProcessingError('KinesisError', err.message) from err

    # On success, return a tuple containing status and identifier; this
    # verifies the record was passed to the next step in the data pipeline
    return ('success', 'HathiTrust Item {}'.format(hathiRec.ingest['htid']))
示例#7
0
 def test_build_item(self):
     # Building an item from a row digitized by 'ia' is expected to produce
     # a Format whose source is 'hathitrust'.
     record = HathiRecord({
         'htid': 'test.00000',
         'source': 'nypl',
         'responsible_entity': 'nypl',
         'digitization_entity': 'ia',
     })
     record.buildItem()
     builtItem = record.item
     self.assertIsInstance(builtItem, Format)
     self.assertEqual(builtItem.source, 'hathitrust')
示例#8
0
 def test_parse_author_dates(self):
     # An author string ending in "1900-2000" is expected to produce an
     # Agent with a birth date followed by a death date.
     record = HathiRecord({})
     record.parseAuthor('Tester, Test, 1900-2000')
     author = record.work.agents[0]
     self.assertIsInstance(author, Agent)
     self.assertEqual(author.name, 'Tester, Test')
     self.assertEqual(len(author.dates), 2)
     # Check type and display value of each date, in positional order
     expectedDates = [('birth_date', '1900'), ('death_date', '2000')]
     for position, (dateType, displayDate) in enumerate(expectedDates):
         self.assertEqual(author.dates[position].date_type, dateType)
         self.assertEqual(author.dates[position].display_date, displayDate)
示例#9
0
    def test_create_rights(self):
        # A row with the rights code 'pd' is expected to produce a Rights
        # object whose license is 'public_domain'.
        record = HathiRecord({
            'htid': 'test.000000',
            'rights': 'pd',
            'rights_statement': 'ipma',
            'rights_determination_date': '2019',
            'copyright_date': '1990'
        })
        record.createRights()

        createdRights = record.rights
        self.assertIsInstance(createdRights, Rights)
        self.assertEqual(createdRights.license, 'public_domain')
示例#10
0
 def test_build_item_google_digitization(self):
     # A row digitized by 'google' is expected to produce a Format with a
     # single link whose 'download' flag is False.
     record = HathiRecord({
         'htid': 'test.00000',
         'provider_entity': 'nypl',
         'responsible_entity': 'nypl',
         'digitization_entity': 'google',
     })
     record.buildItem()
     builtItem = record.item
     self.assertIsInstance(builtItem, Format)
     self.assertEqual(builtItem.source, 'hathitrust')
     self.assertEqual(len(builtItem.links), 1)
     self.assertEqual(builtItem.links[0].flags['download'], False)
示例#11
0
 def test_hathi_create_date_string(self):
     # An ingest date supplied as a string is expected to be exposed
     # unchanged through the record's modified attribute.
     ingestDate = '2019-01-01'
     record = HathiRecord(
         {'htid': 1, 'title': 'Test Record'},
         ingestDateTime=ingestDate
     )
     self.assertEqual(record.modified, '2019-01-01')
示例#12
0
 def test_hathi_repr(self):
     # Once the work title is set, the string form of the record is
     # expected to include that title.
     record = HathiRecord({'htid': 1, 'title': 'Test Record'})
     record.work.title = record.ingest['title']
     rendered = str(record)
     self.assertEqual(rendered, '<Hathi(title=Test Record)>')
示例#13
0
 def test_hathi_create(self):
     # Constructing a record is expected to keep the source dict available
     # through the ingest attribute.
     sourceRow = {'htid': 1, 'title': 'Test Record'}
     record = HathiRecord(sourceRow)
     self.assertIsInstance(record, HathiRecord)
     storedId = record.ingest['htid']
     self.assertEqual(storedId, 1)
示例#14
0
 def test_hathi_create_date(self):
     # An ingest date supplied as a datetime is expected to be exposed
     # through modified as a 'YYYY-MM-DD HH:MM:SS' string.
     ingestDate = datetime.strptime('2019-01-01', '%Y-%m-%d')
     record = HathiRecord(
         {'htid': 1, 'title': 'Test Record'},
         ingestDateTime=ingestDate
     )
     expectedModified = ingestDate.strftime('%Y-%m-%d %H:%M:%S')
     self.assertEqual(record.modified, expectedModified)
示例#15
0
    def test_build_data_model(self):
        # Runs buildDataModel with each of the builder methods replaced by a
        # mock and checks that the record object is still a HathiRecord.
        record = HathiRecord({
            'title': 'Work Test',
            'description': '1st of 4',
            'bib_key': '0000000',
            'htid': 'test.000000000',
            'gov_doc': 'f',
            'author': 'Author, Test',
            'copyright_date': '2019',
            'rights': 'test_rights'
        })

        # Replace the builder methods on this instance only
        stubbed = ('buildWork', 'buildInstance', 'buildItem', 'createRights')
        for methodName in stubbed:
            setattr(record, methodName, MagicMock())

        record.buildDataModel('countryCodes')
        self.assertIsInstance(record, HathiRecord)
示例#16
0
    def test_build_instance(self):
        # With the parsing helpers mocked out, buildInstance is expected to
        # produce an InstanceRecord carrying the row's language and title.
        record = HathiRecord({
            'title': 'Instance Test',
            'language': 'en',
            'copyright_date': '2019',
            'publisher_pub_date': 'New York [2019]',
            'pub_place': 'nyu'
        })
        # Replace the parsing helpers on this instance only
        for helperName in ('parseIdentifiers', 'parsePubInfo',
                           'parsePubPlace'):
            setattr(record, helperName, MagicMock())

        record.buildInstance({})
        builtInstance = record.instance
        self.assertIsInstance(builtInstance, InstanceRecord)
        self.assertEqual(builtInstance.language, 'en')
        self.assertEqual(builtInstance.title, 'Instance Test')
示例#17
0
    def test_build_work(self):
        # With the parsing helpers mocked out, buildWork is expected to
        # produce a WorkRecord carrying the row's title.
        record = HathiRecord({
            'title': 'Work Test',
            'description': '1st of 4',
            'bib_key': '0000000',
            'htid': 'test.000000000',
            'gov_doc': 'f',
            'author': 'Author, Test',
            'copyright_date': '2019'
        })
        # Replace the parsing helpers on this instance only
        for helperName in ('parseIdentifiers', 'parseAuthor', 'parseGovDoc'):
            setattr(record, helperName, MagicMock())

        record.buildWork()
        builtWork = record.work
        self.assertIsInstance(builtWork, WorkRecord)
        self.assertEqual(builtWork.title, 'Work Test')
示例#18
0
    def test_build_instance_no_cover(self, mockCover):
        # mockCover is injected from outside this method (the patch target
        # is not visible here); the test expects it to be called exactly
        # once with no arguments while the instance is built.
        record = HathiRecord({
            'htid': 'test.1',
            'title': 'Instance Test',
            'language': 'en',
            'copyright_date': '2019',
            'publisher_pub_date': 'New York [2019]',
            'pub_place': 'nyu',
            'description': 'testing'
        })
        # Replace the parsing helpers on this instance only
        for helperName in ('parseIdentifiers', 'parsePubInfo',
                           'parsePubPlace'):
            setattr(record, helperName, MagicMock())

        record.buildInstance({})
        mockCover.assert_called_once_with()
        builtInstance = record.instance
        self.assertIsInstance(builtInstance, InstanceRecord)
        self.assertEqual(builtInstance.language, 'en')
        self.assertEqual(builtInstance.title, 'Instance Test')
示例#19
0
    def test_build_instance_cover(self, mockAddItem, mockCover):
        # mockAddItem and mockCover are injected from outside this method
        # (the patch targets are not visible here). The test expects a
        # copyright date and a temporary cover link to be added in that
        # order while the instance is built.
        record = HathiRecord({
            'htid': 'test.1',
            'title': 'Instance Test',
            'language': 'en',
            'copyright_date': '2019',
            'publisher_pub_date': 'New York [2019]',
            'pub_place': 'nyu',
            'description': 'testing'
        })
        # Replace the parsing helpers on this instance only
        for helperName in ('parseIdentifiers', 'parsePubInfo',
                           'parsePubPlace'):
            setattr(record, helperName, MagicMock())

        record.buildInstance({})
        mockCover.assert_called_once()

        expectedDateCall = call(
            'dates', Date,
            display_date='2019',
            date_range='2019',
            date_type='copyright_date'
        )
        expectedCoverCall = call(
            'links', Link,
            url='test_url',
            media_type='image/jpeg',
            flags={'cover': True, 'temporary': True}
        )
        mockAddItem.assert_has_calls([expectedDateCall, expectedCoverCall])

        builtInstance = record.instance
        self.assertIsInstance(builtInstance, InstanceRecord)
        self.assertEqual(builtInstance.language, 'en')
示例#20
0
 def test_parse_non_gov_doc(self):
     # Calling parseGovDoc with 0 as its first argument is expected to
     # record a first measurement on the work whose value is 0.
     record = HathiRecord({})
     record.parseGovDoc(0, 1)
     firstMeasurement = record.work.measurements[0]
     self.assertEqual(firstMeasurement.value, 0)
示例#21
0
 def test_pub_info_no_date(self):
     # Publisher text without a bracketed date is expected to become the
     # name of the first instance agent unchanged.
     record = HathiRecord({})
     record.parsePubInfo('Test.')
     publisher = record.instance.agents[0]
     self.assertEqual(publisher.name, 'Test.')
示例#22
0
 def test_pub_info_date(self):
     # Publisher text with a bracketed date is expected to be split into
     # an agent name and a date whose display value keeps the '?'.
     record = HathiRecord({})
     record.parsePubInfo('Test, [1900?]')
     builtInstance = record.instance
     self.assertEqual(builtInstance.agents[0].name, 'Test')
     self.assertEqual(builtInstance.dates[0].display_date, '1900?')
示例#23
0
 def test_parse_author(self):
     # An author string with no dates is expected to produce an Agent with
     # the full string as its name and an empty dates list.
     record = HathiRecord({})
     record.parseAuthor('Tester, Test')
     agents = record.work.agents
     self.assertIsInstance(agents[0], Agent)
     self.assertEqual(agents[0].name, 'Tester, Test')
     self.assertEqual(len(agents[0].dates), 0)