def rowParser(row, columns, countryCodes):
    """Parse a single HathiTrust item entry into the SFR data model and pass
    the resulting work record to Kinesis for the SFR data pipeline.

    This function is a manager around a HathiRecord object: it pairs the
    column names with the row values, asks the record to build its data
    model, and writes the resulting work to the stream named by the
    OUTPUT_STREAM environment variable.

    Arguments:
    row -- list of fields from the HathiTrust source CSV file
    columns -- list of column names that corresponds to the source row
    countryCodes -- dict of country code and name translations, passed
    through unchanged to HathiRecord.buildDataModel

    Output:
    A tuple of ('success', 'HathiTrust Item <htid>') once the record has been
    handed to Kinesis.

    Raises:
    ProcessingError -- if building the data model raises a DataError or if
    writing to Kinesis raises a KinesisError. The original exception is
    attached as the cause.
    """
    logger.info('Reading entry for HathiTrust item {}'.format(row[0]))

    logger.debug('Generating source dict from row and column names')
    # Pair each column name with its value so fields can be looked up by
    # name instead of by position
    hathiDict = dict(zip(columns, row))

    # Generate a hathi record object with the source dict
    hathiRec = HathiRecord(hathiDict)

    try:
        # Generate an SFR-compliant object
        hathiRec.buildDataModel(countryCodes)
    except DataError as err:
        logger.error('Unable to process record {}'.format(
            hathiRec.ingest['htid']))
        logger.debug(err.message)
        # Chain explicitly so the DataError is reported as the direct cause
        raise ProcessingError('DataError', err.message) from err

    try:
        logger.debug('Writing hathi record {} to kinesis for ingest'.format(
            hathiRec.work.primary_identifier.identifier))
        KinesisOutput.putRecord(
            {
                'status': 200,
                'type': 'work',
                'method': 'insert',
                'data': hathiRec.work
            },
            os.environ['OUTPUT_STREAM'])
    except KinesisError as err:
        logger.error('Unable to output record {} to Kinesis'.format(
            hathiRec.ingest['htid']))
        logger.debug(err.message)
        # Chain explicitly so the KinesisError is reported as the direct cause
        raise ProcessingError('KinesisError', err.message) from err

    # On success, return a tuple containing status and identifier; this
    # verifies the record was passed to the next step in the data pipeline
    return ('success', 'HathiTrust Item {}'.format(hathiRec.ingest['htid']))
def test_build_data_model(self):
    """buildDataModel runs to completion when the builder methods are
    replaced with mocks, leaving a HathiRecord instance behind."""
    source_fields = {
        'title': 'Work Test',
        'description': '1st of 4',
        'bib_key': '0000000',
        'htid': 'test.000000000',
        'gov_doc': 'f',
        'author': 'Author, Test',
        'copyright_date': '2019',
        'rights': 'test_rights'
    }
    record = HathiRecord(source_fields)

    # Replace each builder on the instance with a mock so the real
    # implementations are not executed by buildDataModel
    stubbed_builders = (
        'buildWork',
        'buildInstance',
        'buildItem',
        'createRights',
    )
    for builder_name in stubbed_builders:
        setattr(record, builder_name, MagicMock())

    record.buildDataModel('countryCodes')

    self.assertIsInstance(record, HathiRecord)