def article_ocr(identifier):
    """Retrieve the OCR full text for the article with the given identifier.

    The OCR url is built with ``DelpherAPI.ocr_url(identifier)`` and fetched
    with ``request.get``. The response is treated as a mapping; every value
    except the one stored under the ``'title'`` key is taken as one paragraph.

    Returns:
        The paragraphs joined by blank lines, ordered by their sorted keys,
        or the placeholder string ``'<failed to load>'`` when the request
        raised or returned ``None``.
    """
    url = DelpherAPI.ocr_url(identifier)
    try:
        response = request.get(url)
    except Exception:
        # Deliberately not a bare ``except``: KeyboardInterrupt and SystemExit
        # must propagate instead of being reported as a failed download.
        log.exception('Could not get OCR data for url {url}.'.format(url=url))
        return '<failed to load>'

    if response is None:
        log.error('Did not get OCR data for url {url}.'.format(url=url))
        return '<failed to load>'

    # Each non-title entry of the response is one paragraph.
    # NOTE(review): keys are sorted as plain strings, so numbered keys would
    # order e.g. 'p10' before 'p2' -- confirm the key format of the API.
    return "\n\n".join(
        response[key] for key in sorted(response.keys()) if key != 'title'
    )
def list_next_articles(self):
    """Retrieve the next page of search results.

    Fetches ``self.results_url()`` with ``request.get`` and stores the
    response's ``'numberOfRecords'`` value in ``self.number_of_records``.

    Returns:
        The ``'records'`` entry of the response, or an empty list when the
        request raised or returned ``None``.
    """
    url = self.results_url()
    try:
        response = request.get(url)
    except Exception:
        # Deliberately not a bare ``except``: KeyboardInterrupt and SystemExit
        # must propagate instead of being reported as a failed download.
        log.exception('Could not get results for url {url}'.format(url=url))
        return []

    if response is None:
        # Same failure path as a raised error; without this guard the
        # subscript below would fail with a TypeError.
        log.error('Did not get results for url {url}'.format(url=url))
        return []

    self.number_of_records = response['numberOfRecords']
    log.info('Page {self.page} of article list retrieved. '
             '{self.records_processed} of {self.number_of_records} '
             'articles processed.'.format(self=self))
    return response['records']