Пример #1
0
    def upload_to_amcat(self, articles):
        """Create articles in Amcat using AmcatAPI.

        The articles are sent to the article set ``self.set_id`` in project
        ``amcat.project``. A failed upload is logged together with its
        traceback and is not re-raised, so the caller is never interrupted
        by an upload error. Nothing is returned.

        Args:
            articles: List of articles in Amcat's format
        """
        try:
            self.amcat_api.create_articles(project=amcat.project, articleset=self.set_id, json_data=articles)
        except Exception:
            # `amcat` is a module-level name, not a local of this method, so
            # locals() does not contain it. The format arguments are passed
            # explicitly; otherwise building the message raises KeyError and
            # hides the real upload error.
            log.exception('Could not upload articles to amcat. url: {amcat.project}; set: {self.set_id}; '
                          'data: {articles}'.format(amcat=amcat, self=self, articles=articles))
Пример #2
0
    def article_ocr(identifier):
        """Retrieve ocr full text for article with identifier.

        Args:
            identifier: Article identifier; passed to ``DelpherAPI.ocr_url``
                to build the request url.

        Returns:
            The values of the response joined by blank lines, taken in sorted
            key order and skipping the ``'title'`` entry. When the request
            raises or yields ``None`` the placeholder ``'<failed to load>'``
            is returned instead.
        """
        failed = '<failed to load>'
        url = DelpherAPI.ocr_url(identifier)
        try:
            response = request.get(url)
        except Exception:
            # Only ordinary errors are turned into the placeholder;
            # KeyboardInterrupt / SystemExit must still stop the program.
            log.exception('Could not get OCR data for url {url}.'.format(url=url))
            return failed

        if response is None:
            log.error('Did not get OCR data for url {url}.'.format(url=url))
            return failed

        # Each paragraph is one item in the response
        return "\n\n".join(response[key] for key in sorted(response) if key != 'title')
Пример #3
0
    def list_next_articles(self):
        """Retrieve next page of search results.

        On success the ``'numberOfRecords'`` value of the response is stored
        in ``self.number_of_records`` and progress is logged.

        Returns:
            The ``'records'`` entry of the response, or an empty list when
            the request raised or produced no response.
        """
        url = self.results_url()

        try:
            response = request.get(url)
        except Exception:
            # Ordinary failures mean "no results for this page";
            # KeyboardInterrupt / SystemExit are left to propagate.
            log.exception('Could not get results for url {url}'.format(url=url))
            return []

        if response is None:
            # Same guard as article_ocr: without it the subscript below
            # fails with TypeError on a missing response.
            log.error('Did not get results for url {url}'.format(url=url))
            return []

        self.number_of_records = response['numberOfRecords']
        log.info('Page {self.page} of article list retrieved. '
                 '{self.records_processed} of {self.number_of_records} articles processed.'.format(self=self))

        return response['records']
Пример #4
0
    def setup_amcat(self, from_date, until_date):
        """Connect to Amcat and open a new article set for later uploads.

        Builds an AmcatAPI client from the ``amcat`` settings, creates an
        article set in ``amcat.project`` and remembers its id in
        ``self.set_id``.

        Args:
            from_date: Not read directly here; available to
                ``amcat.set_name_template`` through ``locals()``.
            until_date: Not read directly here; available to
                ``amcat.set_name_template`` through ``locals()``.

        Returns:
            The AmcatAPI client that was created.

        Raises:
            Any error from ``create_set``; it is logged and re-raised.
        """
        # NOTE: the set name template is rendered with **locals(), so the
        # names self, from_date, until_date, amcat_api and now must not be
        # renamed.
        amcat_api = AmcatAPI(amcat.host, amcat.username, amcat.password)
        log.info(
            'Setup Amcat API with host {0}, username {1}. Use project {2}'.format(
                amcat.host, amcat.username, amcat.project))

        now = datetime.now().replace(microsecond=0)
        set_name = amcat.set_name_template.format(**locals())

        try:
            created = amcat_api.create_set(
                project=amcat.project,
                name=set_name,
                provenance=amcat.data_provenance,
            )
        except:
            # Log with traceback, then let the caller deal with the failure.
            log.exception('Could not create article set')
            raise
        else:
            new_set_id = created['id']
            log.info('Created article set in Amcat. ID: {0}'.format(new_set_id))
            self.set_id = new_set_id
            return amcat_api