def fetch(cls, site_url_filter, since_datetime):
        import ckan.model as model
        from running_stats import StatsList
        log = logging.getLogger(__name__)
        stats = StatsList()

        # Use the generate_entries generator to get all of
        # the entries from the ODI Atom feed.  This should
        # correctly handle all of the pages within the feed.
        import ckanext.certificates.client as client
        for entry in client.generate_entries(since=since_datetime):

            # We have to handle the case where the rel='about' might be
            # missing, if so we'll ignore it and catch it next time
            about = entry.get('about', '')
            if not about:
                log.debug(stats.add('Ignore - no rel="about" specifying the dataset',
                                    '%s "%s" %s' % (about, entry['title'], entry['id'])))
                continue

            if not site_url_filter.search(about):
                log.debug(stats.add('Ignore - "about" field does not reference this site',
                                    '%s "%s" %s' % (about, entry['title'], entry['id'])))
                continue

            if not '/dataset/' in entry['about']:
                log.debug(stats.add('Ignore - is "about" DGU but not a dataset',
                                    '%s "%s" %s' % (about, entry['about'], entry['id'])))
                continue

            pkg = cls._get_package_from_url(entry.get('about'))
            if not pkg:
                log.error(stats.add('Unable to find the package',
                                    '%s "%s" %s %r' % (about, entry['about'], entry['id'], entry.get('about'))))
                continue

            # Build the JSON subset we want to describe the certificate
            badge_data = client.get_badge_data(entry['alternate'])
            if not badge_data:
                log.info(stats.add('Error fetching badge data - skipped',
                                   '%s "%s" %s' % (about, entry['title'], entry['id'])))
                continue
            badge_data['cert_title'] = entry.get('content', '')  # e.g. 'Basic Level Certificate'

            badge_json = json.dumps(badge_data)
            if pkg.extras.get('odi-certificate') == badge_json:
                log.debug(stats.add('Certificate unchanged',
                                         badge_data['certificate_url']))
            else:
                operation = 'updated' if 'odi-certificate' in pkg.extras \
                    else 'added'
                model.repo.new_revision()
                pkg.extras['odi-certificate'] = json.dumps(badge_data)
                log.debug(stats.add('Certificate %s' % operation,
                               '"%s" %s' % (badge_data['title'],
                                            badge_data['certificate_url'])))
                model.Session.commit()

        log.info('Summary:\n' + stats.report())
Пример #2
0
    def command(self):
        # Load configuration
        self._load_config()

        # Initialise database access
        import ckan.model as model
        model.Session.remove()
        model.Session.configure(bind=model.meta.engine)

        # Logging, post-config
        self.setup_logging()

        from pylons import config

        site_url = config.get('ckan.site_url')

        # Handling of sites that support www. but don't use it.
        full_site_url = site_url
        if not '//www.' in full_site_url:
            full_site_url = full_site_url.replace('//', '//www.')

        from running_stats import StatsList
        stats = StatsList()

        # Use the generate_entries generator to get all of
        # the entries from the ODI Atom feed.  This should
        # correctly handle all of the pages within the feed.
        for entry in client.generate_entries(self.log):

            # We have to handle the case where the rel='about' might be missing, if so
            # we'll ignore it and catch it next time
            about = entry.get('about', '')
            if not about:
                self.log.debug(stats.add('Ignore - no rel="about" specifying the dataset',
                                         '%s "%s" %s' % (about, entry['title'], entry['id'])))
                continue

            if not about.startswith(site_url) and not about.startswith(full_site_url):
                self.log.debug(stats.add('Ignore - "about" field does not reference this site',
                                         '%s "%s" %s' % (about, entry['title'], entry['id'])))
                continue

            if not '/dataset/' in entry['about']:
                self.log.debug(stats.add('Ignore - is "about" DGU but not a dataset',
                                         '%s "%s" %s' % (about, entry['title'], entry['id'])))
                continue

            pkg = self._get_package_from_url(entry.get('about'))
            if not pkg:
                self.log.error(stats.add('Unable to find the package',
                                         '%s "%s" %s' % (about, entry['title'], entry['id'])))
                continue

            # Build the JSON subset we want to describe the certificate
            badge_data = client.get_badge_data(self.log, entry['alternate'])
            badge_data['cert_title'] = entry.get('content', '')

            badge_json = json.dumps(badge_data)
            if pkg.extras.get('odi-certificate') == badge_json:
                self.log.debug(stats.add('Certificate unchanged',
                                         badge_data['certificate_url']))
            else:
                model.repo.new_revision()
                pkg.extras['odi-certificate'] = json.dumps(badge_data)
                operation = 'updated' if 'odi-certificate' in pkg.extras else 'added'
                self.log.debug(stats.add('Certificate %s' % operation,
                               '"%s" %s' % (badge_data['title'],
                                            badge_data['certificate_url'])))
                model.Session.commit()

        self.log.info('Summary:\n' + stats.report())
Пример #3
0
    def command(self):
        # Load configuration
        self._load_config()

        # Initialise database access
        import ckan.model as model
        model.Session.remove()
        model.Session.configure(bind=model.meta.engine)

        # Logging, post-config
        self.setup_logging()

        from pylons import config

        site_url = config.get('ckan.site_url')

        # Handling of sites that support www. but don't use it.
        full_site_url = site_url
        if not '//www.' in full_site_url:
            full_site_url = full_site_url.replace('//', '//www.')

        from running_stats import StatsList
        stats = StatsList()

        # Use the generate_entries generator to get all of
        # the entries from the ODI Atom feed.  This should
        # correctly handle all of the pages within the feed.
        for entry in client.generate_entries(self.log):

            # We have to handle the case where the rel='about' might be missing, if so
            # we'll ignore it and catch it next time
            about = entry.get('about', '')
            if not about:
                self.log.debug(
                    stats.add(
                        'Ignore - no rel="about" specifying the dataset',
                        '%s "%s" %s' % (about, entry['title'], entry['id'])))
                continue

            if not about.startswith(site_url) and not about.startswith(
                    full_site_url):
                self.log.debug(
                    stats.add(
                        'Ignore - "about" field does not reference this site',
                        '%s "%s" %s' % (about, entry['title'], entry['id'])))
                continue

            if not '/dataset/' in entry['about']:
                self.log.debug(
                    stats.add(
                        'Ignore - is "about" DGU but not a dataset',
                        '%s "%s" %s' % (about, entry['title'], entry['id'])))
                continue

            pkg = self._get_package_from_url(entry.get('about'))
            if not pkg:
                self.log.error(
                    stats.add(
                        'Unable to find the package',
                        '%s "%s" %s' % (about, entry['title'], entry['id'])))
                continue

            # Build the JSON subset we want to describe the certificate
            badge_data = client.get_badge_data(self.log, entry['alternate'])
            badge_data['cert_title'] = entry.get('content', '')

            badge_json = json.dumps(badge_data)
            if pkg.extras.get('odi-certificate') == badge_json:
                self.log.debug(
                    stats.add('Certificate unchanged',
                              badge_data['certificate_url']))
            else:
                model.repo.new_revision()
                pkg.extras['odi-certificate'] = json.dumps(badge_data)
                operation = 'updated' if 'odi-certificate' in pkg.extras else 'added'
                self.log.debug(
                    stats.add(
                        'Certificate %s' % operation, '"%s" %s' %
                        (badge_data['title'], badge_data['certificate_url'])))
                model.Session.commit()

        self.log.info('Summary:\n' + stats.report())
    def fetch(cls, site_url_filter, since_datetime):
        import ckan.model as model
        from running_stats import StatsList
        log = logging.getLogger(__name__)
        stats = StatsList()

        # Use the generate_entries generator to get all of
        # the entries from the ODI Atom feed.  This should
        # correctly handle all of the pages within the feed.
        import ckanext.certificates.client as client
        for entry in client.generate_entries(since=since_datetime):

            # We have to handle the case where the rel='about' might be
            # missing, if so we'll ignore it and catch it next time
            about = entry.get('about', '')
            if not about:
                log.debug(
                    stats.add(
                        'Ignore - no rel="about" specifying the dataset',
                        '%s "%s" %s' % (about, entry['title'], entry['id'])))
                continue

            if not site_url_filter.search(about):
                log.debug(
                    stats.add(
                        'Ignore - "about" field does not reference this site',
                        '%s "%s" %s' % (about, entry['title'], entry['id'])))
                continue

            if not '/dataset/' in entry['about']:
                log.debug(
                    stats.add(
                        'Ignore - is "about" DGU but not a dataset',
                        '%s "%s" %s' % (about, entry['about'], entry['id'])))
                continue

            pkg = cls._get_package_from_url(entry.get('about'))
            if not pkg:
                log.error(
                    stats.add(
                        'Unable to find the package',
                        '%s "%s" %s %r' % (about, entry['about'], entry['id'],
                                           entry.get('about'))))
                continue

            # Build the JSON subset we want to describe the certificate
            badge_data = client.get_badge_data(entry['alternate'])
            if not badge_data:
                log.info(
                    stats.add(
                        'Error fetching badge data - skipped',
                        '%s "%s" %s' % (about, entry['title'], entry['id'])))
                continue
            badge_data['cert_title'] = entry.get(
                'content', '')  # e.g. 'Basic Level Certificate'

            badge_json = json.dumps(badge_data)
            if pkg.extras.get('odi-certificate') == badge_json:
                log.debug(
                    stats.add('Certificate unchanged',
                              badge_data['certificate_url']))
            else:
                operation = 'updated' if 'odi-certificate' in pkg.extras \
                    else 'added'
                model.repo.new_revision()
                pkg.extras['odi-certificate'] = json.dumps(badge_data)
                log.debug(
                    stats.add(
                        'Certificate %s' % operation, '"%s" %s' %
                        (badge_data['title'], badge_data['certificate_url'])))
                model.Session.commit()

        log.info('Summary:\n' + stats.report())