Example #1
0
    def test_trigger_on_archival(cls):
        """Archiving a resource should hand a QA task to the queue."""
        # Build a dataset that holds a single CSV resource.
        context = {'model': model, 'ignore_auth': True,
                   'session': model.Session, 'user': '******'}
        dataset = {'name': 'testpkg', 'license_id': 'uk-ogl',
                   'resources': [{'url': 'http://test.com/',
                                  'format': 'CSV',
                                  'description': 'Test'}]}
        dataset = get_action('package_create')(context, dataset)
        res_id = dataset['resources'][0]['id']
        # Record a completed archival for that resource.
        archival = Archival.create(res_id)
        archival.cache_filepath = __file__  # any existing path will do
        archival.updated = TODAY
        model.Session.add(archival)
        model.Session.commit()
        # TODO show that QA hasn't run yet

        # create a send_data from ckanext-archiver, that gets picked up by
        # ckanext-qa to put a task on the queue
        ckanext.archiver.tasks.notify_package(dataset, 'priority')
Example #2
0
def save_archival(resource, status_id, reason, url_redirected_to,
                  download_result, archive_result, log):
    '''Record in the archival table the outcome of one attempt to
    download the resource.

    May propagate a CkanError.
    '''
    now = datetime.datetime.now()

    from ckanext.archiver.model import Archival, Status
    from ckan import model

    archival = Archival.get_for_resource(resource['id'])
    first_archival = not archival
    previous_archival_was_broken = None
    if archival:
        log.info('Archival from before: %r', archival)
        previous_archival_was_broken = archival.is_broken
    else:
        archival = Archival.create(resource['id'])
        model.Session.add(archival)

    revision = model.Session.query(model.Revision).get(resource['revision_id'])
    archival.resource_timestamp = revision.timestamp

    # Outcome of this attempt
    archival.status_id = status_id
    archival.is_broken = Status.is_status_broken(status_id)
    archival.reason = reason
    archival.url_redirected_to = url_redirected_to

    if archival.is_broken is False:
        # Successful download: record what was fetched...
        archival.cache_filepath = archive_result['cache_filepath']
        archival.cache_url = archive_result['cache_url']
        archival.size = download_result['size']
        archival.mimetype = download_result['mimetype']
        archival.hash = download_result['hash']
        headers = download_result['headers']
        archival.etag = headers.get('etag')
        archival.last_modified = headers.get('last-modified')
        # ...and wipe the failure history.
        archival.last_success = now
        archival.first_failure = None
        archival.failure_count = 0
    else:
        log.info('First_archival=%r Previous_broken=%r Failure_count=%r' %
                 (first_archival, previous_archival_was_broken,
                  archival.failure_count))
        if first_archival or previous_archival_was_broken is False:
            # i.e. this is the first failure (or the first archival)
            archival.first_failure = now
            archival.failure_count = 1
        else:
            archival.failure_count += 1

    archival.updated = now
    log.info('Archival saved: %r', archival)
    model.repo.commit_and_remove()
Example #3
0
 def _test_resource(self,
                    url='anything',
                    format='TXT',
                    archived=True,
                    cached=True,
                    license_id='uk-ogl'):
     """Create a test package with one resource and return its Resource.

     When ``archived`` is true, an Archival record is committed for it,
     with a cache file (``cached``) or without.
     """
     context = {'model': model,
                'ignore_auth': True,
                'session': model.Session,
                'user': '******'}
     resource = {'url': url, 'format': format, 'description': 'Test'}
     dataset = {'name': 'testpkg',
                'license_id': license_id,
                'resources': [resource]}
     dataset = get_action('package_create')(context, dataset)
     res_id = dataset['resources'][0]['id']
     if archived:
         record = Archival.create(res_id)
         # The cache filepath merely has to point at an existing file.
         record.cache_filepath = __file__ if cached else None
         record.updated = TODAY
         model.Session.add(record)
         model.Session.commit()
     return model.Resource.get(res_id)
Example #4
0
 def _test_resource(self, url='anything', format='TXT', archived=True, cached=True, license_id='uk-ogl'):
     """Create a dataset (via factories) with one resource; return the Resource.

     An Archival row is committed when ``archived`` is true; ``cached``
     decides whether it carries a cache filepath.
     """
     resource = {'url': url, 'format': format, 'description': 'Test'}
     dataset = ckan_factories.Dataset(license_id=license_id,
                                      resources=[resource])
     res_id = dataset['resources'][0]['id']
     if archived:
         record = Archival.create(res_id)
         # The cache filepath merely has to exist on disk.
         record.cache_filepath = __file__ if cached else None
         record.updated = TODAY
         model.Session.add(record)
         model.Session.commit()
     return model.Resource.get(res_id)
Example #5
0
File: test_tasks.py  Project: tbalaz/test
 def _test_resource(self, url='anything', format='TXT', archived=True, cached=True, license_id='uk-ogl'):
     """Make a package holding one resource; optionally archive it.

     Returns the model.Resource for the created resource.
     """
     context = {'model': model, 'ignore_auth': True,
                'session': model.Session, 'user': '******'}
     dataset = {'name': 'testpkg', 'license_id': license_id,
                'resources': [{'url': url, 'format': format,
                               'description': 'Test'}]}
     dataset = get_action('package_create')(context, dataset)
     res_id = dataset['resources'][0]['id']
     if archived:
         record = Archival.create(res_id)
         # Only needs to be a path that exists on disk.
         record.cache_filepath = __file__ if cached else None
         record.updated = TODAY
         model.Session.add(record)
         model.Session.commit()
     return model.Resource.get(res_id)
Example #6
0
 def _test_resource(self, url='anything', format='TXT', archived=True, cached=True, license_id='uk-ogl'):
     """Build a dataset with a single resource via the CKAN factories.

     When ``archived`` is true an Archival row is committed for the
     resource, cached (cache_filepath set) or not; returns the Resource.
     """
     pkg_dict = {
         'license_id': license_id,
         'resources': [{'url': url, 'format': format, 'description': 'Test'}],
     }
     pkg_dict = ckan_factories.Dataset(**pkg_dict)
     res_id = pkg_dict['resources'][0]['id']
     if not archived:
         return model.Resource.get(res_id)
     archival = Archival.create(res_id)
     archival.cache_filepath = __file__ if cached else None  # just needs to exist
     archival.updated = TODAY
     model.Session.add(archival)
     model.Session.commit()
     return model.Resource.get(res_id)
Example #7
0
    def test_trigger_on_archival(cls):
        """Archiving a resource should queue a QA task via notify_package."""
        # Build a dataset that holds a single CSV resource.
        context = {'model': model, 'ignore_auth': True,
                   'session': model.Session, 'user': '******'}
        dataset = {'name': 'testpkg', 'license_id': 'uk-ogl',
                   'resources': [{'url': 'http://test.com/',
                                  'format': 'CSV',
                                  'description': 'Test'}]}
        dataset = get_action('package_create')(context, dataset)
        resource_dict = dataset['resources'][0]
        res_id = resource_dict['id']
        # Record a completed archival for that resource.
        archival = Archival.create(res_id)
        cache_filepath = __file__  # any existing path will do
        archival.cache_filepath = cache_filepath
        archival.updated = TODAY
        model.Session.add(archival)
        model.Session.commit()
        # TODO show that QA hasn't run yet

        # create a send_data from ckanext-archiver, that gets picked up by
        # ckanext-qa to put a task on the queue
        ckanext.archiver.tasks.notify_package(dataset, 'priority', cache_filepath)
def migrate(options):
    """Copy archiver results out of the legacy TaskStatus/Resource storage
    into the Archival table.

    ``options`` supplies the resource filters (publisher, resource,
    dataset) plus ``options.write``: when false this is a dry run that
    only reports stats; when true changes are committed at the end.
    """
    from ckan import model
    from ckanext.archiver.model import Archival, Status

    resources = common.get_resources(state='active',
                                     publisher_ref=options.publisher,
                                     resource_id=options.resource,
                                     dataset_name=options.dataset)
    stats = StatsList()
    widgets = ['Resources: ', Percentage(), ' ', Bar(), ' ', ETA()]
    progress = ProgressBar(widgets=widgets)
    for res in progress(resources):
        # Gather the details of archivals from TaskStatus and Resource
        # to fill all properties of Archival apart from:
        # * package_id
        # * resource_id
        fields = {}
        archiver_task_status = model.Session.query(model.TaskStatus)\
                                    .filter_by(entity_id=res.id)\
                                    .filter_by(task_type='archiver')\
                                    .filter_by(key='status')\
                                    .first()
        if archiver_task_status:
            # The TaskStatus 'error' column holds a JSON blob with the
            # detailed archiver outcome.
            ats_error = json.loads(archiver_task_status.error)
            fields['status_id'] = Status.by_text(archiver_task_status.value)
            fields['is_broken'] = Status.is_status_broken(fields['status_id'])
            fields['reason'] = ats_error['reason']
            fields['last_success'] = date_str_to_datetime_or_none(ats_error['last_success'])
            fields['first_failure'] = date_str_to_datetime_or_none(ats_error['first_failure'])
            fields['failure_count'] = int(ats_error['failure_count'])
            fields['url_redirected_to'] = ats_error['url_redirected_to']
            fields['updated'] = archiver_task_status.last_updated
        else:
            # No task status recorded - fall back to whatever archive
            # data is stored directly on the Resource; skip resources
            # with none at all.
            if not (res.cache_url
                    or res.extras.get('cache_filepath')
                    or res.hash
                    or res.size
                    or res.mimetype):
                add_stat('No archive data', res, stats)
                continue
            for field_name in ('status_id', 'is_broken', 'reason',
                               'last_success', 'first_failure',
                               'failure_count', 'url_redirected_to',
                               'updated', 'created'):
                fields[field_name] = None

        fields['cache_filepath'] = res.extras.get('cache_filepath')
        fields['cache_url'] = res.cache_url
        fields['hash'] = res.hash
        fields['size'] = res.size
        fields['mimetype'] = res.mimetype

        # Estimate created/resource timestamps from resource revisions
        # that have a hash (i.e. were archived at some point).
        revisions_with_hash = model.Session.query(model.ResourceRevision)\
                .filter_by(id=res.id)\
                .order_by(model.ResourceRevision.revision_timestamp)\
                .filter(model.ResourceRevision.hash != '').all()
        if revisions_with_hash:
            # these are not perfect but not far off
            fields['created'] = revisions_with_hash[0].revision_timestamp
            fields['resource_timestamp'] = revisions_with_hash[-1].revision_timestamp
        else:
            # No hashed revisions - bracket the timestamps using
            # whichever archival dates are available.
            fields['created'] = min(fields['updated'] or END_OF_TIME,
                                    fields['first_failure'] or END_OF_TIME,
                                    fields['last_success'] or END_OF_TIME)
            fields['resource_timestamp'] = max(
                fields['updated'] or START_OF_TIME,
                fields['first_failure'] or START_OF_TIME,
                fields['last_success'] or START_OF_TIME)

        # Compare with any existing data in the Archival table
        archival = Archival.get_for_resource(res.id)
        if archival:
            changed = None
            for field, value in fields.items():
                if getattr(archival, field) != value:
                    # Only mutate the row when actually writing.
                    if options.write:
                        setattr(archival, field, value)
                    changed = True
            if not changed:
                add_stat('Already exists correctly in archival table', res, stats)
                continue
            add_stat('Updated in archival table', res, stats)
        else:
            archival = Archival.create(res.id)
            if options.write:
                for field, value in fields.items():
                    setattr(archival, field, value)
                model.Session.add(archival)
            add_stat('Added to archival table', res, stats)

    print 'Summary\n', stats.report()
    if options.write:
        model.repo.commit_and_remove()
        print 'Written'
def migrate(options):
    """Migrate archiver results from TaskStatus/Resource into the
    Archival table.

    Filters which resources to process via ``options`` (publisher,
    resource, dataset). ``options.write`` gates all mutation: when
    false, nothing is changed and only stats are printed.
    """
    from ckan import model
    from ckanext.archiver.model import Archival, Status

    resources = common.get_resources(state='active',
                                     publisher_ref=options.publisher,
                                     resource_id=options.resource,
                                     dataset_name=options.dataset)
    stats = StatsList()
    widgets = ['Resources: ', Percentage(), ' ', Bar(), ' ', ETA()]
    progress = ProgressBar(widgets=widgets)
    for res in progress(resources):
        # Gather the details of archivals from TaskStatus and Resource
        # to fill all properties of Archival apart from:
        # * package_id
        # * resource_id
        fields = {}
        archiver_task_status = model.Session.query(model.TaskStatus)\
                                    .filter_by(entity_id=res.id)\
                                    .filter_by(task_type='archiver')\
                                    .filter_by(key='status')\
                                    .first()
        if archiver_task_status:
            # Detailed outcome lives as JSON in the 'error' column.
            ats_error = json.loads(archiver_task_status.error)
            fields['status_id'] = Status.by_text(archiver_task_status.value)
            fields['is_broken'] = Status.is_status_broken(fields['status_id'])
            fields['reason'] = ats_error['reason']
            fields['last_success'] = date_str_to_datetime_or_none(
                ats_error['last_success'])
            fields['first_failure'] = date_str_to_datetime_or_none(
                ats_error['first_failure'])
            fields['failure_count'] = int(ats_error['failure_count'])
            fields['url_redirected_to'] = ats_error['url_redirected_to']
            fields['updated'] = archiver_task_status.last_updated
        else:
            # No TaskStatus row: use the archive data stored on the
            # Resource itself, or skip when there is none.
            if not (res.cache_url or res.extras.get('cache_filepath')
                    or res.hash or res.size or res.mimetype):
                add_stat('No archive data', res, stats)
                continue
            for field_name in ('status_id', 'is_broken', 'reason',
                               'last_success', 'first_failure',
                               'failure_count', 'url_redirected_to', 'updated',
                               'created'):
                fields[field_name] = None

        fields['cache_filepath'] = res.extras.get('cache_filepath')
        fields['cache_url'] = res.cache_url
        fields['hash'] = res.hash
        fields['size'] = res.size
        fields['mimetype'] = res.mimetype

        # Derive created/resource timestamps from hashed (archived)
        # resource revisions when any exist.
        revisions_with_hash = model.Session.query(model.ResourceRevision)\
                .filter_by(id=res.id)\
                .order_by(model.ResourceRevision.revision_timestamp)\
                .filter(model.ResourceRevision.hash != '').all()
        if revisions_with_hash:
            # these are not perfect but not far off
            fields['created'] = revisions_with_hash[0].revision_timestamp
            fields['resource_timestamp'] = revisions_with_hash[
                -1].revision_timestamp
        else:
            # Otherwise bound the timestamps by whichever archival
            # dates were recorded.
            fields['created'] = min(fields['updated'] or END_OF_TIME,
                                    fields['first_failure'] or END_OF_TIME,
                                    fields['last_success'] or END_OF_TIME)
            fields['resource_timestamp'] = max(
                fields['updated'] or START_OF_TIME, fields['first_failure']
                or START_OF_TIME, fields['last_success'] or START_OF_TIME)

        # Compare with any existing data in the Archival table
        archival = Archival.get_for_resource(res.id)
        if archival:
            changed = None
            for field, value in fields.items():
                if getattr(archival, field) != value:
                    # Mutation is gated on --write (dry-run support).
                    if options.write:
                        setattr(archival, field, value)
                    changed = True
            if not changed:
                add_stat('Already exists correctly in archival table', res,
                         stats)
                continue
            add_stat('Updated in archival table', res, stats)
        else:
            archival = Archival.create(res.id)
            if options.write:
                for field, value in fields.items():
                    setattr(archival, field, value)
                model.Session.add(archival)
            add_stat('Added to archival table', res, stats)

    print 'Summary\n', stats.report()
    if options.write:
        model.repo.commit_and_remove()
        print 'Written'