Пример #1
0
 def _get_pkg(self, pkg_ref):
     if not self._pkg_cache.has_key(pkg_ref):
         pkg = self.ckanclient.package_entity_get(pkg_ref)
         if self.ckanclient.last_status != 200:
             raise ScriptError("Could not get package ID %s: %r" % (pkg_ref, self.ckanclient.last_status))
         remove_readonly_fields(pkg)
         self._pkg_cache[pkg_ref] = pkg
     return self._pkg_cache[pkg_ref]
Пример #2
0
 def _get_pkg(self, pkg_ref):
     if not self._pkg_cache.has_key(pkg_ref):
         pkg = self.ckanclient.package_entity_get(pkg_ref)
         if self.ckanclient.last_status != 200:
             raise ScriptError('Could not get package ID %s: %r' % \
                   (pkg_ref, self.ckanclient.last_status))
         remove_readonly_fields(pkg)
         self._pkg_cache[pkg_ref] = pkg
     return self._pkg_cache[pkg_ref]
Пример #3
0
    def run(self):
        """Reassign ONS 'Education' packages to the Department for Education.

        Searches for packages matching 'Education' and then keeps only those
        that:

        * have 'ONS' in their ``import_source`` extra,
        * are in the 'active' state,
        * have 'Education' as the only 'Source agency' line in their notes,
        * are not already assigned to the 'Department for Education'.

        Each remaining package is printed and, unless ``self.dry_run`` is
        set, its department, agency and author are rewritten and the package
        is PUT back. A summary of processed and rejected packages is printed
        at the end.
        """
        limit = 100

        def search(page=None):
            # Broad free-text query; the hits are filtered precisely below.
            opts = {'limit': limit}
            if page is not None:
                opts['offset'] = page * limit
            return self.ckanclient.package_search('Education', opts)

        res = search()
        print('Found %i packages possibly related.' % res['count'])
        pkgs_done = []
        pkgs_rejected = defaultdict(list) # reason: [pkgs]
        # Round the page count up: flooring it skipped the last, partially
        # filled page, and skipped everything when there were fewer than
        # `limit` hits.
        num_pages = (res['count'] + limit - 1) // limit
        for page in range(num_pages):
            res = search(page)
            pkg_refs = res['results']
            for pkg_ref in pkg_refs:
                pkg = self.ckanclient.package_entity_get(pkg_ref)
                if 'ONS' not in pkg['extras'].get('import_source', ''):
                    pkgs_rejected['Not imported from ONS'].append(pkg)
                    continue
                if pkg.get('state', 'active') != 'active':
                    pkgs_rejected['Package state = %r' % pkg.get('state')].append(pkg)
                    continue
                # Collect every 'Source agency' line from the notes; more
                # than one such line yields an 'A|B' value that is rejected.
                source_agency = '|'.join([line.replace('Source agency:', '').strip() for line in pkg['notes'].split('\n') if 'Source agency' in line])
                if source_agency != 'Education':
                    pkgs_rejected['Source agency = %r' % source_agency].append(pkg)
                    continue
                if 'Department for Education' in pkg['extras'].get('department', ''):
                    pkgs_rejected['Department = %r' % pkg['extras'].get('department', '')].append(pkg)
                    continue

                pkg_name = pkg['name']
                dept = pkg['extras'].get('department')
                agency = pkg['extras'].get('agency')
                author = pkg['author']
                print('%s :\n %r %r %r' % (pkg_name, dept, agency, author))
                if not self.dry_run:
                    pkg['extras']['department'] = 'Department for Education'
                    pkg['extras']['agency'] = ''
                    pkg['author'] = 'Department for Education'
                    remove_readonly_fields(pkg)
                    self.ckanclient.package_entity_put(pkg)
                    print('...done')
                pkgs_done.append(pkg)
        print('Processed %i packages' % len(pkgs_done))
        print('Rejected packages:')
        for reason, pkgs in pkgs_rejected.items():
            print('  %i: %s' % (len(pkgs), reason))
Пример #4
0
    def run(self):
        """Mark active ONS Hub packages that have no resources as deleted.

        Walks every package reference returned by
        ``self.ckanclient.package_register_get()``. Packages that are not
        active, or whose ``external_reference`` extra is not ``'ONSHUB'``,
        are recorded in ``pkgs_rejected`` and skipped. A package that was
        changed is PUT back unless ``self.dry_run`` is set.

        NOTE(review): this excerpt ends before the handler of the outer
        ``try`` inside the loop, so whatever reporting or clean-up follows
        is not documented here.
        """
        pkgs_done = []
        pkgs_rejected = defaultdict(list) # reason: [pkgs]
        all_pkgs = self.ckanclient.package_register_get()
        log.info('Working on %i packages', len(all_pkgs))
        for pkg_ref in all_pkgs:
            log.info('Package: %s', pkg_ref)
            # NOTE(review): the except/finally clause of this outer try is
            # outside the visible excerpt.
            try:
                try:
                    pkg = self.ckanclient.package_entity_get(pkg_ref)
                except CkanApiError, e:
                    # A failed fetch is recorded and the loop moves on.
                    log.error('Could not get: %r' % e)
                    pkgs_rejected['Could not get package: %r' % e].append(pkg_ref)
                    continue
                # Snapshot used below to detect whether anything changed.
                pkg_before_changes = copy.deepcopy(pkg)

                if pkg['state'] != 'active':
                    msg = 'Not active (%s)' % pkg['state']
                    log.info('...%s: %r' % (msg, pkg['name']))
                    pkgs_rejected[msg].append(pkg)
                    continue             
                if pkg['extras'].get('external_reference') != 'ONSHUB':
                    msg = 'Not ONS'
                    log.info('...%s: %r' % (msg, pkg['name']))
                    pkgs_rejected[msg].append(pkg)
                    continue             

                # The only modification made here: a package with an empty
                # resource list is flagged as deleted.
                if pkg['resources'] == []:
                    pkg['state'] = 'deleted'
                
                if pkg == pkg_before_changes:
                    log.info('...package unchanged: %r' % pkg['name'])
                    pkgs_rejected['Package unchanged'].append(pkg)
                    continue             
                if not self.dry_run:
                    # presumably strips fields the API refuses on PUT -
                    # TODO confirm against remove_readonly_fields
                    remove_readonly_fields(pkg)
                    try:
                        self.ckanclient.package_entity_put(pkg)
                    except CkanApiError, e:
                        log.error('Could not put: %r' % e)
                        pkgs_rejected['Could not put package: %r' % e].append(pkg_ref)
                        continue
                    log.info('...done')
                # Changed packages count as done in dry-run mode as well.
                pkgs_done.append(pkg)
Пример #5
0
    def run(self):
        """Rewrite package extras according to ``mapped_attributes``.

        For every package reference from
        ``self.ckanclient.package_register_get()``, each attribute named in
        ``mapped_attributes`` that has a non-empty value in the package's
        extras is looked up in that attribute's mapping. A truthy mapped
        value replaces the original; otherwise a warning is logged. Changed
        packages are PUT back unless ``self.dry_run`` is set.

        NOTE(review): this excerpt ends before the handler of the outer
        ``try`` inside the loop, so what follows it is not documented here.
        """
        pkgs_done = []
        pkgs_rejected = defaultdict(list) # reason: [pkgs]
        all_pkgs = self.ckanclient.package_register_get()
        log.info('Working on %i packages', len(all_pkgs))
        for pkg_ref in all_pkgs:
            log.info('Package: %s', pkg_ref)
            # NOTE(review): the except/finally clause of this outer try is
            # outside the visible excerpt.
            try:
                try:
                    pkg = self.ckanclient.package_entity_get(pkg_ref)
                except CkanApiError, e:
                    # A failed fetch is recorded and the loop moves on.
                    log.error('Could not get: %r' % e)
                    pkgs_rejected['Could not get package: %r' % e].append(pkg_ref)
                    continue
                # Snapshot used below to detect whether anything changed.
                pkg_before_changes = copy.deepcopy(pkg)

                for attribute in mapped_attributes:
                    orig_value = pkg['extras'].get(attribute)
                    if not orig_value:
                        # Missing or empty extras are left alone.
                        continue
                    mapped_value = mapped_attributes[attribute].get(orig_value)
                    if mapped_value:
                        pkg['extras'][attribute] = mapped_value
                        log.info('%s: %r -> %r', \
                                 attribute, orig_value, mapped_value)
                    else:
                        log.warn('Invalid value for %r: %r', \
                                 attribute, orig_value)

                if pkg == pkg_before_changes:
                    log.info('...package unchanged: %r' % pkg['name'])
                    # The rejection key includes the package name, so each
                    # unchanged package gets its own entry.
                    pkgs_rejected['Package unchanged: %r' % pkg['name']].append(pkg)
                    continue                    
                if not self.dry_run:
                    # presumably strips fields the API refuses on PUT -
                    # TODO confirm against remove_readonly_fields
                    remove_readonly_fields(pkg)
                    try:
                        self.ckanclient.package_entity_put(pkg)
                    except CkanApiError, e:
                        log.error('Could not put: %r' % e)
                        pkgs_rejected['Could not put package: %r' % e].append(pkg_ref)
                        continue
                    log.info('...done')
                # Changed packages count as done in dry-run mode as well.
                pkgs_done.append(pkg)
    def run(self):
        """Rewrite package extras according to ``mapped_attributes``.

        Iterates over every package reference returned by
        ``self.ckanclient.package_register_get()``. For each attribute in
        ``mapped_attributes`` with a non-empty value in the package's
        extras, the value is replaced by its truthy mapped counterpart, or a
        warning is logged when there is none. Changed packages are PUT back
        unless ``self.dry_run`` is set.

        NOTE(review): this excerpt ends before the handler of the outer
        ``try`` inside the loop, so what follows it is not documented here.
        """
        pkgs_done = []
        pkgs_rejected = defaultdict(list)  # reason: [pkgs]
        all_pkgs = self.ckanclient.package_register_get()
        log.info("Working on %i packages", len(all_pkgs))
        for pkg_ref in all_pkgs:
            log.info("Package: %s", pkg_ref)
            # NOTE(review): the except/finally clause of this outer try is
            # outside the visible excerpt.
            try:
                try:
                    pkg = self.ckanclient.package_entity_get(pkg_ref)
                except CkanApiError, e:
                    # A failed fetch is recorded and the loop moves on.
                    log.error("Could not get: %r" % e)
                    pkgs_rejected["Could not get package: %r" % e].append(pkg_ref)
                    continue
                # Snapshot used below to detect whether anything changed.
                pkg_before_changes = copy.deepcopy(pkg)

                for attribute in mapped_attributes:
                    orig_value = pkg["extras"].get(attribute)
                    if not orig_value:
                        # Missing or empty extras are left alone.
                        continue
                    mapped_value = mapped_attributes[attribute].get(orig_value)
                    if mapped_value:
                        pkg["extras"][attribute] = mapped_value
                        log.info("%s: %r -> %r", attribute, orig_value, mapped_value)
                    else:
                        log.warn("Invalid value for %r: %r", attribute, orig_value)

                if pkg == pkg_before_changes:
                    log.info("...package unchanged: %r" % pkg["name"])
                    # The rejection key includes the package name, so each
                    # unchanged package gets its own entry.
                    pkgs_rejected["Package unchanged: %r" % pkg["name"]].append(pkg)
                    continue
                if not self.dry_run:
                    # presumably strips fields the API refuses on PUT -
                    # TODO confirm against remove_readonly_fields
                    remove_readonly_fields(pkg)
                    try:
                        self.ckanclient.package_entity_put(pkg)
                    except CkanApiError, e:
                        log.error("Could not put: %r" % e)
                        pkgs_rejected["Could not put package: %r" % e].append(pkg_ref)
                        continue
                    log.info("...done")
                # Changed packages count as done in dry-run mode as well.
                pkgs_done.append(pkg)
Пример #7
0
    def run(self):
        """Normalise mapped extras and fill in the publisher fields.

        For every package reference from
        ``self.ckanclient.package_register_get()``:

        1. Each attribute in ``mapped_attributes`` with a non-empty extras
           value is mapped, trying the exact value, then the lower-cased and
           stripped value, then accepting the lower-cased value if it is
           already one of the mapping's target values.
        2. If ``self.update_all`` is set or the package has no
           ``published_by`` extra, ``published_by`` / ``published_via`` are
           derived from the ``department`` and ``agency`` extras through
           ``self.get_organisation``.

        Changed packages are PUT back unless ``self.dry_run`` is set.

        NOTE(review): this excerpt ends before the handler of the outer
        ``try`` inside the loop, so what follows it is not documented here.
        """
        pkgs_done = []
        pkgs_rejected = defaultdict(list) # reason: [pkgs]
        all_pkgs = self.ckanclient.package_register_get()
        log.info('Working on %i packages', len(all_pkgs))
        for pkg_ref in all_pkgs:
            log.info('Package: %s', pkg_ref)
            # NOTE(review): the except/finally clause of this outer try is
            # outside the visible excerpt.
            try:
                try:
                    pkg = self.ckanclient.package_entity_get(pkg_ref)
                except CkanApiError, e:
                    # A failed fetch is recorded and the loop moves on.
                    log.error('Could not get: %r' % e)
                    pkgs_rejected['Could not get package: %r' % e].append(pkg_ref)
                    continue
                # Snapshot used below to detect whether anything changed.
                pkg_before_changes = copy.deepcopy(pkg)

                # mapped attributes
                for attribute in mapped_attributes:
                    orig_value = pkg['extras'].get(attribute)
                    if not orig_value:
                        continue
                    mapped_value = mapped_attributes[attribute].get(orig_value)
                    if not mapped_value:
                        # Fallback 1: retry with the value lower-cased and stripped.
                        mapped_value = mapped_attributes[attribute].get(orig_value.lower().strip())
                        if not mapped_value:
                            # Fallback 2: the lower-cased value is already a
                            # mapping target, so use it as-is.
                            if orig_value.lower() in mapped_attributes[attribute].values():
                                mapped_value = orig_value.lower()
                    if mapped_value and orig_value != mapped_value:
                        pkg['extras'][attribute] = mapped_value
                        log.info('%s: %r -> %r', \
                                 attribute, orig_value, mapped_value)
                    else:
                        # NOTE(review): this branch is also taken when
                        # mapped_value equals orig_value, so the warning is
                        # logged for values that need no change.
                        log.warn('Invalid value for %r: %r', \
                                 attribute, orig_value)

                # create publisher fields
                if self.update_all or not pkg['extras'].get('published_by'):
                    dept = pkg['extras'].get('department')
                    agency = pkg['extras'].get('agency')
                    if dept:
                        # Department publishes; the agency (if any) is the
                        # "via" organisation.
                        pub_by = self.get_organisation(dept)                
                        pub_via = self.get_organisation(agency) if agency else ''
                    else:
                        # No department: the agency (if any) is the publisher.
                        pub_by = self.get_organisation(agency) if agency else ''
                        pub_via = ''
                        # NOTE(review): pub_via is always '' here, so the
                        # `or pub_via` part never affects the result.
                        if not pub_by or pub_via:
                            log.warn('No publisher for package: %s', pkg['name'])
                    log.info('%s:\n  %r/%r ->\n  %r/%r', \
                             pkg['name'], dept, agency, pub_by, pub_via)
                    pkg['extras']['published_by'] = pub_by
                    pkg['extras']['published_via'] = pub_via
                
                if pkg == pkg_before_changes:
                    log.info('...package unchanged: %r' % pkg['name'])
                    pkgs_rejected['Package unchanged'].append(pkg)
                    continue             
                if not self.dry_run:
                    # presumably strips fields the API refuses on PUT -
                    # TODO confirm against remove_readonly_fields
                    remove_readonly_fields(pkg)
                    try:
                        self.ckanclient.package_entity_put(pkg)
                    except CkanApiError, e:
                        log.error('Could not put: %r' % e)
                        pkgs_rejected['Could not put package: %r' % e].append(pkg_ref)
                        continue
                    log.info('...done')
                # Changed packages count as done in dry-run mode as well.
                pkgs_done.append(pkg)
Пример #8
0
    def run(self):
        """Normalise mapped extras and fill in the publisher fields.

        For every package reference from
        ``self.ckanclient.package_register_get()``:

        1. Each attribute in ``mapped_attributes`` with a non-empty extras
           value is mapped, trying the exact value, then the lower-cased and
           stripped value, then accepting the lower-cased value if it is
           already one of the mapping's target values.
        2. If ``self.update_all`` is set or the package has no
           ``published_by`` extra, ``published_by`` / ``published_via`` are
           derived from the ``department`` and ``agency`` extras through
           ``self.get_organisation``.

        Changed packages are PUT back unless ``self.dry_run`` is set.

        NOTE(review): this excerpt ends before the handler of the outer
        ``try`` inside the loop, so what follows it is not documented here.
        """
        pkgs_done = []
        pkgs_rejected = defaultdict(list)  # reason: [pkgs]
        all_pkgs = self.ckanclient.package_register_get()
        log.info('Working on %i packages', len(all_pkgs))
        for pkg_ref in all_pkgs:
            log.info('Package: %s', pkg_ref)
            # NOTE(review): the except/finally clause of this outer try is
            # outside the visible excerpt.
            try:
                try:
                    pkg = self.ckanclient.package_entity_get(pkg_ref)
                except CkanApiError, e:
                    # A failed fetch is recorded and the loop moves on.
                    log.error('Could not get: %r' % e)
                    pkgs_rejected['Could not get package: %r' %
                                  e].append(pkg_ref)
                    continue
                # Snapshot used below to detect whether anything changed.
                pkg_before_changes = copy.deepcopy(pkg)

                # mapped attributes
                for attribute in mapped_attributes:
                    orig_value = pkg['extras'].get(attribute)
                    if not orig_value:
                        continue
                    mapped_value = mapped_attributes[attribute].get(orig_value)
                    if not mapped_value:
                        # Fallback 1: retry with the value lower-cased and stripped.
                        mapped_value = mapped_attributes[attribute].get(
                            orig_value.lower().strip())
                        if not mapped_value:
                            # Fallback 2: the lower-cased value is already a
                            # mapping target, so use it as-is.
                            if orig_value.lower(
                            ) in mapped_attributes[attribute].values():
                                mapped_value = orig_value.lower()
                    if mapped_value and orig_value != mapped_value:
                        pkg['extras'][attribute] = mapped_value
                        log.info('%s: %r -> %r', \
                                 attribute, orig_value, mapped_value)
                    else:
                        # NOTE(review): this branch is also taken when
                        # mapped_value equals orig_value, so the warning is
                        # logged for values that need no change.
                        log.warn('Invalid value for %r: %r', \
                                 attribute, orig_value)

                # create publisher fields
                if self.update_all or not pkg['extras'].get('published_by'):
                    dept = pkg['extras'].get('department')
                    agency = pkg['extras'].get('agency')
                    if dept:
                        # Department publishes; the agency (if any) is the
                        # "via" organisation.
                        pub_by = self.get_organisation(dept)
                        pub_via = self.get_organisation(
                            agency) if agency else ''
                    else:
                        # No department: the agency (if any) is the publisher.
                        pub_by = self.get_organisation(
                            agency) if agency else ''
                        pub_via = ''
                        # NOTE(review): pub_via is always '' here, so the
                        # `or pub_via` part never affects the result.
                        if not pub_by or pub_via:
                            log.warn('No publisher for package: %s',
                                     pkg['name'])
                    log.info('%s:\n  %r/%r ->\n  %r/%r', \
                             pkg['name'], dept, agency, pub_by, pub_via)
                    pkg['extras']['published_by'] = pub_by
                    pkg['extras']['published_via'] = pub_via

                if pkg == pkg_before_changes:
                    log.info('...package unchanged: %r' % pkg['name'])
                    pkgs_rejected['Package unchanged'].append(pkg)
                    continue
                if not self.dry_run:
                    # presumably strips fields the API refuses on PUT -
                    # TODO confirm against remove_readonly_fields
                    remove_readonly_fields(pkg)
                    try:
                        self.ckanclient.package_entity_put(pkg)
                    except CkanApiError, e:
                        log.error('Could not put: %r' % e)
                        pkgs_rejected['Could not put package: %r' %
                                      e].append(pkg_ref)
                        continue
                    log.info('...done')
                # Changed packages count as done in dry-run mode as well.
                pkgs_done.append(pkg)
    def run(self):
        """Reassign ONS 'Education' packages to the Department for Education.

        Searches for packages matching 'Education' and processes only those
        that have 'ONS' in their ``import_source`` extra, are 'active', have
        'Education' as the only 'Source agency' line in their notes, and are
        not already assigned to the 'Department for Education'.

        Each accepted package is printed and, unless ``self.dry_run`` is
        set, its department, agency and author are rewritten and the package
        is PUT back. Totals of processed and rejected packages are printed
        at the end.
        """
        limit = 100

        def search(page=None):
            # Broad free-text query; the hits are filtered precisely below.
            opts = {'limit': limit}
            if page is not None:
                opts['offset'] = page * limit
            return self.ckanclient.package_search('Education', opts)

        res = search()
        print('Found %i packages possibly related.' % res['count'])
        pkgs_done = []
        pkgs_rejected = defaultdict(list)  # reason: [pkgs]
        # Ceiling division: flooring the page count dropped the final,
        # partially filled page (and every package when count < limit).
        num_pages = (res['count'] + limit - 1) // limit
        for page in range(num_pages):
            res = search(page)
            pkg_refs = res['results']
            for pkg_ref in pkg_refs:
                pkg = self.ckanclient.package_entity_get(pkg_ref)
                if 'ONS' not in pkg['extras'].get('import_source', ''):
                    pkgs_rejected['Not imported from ONS'].append(pkg)
                    continue
                if pkg.get('state', 'active') != 'active':
                    pkgs_rejected['Package state = %r' %
                                  pkg.get('state')].append(pkg)
                    continue
                # Collect every 'Source agency' line from the notes; more
                # than one such line yields an 'A|B' value that is rejected.
                source_agency = '|'.join([
                    line.replace('Source agency:', '').strip()
                    for line in pkg['notes'].split('\n')
                    if 'Source agency' in line
                ])
                if source_agency != 'Education':
                    pkgs_rejected['Source agency = %r' %
                                  source_agency].append(pkg)
                    continue
                if 'Department for Education' in pkg['extras'].get(
                        'department', ''):
                    pkgs_rejected['Department = %r' % pkg['extras'].get(
                        'department', '')].append(pkg)
                    continue

                pkg_name = pkg['name']
                dept = pkg['extras'].get('department')
                agency = pkg['extras'].get('agency')
                author = pkg['author']
                print('%s :\n %r %r %r' % (pkg_name, dept, agency, author))
                if not self.dry_run:
                    pkg['extras']['department'] = 'Department for Education'
                    pkg['extras']['agency'] = ''
                    pkg['author'] = 'Department for Education'
                    remove_readonly_fields(pkg)
                    self.ckanclient.package_entity_put(pkg)
                    print('...done')
                pkgs_done.append(pkg)
        print('Processed %i packages' % len(pkgs_done))
        print('Rejected packages:')
        for reason, pkgs in pkgs_rejected.items():
            print('  %i: %s' % (len(pkgs), reason))
Пример #10
0
    def run(self):
        """Rename and delete every active package imported from ONS.

        Iterates over the sorted package references from
        ``self.ckanclient.package_register_get()``. Packages that are not
        active, or whose ``import_source`` extra does not start with
        ``'ONS'``, are recorded in ``pkgs_rejected`` and skipped. The rest
        get an underscore-prefixed name and the state ``'deleted'``, and are
        PUT back under their existing name unless ``self.dry_run`` is set.

        An API error stops the loop unless ``self.force`` is set, in which
        case the package is skipped.

        NOTE(review): this excerpt ends before the handler of the outer
        ``try`` inside the loop, so what follows it is not documented here.
        """
        pkgs_done = []
        pkgs_rejected = defaultdict(list)  # reason: [pkgs]
        all_pkgs = sorted(self.ckanclient.package_register_get())
        log.info('Working on %i packages', len(all_pkgs))
        for pkg_ref in all_pkgs:
            log.info('Package: %s', pkg_ref)
            # NOTE(review): the except/finally clause of this outer try is
            # outside the visible excerpt.
            try:
                try:
                    pkg = self.ckanclient.package_entity_get(pkg_ref)
                except CkanApiError, e:
                    log.error('Could not get: %r' % e)
                    pkgs_rejected['Could not get package: %r' %
                                  e].append(pkg_ref)
                    # With force set, carry on with the next package;
                    # otherwise abandon the whole run.
                    if self.force:
                        continue
                    else:
                        log.error('Exiting due to error')
                        break
                # Snapshot used below to detect whether anything changed.
                pkg_before_changes = copy.deepcopy(pkg)

                if pkg['state'] != 'active':
                    msg = 'Not active (%s)' % pkg['state']
                    log.info('...%s: %r' % (msg, pkg['name']))
                    pkgs_rejected[msg].append(pkg)
                    continue
                is_ons = pkg['extras'].get('import_source',
                                           '').startswith('ONS')
                if not is_ons:
                    msg = 'Not ONS'
                    log.info('...%s: %r' % (msg, pkg['name']))
                    pkgs_rejected[msg].append(pkg)
                    continue

                # comment out name
                # The old name is cut to 99 characters before the '_' prefix
                # is added, so the new name is at most 100 characters long.
                existing_name = pkg['name']
                pkg['name'] = '_' + pkg['name'][:99]

                # delete
                pkg['state'] = 'deleted'

                # NOTE(review): name and state were both just changed (state
                # was 'active' above), so this comparison looks like it can
                # never be true.
                if pkg == pkg_before_changes:
                    log.info('...package unchanged: %r' % pkg['name'])
                    pkgs_rejected['Package unchanged'].append(pkg)
                    continue
                if not self.dry_run:
                    # presumably strips fields the API refuses on PUT -
                    # TODO confirm against remove_readonly_fields
                    remove_readonly_fields(pkg)
                    try:
                        # Addressed by the old name because pkg['name'] now
                        # holds the new one.
                        self.ckanclient.package_entity_put(
                            pkg, package_name=existing_name)
                    except CkanApiError, e:
                        log.error('Could not put: %r' % e)
                        pkgs_rejected['Could not PUT package: %r' %
                                      e].append(pkg_ref)
                        if self.force:
                            continue
                        else:
                            log.error('Exiting due to error')
                            break
                    # just check the state is correct as older CKANs don't
                    # work properly
                    if self.ckanclient.last_message['state'] != pkg['state'] and \
                       pkg['state'] == 'deleted':
                        self.ckanclient.package_entity_delete(pkg['name'])
                        log.info('...deleted separately')
                    log.info('...done')
                # Packages count as done in dry-run mode as well.
                pkgs_done.append(pkg)
Пример #11
0
    def run(self):
        """Rename and delete every active package imported from ONS.

        Iterates over the sorted package references from
        ``self.ckanclient.package_register_get()``. Packages that are not
        active, or whose ``import_source`` extra does not start with
        ``'ONS'``, are recorded in ``pkgs_rejected`` and skipped. The rest
        get an underscore-prefixed name and the state ``'deleted'``, and are
        PUT back under their existing name unless ``self.dry_run`` is set.

        An API error stops the loop unless ``self.force`` is set, in which
        case the package is skipped.

        NOTE(review): this excerpt ends before the handler of the outer
        ``try`` inside the loop, so what follows it is not documented here.
        """
        pkgs_done = []
        pkgs_rejected = defaultdict(list) # reason: [pkgs]
        all_pkgs = sorted(self.ckanclient.package_register_get())
        log.info('Working on %i packages', len(all_pkgs))
        for pkg_ref in all_pkgs:
            log.info('Package: %s', pkg_ref)
            # NOTE(review): the except/finally clause of this outer try is
            # outside the visible excerpt.
            try:
                try:
                    pkg = self.ckanclient.package_entity_get(pkg_ref)
                except CkanApiError, e:
                    log.error('Could not get: %r' % e)
                    pkgs_rejected['Could not get package: %r' % e].append(pkg_ref)
                    # With force set, carry on with the next package;
                    # otherwise abandon the whole run.
                    if self.force:
                        continue
                    else:
                        log.error('Exiting due to error')
                        break
                # Snapshot used below to detect whether anything changed.
                pkg_before_changes = copy.deepcopy(pkg)

                if pkg['state'] != 'active':
                    msg = 'Not active (%s)' % pkg['state']
                    log.info('...%s: %r' % (msg, pkg['name']))
                    pkgs_rejected[msg].append(pkg)
                    continue
                is_ons = pkg['extras'].get('import_source', '').startswith('ONS')
                if not is_ons:
                    msg = 'Not ONS'
                    log.info('...%s: %r' % (msg, pkg['name']))
                    pkgs_rejected[msg].append(pkg)
                    continue

                # comment out name
                # The old name is cut to 99 characters before the '_' prefix
                # is added, so the new name is at most 100 characters long.
                existing_name = pkg['name']
                pkg['name'] = '_' + pkg['name'][:99]

                # delete
                pkg['state'] = 'deleted'
                
                # NOTE(review): name and state were both just changed (state
                # was 'active' above), so this comparison looks like it can
                # never be true.
                if pkg == pkg_before_changes:
                    log.info('...package unchanged: %r' % pkg['name'])
                    pkgs_rejected['Package unchanged'].append(pkg)
                    continue             
                if not self.dry_run:
                    # presumably strips fields the API refuses on PUT -
                    # TODO confirm against remove_readonly_fields
                    remove_readonly_fields(pkg)
                    try:
                        # Addressed by the old name because pkg['name'] now
                        # holds the new one.
                        self.ckanclient.package_entity_put(pkg,
                                                           package_name=existing_name)
                    except CkanApiError, e:
                        log.error('Could not put: %r' % e)
                        pkgs_rejected['Could not PUT package: %r' % e].append(pkg_ref)
                        if self.force:
                            continue
                        else:
                            log.error('Exiting due to error')
                            break
                    # just check the state is correct as older CKANs don't
                    # work properly
                    if self.ckanclient.last_message['state'] != pkg['state'] and \
                       pkg['state'] == 'deleted':
                        self.ckanclient.package_entity_delete(pkg['name'])
                        log.info('...deleted separately')
                    log.info('...done')
                # Packages count as done in dry-run mode as well.
                pkgs_done.append(pkg)