def openness_for_organization(organization=None, include_sub_organizations=False):
    '''Return openness (five stars) scores for an organization's datasets.

    :param organization: name or id of the organization
    :param include_sub_organizations: if True, also score datasets of all
        organizations beneath it in the hierarchy
    :raises p.toolkit.ObjectNotFound: if the organization does not exist
    :returns: dict with a per-dataset 'table', score counts and totals
    '''
    org = model.Group.get(organization)
    if not org:
        raise p.toolkit.ObjectNotFound

    if not include_sub_organizations:
        orgs = [org]
    else:
        orgs = lib.go_down_tree(org)

    context = {'model': model, 'session': model.Session, 'ignore_auth': True}
    score_counts = Counter()
    rows = []
    num_packages = 0
    for org in orgs:
        # NB org.packages() misses out many - see:
        # http://redmine.dguteam.org.uk/issues/1844
        pkgs = model.Session.query(model.Package) \
                    .filter_by(owner_org=org.id) \
                    .filter_by(state='active') \
                    .filter_by(private=False) \
                    .all()
        num_packages += len(pkgs)
        for pkg in pkgs:
            try:
                qa = p.toolkit.get_action('qa_package_openness_show')(
                    context, {'id': pkg.id})
            except p.toolkit.ObjectNotFound:
                # Fix: previously this was `return`, which silently aborted
                # the whole report (returning None) as soon as one package
                # had no QA info. Skip just that package instead.
                log.warning('No QA info for package %s', pkg.name)
                continue
            rows.append(OrderedDict((
                ('dataset_name', pkg.name),
                ('dataset_title', pkg.title),
                ('dataset_notes', lib.dataset_notes(pkg)),
                ('organization_name', org.name),
                ('organization_title', org.title),
                ('openness_score', qa['openness_score']),
                ('openness_score_reason', qa['openness_score_reason']),
            )))
            score_counts[qa['openness_score']] += 1

    # Scores of None (unscored) and 0 contribute no stars; only packages
    # whose score is not None count towards the average.
    total_stars = sum(k * v for k, v in score_counts.items() if k)
    num_pkgs_with_stars = sum(v for k, v in score_counts.items()
                              if k is not None)
    average_stars = round(float(total_stars) / num_pkgs_with_stars, 1) \
        if num_pkgs_with_stars else 0.0

    return {
        'table': rows,
        'score_counts': jsonify_counter(score_counts),
        'total_stars': total_stars,
        'average_stars': average_stars,
        'num_packages_scored': len(rows),
        'num_packages': num_packages,
    }
def openness_for_organization(organization=None, include_sub_organizations=False):
    '''Return openness (five stars) scores for an organization's datasets.

    :param organization: name or id of the organization
    :param include_sub_organizations: if True, also score datasets of all
        organizations beneath it in the hierarchy
    :raises p.toolkit.ObjectNotFound: if the organization does not exist
    :returns: dict with a per-dataset 'table', score counts and totals
    '''
    org = model.Group.get(organization)
    if not org:
        raise p.toolkit.ObjectNotFound

    if not include_sub_organizations:
        orgs = [org]
    else:
        orgs = lib.go_down_tree(org)

    context = {'model': model, 'session': model.Session, 'ignore_auth': True}
    score_counts = Counter()
    rows = []
    num_packages = 0
    for org in orgs:
        # NB org.packages() misses out many - see:
        # http://redmine.dguteam.org.uk/issues/1844
        # Fix: added the private=False filter for consistency with the
        # sibling definition of this function, which excludes private
        # datasets from the report.
        pkgs = model.Session.query(model.Package) \
                    .filter_by(owner_org=org.id) \
                    .filter_by(state='active') \
                    .filter_by(private=False) \
                    .all()
        num_packages += len(pkgs)
        for pkg in pkgs:
            try:
                qa = p.toolkit.get_action('qa_package_openness_show')(
                    context, {'id': pkg.id})
            except p.toolkit.ObjectNotFound:
                # Fix: previously this was `return`, which silently aborted
                # the whole report (returning None) as soon as one package
                # had no QA info. Skip just that package instead.
                log.warning('No QA info for package %s', pkg.name)
                continue
            rows.append(OrderedDict((
                ('dataset_name', pkg.name),
                ('dataset_title', pkg.title),
                ('dataset_notes', lib.dataset_notes(pkg)),
                ('organization_name', org.name),
                ('organization_title', org.title),
                ('openness_score', qa['openness_score']),
                ('openness_score_reason', qa['openness_score_reason']),
            )))
            score_counts[qa['openness_score']] += 1

    # Scores of None (unscored) and 0 contribute no stars; only packages
    # whose score is not None count towards the average.
    total_stars = sum(k * v for k, v in score_counts.items() if k)
    num_pkgs_with_stars = sum(v for k, v in score_counts.items()
                              if k is not None)
    average_stars = round(float(total_stars) / num_pkgs_with_stars, 1) \
        if num_pkgs_with_stars else 0.0

    return {
        'table': rows,
        'score_counts': jsonify_counter(score_counts),
        'total_stars': total_stars,
        'average_stars': average_stars,
        'num_packages_scored': len(rows),
        'num_packages': num_packages,
    }
def broken_links_for_organization(organization, include_sub_organizations=False):
    '''
    Returns a dictionary detailing broken resource links for the given
    organization (and, optionally, its sub-organizations).

    params:
      organization - name of an organization
      include_sub_organizations - if True, include datasets of every
        organization beneath it in the hierarchy

    Raises p.toolkit.ObjectNotFound if the organization does not exist.

    Returns:
    {'organization_name': 'cabinet-office',
     'organization_title:': 'Cabinet Office',
     'table': [
       {'package_name', 'package_title', 'resource_url', 'status', 'reason',
        'last_success', 'first_failure', 'failure_count', 'last_updated'}
      ...]
    '''
    # Imported here rather than at module level, presumably so the module
    # loads even when ckanext-archiver is not installed - TODO confirm.
    from ckanext.archiver.model import Archival

    org = model.Group.get(organization)
    if not org:
        raise p.toolkit.ObjectNotFound()

    name = org.name
    title = org.title

    # NOTE: `== True` is intentional - this builds a SQLAlchemy filter
    # expression, so `is True` would not work here.
    archivals = model.Session.query(Archival, model.Package, model.Group).\
        filter(Archival.is_broken == True).\
        join(model.Package, Archival.package_id == model.Package.id).\
        filter(model.Package.state == 'active').\
        join(model.Resource, Archival.resource_id == model.Resource.id).\
        filter(model.Resource.state == 'active')

    if not include_sub_organizations:
        org_ids = [org.id]
        archivals = archivals.filter(model.Package.owner_org == org.id)
    else:
        # We want any organization_id that is part of this organization's tree
        org_ids = ['%s' % organization.id
                   for organization in lib.go_down_tree(org)]
        archivals = archivals.filter(model.Package.owner_org.in_(org_ids))

    archivals = archivals.join(model.Group,
                               model.Package.owner_org == model.Group.id)

    results = []

    for archival, pkg, org in archivals.all():
        # NOTE(review): pkg and resource are re-fetched by id even though
        # pkg came from the joined query - possibly a workaround for stale
        # session state; confirm before simplifying.
        pkg = model.Package.get(archival.package_id)
        resource = model.Resource.get(archival.resource_id)

        # 'via' records where the dataset was harvested from, based on the
        # 'external_reference' extra.
        via = ''
        er = pkg.extras.get('external_reference', '')
        if er == 'ONSHUB':
            via = "Stats Hub"
        elif er.startswith("DATA4NR"):
            via = "Data4nr"

        # Look up the resource as it was at the time it was archived, so
        # the reported URL matches what was actually checked; fall back to
        # the current resource if no matching revision is found.
        archived_resource = model.Session.query(model.ResourceRevision)\
            .filter_by(id=resource.id)\
            .filter_by(revision_timestamp=archival.resource_timestamp)\
            .first() or resource
        row_data = OrderedDict((
            ('dataset_title', pkg.title),
            ('dataset_name', pkg.name),
            ('dataset_notes', lib.dataset_notes(pkg)),
            ('organization_title', org.title),
            ('organization_name', org.name),
            ('resource_position', resource.position),
            ('resource_id', resource.id),
            ('resource_url', archived_resource.url),
            # Flags whether the resource URL has changed since it was
            # archived (i.e. the broken-link info may be out of date).
            ('url_up_to_date', resource.url == archived_resource.url),
            ('via', via),
            ('first_failure', archival.first_failure.isoformat()
                if archival.first_failure else None),
            ('last_updated', archival.updated.isoformat()
                if archival.updated else None),
            ('last_success', archival.last_success.isoformat()
                if archival.last_success else None),
            ('url_redirected_to', archival.url_redirected_to),
            ('reason', archival.reason),
            ('status', archival.status),
            ('failure_count', archival.failure_count),
        ))
        results.append(row_data)

    # A package counts once however many of its resources are broken.
    num_broken_packages = archivals.distinct(model.Package.name).count()
    num_broken_resources = len(results)

    # Get total number of packages & resources
    num_packages = model.Session.query(model.Package)\
        .filter(model.Package.owner_org.in_(org_ids))\
        .filter_by(state='active')\
        .count()
    num_resources = model.Session.query(model.Resource)\
        .filter_by(state='active')
    # CKAN <= 2.2.x keeps Resources linked to Packages via ResourceGroup;
    # later versions link them directly.
    if p.toolkit.check_ckan_version(max_version='2.2.99'):
        num_resources = num_resources.join(model.ResourceGroup)
    num_resources = num_resources \
        .join(model.Package)\
        .filter(model.Package.owner_org.in_(org_ids))\
        .filter_by(state='active').count()

    return {'organization_name': name,
            'organization_title': title,
            'num_broken_packages': num_broken_packages,
            'num_broken_resources': num_broken_resources,
            'num_packages': num_packages,
            'num_resources': num_resources,
            'broken_package_percent': lib.percent(num_broken_packages, num_packages),
            'broken_resource_percent': lib.percent(num_broken_resources, num_resources),
            'table': results}
def licence_report(organization=None, include_sub_organizations=False):
    '''
    Returns a dictionary detailing licences for datasets in the
    organisation specified, and optionally sub organizations.

    :param organization: name of an organization, or None for all datasets
    :param include_sub_organizations: if True, include datasets of every
        organization beneath it in the hierarchy
    :raises p.toolkit.ObjectNotFound: if the organization does not exist
    '''
    # Get packages
    if organization:
        top_org = model.Group.by_name(organization)
        if not top_org:
            raise p.toolkit.ObjectNotFound('Publisher not found')

        if include_sub_organizations:
            orgs = lib.go_down_tree(top_org)
        else:
            orgs = [top_org]

        pkgs = set()
        for org in orgs:
            org_pkgs = model.Session.query(model.Package)\
                            .filter_by(state='active')
            # Fix: this previously filtered by the top-level `organization`
            # name on every iteration, so with include_sub_organizations
            # the same top-org packages were fetched repeatedly and
            # sub-organization datasets were never included. Filter by the
            # current org in the tree instead.
            org_pkgs = lib.filter_by_organizations(
                org_pkgs, org.name,
                include_sub_organizations=False)\
                .all()
            pkgs |= set(org_pkgs)
    else:
        pkgs = model.Session.query(model.Package)\
                    .filter_by(state='active')\
                    .all()

    # Get their licences
    packages_by_licence = collections.defaultdict(list)
    rows = []
    num_pkgs = 0

    for pkg in pkgs:
        if asbool(pkg.extras.get('unpublished')) is True:
            # Ignore unpublished datasets
            continue
        # Group by the triple of (licence id, registered licence title,
        # free-text 'licence' extra) so variants are reported separately.
        licence_tuple = (pkg.license_id or '',
                         pkg.license.title if pkg.license else '',
                         pkg.extras.get('licence', ''))
        packages_by_licence[licence_tuple].append((pkg.name, pkg.title))
        num_pkgs += 1

    # Most-used licences first.
    for licence_tuple, dataset_tuples in sorted(packages_by_licence.items(),
                                                key=lambda x: -len(x[1])):
        license_id, license_title, licence = licence_tuple
        dataset_tuples.sort(key=lambda x: x[0])
        dataset_names, dataset_titles = zip(*dataset_tuples)
        licence_dict = OrderedDict((
            ('license_id', license_id),
            ('license_title', license_title),
            ('licence', licence),
            ('dataset_titles', '|'.join(dataset_titles)),
            ('dataset_names', ' '.join(dataset_names)),
        ))
        rows.append(licence_dict)

    return {
        'num_datasets': num_pkgs,
        'num_licences': len(rows),
        'table': rows,
    }