def check_related_publisher_properties():
    """Assert the datasets, ancestry, descendants and members of a publisher.

    Takes no arguments and returns nothing.  ``publisher_name``, ``model``,
    ``publib`` and ``assert_equal`` are resolved from outside this function.
    The checks run in order: active datasets, membership of the
    'dept-health' tree, expected child publishers, then admins and editors.
    """
    publisher = model.Group.by_name(publisher_name)

    # datasets
    dataset_names = set(pkg.name for pkg in publisher.active_packages())
    assert_equal(dataset_names, set([u'directgov-cota']))

    # parents
    dept_health = model.Group.by_name('dept-health')
    names_under_dept_health = [
        grp.name for grp in publib.go_down_tree(dept_health)]
    assert publisher_name in names_under_dept_health, names_under_dept_health

    # children
    descendant_names = set(
        grp.name for grp in publib.go_down_tree(publisher))
    expected_children = set([u'newham-primary-care-trust',
                             u'barnsley-primary-care-trust'])
    assert expected_children <= descendant_names, descendant_names

    # admins & editors
    def member_names(capacity):
        # Names of the users attached to the publisher in the given capacity.
        members = publisher.members_of_type(model.User, capacity=capacity)
        return set(user.name for user in members)

    assert_equal(member_names('admin'), set(('nhsadmin',)))
    assert_equal(member_names('editor'), set(('nhseditor', 'user_d101')))
def guess_theme(commit): from ckan import model from ckanext.dgu.lib import publisher as publib log = global_log for k, v in publisher_themes.iteritems(): updated = 0 pubs = list(publib.go_down_tree(model.Group.get(k))) print "Processing %d publishers from %s" % (len(pubs), k) for publisher in pubs: packages = publisher.members_of_type( model.Package).filter(model.Package.state == 'active') print "\r", " " * 80, # blank the line print "\rProcessing %s" % publisher.name, for package in packages: if 'spend' in package.name or 'financ' in package.name: continue if not 'theme-primary' in package.extras or package.extras[ 'theme-primary'] == '': package.extras['theme-primary'] = v model.Session.add(package) updated = updated + 1 print "\nWe updated %d themes under %s" % (updated, k) if commit.lower() == 'y': print "Committing results" model.Session.commit()
def guess_theme(commit): from ckan import model from ckanext.dgu.lib import publisher as publib log = global_log for k,v in publisher_themes.iteritems(): updated = 0 pubs = list(publib.go_down_tree(model.Group.get(k))) print "Processing %d publishers from %s" % (len(pubs), k) for publisher in pubs: packages = publisher.members_of_type(model.Package).filter(model.Package.state=='active') print "\r", " " * 80, # blank the line print "\rProcessing %s" % publisher.name, for package in packages: if 'spend' in package.name or 'financ' in package.name: continue if not 'theme-primary' in package.extras or package.extras['theme-primary'] == '': package.extras['theme-primary'] = v model.Session.add(package) updated = updated + 1 print "\nWe updated %d themes under %s" % (updated, k) if commit.lower() == 'y': print "Committing results" model.Session.commit()
def export(cls, csv_filepath):
    """Write every organisation in the publisher hierarchy to a CSV file.

    ``cls.header`` is written first, followed by one fully quoted row per
    publisher found by walking ``publisher.go_down_tree`` from each top
    level organisation.  Each row holds: id, name, title, parent name,
    parent title and the extras 'abbreviation', 'wdtk-title', 'website-url',
    'contact-email', 'foi-email', 'category' and 'spending_published_by'
    (empty string when an extra is missing).

    :param csv_filepath: path of the file to create or overwrite.
    :raises AssertionError: via ``assert_equal`` if the set of publishers
        written differs from the active organisations in the database.
    """
    from ckan import model

    csv_filepath = os.path.abspath(csv_filepath)
    log = global_log

    expected_publishers = set(
        model.Session.query(model.Group)
        .filter_by(state='active')
        .filter_by(type='organization')
        .all())
    written_publishers = set()
    extra_keys = ('abbreviation', 'wdtk-title', 'website-url',
                  'contact-email', 'foi-email', 'category',
                  'spending_published_by')

    # 'with' guarantees the file is closed even if a row fails part-way.
    with open(csv_filepath, 'w') as f:
        f.write(cls.header)
        for top_level_pub in model.Group.get_top_level_groups(
                type='organization'):
            for pub in publisher.go_down_tree(top_level_pub):
                if pub in written_publishers:
                    warn('publisher written twice: %s %s', pub.name, pub.id)
                written_publishers.add(pub)

                parent_publishers = pub.get_parent_groups(type='organization')
                if len(parent_publishers) > 1:
                    warn('Publisher has multiple parents. '
                         'Just using first: %s %s',
                         pub.name, parent_publishers)
                if parent_publishers:
                    parent_pub_name = parent_publishers[0].name
                    parent_pub_title = parent_publishers[0].title
                else:
                    parent_pub_name = parent_pub_title = ''

                # Build the extras dict once per publisher, not once per key.
                extras = dict(pub.extras)
                csv_row_values = (
                    pub.id, pub.name, pub.title,
                    parent_pub_name, parent_pub_title,
                ) + tuple(extras.get(key, '') for key in extra_keys)

                # Cells are assumed to be strings.  Embedded double quotes
                # are doubled so that a quote in a value cannot terminate
                # the quoted field early.
                csv_row_str = ','.join(
                    '"%s"' % ('%s' % cell).replace('"', '""')
                    for cell in csv_row_values)
                log.info(csv_row_str)
                f.write(csv_row_str.encode('utf8') + '\n')
                f.flush()

    # checks: duplicates are reported by warn() above, so only the overall
    # coverage needs asserting here.
    assert_equal(expected_publishers, written_publishers)
def check_related_publisher_properties():
    """Check a publisher's datasets, position in the tree and members.

    No parameters and no return value.  ``publisher_name``, ``model``,
    ``publib`` and ``assert_equal`` come from outside this function.  The
    publisher must own exactly the dataset 'directgov-cota', appear in the
    tree under 'dept-health', have the two expected primary care trusts
    beneath it, and have the expected admin and editor users.
    """
    target = model.Group.by_name(publisher_name)

    # datasets
    package_names = set()
    for pkg in target.packages():
        package_names.add(pkg.name)
    assert_equal(package_names, set([u'directgov-cota']))

    # parents
    health_dept = model.Group.by_name('dept-health')
    health_tree_names = []
    for grp in publib.go_down_tree(health_dept):
        health_tree_names.append(grp.name)
    assert publisher_name in health_tree_names, health_tree_names

    # children
    tree_names = set()
    for grp in publib.go_down_tree(target):
        tree_names.add(grp.name)
    required = set([u'newham-primary-care-trust',
                    u'barnsley-primary-care-trust'])
    assert required <= tree_names, tree_names

    # admins & editors
    admins = target.members_of_type(model.User, capacity='admin')
    assert_equal(set(user.name for user in admins), set(('nhsadmin',)))
    editors = target.members_of_type(model.User, capacity='editor')
    assert_equal(set(user.name for user in editors),
                 set(('nhseditor', 'user_d101')))
def export(cls, csv_filepath):
    """Export the organisation hierarchy to a CSV file, one row per publisher.

    The file starts with ``cls.header``.  Publishers are discovered by
    walking ``publisher.go_down_tree`` from every top level organisation.
    Row columns are: id, name, title, parent name, parent title, then the
    extras 'abbreviation', 'wdtk-title', 'website-url', 'contact-email',
    'foi-email', 'category' and 'spending_published_by' (empty string when
    an extra is absent).  Every cell is wrapped in double quotes.

    :param csv_filepath: path of the file to create or overwrite.
    :raises AssertionError: via ``assert_equal`` if the publishers written
        are not exactly the active organisations in the database.
    """
    from ckan import model

    csv_filepath = os.path.abspath(csv_filepath)
    log = global_log

    expected_publishers = set(
        model.Session.query(model.Group)
        .filter_by(state='active')
        .filter_by(type='organization')
        .all())
    written_publishers = set()
    extra_keys = ('abbreviation', 'wdtk-title', 'website-url',
                  'contact-email', 'foi-email', 'category',
                  'spending_published_by')

    # The context manager closes the file even when a row raises.
    with open(csv_filepath, 'w') as f:
        f.write(cls.header)
        for top_level_pub in model.Group.get_top_level_groups(
                type='organization'):
            for pub in publisher.go_down_tree(top_level_pub):
                if pub in written_publishers:
                    warn('publisher written twice: %s %s', pub.name, pub.id)
                written_publishers.add(pub)

                parent_publishers = pub.get_parent_groups(type='organization')
                if len(parent_publishers) > 1:
                    warn('Publisher has multiple parents. '
                         'Just using first: %s %s',
                         pub.name, parent_publishers)
                if parent_publishers:
                    parent_pub_name = parent_publishers[0].name
                    parent_pub_title = parent_publishers[0].title
                else:
                    parent_pub_name = parent_pub_title = ''

                # Convert the extras once per publisher rather than per key.
                extras = dict(pub.extras)
                csv_row_values = (
                    pub.id, pub.name, pub.title,
                    parent_pub_name, parent_pub_title,
                ) + tuple(extras.get(key, '') for key in extra_keys)

                # Cells are assumed to be strings.  Double any embedded
                # quote so it cannot close the quoted field prematurely.
                csv_row_str = ','.join(
                    '"%s"' % ('%s' % cell).replace('"', '""')
                    for cell in csv_row_values)
                log.info(csv_row_str)
                f.write(csv_row_str.encode('utf8') + '\n')
                f.flush()

    # checks: duplicate writes are already reported through warn(), so the
    # remaining check is that nothing was missed or invented.
    assert_equal(expected_publishers, written_publishers)
def dgu_package_create(context, data_dict):
    """Decide whether the current user may create a package.

    :param context: dict holding 'model' and, optionally, 'user' (the
        user name looked up with ``model.User.get``).
    :param data_dict: the package data, or a false value when the question
        is not about a particular package.  When given, it must contain a
        'groups' entry: a list of dicts with a 'name' key, a list of names,
        or a single name.
    :returns: ``{'success': True}`` or ``{'success': False}``, the latter
        with a 'msg' entry when the user's publishers do not intersect the
        package's groups.
    """
    model = context['model']
    user = context.get('user')
    user_obj = model.User.get(user)

    if not user_obj:
        return {'success': False}

    if Authorizer().is_sysadmin(user_obj):
        return {'success': True}

    # Copied because the list is extended below; this avoids mutating a
    # list that get_groups may be holding on to (assumption - not verified).
    user_publishers = list(user_obj.get_groups('publisher'))

    if not data_dict:
        # i.e. not asking in relation to a particular package. We only let
        # publishers do this
        return {'success': bool(user_publishers)}

    # For users who are admins of groups, we should also include all of
    # their child groups in the list of user_publishers
    for admin_group in user_obj.get_groups('publisher', 'admin'):
        user_publishers.extend(publib.go_down_tree(admin_group))
    user_publisher_names = [pub.name for pub in set(user_publishers)]

    groups = data_dict['groups']
    if groups and isinstance(groups[0], dict):
        package_group_names = [pub['name'] for pub in groups]
    elif groups and isinstance(groups, list):
        # Already a list of names at this point, so use it as it is.
        package_group_names = groups
    else:
        # A single string is wrapped in a list so that the intersection
        # check below works; an empty value means no groups at all.
        package_group_names = [groups] if groups else []

    # If the user has a group (is a publisher), but there is no package
    # group name, then we need to continue to allow validation to cause the
    # failure.
    if user_publishers and package_group_names == [u' ']:
        return {'success': True}

    if not _groups_intersect(user_publisher_names, package_group_names):
        return {'success': False,
                'msg': _('User %s not authorized to edit packages of this publisher') % str(user)}

    return {'success': True}
def sql_to_filter_by_organisation(organisation,
                                  include_sub_organisations=False):
    """
    Returns: (sql_org_filter, sql_params)

    In your sql you need:
          WHERE %(org_filter)s
    Run this function:
          sql_org_filter, sql_params = sql_to_filter_by_organisation( ... )
    And execute your sql with the tuple:
          rows = model.Session.execute(sql % sql_org_filter, sql_params)

    With include_sub_organisations the filter matches the name of every
    group yielded by go_down_tree(organisation).  Each name is passed as a
    bind parameter (org_name_0, org_name_1, ...) instead of being written
    into the SQL text, so sql_params must always be passed to execute().
    """
    sql_org_filter = {}
    sql_params = {}
    if not include_sub_organisations:
        sql_org_filter["org_filter"] = '"group".name = :org_name'
        sql_params["org_name"] = organisation.name
    else:
        sub_org_filters = []
        for index, org in enumerate(go_down_tree(organisation)):
            param_name = "org_name_%d" % index
            sub_org_filters.append('"group".name = :%s' % param_name)
            sql_params[param_name] = org.name
        sql_org_filter["org_filter"] = "(%s)" % " or ".join(sub_org_filters)
    return sql_org_filter, sql_params
def sql_to_filter_by_organisation(organisation,
                                  include_sub_organisations=False):
    '''
    Returns: (sql_org_filter, sql_params)

    In your sql you need:
          WHERE %(org_filter)s
    Run this function:
          sql_org_filter, sql_params = sql_to_filter_by_organisation( ... )
    And execute your sql with the tuple:
          rows = model.Session.execute(sql % sql_org_filter, sql_params)

    With include_sub_organisations the filter matches the name of every
    group yielded by go_down_tree(organisation).  Each name is passed as a
    bind parameter (org_name_0, org_name_1, ...) instead of being written
    into the SQL text, so sql_params must always be passed to execute().
    '''
    sql_org_filter = {}
    sql_params = {}
    if not include_sub_organisations:
        sql_org_filter['org_filter'] = '"group".name = :org_name'
        sql_params['org_name'] = organisation.name
    else:
        sub_org_filters = []
        for index, org in enumerate(go_down_tree(organisation)):
            param_name = 'org_name_%d' % index
            sub_org_filters.append('"group".name = :%s' % param_name)
            sql_params[param_name] = org.name
        sql_org_filter['org_filter'] = '(%s)' % ' or '.join(sub_org_filters)
    return sql_org_filter, sql_params
def export(cls, csv_filepath):
    """Write every publisher in the hierarchy to a CSV file.

    ``cls.header`` is written first, then one fully quoted row per publisher
    found by walking ``publisher.go_down_tree`` from each result of
    ``publisher.get_top_level()``.  Row columns are: id, title, title of
    the top level publisher (empty for the top level publisher itself) and
    the extras 'category' and 'spending_published_by'.

    :param csv_filepath: path of the file to create or overwrite.
    :raises AssertionError: via ``assert_equal`` if the publishers written
        are not exactly the active groups of type 'publisher'.
    """
    from ckan import model

    csv_filepath = os.path.abspath(csv_filepath)
    log = global_log

    expected_publishers = set(
        model.Session.query(model.Group)
        .filter_by(state='active')
        .filter_by(type='publisher')
        .all())
    written_publishers = set()

    # 'with' guarantees the file is closed even if a row fails part-way.
    with open(csv_filepath, 'w') as f:
        f.write(cls.header)
        for top_level_pub in publisher.get_top_level():
            for pub in publisher.go_down_tree(top_level_pub):
                if pub in written_publishers:
                    warn('publisher written twice: %s %s', pub.name, pub.id)
                written_publishers.add(pub)

                if top_level_pub.id != pub.id:
                    parent_pub_title = top_level_pub.title
                else:
                    parent_pub_title = ''
                extras = dict(pub.extras)
                csv_values = (
                    pub.id,
                    pub.title,
                    parent_pub_title,
                    extras.get('category', ''),
                    extras.get('spending_published_by', ''),
                )
                # Double any embedded quote so that it cannot terminate the
                # quoted field early.
                csv_line = ','.join(
                    '"%s"' % ('%s' % cell).replace('"', '""')
                    for cell in csv_values)
                log.info(csv_line)
                # Encode explicitly: writing a unicode line with non-ASCII
                # characters to a plain file raises UnicodeEncodeError.
                f.write(csv_line.encode('utf8') + '\n')
                f.flush()

    # checks: duplicates are reported by warn() above, so only the overall
    # coverage needs asserting here.
    assert_equal(expected_publishers, written_publishers)
def export(cls, csv_filepath):
    """Write every organisation in the hierarchy to a CSV file.

    ``cls.header`` is written first, then one fully quoted row per publisher
    found by walking ``publisher.go_down_tree`` from each top level
    organisation.  Row columns are: id, title, title of the top level
    organisation (empty for the top level organisation itself) and the
    extras 'category' and 'spending_published_by'.

    :param csv_filepath: path of the file to create or overwrite.
    :raises AssertionError: via ``assert_equal`` if the publishers written
        are not exactly the active groups of type 'organization'.
    """
    from ckan import model

    csv_filepath = os.path.abspath(csv_filepath)
    log = global_log

    expected_publishers = set(
        model.Session.query(model.Group)
        .filter_by(state='active')
        .filter_by(type='organization')
        .all())
    written_publishers = set()

    # The context manager closes the file even when a row raises.
    with open(csv_filepath, 'w') as f:
        f.write(cls.header)
        for top_level_pub in model.Group.get_top_level_groups(
                type='organization'):
            for pub in publisher.go_down_tree(top_level_pub):
                if pub in written_publishers:
                    warn('publisher written twice: %s %s', pub.name, pub.id)
                written_publishers.add(pub)

                if top_level_pub.id != pub.id:
                    parent_pub_title = top_level_pub.title
                else:
                    parent_pub_title = ''
                extras = dict(pub.extras)
                csv_values = (
                    pub.id,
                    pub.title,
                    parent_pub_title,
                    extras.get('category', ''),
                    extras.get('spending_published_by', ''),
                )
                # Double any embedded quote so that it cannot terminate the
                # quoted field early.
                csv_line = ','.join(
                    '"%s"' % ('%s' % cell).replace('"', '""')
                    for cell in csv_values)
                log.info(csv_line)
                # Encode explicitly: writing a unicode line with non-ASCII
                # characters to a plain file raises UnicodeEncodeError.
                f.write(csv_line.encode('utf8') + '\n')
                f.flush()

    # checks: duplicate writes are already reported through warn(), so the
    # remaining check is that nothing was missed or invented.
    assert_equal(expected_publishers, written_publishers)
def sql_to_filter_by_organisation(organisation,
                                  include_sub_organisations=False):
    '''
    Returns: (sql_org_filter, sql_params)

    In your sql you need:
          WHERE %(org_filter)s
    Run this function:
          sql_org_filter, sql_params = sql_to_filter_by_organisation( ... )
    And execute your sql with the tuple:
          rows = model.Session.execute(sql % sql_org_filter, sql_params)

    When include_sub_organisations is set, one bind parameter per group
    yielded by go_down_tree(organisation) is added to sql_params (named
    org_name_0, org_name_1, ...), so no group name is ever written into the
    SQL text itself.  sql_params must therefore always reach execute().
    '''
    sql_org_filter = {}
    sql_params = {}
    if include_sub_organisations:
        conditions = []
        for position, org in enumerate(go_down_tree(organisation)):
            bind_name = 'org_name_%d' % position
            sql_params[bind_name] = org.name
            conditions.append('"group".name = :%s' % bind_name)
        sql_org_filter['org_filter'] = '(%s)' % ' or '.join(conditions)
    else:
        sql_org_filter['org_filter'] = '"group".name = :org_name'
        sql_params['org_name'] = organisation.name
    return sql_org_filter, sql_params
def get_publishers(self):
    """Return the publishers the current user may act for, keyed by name.

    A sysadmin gets every group of type 'publisher'.  A logged-in user gets
    the publishers they edit plus, for each publisher they administer,
    everything ``publib.go_down_tree`` yields for it.  Anyone else gets an
    empty dict.

    :returns: dict mapping publisher name to a dict with the keys 'name',
        'id', 'title' and the contact / FOI extras listed below (empty
        string when an extra is missing).
    """
    from ckan.model.group import Group

    if Authorizer().is_sysadmin(c.user):
        groups = Group.all(group_type='publisher')
    elif c.userobj:
        # need to get c.userobj again as it may be detached from the
        # session since the last time we called get_groups (it caches)
        c.userobj = model.User.by_name(c.user)

        # For each group where the user is an admin, we should also include
        # all of the child publishers.
        admin_groups = set()
        for g in c.userobj.get_groups('publisher', 'admin'):
            admin_groups.update(publib.go_down_tree(g))

        editor_groups = c.userobj.get_groups('publisher', 'editor')
        groups = list(admin_groups) + editor_groups
    else:
        # anonymous user shouldn't have access to this page anyway.
        groups = []

    # Be explicit about which fields we make available in the template.
    # Each key is read from the extra of the same name; 'foi-web' used to
    # be filled from 'foi-name' by mistake.
    extra_keys = ('contact-name', 'contact-email', 'contact-phone',
                  'foi-name', 'foi-email', 'foi-phone', 'foi-web')

    publishers = {}
    for g in groups:
        details = {'name': g.name, 'id': g.id, 'title': g.title}
        for key in extra_keys:
            details[key] = g.extras.get(key, '')
        publishers[g.name] = details
    return publishers
def get_publishers(self):
    """Build the name -> details mapping of organisations for the template.

    Sysadmins see every group of type 'organization'.  Other logged-in
    users see the organisations they edit together with everything
    ``publib.go_down_tree`` yields for each organisation they administer.
    Without a user object the mapping is empty.

    :returns: dict keyed by organisation name; each value holds 'name',
        'id', 'title' and the contact / FOI extras, defaulting to ''.
    """
    from ckan.model.group import Group

    if dgu_helpers.is_sysadmin():
        visible = Group.all(group_type='organization')
    elif c.userobj:
        # Re-fetch the user: the cached object may be detached from the
        # session since get_groups was last called (it caches).
        c.userobj = model.User.by_name(c.user)

        # Administering an organisation also grants its child publishers.
        administered = set()
        for root in c.userobj.get_groups('organization', 'admin'):
            for descendant in publib.go_down_tree(root):
                administered.add(descendant)

        edited = c.userobj.get_groups('organization', 'editor')
        visible = list(administered) + edited
    else:
        # An anonymous user shouldn't reach this page anyway.
        visible = []

    # Only these fields are exposed to the template.
    exposed_extras = ('contact-name', 'contact-email', 'contact-phone',
                      'foi-name', 'foi-email', 'foi-phone', 'foi-web')

    summaries = []
    for org in visible:
        summary = {'name': org.name, 'id': org.id, 'title': org.title}
        for extra_key in exposed_extras:
            summary[extra_key] = org.extras.get(extra_key, '')
        summaries.append(summary)

    by_name = {}
    for summary in summaries:
        by_name[summary['name']] = summary
    return by_name
except DrupalRequestError, e: user_emails[user.name] = user.email else: user_emails[user.name] = user_properties['mail'] else: # not a drupal user user_emails[user.name] = user.email return user_emails[user.name] # NHS publishers nhs = model.Group.by_name('national-health-service') assert nhs pub_stats = StatsList() pct_rows = [] non_pct_rows = [] for pub in publisher_lib.go_down_tree(nhs): # Filter to PCTs title = pub.title not_pct = ('NHS Choices', 'NHS Connecting for Health', 'NHS Connecting for Health and NHS Business Services Authority') is_pct = ('Care Trust' in title or 'PCT' in title or title.startswith('NHS ') or 'Care Tust' in title) \ and title not in not_pct and 'Foundation' not in title # Get the admins & editors admins = pub.members_of_type(model.User, 'admin').all() editors = pub.members_of_type(model.User, 'editor').all() # Get their email addresses users_with_email = [] users_without_email = [] warnings = None for user in admins: if get_email_for_user(user): users_with_email.append(user)
def organisation_dataset_scores(organisation_name, include_sub_organisations=False): ''' Returns a dictionary detailing openness scores for the organisation for each dataset. i.e.: {'publisher_name': 'cabinet-office', 'publisher_title:': 'Cabinet Office', 'data': [ {'package_name', 'package_title', 'resource_url', 'openness_score', 'reason', 'last_updated', 'is_broken', 'format'} ...] NB the list does not contain datasets that have 0 resources and therefore score 0 ''' values = {} sql = """ select package.id as package_id, task_status.key as task_status_key, task_status.value as task_status_value, task_status.error as task_status_error, task_status.last_updated as task_status_last_updated, resource.id as resource_id, resource.url as resource_url, resource.position, package.title as package_title, package.name as package_name, "group".id as publisher_id, "group".name as publisher_name, "group".title as publisher_title from resource left join task_status on task_status.entity_id = resource.id left join resource_group on resource.resource_group_id = resource_group.id left join package on resource_group.package_id = package.id left join member on member.table_id = package.id left join "group" on member.group_id = "group".id where entity_id in (select entity_id from task_status where task_status.task_type='qa') and package.state = 'active' and resource.state='active' and resource_group.state='active' and "group".state='active' and task_status.task_type='qa' and task_status.key='status' %(org_filter)s order by package.title, package.name, resource.position """ sql_options = {} org = model.Group.by_name(organisation_name) if not org: abort(404, 'Publisher not found') organisation_title = org.title if not include_sub_organisations: sql_options['org_filter'] = 'and "group".name = :org_name' values['org_name'] = organisation_name else: sub_org_filters = ['"group".name=\'%s\'' % org.name for org in go_down_tree(org)] sql_options['org_filter'] = 'and (%s)' % ' or 
'.join(sub_org_filters) rows = model.Session.execute(sql % sql_options, values) data = dict() # dataset_name: {properties} for row in rows: package_data = data.get(row.package_name) if not package_data: package_data = OrderedDict(( ('dataset_title', row.package_title), ('dataset_name', row.package_name), ('publisher_title', row.publisher_title), ('publisher_name', row.publisher_name), # the rest are placeholders to hold the details # of the highest scoring resource ('resource_position', None), ('resource_id', None), ('resource_url', None), ('openness_score', None), ('openness_score_reason', None), ('last_updated', None), )) if row.task_status_value > package_data['openness_score']: package_data['resource_position'] = row.position package_data['resource_id'] = row.resource_id package_data['resource_url'] = row.resource_url try: package_data.update(json.loads(row.task_status_error)) except ValueError, e: log.error('QA status "error" should have been in JSON format, but found: "%s" %s', task_status_error, e) package_data['reason'] = 'Could not display reason due to a system error' package_data['openness_score'] = row.task_status_value package_data['openness_score_reason'] = package_data['reason'] # deprecated package_data['last_updated'] = row.task_status_last_updated data[row.package_name] = package_data
def feedback_report(publisher, include_sub_publishers=False,
                    include_published=False, use_cache=False):
    """
    For the publisher provided (and optionally for sub-publishers)
    this function will generate a report on the feedback for that
    publisher.

    :param publisher: the group to report on; a false value reports on
        every active package membership.
    :param include_sub_publishers: also cover the groups yielded by
        ``go_down_tree(publisher)``.
    :param include_published: when false, only packages with a true
        'unpublished' extra are reported.
    :param use_cache: return a fresh cached report from
        ``model.DataCache`` when one exists.
    :returns: tuple ``(rows, report_date)``.  For a freshly generated
        report ``rows`` is sorted by 'package-title' and ``report_date``
        is the string 'just now'.
    """
    import collections
    import datetime
    import ckan.lib.helpers as helpers
    from ckanext.dgu.lib.publisher import go_down_tree
    from ckanext.dgu.model.feedback import Feedback
    from operator import itemgetter

    publisher_name = publisher.name if publisher else '__all__'

    if use_cache:
        key = 'feedback-all-report' if include_published else 'feedback-report'
        if include_sub_publishers:
            key = "".join([key, '-withsub'])
        report, report_date = model.DataCache.get_if_fresh(publisher_name, key)
        if report is None:
            log.info("Did not find cached report - %s/%s" % (publisher_name, key))
        else:
            log.info("Found feedback report in cache")
            return report, report_date

    # Active memberships of active packages; narrowed to the requested
    # publisher(s) below when one was given.
    memberships = model.Session.query(model.Member)\
        .join(model.Package, model.Package.id == model.Member.table_id)\
        .filter(model.Member.state == 'active')\
        .filter(model.Member.table_name == 'package')\
        .filter(model.Package.state == 'active')
    if publisher:
        if include_sub_publishers:
            group_ids = [grp.id for grp in go_down_tree(publisher)]
        else:
            group_ids = [publisher.id]
        memberships = memberships.filter(
            model.Member.group_id.in_(group_ids))

    feedback_categories = ('economic', 'social', 'linked', 'other',
                           'effective')
    # Rendered once so every row of the report carries the same timestamp.
    generated_at = helpers.render_datetime(datetime.datetime.now(),
                                           "%d/%m/%Y %H:%M")

    results = []
    for member in memberships.all():
        pkg = model.Package.get(member.table_id)

        # Unless published datasets were asked for, report only on the
        # packages flagged with the 'unpublished' extra.
        if not include_published and not pkg.extras.get('unpublished', False):
            continue

        data = collections.defaultdict(int)
        data['publisher-name'] = member.group.name
        data['generated-at'] = generated_at
        data['publisher-title'] = member.group.title
        data['package-name'] = pkg.name
        data['package-title'] = pkg.title
        data['publish-date'] = pkg.extras.get('publish-date', '')

        # '== True' is deliberate: these are SQLAlchemy column expressions.
        feedback_items = model.Session.query(Feedback)\
            .filter(Feedback.visible == True)\
            .filter(Feedback.package_id == member.table_id)\
            .filter(Feedback.active == True)
        for item in feedback_items:
            for category in feedback_categories:
                if getattr(item, category):
                    data[category] += 1

        # Reading through the defaultdict leaves a 0 entry for any
        # category that received no feedback.
        data['total-comments'] = sum(
            [data[category] for category in feedback_categories])
        results.append(data)

    log.info('Report generated: feedback_report publishers=%s subpub=%s published=%s',
             publisher.name if publisher else 'all',
             include_sub_publishers, include_published)
    return sorted(results, key=itemgetter('package-title')), 'just now'
def feedback_report(publisher, include_sub_publishers=False,
                    include_published=False, use_cache=False):
    """
    For the publisher provided (and optionally for sub-publishers)
    this function will generate a report on the feedback for that
    publisher.

    :param publisher: the group to report on; a false value reports on
        every active package membership.
    :param include_sub_publishers: also cover the groups yielded by
        ``go_down_tree(publisher)``.
    :param include_published: when false, only packages with a true
        'unpublished' extra are reported.
    :param use_cache: return whatever ``model.DataCache.get_fresh`` holds
        for this report when it is not None.
    :returns: the cached value, or a freshly built list of per-package
        dicts sorted by 'package-title'.
    """
    import collections
    import datetime
    import ckan.lib.helpers as helpers
    from ckanext.dgu.lib.publisher import go_down_tree
    from ckanext.dgu.model.feedback import Feedback
    from operator import itemgetter

    publisher_name = publisher.name if publisher else '__all__'

    if use_cache:
        key = 'feedback-all-report' if include_published else 'feedback-report'
        if include_sub_publishers:
            key = "".join([key, '-withsubpub'])
        cache = model.DataCache.get_fresh(publisher_name, key)
        if cache is None:
            log.info("Did not find cached report - %s/%s" % (publisher_name, key,))
        else:
            log.info("Found feedback report in cache")
            return cache

    # Active memberships of active packages; narrowed to the requested
    # publisher(s) below when one was given.
    memberships = model.Session.query(model.Member)\
        .join(model.Package, model.Package.id == model.Member.table_id)\
        .filter(model.Member.state == 'active')\
        .filter(model.Member.table_name == 'package')\
        .filter(model.Package.state == 'active')
    if publisher:
        if include_sub_publishers:
            group_ids = [grp.id for grp in go_down_tree(publisher)]
        else:
            group_ids = [publisher.id]
        memberships = memberships.filter(
            model.Member.group_id.in_(group_ids))

    feedback_categories = ('economic', 'social', 'linked', 'other',
                           'effective')
    # Rendered once so every row of the report carries the same timestamp.
    generated_at = helpers.render_datetime(datetime.datetime.now(),
                                           "%d/%m/%Y %H:%M")

    results = []
    for member in memberships.all():
        pkg = model.Package.get(member.table_id)

        # Unless published datasets were asked for, report only on the
        # packages flagged with the 'unpublished' extra.
        if not include_published and not pkg.extras.get('unpublished', False):
            continue

        data = collections.defaultdict(int)
        data['publisher-name'] = member.group.name
        data['generated-at'] = generated_at
        data['publisher-title'] = member.group.title
        data['package-name'] = pkg.name
        data['package-title'] = pkg.title
        data['publish-date'] = pkg.extras.get('publish-date', '')

        # '== True' is deliberate: these are SQLAlchemy column expressions.
        feedback_items = model.Session.query(Feedback)\
            .filter(Feedback.visible == True)\
            .filter(Feedback.package_id == member.table_id)\
            .filter(Feedback.active == True)
        for item in feedback_items:
            for category in feedback_categories:
                if getattr(item, category):
                    data[category] += 1

        # Reading through the defaultdict leaves a 0 entry for any
        # category that received no feedback.
        data['total-comments'] = sum(
            [data[category] for category in feedback_categories])
        results.append(data)

    return sorted(results, key=itemgetter('package-title'))
user_emails[user.name] = user.email else: user_emails[user.name] = user_properties['mail'] else: # not a drupal user user_emails[user.name] = user.email return user_emails[user.name] # NHS publishers nhs = model.Group.by_name('national-health-service') assert nhs pub_stats = StatsList() pct_rows = [] non_pct_rows = [] for pub in publisher_lib.go_down_tree(nhs): # Filter to PCTs title = pub.title not_pct = ('NHS Choices', 'NHS Connecting for Health', 'NHS Connecting for Health and NHS Business Services Authority') is_pct = ('Care Trust' in title or 'PCT' in title or title.startswith('NHS ') or 'Care Tust' in title) \ and title not in not_pct and 'Foundation' not in title # Get the admins & editors admins = pub.members_of_type(model.User, 'admin').all() editors = pub.members_of_type(model.User, 'editor').all() # Get their email addresses users_with_email = [] users_without_email = [] warnings = None for user in admins: if get_email_for_user(user):
try: c.group_dict = get_action('organization_show')(context, {"id": id}) c.group = context['group'] except ObjectNotFound: abort(404, 'Organization not found') except NotAuthorized: abort(401, 'Unauthorized to read Organization %s' % id) try: context['group'] = c.group check_access('organization_update', context) except NotAuthorized, e: abort(401, 'User %r not authorized to download unpublished '% (c.user)) groups = [c.group] if request.params.get('include_sub') == 'true': groups = go_down_tree(c.group) # Set the content-disposition so that it downloads the file # response.headers['Content-Type'] = "text/plain; charset=utf-8" response.headers['Content-Type'] = "text/csv; charset=utf-8" response.headers['Content-Disposition'] = str('attachment; filename=%s-inventory.csv' % (c.group.name,)) writer = csv.writer(response) inventory_lib.render_inventory_header(writer) for gp in groups: ds = gp.members_of_type(model.Package).all() inventory_lib.render_inventory_row(writer, ds, gp)
def organisation_dataset_scores(organisation_name, include_sub_organisations=False): ''' Returns a dictionary detailing openness scores for the organisation for each dataset. i.e.: {'publisher_name': 'cabinet-office', 'publisher_title:': 'Cabinet Office', 'data': [ {'package_name', 'package_title', 'resource_url', 'openness_score', 'reason', 'last_updated', 'is_broken', 'format'} ...] NB the list does not contain datasets that have 0 resources and therefore score 0 ''' values = {} sql = """ select package.id as package_id, task_status.key as task_status_key, task_status.value as task_status_value, task_status.error as task_status_error, task_status.last_updated as task_status_last_updated, resource.id as resource_id, resource.url as resource_url, resource.position, package.title as package_title, package.name as package_name, "group".id as publisher_id, "group".name as publisher_name, "group".title as publisher_title from resource left join task_status on task_status.entity_id = resource.id left join resource_group on resource.resource_group_id = resource_group.id left join package on resource_group.package_id = package.id left join member on member.table_id = package.id left join "group" on member.group_id = "group".id where entity_id in (select entity_id from task_status where task_status.task_type='qa') and package.state = 'active' and resource.state='active' and resource_group.state='active' and "group".state='active' and task_status.task_type='qa' and task_status.key='status' %(org_filter)s order by package.title, package.name, resource.position """ sql_options = {} org = model.Group.by_name(organisation_name) if not org: abort(404, 'Publisher not found') organisation_title = org.title if not include_sub_organisations: sql_options['org_filter'] = 'and "group".name = :org_name' values['org_name'] = organisation_name else: sub_org_filters = [ '"group".name=\'%s\'' % org.name for org in go_down_tree(org) ] sql_options['org_filter'] = 'and (%s)' % ' or 
'.join(sub_org_filters) rows = model.Session.execute(sql % sql_options, values) data = dict() # dataset_name: {properties} for row in rows: package_data = data.get(row.package_name) if not package_data: package_data = OrderedDict(( ('dataset_title', row.package_title), ('dataset_name', row.package_name), ('publisher_title', row.publisher_title), ('publisher_name', row.publisher_name), # the rest are placeholders to hold the details # of the highest scoring resource ('resource_position', None), ('resource_id', None), ('resource_url', None), ('openness_score', None), ('openness_score_reason', None), ('last_updated', None), )) if row.task_status_value > package_data['openness_score']: package_data['resource_position'] = row.position package_data['resource_id'] = row.resource_id package_data['resource_url'] = row.resource_url try: package_data.update(json.loads(row.task_status_error)) except ValueError, e: log.error( 'QA status "error" should have been in JSON format, but found: "%s" %s', task_status_error, e) package_data[ 'reason'] = 'Could not display reason due to a system error' package_data['openness_score'] = row.task_status_value package_data['openness_score_reason'] = package_data[ 'reason'] # deprecated package_data['last_updated'] = row.task_status_last_updated data[row.package_name] = package_data