示例#1
0
        def check_related_publisher_properties():
            """Assert the publisher's datasets, tree position and user roles.

            The publisher is looked up by the free variable ``publisher_name``
            (defined outside this function). Checked in order: its active
            datasets, that it is reached when walking down the tree from
            'dept-health', that the two expected primary care trusts are
            reached when walking down from it, and its admin/editor users.
            """
            publisher = model.Group.by_name(publisher_name)

            # datasets
            dataset_names = set(
                pkg.name for pkg in publisher.active_packages())
            assert_equal(dataset_names, set([u'directgov-cota']))

            # parents
            dept_health = model.Group.by_name('dept-health')
            child_groups_of_doh = []
            for descendant in publib.go_down_tree(dept_health):
                child_groups_of_doh.append(descendant.name)
            assert publisher_name in child_groups_of_doh, child_groups_of_doh

            # children
            child_groups = set(
                descendant.name
                for descendant in publib.go_down_tree(publisher))
            expected_children = set([
                u'newham-primary-care-trust', u'barnsley-primary-care-trust'
            ])
            assert expected_children <= child_groups, child_groups

            # admins & editors
            expected_users_by_capacity = (
                ('admin', set(('nhsadmin', ))),
                ('editor', set(('nhseditor', 'user_d101'))),
            )
            for capacity, expected_users in expected_users_by_capacity:
                actual_users = set(
                    user.name
                    for user in publisher.members_of_type(model.User,
                                                          capacity=capacity))
                assert_equal(actual_users, expected_users)
def guess_theme(commit):
    from ckan import model
    from ckanext.dgu.lib import publisher as publib

    log = global_log

    for k, v in publisher_themes.iteritems():
        updated = 0
        pubs = list(publib.go_down_tree(model.Group.get(k)))
        print "Processing %d publishers from %s" % (len(pubs), k)

        for publisher in pubs:
            packages = publisher.members_of_type(
                model.Package).filter(model.Package.state == 'active')
            print "\r", " " * 80,  # blank the line
            print "\rProcessing %s" % publisher.name,

            for package in packages:
                if 'spend' in package.name or 'financ' in package.name:
                    continue

                if not 'theme-primary' in package.extras or package.extras[
                        'theme-primary'] == '':
                    package.extras['theme-primary'] = v
                    model.Session.add(package)
                    updated = updated + 1

        print "\nWe updated %d themes under %s" % (updated, k)

        if commit.lower() == 'y':
            print "Committing results"
            model.Session.commit()
示例#3
0
def guess_theme(commit):
    from ckan import model
    from ckanext.dgu.lib import publisher as publib

    log = global_log

    for k,v in publisher_themes.iteritems():
        updated = 0
        pubs = list(publib.go_down_tree(model.Group.get(k))) 
        print "Processing %d publishers from %s" % (len(pubs), k)

        for publisher in pubs:
            packages = publisher.members_of_type(model.Package).filter(model.Package.state=='active')
            print "\r", " " * 80,  # blank the line
            print "\rProcessing %s" % publisher.name,

            for package in packages:
                if 'spend' in package.name or 'financ' in package.name:
                    continue 

                if not 'theme-primary' in package.extras or package.extras['theme-primary'] == '':
                    package.extras['theme-primary'] = v
                    model.Session.add(package)
                    updated = updated + 1

        print "\nWe updated %d themes under %s" % (updated, k)

        if commit.lower() == 'y':
            print "Committing results"
            model.Session.commit()
    def export(cls, csv_filepath):
        """Write one CSV row per organisation reached from the top-level groups.

        ``cls.header`` is written first, then a row for every group yielded
        by ``publisher.go_down_tree`` for each top-level organisation. Rows
        are UTF-8 encoded and every cell is wrapped in double quotes, with
        embedded double quotes doubled so a title containing a quote cannot
        break the row.

        :param csv_filepath: path of the CSV file to (over)write.
        :raises AssertionError: (via ``assert_equal``) if the set of written
            groups differs from the active organisations in the database.
        """
        csv_filepath = os.path.abspath(csv_filepath)
        log = global_log

        from ckan import model

        expected_publishers = set(model.Session.query(model.Group).\
                                  filter_by(state='active').\
                                  filter_by(type='organization').all())
        written_publishers = set()
        extra_keys = ('abbreviation', 'wdtk-title', 'website-url',
                      'contact-email', 'foi-email', 'category',
                      'spending_published_by')

        # 'with' guarantees the file is closed even if a row fails to export
        with open(csv_filepath, 'w') as f:
            f.write(cls.header)

            for top_level_pub in model.Group.get_top_level_groups(
                    type='organization'):
                for pub in publisher.go_down_tree(top_level_pub):
                    if pub in written_publishers:
                        warn('publisher written twice: %s %s',
                             pub.name, pub.id)
                    written_publishers.add(pub)

                    parent_publishers = pub.get_parent_groups(
                        type='organization')
                    if len(parent_publishers) > 1:
                        warn(
                            'Publisher has multiple parents. Just using first: %s %s',
                            pub.name, parent_publishers)
                    if parent_publishers:
                        parent_pub_name = parent_publishers[0].name
                        parent_pub_title = parent_publishers[0].title
                    else:
                        parent_pub_name = parent_pub_title = ''

                    # Convert the extras once per publisher, not once per column
                    extras = dict(pub.extras)
                    csv_row_values = [pub.id, pub.name, pub.title,
                                      parent_pub_name, parent_pub_title]
                    csv_row_values.extend(
                        extras.get(key, '') for key in extra_keys)

                    # assume they are all strings; double any embedded quote
                    # so the quoted cell stays well-formed
                    csv_row_str = ','.join([
                        '"%s"' % ('%s' % cell).replace('"', '""')
                        for cell in csv_row_values
                    ])
                    log.info(csv_row_str)
                    f.write(csv_row_str.encode('utf8') + '\n')
                    f.flush()

        # checks
        # NOTE(review): written_publishers is already a set, so this first
        # comparison cannot fail; kept as in the original.
        assert_equal(sorted(written_publishers),
                     sorted(set(written_publishers)))
        assert_equal(expected_publishers, set(written_publishers))
        def check_related_publisher_properties():
            """Assert the publisher's datasets, tree position and user roles.

            The publisher is looked up by the free variable ``publisher_name``
            (defined outside this function). Checked in order: its datasets,
            that it is reached when walking down the tree from 'dept-health',
            that the two expected primary care trusts are reached when
            walking down from it, and its admin/editor users.
            """
            publisher = model.Group.by_name(publisher_name)

            # datasets
            dataset_names = set(pkg.name for pkg in publisher.packages())
            assert_equal(dataset_names, set([u'directgov-cota']))

            # parents
            dept_health = model.Group.by_name('dept-health')
            child_groups_of_doh = []
            for descendant in publib.go_down_tree(dept_health):
                child_groups_of_doh.append(descendant.name)
            assert publisher_name in child_groups_of_doh, child_groups_of_doh

            # children
            child_groups = set(
                descendant.name
                for descendant in publib.go_down_tree(publisher))
            expected_children = set([u'newham-primary-care-trust',
                                     u'barnsley-primary-care-trust'])
            assert expected_children <= child_groups, child_groups

            # admins & editors
            expected_users_by_capacity = (
                ('admin', set(('nhsadmin',))),
                ('editor', set(('nhseditor', 'user_d101'))),
            )
            for capacity, expected_users in expected_users_by_capacity:
                actual_users = set(
                    user.name
                    for user in publisher.members_of_type(model.User,
                                                          capacity=capacity))
                assert_equal(actual_users, expected_users)
示例#6
0
    def export(cls, csv_filepath):
        """Write one CSV row per organisation reached from the top-level groups.

        ``cls.header`` is written first, then a row for every group yielded
        by ``publisher.go_down_tree`` for each top-level organisation. Rows
        are UTF-8 encoded and every cell is wrapped in double quotes, with
        embedded double quotes doubled so a title containing a quote cannot
        break the row.

        :param csv_filepath: path of the CSV file to (over)write.
        :raises AssertionError: (via ``assert_equal``) if the set of written
            groups differs from the active organisations in the database.
        """
        csv_filepath = os.path.abspath(csv_filepath)
        log = global_log

        from ckan import model

        expected_publishers = set(model.Session.query(model.Group).\
                                  filter_by(state='active').\
                                  filter_by(type='organization').all())
        written_publishers = set()
        extra_keys = ('abbreviation', 'wdtk-title', 'website-url',
                      'contact-email', 'foi-email', 'category',
                      'spending_published_by')

        # 'with' guarantees the file is closed even if a row fails to export
        with open(csv_filepath, 'w') as f:
            f.write(cls.header)

            for top_level_pub in model.Group.get_top_level_groups(type='organization'):
                for pub in publisher.go_down_tree(top_level_pub):
                    if pub in written_publishers:
                        warn('publisher written twice: %s %s', pub.name, pub.id)
                    written_publishers.add(pub)

                    parent_publishers = pub.get_parent_groups(type='organization')
                    if len(parent_publishers) > 1:
                        warn('Publisher has multiple parents. Just using first: %s %s', pub.name, parent_publishers)
                    if parent_publishers:
                        parent_pub_name = parent_publishers[0].name
                        parent_pub_title = parent_publishers[0].title
                    else:
                        parent_pub_name = parent_pub_title = ''

                    # Convert the extras once per publisher, not once per column
                    extras = dict(pub.extras)
                    csv_row_values = [pub.id, pub.name, pub.title,
                                      parent_pub_name, parent_pub_title]
                    csv_row_values.extend(extras.get(key, '') for key in extra_keys)

                    # assume they are all strings; double any embedded quote
                    # so the quoted cell stays well-formed
                    csv_row_str = ','.join(['"%s"' % ('%s' % cell).replace('"', '""')
                                            for cell in csv_row_values])
                    log.info(csv_row_str)
                    f.write(csv_row_str.encode('utf8') + '\n')
                    f.flush()

        # checks
        # NOTE(review): written_publishers is already a set, so this first
        # comparison cannot fail; kept as in the original.
        assert_equal(sorted(written_publishers), sorted(set(written_publishers)))
        assert_equal(expected_publishers, set(written_publishers))
示例#7
0
def dgu_package_create(context, data_dict):
    """Decide whether the context's user may create a package.

    :param context: dict providing 'model' and (optionally) 'user', the
        user name.
    :param data_dict: the package being created, or a falsy value when the
        question is not about a particular package. Its 'groups' value may
        be a list of dicts with a 'name' key, a list of names, or a single
        name.
    :returns: ``{'success': bool}``, plus a 'msg' entry when the user's
        publishers do not intersect with the package's groups.
    """
    model = context['model']
    user = context.get('user')
    user_obj = model.User.get(user)

    if not user_obj:
        return {'success': False}

    if Authorizer().is_sysadmin(user_obj):
        return {'success': True}

    # Copy the list: it is extended below and must not alter a list object
    # that get_groups() may also hand to other callers.
    user_publishers = list(user_obj.get_groups('publisher'))

    if not data_dict:
        # i.e. not asking in relation to a particular package. We only let
        # publishers do this
        return {'success': bool(user_publishers)}

    # For users who are admins of groups, we should also include all of their child groups
    # in the list of user_publishers
    as_admin = user_obj.get_groups('publisher', 'admin')
    for g in as_admin:
        user_publishers.extend(list(publib.go_down_tree(g)))

    user_publisher_names = [pub.name for pub in set(user_publishers)]

    # A data_dict without a 'groups' key is treated like one with no groups
    # rather than raising KeyError.
    groups = data_dict.get('groups')
    if not groups:
        package_group_names = []
    elif isinstance(groups[0], dict):
        package_group_names = [pub['name'] for pub in groups]
    elif isinstance(groups, list):
        # groups is already a list of names at this point so we should just
        # assign it.
        package_group_names = groups
    else:
        # In the case where we have received a single string in the data_dict['groups']
        # we should wrap it in a list to make sure the intersection check works
        package_group_names = [groups]

    # If the user has a group (is a publisher), but there is no package
    # group name, then we need to continue to allow validation to cause the
    # failure.
    if user_publishers and package_group_names == [u' ']:
        return {'success': True}

    if not _groups_intersect(user_publisher_names, package_group_names):
        return {'success': False,
                'msg': _('User %s not authorized to edit packages of this publisher') % str(user)}

    return {'success': True}
示例#8
0
def sql_to_filter_by_organisation(organisation, include_sub_organisations=False):
    """
    Returns: (sql_org_filter, sql_params)
    In your sql you need:
          WHERE %(org_filter)s
    Run this function:
          sql_org_filter, sql_params = sql_to_filter_by_organisation( ... )
    And execute your sql with the tuple:
          rows = model.Session.execute(sql % sql_org_filter, sql_params)

    Organisation names are always passed as bind parameters (``:org_name``
    for a single organisation, ``:org_name_0``, ``:org_name_1``, ... when
    sub-organisations are included), never interpolated into the SQL text,
    so sql_params must be passed to execute() in both cases.
    """
    sql_org_filter = {}
    sql_params = {}
    if not include_sub_organisations:
        sql_org_filter["org_filter"] = '"group".name = :org_name'
        sql_params["org_name"] = organisation.name
    else:
        sub_org_filters = []
        for index, org in enumerate(go_down_tree(organisation)):
            param_name = "org_name_%d" % index
            sub_org_filters.append('"group".name = :%s' % param_name)
            sql_params[param_name] = org.name
        sql_org_filter["org_filter"] = "(%s)" % " or ".join(sub_org_filters)
    return sql_org_filter, sql_params
示例#9
0
def sql_to_filter_by_organisation(organisation,
                                  include_sub_organisations=False):
    '''
    Returns: (sql_org_filter, sql_params)
    In your sql you need:
          WHERE %(org_filter)s
    Run this function:
          sql_org_filter, sql_params = sql_to_filter_by_organisation( ... )
    And execute your sql with the tuple:
          rows = model.Session.execute(sql % sql_org_filter, sql_params)

    Organisation names are always passed as bind parameters (``:org_name``
    for a single organisation, ``:org_name_0``, ``:org_name_1``, ... when
    sub-organisations are included), never interpolated into the SQL text,
    so sql_params must be passed to execute() in both cases.
    '''
    sql_org_filter = {}
    sql_params = {}
    if not include_sub_organisations:
        sql_org_filter['org_filter'] = '"group".name = :org_name'
        sql_params['org_name'] = organisation.name
    else:
        sub_org_filters = []
        for index, org in enumerate(go_down_tree(organisation)):
            param_name = 'org_name_%d' % index
            sub_org_filters.append('"group".name = :%s' % param_name)
            sql_params[param_name] = org.name
        sql_org_filter['org_filter'] = '(%s)' % ' or '.join(sub_org_filters)
    return sql_org_filter, sql_params
    def export(cls, csv_filepath):
        """Write one CSV row per publisher reached from the top-level publishers.

        ``cls.header`` is written first, then a five-column row (id, title,
        parent title, category, spending_published_by) for every group
        yielded by ``publisher.go_down_tree`` for each top-level publisher.
        Rows are UTF-8 encoded and every cell is wrapped in double quotes,
        with embedded double quotes doubled.

        :param csv_filepath: path of the CSV file to (over)write.
        :raises AssertionError: (via ``assert_equal``) if the set of written
            groups differs from the active publishers in the database.
        """
        csv_filepath = os.path.abspath(csv_filepath)
        log = global_log

        from ckan import model

        expected_publishers = set(model.Session.query(model.Group).\
                                  filter_by(state='active').\
                                  filter_by(type='publisher').all())
        written_publishers = set()

        # 'with' guarantees the file is closed even if a row fails to export
        with open(csv_filepath, 'w') as f:
            f.write(cls.header)

            for top_level_pub in publisher.get_top_level():
                for pub in publisher.go_down_tree(top_level_pub):
                    if pub in written_publishers:
                        warn('publisher written twice: %s %s',
                             pub.name, pub.id)
                    written_publishers.add(pub)

                    # The top-level publisher itself gets no parent title
                    if top_level_pub.id != pub.id:
                        parent_pub_title = top_level_pub.title
                    else:
                        parent_pub_title = ''

                    extras = dict(pub.extras)
                    csv_row_values = (
                        pub.id,
                        pub.title,
                        parent_pub_title,
                        extras.get('category', ''),
                        extras.get('spending_published_by', ''),
                    )
                    # Double any embedded quote so the quoted cell stays
                    # well-formed
                    csv_line = ','.join([
                        '"%s"' % ('%s' % cell).replace('"', '""')
                        for cell in csv_row_values
                    ])
                    log.info(csv_line)
                    # Encode explicitly: writing a non-ASCII unicode string
                    # to a plain file raises UnicodeEncodeError in Python 2
                    f.write(csv_line.encode('utf8') + '\n')
                    f.flush()

        # checks
        # NOTE(review): written_publishers is already a set, so this first
        # comparison cannot fail; kept as in the original.
        assert_equal(sorted(written_publishers),
                     sorted(set(written_publishers)))
        assert_equal(expected_publishers, set(written_publishers))
示例#11
0
    def export(cls, csv_filepath):
        """Write one CSV row per organisation reached from the top-level groups.

        ``cls.header`` is written first, then a five-column row (id, title,
        parent title, category, spending_published_by) for every group
        yielded by ``publisher.go_down_tree`` for each top-level
        organisation. Rows are UTF-8 encoded and every cell is wrapped in
        double quotes, with embedded double quotes doubled.

        :param csv_filepath: path of the CSV file to (over)write.
        :raises AssertionError: (via ``assert_equal``) if the set of written
            groups differs from the active organisations in the database.
        """
        csv_filepath = os.path.abspath(csv_filepath)
        log = global_log

        from ckan import model

        expected_publishers = set(model.Session.query(model.Group).\
                                  filter_by(state='active').\
                                  filter_by(type='organization').all())
        written_publishers = set()

        # 'with' guarantees the file is closed even if a row fails to export
        with open(csv_filepath, 'w') as f:
            f.write(cls.header)

            for top_level_pub in model.Group.get_top_level_groups(type='organization'):
                for pub in publisher.go_down_tree(top_level_pub):
                    if pub in written_publishers:
                        warn('publisher written twice: %s %s', pub.name, pub.id)
                    written_publishers.add(pub)

                    # The top-level organisation itself gets no parent title
                    if top_level_pub.id != pub.id:
                        parent_pub_title = top_level_pub.title
                    else:
                        parent_pub_title = ''

                    extras = dict(pub.extras)
                    csv_row_values = (
                        pub.id,
                        pub.title,
                        parent_pub_title,
                        extras.get('category', ''),
                        extras.get('spending_published_by', ''),
                    )
                    # Double any embedded quote so the quoted cell stays
                    # well-formed
                    csv_line = ','.join(['"%s"' % ('%s' % cell).replace('"', '""')
                                         for cell in csv_row_values])
                    log.info(csv_line)
                    # Encode explicitly: writing a non-ASCII unicode string
                    # to a plain file raises UnicodeEncodeError in Python 2
                    f.write(csv_line.encode('utf8') + '\n')
                    f.flush()

        # checks
        # NOTE(review): written_publishers is already a set, so this first
        # comparison cannot fail; kept as in the original.
        assert_equal(sorted(written_publishers), sorted(set(written_publishers)))
        assert_equal(expected_publishers, set(written_publishers))
示例#12
0
def sql_to_filter_by_organisation(organisation,
                                  include_sub_organisations=False):
    '''
    Returns: (sql_org_filter, sql_params)
    In your sql you need:
          WHERE %(org_filter)s
    Run this function:
          sql_org_filter, sql_params = sql_to_filter_by_organisation( ... )
    And execute your sql with the tuple:
          rows = model.Session.execute(sql % sql_org_filter, sql_params)

    Organisation names are always passed as bind parameters (``:org_name``
    for a single organisation, ``:org_name_0``, ``:org_name_1``, ... when
    sub-organisations are included), never interpolated into the SQL text,
    so sql_params must be passed to execute() in both cases.
    '''
    sql_org_filter = {}
    sql_params = {}
    if not include_sub_organisations:
        sql_org_filter['org_filter'] = '"group".name = :org_name'
        sql_params['org_name'] = organisation.name
    else:
        sub_org_filters = []
        for index, org in enumerate(go_down_tree(organisation)):
            param_name = 'org_name_%d' % index
            sub_org_filters.append('"group".name = :%s' % param_name)
            sql_params[param_name] = org.name
        sql_org_filter['org_filter'] = '(%s)' % ' or '.join(sub_org_filters)
    return sql_org_filter, sql_params
示例#13
0
    def get_publishers(self):
        """Return the publishers available to the current user, keyed by name.

        Sysadmins get every group of type 'publisher'. Other logged-in users
        get the groups they are an editor of, plus every group yielded by
        walking down the tree from each group they are an admin of. With no
        user object the result is empty.

        :returns: dict mapping publisher name to a dict of the fields
            exposed to the template (name, id, title and the contact-* and
            foi-* extras, each defaulting to '').
        """
        from ckan.model.group import Group

        if Authorizer().is_sysadmin(c.user):
            groups = Group.all(group_type='publisher')
        elif c.userobj:
            # need to get c.userobj again as it may be detached from the
            # session since the last time we called get_groups (it caches)
            c.userobj = model.User.by_name(c.user)

            # For each group where the user is an admin, we should also include
            # all of the child publishers.
            admin_groups = set()
            for g in c.userobj.get_groups('publisher', 'admin'):
                for pub in publib.go_down_tree(g):
                    admin_groups.add(pub)

            editor_groups = c.userobj.get_groups('publisher', 'editor')
            groups = list(admin_groups) + editor_groups
        else: # anonymous user shouldn't have access to this page anyway.
            groups = []

        # Be explicit about which fields we make available in the template
        groups = [ {
            'name': g.name,
            'id': g.id,
            'title': g.title,
            'contact-name': g.extras.get('contact-name', ''),
            'contact-email': g.extras.get('contact-email', ''),
            'contact-phone': g.extras.get('contact-phone', ''),
            'foi-name': g.extras.get('foi-name', ''),
            'foi-email': g.extras.get('foi-email', ''),
            'foi-phone': g.extras.get('foi-phone', ''),
            # Read the 'foi-web' extra; this previously copied 'foi-name'
            'foi-web': g.extras.get('foi-web', ''),
        } for g in groups ]

        return dict( (g['name'], g) for g in groups )
示例#14
0
    def get_publishers(self):
        """Return the organisations available to the current user, keyed by name.

        Sysadmins get every group of type 'organization'. Other logged-in
        users get the groups they are an editor of, plus every group yielded
        by walking down the tree from each group they are an admin of. With
        no user object the result is empty.

        :returns: dict mapping group name to a dict of the fields exposed to
            the template (name, id, title and the contact-* and foi-* extras,
            each defaulting to '').
        """
        from ckan.model.group import Group

        if dgu_helpers.is_sysadmin():
            visible_groups = Group.all(group_type='organization')
        elif c.userobj:
            # Re-fetch the user object: the cached one may have become
            # detached from the session since get_groups was last called.
            c.userobj = model.User.by_name(c.user)

            # Being admin of a group also grants all the groups beneath it.
            administered = set()
            for admin_group in c.userobj.get_groups('organization', 'admin'):
                administered.update(publib.go_down_tree(admin_group))

            edited = c.userobj.get_groups('organization', 'editor')
            visible_groups = list(administered) + edited
        else:
            # anonymous user shouldn't have access to this page anyway.
            visible_groups = []

        # Only these extras are made available to the template
        exposed_extras = ('contact-name', 'contact-email', 'contact-phone',
                          'foi-name', 'foi-email', 'foi-phone', 'foi-web')

        publishers = {}
        for group in visible_groups:
            details = {
                'name': group.name,
                'id': group.id,
                'title': group.title,
            }
            for extra_key in exposed_extras:
                details[extra_key] = group.extras.get(extra_key, '')
            publishers[details['name']] = details

        return publishers
示例#15
0
            except DrupalRequestError, e:
                user_emails[user.name] = user.email
            else:
                user_emails[user.name] = user_properties['mail']
        else:
            # not a drupal user
            user_emails[user.name] = user.email
    return user_emails[user.name]

# NHS publishers
# Walk every group reached from the 'national-health-service' group and, for
# each, collect its admin and editor users and whether they have an email.
nhs = model.Group.by_name('national-health-service')
assert nhs
pub_stats = StatsList()
pct_rows = []
non_pct_rows = []
for pub in publisher_lib.go_down_tree(nhs):
    # Filter to PCTs
    title = pub.title
    # Titles that match the PCT patterns below but are explicitly excluded
    not_pct = ('NHS Choices', 'NHS Connecting for Health', 'NHS Connecting for Health and NHS Business Services Authority')
    # A title counts as a PCT when it contains 'Care Trust', 'PCT' or
    # 'Care Tust', or starts with 'NHS ', provided it is not in not_pct and
    # does not contain 'Foundation'.
    # NOTE(review): 'Care Tust' presumably matches a misspelt title in the
    # data - confirm before changing.
    is_pct = ('Care Trust' in title or 'PCT' in title or title.startswith('NHS ') or 'Care Tust' in title) \
              and title not in not_pct and 'Foundation' not in title
    # Get the admins & editors
    admins = pub.members_of_type(model.User, 'admin').all()
    editors = pub.members_of_type(model.User, 'editor').all()
    # Get their email addresses
    users_with_email = []
    users_without_email = []
    warnings = None
    for user in admins:
        # get_email_for_user returns a falsy value when no address is known
        if get_email_for_user(user):
            users_with_email.append(user)
示例#16
0
def organisation_dataset_scores(organisation_name,
                                include_sub_organisations=False):
    '''
    Returns a dictionary detailing openness scores for the organisation
    for each dataset.

    i.e.:
    {'publisher_name': 'cabinet-office',
     'publisher_title': 'Cabinet Office',
     'data': [
       {'package_name', 'package_title', 'resource_url', 'openness_score', 'reason', 'last_updated', 'is_broken', 'format'}
      ...]

    NB the list does not contain datasets that have 0 resources and therefore
       score 0

    Organisation names are passed to the query as bind parameters only;
    they are never interpolated into the SQL text.

    Calls abort(404) when no group with the given name exists.
    '''
    values = {}
    sql = """
        select package.id as package_id,
               task_status.key as task_status_key,
               task_status.value as task_status_value,
               task_status.error as task_status_error,
               task_status.last_updated as task_status_last_updated,
               resource.id as resource_id,
               resource.url as resource_url,
               resource.position,
               package.title as package_title,
               package.name as package_name,
               "group".id as publisher_id,
               "group".name as publisher_name,
               "group".title as publisher_title
        from resource
            left join task_status on task_status.entity_id = resource.id
            left join resource_group on resource.resource_group_id = resource_group.id
            left join package on resource_group.package_id = package.id
            left join member on member.table_id = package.id
            left join "group" on member.group_id = "group".id
        where
            entity_id in (select entity_id from task_status where task_status.task_type='qa')
            and package.state = 'active'
            and resource.state='active'
            and resource_group.state='active'
            and "group".state='active'
            and task_status.task_type='qa'
            and task_status.key='status'
            %(org_filter)s
        order by package.title, package.name, resource.position
        """
    sql_options = {}
    org = model.Group.by_name(organisation_name)
    if not org:
        abort(404, 'Publisher not found')
    organisation_title = org.title

    if not include_sub_organisations:
        sql_options['org_filter'] = 'and "group".name = :org_name'
        values['org_name'] = organisation_name
    else:
        # One bind parameter per organisation in the tree. The loop variable
        # is deliberately not called 'org' so the looked-up organisation is
        # not rebound.
        sub_org_filters = []
        for index, sub_org in enumerate(go_down_tree(org)):
            param_name = 'org_name_%d' % index
            sub_org_filters.append('"group".name = :%s' % param_name)
            values[param_name] = sub_org.name
        sql_options['org_filter'] = 'and (%s)' % ' or '.join(sub_org_filters)

    rows = model.Session.execute(sql % sql_options, values)
    data = dict() # dataset_name: {properties}
    for row in rows:
        package_data = data.get(row.package_name)
        if not package_data:
            package_data = OrderedDict((
                ('dataset_title', row.package_title),
                ('dataset_name', row.package_name),
                ('publisher_title', row.publisher_title),
                ('publisher_name', row.publisher_name),
                # the rest are placeholders to hold the details
                # of the highest scoring resource
                ('resource_position', None),
                ('resource_id', None),
                ('resource_url', None),
                ('openness_score', None),
                ('openness_score_reason', None),
                ('last_updated', None),
                ))
        # Keep the details of the highest scoring resource seen so far
        if row.task_status_value > package_data['openness_score']:
            package_data['resource_position'] = row.position
            package_data['resource_id'] = row.resource_id
            package_data['resource_url'] = row.resource_url

            try:
                package_data.update(json.loads(row.task_status_error))
            except ValueError as e:
                # Log the offending value from the row; the bare name
                # 'task_status_error' used previously was undefined here.
                log.error('QA status "error" should have been in JSON format, but found: "%s" %s', row.task_status_error, e)
                package_data['reason'] = 'Could not display reason due to a system error'

            package_data['openness_score'] = row.task_status_value
            package_data['openness_score_reason'] = package_data['reason'] # deprecated
            package_data['last_updated'] = row.task_status_last_updated

        data[row.package_name] = package_data
    # NOTE(review): the visible function ends here without returning `data`
    # or using `organisation_title`; the tail appears to be missing from
    # this copy.
示例#17
0
def feedback_report(publisher,
                    include_sub_publishers=False,
                    include_published=False,
                    use_cache=False):
    """
    Build the per-dataset feedback report for a publisher.

    :param publisher: group to report on; a falsy value reports on the
        active package memberships of all groups.
    :param include_sub_publishers: also cover every group yielded by
        go_down_tree(publisher).
    :param include_published: when False, only datasets with a truthy
        'unpublished' extra are reported.
    :param use_cache: return the report from model.DataCache when a fresh
        copy is found there.
    :returns: tuple of (list of per-dataset dicts sorted by 'package-title',
        report date); the date is the string 'just now' when the report was
        generated by this call.
    """
    import collections
    import datetime
    import ckan.lib.helpers as helpers
    from ckanext.dgu.lib.publisher import go_down_tree
    from ckanext.dgu.model.feedback import Feedback
    from operator import itemgetter
    from sqlalchemy.util import OrderedDict

    publisher_name = publisher.name if publisher else '__all__'

    if use_cache:
        key = 'feedback-all-report' if include_published else 'feedback-report'
        if include_sub_publishers:
            key = "".join([key, '-withsub'])

        report, report_date = model.DataCache.get_if_fresh(publisher_name, key)
        if report is not None:
            log.info("Found feedback report in cache")
            return report, report_date
        log.info("Did not find cached report - %s/%s" %
                 (publisher_name, key))

    # Active memberships of active packages, optionally restricted to the
    # publisher (and its sub-publishers).
    memberships = model.Session.query(model.Member)\
        .join(model.Package, model.Package.id==model.Member.table_id)\
        .filter(model.Member.state == 'active')
    if publisher:
        if include_sub_publishers:
            subtree = sorted(list(go_down_tree(publisher)),
                             key=lambda grp: grp.title)
            group_ids = [grp.id for grp in subtree]
        else:
            group_ids = [publisher.id]
        memberships = memberships.filter(
            model.Member.group_id.in_(group_ids))
    memberships = memberships\
        .filter(model.Member.table_name == 'package')\
        .filter(model.Package.state == 'active')

    feedback_kinds = ('economic', 'social', 'linked', 'other', 'effective')

    results = []
    for member in memberships.all():
        pkg = model.Package.get(member.table_id)

        # Unless include_published is set, skip datasets whose 'unpublished'
        # extra is missing or falsy.
        if not (include_published or pkg.extras.get('unpublished', False)):
            continue

        row = collections.defaultdict(int)
        row['publisher-name'] = member.group.name
        row['generated-at'] = helpers.render_datetime(
            datetime.datetime.now(), "%d/%m/%Y %H:%M")
        row['publisher-title'] = member.group.title
        row['package-name'] = pkg.name
        row['package-title'] = pkg.title
        row['publish-date'] = pkg.extras.get('publish-date', '')

        # Count, per kind, the visible and active feedback items that have
        # that kind set.
        feedback_items = model.Session.query(Feedback)\
            .filter(Feedback.visible == True)\
            .filter(Feedback.package_id == member.table_id )\
            .filter(Feedback.active == True )
        for item in feedback_items:
            for kind in feedback_kinds:
                if getattr(item, kind):
                    row[kind] += 1

        row['total-comments'] = sum(row[kind] for kind in feedback_kinds)
        results.append(row)

    log.info(
        'Report generated: feedback_report publishers=%s subpub=%s published=%s',
        publisher.name if publisher else 'all', include_sub_publishers,
        include_published)
    return sorted(results, key=itemgetter('package-title')), 'just now'
示例#18
0
def feedback_report(publisher, include_sub_publishers=False, include_published=False, use_cache=False):
    """
    Build a per-dataset summary of feedback comments.

    For the publisher provided (and optionally for its sub-publishers) this
    counts, for every active dataset, the visible and active Feedback rows
    that are flagged economic / social / linked / other / effective.

    :param publisher: the publisher (group) to report on; a false value
        means report on the datasets of every publisher.
    :param include_sub_publishers: when true, also report on every group
        yielded by ``go_down_tree(publisher)``.
    :param include_published: when false (the default) only datasets with a
        true ``unpublished`` extra are reported; when true every dataset is.
    :param use_cache: when true, return the value stored in DataCache for
        this publisher/option combination if a fresh one exists.
    :returns: a list with one ``defaultdict(int)`` per dataset, sorted by
        ``package-title``. Each has the keys ``publisher-name``,
        ``publisher-title``, ``generated-at``, ``package-name``,
        ``package-title``, ``publish-date``, the five category counters and
        ``total-comments``. On a cache hit the cached value is returned
        unchanged (NOTE(review): its shape is decided by whoever fills the
        cache - confirm it matches).
    """
    import collections
    import datetime
    import ckan.lib.helpers as helpers
    from ckanext.dgu.lib.publisher import go_down_tree
    from ckanext.dgu.model.feedback import Feedback
    from operator import itemgetter

    publisher_name = publisher.name if publisher else '__all__'

    if use_cache:
        cache_key = 'feedback-all-report' if include_published else 'feedback-report'
        if include_sub_publishers:
            cache_key += '-withsubpub'
        cache = model.DataCache.get_fresh(publisher_name, cache_key)
        if cache is not None:
            log.info("Found feedback report in cache")
            return cache
        log.info("Did not find cached report - %s/%s", publisher_name, cache_key)

    # Active memberships that link an active package to a group.  The group
    # restriction is only added when a specific publisher was asked for.
    memberships = model.Session.query(model.Member)\
        .join(model.Package, model.Package.id == model.Member.table_id)\
        .filter(model.Member.state == 'active')
    if publisher:
        if include_sub_publishers:
            # Only the ids are needed for the IN clause, so no ordering
            # of the groups is required.
            group_ids = [group.id for group in go_down_tree(publisher)]
        else:
            group_ids = [publisher.id]
        memberships = memberships.filter(model.Member.group_id.in_(group_ids))
    memberships = memberships\
        .filter(model.Member.table_name == 'package')\
        .filter(model.Package.state == 'active')

    categories = ('economic', 'social', 'linked', 'other', 'effective')
    # Rendered once so that every row of one report carries the same stamp.
    generated_at = helpers.render_datetime(datetime.datetime.now(),
                                           "%d/%m/%Y %H:%M")

    results = []
    for member in memberships.all():
        pkg = model.Package.get(member.table_id)

        # By default the report covers unpublished datasets only; published
        # ones are skipped unless include_published was requested.
        if not include_published and not pkg.extras.get('unpublished', False):
            continue

        data = collections.defaultdict(int)
        data['publisher-name'] = member.group.name
        data['generated-at'] = generated_at
        data['publisher-title'] = member.group.title
        data['package-name'] = pkg.name
        data['package-title'] = pkg.title
        data['publish-date'] = pkg.extras.get('publish-date', '')

        # "== True" is deliberate: these are SQLAlchemy column expressions.
        feedback_items = model.Session.query(Feedback)\
            .filter(Feedback.visible == True)\
            .filter(Feedback.package_id == member.table_id)\
            .filter(Feedback.active == True)
        for item in feedback_items:
            for category in categories:
                if getattr(item, category):
                    data[category] += 1

        # Reading each counter through the defaultdict also guarantees that
        # all five category keys exist in the row, even when they are zero.
        data['total-comments'] = sum(data[category] for category in categories)
        results.append(data)

    return sorted(results, key=itemgetter('package-title'))
                user_emails[user.name] = user.email
            else:
                user_emails[user.name] = user_properties['mail']
        else:
            # not a drupal user
            user_emails[user.name] = user.email
    return user_emails[user.name]


# NHS publishers
# Look up the 'national-health-service' group and walk every publisher that
# go_down_tree() yields for it, classifying each one as a PCT or not and
# collecting its admin and editor users.
nhs = model.Group.by_name('national-health-service')
# Stop straight away if the NHS group does not exist.
assert nhs
pub_stats = StatsList()
# NOTE(review): presumably filled further down the loop body, which is not
# visible in this excerpt - confirm.
pct_rows = []
non_pct_rows = []
for pub in publisher_lib.go_down_tree(nhs):
    # Filter to PCTs
    title = pub.title
    # Titles that would match the PCT test below but must not count as PCTs.
    not_pct = ('NHS Choices', 'NHS Connecting for Health',
               'NHS Connecting for Health and NHS Business Services Authority')
    # A publisher counts as a PCT when its title contains 'Care Trust' or
    # 'PCT', starts with 'NHS ' or contains 'Care Tust', unless the title is
    # listed in not_pct or contains 'Foundation'.
    # NOTE(review): 'Care Tust' looks like a misspelling matched on purpose
    # to catch a mistyped title - confirm before "fixing" it.
    is_pct = ('Care Trust' in title or 'PCT' in title or title.startswith('NHS ') or 'Care Tust' in title) \
              and title not in not_pct and 'Foundation' not in title
    # Get the admins & editors
    admins = pub.members_of_type(model.User, 'admin').all()
    editors = pub.members_of_type(model.User, 'editor').all()
    # Get their email addresses
    users_with_email = []
    users_without_email = []
    warnings = None
    for user in admins:
        if get_email_for_user(user):
示例#20
0
        try:
            c.group_dict = get_action('organization_show')(context, {"id": id})
            c.group = context['group']
        except ObjectNotFound:
            abort(404, 'Organization not found')
        except NotAuthorized:
            abort(401, 'Unauthorized to read Organization %s' % id)

        try:
            context['group'] = c.group
            check_access('organization_update', context)
        except NotAuthorized, e:
            abort(401, 'User %r not authorized to download unpublished '% (c.user))

        groups = [c.group]
        if request.params.get('include_sub') == 'true':
            groups = go_down_tree(c.group)

        # Set the content-disposition so that it downloads the file
        # response.headers['Content-Type'] = "text/plain; charset=utf-8"
        response.headers['Content-Type'] = "text/csv; charset=utf-8"
        response.headers['Content-Disposition'] = str('attachment; filename=%s-inventory.csv' % (c.group.name,))

        writer = csv.writer(response)
        inventory_lib.render_inventory_header(writer)
        for gp in groups:
            ds = gp.members_of_type(model.Package).all()
            inventory_lib.render_inventory_row(writer, ds, gp)

示例#21
0
def organisation_dataset_scores(organisation_name,
                                include_sub_organisations=False):
    '''
    Returns a dictionary detailing openness scores for the organisation
    for each dataset.

    i.e.:
    {'publisher_name': 'cabinet-office',
     'publisher_title:': 'Cabinet Office',
     'data': [
       {'package_name', 'package_title', 'resource_url', 'openness_score', 'reason', 'last_updated', 'is_broken', 'format'}
      ...]

    NB the list does not contain datasets that have 0 resources and therefore
       score 0

    '''
    values = {}
    sql = """
        select package.id as package_id,
               task_status.key as task_status_key,
               task_status.value as task_status_value,
               task_status.error as task_status_error,
               task_status.last_updated as task_status_last_updated,
               resource.id as resource_id,
               resource.url as resource_url,
               resource.position,
               package.title as package_title,
               package.name as package_name,
               "group".id as publisher_id,
               "group".name as publisher_name,
               "group".title as publisher_title
        from resource
            left join task_status on task_status.entity_id = resource.id
            left join resource_group on resource.resource_group_id = resource_group.id
            left join package on resource_group.package_id = package.id
            left join member on member.table_id = package.id
            left join "group" on member.group_id = "group".id
        where
            entity_id in (select entity_id from task_status where task_status.task_type='qa')
            and package.state = 'active'
            and resource.state='active'
            and resource_group.state='active'
            and "group".state='active'
            and task_status.task_type='qa'
            and task_status.key='status'
            %(org_filter)s
        order by package.title, package.name, resource.position
        """
    sql_options = {}
    org = model.Group.by_name(organisation_name)
    if not org:
        abort(404, 'Publisher not found')
    organisation_title = org.title

    if not include_sub_organisations:
        sql_options['org_filter'] = 'and "group".name = :org_name'
        values['org_name'] = organisation_name
    else:
        sub_org_filters = [
            '"group".name=\'%s\'' % org.name for org in go_down_tree(org)
        ]
        sql_options['org_filter'] = 'and (%s)' % ' or '.join(sub_org_filters)

    rows = model.Session.execute(sql % sql_options, values)
    data = dict()  # dataset_name: {properties}
    for row in rows:
        package_data = data.get(row.package_name)
        if not package_data:
            package_data = OrderedDict((
                ('dataset_title', row.package_title),
                ('dataset_name', row.package_name),
                ('publisher_title', row.publisher_title),
                ('publisher_name', row.publisher_name),
                # the rest are placeholders to hold the details
                # of the highest scoring resource
                ('resource_position', None),
                ('resource_id', None),
                ('resource_url', None),
                ('openness_score', None),
                ('openness_score_reason', None),
                ('last_updated', None),
            ))
        if row.task_status_value > package_data['openness_score']:
            package_data['resource_position'] = row.position
            package_data['resource_id'] = row.resource_id
            package_data['resource_url'] = row.resource_url

            try:
                package_data.update(json.loads(row.task_status_error))
            except ValueError, e:
                log.error(
                    'QA status "error" should have been in JSON format, but found: "%s" %s',
                    task_status_error, e)
                package_data[
                    'reason'] = 'Could not display reason due to a system error'

            package_data['openness_score'] = row.task_status_value
            package_data['openness_score_reason'] = package_data[
                'reason']  # deprecated
            package_data['last_updated'] = row.task_status_last_updated

        data[row.package_name] = package_data