示例#1
0
    def portal_update_worker(self, source):
        """
        a process that accepts package ids on stdin which are passed to
        the package_show API on the remote CKAN instance and compared
        to the local version of the same package.  The local package is
        then created, updated, deleted or left unchanged.  This process
        outputs that action as a string 'created', 'updated', 'deleted'
        or 'unchanged'

        :param source: URL of the remote (registry) CKAN instance
        """
        registry = RemoteCKAN(source)
        portal = LocalCKAN()
        now = datetime.now()

        # read ids until EOF (readline returns '' at end of stream)
        for package_id in iter(sys.stdin.readline, ''):
            # strip the trailing newline once; the id is used three times
            pid = package_id.strip()
            try:
                data = registry.action.package_show(id=pid)
                source_pkg = data['result']
            except NotAuthorized:
                # inaccessible on the registry: treat as absent
                source_pkg = None

            _trim_package(source_pkg)

            if source_pkg:
                # treat unpublished packages same as deleted packages
                if not source_pkg['portal_release_date'] or isodate(
                        source_pkg['portal_release_date'], None) > now:
                    source_pkg = None

            try:
                # don't pass user in context so deleted packages
                # raise NotAuthorized
                target_pkg = portal.call_action('package_show',
                    {'id': pid}, {})
            except (NotFound, NotAuthorized):
                target_pkg = None

            _trim_package(target_pkg)

            if target_pkg is None and source_pkg is None:
                result = 'unchanged'
            elif target_pkg is None:
                # CREATE
                portal.action.package_create(**source_pkg)
                result = 'created'
            elif source_pkg is None:
                # DELETE
                portal.action.package_delete(id=pid)
                result = 'deleted'
            elif source_pkg == target_pkg:
                result = 'unchanged'
            else:
                # UPDATE
                portal.action.package_update(**source_pkg)
                result = 'updated'

            sys.stdout.write(result + '\n')
            try:
                sys.stdout.flush()
            except IOError:
                # parent closed the pipe: stop cleanly
                break
示例#2
0
    def setup(self):
        """
        Create the fixtures shared by the tests: a sysadmin, a normal
        user added to a bilingual-titled organization as an editor,
        action callers for each, and two package dicts (incomplete and
        complete).
        """
        self.sysadmin_user = factories.Sysadmin()
        self.normal_user = factories.User()
        # bilingual title: "<english> | <french>"
        self.org = factories.Organization(
            title='en org name | fr org name')

        self.sysadmin_action = LocalCKAN(
            username=self.sysadmin_user['name']).action
        self.normal_action = LocalCKAN(
            username=self.normal_user['name']).action
        # action caller with no username (anonymous/site context)
        self.action = LocalCKAN().action

        # the normal user needs editor rights on the org to create
        # packages owned by it
        self.sysadmin_action.organization_member_create(
            username=self.normal_user['name'],
            id=self.org['name'],
            role='editor')

        # lacks the fields added to complete_pkg below
        self.incomplete_pkg = {
            'type': 'dataset',
            'collection': 'primary',
            'title_translated': {'en': u'A Novel By Tolstoy'},
            'license_id': 'ca-ogl-lgo',
            'ready_to_publish': 'true',
            'imso_approval': 'true',
            'jurisdiction': 'federal',
            'maintainer_email': '*****@*****.**',
            'restrictions': 'unrestricted',
            'resources': [{
                'name_translated': {'en': u'Full text.', 'fr': u'Full text.'},
                'format': u'TXT',
                'url': u'http://www.annakarenina.com/download/',
                'size': 42,
                'resource_type': 'dataset',
                'language': ['zxx'],
            }],
        }

        # incomplete_pkg plus the remaining fields (owner_org, french
        # title, frequency, notes, subject, date, keywords)
        self.complete_pkg = dict(self.incomplete_pkg,
            owner_org=self.org['name'],
            title_translated={
                'en': u'A Novel By Tolstoy', 'fr':u'Un novel par Tolstoy'},
            frequency=u'as_needed',
            notes_translated={'en': u'...', 'fr': u'...'},
            subject=[u'persons'],
            date_published=u'2013-01-01',
            keywords={'en': [u'book'], 'fr': [u'livre']},
            )
 def publish(self):
     """
     Set portal_release_date on each dataset selected in the form POST
     data, then redirect back to the publishing search page.
     """
     lc = LocalCKAN(username=c.user)

     # normalize the submitted date to "YYYY-MM-DD HH:MM:SS"
     publish_date = date_str_to_datetime(request.str_POST['publish_date']
         ).strftime("%Y-%m-%d %H:%M:%S")

     # get the list of package ids from the form POST data
     for key, package_id in request.str_POST.iteritems():
         if key == 'publish':
             old = lc.action.package_show(id=package_id)
             lc.call_action('package_update', dict(old,
                 portal_release_date=publish_date))

     # return to the publishing interface
     url = h.url_for(controller='ckanext.canada.controller:PublishController',
                     action='search')
     redirect(url)
    def test_invalid_url(self):
        """
        resource_create must reject a malformed URL with the
        'Please provide a valid URL' validation error.
        """
        # indentation normalized: the original mixed tabs and spaces,
        # which is a TabError under Python 3
        lc = LocalCKAN()

        try:
            dataset = factories.Dataset()
            lc.call_action(
                'resource_create',
                {
                    'package_id': dataset['name'],
                    'name': 'Test-File',
                    'url': 'https://example.com]'
                }
            )
        except ValidationError as e:
            eq_(
                e.error_dict['url'],
                [u'Please provide a valid URL']
            )
        else:
            # the call must not succeed
            raise AssertionError('ValidationError not raised')
示例#5
0
    def _load_one_csv_file(self, name):
        """
        Load one CSV file of records into the datastore.

        ``name`` is a path ending in ``<resource_name>.csv``.  Records
        are read in per-organization batches and written into the
        matching resource of that organization's dataset.

        Returns 0 on success, 1 when a target dataset or resource
        cannot be uniquely identified.
        """
        path, csv_name = os.path.split(name)
        assert csv_name.endswith('.csv'), csv_name
        # file name minus ".csv" identifies the resource
        resource_name = csv_name[:-4]
        print resource_name
        chromo = get_chromo(resource_name)
        dataset_type = chromo['dataset_type']
        # upsert needs a primary key; otherwise plain insert
        method = 'upsert' if chromo.get('datastore_primary_key') else 'insert'
        lc = LocalCKAN()

        for org_name, records in csv_data_batch(name, chromo):
            # expect exactly one dataset of this type per organization
            results = lc.action.package_search(
                q='type:%s organization:%s' % (dataset_type, org_name),
                rows=2)['results']
            if not results:
                print 'type:%s organization:%s not found!' % (
                    dataset_type, org_name)
                return 1
            if len(results) > 1:
                print 'type:%s organization:%s multiple found!' % (
                    dataset_type, org_name)
                return 1
            # find the resource matching the csv file name; the else
            # clause fires only when no break happened (not found)
            for r in results[0]['resources']:
                if r['name'] == resource_name:
                    break
            else:
                print 'type:%s organization:%s missing resource:%s' % (
                    dataset_type, org_name, resource_name)
                return 1

            print '-', org_name, len(records)
            # `r` is the matching resource found by the loop above
            lc.action.datastore_upsert(
                method=method,
                resource_id=r['id'],
                records=records)
        return 0
示例#6
0
 def rebuild_external_search(self):
     """Delegate to search_integration.rebuild_search_index using the
     command options (unindexed_only / refresh_index)."""
     options = self.options
     search_integration.rebuild_search_index(
         LocalCKAN(),
         options.unindexed_only,
         options.refresh_index,
     )
示例#7
0
    def _portal_update(self, portal_ini, activity_date):
        """
        Feed packages changed since ``activity_date`` to a pool of
        'canada copy-datasets' worker subprocesses.

        :param portal_ini: config file path passed to workers via -c
        :param activity_date: a relative "past" expression matched by
            PAST_RE (days/hours/minutes), an ISO date string, or empty
            (defaults to 7 days ago)

        Each worker result line is printed and, when self.options.log
        is set, appended as a JSON line to that log file.
        """
        if activity_date:
            past = re.match(PAST_RE, activity_date)
            if past:
                # missing groups count as 0
                days, hours, minutes = (
                    int(x) if x else 0 for x in past.groups()
                )
                activity_date = datetime.now() - timedelta(
                    days=days,
                    seconds=(hours * 60 + minutes) * 60
                )
            else:
                activity_date = isodate(activity_date, None)
        else:
            # default window: one week back
            activity_date = datetime.now() - timedelta(days=7)

        log = None
        if self.options.log:
            log = open(self.options.log, 'a')

        registry = LocalCKAN()

        def changed_package_id_runs(start_date):
            # yield (packages, next_date) batches until no more changes
            while True:
                packages, next_date = self._changed_packages_since(
                    registry, start_date)
                if next_date is None:
                    return
                yield packages, next_date
                start_date = next_date

        # worker command: re-invoke this executable's copy-datasets
        # subcommand against the portal config
        cmd = [
            sys.argv[0],
            'canada',
            'copy-datasets',
            '-c',
            portal_ini
        ]
        if self.options.mirror:
            cmd.append('-m')

        pool = worker_pool(
            cmd,
            self.options.processes,
            [],
            stop_when_jobs_done=False,
            stop_on_keyboard_interrupt=False,
            )

        # Advance generator so we may call send() below
        pool.next()

        def append_log(finished, package_id, action, reason):
            # one JSON line per event; no-op when logging is disabled
            if not log:
                return
            log.write(json.dumps([
                datetime.now().isoformat(),
                finished,
                package_id,
                action,
                reason,
                ]) + '\n')
            log.flush()

        with _quiet_int_pipe():
            append_log(
                None,
                None,
                "started updating from:",
                activity_date.isoformat()
            )

            for packages, next_date in (
                    changed_package_id_runs(activity_date)):
                job_ids, finished, result = pool.send(enumerate(packages))
                stats = completion_stats(self.options.processes)
                while result is not None:
                    # workers report a JSON [package_id, action, reason]
                    package_id, action, reason = json.loads(result)
                    print job_ids, stats.next(), finished, package_id, \
                        action, reason
                    append_log(finished, package_id, action, reason)
                    job_ids, finished, result = pool.next()

                print " --- next batch starting at: " + next_date.isoformat()
                append_log(
                    None,
                    None,
                    "next batch starting at:",
                    next_date.isoformat()
                )
                # record how far we have gotten after each batch
                self._portal_update_activity_date = next_date.isoformat()
            self._portal_update_completed = True
示例#8
0
def add_to_search_index(data_dict_id, in_bulk=False):
    """
    Index one dataset in the external search Solr core.

    :param data_dict_id: id or name of the package to index
    :param in_bulk: when True only add the document (the caller is
        expected to batch deletes/commits); when False delete any
        existing document with the same id, add the new one and commit
        immediately.

    Does nothing when search integration is disabled in the config.
    Exceptions are caught and logged, never raised to the caller.
    """
    log = logging.getLogger('ckan')
    od_search_solr_url = config.get(SEARCH_INTEGRATION_URL_OPTION, "")
    od_search_enabled = config.get(SEARCH_INTEGRATION_ENABLED_OPTION, False)
    od_search_od_url_en = config.get(SEARCH_INTEGRATION_OD_URL_EN_OPTION, "https://open.canada.ca/data/en/dataset/")
    od_search_od_url_fr = config.get(SEARCH_INTEGRATION_OD_URL_FR_OPTION, "https://ouvert.canada.ca/data/fr/dataset/")

    # bail out before fetching the package: the package_show call below
    # is wasted work when search integration is disabled
    if not od_search_enabled:
        return

    # Retrieve the full record - it has additional information including
    # organization title and metadata modified date that are not
    # available in the regular data dict
    portal = LocalCKAN()
    data_dict = portal.action.package_show(id=data_dict_id)

    try:
        # code -> bilingual display label lookups from scheming presets
        subject_codes = scheming_choices_label_by_value(scheming_get_preset('canada_subject')['choices'])
        type_codes = scheming_choices_label_by_value(scheming_get_preset('canada_resource_related_type')['choices'])
        collection_codes = scheming_choices_label_by_value(scheming_get_preset('canada_collection')['choices'])
        jurisdiction_codes = scheming_choices_label_by_value(scheming_get_preset('canada_jurisdiction')['choices'])
        resource_type_codes = scheming_choices_label_by_value(scheming_get_preset('canada_resource_type')['choices'])
        frequency_codes = scheming_choices_label_by_value(scheming_get_preset('canada_frequency')['choices'])

        # organization title is stored as "<english> | <french>"
        org_title = data_dict['organization']['title'].split('|')
        owner_org_title_en = org_title[0].strip()
        owner_org_title_fr = org_title[1].strip()

        # translated fields may arrive JSON-encoded or already parsed
        subjects_en = []
        subjects_fr = []
        subjects = json.loads(data_dict['subject']) if \
            isinstance(data_dict['subject'], str) else data_dict['subject']
        for s in subjects:
            # strip commas from the labels
            subjects_en.append(subject_codes['en'][s].replace(",", ""))
            subjects_fr.append(subject_codes['fr'][s].replace(",", ""))

        resource_type_en = []
        resource_type_fr = []
        resource_fmt = []
        resource_title_en = []
        resource_title_fr = []
        for r in data_dict['resources']:
            resource_type_en.append(
                resource_type_codes['en'][r['resource_type']]
                if r['resource_type'] in resource_type_codes['en'] else '')
            resource_type_fr.append(
                resource_type_codes['fr'][r['resource_type']]
                if r['resource_type'] in resource_type_codes['fr'] else '')
            resource_fmt.append(r['format'])

            resource_name = json.loads(r['name_translated']) if \
                isinstance(r['name_translated'], str) else r['name_translated']
            # prefer the direct translation, fall back to the
            # translated-from variant ('en-t-fr' / 'fr-t-en')
            if 'en' in resource_name:
                resource_title_en.append(resource_name['en'])
            elif 'en-t-fr' in resource_name:
                resource_title_en.append(resource_name['en-t-fr'])
            if 'fr' in resource_name:
                resource_title_fr.append(resource_name['fr'].strip())
            elif 'fr-t-en' in resource_name:
                resource_title_fr.append(resource_name['fr-t-en'].strip())
        display_options = []
        if 'display_flags' in data_dict:
            for d in data_dict['display_flags']:
                display_options.append(d)
        notes_translated = json.loads(data_dict['notes_translated']) if \
            isinstance(data_dict['notes_translated'], str) else data_dict['notes_translated']
        title_translated = json.loads(data_dict['title_translated']) if \
            isinstance(data_dict['title_translated'], str) else data_dict['title_translated']
        od_obj = {
            'portal_type_en_s': type_codes['en'][data_dict['type']],
            'portal_type_fr_s': type_codes['fr'][data_dict['type']],
            'collection_type_en_s': collection_codes['en'][data_dict['collection']],
            'collection_type_fr_s': collection_codes['fr'][data_dict['collection']],
            'jurisdiction_en_s': jurisdiction_codes['en'][data_dict['jurisdiction']],
            'jurisdiction_fr_s': jurisdiction_codes['fr'][data_dict['jurisdiction']],
            'owner_org_title_en_s': owner_org_title_en,
            'owner_org_title_fr_s': owner_org_title_fr,
            'subject_en_s': subjects_en,
            'subject_fr_s': subjects_fr,
            'resource_type_en_s': list(set(resource_type_en)),
            'resource_type_fr_s': list(set(resource_type_fr)),
            'update_cycle_en_s': frequency_codes['en'][data_dict['frequency']],
            'update_cycle_fr_s': frequency_codes['fr'][data_dict['frequency']],
            'id_name_s': data_dict['name'],
            'id': data_dict['name'],
            'owner_org_s': data_dict['owner_org'],
            'author_txt': data_dict['author'] if 'author' in data_dict else '',
            # fixed: test the parsed dict, not the raw field - the raw
            # value may be a JSON string and `'en' in <str>` would be a
            # substring test
            'description_txt_en': notes_translated['en'] if 'en' in notes_translated else '',
            'description_txt_fr': notes_translated['fr'] if 'fr' in notes_translated else '',
            'description_xlt_txt_fr': notes_translated['fr-t-en'] if 'fr-t-en' in notes_translated else '',
            'description_xlt_txt_en': notes_translated['en-t-fr'] if 'en-t-fr' in notes_translated else '',
            'title_en_s': title_translated['en'] if 'en' in title_translated else '',
            'title_fr_s': title_translated['fr'] if 'fr' in title_translated else '',
            'title_xlt_fr_s': title_translated['fr-t-en'] if 'fr-t-en' in title_translated else '',
            'title_xlt_en_s': title_translated['en-t-fr'] if 'en-t-fr' in title_translated else '',
            'resource_format_s': list(set(resource_fmt)),
            'resource_title_en_s': resource_title_en,
            'resource_title_fr_s': resource_title_fr,
            'last_modified_tdt': parser.parse(data_dict['metadata_modified']).replace(microsecond=0).isoformat() + 'Z',
            'published_tdt': parser.parse(data_dict['date_published']).replace(microsecond=0).isoformat() + 'Z',
            'ogp_link_en_s': '{0}{1}'.format(od_search_od_url_en, data_dict['name']),
            'ogp_link_fr_s': '{0}{1}'.format(od_search_od_url_fr, data_dict['name']),
            'display_options_s': display_options
        }

        if 'en' in notes_translated:
            od_obj['desc_summary_txt_en'] = get_summary(notes_translated['en'].strip(), 'en')
        elif 'en-t-fr' in notes_translated:
            od_obj['desc_summary_txt_en'] = get_summary(notes_translated['en-t-fr'].strip(), 'en')
        if 'fr' in notes_translated:
            od_obj['desc_summary_txt_fr'] = get_summary(notes_translated['fr'].strip(), 'fr')
        # fixed: the key tested must match the key read ('fr-t-en', not
        # 'en-t-fr'), otherwise this branch could raise KeyError and
        # the fr-t-en summary was never produced
        elif 'fr-t-en' in notes_translated:
            od_obj['desc_summary_txt_fr'] = get_summary(notes_translated['fr-t-en'].strip(), 'fr')

        keywords = json.loads(data_dict['keywords']) if \
            isinstance(data_dict['keywords'], str) else data_dict['keywords']
        if 'en' in keywords:
            od_obj['keywords_en_s'] = keywords['en']
        elif 'en-t-fr' in keywords:
            od_obj['keywords_xlt_en_s'] = keywords['en-t-fr']
        if 'fr' in keywords:
            od_obj['keywords_fr_s'] = keywords['fr']
        elif 'fr-t-en' in keywords:
            od_obj['keywords_xlt_fr_s'] = keywords['fr-t-en']

        if 'data_series_issue_identification' in data_dict:
            if 'en' in data_dict['data_series_issue_identification']:
                od_obj['data_series_issue_identification_en'] = data_dict['data_series_issue_identification']['en']
            else:
                od_obj['data_series_issue_ident_en'] = '-'
            if 'fr' in data_dict['data_series_issue_identification']:
                od_obj['data_series_issue_identification_fr'] = data_dict['data_series_issue_identification']['fr']
            else:
                od_obj['data_series_issue_ident_fr'] = '-'
        else:
            od_obj['data_series_issue_ident_en'] = '-'
            od_obj['data_series_issue_ident_fr'] = '-'

        solr = pysolr.Solr(od_search_solr_url)
        if in_bulk:
            solr.add([od_obj])
        else:
            # replace any existing document with the same id
            solr.delete(id=od_obj['id'])
            solr.add([od_obj])
            solr.commit()
    except Exception as x:
        # NOTE(review): x.message is Python 2 only and is not set for
        # every exception type; kept as-is for existing log format
        log.error("Exception: {} {}".format(x.message, x.args))
 def test_organization_schema_not_found(self):
     """Requesting a schema for an unknown organization type raises
     NotFound."""
     client = LocalCKAN("visitor")
     with pytest.raises(NotFound):
         client.action.scheming_organization_schema_show(type="elmo")
示例#10
0
class TestNAVLSchema(FunctionalTestBase):
    """Validation tests for the dataset schema, exercised through the
    CKAN action API as sysadmin, normal user and anonymous callers."""

    def setup(self):
        """
        Create shared fixtures: a sysadmin, a normal user (org editor),
        a bilingual-titled organization, and incomplete/complete
        package dicts.
        """
        self.sysadmin_user = factories.Sysadmin()
        self.normal_user = factories.User()
        self.org = factories.Organization(
            title='en org name | fr org name')

        self.sysadmin_action = LocalCKAN(
            username=self.sysadmin_user['name']).action
        self.normal_action = LocalCKAN(
            username=self.normal_user['name']).action
        self.action = LocalCKAN().action

        self.sysadmin_action.organization_member_create(
            username=self.normal_user['name'],
            id=self.org['name'],
            role='editor')

        # lacks the fields added to complete_pkg below
        self.incomplete_pkg = {
            'type': 'dataset',
            'collection': 'primary',
            'title_translated': {'en': u'A Novel By Tolstoy'},
            'license_id': 'ca-ogl-lgo',
            'ready_to_publish': 'true',
            'imso_approval': 'true',
            'jurisdiction': 'federal',
            'maintainer_email': '*****@*****.**',
            'restrictions': 'unrestricted',
            'resources': [{
                'name_translated': {'en': u'Full text.', 'fr': u'Full text.'},
                'format': u'TXT',
                'url': u'http://www.annakarenina.com/download/',
                'size': 42,
                'resource_type': 'dataset',
                'language': ['zxx'],
            }],
        }

        self.complete_pkg = dict(self.incomplete_pkg,
            owner_org=self.org['name'],
            title_translated={
                'en': u'A Novel By Tolstoy', 'fr':u'Un novel par Tolstoy'},
            frequency=u'as_needed',
            notes_translated={'en': u'...', 'fr': u'...'},
            subject=[u'persons'],
            date_published=u'2013-01-01',
            keywords={'en': [u'book'], 'fr': [u'livre']},
            )

    def test_basic_package(self):
        """Incomplete package is rejected; complete one round-trips."""
        assert_raises(ValidationError,
            self.normal_action.package_create,
            name='basic_package', **self.incomplete_pkg)

        resp = self.normal_action.package_create(
            name='basic_package', **self.complete_pkg)
        assert resp['title_translated']['fr'] == u'Un novel par Tolstoy'

        resp = self.action.package_show(id=resp['id'])
        assert resp['title_translated']['fr'] == u'Un novel par Tolstoy'

    def test_keyword_validation(self):
        """Keywords with double spaces, too-short or too-long values
        are rejected; reasonable keywords are accepted."""
        assert_raises(ValidationError,
            self.normal_action.package_create,
            name='keyword_validation',
            **dict(self.complete_pkg,
                keywords={'en':['test'], 'fr':['not  ok']}))

        assert_raises(ValidationError,
            self.normal_action.package_create,
            name='keyword_validation',
            **dict(self.complete_pkg,
                keywords={'en':['test'], 'fr':['one too short', 'q']}))

        assert_raises(ValidationError,
            self.normal_action.package_create,
            name='keyword_validation',
            **dict(self.complete_pkg,
                keywords={'en':['this is much too long' * 50], 'fr':['test']}))

        self.normal_action.package_create(
            name='keyword_validation',
            **dict(self.complete_pkg,
                keywords={'en':['these', 'ones', 'are', 'a-ok'], 'fr':['test']}))

    def test_custom_dataset_id(self):
        """A caller-supplied uuid is normalized to dashed form;
        duplicate and non-uuid ids are rejected."""
        my_uuid = '3056920043b943f1a1fb9e7974cbb997'
        norm_uuid = '30569200-43b9-43f1-a1fb-9e7974cbb997'
        self.normal_action.package_create(
            name='custom_dataset_id', id=my_uuid, **self.complete_pkg)

        resp = self.action.package_show(id='custom_dataset_id')
        assert resp['id'] == norm_uuid
        assert resp['name'] == 'custom_dataset_id'

        assert_raises(ValidationError,
            self.sysadmin_action.package_create,
            name='repeated_dataset_id', id=my_uuid, **self.complete_pkg)

        assert_raises(ValidationError,
            self.sysadmin_action.package_create,
            name='invalid_dataset_id', id='my-custom-id', **self.complete_pkg)

    def test_raw_required(self):
        """title_translated is required."""
        raw_pkg = dict(self.complete_pkg)
        del raw_pkg['title_translated']

        assert_raises(ValidationError,
            self.normal_action.package_create,
            **raw_pkg)

    def test_tag_extras_bug(self):
        """subject must not leak into the package extras."""
        resp = self.normal_action.package_create(
            **self.complete_pkg)

        resp = self.action.package_show(id=resp['id'])
        assert 'subject' not in [e['key'] for e in resp.get('extras',[])]

    def test_keywords_with_apostrophe(self):
        """Apostrophes are allowed in keywords."""
        self.normal_action.package_create(
            **dict(self.complete_pkg, keywords=
                {'en': ['test'], 'fr': ["emissions de l'automobile"]}))

    def test_invalid_resource_size(self):
        """A non-numeric resource size is rejected."""
        assert_raises(ValidationError,
            self.normal_action.package_create,
            **dict(self.complete_pkg,
                resources = [dict(self.complete_pkg['resources'][0],
                    size='10M',
                    )],
                )
            )

    def test_copy_org_name(self):
        """org_title_at_publication defaults to the org's split title."""
        pkg = self.normal_action.package_create(**self.complete_pkg)

        assert_equal(sorted(pkg['org_title_at_publication']), ['en', 'fr'])
        assert_equal(pkg['org_title_at_publication']['en'], 'en org name')
        assert_equal(pkg['org_title_at_publication']['fr'], 'fr org name')

    def test_dont_copy_org_name(self):
        """An explicit org_title_at_publication is kept as given."""
        pkg = self.normal_action.package_create(**dict(
            self.complete_pkg, org_title_at_publication={'en':'a', 'fr':'b'}))

        assert_equal(pkg['org_title_at_publication']['en'], 'a')
        assert_equal(pkg['org_title_at_publication']['fr'], 'b')

    def test_generated_fields(self):
        """license_title is generated from license_id."""
        pkg = self.normal_action.package_create(**self.complete_pkg)

        # not generated, we set this one but later tests depend on it
        assert_equal(pkg['license_id'], 'ca-ogl-lgo')
        # this one is generated in the bowels of CKAN's model_dictize
        assert_equal(pkg['license_title'],
            'Open Government Licence - Canada')

        raise SkipTest('XXX: not generating fields yet')
        # some we actually generate ourselves
        # NOTE(review): the assertions below are unreachable until the
        # SkipTest above is removed
        assert_equal(pkg['license_title_fra'],
            'Licence du gouvernement ouvert - Canada')
        assert pkg['license_url_fra']

        assert pkg['department_number']

    def test_portal_release_date(self):
        """Only publisher/sysadmin callers may set portal_release_date."""
        raise SkipTest('XXX: portal_release_date not implemented yet')
        # NOTE(review): unreachable; also self.publisher_action is not
        # created in setup() - revisit when un-skipping
        release_pkg = dict(self.complete_pkg,
            portal_release_date='2012-01-01')

        assert_raises(ValidationError,
            self.normal_action.package_create,
            **release_pkg)

        self.publisher_action.package_create(**release_pkg)

        self.sysadmin_action.package_create(**release_pkg)

    def test_spatial(self):
        """spatial must be a valid GeoJSON Polygon string."""
        raise SkipTest('XXX: spatial not implemented in raw schema')
        spatial_pkg = dict(self.complete_pkg,
            spatial='{"type": "Polygon", "coordinates": '
                '[[[-141.001333, 41.736231], [-141.001333, 82.514468], '
                '[-52.622540, 82.514468], [-52.622540, 41.736231], '
                '[-141.001333, 41.736231]]]}')
        self.normal_action.package_create(**spatial_pkg)

        bad_spatial_pkg = dict(self.complete_pkg,
            spatial='{"type": "Line", "coordinates": '
                '[[[-141.001333, 41.736231], [-141.001333, 82.514468], '
                '[-52.622540, 82.514468], [-52.622540, 41.736231], '
                '[-141.001333, 41.736231]]]}')
        assert_raises(ValidationError,
            self.normal_action.package_create,
            **bad_spatial_pkg)

        bad_spatial_pkg2 = dict(self.complete_pkg,
            spatial='forty')
        assert_raises(ValidationError,
            self.normal_action.package_create,
            **bad_spatial_pkg2)

        bad_spatial_pkg3 = dict(self.complete_pkg,
            spatial='{"type": "Polygon"}')
        # NOTE(review): self.assert_raises looks like a typo for the
        # module-level assert_raises; unreachable while skipped
        self.assert_raises(ValidationError,
            self.normal_action.package_create,
            **bad_spatial_pkg3)

        bad_spatial_pkg4 = dict(self.complete_pkg,
            spatial='{"type": "Polygon", "coordinates": [1,2,3,4]}')
        self.assert_raises(ValidationError,
            self.normal_action.package_create,
            **bad_spatial_pkg4)

    def test_dont_change_portal_release_date(self):
        "normal users should not be able to reset the portal release date"
        raise SkipTest('XXX portal_release_date not yet implemented')

        # NOTE(review): unreachable until un-skipped
        resp = self.sysadmin_action.package_create(
            portal_release_date='2012-01-01',
            **self.complete_pkg)

        # silently ignore missing portal_release_date
        self.normal_action.package_update(id=resp['id'],
            **self.complete_pkg)

        resp2 = self.normal_action.package_show(id=resp['id'])

        assert_equal(resp['portal_release_date'],
            resp2.get('portal_release_date'))
示例#11
0
    def create_pd_record(self, owner_org, resource_name):
        """
        Render (GET) or process (POST) the single-record creation form
        for a recombinant resource owned by ``owner_org``.

        Aborts with 403 when the user may not upsert into the
        datastore resource.  On a successful POST the record is
        inserted and the user is redirected back to the preview table;
        on validation errors the form is re-rendered with the errors.
        """
        lc = LocalCKAN(username=c.user)

        try:
            chromo = h.recombinant_get_chromo(resource_name)
            rcomb = lc.action.recombinant_show(
                owner_org=owner_org, dataset_type=chromo['dataset_type'])
            # exactly one resource with this name is expected; the
            # list-unpack raises if that is not the case
            [res
             ] = [r for r in rcomb['resources'] if r['name'] == resource_name]

            check_access('datastore_upsert', {
                'user': c.user,
                'auth_user_obj': c.userobj
            }, {'resource_id': res['id']})
        except NotAuthorized:
            return abort(403, _('Unauthorized'))

        # {datastore_id: [{'value':..., 'label':...}, ...]} per field
        choice_fields = {
            f['datastore_id']: [{
                'value': k,
                'label': v
            } for (k, v) in f['choices']]
            for f in h.recombinant_choice_fields(resource_name)
        }
        pk_fields = aslist(chromo['datastore_primary_key'])

        if request.method == 'POST':
            post_data = parse_params(request.POST, ignore_keys=['save'])

            if 'cancel' in post_data:
                return redirect(
                    h.url_for(
                        controller=
                        'ckanext.recombinant.controller:UploadController',
                        action='preview_table',
                        resource_name=resource_name,
                        owner_org=rcomb['owner_org'],
                    ))

            data, err = clean_check_type_errors(post_data, chromo['fields'],
                                                pk_fields, choice_fields)
            try:
                # fields that already failed type checks are sent as
                # None; dry_run when any error exists so we only
                # collect further validation errors without writing
                lc.action.datastore_upsert(resource_id=res['id'],
                                           method='insert',
                                           records=[{
                                               k: None if k in err else v
                                               for (k, v) in data.items()
                                           }],
                                           dry_run=bool(err))
            except ValidationError as ve:
                if 'records' in ve.error_dict:
                    # merge per-field datastore errors into err
                    err = dict(
                        {
                            k: [_(e) for e in v]
                            for (k, v) in ve.error_dict['records'][0].items()
                        }, **err)
                elif ve.error_dict.get('info', {}).get('pgcode',
                                                       '') == '23505':
                    # 23505 = postgres unique_violation: duplicate key
                    err = dict(
                        {
                            k: [_("This record already exists")]
                            for k in pk_fields
                        }, **err)

            if err:
                # re-render the form with submitted data and errors
                return render('recombinant/create_pd_record.html',
                              extra_vars={
                                  'data': data,
                                  'resource_name': resource_name,
                                  'chromo_title': chromo['title'],
                                  'choice_fields': choice_fields,
                                  'owner_org': rcomb['owner_org'],
                                  'errors': err,
                              })

            h.flash_notice(_(u'Record Created'))

            return redirect(
                h.url_for(
                    controller=
                    'ckanext.recombinant.controller:UploadController',
                    action='preview_table',
                    resource_name=resource_name,
                    owner_org=rcomb['owner_org'],
                ))

        # GET: render an empty form
        return render('recombinant/create_pd_record.html',
                      extra_vars={
                          'data': {},
                          'resource_name': resource_name,
                          'chromo_title': chromo['title'],
                          'choice_fields': choice_fields,
                          'owner_org': rcomb['owner_org'],
                          'errors': {},
                      })
 def test_dataset_schema_show(self):
     """The third dataset field of 'test-schema' is labelled 'Humps'."""
     client = LocalCKAN('visitor')
     result = client.action.scheming_dataset_schema_show(type='test-schema')
     assert result['dataset_fields'][2]['label'] == 'Humps'
 def test_dataset_schema_list(self):
     """The dataset schema list includes 'test-schema'."""
     client = LocalCKAN('visitor')
     schemas = client.action.scheming_dataset_schema_list()
     assert 'test-schema' in schemas
 def test_dataset_schema_not_found(self):
     """Requesting a schema for an unknown dataset type raises
     NotFound."""
     client = LocalCKAN('visitor')
     assert_raises(
         NotFound,
         client.action.scheming_dataset_schema_show,
         type='ernie')
示例#15
0
 def test_blank(self):
     """Upserting an empty record must raise ValidationError."""
     client = LocalCKAN()
     assert_raises(
         ValidationError,
         client.action.datastore_upsert,
         resource_id=self.resource_id,
         records=[{}])
示例#16
0
 def test_example(self):
     """The example record from the 'grants' chromo upserts cleanly."""
     client = LocalCKAN()
     example = get_chromo('grants')['examples']['record']
     client.action.datastore_upsert(
         resource_id=self.resource_id,
         records=[example])
示例#17
0
def _migrate(args):
    """
    Migrate uploaded resource files from local disk storage to cloud
    storage.

    :param args: docopt-style argument dict with keys
        '<path_to_storage>' (root of the local storage tree) and
        '<resource_id>' (optional: migrate only this one resource).

    IDs of any failed uploads are written, one per line, to a temporary
    log file whose name is printed at the end.
    """
    path = args['<path_to_storage>']
    single_id = args['<resource_id>']
    if not os.path.isdir(path):
        print('The storage directory cannot be found.')
        return

    lc = LocalCKAN()
    resources = {}
    failed = []

    # The resource folder is structured like so on disk:
    # - storage/
    #   - ...
    # - resources/
    #   - <3 letter prefix>
    #     - <3 letter prefix>
    #       - <remaining resource_id as filename>
    #       ...
    #     ...
    #   ...
    for root, dirs, files in os.walk(path):
        # Only the bottom level of the tree actually contains any files. We
        # don't care at all about the overall structure.
        if not files:
            continue

        # NOTE(review): assumes '/' separators in walk roots, so this
        # would break on Windows -- confirm deployment target
        split_root = root.split('/')
        resource_id = split_root[-2] + split_root[-1]

        for file_ in files:
            # the two prefix dirs plus the filename form the full id
            ckan_res_id = resource_id + file_
            if single_id and ckan_res_id != single_id:
                continue

            resources[ckan_res_id] = os.path.join(root, file_)

    for i, resource in enumerate(resources.items(), 1):
        resource_id, file_path = resource
        print('[{i}/{count}] Working on {id}'.format(i=i,
                                                     count=len(resources),
                                                     id=resource_id))

        try:
            resource = lc.action.resource_show(id=resource_id)
        except NotFound:
            print(u'\tResource not found')
            continue

        if resource['url_type'] != 'upload':
            print(u'\t`url_type` is not `upload`. Skip')
            continue

        with open(file_path, 'rb') as fin:
            resource['upload'] = FakeFileStorage(
                fin, resource['url'].split('/')[-1])
            try:
                uploader = ResourceCloudStorage(resource)
                uploader.upload(resource['id'])
            except Exception as e:
                # best-effort migration: record the failure and continue
                failed.append(resource_id)
                print(u'\tError of type {0} during upload: {1}'.format(
                    type(e), e))

    if failed:
        # Write one id per line; the original writelines() call ran the
        # ids together with no separators, making the log unusable.
        log_file = tempfile.NamedTemporaryFile(mode='w', delete=False)
        log_file.write(u'\n'.join(failed) + u'\n')
        log_file.close()
        print(u'ID of all failed uploads are saved to `{0}`'.format(
            log_file.name))
示例#18
0
 def delete_vocabulary(self, name):
     """Delete the named vocabulary, removing each of its tags first."""
     registry = LocalCKAN()
     vocab = registry.action.vocabulary_show(id=name)
     # remove the tags before deleting the vocabulary itself
     for tag in vocab['tags']:
         registry.action.tag_delete(id=tag['id'])
     registry.action.vocabulary_delete(id=vocab['id'])
示例#19
0
    def datatable(self, resource_name, resource_id):
        """
        Server-side data source for the DataTables widget on recombinant
        preview pages.

        Reads the DataTables request parameters (draw, search[value],
        start, length and the order[i][...] pairs), runs a
        datastore_search, and returns the JSON envelope DataTables
        expects.  When the chromo defines an edit_form, an edit-link
        cell is prepended to every row.
        """
        draw = int(request.params['draw'])
        search_text = unicode(request.params['search[value]'])
        offset = int(request.params['start'])
        limit = int(request.params['length'])

        chromo = h.recombinant_get_chromo(resource_name)
        lc = LocalCKAN(username=c.user)
        try:
            unfiltered_response = lc.action.datastore_search(
                resource_id=resource_id,
                limit=1,
            )
        except NotAuthorized:
            # datatables js can't handle any sort of error response
            # return no records instead
            return json.dumps({
                'draw': draw,
                'iTotalRecords': -1,  # with a hint that something is wrong
                'iTotalDisplayRecords': -1,
                'aaData': [],
            })

        cols = [f['datastore_id'] for f in chromo['fields']]
        # an edit-link column (when present) shifts datatable column
        # indexes right by one relative to datastore columns
        prefix_cols = 1 if chromo.get('edit_form', False) else 0

        # translate DataTables order[i][column]/order[i][dir] parameters
        # into a datastore sort clause
        sort_list = []
        i = 0
        while True:
            if u'order[%d][column]' % i not in request.params:
                break
            sort_by_num = int(request.params[u'order[%d][column]' % i])
            sort_order = (u'desc NULLS LAST' if
                          request.params[u'order[%d][dir]' %
                                         i] == u'desc' else u'asc NULLS LAST')
            sort_list.append(cols[sort_by_num - prefix_cols] + u' ' +
                             sort_order)
            i += 1

        response = lc.action.datastore_search(
            q=search_text,
            resource_id=resource_id,
            offset=offset,
            limit=limit,
            sort=u', '.join(sort_list),
        )

        aadata = [[
            datatablify(row.get(colname, u''), colname) for colname in cols
        ] for row in response['records']]

        if chromo.get('edit_form', False):
            res = lc.action.resource_show(id=resource_id)
            pkg = lc.action.package_show(id=res['package_id'])
            fids = [f['datastore_id'] for f in chromo['fields']]
            pkids = [
                fids.index(k) for k in aslist(chromo['datastore_primary_key'])
            ]
            for row in aadata:
                # BUG FIX: original emitted `aria-label"..."` with no `=`,
                # producing an invalid HTML attribute
                row.insert(
                    0,
                    (u'<a href="{0}" aria-label="' + _("Edit") + '">'
                     u'<i class="fa fa-lg fa-edit" aria-hidden="true"></i></a>'
                     ).format(
                         h.url_for(
                             controller=
                             'ckanext.canada.controller:PDUpdateController',
                             action='update_pd_record',
                             owner_org=pkg['organization']['name'],
                             resource_name=resource_name,
                             pk=','.join(
                                 url_part_escape(row[i]) for i in pkids))))

        return json.dumps({
            'draw': draw,
            'iTotalRecords': unfiltered_response.get('total', 0),
            'iTotalDisplayRecords': response.get('total', 0),
            'aaData': aadata,
        })
 def test_group_schema_show(self):
     """The "group" schema exposes "Bookface" as its fifth field label."""
     client = LocalCKAN("visitor")
     schema = client.action.scheming_group_schema_show(type="group")
     label = schema["fields"][4]["label"]
     assert label == "Bookface"
示例#21
0
    def update_pd_record(self, owner_org, resource_name, pk):
        """
        Render and process the single-record edit form for a recombinant
        (PD) datastore table.

        :param owner_org: name of the organization that owns the table
        :param resource_name: recombinant resource name identifying the
            table's chromo
        :param pk: comma-separated, url-escaped primary key values
            identifying the record to edit

        GET renders the form pre-filled with the existing record; POST
        validates the submitted values via a datastore_upsert dry run,
        re-rendering with errors or saving and redirecting back to the
        preview table.
        """
        # primary key values arrive url-escaped and comma-joined
        pk = [url_part_unescape(p) for p in pk.split(',')]

        lc = LocalCKAN(username=c.user)

        try:
            chromo = h.recombinant_get_chromo(resource_name)
            rcomb = lc.action.recombinant_show(
                owner_org=owner_org, dataset_type=chromo['dataset_type'])
            # exactly one resource in rcomb should match resource_name
            [res
             ] = [r for r in rcomb['resources'] if r['name'] == resource_name]

            check_access('datastore_upsert', {
                'user': c.user,
                'auth_user_obj': c.userobj
            }, {'resource_id': res['id']})
        except NotAuthorized:
            abort(403, _('Unauthorized'))

        # {datastore_id: [{'value': ..., 'label': ...}, ...]} for the
        # form's select inputs
        choice_fields = {
            f['datastore_id']: [{
                'value': k,
                'label': v
            } for (k, v) in f['choices']]
            for f in h.recombinant_choice_fields(resource_name)
        }
        pk_fields = aslist(chromo['datastore_primary_key'])
        pk_filter = dict(zip(pk_fields, pk))

        # locate the record being edited by its primary key
        records = lc.action.datastore_search(resource_id=res['id'],
                                             filters=pk_filter)['records']
        if len(records) == 0:
            abort(404, _('Not found'))
        if len(records) > 1:
            abort(400, _('Multiple records found'))
        record = records[0]

        if request.method == 'POST':
            # primary key fields are not editable; drop them (and the
            # save button) from the submitted values
            post_data = parse_params(request.POST,
                                     ignore_keys=['save'] + pk_fields)

            if 'cancel' in post_data:
                return redirect(
                    h.url_for(
                        controller=
                        'ckanext.recombinant.controller:UploadController',
                        action='preview_table',
                        resource_name=resource_name,
                        owner_org=rcomb['owner_org'],
                    ))

            data, err = clean_check_type_errors(post_data, chromo['fields'],
                                                pk_fields, choice_fields)
            # can't change pk fields
            for f_id in data:
                if f_id in pk_fields:
                    data[f_id] = record[f_id]
            try:
                # dry_run when type errors already exist: collect the
                # datastore's validation errors without saving
                lc.action.datastore_upsert(
                    resource_id=res['id'],
                    #method='update',    FIXME not raising ValidationErrors
                    records=[{
                        k: None if k in err else v
                        for (k, v) in data.items()
                    }],
                    dry_run=bool(err))
            except ValidationError as ve:
                # merge datastore errors with the type-check errors,
                # translating each message
                err = dict(
                    {
                        k: [_(e) for e in v]
                        for (k, v) in ve.error_dict['records'][0].items()
                    }, **err)

            if err:
                return render('recombinant/update_pd_record.html',
                              extra_vars={
                                  'data': data,
                                  'resource_name': resource_name,
                                  'chromo_title': chromo['title'],
                                  'choice_fields': choice_fields,
                                  'pk_fields': pk_fields,
                                  'owner_org': rcomb['owner_org'],
                                  'errors': err,
                              })

            h.flash_notice(_(u'Record %s Updated') % u','.join(pk))

            return redirect(
                h.url_for(
                    controller=
                    'ckanext.recombinant.controller:UploadController',
                    action='preview_table',
                    resource_name=resource_name,
                    owner_org=rcomb['owner_org'],
                ))

        # GET: pre-fill the form with the existing record's values
        data = {}
        for f in chromo['fields']:
            if not f.get('import_template_include', True):
                continue
            val = record[f['datastore_id']]
            data[f['datastore_id']] = val

        return render('recombinant/update_pd_record.html',
                      extra_vars={
                          'data': data,
                          'resource_name': resource_name,
                          'chromo_title': chromo['title'],
                          'choice_fields': choice_fields,
                          'pk_fields': pk_fields,
                          'owner_org': rcomb['owner_org'],
                          'errors': {},
                      })
 def test_organization_schema_list(self):
     """Both registered organization schemas are listed."""
     client = LocalCKAN("visitor")
     schemas = client.action.scheming_organization_schema_list()
     assert sorted(schemas) == ["organization", "publisher"]
示例#23
0
    def copy_datasets(self, remote, package_ids=None):
        """
        a process that accepts package ids on stdin which are passed to
        the package_show API on the remote CKAN instance and compared
        to the local version of the same package.  The local package is
        then created, updated, deleted or left unchanged.  This process
        outputs that action as a string 'created', 'updated', 'deleted'
        or 'unchanged'
        """
        # -a (push_apikey) pushes from the local registry to a remote
        # portal; -f (fetch) pulls from a remote registry into the
        # local portal; exactly one of the two must be chosen
        if self.options.push_apikey and not self.options.fetch:
            registry = LocalCKAN()
            portal = RemoteCKAN(remote, apikey=self.options.push_apikey)
        elif self.options.fetch:
            registry = RemoteCKAN(remote)
            portal = LocalCKAN()
        else:
            print "exactly one of -f or -a options must be specified"
            return

        now = datetime.now()

        # default to reading package ids line-by-line from stdin
        if not package_ids:
            package_ids = iter(sys.stdin.readline, '')

        for package_id in package_ids:
            package_id = package_id.strip()
            reason = None
            target_deleted = False
            try:
                source_pkg = registry.action.package_show(id=package_id)
            except NotAuthorized:
                # inaccessible source packages are treated as absent
                source_pkg = None
            except (CKANAPIError, urllib2.URLError), e:
                # report the failure on stdout before aborting the run
                sys.stdout.write(json.dumps([package_id, 'source error',
                    unicode(e.args)]) + '\n')
                raise
            if source_pkg and source_pkg['state'] == 'deleted':
                source_pkg = None

            if source_pkg and source_pkg['type'] != 'dataset':
                # non-default dataset types ignored
                source_pkg = None

            _trim_package(source_pkg)

            if source_pkg and not self.options.mirror:
                # treat unpublished packages same as deleted packages
                if not source_pkg['portal_release_date']:
                    source_pkg = None
                    reason = 'release date not set'
                elif isodate(source_pkg['portal_release_date'], None) > now:
                    source_pkg = None
                    reason = 'release date in future'

            try:
                target_pkg = portal.call_action('package_show',
                    {'id':package_id})
            except (NotFound, NotAuthorized):
                target_pkg = None
            except (CKANAPIError, urllib2.URLError), e:
                sys.stdout.write(json.dumps([package_id, 'target error',
                    unicode(e.args)]) + '\n')
                raise
            # NOTE(review): this excerpt ends here; the create/update/
            # delete comparison described in the docstring is not
            # visible in this fragment
示例#24
0
    def copy_datasets(self, remote, package_ids=None):
        """
        a process that accepts packages on stdin which are compared
        to the local version of the same package.  The local package is
        then created, updated, deleted or left unchanged.  This process
        outputs that action as a string 'created', 'updated', 'deleted'
        or 'unchanged'
        """
        portal = LocalCKAN()

        now = datetime.now()

        # one JSON-encoded package per line on stdin
        packages = iter(sys.stdin.readline, '')

        for package in packages:
            source_pkg = json.loads(package)
            package_id = source_pkg['id']
            reason = None
            target_deleted = False
            # deleted source packages are treated as absent
            if source_pkg and source_pkg['state'] == 'deleted':
                source_pkg = None

            if source_pkg and source_pkg['type'] not in DATASET_TYPES:
                # non-default dataset types ignored
                source_pkg = None

            _trim_package(source_pkg)

            action = None
            # unless mirroring everything, only published, ready packages
            # are copied; others become absent with an explanatory reason
            if source_pkg and not self.options.mirror:
                if source_pkg.get('ready_to_publish') == 'false':
                    source_pkg = None
                    reason = 'marked not ready to publish'
                elif not source_pkg.get('portal_release_date'):
                    source_pkg = None
                    reason = 'release date not set'
                elif isodate(source_pkg['portal_release_date'], None) > now:
                    source_pkg = None
                    reason = 'release date in future'
                else:
                    # portal packages published public
                    source_pkg['private'] = False

            # NOTE(review): action is never set to 'skip' above, so both
            # 'skip' guards always pass in this version -- confirm intent
            if action != 'skip':
                try:
                    target_pkg = portal.call_action('package_show', {
                        'id': package_id
                    })
                except (NotFound, NotAuthorized):
                    target_pkg = None
                except (CKANAPIError, urllib2.URLError), e:
                    # report the failure on stdout before aborting
                    sys.stdout.write(
                        json.dumps([
                            package_id,
                            'target error',
                            unicode(e.args)
                        ]) + '\n'
                    )
                    raise
                if target_pkg and target_pkg['state'] == 'deleted':
                    target_pkg = None
                    target_deleted = True

                _trim_package(target_pkg)

            # decide and perform the sync action
            if action == 'skip':
                pass
            elif target_pkg is None and source_pkg is None:
                action = 'unchanged'
                reason = reason or 'deleted on registry'
            elif target_deleted:
                action = 'updated'
                reason = 'undeleting on target'
                portal.action.package_update(**source_pkg)
            elif target_pkg is None:
                action = 'created'
                portal.action.package_create(**source_pkg)
            elif source_pkg is None:
                action = 'deleted'
                portal.action.package_delete(id=package_id)
            elif source_pkg == target_pkg:
                action = 'unchanged'
                reason = 'no difference found'
            else:
                action = 'updated'
                portal.action.package_update(**source_pkg)

            # emit one JSON [id, action, reason] line per package
            sys.stdout.write(json.dumps([package_id, action, reason]) + '\n')
            sys.stdout.flush()
 def test_group_schema_not_found(self):
     """Requesting an unknown group type raises NotFound."""
     client = LocalCKAN("visitor")
     with pytest.raises(NotFound):
         client.action.scheming_group_schema_show(type="bert")
 def test_group_schema_show(self):
     """The 'group' schema exposes 'Bookface' as its fifth field label."""
     client = LocalCKAN('visitor')
     schema = client.action.scheming_group_schema_show(type='group')
     label = schema['fields'][4]['label']
     assert_equals(label, 'Bookface')
 def test_organization_schema_show(self):
     """The "organization" schema exposes "Department ID" as its fifth field label."""
     client = LocalCKAN("visitor")
     schema = client.action.scheming_organization_schema_show(
         type="organization")
     label = schema["fields"][4]["label"]
     assert label == "Department ID"
 def test_group_schema_not_found(self):
     """Requesting an unknown group type raises NotFound."""
     client = LocalCKAN('visitor')
     show = client.action.scheming_group_schema_show
     assert_raises(NotFound, show, type='bert')
 def test_group_schema_list(self):
     """Both registered group schemas are listed."""
     client = LocalCKAN("visitor")
     schemas = client.action.scheming_group_schema_list()
     assert sorted(schemas) == ["group", "theme"]
 def test_organization_schema_list(self):
     """Only the default 'organization' schema is registered."""
     client = LocalCKAN('visitor')
     schemas = client.action.scheming_organization_schema_list()
     assert_equals(schemas, ['organization'])
from datetime import datetime

from ckan.lib import mailer
import ckan.plugins.toolkit as t

from ckanapi import LocalCKAN, ValidationError

registry = LocalCKAN()


def upload_to_ckan(package_id, filename):

    try:
        resource = registry.action.resource_create(package_id=package_id,
                                                   upload=open(filename, 'rb'))

        email_notification_recipients = t.aslist(
            t.config.get('ckanext.prh_tools.mail_recipients', ''))
        site_title = t.config.get('ckan.site_title', '')
        today = datetime.now().date().isoformat()

        msg = '%(site_title)s - PRH data uploaded %(today)s\n\n%(status)s' % {
            'site_title':
            site_title,
            'today':
            today,
            'status':
            "New data available in https://www.avoindata.fi/data/dataset/%s/resource/%s"
            % (package_id, resource.get('id'))
        }
 def test_organization_schema_show(self):
     """The 'organization' schema exposes 'Department ID' as its fifth field label."""
     client = LocalCKAN('visitor')
     schema = client.action.scheming_organization_schema_show(
         type='organization')
     label = schema['fields'][4]['label']
     assert_equals(label, 'Department ID')
示例#33
0
    def copy_datasets(self, remote, package_ids=None):
        """
        a process that accepts packages on stdin which are compared
        to the local version of the same package.  The local package is
        then created, updated, deleted or left unchanged.  This process
        outputs that action as a string 'created', 'updated', 'deleted'
        or 'unchanged'
        """
        portal = LocalCKAN()

        now = datetime.now()

        # one JSON-encoded package per line on stdin
        packages = iter(sys.stdin.readline, '')

        for package in packages:
            source_pkg = json.loads(package)
            package_id = source_pkg['id']
            reason = None
            target_deleted = False
            # deleted source packages are treated as absent
            if source_pkg and source_pkg['state'] == 'deleted':
                source_pkg = None

            if source_pkg and source_pkg['type'] not in DATASET_TYPES:
                # non-default dataset types ignored
                source_pkg = None

            _trim_package(source_pkg)

            action = None
            # unless mirroring everything, only published, ready packages
            # are copied; others become absent with an explanatory reason
            if source_pkg and not self.options.mirror:
                if source_pkg.get('ready_to_publish') == 'false':
                    source_pkg = None
                    reason = 'marked not ready to publish'
                elif not source_pkg.get('portal_release_date'):
                    source_pkg = None
                    reason = 'release date not set'
                elif isodate(source_pkg['portal_release_date'], None) > now:
                    source_pkg = None
                    reason = 'release date in future'
                else:
                    # portal packages published public
                    source_pkg['private'] = False

            # NOTE(review): action is never set to 'skip' above, so both
            # 'skip' guards always pass in this version -- confirm intent
            if action != 'skip':
                try:
                    target_pkg = portal.call_action('package_show', {
                        'id': package_id
                    })
                except (NotFound, NotAuthorized):
                    target_pkg = None
                except (CKANAPIError, urllib2.URLError), e:
                    # report the failure on stdout before aborting
                    sys.stdout.write(
                        json.dumps([
                            package_id,
                            'target error',
                            unicode(e.args)
                        ]) + '\n'
                    )
                    raise
                if target_pkg and target_pkg['state'] == 'deleted':
                    target_pkg = None
                    target_deleted = True

                _trim_package(target_pkg)

            # decide and perform the sync action
            if action == 'skip':
                pass
            elif target_pkg is None and source_pkg is None:
                action = 'unchanged'
                reason = reason or 'deleted on registry'
            elif target_deleted:
                action = 'updated'
                reason = 'undeleting on target'
                portal.action.package_update(**source_pkg)
            elif target_pkg is None:
                action = 'created'
                portal.action.package_create(**source_pkg)
            elif source_pkg is None:
                action = 'deleted'
                portal.action.package_delete(id=package_id)
            elif source_pkg == target_pkg:
                action = 'unchanged'
                reason = 'no difference found'
            else:
                action = 'updated'
                portal.action.package_update(**source_pkg)

            # emit one JSON [id, action, reason] line per package
            sys.stdout.write(json.dumps([package_id, action, reason]) + '\n')
            sys.stdout.flush()
 def test_organization_schema_not_found(self):
     """Requesting an unknown organization type raises NotFound."""
     client = LocalCKAN('visitor')
     show = client.action.scheming_organization_schema_show
     assert_raises(NotFound, show, type='elmo')
示例#35
0
    def load_suggested(self, use_created_date, filename):
        """
        a process that loads suggested datasets from Drupal into CKAN

        :param use_created_date: when true, use each row's date_created
            as the forwarded date instead of today's date
        :param filename: path to the CSV exported from Drupal
        """
        registry = LocalCKAN()

        # load packages as dict
        # page through every existing 'prop' package so rows already
        # imported can be skipped below
        results = True
        counter = 0
        batch_size = 100
        existing_suggestions = {}
        while results:
            packages = registry.action.package_search(q='type:prop', start=counter, rows=batch_size, include_private=True)['results']
            if packages:
                for package in packages:
                    existing_suggestions[package['id']] = package
                counter += len(packages)
            else:
                results = False

        # load data from csv
        # utf-8-sig strips a BOM if present; rows are re-encoded to
        # utf-8 bytes for the (Python 2) csv module
        csv_file = io.open(filename, "r", encoding='utf-8-sig')
        csv_reader = csv.DictReader((l.encode('utf-8') for l in csv_file))
        today = datetime.now().strftime('%Y-%m-%d')

        for row in csv_reader:
            uuid = row['uuid']
            # already imported in a previous run -- skip
            if uuid in existing_suggestions:
                continue

            if use_created_date:
                today = row['date_created']

            # add record
            record = {
                "type": "prop",
                "state": "active",
                "id": uuid,
                "title_translated": {
                    "en": row['title_en'],
                    "fr": row['title_fr']
                },
                "owner_org": row['organization'],
                "notes_translated": {
                    "en": row['description_en'],
                    "fr": row['description_fr'],
                },
                "comments": {
                    "en": row['additional_comments_and_feedback_en'],
                    "fr": row['additional_comments_and_feedback_fr']
                },
                "reason": row['reason'],
                "subject": row['subject'].split(',') if row['subject'] else ['information_and_communications'],
                "keywords": {
                    "en": row['keywords_en'].split(',') if row['keywords_en'] else ['dataset'],
                    "fr": row['keywords_fr'].split(',') if row['keywords_fr'] else ['Jeu de données'],
                },
                "date_submitted": row['date_created'],
                "date_forwarded": today,
                "status": [] if row['dataset_suggestion_status'] == 'department_contacted' else [
                    {
                        "reason": row['dataset_suggestion_status'],
                        "date": row['dataset_released_date'] if row['dataset_released_date'] else today,
                        "comments": {
                            "en": row['dataset_suggestion_status_link'] or u'Status imported from previous ‘suggest a dataset’ system',
                            "fr": row['dataset_suggestion_status_link'] or u'État importé du système précédent « Proposez un jeu de données »',
                        }
                    }
                ]
            }

            try:
                registry.action.package_create(**record)
                print uuid + ' suggested dataset created'
            except ValidationError as e:
                # an 'id' validation error means the package already
                # exists (possibly deleted) -- try updating it instead
                if 'id' in e.error_dict:
                    try:
                        registry.action.package_update(**record)
                        print uuid + ' suggested dataset update deleted'
                    except ValidationError as e:
                        print uuid + ' (update deleted) ' + str(e)
                else:
                    print uuid + ' ' + str(e)
        csv_file.close()
 def test_group_schema_list(self):
     """Only the default 'group' schema is registered."""
     client = LocalCKAN('visitor')
     schemas = client.action.scheming_group_schema_list()
     assert_equals(schemas, ['group'])