def test_1(self):
    """Add, delete, view at an earlier revision, then restore package extras.

    Runs against the ``warandpeace`` package and reloads it after every
    commit, so each group of assertions looks at freshly loaded state.
    """
    pkg_name = u'warandpeace'

    startrev = model.repo.youngest_revision().id
    package = model.Package.by_name(pkg_name)
    assert package is not None

    # Attach three extras, each through a different accessor.
    rev = model.repo.new_revision()
    country_extra = model.PackageExtra(key=u'country', value='us')
    package._extras[u'country'] = country_extra
    xxx_extra = model.PackageExtra(key=u'xxx', value='yyy')
    package.extras_active[u'xxx'] = xxx_extra
    package.extras[u'format'] = u'rdf'
    model.repo.commit_and_remove()

    # Reload: all three extras must be visible through every accessor.
    rev1 = model.repo.youngest_revision().id
    reloaded = model.Package.by_name(pkg_name)
    assert len(reloaded._extras) == 3, reloaded._extras
    assert reloaded.extras_active[u'country'].value == 'us', \
        reloaded.extras_active
    assert reloaded.extras[u'country'] == 'us'
    assert reloaded.extras[u'format'] == 'rdf'
    model.Session.remove()

    # Delete one extra inside a new revision.
    reloaded = model.Package.by_name(pkg_name)
    model.repo.new_revision()
    del reloaded.extras[u'country']
    model.repo.commit_and_remove()

    # The row is kept (three raw extras) but only two are exposed, and the
    # removed one is flagged as deleted.
    reloaded = model.Package.by_name(pkg_name)
    assert len(reloaded._extras) == 3
    assert len(reloaded.extras) == 2
    country_query = model.Session.query(model.PackageExtra)
    extra = country_query.filter_by(key=u'country').first()
    assert extra and extra.state == model.State.DELETED, extra
    model.Session.remove()

    # Looking at the package as of the earlier revision shows three again.
    reloaded = model.Package.by_name(pkg_name)
    saved_revision = model.Session.query(model.Revision).get(rev1)
    reloaded.get_as_of(saved_revision)
    assert len(reloaded.extras) == 3, len(reloaded.extras)
    model.Session.remove()

    # Restore the deleted key with a new value.
    model.repo.new_revision()
    reloaded = model.Package.by_name(pkg_name)
    reloaded.extras[u'country'] = 'uk'
    model.repo.commit_and_remove()

    reloaded = model.Package.by_name(pkg_name)
    assert len(reloaded.extras) == 3
    assert len(reloaded._extras) == 3
    assert reloaded.extras[u'country'] == 'uk'
def set_syndicated_id(local_id: str, remote_id: str, field: str):
    """Set the remote package id on the local package.

    Looks for an extra with key ``field`` on package ``local_id``. If none
    exists a new ``PackageExtra`` holding ``remote_id`` is added; otherwise
    the existing extra's value is overwritten with ``remote_id`` and its
    state set to ``"active"``. The change is committed and then
    ``rebuild(local_id)`` is called.

    Args:
        local_id: id of the local package.
        remote_id: id of the package on the remote side, stored as the value.
        field: key of the extra that stores the remote id.
    """
    row = (
        model.Session.query(model.PackageExtra.id)
        .join(model.Package, model.Package.id == model.PackageExtra.package_id)
        .filter(
            model.Package.id == local_id,
            model.PackageExtra.key == field,
        )
        .first()
    )
    # ``first()`` on a single-column query returns a one-element row (or
    # None); unwrap it so a plain id, not the row, is bound in the filter.
    ext_id = row[0] if row else None

    if ext_id is None:
        model.Session.add(
            model.PackageExtra(
                package_id=local_id,
                key=field,
                value=remote_id,
            )
        )
    else:
        model.Session.query(model.PackageExtra).filter_by(id=ext_id).update(
            {"value": remote_id, "state": "active"}
        )

    # Commit on both paths: the bulk UPDATE above is otherwise left in an
    # open transaction. commit() flushes first, so no separate flush().
    model.Session.commit()
    rebuild(local_id)
def test_1(self):
    """Add three extras to ``warandpeace``, then delete one of them.

    After the delete the extra must be gone entirely: two raw extras remain
    and no ``country`` row can be found.
    """
    pkg_name = u'warandpeace'

    startrev = model.repo.youngest_revision().id
    package = model.Package.by_name(pkg_name)
    assert package is not None

    # One extra goes in as an explicit object, two through the dict proxy.
    rev = model.repo.new_revision()
    country_extra = model.PackageExtra(key=u'country', value='us')
    package._extras[u'country'] = country_extra
    package.extras[u'xxx'] = u'yyy'
    package.extras[u'format'] = u'rdf'
    model.repo.commit_and_remove()

    # Reload and confirm everything was stored.
    rev1 = model.repo.youngest_revision().id
    reloaded = model.Package.by_name(pkg_name)
    assert len(reloaded._extras) == 3, reloaded._extras
    assert reloaded.extras[u'country'] == 'us'
    assert reloaded.extras[u'format'] == 'rdf'
    model.Session.remove()

    # Delete an extra inside a new revision.
    reloaded = model.Package.by_name(pkg_name)
    model.repo.new_revision()
    del reloaded.extras[u'country']
    model.repo.commit_and_remove()

    reloaded = model.Package.by_name(pkg_name)
    assert len(reloaded._extras) == 2
    assert len(reloaded.extras) == 2
    country_query = model.Session.query(model.PackageExtra)
    extra = country_query.filter_by(key=u'country').first()
    assert not extra, extra
    model.Session.remove()
def test_03_update_package_from_dict():
    """Rename a dataset twice and check the search results after each commit."""
    factories.Dataset()
    factories.Dataset(**get_data())
    query = search.query_for(model.Package)

    # First update: rename the package and attach a ``published_by`` extra.
    dataset = model.Package.by_name("council-owned-litter-bins")
    dataset.name = u"new_name"
    publisher_extra = model.PackageExtra(key="published_by", value="barrow")
    dataset._extras[publisher_extra.key] = publisher_extra
    model.repo.commit_and_remove()

    everything = query.run({"q": ""})
    assert everything["count"] == 2
    barrow_hits = query.run({"q": "barrow"})
    assert barrow_hits["count"] == 1
    barrow_hits = query.run({"q": "barrow"})
    assert barrow_hits["results"][0] == "new_name"

    # Second update: rename the package back to its original name.
    dataset = model.Package.by_name("new_name")
    dataset.name = u"council-owned-litter-bins"
    model.repo.commit_and_remove()

    everything = query.run({"q": ""})
    assert everything["count"] == 2
    spatial_hits = query.run({"q": "spatial"})
    assert spatial_hits["count"] == 1
    spatial_hits = query.run({"q": "spatial"})
    assert spatial_hits["results"][0] == "council-owned-litter-bins"
def test_create_extras(self):
    """Extras set via ``_extras`` and via ``extras`` are both persisted."""
    pkg_name = u"test-package"
    new_pkg = model.Package(name=pkg_name)

    # Method 1: attach an explicit PackageExtra object.
    subject_extra = model.PackageExtra(key=u"subject", value=u"science")
    new_pkg._extras[u"subject"] = subject_extra

    # Method 2: assign a plain value through the ``extras`` proxy.
    new_pkg.extras[u"accuracy"] = u"metre"

    model.Session.add_all([new_pkg])
    model.Session.commit()
    model.Session.remove()

    # Reload so the comparison runs against what was stored.
    stored_pkg = model.Package.by_name(pkg_name)
    expected = {u"subject": u"science", u"accuracy": u"metre"}
    assert stored_pkg.extras == expected
def test_03_update_package_from_dict(self):
    """Rename a dataset twice and check search results after each commit."""
    original_name = "council-owned-litter-bins"

    # First update: rename the package and attach a ``published_by`` extra.
    dataset = model.Package.by_name(original_name)
    dataset.name = u"new_name"
    publisher_extra = model.PackageExtra(key="published_by", value="barrow")
    dataset._extras[publisher_extra.key] = publisher_extra
    model.repo.commit_and_remove()

    check_search_results("", 3)
    check_search_results("barrow", 1, ["new_name"])

    # Second update: rename the package back to its original name.
    dataset = model.Package.by_name("new_name")
    dataset.name = u"council-owned-litter-bins"
    model.repo.commit_and_remove()

    check_search_results("", 3)
    check_search_results("spatial", 1, [original_name])
def test_03_update_package_from_dict(self):
    '''Rename a dataset twice and check search results after each commit.'''
    original_name = 'council-owned-litter-bins'

    # First update: rename the package and attach a ``published_by`` extra.
    dataset = model.Package.by_name(original_name)
    dataset.name = u'new_name'
    publisher_extra = model.PackageExtra(key='published_by', value='barrow')
    dataset._extras[publisher_extra.key] = publisher_extra
    model.repo.commit_and_remove()

    check_search_results('', 3)
    check_search_results('barrow', 1, ['new_name'])

    # Second update: rename the package back to its original name.
    dataset = model.Package.by_name('new_name')
    dataset.name = u'council-owned-litter-bins'
    model.repo.commit_and_remove()

    check_search_results('', 3)
    check_search_results('spatial', 1, [original_name])
def test_create_extras(self):
    '''Extras set via ``_extras`` and via ``extras`` are both persisted.'''
    model.repo.new_revision()

    pkg_name = u'test-package'
    new_pkg = model.Package(name=pkg_name)

    # Method 1: attach an explicit PackageExtra object.
    subject_extra = model.PackageExtra(key=u'subject', value=u'science')
    new_pkg._extras[u'subject'] = subject_extra

    # Method 2: assign a plain value through the ``extras`` proxy.
    new_pkg.extras[u'accuracy'] = u'metre'

    model.Session.add_all([new_pkg])
    model.Session.commit()
    model.Session.remove()

    # Reload so the comparison runs against what was stored.
    stored_pkg = model.Package.by_name(pkg_name)
    expected = {
        u'subject': u'science',
        u'accuracy': u'metre',
    }
    assert_equal(stored_pkg.extras, expected)
def command(self):
    """Create the DGU test data used by the CKAN functional tests.

    Needs the ``CKAN_INI``, ``CKAN_TEST_SYSADMIN_NAME`` and
    ``CKAN_TEST_SYSADMIN_PASSWORD`` environment variables and returns early,
    after printing a message, when any of them is unset. It then creates
    whatever is still missing:

    * the test sysadmin user;
    * the organisation "Example Publisher #1" and its ``category`` extra;
    * the harvest source ``example-harvest-1``, a harvest job for it, and the
      follow-up edits to ``example-dataset-number-one``;
    * the organisation "Example Publisher #2".

    Finally prints the id of publisher 1 for use as ``OWNER_ORG``.
    """
    required_env = ('CKAN_TEST_SYSADMIN_NAME',
                    'CKAN_TEST_SYSADMIN_PASSWORD',
                    'CKAN_INI')
    if not all(os.environ.get(i) for i in required_env):
        print('One of these env vars not set: CKAN_INI, CKAN_TEST_SYSADMIN_NAME or CKAN_TEST_SYSADMIN_PASSWORD')
        return

    print('====== Creating DGU test data')
    self._load_config()

    engine = sqlalchemy.create_engine(config.get('sqlalchemy.url'))
    model.init_model(engine)

    sysadmin_user = model.User.get(os.environ.get('CKAN_TEST_SYSADMIN_NAME'))
    if not sysadmin_user:
        print('=== Creating test sysadmin')
        sysadmin_user = model.User(
            name=os.environ.get('CKAN_TEST_SYSADMIN_NAME'),
            password=os.environ.get('CKAN_TEST_SYSADMIN_PASSWORD')
        )
        sysadmin_user.sysadmin = True
        model.Session.add(sysadmin_user)
        model.repo.commit_and_remove()

    # Look the organisation up by its name: Group.get() resolves an id or a
    # name, so the title "Example Publisher #1" never matched and a second
    # run tried to create the same group again.
    publisher = model.Group.get(u"example-publisher-1")
    if not publisher:
        print('=== Creating example publisher 1')
        model.Session.flush()
        rev = model.repo.new_revision()
        rev.author = u"DGU test admin"
        rev.message = u"Creating Example Publisher #1."

        publisher = model.Group(
            name=u"example-publisher-1",
            title=u"Example Publisher #1",
            type=u"organization"
        )
        publisher.is_organization = True
        model.Session.add(publisher)
        model.repo.commit_and_remove()

        # The category extra goes into its own revision, after the group
        # has been committed and so has an id.
        rev = model.repo.new_revision()
        rev.author = u"DGU test admin"
        rev.message = u"Adding charity-ngo category for example publisher 1."

        category = model.GroupExtra(group_id=publisher.id, key="category", value="charity-ngo")
        model.Session.add(category)
        model.repo.commit_and_remove()

    if not model.Package.by_name(u"example-harvest-1"):
        print('=== Creating harvest source')
        uses_docker_db = "ckan@db" in config.get('sqlalchemy.url')

        # Host-name suffix of the mock harvest source, picked from the port
        # in ckan.site_url.
        version = ''
        if uses_docker_db:
            if '5001' in config.get('ckan.site_url'):
                version = '-2.8'
            elif '5002' in config.get('ckan.site_url'):
                # Was assigned to a misspelt name ("verions"), which left
                # the suffix empty for this port.
                version = '-2.9'

        if uses_docker_db:
            source_url = "http://static-mock-harvest-source{}:11088/".format(version)
        else:
            source_url = "https://ckan-static-mock-harvest-source.cloudapps.digital/"

        source_dict = {
            'title': 'Example Harvest #1',
            'name': 'example-harvest-1',
            'url': source_url,
            'source_type': 'ckan',
            'owner_org': publisher.id,
            'notes': 'An example harvest source',
            'frequency': "MANUAL",
            'active': True,
            "config": None
        }
        context = {
            "model": model,
            "session": model.Session,
            "user": sysadmin_user.name,
            "ignore_auth": True,
            "schema": harvest_source_schema(),
            "message": "Create DGU example harvest source",
            "return_id_only": True
        }
        harvest_source_id = harvest_source_create(context, source_dict)

        if harvest_source_id:
            print("=== Creating harvest job")
            harvest_job_create(context, {"source_id": harvest_source_id, "run": False})

            print("=== Running harvest job")
            harvest_cmd = "paster --plugin=ckanext-harvest harvester run_test example-harvest-1 -c $CKAN_INI"
            run_command(harvest_cmd)

            model.Session.flush()

            print("=== Updating the example dataset to be in line with how DGU processes it")
            rev = model.repo.new_revision()
            rev.author = u"DGU test admin"
            rev.message = u"Updating example-data-number-one for CKAN functional tests"

            # NOTE(review): assumes the harvest run above created this
            # dataset; if it did not, ``dataset`` is None and the next line
            # raises AttributeError.
            dataset = model.Package.get("example-dataset-number-one")

            contact_name = model.PackageExtra(package_id=dataset.id, key="contact-name", value="Example User")
            model.Session.add(contact_name)

            # Extras that are added with an empty value.
            empty_fields = [
                "contact-email",
                "contact-phone",
                "schema-vocabulary",
                "codelist",
                "licence-custom",
                "foi-web",
                "foi-name",
                "foi-email",
                "foi-phone",
                "theme-primary"
            ]
            for key in empty_fields:
                field = model.PackageExtra(package_id=dataset.id, key=key, value="")
                model.Session.add(field)

            # Extras that are deleted from the dataset when present.
            delete_fields = [
                "guid",
                "responsible-party",
                "taxonomy_url"
            ]
            for key in delete_fields:
                field = model.Session.query(model.PackageExtra).filter(
                    model.PackageExtra.package_id == dataset.id,
                    model.PackageExtra.key == key
                ).first()
                if field:
                    field.delete()

            model.repo.commit_and_remove()

            print("=== Running search index rebuild")
            rebuild_cmd = 'paster --plugin=ckan search-index rebuild %s -c $CKAN_INI' % dataset.name
            run_command(rebuild_cmd)

    # Same name-based lookup as for publisher 1.
    publisher2 = model.Group.get(u"example-publisher-2")
    if not publisher2:
        print('=== Creating example publisher 2')
        model.Session.flush()
        rev = model.repo.new_revision()
        rev.author = u"DGU test admin"
        rev.message = u'''Creating Example Publisher #2.'''

        publisher2 = model.Group(
            name=u"example-publisher-2",
            title=u"Example Publisher #2",
            type="organization"
        )
        publisher2.is_organization = True
        model.Session.add(publisher2)
        model.repo.commit_and_remove()

    print("=== To use with CKAN functional tests in ckan-vars.conf set OWNER_ORG=%s" % publisher.id)
    print("====== DGU test data created")
def add_empty_field(dataset_id, fieldname, model):
    """Add an extra with an empty-string value to the session.

    Builds ``model.PackageExtra`` for package ``dataset_id`` with key
    ``fieldname`` and value ``""`` and passes it to ``model.Session.add``.
    Nothing is flushed or committed here.
    """
    attrs = {"package_id": dataset_id, "key": fieldname, "value": ""}
    blank_extra = model.PackageExtra(**attrs)
    model.Session.add(blank_extra)
def create_package_from_data(self, package_data, package=None):
    '''Create, or update, a package from a plain dict and commit it.

    Keys other than ``extras``, ``resources`` and ``tags`` are set directly
    as attributes on the package. Tags are added by name, resources are
    replaced by the ones listed in ``package_data`` (when any are listed),
    and each entry of ``extras`` becomes a ``PackageExtra``.

    :param package_data: dict describing the package, for example::

        {"extras": {"INSPIRE": "True",
                    "bbox-east-long": "-3.12442",
                    "bbox-north-lat": "54.218407",
                    "bbox-south-lat": "54.039634",
                    "bbox-west-long": "-3.32485",
                    "constraint": "conditions unknown; (e) intellectual property rights;",
                    "dataset-reference-date": [{"type": "creation",
                                                "value": "2008-10-10"},
                                               {"type": "revision",
                                                "value": "2009-10-08"}],
                    "guid": "00a743bf-cca4-4c19-a8e5-e64f7edbcadd",
                    "metadata-date": "2009-10-16",
                    "metadata-language": "eng",
                    "published_by": 0,
                    "resource-type": "dataset",
                    "spatial-reference-system": "wee",
                    "temporal_coverage-from": "1977-03-10T11:45:30",
                    "temporal_coverage-to": "2005-01-15T09:10:00"},
         "name": "council-owned-litter-bins",
         "notes": "Location of Council owned litter bins within Borough.",
         "resources": [{"description": "Resource locator",
                        "format": "Unverified",
                        "url": "http://www.barrowbc.gov.uk"}],
         "tags": ["Utility and governmental services"],
         "title": "Council Owned Litter Bins"}

    :param package: existing package to update; a new ``model.Package`` is
        created when omitted.
    :returns: the committed package.
    '''
    if not package:
        package = model.Package()

    rev = model.repo.new_revision()

    # These keys describe related objects and are handled separately below.
    relationship_attr = ['extras', 'resources', 'tags']
    for key, value in package_data.items():
        if key not in relationship_attr:
            setattr(package, key, value)

    for tag in package_data.get('tags', []):
        package.add_tag_by_name(tag, autoflush=False)

    # Clear existing resources once, and only when replacements are given.
    # Clearing inside the loop dropped every resource but the last one.
    resource_dicts = package_data.get("resources", [])
    if resource_dicts:
        package.resources[:] = []
    for resource_dict in resource_dicts:
        package.resources.append(model.Resource(**resource_dict))

    for key, value in package_data.get("extras", {}).items():
        package._extras[key] = model.PackageExtra(key=key, value=value)

    model.Session.add(package)
    # Flush before the roles are set up, as the original did.
    # NOTE(review): presumably so the package has an id — confirm.
    model.Session.flush()

    model.setup_default_user_roles(package, [])

    model.Session.add(rev)
    model.Session.commit()

    return package
def update_datasets():
    '''Generates SQL that makes every package a member of the appropriate
    group (publisher).

    It uses published_via and published_by to determine the group. If a
    package has both fields, then it is a member of the published_via group,
    and the published_by value becomes the 'provider' extra. Any packages
    with neither value are logged and skipped.

    The revision / member / member_revision statements are printed to stdout
    rather than executed; only the 'provider' extras are written through the
    session and committed.
    '''
    from ckan import model

    # Raw strings so that \s and \d reach the regex engine untouched.
    publisher_name_and_id_regex = re.compile(r"^(.*)\s\[(\d+)\].*$")
    publisher_id_regex = re.compile(r"^(\d+)$")

    for pkg in model.Session.query(model.Package):
        pid = pkg.id
        pkg_str = pkg.name
        if pkg.state != 'active':
            pkg_str += ' (%s)' % pkg.state

        provider = ""

        via = model.Session.query("id", "value")\
            .from_statement(DATASET_EXTRA_QUERY_VIA).params(package_id=pid).all()
        by = model.Session.query("id", "value")\
            .from_statement(DATASET_EXTRA_QUERY_BY).params(package_id=pid).all()

        # Strip the quotes and spaces surrounding the stored values.
        via_value = via[0][1].strip("\"' ") if via else None
        by_value = by[0][1].strip("\"' ") if by else None

        if not via_value:
            if by_value:
                value = by_value
            else:
                if pkg.state == 'active':
                    warn('Dataset %s has no published_by/via - skipping.', pkg_str)
                else:
                    log.info('Dataset %s has no published_by/via - skipping.', pkg_str)
                continue
        else:
            value = via_value
            # published_via decides the group; a differing published_by is
            # kept as the provider. Guard on by_value: a package with only
            # published_via used to raise TypeError on "'[' in None".
            if by_value and via_value != by_value:
                if '[' in by_value:
                    provider = by_value[:by_value.index('[')].strip("\"' ")
                else:
                    provider = by_value
                # NOTE(review): this backslash-escaped value is what gets
                # stored in the 'provider' extra below — confirm intended.
                provider = provider.replace("'", "\\'")

        # Use the publisher_name_and_id_regex to extract the publisher name
        # and node_id from value, which has format
        # "Name of the publisher [node_id]"
        group_match = publisher_name_and_id_regex.match(str(value))
        if group_match:
            publisher_name, publisher_node_id = group_match.groups(0)
            publisher_node_id = int(publisher_node_id)
        else:
            # Otherwise the value may be just the numeric node id.
            group_match = publisher_id_regex.match(str(value))
            if group_match:
                publisher_name = None
                publisher_node_id = int(group_match.groups(0)[0])
            else:
                warn('Could not extract id from the publisher name: %r. Skipping package %s', value, pkg_str)
                continue

        # Lookup publisher object: by title first, then by node id mapping.
        if publisher_name:
            matches = model.Group.all('publisher').filter_by(title=publisher_name).all()
        else:
            matches = []
        if not matches:
            pub_name = publishers.get(publisher_node_id)
            if not pub_name:
                warn('Could not find publisher for node ID %r. Skipping package=%s published_by=%r published_via=%r',
                     publisher_node_id, pkg_str, by_value, via_value)
                continue
            matches = model.Group.all('publisher').filter_by(name=pub_name).all()

        if not matches:
            warn('Could not find publisher %r. Skipping package=%s published_by=%r published_via=%r',
                 publisher_name, pkg_str, by_value, via_value)
            continue
        if len(matches) > 1:
            warn('Multiple matches for publisher %r: %r. package=%s published_by=%r published_via=%r',
                 publisher_name, [(pub.id, pub.title) for pub in matches],
                 pkg_str, by_value, via_value)
            continue
        publisher = matches[0]
        publisher_id = publisher.id

        log.info('Adding dataset %r to publisher %r', pkg.name, publisher_name)

        membership_q = model.Session.query(model.Member).filter_by(
            table_id=pid, capacity='public', group_id=publisher_id)
        if membership_q.count():
            log.warn('Membership already added')
            continue

        member_id = unicode(uuid.uuid4())
        member_revision_id = unicode(uuid.uuid4())
        revision_id = unicode(uuid.uuid4())

        member_q = MEMBER_QUERY.strip() % \
            (member_id, pid, publisher_id, revision_id)
        member_rev_q = MEMBER_REVISION_QUERY.strip() % \
            (member_revision_id, pid, publisher_id, revision_id, member_id)
        revision_q = REVISION_QUERY.strip() % (revision_id,)

        # Single-argument print calls behave the same on Python 2 and 3.
        print(revision_q)
        print(member_q)
        print(member_rev_q)

        if provider:
            p = model.PackageExtra(id=unicode(uuid.uuid4()), package_id=pid,
                                   key='provider', value=provider)
            model.Session.add(p)
        model.Session.commit()
        print('')