def package_delete(self, pkg_id):
    """Remove a dataset from the Open Government Portal.

    :param pkg_id: id or name of the package to delete
    :return: redirect response to the package search page
    """
    lc = LocalCKAN(username=c.user)
    # Delete first, then flash: the original flashed success before the
    # API call, so a failed delete would still show the success banner.
    lc.action.package_delete(id=pkg_id)
    h.flash_success(_(
        '<strong>Note</strong><br> The dataset has been removed from'
        ' the Open Government Portal. <br/> The record may re-appear'
        ' if it is re-harvested or updated. Please ensure that the'
        ' record is deleted and purged from the source catalogue in'
        ' order to prevent it from reappearing.'
        ), allow_html=True
    )
    return h.redirect_to(
        controller='package',
        action='search'
    )
def test_invalid_json_string_not_json(self):
    """A plain non-JSON string in a resource JSON field is rejected."""
    lc = LocalCKAN()
    bad_resource = {
        'url': 'http://example.com/data.csv',
        'a_resource_json_field': 'not-json',
    }
    try:
        lc.action.package_create(
            type='test-schema',
            name='bob_json_1',
            resources=[bad_resource],
        )
    except ValidationError as e:
        message = e.error_dict['resources'][0]['a_resource_json_field'][0]
        assert message.startswith(
            'Invalid JSON string: No JSON object could be decoded')
    else:
        raise AssertionError('ValidationError not raised')
def publish(self):
    """Stamp every selected package with the submitted publish date.

    Reads 'publish_date' and the selected package ids from the POST
    data, patches each package's portal_release_date, then redirects
    back to the publishing interface.
    """
    lc = LocalCKAN(username=c.user)
    raw_date = request.str_POST['publish_date']
    publish_date = date_str_to_datetime(raw_date).strftime(
        "%Y-%m-%d %H:%M:%S")
    # get a list of package id's from the for POST data
    for key, package_id in request.str_POST.iteritems():
        if key != 'publish':
            continue
        lc.action.package_patch(
            id=package_id,
            portal_release_date=publish_date,
        )
    # return us to the publishing interface
    redirect(h.url_for('ckanadmin_publish'))
def package_undelete(self, pkg_id):
    """Restore a deleted dataset by setting its state back to 'active'.

    :param pkg_id: id or name of the package to restore
    :return: redirect response to the package's read page
    """
    lc = LocalCKAN(username=c.user)
    # Restore first, then flash: flashing before the patch would show a
    # success message even when the API call fails.
    lc.action.package_patch(
        id=pkg_id,
        state='active'
    )
    h.flash_success(_(
        '<strong>Note</strong><br> The record has been restored.'),
        allow_html=True
    )
    return h.redirect_to(
        controller='package',
        action='read',
        id=pkg_id
    )
def _rebuild(self, csv_files=None, solr_url=None):
    """
    Implement rebuild command: clear the 'ati' Solr index and repopulate
    it, either from the given CSV files or from all organizations.

    :param csv_files: sequence of paths to .csv files for input
    :type csv_files: sequence of str
    :param solr_url: optional Solr endpoint override passed through to
        the index helpers
    :return: Nothing
    :rtype: None
    """
    # wipe the existing index before re-feeding it (no commit here;
    # a single commit is issued at the end)
    self._clear_index(solr_url, False)
    conn = solr_connection('ati', solr_url)
    lc = LocalCKAN()
    if csv_files:
        # CSV mode: each file name (minus .csv) identifies a recombinant
        # resource whose chromo/geno must target TARGET_DATASET
        for csv_file in csv_files:
            print csv_file + ':'
            firstpart, filename = os.path.split(csv_file)
            assert filename.endswith('.csv')
            resource_name = filename[:-4]
            chromo = get_chromo(resource_name)
            geno = get_geno(chromo['dataset_type'])
            assert geno.get('target_dataset') == TARGET_DATASET
            for org_id, records in csv_data_batch(csv_file, chromo):
                # sanitize all values so they are safe to send to Solr
                records = [dict((k, safe_for_solr(v)) for k, v in
                            row_dict.items()) for row_dict in records]
                try:
                    org_detail = lc.action.organization_show(id=org_id)
                except NotFound:
                    # org referenced by the CSV no longer exists: skip
                    continue
                print " {0:s} {1}".format(org_id, len(records))
                _update_records(records, org_detail, conn)
    else:
        # full mode: walk every organization and feed all of its
        # TARGET_DATASET records into the index
        for org_id in lc.action.organization_list():
            count = 0
            org_detail = lc.action.organization_show(id=org_id)
            for resource_name, records in data_batch(org_detail['id'], lc, TARGET_DATASET):
                _update_records(records, org_detail, conn)
                count += len(records)
            print org_id, count
    print "commit"
    conn.commit()
def test_invalid_json_object(self):
    """A dict containing a non-serializable value is rejected."""
    lc = LocalCKAN()
    unserializable = {
        'type': 'walnut',
        'date': datetime.datetime.utcnow()
    }
    try:
        lc.action.package_create(
            type='test-schema',
            name='bob_json_1',
            a_json_field=unserializable,
        )
    except ValidationError as e:
        message = e.error_dict['a_json_field'][0]
        assert message.startswith('Invalid JSON object:')
        assert message.endswith('is not JSON serializable')
    else:
        raise AssertionError('ValidationError not raised')
def test_valid_json_object(self):
    """A valid dict round-trips through a resource JSON field."""
    lc = LocalCKAN()
    dataset = lc.action.package_create(
        type='test-schema',
        name='bob_json_1',
        resources=[{
            'url': 'http://example.com/data.csv',
            'a_resource_json_field': {'a': 1, 'b': 2},
        }],
    )
    stored = dataset['resources'][0]['a_resource_json_field']
    assert_equals(stored, {'a': 1, 'b': 2})
def test_invalid_json_string(self):
    """A malformed JSON object string is rejected with a parse error."""
    lc = LocalCKAN()
    malformed = '{"type": "walnut", "codes": 1, 2 ,3}'
    try:
        lc.action.package_create(
            type='test-schema',
            name='bob_json_1',
            resources=[{
                'url': 'http://example.com/data.csv',
                'a_resource_json_field': malformed,
            }],
        )
    except ValidationError as e:
        message = e.error_dict['resources'][0]['a_resource_json_field'][0]
        assert message.startswith('Invalid JSON string: Expecting property name')
    else:
        raise AssertionError('ValidationError not raised')
def test_invalid_json_string_values(self):
    """JSON scalars/arrays as strings are unsupported for JSON fields."""
    lc = LocalCKAN()
    for value in ("22", "true", "false", "null", "[1,2,3]"):
        resource = {
            "url": "http://example.com/data.csv",
            "a_resource_json_field": value,
        }
        try:
            lc.action.package_create(
                type="test-schema",
                name="bob_json_1",
                resources=[resource],
            )
        except ValidationError as e:
            first_error = e.error_dict["resources"][0][
                "a_resource_json_field"][0]
            assert first_error.startswith("Unsupported value for JSON field")
        else:
            raise AssertionError("ValidationError not raised")
def _delete(self, dataset_types):
    """
    delete recombinant datasets and all their data

    For each (expanded) dataset type, every matching package owned by
    the known organizations has its datastore tables dropped and the
    package itself deleted.
    """
    orgs = self._get_orgs()
    lc = LocalCKAN()
    for dtype in self._expand_dataset_types(dataset_types):
        packages = self._get_packages(dtype, orgs)
        for p in packages:
            print 'deleting %s %s' % (dtype, p['owner_org'])
            for r in p['resources']:
                try:
                    lc.action.datastore_delete(
                        force=True, resource_id=r['id'])
                except NotFound:
                    # resource has no datastore table; nothing to drop
                    pass
            lc.action.package_delete(id=p['id'])
def scheming_vocabulary_choices(field):
    """
    Return choice dicts for a scheming field backed by a tag vocabulary.

    Required scheming field:
    "vocabulary": "name or id"

    A 'notSpecified' choice is prepended to the vocabulary's tags.
    Returns an empty list when the vocabulary cannot be loaded
    (best-effort by design).

    # https://github.com/SNStatComp/ckan-CC/
    # /containers/plugins/ckanext-scheming/ckanext-scheming/ckanext/scheming/helpers.py
    """
    try:
        lc = LocalCKAN(username='')
        vocab = lc.action.vocabulary_show(id=field['vocabulary'])
        result = [{
            'value': tag['name'],
            'label': tag['name']
        } for tag in vocab['tags']]
        return [{'value': 'notSpecified', 'label': 'not specified'}] + result
    except Exception:
        # narrowed from a bare ``except:`` which would also swallow
        # SystemExit/KeyboardInterrupt; keep the silent-fallback contract
        return []
def test_invalid_json_string(self):
    """A malformed JSON object string is rejected with a parse error."""
    lc = LocalCKAN()
    raised = None
    try:
        lc.action.package_create(
            type="test-schema",
            name="bob_json_1",
            resources=[{
                "url": "http://example.com/data.csv",
                "a_resource_json_field":
                    '{"type": "walnut", "codes": 1, 2 ,3}',
            }],
        )
    except ValidationError as e:
        raised = e
    if raised is None:
        raise AssertionError("ValidationError not raised")
    message = raised.error_dict["resources"][0]["a_resource_json_field"][0]
    assert message.startswith("Invalid JSON string: Expecting property name")
def test_invalid_json_object(self):
    """A dict containing a non-serializable value is rejected."""
    lc = LocalCKAN()
    bad_value = {
        "type": "walnut",
        "date": datetime.datetime.utcnow(),
    }
    try:
        lc.action.package_create(
            type="test-schema",
            name="bob_json_1",
            a_json_field=bad_value,
        )
    except ValidationError as e:
        message = e.error_dict["a_json_field"][0]
        assert message.startswith("Invalid JSON object:")
        assert message.endswith("is not JSON serializable")
    else:
        raise AssertionError("ValidationError not raised")
def test_valid_json_object(self):
    """A valid dict round-trips through a resource JSON field."""
    lc = LocalCKAN()
    dataset = lc.action.package_create(
        type="test-subfields" if False else "test-schema",
        name="bob_json_1",
        resources=[{
            "url": "http://example.com/data.csv",
            "a_resource_json_field": {"a": 1, "b": 2},
        }],
    )
    stored = dataset["resources"][0]["a_resource_json_field"]
    assert stored == {"a": 1, "b": 2}
def create_vocabulary(self, name, terms): registry = LocalCKAN() try: vocab = registry.action.vocabulary_show(id=name) print "{name} vocabulary exists, skipping".format(name=name) return except NotFound: pass print 'creating {name} vocabulary'.format(name=name) vocab = registry.action.vocabulary_create(name=name) for term in terms: # don't create items that only existed in pilot if 'id' not in term: continue registry.action.tag_create( name=term['key'], vocabulary_id=vocab['id'], )
def command(self):
    """Entry point: re-queue one resource (when an id argument is
    given) or every resource in the database for re-processing.
    """
    self._load_config()
    import ckan.model as model
    # reset the scoped session and rebind it to the configured engine
    model.Session.remove()
    model.Session.configure(bind=model.meta.engine)
    self.ckan = LocalCKAN()
    if len(self.args) == 1:
        # single-resource mode: the only argument is the resource id
        self.force_resource(self.args[0])
        return
    # Going to re-process everything - this may:
    # 1. Take some time
    # 2. Put some pressure on the queue
    resources = model.Session.query(model.Resource).all()
    for resource in resources:
        self.force_resource(resource.id)
def test_invalid_json_value(self):
    """Non-dict, non-string values are rejected for JSON fields."""
    lc = LocalCKAN()
    for bad in (True, datetime.datetime.utcnow(), (2, 3), [2, 3], 23):
        try:
            lc.action.package_create(
                type="test-schema",
                name="bob_json_1",
                resources=[{
                    "url": "http://example.com/data.csv",
                    "a_resource_json_field": bad,
                }],
            )
        except ValidationError as e:
            message = e.error_dict["resources"][0][
                "a_resource_json_field"][0]
            assert message.startswith("Unsupported type for JSON field:")
        else:
            raise AssertionError("ValidationError not raised")
def test_invalid_json_string_not_json(self):
    """A plain non-JSON string in a resource JSON field is rejected."""
    lc = LocalCKAN()
    resource = {
        "url": "http://example.com/data.csv",
        "a_resource_json_field": "not-json",
    }
    try:
        lc.action.package_create(
            type="test-schema",
            name="bob_json_1",
            resources=[resource],
        )
    except ValidationError as e:
        first_error = e.error_dict["resources"][0][
            "a_resource_json_field"][0]
        assert first_error.startswith("Invalid JSON string:")
    else:
        raise AssertionError("ValidationError not raised")
def _action_find_dataset(context, data_dict):
    '''
    common code for actions that need to check for a dataset based on
    the dataset type and organization name or id

    :raises ValidationError: when dataset_type is not a known
        recombinant type
    :return: (LocalCKAN instance, geno dict, list of matching packages)
    '''
    dataset_type = get_or_bust(data_dict, 'dataset_type')
    owner_org = get_or_bust(data_dict, 'owner_org')
    try:
        geno = get_geno(dataset_type)
    except RecombinantException:
        raise ValidationError(
            {'dataset_type': _("Recombinant dataset type not found")})
    lc = LocalCKAN(username=context['user'])
    # rows=2 so callers can detect the unexpected multiple-match case
    query = "type:%s organization:%s" % (dataset_type, owner_org)
    result = lc.action.package_search(q=query, rows=2)
    return lc, geno, result['results']
def test_invalid_json_value(self):
    """Non-dict, non-string values are rejected for JSON fields."""
    lc = LocalCKAN()
    bad_values = [True, datetime.datetime.utcnow(), (2, 3), [2, 3], 23]
    for bad in bad_values:
        resource = {
            'url': 'http://example.com/data.csv',
            'a_resource_json_field': bad
        }
        try:
            lc.action.package_create(
                type='test-schema',
                name='bob_json_1',
                resources=[resource],
            )
        except ValidationError as e:
            message = e.error_dict['resources'][0][
                'a_resource_json_field'][0]
            assert message.startswith('Unsupported type for JSON field:')
        else:
            raise AssertionError('ValidationError not raised')
def canada_copy_from_org_name(key, data, errors, context):
    """
    When org name at publication not provided, copy from owner_org
    """
    current = data[key]
    # only fill in when the fluent value is empty in both languages
    if json.loads(current) not in ({}, {'en': '', 'fr': ''}):
        return
    org_id = data[('owner_org', )]
    if not org_id:
        return
    try:
        org = LocalCKAN(username='').action.organization_show(id=org_id)
    except NotFound:
        return
    # bilingual org titles are stored as "English | French"
    title_parts = org['title'].split(' | ')
    data[key] = json.dumps({
        'en': title_parts[0],
        'fr': title_parts[-1],
    })
def command(self):
    """Remove datastore tables whose resources no longer exist in the
    catalogue, paging through ``_table_metadata`` 100 rows at a time.
    """
    self._load_config()
    LOGGER.info("Comenzando limpieza del Datastore")
    # Use a LocalCKAN instance to obtain the default site user's apikey
    lc = LocalCKAN()
    site_user = lc._get_action('get_site_user')({'ignore_auth': True}, ())
    apikey = site_user.get('apikey')
    datajson_resource_ids = self.get_resource_ids(site_user)
    if not datajson_resource_ids:
        # no datasets in the node: nothing to clean up
        LOGGER.info("No existen datasets en el nodo, por lo que no se realizará ninguna limpieza")
        return
    # Datastore resource lookup fails when the url does not start with 'http'
    site_url = config.get('ckan.site_url')
    if not site_url.startswith('http'):
        site_url = 'http://{}'.format(site_url)
    # Fetch metadata about the datastore's tables
    rc = RemoteCKAN(site_url, apikey)
    datastore_resources = rc.action.datastore_search(resource_id='_table_metadata')
    # Datastore resources missing from `datajson_resource_ids` get deleted.
    # `datastore_search` returns 100 rows per call, so page with an offset.
    current_offset = 0
    while datastore_resources.get('total') > current_offset:
        for datastore_resource in datastore_resources.get('records'):
            # In the datastore the resource id is exposed as `name`
            # (skip the `_table_metadata` meta-table itself)
            datastore_resource_id = datastore_resource.get('name')
            if datastore_resource_id != "_table_metadata" and datastore_resource_id not in datajson_resource_ids:
                try:
                    rc.action.datastore_delete(resource_id=datastore_resource_id, force=True)
                except Exception as e:
                    # best-effort: log the failure and continue cleaning
                    LOGGER.warn('Intentando eliminar del Datastore el recurso %s surgió un error: %s', datastore_resource_id, e)
        current_offset += 100
        datastore_resources = rc.action.datastore_search(resource_id='_table_metadata', offset=current_offset)
    LOGGER.info("Limpieza del Datastore terminada")
def convertToRDF(self):
    """Convert a CSV resource to RDF via the JUMA mapping service and
    attach the result to the package as a new resource.

    Reads resource_id/pkg_id/jumaUser/jumaMappingID/newResourceName
    from the request params, streams the converted RDF to a temp file,
    uploads it as a new resource, then removes the temp file.
    """
    losd = LocalCKAN()
    try:
        resource_id = request.params.get('resource_id', u'')
        resource_csv = losd.action.resource_show(id=resource_id)
        Source_URL = resource_csv['url']
        print('\n\n\n\n\n\n')
        print(Source_URL)
        # read from juma
        jumaUser = request.params.get('jumaUser', u'')
        jumaMappingID = request.params.get('jumaMappingID', u'')
        # NOTE(review): the original line contained a redacted segment
        # ('...user='******'&map=') which is not valid Python; jumaUser
        # was defined but unused, so it almost certainly belonged here.
        # TODO confirm against the JUMA API.
        juma_url = ('http://losd.staging.derilinx.com:8889/juma-api?user='
                    + jumaUser + '&map=' + jumaMappingID
                    + '&source=' + Source_URL)
        print(juma_url)
        # stream the converted RDF to a uniquely-named temp file
        filename = '/var/lib/ckan/storage/uploads/' + unicode(
            uuid.uuid4()) + '.ttl'
        response = urllib2.urlopen(juma_url)
        CHUNK = 16 * 1024
        with open(filename, 'wb') as f:
            while True:
                chunk = response.read(CHUNK)
                if not chunk:
                    break
                f.write(chunk)
        # close the upload handle before deleting the file (the
        # original leaked the open(filename) handle)
        with open(filename) as upload_f:
            losd.action.resource_create(
                package_id=request.params.get('pkg_id', u''),
                format='rdf',
                name=request.params.get('newResourceName', u'') or
                    'rdf ' + resource_csv['name'],
                description='RDF file converted using JUMA from CSV resource:'
                    + resource_csv['name'],
                upload=upload_f)
        os.remove(filename)
        id = request.params.get('pkg_id', u'')
        h.flash_notice(_('A new RDF resource has been created.'))
        tk.redirect_to(controller='package', action='read', id=id)
    except NotFound:
        print('not found')
def test_simple(self):
    """A list of subfield dicts survives package_create unchanged."""
    lc = LocalCKAN()
    resource = {
        "url": "http://example.com/data.csv",
        "schedule": [
            {"impact": "A", "frequency": "1m"},
            {"impact": "P", "frequency": "7d"},
        ],
    }
    dataset = lc.action.package_create(
        type="test-subfields",
        name="c_sf_1",
        resources=[resource],
    )
    stored = dataset["resources"][0]["schedule"]
    assert stored == [
        {"impact": "A", "frequency": "1m"},
        {"impact": "P", "frequency": "7d"},
    ]
def test_field_length_errors(self):
    """Over-long codes produce per-field length validation messages."""
    lc = LocalCKAN()
    record = dict(
        get_chromo('contracts')['examples']['record'],
        economic_object_code='467782',
        commodity_code='K23HG367BU',
    )
    with assert_raises(ValidationError) as ve:
        lc.action.datastore_upsert(
            resource_id=self.resource_id,
            records=[record])
    err = ve.exception.error_dict['records'][0]
    expected = {
        'economic_object_code': [
            'This field is limited to only 3 or 4 digits.'],
        'commodity_code': [
            'The field is limited to eight alpha-numeric digits or less.'],
    }
    assert isinstance(err, dict), err
    # compare the union of keys so unexpected extra errors also fail
    for field in set(err) | set(expected):
        assert_equal(err.get(field), expected.get(field), (field, err))
def clear_geodescriptors_for_package(package_id):
    """
    Erase all geodescriptor associations from the given `package_id`
    (which is a package_id_new).

    :param package_id: The product_id_new of the dataset to clear.
    :type package_id: unicode
    :return: number of association rows deleted (0 when no package
        matches)
    """
    lc = LocalCKAN()
    found = lc.action.package_search(
        q='product_id_new:{pid}'.format(pid=package_id),
        rows=1)
    results = found['results']
    if not results:
        return 0
    pkg = results[0]
    delete_stmt = geodescriptor_table.delete().where(
        geodescriptor_table.c.package_id == pkg['id'])
    return model.Session.execute(delete_stmt).rowcount
def _combine_csv(self, target_dir, resource_names):
    """Write one combined CSV per resource name, either into
    `target_dir` (one .csv file each) or to stdout when no target
    directory is given.

    :return: 1 when target_dir is not a directory, otherwise None
    """
    if target_dir and not os.path.isdir(target_dir):
        print '"{0}" is not a directory'.format(target_dir)
        return 1
    orgs = self._get_orgs()
    lc = LocalCKAN()
    # default sink; replaced per-resource with a file when target_dir set
    outf = sys.stdout
    for resource_name in self._expand_resource_names(resource_names):
        if target_dir:
            outf = open(os.path.join(target_dir,
                resource_name + '.csv'), 'wb')
        self._write_one_csv(
            lc,
            self._get_packages(
                get_dataset_type_for_resource_name(resource_name),
                orgs),
            get_chromo(resource_name),
            outf)
        if target_dir:
            # only close files we opened; never close sys.stdout
            outf.close()
def test_date_field_rejects_non_isodates(self):
    """Every non-ISO date string must fail with 'Date format incorrect'.

    Collapses the original three copy-pasted try/except blocks into a
    loop; the generated names (fred_date1..fred_date3) and the checked
    values are identical to the original.
    """
    lc = LocalCKAN()
    bad_dates = ['31/11/2014', '31/11/abcd', 'this-is-not-a-date']
    for i, bad_date in enumerate(bad_dates, 1):
        try:
            lc.action.package_create(
                type='camel-photos',
                name='fred_date%d' % i,
                a_relevant_date=bad_date,
            )
        except ValidationError as e:
            assert_equals(e.error_dict['a_relevant_date'],
                ['Date format incorrect']
                )
        else:
            raise AssertionError('ValidationError not raised')
def publish(self):
    """Stamp every selected package with the submitted publish date,
    flash how many records were published, and redirect back to the
    publishing interface.
    """
    lc = LocalCKAN(username=c.user)
    publish_date = date_str_to_datetime(
        request.str_POST['publish_date']).strftime("%Y-%m-%d %H:%M:%S")
    # get a list of package id's from the for POST data
    count = 0
    for key, package_id in request.str_POST.iteritems():
        if key != 'publish':
            continue
        lc.action.package_patch(
            id=package_id,
            portal_release_date=publish_date,
        )
        count += 1
    # flash notice that records are published
    h.flash_notice(str(count) + _(u' record(s) published.'))
    # return us to the publishing interface
    redirect(h.url_for('ckanadmin_publish'))
def test_invalid_choice(self):
    """A subfield value outside the declared choices is rejected."""
    lc = LocalCKAN()
    bad_schedule = [
        {"impact": "Q", "frequency": "1m"},
        {"impact": "P", "frequency": "7d"},
    ]
    try:
        lc.action.package_create(
            type="test-subfields",
            name="c_sf_1",
            resources=[{
                "url": "http://example.com/data.csv",
                "schedule": bad_schedule,
            }],
        )
    except ValidationError as e:
        first_error = e.error_dict["resources"][0]["schedule"][0][
            "impact"][0]
        assert first_error.startswith("Value must be one of")
    else:
        raise AssertionError("ValidationError not raised")