def _add_new_user(node_id, username, email):
    '''Tries to create a user based on the parameters.
    Returns (username, ckan_user_id)
    '''
    from ckan.logic.validators import name_validator
    from ckan.lib.navl.dictization_functions import Invalid
    from ckan import model
    from ckan.logic.schema import user_new_form_schema
    from ckan.logic import get_action

    name = get_ckan_username_from_drupal_id(node_id)
    try:
        name_validator(name, {})
    except Invalid as e:
        log.error('Name does not validate %r - user not created.', username)
        return name, None
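
# A minimal, hypothetical caller for _add_new_user, reusing the module-level
# log assumed above. The node id, username, and email are made up; per the
# docstring, the second element of the returned tuple is None when the
# derived CKAN name fails validation and no user is created.
def _example_add_new_user():
    name, ckan_user_id = _add_new_user(42, 'jdoe', 'jdoe@example.com')
    if ckan_user_id is None:
        log.warning('User %s was not created', name)
    return name, ckan_user_id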

def name_validator_with_changed_msg(val, context):
    """A wrapper around validators.name_validator that only changes the
    message raised when name_match does not match. The underlying function
    is still called here to keep the link to it visible and, in case of a
    CKAN upgrade, to still be able to raise any new Invalid exceptions it
    introduces.
    """
    try:
        return name_validator(val, context)
    except Invalid as invalid:
        if val in ['new', 'edit', 'search']:
            raise Invalid(_('That name cannot be used'))

        if len(val) < 2:
            raise Invalid(_('Name must be at least %s characters long') % 2)
        if len(val) > PACKAGE_NAME_MAX_LENGTH:
            raise Invalid(_('Name must be a maximum of %i characters long') %
                          PACKAGE_NAME_MAX_LENGTH)
        if not name_match.match(val):
            raise Invalid(_('Username should be lowercase letters and/or '
                            'numbers and/or these symbols: -_'))

        raise invalid
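
# A short, hypothetical usage sketch for the wrapper above. It assumes the
# module-level name_validator, Invalid, and _ names the wrapper itself relies
# on are importable; 'new' is one of the reserved names it special-cases.
def _example_changed_message():
    try:
        name_validator_with_changed_msg('new', {})
    except Invalid as e:
        return e.error  # 'That name cannot be used'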

def call_validator(*args, **kwargs):
    return validators.name_validator(*args, **kwargs)

def import_stage(self, harvest_object):
    # The import stage actually creates the dataset.

    log.debug('In %s import_stage' % repr(self))

    if harvest_object.content is None:
        return True

    dataset = json.loads(harvest_object.content)
    schema_version = '1.0'  # default to '1.0'
    is_collection = False
    parent_pkg_id = ''
    catalog_extras = {}
    for extra in harvest_object.extras:
        if extra.key == 'schema_version':
            schema_version = extra.value
        if extra.key == 'is_collection' and extra.value:
            is_collection = True
        if extra.key == 'collection_pkg_id' and extra.value:
            parent_pkg_id = extra.value
            if parent_pkg_id.startswith('IPO:'):
                # it's an IsPartOf ("identifier" at the external source)
                log.info('IPO found {}'.format(parent_pkg_id))
                # check if the parent is already harvested
                parent_identifier = parent_pkg_id.replace('IPO:', '')
                parent = self.is_part_of_to_package_id(
                    parent_identifier, harvest_object)
                parent_pkg_id = parent['id']
        if extra.key.startswith('catalog_'):
            catalog_extras[extra.key] = extra.value

    # If this dataset is part of a collection, we need to check whether the
    # parent dataset exists. We don't support any hierarchy beyond that, so
    # the check does not apply to datasets that are themselves collections.
    if parent_pkg_id and not is_collection:
        parent_pkg = None
        try:
            parent_pkg = get_action('package_show')(self.context(), {
                "id": parent_pkg_id
            })
        except Exception:
            pass
        if not parent_pkg:
            parent_check_message = "isPartOf identifier '%s' not found." \
                % dataset.get('isPartOf')
            self._save_object_error(parent_check_message, harvest_object,
                                    'Import')
            return None

    # do the title check here
    # https://github.com/GSA/datagov-deploy/issues/953
    title_to_check = self.make_package_name(dataset.get('title'),
                                            harvest_object.guid)
    try:
        name_validator(title_to_check, None)
    except Invalid as e:
        invalid_message = "title: %s. %s." % (dataset.get('title'), e.error)
        self._save_object_error(invalid_message, harvest_object, 'Import')
        return None

    # Get default values.
    source_config = self.load_config(harvest_object.source)
    dataset_defaults = source_config["defaults"]
    validator_schema = source_config.get('validator_schema')
    if schema_version == '1.0' and validator_schema != 'non-federal':
        lowercase_conversion = True
    else:
        lowercase_conversion = False

    MAPPING = {
        "title": "title",
        "description": "notes",
        "keyword": "tags",
        "modified": "extras__modified",  # ! revision_timestamp
        "publisher": "extras__publisher",  # !owner_org
        "contactPoint": "maintainer",
        "mbox": "maintainer_email",
        "identifier": "extras__identifier",  # !id
        "accessLevel": "extras__accessLevel",
        "bureauCode": "extras__bureauCode",
        "programCode": "extras__programCode",
        "accessLevelComment": "extras__accessLevelComment",
        "license": "extras__license",  # !license_id
        "spatial": "extras__spatial",  # Geometry not valid GeoJSON, not indexing
        "temporal": "extras__temporal",
        "theme": "extras__theme",
        "dataDictionary": "extras__dataDictionary",  # !data_dict
        "dataQuality": "extras__dataQuality",
        "accrualPeriodicity": "extras__accrualPeriodicity",
        "landingPage": "extras__landingPage",
        "language": "extras__language",
        "primaryITInvestmentUII": "extras__primaryITInvestmentUII",  # !PrimaryITInvestmentUII
        "references": "extras__references",
        "issued": "extras__issued",
        "systemOfRecords": "extras__systemOfRecords",
        "accessURL": None,
        "webService": None,
        "format": None,
        "distribution": None,
    }

    MAPPING_V1_1 = {
        "title": "title",
        "description": "notes",
        "keyword": "tags",
        "modified": "extras__modified",  # ! revision_timestamp
        "publisher": "extras__publisher",  # !owner_org
        "contactPoint": {
            "fn": "maintainer",
            "hasEmail": "maintainer_email"
        },
        "identifier": "extras__identifier",  # !id
        "accessLevel": "extras__accessLevel",
        "bureauCode": "extras__bureauCode",
        "programCode": "extras__programCode",
        "rights": "extras__rights",
        "license": "extras__license",  # !license_id
        "spatial": "extras__spatial",  # Geometry not valid GeoJSON, not indexing
        "temporal": "extras__temporal",
        "theme": "extras__theme",
        "dataDictionary": "extras__dataDictionary",  # !data_dict
        "dataQuality": "extras__dataQuality",
        "accrualPeriodicity": "extras__accrualPeriodicity",
        "landingPage": "extras__landingPage",
        "language": "extras__language",
        "primaryITInvestmentUII": "extras__primaryITInvestmentUII",  # !PrimaryITInvestmentUII
        "references": "extras__references",
        "issued": "extras__issued",
        "systemOfRecords": "extras__systemOfRecords",
        "distribution": None,
    }

    SKIP = ["accessURL", "webService", "format", "distribution"]  # will go into pkg["resources"]
    # also skip the processed_how key; it was added to indicate how we
    # processed the dataset.
    SKIP.append("processed_how")

    SKIP_V1_1 = ["@type", "isPartOf", "distribution"]
    SKIP_V1_1.append("processed_how")

    if lowercase_conversion:
        mapping_processed = {}
        for k, v in MAPPING.items():
            mapping_processed[k.lower()] = v

        skip_processed = [k.lower() for k in SKIP]

        dataset_processed = {'processed_how': ['lowercase']}
        for k, v in dataset.items():
            if k.lower() in mapping_processed.keys():
                dataset_processed[k.lower()] = v
            else:
                dataset_processed[k] = v

        if 'distribution' in dataset and dataset['distribution'] is not None:
            dataset_processed['distribution'] = []
            for d in dataset['distribution']:
                d_lower = {}
                for k, v in d.items():
                    if k.lower() in mapping_processed.keys():
                        d_lower[k.lower()] = v
                    else:
                        d_lower[k] = v
                dataset_processed['distribution'].append(d_lower)
    else:
        dataset_processed = dataset
        mapping_processed = MAPPING
        skip_processed = SKIP

    if schema_version == '1.1':
        mapping_processed = MAPPING_V1_1
        skip_processed = SKIP_V1_1

    validate_message = self._validate_dataset(validator_schema,
                                              schema_version,
                                              dataset_processed)
    if validate_message:
        self._save_object_error(validate_message, harvest_object, 'Import')
        return None

    # We need to get the owner organization (if any) from the harvest
    # source dataset
    owner_org = None
    source_dataset = model.Package.get(harvest_object.source.id)
    if source_dataset.owner_org:
        owner_org = source_dataset.owner_org

    group_name = source_config.get('default_groups', '')

    # Assemble basic information about the dataset.
    pkg = {
        "state": "active",  # in case it was previously deleted
        "owner_org": owner_org,
        "groups": [{"name": group_name}],
        "resources": [],
        "extras": [
            {
                "key": "resource-type",
                "value": "Dataset",
            },
            {
                "key": "source_hash",
                "value": self.make_upstream_content_hash(
                    dataset, harvest_object.source, catalog_extras,
                    schema_version),
            },
            {
                "key": "source_datajson_identifier",
                "value": True,
            },
            {
                "key": "harvest_source_id",
                "value": harvest_object.harvest_source_id,
            },
            {
                "key": "harvest_object_id",
                "value": harvest_object.id,
            },
            {
                "key": "harvest_source_title",
                "value": harvest_object.source.title,
            },
            {
                "key": "source_schema_version",
                "value": schema_version,
            },
        ]
    }

    extras = pkg["extras"]
    unmapped = []

    for key, value in dataset_processed.items():
        try:
            self._size_check(key, value)
        except DataError as e:
            self._save_object_error(e.error, harvest_object, 'Import')
            return None

        if key in skip_processed:
            continue
        new_key = mapping_processed.get(key)
        if not new_key:
            unmapped.append(key)
            continue

        # after schema 1.0+, we need to deal with multiple new_keys
        new_keys = []
        values = []
        if isinstance(new_key, dict):  # when schema is not 1.0
            _new_key_keys = new_key.keys()
            new_keys = new_key.values()
            values = []
            for _key in _new_key_keys:
                values.append(value.get(_key))
        else:
            new_keys.append(new_key)
            values.append(value)

        if not any(item for item in values):
            continue

        mini_dataset = dict(zip(new_keys, values))
        for mini_key, mini_value in mini_dataset.items():
            if not mini_value:
                continue
            if mini_key.startswith('extras__'):
                extras.append({"key": mini_key[8:], "value": mini_value})
            else:
                pkg[mini_key] = mini_value
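
# A standalone sketch of the key-mapping step at the end of import_stage,
# using the v1.1 contactPoint rule from MAPPING_V1_1: a dict-valued mapping
# fans one source key out into several target keys, and the 'extras__'
# prefix routes a value into pkg['extras'] rather than a top-level package
# field. The contact values below are made up.
def _example_map_contact_point():
    pkg = {"extras": []}
    new_key = {"fn": "maintainer", "hasEmail": "maintainer_email"}
    value = {"fn": "Jane Doe", "hasEmail": "mailto:jane.doe@example.gov"}
    # Mirror the zip of mapped target keys with the looked-up source values.
    mini_dataset = dict(zip(new_key.values(),
                            [value.get(k) for k in new_key.keys()]))
    for mini_key, mini_value in mini_dataset.items():
        if not mini_value:
            continue
        if mini_key.startswith('extras__'):
            pkg["extras"].append({"key": mini_key[8:], "value": mini_value})
        else:
            pkg[mini_key] = mini_value
    # pkg == {'extras': [], 'maintainer': 'Jane Doe',
    #         'maintainer_email': 'mailto:jane.doe@example.gov'}
    return pkg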