Example #1
def _add_new_user(node_id, username, email):
    '''Tries to create a user based on the parameters.
    Returns (username, ckan_user_id)
    '''
    from ckan.logic.validators import name_validator
    from ckan.lib.navl.dictization_functions import Invalid
    from ckan import model
    from ckan.logic.schema import user_new_form_schema
    from ckan.logic import get_action

    name = get_ckan_username_from_drupal_id(node_id)

    try:
        name_validator(name, {})
    except Invalid as e:
        log.error('Name %r does not validate - user not created.', username)
        return name, None
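For context, name_validator raises Invalid rather than returning a flag, which is why the snippet above catches the exception and returns (name, None) instead of creating the user. A minimal sketch of that behaviour, assuming a working CKAN install where ckan.logic.validators is importable (the helper below is illustrative, not part of the original code):

from ckan.logic.validators import name_validator
from ckan.lib.navl.dictization_functions import Invalid

def is_valid_ckan_name(name):
    # Illustrative helper: returns True only if CKAN's name_validator accepts the name.
    try:
        name_validator(name, {})
        return True
    except Invalid:
        return False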
Example #3
def name_validator_with_changed_msg(val, context):
    """This is just a wrapper function around the validator.name_validator function.
        The wrapper function just changes the message in case the name_match doesn't match.
        The only purpose for still calling that function here is to keep the link visible and
        in case of a ckan upgrade to still be able to raise any new Invalid exceptions

    """
    try:
        return name_validator(val, context)
    except Invalid as invalid:
        if val in ['new', 'edit', 'search']:
            raise Invalid(_('That name cannot be used'))

        if len(val) < 2:
            raise Invalid(_('Name must be at least %s characters long') % 2)
        if len(val) > PACKAGE_NAME_MAX_LENGTH:
            raise Invalid(_('Name must be a maximum of %i characters long') % \
                          PACKAGE_NAME_MAX_LENGTH)
        if not name_match.match(val):
            raise Invalid(_('Username should be lowercase letters and/or numbers and/or these symbols: -_'))

        raise invalid
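A short usage sketch of the wrapper, assuming the module-level names it relies on (name_match, PACKAGE_NAME_MAX_LENGTH, the _() translation helper and Invalid) are defined as in ckan.logic.validators; the input value below is illustrative:

try:
    name_validator_with_changed_msg('Bad Name!', {})
except Invalid as e:
    # The wrapper re-raises Invalid with the customised message, e.g.
    # "Username should be lowercase letters and/or numbers and/or these symbols: -_"
    print(e.error)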
Example #5
def call_validator(*args, **kwargs):
    return validators.name_validator(*args, **kwargs)
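This two-line wrapper is the kind of helper typically used in validator tests; a hedged sketch of such a test, assuming `validators` refers to ckan.logic.validators and that pytest is available (neither is shown in the original snippet):

import pytest
from ckan.lib.navl.dictization_functions import Invalid

def test_name_validator_rejects_reserved_word():
    # 'new' is one of the reserved names that name_validator refuses.
    with pytest.raises(Invalid):
        call_validator('new', context={})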
Example #6
    def import_stage(self, harvest_object):
        # The import stage actually creates the dataset.

        log.debug('In %s import_stage' % repr(self))

        if harvest_object.content is None:
            return True

        dataset = json.loads(harvest_object.content)
        schema_version = '1.0'  # default to '1.0'
        is_collection = False
        parent_pkg_id = ''
        catalog_extras = {}
        for extra in harvest_object.extras:
            if extra.key == 'schema_version':
                schema_version = extra.value
            if extra.key == 'is_collection' and extra.value:
                is_collection = True
            if extra.key == 'collection_pkg_id' and extra.value:
                parent_pkg_id = extra.value
                if parent_pkg_id.startswith('IPO:'):
                    # it's an IsPartOf ("identifier" at the external source)
                    log.info('IPO found {}'.format(parent_pkg_id))

                    #  check if parent is already harvested
                    parent_identifier = parent_pkg_id.replace('IPO:', '')
                    parent = self.is_part_of_to_package_id(
                        parent_identifier, harvest_object)
                    parent_pkg_id = parent['id']

            if extra.key.startswith('catalog_'):
                catalog_extras[extra.key] = extra.value

            # If this dataset is part of a collection, check whether the
            # parent dataset exists. We do not support any deeper hierarchy,
            # so the check does not apply to datasets that are themselves
            # collections (is_collection).
            if parent_pkg_id and not is_collection:
                parent_pkg = None
                try:
                    parent_pkg = get_action('package_show')(self.context(), {
                        "id": parent_pkg_id
                    })
                except:
                    # parent dataset not found or not accessible; handled below
                    pass
                if not parent_pkg:
                    parent_check_message = "isPartOf identifier '%s' not found." \
                        % dataset.get('isPartOf')
                    self._save_object_error(parent_check_message,
                                            harvest_object, 'Import')
                    return None

        # do title check here
        # https://github.com/GSA/datagov-deploy/issues/953
        title_to_check = self.make_package_name(dataset.get('title'),
                                                harvest_object.guid)
        try:
            name_validator(title_to_check, None)
        except Invalid as e:
            invalid_message = "title: %s. %s." % (dataset.get('title'),
                                                  e.error)
            self._save_object_error(invalid_message, harvest_object, 'Import')
            return None

        # Get default values.
        source_config = self.load_config(harvest_object.source)
        dataset_defaults = source_config["defaults"]
        validator_schema = source_config.get('validator_schema')
        if schema_version == '1.0' and validator_schema != 'non-federal':
            lowercase_conversion = True
        else:
            lowercase_conversion = False

        MAPPING = {
            "title": "title",
            "description": "notes",
            "keyword": "tags",
            "modified": "extras__modified",  # ! revision_timestamp
            "publisher": "extras__publisher",  # !owner_org
            "contactPoint": "maintainer",
            "mbox": "maintainer_email",
            "identifier": "extras__identifier",  # !id
            "accessLevel": "extras__accessLevel",
            "bureauCode": "extras__bureauCode",
            "programCode": "extras__programCode",
            "accessLevelComment": "extras__accessLevelComment",
            "license": "extras__license",  # !license_id 
            "spatial":
            "extras__spatial",  # Geometry not valid GeoJSON, not indexing
            "temporal": "extras__temporal",
            "theme": "extras__theme",
            "dataDictionary": "extras__dataDictionary",  # !data_dict
            "dataQuality": "extras__dataQuality",
            "accrualPeriodicity": "extras__accrualPeriodicity",
            "landingPage": "extras__landingPage",
            "language": "extras__language",
            "primaryITInvestmentUII":
            "extras__primaryITInvestmentUII",  # !PrimaryITInvestmentUII
            "references": "extras__references",
            "issued": "extras__issued",
            "systemOfRecords": "extras__systemOfRecords",
            "accessURL": None,
            "webService": None,
            "format": None,
            "distribution": None,
        }

        MAPPING_V1_1 = {
            "title": "title",
            "description": "notes",
            "keyword": "tags",
            "modified": "extras__modified",  # ! revision_timestamp
            "publisher": "extras__publisher",  # !owner_org
            "contactPoint": {
                "fn": "maintainer",
                "hasEmail": "maintainer_email"
            },
            "identifier": "extras__identifier",  # !id
            "accessLevel": "extras__accessLevel",
            "bureauCode": "extras__bureauCode",
            "programCode": "extras__programCode",
            "rights": "extras__rights",
            "license": "extras__license",  # !license_id
            "spatial":
            "extras__spatial",  # Geometry not valid GeoJSON, not indexing
            "temporal": "extras__temporal",
            "theme": "extras__theme",
            "dataDictionary": "extras__dataDictionary",  # !data_dict
            "dataQuality": "extras__dataQuality",
            "accrualPeriodicity": "extras__accrualPeriodicity",
            "landingPage": "extras__landingPage",
            "language": "extras__language",
            "primaryITInvestmentUII":
            "extras__primaryITInvestmentUII",  # !PrimaryITInvestmentUII
            "references": "extras__references",
            "issued": "extras__issued",
            "systemOfRecords": "extras__systemOfRecords",
            "distribution": None,
        }

        SKIP = ["accessURL", "webService", "format",
                "distribution"]  # will go into pkg["resources"]
        # Also skip the processed_how key; it was added to indicate how we processed the dataset.
        SKIP.append("processed_how")

        SKIP_V1_1 = ["@type", "isPartOf", "distribution"]
        SKIP_V1_1.append("processed_how")

        if lowercase_conversion:

            mapping_processed = {}
            for k, v in MAPPING.items():
                mapping_processed[k.lower()] = v

            skip_processed = [k.lower() for k in SKIP]

            dataset_processed = {'processed_how': ['lowercase']}
            for k, v in dataset.items():
                if k.lower() in mapping_processed.keys():
                    dataset_processed[k.lower()] = v
                else:
                    dataset_processed[k] = v

            if dataset.get('distribution') is not None:
                dataset_processed['distribution'] = []
                for d in dataset['distribution']:
                    d_lower = {}
                    for k, v in d.items():
                        if k.lower() in mapping_processed.keys():
                            d_lower[k.lower()] = v
                        else:
                            d_lower[k] = v
                    dataset_processed['distribution'].append(d_lower)
        else:
            dataset_processed = dataset
            mapping_processed = MAPPING
            skip_processed = SKIP

        if schema_version == '1.1':
            mapping_processed = MAPPING_V1_1
            skip_processed = SKIP_V1_1

        validate_message = self._validate_dataset(validator_schema,
                                                  schema_version,
                                                  dataset_processed)
        if validate_message:
            self._save_object_error(validate_message, harvest_object, 'Import')
            return None

        # We need to get the owner organization (if any) from the harvest
        # source dataset
        owner_org = None
        source_dataset = model.Package.get(harvest_object.source.id)
        if source_dataset.owner_org:
            owner_org = source_dataset.owner_org

        group_name = source_config.get('default_groups', '')

        # Assemble basic information about the dataset.

        pkg = {
            "state": "active",  # in case was previously deleted
            "owner_org": owner_org,
            "groups": [{"name": group_name}],
            "resources": [],
            "extras": [
                {
                    "key": "resource-type",
                    "value": "Dataset",
                },
                {
                    "key": "source_hash",
                    "value": self.make_upstream_content_hash(
                        dataset, harvest_object.source, catalog_extras,
                        schema_version),
                },
                {
                    "key": "source_datajson_identifier",
                    "value": True,
                },
                {
                    "key": "harvest_source_id",
                    "value": harvest_object.harvest_source_id,
                },
                {
                    "key": "harvest_object_id",
                    "value": harvest_object.id,
                },
                {
                    "key": "harvest_source_title",
                    "value": harvest_object.source.title,
                },
                {
                    "key": "source_schema_version",
                    "value": schema_version,
                },
            ]
        }

        extras = pkg["extras"]
        unmapped = []

        for key, value in dataset_processed.items():

            try:
                self._size_check(key, value)
            except DataError as e:
                self._save_object_error(e.error, harvest_object, 'Import')
                return None

            if key in skip_processed:
                continue
            new_key = mapping_processed.get(key)
            if not new_key:
                unmapped.append(key)
                continue

            # for schemas newer than 1.0, a single source key may map to multiple new keys
            new_keys = []
            values = []
            if isinstance(new_key, dict):  # when schema is not 1.0
                _new_key_keys = new_key.keys()
                new_keys = new_key.values()
                values = []
                for _key in _new_key_keys:
                    values.append(value.get(_key))
            else:
                new_keys.append(new_key)
                values.append(value)

            if not any(item for item in values):
                continue

            mini_dataset = dict(zip(new_keys, values))
            for mini_key, mini_value in mini_dataset.items():
                if not mini_value:
                    continue
                if mini_key.startswith('extras__'):
                    extras.append({"key": mini_key[8:], "value": mini_value})
                else:
                    pkg[mini_key] = mini_value
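The extras__ prefix used throughout MAPPING and MAPPING_V1_1 is the convention this loop relies on to route each mapped value either to a top-level package field or into pkg["extras"]. A minimal standalone sketch of that routing (field names are illustrative):

pkg = {"extras": []}

def apply_mapped_value(pkg, mapped_key, value):
    # Illustrative re-statement of the routing done at the end of import_stage.
    if mapped_key.startswith('extras__'):
        # drop the 8-character 'extras__' prefix and store the value as a CKAN extra
        pkg["extras"].append({"key": mapped_key[8:], "value": value})
    else:
        pkg[mapped_key] = value

apply_mapped_value(pkg, "title", "My dataset")
apply_mapped_value(pkg, "extras__accessLevel", "public")
# pkg is now {'extras': [{'key': 'accessLevel', 'value': 'public'}], 'title': 'My dataset'}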