Exemplo n.º 1
0
class OEREBlexSource(Base):
    """
    A document source that creates records for the documents received from OEREBlex for the specified
    geoLink.
    """
    def __init__(self, **kwargs):
        """
        Creates a new OEREBlex document source.

        Keyword Args:
            host (uri): Host URL of OEREBlex (without /api/...).
            version (str): The used geoLink schema version. Default is 1.1.1
            pass_version (bool): True to pass version in URL, false otherwise. Default is false.
            language (str): The language of the received data.
            canton (str): Canton code used for the documents.
            mapping (dict of str): Mapping for optional attributes.
            related_decree_as_main (bool): Add related decrees directly to the public law restriction.
            proxy (dict of uri): Optional proxy configuration for HTTP and/or HTTPS.
            auth (dict of str): Optional credentials for basic authentication. Requires `username`
                and `password` to be defined.

        Raises:
            AssertionError: If `language` or `canton` is not made of exactly two characters or if the
                parser has no host URL.

        """
        super(OEREBlexSource, self).__init__()

        # Get keyword arguments
        self._version = kwargs.get('version')
        self._pass_version = kwargs.get('pass_version')
        self._mapping = kwargs.get('mapping')
        self._related_decree_as_main = kwargs.get('related_decree_as_main')
        self._proxies = kwargs.get('proxy')

        # Set default values for missing parameters. An explicit None is treated like a missing value.
        if self._version is None:
            self._version = '1.1.1'
        if self._pass_version is None:
            self._pass_version = False

        # Basic authentication is only enabled if both credentials are provided
        auth = kwargs.get('auth')
        if isinstance(auth, dict) and 'username' in auth and 'password' in auth:
            self._auth = HTTPBasicAuth(auth.get('username'),
                                       auth.get('password'))
        else:
            self._auth = None

        # Explicit raises are used instead of assert statements, so that the validation is still
        # executed when Python runs with optimizations enabled (-O).

        # str() turns a missing language (None) into 'None', which is rejected by the length check.
        self._language = str(kwargs.get('language')).lower()
        if len(self._language) != 2:
            raise AssertionError(
                'language has to be string of two characters, e.g. "de" or "fr"'
            )

        self._canton = kwargs.get('canton')
        if self._canton is None or len(self._canton) != 2:
            raise AssertionError(
                'canton has to be string of two characters, e.g. "BL" or "NE"')

        self._parser = XML(host_url=kwargs.get('host'), version=self._version)
        if self._parser.host_url is None:
            raise AssertionError('host_url has to be defined')

    def read(self, geolink_id):
        """
        Requests the geoLink for the specified ID and stores the records for the received documents in
        `self.records`.

        Args:
            geolink_id (int): The geoLink ID.
        """
        log.debug("read() start")

        # Request documents. The version is only part of the URL if `pass_version` is enabled.
        url = '{host}/api/{version}geolinks/{id}.xml'.format(
            host=self._parser.host_url,
            version=(self._version + '/') if self._pass_version else '',
            id=geolink_id)
        log.debug("read() getting documents, url: {}, parser: {}".format(
            url, self._parser))
        documents = self._parser.from_url(url, {},
                                          proxies=self._proxies,
                                          auth=self._auth)
        log.debug("read() got documents")

        # Split into main documents and documents which are only referenced by the main ones
        main_documents = []
        referenced_documents = []
        for document in documents:
            if document.category == 'main':
                main_documents.append(document)
            elif document.category == 'related' and document.doctype == 'decree' \
                    and self._related_decree_as_main:
                main_documents.append(document)
            else:
                referenced_documents.append(document)

        # Convert to records
        self.records = []
        for document in main_documents:
            self.records.extend(
                self._get_document_records(document, referenced_documents))
        log.debug("read() done.")

    def _get_document_records(self, document, references=None):
        """
        Converts the received documents into records.

        Args:
            document (geolink_formatter.entity.Document): The geoLink document to be returned as document
                record.
            references (list of geolink_formatter.entity.Document or None): Referenced geoLink documents.

        Returns:
            list of pyramid_oereb.lib.records.documents.DocumentRecord: One record per file of the
            document. The list is empty if the document has no files.

        Raises:
            AssertionError: If the document has no title, enactment date or authority.
            TypeError: If the document type is neither `decree` nor `edict`.
        """

        # None replaces the former mutable default argument
        if references is None:
            references = []

        # Cancel if document contains no files
        if not document.files:
            log.warning(
                'Document with OEREBlex ID {0} has been skipped because of missing file.'
                .format(document.id))
            return []

        # Check mandatory attributes (explicit raises are not stripped by python -O)
        if document.title is None:
            raise AssertionError('Missing title for document #{0}'.format(
                document.id))
        if document.enactment_date is None:
            raise AssertionError(
                'Missing enactment_date for document #{0}'.format(document.id))
        if document.authority is None:
            raise AssertionError('Missing authority for document #{0}'.format(
                document.id))

        # Get document type
        if document.doctype == 'decree':
            document_class = LegalProvisionRecord
        elif document.doctype == 'edict':
            document_class = LawRecord
        else:
            raise TypeError(
                'Wrong doctype: expected decree or edict, got {0}'.format(
                    document.doctype))

        # Convert referenced documents (without passing references on, so there is one level only)
        referenced_records = []
        for reference in references:
            referenced_records.extend(self._get_document_records(reference))

        # Create related office record
        office = OfficeRecord({self._language: document.authority},
                              office_at_web=document.authority_url)

        # Check for available abbreviation
        if document.abbreviation:
            abbreviation = {self._language: document.abbreviation}
        else:
            abbreviation = None

        # Create one record per file
        records = []
        for f in document.files:
            arguments = {
                'law_status': LawStatusRecord.from_config(u'inForce'),
                'published_from': document.enactment_date,
                'title': self._get_document_title(document, f),
                'responsible_office': office,
                'text_at_web': {
                    self._language: f.href
                },
                'abbreviation': abbreviation,
                'official_number': document.number,
                'official_title': self._get_mapped_value(document, 'official_title', True),
                'canton': self._canton,
                'municipality': self._get_mapped_value(document, 'municipality'),
                # An empty list of references is passed as None
                'references': referenced_records or None
            }
            records.append(document_class(**arguments))

        return records

    def _get_mapped_value(self, document, key, multilingual=False):
        """
        Return the value of a mapped optional attribute.

        Args:
            document (geolink_formatter.entity.Document): The document entity.
            key (str): The key of the attribute to be mapped.
            multilingual (bool): True to wrap value in multilingual dictionary.

        Returns:
            The value of the mapped attribute, wrapped in a `{language: value}` dictionary if
            `multilingual` is true. None if there is no mapping for the key or the value is empty.
        """
        if not self._mapping:
            return None
        attribute = self._mapping.get(key)
        if not attribute:
            return None
        value = getattr(document, attribute)
        if not value:
            return None
        return {self._language: value} if multilingual else value

    def _get_document_title(self, document, current_file):
        """
        Returns the title of the document/file. Extracting this logic allows
        easier customization of the file title.

        Args:
            document (geolink_formatter.entity.Document): The document entity.
            current_file (geolink_formatter.entity.File): The file, which gets annotated with a title.
                It is not used by this implementation.

        Returns:
            dict: The document title as multilingual dictionary `{language: title}`.
        """
        # Assign multilingual values
        return {self._language: document.title}
Exemplo n.º 2
0
class OEREBlexSource(Base):
    """
    A document source that creates records for the documents received from OEREBlex for the specified
    geoLink.
    """
    def __init__(self, **kwargs):
        """
        Creates a new OEREBlex document source.

        Keyword Args:
            host (uri): Host URL of OEREBlex (without /api/...).
            version (str): The used geoLink schema version. Default is 1.2.0
            pass_version (bool): True to pass version in URL, false otherwise. Default is false.
            language (str): The language of the received data.
            canton (str): Canton code used for the documents.
            mapping (dict of str): Mapping for optional attributes.
            related_decree_as_main (bool): Add related decrees directly to the public law restriction.
            related_notice_as_main (bool): Add related notices directly to the public law restriction.
            proxy (dict of uri): Optional proxy configuration for HTTP and/or HTTPS.
            auth (dict of str): Optional credentials for basic authentication. Requires `username`
                and `password` to be defined.
            validation (bool): Turn XML validation on/off. Default is true.
            url_param_config (list of code and url_param): Optional url parameters to use, per plr code

        Raises:
            AssertionError: If `language` or `canton` is not a string of two characters, if the parser
                has no host URL or if `url_param_config` is not a list of dictionaries.

        """
        super(OEREBlexSource, self).__init__()

        # Get keyword arguments
        self._version = kwargs.get('version')
        self._pass_version = kwargs.get('pass_version')
        self._mapping = kwargs.get('mapping')
        self._related_decree_as_main = kwargs.get('related_decree_as_main')
        self._related_notice_as_main = kwargs.get('related_notice_as_main')
        self._proxies = kwargs.get('proxy')

        # Set default values for missing parameters. An explicit None is treated like a missing value.
        if self._version is None:
            self._version = '1.2.0'
        if self._pass_version is None:
            self._pass_version = False

        # Basic authentication is only enabled if both credentials are provided
        auth = kwargs.get('auth')
        if isinstance(auth, dict) and 'username' in auth and 'password' in auth:
            self._auth = HTTPBasicAuth(auth.get('username'),
                                       auth.get('password'))
        else:
            self._auth = None

        # The value is always a string after str(), so only its length has to be checked. A missing
        # language (None) becomes 'none' and is rejected by this check as well.
        self._language = str(kwargs.get('language')).lower()
        if len(self._language) != 2:
            raise AssertionError(
                'language has to be string of two characters, e.g. "de" or "fr"'
            )

        self._canton = kwargs.get('canton')
        if not (isinstance(self._canton, str) and len(self._canton) == 2):
            raise AssertionError(
                'canton has to be string of two characters, e.g. "BL" or "NE"')

        # XML validation stays enabled unless it is configured explicitly
        xsd_validation = kwargs.get('validation')
        if xsd_validation is None:
            xsd_validation = True
        self._parser = XML(host_url=kwargs.get('host'),
                           version=self._version,
                           xsd_validation=xsd_validation)
        if self._parser.host_url is None:
            raise AssertionError('host_url has to be defined')

        # The optional URL parameter configuration has to be a list of dictionaries
        self._url_param_config = kwargs.get('url_param_config')
        if self._url_param_config:
            if not isinstance(self._url_param_config, list):
                raise AssertionError(
                    'url_param_config is of wrong type {}, should be list'.
                    format(type(self._url_param_config)))
            for list_entry in self._url_param_config:
                if not isinstance(list_entry, dict):
                    raise AssertionError(
                        'url_param_config list entry is of wrong type {},'
                        ' should be dictionary'.format(type(list_entry)))

    def read(self, params, geolink_id, oereblex_params=None):
        """
        Requests the geoLink for the specified ID and stores the records for the received documents in
        `self.records`.

        Args:
            params (pyramid_oereb.views.webservice.Parameter): The parameters of the extract request.
            geolink_id (int): The geoLink ID.
            oereblex_params (string): Any additional parameters to pass to Oereblex
        """
        log.debug("read() start for geolink_id {}, oereblex_params {}".format(
            geolink_id, oereblex_params))

        # Build the URL. The version is only part of the URL if `pass_version` is enabled.
        url = '{host}/api/{version}geolinks/{id}.xml'.format(
            host=self._parser.host_url,
            version=(self._version + '/') if self._pass_version else '',
            id=geolink_id)

        # The additional parameters are appended after formatting, so that curly braces contained in
        # them are not interpreted as replacement fields by str.format().
        if oereblex_params:
            url = url + '?' + oereblex_params

        # Request documents in the language of the extract request, falling back to the configured one
        language = params.language or self._language
        request_params = {'locale': language}
        log.debug("read() getting documents, url: {}, parser: {}".format(
            url, self._parser))
        documents = self._parser.from_url(url,
                                          request_params,
                                          proxies=self._proxies,
                                          auth=self._auth)
        log.debug("read() got documents")

        # Split into main documents and documents which are only referenced by the main ones
        main_documents = []
        referenced_documents = []
        for document in documents:
            if document.category == 'main':
                main_documents.append(document)
            elif document.category == 'related' and document.doctype == 'decree' \
                    and self._related_decree_as_main:
                main_documents.append(document)
            elif document.category == 'related' and document.doctype == 'notice' \
                    and self._related_notice_as_main:
                main_documents.append(document)
            else:
                referenced_documents.append(document)

        # Convert to records
        self.records = []
        for document in main_documents:
            self.records.extend(
                self._get_document_records(document, language,
                                           referenced_documents))
        log.debug("read() done.")

    def _get_document_records(self, document, language, references=None):
        """
        Converts the received documents into records.

        Args:
            document (geolink_formatter.entity.Document): The geoLink document to be returned as document
                record.
            language (str): The language of the returned documents.
            references (list of geolink_formatter.entity.Document): Referenced geoLink documents.

        Returns:
            list of pyramid_oereb.lib.records.documents.DocumentRecord: One record per file of the
            document. The list is empty if the document has no files or no enactment date.

        Raises:
            AssertionError: If the document has no title or no authority.
            TypeError: If the document type is neither `decree`, `edict` nor `notice`.
        """

        references = references or []

        # Cancel if document contains no files
        if not document.files:
            log.warning(
                'Document with OEREBlex ID {0} has been skipped because of missing file.'
                .format(document.id))
            return []

        enactment_date = document.enactment_date
        authority = document.authority
        if document.doctype == 'notice':
            # Oereblex notice documents can have no enactment_date while it is required by pyramid_oereb
            # to have one. Add a fake default one that is identifiable and always older than now
            # (01.01.1970).
            if enactment_date is None:
                enactment_date = datetime.date(1970, 1, 1)
            # Oereblex notice documents can have no `authority` while it is required by pyramid_oereb
            # to have one. Replace None by '-' in this case.
            if authority is None:
                authority = '-'

        # Cancel if enactment_date is not set
        if enactment_date is None:
            log.warning(
                'Document with OEREBlex ID {0} has been skipped because of missing enactment_date.'
                .format(document.id))
            return []

        # Check mandatory attributes
        if document.title is None:
            raise AssertionError('Missing title for document #{0}'.format(
                document.id))
        if authority is None:
            raise AssertionError('Missing authority for document #{0}'.format(
                document.id))

        # Get document type
        if document.doctype == 'decree':
            document_class = LegalProvisionRecord
        elif document.doctype == 'edict':
            document_class = LawRecord
        elif document.doctype == 'notice':
            document_class = HintRecord
        else:
            raise TypeError(
                'Wrong doctype: expected decree, edict or notice, got {0}'.
                format(document.doctype))

        # Convert referenced documents (without passing references on, so there is one level only)
        referenced_records = []
        for reference in references:
            referenced_records.extend(
                self._get_document_records(reference, language))

        # Create related office record
        office = OfficeRecord({language: authority},
                              office_at_web=document.authority_url)

        # Check for available abbreviation
        if document.abbreviation:
            abbreviation = {language: document.abbreviation}
        else:
            abbreviation = None

        # Create one record per file
        records = []
        for f in document.files:
            arguments = {
                'law_status': LawStatusRecord.from_config(u'inForce'),
                'published_from': enactment_date,
                'title': self._get_document_title(document, f, language),
                'responsible_office': office,
                'text_at_web': {
                    language: f.href
                },
                'abbreviation': abbreviation,
                'official_number': document.number,
                'official_title': self._get_mapped_value(document,
                                                         'official_title',
                                                         language=language),
                'canton': self._canton,
                'municipality': self._get_mapped_value(document, 'municipality'),
                # An empty list of references is passed as None
                'references': referenced_records or None
            }
            records.append(document_class(**arguments))

        return records

    def _get_mapped_value(self, document, key, language=None):
        """
        Return the value of a mapped optional attribute.

        Args:
            document (geolink_formatter.entity.Document): The document entity.
            key (str): The key of the attribute to be mapped.
            language (str or None): Pass language to wrap value in multilingual dictionary.

        Returns:
            The value of the mapped attribute, wrapped in a `{language: value}` dictionary if a
            language is passed. None if there is no mapping for the key or the value is empty.
        """
        if not self._mapping:
            return None
        attribute = self._mapping.get(key)
        if not attribute:
            return None
        value = getattr(document, attribute)
        if not value:
            return None
        return {language: value} if language else value

    @staticmethod
    def _get_document_title(document, current_file, language):
        """
        Returns the title of the document/file. Extracting this logic allows
        easier customization of the file title.

        Args:
            document (geolink_formatter.entity.Document): The document entity.
            current_file (geolink_formatter.entity.File): The file, which gets annotated with a title.
                It is not used by this implementation.
            language (str): The language of the document title.

        Returns:
            dict: The document title as multilingual dictionary `{language: title}`.
        """
        # Assign multilingual values
        return {language: document.title}