Пример #1
0
    def post(self, request):
        """Convert posted plain text into XML with the importer for the URI's locale.

        The request payload is validated by ``ParseSerializer``. The importer is
        looked up via ``plugins.for_locale`` using the country, language and
        locality of the parsed ``frbr_uri``.

        Raises ``ValidationError`` (keyed on ``content``) when the importer
        raises ``ValueError``; the error is logged first.
        """
        serializer = ParseSerializer(data=request.data)
        serializer.is_valid(raise_exception=True)
        data = serializer.validated_data

        fragment = data.get('fragment')
        frbr_uri = FrbrUri.parse(data.get('frbr_uri'))

        importer = plugins.for_locale(
            'importer', frbr_uri.country, frbr_uri.language, frbr_uri.locality)
        importer.fragment = fragment
        importer.fragment_id_prefix = data.get('id_prefix')

        try:
            xml = importer.import_from_text(
                data.get('content'), frbr_uri.work_uri(), '.txt')
        except ValueError as e:
            reason = str(e)
            log.error("Error during import: %s" % reason, exc_info=e)
            raise ValidationError({'content': reason or "error during import"})

        # Round-trip through Base so the XML is parsed and re-serialized before
        # it is returned (cleans it up and sorts out encodings).
        return Response({'output': Base(xml).to_xml()})
Пример #2
0
    def get(self, request, **kwargs):
        """Retrieve a single document, or fall back to listing documents.

        The ``frbr_uri`` URL kwarg is first parsed as an FRBR URI. If that
        works, the matching document is retrieved. Any ``ValueError`` raised
        along the way (by the parse, by the feed check below, or by
        ``self.retrieve``) switches to ``self.list``, which treats the value
        as a URI prefix.
        """
        try:
            self.frbr_uri = uri = FrbrUri.parse(self.kwargs['frbr_uri'])

            # '/za-cpt/act/by-law/2011/full.atom' parses as a URI whose number
            # is 'full'; that is really a feed request, so treat it as a listing.
            looks_like_feed = (uri.number in ('full', 'summary')
                               and self.format_kwarg == 'atom')
            if looks_like_feed:
                raise ValueError()

            # '/act/1980/1/toc' is shorthand for '/act/1980/1/eng/toc' (when
            # eng is the default language): 'toc' is the expression component,
            # not a language code.
            if uri.language == 'toc':
                uri.language = uri.default_language
                uri.expression_component = 'toc'

            return self.retrieve(request)
        except ValueError:
            return self.list(request)
Пример #3
0
    def parse_frbr_uri(self, frbr_uri):
        """Parse ``frbr_uri`` and fill in language defaults from ``self.country``.

        Returns the parsed ``FrbrUri``, or ``None`` when the string is not a
        valid FRBR URI or is actually an atom feed path.

        Note that this resets the class-level ``FrbrUri.default_language`` to
        ``None`` before parsing.
        """
        FrbrUri.default_language = None
        try:
            parsed = FrbrUri.parse(frbr_uri)
        except ValueError:
            return None

        # '/za-cpt/act/by-law/2011/full.atom' is a feed, not a document URI
        if parsed.number in ('full', 'summary') and self.format_kwarg == 'atom':
            return None

        parsed.default_language = self.country.primary_language.code
        if not parsed.language:
            parsed.language = parsed.default_language

        # '/act/1980/1/toc' is equivalent to '/act/1980/1/eng/toc' (when eng is
        # the default language), so 'toc' must not be taken for a language.
        if parsed.language == 'toc':
            parsed.language = parsed.default_language
            parsed.expression_component = 'toc'

        return parsed
Пример #4
0
    def test_dont_link_constitution_in_constitution(self):
        """Mentions of the Constitution inside the Constitution itself stay unlinked."""
        work = Work(frbr_uri='/akn/za/act/1996/constitution')
        fixture = document_fixture(xml="""
        <section id="section-1">
          <num>1.</num>
          <heading>Tester</heading>
          <paragraph id="section-1.paragraph-0">
            <content>
              <p>the Constitution</p>
              <p>the Constitution, 1996</p>
              <p>the Constitution of South Africa</p>
              <p>the Constitution of the Republic of South Africa</p>
              <p>the Constitution of the Republic of South Africa, 1996</p>
              <p>the Constitution of the Republic of South Africa 1996</p>
              <p>the Constitution of the Republic of South Africa Act, 1996</p>
              <p>the Constitution of the Republic of South Africa, 1996 ( Act 108 of 1996 )</p>
              <p>the Constitution of the Republic of South Africa, 1996 ( Act No 108 of 1996 )</p>
              <p>the Constitution of the Republic of South Africa, 1996 ( Act No. 108 of 1996 )</p>
              <p>the Constitution of the Republic of South Africa Act, 1996 ( Act No. 108 of 1996 )</p>
              <p>the Constitution of the Republic of South Africa  Act 108 of 1996</p>
              <p>the Constitution of the Republic of South Africa (Act 108 of 1996)</p>
            </content>
          </paragraph>
        </section>""")
        document = Document(work=work, content=fixture)

        # snapshot the XML before the finder runs; it must come out untouched
        before = document.document_xml
        document.doc.frbr_uri = FrbrUri.parse(work.frbr_uri)
        self.finder.find_references_in_document(document)
        self.assertMultiLineEqual(before, document.document_xml)
Пример #5
0
 def validate_frbr_uri(self, value):
     """Return the work URI for ``value``, parsed from its lower-cased form.

     Raises ``ValidationError`` when ``value`` is empty or cannot be parsed
     as an FRBR URI.
     """
     try:
         if value:
             return FrbrUri.parse(value.lower()).work_uri()
         # an empty value takes the same error path as an unparseable one
         raise ValueError()
     except ValueError:
         raise ValidationError("Invalid FRBR URI: %s" % value)
Пример #6
0
 def find_work(self, given_string):
     """ Find the work that a spreadsheet cell refers to.

         The string we get from the spreadsheet could be e.g.
         `/ug/act/1933/14 - Administrator-General’s Act` (new and preferred style)
         `Administrator-General’s Act` (old style)
         First see if the string before the first space can be parsed as an FRBR URI, and find a work based on that.
         If not, assume a title has been given and try to match on the whole string.

         Returns the matching work, or None when the string is empty or blank,
         when no work has the given FRBR URI, or when the title does not match
         exactly one work in this country and locality.
     """
     tokens = given_string.split() if given_string else []
     if not tokens:
         # an empty or whitespace-only cell has no first token to inspect
         return None

     first = tokens[0]
     try:
         FrbrUri.parse(first)
     except ValueError:
         # not a URI: only accept an unambiguous title match
         potential_matches = list(Work.objects.filter(title=given_string,
                                                      country=self.country,
                                                      locality=self.locality))
         if len(potential_matches) == 1:
             return potential_matches[0]
         return None
     else:
         return Work.objects.filter(frbr_uri=first).first()
0
    def get_for_frbr_uri(self, frbr_uri):
        """ Look up the single document identified by an FRBR URI.

        ``frbr_uri`` may be a string or an already-parsed ``FrbrUri``. The
        expression date part selects the version: ``@`` alone means the
        earliest, ``@<date>`` means exactly that date, ``:<date>`` means the
        latest at or before that date, and no date means the latest as of today.

        Raises ValueError if the URI or its expression date is invalid, if no
        document matches, or if the match is not in the requested language.

        See http://docs.oasis-open.org/legaldocml/akn-nc/v1.0/cs01/akn-nc-v1.0-cs01.html#_Toc492651893
        """
        uri = frbr_uri if isinstance(frbr_uri, FrbrUri) else FrbrUri.parse(frbr_uri)
        candidates = self.filter(frbr_uri=uri.work_uri())

        if uri.language:
            candidates = candidates.filter(
                language__language__iso_639_2B=uri.language)

        # no expression date is equivalent to the "current" version, at time of retrieval
        when = uri.expression_date or (
            ':' + datetime.date.today().strftime("%Y-%m-%d"))

        try:
            if when == '@':
                # earliest document
                candidates = candidates.order_by('expression_date')
            else:
                marker, stamp = when[0], when[1:]
                if marker == '@':
                    # document at exactly this date
                    candidates = candidates.filter(
                        expression_date=parse_date(stamp).date())
                elif marker == ':':
                    # latest document at or before this date
                    cutoff = parse_date(stamp).date()
                    candidates = candidates \
                        .filter(expression_date__lte=cutoff) \
                        .order_by('-expression_date')
                else:
                    raise ValueError("The expression date %s is not valid" %
                                     when)
        except ParseError:
            raise ValueError("The expression date %s is not valid" % when)

        match = candidates.first()
        if match is None:
            raise ValueError("Document doesn't exist")

        wanted_language = uri.language
        if match and wanted_language and match.language.code != wanted_language:
            raise ValueError(
                "The document %s exists but is not available in the language '%s'"
                % (uri.work_uri(), wanted_language))

        return match
Пример #8
0
 def new_frbr_uri(self, uri, forward, for_work=True):
     """ Return ``uri`` as a string with its prefix switched on or off.

         The prefix becomes 'akn' when ``forward`` is True and None otherwise.
         ``uri`` may be a string or an ``FrbrUri``; an ``FrbrUri`` passed in is
         modified in place. The result is the work URI when ``for_work`` is
         True, otherwise the expression URI.
     """
     parsed = uri if isinstance(uri, FrbrUri) else FrbrUri.parse(uri)
     parsed.prefix = 'akn' if forward else None
     return parsed.work_uri() if for_work else parsed.expression_uri()
Пример #9
0
    def parse_frbr_uri(self, frbr_uri):
        """Parse ``frbr_uri``, defaulting its language to the country's primary one.

        Returns the parsed ``FrbrUri``, or ``None`` if the string cannot be
        parsed. Resets the class-level ``FrbrUri.default_language`` to ``None``
        before parsing.
        """
        FrbrUri.default_language = None
        try:
            parsed = FrbrUri.parse(frbr_uri)
        except ValueError:
            return None

        parsed.default_language = self.country.primary_language.code
        if parsed.language:
            return parsed

        # no language in the URI: fall back to the default just set
        parsed.language = parsed.default_language
        return parsed
Пример #10
0
    def validate_frbr_uri(self, value):
        """Normalise ``value`` to a work URI and check that a work exists for it.

        Raises ``ValidationError`` when ``value`` is empty or not a parseable
        FRBR URI. The existence check is delegated to
        ``Work.objects.get_for_frbr_uri``; whatever it raises for a missing
        work (a ``ValueError``, per the original note) propagates unchanged.
        """
        try:
            if value:
                value = FrbrUri.parse(value.lower()).work_uri()
            else:
                raise ValueError()
        except ValueError:
            raise ValidationError("Invalid FRBR URI: %s" % value)

        # called for its side effect only: it raises if there is no such work
        Work.objects.get_for_frbr_uri(value)

        return value
Пример #11
0
    def parse_frbr_uri(self, frbr_uri):
        """Parse ``frbr_uri`` into ``self.frbr_uri`` and set its language defaults.

        The default language comes from the primary language of the country
        named in the URI. Raises ``Http404`` when that country is not found.
        A ``ValueError`` from ``FrbrUri.parse`` is not handled here. Resets the
        class-level ``FrbrUri.default_language`` to ``None`` before parsing.
        """
        FrbrUri.default_language = None
        uri = self.frbr_uri = FrbrUri.parse(frbr_uri)

        # validate the country and take its primary language as the default
        try:
            country = Country.for_frbr_uri(uri)
            uri.default_language = country.primary_language.code
        except Country.DoesNotExist:
            raise Http404("Country %s from FRBR URI not found" %
                          uri.country)

        if not uri.language:
            uri.language = uri.default_language
Пример #12
0
    def randomized(cls, frbr_uri, **kwargs):
        """ Helper to build a new document for the work behind ``frbr_uri``.

        The work is looked up from the work URI and the language is the primary
        language of the URI's country; both override any ``work``/``language``
        passed in ``kwargs``. The instance is created with the given extra
        ``kwargs`` and ``copy_attributes()`` is called on it before returning.

        NOTE(review): despite the name, nothing here randomizes the URI --
        confirm whether that happens elsewhere.
        """
        # imported here rather than at module level, presumably to avoid
        # circular imports -- TODO confirm
        from .works import Work
        from .places import Country

        uri = FrbrUri.parse(frbr_uri)
        kwargs.update(
            work=Work.objects.get_for_frbr_uri(uri.work_uri()),
            language=Country.for_frbr_uri(uri).primary_language,
        )

        document = cls(frbr_uri=uri.work_uri(False),
                       expression_date=uri.expression_date,
                       **kwargs)
        document.copy_attributes()
        return document
Пример #13
0
    def clean(self):
        """Validate ``self.frbr_uri`` and rewrite it to match this object's place.

        The stored URI is rebuilt as ``/akn/<country>[-<locality>]/<rest>``,
        lower-cased, using the codes from ``self.country`` and
        ``self.locality``. Raises ``ValidationError`` if the current value
        cannot be parsed as an FRBR URI.
        """
        try:
            parsed = FrbrUri.parse(self.frbr_uri)
            work_uri = parsed.work_uri(work_component=False)
        except ValueError:
            raise ValidationError("Invalid FRBR URI")

        # Assumes the URI starts with /akn, so the fourth piece is everything
        # after the country/locality: `/akn/za-wc/act/2000/12` -> `act/2000/12`.
        rest = work_uri.split('/', 3)[3]

        # force the akn prefix and this object's country and locality codes
        place_codes = [self.country.code]
        if self.locality:
            place_codes.append(self.locality.code)
        prefix = '/akn/' + '-'.join(place_codes)

        self.frbr_uri = f'{prefix}/{rest}'.lower()
Пример #14
0
    def test_dont_find_self(self):
        """References to the document's own work stay unlinked; others become <ref>s."""
        source_xml = document_fixture(xml="""
        <section eId="sec_1">
          <num>1.</num>
          <heading>Tester</heading>
          <paragraph eId="sec_1.paragraph-0">
            <content>
              <p>Something to do with Act 1 of 1991.</p>
              <p>Something to do with Act no 22 of 2012.</p>
              <p>And another thing about Act 4 of 1998.</p>
            </content>
          </paragraph>
        </section>""")
        actual = Document(work=self.work,
                          frbr_uri=self.work.frbr_uri,
                          document_xml=source_xml,
                          language=self.eng)

        linked_xml = document_fixture(xml="""
        <section eId="sec_1">
          <num>1.</num>
          <heading>Tester</heading>
          <paragraph eId="sec_1.paragraph-0">
            <content>
              <p>Something to do with Act 1 of 1991.</p>
              <p>Something to do with Act <ref href="/akn/za/act/2012/22">no 22 of 2012</ref>.</p>
              <p>And another thing about Act <ref href="/akn/za/act/1998/4">4 of 1998</ref>.</p>
            </content>
          </paragraph>
        </section>""")
        expected = Document(work=self.work,
                            document_xml=linked_xml,
                            language=self.eng)

        actual.doc.frbr_uri = FrbrUri.parse(self.work.frbr_uri)
        self.finder.find_references_in_document(actual)

        # re-serialize the expected XML through lxml before comparing
        expected_root = etree.fromstring(expected.content)
        expected.content = etree.tostring(
            expected_root, encoding='utf-8').decode('utf-8')
        self.assertEqual(expected.content, actual.content)
Пример #15
0
    def post(self, request):
        """Parse an uploaded file or posted plain text into a document.

        The request payload is validated by ``ParseSerializer``. The importer is
        looked up via ``plugins.for_locale`` using the country, language and
        locality of the parsed ``frbr_uri``. If the request carries a ``file``
        it is imported; otherwise the ``content`` text is imported.

        Returns a ``Response`` whose ``output`` is ``document.to_xml()`` for a
        fragment, or ``document.document_xml`` otherwise.

        Raises ``ValidationError`` when the importer raises ``ValueError``
        (keyed on ``file`` or ``content``), or when the import produced nothing.
        """
        serializer = ParseSerializer(data=request.data)
        serializer.is_valid(raise_exception=True)

        fragment = serializer.validated_data.get('fragment')
        frbr_uri = FrbrUri.parse(serializer.validated_data.get('frbr_uri'))

        importer = plugins.for_locale('importer', frbr_uri.country, frbr_uri.language, frbr_uri.locality)
        importer.fragment = fragment
        importer.fragment_id_prefix = serializer.validated_data.get('id_prefix')

        upload = self.request.data.get('file')
        if upload:
            # we got a file
            try:
                document = importer.import_from_upload(upload, frbr_uri.work_uri(), self.request)
            except ValueError as e:
                # Python 3 exceptions have no `.message`; use str(e) for the text
                message = str(e)
                log.error("Error during import: %s", message, exc_info=e)
                raise ValidationError({'file': message or "error during import"}) from e
        else:
            # plain text
            try:
                text = serializer.validated_data.get('content')
                document = importer.import_from_text(text, frbr_uri.work_uri())
            except ValueError as e:
                message = str(e)
                log.error("Error during import: %s", message, exc_info=e)
                raise ValidationError({'content': message or "error during import"}) from e

        if not document:
            raise ValidationError("Nothing to parse! Either 'file' or 'content' must be provided.")

        # output
        if fragment:
            return Response({'output': document.to_xml()})
        return Response({'output': document.document_xml})
Пример #16
0
 def work_uri(self):
     """ The FRBR Work URI of this object as a :class:`FrbrUri` instance.

     Parsed from ``self.frbr_uri`` on first access and cached in
     ``self._work_uri`` for later calls.
     """
     cached = self._work_uri
     if cached is None:
         cached = self._work_uri = FrbrUri.parse(self.frbr_uri)
     return cached