def import_from_upload(self, upload, frbr_uri, request):
    """Build a new document from an uploaded file.

    ``upload`` is expected to behave like a
    :class:`django.core.files.uploadedfile.UploadedFile`: its
    ``content_type`` selects the import strategy.

    * XML uploads are taken as-is (no validation) and returned immediately,
      without the post-import analysis step.
    * Word (.docx) uploads are converted to HTML and imported as text.
    * PDF uploads go through the PDF importer.
    * Everything else is written to a temporary file and handed to slaw.

    ``request`` is accepted for interface compatibility; it is not used here.

    Returns the imported document.
    """
    xml_types = ['text/xml', 'application/xml']
    docx_type = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
    pdf_type = 'application/pdf'

    self.reformat = True
    mime = upload.content_type

    if mime in xml_types:
        # Trust the upload to already be valid AKN XML.
        imported = Document.randomized(frbr_uri)
        imported.content = upload.read().decode('utf-8')
        return imported

    if mime == docx_type:
        # Word documents take a detour through HTML before being imported.
        as_html = self.docx_to_html(upload)
        imported = self.import_from_text(as_html, frbr_uri, '.html')
    elif mime == pdf_type:
        imported = self.import_from_pdf(upload, frbr_uri)
    else:
        # Unknown type: let slaw make its best attempt at the raw file.
        with self.tempfile_for_upload(upload) as tmp:
            imported = self.import_from_file(tmp.name, frbr_uri)

    self.analyse_after_import(imported)
    return imported
def import_from_file(self, fname, frbr_uri):
    """Parse the file at ``fname`` with slaw and wrap the output.

    The slaw command line is assembled from this importer's settings
    (``fragment``, ``fragment_id_prefix``, ``section_number_position`` and
    ``slaw_grammar``) and run through ``self.shell``.

    Returns a ``Fragment`` when ``self.fragment`` is set, otherwise a
    ``Document`` whose content is the parser output.

    Raises ``ValueError`` when the command reports a positive exit code
    (carrying its stderr) or produces no output at all.
    """
    args = ['bundle', 'exec', 'slaw', 'parse']

    if self.fragment:
        args += ['--fragment', self.fragment]
        # NOTE(review): the id prefix is treated as a fragment-only option;
        # confirm this nesting against the intended layout.
        if self.fragment_id_prefix:
            args += ['--id-prefix', self.fragment_id_prefix]

    if self.section_number_position:
        args += ['--section-number-position', self.section_number_position]

    args += ['--grammar', self.slaw_grammar]
    args += [fname]

    exit_code, output, errors = self.shell(args)

    if exit_code > 0:
        raise ValueError(errors)
    if not output:
        raise ValueError("We couldn't get any useful text out of the file")

    if not self.fragment:
        imported = Document.randomized(frbr_uri)
        imported.content = output.decode('utf-8')
        # Setting the content may have altered these, so put them back.
        imported.frbr_uri = frbr_uri
        imported.title = None
        imported.copy_attributes()
    else:
        imported = Fragment(output.decode('utf-8'))

    self.log.info("Successfully imported from %s" % fname)
    return imported
def document(request, doc_id=None):
    """Render the ``document/show.html`` page for one document.

    With a ``doc_id`` the matching ``Document`` is loaded (404 if it does
    not exist); without one a fresh '(untitled)' document is created for
    the requesting user. The template context also carries the edit form,
    JSON for all countries and all documents, and the subtype, language
    and country lists.
    """
    if not doc_id:
        # it's new!
        doc = Document.randomized(request.user, title='(untitled)')
        doc.tags = None
        new_doc_serializer = DocumentSerializer(
            instance=doc, context={'request': request})
        doc_json = json.dumps(new_doc_serializer.data)
    else:
        doc = get_object_or_404(Document, pk=doc_id)
        # NOTE(review): existing documents get a null JSON payload while new
        # ones are serialized; confirm this asymmetry is intended.
        doc_json = json.dumps(None)

    form = DocumentForm(instance=doc)

    # Country details keyed by country code, for the client side.
    country_rows = Country.objects.select_related('country').prefetch_related(
        'locality_set', 'publication_set', 'country').all()
    countries_by_code = {}
    for country in country_rows:
        code = country.code
        countries_by_code[code] = country.as_json()
    countries_json = json.dumps(countries_by_code)

    list_serializer = DocumentListSerializer(context={'request': request})
    every_document = DocumentViewSet.queryset.all()
    documents_json = json.dumps(list_serializer.to_representation(every_document))

    context = {
        'document': doc,
        'document_json': doc_json,
        'document_content_json': json.dumps(doc.document_xml),
        'documents_json': documents_json,
        'form': form,
        'subtypes': Subtype.objects.order_by('name').all(),
        'languages': Language.objects.select_related('language').all(),
        'countries': Country.objects.select_related('country').all(),
        'countries_json': countries_json,
        'view': 'DocumentView',
    }
    return render(request, 'document/show.html', context)