def setUp(self):
    """Build the document/image fixtures and reset the purge log.

    Creates one document with an attached text file, one document that is
    never given a file, a test image together with its ``'original'``
    rendition, and finally empties ``CACHE_PURGED_URLS``.
    """
    stored_document = Document(title="Test document")
    stored_document.file.save(
        'example.txt',
        ContentFile("A boring example document"),
    )
    self.document = stored_document

    # Deliberately left without a file attached.
    self.document_without_file = Document(title="Document without file")

    test_image = CFGOVImage.objects.create(
        title='test',
        file=get_test_image_file(),
    )
    self.image = test_image
    self.rendition = test_image.get_rendition('original')

    # Empty the list in place rather than rebinding the name, so any other
    # holder of the same list object also sees it emptied.
    del CACHE_PURGED_URLS[:]
def chooser_upload(request):
    """Handle a document upload submitted from the document chooser.

    On a POST with a valid form the document is saved, pushed to every
    search backend, and a ``document_chosen.js`` modal response carrying the
    new document's id and title (as JSON) is returned.

    On any other request, or when the submitted form is invalid, the chooser
    is rendered with the document list and the (possibly error-carrying)
    upload form.
    """
    # Check the HTTP method rather than the truthiness of request.POST: a
    # POST whose form data is empty yields an empty (falsy) QueryDict and
    # would otherwise be handled as if it were a GET, hiding validation
    # errors from the user.
    if request.method == 'POST':
        document = Document(uploaded_by_user=request.user)
        form = DocumentForm(request.POST, request.FILES, instance=document)

        if form.is_valid():
            form.save()

            # Reindex the document to make sure all tags are indexed
            for backend in get_search_backends():
                backend.add(document)

            document_json = json.dumps({
                'id': document.id,
                'title': document.title
            })
            return render_modal_workflow(
                request, None, 'wagtaildocs/chooser/document_chosen.js',
                {'document_json': document_json})
    else:
        form = DocumentForm()

    # Reached for non-POST requests and for invalid submissions.
    documents = Document.objects.order_by('title')

    return render_modal_workflow(
        request,
        'wagtaildocs/chooser/chooser.html',
        'wagtaildocs/chooser/chooser.js',
        {
            'documents': documents,
            'uploadform': form
        })
def add(request):
    """Render and process the "add document" form.

    A valid POST saves the document, adds it to every search backend, queues
    a success message with an "Edit" button and redirects to the document
    index. An invalid POST queues an error message and re-renders the bound
    form. Any other request renders an empty form.
    """
    # Check the HTTP method rather than the truthiness of request.POST: a
    # POST whose form data is empty yields an empty (falsy) QueryDict and
    # would otherwise be handled as if it were a GET, silently skipping
    # validation and the error message.
    if request.method == 'POST':
        doc = Document(uploaded_by_user=request.user)
        form = DocumentForm(request.POST, request.FILES, instance=doc)
        if form.is_valid():
            form.save()

            # Reindex the document to make sure all tags are indexed
            for backend in get_search_backends():
                backend.add(doc)

            messages.success(
                request,
                _("Document '{0}' added.").format(doc.title),
                buttons=[
                    messages.button(
                        reverse('wagtaildocs:edit', args=(doc.id, )),
                        _('Edit'))
                ])
            return redirect('wagtaildocs:index')
        else:
            messages.error(
                request,
                _("The document could not be saved due to errors."))
    else:
        form = DocumentForm()

    return render(request, "wagtaildocs/documents/add.html", {
        'form': form,
    })
def setUp(self):
    """Build the document and image fixtures and reset the purge log.

    Creates a document with an attached text file and a test image, then
    empties ``CACHE_PURGED_URLS``.
    """
    stored_document = Document(title="Test document")
    stored_document.file.save(
        'example.txt',
        ContentFile("A boring example document"),
    )
    self.document = stored_document

    self.image = CFGOVImage.objects.create(
        title='test',
        file=get_test_image_file(),
    )

    # Empty the list in place rather than rebinding the name, so any other
    # holder of the same list object also sees it emptied.
    del CACHE_PURGED_URLS[:]
def _import_documents(self, pdf_dir):
    """Create a ``Document`` for every entry of ``document_data``.

    Each entry supplies a ``'title'`` and a ``'file'`` name; the file is read
    from the ``documents`` sub-directory of ``pdf_dir`` and attached to the
    newly created document.
    """
    documents_root = os.path.join(pdf_dir, 'documents')

    for entry in document_data.values():
        doc_title, doc_filename = entry['title'], entry['file']
        self.logger.debug(
            'Creating document {}'.format(doc_title.encode('utf-8')))

        source_path = os.path.join(documents_root, doc_filename)
        doc = Document(title=doc_title)
        with open(source_path, 'rb') as fh:
            doc.file.save(doc_filename, File(fh))
            doc.save()
def import_work(self, works_page, publishers_page):
    """Import the work identified by ``self._work_code``.

    Derives the opus flags and sort order from the work code, reads the
    heading PDF found in ``self._work_path`` (the work is skipped with an
    error log when there is none), stores that PDF as a ``Document`` tagged
    ``'work'``, loads the order of impressions, adds the work under
    ``works_page`` and finally imports the work's impressions.
    """
    work = Work()
    work.code = self._work_code

    if 'Opus' in work.code:
        work.has_opus = True
        try:
            opus_number = int(work.code.split()[1].strip())
        except ValueError:
            # NOTE(review): 66 is the fallback used when the token after
            # "Opus" is not an integer; its meaning is not documented here.
            opus_number = 66
        work.is_posthumous = (
            opus_number >= settings.POSTHUMOUS_WORKS_WITH_OPUS)
        work.sort_order = opus_number
    else:
        work.has_opus = False
        work.is_posthumous = (
            work.code in settings.POSTHUMOUS_WORKS_WITHOUT_OPUS)
        # NOTE(review): the 74 offset presumably sorts these after the
        # opus-numbered works -- confirm.
        position = settings.ALL_WORKS_WITHOUT_OPUS.index(work.code)
        work.sort_order = position + 74

    self.logger.debug('Work sort order: {}'.format(work.sort_order))

    # Heading filename: the first match wins; no match means no work.
    heading_pattern = os.path.join(self._work_path, '*.heading*.pdf')
    heading_matches = glob.glob(heading_pattern)
    if not heading_matches:
        self.logger.error(
            'No heading file found; skipping work {0}'.format(work.code))
        return
    heading_filename = heading_matches[0]

    work.heading = self._import_heading(heading_filename)
    work.title = work.heading.split(' ')[0].strip()
    work.slug = safe_slugify(work.title, Work)

    # Store the heading PDF as the work's document.
    document = Document(title=work.title)
    with open(heading_filename, 'rb') as fh:
        document.file.save(os.path.basename(heading_filename), File(fh))
    document.tags.add('work')
    work.pdf = document

    # The impression order is needed later to sort each impression.
    self._order_of_impressions = self._import_order_of_impressions()
    self.logger.debug(self._order_of_impressions)

    # The work is attached to the tree before its impressions are imported.
    works_page.add_child(instance=work)
    self._import_impressions(work, publishers_page)
def add(request):
    """Render and process the "add document" form.

    A valid POST saves the document, queues a success message and redirects
    to the document index. An invalid POST queues an error message and
    re-renders the bound form. Any other request renders an empty form.
    """
    # Check the HTTP method rather than the truthiness of request.POST: a
    # POST whose form data is empty yields an empty (falsy) QueryDict and
    # would otherwise be handled as if it were a GET, silently skipping
    # validation and the error message.
    if request.method == 'POST':
        doc = Document(uploaded_by_user=request.user)
        form = DocumentForm(request.POST, request.FILES, instance=doc)
        if form.is_valid():
            form.save()
            messages.success(request, "Document '%s' added." % doc.title)
            return redirect('wagtaildocs_index')
        else:
            messages.error(
                request, "The document could not be saved due to errors.")
    else:
        form = DocumentForm()

    return render(request, "wagtaildocs/documents/add.html", {
        'form': form,
    })
def dummy_wagtail_doc(request):
    """Create a throwaway document with a small text file and return it.

    A root ``Collection`` is added first when none exists. A finalizer is
    registered on ``request`` that deletes the file and the document again
    on a best-effort basis.
    """
    if not Collection.objects.exists():  # pragma: no cover
        Collection.add_root()

    doc = Document(title='hello')
    doc.file.save('foo.txt', ContentFile('foo', 'foo.txt'))
    doc.save()
    doc = Document.objects.get(pk=doc.pk)  # Reload to ensure the upload took

    def nuke():
        """Best-effort removal of the file and the document."""
        try:
            # Try cleaning up so `/var/media` isn't full of foo
            doc.file.delete()
            doc.delete()
        except Exception:  # pragma: no cover
            # Cleanup failures are deliberately ignored, but unlike a bare
            # ``except:`` this lets KeyboardInterrupt and SystemExit
            # propagate.
            pass

    request.addfinalizer(nuke)
    return doc
def import_adverts(advert_dir):
    """Import every PDF found under ``advert_dir`` as an ``Advert``.

    The directory tree is walked; the publisher name is derived from each
    directory path and the rubric from each PDF file name. Every PDF is
    stored as a ``Document`` tagged ``'advert'`` and linked from a newly
    saved ``Advert``. Files not ending in ``.pdf`` are ignored.
    """
    # Use the module logger (not the root ``logging`` module) so this
    # message follows the same configuration as the other messages below.
    logger.debug('Importing adverts')

    for root, dirs, files in os.walk(advert_dir):
        publisher_name = _clean_publisher_name(root, ' advt')
        logger.debug('publisher_name: {}'.format(
            publisher_name.encode('utf-8')))

        for filename in files:
            if filename.endswith('.pdf'):
                rubric = _clean_rubric(filename)

                document = Document(
                    title=u'{}; {}'.format(publisher_name, rubric))
                with open(os.path.join(root, filename), 'rb') as fh:
                    pdf_file = File(fh)
                    document.file.save(filename, pdf_file)
                document.tags.add('advert')

                advert = Advert()
                advert.publisher_name = publisher_name
                advert.rubric = rubric
                advert.pdf = document
                advert.save()

                logger.debug('rubric: {}'.format(rubric.encode('utf-8')))
def import_stps(stps_dir):
    """Import every PDF found under ``stps_dir`` as an ``STP``.

    The directory tree is walked; the publisher name is derived from each
    directory path and the rubric from each PDF file name. Every PDF is
    stored as a ``Document`` tagged ``'STP'`` and linked from a newly saved
    ``STP``. Files not ending in ``.pdf`` are ignored.
    """
    logger.debug('Importing STPs')

    for dirpath, _dirnames, filenames in os.walk(stps_dir):
        publisher_name = _clean_publisher_name(dirpath, ' STP')
        logger.debug(
            'publisher_name: {}'.format(publisher_name.encode('utf-8')))

        for pdf_name in filenames:
            if not pdf_name.endswith('.pdf'):
                continue

            rubric = _clean_rubric(pdf_name)
            document_title = u'{}; {}'.format(publisher_name, rubric)

            # Store the PDF itself as a tagged document.
            document = Document(title=document_title)
            pdf_path = os.path.join(dirpath, pdf_name)
            with open(pdf_path, 'rb') as fh:
                document.file.save(pdf_name, File(fh))
            document.tags.add('STP')

            stp = STP()
            stp.publisher_name = publisher_name
            stp.rubric = rubric
            stp.pdf = document
            stp.save()

            logger.debug('rubric: {}'.format(rubric.encode('utf-8')))
def _import_impression(self, work, publishers_page, f_path):
    """Import the impression described by the PDF at ``f_path``.

    The PDF is parsed for an impression code; when none is found nothing is
    imported. Otherwise the PDF is stored as a ``Document`` tagged
    ``'impression'``, an ``Impression`` is populated from the parser, its
    copies are imported, its publisher is looked up (or created under
    ``publishers_page``) and the impression is added as a child of ``work``.
    """
    self.logger.debug('Parsing {}'.format(f_path))
    parser = PDFParser(f_path)
    code = parser.get_impression_code()
    if not code:
        # No impression code, nothing to import.
        return

    self.logger.debug('Impression: ' + code)

    # Store the PDF itself as a tagged document.
    document = Document(title=code)
    with open(f_path, 'rb') as fh:
        document.file.save(os.path.basename(f_path), File(fh))
    document.tags.add('impression')

    impression = Impression()
    impression.title = code
    impression.impression_title = parser.get_title()
    impression.content = parser.get_text_content()
    impression.pdf = document

    # Codes missing from the order of impressions get sort order 999.
    try:
        sort_order = self._order_of_impressions.index(code.lower())
    except Exception:
        self.logger.error(
            u'{0} missing from order of impressions, which consists of: {1}'
            .format(code, ', '.join(self._order_of_impressions)))
        sort_order = 999
    impression.sort_order = sort_order

    impression.slug = safe_slugify(impression.title, Impression)
    impression.comments = parser.get_comments()

    self._import_copies(impression, parser, code)

    # The publisher code is the last dash-separated part of the title.
    publisher_code = impression.title.split('-')[-1]
    publisher = Publisher.objects.filter(title=publisher_code).first()
    if not publisher:
        publisher = Publisher(title=publisher_code)
        publisher.slug = slugify(publisher_code)
        publishers_page.add_child(instance=publisher)
    impression.publisher = publisher

    work.add_child(instance=impression)
def import_library(file_path, index_page):
    """Import the library described by the PDF at ``file_path``.

    The first line of the PDF text is the heading: its first part is the
    library code and its last part is comma-separated metadata (country,
    city, ..., library name). The country and city are looked up or created,
    the library is looked up by slug (created under ``index_page`` when
    missing, updated otherwise), and the PDF is stored as a ``Document``
    tagged ``'library'`` and linked from the library.

    Returns ``None``; nothing is imported when the PDF has no text content.
    """
    logger.debug('Importing {}'.format(file_path))

    content = PDFParser(file_path).get_text_content()
    if not content:
        logger.debug('Found no content in the PDF')
        return

    lines = content.split('\n')

    # NOTE(review): the separator literal is a single space, yet country
    # names such as 'United States of America' contain spaces -- confirm
    # the intended separator.
    heading_parts = lines[0].split(' ')

    if len(heading_parts) >= 2:
        # The code is the first heading part.
        code = heading_parts[0].strip()
    else:
        # The heading has a single part: take the code from the second line.
        code = lines[1]

    # Metadata layout: country, city, [state for the USA,] library name.
    metadata_parts = heading_parts[-1].split(',')
    country_name = metadata_parts[0].strip()
    city_name = metadata_parts[1].strip()
    name_start = 3 if country_name == 'United States of America' else 2
    name = ','.join(metadata_parts[name_start:]).strip()

    logger.debug(
        u'{0} {1} {2} {3}'.format(code, country_name, city_name, name))

    # Look the country up, creating it on first sight.
    country = Country.objects.filter(name=country_name).first()
    if not country:
        country = Country(name=country_name)
        country.save()

    # Same for the city, scoped to its country.
    city = City.objects.filter(name=city_name, country=country).first()
    if not city:
        city = City(country=country, name=city_name)
        city.save()

    # Use the slug for lookups, because there are case differences in
    # some references that are meant to be the same.
    slug = slugify(code)[:50]
    library = Library.objects.filter(slug=slug).first()

    if library:
        logger.warning('Duplicate library')
        # Already known: refresh its city and name.
        library.city = city
        library.name = name
    else:
        library = Library(title=code, city=city, name=name)
        library.slug = slug
        index_page.add_child(instance=library)

    # Store the PDF itself as a tagged document.
    document = Document(title=code)
    with open(file_path, 'rb') as fh:
        document.file.save(os.path.basename(file_path), File(fh))
    document.tags.add('library')

    library.pdf = document
    library.save()