def test_change_filename_extension(app):
    """Check the result of changing the extension of a filename."""
    # A name without any extension is rejected with an explicit message.
    with pytest.raises(Exception) as raised:
        change_filename_extension('test', 'txt')
    error_message = str(raised.value)
    assert error_message == 'test is not a valid filename'

    # The previous extension is kept in the name, joined with a dash.
    converted = change_filename_extension('test.pdf', 'txt')
    assert converted == 'test-pdf.txt'
def create_thumbnail(self, file):
    """Create a thumbnail for the given file and store it in the record.

    The thumbnail is stored in the record's files under the key returned by
    ``change_filename_extension(file['key'], 'jpg')`` and is tagged with the
    type ``thumbnail``. Generation is best effort: any error is logged as a
    warning and is not propagated to the caller.

    :param file: File from which thumbnail is created.
    """
    try:
        # Create thumbnail
        image_blob = create_thumbnail_from_file(file.file.uri, file.mimetype)

        thumbnail_key = change_filename_extension(file['key'], 'jpg')

        # Store thumbnail in record's files
        self.files[thumbnail_key] = BytesIO(image_blob)
        self.files[thumbnail_key]['type'] = 'thumbnail'
    except Exception as exception:
        # Resolve the record identifier without raising: the fallback must
        # not be evaluated with ``self['pid']``, as a missing key would raise
        # from inside this handler and defeat the best-effort behaviour.
        record_identifier = self.get('identifiedBy', self.get('pid'))
        current_app.logger.warning(
            'Error during thumbnail generation of {file} of record '
            '{record}: {error}'.format(file=file['key'],
                                       error=exception,
                                       record=record_identifier))
def create_fulltext_file(self, file):
    """Create fulltext file corresponding to given file object.

    Nothing is done when ``SONAR_DOCUMENTS_EXTRACT_FULLTEXT_ON_IMPORT`` is
    not enabled in the application configuration or when the file mimetype
    is not ``application/pdf``. The extracted text is stored in the record's
    files with the type ``fulltext``. Extraction is best effort: any error
    is logged as a warning and is not propagated to the caller.

    :param file: File object.
    """
    # If extract fulltext is disabled or file is not a PDF
    if not current_app.config.get(
            'SONAR_DOCUMENTS_EXTRACT_FULLTEXT_ON_IMPORT'
    ) or file.mimetype != 'application/pdf':
        return

    # Try to extract full text from file data, and generate a warning if
    # it's not possible. For several cases, file is locked against fulltext
    # copy.
    try:
        with file.file.storage().open() as pdf_file:
            fulltext = extract_text_from_content(pdf_file.read())

        key = change_filename_extension(file.key, 'txt')
        self.files[key] = BytesIO(fulltext.encode())
        self.files[key]['type'] = 'fulltext'
    except Exception as exception:
        # Resolve the record identifier without raising: a record lacking
        # 'identifiedBy' must not turn this warning into a KeyError.
        record_identifier = self.get('identifiedBy', self.get('pid'))
        current_app.logger.warning(
            'Error during fulltext extraction of {file} of record '
            '{record}: {error}'.format(file=file.key,
                                       error=exception,
                                       record=record_identifier))
def add_file(self, data, key, **kwargs):
    """Create file and add it to record.

    Nothing is done when ``SONAR_DOCUMENTS_IMPORT_FILES`` is not enabled in
    the application configuration, or when a file with the same key and the
    same checksum is already registered.

    :param data: Binary data of file
    :param str key: File key

    kwargs may contain some additional data such as: file label, file type,
    order, restricted, embargo_date and url.
    """
    if not current_app.config.get('SONAR_DOCUMENTS_IMPORT_FILES'):
        return

    # If file with the same key exists and checksum is the same as the
    # registered file, we don't do anything
    checksum = compute_md5_checksum(BytesIO(data))
    if key in self.files and checksum == self.files[key].file.checksum:
        return

    # Create the file
    self.files[key] = BytesIO(data)
    self.files[key]['label'] = kwargs.get('label', key)
    self.files[key]['type'] = kwargs.get('type', 'file')
    self.files[key]['order'] = kwargs.get('order', 1)

    # Embargo
    if kwargs.get('restricted'):
        self.files[key]['restricted'] = kwargs['restricted']

    if kwargs.get('embargo_date'):
        self.files[key]['embargo_date'] = kwargs['embargo_date']

    # Store external file URL
    if kwargs.get('url'):
        self.files[key]['external_url'] = kwargs['url']

    # Create thumbnail
    if current_app.config.get('SONAR_DOCUMENTS_GENERATE_THUMBNAIL'):
        self.create_thumbnail(self.files[key])

    # Try to extract full text from file data, and generate a warning if
    # it's not possible. For several cases, file is locked against fulltext
    # copy.
    if current_app.config.get(
            'SONAR_DOCUMENTS_EXTRACT_FULLTEXT_ON_IMPORT'
    ) and self.files[key].mimetype == 'application/pdf':
        try:
            fulltext = extract_text_from_content(data)

            # Use a dedicated name so that `key` keeps designating the
            # source file, notably in the warning logged below.
            fulltext_key = change_filename_extension(key, 'txt')
            self.files[fulltext_key] = BytesIO(fulltext.encode())
            self.files[fulltext_key]['type'] = 'fulltext'
        except Exception as exception:
            # Resolve the record identifier without raising: a record
            # lacking 'identifiedBy' must not turn this warning into a
            # KeyError.
            record_identifier = self.get('identifiedBy', self.get('pid'))
            current_app.logger.warning(
                'Error during fulltext extraction of {file} of record '
                '{record}: {error}'.format(file=key,
                                           error=exception,
                                           record=record_identifier))
def thumbnail(file, files):
    """Return the thumbnail entry associated with a file.

    The thumbnail key is computed from the key of the file with the ``jpg``
    extension, and the first entry of ``files`` having this key is returned.

    :param file: Dict of file from which thumbnail will be returned.
    :param files: List of files of the record.
    :returns: The first matching entry, or None if there is no match.
    """
    thumbnail_key = change_filename_extension(file['key'], 'jpg')

    candidates = [
        entry for entry in files if entry['key'] == thumbnail_key
    ]

    return candidates[0] if candidates else None
def get_thumbnail(file, record):
    """Return the path of the thumbnail image for a file.

    A restricted file gets the restricted image. Otherwise the thumbnail key
    is computed from the key of the file with the ``jpg`` extension and
    searched in the ``_files`` entries of the record; the default image is
    returned when no entry matches.

    :param file: Dict of file from which thumbnail will be returned.
    :param record: Record object.
    :returns: URL to thumbnail file.
    """
    if file['restriction']['restricted']:
        return 'static/images/restricted.png'

    thumbnail_key = change_filename_extension(file['key'], 'jpg')

    found = [
        entry for entry in record['_files'] if entry['key'] == thumbnail_key
    ]

    if found:
        return '/documents/{pid}/files/{key}'.format(pid=record['pid'],
                                                     key=thumbnail_key)

    return 'static/images/no-image.png'