def __call__(self, validate_all=False, data=None):
    """Deserialize a mail from JSON, converting an uploaded .msg file.

    When ``data`` is not passed it is read from the request body. If the
    payload has a ``message`` entry whose filename ends in ``.msg``
    (case-insensitive), the deserialized file is stored on
    ``self.context.original_message`` and the payload entry is replaced
    by the output of ``Msg2MimeTransform`` (content type
    ``message/rfc822``, filename extension ``eml``) before the parent
    deserializer runs.

    Returns the object returned by the parent deserializer, after the
    title and metadata handlers have been run on it.
    """
    if data is None:
        data = json_body(self.request)

    if 'message' in data:
        deserialize_message = queryMultiAdapter(
            (IMail['message'], self.context, self.request),
            IFieldDeserializer)
        uploaded = deserialize_message(data['message'])

        if uploaded and uploaded.filename.lower().endswith('.msg'):
            # Keep the untouched upload around before swapping in the
            # converted payload.
            self.context.original_message = uploaded
            converted = Msg2MimeTransform().transform(uploaded.data)
            # Strip the trailing "msg" and append "eml" instead.
            eml_filename = uploaded.filename[:-3] + 'eml'
            data['message'] = {
                'data': converted,
                'content-type': 'message/rfc822',
                'filename': eml_filename,
            }

    mail = super(DeserializeMailFromJson, self).__call__(
        validate_all=validate_all, data=data)

    mail._update_title_from_message_subject()
    initialize_metadata(mail, None)
    initalize_title(mail, None)
    return mail
def __call__(self, validate_all=False, data=None, create=False):
    """Deserialize a mail from JSON and convert a stored .msg to .eml.

    When ``data`` is not passed it is read from the request body. The
    parent deserializer runs first. If the resulting object's ``message``
    has a filename ending in ``.msg`` (case-insensitive), that file is
    stored on ``self.context.original_message`` and ``message`` is
    replaced by a ``NamedBlobFile`` holding the ``Msg2MimeTransform``
    output (content type ``message/rfc822``, filename extension ``eml``).

    On creation with a ``message`` in the payload, the metadata handler
    is run; the title handlers are run only if the payload carries no
    truthy ``title``.

    Returns the object returned by the parent deserializer.
    """
    if data is None:
        data = json_body(self.request)

    mail = super(DeserializeMailFromJson, self).__call__(
        validate_all=validate_all, data=data, create=create)

    stored = mail.message
    if stored and stored.filename.lower().endswith('.msg'):
        # Keep the untouched upload around before replacing the field.
        self.context.original_message = stored
        converted = Msg2MimeTransform().transform(stored.data)
        mail.message = NamedBlobFile(
            data=converted,
            # Strip the trailing "msg" and append "eml" instead.
            filename=stored.filename[:-3] + 'eml',
            contentType='message/rfc822')

    if not (create and 'message' in data):
        return mail

    if not data.get('title'):
        # No explicit title supplied: derive it from the message.
        mail._update_title_from_message_subject()
        initalize_title(mail, None)

    initialize_metadata(mail, None)
    return mail
def run_after_creation_jobs(self, item, obj):
    """Run the handlers that depend on the file contents of ``obj``.

    These handlers are fired manually here because they got fired too
    early before (when the file contents weren't loaded yet).

    For mails the metadata handler is run and, if the title still is the
    ``NO_SUBJECT_TITLE_FALLBACK`` placeholder, the title is reset and
    initialized again. For everything else the title/filename sync and
    the digitally-available handlers are run.
    """
    if not self.is_mail(item):
        sync_title_and_filename_handler(obj, None)
        set_digitally_available(obj, None)
        return

    initialize_metadata(obj, None)

    has_placeholder_title = obj.title == NO_SUBJECT_TITLE_FALLBACK
    if has_placeholder_title:
        # Reset the [No Subject] placeholder
        obj.title = None
        initalize_title(obj, None)
def __iter__(self):
    """Attach the bundle file referenced by each pipeline item to its object.

    For every item from ``self.previous`` that has a non-None value under
    ``self.key``, the file is read from below ``self.bundle_path`` and
    stored on ``item['_object']``: in the ``message`` field for mails,
    in the ``file`` field otherwise. Afterwards the handlers that depend
    on the file contents are fired manually.

    Every item is yielded exactly once, also when loading was skipped.
    Skipped ``.msg`` files, missing files and I/O errors are logged and
    recorded in ``self.stats['errors']``.
    """
    for item in self.previous:
        if self.is_mail(item):
            file_field = IMail['message']
        else:
            file_field = IDocumentSchema['file']

        keys = item.keys()
        pathkey = self.pathkey(*keys)[0]

        if self.key not in item:
            # No file reference on this item, nothing to load.
            yield item
            continue

        filepath = item[self.key]
        if filepath is None:
            yield item
            continue

        if pathkey not in item:
            logger.warning("Missing path key for file %s" % filepath)
            yield item
            continue

        path = item[pathkey]
        filepath = os.path.join(self.bundle_path, filepath)
        filename = os.path.basename(filepath)

        # TODO: Check for this in OGGBundle validation
        # Compare case-insensitively, so that e.g. "MAIL.MSG" is skipped
        # as well instead of being loaded as a mail message.
        if filepath.lower().endswith(u'.msg'):
            logger.warning("Skipping .msg file: %s" % filepath)
            self.stats['errors']['msgs'][filepath] = path
            yield item
            continue

        # TODO: Check for this in OGGBundle validation
        if not os.path.exists(filepath):
            logger.warning("File not found: %s" % filepath)
            self.stats['errors']['files_not_found'][filepath] = path
            yield item
            continue

        mimetype, _encoding = guess_type(filepath, strict=False)
        if mimetype is None:
            logger.warning("Unknown mimetype for file %s" % filepath)
            mimetype = 'application/octet-stream'

        obj = item.get('_object')
        if obj is None:
            logger.warning(
                "Cannot set file. Document %s doesn't exist." % path)
            yield item
            continue

        try:
            with open(filepath, 'rb') as f:
                namedblobfile = file_field._type(
                    data=f.read(),
                    contentType=mimetype,
                    filename=filename)
                setattr(obj, file_field.getName(), namedblobfile)
        except EnvironmentError as e:
            # TODO: Check for this in OGGBundle validation
            logger.warning("Can't open file %s. %s." % (
                filepath, str(e)))
            self.stats['errors']['files_io_errors'][filepath] = path
            yield item
            continue

        # Fire these event handlers manually because they got fired
        # too early before (when the file contents weren't loaded yet)
        if self.is_mail(item):
            initialize_metadata(obj, None)
            # Reset the [No Subject] placeholder
            obj.title = None
            initalize_title(obj, None)
        else:
            sync_title_and_filename_handler(obj, None)
            set_digitally_available(obj, None)

        yield item
def __iter__(self):
    """Attach the bundle file referenced by each pipeline item to its object.

    For every item from ``self.previous`` that has a non-None value under
    ``self.key``, the path is resolved via
    ``self.build_absolute_filepath`` and the file is stored on
    ``item['_object']``: in the ``message`` field for mails, in the
    ``file`` field otherwise. Afterwards the handlers that depend on the
    file contents are fired manually.

    Every item is yielded exactly once, also when loading was skipped.
    Unresolvable paths, invalid file types, missing files and I/O errors
    are logged and appended to the matching list in
    ``self.bundle.errors``.
    """
    for item in self.previous:
        guid = item['guid']

        file_field = (
            IMail['message'] if self.is_mail(item)
            else IDocumentSchema['file'])

        keys = item.keys()
        pathkey = self.pathkey(*keys)[0]

        if self.key not in item:
            # No file reference on this item, nothing to load.
            yield item
            continue

        _filepath = item[self.key]
        if _filepath is None:
            yield item
            continue

        if pathkey not in item:
            logger.warning("Missing path key for file %s" % _filepath)
            yield item
            continue

        path = item[pathkey]

        abs_filepath = self.build_absolute_filepath(_filepath)
        if abs_filepath is None:
            logger.warning('Unresolvable filepath: %s' % _filepath)
            self.bundle.errors['files_unresolvable_path'].append(
                (guid, _filepath, path))
            yield item
            continue

        filename = os.path.basename(abs_filepath)
        if isinstance(filename, str):
            filename = filename.decode('utf8')

        # TODO: Check for this in OGGBundle validation
        lowered_path = abs_filepath.lower()
        has_invalid_type = any(
            lowered_path.endswith(ext) for ext in INVALID_FILE_EXTENSIONS)
        if has_invalid_type:
            logger.warning(
                "Skipping file with invalid type: %s" % abs_filepath)
            self.bundle.errors['files_invalid_types'].append(
                (guid, abs_filepath, path))
            yield item
            continue

        # TODO: Check for this in OGGBundle validation
        if not os.path.exists(abs_filepath):
            logger.warning("File not found: %s" % abs_filepath)
            self.bundle.errors['files_not_found'].append(
                (guid, abs_filepath, path))
            yield item
            continue

        mimetype, _unused_encoding = guess_type(abs_filepath, strict=False)
        if mimetype is None:
            logger.warning("Unknown mimetype for file %s" % abs_filepath)
            mimetype = 'application/octet-stream'

        obj = item.get('_object')
        if obj is None:
            logger.warning(
                "Cannot set file. Document %s doesn't exist." % path)
            yield item
            continue

        try:
            with open(abs_filepath, 'rb') as source:
                blob = file_field._type(
                    data=source.read(),
                    contentType=mimetype,
                    filename=filename)
                setattr(obj, file_field.getName(), blob)
        except EnvironmentError as e:
            # TODO: Check for this in OGGBundle validation
            logger.warning("Can't open file %s. %s."
                           % (abs_filepath, str(e)))
            self.bundle.errors['files_io_errors'].append(
                (guid, abs_filepath, str(e), path))
            yield item
            continue

        # Fire these event handlers manually because they got fired
        # too early before (when the file contents weren't loaded yet)
        if self.is_mail(item):
            initialize_metadata(obj, None)
            if obj.title == NO_SUBJECT_TITLE_FALLBACK:
                # Reset the [No Subject] placeholder
                obj.title = None
                initalize_title(obj, None)
        else:
            sync_title_and_filename_handler(obj, None)
            set_digitally_available(obj, None)

        yield item