def process_node_body(self, html):
    """Sanitize an HTML body and bind every ``<img>`` in it to an ``Image``.

    The markup is cleaned, auto-linked and style-filtered, then each
    ``<img>`` element is handled as follows:

    * an element without a ``src`` is removed from the document;
    * a ``src`` under ``djsettings.MEDIA_URL`` is matched to an existing
      ``Image`` (first via the element id, then via the stored file path)
      and recorded in ``self.existing_images``;
    * anything else is downloaded, saved as a new ``Image`` and recorded
      in ``self.new_images``.

    Returns the processed document serialized as HTML.

    Raises ``ValidationError`` on any failure. Images created during the
    failed run are deleted before the error propagates.
    """
    try:
        html = autolink(html_cleaner.clean_html(html))
        # To stay on the safe side escape all % characters
        html = html.replace(u"%", u"%%")
        doc = filter_style(fromstring(html))

        # Iterate over a snapshot: elements may be removed from the tree
        # inside the loop.
        for el in list(doc.iter(u'img')):
            # ``get`` yields None when the attribute is absent; normalise
            # to an empty string so the guard below handles both cases.
            src = (el.get(u"src") or u"").replace(u"%%", u"%")
            if not src:
                el.getparent().remove(el)
                # Nothing left to bind or download for this element.
                continue

            if src.startswith(djsettings.APP_URL):
                # Strings are immutable: the result has to be re-assigned
                # for absolute URLs to our own site to become site-relative.
                src = src.replace(djsettings.APP_URL, u"/", 1)

            if src.startswith(djsettings.MEDIA_URL):
                elem_id = el.get(u"id")
                try:
                    if elem_id and elem_id.startswith(self.image_id_prefix):
                        image_id = int(
                            elem_id.replace(self.image_id_prefix, ""))
                        image = Image.objects.get(id=image_id)
                        self.existing_images.append(image)
                        self.set_image_attributes(el, image)
                        continue
                except Exception:
                    # Best effort: fall through to the lookup by path.
                    logger.error(
                        u'Malformed id (%s)found on our own img url %s',
                        elem_id, src)

                try:
                    # Strip only the leading media prefix; the same text may
                    # legitimately occur again later in the path.
                    media_path = src[len(djsettings.MEDIA_URL):]
                    image = Image.objects.get(image=media_path)
                    self.set_image_attributes(el, image)
                    self.existing_images.append(image)
                    continue
                except Exception:
                    # Best effort: fall through and download the image.
                    logger.error(
                        u'Unable to locate img stored under url %s '
                        u'in Image table', src)

            if src.startswith(u"/"):
                # Turn a site-relative URL back into an absolute one.
                src = src.replace(u"/", djsettings.APP_URL, 1)

            image_file = download_image_file(src)
            image = Image(image=image_file, upload_url=src)
            image.save()
            self.new_images.append(image)
            self.set_image_attributes(el, image)

        return etree.tounicode(doc, method="html")
    except ValidationError:
        for image in self.new_images:
            image.delete()
        raise
    except Exception:
        # Lazy logging arguments: a formatting problem with ``html`` must
        # not raise here and skip the cleanup below.
        logger.exception(
            u'Unhandled exception while parsing "%s" body', html)
        for image in self.new_images:
            image.delete()
        raise ValidationError(_(u"Unexpected error happened :("))