def create_images_from_urls_in_content(self, body):
    """Import the images referenced by ``<img>`` tags in *body*.

    Every ``<img>`` with a non-empty ``src`` is downloaded with
    ``urllib.request.urlretrieve`` and stored through an ``Image`` object.
    On success each occurrence of the old URL in *body* is replaced with
    the URL of the stored file and the text is passed through
    ``self.convert_html_entities``.  Failures are printed and skipped.

    Args:
        body: HTML source of a post.

    Returns:
        The (possibly rewritten) HTML.
    """

    def placeholder_size(tag, attribute):
        # ``'width' in tag`` would test the tag's *children*, so the
        # attribute has to be read explicitly.  Non-numeric values such as
        # "100%" fall back to the original default of 100.
        try:
            return int(tag.get(attribute, 100))
        except (TypeError, ValueError):
            return 100

    soup = BeautifulSoup(body, "html5lib")
    for img in soup.findAll('img'):
        old_url = img.get('src')
        if not old_url:
            continue  # Blank image or no src attribute at all
        width = placeholder_size(img, 'width')
        height = placeholder_size(img, 'height')
        file_ = os.path.basename(old_url)
        try:
            remote_image = urllib.request.urlretrieve(old_url)
        except (urllib.error.HTTPError,
                urllib.error.URLError,
                UnicodeEncodeError):
            print("Unable to import " + old_url)
            continue
        image = Image(title=file_, width=width, height=height)
        try:
            # Close the downloaded file once its content has been stored.
            with open(remote_image[0], 'rb') as downloaded:
                image.file.save(file_, File(downloaded))
            image.save()
            new_url = image.file.url
            body = body.replace(old_url, new_url)
            body = self.convert_html_entities(body)
        except TypeError:
            print("Unable to import image {}".format(remote_image[0]))
    return body
def process_content_image(self, content):
    """Swap imported ``<img>`` nodes in *content* for image embeds.

    Only images whose ``src`` contains ``wp-content`` or ``files`` are
    handled.  Each is fetched with ``self._import_image``, stored as a
    ``WagtailImage`` and replaced by the markup that
    ``self._image_to_embed`` produces.  Falsy *content* is returned as is;
    otherwise the serialised tree from ``ET.tostring`` is returned.
    """
    self.stdout.write('\tGenerate and replace entry content images....')
    if not content:
        return content

    root = lxml.html.fromstring(content)
    for img_node in root.iter('img'):
        parent_node = img_node.getparent()
        source = img_node.attrib['src']
        if not ('wp-content' in source or 'files' in source):
            continue
        image = self._import_image(source)
        if not image:
            continue  # nothing was imported for this image
        attributes = img_node.attrib
        title = attributes.get('title') or attributes.get('alt')
        new_image = WagtailImage(file=File(file=image), title=title)
        new_image.save()
        wrapped_in_link = parent_node.tag == 'a'
        embed = ET.XML(self._image_to_embed(new_image))
        if wrapped_in_link:
            # The embed takes the place of the whole surrounding link.
            parent_node.addnext(embed)
            parent_node.drop_tree()
        else:
            parent_node.append(embed)
            img_node.drop_tag()
    return ET.tostring(root)
def create_images_from_urls_in_content(self, body):
    """Import the images referenced by ``<img>`` tags in *body*.

    Each ``<img>`` with a non-empty ``src`` is downloaded with
    ``urllib.request.urlretrieve`` and stored through an ``Image`` object;
    the old URL is then replaced in *body* by the stored file's URL and
    the text is run through ``self.convert_html_entities``.  Download
    errors are printed and the image is skipped.

    Args:
        body: HTML source of a post.

    Returns:
        The (possibly rewritten) HTML.
    """

    def placeholder_size(tag, attribute):
        # ``'width' in tag`` tests the tag's children, not its attributes,
        # so read the attribute directly; fall back to 100 when it is
        # missing or not a plain integer.
        try:
            return int(tag.get(attribute, 100))
        except (TypeError, ValueError):
            return 100

    soup = BeautifulSoup(body, "html5lib")
    for img in soup.findAll('img'):
        old_url = img.get('src')
        if not old_url:
            continue  # Blank image or no src attribute at all
        width = placeholder_size(img, 'width')
        height = placeholder_size(img, 'height')
        file_ = os.path.basename(old_url)
        try:
            remote_image = urllib.request.urlretrieve(old_url)
        except urllib.error.HTTPError:
            # HTTPError is a URLError subclass, so it must be caught first.
            print("Unable to import " + old_url)
            continue
        except urllib.error.URLError:
            print("URL error - try again " + old_url)
            continue
        image = Image(title=file_, width=width, height=height)
        # Close the downloaded file once its content has been stored.
        with open(remote_image[0], 'rb') as downloaded:
            image.file.save(file_, File(downloaded))
        image.save()
        new_url = image.file.url
        body = body.replace(old_url, new_url)
        body = self.convert_html_entities(body)
    return body
def create_images_from_urls_in_content(self, body):
    """Import the images referenced by ``<img>`` tags in *body*.

    Each ``<img>`` whose ``src`` is non-empty and not a ``data:`` URI is
    fetched with ``requests`` (10 second timeout), stored through an
    ``Image`` object, and its old URL is replaced in *body* by the stored
    file's URL.  Images that cannot be fetched or saved are logged with
    ``logging.warning`` and left untouched.

    Args:
        body: HTML source of a post.

    Returns:
        The (possibly rewritten) HTML.
    """
    # Loop-invariant request headers.
    headers = {
        'Content-Type': 'application/json',
        'Accept': 'application/json',
        'User-Agent': "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.153 Safari/537.36 SE 2.X MetaSr 1.0"
    }

    def placeholder_size(tag, attribute):
        # ``'width' in tag`` tests the tag's children, not its attributes,
        # so read the attribute directly; fall back to 100 when it is
        # missing or not a plain integer.
        try:
            return int(tag.get(attribute, 100))
        except (TypeError, ValueError):
            return 100

    soup = BeautifulSoup(body, "html5lib")
    for img in soup.findAll('img'):
        old_url = img.get('src')
        if not old_url:
            continue  # Blank image or no src attribute at all
        if old_url.startswith('data:'):
            continue  # Embedded image
        width = placeholder_size(img, 'width')
        height = placeholder_size(img, 'height')
        # Titles / file names are capped at 255 characters.
        file_ = os.path.basename(old_url)[:255]
        try:
            req = requests.get(self.prepare_url(old_url),
                               headers=headers, timeout=10)
        except (urllib.error.HTTPError,
                urllib.error.URLError,
                UnicodeEncodeError,
                KeyError,
                requests.exceptions.RequestException):
            # RequestException also covers timeouts, which the explicit
            # timeout above can trigger.
            logging.warning("Unable to import image: " + old_url)
            continue
        if req.status_code != 200 or not req.content:
            # TODO: Log error of files that don't import for manual fix
            logging.warning("Unable to import image: " + old_url)
            continue
        image = Image(title=file_, width=width, height=height)
        try:
            with tempfile.NamedTemporaryFile() as remote_image:
                remote_image.write(req.content)
                # Flush so the whole payload is on disk before it is read
                # back; without this small images looked empty.
                remote_image.flush()
                remote_image.seek(0)
                image.file.save(file_, File(remote_image))
            image.save()
            new_url = image.file.url
            body = body.replace(old_url, new_url)
            body = self.convert_html_entities(body)
        except TypeError:
            logging.warning("Unable to import image: " + old_url)
    return body
def import_entries(self):
    """Create an ``EntryPage`` for every ``ZinniaEntry``.

    For each entry the header image and every content image served from
    ``settings.MEDIA_URL`` is copied into a ``WagtailImage``; content
    ``<img>`` tags are re-pointed at the new files.  A page is created
    under ``self.blogpage`` when no ``EntryPage`` with the entry's slug
    exists yet.  Categories and tags are imported through
    ``self.import_entry_categories`` / ``self.import_entry_tags`` and the
    resulting page is recorded in ``self.entries`` keyed by the entry pk.
    """
    self.stdout.write("Importing entries...")
    entries = ZinniaEntry.objects.all()
    for entry in entries:
        self.stdout.write(entry.title)
        # Header images
        if entry.image:
            header_image = WagtailImage(
                file=entry.image,
                title=os.path.basename(entry.image.url))
            self.stdout.write(
                '\tImported header image: {}'.format(entry.image))
            header_image.save()
        else:
            header_image = None
        self.stdout.write('\tGenerate and replace entry content images....')
        if entry.content:
            root = lxml.html.fromstring(entry.content)
            media_url = settings.MEDIA_URL
            for el in root.iter('img'):
                src = el.attrib.get('src', '')  # tolerate <img> without src
                if not src.startswith(media_url):
                    continue
                # Strip only the leading MEDIA_URL; str.replace would also
                # remove later occurrences inside the path.
                old_image = src[len(media_url):]
                image_name = os.path.basename(old_image)
                # Images are binary data: open in 'rb', not text mode.
                with open(os.path.join(settings.MEDIA_ROOT, old_image),
                          'rb') as image_file:
                    new_image = WagtailImage(
                        file=File(file=image_file, name=image_name),
                        title=image_name)
                    new_image.save()
                el.attrib['src'] = new_image.file.url
                self.stdout.write('\t\t{}'.format(new_image.file.url))
            # New content with images replaced.  Serialise to text rather
            # than bytes so the page body receives a string.
            content = lxml.html.tostring(
                root, pretty_print=True, encoding='unicode')
        else:
            content = entry.content
        # Create page
        try:
            page = EntryPage.objects.get(slug=entry.slug)
        except EntryPage.DoesNotExist:
            page = EntryPage(
                title=entry.title,
                body=content,
                slug=entry.slug,
                go_live_at=entry.start_publication,
                expire_at=entry.end_publication,
                first_published_at=entry.creation_date,
                date=entry.creation_date,
                owner=entry.authors.first(),
                seo_title=entry.title,
                search_description=entry.excerpt,
                live=entry.is_visible,
                header_image=header_image
            )
            self.blogpage.add_child(instance=page)
            revision = self.blogpage.save_revision()
            revision.publish()
        self.import_entry_categories(entry, page)
        self.import_entry_tags(entry, page)
        page.save()
        page.save_revision(changed=False)
        self.entries[entry.pk] = page
def create_blog_pages(self, posts, blog_index, *args, **options):
    """Create or update a ``BlogPage`` for each WordPress post.

    Args:
        posts: iterable of post dicts; the keys read are ``ID``,
            ``title``, ``slug``, ``description``, ``content``, ``author``,
            ``terms``, ``date`` and ``featured_image``.
        blog_index: parent page that new ``BlogPage`` children are added
            to.

    For each post the body images are imported, the page is looked up by
    slug (updated if found, created otherwise), the featured image is
    downloaded as header image, and categories/tags and - when
    ``self.should_import_comments`` is set - comments are imported.
    """
    for post in posts:
        post_id = post.get('ID')
        title = post.get('title')
        if title:
            title = self.convert_html_entities(title)
        slug = post.get('slug')
        description = post.get('description')
        if description:
            description = self.convert_html_entities(description)
        # get image info from content and create image objects
        body = self.create_images_from_urls_in_content(post.get('content'))
        # author/user data
        user = self.create_user(post.get('author'))
        categories = post.get('terms')
        # format the date: keep the first ten characters only
        date = post.get('date')[:10]
        try:
            new_entry = BlogPage.objects.get(slug=slug)
            new_entry.title = title
            new_entry.body = body
            new_entry.owner = user
            new_entry.save()
        except BlogPage.DoesNotExist:
            # Use the post's own description; the literal string
            # "description" used to be stored here instead.
            new_entry = blog_index.add_child(instance=BlogPage(
                title=title, slug=slug,
                search_description=description or "",
                date=date, body=body, owner=user))
        featured_image = post.get('featured_image')
        header_image = None
        if featured_image is not None:
            image_title = featured_image['title']
            source = featured_image['source']
            file_ = os.path.basename(source)
            source = source.replace('stage.swoon', 'swoon')
            try:
                remote_image = urllib.request.urlretrieve(
                    self.prepare_url(source))
                header_image = Image(title=image_title, width=640, height=290)
                # Close the downloaded file once it has been stored.
                with open(remote_image[0], 'rb') as downloaded:
                    header_image.file.save(file_, File(downloaded))
                header_image.save()
            except (urllib.error.HTTPError,
                    urllib.error.URLError,
                    UnicodeEncodeError):
                # A failed download must not abort the whole import.
                header_image = None
                print('unable to set header image {}'.format(source))
        new_entry.header_image = header_image
        new_entry.save()
        if categories:
            self.create_categories_and_tags(new_entry, categories)
        if self.should_import_comments:
            self.import_comments(post_id, slug)
def add_to_collection(item, collection):
    """Create or update the page for media file *item* under *collection*.

    Args:
        item: path of the media file.  ``<item>.description.txt`` and
            ``<item>.thumbnail.jpeg`` are picked up when they exist.
        collection: parent page; its ``path``, ``depth``, ``url_path`` and
            ``numchild`` are read to place the child.

    Files whose extension is in neither ``MOVIE_EXTENSIONS`` nor
    ``EBOOK_EXTENSIONS`` are ignored.  ``options`` is read from the
    enclosing scope.
    """
    description_file = item + ".description.txt"
    if os.path.isfile(description_file):
        with open(description_file, 'r') as description_handle:
            description = description_handle.read()
    else:
        description = ""

    file_name = os.path.basename(item)
    title = ".".join(file_name.split(".")[:-1])
    title = title.replace("_", " ")

    thumbnail_file = item + ".thumbnail.jpeg"
    if os.path.isfile(thumbnail_file):
        thumbnail = Image()
        # JPEG data is binary: open in 'rb' and close the handle afterwards.
        with open(thumbnail_file, 'rb') as thumbnail_handle:
            thumbnail.file.save(os.path.basename(item) + ".jpeg",
                                File(thumbnail_handle))
        thumbnail.title = "Thumbnail for " + title
        thumbnail.save()
    else:
        thumbnail = None

    extension = file_name.split(".")[-1]
    if extension in MOVIE_EXTENSIONS:
        cls = models.Movie
    elif extension in EBOOK_EXTENSIONS:
        cls = models.EBook
    else:
        return

    slug = slugify(title)
    # Path of a new child: parent path plus a 4-digit position.
    path = collection.path + "{pos:s}".format(
        pos=str(collection.numchild + 1).zfill(4))
    try:
        obj = collection.get_children().filter(slug=slug)[0]
        is_new = False
        # Ensure that other objects with same slug and path are deleted
        others = collection.get_children().filter(slug=slug).exclude(
            id=obj.id)
        if others.exists():
            logger.warning(
                "Other objects with same path existed and were deleted. File: "
                + item)
            others.delete()
    except IndexError:
        obj = cls(path=path)
        is_new = True

    obj.numchild = 0
    obj.depth = collection.depth + 1
    obj.show_in_menus = False
    obj.resource_link = item
    obj.url_path = os.path.join(collection.url_path, slug) + "/"
    obj.slug = slug
    obj.title = title
    obj.live = True
    obj.short_description = description
    obj.author = options["author"]
    obj.duration = ""
    obj.thumbnail = thumbnail
    obj.save()

    # NOTE(review): the child counter is only bumped when a child was
    # actually added - confirm this matches the intended bookkeeping.
    if is_new:
        collection.numchild += 1
        collection.save()
def save_images_to_cms(self):
    '''Save every image listed in ``self.imgs`` to the database.

    ``self.imgs`` yields ``(img_path, dirname, img_name)`` triples.  Each
    image is stored with:
    - title: the file name
    - tags: the directory containing the image (intended; see note below)
    '''
    for img_path, dirname, img_name in self.imgs:
        # Keep the file open only for as long as the save needs it.
        with open(img_path, "rb") as img_file:
            # NOTE(review): the docstring asks for the directory as tag but
            # ``img_name`` is passed, and ``dirname`` is unused - confirm
            # the intended value and whether ``tags`` can be set this way.
            image = Image(
                title=img_name,
                file=ImageFile(img_file, name=os.path.basename(img_path)),
                tags=img_name)  # is this correct??
            image.save()
def import_header_image(self, entry, items, image_id):
    """Attach the attachment with post id *image_id* to *entry* as header.

    Every element of *items* whose ``post_type`` is ``attachment`` and
    whose ``post_id`` equals *image_id* is downloaded through
    ``self._import_image``, stored as a ``WagtailImage`` and assigned to
    ``entry.header_image``; the entry is saved after each match.
    """
    self.stdout.write('\tImport header images....')
    for item in items:
        kind = item.find(u'{{{0:s}}}post_type'.format(WP_NS)).text
        if kind != 'attachment':
            continue
        if item.find(u'{{{0:s}}}post_id'.format(WP_NS)).text != image_id:
            continue
        title = item.find('title').text
        image_url = item.find(u'{{{0:s}}}attachment_url'.format(WP_NS)).text
        downloaded = self._import_image(image_url)
        new_image = WagtailImage(
            file=File(file=downloaded, name=title),
            title=title,
        )
        new_image.save()
        entry.header_image = new_image
        entry.save()
def import_header_image(self, entry, items, image_id):
    """Use the attachment identified by *image_id* as header of *entry*.

    *items* is scanned for elements of post type ``attachment`` whose
    post id equals *image_id*.  For each such element the attachment URL
    is fetched with ``self._import_image``, wrapped in a ``WagtailImage``
    and saved, then set as ``entry.header_image`` and the entry is saved.
    """
    self.stdout.write('\tImport header images....')
    for item in items:
        type_node = item.find(u'{{{0:s}}}post_type'.format(WP_NS))
        if type_node.text == 'attachment':
            id_node = item.find(u'{{{0:s}}}post_id'.format(WP_NS))
            if id_node.text == image_id:
                title = item.find('title').text
                url_node = item.find(
                    u'{{{0:s}}}attachment_url'.format(WP_NS))
                img = self._import_image(url_node.text)
                wrapped = File(file=img, name=title)
                new_image = WagtailImage(file=wrapped, title=title)
                new_image.save()
                entry.header_image = new_image
                entry.save()
def generate_photo(request, page):
    """Give a ``PostPage`` without photo a generated identicon.

    Args:
        request: unused here.
        page: the page being handled; anything that is not a ``PostPage``
            or already has a ``photo`` is left untouched.

    The identicon is built by ``Vizhash`` from the page title, encoded as
    PNG, stored as an ``Image`` and assigned to ``page.photo``.
    """
    if isinstance(page, PostPage) and not page.photo:
        im = Vizhash(page.title, 64).identicon()
        buffer = BytesIO()
        im.save(fp=buffer, format="PNG")
        png_bytes = buffer.getvalue()
        content_file = ContentFile(png_bytes)
        # Pass size and charset by name: previously the bound method
        # ``content_file.tell`` ended up as ``size`` and the byte length
        # as ``charset``.
        image_file = InMemoryUploadedFile(
            file=content_file,
            field_name=None,
            name=page.title,
            content_type="image/png",
            size=len(png_bytes),
            charset=None,
        )
        image = Image(title=page.title, file=image_file,
                      width=im.width, height=im.height,
                      created_at=page.created, file_size=len(png_bytes))
        image.save()
        page.photo = image
        page.save()
def process_content_image(self, content):
    """Swap imported ``<img>`` nodes in *content* for image embeds.

    Only images whose ``src`` contains ``wp-content`` or ``files`` are
    handled.  Each is fetched with ``self._import_image``, stored as a
    ``WagtailImage`` and replaced by the markup that
    ``self._image_to_embed`` produces.

    Args:
        content: HTML of the entry; falsy values are returned unchanged.

    Returns:
        The serialised tree from ``ET.tostring``, or *content* itself when
        it is falsy.
    """
    self.stdout.write('\tGenerate and replace entry content images....')
    if content:
        root = lxml.html.fromstring(content)
        for img_node in root.iter('img'):
            parent_node = img_node.getparent()
            # An <img> without src used to raise KeyError; treat it as
            # "not one of ours".
            src = img_node.attrib.get('src', '')
            if 'wp-content' in src or 'files' in src:
                img = self._import_image(src)
                if not img:
                    continue  # download failed: keep the original tag
                # Fall back to the file name so neither the stored file
                # nor the image title ends up as None.
                title = (img_node.attrib.get('title')
                         or img_node.attrib.get('alt')
                         or src.rsplit('/', 1)[-1])
                new_image = WagtailImage(file=File(file=img, name=title),
                                         title=title)
                new_image.save()
                if parent_node.tag == 'a':
                    # The embed takes the place of the surrounding link.
                    parent_node.addnext(
                        ET.XML(self._image_to_embed(new_image)))
                    parent_node.drop_tree()
                else:
                    parent_node.append(
                        ET.XML(self._image_to_embed(new_image)))
                    img_node.drop_tag()
        content = ET.tostring(root)
    return content
def process_content_image(self, content):
    """Swap Blogspot-hosted ``<img>`` nodes in *content* for image embeds.

    Images whose ``src`` contains ``bp.blogspot.com`` are fetched with
    ``self._import_image``, stored as a ``WagtailImage`` titled after the
    last path segment of the URL, and replaced by the markup from
    ``self._image_to_embed``.  Falsy *content* is returned unchanged;
    otherwise the serialised tree from ``ET.tostring`` is returned.
    """
    self.stdout.write('\tGenerate and replace entry content images....')
    if not content:
        return content

    root = lxml.html.fromstring(content)
    for img_node in root.iter('img'):
        parent_node = img_node.getparent()
        source = img_node.attrib['src']
        if 'bp.blogspot.com' not in source:
            continue
        self.stdout.write('\t\t{}'.format(source))
        image = self._import_image(source)
        title = source.rsplit('/', 1)[1]  # last path segment
        new_image = WagtailImage(file=File(file=image, name=title),
                                 title=title)
        new_image.save()
        wrapped_in_link = parent_node.tag == 'a'
        embed = ET.XML(self._image_to_embed(new_image))
        if wrapped_in_link:
            # The embed takes the place of the whole surrounding link.
            parent_node.addnext(embed)
            parent_node.drop_tree()
        else:
            parent_node.append(embed)
            img_node.drop_tag()
    return ET.tostring(root)
def fetch_hut_images():
    """Download a picture for every ``HutPage`` that has a ``link_url``.

    The linked page is fetched and searched for the first ``<img>`` inside
    the first ``a.fancybox-gallery`` link; when none is found the page's
    ``image`` metadata link is used instead.  The picture is stored as an
    ``Image`` and assigned to ``meta_image``.  Request failures are logged
    and the hut is skipped.
    """
    for hpage in HutPage.objects.all():
        if not hpage.link_url:
            continue
        try:
            r = requests.get(hpage.link_url, timeout=settings.API_TIMEOUT)
        except requests.exceptions.RequestException as e:
            logger.exception(str(e))
            continue

        # Reset per hut so a value from a previous iteration is never
        # reused and the name is always bound.
        img_url = None
        soup = BeautifulSoup(r.content, 'html5lib')
        a_tag = soup.find_all("a", {"class": "fancybox-gallery"})
        if a_tag:
            img_tag = a_tag[0].find_all("img")
            src = img_tag[0].get('src') if img_tag else None
            if src:
                img_url = 'http://www.doc.govt.nz/%s' % src
                logger.debug("Hut %s using img %s from HTML body.",
                             str(hpage.pk), img_url)
        if img_url is None:
            # No gallery image: fall back to the page metadata.
            page = metadata_parser.MetadataParser(url=hpage.link_url)
            img_url = page.get_metadata_link('image')
            logger.debug("Hut %s using img %s from HTML meta",
                         str(hpage.pk), img_url)

        if not img_url:
            logger.debug("No img found for hut %s", str(hpage.pk))
            continue
        try:
            response = requests.get(img_url, timeout=settings.API_TIMEOUT)
        except requests.exceptions.RequestException as e:
            logger.exception(str(e))
            # Without a response there is nothing to store for this hut.
            continue
        image = Image(title=hpage.title,
                      file=ImageFile(BytesIO(response.content),
                                     name=img_url.split('/')[-1]))
        image.save()
        hpage.meta_image = image
        hpage.save()
def handle(self, *args, **options):
    """Take a screenshot of each project's main public link.

    For every ``ProjectPage`` the first public link of type ``main`` is
    opened in a ``phantomjs`` browser with a 1600x1000 window.  After a
    five second wait a screenshot is taken, cropped to the window size,
    stored in the ``Image`` titled ``"<project title> screenshot"``
    (created when missing) and assigned to ``project.image``.

    Raises:
        AssertionError: when a visited page does not report success.
    """
    browser = Browser('phantomjs')
    try:
        dims = (1600, 1000)
        browser.driver.set_window_size(dims[0], dims[1])

        for project in ProjectPage.objects.all():
            links = project.links.filter(public=True, type='main')
            if not links:
                continue
            # Use only the first link for now
            link = links[0]
            print("Visiting %s (%s)" % (link.url, link))
            browser.visit(link.url)
            # Explicit check instead of ``assert`` so it is not stripped
            # when Python runs with -O; the exception type is unchanged.
            if not browser.status_code.is_success():
                raise AssertionError(
                    "Visiting %s did not succeed" % link.url)
            time.sleep(5)  # give the page time to render

            with tempfile.NamedTemporaryFile(suffix='.png',
                                             prefix='project') as tmpf:
                browser.driver.save_screenshot(tmpf.name)

                pil_image = PILImage.open(tmpf)
                pil_image = pil_image.crop((0, 0, dims[0], dims[1]))

                # Overwrite the temp file with the cropped picture.
                tmpf.seek(0)
                tmpf.truncate(0)
                pil_image.save(tmpf, format='PNG')

                title = '%s screenshot' % project.title
                try:
                    image = Image.objects.get(title=title)
                except Image.DoesNotExist:
                    image = Image(title=title)
                image.file = ImageFile(tmpf)
                image.save()
                project.image = image
                project.save(update_fields=['image'])
    finally:
        # Always shut the browser down, even when a visit fails.
        browser.quit()
def create_images_from_urls_in_content(self, body):
    """Import the images referenced by ``<img>`` tags in *body*.

    Each ``<img>`` whose ``src`` is non-empty and not a ``data:`` URI is
    downloaded with ``urllib.request.urlretrieve`` (after
    ``self.prepare_url``) and stored through an ``Image`` object.  On
    success the old URL is replaced in *body* by the stored file's URL
    and the text is passed through ``self.convert_html_entities``.
    Failures are printed and skipped.

    Args:
        body: HTML source of a post.

    Returns:
        The (possibly rewritten) HTML.
    """

    def placeholder_size(tag, attribute):
        # ``'width' in tag`` tests the tag's children, not its attributes,
        # so read the attribute directly; fall back to 100 when it is
        # missing or not a plain integer.
        try:
            return int(tag.get(attribute, 100))
        except (TypeError, ValueError):
            return 100

    soup = BeautifulSoup(body, "html5lib")
    for img in soup.findAll('img'):
        old_url = img.get('src')
        if not old_url:
            continue  # Blank image or no src attribute at all
        if old_url.startswith('data:'):
            continue  # Embedded image
        width = placeholder_size(img, 'width')
        height = placeholder_size(img, 'height')
        file_ = os.path.basename(old_url)
        try:
            remote_image = urllib.request.urlretrieve(
                self.prepare_url(old_url))
        except (urllib.error.HTTPError,
                urllib.error.URLError,
                UnicodeEncodeError,
                ValueError):
            print("Unable to import " + old_url)
            continue
        image = Image(title=file_, width=width, height=height)
        try:
            # Close the downloaded file once its content has been stored.
            with open(remote_image[0], 'rb') as downloaded:
                image.file.save(file_, File(downloaded))
            image.save()
            new_url = image.file.url
            body = body.replace(old_url, new_url)
            body = self.convert_html_entities(body)
        except TypeError:
            print("Unable to import image {}".format(remote_image[0]))
    return body
from io import BytesIO

import requests
from django.core.files.images import ImageFile
from wagtail.wagtailimages.models import Image

# Download a remote picture and attach it to a model instance.
#
# event is a model object, substitute your model
# filename and title are up to you
# in my model, event.event_image is a ForeignKey to wagtailimages.Image
response = requests.get(url, timeout=10)  # never wait forever on a dead host
# Fail loudly on 4xx/5xx instead of storing an error page as the image.
response.raise_for_status()
image = Image(title=title,
              file=ImageFile(BytesIO(response.content), name=filename))
image.save()
event.event_image = image
event.save()
def add_to_collection(item, collection):
    """Create or update the page for media file *item* under *collection*.

    Args:
        item: path of the media file.  The optional side files
            ``<item>.description.txt`` and ``<item>.thumbnail.jpeg``
            supply the description and the thumbnail.
        collection: parent page; its ``path``, ``depth``, ``url_path`` and
            ``numchild`` are read to place the child.

    Only files with an extension listed in ``MOVIE_EXTENSIONS`` or
    ``EBOOK_EXTENSIONS`` are handled.  ``options`` is read from the
    enclosing scope.
    """
    description = ""
    description_file = item + ".description.txt"
    if os.path.isfile(description_file):
        with open(description_file, 'r') as description_handle:
            description = description_handle.read()

    file_name = os.path.basename(item)
    title = ".".join(file_name.split(".")[:-1])
    title = title.replace("_", " ")

    thumbnail = None
    thumbnail_file = item + ".thumbnail.jpeg"
    if os.path.isfile(thumbnail_file):
        thumbnail = Image()
        # JPEG data is binary: open in 'rb' and close the handle afterwards.
        with open(thumbnail_file, 'rb') as thumbnail_handle:
            thumbnail.file.save(os.path.basename(item) + ".jpeg",
                                File(thumbnail_handle))
        thumbnail.title = "Thumbnail for " + title
        thumbnail.save()

    extension = file_name.split(".")[-1]
    if extension in MOVIE_EXTENSIONS:
        cls = models.Movie
    elif extension in EBOOK_EXTENSIONS:
        cls = models.EBook
    else:
        return

    slug = slugify(title)
    # Path of a new child: parent path plus a 4-digit position.
    path = collection.path + "{pos:s}".format(
        pos=str(collection.numchild + 1).zfill(4))
    try:
        obj = collection.get_children().filter(slug=slug)[0]
        is_new = False
        # Ensure that other objects with same slug and path are deleted
        others = collection.get_children().filter(slug=slug).exclude(
            id=obj.id)
        if others.exists():
            logger.warning(
                "Other objects with same path existed and were deleted. File: "
                + item)
            others.delete()
    except IndexError:
        obj = cls(path=path)
        is_new = True

    obj.numchild = 0
    obj.depth = collection.depth + 1
    obj.show_in_menus = False
    obj.resource_link = item
    obj.url_path = os.path.join(collection.url_path, slug) + "/"
    obj.slug = slug
    obj.title = title
    obj.live = True
    obj.short_description = description
    obj.author = options["author"]
    obj.duration = ""
    obj.thumbnail = thumbnail
    obj.save()

    # NOTE(review): the child counter is only bumped when a child was
    # actually added - confirm this matches the intended bookkeeping.
    if is_new:
        collection.numchild += 1
        collection.save()
def create_blog_pages(self, posts, blog_index, *args, **options):
    """Create or update a ``BlogPage`` for each WordPress post.

    Args:
        posts: iterable of post dicts; the keys read are ``ID``,
            ``title``, ``slug``, ``description``, ``content``, ``author``,
            ``terms``, ``date`` and ``featured_image``.
        blog_index: parent page that new ``BlogPage`` children are added
            to.

    For each post the body is run through the image, code-format and URL
    rewriting helpers, the page is looked up by slug (updated if found,
    created otherwise), the featured image is downloaded as header image,
    and categories/tags and - when ``self.should_import_comments`` is set
    - comments are imported.
    """
    # Loop-invariant request headers for the featured-image download.
    headers = {
        'Content-Type': 'application/json',
        'Accept': 'application/json',
        'User-Agent': "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.153 Safari/537.36 SE 2.X MetaSr 1.0"
    }
    for post in posts:
        # Needed by import_comments below; it was never assigned before.
        post_id = post.get('ID')
        title = post.get('title')
        print(title)
        if title:
            title = self.convert_html_entities(title)
        # TODO: Fix hardcoded replacement
        slug = post.get('slug') + "-html"
        description = post.get('description')
        if description:
            description = self.convert_html_entities(description)
        body = post.get('content')
        # get image info from content and create image objects
        body = self.create_images_from_urls_in_content(body)
        body = self.format_code_in_content(body)
        body = self.replace_twilioinc_urls(body)
        # author/user data
        user = self.create_user(post.get('author'))
        categories = post.get('terms')
        # format the date: keep the first ten characters only
        date = post.get('date')[:10]
        try:
            new_entry = BlogPage.objects.get(slug=slug)
            new_entry.title = title
            new_entry.body = body
            new_entry.owner = user
            new_entry.author = user
            new_entry.save()
        except BlogPage.DoesNotExist:
            # Use the post's own description; the literal string
            # "description" used to be stored here instead.
            new_entry = blog_index.add_child(instance=BlogPage(
                title=title, slug=slug,
                search_description=description or "",
                date=date, body=body, owner=user, author=user))
        print("Owner:")
        print(new_entry.owner)
        featured_image = post.get('featured_image')
        header_image = None
        if featured_image is not None and "source" in featured_image:
            image_title = featured_image.get('title', "Featured Image")
            source = featured_image['source']
            file_ = os.path.basename(source)
            source = source.replace('stage.swoon', 'swoon')
            try:
                req = requests.get(self.prepare_url(source),
                                   headers=headers, timeout=10)
                # TODO: Capture error for manual download
                if req.status_code == 200 and req.content:
                    with tempfile.NamedTemporaryFile() as remote_image:
                        remote_image.write(req.content)
                        # Flush so the whole payload is on disk before it
                        # is read back; small files looked empty before.
                        remote_image.flush()
                        remote_image.seek(0)
                        header_image = Image(title=image_title,
                                             width=640, height=290)
                        header_image.file.save(file_, File(remote_image))
                    header_image.save()
            except (UnicodeEncodeError,
                    requests.exceptions.RequestException):
                # A failed download must not abort the whole import.
                header_image = None
                print('unable to set header image {}'.format(source))
        new_entry.header_image = header_image
        new_entry.save()
        if categories:
            self.create_categories_and_tags(new_entry, categories)
        if self.should_import_comments:
            self.import_comments(post_id, slug)