def import_entries(self):
    self.stdout.write('Importing entries...')
    for entry in self.blogger_entries:
        content = entry['content'] or ''
        content = self.process_content_image(content)
        excerpt = Truncator(content).words(50) or ''
        slug = slugify(entry['title'])
        try:
            page = EntryPage.objects.get(slug=slug)
        except EntryPage.DoesNotExist:
            entry_author = entry['author']['displayName'].replace(' ', '-')
            page = EntryPage(
                title=entry['title'],
                body=content,
                excerpt=strip_tags(excerpt),
                slug=slugify(entry['title']),
                go_live_at=entry['published'],
                first_published_at=entry['published'],
                date=entry['published'],
                owner=self.authors[entry_author],
                seo_title=entry['title'],
                search_description=excerpt,
                live=entry['published'])
            self.blogpage.add_child(instance=page)
            revision = self.blogpage.save_revision()
            revision.publish()
        self.import_entry_tags(entry.get('labels', []), page)
        page.save()
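# The Blogger importer above looks authors up via self.authors[entry_author], but that
# mapping is never built in these snippets. Below is a minimal sketch of how it could be
# prepared, assuming one Django user per Blogger display name and reusing the same
# space-to-hyphen key convention as import_entries(); the helper name and field choices
# are illustrative, not part of the original code.
from django.contrib.auth import get_user_model

def import_authors(self):
    User = get_user_model()
    self.authors = {}
    for entry in self.blogger_entries:
        display_name = entry['author']['displayName']
        key = display_name.replace(' ', '-')
        if key not in self.authors:
            user, _ = User.objects.get_or_create(username=slugify(display_name))
            self.authors[key] = user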
def import_posts(self):
    self.stdout.write('- Importing entries\n')
    for post in self.get_posts():
        content = post['content'] or ''
        content = self.process_content_image(content)
        excerpt = self.auto_excerpt and Truncator(
            strip_tags(smart_unicode(content))).words(50) or ''
        slug = slugify(post['title'])
        try:
            entry = EntryPage.objects.get(slug=slug)
        except EntryPage.DoesNotExist:
            entry = EntryPage(
                title=post['title'],
                body=content,
                excerpt=excerpt,
                slug=slugify(post['title']),
                go_live_at=post['published'],
                first_published_at=post['published'],
                date=post['published'],
                owner=User.objects.first(),
                seo_title=post['title'],
                search_description=excerpt,
                live=post['published'])
            self.blogpage.add_child(instance=entry)
            revision = self.blogpage.save_revision()
            revision.publish()
        self.get_entry_tags(post.get('labels', []), entry)
        entry.save()
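# process_content_image() is called by every Blogger importer above but is not defined
# in these snippets. The sketch below is an assumption about what it does: download each
# remote <img> referenced in the post HTML, store it as a Wagtail image and rewrite the
# src to the local file URL. The helper body and the use of requests are hypothetical.
import os
import lxml.html
import requests
from django.core.files.base import ContentFile
from wagtail.images.models import Image as WagtailImage

def process_content_image(self, content):
    if not content:
        return content
    root = lxml.html.fromstring(content)
    for el in root.iter('img'):
        src = el.attrib.get('src', '')
        if not src.startswith('http'):
            continue
        response = requests.get(src, timeout=10)
        if response.status_code != 200:
            continue
        name = os.path.basename(src.split('?')[0]) or 'image'
        image = WagtailImage(title=name)
        image.file.save(name, ContentFile(response.content), save=True)
        el.attrib['src'] = image.file.url
    return lxml.html.tostring(root).decode()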
def import_entries(self): self.stdout.write("Importing entries...") entries = ZinniaEntry.objects.all() for entry in entries: self.stdout.write(entry.title) # Header images if entry.image: header_image = WagtailImage(file=entry.image, title=os.path.basename(entry.image.url)) self.stdout.write('\tImported header image: {}'.format(entry.image)) header_image.save() else: header_image = None self.stdout.write('\tGenerate and replace entry content images....') if entry.content: root = lxml.html.fromstring(entry.content) for el in root.iter('img'): if el.attrib['src'].startswith(settings.MEDIA_URL): old_image = el.attrib['src'].replace(settings.MEDIA_URL, '') with open('{}/{}'.format(settings.MEDIA_ROOT, old_image), 'r') as image_file: new_image = WagtailImage(file=File(file=image_file, name=os.path.basename(old_image)), title=os.path.basename(old_image)) new_image.save() el.attrib['src'] = new_image.file.url self.stdout.write('\t\t{}'.format(new_image.file.url)) # New content with images replaced content = lxml.html.tostring(root, pretty_print=True) else: content = entry.content # Create page try: page = EntryPage.objects.get(slug=entry.slug) except EntryPage.DoesNotExist: page = EntryPage( title=entry.title, body=content, slug=entry.slug, go_live_at=entry.start_publication, expire_at=entry.end_publication, first_published_at=entry.creation_date, date=entry.creation_date, owner=entry.authors.first(), seo_title=entry.title, search_description=entry.excerpt, live=entry.is_visible, header_image=header_image ) self.blogpage.add_child(instance=page) revision = self.blogpage.save_revision() revision.publish() self.import_entry_categories(entry, page) self.import_entry_tags(entry, page) page.save() page.save_revision(changed=False) self.entries[entry.pk] = page
def import_entry(self, title, content, items, item_node):
    creation_date = datetime.strptime(
        item_node.find(u'{{{0:s}}}post_date'.format(WP_NS)).text,
        '%Y-%m-%d %H:%M:%S')
    if settings.USE_TZ:
        creation_date = timezone.make_aware(creation_date, pytz.timezone('GMT'))
    excerpt = strip_tags(
        item_node.find(u'{{{0:s}excerpt/}}encoded'.format(WP_NS)).text or '')
    if not excerpt and content:
        excerpt = Truncator(content).words(50)
    slug = slugify(title)[:255] or u'post-{0:s}'.format(
        item_node.find(u'{{{0:s}}}post_id'.format(WP_NS)).text)
    creator = item_node.find(
        '{http://purl.org/dc/elements/1.1/}creator').text
    try:
        entry_date = datetime.strptime(
            item_node.find(u'{{{0:s}}}post_date_gmt'.format(WP_NS)).text,
            '%Y-%m-%d %H:%M:%S')
    except ValueError:
        entry_date = datetime.strptime(
            item_node.find(u'{{{0:s}}}post_date'.format(WP_NS)).text,
            '%Y-%m-%d %H:%M:%S')
    # Create page
    try:
        page = EntryPage.objects.get(slug=slug)
    except EntryPage.DoesNotExist:
        page = EntryPage(
            title=title,
            body=content,
            excerpt=strip_tags(excerpt),
            slug=slug,
            go_live_at=entry_date,
            first_published_at=creation_date,
            date=creation_date,
            owner=self.authors.get(creator),
            seo_title=title,
            search_description=excerpt,
            live=item_node.find(
                u'{{{0:s}}}status'.format(WP_NS)).text == 'publish')
        self.blogpage.add_child(instance=page)
        revision = self.blogpage.save_revision()
        revision.publish()
    self.import_entry_tags(item_node.findall('category'), page)
    self.import_entry_categories(item_node.findall('category'), page)
    # Import header image
    image_id = self.find_image_id(
        item_node.findall(u'{{{0:s}}}postmeta'.format(WP_NS)))
    if image_id:
        self.import_header_image(page, items, image_id)
    page.save()
    page.save_revision(changed=False)
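# find_image_id() above scans the wp:postmeta nodes of an item for the featured-image
# reference. A minimal sketch, assuming the standard WordPress export layout where the
# "_thumbnail_id" meta key points at the attachment item holding the header image; the
# helper body is an assumption, not the original implementation.
def find_image_id(self, postmeta_nodes):
    for meta in postmeta_nodes:
        key = meta.find(u'{{{0:s}}}meta_key'.format(WP_NS))
        value = meta.find(u'{{{0:s}}}meta_value'.format(WP_NS))
        if key is not None and key.text == '_thumbnail_id' and value is not None:
            return value.text
    return None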
def import_entries(self): self.stdout.write("Importing entries...") entries = ZinniaEntry.objects.all() for entry in entries: self.stdout.write(entry.title) # Header images if entry.image: header_image = WagtailImage(file=entry.image, title=os.path.basename( entry.image.url)) self.stdout.write('\tImported header image: {}'.format( entry.image)) header_image.save() else: header_image = None self.stdout.write( '\tGenerate and replace entry content images....') if entry.content: root = lxml.html.fromstring(entry.content) for el in root.iter('img'): if el.attrib['src'].startswith(settings.MEDIA_URL): # fix media chunks path naming e.g. /media/chinks/media/stuff.jpg will fail img_path = el.attrib['src'] old_image = img_path[len(settings.MEDIA_URL):] try: with open( '{}/{}'.format(settings.MEDIA_ROOT, old_image), 'r') as image_file: new_image = WagtailImage( file=File( file=image_file, name=os.path.basename(old_image)), title=os.path.basename(old_image)) new_image.save() el.attrib['src'] = new_image.file.url self.stdout.write('\t\t{}'.format( new_image.file.url)) except Exception as e: # handle image encoding errors like none utf-8 cahrs print(e) print("error handling image, move on... entry:" + str(entry.id)) # New content with images replaced content = lxml.html.tostring(root, pretty_print=True) else: content = entry.content # decode, somehow the content is a byte array if len(content) != 0: content = content.decode() # First, convert the html to json, with the appropriate block type # we convertet the blody from a RichTextField to a StreamField import json content = json.dumps([{'type': 'html', 'value': content}]) # fix empty author entrys (puput will not render the page if no author is set) author = entry.authors.first() if author == None: from zinnia.models.author import Author author = Author.objects.first() # Create page try: page = EntryPage.objects.get(slug=entry.slug) except EntryPage.DoesNotExist: page = EntryPage( title=entry.title, body=content, #fix missing excerpt transfer excerpt=entry.excerpt, slug=entry.slug, go_live_at=entry.start_publication, expire_at=entry.end_publication, first_published_at=entry.creation_date, date=entry.creation_date, owner=author, seo_title=entry.title, search_description=entry.excerpt, live=entry.is_visible, header_image=header_image) self.blogpage.add_child(instance=page) revision = self.blogpage.save_revision() revision.publish() self.import_entry_categories(entry, page) self.import_entry_tags(entry, page) page.save() page.save_revision(changed=False) self.entries[entry.pk] = page