def test_article_with_filename_metadata(self):
    page = self.read_file(
        path='2012-11-30_md_w_filename_meta#foo-bar.md',
        FILENAME_METADATA=None)
    expected = {
        'category': 'yeah',
        'author': 'Alexis Métaireau',
    }
    for key, value in expected.items():
        self.assertEqual(value, page.metadata[key], key)

    page = self.read_file(
        path='2012-11-30_md_w_filename_meta#foo-bar.md',
        FILENAME_METADATA=r'(?P<date>\d{4}-\d{2}-\d{2}).*')
    expected = {
        'category': 'yeah',
        'author': 'Alexis Métaireau',
        'date': SafeDatetime(2012, 11, 30),
    }
    for key, value in expected.items():
        self.assertEqual(value, page.metadata[key], key)

    page = self.read_file(
        path='2012-11-30_md_w_filename_meta#foo-bar.md',
        FILENAME_METADATA=(r'(?P<date>\d{4}-\d{2}-\d{2})'
                           r'_(?P<Slug>.*)'
                           r'#(?P<MyMeta>.*)-(?P<author>.*)'))
    expected = {
        'category': 'yeah',
        'author': 'Alexis Métaireau',
        'date': SafeDatetime(2012, 11, 30),
        'slug': 'md_w_filename_meta',
        'mymeta': 'foo',
    }
    for key, value in expected.items():
        self.assertEqual(value, page.metadata[key], key)

def test_article_with_metadata(self):
    page = self.read_file(path='article_with_metadata.rst')
    expected = {
        'category': 'yeah',
        'author': 'Alexis Métaireau',
        'title': 'This is a super article !',
        'summary': '<p class="first last">Multi-line metadata should be'
                   ' supported\nas well as <strong>inline'
                   ' markup</strong> and stuff to "typogrify'
                   '"...</p>\n',
        'date': SafeDatetime(2010, 12, 2, 10, 14),
        'modified': SafeDatetime(2010, 12, 2, 10, 20),
        'tags': ['foo', 'bar', 'foobar'],
        'custom_field': 'http://notmyidea.org',
    }
    for key, value in expected.items():
        self.assertEqual(value, page.metadata[key], key)

def test_article_with_filename_metadata(self):
    page = self.read_file(
        path='2012-11-30_md_w_filename_meta#foo-bar.md',
        FILENAME_METADATA=None)
    expected = {
        'category': 'yeah',
        'author': 'Alexis Métaireau',
    }
    self.assertDictHasSubset(page.metadata, expected)

    page = self.read_file(
        path='2012-11-30_md_w_filename_meta#foo-bar.md',
        FILENAME_METADATA=r'(?P<date>\d{4}-\d{2}-\d{2}).*')
    expected = {
        'category': 'yeah',
        'author': 'Alexis Métaireau',
        'date': SafeDatetime(2012, 11, 30),
    }
    self.assertDictHasSubset(page.metadata, expected)

    page = self.read_file(
        path='2012-11-30_md_w_filename_meta#foo-bar.md',
        FILENAME_METADATA=(r'(?P<date>\d{4}-\d{2}-\d{2})'
                           r'_(?P<Slug>.*)'
                           r'#(?P<MyMeta>.*)-(?P<author>.*)'))
    expected = {
        'category': 'yeah',
        'author': 'Alexis Métaireau',
        'date': SafeDatetime(2012, 11, 30),
        'slug': 'md_w_filename_meta',
        'mymeta': 'foo',
    }
    self.assertDictHasSubset(page.metadata, expected)

def test_article_with_metadata(self):
    reader = readers.MarkdownReader(settings=get_settings())
    content, metadata = reader.read(_path('article_with_md_extension.md'))
    expected = {
        'category': 'test',
        'title': 'Test md File',
        'summary': '<p>I have a lot to test</p>',
        'date': SafeDatetime(2010, 12, 2, 10, 14),
        'modified': SafeDatetime(2010, 12, 2, 10, 20),
        'tags': ['foo', 'bar', 'foobar'],
    }
    self.assertDictHasSubset(metadata, expected)

    content, metadata = reader.read(
        _path('article_with_markdown_and_nonascii_summary.md'))
    expected = {
        'title': 'マックOS X 10.8でパイソンとVirtualenvをインストールと設定',
        'summary': '<p>パイソンとVirtualenvをまっくでインストールする方法について明確に説明します。</p>',
        'category': '指導書',
        'date': SafeDatetime(2012, 12, 20),
        'modified': SafeDatetime(2012, 12, 22),
        'tags': ['パイソン', 'マック'],
        'slug': 'python-virtualenv-on-mac-osx-mountain-lion-10.8',
    }
    self.assertDictHasSubset(metadata, expected)

def test_readfile_path_metadata_explicit_dates(self):
    test_file = 'article_with_metadata_explicit_dates.html'
    page = self.read_file(path=test_file, DEFAULT_DATE='fs')
    expected = {
        'date': SafeDatetime(2010, 12, 2, 10, 14),
        'modified': SafeDatetime(2010, 12, 31, 23, 59)
    }
    self.assertDictHasSubset(page.metadata, expected)

def test_readfile_path_metadata_implicit_date_explicit_modified(self):
    test_file = 'article_with_metadata_implicit_date_explicit_modified.html'
    page = self.read_file(path=test_file, DEFAULT_DATE='fs')
    expected = {
        'date': SafeDatetime.fromtimestamp(
            os.stat(_path(test_file)).st_mtime),
        'modified': SafeDatetime(2010, 12, 2, 10, 14),
    }
    self.assertDictHasSubset(page.metadata, expected)

def blogger2fields(xml):
    """Opens a Blogger XML file and yields Pelican fields"""
    soup = xml_to_soup(xml)
    entries = soup.feed.findAll('entry')
    for entry in entries:
        raw_kind = entry.find('category', {
            'scheme': 'http://schemas.google.com/g/2005#kind'
        }).get('term')
        if raw_kind == 'http://schemas.google.com/blogger/2008/kind#post':
            kind = 'article'
        elif raw_kind == 'http://schemas.google.com/blogger/2008/kind#comment':
            kind = 'comment'
        elif raw_kind == 'http://schemas.google.com/blogger/2008/kind#page':
            kind = 'page'
        else:
            continue

        try:
            assert kind != 'comment'
            filename = entry.find('link', {'rel': 'alternate'})['href']
            filename = os.path.splitext(os.path.basename(filename))[0]
        except (AssertionError, TypeError, KeyError):
            filename = entry.find('id').string.split('.')[-1]

        title = entry.find('title').string or ''

        content = entry.find('content').string
        raw_date = entry.find('published').string
        if hasattr(SafeDatetime, 'fromisoformat'):
            date_object = SafeDatetime.fromisoformat(raw_date)
        else:
            date_object = SafeDatetime.strptime(raw_date[:23], '%Y-%m-%dT%H:%M:%S.%f')
        date = date_object.strftime('%Y-%m-%d %H:%M')
        author = entry.find('author').find('name').string

        # blogger posts only have tags, no category
        tags = [
            tag.get('term') for tag in entry.findAll(
                'category', {'scheme': 'http://www.blogger.com/atom/ns#'})
        ]

        # Drafts have <app:control><app:draft>yes</app:draft></app:control>
        status = 'published'
        try:
            if entry.find('control').find('draft').string == 'yes':
                status = 'draft'
        except AttributeError:
            pass

        yield (title, content, filename, date, author, None, tags,
               status, kind, 'html')

def blogger2fields(xml):
    """Opens a Blogger XML file and yields Pelican fields"""
    soup = xml_to_soup(xml)
    entries = soup.feed.findAll('entry')
    for entry in entries:
        raw_kind = entry.find(
            'category', {'scheme': 'http://schemas.google.com/g/2005#kind'}
        ).get('term')
        if raw_kind == 'http://schemas.google.com/blogger/2008/kind#post':
            kind = 'article'
        elif raw_kind == 'http://schemas.google.com/blogger/2008/kind#comment':
            kind = 'comment'
        elif raw_kind == 'http://schemas.google.com/blogger/2008/kind#page':
            kind = 'page'
        else:
            continue

        try:
            assert kind != 'comment'
            filename = entry.find('link', {'rel': 'alternate'})['href']
            filename = os.path.splitext(os.path.basename(filename))[0]
        except (AssertionError, TypeError, KeyError):
            filename = entry.find('id').string.split('.')[-1]

        title = entry.find('title').string or ''

        content = entry.find('content').string
        raw_date = entry.find('published').string
        if hasattr(SafeDatetime, 'fromisoformat'):
            date_object = SafeDatetime.fromisoformat(raw_date)
        else:
            date_object = SafeDatetime.strptime(
                raw_date[:23], '%Y-%m-%dT%H:%M:%S.%f')
        date = date_object.strftime('%Y-%m-%d %H:%M')
        author = entry.find('author').find('name').string

        # blogger posts only have tags, no category
        tags = [tag.get('term') for tag in entry.findAll(
            'category', {'scheme': 'http://www.blogger.com/atom/ns#'})]

        # Drafts have <app:control><app:draft>yes</app:draft></app:control>
        status = 'published'
        try:
            if entry.find('control').find('draft').string == 'yes':
                status = 'draft'
        except AttributeError:
            pass

        yield (title, content, filename, date, author, None, tags,
               status, kind, 'html')

def __init__(self, *args, **kwargs):
    super(Article, self).__init__(*args, **kwargs)

    # handle WITH_FUTURE_DATES (designate article to draft based on date)
    if not self.settings['WITH_FUTURE_DATES'] and hasattr(self, 'date'):
        if self.date.tzinfo is None:
            now = SafeDatetime.now()
        else:
            now = SafeDatetime.utcnow().replace(tzinfo=pytz.utc)
        if self.date > now:
            self.status = 'draft'

    # if we are a draft and there is no date provided, set max datetime
    if not hasattr(self, 'date') and self.status == 'draft':
        self.date = SafeDatetime.max

def path_metadata(full_path, source_path, settings=None): metadata = {} if settings: if settings.get("DEFAULT_DATE", None) == "fs": metadata["date"] = SafeDatetime.fromtimestamp(os.stat(full_path).st_ctime) metadata.update(settings.get("EXTRA_PATH_METADATA", {}).get(source_path, {})) return metadata
def posterous2fields(api_token, email, password):
    """Imports posterous posts"""
    import base64
    from datetime import timedelta

    try:
        # py3k import
        import json
    except ImportError:
        # py2 import
        import simplejson as json

    try:
        # py3k import
        import urllib.request as urllib_request
    except ImportError:
        # py2 import
        import urllib2 as urllib_request

    def get_posterous_posts(api_token, email, password, page=1):
        base64string = base64.encodestring(
            ("%s:%s" % (email, password)).encode("utf-8")).replace("\n", "")
        url = ("http://posterous.com/api/v2/users/me/sites/primary/"
               "posts?api_token=%s&page=%d") % (api_token, page)
        request = urllib_request.Request(url)
        request.add_header("Authorization", "Basic %s" % base64string.decode())
        handle = urllib_request.urlopen(request)
        posts = json.loads(handle.read().decode("utf-8"))
        return posts

    page = 1
    posts = get_posterous_posts(api_token, email, password, page)
    while len(posts) > 0:
        posts = get_posterous_posts(api_token, email, password, page)
        page += 1
        for post in posts:
            slug = post.get("slug")
            if not slug:
                slug = slugify(post.get("title"))
            tags = [tag.get("name") for tag in post.get("tags")]
            raw_date = post.get("display_date")
            date_object = SafeDatetime.strptime(
                raw_date[:-6], "%Y/%m/%d %H:%M:%S")
            offset = int(raw_date[-5:])
            delta = timedelta(hours=(offset / 100))
            date_object -= delta
            date = date_object.strftime("%Y-%m-%d %H:%M")
            kind = "article"  # TODO: Recognise pages
            status = "published"  # TODO: Find a way for draft posts
            yield (
                post.get("title"),
                post.get("body_cleaned"),
                slug,
                date,
                post.get("user").get("display_name"),
                [],
                tags,
                status,
                kind,
                "html",
            )

def merge_date_url(value, url):
    """
    Given a Pelican setting URL that contains a placeholder for a date, and a
    date, it will combine the two to return the resulting URL.

    Args
    ----
    value (datetime.datetime): a date
    url (string): a Pelican URL setting

    Returns
    -------
    string: combined URL
    """
    try:
        return url.format(date=value)
    except ValueError:
        # will throw a "ValueError" if the value is a datetime.datetime and
        # the url contains a "-" (e.g. "{date:%-d}") (used in Pelican to
        # strip the leading zero)
        try:
            return url.format(
                date=SafeDatetime(value.year, value.month, value.day))
        except ValueError as e:
            logger.error(
                "%s ValueError. value: %s, type(value): %s, url: %s",
                LOG_PREFIX,
                value,
                type(value),
                url,
            )
            raise e

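# Usage sketch for merge_date_url above. The URL patterns and the helper name
# _demo_merge_date_url are illustrative assumptions, not part of Pelican or of
# this module; they only show how the date placeholder gets filled.
def _demo_merge_date_url():
    date = SafeDatetime(2021, 6, 15)
    # Plain strftime-style placeholders are filled on the first format() call.
    assert merge_date_url(date, 'posts/{date:%Y}/{date:%m}/') == 'posts/2021/06/'
    # With a plain datetime.datetime, codes such as "%-d" can raise ValueError
    # on some platforms; merge_date_url then retries with a SafeDatetime built
    # from the same year/month/day.
    from datetime import datetime
    return merge_date_url(datetime(2021, 6, 15), 'posts/{date:%Y}/{date:%-d}/')
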
def test_article_extra_path_metadata_dont_overwrite(self):
    # EXTRA_PATH_METADATA['author'] should get ignored
    # since we don't overwrite already set values
    input_file_path = '2012-11-29_rst_w_filename_meta#foo-bar.rst'
    page = self.read_file(
        path=input_file_path,
        FILENAME_METADATA=(r'(?P<date>\d{4}-\d{2}-\d{2})'
                           r'_(?P<Slug>.*)'
                           r'#(?P<MyMeta>.*)-(?P<orginalauthor>.*)'),
        EXTRA_PATH_METADATA={
            input_file_path: {
                'author': 'Charlès Overwrite',
                'key-1b': 'value-1b'
            }
        })
    expected = {
        'category': 'yeah',
        'author': 'Alexis Métaireau',
        'title': 'Rst with filename metadata',
        'date': SafeDatetime(2012, 11, 29),
        'slug': 'rst_w_filename_meta',
        'mymeta': 'foo',
        'reader': 'rst',
        'key-1b': 'value-1b'
    }
    self.assertDictHasSubset(page.metadata, expected)

def wp2fields(xml, wp_custpost=False):
    """Opens a WordPress XML file and yields Pelican fields"""
    soup = xml_to_soup(xml)
    items = soup.rss.channel.findAll('item')
    for item in items:

        if item.find('status').string in ["publish", "draft"]:

            try:
                # Use HTMLParser due to issues with BeautifulSoup 3
                title = unescape(item.title.contents[0])
            except IndexError:
                title = 'No title [%s]' % item.find('post_name').string
                logger.warning('Post "%s" is lacking a proper title', title)

            post_name = item.find('post_name').string
            post_id = item.find('post_id').string
            filename = get_filename(post_name, post_id)

            content = item.find('encoded').string
            raw_date = item.find('post_date').string
            if raw_date == u'0000-00-00 00:00:00':
                date = None
            else:
                date_object = SafeDatetime.strptime(raw_date,
                                                    '%Y-%m-%d %H:%M:%S')
                date = date_object.strftime('%Y-%m-%d %H:%M')
            author = item.find('creator').string

            categories = [
                cat.string
                for cat in item.findAll('category', {'domain': 'category'})
            ]

            tags = [
                tag.string
                for tag in item.findAll('category', {'domain': 'post_tag'})
            ]

            # To publish a post the status should be 'published'
            status = 'published' if item.find('status').string == "publish" \
                else item.find('status').string

            kind = 'article'
            post_type = item.find('post_type').string
            if post_type == 'page':
                kind = 'page'
            elif wp_custpost:
                if post_type == 'post':
                    pass
                # Old behaviour was to name everything not a page as an
                # article. Theoretically all attachments have status ==
                # inherit so no attachments should be here. But this statement
                # is to maintain existing behaviour in case that doesn't hold
                # true.
                elif post_type == 'attachment':
                    pass
                else:
                    kind = post_type
            yield (title, content, filename, date, author, categories,
                   tags, status, kind, 'wp-html')

def test_article_with_footnote(self):
    settings = get_settings()
    ec = settings['MARKDOWN']['extension_configs']
    ec['markdown.extensions.footnotes'] = {'SEPARATOR': '-'}
    reader = readers.MarkdownReader(settings)
    content, metadata = reader.read(
        _path('article_with_markdown_and_footnote.md'))
    expected_content = (
        '<p>This is some content'
        '<sup id="fnref-1"><a class="footnote-ref" href="#fn-1"'
        '>1</a></sup>'
        ' with some footnotes'
        '<sup id="fnref-footnote"><a class="footnote-ref" '
        'href="#fn-footnote">2</a></sup></p>\n'
        '<div class="footnote">\n'
        '<hr>\n<ol>\n<li id="fn-1">\n'
        '<p>Numbered footnote '
        '<a class="footnote-backref" href="#fnref-1" '
        'title="Jump back to footnote 1 in the text">↩</a></p>\n'
        '</li>\n<li id="fn-footnote">\n'
        '<p>Named footnote '
        '<a class="footnote-backref" href="#fnref-footnote"'
        ' title="Jump back to footnote 2 in the text">↩</a></p>\n'
        '</li>\n</ol>\n</div>')
    expected_metadata = {
        'title': 'Article with markdown containing footnotes',
        'summary': ('<p>Summary with <strong>inline</strong> markup '
                    '<em>should</em> be supported.</p>'),
        'date': SafeDatetime(2012, 10, 31),
        'modified': SafeDatetime(2012, 11, 1),
        'multiline': [
            'Line Metadata should be handle properly.',
            'See syntax of Meta-Data extension of '
            'Python Markdown package:',
            'If a line is indented by 4 or more spaces,',
            'that line is assumed to be an additional line of the value',
            'for the previous keyword.',
            'A keyword may have as many lines as desired.',
        ]
    }
    self.assertEqual(content, expected_content)
    self.assertDictHasSubset(metadata, expected_metadata)

def path_metadata(full_path, source_path, settings=None):
    metadata = {}
    if settings:
        if settings.get('DEFAULT_DATE', None) == 'fs':
            metadata['date'] = SafeDatetime.fromtimestamp(
                os.stat(full_path).st_mtime)
        metadata.update(settings.get('EXTRA_PATH_METADATA', {}).get(
            source_path, {}))
    return metadata

def path_metadata(full_path, source_path, settings=None):
    metadata = {}
    if settings:
        if settings.get('DEFAULT_DATE', None) == 'fs':
            metadata['date'] = SafeDatetime.fromtimestamp(
                os.stat(full_path).st_mtime)
        metadata.update(
            settings.get('EXTRA_PATH_METADATA', {}).get(source_path, {}))
    return metadata

def test_article_with_footnote(self):
    reader = readers.MarkdownReader(settings=get_settings())
    content, metadata = reader.read(
        _path('article_with_markdown_and_footnote.md'))
    expected_content = (
        '<p>This is some content'
        '<sup id="fnref:1"><a class="footnote-ref" href="#fn:1" '
        'rel="footnote">1</a></sup>'
        ' with some footnotes'
        '<sup id="fnref:footnote"><a class="footnote-ref" '
        'href="#fn:footnote" rel="footnote">2</a></sup></p>\n'
        '<div class="footnote">\n'
        '<hr />\n<ol>\n<li id="fn:1">\n'
        '<p>Numbered footnote '
        '<a class="footnote-backref" href="#fnref:1" rev="footnote" '
        'title="Jump back to footnote 1 in the text">↩</a></p>\n'
        '</li>\n<li id="fn:footnote">\n'
        '<p>Named footnote '
        '<a class="footnote-backref" href="#fnref:footnote" rev="footnote"'
        ' title="Jump back to footnote 2 in the text">↩</a></p>\n'
        '</li>\n</ol>\n</div>')
    expected_metadata = {
        'title': 'Article with markdown containing footnotes',
        'summary': ('<p>Summary with <strong>inline</strong> markup '
                    '<em>should</em> be supported.</p>'),
        'date': SafeDatetime(2012, 10, 31),
        'modified': SafeDatetime(2012, 11, 1),
        'slug': 'article-with-markdown-containing-footnotes',
        'multiline': [
            'Line Metadata should be handle properly.',
            'See syntax of Meta-Data extension of Python Markdown package:',
            'If a line is indented by 4 or more spaces,',
            'that line is assumed to be an additional line of the value',
            'for the previous keyword.',
            'A keyword may have as many lines as desired.',
        ]
    }
    self.assertEqual(content, expected_content)
    for key, value in metadata.items():
        self.assertEqual(value, expected_metadata[key], key)

def test_datetime(self):
    # If DATETIME is set to a tuple, it should be used to override LOCALE
    dt = SafeDatetime(2015, 9, 13)

    page_kwargs = self._copy_page_kwargs()

    # set its date to dt
    page_kwargs['metadata']['date'] = dt
    page = Page(**page_kwargs)

    # page.locale_date is a unicode string in both python2 and python3
    dt_date = dt.strftime(DEFAULT_CONFIG['DEFAULT_DATE_FORMAT'])
    # dt_date is a byte string in python2, and a unicode string in python3
    # Let's make sure it is a unicode string (relies on python 3.3
    # supporting the u prefix)
    if type(dt_date) != type(u''):
        # python2:
        dt_date = unicode(dt_date, 'utf8')

    self.assertEqual(page.locale_date, dt_date)

    page_kwargs['settings'] = get_settings()

    # I doubt this can work on all platforms ...
    if platform == "win32":
        locale = 'jpn'
    else:
        locale = 'ja_JP.utf8'
    page_kwargs['settings']['DATE_FORMATS'] = {
        'jp': (locale, '%Y-%m-%d(%a)')
    }
    page_kwargs['metadata']['lang'] = 'jp'

    import locale as locale_module
    try:
        page = Page(**page_kwargs)
        self.assertEqual(page.locale_date, '2015-09-13(\u65e5)')
    except locale_module.Error:
        # The constructor of ``Page`` will try to set the locale to
        # ``ja_JP.utf8``. But this attempt will fail when there is no
        # such locale in the system. You can see which locales there are
        # in your system with ``locale -a`` command.
        #
        # Until we find some other method to test this functionality, we
        # will simply skip this test.
        unittest.skip("There is no locale %s in this system." % locale)

def wp2fields(xml, wp_custpost=False):
    """Opens a WordPress XML file and yields Pelican fields"""
    soup = xml_to_soup(xml)
    items = soup.rss.channel.findAll('item')
    for item in items:
        if item.find('status').string in ["publish", "draft"]:

            try:
                # Use HTMLParser due to issues with BeautifulSoup 3
                title = unescape(item.title.contents[0])
            except IndexError:
                title = 'No title [%s]' % item.find('post_name').string
                logger.warning('Post "%s" is lacking a proper title', title)

            post_name = item.find('post_name').string
            post_id = item.find('post_id').string
            filename = get_filename(post_name, post_id)

            content = item.find('encoded').string
            raw_date = item.find('post_date').string
            if raw_date == u'0000-00-00 00:00:00':
                date = None
            else:
                date_object = SafeDatetime.strptime(
                    raw_date, '%Y-%m-%d %H:%M:%S')
                date = date_object.strftime('%Y-%m-%d %H:%M')
            author = item.find('creator').string

            categories = [cat.string for cat
                          in item.findAll('category', {'domain': 'category'})]

            tags = [tag.string for tag
                    in item.findAll('category', {'domain': 'post_tag'})]

            # To publish a post the status should be 'published'
            status = 'published' if item.find('status').string == "publish" \
                else item.find('status').string

            kind = 'article'
            post_type = item.find('post_type').string
            if post_type == 'page':
                kind = 'page'
            elif wp_custpost:
                if post_type == 'post':
                    pass
                # Old behaviour was to name everything not a page as an
                # article. Theoretically all attachments have status ==
                # inherit so no attachments should be here. But this statement
                # is to maintain existing behaviour in case that doesn't hold
                # true.
                elif post_type == 'attachment':
                    pass
                else:
                    kind = post_type
            yield (title, content, filename, date, author, categories,
                   tags, status, kind, 'wp-html')

def test_datetime(self):
    # If DATETIME is set to a tuple, it should be used to override LOCALE
    dt = SafeDatetime(2015, 9, 13)
    page_kwargs = self._copy_page_kwargs()

    # set its date to dt
    page_kwargs['metadata']['date'] = dt
    page = Page(**page_kwargs)

    # page.locale_date is a unicode string in both python2 and python3
    dt_date = dt.strftime(DEFAULT_CONFIG['DEFAULT_DATE_FORMAT'])
    # dt_date is a byte string in python2, and a unicode string in python3
    # Let's make sure it is a unicode string (relies on python 3.3
    # supporting the u prefix)
    if type(dt_date) != type(u''):
        # python2:
        dt_date = unicode(dt_date, 'utf8')

    self.assertEqual(page.locale_date, dt_date)

    page_kwargs['settings'] = get_settings()

    # I doubt this can work on all platforms ...
    if platform == "win32":
        locale = 'jpn'
    else:
        locale = 'ja_JP.utf8'
    page_kwargs['settings']['DATE_FORMATS'] = {'jp': (locale,
                                                      '%Y-%m-%d(%a)')}
    page_kwargs['metadata']['lang'] = 'jp'

    import locale as locale_module
    try:
        page = Page(**page_kwargs)
        self.assertEqual(page.locale_date, '2015-09-13(\u65e5)')
    except locale_module.Error:
        # The constructor of ``Page`` will try to set the locale to
        # ``ja_JP.utf8``. But this attempt will fail when there is no
        # such locale in the system. You can see which locales there are
        # in your system with ``locale -a`` command.
        #
        # Until we find some other method to test this functionality, we
        # will simply skip this test.
        unittest.skip("There is no locale %s in this system." % locale)

def datetime_from_period(value):
    """
    Converts "period" into a datetime object.

    On yearly/monthly/daily archive pages, a "period" object is supplied so
    you know what time period the particular archive page is for. This
    converts it to a datetime.datetime object, so it can be further
    processed.

    If a month is not provided (i.e. the period is for a yearly archive),
    January is assumed. If a day is not provided (i.e. the period is for a
    yearly or monthly archive), the 1st is assumed.

    You can also generate a tuple of (up to three) integers to get a datetime
    out, using the integer representation for the month (1=January, etc). If
    passed a single integer, it is assumed to represent a year.

    Args
    ----
    value (tuple or int): input period

    Returns
    -------
    datetime.datetime: value converted
    """
    if isinstance(value, int):
        value = (value, )

    if len(value) >= 2 and isinstance(value[1], int):
        placeholder_month = SafeDatetime(2021, value[1], 1).strftime("%B")
    elif len(value) == 1:
        placeholder_month = SafeDatetime(2021, 1, 1).strftime("%B")
    else:
        placeholder_month = value[1]

    new_value = " ".join((
        str(value[0]),
        placeholder_month,
        str(value[2]) if len(value) >= 3 else "1",
    ))
    new_datetime = SafeDatetime.strptime(new_value, "%Y %B %d")
    return new_datetime

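# Usage sketch for datetime_from_period above; _demo_datetime_from_period and
# the sample period values are illustrative assumptions, not part of the
# original module.
def _demo_datetime_from_period():
    # A bare year: January 1st is assumed.
    assert datetime_from_period(2021) == SafeDatetime(2021, 1, 1)
    # (year, month) with an integer month: the 1st of that month is assumed.
    assert datetime_from_period((2021, 6)) == SafeDatetime(2021, 6, 1)
    # A full (year, month, day) period converts directly.
    assert datetime_from_period((2021, 6, 15)) == SafeDatetime(2021, 6, 15)
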
def posterous2fields(api_token, email, password):
    """Imports posterous posts"""
    import base64
    from datetime import timedelta

    try:
        # py3k import
        import json
    except ImportError:
        # py2 import
        import simplejson as json

    try:
        # py3k import
        import urllib.request as urllib_request
    except ImportError:
        # py2 import
        import urllib2 as urllib_request

    def get_posterous_posts(api_token, email, password, page=1):
        base64string = base64.encodestring(
            ("%s:%s" % (email, password)).encode('utf-8')).replace('\n', '')
        url = ("http://posterous.com/api/v2/users/me/sites/primary/"
               "posts?api_token=%s&page=%d") % (api_token, page)
        request = urllib_request.Request(url)
        request.add_header('Authorization',
                           'Basic %s' % base64string.decode())
        handle = urllib_request.urlopen(request)
        posts = json.loads(handle.read().decode('utf-8'))
        return posts

    page = 1
    posts = get_posterous_posts(api_token, email, password, page)
    settings = read_settings()
    subs = settings['SLUG_REGEX_SUBSTITUTIONS']
    while len(posts) > 0:
        posts = get_posterous_posts(api_token, email, password, page)
        page += 1
        for post in posts:
            slug = post.get('slug')
            if not slug:
                slug = slugify(post.get('title'), regex_subs=subs)
            tags = [tag.get('name') for tag in post.get('tags')]
            raw_date = post.get('display_date')
            date_object = SafeDatetime.strptime(
                raw_date[:-6], '%Y/%m/%d %H:%M:%S')
            offset = int(raw_date[-5:])
            delta = timedelta(hours=(offset / 100))
            date_object -= delta
            date = date_object.strftime('%Y-%m-%d %H:%M')
            kind = 'article'  # TODO: Recognise pages
            status = 'published'  # TODO: Find a way for draft posts
            yield (post.get('title'), post.get('body_cleaned'), slug, date,
                   post.get('user').get('display_name'), [], tags, status,
                   kind, 'html')

def posterous2fields(api_token, email, password):
    """Imports posterous posts"""
    import base64
    from datetime import timedelta

    try:
        # py3k import
        import json
    except ImportError:
        # py2 import
        import simplejson as json

    try:
        # py3k import
        import urllib.request as urllib_request
    except ImportError:
        # py2 import
        import urllib2 as urllib_request

    def get_posterous_posts(api_token, email, password, page=1):
        base64string = base64.encodestring(
            ("%s:%s" % (email, password)).encode('utf-8')).replace('\n', '')
        url = ("http://posterous.com/api/v2/users/me/sites/primary/"
               "posts?api_token=%s&page=%d") % (api_token, page)
        request = urllib_request.Request(url)
        request.add_header('Authorization',
                           'Basic %s' % base64string.decode())
        handle = urllib_request.urlopen(request)
        posts = json.loads(handle.read().decode('utf-8'))
        return posts

    page = 1
    posts = get_posterous_posts(api_token, email, password, page)
    settings = read_settings()
    subs = settings['SLUG_REGEX_SUBSTITUTIONS']
    while len(posts) > 0:
        posts = get_posterous_posts(api_token, email, password, page)
        page += 1
        for post in posts:
            slug = post.get('slug')
            if not slug:
                slug = slugify(post.get('title'), regex_subs=subs)
            tags = [tag.get('name') for tag in post.get('tags')]
            raw_date = post.get('display_date')
            date_object = SafeDatetime.strptime(raw_date[:-6],
                                                '%Y/%m/%d %H:%M:%S')
            offset = int(raw_date[-5:])
            delta = timedelta(hours=(offset / 100))
            date_object -= delta
            date = date_object.strftime('%Y-%m-%d %H:%M')
            kind = 'article'  # TODO: Recognise pages
            status = 'published'  # TODO: Find a way for draft posts
            yield (post.get('title'), post.get('body_cleaned'), slug, date,
                   post.get('user').get('display_name'), [], tags, status,
                   kind, 'html')

def test_typed_metadata():
    content, metadata = read_content_metadata('metadata.md')
    expected = {
        'title': 'Metadata',
        'list': ['a', 'b', 'c'],
        'date': SafeDatetime(2017, 1, 6, 22, 24),
        'int': 42,
        'bool': False,
        'summary': '<p>a summary</p>',
    }
    assert_dict_contains(metadata, expected)

def default_metadata(settings=None, process=None):
    metadata = {}
    if settings:
        if 'DEFAULT_CATEGORY' in settings:
            value = settings['DEFAULT_CATEGORY']
            if process:
                value = process('category', value)
            metadata['category'] = value
        if settings.get('DEFAULT_DATE', None) and \
                settings['DEFAULT_DATE'] != 'fs':
            metadata['date'] = SafeDatetime(*settings['DEFAULT_DATE'])
    return metadata

def test_article_with_metadata_and_contents_attrib(self):
    page = self.read_file(path='article_with_metadata_and_contents.html')
    expected = {
        'category': 'yeah',
        'author': 'Alexis Métaireau',
        'title': 'This is a super article !',
        'summary': 'Summary and stuff',
        'date': SafeDatetime(2010, 12, 2, 10, 14),
        'tags': ['foo', 'bar', 'foobar'],
        'custom_field': 'http://notmyidea.org',
    }
    self.assertDictHasSubset(page.metadata, expected)

def posterous2fields(api_token, email, password):
    """Imports posterous posts"""
    import base64
    from datetime import timedelta

    try:
        # py3k import
        import json
    except ImportError:
        # py2 import
        import simplejson as json

    try:
        # py3k import
        import urllib.request as urllib_request
    except ImportError:
        # py2 import
        import urllib2 as urllib_request

    def get_posterous_posts(api_token, email, password, page=1):
        base64string = base64.encodestring(
            ("%s:%s" % (email, password)).encode('utf-8')).replace(b'\n', b'')
        url = "http://posterous.com/api/v2/users/me/sites/primary/posts?api_token=%s&page=%d" % (
            api_token, page)
        request = urllib_request.Request(url)
        request.add_header("Authorization", "Basic %s" % base64string.decode())
        handle = urllib_request.urlopen(request)
        posts = json.loads(handle.read().decode('utf-8'))
        return posts

    page = 1
    posts = get_posterous_posts(api_token, email, password, page)
    while len(posts) > 0:
        posts = get_posterous_posts(api_token, email, password, page)
        page += 1
        for post in posts:
            slug = post.get('slug')
            if not slug:
                slug = slugify(post.get('title'))
            tags = [tag.get('name') for tag in post.get('tags')]
            raw_date = post.get('display_date')
            date_object = SafeDatetime.strptime(raw_date[:-6],
                                                "%Y/%m/%d %H:%M:%S")
            offset = int(raw_date[-5:])
            delta = timedelta(hours=offset / 100)
            date_object -= delta
            date = date_object.strftime("%Y-%m-%d %H:%M")
            kind = 'article'  # TODO: Recognise pages
            yield (post.get('title'), post.get('body_cleaned'), slug, date,
                   post.get('user').get('display_name'), [], tags, kind,
                   "html")

def url_format(self):
    """Returns the URL, formatted with the proper values"""
    metadata = copy.copy(self.metadata)
    path = self.metadata.get('path', self.get_relative_source_path())
    metadata.update({
        'path': path_to_url(path),
        'slug': getattr(self, 'slug', ''),
        'lang': getattr(self, 'lang', 'en'),
        'date': getattr(self, 'date', SafeDatetime.now()),
        'author': self.author.slug if hasattr(self, 'author') else '',
        'category': self.category.slug if hasattr(self, 'category') else ''
    })
    return metadata

def test_article_with_metadata(self):
    page = self.read_file(path='article_with_metadata.html')
    expected = {
        'category': 'yeah',
        'author': 'Alexis Métaireau',
        'title': 'This is a super article !',
        'summary': 'Summary and stuff',
        'date': SafeDatetime(2010, 12, 2, 10, 14),
        'tags': ['foo', 'bar', 'foobar'],
        'custom_field': 'http://notmyidea.org',
    }
    for key, value in expected.items():
        self.assertEqual(value, page.metadata[key], key)

def test_article_with_optional_filename_metadata(self):
    page = self.read_file(
        path='2012-11-30_md_w_filename_meta#foo-bar.md',
        FILENAME_METADATA=r'(?P<date>\d{4}-\d{2}-\d{2})?')
    expected = {
        'date': SafeDatetime(2012, 11, 30),
        'reader': 'markdown',
    }
    self.assertDictHasSubset(page.metadata, expected)

    page = self.read_file(
        path='empty.md',
        FILENAME_METADATA=r'(?P<date>\d{4}-\d{2}-\d{2})?')
    expected = {
        'reader': 'markdown',
    }
    self.assertDictHasSubset(page.metadata, expected)
    self.assertNotIn('date', page.metadata, 'Date should not be set.')

def url_format(self):
    """Returns the URL, formatted with the proper values"""
    metadata = copy.copy(self.metadata)
    path = self.metadata.get("path", self.get_relative_source_path())
    metadata.update(
        {
            "path": path_to_url(path),
            "slug": getattr(self, "slug", ""),
            "lang": getattr(self, "lang", "en"),
            "date": getattr(self, "date", SafeDatetime.now()),
            "author": self.author.slug if hasattr(self, "author") else "",
            "tag": self.tag.slug if hasattr(self, "tag") else "",
            "category": self.category.slug if hasattr(self, "category") else "",
        }
    )
    return metadata

def default_metadata(settings=None, process=None):
    metadata = {}
    if settings:
        for name, value in dict(settings.get('DEFAULT_METADATA', {})).items():
            if process:
                value = process(name, value)
            metadata[name] = value
        if 'DEFAULT_CATEGORY' in settings:
            value = settings['DEFAULT_CATEGORY']
            if process:
                value = process('category', value)
            metadata['category'] = value
        if settings.get('DEFAULT_DATE', None) and \
                settings['DEFAULT_DATE'] != 'fs':
            metadata['date'] = SafeDatetime(*settings['DEFAULT_DATE'])
    return metadata

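# Usage sketch for default_metadata above. The settings values, the
# lower-casing `process` callback, and _demo_default_metadata are illustrative
# assumptions; DEFAULT_DATE given as a (year, month, day) tuple is expanded
# into a SafeDatetime.
def _demo_default_metadata():
    settings = {
        'DEFAULT_METADATA': {'status': 'draft'},
        'DEFAULT_CATEGORY': 'Misc',
        'DEFAULT_DATE': (2012, 11, 30),
    }
    meta = default_metadata(
        settings, process=lambda k, v: v.lower() if k == 'category' else v)
    assert meta == {'status': 'draft',
                    'category': 'misc',
                    'date': SafeDatetime(2012, 11, 30)}
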
def test_article_extra_path_metadata(self):
    input_with_metadata = '2012-11-29_rst_w_filename_meta#foo-bar.rst'
    page_metadata = self.read_file(
        path=input_with_metadata,
        FILENAME_METADATA=(
            r'(?P<date>\d{4}-\d{2}-\d{2})'
            r'_(?P<Slug>.*)'
            r'#(?P<MyMeta>.*)-(?P<author>.*)'
        ),
        EXTRA_PATH_METADATA={
            input_with_metadata: {
                'key-1a': 'value-1a',
                'key-1b': 'value-1b'
            }
        }
    )
    expected_metadata = {
        'category': 'yeah',
        'author': 'Alexis Métaireau',
        'title': 'Rst with filename metadata',
        'date': SafeDatetime(2012, 11, 29),
        'slug': 'rst_w_filename_meta',
        'mymeta': 'foo',
        'reader': 'rst',
        'key-1a': 'value-1a',
        'key-1b': 'value-1b'
    }
    self.assertDictHasSubset(page_metadata.metadata, expected_metadata)

    input_file_path_without_metadata = 'article.rst'
    page_without_metadata = self.read_file(
        path=input_file_path_without_metadata,
        EXTRA_PATH_METADATA={
            input_file_path_without_metadata: {
                'author': 'Charlès Overwrite'
            }
        }
    )
    expected_without_metadata = {
        'category': 'misc',
        'author': 'Charlès Overwrite',
        'title': 'Article title',
        'reader': 'rst',
    }
    self.assertDictHasSubset(
        page_without_metadata.metadata, expected_without_metadata)

def path_metadata(full_path, source_path, settings=None):
    metadata = {}
    if settings:
        if settings.get('DEFAULT_DATE', None) == 'fs':
            metadata['date'] = SafeDatetime.fromtimestamp(
                os.stat(full_path).st_mtime)

        # Apply EXTRA_PATH_METADATA for the source path and the paths of any
        # parent directories. Sorting EPM first ensures that the most specific
        # path wins conflicts.
        epm = settings.get('EXTRA_PATH_METADATA', {})
        for path, meta in sorted(epm.items()):
            # Enforce a trailing slash when checking for parent directories.
            # This prevents false positives when one file or directory's name
            # is a prefix of another's.
            dirpath = os.path.join(path, '')
            if source_path == path or source_path.startswith(dirpath):
                metadata.update(meta)

    return metadata

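# Illustration of the directory-prefix matching in path_metadata above. The
# paths and metadata values are made up; 'date' comes from the file's mtime
# because DEFAULT_DATE is 'fs'.
#
#   settings = {
#       'DEFAULT_DATE': 'fs',
#       'EXTRA_PATH_METADATA': {
#           'blog': {'category': 'blog'},          # applies to everything under blog/
#           'blog/post.md': {'author': 'Alexis'},  # more specific, applied last
#       },
#   }
#   path_metadata('/site/content/blog/post.md', 'blog/post.md', settings)
#   # -> {'date': <mtime of the file>, 'category': 'blog', 'author': 'Alexis'}
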
def url_format(self):
    """Returns the URL, formatted with the proper values"""
    metadata = copy.copy(self.metadata)
    path = self.metadata.get('path', self.get_relative_source_path())
    default_category = self.settings['DEFAULT_CATEGORY']
    slug_substitutions = self.settings.get('SLUG_SUBSTITUTIONS', ())
    metadata.update({
        'path': path_to_url(path),
        'slug': getattr(self, 'slug', ''),
        'lang': getattr(self, 'lang', 'en'),
        'date': getattr(self, 'date', SafeDatetime.now()),
        'author': slugify(
            getattr(self, 'author', ''),
            slug_substitutions
        ),
        'category': slugify(
            getattr(self, 'category', default_category),
            slug_substitutions
        )
    })
    return metadata

def parse(self):
    """Imports posterous posts"""
    from datetime import timedelta

    page = 1
    posts = self._get_posterous_posts(page)
    settings = read_settings()
    subs = settings["SLUG_REGEX_SUBSTITUTIONS"]
    while len(posts) > 0:
        posts = self._get_posterous_posts(page)
        page += 1
        for post in posts:
            slug = post.get("slug")
            if not slug:
                slug = slugify(post.get("title"), regex_subs=subs)
            tags = [tag.get("name") for tag in post.get("tags")]
            raw_date = post.get("display_date")
            date_object = SafeDatetime.strptime(
                raw_date[:-6], "%Y/%m/%d %H:%M:%S"
            )
            offset = int(raw_date[-5:])
            delta = timedelta(hours=(offset / 100))
            date_object -= delta
            date = date_object.strftime("%Y-%m-%d %H:%M")
            kind = "article"  # TODO: Recognise pages
            status = "published"  # TODO: Find a way for draft posts
            yield blog2pelican.entities.content.Content(
                title=post.get("title"),
                content=post.get("body_cleaned"),
                slug=slug,
                date=date,
                author=post.get("user").get("display_name"),
                categories=[],
                tags=tags,
                status=status,
                kind=kind,
                markup="html",
            )

def __init__(self, content, metadata=None, settings=None,
             source_path=None, context=None):
    if metadata is None:
        metadata = {}
    if settings is None:
        settings = copy.deepcopy(DEFAULT_CONFIG)

    self.settings = settings
    self._content = content
    if context is None:
        context = {}
    self._context = context
    self.translations = []

    local_metadata = dict(settings['DEFAULT_METADATA'])
    local_metadata.update(metadata)

    # set metadata as attributes
    for key, value in local_metadata.items():
        if key in ('save_as', 'url'):
            key = 'override_' + key
        setattr(self, key.lower(), value)

    # also keep track of the metadata attributes available
    self.metadata = local_metadata

    # default template if it's not defined in page
    self.template = self._get_template()

    # First, read the authors from "authors", if not, fallback to "author"
    # and if not use the settings defined one, if any.
    if not hasattr(self, 'author'):
        if hasattr(self, 'authors'):
            self.author = self.authors[0]
        elif 'AUTHOR' in settings:
            self.author = Author(settings['AUTHOR'], settings)

    if not hasattr(self, 'authors') and hasattr(self, 'author'):
        self.authors = [self.author]

    # XXX Split all the following code into pieces, there is too much here.

    # manage languages
    self.in_default_lang = True
    if 'DEFAULT_LANG' in settings:
        default_lang = settings['DEFAULT_LANG'].lower()
        if not hasattr(self, 'lang'):
            self.lang = default_lang

        self.in_default_lang = (self.lang == default_lang)

    # create the slug if not existing, generate slug according to
    # setting of SLUG_ATTRIBUTE
    if not hasattr(self, 'slug'):
        if settings['SLUGIFY_SOURCE'] == 'title' and hasattr(self, 'title'):
            self.slug = slugify(self.title,
                                settings.get('SLUG_SUBSTITUTIONS', ()))
        elif (settings['SLUGIFY_SOURCE'] == 'basename' and
                source_path is not None):
            basename = os.path.basename(os.path.splitext(source_path)[0])
            self.slug = slugify(basename,
                                settings.get('SLUG_SUBSTITUTIONS', ()))

    self.source_path = source_path

    # manage the date format
    if not hasattr(self, 'date_format'):
        if hasattr(self, 'lang') and self.lang in settings['DATE_FORMATS']:
            self.date_format = settings['DATE_FORMATS'][self.lang]
        else:
            self.date_format = settings['DEFAULT_DATE_FORMAT']

    if isinstance(self.date_format, tuple):
        locale_string = self.date_format[0]
        if sys.version_info < (3, ) and isinstance(locale_string,
                                                   six.text_type):
            locale_string = locale_string.encode('ascii')
        locale.setlocale(locale.LC_ALL, locale_string)
        self.date_format = self.date_format[1]

    # manage timezone
    default_timezone = settings.get('TIMEZONE', 'UTC')
    timezone = getattr(self, 'timezone', default_timezone)

    if hasattr(self, 'date'):
        self.date = set_date_tzinfo(self.date, timezone)
        self.locale_date = strftime(self.date, self.date_format)

    if hasattr(self, 'modified'):
        self.modified = set_date_tzinfo(self.modified, timezone)
        self.locale_modified = strftime(self.modified, self.date_format)

    # manage status
    if not hasattr(self, 'status'):
        self.status = settings['DEFAULT_STATUS']
        if not settings['WITH_FUTURE_DATES']:
            if hasattr(self, 'date') and self.date > SafeDatetime.now():
                self.status = 'draft'

    # store the summary metadata if it is set
    if 'summary' in metadata:
        self._summary = metadata['summary']

    signals.content_object_init.send(self)

def tumblr2fields(api_key, blogname):
    """Imports Tumblr posts (API v2)"""
    try:
        # py3k import
        import json
    except ImportError:
        # py2 import
        import simplejson as json

    try:
        # py3k import
        import urllib.request as urllib_request
    except ImportError:
        # py2 import
        import urllib2 as urllib_request

    def get_tumblr_posts(api_key, blogname, offset=0):
        url = ("http://api.tumblr.com/v2/blog/%s.tumblr.com/"
               "posts?api_key=%s&offset=%d&filter=raw") % (
            blogname, api_key, offset)
        request = urllib_request.Request(url)
        handle = urllib_request.urlopen(request)
        posts = json.loads(handle.read().decode('utf-8'))
        return posts.get('response').get('posts')

    offset = 0
    posts = get_tumblr_posts(api_key, blogname, offset)
    settings = read_settings()
    subs = settings['SLUG_REGEX_SUBSTITUTIONS']
    while len(posts) > 0:
        for post in posts:
            title = \
                post.get('title') or \
                post.get('source_title') or \
                post.get('type').capitalize()
            slug = post.get('slug') or slugify(title, regex_subs=subs)
            tags = post.get('tags')
            timestamp = post.get('timestamp')
            date = SafeDatetime.fromtimestamp(int(timestamp)).strftime(
                "%Y-%m-%d %H:%M:%S")
            slug = SafeDatetime.fromtimestamp(int(timestamp)).strftime(
                "%Y-%m-%d-") + slug
            format = post.get('format')
            content = post.get('body')
            type = post.get('type')
            if type == 'photo':
                if format == 'markdown':
                    fmtstr = '![%s](%s)'
                else:
                    fmtstr = '<img alt="%s" src="%s" />'
                content = ''
                for photo in post.get('photos'):
                    content += '\n'.join(
                        fmtstr % (photo.get('caption'),
                                  photo.get('original_size').get('url')))
                content += '\n\n' + post.get('caption')
            elif type == 'quote':
                if format == 'markdown':
                    fmtstr = '\n\n— %s'
                else:
                    fmtstr = '<p>— %s</p>'
                content = post.get('text') + fmtstr % post.get('source')
            elif type == 'link':
                if format == 'markdown':
                    fmtstr = '[via](%s)\n\n'
                else:
                    fmtstr = '<p><a href="%s">via</a></p>\n'
                content = fmtstr % post.get('url') + post.get('description')
            elif type == 'audio':
                if format == 'markdown':
                    fmtstr = '[via](%s)\n\n'
                else:
                    fmtstr = '<p><a href="%s">via</a></p>\n'
                content = fmtstr % post.get('source_url') + \
                    post.get('caption') + \
                    post.get('player')
            elif type == 'video':
                if format == 'markdown':
                    fmtstr = '[via](%s)\n\n'
                else:
                    fmtstr = '<p><a href="%s">via</a></p>\n'
                source = fmtstr % post.get('source_url')
                caption = post.get('caption')
                players = '\n'.join(player.get('embed_code')
                                    for player in post.get('player'))
                content = source + caption + players
            elif type == 'answer':
                title = post.get('question')
                content = ('<p>'
                           '<a href="%s" rel="external nofollow">%s</a>'
                           ': %s'
                           '</p>\n'
                           ' %s' % (post.get('asking_name'),
                                    post.get('asking_url'),
                                    post.get('question'),
                                    post.get('answer')))

            content = content.rstrip() + '\n'
            kind = 'article'
            status = 'published'  # TODO: Find a way for draft posts

            yield (title, content, slug, date, post.get('blog_name'),
                   [type], tags, status, kind, format)

        offset += len(posts)
        posts = get_tumblr_posts(api_key, blogname, offset)

def __init__(self, content, metadata=None, settings=None,
             source_path=None, context=None):
    if metadata is None:
        metadata = {}
    if settings is None:
        settings = copy.deepcopy(DEFAULT_CONFIG)

    self.settings = settings
    self._content = content
    if context is None:
        context = {}
    self._context = context
    self.translations = []

    local_metadata = dict()
    local_metadata.update(metadata)

    # set metadata as attributes
    for key, value in local_metadata.items():
        if key in ('save_as', 'url'):
            key = 'override_' + key
        setattr(self, key.lower(), value)

    # also keep track of the metadata attributes available
    self.metadata = local_metadata

    # default template if it's not defined in page
    self.template = self._get_template()

    # First, read the authors from "authors", if not, fallback to "author"
    # and if not use the settings defined one, if any.
    if not hasattr(self, 'author'):
        if hasattr(self, 'authors'):
            self.author = self.authors[0]
        elif 'AUTHOR' in settings:
            self.author = Author(settings['AUTHOR'], settings)

    if not hasattr(self, 'authors') and hasattr(self, 'author'):
        self.authors = [self.author]

    # XXX Split all the following code into pieces, there is too much here.

    # manage languages
    self.in_default_lang = True
    if 'DEFAULT_LANG' in settings:
        default_lang = settings['DEFAULT_LANG'].lower()
        if not hasattr(self, 'lang'):
            self.lang = default_lang

        self.in_default_lang = (self.lang == default_lang)

    # create the slug if not existing, generate slug according to
    # setting of SLUG_ATTRIBUTE
    if not hasattr(self, 'slug'):
        if (settings['SLUGIFY_SOURCE'] == 'title' and
                hasattr(self, 'title')):
            self.slug = slugify(self.title,
                                settings.get('SLUG_SUBSTITUTIONS', ()))
        elif (settings['SLUGIFY_SOURCE'] == 'basename' and
                source_path is not None):
            basename = os.path.basename(os.path.splitext(source_path)[0])
            self.slug = slugify(basename,
                                settings.get('SLUG_SUBSTITUTIONS', ()))

    self.source_path = source_path

    # manage the date format
    if not hasattr(self, 'date_format'):
        if hasattr(self, 'lang') and self.lang in settings['DATE_FORMATS']:
            self.date_format = settings['DATE_FORMATS'][self.lang]
        else:
            self.date_format = settings['DEFAULT_DATE_FORMAT']

    if isinstance(self.date_format, tuple):
        locale_string = self.date_format[0]
        if sys.version_info < (3, ) and isinstance(locale_string,
                                                   six.text_type):
            locale_string = locale_string.encode('ascii')
        locale.setlocale(locale.LC_ALL, locale_string)
        self.date_format = self.date_format[1]

    # manage timezone
    default_timezone = settings.get('TIMEZONE', 'UTC')
    timezone = getattr(self, 'timezone', default_timezone)

    if hasattr(self, 'date'):
        self.date = set_date_tzinfo(self.date, timezone)
        self.locale_date = strftime(self.date, self.date_format)

    if hasattr(self, 'modified'):
        self.modified = set_date_tzinfo(self.modified, timezone)
        self.locale_modified = strftime(self.modified, self.date_format)

    # manage status
    if not hasattr(self, 'status'):
        self.status = settings['DEFAULT_STATUS']
        if not settings['WITH_FUTURE_DATES'] and hasattr(self, 'date'):
            if self.date.tzinfo is None:
                now = SafeDatetime.now()
            else:
                now = SafeDatetime.utcnow().replace(tzinfo=pytz.utc)
            if self.date > now:
                self.status = 'draft'

    # store the summary metadata if it is set
    if 'summary' in metadata:
        self._summary = metadata['summary']

    signals.content_object_init.send(self)

def tumblr2fields(api_key, blogname):
    """Imports Tumblr posts (API v2)"""
    try:
        # py3k import
        import json
    except ImportError:
        # py2 import
        import simplejson as json

    try:
        # py3k import
        import urllib.request as urllib_request
    except ImportError:
        # py2 import
        import urllib2 as urllib_request

    def get_tumblr_posts(api_key, blogname, offset=0):
        url = ("http://api.tumblr.com/v2/blog/%s.tumblr.com/"
               "posts?api_key=%s&offset=%d&filter=raw") % (blogname, api_key,
                                                           offset)
        request = urllib_request.Request(url)
        handle = urllib_request.urlopen(request)
        posts = json.loads(handle.read().decode('utf-8'))
        return posts.get('response').get('posts')

    offset = 0
    posts = get_tumblr_posts(api_key, blogname, offset)
    settings = read_settings()
    subs = settings['SLUG_REGEX_SUBSTITUTIONS']
    while len(posts) > 0:
        for post in posts:
            title = \
                post.get('title') or \
                post.get('source_title') or \
                post.get('type').capitalize()
            slug = post.get('slug') or slugify(title, regex_subs=subs)
            tags = post.get('tags')
            timestamp = post.get('timestamp')
            date = SafeDatetime.fromtimestamp(
                int(timestamp)).strftime("%Y-%m-%d %H:%M:%S")
            slug = SafeDatetime.fromtimestamp(
                int(timestamp)).strftime("%Y-%m-%d-") + slug
            format = post.get('format')
            content = post.get('body')
            type = post.get('type')
            if type == 'photo':
                if format == 'markdown':
                    fmtstr = '![%s](%s)'
                else:
                    fmtstr = '<img alt="%s" src="%s" />'
                content = ''
                for photo in post.get('photos'):
                    content += '\n'.join(
                        fmtstr % (photo.get('caption'),
                                  photo.get('original_size').get('url')))
                content += '\n\n' + post.get('caption')
            elif type == 'quote':
                if format == 'markdown':
                    fmtstr = '\n\n— %s'
                else:
                    fmtstr = '<p>— %s</p>'
                content = post.get('text') + fmtstr % post.get('source')
            elif type == 'link':
                if format == 'markdown':
                    fmtstr = '[via](%s)\n\n'
                else:
                    fmtstr = '<p><a href="%s">via</a></p>\n'
                content = fmtstr % post.get('url') + post.get('description')
            elif type == 'audio':
                if format == 'markdown':
                    fmtstr = '[via](%s)\n\n'
                else:
                    fmtstr = '<p><a href="%s">via</a></p>\n'
                content = fmtstr % post.get('source_url') + \
                    post.get('caption') + \
                    post.get('player')
            elif type == 'video':
                if format == 'markdown':
                    fmtstr = '[via](%s)\n\n'
                else:
                    fmtstr = '<p><a href="%s">via</a></p>\n'
                source = fmtstr % post.get('source_url')
                caption = post.get('caption')
                players = '\n'.join(
                    player.get('embed_code')
                    for player in post.get('player'))
                content = source + caption + players
            elif type == 'answer':
                title = post.get('question')
                content = ('<p>'
                           '<a href="%s" rel="external nofollow">%s</a>'
                           ': %s'
                           '</p>\n'
                           ' %s' % (post.get('asking_name'),
                                    post.get('asking_url'),
                                    post.get('question'),
                                    post.get('answer')))

            content = content.rstrip() + '\n'
            kind = 'article'
            status = 'published'  # TODO: Find a way for draft posts

            yield (title, content, slug, date, post.get('blog_name'),
                   [type], tags, status, kind, format)

        offset += len(posts)
        posts = get_tumblr_posts(api_key, blogname, offset)

def __init__(self, content, metadata=None, settings=None, source_path=None, context=None): if metadata is None: metadata = {} if settings is None: settings = copy.deepcopy(DEFAULT_CONFIG) self.settings = settings self._content = content if context is None: context = {} self._context = context self.translations = [] local_metadata = dict() local_metadata.update(metadata) # set metadata as attributes for key, value in local_metadata.items(): if key in ("save_as", "url"): key = "override_" + key setattr(self, key.lower(), value) # also keep track of the metadata attributes available self.metadata = local_metadata # default template if it's not defined in page self.template = self._get_template() # First, read the authors from "authors", if not, fallback to "author" # and if not use the settings defined one, if any. if not hasattr(self, "author"): if hasattr(self, "authors"): self.author = self.authors[0] elif "AUTHOR" in settings: self.author = Author(settings["AUTHOR"], settings) if not hasattr(self, "authors") and hasattr(self, "author"): self.authors = [self.author] # XXX Split all the following code into pieces, there is too much here. # manage languages self.in_default_lang = True if "DEFAULT_LANG" in settings: default_lang = settings["DEFAULT_LANG"].lower() if not hasattr(self, "lang"): self.lang = default_lang self.in_default_lang = self.lang == default_lang # create the slug if not existing, generate slug according to # setting of SLUG_ATTRIBUTE if not hasattr(self, "slug"): if settings["SLUGIFY_SOURCE"] == "title" and hasattr(self, "title"): self.slug = slugify(self.title, settings.get("SLUG_SUBSTITUTIONS", ())) elif settings["SLUGIFY_SOURCE"] == "basename" and source_path is not None: basename = os.path.basename(os.path.splitext(source_path)[0]) self.slug = slugify(basename, settings.get("SLUG_SUBSTITUTIONS", ())) self.source_path = source_path # manage the date format if not hasattr(self, "date_format"): if hasattr(self, "lang") and self.lang in settings["DATE_FORMATS"]: self.date_format = settings["DATE_FORMATS"][self.lang] else: self.date_format = settings["DEFAULT_DATE_FORMAT"] if isinstance(self.date_format, tuple): locale_string = self.date_format[0] if sys.version_info < (3,) and isinstance(locale_string, six.text_type): locale_string = locale_string.encode("ascii") locale.setlocale(locale.LC_ALL, locale_string) self.date_format = self.date_format[1] # manage timezone default_timezone = settings.get("TIMEZONE", "UTC") timezone = getattr(self, "timezone", default_timezone) if hasattr(self, "date"): self.date = set_date_tzinfo(self.date, timezone) self.locale_date = strftime(self.date, self.date_format) if hasattr(self, "modified"): self.modified = set_date_tzinfo(self.modified, timezone) self.locale_modified = strftime(self.modified, self.date_format) # manage status if not hasattr(self, "status"): self.status = settings["DEFAULT_STATUS"] if not settings["WITH_FUTURE_DATES"] and hasattr(self, "date"): if self.date.tzinfo is None: now = SafeDatetime.now() else: now = SafeDatetime.utcnow().replace(tzinfo=pytz.utc) if self.date > now: self.status = "draft" # store the summary metadata if it is set if "summary" in metadata: self._summary = metadata["summary"] signals.content_object_init.send(self)