Exemplo n.º 1
0
    def test_article_with_filename_metadata(self):
        """Check the metadata obtained with different FILENAME_METADATA values.

        The same source file is read three times: with the setting set to
        ``None``, with a pattern capturing only ``date``, and with a pattern
        capturing ``date``, ``Slug``, ``MyMeta`` and ``author``.
        """
        # FILENAME_METADATA disabled: only these two keys are checked.
        page = self.read_file(path='2012-11-30_md_w_filename_meta#foo-bar.md',
                              FILENAME_METADATA=None)
        expected = {
            'category': 'yeah',
            'author': 'Alexis Métaireau',
        }
        for key, value in expected.items():
            self.assertEqual(value, page.metadata[key], key)

        # Raw strings keep ``\d`` as a regex escape instead of relying on
        # Python passing an unrecognised string escape through unchanged.
        page = self.read_file(
            path='2012-11-30_md_w_filename_meta#foo-bar.md',
            FILENAME_METADATA=r'(?P<date>\d{4}-\d{2}-\d{2}).*')
        expected = {
            'category': 'yeah',
            'author': 'Alexis Métaireau',
            'date': SafeDatetime(2012, 11, 30),
        }
        for key, value in expected.items():
            self.assertEqual(value, page.metadata[key], key)

        # The expected keys are lower-case although the group names are not,
        # and the expected author is unchanged despite the ``author`` group.
        page = self.read_file(
            path='2012-11-30_md_w_filename_meta#foo-bar.md',
            FILENAME_METADATA=(r'(?P<date>\d{4}-\d{2}-\d{2})'
                               r'_(?P<Slug>.*)'
                               r'#(?P<MyMeta>.*)-(?P<author>.*)'))
        expected = {
            'category': 'yeah',
            'author': 'Alexis Métaireau',
            'date': SafeDatetime(2012, 11, 30),
            'slug': 'md_w_filename_meta',
            'mymeta': 'foo',
        }
        for key, value in expected.items():
            self.assertEqual(value, page.metadata[key], key)
Exemplo n.º 2
0
    def test_article_with_metadata(self):
        """Read ``article_with_metadata.rst`` and compare selected metadata.

        Every key of the expected mapping must be present in the page
        metadata with an equal value; other metadata keys are not checked.
        """
        expected = dict(
            category='yeah',
            author='Alexis Métaireau',
            title='This is a super article !',
            summary=('<p class="first last">Multi-line metadata should be'
                     ' supported\nas well as <strong>inline'
                     ' markup</strong> and stuff to &quot;typogrify'
                     '&quot;...</p>\n'),
            date=SafeDatetime(2010, 12, 2, 10, 14),
            modified=SafeDatetime(2010, 12, 2, 10, 20),
            tags=['foo', 'bar', 'foobar'],
            custom_field='http://notmyidea.org',
        )
        page = self.read_file(path='article_with_metadata.rst')

        # The field name doubles as the failure message.
        for field in expected:
            self.assertEqual(expected[field], page.metadata[field], field)
Exemplo n.º 3
0
    def test_article_with_filename_metadata(self):
        """Check the metadata obtained with different FILENAME_METADATA values.

        One source file is read with the setting set to ``None``, with a
        date-only pattern, and with a pattern that has ``date``, ``Slug``,
        ``MyMeta`` and ``author`` groups.
        """
        source = '2012-11-30_md_w_filename_meta#foo-bar.md'
        date_pattern = r'(?P<date>\d{4}-\d{2}-\d{2}).*'
        full_pattern = (r'(?P<date>\d{4}-\d{2}-\d{2})'
                        r'_(?P<Slug>.*)'
                        r'#(?P<MyMeta>.*)-(?P<author>.*)')

        # Setting disabled.
        base = {'category': 'yeah', 'author': 'Alexis Métaireau'}
        page = self.read_file(path=source, FILENAME_METADATA=None)
        self.assertDictHasSubset(page.metadata, base)

        # Only the date is captured from the file name.
        with_date = dict(base, date=SafeDatetime(2012, 11, 30))
        page = self.read_file(path=source, FILENAME_METADATA=date_pattern)
        self.assertDictHasSubset(page.metadata, with_date)

        # Expected keys are lower-case; the expected author is unchanged.
        with_groups = dict(with_date, slug='md_w_filename_meta', mymeta='foo')
        page = self.read_file(path=source, FILENAME_METADATA=full_pattern)
        self.assertDictHasSubset(page.metadata, with_groups)
Exemplo n.º 4
0
    def test_article_with_metadata(self):
        """Read two Markdown fixtures and compare a subset of their metadata.

        The second fixture carries non-ASCII title, summary, category and
        tags.
        """
        cases = (
            ('article_with_md_extension.md', {
                'category': 'test',
                'title': 'Test md File',
                'summary': '<p>I have a lot to test</p>',
                'date': SafeDatetime(2010, 12, 2, 10, 14),
                'modified': SafeDatetime(2010, 12, 2, 10, 20),
                'tags': ['foo', 'bar', 'foobar'],
            }),
            ('article_with_markdown_and_nonascii_summary.md', {
                'title': 'マックOS X 10.8でパイソンとVirtualenvをインストールと設定',
                'summary': '<p>パイソンとVirtualenvをまっくでインストールする方法について明確に説明します。</p>',
                'category': '指導書',
                'date': SafeDatetime(2012, 12, 20),
                'modified': SafeDatetime(2012, 12, 22),
                'tags': ['パイソン', 'マック'],
                'slug': 'python-virtualenv-on-mac-osx-mountain-lion-10.8',
            }),
        )

        # One reader instance is reused for both files.
        reader = readers.MarkdownReader(settings=get_settings())
        for file_name, wanted in cases:
            _content, metadata = reader.read(_path(file_name))
            self.assertDictHasSubset(metadata, wanted)
Exemplo n.º 5
0
    def test_readfile_path_metadata_explicit_dates(self):
        """Read the fixture with DEFAULT_DATE='fs' and expect fixed dates.

        Both ``date`` and ``modified`` must match the hard-coded values.
        """
        page = self.read_file(
            path='article_with_metadata_explicit_dates.html',
            DEFAULT_DATE='fs')

        wanted = dict(
            date=SafeDatetime(2010, 12, 2, 10, 14),
            modified=SafeDatetime(2010, 12, 31, 23, 59),
        )
        self.assertDictHasSubset(page.metadata, wanted)
Exemplo n.º 6
0
    def test_readfile_path_metadata_implicit_date_explicit_modified(self):
        """Read the fixture with DEFAULT_DATE='fs' and check both dates.

        ``date`` is expected to equal the fixture file's modification time,
        while ``modified`` must match a hard-coded value.
        """
        fixture = 'article_with_metadata_implicit_date_explicit_modified.html'
        page = self.read_file(path=fixture, DEFAULT_DATE='fs')

        # Modification time of the fixture as reported by the file system.
        fixture_mtime = os.stat(_path(fixture)).st_mtime
        wanted = dict(
            date=SafeDatetime.fromtimestamp(fixture_mtime),
            modified=SafeDatetime(2010, 12, 2, 10, 14),
        )
        self.assertDictHasSubset(page.metadata, wanted)
Exemplo n.º 7
0
def blogger2fields(xml):
    """Open a Blogger XML file and yield Pelican fields for each entry.

    Entries whose kind is not a post, comment or page are skipped.

    Yields:
        tuple: ``(title, content, filename, date, author, None, tags,
        status, kind, 'html')``. The sixth item (categories) is always
        ``None`` and ``status`` is ``'published'`` or ``'draft'``.
    """

    soup = xml_to_soup(xml)
    entries = soup.feed.findAll('entry')
    for entry in entries:
        raw_kind = entry.find('category', {
            'scheme': 'http://schemas.google.com/g/2005#kind'
        }).get('term')
        if raw_kind == 'http://schemas.google.com/blogger/2008/kind#post':
            kind = 'article'
        elif raw_kind == 'http://schemas.google.com/blogger/2008/kind#comment':
            kind = 'comment'
        elif raw_kind == 'http://schemas.google.com/blogger/2008/kind#page':
            kind = 'page'
        else:
            continue

        # Posts and pages take their filename from the "alternate" link.
        # An explicit test is used rather than ``assert`` so the behaviour
        # does not change when Python runs with assertions disabled (-O).
        filename = None
        if kind != 'comment':
            try:
                href = entry.find('link', {'rel': 'alternate'})['href']
                filename = os.path.splitext(os.path.basename(href))[0]
            except (TypeError, KeyError):
                # No alternate link, or the link has no href.
                pass
        if filename is None:
            # Comments and entries without a usable link fall back to the
            # text after the last '.' of the entry id.
            filename = entry.find('id').string.split('.')[-1]

        title = entry.find('title').string or ''

        content = entry.find('content').string
        raw_date = entry.find('published').string
        if hasattr(SafeDatetime, 'fromisoformat'):
            date_object = SafeDatetime.fromisoformat(raw_date)
        else:
            # Without fromisoformat only the first 23 characters
            # (date, time and milliseconds) are parsed.
            date_object = SafeDatetime.strptime(raw_date[:23],
                                                '%Y-%m-%dT%H:%M:%S.%f')
        date = date_object.strftime('%Y-%m-%d %H:%M')
        author = entry.find('author').find('name').string

        # blogger posts only have tags, no category
        tags = [
            tag.get('term') for tag in entry.findAll(
                'category', {'scheme': 'http://www.blogger.com/atom/ns#'})
        ]

        # Drafts have <app:control><app:draft>yes</app:draft></app:control>
        status = 'published'
        try:
            if entry.find('control').find('draft').string == 'yes':
                status = 'draft'
        except AttributeError:
            # No <control> or <draft> element: keep 'published'.
            pass

        yield (title, content, filename, date, author, None, tags, status,
               kind, 'html')
Exemplo n.º 8
0
def blogger2fields(xml):
    """Open a Blogger XML file and yield Pelican fields for each entry.

    Only entries of kind post, comment or page are processed; any other
    kind is skipped.

    Yields:
        tuple: ``(title, content, filename, date, author, None, tags,
        status, kind, 'html')``. The categories slot is always ``None``
        and ``status`` is ``'published'`` or ``'draft'``.
    """

    soup = xml_to_soup(xml)
    entries = soup.feed.findAll('entry')
    for entry in entries:
        raw_kind = entry.find(
            'category', {'scheme': 'http://schemas.google.com/g/2005#kind'}
        ).get('term')
        if raw_kind == 'http://schemas.google.com/blogger/2008/kind#post':
            kind = 'article'
        elif raw_kind == 'http://schemas.google.com/blogger/2008/kind#comment':
            kind = 'comment'
        elif raw_kind == 'http://schemas.google.com/blogger/2008/kind#page':
            kind = 'page'
        else:
            continue

        # Derive the filename from the "alternate" link for non-comments.
        # ``assert`` is not used for this decision because assertions are
        # removed under -O, which would change what comments receive.
        filename = None
        if kind != 'comment':
            try:
                link_target = entry.find('link', {'rel': 'alternate'})['href']
                filename = os.path.splitext(
                    os.path.basename(link_target))[0]
            except (TypeError, KeyError):
                # Link element missing, or present without an href.
                pass
        if filename is None:
            # Fallback: the text after the last '.' of the entry id.
            filename = entry.find('id').string.split('.')[-1]

        title = entry.find('title').string or ''

        content = entry.find('content').string
        raw_date = entry.find('published').string
        if hasattr(SafeDatetime, 'fromisoformat'):
            date_object = SafeDatetime.fromisoformat(raw_date)
        else:
            # Fallback parses only the first 23 characters of the stamp.
            date_object = SafeDatetime.strptime(
                raw_date[:23], '%Y-%m-%dT%H:%M:%S.%f')
        date = date_object.strftime('%Y-%m-%d %H:%M')
        author = entry.find('author').find('name').string

        # blogger posts only have tags, no category
        tags = [tag.get('term') for tag in entry.findAll(
            'category', {'scheme': 'http://www.blogger.com/atom/ns#'})]

        # Drafts have <app:control><app:draft>yes</app:draft></app:control>
        status = 'published'
        try:
            if entry.find('control').find('draft').string == 'yes':
                status = 'draft'
        except AttributeError:
            # Missing <control>/<draft> element means the entry is published.
            pass

        yield (title, content, filename, date, author, None, tags, status,
               kind, 'html')
Exemplo n.º 9
0
    def __init__(self, *args, **kwargs):
        """Initialise the article, then adjust ``status`` and ``date``.

        All arguments are forwarded unchanged to the parent initialiser.
        """
        super(Article, self).__init__(*args, **kwargs)

        # WITH_FUTURE_DATES disabled: an article dated after "now" becomes
        # a draft.
        if not self.settings['WITH_FUTURE_DATES'] and hasattr(self, 'date'):
            # Compare like with like: an aware "now" for aware dates, a
            # naive "now" for naive dates.
            if self.date.tzinfo is not None:
                current = SafeDatetime.utcnow().replace(tzinfo=pytz.utc)
            else:
                current = SafeDatetime.now()
            if self.date > current:
                self.status = 'draft'

        # A draft without any date gets the largest representable datetime.
        if not hasattr(self, 'date') and self.status == 'draft':
            self.date = SafeDatetime.max
Exemplo n.º 10
0
    def __init__(self, *args, **kwargs):
        """Run the parent initialiser, then apply the draft/date rules.

        * With ``WITH_FUTURE_DATES`` false, an article dated after the
          current time gets status ``'draft'``.
        * A draft that has no ``date`` attribute gets ``SafeDatetime.max``.
        """
        super(Article, self).__init__(*args, **kwargs)

        if not self.settings['WITH_FUTURE_DATES'] and hasattr(self, 'date'):
            # Naive dates are compared with a naive "now", aware dates
            # with a UTC-aware "now".
            now = (
                SafeDatetime.now()
                if self.date.tzinfo is None
                else SafeDatetime.utcnow().replace(tzinfo=pytz.utc)
            )
            if self.date > now:
                self.status = 'draft'

        if not hasattr(self, 'date') and self.status == 'draft':
            self.date = SafeDatetime.max
Exemplo n.º 11
0
def path_metadata(full_path, source_path, settings=None):
    """Build metadata derived from a file's path.

    Args:
        full_path: Path handed to ``os.stat`` when the date is taken from
            the file system.
        source_path: Key looked up in ``settings["EXTRA_PATH_METADATA"]``.
        settings: Mapping of settings; when falsy, an empty dict is returned.

    Returns:
        dict: ``date`` set from the file's modification time when
        ``DEFAULT_DATE`` is ``"fs"``, updated with the
        ``EXTRA_PATH_METADATA`` entry for ``source_path`` (which therefore
        wins on conflicting keys).
    """
    metadata = {}
    if settings:
        if settings.get("DEFAULT_DATE", None) == "fs":
            # Use st_mtime: st_ctime is the inode-change time on Unix, not
            # the content modification time.
            metadata["date"] = SafeDatetime.fromtimestamp(os.stat(full_path).st_mtime)
        metadata.update(settings.get("EXTRA_PATH_METADATA", {}).get(source_path, {}))
    return metadata
Exemplo n.º 12
0
def posterous2fields(api_token, email, password):
    """Import Posterous posts and yield Pelican fields.

    Pages are requested one after another, starting at page 1, until a
    request returns no posts.

    Args:
        api_token: API token placed in the request URL.
        email: Account e-mail used for HTTP basic authentication.
        password: Account password used for HTTP basic authentication.

    Yields:
        tuple: ``(title, content, slug, date, author, [], tags, status,
        kind, "html")`` where ``status`` is always ``"published"`` and
        ``kind`` is always ``"article"``.
    """
    import base64
    from datetime import timedelta

    try:
        # py3k import
        import json
    except ImportError:
        # py2 import
        import simplejson as json

    try:
        # py3k import
        import urllib.request as urllib_request
    except ImportError:
        # py2 import
        import urllib2 as urllib_request

    def get_posterous_posts(api_token, email, password, page=1):
        """Fetch one page of posts and return the decoded JSON payload."""
        credentials = ("%s:%s" % (email, password)).encode("utf-8")
        # b64encode emits no line breaks, so nothing has to be stripped.
        # It replaces base64.encodestring, which was removed in Python 3.9.
        base64string = base64.b64encode(credentials)
        url = ("http://posterous.com/api/v2/users/me/sites/primary/" "posts?api_token=%s&page=%d") % (api_token, page)
        request = urllib_request.Request(url)
        request.add_header("Authorization", "Basic %s" % base64string.decode())
        handle = urllib_request.urlopen(request)
        try:
            return json.loads(handle.read().decode("utf-8"))
        finally:
            # Release the connection even when reading or decoding fails.
            handle.close()

    def utc_offset(raw_offset):
        """Turn a signed ``HHMM`` offset such as ``-0700`` into a timedelta."""
        offset = int(raw_offset)
        # Split into hours and minutes; dividing by 100 would read +0530
        # as 5.3 hours instead of 5 hours 30 minutes.
        hours, minutes = divmod(abs(offset), 100)
        delta = timedelta(hours=hours, minutes=minutes)
        return -delta if offset < 0 else delta

    page = 1
    while True:
        # Each page is requested exactly once.
        posts = get_posterous_posts(api_token, email, password, page)
        if not posts:
            break
        page += 1

        for post in posts:
            slug = post.get("slug")
            if not slug:
                slug = slugify(post.get("title"))
            tags = [tag.get("name") for tag in post.get("tags")]
            raw_date = post.get("display_date")
            # The last six characters are a space plus the signed offset.
            date_object = SafeDatetime.strptime(raw_date[:-6], "%Y/%m/%d %H:%M:%S")
            # Subtract the offset from the parsed time
            # (presumably local time to UTC; verify against the API).
            date_object -= utc_offset(raw_date[-5:])
            date = date_object.strftime("%Y-%m-%d %H:%M")
            kind = "article"  # TODO: Recognise pages
            status = "published"  # TODO: Find a way for draft posts

            yield (
                post.get("title"),
                post.get("body_cleaned"),
                slug,
                date,
                post.get("user").get("display_name"),
                [],
                tags,
                status,
                kind,
                "html",
            )
Exemplo n.º 13
0
def merge_date_url(value, url):
    """
    Fill the ``date`` placeholder of a Pelican URL setting with a date.

    Args
    ----
        value (datetime.datetime): a date
        url (string): a Pelican URL setting containing a ``date`` placeholder

    Returns
    -------
        string: combined URL

    Raises
    ------
        ValueError: when formatting fails both with ``value`` and with the
            ``SafeDatetime`` built from its year, month and day

    """
    try:
        return url.format(date=value)
    except ValueError:
        # A plain datetime.datetime can raise "ValueError" when the url
        # contains a "-" flag (e.g. "{date:%-d}", used in Pelican to strip
        # the leading zero); retry with a SafeDatetime for the same day.
        try:
            day_only = SafeDatetime(value.year, value.month, value.day)
            return url.format(date=day_only)
        except ValueError as err:
            log_args = (LOG_PREFIX, value, type(value), url)
            logger.error(
                "%s ValueError. value: %s, type(value): %s, url: %s",
                *log_args
            )
            raise err
Exemplo n.º 14
0
 def test_article_extra_path_metadata_dont_overwrite(self):
     """Combine FILENAME_METADATA with EXTRA_PATH_METADATA for one file.

     The expected author is not the one supplied through
     EXTRA_PATH_METADATA, while the extra ``key-1b`` entry is expected.
     """
     # EXTRA_PATH_METADATA['author'] should get ignored
     # since we don't overwrite already set values
     input_file_path = '2012-11-29_rst_w_filename_meta#foo-bar.rst'
     filename_pattern = (r'(?P<date>\d{4}-\d{2}-\d{2})'
                         r'_(?P<Slug>.*)'
                         r'#(?P<MyMeta>.*)-(?P<orginalauthor>.*)')
     extra_metadata = {
         input_file_path: {
             'author': 'Charlès Overwrite',
             'key-1b': 'value-1b',
         },
     }
     page = self.read_file(
         path=input_file_path,
         FILENAME_METADATA=filename_pattern,
         EXTRA_PATH_METADATA=extra_metadata)

     wanted = {
         'category': 'yeah',
         'author': 'Alexis Métaireau',
         'title': 'Rst with filename metadata',
         'date': SafeDatetime(2012, 11, 29),
         'slug': 'rst_w_filename_meta',
         'mymeta': 'foo',
         'reader': 'rst',
         'key-1b': 'value-1b',
     }
     self.assertDictHasSubset(page.metadata, wanted)
Exemplo n.º 15
0
def wp2fields(xml, wp_custpost=False):
    """Open a WordPress XML file and yield Pelican fields for each item.

    Only items whose status is "publish" or "draft" are yielded.

    Args:
        xml: Passed to ``xml_to_soup``.
        wp_custpost: When true, a post type other than 'page', 'post' or
            'attachment' is used as the item's kind.

    Yields:
        tuple: ``(title, content, filename, date, author, categories, tags,
        status, kind, 'wp-html')``; ``date`` is ``None`` for the all-zero
        date.
    """
    soup = xml_to_soup(xml)
    for item in soup.rss.channel.findAll('item'):
        wp_status = item.find('status').string
        if wp_status not in ["publish", "draft"]:
            continue

        try:
            # Use HTMLParser due to issues with BeautifulSoup 3
            title = unescape(item.title.contents[0])
        except IndexError:
            title = 'No title [%s]' % item.find('post_name').string
            logger.warning('Post "%s" is lacking a proper title', title)

        filename = get_filename(item.find('post_name').string,
                                item.find('post_id').string)

        content = item.find('encoded').string

        # The all-zero date is treated as "no date".
        raw_date = item.find('post_date').string
        date = None
        if raw_date != u'0000-00-00 00:00:00':
            parsed = SafeDatetime.strptime(raw_date, '%Y-%m-%d %H:%M:%S')
            date = parsed.strftime('%Y-%m-%d %H:%M')
        author = item.find('creator').string

        category_nodes = item.findAll('category', {'domain': 'category'})
        categories = [node.string for node in category_nodes]

        tag_nodes = item.findAll('category', {'domain': 'post_tag'})
        tags = [node.string for node in tag_nodes]

        # To publish a post the status should be 'published'
        status = 'published' if wp_status == "publish" else wp_status

        # Old behaviour was to name everything not a page as an article.
        # Theoretically all attachments have status == inherit so no
        # attachments should be here; they are still mapped to 'article'
        # to maintain existing behaviour in case that doesn't hold true.
        post_type = item.find('post_type').string
        if post_type == 'page':
            kind = 'page'
        elif wp_custpost and post_type not in ('post', 'attachment'):
            kind = post_type
        else:
            kind = 'article'

        yield (title, content, filename, date, author, categories, tags,
               status, kind, 'wp-html')
Exemplo n.º 16
0
 def test_article_with_footnote(self):
     """Render the footnote fixture with '-' as the footnote separator.

     The rendered HTML must match exactly; the metadata must contain the
     expected subset.
     """
     wanted_html = (
         '<p>This is some content'
         '<sup id="fnref-1"><a class="footnote-ref" href="#fn-1"'
         '>1</a></sup>'
         ' with some footnotes'
         '<sup id="fnref-footnote"><a class="footnote-ref" '
         'href="#fn-footnote">2</a></sup></p>\n'
         '<div class="footnote">\n'
         '<hr>\n<ol>\n<li id="fn-1">\n'
         '<p>Numbered footnote&#160;'
         '<a class="footnote-backref" href="#fnref-1" '
         'title="Jump back to footnote 1 in the text">&#8617;</a></p>\n'
         '</li>\n<li id="fn-footnote">\n'
         '<p>Named footnote&#160;'
         '<a class="footnote-backref" href="#fnref-footnote"'
         ' title="Jump back to footnote 2 in the text">&#8617;</a></p>\n'
         '</li>\n</ol>\n</div>')
     wanted_metadata = {
         'title': 'Article with markdown containing footnotes',
         'summary': ('<p>Summary with <strong>inline</strong> markup '
                     '<em>should</em> be supported.</p>'),
         'date': SafeDatetime(2012, 10, 31),
         'modified': SafeDatetime(2012, 11, 1),
         'multiline': [
             'Line Metadata should be handle properly.',
             'See syntax of Meta-Data extension of Python Markdown package:',
             'If a line is indented by 4 or more spaces,',
             'that line is assumed to be an additional line of the value',
             'for the previous keyword.',
             'A keyword may have as many lines as desired.',
         ],
     }

     # Configure the footnotes extension before the reader is created.
     settings = get_settings()
     extension_configs = settings['MARKDOWN']['extension_configs']
     extension_configs['markdown.extensions.footnotes'] = {'SEPARATOR': '-'}

     reader = readers.MarkdownReader(settings)
     content, metadata = reader.read(
         _path('article_with_markdown_and_footnote.md'))

     self.assertEqual(content, wanted_html)
     self.assertDictHasSubset(metadata, wanted_metadata)
Exemplo n.º 17
0
def path_metadata(full_path, source_path, settings=None):
    """Build metadata derived from a file's path.

    Returns an empty dict when ``settings`` is falsy. Otherwise ``date`` is
    set from the modification time of ``full_path`` when ``DEFAULT_DATE``
    is ``'fs'``, and the ``EXTRA_PATH_METADATA`` entry for ``source_path``
    is applied on top of it.
    """
    if not settings:
        return {}

    result = {}
    if settings.get('DEFAULT_DATE') == 'fs':
        modified_at = os.stat(full_path).st_mtime
        result['date'] = SafeDatetime.fromtimestamp(modified_at)

    # Entries for this source path override the file-system date.
    extra_by_path = settings.get('EXTRA_PATH_METADATA', {})
    result.update(extra_by_path.get(source_path, {}))
    return result
Exemplo n.º 18
0
def path_metadata(full_path, source_path, settings=None):
    """Collect path-based metadata for one source file.

    With falsy ``settings`` the result is empty. When ``DEFAULT_DATE`` is
    ``'fs'``, ``date`` is taken from the modification time reported by
    ``os.stat(full_path)``. The ``EXTRA_PATH_METADATA`` entry for
    ``source_path`` is merged last, so its keys take precedence.
    """
    collected = {}
    if settings:
        use_fs_date = settings.get('DEFAULT_DATE', None) == 'fs'
        if use_fs_date:
            stat_result = os.stat(full_path)
            collected['date'] = SafeDatetime.fromtimestamp(
                stat_result.st_mtime)
        per_path = settings.get('EXTRA_PATH_METADATA', {})
        collected.update(per_path.get(source_path, {}))
    return collected
Exemplo n.º 19
0
 def test_article_with_footnote(self):
     """Render the footnote fixture with default settings.

     The HTML must match exactly. Every key present in the returned
     metadata must also be in the expected mapping with an equal value.
     """
     wanted_html = (
         '<p>This is some content'
         '<sup id="fnref:1"><a class="footnote-ref" href="#fn:1" '
         'rel="footnote">1</a></sup>'
         ' with some footnotes'
         '<sup id="fnref:footnote"><a class="footnote-ref" '
         'href="#fn:footnote" rel="footnote">2</a></sup></p>\n'
         '<div class="footnote">\n'
         '<hr />\n<ol>\n<li id="fn:1">\n'
         '<p>Numbered footnote&#160;'
         '<a class="footnote-backref" href="#fnref:1" rev="footnote" '
         'title="Jump back to footnote 1 in the text">&#8617;</a></p>\n'
         '</li>\n<li id="fn:footnote">\n'
         '<p>Named footnote&#160;'
         '<a class="footnote-backref" href="#fnref:footnote" rev="footnote"'
         ' title="Jump back to footnote 2 in the text">&#8617;</a></p>\n'
         '</li>\n</ol>\n</div>')
     wanted_metadata = {
         'title': 'Article with markdown containing footnotes',
         'summary': ('<p>Summary with <strong>inline</strong> markup '
                     '<em>should</em> be supported.</p>'),
         'date': SafeDatetime(2012, 10, 31),
         'modified': SafeDatetime(2012, 11, 1),
         'slug': 'article-with-markdown-containing-footnotes',
         'multiline': [
             'Line Metadata should be handle properly.',
             'See syntax of Meta-Data extension of Python Markdown package:',
             'If a line is indented by 4 or more spaces,',
             'that line is assumed to be an additional line of the value',
             'for the previous keyword.',
             'A keyword may have as many lines as desired.',
         ],
     }

     reader = readers.MarkdownReader(settings=get_settings())
     content, metadata = reader.read(
         _path('article_with_markdown_and_footnote.md'))

     self.assertEqual(content, wanted_html)
     # The loop runs over the actual metadata, not the expected mapping.
     for name, actual in metadata.items():
         self.assertEqual(actual, wanted_metadata[name], name)
Exemplo n.º 20
0
    def test_datetime(self):
        """Check ``Page.locale_date`` with default and per-language formats.

        The first part compares ``locale_date`` with the date formatted by
        ``DEFAULT_DATE_FORMAT``. The second part sets a ``DATE_FORMATS``
        entry for language ``jp`` with a Japanese locale; the test is
        reported as skipped when that locale is unavailable.
        """
        # If DATETIME is set to a tuple, it should be used to override LOCALE
        dt = SafeDatetime(2015, 9, 13)

        page_kwargs = self._copy_page_kwargs()

        # set its date to dt
        page_kwargs['metadata']['date'] = dt
        page = Page(**page_kwargs)

        # page.locale_date is a unicode string in both python2 and python3
        dt_date = dt.strftime(DEFAULT_CONFIG['DEFAULT_DATE_FORMAT'])
        # dt_date is a byte string in python2, and a unicode string in python3
        # Let's make sure it is a unicode string (relies on python 3.3
        # supporting the u prefix)
        if not isinstance(dt_date, type(u'')):
            # python2:
            dt_date = unicode(dt_date, 'utf8')

        self.assertEqual(page.locale_date, dt_date)
        page_kwargs['settings'] = get_settings()

        # I doubt this can work on all platforms ...
        if platform == "win32":
            locale = 'jpn'
        else:
            locale = 'ja_JP.utf8'
        page_kwargs['settings']['DATE_FORMATS'] = {
            'jp': (locale, '%Y-%m-%d(%a)')
        }
        page_kwargs['metadata']['lang'] = 'jp'

        # Imported under an alias because ``locale`` is a local string here.
        import locale as locale_module
        try:
            page = Page(**page_kwargs)
            self.assertEqual(page.locale_date, '2015-09-13(\u65e5)')
        except locale_module.Error:
            # The constructor of ``Page`` will try to set the locale to
            # ``ja_JP.utf8``. This attempt fails when there is no such
            # locale in the system. You can see which locales there are
            # in your system with the ``locale -a`` command.
            #
            # ``unittest.skip(...)`` called as a plain function only builds
            # a decorator and skips nothing; ``skipTest`` raises SkipTest so
            # the runner reports the test as skipped.
            self.skipTest("There is no locale %s in this system." % locale)
Exemplo n.º 21
0
def wp2fields(xml, wp_custpost=False):
    """Open a WordPress XML file and yield Pelican fields for each item.

    Items whose status is neither "publish" nor "draft" are ignored.

    Args:
        xml: Passed to ``xml_to_soup``.
        wp_custpost: When true, post types other than 'page', 'post' and
            'attachment' become the item's kind instead of 'article'.

    Yields:
        tuple: ``(title, content, filename, date, author, categories, tags,
        status, kind, 'wp-html')``.
    """
    channel = xml_to_soup(xml).rss.channel
    for item in channel.findAll('item'):
        item_status = item.find('status').string
        if item_status not in ("publish", "draft"):
            continue

        try:
            # Use HTMLParser due to issues with BeautifulSoup 3
            title = unescape(item.title.contents[0])
        except IndexError:
            title = 'No title [%s]' % item.find('post_name').string
            logger.warning('Post "%s" is lacking a proper title', title)

        post_name = item.find('post_name').string
        post_id = item.find('post_id').string
        filename = get_filename(post_name, post_id)

        content = item.find('encoded').string

        raw_date = item.find('post_date').string
        if raw_date == u'0000-00-00 00:00:00':
            # All-zero date: no date is emitted.
            date = None
        else:
            date = SafeDatetime.strptime(
                raw_date, '%Y-%m-%d %H:%M:%S').strftime('%Y-%m-%d %H:%M')
        author = item.find('creator').string

        categories = [
            element.string
            for element in item.findAll('category', {'domain': 'category'})
        ]
        tags = [
            element.string
            for element in item.findAll('category', {'domain': 'post_tag'})
        ]

        # To publish a post the status should be 'published'
        if item_status == "publish":
            status = 'published'
        else:
            status = item_status

        # Old behaviour was to name everything not a page as an article.
        # Theoretically all attachments have status == inherit so none
        # should be here; they stay 'article' to maintain existing
        # behaviour in case that doesn't hold true.
        post_type = item.find('post_type').string
        kind = 'article'
        if post_type == 'page':
            kind = 'page'
        elif wp_custpost and post_type not in ('post', 'attachment'):
            kind = post_type

        yield (title, content, filename, date, author, categories,
               tags, status, kind, 'wp-html')
Exemplo n.º 22
0
    def test_datetime(self):
        """Check ``Page.locale_date`` with default and per-language formats.

        First the default ``DEFAULT_DATE_FORMAT`` rendering is compared,
        then a ``DATE_FORMATS`` entry for language ``jp`` using a Japanese
        locale. When that locale is unavailable the test is reported as
        skipped.
        """
        # If DATETIME is set to a tuple, it should be used to override LOCALE
        dt = SafeDatetime(2015, 9, 13)

        page_kwargs = self._copy_page_kwargs()

        # set its date to dt
        page_kwargs['metadata']['date'] = dt
        page = Page(**page_kwargs)

        # page.locale_date is a unicode string in both python2 and python3
        dt_date = dt.strftime(DEFAULT_CONFIG['DEFAULT_DATE_FORMAT'])
        # dt_date is a byte string in python2, and a unicode string in python3
        # Let's make sure it is a unicode string (relies on python 3.3
        # supporting the u prefix)
        if not isinstance(dt_date, type(u'')):
            # python2:
            dt_date = unicode(dt_date, 'utf8')

        self.assertEqual(page.locale_date, dt_date)
        page_kwargs['settings'] = get_settings()

        # I doubt this can work on all platforms ...
        if platform == "win32":
            locale = 'jpn'
        else:
            locale = 'ja_JP.utf8'
        page_kwargs['settings']['DATE_FORMATS'] = {'jp': (locale,
                                                          '%Y-%m-%d(%a)')}
        page_kwargs['metadata']['lang'] = 'jp'

        # Aliased import: ``locale`` already names the locale string above.
        import locale as locale_module
        try:
            page = Page(**page_kwargs)
            self.assertEqual(page.locale_date, '2015-09-13(\u65e5)')
        except locale_module.Error:
            # The constructor of ``Page`` will try to set the locale to
            # ``ja_JP.utf8``. This attempt fails when there is no such
            # locale in the system. You can see which locales there are
            # in your system with the ``locale -a`` command.
            #
            # ``skipTest`` raises SkipTest so the runner records a skip;
            # calling ``unittest.skip(...)`` here would only create an
            # unused decorator and let the test pass silently.
            self.skipTest("There is no locale %s in this system." % locale)
Exemplo n.º 23
0
def datetime_from_period(value):
    """
    Convert a "period" into a datetime object.

    On yearly/monthly/daily archive pages, a "period" object is supplied so
    you know which time period the archive page covers. This turns it into
    a datetime object so it can be processed further.

    A missing month (yearly archive) is taken to be January, and a missing
    day (yearly or monthly archive) is taken to be the 1st.

    A tuple of up to three integers is accepted as well, with the month
    given as a number (1=January, etc). A single integer is treated as a
    year.

    Args
    ----
        value (tuple or int): input period

    Returns
    -------
        datetime.datetime: value converted

    """
    parts = (value, ) if isinstance(value, int) else value
    size = len(parts)

    # Resolve the month to the name that "%B" parses again below.
    if size >= 2 and isinstance(parts[1], int):
        month_name = SafeDatetime(2021, parts[1], 1).strftime("%B")
    elif size == 1:
        month_name = SafeDatetime(2021, 1, 1).strftime("%B")
    else:
        # Month supplied in non-integer form; used as given.
        month_name = parts[1]

    day_text = str(parts[2]) if size >= 3 else "1"
    period_text = " ".join((str(parts[0]), month_name, day_text))
    return SafeDatetime.strptime(period_text, "%Y %B %d")
Exemplo n.º 24
0
def posterous2fields(api_token, email, password):
    """Import Posterous posts and yield Pelican fields.

    Pages are requested in sequence, starting at page 1, until a request
    returns no posts. Posts without a slug get one built from their title
    using the ``SLUG_REGEX_SUBSTITUTIONS`` setting.

    Args:
        api_token: API token placed in the request URL.
        email: Account e-mail used for HTTP basic authentication.
        password: Account password used for HTTP basic authentication.

    Yields:
        tuple: ``(title, content, slug, date, author, [], tags, status,
        kind, 'html')`` where ``status`` is always ``'published'`` and
        ``kind`` is always ``'article'``.
    """
    import base64
    from datetime import timedelta
    try:
        # py3k import
        import json
    except ImportError:
        # py2 import
        import simplejson as json

    try:
        # py3k import
        import urllib.request as urllib_request
    except ImportError:
        # py2 import
        import urllib2 as urllib_request

    def get_posterous_posts(api_token, email, password, page=1):
        """Fetch one page of posts and return the decoded JSON payload."""
        # b64encode emits no line breaks, so nothing has to be stripped.
        # It replaces base64.encodestring, which was removed in Python 3.9.
        base64string = base64.b64encode(
            ("%s:%s" % (email, password)).encode('utf-8'))
        url = ("http://posterous.com/api/v2/users/me/sites/primary/"
               "posts?api_token=%s&page=%d") % (api_token, page)
        request = urllib_request.Request(url)
        request.add_header('Authorization', 'Basic %s' % base64string.decode())
        handle = urllib_request.urlopen(request)
        try:
            posts = json.loads(handle.read().decode('utf-8'))
        finally:
            # Release the connection even when reading or decoding fails.
            handle.close()
        return posts

    def utc_offset(raw_offset):
        """Turn a signed ``HHMM`` offset such as ``-0700`` into a timedelta."""
        offset = int(raw_offset)
        # Split into hours and minutes; dividing by 100 would read +0530
        # as 5.3 hours instead of 5 hours 30 minutes.
        hours, minutes = divmod(abs(offset), 100)
        delta = timedelta(hours=hours, minutes=minutes)
        return -delta if offset < 0 else delta

    settings = read_settings()
    subs = settings['SLUG_REGEX_SUBSTITUTIONS']
    page = 1
    while True:
        # Each page is requested exactly once.
        posts = get_posterous_posts(api_token, email, password, page)
        if not posts:
            break
        page += 1

        for post in posts:
            slug = post.get('slug')
            if not slug:
                slug = slugify(post.get('title'), regex_subs=subs)
            tags = [tag.get('name') for tag in post.get('tags')]
            raw_date = post.get('display_date')
            # The last six characters are a space plus the signed offset.
            date_object = SafeDatetime.strptime(
                raw_date[:-6], '%Y/%m/%d %H:%M:%S')
            # Subtract the offset from the parsed time
            # (presumably local time to UTC; verify against the API).
            date_object -= utc_offset(raw_date[-5:])
            date = date_object.strftime('%Y-%m-%d %H:%M')
            kind = 'article'      # TODO: Recognise pages
            status = 'published'  # TODO: Find a way for draft posts

            yield (post.get('title'), post.get('body_cleaned'),
                   slug, date, post.get('user').get('display_name'),
                   [], tags, status, kind, 'html')
Exemplo n.º 25
0
def posterous2fields(api_token, email, password):
    """Import Posterous posts and yield them as field tuples.

    Pages of posts are requested from the Posterous API (v2) with HTTP basic
    authentication, starting at page 1, until an empty page comes back.

    :param api_token: API token, sent as the ``api_token`` query parameter.
    :param email: account e-mail, used as the basic-auth user name.
    :param password: account password, used for basic auth.
    :returns: a generator of ``(title, content, slug, date, author,
        categories, tags, status, kind, markup)`` tuples. ``date`` is a
        ``'%Y-%m-%d %H:%M'`` string with the post's UTC offset subtracted.
    """
    import base64
    from datetime import timedelta
    try:
        # py3k import
        import json
    except ImportError:
        # py2 import
        import simplejson as json

    try:
        # py3k import
        import urllib.request as urllib_request
    except ImportError:
        # py2 import
        import urllib2 as urllib_request

    def get_posterous_posts(api_token, email, password, page=1):
        """Fetch one page of posts and return the decoded JSON payload."""
        # b64encode never inserts newlines, so no stripping is needed
        # (base64.encodestring was removed in Python 3.9).
        credentials = ("%s:%s" % (email, password)).encode('utf-8')
        base64string = base64.b64encode(credentials).decode('ascii')
        url = ("http://posterous.com/api/v2/users/me/sites/primary/"
               "posts?api_token=%s&page=%d") % (api_token, page)
        request = urllib_request.Request(url)
        request.add_header('Authorization', 'Basic %s' % base64string)
        handle = urllib_request.urlopen(request)
        try:
            return json.loads(handle.read().decode('utf-8'))
        finally:
            handle.close()

    settings = read_settings()
    subs = settings['SLUG_REGEX_SUBSTITUTIONS']
    page = 1
    while True:
        # Each page is requested exactly once; an empty page ends the import.
        posts = get_posterous_posts(api_token, email, password, page)
        if not posts:
            break
        page += 1

        for post in posts:
            slug = post.get('slug')
            if not slug:
                slug = slugify(post.get('title'), regex_subs=subs)
            tags = [tag.get('name') for tag in post.get('tags')]

            # The last five characters of display_date are a signed HHMM
            # offset; the character before them is a separator.
            raw_date = post.get('display_date')
            date_object = SafeDatetime.strptime(
                raw_date[:-6], '%Y/%m/%d %H:%M:%S')
            offset = int(raw_date[-5:])
            sign = -1 if offset < 0 else 1
            # Split HHMM into hours and minutes so offsets such as +0530
            # are not misread as 5.3 hours.
            hours, minutes = divmod(abs(offset), 100)
            date_object -= sign * timedelta(hours=hours, minutes=minutes)
            date = date_object.strftime('%Y-%m-%d %H:%M')
            kind = 'article'  # TODO: Recognise pages
            status = 'published'  # TODO: Find a way for draft posts

            yield (post.get('title'), post.get('body_cleaned'), slug, date,
                   post.get('user').get('display_name'), [], tags, status,
                   kind, 'html')
Exemplo n.º 26
0
def test_typed_metadata():
    """Check that ``metadata.md`` is read into correctly typed values."""
    _, parsed_metadata = read_content_metadata('metadata.md')

    wanted = {}
    wanted['title'] = 'Metadata'
    wanted['list'] = ['a', 'b', 'c']
    wanted['date'] = SafeDatetime(2017, 1, 6, 22, 24)
    wanted['int'] = 42
    wanted['bool'] = False
    wanted['summary'] = '<p>a summary</p>'

    assert_dict_contains(parsed_metadata, wanted)
Exemplo n.º 27
0
def default_metadata(settings=None, process=None):
    """Build the metadata that applies when a source file sets none.

    :param settings: settings mapping; ``DEFAULT_CATEGORY`` and
        ``DEFAULT_DATE`` are consulted when present.
    :param process: optional ``process(name, value)`` callable applied to the
        default category before it is stored.
    :returns: a new dict with ``category`` and/or ``date`` entries.
    """
    if not settings:
        return {}

    defaults = {}
    if 'DEFAULT_CATEGORY' in settings:
        category = settings['DEFAULT_CATEGORY']
        defaults['category'] = (
            process('category', category) if process else category)

    # 'fs' is skipped here; only other truthy values are unpacked into
    # a SafeDatetime.
    default_date = settings.get('DEFAULT_DATE', None)
    if default_date and default_date != 'fs':
        defaults['date'] = SafeDatetime(*default_date)
    return defaults
Exemplo n.º 28
0
 def test_article_with_metadata_and_contents_attrib(self):
     """Read an HTML article and check the metadata it exposes."""
     article = self.read_file(
         path='article_with_metadata_and_contents.html')
     wanted = {
         'category': 'yeah',
         'author': 'Alexis Métaireau',
         'title': 'This is a super article !',
         'summary': 'Summary and stuff',
     }
     wanted['date'] = SafeDatetime(2010, 12, 2, 10, 14)
     wanted['tags'] = ['foo', 'bar', 'foobar']
     wanted['custom_field'] = 'http://notmyidea.org'
     self.assertDictHasSubset(article.metadata, wanted)
Exemplo n.º 29
0
def posterous2fields(api_token, email, password):
    """Import Posterous posts and yield them as field tuples.

    Pages of posts are requested from the Posterous API (v2) with HTTP basic
    authentication, starting at page 1, until an empty page comes back.

    :param api_token: API token, sent as the ``api_token`` query parameter.
    :param email: account e-mail, used as the basic-auth user name.
    :param password: account password, used for basic auth.
    :returns: a generator of ``(title, content, slug, date, author,
        categories, tags, kind, markup)`` tuples. ``date`` is a
        ``'%Y-%m-%d %H:%M'`` string with the post's UTC offset subtracted.
    """
    import base64
    from datetime import timedelta
    try:
        # py3k import
        import json
    except ImportError:
        # py2 import
        import simplejson as json

    try:
        # py3k import
        import urllib.request as urllib_request
    except ImportError:
        # py2 import
        import urllib2 as urllib_request

    def get_posterous_posts(api_token, email, password, page=1):
        """Fetch one page of posts and return the decoded JSON payload."""
        # b64encode never inserts newlines, so no stripping is needed
        # (base64.encodestring was removed in Python 3.9).
        credentials = ("%s:%s" % (email, password)).encode('utf-8')
        base64string = base64.b64encode(credentials).decode('ascii')
        url = ("http://posterous.com/api/v2/users/me/sites/primary/"
               "posts?api_token=%s&page=%d") % (api_token, page)
        request = urllib_request.Request(url)
        request.add_header("Authorization", "Basic %s" % base64string)
        handle = urllib_request.urlopen(request)
        try:
            return json.loads(handle.read().decode('utf-8'))
        finally:
            handle.close()

    page = 1
    while True:
        # Each page is requested exactly once; an empty page ends the import.
        posts = get_posterous_posts(api_token, email, password, page)
        if not posts:
            break
        page += 1

        for post in posts:
            slug = post.get('slug')
            if not slug:
                slug = slugify(post.get('title'))
            tags = [tag.get('name') for tag in post.get('tags')]

            # The last five characters of display_date are a signed HHMM
            # offset; the character before them is a separator.
            raw_date = post.get('display_date')
            date_object = SafeDatetime.strptime(raw_date[:-6],
                                                "%Y/%m/%d %H:%M:%S")
            offset = int(raw_date[-5:])
            sign = -1 if offset < 0 else 1
            # Split HHMM into hours and minutes so offsets such as +0530
            # are not misread as 5.3 hours.
            hours, minutes = divmod(abs(offset), 100)
            date_object -= sign * timedelta(hours=hours, minutes=minutes)
            date = date_object.strftime("%Y-%m-%d %H:%M")
            kind = 'article'  # TODO: Recognise pages

            yield (post.get('title'), post.get('body_cleaned'), slug, date,
                   post.get('user').get('display_name'), [], tags, kind,
                   "html")
Exemplo n.º 30
0
 def url_format(self):
     """Build the mapping of values used to format this object's URLs.

     Starts from a shallow copy of ``self.metadata`` and overrides the
     ``path``, ``slug``, ``lang``, ``date``, ``author`` and ``category``
     entries with values taken from the instance.
     """
     url_fields = copy.copy(self.metadata)
     source_path = self.metadata.get(
         'path', self.get_relative_source_path())

     overrides = {'path': path_to_url(source_path)}
     overrides['slug'] = getattr(self, 'slug', '')
     overrides['lang'] = getattr(self, 'lang', 'en')
     overrides['date'] = getattr(self, 'date', SafeDatetime.now())
     if hasattr(self, 'author'):
         overrides['author'] = self.author.slug
     else:
         overrides['author'] = ''
     if hasattr(self, 'category'):
         overrides['category'] = self.category.slug
     else:
         overrides['category'] = ''

     url_fields.update(overrides)
     return url_fields
Exemplo n.º 31
0
 def url_format(self):
     """Return a copy of the metadata with URL-formatting values filled in.

     The copy of ``self.metadata`` gets its ``path``, ``slug``, ``lang``,
     ``date``, ``author`` and ``category`` keys replaced by values read
     from the instance, with fallbacks when an attribute is missing.
     """
     values = copy.copy(self.metadata)
     raw_path = self.metadata.get('path', self.get_relative_source_path())

     url_path = path_to_url(raw_path)
     slug = getattr(self, 'slug', '')
     lang = getattr(self, 'lang', 'en')
     date = getattr(self, 'date', SafeDatetime.now())
     author_slug = self.author.slug if hasattr(self, 'author') else ''
     category_slug = self.category.slug if hasattr(self, 'category') else ''

     values.update({
         'path': url_path,
         'slug': slug,
         'lang': lang,
         'date': date,
         'author': author_slug,
         'category': category_slug,
     })
     return values
Exemplo n.º 32
0
    def test_article_with_metadata(self):
        """Each expected key of the HTML article must match exactly."""
        article = self.read_file(path='article_with_metadata.html')
        wanted = dict([
            ('category', 'yeah'),
            ('author', 'Alexis Métaireau'),
            ('title', 'This is a super article !'),
            ('summary', 'Summary and stuff'),
            ('date', SafeDatetime(2010, 12, 2, 10, 14)),
            ('tags', ['foo', 'bar', 'foobar']),
            ('custom_field', 'http://notmyidea.org'),
        ])

        # The key is passed as the failure message to identify mismatches.
        for name in wanted:
            self.assertEqual(wanted[name], article.metadata[name], name)
Exemplo n.º 33
0
    def test_article_with_optional_filename_metadata(self):
        """An optional date group in FILENAME_METADATA may match or not.

        A dated file name must produce a ``date`` entry, while a file name
        without a date must still be read and must not get one.
        """
        # Raw string: ``\d`` is an invalid escape sequence in a plain string
        # literal and triggers a warning on current Python versions.
        optional_date = r'(?P<date>\d{4}-\d{2}-\d{2})?'

        page = self.read_file(path='2012-11-30_md_w_filename_meta#foo-bar.md',
                              FILENAME_METADATA=optional_date)
        expected = {
            'date': SafeDatetime(2012, 11, 30),
            'reader': 'markdown',
        }
        self.assertDictHasSubset(page.metadata, expected)

        page = self.read_file(path='empty.md',
                              FILENAME_METADATA=optional_date)
        expected = {
            'reader': 'markdown',
        }
        self.assertDictHasSubset(page.metadata, expected)
        self.assertNotIn('date', page.metadata, 'Date should not be set.')
Exemplo n.º 34
0
 def url_format(self):
     """Build the mapping of values used to format this object's URLs.

     Starts from a shallow copy of ``self.metadata`` and overrides the
     ``path``, ``slug``, ``lang``, ``date``, ``author``, ``tag`` and
     ``category`` entries with values taken from the instance.
     """
     url_fields = copy.copy(self.metadata)
     source_path = self.metadata.get(
         "path", self.get_relative_source_path())

     overrides = {"path": path_to_url(source_path)}
     overrides["slug"] = getattr(self, "slug", "")
     overrides["lang"] = getattr(self, "lang", "en")
     overrides["date"] = getattr(self, "date", SafeDatetime.now())
     # Slug-bearing attributes fall back to an empty string when absent.
     if hasattr(self, "author"):
         overrides["author"] = self.author.slug
     else:
         overrides["author"] = ""
     if hasattr(self, "tag"):
         overrides["tag"] = self.tag.slug
     else:
         overrides["tag"] = ""
     if hasattr(self, "category"):
         overrides["category"] = self.category.slug
     else:
         overrides["category"] = ""

     url_fields.update(overrides)
     return url_fields
Exemplo n.º 35
0
def default_metadata(settings=None, process=None):
    """Build the metadata that applies when a source file sets none.

    Entries come from ``DEFAULT_METADATA`` first, then ``DEFAULT_CATEGORY``
    (which overrides a ``category`` entry), then ``DEFAULT_DATE``.

    :param settings: settings mapping to read the defaults from.
    :param process: optional ``process(name, value)`` callable applied to
        every ``DEFAULT_METADATA`` value and to the default category.
    :returns: a new dict of default metadata.
    """
    if not settings:
        return {}

    def convert(name, raw):
        # Values pass through untouched when no processor was supplied.
        return process(name, raw) if process else raw

    defaults = {
        name: convert(name, raw)
        for name, raw in dict(settings.get('DEFAULT_METADATA', {})).items()
    }
    if 'DEFAULT_CATEGORY' in settings:
        defaults['category'] = convert('category',
                                       settings['DEFAULT_CATEGORY'])

    # 'fs' is skipped here; only other truthy values are unpacked into
    # a SafeDatetime.
    default_date = settings.get('DEFAULT_DATE', None)
    if default_date and default_date != 'fs':
        defaults['date'] = SafeDatetime(*default_date)
    return defaults
Exemplo n.º 36
0
    def test_article_extra_path_metadata(self):
        """EXTRA_PATH_METADATA entries must end up in the page metadata.

        The first case combines it with FILENAME_METADATA; the second checks
        that an ``author`` given for the path is the one reported.
        """
        dated_source = '2012-11-29_rst_w_filename_meta#foo-bar.rst'
        filename_pattern = (r'(?P<date>\d{4}-\d{2}-\d{2})'
                            r'_(?P<Slug>.*)'
                            r'#(?P<MyMeta>.*)-(?P<author>.*)')
        extra_keys = {'key-1a': 'value-1a', 'key-1b': 'value-1b'}
        dated_page = self.read_file(
            path=dated_source,
            FILENAME_METADATA=filename_pattern,
            EXTRA_PATH_METADATA={dated_source: extra_keys})
        wanted_dated = {
            'category': 'yeah',
            'author': 'Alexis Métaireau',
            'title': 'Rst with filename metadata',
            'date': SafeDatetime(2012, 11, 29),
            'slug': 'rst_w_filename_meta',
            'mymeta': 'foo',
            'reader': 'rst',
            'key-1a': 'value-1a',
            'key-1b': 'value-1b',
        }
        self.assertDictHasSubset(dated_page.metadata, wanted_dated)

        plain_source = 'article.rst'
        author_override = {'author': 'Charlès Overwrite'}
        plain_page = self.read_file(
            path=plain_source,
            EXTRA_PATH_METADATA={plain_source: author_override})
        wanted_plain = {
            'category': 'misc',
            'author': 'Charlès Overwrite',
            'title': 'Article title',
            'reader': 'rst',
        }
        self.assertDictHasSubset(plain_page.metadata, wanted_plain)
Exemplo n.º 37
0
def path_metadata(full_path, source_path, settings=None):
    """Collect metadata that is derived from a file's location.

    :param full_path: path handed to ``os.stat`` when ``DEFAULT_DATE`` is
        ``'fs'``, to read the modification time.
    :param source_path: path compared against ``EXTRA_PATH_METADATA`` keys.
    :param settings: settings mapping; nothing is collected when it is
        empty or ``None``.
    :returns: a new dict of metadata.
    """
    if not settings:
        return {}

    collected = {}
    if settings.get('DEFAULT_DATE', None) == 'fs':
        modified_at = os.stat(full_path).st_mtime
        collected['date'] = SafeDatetime.fromtimestamp(modified_at)

    # Walk EXTRA_PATH_METADATA in sorted key order so that entries for more
    # specific paths are applied after, and override, those of their parent
    # directories.
    extra = settings.get('EXTRA_PATH_METADATA', {})
    for candidate in sorted(extra):
        # Compare against the key with a trailing separator so that a name
        # which is merely a prefix of another name is not treated as its
        # parent directory.
        as_directory = os.path.join(candidate, '')
        if source_path != candidate and \
                not source_path.startswith(as_directory):
            continue
        collected.update(extra[candidate])

    return collected
Exemplo n.º 38
0
 def url_format(self):
     """Build the mapping of values used to format this object's URLs.

     Starts from a shallow copy of ``self.metadata`` and overrides the
     ``path``, ``slug``, ``lang``, ``date``, ``author`` and ``category``
     entries. Author and category are slugified with the
     ``SLUG_SUBSTITUTIONS`` setting; a missing category falls back to
     ``DEFAULT_CATEGORY``.
     """
     url_fields = copy.copy(self.metadata)
     source_path = self.metadata.get(
         'path', self.get_relative_source_path())
     fallback_category = self.settings['DEFAULT_CATEGORY']
     substitutions = self.settings.get('SLUG_SUBSTITUTIONS', ())

     overrides = {'path': path_to_url(source_path)}
     overrides['slug'] = getattr(self, 'slug', '')
     overrides['lang'] = getattr(self, 'lang', 'en')
     overrides['date'] = getattr(self, 'date', SafeDatetime.now())
     author = getattr(self, 'author', '')
     overrides['author'] = slugify(author, substitutions)
     category = getattr(self, 'category', fallback_category)
     overrides['category'] = slugify(category, substitutions)

     url_fields.update(overrides)
     return url_fields
Exemplo n.º 39
0
 def url_format(self):
     """Return a copy of the metadata with URL-formatting values filled in.

     The ``path``, ``slug``, ``lang``, ``date``, ``author`` and ``category``
     keys of the copy are replaced. Author and category go through
     ``slugify`` with the ``SLUG_SUBSTITUTIONS`` setting, and the category
     defaults to the ``DEFAULT_CATEGORY`` setting.
     """
     values = copy.copy(self.metadata)
     raw_path = self.metadata.get('path', self.get_relative_source_path())
     category_fallback = self.settings['DEFAULT_CATEGORY']
     subs = self.settings.get('SLUG_SUBSTITUTIONS', ())

     url_path = path_to_url(raw_path)
     slug = getattr(self, 'slug', '')
     lang = getattr(self, 'lang', 'en')
     date = getattr(self, 'date', SafeDatetime.now())
     author_slug = slugify(getattr(self, 'author', ''), subs)
     category_slug = slugify(
         getattr(self, 'category', category_fallback), subs)

     values.update({
         'path': url_path,
         'slug': slug,
         'lang': lang,
         'date': date,
         'author': author_slug,
         'category': category_slug,
     })
     return values
    def parse(self):
        """Import Posterous posts and yield them as ``Content`` entities.

        Pages are requested through ``self._get_posterous_posts`` starting
        at page 1 until an empty page is returned. Each post's date is
        emitted as a ``'%Y-%m-%d %H:%M'`` string with the UTC offset found
        in ``display_date`` subtracted.
        """
        from datetime import timedelta

        settings = read_settings()
        subs = settings["SLUG_REGEX_SUBSTITUTIONS"]
        page = 1
        while True:
            # Each page is requested exactly once; an empty page ends the
            # import.
            posts = self._get_posterous_posts(page)
            if not posts:
                break
            page += 1

            for post in posts:
                slug = post.get("slug")
                if not slug:
                    slug = slugify(post.get("title"), regex_subs=subs)
                tags = [tag.get("name") for tag in post.get("tags")]

                # The last five characters of display_date are a signed
                # HHMM offset; the character before them is a separator.
                raw_date = post.get("display_date")
                date_object = SafeDatetime.strptime(
                    raw_date[:-6], "%Y/%m/%d %H:%M:%S"
                )
                offset = int(raw_date[-5:])
                sign = -1 if offset < 0 else 1
                # Split HHMM into hours and minutes so offsets such as
                # +0530 are not misread as 5.3 hours.
                hours, minutes = divmod(abs(offset), 100)
                date_object -= sign * timedelta(hours=hours, minutes=minutes)
                date = date_object.strftime("%Y-%m-%d %H:%M")
                kind = "article"  # TODO: Recognise pages
                status = "published"  # TODO: Find a way for draft posts

                yield blog2pelican.entities.content.Content(
                    title=post.get("title"),
                    content=post.get("body_cleaned"),
                    slug=slug,
                    date=date,
                    author=post.get("user").get("display_name"),
                    categories=[],
                    tags=tags,
                    status=status,
                    kind=kind,
                    markup="html",
                )
Exemplo n.º 41
0
    def __init__(self, content, metadata=None, settings=None,
                 source_path=None, context=None):
        """Initialise the content object from raw content and metadata.

        :param content: raw content, stored as ``self._content``.
        :param metadata: mapping of metadata; merged over the
            ``DEFAULT_METADATA`` setting, and every entry is also set as a
            lower-cased attribute (``save_as`` and ``url`` are stored with
            an ``override_`` prefix).
        :param settings: settings mapping; a deep copy of ``DEFAULT_CONFIG``
            is used when omitted.
        :param source_path: path of the source file, used for the slug when
            ``SLUGIFY_SOURCE`` is ``'basename'``.
        :param context: shared context mapping, stored as ``self._context``.

        Sends ``signals.content_object_init`` once initialisation is done.
        """
        if metadata is None:
            metadata = {}
        if settings is None:
            settings = copy.deepcopy(DEFAULT_CONFIG)

        self.settings = settings
        self._content = content
        if context is None:
            context = {}
        self._context = context
        self.translations = []

        local_metadata = dict(settings['DEFAULT_METADATA'])
        local_metadata.update(metadata)

        # set metadata as attributes
        for key, value in local_metadata.items():
            if key in ('save_as', 'url'):
                key = 'override_' + key
            setattr(self, key.lower(), value)

        # also keep track of the metadata attributes available
        self.metadata = local_metadata

        # default template if it's not defined in page
        self.template = self._get_template()

        # First, read the authors from "authors", if not, fallback to "author"
        # and if not use the settings defined one, if any.
        if not hasattr(self, 'author'):
            if hasattr(self, 'authors'):
                self.author = self.authors[0]
            elif 'AUTHOR' in settings:
                self.author = Author(settings['AUTHOR'], settings)

        if not hasattr(self, 'authors') and hasattr(self, 'author'):
            self.authors = [self.author]

        # XXX Split all the following code into pieces, there is too much here.

        # manage languages
        self.in_default_lang = True
        if 'DEFAULT_LANG' in settings:
            default_lang = settings['DEFAULT_LANG'].lower()
            if not hasattr(self, 'lang'):
                self.lang = default_lang

            self.in_default_lang = (self.lang == default_lang)

        # create the slug if not existing, generate slug according to
        # setting of SLUGIFY_SOURCE
        if not hasattr(self, 'slug'):
            if (settings['SLUGIFY_SOURCE'] == 'title'
                    and hasattr(self, 'title')):
                self.slug = slugify(self.title,
                                    settings.get('SLUG_SUBSTITUTIONS', ()))
            elif (settings['SLUGIFY_SOURCE'] == 'basename'
                  and source_path is not None):
                basename = os.path.basename(os.path.splitext(source_path)[0])
                self.slug = slugify(basename,
                                    settings.get('SLUG_SUBSTITUTIONS', ()))

        self.source_path = source_path

        # manage the date format
        if not hasattr(self, 'date_format'):
            if hasattr(self, 'lang') and self.lang in settings['DATE_FORMATS']:
                self.date_format = settings['DATE_FORMATS'][self.lang]
            else:
                self.date_format = settings['DEFAULT_DATE_FORMAT']

        # A tuple date format is (locale, format): the locale is applied
        # process-wide and only the format string is kept.
        if isinstance(self.date_format, tuple):
            locale_string = self.date_format[0]
            if sys.version_info < (3, ) and isinstance(locale_string,
                                                       six.text_type):
                locale_string = locale_string.encode('ascii')
            locale.setlocale(locale.LC_ALL, locale_string)
            self.date_format = self.date_format[1]

        # manage timezone
        default_timezone = settings.get('TIMEZONE', 'UTC')
        timezone = getattr(self, 'timezone', default_timezone)

        if hasattr(self, 'date'):
            self.date = set_date_tzinfo(self.date, timezone)
            self.locale_date = strftime(self.date, self.date_format)

        if hasattr(self, 'modified'):
            self.modified = set_date_tzinfo(self.modified, timezone)
            self.locale_modified = strftime(self.modified, self.date_format)

        # manage status
        if not hasattr(self, 'status'):
            self.status = settings['DEFAULT_STATUS']
            if not settings['WITH_FUTURE_DATES']:
                # NOTE(review): self.date went through set_date_tzinfo above
                # while SafeDatetime.now() is called without a timezone;
                # confirm the two values are always comparable.
                if hasattr(self, 'date') and self.date > SafeDatetime.now():
                    self.status = 'draft'

        # store the summary metadata if it is set
        if 'summary' in metadata:
            self._summary = metadata['summary']

        signals.content_object_init.send(self)
Exemplo n.º 42
0
def tumblr2fields(api_key, blogname):
    """Import Tumblr posts (API v2) and yield them as field tuples.

    Posts are fetched in batches from ``<blogname>.tumblr.com`` with the
    ``raw`` filter, advancing the offset by the size of each batch until an
    empty batch is returned.

    :param api_key: Tumblr API key, sent as the ``api_key`` query parameter.
    :param blogname: blog name, used to build the API host name.
    :returns: a generator of ``(title, content, slug, date, author,
        categories, tags, status, kind, markup)`` tuples. The slug is
        prefixed with the post date and the only category is the post type.
    """
    try:
        # py3k import
        import json
    except ImportError:
        # py2 import
        import simplejson as json

    try:
        # py3k import
        import urllib.request as urllib_request
    except ImportError:
        # py2 import
        import urllib2 as urllib_request

    def get_tumblr_posts(api_key, blogname, offset=0):
        """Fetch one batch of posts starting at ``offset``."""
        url = ("http://api.tumblr.com/v2/blog/%s.tumblr.com/"
               "posts?api_key=%s&offset=%d&filter=raw") % (
            blogname, api_key, offset)
        request = urllib_request.Request(url)
        handle = urllib_request.urlopen(request)
        try:
            posts = json.loads(handle.read().decode('utf-8'))
        finally:
            handle.close()
        return posts.get('response').get('posts')

    offset = 0
    posts = get_tumblr_posts(api_key, blogname, offset)
    settings = read_settings()
    subs = settings['SLUG_REGEX_SUBSTITUTIONS']
    while len(posts) > 0:
        for post in posts:
            title = \
                post.get('title') or \
                post.get('source_title') or \
                post.get('type').capitalize()
            slug = post.get('slug') or slugify(title, regex_subs=subs)
            tags = post.get('tags')
            timestamp = post.get('timestamp')
            posted_at = SafeDatetime.fromtimestamp(int(timestamp))
            date = posted_at.strftime("%Y-%m-%d %H:%M:%S")
            slug = posted_at.strftime("%Y-%m-%d-") + slug
            # Named so as not to shadow the ``format`` and ``type`` builtins.
            markup = post.get('format')
            content = post.get('body')
            post_type = post.get('type')
            if post_type == 'photo':
                if markup == 'markdown':
                    fmtstr = '![%s](%s)'
                else:
                    fmtstr = '<img alt="%s" src="%s" />'
                # One rendered image per line. Joining a single formatted
                # string would put a newline between each of its characters.
                content = '\n'.join(
                    fmtstr % (photo.get('caption'),
                              photo.get('original_size').get('url'))
                    for photo in post.get('photos'))
                content += '\n\n' + post.get('caption')
            elif post_type == 'quote':
                if markup == 'markdown':
                    fmtstr = '\n\n&mdash; %s'
                else:
                    fmtstr = '<p>&mdash; %s</p>'
                content = post.get('text') + fmtstr % post.get('source')
            elif post_type == 'link':
                if markup == 'markdown':
                    fmtstr = '[via](%s)\n\n'
                else:
                    fmtstr = '<p><a href="%s">via</a></p>\n'
                content = fmtstr % post.get('url') + post.get('description')
            elif post_type == 'audio':
                if markup == 'markdown':
                    fmtstr = '[via](%s)\n\n'
                else:
                    fmtstr = '<p><a href="%s">via</a></p>\n'
                content = fmtstr % post.get('source_url') + \
                    post.get('caption') + \
                    post.get('player')
            elif post_type == 'video':
                if markup == 'markdown':
                    fmtstr = '[via](%s)\n\n'
                else:
                    fmtstr = '<p><a href="%s">via</a></p>\n'
                source = fmtstr % post.get('source_url')
                caption = post.get('caption')
                players = '\n'.join(player.get('embed_code')
                                    for player in post.get('player'))
                content = source + caption + players
            elif post_type == 'answer':
                title = post.get('question')
                # NOTE(review): asking_name fills the href and asking_url the
                # link text; this looks swapped. Confirm before changing.
                content = ('<p>'
                           '<a href="%s" rel="external nofollow">%s</a>'
                           ': %s'
                           '</p>\n'
                           ' %s' % (post.get('asking_name'),
                                    post.get('asking_url'),
                                    post.get('question'),
                                    post.get('answer')))

            content = content.rstrip() + '\n'
            kind = 'article'
            status = 'published'  # TODO: Find a way for draft posts

            yield (title, content, slug, date, post.get('blog_name'),
                   [post_type], tags, status, kind, markup)

        offset += len(posts)
        posts = get_tumblr_posts(api_key, blogname, offset)
Exemplo n.º 43
0
    def __init__(self,
                 content,
                 metadata=None,
                 settings=None,
                 source_path=None,
                 context=None):
        """Initialise the content object from raw content and metadata.

        :param content: raw content, stored as ``self._content``.
        :param metadata: mapping of metadata; every entry is also set as a
            lower-cased attribute (``save_as`` and ``url`` are stored with
            an ``override_`` prefix).
        :param settings: settings mapping; a deep copy of ``DEFAULT_CONFIG``
            is used when omitted.
        :param source_path: path of the source file, used for the slug when
            ``SLUGIFY_SOURCE`` is ``'basename'``.
        :param context: shared context mapping, stored as ``self._context``.

        Sends ``signals.content_object_init`` once initialisation is done.
        """
        if metadata is None:
            metadata = {}
        if settings is None:
            settings = copy.deepcopy(DEFAULT_CONFIG)

        self.settings = settings
        self._content = content
        if context is None:
            context = {}
        self._context = context
        self.translations = []

        # Work on a private copy so the caller's mapping is not shared.
        local_metadata = dict()
        local_metadata.update(metadata)

        # set metadata as attributes
        for key, value in local_metadata.items():
            if key in ('save_as', 'url'):
                key = 'override_' + key
            setattr(self, key.lower(), value)

        # also keep track of the metadata attributes available
        self.metadata = local_metadata

        # default template if it's not defined in page
        self.template = self._get_template()

        # First, read the authors from "authors", if not, fallback to "author"
        # and if not use the settings defined one, if any.
        if not hasattr(self, 'author'):
            if hasattr(self, 'authors'):
                self.author = self.authors[0]
            elif 'AUTHOR' in settings:
                self.author = Author(settings['AUTHOR'], settings)

        if not hasattr(self, 'authors') and hasattr(self, 'author'):
            self.authors = [self.author]

        # XXX Split all the following code into pieces, there is too much here.

        # manage languages
        self.in_default_lang = True
        if 'DEFAULT_LANG' in settings:
            default_lang = settings['DEFAULT_LANG'].lower()
            if not hasattr(self, 'lang'):
                self.lang = default_lang

            self.in_default_lang = (self.lang == default_lang)

        # create the slug if not existing, generate slug according to
        # setting of SLUGIFY_SOURCE
        if not hasattr(self, 'slug'):
            if (settings['SLUGIFY_SOURCE'] == 'title'
                    and hasattr(self, 'title')):
                self.slug = slugify(self.title,
                                    settings.get('SLUG_SUBSTITUTIONS', ()))
            elif (settings['SLUGIFY_SOURCE'] == 'basename'
                  and source_path is not None):
                basename = os.path.basename(os.path.splitext(source_path)[0])
                self.slug = slugify(basename,
                                    settings.get('SLUG_SUBSTITUTIONS', ()))

        self.source_path = source_path

        # manage the date format
        if not hasattr(self, 'date_format'):
            if hasattr(self, 'lang') and self.lang in settings['DATE_FORMATS']:
                self.date_format = settings['DATE_FORMATS'][self.lang]
            else:
                self.date_format = settings['DEFAULT_DATE_FORMAT']

        # A tuple date format is (locale, format): the locale is applied
        # process-wide and only the format string is kept.
        if isinstance(self.date_format, tuple):
            locale_string = self.date_format[0]
            if sys.version_info < (3, ) and isinstance(locale_string,
                                                       six.text_type):
                locale_string = locale_string.encode('ascii')
            locale.setlocale(locale.LC_ALL, locale_string)
            self.date_format = self.date_format[1]

        # manage timezone
        default_timezone = settings.get('TIMEZONE', 'UTC')
        timezone = getattr(self, 'timezone', default_timezone)

        if hasattr(self, 'date'):
            self.date = set_date_tzinfo(self.date, timezone)
            self.locale_date = strftime(self.date, self.date_format)

        if hasattr(self, 'modified'):
            self.modified = set_date_tzinfo(self.modified, timezone)
            self.locale_modified = strftime(self.modified, self.date_format)

        # manage status
        if not hasattr(self, 'status'):
            self.status = settings['DEFAULT_STATUS']
            if not settings['WITH_FUTURE_DATES'] and hasattr(self, 'date'):
                # Pick a "now" that matches the date's awareness: a naive
                # local time for a naive date, an aware UTC time otherwise.
                if self.date.tzinfo is None:
                    now = SafeDatetime.now()
                else:
                    now = SafeDatetime.utcnow().replace(tzinfo=pytz.utc)
                # Content dated in the future is demoted to a draft.
                if self.date > now:
                    self.status = 'draft'

        # store the summary metadata if it is set
        if 'summary' in metadata:
            self._summary = metadata['summary']

        signals.content_object_init.send(self)
Exemplo n.º 44
0
def tumblr2fields(api_key, blogname):
    """Import Tumblr posts (API v2) and yield the fields of each post.

    Posts are fetched page by page from the Tumblr ``/posts`` endpoint until
    a page comes back empty.

    :param api_key: Tumblr API key, sent as the ``api_key`` query parameter.
    :param blogname: blog name; ``<blogname>.tumblr.com`` is queried.
    :returns: a generator yielding, for every post, the tuple
        ``(title, content, slug, date, blog_name, [type], tags, status,
        kind, format)`` where ``date`` is formatted ``%Y-%m-%d %H:%M:%S``,
        ``slug`` is prefixed with ``%Y-%m-%d-``, ``status`` is always
        ``'published'`` and ``kind`` is always ``'article'``.
    """
    try:
        # py3k import
        import json
    except ImportError:
        # py2 import
        import simplejson as json

    try:
        # py3k import
        import urllib.request as urllib_request
    except ImportError:
        # py2 import
        import urllib2 as urllib_request

    # ``closing`` works on both py2 and py3 response objects; on py2 the
    # object returned by ``urlopen`` is not a context manager by itself.
    from contextlib import closing

    def get_tumblr_posts(api_key, blogname, offset=0):
        """Fetch one page of posts starting at ``offset``.

        Returns a (possibly empty) list of post dicts.
        """
        # HTTPS so that the API key is not sent in clear text.
        url = ("https://api.tumblr.com/v2/blog/%s.tumblr.com/"
               "posts?api_key=%s&offset=%d&filter=raw") % (blogname, api_key,
                                                           offset)
        request = urllib_request.Request(url)
        # Close the connection as soon as the payload has been read.
        with closing(urllib_request.urlopen(request)) as handle:
            payload = json.loads(handle.read().decode('utf-8'))
        # Tolerate a payload without a usable 'response'/'posts' entry by
        # reporting "no more posts" instead of failing on ``None``.
        response = payload.get('response') or {}
        return response.get('posts') or []

    offset = 0
    posts = get_tumblr_posts(api_key, blogname, offset)
    settings = read_settings()
    subs = settings['SLUG_REGEX_SUBSTITUTIONS']
    while posts:
        for post in posts:
            # Local names avoid shadowing the ``type``/``format`` builtins.
            post_type = post.get('type')
            post_format = post.get('format')
            is_markdown = post_format == 'markdown'

            title = \
                post.get('title') or \
                post.get('source_title') or \
                post_type.capitalize()
            slug = post.get('slug') or slugify(title, regex_subs=subs)
            tags = post.get('tags')

            # Convert the timestamp once and derive both strings from it.
            posted_at = SafeDatetime.fromtimestamp(int(post.get('timestamp')))
            date = posted_at.strftime("%Y-%m-%d %H:%M:%S")
            slug = posted_at.strftime("%Y-%m-%d-") + slug

            content = post.get('body')
            if post_type == 'photo':
                if is_markdown:
                    fmtstr = '![%s](%s)'
                else:
                    fmtstr = '<img alt="%s" src="%s" />'
                # One formatted image per line.  (Joining a single formatted
                # string would put a newline between each of its characters.)
                content = '\n'.join(
                    fmtstr % (photo.get('caption'),
                              photo.get('original_size').get('url'))
                    for photo in post.get('photos'))
                content += '\n\n' + (post.get('caption') or '')
            elif post_type == 'quote':
                if is_markdown:
                    fmtstr = '\n\n&mdash; %s'
                else:
                    fmtstr = '<p>&mdash; %s</p>'
                content = (post.get('text') or '') + \
                    fmtstr % post.get('source')
            elif post_type == 'link':
                if is_markdown:
                    fmtstr = '[via](%s)\n\n'
                else:
                    fmtstr = '<p><a href="%s">via</a></p>\n'
                content = fmtstr % post.get('url') + \
                    (post.get('description') or '')
            elif post_type == 'audio':
                if is_markdown:
                    fmtstr = '[via](%s)\n\n'
                else:
                    fmtstr = '<p><a href="%s">via</a></p>\n'
                content = fmtstr % post.get('source_url') + \
                    (post.get('caption') or '') + \
                    (post.get('player') or '')
            elif post_type == 'video':
                if is_markdown:
                    fmtstr = '[via](%s)\n\n'
                else:
                    fmtstr = '<p><a href="%s">via</a></p>\n'
                source = fmtstr % post.get('source_url')
                caption = post.get('caption') or ''
                players = '\n'.join(
                    player.get('embed_code') for player in post.get('player'))
                content = source + caption + players
            elif post_type == 'answer':
                title = post.get('question')
                # The link target is the asker's URL and the link text is
                # the asker's name.
                content = ('<p>'
                           '<a href="%s" rel="external nofollow">%s</a>'
                           ': %s'
                           '</p>\n'
                           ' %s' %
                           (post.get('asking_url'), post.get('asking_name'),
                            post.get('question'), post.get('answer')))

            # Post types not handled above fall back to 'body', which may be
            # absent; treat that as empty content.
            content = (content or '').rstrip() + '\n'
            kind = 'article'
            status = 'published'  # TODO: Find a way for draft posts

            yield (title, content, slug, date, post.get('blog_name'),
                   [post_type], tags, status, kind, post_format)

        offset += len(posts)
        posts = get_tumblr_posts(api_key, blogname, offset)
Exemplo n.º 45
0
    def __init__(self, content, metadata=None, settings=None, source_path=None, context=None):
        """Initialise the object from its raw content and metadata.

        Every metadata entry is exposed as a lower-cased instance attribute
        (``save_as`` and ``url`` are stored as ``override_save_as`` and
        ``override_url``).  Author(s), language, slug, date format, timezone
        aware dates, status and summary are then derived from those
        attributes and from ``settings``.  Finally the
        ``content_object_init`` signal is sent with this instance.

        :param content: raw content, kept as ``self._content``.
        :param metadata: mapping of metadata; an empty dict when ``None``.
        :param settings: settings mapping; a deep copy of ``DEFAULT_CONFIG``
            when ``None``.
        :param source_path: path of the source file, kept as
            ``self.source_path`` and used for basename-derived slugs.
        :param context: kept as ``self._context``; an empty dict when
            ``None``.
        """
        metadata = {} if metadata is None else metadata
        settings = copy.deepcopy(DEFAULT_CONFIG) if settings is None else settings

        self.settings = settings
        self._content = content
        self._context = {} if context is None else context
        self.translations = []

        # Keep our own copy of the metadata mapping.
        local_metadata = dict(metadata)

        # Mirror every metadata entry as an attribute on the instance.
        for name, value in local_metadata.items():
            attr = "override_" + name if name in ("save_as", "url") else name
            setattr(self, attr.lower(), value)

        # Remember which metadata attributes are available.
        self.metadata = local_metadata

        # Template to use when the page does not define one.
        self.template = self._get_template()

        # Author resolution order: explicit "author", then the first entry of
        # "authors", then the AUTHOR setting when present.
        if not hasattr(self, "author"):
            if hasattr(self, "authors"):
                self.author = self.authors[0]
            elif "AUTHOR" in settings:
                self.author = Author(settings["AUTHOR"], settings)

        if not hasattr(self, "authors") and hasattr(self, "author"):
            self.authors = [self.author]

        # XXX Split all the following code into pieces, there is too much here.

        # Language: fall back to the lower-cased DEFAULT_LANG setting.
        if "DEFAULT_LANG" in settings:
            fallback_lang = settings["DEFAULT_LANG"].lower()
            if not hasattr(self, "lang"):
                self.lang = fallback_lang
            self.in_default_lang = self.lang == fallback_lang
        else:
            self.in_default_lang = True

        # Slug: when none was supplied, build it from the source selected by
        # the SLUGIFY_SOURCE setting.
        if not hasattr(self, "slug"):
            slug_source = settings["SLUGIFY_SOURCE"]
            if slug_source == "title" and hasattr(self, "title"):
                substitutions = settings.get("SLUG_SUBSTITUTIONS", ())
                self.slug = slugify(self.title, substitutions)
            elif slug_source == "basename" and source_path is not None:
                stem = os.path.splitext(source_path)[0]
                substitutions = settings.get("SLUG_SUBSTITUTIONS", ())
                self.slug = slugify(os.path.basename(stem), substitutions)

        self.source_path = source_path

        # Date format: per-language entry if there is one, else the default.
        if not hasattr(self, "date_format"):
            lang_has_format = hasattr(self, "lang") and self.lang in settings["DATE_FORMATS"]
            if lang_has_format:
                self.date_format = settings["DATE_FORMATS"][self.lang]
            else:
                self.date_format = settings["DEFAULT_DATE_FORMAT"]

        # A tuple date format is (locale, format): switch the process locale
        # and keep only the format part.
        if isinstance(self.date_format, tuple):
            locale_name = self.date_format[0]
            must_encode = sys.version_info < (3,) and isinstance(locale_name, six.text_type)
            if must_encode:
                locale_name = locale_name.encode("ascii")
            locale.setlocale(locale.LC_ALL, locale_name)
            self.date_format = self.date_format[1]

        # Timezone: per-object "timezone" attribute, else the TIMEZONE setting.
        timezone = getattr(self, "timezone", settings.get("TIMEZONE", "UTC"))

        # Attach the timezone to each date that is present and keep its
        # formatted counterpart under "locale_<name>".
        for stamp in ("date", "modified"):
            if hasattr(self, stamp):
                setattr(self, stamp, set_date_tzinfo(getattr(self, stamp), timezone))
                setattr(self, "locale_" + stamp, strftime(getattr(self, stamp), self.date_format))

        # Status: default from settings; dates in the future become drafts
        # unless WITH_FUTURE_DATES is enabled.
        if not hasattr(self, "status"):
            self.status = settings["DEFAULT_STATUS"]
            if not settings["WITH_FUTURE_DATES"] and hasattr(self, "date"):
                # Compare like with like: naive "now" for naive dates,
                # UTC-aware "now" otherwise.
                if self.date.tzinfo is None:
                    reference = SafeDatetime.now()
                else:
                    reference = SafeDatetime.utcnow().replace(tzinfo=pytz.utc)
                if self.date > reference:
                    self.status = "draft"

        # Keep the summary from the metadata when one was given.
        if "summary" in metadata:
            self._summary = metadata["summary"]

        signals.content_object_init.send(self)