示例#1
0
class TumblrFile:
    """
    This is the base container class for all downloadable resources associated with Tumblr posts.
    """

    CATEGORY = 'misc'

    def __init__(self, data, container):
        """
        Args:
            data(dict): API response data
            container(TumblrPost): Parent container
        """
        self.log = logging.getLogger('tumdlr.containers.file')

        self._data      = data
        self.container  = container
        self.url        = URL(self._data.get('url', self._data.get('post_url')))

    def download(self, context, **kwargs):
        """
        Args:
            context(tumdlr.main.Context): CLI request context
            kwargs(dict): Additional arguments to send with the download request

        Returns:
            str: Path to the saved file
        """
        try:
            download(self.url.as_string(), str(self.filepath(context, kwargs)), **kwargs)
        except Exception as e:
            self.log.warn('Post download failed: %r', self, exc_info=e)
            raise TumdlrDownloadError(error_message=str(e), download_url=self.url.as_string())

    def filepath(self, context, request_data):
        """
        Args:
            context(tumdlr.main.Context): CLI request context
            request_data(Optional[dict]): Additional arguments to send with the download request

        Returns:
            Path
        """
        # Construct the save basedir
        basedir = Path(context.config['Tumdlr']['SavePath'])

        # Are we categorizing by user?
        if context.config['Categorization']['User']:
            self.log.debug('Categorizing by user: %s', self.container.blog.name)
            basedir = basedir.joinpath(sanitize_filename(self.container.blog.name))

        # Are we categorizing by post type?
        if context.config['Categorization']['PostType']:
            self.log.debug('Categorizing by type: %s', self.CATEGORY)
            basedir = basedir.joinpath(self.CATEGORY)

        self.log.debug('Basedir constructed: %s', basedir)

        return basedir
示例#2
0
文件: test.py 项目: homm/yurl
    def test_stress_authority(self):
        # Authority is most ambiguous part of url. Invalid host can contatin
        # ':' and '@' (path for example can not contain '?'. And query
        # can not contain '#'). The host '//no:99:' will be parsed as 'no:99'
        # and in next recomposition it can be written as '//no:99'. But parsing
        # of '//no:99:' and '//no:99' will be different.

        # # case generation:
        # from re import sub
        # from itertools import permutations
        # cases = set(sub('\d+', '7', ''.join(case))
        #             for case in set(permutations('::@@77777')))

        cases = """7:7:7@7@ 7:7:7@7@7 7:7:7@@ 7:7:7@@7 7:7:@7@ 7:7:@7@7 7:7:@@
            7:7:@@7 7:7@7:7@ 7:7@7:7@7 7:7@7:@ 7:7@7:@7 7:7@7@7: 7:7@7@7:7
            7:7@7@: 7:7@7@:7 7:7@:7@ 7:7@:7@7 7:7@:@ 7:7@:@7 7:7@@7: 7:7@@7:7
            7:7@@: 7:7@@:7 7::7@7@ 7::7@7@7 7::7@@ 7::7@@7 7::@7@ 7::@7@7 7::@@
            7::@@7 7:@7:7@ 7:@7:7@7 7:@7:@ 7:@7:@7 7:@7@7: 7:@7@7:7 7:@7@:
            7:@7@:7 7:@:7@ 7:@:7@7 7:@:@ 7:@:@7 7:@@7: 7:@@7:7 7:@@: 7:@@:7
            7@7:7:7@ 7@7:7:7@7 7@7:7:@ 7@7:7:@7 7@7:7@7: 7@7:7@7:7 7@7:7@:
            7@7:7@:7 7@7::7@ 7@7::7@7 7@7::@ 7@7::@7 7@7:@7: 7@7:@7:7 7@7:@:
            7@7:@:7 7@7@7:7: 7@7@7:7:7 7@7@7:: 7@7@7::7 7@7@:7: 7@7@:7:7 7@7@::
            7@7@::7 7@:7:7@ 7@:7:7@7 7@:7:@ 7@:7:@7 7@:7@7: 7@:7@7:7 7@:7@:
            7@:7@:7 7@::7@ 7@::7@7 7@::@ 7@::@7 7@:@7: 7@:@7:7 7@:@: 7@:@:7
            7@@7:7: 7@@7:7:7 7@@7:: 7@@7::7 7@@:7: 7@@:7:7 7@@:: 7@@::7 :7:7@7@
            :7:7@7@7 :7:7@@ :7:7@@7 :7:@7@ :7:@7@7 :7:@@ :7:@@7 :7@7:7@
            :7@7:7@7 :7@7:@ :7@7:@7 :7@7@7: :7@7@7:7 :7@7@: :7@7@:7 :7@:7@
            :7@:7@7 :7@:@ :7@:@7 :7@@7: :7@@7:7 :7@@: :7@@:7 ::7@7@ ::7@7@7
            ::7@@ ::7@@7 ::@7@ ::@7@7 ::@@7 :@7:7@ :@7:7@7 :@7:@ :@7:@7 :@7@7:
            :@7@7:7 :@7@: :@7@:7 :@:7@ :@:7@7 :@:@7 :@@7: :@@7:7 :@@:7 @7:7:7@
            @7:7:7@7 @7:7:@ @7:7:@7 @7:7@7: @7:7@7:7 @7:7@: @7:7@:7 @7::7@
            @7::7@7 @7::@ @7::@7 @7:@7: @7:@7:7 @7:@: @7:@:7 @7@7:7: @7@7:7:7
            @7@7:: @7@7::7 @7@:7: @7@:7:7 @7@:: @7@::7 @:7:7@ @:7:7@7 @:7:@
            @:7:@7 @:7@7: @:7@7:7 @:7@: @:7@:7 @::7@ @::7@7 @::@7 @:@7: @:@7:7
            @:@:7 @@7:7: @@7:7:7 @@7:: @@7::7 @@:7: @@:7:7 @@::7""".split()

        for case in cases:
            url = URL('//' + case)
            # check is all parts defined in original url is defined in parsed
            self.assertEqual(url, URL(url.as_string()))
            self.assertEqual(url, URL('//' + url.authority))
示例#3
0
class TumblrPost:
    """
    This is the base container class for all Tumblr post types. It contains data that is always available with any
    type of post.

    Additional supported post types may extend this class to provide additional metadata parsing
    """
    def __init__(self, post, blog):
        """
        Args:
            post(dict): API response
            blog(tumdlr.api.TumblrBlog): Parent blog
        """
        self._post = post
        self.blog = blog
        self.log = logging.getLogger('tumdlr.containers.post')

        self.id         = None  # type: int
        self.type       = None  # type: str
        self.url        = None  # type: URL
        self.tags       = set()
        self.post_date  = None  # type: str
        self.note_count = None  # type: int

        self.files = []

        try:
            self._parse_post()
        except Exception as e:
            self.log.warn('Failed to parse post data: %r', self, exc_info=e)
            raise TumdlrParserError(post_data=post)

    @property
    def is_text(self):
        """
        Returns:
            bool
        """
        return self.type == 'text'

    @property
    def is_photo(self):
        """
        Returns:
            bool
        """
        return self.type in ['photo', 'link']

    @property
    def is_video(self):
        """
        Returns:
            bool
        """
        return self.type == 'video'

    def _parse_post(self):
        self.id         = self._post['id']
        self.type       = self._post['type']
        self.url        = URL(self._post['post_url']) if 'post_url' in self._post else None
        self.tags       = set(self._post.get('tags', []))
        self.note_count = self._post.get('note_count')
        self.post_date  = self._post['date']

    def __repr__(self):
        return "<TumblrPost id='{id}' type='{type}' url='{url}'>"\
            .format(id=self.id, type=self.type, url=self.url)

    def __str__(self):
        return self.url.as_string() if self.url else ''