def test_readfile_path_metadata_implicit_dates(self):
    """Check that ``DEFAULT_DATE='fs'`` yields mtime-based dates.

    Reads the test file with ``DEFAULT_DATE='fs'`` and expects both the
    ``date`` and ``modified`` metadata entries to equal the ``st_mtime``
    of the file located by ``_path(test_file)``.
    """
    test_file = 'article_with_metadata_implicit_dates.html'
    page = self.read_file(path=test_file, DEFAULT_DATE='fs')

    # Both expected entries are derived from the same on-disk timestamp.
    mtime = os.stat(_path(test_file)).st_mtime
    expected = {
        'date': SafeDatetime.fromtimestamp(mtime),
        'modified': SafeDatetime.fromtimestamp(mtime),
    }

    self.assertDictHasSubset(page.metadata, expected)
def path_metadata(full_path, source_path, settings=None):
    """Build the metadata that can be derived from a file's path.

    Args:
        full_path: Path handed to ``os.stat`` when the date is taken from
            the filesystem.
        source_path: Key looked up in the ``EXTRA_PATH_METADATA`` setting.
        settings: Optional settings mapping. When falsy, no metadata is
            produced.

    Returns:
        A new dict. It holds a ``"date"`` entry when ``DEFAULT_DATE`` is
        ``"fs"``, updated with the ``EXTRA_PATH_METADATA`` entry for
        ``source_path`` (which therefore wins on conflicting keys).

    Raises:
        OSError: If ``DEFAULT_DATE`` is ``"fs"`` and ``full_path`` cannot
            be stat'ed.
    """
    metadata = {}
    if settings:
        if settings.get("DEFAULT_DATE", None) == "fs":
            # Use the modification time. st_ctime is the inode-change time
            # on Unix (it moves on chmod/rename), so it does not describe
            # when the content was last written.
            metadata["date"] = SafeDatetime.fromtimestamp(
                os.stat(full_path).st_mtime
            )
        metadata.update(
            settings.get("EXTRA_PATH_METADATA", {}).get(source_path, {})
        )
    return metadata
def path_metadata(full_path, source_path, settings=None):
    """Return the metadata implied by a file's path and timestamp.

    Without settings the result is empty. Otherwise a ``'date'`` entry is
    taken from the modification time of ``full_path`` when ``DEFAULT_DATE``
    is ``'fs'``, and the ``EXTRA_PATH_METADATA`` entry registered for
    ``source_path`` is applied on top of it.
    """
    if not settings:
        return {}

    result = {}
    if settings.get('DEFAULT_DATE', None) == 'fs':
        mtime = os.stat(full_path).st_mtime
        result['date'] = SafeDatetime.fromtimestamp(mtime)

    # Explicit per-path metadata is applied last so it overrides the date.
    extra = settings.get('EXTRA_PATH_METADATA', {})
    result.update(extra.get(source_path, {}))
    return result
def path_metadata(full_path, source_path, settings=None):
    """Collect path-derived metadata for a source file.

    ``settings`` may be omitted, in which case an empty dict is returned.
    With ``DEFAULT_DATE == 'fs'`` the ``'date'`` key is set from the
    ``st_mtime`` of ``full_path``; afterwards the ``EXTRA_PATH_METADATA``
    entry for ``source_path`` (if any) is merged in.
    """
    metadata = {}
    if not settings:
        return metadata

    wants_fs_date = settings.get('DEFAULT_DATE', None) == 'fs'
    if wants_fs_date:
        stat_result = os.stat(full_path)
        metadata['date'] = SafeDatetime.fromtimestamp(stat_result.st_mtime)

    per_path = settings.get('EXTRA_PATH_METADATA', {})
    overrides = per_path.get(source_path, {})
    metadata.update(overrides)
    return metadata
def path_metadata(full_path, source_path, settings=None):
    """Collect path-derived metadata, including parent-directory entries.

    With ``DEFAULT_DATE == 'fs'`` the ``'date'`` key is set from the
    ``st_mtime`` of ``full_path``. Every ``EXTRA_PATH_METADATA`` entry
    whose key equals ``source_path`` or names one of its parent
    directories is then merged in. Returns an empty dict when ``settings``
    is falsy.
    """
    metadata = {}
    if not settings:
        return metadata

    if settings.get('DEFAULT_DATE', None) == 'fs':
        mtime = os.stat(full_path).st_mtime
        metadata['date'] = SafeDatetime.fromtimestamp(mtime)

    # Walk the configured paths in sorted order so that a directory is
    # applied before anything beneath it; the most specific path is merged
    # last and therefore wins conflicts.
    epm = settings.get('EXTRA_PATH_METADATA', {})
    for path in sorted(epm):
        # The trailing separator stops 'foo' from matching 'foobar/x'.
        parent_prefix = os.path.join(path, '')
        if source_path != path and not source_path.startswith(parent_prefix):
            continue
        metadata.update(epm[path])

    return metadata
def tumblr2fields(api_key, blogname):
    """Import Tumblr posts (API v2).

    Pages through the blog's posts, advancing the offset by the number of
    posts received, until an empty page comes back.

    Args:
        api_key: Tumblr API key placed in the request URL.
        blogname: Blog name; ``<blogname>.tumblr.com`` is queried.

    Yields:
        Tuples of ``(title, content, slug, date, blog_name, [type], tags,
        status, kind, format)``. ``status`` is always ``'published'`` and
        ``kind`` is always ``'article'``.
    """
    try:
        # py3k import
        import json
    except ImportError:
        # py2 import
        import simplejson as json

    try:
        # py3k import
        import urllib.request as urllib_request
    except ImportError:
        # py2 import
        import urllib2 as urllib_request

    def get_tumblr_posts(api_key, blogname, offset=0):
        """Fetch one page of posts starting at ``offset``."""
        url = ("http://api.tumblr.com/v2/blog/%s.tumblr.com/"
               "posts?api_key=%s&offset=%d&filter=raw") % (
            blogname, api_key, offset)
        request = urllib_request.Request(url)
        handle = urllib_request.urlopen(request)
        try:
            payload = json.loads(handle.read().decode('utf-8'))
        finally:
            # Release the connection even if reading or decoding fails.
            handle.close()
        return payload.get('response').get('posts')

    offset = 0
    posts = get_tumblr_posts(api_key, blogname, offset)
    settings = read_settings()
    subs = settings['SLUG_REGEX_SUBSTITUTIONS']
    while posts:
        for post in posts:
            title = (post.get('title') or
                     post.get('source_title') or
                     post.get('type').capitalize())
            slug = post.get('slug') or slugify(title, regex_subs=subs)
            tags = post.get('tags')
            posted = SafeDatetime.fromtimestamp(int(post.get('timestamp')))
            date = posted.strftime("%Y-%m-%d %H:%M:%S")
            # Prefix the slug with the post date.
            slug = posted.strftime("%Y-%m-%d-") + slug
            post_format = post.get('format')
            content = post.get('body')
            post_type = post.get('type')
            if post_type == 'photo':
                if post_format == 'markdown':
                    fmtstr = '![%s](%s)'
                else:
                    fmtstr = '<img alt="%s" src="%s" />'
                # One line of markup per photo. Join the formatted strings
                # themselves; joining a single string would put a newline
                # between each of its characters.
                content = '\n'.join(
                    fmtstr % (photo.get('caption'),
                              photo.get('original_size').get('url'))
                    for photo in post.get('photos'))
                content += '\n\n' + post.get('caption')
            elif post_type == 'quote':
                if post_format == 'markdown':
                    fmtstr = '\n\n— %s'
                else:
                    fmtstr = '<p>— %s</p>'
                content = post.get('text') + fmtstr % post.get('source')
            elif post_type == 'link':
                if post_format == 'markdown':
                    fmtstr = '[via](%s)\n\n'
                else:
                    fmtstr = '<p><a href="%s">via</a></p>\n'
                content = fmtstr % post.get('url') + post.get('description')
            elif post_type == 'audio':
                if post_format == 'markdown':
                    fmtstr = '[via](%s)\n\n'
                else:
                    fmtstr = '<p><a href="%s">via</a></p>\n'
                content = (fmtstr % post.get('source_url') +
                           post.get('caption') +
                           post.get('player'))
            elif post_type == 'video':
                if post_format == 'markdown':
                    fmtstr = '[via](%s)\n\n'
                else:
                    fmtstr = '<p><a href="%s">via</a></p>\n'
                source = fmtstr % post.get('source_url')
                caption = post.get('caption')
                players = '\n'.join(
                    player.get('embed_code')
                    for player in post.get('player'))
                content = source + caption + players
            elif post_type == 'answer':
                title = post.get('question')
                # The URL belongs in href and the name is the link text.
                content = ('<p>'
                           '<a href="%s" rel="external nofollow">%s</a>'
                           ': %s'
                           '</p>\n'
                           ' %s' % (post.get('asking_url'),
                                    post.get('asking_name'),
                                    post.get('question'),
                                    post.get('answer')))

            content = content.rstrip() + '\n'
            kind = 'article'
            status = 'published'  # TODO: Find a way for draft posts

            yield (title, content, slug, date, post.get('blog_name'),
                   [post_type], tags, status, kind, post_format)

        offset += len(posts)
        posts = get_tumblr_posts(api_key, blogname, offset)
def parse(self):
    """Import Tumblr posts (API v2).

    Pages through the posts returned by ``self._get_tumblr_posts``,
    advancing the offset by the number of posts received, until an empty
    page comes back.

    Yields:
        Tuples of ``(title, content, slug, date, blog_name, [type], tags,
        status, kind, format)``. ``status`` is always ``"published"`` and
        ``kind`` is always ``"article"``.
    """
    offset = 0
    posts = self._get_tumblr_posts(offset)
    settings = read_settings()
    subs = settings["SLUG_REGEX_SUBSTITUTIONS"]
    while posts:
        for post in posts:
            title = (
                post.get("title")
                or post.get("source_title")
                or post.get("type").capitalize()
            )
            slug = post.get("slug") or slugify(title, regex_subs=subs)
            tags = post.get("tags")
            posted = SafeDatetime.fromtimestamp(int(post.get("timestamp")))
            date = posted.strftime("%Y-%m-%d %H:%M:%S")
            # Prefix the slug with the post date.
            slug = posted.strftime("%Y-%m-%d-") + slug
            post_format = post.get("format")
            content = post.get("body")
            post_type = post.get("type")
            if post_type == "photo":
                if post_format == "markdown":
                    fmtstr = "![%s](%s)"
                else:
                    fmtstr = '<img alt="%s" src="%s" />'
                # One line of markup per photo. Join the formatted strings
                # themselves; joining a single string would put a newline
                # between each of its characters.
                content = "\n".join(
                    fmtstr
                    % (
                        photo.get("caption"),
                        photo.get("original_size").get("url"),
                    )
                    for photo in post.get("photos")
                )
                content += "\n\n" + post.get("caption")
            elif post_type == "quote":
                if post_format == "markdown":
                    fmtstr = "\n\n— %s"
                else:
                    fmtstr = "<p>— %s</p>"
                content = post.get("text") + fmtstr % post.get("source")
            elif post_type == "link":
                if post_format == "markdown":
                    fmtstr = "[via](%s)\n\n"
                else:
                    fmtstr = '<p><a href="%s">via</a></p>\n'
                content = fmtstr % post.get("url") + post.get("description")
            elif post_type == "audio":
                if post_format == "markdown":
                    fmtstr = "[via](%s)\n\n"
                else:
                    fmtstr = '<p><a href="%s">via</a></p>\n'
                content = (
                    fmtstr % post.get("source_url")
                    + post.get("caption")
                    + post.get("player")
                )
            elif post_type == "video":
                if post_format == "markdown":
                    fmtstr = "[via](%s)\n\n"
                else:
                    fmtstr = '<p><a href="%s">via</a></p>\n'
                source = fmtstr % post.get("source_url")
                caption = post.get("caption")
                players = "\n".join(
                    player.get("embed_code") for player in post.get("player")
                )
                content = source + caption + players
            elif post_type == "answer":
                title = post.get("question")
                # The URL belongs in href and the name is the link text.
                content = (
                    "<p>"
                    '<a href="%s" rel="external nofollow">%s</a>'
                    ": %s"
                    "</p>\n"
                    " %s"
                    % (
                        post.get("asking_url"),
                        post.get("asking_name"),
                        post.get("question"),
                        post.get("answer"),
                    )
                )

            content = content.rstrip() + "\n"
            kind = "article"
            status = "published"  # TODO: Find a way for draft posts

            yield (
                title,
                content,
                slug,
                date,
                post.get("blog_name"),
                [post_type],
                tags,
                status,
                kind,
                post_format,
            )

        offset += len(posts)
        posts = self._get_tumblr_posts(offset)
def tumblr2fields(api_key, blogname):
    """Import Tumblr posts (API v2).

    Requests pages of posts for ``<blogname>.tumblr.com`` and keeps going,
    moving the offset forward by the size of each page, until a page with
    no posts is returned.

    Args:
        api_key: Tumblr API key placed in the request URL.
        blogname: Blog name used to build the API host path.

    Yields:
        Tuples of ``(title, content, slug, date, blog_name, [type], tags,
        status, kind, format)``. ``status`` is always ``'published'`` and
        ``kind`` is always ``'article'``.
    """
    try:
        # py3k import
        import json
    except ImportError:
        # py2 import
        import simplejson as json

    try:
        # py3k import
        import urllib.request as urllib_request
    except ImportError:
        # py2 import
        import urllib2 as urllib_request

    def get_tumblr_posts(api_key, blogname, offset=0):
        """Fetch one page of posts starting at ``offset``."""
        url = ("http://api.tumblr.com/v2/blog/%s.tumblr.com/"
               "posts?api_key=%s&offset=%d&filter=raw") % (
            blogname, api_key, offset)
        request = urllib_request.Request(url)
        handle = urllib_request.urlopen(request)
        try:
            payload = json.loads(handle.read().decode('utf-8'))
        finally:
            # Close the response whether or not decoding succeeded.
            handle.close()
        return payload.get('response').get('posts')

    offset = 0
    posts = get_tumblr_posts(api_key, blogname, offset)
    settings = read_settings()
    subs = settings['SLUG_REGEX_SUBSTITUTIONS']
    while posts:
        for post in posts:
            title = (post.get('title') or
                     post.get('source_title') or
                     post.get('type').capitalize())
            slug = post.get('slug') or slugify(title, regex_subs=subs)
            tags = post.get('tags')
            # Build the datetime once; it feeds both the date and the slug.
            posted = SafeDatetime.fromtimestamp(int(post.get('timestamp')))
            date = posted.strftime("%Y-%m-%d %H:%M:%S")
            slug = posted.strftime("%Y-%m-%d-") + slug
            post_format = post.get('format')
            content = post.get('body')
            post_type = post.get('type')
            if post_type == 'photo':
                if post_format == 'markdown':
                    fmtstr = '![%s](%s)'
                else:
                    fmtstr = '<img alt="%s" src="%s" />'
                # Emit one tag per photo, separated by newlines. Calling
                # join on a single formatted string would instead split
                # that string into characters.
                content = '\n'.join(
                    fmtstr % (photo.get('caption'),
                              photo.get('original_size').get('url'))
                    for photo in post.get('photos'))
                content += '\n\n' + post.get('caption')
            elif post_type == 'quote':
                if post_format == 'markdown':
                    fmtstr = '\n\n— %s'
                else:
                    fmtstr = '<p>— %s</p>'
                content = post.get('text') + fmtstr % post.get('source')
            elif post_type == 'link':
                if post_format == 'markdown':
                    fmtstr = '[via](%s)\n\n'
                else:
                    fmtstr = '<p><a href="%s">via</a></p>\n'
                content = fmtstr % post.get('url') + post.get('description')
            elif post_type == 'audio':
                if post_format == 'markdown':
                    fmtstr = '[via](%s)\n\n'
                else:
                    fmtstr = '<p><a href="%s">via</a></p>\n'
                content = (fmtstr % post.get('source_url') +
                           post.get('caption') +
                           post.get('player'))
            elif post_type == 'video':
                if post_format == 'markdown':
                    fmtstr = '[via](%s)\n\n'
                else:
                    fmtstr = '<p><a href="%s">via</a></p>\n'
                source = fmtstr % post.get('source_url')
                caption = post.get('caption')
                players = '\n'.join(
                    player.get('embed_code')
                    for player in post.get('player'))
                content = source + caption + players
            elif post_type == 'answer':
                title = post.get('question')
                # href takes the asker's URL; the visible text is the name.
                content = ('<p>'
                           '<a href="%s" rel="external nofollow">%s</a>'
                           ': %s'
                           '</p>\n'
                           ' %s' % (post.get('asking_url'),
                                    post.get('asking_name'),
                                    post.get('question'),
                                    post.get('answer')))

            content = content.rstrip() + '\n'
            kind = 'article'
            status = 'published'  # TODO: Find a way for draft posts

            yield (title, content, slug, date, post.get('blog_name'),
                   [post_type], tags, status, kind, post_format)

        offset += len(posts)
        posts = get_tumblr_posts(api_key, blogname, offset)
def to_date(s):
    """Convert the timestamp ``s`` with ``SafeDatetime.fromtimestamp``.

    The time-of-day portion is kept; nothing is truncated.
    """
    return SafeDatetime.fromtimestamp(s)