def add_root_elements(self, handler):
    Atom1Feed.add_root_elements(self, handler)
    if 'hub' in self.feed and self.feed['hub']:
        handler.addQuickElement(u'link', u'',
                                {u'href': self.feed['hub'], u'rel': u'hub'})
    if 'author_avatar' in self.feed and self.feed['author_avatar']:
        handler.addQuickElement(u'link', u'',
                                {u'href': self.feed['author_avatar'], u'rel': u'avatar'})
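
# Hedged usage sketch (illustrative, not from the original source). The
# override above is meant to live on an Atom1Feed subclass: SyndicationFeed
# keeps unrecognised constructor keyword arguments in self.feed, so passing
# hub= and author_avatar= to the constructor is all the checks above need.
# The class name, import path, and URLs below are assumptions.
from django.utils.feedgenerator import Atom1Feed


class ExtendedAtom1Feed(Atom1Feed):
    def add_root_elements(self, handler):
        super(ExtendedAtom1Feed, self).add_root_elements(handler)
        # Emit an extra <link> element for each optional key that was supplied.
        for key, rel in (('hub', 'hub'), ('author_avatar', 'avatar')):
            if self.feed.get(key):
                handler.addQuickElement('link', '',
                                        {'href': self.feed[key], 'rel': rel})


example_feed = ExtendedAtom1Feed(
    title='Example feed',
    link='https://example.org/',
    description='Feed with hub and avatar link elements',
    hub='https://hub.example.org/',               # assumed value
    author_avatar='https://example.org/avatar.png')  # assumed value
example_xml = example_feed.writeString('utf-8')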
def __init__(self, title, link, description):
    self.feed = Atom1Feed(
        title=title,
        link=link,
        description=description,
        language=u"en",
    )
def build_feed(config):
    site_url = config["site_url"]
    feed_url = "%s/feed.atom" % site_url
    atom1_feed = Atom1Feed(title="Twittback",
                           description="Latest tweets",
                           link=site_url,
                           feed_url=feed_url)
    return atom1_feed
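
# Hedged usage sketch (illustrative only): one way the feed returned by
# build_feed might be populated and written out. The "site_url" config key and
# the feed.atom filename follow build_feed itself; the tweet fields, the
# timestamp, and the output path are assumptions, not taken from the project.
from datetime import datetime

config = {"site_url": "https://example.org"}
feed = build_feed(config)
tweets = [
    {"twitter_id": 1, "text": "Hello, world!", "timestamp": 1493157617},
]
for tweet in tweets:
    feed.add_item(
        title=tweet["text"],
        link="%s/tweet/%s" % (config["site_url"], tweet["twitter_id"]),
        description=tweet["text"],
        unique_id=str(tweet["twitter_id"]),
        pubdate=datetime.utcfromtimestamp(tweet["timestamp"]))

with open("feed.atom", "w", encoding="utf-8") as out:
    out.write(feed.writeString("utf-8"))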
def generate(query):
    # Parse query
    path_split = query.split('/')
    query_type = path_split[1]
    query_short_link = path_split[2]

    # Get card or board info
    if query_type == 'c':
        json_card = api_request('cards/' + query_short_link, {
            'actions_display': 'true',
            'actions': 'addAttachmentToCard,addChecklistToCard,addMemberToCard,commentCard,copyCommentCard,convertToCardFromCheckItem,createCard,copyCard,deleteAttachmentFromCard,emailCard,moveCardFromBoard,moveCardToBoard,removeChecklistFromCard,removeMemberFromCard,updateCard:idList,updateCard:closed,updateCard:due,updateCard:dueComplete,updateCheckItemStateOnCard,updateCustomFieldItem',
            'fields': 'name,desc,url'
        })
        json_actions = json_card['actions']
        feed_title = 'Trello Activity On Card ' + json_card['name']
        feed_description = json_card['desc']
        feed_link = json_card['url']
    else:
        json_board = api_request('boards/' + query_short_link, {
            'actions_display': 'true',
            'actions': 'addAttachmentToCard,addChecklistToCard,addMemberToCard,commentCard,copyCommentCard,convertToCardFromCheckItem,createCard,copyCard,deleteAttachmentFromCard,emailCard,moveCardFromBoard,moveCardToBoard,removeChecklistFromCard,removeMemberFromCard,updateCard:idList,updateCard:closed,updateCard:due,updateCard:dueComplete,updateCheckItemStateOnCard,updateCustomFieldItem',
            'fields': 'name,desc,url'
        })
        json_actions = json_board['actions']
        feed_title = 'Trello Activity On Board ' + json_board['name']
        feed_description = json_board['desc']
        feed_link = json_board['url']

    feed = Atom1Feed(title=feed_title,
                     description=feed_description,
                     link=feed_link)

    for action in json_actions:
        description = ''
        date = datetime.strptime(action['date'], '%Y-%m-%dT%H:%M:%S.%fZ')
        categories = ['trello', action['type'],
                      action['data']['card']['name'],
                      action['data']['board']['name']]
        link = ('https://trello.com/c/' + action['data']['card']['shortLink']
                + '#action-' + action['id'])
        feed.add_item(
            title=render_action(action, text_only=True),
            link=link,
            author_name=action['memberCreator']['fullName'],
            author_link='https://trello.com/' + action['memberCreator']['username'],
            pubdate=date,
            updateddate=date,
            unique_id=str(action['id']),
            description=render_action(action, text_only=False),
            categories=categories)

    return feed.writeString("utf-8")
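
# Hedged sketch of the api_request helper used above, whose implementation is
# not shown in this snippet. It assumes the public Trello REST API at
# https://api.trello.com/1/ with key/token query-string authentication; the
# credential placeholders and the use of requests.get are assumptions.
import requests

TRELLO_API_KEY = 'your-api-key'      # hypothetical placeholder
TRELLO_API_TOKEN = 'your-api-token'  # hypothetical placeholder


def api_request(path, params):
    # Merge the caller's query parameters with the credentials and return
    # the decoded JSON response.
    params = dict(params, key=TRELLO_API_KEY, token=TRELLO_API_TOKEN)
    response = requests.get('https://api.trello.com/1/' + path, params=params)
    response.raise_for_status()
    return response.json()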
def feed(feed_type, page=1):
    if feed_type not in ['json', 'xml']:
        abort(404)

    author_info = {
        'author_name': 'Myles Braithwaite',
        'author_link': 'https://mylesb.ca/',
        'author_email': '*****@*****.**'
    }

    meta_info = {
        'title': "Myles' GIFs",
        'description': 'GIF is pronounced with a soft G.',
        'link': 'https://gifs.mylesb.ca/',
        'feed_guid': 'https://gifs.mylesb.ca/',
        'language': 'en'
    }

    if feed_type == 'json':
        feed = JSONFeed(**{**meta_info, **author_info})
        mimetype = feed.content_type
    else:
        feed = Atom1Feed(**{**meta_info, **author_info})
        mimetype = 'application/atom+xml'

    gifs = all_gifs()
    paginator = Paginator(gifs, 25)
    page = paginator.get_page(page)

    if page.has_next and feed_type == 'json':
        feed.next_url = 'https://gifs.mylesb.ca{}'.format(
            url_for('views.feed', feed_type='json',
                    page=page.next_page_number))

    for gif in page.object_list:
        feed.add_item(
            guid=gif['html_url'],
            title=gif.get('caption', 'GIF'),
            link=gif['html_url'],
            description=('<a href="{html_url}">'
                         '<img src="{image_url}"></a>').format(**gif),
            pubdate=gif['date'],
            categories=gif.get('keywords'),
            **author_info)

    return Response(feed.writeString('utf-8'), mimetype=mimetype)
def generate(query):
    url_base = 'http://' + query
    url_music = url_base + '/music'

    # Load page
    page = get(url_music, headers={'User-Agent': 'Mozilla/5.0'})
    tree = html.fromstring(page.content)
    tree.make_links_absolute(url_base)

    # A title/name for the whole feed
    feed_title = tree.get_element_by_id('band-name-location')\
        .find_class('title')[0].text_content()

    # A description for the feed
    bio_element = tree.get_element_by_id('bio-text', None)
    if bio_element is None:
        feed_description = ""
    else:
        feed_description = bio_element.text_content()

    # Link to where the feed was generated from
    feed_link = url_base

    feed = Atom1Feed(title=feed_title,
                     description=feed_description,
                     link=feed_link)

    # Get albums
    albums = get_albums(tree, url_base)

    # Generate feed items
    for album in albums:
        # print(album.title)
        # print('  ' + album.link)
        # print('  ' + album.artwork)
        desc = '<a href="' + album.link + '">' + \
               '<img src="' + album.artwork + '" width="500px">' + \
               '</img>' + \
               '</a>'
        feed.add_item(title=album.title,
                      link=album.link,
                      pubdate=datetime.now(),
                      unique_id=album.title,
                      description=desc,
                      categories=('music', 'bandcamp'))

    return feed.writeString("utf-8")
def generate(query):
    # Useful snippet for loading a JSON document from a site:
    #     response = urlopen(url)
    #     json_data = loads(response.read().decode("utf-8"))
    # json_data is then a tree of dictionaries containing the JSON response

    # A title/name for the whole feed
    feed_title = "A feed title"

    # A description for the feed
    # Optional and maybe not too relevant in all cases
    feed_description = "A short feed description"

    # Link to where the feed was generated from
    feed_link = "https://where.the/feed/came/from/"

    feed = Atom1Feed(title=feed_title,
                     description=feed_description,
                     link=feed_link)

    posts = ()
    for post in posts:
        # An id that must be unique for this post.
        # This shouldn't change even if the post contents change
        post_id = 987345

        # A title for the post
        post_title = "A Post Title"

        # Link to the post itself
        post_link = "https://website.com/a_post"

        # The date the post was updated.
        # The following converts a Unix epoch timestamp into a datetime:
        post_date = datetime.utcfromtimestamp(1493157617)

        # Post description or contents.
        # This is what will be shown in your feed reader
        post_description = "A description/some post contents"

        # A list of tags/categories for the post.
        # This is optional but can be very useful for filtering in feed readers
        post_tags = ("a", "list", "of", "tags")

        feed.add_item(title=post_title,
                      link=post_link,
                      pubdate=post_date,
                      unique_id=str(post_id),
                      description=post_description,
                      categories=post_tags)

    return feed.writeString("utf-8")
def generate(query):
    # Get issue information
    url_issue = 'https://api.github.com/repos' + query
    json_issue = get(url_issue).json()

    # A title/name for the whole feed
    feed_title = json_issue['title']

    # A description for the feed
    feed_description = 'Issue number: ' + str(json_issue['number'])

    # Link to where the feed was generated from
    feed_link = json_issue['html_url']

    feed = Atom1Feed(title=feed_title,
                     description=feed_description,
                     link=feed_link)

    # Get latest comments
    url_comments = json_issue['comments_url']
    links_comments = head(url_comments).links
    if 'last' in links_comments:
        url_latest = links_comments['last']['url']
    else:
        url_latest = url_comments
    json_comments = get(url_latest).json()

    # Generate feed items
    for comment in json_comments:
        feed.add_item(
            title='Comment by ' + comment['user']['login'],
            link=comment['html_url'],
            author_name=comment['user']['login'],
            author_link=comment['user']['html_url'],
            pubdate=datetime.strptime(comment['created_at'],
                                      '%Y-%m-%dT%H:%M:%SZ'),
            updateddate=datetime.strptime(comment['updated_at'],
                                          '%Y-%m-%dT%H:%M:%SZ'),
            unique_id=str(comment['id']),
            description=comment['body'],
            categories=('github', 'issue comment'))

    return feed.writeString("utf-8")
def feed(feed_type):
    if feed_type not in ["json", "xml"]:
        abort(404)

    author_info = {
        "author_name": "Myles Braithwaite",
        "author_link": "https://mylesb.ca/",
        "author_email": "*****@*****.**",
    }

    meta_info = {
        "title": "Braithwaite I/O",
        "link": "https://braithwaite.io/",
        "feed_guid": "https://braithwaite.io/",
        "description": "Myles Braithwaite's Fancy Jupyter Notebooks Blog.",
        "language": "en",
    }

    if feed_type == "json":
        feed = JSONFeed(**{**meta_info, **author_info})
        mimetype = feed.content_type
    else:
        feed = Atom1Feed(**meta_info)
        mimetype = "application/atom+xml"

    for notebook in all_notebooks():
        feed.add_item(
            guid="https://braithwaite.io{}".format(notebook.url),
            title=notebook.name,
            link="https://braithwaite.io{}".format(notebook.url),
            summary=notebook.summary,
            description=notebook.content,
            pubdate=notebook.published,
            categories=notebook.category,
            **author_info,
        )

    return Response(feed.writeString("utf-8"), mimetype=mimetype)
def generate(query):
    # Parse query
    path_split = query.split('/')
    query_channel = path_split[1]
    query_type = path_split[3]

    # Get Client-ID
    global_js = get('https://web-cdn.ttvnw.net/global.js').text
    client_id = re.findall('clientID:"(.+?)"', global_js)[0]
    request_headers = {'Client-ID': client_id}

    # Get channel info
    json_channel = get('https://api.twitch.tv/kraken/channels/' + query_channel,
                       headers=request_headers).json()

    # A title/name for the whole feed
    feed_title = json_channel['display_name'] + '\'s Videos: ' + query_type.title()

    # A description for the feed
    feed_description = ('Twitch Videos By ' + json_channel['display_name']
                        + ' (' + query_type.title() + ')')

    # Link to where the feed was generated from
    feed_link = json_channel['url']

    feed = Atom1Feed(title=feed_title,
                     description=feed_description,
                     link=feed_link)

    # Get videos
    query_types = {
        'all': 'archive,upload,highlight',
        'uploads': 'upload',
        'past-broadcasts': 'archive',
        'highlights': 'highlight'
    }
    request_params = {'broadcast_type': query_types[query_type]}
    request_url = ('https://api.twitch.tv/kraken/channels/'
                   + query_channel + '/videos')
    json_videos = get(url=request_url,
                      headers=request_headers,
                      params=request_params).json()

    # Generate feed items
    for video in json_videos['videos']:
        description = '<p><img src="' + video['preview'] + '" ></p>'
        if video['description_html']:
            description = description + video['description_html']
        date = datetime.strptime(video['published_at'], '%Y-%m-%dT%H:%M:%SZ')
        feed.add_item(title=video['title'],
                      link=video['url'],
                      author_name=json_channel['display_name'],
                      author_link=json_channel['url'],
                      pubdate=date,
                      updateddate=date,
                      unique_id=video['_id'],
                      description=description,
                      categories=('twitch', video['broadcast_type']))

    return feed.writeString("utf-8")
def build(args=None):
    """Builds an Engineer site using the settings specified in *args*."""
    from engineer.conf import settings
    from engineer.loaders import LocalLoader
    from engineer.log import get_file_handler
    from engineer.models import PostCollection, TemplatePage
    from engineer.themes import ThemeManager
    from engineer.util import mirror_folder, ensure_exists, slugify

    if args and args.clean:
        clean()

    settings.create_required_directories()

    logger = logging.getLogger('engineer.engine.build')
    logger.parent.addHandler(get_file_handler(settings.LOG_FILE))
    logger.debug("Starting build using configuration file %s." % settings.SETTINGS_FILE)

    build_stats = {
        'time_run': times.now(),
        'counts': {
            'template_pages': 0,
            'new_posts': 0,
            'cached_posts': 0,
            'rollups': 0,
            'tag_pages': 0,
        },
        'files': {},
    }

    # Remove the output cache (not the post cache or the Jinja cache)
    # since we're rebuilding the site
    settings.OUTPUT_CACHE_DIR.rmtree(ignore_errors=True)

    theme = ThemeManager.current_theme()
    engineer_lib = (settings.OUTPUT_STATIC_DIR / 'engineer/lib/').abspath()
    ensure_exists(engineer_lib)

    # Copy Foundation files if used
    if theme.use_foundation:
        s = settings.ENGINEER.LIB_DIR / settings.ENGINEER.FOUNDATION_CSS
        t = ensure_exists(engineer_lib / settings.ENGINEER.FOUNDATION_CSS)
        mirror_folder(s, t)
        logger.debug("Copied Foundation library files.")

    # Copy LESS js file if needed
    if theme.use_lesscss and not settings.PREPROCESS_LESS:
        s = settings.ENGINEER.LIB_DIR / settings.ENGINEER.LESS_JS
        s.copy(engineer_lib)
        logger.debug("Copied LESS CSS files.")

    # Copy jQuery files if needed
    if theme.use_jquery:
        s = settings.ENGINEER.LIB_DIR / settings.ENGINEER.JQUERY
        s.copy(engineer_lib)
        logger.debug("Copied jQuery files.")

    # Copy modernizr files if needed
    if theme.use_modernizr:
        s = settings.ENGINEER.LIB_DIR / settings.ENGINEER.MODERNIZR
        s.copy(engineer_lib)
        logger.debug("Copied Modernizr files.")

    # Copy normalize.css if needed
    if theme.use_normalize_css:
        s = settings.ENGINEER.LIB_DIR / settings.ENGINEER.NORMALIZE_CSS
        s.copy(engineer_lib)
        logger.debug("Copied normalize.css.")

    # Copy 'raw' content to output cache - first pass
    # This first pass ensures that any static content - JS/LESS/CSS - that
    # is needed by site-specific pages (like template pages) is available
    # during the build
    if settings.CONTENT_DIR.exists():
        mirror_folder(settings.CONTENT_DIR,
                      settings.OUTPUT_CACHE_DIR,
                      delete_orphans=False)

    # Copy theme static content to output dir
    theme_output_dir = settings.OUTPUT_STATIC_DIR / 'theme'
    logger.debug("Copying theme static files to output cache.")
    theme.copy_content(theme_output_dir)
    logger.debug("Copied static files for theme to %s." % relpath(theme_output_dir))

    # Copy any theme additional content to output dir if needed
    if theme.content_mappings:
        logger.debug("Copying additional theme content to output cache.")
        theme.copy_related_content(theme_output_dir)
        logger.debug("Copied additional files for theme to %s."
                     % relpath(theme_output_dir))

    # Load markdown input posts
    logger.info("Loading posts...")
    new_posts, cached_posts = LocalLoader.load_all(input=settings.POST_DIR)
    all_posts = PostCollection(new_posts + cached_posts)

    to_publish = PostCollection(all_posts.published)
    if settings.PUBLISH_DRAFTS:
        to_publish.extend(all_posts.drafts)
    if settings.PUBLISH_PENDING:
        to_publish.extend(all_posts.pending)
    if settings.PUBLISH_REVIEW:
        to_publish.extend(all_posts.review)

    if not settings.PUBLISH_PENDING and len(all_posts.pending) > 0:
        logger.warning("This site contains the following pending posts:")
        for post in all_posts.pending:
            logger.warning("\t'%s' - publish time: %s, %s." %
                           (post.title, naturaltime(post.timestamp),
                            post.timestamp_local))
        logger.warning("These posts won't be published until you build the "
                       "site again after their publish time.")

    all_posts = PostCollection(
        sorted(to_publish, reverse=True, key=lambda p: p.timestamp))

    # Generate template pages
    if settings.TEMPLATE_PAGE_DIR.exists():
        logger.info("Generating template pages from %s." % settings.TEMPLATE_PAGE_DIR)
        template_pages = []
        for template in settings.TEMPLATE_PAGE_DIR.walkfiles('*.html'):
            # We create all the TemplatePage objects first so we have all of
            # the URLs to them in the template environment. Without this step,
            # template pages might have broken links if they link to a page
            # that is loaded after them, since the URL to the not-yet-loaded
            # page will be missing.
            template_pages.append(TemplatePage(template))
        for page in template_pages:
            rendered_page = page.render_html(all_posts)
            ensure_exists(page.output_path)
            with open(page.output_path / page.output_file_name,
                      mode='wb', encoding='UTF-8') as the_file:
                the_file.write(rendered_page)
                logger.info("Output template page %s." % relpath(the_file.name))
                build_stats['counts']['template_pages'] += 1
        logger.info("Generated %s template pages." %
                    build_stats['counts']['template_pages'])

    # Generate individual post pages
    for post in all_posts:
        rendered_post = post.render_html(all_posts)
        ensure_exists(post.output_path)
        with open(post.output_path, mode='wb', encoding='UTF-8') as the_file:
            the_file.write(rendered_post)
            if post in new_posts:
                logger.console("Output new or modified post '%s'." % post.title)
                build_stats['counts']['new_posts'] += 1
            elif post in cached_posts:
                build_stats['counts']['cached_posts'] += 1

    # Generate rollup pages
    num_posts = len(all_posts)
    num_slices = (num_posts / settings.ROLLUP_PAGE_SIZE) \
        if num_posts % settings.ROLLUP_PAGE_SIZE == 0 \
        else (num_posts / settings.ROLLUP_PAGE_SIZE) + 1

    slice_num = 0
    for posts in all_posts.paginate():
        slice_num += 1
        has_next = slice_num < num_slices
        has_previous = 1 < slice_num <= num_slices
        rendered_page = posts.render_listpage_html(slice_num, has_next, has_previous)
        ensure_exists(posts.output_path(slice_num))
        with open(posts.output_path(slice_num),
                  mode='wb', encoding='UTF-8') as the_file:
            the_file.write(rendered_page)
            logger.debug("Output rollup page %s." % relpath(the_file.name))
            build_stats['counts']['rollups'] += 1

        # Copy first rollup page to root of site - it's the homepage.
        if slice_num == 1:
            path.copyfile(posts.output_path(slice_num),
                          settings.OUTPUT_CACHE_DIR / 'index.html')
            logger.debug("Output '%s'."
                         % (settings.OUTPUT_CACHE_DIR / 'index.html'))
    # Generate archive page
    if num_posts > 0:
        archive_output_path = settings.OUTPUT_CACHE_DIR / 'archives/index.html'
        ensure_exists(archive_output_path)
        rendered_archive = all_posts.render_archive_html(all_posts)
        with open(archive_output_path, mode='wb', encoding='UTF-8') as the_file:
            the_file.write(rendered_archive)
            logger.debug("Output %s." % relpath(the_file.name))

    # Generate tag pages
    if num_posts > 0:
        tags_output_path = settings.OUTPUT_CACHE_DIR / 'tag'
        for tag in all_posts.all_tags:
            rendered_tag_page = all_posts.render_tag_html(tag, all_posts)
            tag_path = ensure_exists(tags_output_path / slugify(tag) / 'index.html')
            with open(tag_path, mode='wb', encoding='UTF-8') as the_file:
                the_file.write(rendered_tag_page)
                build_stats['counts']['tag_pages'] += 1
                logger.debug("Output %s." % relpath(the_file.name))

    # Generate feeds
    rss_feed_output_path = ensure_exists(settings.OUTPUT_CACHE_DIR / 'feeds/rss.xml')
    atom_feed_output_path = ensure_exists(settings.OUTPUT_CACHE_DIR / 'feeds/atom.xml')
    rss_feed = Rss201rev2Feed(title=settings.FEED_TITLE,
                              link=settings.SITE_URL,
                              description=settings.FEED_DESCRIPTION,
                              feed_url=settings.FEED_URL)
    atom_feed = Atom1Feed(title=settings.FEED_TITLE,
                          link=settings.SITE_URL,
                          description=settings.FEED_DESCRIPTION,
                          feed_url=settings.FEED_URL)

    for feed in (rss_feed, atom_feed):
        for post in all_posts[:settings.FEED_ITEM_LIMIT]:
            title = settings.JINJA_ENV.get_template(
                'core/feeds/title.jinja2').render(post=post)
            link = settings.JINJA_ENV.get_template(
                'core/feeds/link.jinja2').render(post=post)
            content = settings.JINJA_ENV.get_template(
                'core/feeds/content.jinja2').render(post=post)
            feed.add_item(title=title,
                          link=link,
                          description=content,
                          pubdate=post.timestamp,
                          unique_id=post.absolute_url)

    with open(rss_feed_output_path, mode='wb') as the_file:
        rss_feed.write(the_file, 'UTF-8')
        logger.debug("Output %s." % relpath(the_file.name))

    with open(atom_feed_output_path, mode='wb') as the_file:
        atom_feed.write(the_file, 'UTF-8')
        logger.debug("Output %s." % relpath(the_file.name))

    # Generate sitemap
    sitemap_file_name = 'sitemap.xml.gz'
    sitemap_output_path = ensure_exists(settings.OUTPUT_CACHE_DIR / sitemap_file_name)
    sitemap_content = settings.JINJA_ENV.get_or_select_template(
        ['sitemap.xml',
         'theme/sitemap.xml',
         'core/sitemap.xml']).render(post_list=all_posts)
    with gzip.open(sitemap_output_path, mode='wb') as the_file:
        the_file.write(sitemap_content)
        logger.debug("Output %s." % relpath(the_file.name))

    # Copy 'raw' content to output cache - second/final pass
    if settings.CONTENT_DIR.exists():
        mirror_folder(settings.CONTENT_DIR,
                      settings.OUTPUT_CACHE_DIR,
                      delete_orphans=False)

    # Compress all files marked for compression
    for the_file, compression_type in settings.COMPRESS_FILE_LIST:
        if the_file not in settings.COMPRESSION_CACHE:
            with open(the_file, mode='rb') as input:
                output = compress(input.read(), compression_type)
                logger.debug("Compressed %s." % relpath(the_file))
            settings.COMPRESSION_CACHE[the_file] = output
        else:
            logger.debug("Found pre-compressed file in cache: %s." % relpath(the_file))
            output = settings.COMPRESSION_CACHE[the_file]
        with open(the_file, mode='wb') as f:
            f.write(output)

    # Remove LESS files if LESS preprocessing is being done
    if settings.PREPROCESS_LESS:
        logger.debug("Deleting LESS files since PREPROCESS_LESS is True.")
        for f in settings.OUTPUT_STATIC_DIR.walkfiles(pattern="*.less"):
            logger.debug("Deleting file: %s." % relpath(f))
            f.remove_p()

    # Check if anything has changed other than the sitemap
    have_changes = False
    compare = filecmp.dircmp(settings.OUTPUT_CACHE_DIR,
                             settings.OUTPUT_DIR,
                             ignore=settings.OUTPUT_DIR_IGNORE)

    # The algorithm below takes advantage of the fact that once we've
    # determined that there is more than one file that's different, or if the
    # first item returned by the generator is not the sitemap, then we can
    # break out of the generator loop early. This is also advantageous because
    # it doesn't require us to completely exhaust the generator. In the case
    # of a fresh site build, for example, the generator will return a lot more
    # data. So the other approach here of expanding the generator into a list
    # with a list comprehension would be inefficient in many cases. This
    # approach performs equally well in all cases at the cost of some
    # unusual-looking code.
    diff_file_count = 0
    if not has_files(settings.OUTPUT_DIR):
        have_changes = True
    else:
        for file_path in diff_dir(compare):
            diff_file_count += 1
            if file_path != sitemap_output_path:
                have_changes = True
                break
            if diff_file_count > 1:
                have_changes = True
                break

    if not have_changes:
        logger.console('')
        logger.console("No site changes to publish.")
    else:
        logger.debug("Synchronizing output directory with output cache.")
        build_stats['files'] = mirror_folder(
            settings.OUTPUT_CACHE_DIR,
            settings.OUTPUT_DIR,
            ignore_list=settings.OUTPUT_DIR_IGNORE)
        from pprint import pformat
        logger.debug("Folder mirroring report: %s" % pformat(build_stats['files']))
        logger.console('')
        logger.console("Site: '%s' output to %s." %
                       (settings.SITE_TITLE, settings.OUTPUT_DIR))
        logger.console("Posts: %s (%s new or updated)" %
                       ((build_stats['counts']['new_posts'] +
                         build_stats['counts']['cached_posts']),
                        build_stats['counts']['new_posts']))
        logger.console("Post rollup pages: %s (%s posts per page)" %
                       (build_stats['counts']['rollups'], settings.ROLLUP_PAGE_SIZE))
        logger.console("Template pages: %s" % build_stats['counts']['template_pages'])
        logger.console("Tag pages: %s" % build_stats['counts']['tag_pages'])
        logger.console("%s new items, %s modified items, and %s deleted items." %
                       (len(build_stats['files']['new']),
                        len(build_stats['files']['overwritten']),
                        len(build_stats['files']['deleted'])))
        logger.console('')
        logger.console("Full build log at %s." % settings.LOG_FILE)
        logger.console('')

    with open(settings.BUILD_STATS_FILE, mode='wb') as the_file:
        pickle.dump(build_stats, the_file)
    settings.CACHE.close()
    return build_stats
def generate(config):
    sitemap = []
    target = config['target']
    src = config['src']

    if not os.path.exists(target):
        os.mkdir(target)

    gens = [klass(config) for klass in generators]
    match = {}
    for gen in gens:
        for ext in gen.exts:
            match[ext] = gen

    cats = config.get('categories', '').split(',')
    cats = [cat.strip() for cat in cats if cat.strip() != '']
    config['categories'] = cats

    for path in ('cnd', 'siteurl'):
        if os.path.exists(config[path]):
            config[path] = os.path.abspath(config[path])

    source_root = config.get('source_root')

    for root, dirs, files in os.walk(src):
        for file in files:
            if file.startswith('_'):
                continue
            if file.endswith('.DS_Store'):
                continue
            ext = os.path.splitext(file)[-1]
            path = os.path.join(root, file)
            if ext in ('.mako', '.un~'):
                continue
            # configurable XXX
            if (os.path.split(path)[0] == 'src/auteurs'
                    and file.endswith('.rst')):
                continue
            location = path[len(src) + 1:]
            file_target = os.path.join(target, location)
            target_dir = os.path.dirname(file_target)
            file_target_name, ext = os.path.splitext(file_target)
            url_target = file_target_name[len(target):] + '.html'
            if source_root is not None:
                config['github'] = os.path.join(source_root, location)
            if not os.path.exists(target_dir):
                os.makedirs(target_dir)
            # now calling the right generator
            if ext in match:
                try:
                    match[ext](path, file_target, url_target, config=config)
                except Exception:
                    logger.info('Failed on %s' % path)
                    raise
                sitemap.append((url_target, True))
            else:
                logger.info('Copying %r' % file_target)
                shutil.copyfile(path, file_target)

    if 'github' in config:
        del config['github']

    # media
    media = str(config['media'])
    if os.path.exists(media):
        shutil.rmtree(media)
    shutil.copytree('media', media)

    # building category pages now
    categories = defaultdict(list)
    for key, index in get_index():
        path, title = key.split(':')
        for key, value in index.items():
            if key != 'category':
                continue
            for cat in value:
                categories[cat].append((path, title))

    for wanted in config['categories']:
        if wanted in categories:
            continue
        categories[wanted] = []

    gen = Mako(config)
    for cat, paths in categories.items():
        logger.info('Generating category %r' % cat)
        url_target = '/%s.html' % cat
        file_target = os.path.join(target, cat + '.html')
        gen(config['cats'], file_target, url_target, paths=paths,
            title=cat.capitalize(), config=config, category=cat)
        sitemap.append((url_target, False))

    # creating the authors index page
    authors = {}
    for key, index in get_index():
        path, title = key.split(':')
        for key, authors_ in index.items():
            if key != 'author':
                continue
            for author_name in authors_:
                author_id = str2authorid(author_name)
                if author_id in authors:
                    authors[author_id]['articles'].append((title, path))
                else:
                    # should be configurable
                    link = '%s/auteurs/%s.html' % (config['siteurl'], author_id)
                    authors[author_id] = {
                        'link': link,
                        'articles': [(title, path)],
                        'name': author_name
                    }

    authors = authors.items()
    authors.sort()

    # XXX should be configurable...
    authors_template = os.path.join(src, 'auteurs', 'index.mako')
    logger.info('Generating authors index')
    url_target = '/auteurs/index.html'
    file_target = os.path.join(target, 'auteurs', 'index.html')
    gen(authors_template, file_target, url_target, authors=authors,
        title="Auteurs", config=config)
    sitemap.append((url_target, False))

    # creating the author pages
    gen = RestructuredText(config)
    for author_id, data in authors:
        template = os.path.join(src, 'auteurs', '%s.rst' % author_id)
        if not os.path.exists(template):
            logger.warning('Template not found for author %r' % author_id)
            continue
        # we're supposed to find an author .rst file in /auteur
        url_target = '/auteurs/%s.html' % author_id
        file_target = os.path.join(target, 'auteurs', '%s.html' % author_id)
        fd, tmp = tempfile.mkstemp()
        os.close(fd)

        def _line(line):
            title, path = line
            path = '%s/%s' % (config['siteurl'].rstrip('/'), path)
            return u'- `%s <%s>`_' % (title, path)

        articles = AUTHOR_ARTICLES % '\n'.join(
            [_line(data) for data in data['articles']])

        with codecs.open(template, encoding='utf8') as source_file:
            with codecs.open(tmp, 'w', encoding='utf8') as target_file:
                data = source_file.read()
                data += articles + '\n'
                target_file.write(data)

        try:
            gen(tmp, file_target, url_target, config=config)
        finally:
            os.remove(tmp)

        sitemap.append((url_target, True))

    # create the atom feed
    siteurl = config.get('siteurl')
    feed = Atom1Feed(title='FaitMain.org',
                     link=siteurl,
                     feed_url=siteurl + 'feed.xml',
                     description=config.get('site-description'))

    for key, article in get_articles():
        path, title = key.split(':')
        feed.add_item(title=title,
                      link='%s/%s' % (siteurl, path),
                      description=article['body'],
                      categories=article['category'],
                      author_name=article['author'],
                      pubdate=article['date'])

    with open(os.path.join(target, 'feed.xml'), 'w') as f:
        feed.write(f, 'utf-8')

    # creating sitemap
    sitemap_file = os.path.join(target, 'sitemap.json')
    logger.info('Generating sitemap at %r' % sitemap_file)
    now = datetime.datetime.now().isoformat()
    urlset = [{
        'loc': loc,
        'lastmod': now,
        'changefreq': 'monthly',
        'priority': 0.1,
        'indexable': int(indexable)
    } for loc, indexable in sitemap]

    with open(sitemap_file, 'w') as f:
        f.write(json.dumps({'urlset': urlset}))

    # asking Trouvailles to index the web site
    if int(config['indexing']) == 0:
        return

    logger.info('Indexing the whole website')
    url = config['search_server']
    data = {'sitemap': config['sitemap']}
    headers = {'Content-type': 'application/json'}
    r = requests.post(url, data=json.dumps(data), headers=headers)
    if r.status_code != 200:
        logger.info('Indexation failed')
        logger.info(r.status_code)
        logger.info(r.content)
def generate(query):
    # Parse query
    path_split = query.split('/')
    query_user = path_split[1]

    # Get user info
    json_user = api_request('resolve',
                            {'url': 'https://soundcloud.com/' + query_user})

    # A title/name for the whole feed
    feed_title = json_user['username']

    # A description for the feed
    # Optional and maybe not too relevant in all cases
    feed_description = json_user['description']

    # Link to where the feed was generated from
    feed_link = json_user['permalink_url']

    feed = Atom1Feed(title=feed_title,
                     description=feed_description,
                     link=feed_link)

    # Get sounds
    json_tracks = get_tracks(json_user['id'])

    for track in json_tracks:
        description = ''
        if track['artwork_url']:
            description = '<p><img src="' + track['artwork_url'] + '" ></p>'
        if track['description']:
            description = description + track['description']

        pubdate = datetime.strptime(track['created_at'],
                                    '%Y/%m/%d %H:%M:%S +0000')
        updateddate = datetime.strptime(track['last_modified'],
                                        '%Y/%m/%d %H:%M:%S +0000')

        categories = ['soundcloud']
        if track['genre']:
            categories += [track['genre']]
        if track['tag_list']:
            lex = shlex(track['tag_list'], posix=True)
            lex.whitespace_split = True
            lex.commenters = ''
            lex.quotes = '"'
            categories += list(lex)

        feed.add_item(title=track['title'],
                      link=track['permalink_url'],
                      author_name=track['user']['username'],
                      author_link=track['user']['permalink_url'],
                      pubdate=pubdate,
                      updateddate=updateddate,
                      unique_id=str(track['id']),
                      description=description,
                      categories=categories)

    return feed.writeString("utf-8")