def prep_page_content(article_id, article, wiki, photos, user_obj):
    """
    Prepare the formatted article content.

    :param article_id: int; article id
    :param article: str; article name
    :param wiki: wikipedia; mediawiki api response page object
    :param photos: list; list of photo json
    :param user_obj: User; user object for request
    :return: dict; formatted page response passed to jinja template
    """
    # Strip unwanted elements, rewrite links, then apply generic formatting.
    html = parse_strip_elements(wiki.html())
    html = parse_convert_links(html)
    html = add_formatting_generic(html)

    # Register photos for this article/user, then weave them into the html.
    photo_ids = process_photos(article_id, photos, user_obj)
    html = handle_photo_integrate(photos, html, article)

    page_content = {
        'title': format_title_link(wiki.title, article),
        'content': html,
        'section_img_class': settings.SECTION_IMG_CLASS,
        'num_photos': len(photos),
        'article_id': article_id,
        'user_id': user_obj.get_id(),
        'photo_ids': photo_ids,
    }
    return page_content
def process(self, article):
    """
    Freshen the redis entry for `article`.

    Fetches the wikipedia page and the most relevant flickr photo for
    `article`, caches the combined content in redis, and returns the
    page's outbound links so the crawl can continue.

    :param article: str; article name
    :return: list; article links on success, None when the page could
             not be fetched
    """
    DataIORedis().connect()
    key = hmac(article)

    # Get wiki
    try:
        wiki = wikipedia.WikipediaPage(article, preload=True)
    except DisambiguationError:
        # TODO: choose a disambiguation candidate instead of giving up
        return
    except PageError:
        # bad page, nothing to cache; proceed with crawl
        return

    # extract & parse html
    html = parse_strip_elements(wiki.html())
    html = parse_convert_links(html)

    # Get flickr content
    res = flickr.call('photos_search', {'text': article,
                                        'format': 'json',
                                        'sort': 'relevance',
                                        })
    # TODO - detect failed responses
    # Response is wrapped as `jsonFlickrApi({...})`; slice off the wrapper
    # before decoding.
    res_json = json.loads(res[14:-1])

    # Extract data for the first photo returned. Guard against an empty
    # result set, which previously raised IndexError; the wiki page is
    # still valid, so return its links and let the crawl continue.
    photo_list = res_json.get('photos', {}).get('photo', [])
    if not photo_list:
        return wiki.links
    photo = photo_list[0]

    page_content = {
        'content': html,
        'owner': photo['owner'],
        'photo_id': photo['id'],
        'farm': photo['farm'],
        'server': photo['server'],
        'title': photo['title'],
        'secret': photo['secret'],
    }
    DataIORedis().write(key, json.dumps(page_content))

    # return the links
    return wiki.links