def openoni_topic(request, topic_id):
    """Render the detail page for a Topic, with breadcrumbs derived from
    the HTTP referrer.

    If the user arrived from the "Recommended Topics" listing, the crumbs
    reflect that listing; if they arrived from a newspaper page URL
    (/lccn/<lccn>/<date>/ed-<n>/seq-<n>/...), page-level crumbs are built
    instead. Any other (or missing) referrer falls back to the base crumbs.

    NOTE: ``locals()`` is passed as the template context, so the local
    variable names below (page_title, crumbs, important_dates,
    search_suggestions, openoni_pages, ...) are part of the template
    contract and must not be renamed.
    """
    topic = get_object_or_404(models.Topic, pk=topic_id)
    page_title = topic.name
    crumbs = list(settings.BASE_CRUMBS)

    # HTTP_REFERER may be absent; default to '' so the membership test and
    # re.sub below cannot raise TypeError on None.
    referer_url = request.META.get('HTTP_REFERER') or ''

    if urlresolvers.reverse('recommended_topics') in referer_url:
        crumbs.extend([
            {'label': 'Recommended Topics',
             'href': urlresolvers.reverse('recommended_topics')},
            {'label': topic.name,
             'href': urlresolvers.reverse('openoni_topic',
                                          kwargs={'topic_id': topic.pk})},
        ])
    else:
        # Strip the scheme and split the referrer path; a page URL looks
        # like host/lccn/<lccn>/<date>/ed-<edition>/seq-<sequence>/...
        referer = re.sub(r'^https?:\/\/', '', referer_url).split('/')
        try:
            lccn, date, edition, sequence = (referer[2], referer[3],
                                             referer[4][-1], referer[5][-1])
            page = get_page(lccn, date, edition, sequence)
            if page:
                title, issue, page = _get_tip(lccn, date, edition, sequence)
                crumbs = create_crumbs(title, issue, date, edition, page)
                crumbs.extend([
                    {'label': topic.name,
                     'href': urlresolvers.reverse(
                         'openoni_topic', kwargs={'topic_id': topic.pk})},
                ])
        except Exception:
            # Best-effort: a referrer that isn't a page URL (IndexError on
            # the unpack, Http404 from get_page, ...) just means we keep
            # the base crumbs.  Deliberately broad, but no longer a bare
            # except that would swallow KeyboardInterrupt/SystemExit.
            pass

    # Keep the original predicate exactly: empty strings are retained,
    # only all-whitespace entries are dropped.
    important_dates = [s for s in topic.important_dates.split('\n ')
                       if not s.isspace()]
    search_suggestions = topic.suggested_search_terms.split('\t')

    openoni_pages = []
    for t in topic.topicpages_set.all():
        description = t.description
        # Remove the title *prefix* from the description.  The previous
        # description.lstrip(t.title) stripped any leading characters that
        # appear anywhere in the title, which could eat into the
        # description text itself.
        if description.startswith(t.title):
            description = description[len(t.title):]
        openoni_pages.append({'title': t.title,
                              'description': description,
                              'url': t.url})

    return render_to_response('topic.html', dictionary=locals(),
                              context_instance=RequestContext(request))
def page_rdf(request, lccn, date, edition, sequence):
    """Serve a single newspaper page as RDF/XML.

    Looks up the page, converts it to an RDF graph, and returns the
    serialized graph with content type ``application/rdf+xml``.
    """
    rdf_page = get_page(lccn, date, edition, sequence)
    graph = page_to_graph(rdf_page)
    serialized = graph.serialize(base=_rdf_base(request), include_base=True)
    return HttpResponse(serialized, content_type='application/rdf+xml')
def similar_pages(page):
    """Return pages similar to *page*: up to 25 Solr hits that share the
    issue date and at least one of the title's cities, excluding the
    page's own title (by LCCN).
    """
    solr = SolrConnection(settings.SOLR)

    # Solr stores the date as a zero-padded YYYYMMDD string.
    issued = page.issue.date_issued
    date_str = '{0:02d}{1:02d}{2:02d}'.format(issued.year, issued.month,
                                              issued.day)

    cities = [place.city for place in page.issue.title.places.all()]
    query = '+type:page AND date:%s AND %s AND NOT(lccn:%s)' % (
        date_str, query_join(cities, 'city'), page.issue.title.lccn)

    results = solr.query(query, rows=25).results

    # Each Solr document id is a page URL; resolve it back to view kwargs
    # and load the corresponding page object.
    pages = []
    for result in results:
        kwargs = urlresolvers.resolve(result['id']).kwargs
        pages.append(utils.get_page(**kwargs))
    return pages
def page_print(request, lccn, date, edition, sequence, width, height, x1, y1, x2, y2):
    """Render the print view for a cropped region of a newspaper page.

    NOTE: ``locals()`` is the template context, so every local name below
    (page, title, issue, page_title, crumbs, host, image_credit,
    path_parts, url) is visible to ``page_print.html`` and must keep its
    name.
    """
    page = get_page(lccn, date, edition, sequence)
    title = get_object_or_404(models.Title, lccn=lccn)
    issue = page.issue
    page_title = "%s, %s, %s" % (label(title), label(issue), label(page))
    crumbs = create_crumbs(title, issue, date, edition, page)
    host = request.get_host()
    image_credit = page.issue.batch.awardee.name
    # URL kwargs echo the view arguments so the print link round-trips.
    path_parts = {
        'lccn': lccn,
        'date': date,
        'edition': edition,
        'sequence': sequence,
        'width': width,
        'height': height,
        'x1': x1,
        'y1': y1,
        'x2': x2,
        'y2': y2,
    }
    url = urlresolvers.reverse('openoni_page_print', kwargs=path_parts)
    return render_to_response('page_print.html', dictionary=locals(),
                              context_instance=RequestContext(request))
def load_topic_and_categories():
    """
    This function takes a list topics/topic_categories and creates instances
    of models.Topic and models.TopicCategory exist with the given name, if
    one such instance doesn't already exist.

    Scrapes the remote topics subject page (settings.TOPICS_ROOT_URL +
    settings.TOPICS_SUBJECT_URL), then for each topic follows its link and
    scrapes intro text, important dates, suggested search terms, and the
    list of related newspaper pages, syncing everything into the database.

    #TODO: some parts of the code has ugly hacks to scrub text out of html.
    This will fail if structure of target html changes. Revisit!
    """
    # Fetch and parse the subject listing page (Python 2 urllib + lxml.html).
    page = html.fromstring(urllib.urlopen("%s%s" % (settings.TOPICS_ROOT_URL, settings.TOPICS_SUBJECT_URL)).read())
    total_topics = total_categories = new_topics = new_categories = filed_topics = 0
    # Every <li> is either a category heading (has bare .text) or a topic
    # entry (text only inside child elements, so .text is falsy).
    topics = list(page.iterdescendants("li"))
    category = None
    for topic_or_category in topics:
        if topic_or_category.text:
            # its a category, check if exists/ create one
            total_categories += 1
            category_name = topic_or_category.text.rstrip(":")
            category, is_new = models.TopicCategory.objects.get_or_create(name=category_name)
            if is_new:
                new_categories += 1
            _logger.info("Syncing category %s" % category_name)
        else:
            # Topic entry: parse "<name> (<start>-<end>)" style text into
            # its parts, then upsert under the most recent category seen.
            topic, start, end = prepare_topic_for_db_insert(topic_or_category.text_content())
            total_topics += 1
            topic, is_new = models.Topic.objects.get_or_create(
                name=topic, topic_start_year=start, topic_end_year=end, category=category
            )
            if is_new:
                new_topics += 1
            _logger.info("Syncing topic %s" % topic.name)
            # First link in the <li> is the topic's detail page; relative
            # URLs are resolved against TOPICS_ROOT_URL.
            topic_url = list(topic_or_category.iterlinks())[0][2]
            if not topic_url.startswith("http://"):
                topic_url = "%s/%s" % (settings.TOPICS_ROOT_URL, topic_url)
            topic_page = html.fromstring(urllib.urlopen(topic_url).read())
            # HACK: positional scraping — first <p> is the intro, first <ul>
            # the dates, second <ul> the search terms (see TODO above).
            topic.intro_text = list(topic_page.iterdescendants("p"))[0].text_content().encode("utf-8")
            topic.important_dates = list(topic_page.iterdescendants("ul"))[0].text_content().encode("utf-8")
            topic.suggested_search_terms = list(topic_page.iterdescendants("ul"))[1].text_content().encode("utf-8")
            topic.save()
            # Last <ul> on the topic page is assumed to hold the sample
            # newspaper-page links -- TODO confirm against target markup.
            pages = list(topic_page.iterdescendants("ul"))[-1]
            for page in pages:
                page_url = list(page.iterlinks())[0][2]
                params = page_url.split("/")
                openoni_page = None
                try:
                    # Keep only the path segments after "lccn":
                    # [lccn, date, ed-<n>, seq-<n>, ...]; [-1:] trims the
                    # "ed-"/"seq-" prefixes down to the final digit.
                    params = params[params.index("lccn") + 1 :]
                    openoni_page = utils.get_page(params[0], params[1], params[2][-1:], params[3][-1:])
                    _logger.info("Syncing topic with page :- lccn:%s." % params[0])
                except ValueError:
                    # "lccn" not in the URL; leave openoni_page as None.
                    pass
                except Http404:
                    # Page not loaded locally; still record the link below.
                    pass
                # NOTE(review): .lstrip(...) strips a *character set*, not a
                # prefix, so the description scrub is approximate.
                models.TopicPages.objects.get_or_create(
                    page=openoni_page, topic=topic, query_params=params[-1],
                    url=page_url, title=list(page.iterlinks())[0][0].text,
                    description=page.text_content().lstrip(list(page.iterchildren())[0].text).lstrip('"').lstrip(","),
                )