def page_print(request, lccn, date, edition, sequence, width, height, x1, y1, x2, y2):
    # Render the print view for a cropped region of a newspaper page image.
    # All URL kwargs arrive as strings: width/height are the requested output
    # size and (x1, y1)-(x2, y2) the crop box in page-image coordinates.
    # NOTE: template context is passed via locals(), so every local name
    # defined in this function becomes a template variable.
    page = get_page(lccn, date, edition, sequence)
    title = get_object_or_404(models.Title, lccn=lccn)
    issue = page.issue
    page_title = "%s, %s, %s" % (label(title), label(issue), label(page))
    crumbs = create_crumbs(title, issue, date, edition, page)
    host = request.get_host()
    image_credit = page.issue.batch.awardee.name
    # Rebuild this view's own URL (used by the template for linking/sharing).
    path_parts = dict(lccn=lccn, date=date, edition=edition, sequence=sequence, width=width, height=height, x1=x1, y1=y1, x2=x2, y2=y2)
    url = urlresolvers.reverse('chronam_page_print', kwargs=path_parts)
    # Convert to ints only after reversing, so path_parts keeps the original
    # string values that came in on the URL.
    width, height = int(width), int(height)
    x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
    # Never scale the output larger than the cropped region itself.
    width = min(width, (x2-x1))
    height = min(height, (y2-y1))
    # Hand-build the IIIF Image API URL: the identifier is the batch-relative
    # JP2 path with '/' percent-encoded, followed by /region/size/rotation/quality.
    # NOTE(review): assumes page.issue.batch.path ends with '/' so the JP2
    # filename concatenates cleanly onto it -- confirm against batch data.
    image_url = settings.IIIF + '%2F' \
        + page.issue.batch.path.replace('/opt/chronam/data/dlg_batches/','').replace('/','%2F') \
        + page.jp2_filename.replace('/','%2F') + '/' \
        + str(x1) + ',' + str(y1) + ',' + str(x2 - x1) + ',' + str(y2 - y1) \
        + '/' + str(width) + ',' + str(height) + '/0/default.jpg'
    return render_to_response('page_print.html', dictionary=locals(), context_instance=RequestContext(request))
def page_rdf(request, lccn, date, edition, sequence):
    """Serve the RDF/XML representation of a single newspaper page."""
    target_page = get_page(lccn, date, edition, sequence)
    rdf_graph = page_to_graph(target_page)
    payload = rdf_graph.serialize(base=_rdf_base(request), include_base=True)
    return HttpResponse(payload, mimetype='application/rdf+xml')
def page_rdf(request, lccn, date, edition, sequence):
    """Serve a page as RDF/XML, tagging the response for per-title cache purges."""
    page = get_page(lccn, date, edition, sequence)
    serialized = page_to_graph(page).serialize(base=_rdf_base(request), include_base=True)
    rdf_response = HttpResponse(serialized, content_type='application/rdf+xml')
    return add_cache_tag(rdf_response, "lccn=%s" % lccn)
# Scrape the configured TOPICS site: create models.TopicCategory rows for each
# category heading and models.Topic rows for each topic <li>, then fetch every
# topic's detail page to fill intro_text / important_dates /
# suggested_search_terms and to link TopicPages entries to chronam pages.
# Uses Python 2 urllib.urlopen and lxml.html; DB writes go through
# get_or_create, so reruns are idempotent for existing names.
# NOTE(review): description=...lstrip(...) strips a *character set*, not a
# prefix -- presumably intended as prefix removal; confirm before relying on it.
# NOTE(review): this definition was flattened onto single physical lines and
# the original statement nesting (e.g. what falls under "if is_new:") cannot
# be recovered from here; code below is preserved byte-for-byte.
def load_topic_and_categories(): """ This function takes a list topics/topic_categories and creates instances of models.Topic and models.TopicCategory exist with the given name, if one such instance doesn't already exist. #TODO: some parts of the code has ugly hacks to scrub text out of html. This will fail if structure of target html changes. Revisit! """ page = html.fromstring(urllib.urlopen('%s%s' % (settings.TOPICS_ROOT_URL, settings.TOPICS_SUBJECT_URL)).read()) total_topics = total_categories = new_topics = new_categories = filed_topics = 0 topics = list(page.iterdescendants('li')) category = None for topic_or_category in topics: if topic_or_category.text: #its a category, check if exists/ create one total_categories += 1 category_name = topic_or_category.text.rstrip(':') category, is_new = models.TopicCategory.objects.get_or_create(name=category_name) if is_new: new_categories += 1 _logger.info('Syncing category %s' % category_name) else: topic, start, end = prepare_topic_for_db_insert( topic_or_category.text_content()) total_topics += 1 topic, is_new = models.Topic.objects.get_or_create(name=topic, topic_start_year=start, topic_end_year=end, category=category) if is_new: new_topics += 1 _logger.info('Syncing topic %s' % topic.name) topic_url = list(topic_or_category.iterlinks())[0][2] if not topic_url.startswith('http://'): topic_url = '%s/%s' % (settings.TOPICS_ROOT_URL, topic_url) topic_page = html.fromstring(urllib.urlopen(topic_url).read()) topic.intro_text = list(topic_page.iterdescendants('p'))[0].text_content().encode('utf-8') topic.important_dates = list(topic_page.iterdescendants('ul'))[0].text_content().encode('utf-8') topic.suggested_search_terms = list(topic_page.iterdescendants('ul'))[1].text_content().encode('utf-8') topic.save() pages = list(topic_page.iterdescendants('ul'))[-1] for page in pages: page_url = list(page.iterlinks())[0][2] params = page_url.split('/') chronam_page = None try: params = params[params.index('lccn')+1:] chronam_page 
= utils.get_page(params[0], params[1], params[2][-1:], params[3][-1:]) _logger.info('Syncing topic with page :- lccn:%s.' % params[0]) except ValueError: pass except Http404: pass models.TopicPages.objects.get_or_create(page=chronam_page, topic=topic, query_params=params[-1], url=page_url, title=list(page.iterlinks())[0][0].text, description=page.text_content().lstrip(list( page.iterchildren())[0].text).lstrip('"').lstrip(','))
def chronam_topic(request, topic_id):
    """Render the detail page for a Topic, building breadcrumbs from the referer.

    If the visitor arrived from the recommended-topics listing, the crumbs lead
    back to it; if they arrived from a newspaper page, the crumbs reproduce that
    page's trail.  Any failure while parsing the referer leaves the base crumbs.

    Fix: request.META.get('HTTP_REFERER') is None on direct visits, which made
    both the ``in`` membership test and re.sub() raise -- default to '' instead.
    The bare ``except:`` is narrowed to ``except Exception:``.

    NOTE: template context is passed via locals(); local names here are part of
    the template contract.
    """
    topic = get_object_or_404(models.Topic, pk=topic_id)
    page_title = topic.name
    crumbs = list(settings.BASE_CRUMBS)
    if urlresolvers.reverse('recommended_topics') in request.META.get('HTTP_REFERER', ''):
        crumbs.extend([
            {'label': 'Recommended Topics',
             'href': urlresolvers.reverse('recommended_topics')},
            {'label': topic.name,
             'href': urlresolvers.reverse('chronam_topic', kwargs={'topic_id': topic.pk})},
        ])
    else:
        # Strip the scheme and split the referer path; if it names a page
        # (lccn/date/edition/sequence), rebuild that page's breadcrumb trail.
        referer = re.sub('^https?:\/\/', '', request.META.get('HTTP_REFERER', '')).split('/')
        try:
            lccn, date, edition, sequence = referer[2], referer[3], referer[4][-1], referer[5][-1]
            page = get_page(lccn, date, edition, sequence)
            if page:
                title, issue, page = _get_tip(lccn, date, edition, sequence)
                crumbs = create_crumbs(title, issue, date, edition, page)
                crumbs.extend([{'label': topic.name,
                                'href': urlresolvers.reverse('chronam_topic',
                                                             kwargs={'topic_id': topic.pk})}])
        except Exception:
            # Referer did not look like a page URL; keep the default crumbs.
            pass
    important_dates = filter(lambda s: not s.isspace(), topic.important_dates.split('\n '))
    search_suggestions = topic.suggested_search_terms.split('\t')
    # NOTE(review): lstrip(t.title) strips *characters* contained in the title,
    # not the title prefix -- presumably meant as prefix removal; confirm.
    chronam_pages = [{'title': t.title,
                      'description': t.description.lstrip(t.title),
                      'url': t.url} for t in topic.topicpages_set.all()]
    return render_to_response('topic.html', dictionary=locals(),
                              context_instance=RequestContext(request))
def page_print(request, lccn, date, edition, sequence, width, height, x1, y1, x2, y2):
    # Render the print view for a page-image crop; width/height and the
    # (x1, y1)-(x2, y2) crop box arrive as URL string kwargs and are passed
    # through unchanged.
    # NOTE: template context is passed via locals(), so every local name here
    # becomes a template variable.
    page = get_page(lccn, date, edition, sequence)
    title = get_object_or_404(models.Title, lccn=lccn)
    issue = page.issue
    page_title = "%s, %s, %s" % (label(title), label(issue), label(page))
    crumbs = create_crumbs(title, issue, date, edition, page)
    host = request.get_host()
    image_credit = page.issue.batch.awardee.name
    # Rebuild this view's own URL (used by the template for linking/sharing).
    path_parts = dict(lccn=lccn, date=date, edition=edition, sequence=sequence, width=width, height=height, x1=x1, y1=y1, x2=x2, y2=y2)
    url = urlresolvers.reverse('chronam_page_print', kwargs=path_parts)
    response = render_to_response('page_print.html', dictionary=locals(), context_instance=RequestContext(request))
    # Tag the response so cached copies can be purged by LCCN.
    return add_cache_tag(response, "lccn=%s" % lccn)
def medium(request, lccn, date, edition, sequence):
    """Serve a 550px-wide JPEG rendition of the page image, cache-tagged by LCCN."""
    page = get_page(lccn, date, edition, sequence)
    try:
        resized = _get_resized_image(page, 550)
    except IOError as err:
        return HttpResponseServerError("Unable to create thumbnail: %s" % err)
    response = HttpResponse(content_type="image/jpeg")
    resized.save(response, "JPEG")
    return add_cache_tag(response, "lccn=%s" % lccn)
def thumbnail(request, lccn, date, edition, sequence):
    """Serve a JPEG thumbnail of the page.

    When settings.PREGEN_THUMBNAILS is on, the pre-generated image is used;
    otherwise the page image is resized on the fly to settings.THUMBNAIL_WIDTH.

    Fixes: the function previously ended after obtaining ``im`` and implicitly
    returned None (no HTTP response was ever built), and used the Python 2-only
    ``except IOError, e`` syntax.  It now serializes ``im`` into an image/jpeg
    response, matching the sibling image views.
    """
    page = get_page(lccn, date, edition, sequence)
    if settings.PREGEN_THUMBNAILS:
        im = _get_image(page, True)
    else:
        try:
            im = _get_resized_image(page, settings.THUMBNAIL_WIDTH)
        except IOError as e:
            return HttpResponseServerError("Unable to create thumbnail: %s" % e)
    response = HttpResponse(content_type="image/jpeg")
    im.save(response, "JPEG")
    return response
def page_image(request, lccn, date, edition, sequence, width, height):
    """Serve the full page image at the requested size.

    Delegates to page_image_tile over the whole page unless IIIF redirection
    is enabled, in which case the client is sent straight to the IIIF server.
    """
    page = get_page(lccn, date, edition, sequence)
    if not settings.REDIRECT_IMAGES_TO_IIIF:
        return page_image_tile(request, lccn, date, edition, sequence,
                               width, height,
                               0, 0, page.jp2_width, page.jp2_length)
    # Redirect straight to IIIF to avoid a duplicate database query.
    return HttpResponseRedirect(page.iiif_client.size(width=width, height=height))
def page_print(request, lccn, date, edition, sequence, width, height, x1, y1, x2, y2):
    # Render the print view for a page-image crop, preferring IIIF URLs when a
    # client is available.  width/height are the requested output size and
    # (x1, y1)-(x2, y2) the crop box; all arrive as URL strings.
    # NOTE: template context is passed via locals(), so every local name here
    # becomes a template variable.
    width, height, x1, y1, x2, y2 = map(int, (width, height, x1, y1, x2, y2))
    page = get_page(lccn, date, edition, sequence)
    title = get_object_or_404(models.Title, lccn=lccn)
    issue = page.issue
    page_title = "%s, %s, %s" % (label(title), label(issue), label(page))
    crumbs = create_crumbs(title, issue, date, edition, page)
    host = request.get_host()
    image_credit = page.issue.batch.awardee.name
    # Kwargs for reversing both this view's URL and the tile fallback URL.
    path_parts = {
        "lccn": lccn,
        "date": date,
        "edition": edition,
        "sequence": sequence,
        "width": width,
        "height": height,
        "x1": x1,
        "y1": y1,
        "x2": x2,
        "y2": y2,
    }
    url = urlresolvers.reverse("chronam_page_print", kwargs=path_parts)
    # Human-readable filename offered for the download link.
    download_filename = "%s %s %s %s image %dx%d from %dx%d to %dx%d.jpg" % (
        lccn,
        date,
        edition,
        sequence,
        width,
        height,
        x1,
        y1,
        x2,
        y2,
    )
    if page.iiif_client:
        # IIIF path: crop the region, then scale it to the requested size.
        download_url = page.iiif_client.region(x=x1, y=y1, width=x2 - x1, height=y2 - y1)
        image_url = download_url.size(width=width, height=height)
    else:
        # Fallback: serve both links through the local tile view.
        download_url = urlresolvers.reverse("chronam_page_image_tile", kwargs=path_parts)
        image_url = urlresolvers.reverse("chronam_page_image_tile", kwargs=path_parts)
    response = render_to_response("page_print.html", dictionary=locals(), context_instance=RequestContext(request))
    # Tag the response so cached copies can be purged by LCCN.
    return add_cache_tag(response, "lccn=%s" % lccn)
def similar_pages(page):
    """Query Solr for up to 25 pages from other titles published on the same
    date in the same cities as *page*, resolved back into page objects."""
    solr = SolrConnection(settings.SOLR)
    issued = page.issue.date_issued
    # Solr stores the date as YYYYMMDD with zero-padded month/day.
    date = '{0:02d}{1:02d}{2:02d}'.format(issued.year, issued.month, issued.day)
    cities = [place.city for place in page.issue.title.places.all()]
    query = '+type:page AND date:%s AND %s AND NOT(lccn:%s)' % (
        date, query_join(cities, 'city'), page.issue.title.lccn)
    hits = solr.query(query, rows=25).results
    return [utils.get_page(**urlresolvers.resolve(hit['id']).kwargs)
            for hit in hits]
def page_image_tile(request, lccn, date, edition, sequence, width, height, x1, y1, x2, y2):
    """Serve a cropped, resized JPEG tile of the page image.

    (x1, y1)-(x2, y2) is the crop box and width/height the output size, all
    arriving as URL strings.  A truthy ``download`` query parameter switches
    the content type so browsers save rather than display the image.

    Fixes: the function previously stopped after loading the image, returning
    None instead of an HTTP response -- the crop/resize/save tail (as in the
    complete sibling implementation) is restored.  The Python 2-only
    ``except IOError, e`` syntax is also corrected.
    """
    page = get_page(lccn, date, edition, sequence)
    if "download" in request.GET and request.GET["download"]:
        response = HttpResponse(mimetype="binary/octet-stream")
    else:
        response = HttpResponse(mimetype="image/jpeg")
    width, height = int(width), int(height)
    x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
    try:
        im = _get_image(page)
    except IOError as e:
        return HttpResponseServerError("Unable to create image tile: %s" % e)
    # Never scale the output larger than the cropped region itself.
    width = min(width, (x2 - x1))
    height = min(height, (y2 - y1))
    c = im.crop((x1, y1, x2, y2))
    f = c.resize((width, height))
    f.save(response, "JPEG")
    return response
def similar_pages(page):
    """Return up to 25 pages from other titles published the same day in the
    same cities as *page*, or None when the title has no usable city.

    Fixes: ``places.all()[0]`` raised IndexError for titles with no places --
    the guard now handles an empty queryset.  The queryset is evaluated once
    and the Solr connection is only opened when a query will actually run.
    """
    places = page.issue.title.places.all()
    if not places or not places[0].city:
        return None
    solr = SolrConnection(settings.SOLR)
    d = page.issue.date_issued
    # Solr stores the date as YYYYMMDD with zero-padded month/day.
    year, month, day = '{0:02d}'.format(d.year), '{0:02d}'.format(d.month), '{0:02d}'.format(d.day)
    date = ''.join(map(str, (year, month, day)))
    query = '+type:page AND date:%s AND %s AND NOT(lccn:%s)' % (
        date,
        query_join(map(lambda p: p.city, places), 'city'),
        page.issue.title.lccn)
    response = solr.query(query, rows=25)
    results = response.results
    # Resolve each Solr hit's URL back into a page object.
    return map(lambda kwargs: utils.get_page(**kwargs),
               map(lambda r: urlresolvers.resolve(r['id']).kwargs, results))
def thumbnail(request, lccn, date, edition, sequence):
    """Serve a page thumbnail: redirect to IIIF when enabled, otherwise
    render a JPEG resized to settings.THUMBNAIL_WIDTH."""
    page = get_page(lccn, date, edition, sequence)
    if settings.REDIRECT_IMAGES_TO_IIIF:
        return HttpResponseRedirect(page.thumb_url)
    try:
        scaled = _get_resized_image(page, settings.THUMBNAIL_WIDTH)
    except IOError as err:
        return HttpResponseServerError("Unable to create thumbnail: %s" % err)
    response = HttpResponse(content_type="image/jpeg")
    scaled.save(response, "JPEG")
    return response
def page_image_tile(request, lccn, date, edition, sequence, width, height, x1, y1, x2, y2):
    """Serve a cropped, resized JPEG tile of the page image.

    (x1, y1)-(x2, y2) is the crop box and width/height the output size, all
    arriving as URL strings.  A truthy ``download`` query parameter switches
    the content type so browsers save rather than display the image.

    Fixes: the function previously stopped after loading the image and
    implicitly returned None -- the crop/resize/save tail (mirroring the
    complete sibling implementation) is restored, and the Python 2-only
    ``except IOError, e`` syntax is corrected.
    """
    page = get_page(lccn, date, edition, sequence)
    if 'download' in request.GET and request.GET['download']:
        response = HttpResponse(content_type="binary/octet-stream")
    else:
        response = HttpResponse(content_type="image/jpeg")
    width, height = int(width), int(height)
    x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
    try:
        im = _get_image(page)
    except IOError as e:
        return HttpResponseServerError("Unable to create image tile: %s" % e)
    # Never scale the output larger than the cropped region itself.
    width = min(width, (x2 - x1))
    height = min(height, (y2 - y1))
    c = im.crop((x1, y1, x2, y2))
    f = c.resize((width, height))
    f.save(response, "JPEG")
    return response
def page_image_tile(request, lccn, date, edition, sequence, width, height, x1, y1, x2, y2):
    """Serve (or IIIF-redirect to) a cropped, resized tile of the page image.

    (x1, y1)-(x2, y2) is the crop box and width/height the output size; all
    arrive as URL strings and are coerced to ints here.
    """
    page = get_page(lccn, date, edition, sequence)
    width = int(width)
    height = int(height)
    x1, y1, x2, y2 = (int(coord) for coord in (x1, y1, x2, y2))
    if settings.REDIRECT_IMAGES_TO_IIIF:
        return redirect_to_iiif(request, page.iiif_client, width, height, x1, y1, x2, y2)
    # Both loading the source image and serving the tile can raise; keep the
    # whole local-rendering path under one handler, as before.
    try:
        source = _get_image(page)
        return serve_image_tile(request, source, width, height, x1, y1, x2, y2)
    except EnvironmentError as e:
        logging.exception("Unable to create image tile for %s", page)
        return HttpResponseServerError("Unable to create image tile: %s" % e)
def page_print(request, lccn, date, edition, sequence, width, height, x1, y1, x2, y2):
    # Render the print view for a page-image crop; width/height and the
    # (x1, y1)-(x2, y2) crop box arrive as URL string kwargs and are passed
    # through unchanged.
    # NOTE: template context is passed via locals(), so every local name here
    # becomes a template variable.
    page = get_page(lccn, date, edition, sequence)
    title = get_object_or_404(models.Title, lccn=lccn)
    issue = page.issue
    page_title = "%s, %s, %s" % (label(title), label(issue), label(page))
    crumbs = create_crumbs(title, issue, date, edition, page)
    host = request.get_host()
    image_credit = page.issue.batch.awardee.name
    # Rebuild this view's own URL (used by the template for linking/sharing).
    path_parts = dict(lccn=lccn, date=date, edition=edition, sequence=sequence, width=width, height=height, x1=x1, y1=y1, x2=x2, y2=y2)
    url = urlresolvers.reverse('chronam_page_print', kwargs=path_parts)
    return render_to_response('page_print.html', dictionary=locals(), context_instance=RequestContext(request))
def page_image_tile(request, lccn, date, edition, sequence, width, height, x1, y1, x2, y2):
    """Crop region (x1, y1)-(x2, y2) from the page image, scale it to at most
    width x height, and serve it as JPEG (cache-tagged by LCCN).

    A truthy ``download`` query parameter switches the content type so the
    browser saves the image instead of displaying it inline.
    """
    page = get_page(lccn, date, edition, sequence)
    wants_download = 'download' in request.GET and request.GET['download']
    mime = "binary/octet-stream" if wants_download else "image/jpeg"
    response = HttpResponse(content_type=mime)
    width, height, x1, y1, x2, y2 = [int(v) for v in (width, height, x1, y1, x2, y2)]
    try:
        source = _get_image(page)
    except IOError as err:
        return HttpResponseServerError("Unable to create image tile: %s" % err)
    # Never scale the output larger than the cropped region itself.
    width = min(width, x2 - x1)
    height = min(height, y2 - y1)
    tile = source.crop((x1, y1, x2, y2)).resize((width, height))
    tile.save(response, "JPEG")
    return add_cache_tag(response, "lccn=%s" % lccn)
def page_image(request, lccn, date, edition, sequence, width, height):
    """Serve the full page image at the requested size by delegating to
    page_image_tile with a crop box covering the whole page."""
    page = get_page(lccn, date, edition, sequence)
    full_region = (0, 0, page.jp2_width, page.jp2_length)
    return page_image_tile(request, lccn, date, edition, sequence,
                           width, height, *full_region)
def medium(request, lccn, date, edition, sequence):
    """Serve a 550px-wide JPEG rendition of the page image.

    Fixes: the function previously ended after resizing and implicitly
    returned None (no HTTP response was built) -- the JPEG response tail,
    matching the sibling implementation, is restored.  The Python 2-only
    ``except IOError, e`` syntax is also corrected.
    """
    page = get_page(lccn, date, edition, sequence)
    try:
        im = _get_resized_image(page, 550)
    except IOError as e:
        return HttpResponseServerError("Unable to create thumbnail: %s" % e)
    response = HttpResponse(content_type="image/jpeg")
    im.save(response, "JPEG")
    return response
# Scrape the configured TOPICS site: create models.TopicCategory rows for each
# category heading and models.Topic rows for each topic <li>, then fetch every
# topic's detail page to fill intro_text / important_dates /
# suggested_search_terms and to link TopicPages entries to chronam pages.
# Uses Python 2 urllib.urlopen and lxml.html; DB writes go through
# get_or_create, so reruns are idempotent for existing names.
# NOTE(review): description=...lstrip(...) strips a *character set*, not a
# prefix -- presumably intended as prefix removal; confirm before relying on it.
# NOTE(review): this definition was flattened onto single physical lines and
# the original statement nesting (e.g. what falls under "if is_new:") cannot
# be recovered from here; code below is preserved byte-for-byte.
def load_topic_and_categories(): """ This function takes a list topics/topic_categories and creates instances of models.Topic and models.TopicCategory exist with the given name, if one such instance doesn't already exist. #TODO: some parts of the code has ugly hacks to scrub text out of html. This will fail if structure of target html changes. Revisit! """ page = html.fromstring( urllib.urlopen( '%s%s' % (settings.TOPICS_ROOT_URL, settings.TOPICS_SUBJECT_URL)).read()) total_topics = total_categories = new_topics = new_categories = filed_topics = 0 topics = list(page.iterdescendants('li')) category = None for topic_or_category in topics: if topic_or_category.text: #its a category, check if exists/ create one total_categories += 1 category_name = topic_or_category.text.rstrip(':') category, is_new = models.TopicCategory.objects.get_or_create( name=category_name) if is_new: new_categories += 1 _logger.info('Syncing category %s' % category_name) else: topic, start, end = prepare_topic_for_db_insert( topic_or_category.text_content()) total_topics += 1 topic, is_new = models.Topic.objects.get_or_create( name=topic, topic_start_year=start, topic_end_year=end, category=category) if is_new: new_topics += 1 _logger.info('Syncing topic %s' % topic.name) topic_url = list(topic_or_category.iterlinks())[0][2] if not topic_url.startswith('http://'): topic_url = '%s/%s' % (settings.TOPICS_ROOT_URL, topic_url) topic_page = html.fromstring(urllib.urlopen(topic_url).read()) topic.intro_text = list(topic_page.iterdescendants( 'p'))[0].text_content().encode('utf-8') topic.important_dates = list(topic_page.iterdescendants( 'ul'))[0].text_content().encode('utf-8') topic.suggested_search_terms = list( topic_page.iterdescendants('ul'))[1].text_content().encode( 'utf-8') topic.save() pages = list(topic_page.iterdescendants('ul'))[-1] for page in pages: page_url = list(page.iterlinks())[0][2] params = page_url.split('/') chronam_page = None try: params = params[params.index('lccn') + 1:] 
chronam_page = utils.get_page(params[0], params[1], params[2][-1:], params[3][-1:]) _logger.info('Syncing topic with page :- lccn:%s.' % params[0]) except ValueError: pass except Http404: pass models.TopicPages.objects.get_or_create( page=chronam_page, topic=topic, query_params=params[-1], url=page_url, title=list(page.iterlinks())[0][0].text, description=page.text_content().lstrip( list(page.iterchildren())[0].text).lstrip('"').lstrip( ','))