Example #1
def status(request):
    page_title = "System Status"
    page_count = models.Page.objects.all().count()
    issue_count = models.Issue.objects.all().count()
    batch_count = models.Batch.objects.all().count()
    title_count = models.Title.objects.all().count()
    holding_count = models.Holding.objects.all().count()
    essay_count = models.Essay.objects.all().count()
    pages_indexed = index.page_count()
    titles_indexed = index.title_count()
    return render_to_response("reports/status.html", dictionary=locals(),
                              context_instance=RequestContext(request))
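The `index.page_count()` and `index.title_count()` helpers that every example on this page calls are not shown here. A minimal sketch of how they might be implemented, assuming a pysolr client and a hypothetical `type` field that distinguishes title documents from page documents in the Solr core:

import pysolr

# Hypothetical core URL; in the real project this would come from settings.
solr = pysolr.Solr('http://localhost:8983/solr/chronam')

def title_count():
    # rows=0 asks Solr for no documents, only the total hit count (numFound)
    return solr.search('type:title', rows=0).hits

def page_count():
    return solr.search('type:page', rows=0).hits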
Example #2
def search_titles_results(request):
    page_title = 'US Newspaper Directory Search Results'
    crumbs = list(settings.BASE_CRUMBS)
    crumbs.extend([{'label': 'Search Newspaper Directory',
                    'href': reverse('chronam_search_titles')},
                   ])

    def prep_title_for_return(t):
        title = {}
        title.update(t.solr_doc)
        title['oclc'] = t.oclc
        return title

    format = request.GET.get('format', None)

    # Check whether the requested format is CSV before building pages for the
    # response. The CSV response does not use pagination; instead, all matching
    # titles from Solr are returned at once.
    if format == 'csv':
        query = request.GET.copy()
        q, fields, sort_field, sort_order = index.get_solr_request_params_from_query(query)
        
        # Return all titles in CSV format. This may hurt performance; the
        # assumption is that this request is not made often.
        # TODO: revisit if the assumption is incorrect
        solr_response = index.execute_solr_query(q, fields, sort_field, 
                                                 sort_order, index.title_count(), 0)
        titles = index.get_titles_from_solr_documents(solr_response)

        csv_header_labels = ('lccn', 'title', 'place_of_publication', 'start_year',
                             'end_year', 'publisher', 'edition', 'frequency', 'subject', 
                             'state', 'city', 'country', 'language', 'oclc',
                             'holding_type',)
        response = HttpResponse(content_type='text/csv')
        response['Content-Disposition'] = 'attachment; filename="chronam_titles.csv"'
        writer = csv.writer(response)
        writer.writerow(csv_header_labels)
        for title in titles:
            writer.writerow(map(lambda val: smart_str(val or '--'),
                               (title.lccn, title.name, title.place_of_publication,
                                title.start_year, title.end_year, title.publisher, 
                                title.edition, title.frequency, 
                                map(str, title.subjects.all()), 
                                set(map(lambda p: p.state, title.places.all())), 
                                map(lambda p: p.city, title.places.all()),
                                str(title.country), map(str, title.languages.all()),
                                title.oclc, title.holding_types)))
        return response
 
    try:
        curr_page = int(request.GET.get('page', 1))
    except ValueError:
        curr_page = 1
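Example #2 is cut off by the source page after the curr_page parsing. Its CSV branch relies on a general Django pattern that is worth isolating: HttpResponse is file-like, so csv.writer can write rows straight into it without any temporary file. A standalone sketch (view name and sample row are illustrative only):

import csv

from django.http import HttpResponse

def export_csv(request):
    response = HttpResponse(content_type='text/csv')
    response['Content-Disposition'] = 'attachment; filename="export.csv"'
    writer = csv.writer(response)  # HttpResponse supports write()
    writer.writerow(('lccn', 'title'))  # header row
    writer.writerow(('sn12345678', 'Example Gazette'))  # illustrative data
    return response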
Example #3
def search_titles_results(request):
    page_title = 'US Newspaper Directory Search Results'
    crumbs = list(settings.BASE_CRUMBS)
    crumbs.extend([{'label': 'Search Newspaper Directory',
                    'href': reverse('chronam_search_titles')},
                   ])

    def prep_title_for_return(t):
        title = {}
        title.update(t.solr_doc)
        title['oclc'] = t.oclc
        return title

    format = request.GET.get('format', None)

    # Check whether the requested format is CSV before building pages for the
    # response. The CSV response does not use pagination; instead, all matching
    # titles from Solr are returned at once.
    if format == 'csv':
        query = request.GET.copy()
        q, fields, sort_field, sort_order, facets = index.get_solr_request_params_from_query(query)

        # Return all titles in CSV format. This may hurt performance; the
        # assumption is that this request is not made often.
        # TODO: revisit if the assumption is incorrect
        solr_response = index.execute_solr_query(q, fields, sort_field,
                                                 sort_order, index.title_count(), 0)
        titles = index.get_titles_from_solr_documents(solr_response)

        csv_header_labels = ('lccn', 'title', 'place_of_publication', 'start_year',
                             'end_year', 'publisher', 'edition', 'frequency', 'subject',
                             'state', 'city', 'country', 'language', 'oclc',
                             'holding_type',)
        response = HttpResponse(content_type='text/csv')
        response['Content-Disposition'] = 'attachment; filename="chronam_titles.csv"'
        writer = csv.writer(response)
        writer.writerow(csv_header_labels)
        for title in titles:
            writer.writerow(map(lambda val: smart_str(val or '--'),
                               (title.lccn, title.name, title.place_of_publication,
                                title.start_year, title.end_year, title.publisher,
                                title.edition, title.frequency,
                                map(str, title.subjects.all()),
                                set(map(lambda p: p.state, title.places.all())),
                                map(lambda p: p.city, title.places.all()),
                                str(title.country), map(str, title.languages.all()),
                                title.oclc, title.holding_types)))
        return response
 
    try:
        curr_page = int(request.GET.get('page', 1))
    except ValueError:
        curr_page = 1
Example #4
def status(request):
    page_title = 'System Status'
    page_count = models.Page.objects.all().count()
    issue_count = models.Issue.objects.all().count()
    batch_count = models.Batch.objects.all().count()
    title_count = models.Title.objects.all().count()
    holding_count = models.Holding.objects.all().count()
    essay_count = models.Essay.objects.all().count()
    pages_indexed = index.page_count()
    titles_indexed = index.title_count()
    return render_to_response('reports/status.html', dictionary=locals(),
                              context_instance=RequestContext(request))
Example #5
def status(request):
    crumbs = list(settings.BASE_CRUMBS)
    crumbs.extend([
        {'label': 'System Status'},
    ])
    page_title = 'System Status'
    page_count = models.Page.objects.all().count()
    issue_count = models.Issue.objects.all().count()
    batch_count = models.Batch.objects.all().count()
    title_count = models.Title.objects.all().count()
    # holding_count = models.Holding.objects.all().count()
    # essay_count = models.Essay.objects.all().count()
    pages_indexed = index.page_count()
    titles_indexed = index.title_count()
    return render_to_response('reports/status.html', dictionary=locals(),
                              context_instance=RequestContext(request))
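Passing dictionary=locals() hands every local variable to the template, which is concise but fragile; the context_instance argument was removed in Django 1.10, and render_to_response itself is gone as of Django 3.0. A sketch of the same view with an explicit context and the modern render shortcut, assuming the same models and index modules the examples import:

from django.shortcuts import render

def status(request):
    context = {
        'page_title': 'System Status',
        'page_count': models.Page.objects.count(),
        'title_count': models.Title.objects.count(),
        'pages_indexed': index.page_count(),
        'titles_indexed': index.title_count(),
    }
    return render(request, 'reports/status.html', context)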
Example #6
    def handle(self, **options):
        if not (models.Title.objects.all().count() == 0
                and models.Holding.objects.all().count() == 0
                and models.Essay.objects.all().count() == 0
                and models.Batch.objects.all().count() == 0
                and models.Issue.objects.all().count() == 0
                and models.Page.objects.all().count() == 0
                and index.page_count() == 0 and index.title_count() == 0):
            _logger.warning("Database or index not empty as expected.")
            return

        start = datetime.now()
        management.call_command('loaddata', 'languages.json')
        management.call_command('loaddata', 'institutions.json')
        management.call_command('loaddata', 'ethnicities.json')
        management.call_command('loaddata', 'labor_presses.json')
        management.call_command('loaddata', 'countries.json')

        bib_in_settings = validate_bib_dir()
        if bib_in_settings:
            # look in BIB_STORAGE for original titles to load
            for filename in os.listdir(bib_in_settings):
                if filename.startswith('titles-') and filename.endswith(
                        '.xml'):
                    filepath = os.path.join(bib_in_settings, filename)
                    management.call_command('load_titles',
                                            filepath,
                                            skip_index=True)

        management.call_command(
            'title_sync',
            skip_essays=options['skip_essays'],
            pull_title_updates=options['pull_title_updates'])

        end = datetime.now()
        total_time = end - start
        _logger.info('start time: %s', start)
        _logger.info('end time: %s', end)
        _logger.info('total time: %s', total_time)
        _logger.info("chronam_sync done.")
Example #7
    def handle(self, **options):
        if not (models.Title.objects.all().count() == 0 and
                models.Holding.objects.all().count() == 0 and
                models.Essay.objects.all().count() == 0 and
                models.Batch.objects.all().count() == 0 and
                models.Issue.objects.all().count() == 0 and
                models.Page.objects.all().count() == 0 and
                index.page_count() == 0 and
                index.title_count() == 0):
            _logger.warning("Database or index not empty as expected.")
            return

        start = datetime.now()
        management.call_command('loaddata', 'languages.json')
        management.call_command('loaddata', 'institutions.json')
        management.call_command('loaddata', 'ethnicities.json')
        management.call_command('loaddata', 'labor_presses.json')
        management.call_command('loaddata', 'countries.json')

        bib_in_settings = validate_bib_dir()
        if bib_in_settings:
            # look in BIB_STORAGE for original titles to load
            for filename in os.listdir(bib_in_settings):
                if filename.startswith('titles-') and filename.endswith('.xml'):
                    filepath = os.path.join(bib_in_settings, filename)
                    management.call_command('load_titles', filepath, skip_index=True)

        management.call_command('title_sync', 
                                skip_essays=options['skip_essays'],
                                pull_title_updates=options['pull_title_updates'])

        end = datetime.now()
        total_time = end - start
        _logger.info('start time: %s', start)
        _logger.info('end time: %s', end)
        _logger.info('total time: %s', total_time)
        _logger.info("chronam_sync done.")
Example #8
def search_titles_results(request):
    page_title = "US Newspaper Directory Search Results"
    crumbs = list(settings.BASE_CRUMBS)
    crumbs.extend([{"label": "Search Newspaper Directory", "href": reverse("chronam_search_titles")}])

    def prep_title_for_return(t):
        title = {}
        title.update(t.solr_doc)
        title["oclc"] = t.oclc
        return title

    format = request.GET.get("format")

    # Check whether the requested format is CSV before building pages for the
    # response. The CSV response does not use pagination; instead, all matching
    # titles from Solr are returned at once.
    if format == "csv":
        query = request.GET.copy()
        q, fields, sort_field, sort_order = index.get_solr_request_params_from_query(query)

        # Return all titles in CSV format. This may hurt performance; the
        # assumption is that this request is not made often.
        # TODO: revisit if the assumption is incorrect
        solr_response = index.execute_solr_query(q, fields, sort_field, sort_order, index.title_count(), 0)
        titles = index.get_titles_from_solr_documents(solr_response)

        csv_header_labels = (
            "lccn",
            "title",
            "place_of_publication",
            "start_year",
            "end_year",
            "publisher",
            "edition",
            "frequency",
            "subject",
            "state",
            "city",
            "country",
            "language",
            "oclc",
            "holding_type",
        )
        response = HttpResponse(content_type="text/csv")
        response["Content-Disposition"] = 'attachment; filename="chronam_titles.csv"'
        writer = csv.writer(response)
        writer.writerow(csv_header_labels)
        for title in titles:
            writer.writerow(
                map(
                    lambda val: smart_str(val or "--"),
                    (
                        title.lccn,
                        title.name,
                        title.place_of_publication,
                        title.start_year,
                        title.end_year,
                        title.publisher,
                        title.edition,
                        title.frequency,
                        map(str, title.subjects.all()),
                        set(map(lambda p: p.state, title.places.all())),
                        map(lambda p: p.city, title.places.all()),
                        str(title.country),
                        map(str, title.languages.all()),
                        title.oclc,
                        title.holding_types,
                    ),
                )
            )
        return response

    try:
        curr_page = int(request.GET.get("page", 1))
    except ValueError:
        curr_page = 1

    paginator = index.SolrTitlesPaginator(request.GET)

    try:
        page = paginator.page(curr_page)
    except Exception:
        raise Http404

    page_range_short = list(_page_range_short(paginator, page))

    try:
        rows = int(request.GET.get("rows", "20"))
    except ValueError:
        rows = 20

    query = request.GET.copy()
    query["rows"] = rows  # item assignment so rows is kept by urlencode()
    if page.has_next():
        query["page"] = curr_page + 1
        next_url = "?" + query.urlencode()
    if page.has_previous():
        query["page"] = curr_page - 1
        previous_url = "?" + query.urlencode()
    start = page.start_index()
    end = page.end_index()
    host = request.get_host()
    page_list = []
    for p in range(len(page.object_list)):
        page_start = start + p
        page_list.append((page_start, page.object_list[p]))

    if format == "atom":
        feed_url = request.build_absolute_uri()
        updated = rfc3339(datetime.datetime.now())
        return render_to_response(
            "search_titles_results.xml",
            dictionary=locals(),
            context_instance=RequestContext(request),
            content_type="application/atom+xml",
        )

    elif format == "json":
        results = {
            "startIndex": start,
            "endIndex": end,
            "totalItems": paginator.count,
            "itemsPerPage": rows,
            "items": [prep_title_for_return(t) for t in page.object_list],
        }
        # add url for the json view
        for i in results["items"]:
            i["url"] = request.build_absolute_uri(i["id"].rstrip("/") + ".json")
        json_text = json.dumps(results)
        # jsonp?
        callback = request.GET.get("callback")
        if callback and is_valid_jsonp_callback(callback):
            json_text = "%s(%s);" % ("callback", json_text)
        return HttpResponse(json_text, content_type="application/json")

    sort = request.GET.get("sort", "relevance")

    q = request.GET.copy()
    if "page" in q:
        del q["page"]
    if "sort" in q:
        del q["sort"]
    q = q.urlencode()
    collapse_search_tab = True
    return render_to_response(
        "search_titles_results.html", dictionary=locals(), context_instance=RequestContext(request)
    )
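The is_valid_jsonp_callback() helper used in the JSON branch is not shown on this page. A hypothetical sketch of the usual check: accept only identifier-style callback names, so a crafted callback parameter cannot inject script into the JSONP response.

import re

# Hypothetical pattern: plain JavaScript identifiers only.
_CALLBACK_RE = re.compile(r'^[A-Za-z_$][A-Za-z0-9_$]*$')

def is_valid_jsonp_callback(callback):
    return bool(_CALLBACK_RE.match(callback))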
Example #9
def search_titles_results(request):
    page_title = 'US Newspaper Directory Search Results'
    crumbs = list(settings.BASE_CRUMBS)
    crumbs.extend([{'label': 'Search Newspaper Directory',
                    'href': reverse('chronam_search_titles')},
                   ])

    def prep_title_for_return(t):
        title = {}
        title.update(t.solr_doc)
        title['oclc'] = t.oclc
        return title

    format = request.GET.get('format')

    # Check whether the requested format is CSV before building pages for the
    # response. The CSV response does not use pagination; instead, all matching
    # titles from Solr are returned at once.
    if format == 'csv':
        query = request.GET.copy()
        q, fields, sort_field, sort_order = index.get_solr_request_params_from_query(query)

        # Return all titles in CSV format. This may hurt performance; the
        # assumption is that this request is not made often.
        # TODO: revisit if the assumption is incorrect
        solr_response = index.execute_solr_query(q, fields, sort_field,
                                                 sort_order, index.title_count(), 0)
        titles = index.get_titles_from_solr_documents(solr_response)

        csv_header_labels = ('lccn', 'title', 'place_of_publication', 'start_year',
                             'end_year', 'publisher', 'edition', 'frequency', 'subject',
                             'state', 'city', 'country', 'language', 'oclc',
                             'holding_type',)
        response = HttpResponse(content_type='text/csv')
        response['Content-Disposition'] = 'attachment; filename="chronam_titles.csv"'
        writer = csv.writer(response)
        writer.writerow(csv_header_labels)
        for title in titles:
            writer.writerow(map(lambda val: smart_str(val or '--'),
                                (title.lccn, title.name, title.place_of_publication,
                                 title.start_year, title.end_year, title.publisher,
                                 title.edition, title.frequency,
                                 map(str, title.subjects.all()),
                                 set(map(lambda p: p.state, title.places.all())),
                                 map(lambda p: p.city, title.places.all()),
                                 str(title.country), map(str, title.languages.all()),
                                 title.oclc, title.holding_types)))
        return response

    try:
        curr_page = int(request.GET.get('page', 1))
    except ValueError:
        curr_page = 1

    paginator = index.SolrTitlesPaginator(request.GET)

    try:
        page = paginator.page(curr_page)
    except Exception:
        raise Http404

    page_range_short = list(_page_range_short(paginator, page))

    try:
        rows = int(request.GET.get('rows', '20'))
    except ValueError:
        rows = 20

    query = request.GET.copy()
    query['rows'] = rows  # item assignment so rows is kept by urlencode()
    if page.has_next():
        query['page'] = curr_page + 1
        next_url = '?' + query.urlencode()
    if page.has_previous():
        query['page'] = curr_page - 1
        previous_url = '?' + query.urlencode()
    start = page.start_index()
    end = page.end_index()
    host = request.get_host()
    page_list = []
    for p in range(len(page.object_list)):
        page_start = start + p
        page_list.append((page_start, page.object_list[p]))

    if format == 'atom':
        feed_url = 'http://' + host + request.get_full_path()
        updated = rfc3339(datetime.datetime.now())
        return render_to_response('search_titles_results.xml',
                                  dictionary=locals(),
                                  context_instance=RequestContext(request),
                                  content_type='application/atom+xml')

    elif format == 'json':
        results = {
            'startIndex': start,
            'endIndex': end,
            'totalItems': paginator.count,
            'itemsPerPage': rows,
            'items': [prep_title_for_return(t) for t in page.object_list]
        }
        # add url for the json view
        for i in results['items']:
            i['url'] = 'http://' + request.get_host() + i['id'].rstrip("/") + ".json"
        json_text = json.dumps(results, indent=2)
        # jsonp?
        callback = request.GET.get('callback')
        if callback and is_valid_jsonp_callback(callback):
            json_text = "%s(%s);" % ('callback', json_text)
        return HttpResponse(json_text, content_type='application/json')

    sort = request.GET.get('sort', 'relevance')

    q = request.GET.copy()
    if 'page' in q:
        del q['page']
    if 'sort' in q:
        del q['sort']
    q = q.urlencode()
    collapse_search_tab = True
    return render_to_response('search_titles_results.html',
                              dictionary=locals(),
                              context_instance=RequestContext(request))
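The _page_range_short() helper used by examples #8 and #9 is also not shown. A hypothetical sketch consistent with how it is called here: yield a condensed page list for the pagination widget, with None marking each gap so the template can render an ellipsis.

def _page_range_short(paginator, page):
    # First page, a window around the current page, and the last page.
    window = range(max(1, page.number - 2),
                   min(paginator.num_pages, page.number + 2) + 1)
    last = 0
    for p in [1] + list(window) + [paginator.num_pages]:
        if p <= last:
            continue  # skip duplicates when the ranges overlap
        if p > last + 1:
            yield None  # gap marker, rendered as '...'
        yield p
        last = p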