Пример #1
0
def create_index():
    """Create the Elasticsearch index and register document mappings.

    Creates the index named by settings.DOCSTORE_INDEX, initializes the
    mappings for the Author, Page, and Source doc types, and registers
    those doc types on the index.
    """
    # Called for its connection-setup side effects only: its return value
    # was previously bound to `index` but immediately overwritten below,
    # so the dead binding is dropped.
    set_hosts_index()
    logprint('debug', 'creating new index')
    index = Index(settings.DOCSTORE_INDEX)
    index.create()
    logprint('debug', 'creating mappings')
    Author.init()
    Page.init()
    Source.init()
    logprint('debug', 'registering doc types')
    index.doc_type(Author)
    index.doc_type(Page)
    index.doc_type(Source)
    logprint('debug', 'DONE')
Пример #2
0
def source_cite(request, encyclopedia_id, template_name="wikiprox/cite.html"):
    """Render the citation page for a single primary source.

    Raises Http404 when no Source exists for encyclopedia_id.
    """
    try:
        src = Source.get(encyclopedia_id)
    except NotFoundError:
        raise Http404
    context = {"citation": Citation(src, request)}
    return render_to_response(
        template_name,
        context,
        context_instance=RequestContext(request),
    )
Пример #3
0
def source(request, encyclopedia_id, template_name="wikiprox/source.html"):
    """Render the detail page for a single primary source.

    Raises Http404 when no Source exists for encyclopedia_id.
    """
    try:
        src = Source.get(encyclopedia_id)
    except NotFoundError:
        raise Http404
    context = {
        "source": src,
        "RTMP_STREAMER": settings.RTMP_STREAMER,
        "MEDIA_URL": settings.MEDIA_URL,
        "SOURCES_MEDIA_URL": settings.SOURCES_MEDIA_URL,
    }
    return render_to_response(
        template_name, context, context_instance=RequestContext(request))
Пример #4
0
def source(request, encyclopedia_id, format=None):
    """API endpoint returning the metadata of a single Source as JSON.

    Looks up the Source by encyclopedia_id and returns its identifiers,
    media paths/URLs, display attributes, caption text, and timestamps.

    @param request: REST framework request (used to build absolute URLs).
    @param encyclopedia_id: ID of the Source to retrieve.
    @param format: Optional response-format suffix (DRF convention).
    @returns: rest_framework Response; HTTP 404 if no such Source exists.
    """
    try:
        source = Source.get(encyclopedia_id)
    except NotFoundError:
        return Response(status=status.HTTP_404_NOT_FOUND)
    data = {
        'encyclopedia_id': source.encyclopedia_id,
        'psms_id': source.psms_id,
        'densho_id': source.densho_id,
        'institution_id': source.institution_id,
        'url': reverse('wikiprox-api-source', args=[source.encyclopedia_id], request=request),
        'absolute_url': reverse('wikiprox-source', args=[source.encyclopedia_id], request=request),
        'streaming_path': source.streaming_path(),
        'rtmp_streamer': settings.RTMP_STREAMER,
        'rtmp_path': source.streaming_url,
        'external_url': source.external_url,
        'original_path': source.original_path(),
        'original_url': source.original_url,
        'original_size': source.original_size,
        'img_size': source.display_size,
        'filename': source.filename,
        'img_path': source.img_path,
        'img_url': source.img_url(),
        'media_format': source.media_format,
        'aspect_ratio': source.aspect_ratio,
        'collection_name': source.collection_name,
        'headword': source.headword,
        'caption': source.caption,
        'caption_extended': source.caption_extended,
        'transcript_path': source.transcript_path(),
        'transcript_url': source.transcript_url(),
        'courtesy': source.courtesy,
        'creative_commons': source.creative_commons,
        'created': source.created,
        'modified': source.modified,
        'published': source.published,
    }
    return Response(data)
Пример #5
0
def articles(report=False, dryrun=False, force=False):
    """Sync encyclopedia articles from MediaWiki into Elasticsearch.

    Compares the articles present in MediaWiki with those already
    indexed, then fetches and saves each page (and its sources) that
    needs updating.

    @param report: If True, log the update/delete counts and return
        without modifying anything.
    @param dryrun: If True, fetch and build everything but skip saves.
    @param force: If True, re-save every article already in the index
        instead of computing the update set.

    NOTE(review): articles_delete is computed and logged but nothing is
    deleted here — confirm whether deletion happens elsewhere.
    """
    # Called for its connection-setup side effects; return value unused.
    set_hosts_index()
    
    logprint('debug', '------------------------------------------------------------------------')
    # authors need to be refreshed
    logprint('debug', 'getting mw_authors,articles...')
    mw_author_titles = Proxy().authors(cached_ok=False)
    mw_articles = Proxy().articles_lastmod()
    logprint('debug', 'getting es_articles...')
    es_articles = Page.pages()
    if force:
        logprint('debug', 'forcibly update all articles')
        articles_update = [page['title'] for page in es_articles]
        articles_delete = []
    else:
        logprint('debug', 'determining new,delete...')
        articles_update,articles_delete = Elasticsearch.articles_to_update(
            mw_author_titles, mw_articles, es_articles)
    logprint('debug', 'mediawiki articles: %s' % len(mw_articles))
    logprint('debug', 'elasticsearch articles: %s' % len(es_articles))
    logprint('debug', 'articles to update: %s' % len(articles_update))
    logprint('debug', 'articles to delete: %s' % len(articles_delete))
    if report:
        return
    
    logprint('debug', 'adding articles...')
    posted = 0
    could_not_post = []
    for n,title in enumerate(articles_update):
        logprint('debug', '--------------------')
        logprint('debug', '%s/%s %s' % (n+1, len(articles_update), title))
        logprint('debug', 'getting from mediawiki')
        mwpage = Proxy().page(title)
        try:
            existing_page = Page.get(title)
            logprint('debug', 'exists in elasticsearch')
        except NotFoundError:
            # Was a bare `except:`; every other .get() in this module
            # expects NotFoundError, so catch only that.
            existing_page = None
        if (mwpage.published or settings.MEDIAWIKI_SHOW_UNPUBLISHED):
            # Save each of the page's sources before the page itself.
            for mwsource in mwpage.sources:
                logprint('debug', '- source %s' % mwsource['encyclopedia_id'])
                source = Source.from_mw(mwsource, title)
                if not dryrun:
                    source.save()
            logprint('debug', 'creating page')
            page = Page.from_mw(mwpage, page=existing_page)
            if not dryrun:
                logprint('debug', 'saving')
                page.save()
                # Read-back check: verify the page actually landed.
                try:
                    p = Page.get(title)
                except NotFoundError:
                    logprint('error', 'ERROR: Page(%s) NOT SAVED!' % title)
        else:
            logprint('debug', 'not publishable: %s' % mwpage)
            could_not_post.append(mwpage)
    
    if could_not_post:
        logprint('debug', '========================================================================')
        logprint('debug', 'Could not post these: %s' % could_not_post)
    logprint('debug', 'DONE')