def create_index():
    """Create the Elasticsearch index and register document mappings.

    Connects to the configured hosts, creates the index named by
    ``settings.DOCSTORE_INDEX``, initializes the Author/Page/Source
    mappings, and registers those doc types on the index.
    """
    # Called for its side effect (configuring ES connections); the original
    # bound the result to `index` only to immediately overwrite it below.
    set_hosts_index()
    logprint('debug', 'creating new index')
    index = Index(settings.DOCSTORE_INDEX)
    index.create()
    logprint('debug', 'creating mappings')
    Author.init()
    Page.init()
    Source.init()
    logprint('debug', 'registering doc types')
    index.doc_type(Author)
    index.doc_type(Page)
    index.doc_type(Source)
    logprint('debug', 'DONE')
def source_cite(request, encyclopedia_id, template_name="wikiprox/cite.html"):
    """Render the citation page for a single primary source.

    @param request: HttpRequest
    @param encyclopedia_id: str ID of the Source document.
    @param template_name: str template path.
    @raises Http404: if no Source exists for encyclopedia_id.
    """
    try:
        src = Source.get(encyclopedia_id)
    except NotFoundError:
        raise Http404
    return render_to_response(
        template_name,
        {"citation": Citation(src, request)},
        context_instance=RequestContext(request),
    )
def source(request, encyclopedia_id, template_name="wikiprox/source.html"):
    """Render the detail page for a single primary source.

    @param request: HttpRequest
    @param encyclopedia_id: str ID of the Source document.
    @param template_name: str template path.
    @raises Http404: if no Source exists for encyclopedia_id.
    """
    try:
        src = Source.get(encyclopedia_id)
    except NotFoundError:
        raise Http404
    context = {
        "source": src,
        "RTMP_STREAMER": settings.RTMP_STREAMER,
        "MEDIA_URL": settings.MEDIA_URL,
        "SOURCES_MEDIA_URL": settings.SOURCES_MEDIA_URL,
    }
    return render_to_response(
        template_name, context, context_instance=RequestContext(request)
    )
def source(request, encyclopedia_id, format=None):
    """API endpoint: return metadata for a single primary source as JSON.

    Serializes the Source document's identifiers, media URLs/paths, display
    metadata, and publication timestamps into a flat dict.

    @param request: Request
    @param encyclopedia_id: str ID of the Source document.
    @param format: Optional response format suffix (DRF convention).
    @returns: Response with the serialized source, or HTTP 404 if not found.
    """
    try:
        source = Source.get(encyclopedia_id)
    except NotFoundError:
        return Response(status=status.HTTP_404_NOT_FOUND)
    data = {
        # identifiers
        'encyclopedia_id': source.encyclopedia_id,
        'psms_id': source.psms_id,
        'densho_id': source.densho_id,
        'institution_id': source.institution_id,
        # API and public URLs for this source
        'url': reverse('wikiprox-api-source', args=([source.encyclopedia_id]), request=request),
        'absolute_url': reverse('wikiprox-source', args=([source.encyclopedia_id]), request=request),
        # streaming media
        'streaming_path': source.streaming_path(),
        'rtmp_streamer': settings.RTMP_STREAMER,
        'rtmp_path': source.streaming_url,
        'external_url': source.external_url,
        # original file
        'original_path': source.original_path(),
        'original_url': source.original_url,
        'original_size': source.original_size,
        # display image
        'img_size': source.display_size,
        'filename': source.filename,
        'img_path': source.img_path,
        'img_url': source.img_url(),
        'media_format': source.media_format,
        'aspect_ratio': source.aspect_ratio,
        # descriptive metadata
        'collection_name': source.collection_name,
        'headword': source.headword,
        'caption': source.caption,
        'caption_extended': source.caption_extended,
        'transcript_path': source.transcript_path(),
        'transcript_url': source.transcript_url(),
        'courtesy': source.courtesy,
        'creative_commons': source.creative_commons,
        # timestamps / status
        'created': source.created,
        'modified': source.modified,
        'published': source.published,
    }
    return Response(data)
def articles(report=False, dryrun=False, force=False):
    """Sync MediaWiki articles (and their sources) into Elasticsearch.

    Compares article titles/timestamps in MediaWiki with those already in
    Elasticsearch, then fetches and indexes each article that is new or
    stale, saving its Sources first.

    @param report: If True, only log counts of pending updates/deletes; make
        no changes.
    @param dryrun: If True, fetch and build documents but do not save them.
    @param force: If True, re-index every article already in Elasticsearch
        instead of computing the update set.
    """
    index = set_hosts_index()
    logprint('debug', '------------------------------------------------------------------------')
    # authors need to be refreshed
    logprint('debug', 'getting mw_authors,articles...')
    mw_author_titles = Proxy().authors(cached_ok=False)
    mw_articles = Proxy().articles_lastmod()
    logprint('debug', 'getting es_articles...')
    es_articles = Page.pages()
    if force:
        logprint('debug', 'forcibly update all articles')
        articles_update = [page['title'] for page in es_articles]
        articles_delete = []
    else:
        logprint('debug', 'determining new,delete...')
        articles_update,articles_delete = Elasticsearch.articles_to_update(
            mw_author_titles, mw_articles, es_articles)
    logprint('debug', 'mediawiki articles: %s' % len(mw_articles))
    logprint('debug', 'elasticsearch articles: %s' % len(es_articles))
    logprint('debug', 'articles to update: %s' % len(articles_update))
    logprint('debug', 'articles to delete: %s' % len(articles_delete))
    if report:
        return
    logprint('debug', 'adding articles...')
    could_not_post = []
    for n,title in enumerate(articles_update):
        logprint('debug', '--------------------')
        logprint('debug', '%s/%s %s' % (n+1, len(articles_update), title))
        logprint('debug', 'getting from mediawiki')
        mwpage = Proxy().page(title)
        try:
            existing_page = Page.get(title)
            logprint('debug', 'exists in elasticsearch')
        except NotFoundError:
            # First sync of this article; Page.from_mw handles page=None.
            existing_page = None
        if (mwpage.published or settings.MEDIAWIKI_SHOW_UNPUBLISHED):
            # Save the article's sources before the article itself.
            for mwsource in mwpage.sources:
                logprint('debug', '- source %s' % mwsource['encyclopedia_id'])
                source = Source.from_mw(mwsource, title)
                if not dryrun:
                    source.save()
            logprint('debug', 'creating page')
            page = Page.from_mw(mwpage, page=existing_page)
            if not dryrun:
                logprint('debug', 'saving')
                page.save()
                # Read-after-write check: confirm the page actually landed.
                try:
                    Page.get(title)
                except NotFoundError:
                    logprint('error', 'ERROR: Page(%s) NOT SAVED!' % title)
        else:
            logprint('debug', 'not publishable: %s' % mwpage)
            could_not_post.append(mwpage)
    if could_not_post:
        logprint('debug', '========================================================================')
        logprint('debug', 'Could not post these: %s' % could_not_post)
    logprint('debug', 'DONE')