def authors(hosts, index, report=False, dryrun=False, force=False, title=None):
    """Publish author pages from MediaWiki to Elasticsearch.

    Works out which author titles need (re)publishing, fetches each page
    via ``Proxy.page``, builds an ``Author`` with ``Author.from_mw`` and
    saves it.  After each save the author is read back with ``Author.get``
    to confirm it was stored; titles that cannot be read back are collected
    and listed at the end of the run.

    Args:
        hosts: Passed to ``set_hosts_index``.
        index: Passed to ``set_hosts_index`` and to ``Proxy.page``.
        report: When true and no ``title`` is given, log how many authors
            would be added and return without fetching or saving anything.
        dryrun: When true, fetch and build each author but do not save it.
        force: When true and no ``title`` is given, process every title
            returned by ``Author.authors()`` instead of asking
            ``Elasticsearch.authors_to_update`` which ones changed.
        title: If given, process only this one author title; ``force`` and
            ``report`` are not consulted in that case.

    Returns:
        None.
    """
    # The return value is not used anywhere in this function.
    set_hosts_index(hosts=hosts, index=index)

    logprint('debug', '------------------------------------------------------------------------')
    logprint('debug', 'getting mw_authors...')
    mw_author_titles = Proxy.authors(cached_ok=False)
    mw_articles = Proxy.articles_lastmod()
    logprint('debug', 'getting es_authors...')
    es_authors = Author.authors()
    logprint('debug', 'mediawiki authors: %s' % len(mw_author_titles))

    if title:
        authors_new = [title]
    else:
        if force:
            logprint('debug', 'forcibly update all authors')
            # NOTE(review): the forced list is built from the Elasticsearch
            # authors, not the MediaWiki titles -- confirm this is intended.
            authors_new = [page['title'] for page in es_authors]
        else:
            logprint('debug', 'determining new,delete...')
            # The second element is the list of authors to delete; deletion
            # is currently disabled in this function, so it is discarded.
            authors_new, _ = Elasticsearch.authors_to_update(
                mw_author_titles, mw_articles, es_authors
            )
        logprint('debug', 'authors to add: %s' % len(authors_new))
        if report:
            return

    logprint('debug', 'adding...')
    errors = []
    total = len(authors_new)
    for n, author_title in enumerate(authors_new):
        logprint('debug', '--------------------')
        logprint('debug', '%s/%s %s' % (n, total, author_title))
        logprint('debug', 'getting from mediawiki')
        mwauthor = Proxy.page(author_title, index=index)
        try:
            existing_author = Author.get(author_title)
        except Exception:
            # Best effort: any lookup failure is treated as "not yet in
            # Elasticsearch".  Unlike a bare ``except:``, this still lets
            # KeyboardInterrupt and SystemExit stop the run.
            existing_author = None
        else:
            logprint('debug', 'exists in elasticsearch')
        logprint('debug', 'creating author')
        author = Author.from_mw(mwauthor, author=existing_author)
        if dryrun:
            continue
        logprint('debug', 'saving')
        author.save()
        # Read the author back to verify that the save actually landed.
        try:
            Author.get(author_title)
        except NotFoundError:
            logprint('error', 'ERROR: Author(%s) NOT SAVED!' % author_title)
            errors.append(author_title)

    if errors:
        logprint('info', 'ERROR: %s titles were unpublishable:' % len(errors))
        for failed_title in errors:
            logprint('info', 'ERROR: %s' % failed_title)
    logprint('debug', 'DONE')