Exemplo n.º 1
0
def authors(hosts, index, report=False, dryrun=False, force=False, title=None):
    """Publish MediaWiki author pages to Elasticsearch.

    Fetches the author list and article modification info from MediaWiki
    (via ``Proxy``) and the authors currently in Elasticsearch (via
    ``Author``), works out which author titles need to be (re)published,
    then loads each one from MediaWiki, builds an ``Author`` from it and
    saves it.  After each save the author is read back; titles that cannot
    be read back are logged and summarized at the end.

    @param hosts: Passed through to ``set_hosts_index``.
    @param index: Passed through to ``set_hosts_index`` and ``Proxy.page``.
    @param report: If true (and ``title`` is not given), log how many
        authors would be added and return without publishing anything.
    @param dryrun: If true, build each ``Author`` but do not save it.
    @param force: If true (and ``title`` is not given), republish every
        author currently present in Elasticsearch.
    @param title: If given, publish only this author title; ``report`` and
        ``force`` are ignored in that case.
    @returns: None
    """
    # The return value was never used; the call itself is kept.
    # NOTE(review): presumably this configures the ES connection -- confirm.
    set_hosts_index(hosts=hosts, index=index)

    logprint('debug', '------------------------------------------------------------------------')
    logprint('debug', 'getting mw_authors...')
    mw_author_titles = Proxy.authors(cached_ok=False)
    mw_articles = Proxy.articles_lastmod()
    logprint('debug', 'getting es_authors...')
    es_authors = Author.authors()
    logprint('debug', 'mediawiki authors: %s' % len(mw_author_titles))

    if title:
        authors_new = [title]
    else:
        if force:
            logprint('debug', 'forcibly update all authors')
            authors_new = [page['title'] for page in es_authors]
        else:
            logprint('debug', 'determining new,delete...')
            # The "delete" half of the result is not acted upon (see NOTE
            # below), so it is discarded here.
            authors_new, _ = Elasticsearch.authors_to_update(
                mw_author_titles, mw_articles, es_authors)
        logprint('debug', 'authors to add: %s' % len(authors_new))
        if report:
            return

    # NOTE: deleting stale authors from Elasticsearch is not performed by
    # this function; the code that did so had been commented out.

    logprint('debug', 'adding...')
    errors = []
    total = len(authors_new)
    # Loop variable is deliberately not called ``title`` so the function
    # argument is not rebound.
    for n, author_title in enumerate(authors_new):
        logprint('debug', '--------------------')
        logprint('debug', '%s/%s %s' % (n, total, author_title))
        logprint('debug', 'getting from mediawiki')
        mwauthor = Proxy.page(author_title, index=index)
        try:
            existing_author = Author.get(author_title)
            logprint('debug', 'exists in elasticsearch')
        except Exception:
            # Best-effort lookup: any ordinary failure is treated as "no
            # existing record".  ``Exception`` (rather than a bare except)
            # lets KeyboardInterrupt/SystemExit propagate.
            existing_author = None
        logprint('debug', 'creating author')
        author = Author.from_mw(mwauthor, author=existing_author)
        if not dryrun:
            logprint('debug', 'saving')
            author.save()
            # Read the record back to confirm the save actually landed.
            try:
                Author.get(author_title)
            except NotFoundError:
                logprint('error', 'ERROR: Author(%s) NOT SAVED!' % author_title)
                errors.append(author_title)
    if errors:
        logprint('info', 'ERROR: %s titles were unpublishable:' % len(errors))
        for failed_title in errors:
            logprint('info', 'ERROR: %s' % failed_title)
    logprint('debug', 'DONE')