示例#1
0
文件: solrsearch.py 项目: cmak/reddit
def changed(types=None,since=None,commit=True,optimize=False):
    """
        Cron entry point (invoked via `paster run`): re-index every
        Thing recorded in the `thing_changes` table since the previous
        run.

        types    -- Thing classes to scan; falls back to the
                    module-level `indexed_types` when empty or omitted
        since    -- lower date bound handed to
                    `thing_changes.get_changed`; falls back to
                    `get_last_run()` when empty or omitted
        commit   -- forwarded to `SolrConnection`
        optimize -- forwarded to `SolrConnection`

        Spam and deleted Things are dropped before tokenising, so they
        are never re-submitted to Solr by this function.
    """
    set_emptying_cache()

    # Captured before any work starts; this is the value stored as the
    # "last run" marker once everything has been submitted.
    run_started = datetime.now()

    types = types or indexed_types
    since = since or get_last_run()

    pending_docs = []

    for cls in types:
        # get_changed yields (fullname, date) pairs; only the fullname
        # is needed, and the set removes duplicates.
        fullnames = set(entry[0]
                        for entry
                        in thing_changes.get_changed(cls, min_date = since))
        things = cls._by_fullname(fullnames, data=True, return_dict=False)
        live_things = [thing for thing in things
                       if not (thing._spam or thing._deleted)]

        if not live_things:
            print("No changed %ss detected" % (cls.__name__,))
            continue

        docs = tokenize_things(live_things)
        print("Found %d %ss starting with %s" % (len(docs),cls.__name__,unicode_safe(docs[0]['contents'])))
        pending_docs += docs

    # One connection for the whole batch, opened with the caller's
    # commit/optimize flags.
    with SolrConnection(commit=commit,optimize=optimize) as s:
        s.add(pending_docs)

    save_last_run(run_started)
示例#2
0
def changed(commit=True,optimize=False,delete_old=True):
    """
        Run by `cron` (through `paster run`) on a schedule to update
        all Things that have been created or have changed since the
        last run. Things add themselves to a `thing_changes` table,
        which we read, find the Things, tokenise, and re-submit them
        to Solr. Things that are marked as spam or deleted are removed
        from Solr instead of being re-submitted.

        commit     -- forwarded to `SolrConnection`
        optimize   -- forwarded to `SolrConnection`
        delete_old -- when true, and at least one change was read,
                      call `thing_changes.clear_changes` with the
                      newest change date that was read
    """
    set_emptying_cache()

    # Stays None when there is nothing to process, so the cleanup step
    # below can be skipped instead of hitting an unbound name.
    max_date = None

    with SolrConnection(commit=commit,optimize=optimize) as s:
        changes = thing_changes.get_changed()
        if changes:
            # changes =:= [(Fullname,Date) | ...]
            max_date = max(x[1] for x in changes)
            fullnames = IteratorChunker(x[0] for x in changes)

            while not fullnames.done:
                chunk = fullnames.next_chunk(200)
                things = Thing._by_fullname(chunk,
                                            data=True, return_dict=False)

                # Partition the looked-up Things *before* filtering:
                # spam/deleted ones must be deleted from the index,
                # everything else is (re-)added.
                to_add = []
                to_delete = []
                for thing in things:
                    if thing._spam or thing._deleted:
                        to_delete.append(thing)
                    else:
                        to_add.append(thing)

                if to_add:
                    s.add(tokenize_things(to_add))

                for thing in to_delete:
                    s.delete(id=thing._fullname)

    if delete_old and max_date is not None:
        thing_changes.clear_changes(max_date = max_date)
示例#3
0
def changed(commit=True,optimize=False,delete_old=True):
    """
        Run by `cron` (through `paster run`) on a schedule to update
        all Things that have been created or have changed since the
        last run. Things add themselves to a `thing_changes` table,
        which we read, find the Things, tokenise, and re-submit them
        to Solr. Things that are marked as spam or deleted are removed
        from Solr instead of being re-submitted.

        commit     -- forwarded to `SolrConnection`
        optimize   -- forwarded to `SolrConnection`
        delete_old -- when true, and at least one change was read,
                      call `thing_changes.clear_changes` with the
                      newest change date that was read
    """
    set_emptying_cache()

    # Stays None when there is nothing to process, so the cleanup step
    # below can be skipped instead of hitting an unbound name.
    max_date = None

    with SolrConnection(commit=commit,optimize=optimize) as s:
        changes = thing_changes.get_changed()
        if changes:
            # changes =:= [(Fullname,Date) | ...]
            max_date = max(x[1] for x in changes)
            fullnames = IteratorChunker(x[0] for x in changes)

            while not fullnames.done:
                chunk = fullnames.next_chunk(200)
                things = Thing._by_fullname(chunk,
                                            data=True, return_dict=False)

                # Partition the looked-up Things *before* filtering:
                # spam/deleted ones must be deleted from the index,
                # everything else is (re-)added.
                to_add = []
                to_delete = []
                for thing in things:
                    if thing._spam or thing._deleted:
                        to_delete.append(thing)
                    else:
                        to_add.append(thing)

                if to_add:
                    s.add(tokenize_things(to_add))

                for thing in to_delete:
                    s.delete(id=thing._fullname)

    if delete_old and max_date is not None:
        thing_changes.clear_changes(max_date = max_date)