def changed(types=None,since=None,commit=True,optimize=False): """ Run by `cron` (through `paster run`) on a schedule to update all Things that have been created or have changed since the last run. Things add themselves to a `thing_changes` table, which we read, find the Things, tokenise, and re-submit them to Solr """ global indexed_types set_emptying_cache() start_t = datetime.now() if not types: types = indexed_types if not since: since = get_last_run() all_changed = [] for cls in types: changed = set(x[0] for x in thing_changes.get_changed(cls,min_date = since)) # changed =:= [(Fullname,Date) | ...] changed = cls._by_fullname(changed, data=True, return_dict=False) changed = [x for x in changed if not x._spam and not x._deleted] # note: anything marked as spam or deleted is not updated in # the search database. Since these are filtered out in the UI, # that's probably fine. if len(changed) > 0: changed = tokenize_things(changed) print "Found %d %ss starting with %s" % (len(changed),cls.__name__,unicode_safe(changed[0]['contents'])) all_changed += changed else: print "No changed %ss detected" % (cls.__name__,) with SolrConnection(commit=commit,optimize=optimize) as s: s.add(all_changed) save_last_run(start_t)
def changed(commit=True, optimize=False, delete_old=True):
    """Push recently created/changed Things to Solr in fixed-size chunks.

    Run by `cron` (through `paster run`) on a schedule. Things register
    themselves in the `thing_changes` table; we read it, load the Things
    200 at a time, tokenise the live ones and re-submit them, and delete
    spam/deleted ones from the index.

    commit     -- ask Solr to commit after the batch
    optimize   -- ask Solr to optimize after the batch
    delete_old -- clear processed rows from `thing_changes` when done
    """
    set_emptying_cache()

    with SolrConnection(commit=commit, optimize=optimize) as s:
        # changes =:= [(Fullname, Date) | ...]
        changes = thing_changes.get_changed()
        if changes:
            # Latest change-date seen; used to trim the change-log afterwards.
            max_date = max(x[1] for x in changes)

            chunker = IteratorChunker(x[0] for x in changes)
            while not chunker.done:
                chunk = chunker.next_chunk(200)
                chunk = Thing._by_fullname(chunk, data=True, return_dict=False)

                # BUG FIX: partition the *unfiltered* fetch. Previously
                # to_delete was computed from a list that had already been
                # filtered to non-spam/non-deleted things, so it was always
                # empty and stale spam/deleted docs were never purged from
                # the index.
                to_index = [x for x in chunk if not x._spam and not x._deleted]
                to_delete = [x for x in chunk if x._spam or x._deleted]

                if len(to_index) > 0:
                    s.add(tokenize_things(to_index))

                # Remove spam/deleted things from the search index so they
                # stop appearing in results.
                for i in to_delete:
                    s.delete(id=i._fullname)

            if delete_old:
                # Drop the change-log rows we just processed.
                thing_changes.clear_changes(max_date = max_date)