def shouldFlipFirst(key):
    """Decide whether the search hit ``key`` should be flipped to the front.

    The channel part of ``key`` (the text before the first ``/``) is looked
    up in ``channels``:

    * a score of -1 is always rejected;
    * a score from 0 to 2 (inclusive) is always accepted.

    For every other score the decision depends on the ``index`` entry of
    ``key``: an entry shorter than 20 that matches neither ``hasFile`` nor
    ``hasLink`` is rejected; anything else is accepted unless it matches an
    item of ``blocklist``.
    """
    channel_score = channels.get(key.split('/')[0])
    if channel_score == -1:
        return False
    if 0 <= channel_score <= 2:
        return True

    entry = index.get(key)
    is_short = len(entry) < 20
    # matchKey is only consulted for short entries, as in the original
    # short-circuiting condition.
    if is_short and not matchKey(entry, ['hasFile', 'hasLink']):
        return False
    return not matchKey(entry, blocklist.items())
def sortAndClean(result):
    """Rank ``result`` and run it through a fixed series of ``flipFirst`` passes.

    Keys are ordered by ``timestamp`` (0 when missing) minus 1000 times the
    score of their channel, highest value first; equal values fall back to
    comparing the keys themselves, also descending.

    The ordered keys then go through ``flipFirst`` with these predicates, in
    this order:

    1. the channel score is not -2 (called with ``sendAfter=False``);
    2. ``isCNIndex(key)``;
    3. the channel is not in ``suspect._db.items``;
    4. the key is in ``coreIndex``.

    Returns the value of the last ``flipFirst`` call.
    """
    def _channel_of(key):
        return key.split('/')[0]

    def _rank(key):
        # Same (value, key) tuple the original built before sorting.
        return (timestamp.get(key, 0) - channels.get(_channel_of(key)) * 1000,
                key)

    ordered = sorted(result, key=_rank, reverse=True)
    ordered = flipFirst(
        ordered,
        lambda key: channels.get(_channel_of(key)) != -2,
        sendAfter=False)
    later_passes = (
        isCNIndex,
        lambda key: _channel_of(key) not in suspect._db.items,
        lambda key: key in coreIndex,
    )
    for predicate in later_passes:
        ordered = flipFirst(ordered, predicate)
    return ordered
def cleanupSuspectAndOld():
    """Run the cleanup helpers over every channel bucket and print the total.

    Keys of ``maintext`` that do not end in ``/0`` are paired with their
    channel (the text before the first ``/``) and grouped by
    ``createBucket``. For each channel in the bucket:

    * a channel score of -2 triggers ``cleanKeys(keys, 0)``;
    * a channel score of -1 or lower triggers ``cleanupChannel(keys)``;
    * ``cleanupOldOrBad(keys)`` is then called.

    Afterwards every channel listed by ``suspect.items()`` whose score is 3
    or more is passed to ``cleanupChannel``. The sum of the helpers' return
    values is printed.
    """
    pairs = []
    for entry in maintext.items():
        key = entry[0]
        if key.endswith('/0'):
            continue
        pairs.append((key, key.split('/')[0]))
    bucket = createBucket(pairs)

    removed = 0
    for channel in bucket:
        keys = bucket[channel]
        if channels.get(channel) == -2:
            removed += cleanKeys(keys, 0)
        if channels.get(channel) <= -1:
            removed += cleanupChannel(keys)
        # NOTE(review): the original formatting was lost; this assumes
        # cleanupOldOrBad runs for every channel, not only for those with a
        # score <= -1 -- confirm.
        removed += cleanupOldOrBad(keys)

    for channel in suspect.items():
        if channels.get(channel) >= 3:
            # bucket.get yields None for a suspect channel with no keys;
            # cleanupChannel is expected to cope with that, as before.
            removed += cleanupChannel(bucket.get(channel))

    print('cleanupSuspect removed %d items' % removed)
def shouldFlipFirstForChannel(key):
    """Tell whether the ``/0`` entry ``key`` should be flipped to the front.

    Returns ``False`` when ``key`` does not end in ``/0``, when its channel
    (the text before the first ``/``) is in ``suspect._db.items``, or when
    the channel score is -1. Otherwise returns whether the key's timestamp
    (0 when missing) is newer than one week before the current time.
    """
    one_week_seconds = 24 * 7 * 60 * 60
    if not key.endswith('/0'):
        return False

    channel = key.split('/')[0]
    is_suspect = channel in suspect._db.items
    if is_suspect or channels.get(channel) == -1:
        return False

    return timestamp.get(key, 0) > time.time() - one_week_seconds
def shouldRemove(key):
    """Decide whether the entry stored under ``key`` should be removed.

    Checks are applied in this order, and the first one that matches wins:

    1. keys ending in ``/0`` are kept;
    2. keys with no (or empty) ``maintext`` value are removed;
    3. channel score -2 or 100: removed;
    4. channel score from 0 up to, but not including, 2: kept;
    5. ``noCNnoEN`` is true for the index entry: removed;
    6. the index entry matches ``hasFile`` or ``hasLink``: kept;
    7. otherwise removed only if the timestamp (0 when missing) is below
       ``dbase.getRetain`` for the channel.
    """
    if key.endswith('/0'):
        return False
    if not maintext.get(key):
        return True

    channel = key.split('/')[0]
    channel_score = channels.get(channel)
    if channel_score in (-2, 100):
        return True
    if 0 <= channel_score < 2:
        return False

    entry = index.get(key)
    if noCNnoEN(entry):
        return True
    if matchKey(entry, ['hasFile', 'hasLink']):
        return False

    return timestamp.get(key, 0) < dbase.getRetain(channel)
def getScore(key):
    """Return a sort score for ``key`` derived from its channel and timestamp.

    A channel score of -2 yields 1 and a channel score of -1 yields 0. For
    any other channel score the result is the negation of
    ``timestamp - channel_score * 1000`` (timestamp defaults to 0).
    """
    channel_score = channels.get(key.split('/')[0])
    ranking = timestamp.get(key, 0) - channel_score * 1000
    if channel_score == -2:
        return 1
    return 0 if channel_score == -1 else -ranking
def cleanupSuspect():
    """Clean up low-scored and suspect channels and print the total count.

    Keys of ``maintext`` that do not end in ``/0`` are grouped by channel
    (the text before the first ``/``). Then:

    * every grouped channel whose score is -1 or lower is passed to
      ``cleanupChannel(keys, keepChinese=False)``;
    * every channel listed by ``suspect.items()`` whose score is above 5 is
      passed to ``cleanupChannel``.

    The sum of the values returned by ``cleanupChannel`` is printed.
    """
    bucket = {}
    # The value half of each pair is not needed; only the key is grouped.
    for key, _ in maintext.items():
        if key.endswith('/0'):
            continue
        bucket.setdefault(key.split('/')[0], []).append(key)

    count = 0
    for channel, keys in bucket.items():
        if channels.get(channel) <= -1:
            count += cleanupChannel(keys, keepChinese=False)

    for channel in suspect.items():
        if channels.get(channel) > 5:
            # bucket.get yields None for a suspect channel with no keys;
            # cleanupChannel is expected to cope with that, as before.
            count += cleanupChannel(bucket.get(channel))

    print('cleanupSuspect', count)
def searchTextRaw(targets, searchCore=False):
    """Search for ``targets`` and return the hits ranked, flipped and deduplicated.

    The keys returned by ``searchRaw`` are ordered by ``timestamp`` (0 when
    missing) minus 1000 times their channel score, highest first, with the
    key itself as tie-breaker. They then go through these steps in order:

    1. ``flipFirst`` on "channel score is not -2" (``sendAfter=False``);
    2. ``dedupResult`` by ``maintext`` value (``sendAfter=False``);
    3. ``flipFirst`` on "channel not in ``dbase.suspect.items()``";
    4. ``flipFirst`` on ``searchHitAll(targets, (key, text))``;
    5. ``dedupResult`` by channel;
    6. ``flipFirst`` on ``shouldFlipFirst``.

    Returns the value of the final ``flipFirst`` call.
    """
    def _channel_of(key):
        return key.split('/')[0]

    def _rank(key):
        # Same (value, key) tuple the original built before sorting.
        return (timestamp.get(key, 0) - channels.get(_channel_of(key)) * 1000,
                key)

    hits = sorted(
        searchRaw(targets, searchCore=searchCore), key=_rank, reverse=True)
    hits = flipFirst(
        hits,
        lambda key: channels.get(_channel_of(key)) != -2,
        sendAfter=False)
    hits = dedupResult(hits, lambda key: maintext.get(key), sendAfter=False)

    suspects = dbase.suspect.items()
    hits = flipFirst(hits, lambda key: _channel_of(key) not in suspects)
    hits = flipFirst(
        hits,
        lambda key: searchHitAll(targets, (key, maintext.get(key))))
    hits = dedupResult(hits, _channel_of)
    return flipFirst(hits, shouldFlipFirst)
def rescore():
    """Add 1 to every channel score that is at least 1, then save.

    Iterates ``channels._db.items`` and, for each channel whose
    ``channels.get`` value is 1 or more, increments the stored value in
    place. Finishes by calling ``channels.save_dont_call_in_prod()``.
    """
    stored_scores = channels._db.items
    for channel in stored_scores:
        if channels.get(channel) >= 1:
            stored_scores[channel] += 1
    # NOTE(review): the original formatting was lost; this assumes the save
    # happens once after the loop rather than per channel -- confirm.
    channels.save_dont_call_in_prod()
def getRetainLen(channel):
    """Return the retain length for ``channel`` based on its score.

    A negative channel score gives 30. Otherwise the result is
    ``int(300 + 6000 / (1 + score))``: 6300 for a score of 0, shrinking
    towards 300 as the score grows.
    """
    score = channels.get(channel)
    return 30 if score < 0 else int(300 + 6000 / (1 + score))
def getMaxIteration(channel):
    """Return the iteration cap for ``channel`` based on its score.

    The cap is ``max(0, 10 - score) ** 3 + 20``, so it is 20 for any score
    of 10 or more and grows cubically as the score drops below 10.
    """
    headroom = max(0, 10 - channels.get(channel))
    return headroom ** 3 + 20