Пример #1
0
def shouldFlipFirst(key):
    """Tell whether ``key`` passes the final ``flipFirst`` filter.

    The channel is the part of ``key`` before the first ``/``; its score is
    read from ``channels``.

    Returns:
        ``False`` when the channel score is -1, ``True`` when it lies in
        0..2 inclusive.  For every other score the index entry of ``key``
        decides: an entry shorter than 20 that does not match 'hasFile' or
        'hasLink' yields ``False``; otherwise the result is ``True`` exactly
        when the entry does not match ``blocklist.items()``.
    """
    channel_score = channels.get(key.split('/')[0])
    if channel_score == -1:
        return False
    if 0 <= channel_score <= 2:
        return True

    entry = index.get(key)
    is_short = len(entry) < 20
    if is_short and not matchKey(entry, ['hasFile', 'hasLink']):
        return False
    return not matchKey(entry, blocklist.items())
Пример #2
0
def sortAndClean(result):
    """Sort keys by score (descending) and run the ``flipFirst`` passes.

    The sort key of an entry is the pair
    ``(timestamp - channel_score * 1000, key)``, where the channel is the part
    of the key before the first ``/`` and a missing timestamp counts as 0.

    Args:
        result: iterable of keys.

    Returns:
        The list produced by the last ``flipFirst`` call.
    """
    def channel_of(key):
        return key.split('/')[0]

    def rank(key):
        score = timestamp.get(key, 0) - channels.get(channel_of(key)) * 1000
        return (score, key)

    ordered = sorted(result, key=rank, reverse=True)

    # Each entry is (predicate, extra keyword arguments); the passes are
    # applied in exactly this order.
    passes = [
        (lambda key: channels.get(channel_of(key)) != -2,
         {'sendAfter': False}),
        (lambda key: isCNIndex(key), {}),
        (lambda key: channel_of(key) not in suspect._db.items, {}),
        (lambda key: key in coreIndex, {}),
    ]
    for predicate, options in passes:
        ordered = flipFirst(ordered, predicate, **options)
    return ordered
Пример #3
0
def cleanupSuspectAndOld():
    """Run the cleanup helpers over every channel and print the total.

    Keys of ``maintext`` that do not end in '/0' are paired with their channel
    (the part before the first ``/``) and handed to ``createBucket``.  For
    each channel in the bucket:

    * score == -2: ``cleanKeys(keys, 0)``;
    * score <= -1: ``cleanupChannel(keys)`` (so a -2 channel gets both);
    * always: ``cleanupOldOrBad(keys)``.

    Afterwards every entry of ``suspect.items()`` whose score is >= 3 is
    passed through ``cleanupChannel`` as well.  The return values of all
    helper calls are summed and printed.
    """
    all_keys = [item[0] for item in maintext.items()]
    items = [(key, key.split('/')[0])
             for key in all_keys if not key.endswith('/0')]
    bucket = createBucket(items)

    count = 0
    for channel in bucket:
        if channels.get(channel) == -2:
            count += cleanKeys(bucket[channel], 0)
        if channels.get(channel) <= -1:
            count += cleanupChannel(bucket[channel])
        count += cleanupOldOrBad(bucket[channel])

    for channel in suspect.items():
        if channels.get(channel) >= 3:
            # A suspect channel may have no bucketed keys at all; hand the
            # helper an empty list instead of the None that a bare .get()
            # would return.
            count += cleanupChannel(bucket.get(channel, []))
    print('cleanupSuspect removed %d items' % count)
Пример #4
0
def shouldFlipFirstForChannel(key):
    """Tell whether a '/0' key has a timestamp within the last seven days.

    Returns:
        ``False`` if ``key`` does not end in '/0', if its channel (the part
        before the first ``/``) is in ``suspect._db.items``, or if the channel
        score is -1.  Otherwise ``True`` exactly when the key's timestamp
        (0 if missing) is later than now minus one week.
    """
    if not key.endswith('/0'):
        return False

    channel = key.split('/')[0]
    excluded = channel in suspect._db.items or channels.get(channel) == -1
    if excluded:
        return False

    one_week_in_seconds = 24 * 7 * 60 * 60
    return timestamp.get(key, 0) > time.time() - one_week_in_seconds
Пример #5
0
def shouldRemove(key):
    """Decide whether ``key`` should be removed.

    The checks run in order and the first one that applies wins:

    1. key ends in '/0'                         -> keep (``False``)
    2. ``maintext`` has no truthy value for key -> remove (``True``)
    3. channel score is -2 or 100               -> remove
    4. channel score is 0 or 1 (0 <= s < 2)     -> keep
    5. ``noCNnoEN`` accepts the index entry     -> remove
    6. entry matches 'hasFile' or 'hasLink'     -> keep
    7. otherwise remove exactly when the timestamp (0 if missing) is below
       ``dbase.getRetain(channel)``.
    """
    if key.endswith('/0'):
        return False
    if not maintext.get(key):
        return True

    channel = key.split('/')[0]
    channel_score = channels.get(channel)
    if channel_score in (-2, 100):
        return True
    if 0 <= channel_score < 2:
        return False

    entry = index.get(key)
    if noCNnoEN(entry):
        return True
    if matchKey(entry, ['hasFile', 'hasLink']):
        return False
    return timestamp.get(key, 0) < dbase.getRetain(channel)
Пример #6
0
def getScore(key):
    """Return a numeric score for ``key`` based on its channel and timestamp.

    Returns:
        1 when the channel score is -2, 0 when it is -1; otherwise the
        negation of ``timestamp - channel_score * 1000`` (a missing
        timestamp counts as 0).
    """
    channel_score = channels.get(key.split('/')[0])
    if channel_score == -2:
        return 1
    if channel_score == -1:
        return 0
    return -(timestamp.get(key, 0) - channel_score * 1000)
Пример #7
0
def cleanupSuspect():
    """Run ``cleanupChannel`` over low-score and suspect channels.

    Keys of ``maintext`` that do not end in '/0' are grouped by channel (the
    part of the key before the first ``/``).  Channels whose score is <= -1
    are cleaned with ``keepChinese=False``; entries of ``suspect.items()``
    whose score is > 5 are cleaned with the helper's default arguments.  The
    summed return values of ``cleanupChannel`` are printed.
    """
    bucket = {}
    for key, _ in maintext.items():
        if key.endswith('/0'):
            continue
        bucket.setdefault(key.split('/')[0], []).append(key)

    count = 0
    for channel, keys in bucket.items():
        if channels.get(channel) <= -1:
            count += cleanupChannel(keys, keepChinese=False)
    for channel in suspect.items():
        if channels.get(channel) > 5:
            # A suspect channel may have no keys in maintext; pass an empty
            # list instead of the None that a bare .get() would return.
            count += cleanupChannel(bucket.get(channel, []))
    print('cleanupSuspect', count)
Пример #8
0
def searchTextRaw(targets, searchCore=False):
    """Search for ``targets`` and return the hits ordered and de-duplicated.

    The hits from ``searchRaw`` are sorted in descending order of the pair
    ``(timestamp - channel_score * 1000, key)`` (a missing timestamp counts
    as 0; the channel is the part of the key before the first ``/``).  They
    then pass through these steps, in order:

    1. ``flipFirst`` on "channel score is not -2" (``sendAfter=False``)
    2. ``dedupResult`` keyed on ``maintext.get(key)`` (``sendAfter=False``)
    3. ``flipFirst`` on "channel not in ``dbase.suspect.items()``"
    4. ``flipFirst`` on ``searchHitAll(targets, (key, text))``
    5. ``dedupResult`` keyed on the channel
    6. ``flipFirst`` on ``shouldFlipFirst``

    Args:
        targets: forwarded to ``searchRaw`` and ``searchHitAll``.
        searchCore: forwarded to ``searchRaw``.

    Returns:
        The list produced by the final ``flipFirst`` call.
    """
    def channel_of(key):
        return key.split('/')[0]

    def rank(key):
        score = timestamp.get(key, 0) - channels.get(channel_of(key)) * 1000
        return (score, key)

    hits = sorted(searchRaw(targets, searchCore=searchCore),
                  key=rank, reverse=True)
    hits = flipFirst(hits,
                     lambda key: channels.get(channel_of(key)) != -2,
                     sendAfter=False)
    hits = dedupResult(hits, maintext.get, sendAfter=False)

    suspects = dbase.suspect.items()
    hits = flipFirst(hits, lambda key: channel_of(key) not in suspects)
    hits = flipFirst(
        hits,
        lambda key: searchHitAll(targets, (key, maintext.get(key))))
    hits = dedupResult(hits, channel_of)
    return flipFirst(hits, shouldFlipFirst)
Пример #9
0
def rescore():
    """Add 1 to the stored score of every channel whose score is >= 1.

    Scores are read through ``channels.get`` and written directly into
    ``channels._db.items``; the result is then persisted with
    ``channels.save_dont_call_in_prod()``.
    """
    stored_scores = channels._db.items
    for name in stored_scores:
        if channels.get(name) >= 1:
            stored_scores[name] += 1
    channels.save_dont_call_in_prod()
Пример #10
0
def getRetainLen(channel):
    """Return the retain length for ``channel`` derived from its score.

    Returns:
        30 for a negative score; otherwise ``int(300 + 6000 / (1 + score))``,
        i.e. 6300 at score 0 and decreasing towards 300 as the score grows.
    """
    channel_score = channels.get(channel)
    return 30 if channel_score < 0 else int(300 + 6000 / (1 + channel_score))
Пример #11
0
def getMaxIteration(channel):
    """Return the iteration cap for ``channel`` derived from its score.

    Returns:
        ``max(0, 10 - score) ** 3 + 20``: 20 once the score reaches 10 or
        more, growing cubically as the score drops below 10.
    """
    headroom = max(0, 10 - channels.get(channel))
    return headroom ** 3 + 20