def run(pids): global predictor, num_normals, num_spams infos = libtieba.get_posts_info(pids) for info in infos: scores = predictor.predict(info.title, info.content) #print scores if scores[FLAGS.score_index] > FLAGS.thre: num_spams += 1 print info.postId, info.threadId, info.forumName, info.userName, info.title, info.content print 'score: ', scores[0], ' dnn_score: ', scores[ 1], ' adjusted_dnn_score: ', scores[2], ' spam ratio: ', float( num_spams) / (num_spams + num_normals), FLAGS.thre else: num_normals += 1 l = [ str(scores[0]), str(scores[2]), str(info.postId), info.forumName, info.userName, info.title, info.content ] line = '\t'.join(l) if scores[0] > 0.9: out.write('%s\n' % line) if scores[2] > 0.9: out_dnn.write('%s\n' % line)
def run(pids): #print 'run pids begin' global predictor, num_normals, num_spams infos = libtieba.get_posts_info(pids) #print 'finish get posts info' for info in infos: score = predictor.predict(info.title, info.content) #print 'finish predict' if score > FLAGS.thre: num_spams += 1 print info.postId, info.threadId, info.forumName, info.userName, info.title, info.content print 'score: ', score, ' spam ratio: ', float(num_spams) / (num_spams + num_normals), FLAGS.thre vals_ = [] if FLAGS.write_db: vals_ = db_writer.add(info, score) #print 'finish add one to db' if FLAGS.realtime_del: if len(vals_) == FLAGS.buffer_size: print 'real time delete' exe = '/home/users/chenghuige/forum/orp002/php/bin/php' out = open('delete.temp', 'w') for val in vals_: thread_id = val[1] post_id = val[2] user_id = val[-2] forum_id = val[0] out.write('{}\t{}\t{}\t{}\n'.format(thread_id, post_id, user_id, forum_id)) out.close() os.system(exe + ' multi-delete.php delete.temp') else: num_normals += 1 l = [str(score), str(info.postId), info.forumName, info.userName, info.title, info.content] line = '\t'.join(l)
def run(pids): global predictor, num_normals, num_spams infos = libtieba.get_posts_info(pids) for info in infos: score = predictor.predict(info.title, info.content) if score > FLAGS.thre: num_spams += 1 print info.postId, info.threadId, info.forumName, info.userName, info.title, info.content print 'score: ', score, ' spam ratio: ', float(num_spams) / (num_spams + num_normals), FLAGS.thre else: num_normals += 1 l = [str(score), str(info.postId), info.forumName, info.userName, info.title, info.content] line = '\t'.join(l)
def run(pids): print 'run pids begin' global predictor, num_normals, num_spams infos = libtieba.get_posts_info(pids) print 'finish get posts info' for info in infos: score = predictor.predict(info.title, info.content) print 'finish predict' if score > FLAGS.thre: num_spams += 1 print info.postId, info.threadId, info.forumName, info.userName, info.title, info.content print 'score: ', score, ' spam ratio: ', float(num_spams) / ( num_spams + num_normals), FLAGS.thre db_writer.add(info, score) print 'finish add one to db' else: num_normals += 1 l = [ str(score), str(info.postId), info.forumName, info.userName, info.title, info.content ] line = '\t'.join(l) print 'run pids end'
warnings.simplefilter("ignore", RuntimeWarning) import libtieba out = open(sys.argv[2], 'w') lines = open(sys.argv[1]).readlines() linelist = [line.strip().split() for line in lines] pids = [l[0] for l in linelist] m = {} for l in linelist: m[long(l[0])] = l[1] pidvec = libtieba.vector_less__long_long_unsigned_int__greater_() for pid in pids: pidvec.append(long(pid)) print len(pidvec) results = libtieba.get_posts_info(pidvec) for item in results: if (item.postId == 0): continue item.title = item.title.replace('\n', ' ') item.content = item.content.replace('\n', ' ') out.write("%s\n" % ("\t".join([ str(item.threadId), m[item.postId], str(item.postId), item.title, item.content ]))) print len(results)