def spyNew(sleeptime=100, processes=3, threads=3):
    """Run one monitoring cycle over hot posts, new posts and configured boards.

    This is the function invoked when the script is run directly.  It collects
    the boards referenced by the hot/new post lists plus
    ``CONFIG_INTERESTING_BOARDS``, reads page 1 of each board, queues every
    post accepted by ``filter_pass`` (each post id at most once) on an MPMS
    pool, waits for the queue to drain, shuts the pool down, and finally
    sleeps for the remainder of the cycle.

    Args:
        sleeptime: Target length of the whole cycle in seconds, measured from
            the start of the call.  If scanning the boards takes longer than
            10 seconds, the excess is added to this budget.
        processes: Number of worker processes passed to ``MPMS``.
        threads: Threads per worker process passed to ``MPMS``.

    Returns:
        None.
    """
    global ignore_counts
    # Reset the module-level counter that the summary line below reports.
    # NOTE(review): presumably incremented inside filter_pass -- confirm.
    ignore_counts = 0
    starttime = time.time()
    myprint("start")
    m = MPMS(getBBS, handler, processes=processes, threads_per_process=threads)

    thenew = getHotPost() + getNewPost()
    # The first field of every hot/new entry is used as the board id.
    boardlist = {int(i[0]) for i in thenew}
    myprint("get new finished, len(thenew)={}, len(boardlist)={}".format(len(thenew), len(boardlist)))
    boardlist.update(CONFIG_INTERESTING_BOARDS)

    # Gather page 1 of every board before filtering.
    newclicksdata = []
    for boardid in boardlist:
        newclicksdata.extend(getBoardPage_detailed(boardid, 1))

    # Post ids already queued in this cycle, so a post is submitted only once.
    workload = set()
    for boardid, postid, reply, clicks, lastpost in newclicksdata:
        # filter_pass is still evaluated first for every row.
        if filter_pass(boardid, postid, reply, clicks, lastpost) and postid not in workload:
            m.put([boardid, postid, ""])
            workload.add(postid)

    # Read the clock once so that the report, the threshold test and the
    # adjustment below all use the same measurement.
    elapsed = time.time() - starttime
    myprint("Check {} boards, ignore {} posts, using {} seconds".format(len(boardlist), ignore_counts, int(elapsed)))
    if elapsed > 10:
        myprint("too slow! add wait time")
        sleeptime += elapsed - 10

    # Poll until the task queue is empty.
    while len(m) > 0:
        myprint("Remaning queue length: {len}".format(len=len(m)))
        sleep(2)
    myprint("All done! wait 5 seconds to clean up")
    sleep(5)

    # The plus1() calls mark each shutdown stage in a log file, so a hang in
    # close() or join() can be told apart afterwards.
    myprint("Try close the queue... If this hang on, you have to kill the python process")
    plus1("tryclose.log")
    m.close()
    myprint("Try join the queue... If this hang on, you have to kill the python process")
    plus1("tryjoin.log")
    m.join()
    plus1("join_success.log")
    myprint("All child process exited succesfully")

    # Sleep only for what is left of the cycle, never a negative duration.
    sleeptime = max(0, starttime + sleeptime - time.time())
    print("Sleep a while ( {sleeptime:.0f}s )...".format(sleeptime=sleeptime))
    sleep(sleeptime)
    myprint("Sleep done! wake up and exit...")
    return
def main():
    """Benchmark MPMS under several process/thread configurations.

    For every ``(processes, threads_per_process)`` pair a fresh pool is
    created and started, 2000 tasks are queued, and the wall-clock time until
    ``join()`` returns is appended to a running report.  The accumulated
    report is printed after each run, followed by a 5 second pause.
    """
    report = ""

    # Each entry is (processes, threads_per_process); the benchmark is
    # repeated once per entry so the timings can be compared.
    configurations = (
        (20, 50),
        (10, 20),
        (5, 10),
        (3, 3),
        (1, 1),
    )

    for n_procs, n_threads in configurations:
        # Set up the pool for this configuration.
        pool = MPMS(
            worker,
            collector,
            processes=n_procs,  # optional, how many processes, default value is your cpu core number
            threads=n_threads,  # optional, how many threads per process, default is 2
            meta={"any": 1, "dict": "you", "want": {"pass": "******"}, "worker": 0.5},
        )
        pool.start()  # start and fork subprocess
        started_at = time()  # remember when this run began

        # Put the task parameters into the task queue, 2000 tasks in total.
        for task_no in range(2000):
            pool.put(task_no, t=time())

        # Closing the task queue is optional; join() closes it automatically.
        # pool.close()

        # Close the task queue and wait for all workers and the handler to
        # finish processing every task and every result.
        pool.join()

        # Record this run and print everything collected so far.
        report += "".join([
            "Processes:", str(n_procs),
            " Threads_per_process:", str(n_threads),
            " Total_threads:", str(n_procs * n_threads),
            " TotalTime: ", str(time() - started_at),
            "\n",
        ])
        print(report)
        print('sleeping 5s before next')
        sleep(5)
def spyBoard_dict(boardid_dict, pages_input=None, sleeptime=86400, processes=2, threads=2):
    """Monitor the given collection of board ids.

    Every board is walked from its last page down to page 1 and each row
    found is queued on an MPMS pool as ``[boardid, row[1], "big"]``.  The walk
    over a board stops early at the first page that comes back as an empty
    list.  After all boards are queued the function sleeps for ``sleeptime``
    seconds and returns.

    Args:
        boardid_dict: Iterable of board ids (iterating a dict yields its keys).
        pages_input: Number of pages to fetch per board.  When ``None`` the
            page count is obtained from ``getBoardSize`` for each board.
        sleeptime: Seconds to sleep once everything has been queued.
        processes: Number of worker processes passed to ``MPMS``.
        threads: Threads per worker process passed to ``MPMS``.

    Returns:
        None.
    """
    pool = MPMS(getBBS, handler, processes=processes, threads_per_process=threads)

    for boardid in boardid_dict:
        # An explicit page count wins; otherwise ask for the board's size.
        pages = getBoardSize(boardid) if pages_input is None else pages_input
        print("[board {}]Try to get {} pages".format(boardid, pages))

        # Highest page number first, counting down to page 1.
        for page_no in range(pages, 0, -1):
            rows = getBoardPage(boardid, page_no)
            if rows == []:
                # An empty page ends the walk for this board.
                break
            for row in rows:
                pool.put([boardid, row[1], "big"])

    sleep(sleeptime)
    return
item.append(uid) return item def handler(meta, item): meta["fp"].write("\t".join([str(i) for i in item]) + "\n") if __name__ == "__main__": #print(sign(1579490640000, "/api/front/psons/search", {"size": 12, "page":0, "lang": "cn"})) meta = {"fp": open("personzju.txt", "w", encoding="utf-8")} m = MPMS(worker, handler, 2, 2, meta=meta) m.start() for t in get("/api/front/psons/search", { "size": 10000, "page": 0, "lang": "cn" }, cache=True)["data"]["content"]: #tprint(t["cn_name"], t["college_name"], t["work_title"], t["mapping_name"], t["access_count"]) m.put([ t["cn_name"], t["college_name"], t["work_title"], t["mapping_name"], t["access_count"] ]) while len(m) > 10: myprint("Remaning " + str(len(m))) time.sleep(2) m.join() myprint("Done!")