def scaner():
    """Poll forever and stop running tasks whose time window has closed.

    Every 30 seconds the tasks stored with status ``'running'`` for this host
    (the ``LOCAL_HOST`` setting) are fetched.  A task is stopped when its
    ``endtime`` is non-empty and ``compare_time(now, starttime, endtime)``
    returns ``False``.  Stopping a task means:

    * SIGKILL is sent to each of its recorded processes whose status is not
      already ``'stopping'``;
    * ``delete(taskid, False)`` is called;
    * the task is saved with status ``'stopping'``;
    * the task's process records for this host are deleted.

    This function never returns.
    """
    mongodb = connect_mongodb()
    taskdao = TaskDao(mongodb)
    processdao = ProcessDao(mongodb)
    localhost = get_attr('LOCAL_HOST')
    print(localhost)

    while True:
        for task in taskdao.find_by_localhost_and_status(localhost, 'running'):
            starttime = task['starttime']
            endtime = task['endtime']
            print(starttime + " " + endtime)

            # Tasks with an empty end time are never stopped by the scanner.
            if endtime == '':
                continue
            now = time.strftime("%Y/%m/%d %H:%M")
            if compare_time(now, starttime, endtime) is not False:
                continue

            taskid = str(task['_id'])
            print(taskid)
            for proc in processdao.find_by_localhost_and_taskid(localhost, taskid):
                if proc['taskid'] != taskid or proc['status'] == 'stopping':
                    continue
                print("杀死进程%s" % (proc['pid']))
                try:
                    os.kill(proc['pid'], signal.SIGKILL)
                except Exception as exc:
                    # Best effort: a failed kill must not stop the scanner.
                    # Unlike a bare ``except``, this lets KeyboardInterrupt
                    # and SystemExit propagate.
                    print("failed to kill process %s: %s" % (proc['pid'], exc))
                    continue

            delete(taskid, False)
            task['status'] = 'stopping'
            taskdao.save(task)
            processdao.delete_by_localhost_and_taskid(localhost, taskid)

        time.sleep(30)
def delete(taskid, is_changed):
    """Delete a task through the Redis-backed ``URLDao`` and optionally stamp its end time.

    Args:
        taskid: Identifier passed to ``URLDao.delete_task`` and, when
            ``is_changed`` is truthy, to ``TaskDao.find_by_id``.
        is_changed: When truthy, the task document is loaded from MongoDB, its
            ``endtime`` is set to the current local time formatted as
            ``%Y/%m/%d %H:%M`` and the document is saved back.
    """
    URLDao(connect_redis()).delete_task(taskid)
    if not is_changed:
        return

    task_store = TaskDao(connect_mongodb())
    record = task_store.find_by_id(taskid)
    record['endtime'] = time.strftime("%Y/%m/%d %H:%M")
    task_store.save(record)
def init():
    """Create the objects a node needs at start-up.

    Reads the ``REDIS_HOST``, ``SUBSCRIBE`` and ``LOCAL_HOST`` settings through
    ``get_attr``, creates a ``Messager`` for the Redis host and subscribes it
    to the configured channel, and builds the DAOs on one MongoDB handle.

    Returns:
        A 5-tuple ``(localhost, listener, taskdao, processdao, process)`` where
        ``process`` is a ``ProcessController`` for ``localhost``.
    """
    redis_host = get_attr('REDIS_HOST')
    channel = get_attr('SUBSCRIBE')
    localhost = get_attr('LOCAL_HOST')

    listener = Messager(redis_host)
    listener.subscribe(channel)

    mongodb = connect_mongodb()
    # Tuple elements are evaluated left to right, so the DAOs are still
    # created before the ProcessController.
    return (
        localhost,
        listener,
        TaskDao(mongodb),
        ProcessDao(mongodb),
        ProcessController(localhost),
    )
def wait(taskid):
    """Block until the task's time window opens, then mark it running and run it.

    The task's ``starttime``/``endtime`` are read once.  ``compare_time`` is
    then called with the current time (``%Y/%m/%d %H:%M``) followed by a
    30-second sleep, repeatedly, for as long as it returns ``False``.  If the
    loop ends with exactly ``True``, the task and its process records on this
    host are set to ``'running'`` and ``run(taskid)`` is called.

    Args:
        taskid: Identifier of the task to wait for.
    """
    mongodb = connect_mongodb()
    taskdao = TaskDao(mongodb)
    task = taskdao.find_by_id(taskid)
    starttime, endtime = task['starttime'], task['endtime']
    localhost = get_attr('LOCAL_HOST')

    in_window = False
    while in_window is False:
        now = time.strftime("%Y/%m/%d %H:%M")
        in_window = compare_time(now, starttime, endtime)
        # The sleep follows every check, including the one that succeeds.
        time.sleep(30)

    if in_window is not True:
        return

    task['status'] = 'running'
    taskdao.save(task)
    ProcessDao(mongodb).update_status_by_localhost_and_taskid(
        localhost, taskid, 'running')
    run(taskid)
def init(taskid, is_restart):
    """Configure the spider object for a task and store its start URLs.

    The task document is loaded from MongoDB and a spider object is selected
    from its ``webtype``:

    * ``'news'``  -> ``NewsSpiderRecover`` if ``is_restart`` else ``NewsSpider``
    * ``'blog'``  -> ``BlogSpiderRecover`` if ``is_restart`` else ``BlogSpider``
    * ``'ecommerce'`` -> ``ShopMainSpider`` when the task has no keywords,
      otherwise ``ShopKeywordSpider`` with ``keywords`` set on it
      (``is_restart`` is not consulted for this type).

    The selected object gets ``name = taskid`` and
    ``redis_key = taskid + ":start_urls"``.  Every entry of the task's
    ``starturls`` is stored through ``URLDao.insert_url``; for news and blog
    tasks the domain of each start URL is also printed and collected into
    ``allowed_domains``.

    NOTE(review): if the spider names refer to classes, ``copy.deepcopy``
    returns the class itself rather than a copy, so the attributes are set on
    the shared class object -- confirm this is intended.

    Args:
        taskid: Identifier of the task to prepare.
        is_restart: Select the "Recover" spider variant for news/blog tasks.

    Raises:
        ValueError: If the task's ``webtype`` is not ``'news'``, ``'blog'`` or
            ``'ecommerce'``.  (Previously this surfaced as an AttributeError
            on ``None``.)
    """
    task = TaskDao(connect_mongodb()).find_by_id(taskid)
    webtype = task['webtype']

    if webtype == 'news':
        spider = deepcopy(NewsSpiderRecover if is_restart else NewsSpider)
    elif webtype == 'blog':
        spider = deepcopy(BlogSpiderRecover if is_restart else BlogSpider)
    elif webtype == 'ecommerce':
        keywords = task['keywords']
        if len(keywords) == 0:
            spider = deepcopy(ShopMainSpider)
        else:
            spider = deepcopy(ShopKeywordSpider)
            spider.keywords = keywords
    else:
        # Fail with a clear message before anything is written to Redis.
        raise ValueError(
            "unsupported webtype %r for task %s" % (webtype, taskid))

    spider.name = taskid
    spider.redis_key = taskid + ":start_urls"

    url_manager = URLDao(connect_redis())
    restrict_domains = webtype in ('news', 'blog')
    allowed_domains = []
    for url in task['starturls']:
        url_manager.insert_url(taskid, url)
        if restrict_domains:
            domain = get_domain(url)
            print(domain)
            allowed_domains.append(domain)
    if restrict_domains:
        spider.allowed_domains = allowed_domains
def __init__(self, localhost):
    """Store the host name and create both DAOs on one MongoDB handle.

    Args:
        localhost: Host identifier kept on the instance as ``self.localhost``.
    """
    self.localhost = localhost
    db = connect_mongodb()
    self.processdao = ProcessDao(db)
    self.taskdao = TaskDao(db)
class ProcessController(object):
    """Start, pause, resume and kill the worker processes of tasks on one host.

    Process records are read and written through ``ProcessDao`` and task
    documents through ``TaskDao``, both built on one MongoDB handle.

    Signal, psutil and DAO failures inside the methods are handled on a
    best-effort basis: they are reported with ``print`` and do not propagate.
    ``KeyboardInterrupt`` and ``SystemExit`` are not intercepted.
    """

    def __init__(self, localhost):
        """Store the host name and create the DAOs.

        Args:
            localhost: Host identifier used in every process-record query.
        """
        mongodb = connect_mongodb()
        self.processdao = ProcessDao(mongodb)
        self.taskdao = TaskDao(mongodb)
        self.localhost = localhost

    def start_task(self, taskid, is_restart):
        """Start the task by spawning its worker processes.

        One process targeting ``run(taskid)`` is started per unit of the
        task's ``processnum``; ``init(taskid, is_restart)`` is called before
        each start and every new pid is recorded with status ``'running'``.
        """
        processnum = self.taskdao.find_by_id(taskid)['processnum']
        for _ in range(processnum):
            init(taskid, is_restart)
            p = Process(name=taskid, target=run, args=(taskid, ))
            p.start()
            print(p.pid)
            self.processdao.insert_process(self.localhost, p.pid, taskid,
                                           'running')

    def resume_task(self, taskid):
        """Wake up a paused task: resume each of its processes on this host.

        The task's process records are set to ``'running'`` afterwards, even
        if resuming individual processes failed.
        """
        process_list = self.processdao.find_by_localhost_and_taskid(
            self.localhost, taskid)
        for p in process_list:
            if p['taskid'] == taskid:
                try:
                    psutil.Process(p['pid']).resume()
                except Exception as exc:
                    print("failed to resume a process of task %s: %s"
                          % (taskid, exc))
                    continue
        self.processdao.update_status_by_localhost_and_taskid(
            self.localhost, taskid, 'running')

    def resume_process(self, pid):
        """Wake up one suspended process and record it as ``'running'``."""
        try:
            print("唤醒进程%s" % (pid))
            psutil.Process(pid).resume()
            self.processdao.update_status_by_localhost_and_pid(
                self.localhost, pid, 'running')
        except Exception as exc:
            print("failed to resume process %s: %s" % (pid, exc))

    def terminate_task(self, taskid):
        """Kill the task's processes and terminate the task.

        SIGKILL is sent to every recorded process of the task on this host
        that is not already ``'stopping'``.  Then ``delete(taskid, True)`` is
        called and the task's process records are deleted.
        """
        process_list = self.processdao.find_by_localhost_and_taskid(
            self.localhost, taskid)
        for p in process_list:
            if p['taskid'] == taskid and p['status'] != 'stopping':
                try:
                    print("杀死进程%s" % (p['pid']))
                    os.kill(p['pid'], signal.SIGKILL)
                except Exception as exc:
                    print("failed to kill a process of task %s: %s"
                          % (taskid, exc))
                    continue
        delete(taskid, True)
        self.processdao.delete_by_localhost_and_taskid(self.localhost, taskid)

    def terminate_process(self, pid):
        """Kill one process and remove its record.

        If the kill succeeds, the process record is deleted and, when a
        record existed, ``TaskDao.update_processnum`` is called for its task.
        If any step fails, the remaining steps are skipped.
        """
        try:
            print("杀死进程%s" % (pid))
            os.kill(pid, signal.SIGKILL)
            process_list = self.processdao.find_by_localhost_and_pid(
                self.localhost, pid)
            self.processdao.delete_by_localhost_and_pid(self.localhost, pid)
            if len(process_list) > 0:
                taskid = process_list[0]['taskid']
                self.taskdao.update_processnum(taskid)
        except Exception as exc:
            print("failed to terminate process %s: %s" % (pid, exc))

    def suspend_task(self, taskid):
        """Pause the task: suspend each of its processes on this host.

        Processes already marked ``'stopping'`` are skipped.  The task's
        process records are set to ``'pausing'`` afterwards.
        """
        process_list = self.processdao.find_by_localhost_and_taskid(
            self.localhost, taskid)
        for p in process_list:
            if p['taskid'] == taskid and p['status'] != 'stopping':
                try:
                    psutil.Process(p['pid']).suspend()
                except Exception as exc:
                    print("failed to suspend a process of task %s: %s"
                          % (taskid, exc))
                    continue
        self.processdao.update_status_by_localhost_and_taskid(
            self.localhost, taskid, 'pausing')

    def suspend_process(self, pid):
        """Suspend one process and record it as ``'pausing'``."""
        try:
            print("挂起进程%s" % (pid))
            psutil.Process(pid).suspend()
            self.processdao.update_status_by_localhost_and_pid(
                self.localhost, pid, 'pausing')
        except Exception as exc:
            print("failed to suspend process %s: %s" % (pid, exc))

    def sleep(self, taskid, t):
        """Sleep ``t`` seconds if any recorded process belongs to ``taskid``.

        The task id of each record is printed while searching; the search
        stops at the first match.
        """
        for p in self.processdao.find_all():
            print(p['taskid'])
            if p['taskid'] == taskid:
                time.sleep(t)
                break

    def processes(self):
        """Print the pid and task id of every recorded process."""
        for p in self.processdao.find_all():
            print(str(p['pid']) + " " + p['taskid'])

    def wait_task(self, taskid, is_restart):
        """Spawn processes that wait for the task's start time.

        Works like ``start_task`` but the processes target ``wait(taskid)``
        and are recorded with status ``'waitting'``.
        """
        processnum = self.taskdao.find_by_id(taskid)['processnum']
        for _ in range(processnum):
            init(taskid, is_restart)
            p = Process(name=taskid, target=wait, args=(taskid, ))
            p.start()
            print(p.pid)
            # 'waitting' (sic) is the stored status value; do not "fix" the
            # spelling here without changing every reader of it.
            self.processdao.insert_process(self.localhost, p.pid, taskid,
                                           'waitting')

    def scan_task(self):
        """Start the scanner process that kills tasks whose time is up.

        Existing ``'scanner'`` records for this host are deleted first, then
        a process targeting ``scaner`` is started and recorded with an empty
        task id and status ``'scanner'``.
        """
        self.processdao.delete_by_localhost_and_status(self.localhost,
                                                       'scanner')
        p = Process(name='spider_scaner', target=scaner)
        p.start()
        self.processdao.insert_process(self.localhost, p.pid, '', 'scanner')