示例#1
0
def scaner():
    """Poll this host's running tasks forever and stop the expired ones.

    Every 30 seconds the tasks stored for ``LOCAL_HOST`` with status
    ``'running'`` are fetched.  A task is stopped when its ``endtime`` is
    non-empty and ``compare_time(now, starttime, endtime)`` returns
    ``False``.  Stopping a task means:

    * sending ``SIGKILL`` to each of its recorded processes whose status is
      not ``'stopping'`` (best effort; a failed kill is reported and
      skipped),
    * calling ``delete(taskid, False)``,
    * saving the task with status ``'stopping'``,
    * deleting the task's process records for this host.

    This function never returns.
    """
    mongodb = connect_mongodb()
    taskdao = TaskDao(mongodb)
    processdao = ProcessDao(mongodb)
    localhost = get_attr('LOCAL_HOST')
    print(localhost)
    while True:
        for t in taskdao.find_by_localhost_and_status(localhost, 'running'):
            starttime = t['starttime']
            endtime = t['endtime']
            print(starttime + " " + endtime)

            # Tasks with an empty endtime are never stopped by the scanner.
            if endtime == '':
                continue
            now = time.strftime("%Y/%m/%d %H:%M")
            # Only an explicit False from compare_time triggers a stop.
            if compare_time(now, starttime, endtime) is not False:
                continue

            taskid = str(t['_id'])
            print(taskid)
            for p in processdao.find_by_localhost_and_taskid(localhost,
                                                             taskid):
                if p['taskid'] != taskid or p['status'] == 'stopping':
                    continue
                print("杀死进程%s" % (p['pid']))
                try:
                    os.kill(p['pid'], signal.SIGKILL)
                except Exception as exc:
                    # Best effort: the process may already be gone.  Catch
                    # Exception (not a bare except) so KeyboardInterrupt and
                    # SystemExit can still stop the scanner.
                    print("failed to kill process %s: %s" % (p['pid'], exc))

            delete(taskid, False)
            t['status'] = 'stopping'
            taskdao.save(t)
            processdao.delete_by_localhost_and_taskid(localhost, taskid)
        time.sleep(30)
示例#2
0
def delete(taskid, is_changed):
    """Remove a task's URL data and optionally stamp its end time.

    ``URLDao.delete_task(taskid)`` is always called on a fresh Redis
    connection.  When *is_changed* is truthy, the task record is also
    loaded from MongoDB, its ``'endtime'`` is set to the current local time
    formatted as ``%Y/%m/%d %H:%M``, and the record is saved back.
    """
    URLDao(connect_redis()).delete_task(taskid)

    if not is_changed:
        return

    dao = TaskDao(connect_mongodb())
    record = dao.find_by_id(taskid)
    record['endtime'] = time.strftime("%Y/%m/%d %H:%M")
    dao.save(record)
示例#3
0
文件: manage.py 项目: Tyihou/project
def init():
    """Build the objects the manager needs and return them as a tuple.

    Reads ``REDIS_HOST``, ``SUBSCRIBE`` and ``LOCAL_HOST`` via ``get_attr``,
    creates a ``Messager`` on the Redis host and subscribes it, then opens a
    MongoDB connection shared by the two DAOs.

    Returns:
        ``(localhost, listener, taskdao, processdao, process)`` where
        ``process`` is a ``ProcessController`` for the local host.
    """
    redis_address = get_attr('REDIS_HOST')
    subscription = get_attr('SUBSCRIBE')
    host = get_attr('LOCAL_HOST')

    messager = Messager(redis_address)
    messager.subscribe(subscription)

    mongo = connect_mongodb()
    # Tuple elements are evaluated left to right, so the DAOs are built
    # before the ProcessController.
    return (host, messager, TaskDao(mongo), ProcessDao(mongo),
            ProcessController(host))
示例#4
0
def wait(taskid):
    """Block until the task's time check passes, then mark it running and run it.

    The task is loaded once.  ``compare_time(now, starttime, endtime)`` is
    then evaluated repeatedly, with a 30-second sleep after every evaluation
    (including the last one), until it returns something other than
    ``False``.  If that result is exactly ``True``, the task and its process
    records on this host are set to ``'running'`` and ``run(taskid)`` is
    called; for any other result the function returns without doing
    anything.
    """
    db = connect_mongodb()
    tasks = TaskDao(db)
    record = tasks.find_by_id(taskid)

    begin = record['starttime']
    finish = record['endtime']
    host = get_attr('LOCAL_HOST')

    while True:
        ready = compare_time(time.strftime("%Y/%m/%d %H:%M"), begin, finish)
        time.sleep(30)
        if ready is not False:
            break

    if ready is not True:
        return

    record['status'] = 'running'
    tasks.save(record)
    ProcessDao(db).update_status_by_localhost_and_taskid(
        host, taskid, 'running')
    run(taskid)
示例#5
0
def init(taskid, is_restart):
    """Configure the spider for *taskid* and push its start URLs to Redis.

    The task record is loaded from MongoDB and a spider object is chosen
    from its ``'webtype'``:

    * ``'news'`` / ``'blog'``: the ``...Recover`` variant when *is_restart*
      is truthy, the plain variant otherwise;
    * ``'ecommerce'``: ``ShopMainSpider`` when the task has no keywords,
      otherwise ``ShopKeywordSpider`` with ``keywords`` set on it.

    ``name`` and ``redis_key`` are then set on the chosen object, every
    entry of ``task['starturls']`` is passed to ``URLDao.insert_url``, and
    for news/blog tasks ``allowed_domains`` is set to the ``get_domain``
    result of each start URL.

    Args:
        taskid: Task identifier; also used as the spider name and as the
            prefix of the ``":start_urls"`` Redis key.
        is_restart: Selects the recover variant for news/blog spiders.

    Returns:
        None. The configuration is applied as attributes on the chosen
        spider object.

    Raises:
        ValueError: If the task's ``'webtype'`` is not one of ``'news'``,
            ``'blog'`` or ``'ecommerce'``.
    """
    mongodb = connect_mongodb()
    taskdao = TaskDao(mongodb)
    task = taskdao.find_by_id(taskid)
    webtype = task['webtype']

    # NOTE(review): copy.deepcopy returns class objects unchanged, so if
    # these spider names are classes the attributes below are set on the
    # shared class itself.  Since nothing is returned from this function,
    # that appears to be relied upon -- confirm before changing.
    if webtype == "news":
        temp = deepcopy(NewsSpiderRecover if is_restart else NewsSpider)
    elif webtype == "blog":
        temp = deepcopy(BlogSpiderRecover if is_restart else BlogSpider)
    elif webtype == "ecommerce":
        keywords = task['keywords']
        if len(keywords) == 0:
            temp = deepcopy(ShopMainSpider)
        else:
            temp = deepcopy(ShopKeywordSpider)
            temp.keywords = keywords
    else:
        # Previously this fell through with temp = None and failed below
        # with an opaque AttributeError on NoneType.
        raise ValueError(
            "unsupported webtype %r for task %s" % (webtype, taskid))

    temp.name = taskid
    temp.redis_key = taskid + ":start_urls"

    redis = connect_redis()
    url_manager = URLDao(redis)

    # Only news/blog spiders get allowed_domains assigned.
    restrict_domains = webtype in ('news', 'blog')
    allowed_domains = []
    for url in task['starturls']:
        url_manager.insert_url(taskid, url)
        if restrict_domains:
            domain = get_domain(url)
            print(domain)
            allowed_domains.append(domain)
    if restrict_domains:
        temp.allowed_domains = allowed_domains
示例#6
0
 def __init__(self, localhost):
     """Remember *localhost* and build both DAOs on one MongoDB connection."""
     self.localhost = localhost
     db = connect_mongodb()
     self.processdao = ProcessDao(db)
     self.taskdao = TaskDao(db)
示例#7
0
class ProcessController(object):
    """Start, pause, resume and kill the worker processes of tasks on one host.

    Process bookkeeping goes through ``ProcessDao`` and task data through
    ``TaskDao``; both share the connection returned by
    ``connect_mongodb()``.  Operations on OS processes are best effort: a
    failure for one process is printed and does not abort the call.
    """

    def __init__(self, localhost):
        """Create the DAOs and remember which host this controller manages."""
        mongodb = connect_mongodb()
        self.processdao = ProcessDao(mongodb)
        self.taskdao = TaskDao(mongodb)
        self.localhost = localhost

    def _spawn(self, taskid, is_restart, target, status):
        """Launch ``processnum`` processes running *target* for *taskid*.

        ``init(taskid, is_restart)`` is called before each process is
        started, and each started process is recorded with *status*.
        """
        processnum = self.taskdao.find_by_id(taskid)['processnum']
        for _ in range(processnum):
            init(taskid, is_restart)
            p = Process(name=taskid, target=target, args=(taskid, ))
            p.start()
            print(p.pid)
            self.processdao.insert_process(self.localhost, p.pid, taskid,
                                           status)

    def start_task(self, taskid, is_restart):
        """Start the task: spawn its processes running ``run`` immediately."""
        self._spawn(taskid, is_restart, run, 'running')

    def resume_task(self, taskid):
        """Resume every recorded process of a paused task and mark it running."""
        process_list = self.processdao.find_by_localhost_and_taskid(
            self.localhost, taskid)
        for p in process_list:
            if p['taskid'] != taskid:
                continue
            try:
                psutil.Process(p['pid']).resume()
            except Exception as exc:
                # Best effort: skip processes that cannot be resumed.
                print("failed to resume process %r: %s" % (p, exc))
        self.processdao.update_status_by_localhost_and_taskid(
            self.localhost, taskid, 'running')

    def resume_process(self, pid):
        """Resume one suspended process and mark its record running."""
        try:
            print("唤醒进程%s" % (pid))
            psutil.Process(pid).resume()
            self.processdao.update_status_by_localhost_and_pid(
                self.localhost, pid, 'running')
        except Exception as exc:
            print("failed to resume process %r: %s" % (pid, exc))

    def terminate_task(self, taskid):
        """Kill the task's processes and remove the task's data.

        Each recorded process whose status is not ``'stopping'`` is sent
        ``SIGKILL``.  Afterwards ``delete(taskid, True)`` is called and the
        task's process records for this host are deleted.
        """
        process_list = self.processdao.find_by_localhost_and_taskid(
            self.localhost, taskid)
        for p in process_list:
            if p['taskid'] != taskid or p['status'] == 'stopping':
                continue
            try:
                print("杀死进程%s" % (p['pid']))
                os.kill(p['pid'], signal.SIGKILL)
            except Exception as exc:
                # Best effort: the process may already have exited.
                print("failed to kill process %r: %s" % (p, exc))
        delete(taskid, True)
        self.processdao.delete_by_localhost_and_taskid(self.localhost, taskid)

    def terminate_process(self, pid):
        """Kill one process, drop its record and update its task's count.

        If the kill or any DAO call fails, the error is printed and the
        remaining steps are skipped.  ``update_processnum`` is only called
        when a record for *pid* was found; previously the lookup of an
        unknown pid ended in a ``NameError`` that a bare ``except`` hid.
        """
        try:
            print("杀死进程%s" % (pid))
            os.kill(pid, signal.SIGKILL)
            process_list = self.processdao.find_by_localhost_and_pid(
                self.localhost, pid)
            self.processdao.delete_by_localhost_and_pid(self.localhost, pid)
            if len(process_list) > 0:
                self.taskdao.update_processnum(process_list[0]['taskid'])
        except Exception as exc:
            print("failed to terminate process %r: %s" % (pid, exc))

    def suspend_task(self, taskid):
        """Suspend the task's processes and mark their records pausing.

        Records whose status is ``'stopping'`` are left alone.
        """
        process_list = self.processdao.find_by_localhost_and_taskid(
            self.localhost, taskid)
        for p in process_list:
            if p['taskid'] != taskid or p['status'] == 'stopping':
                continue
            try:
                psutil.Process(p['pid']).suspend()
            except Exception as exc:
                # Best effort: skip processes that cannot be suspended.
                print("failed to suspend process %r: %s" % (p, exc))
        self.processdao.update_status_by_localhost_and_taskid(
            self.localhost, taskid, 'pausing')

    def suspend_process(self, pid):
        """Suspend one process and mark its record pausing."""
        try:
            print("挂起进程%s" % (pid))
            psutil.Process(pid).suspend()
            self.processdao.update_status_by_localhost_and_pid(
                self.localhost, pid, 'pausing')
        except Exception as exc:
            print("failed to suspend process %r: %s" % (pid, exc))

    def sleep(self, taskid, t):
        """Sleep *t* seconds in the caller if any record belongs to *taskid*.

        The task id of every inspected record is printed; the scan stops at
        the first match.
        """
        for p in self.processdao.find_all():
            print(p['taskid'])
            if p['taskid'] == taskid:
                time.sleep(t)
                break

    def processes(self):
        """Print ``"<pid> <taskid>"`` for every recorded process."""
        for p in self.processdao.find_all():
            print(str(p['pid']) + " " + p['taskid'])

    def wait_task(self, taskid, is_restart):
        """Spawn the task's processes running ``wait`` so they start later."""
        # 'waitting' (sic) is the status value written to the store; keep
        # the spelling so existing records and queries still match.
        self._spawn(taskid, is_restart, wait, 'waitting')

    def scan_task(self):
        """Start the scanner process that kills tasks whose time is up.

        Existing ``'scanner'`` records for this host are deleted first, then
        a process running ``scaner`` is started and recorded with an empty
        task id.
        """
        self.processdao.delete_by_localhost_and_status(self.localhost,
                                                       'scanner')
        p = Process(name='spider_scaner', target=scaner)
        p.start()
        self.processdao.insert_process(self.localhost, p.pid, '', 'scanner')