示例#1
0
 def __init__(self, ip=None, port=None):
     """Create a TCP client socket for the configured server.

     :param ip: server address; defaults to the BASE/SERVER_IP config entry.
     :param port: server port; defaults to int(BASE/SERVER_PORT).
     """
     # Resolve defaults at call time.  The original evaluated
     # ConfigHolder inside the signature, which runs once at class
     # definition and freezes the values, ignoring any later config
     # change (and forcing config load at import time).
     if ip is None:
         ip = ConfigHolder.getConfig().getProperty(BASE, SERVER_IP)
     if port is None:
         port = int(ConfigHolder.getConfig().getProperty(BASE, SERVER_PORT))
     self.ip = ip
     self.port = port
     self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
示例#2
0
def getLocalFilesPath():
    """Return the local working directory for this process's role.

    The CABBAGE environment variable selects the role: NODE processes
    use the client file directory, MASTER processes the server file
    directory.  Any other (or missing) role yields None.
    """
    role = os.environ.get(CABBAGE)
    config = ConfigHolder.getConfig()
    if role == NODE:
        return config.getProperty(BASE, CLIENT_FILE_DIRECTORY)
    if role == MASTER:
        return config.getProperty(BASE, SERVER_FILE_DIRECTORY)
    return None
示例#3
0
def configWatch(children):
    """Zookeeper child-watch callback for the config tree.

    For every child node: register a data listener and mirror the
    node's current value into the in-memory config.  Any failure is
    logged and swallowed so the watch keeps firing.
    """
    try:
        for child in children:
            nodePath = CONFIG_PATH + "/" + child
            kazooClient.addDataListener(nodePath, configOptionDataChange)
            value = kazooClient.getData(nodePath)
            ConfigHolder.getConfig().setProperty(BASE, child, value)
    except Exception:
        Logger.exception(log)
示例#4
0
    def __init__(self, ip=None, port=None, maxCon=1000):
        """Initialize a server endpoint.

        :param ip: listen address; defaults to the BASE/SERVER_IP config entry.
        :param port: listen port; defaults to int(BASE/SERVER_PORT).
        :param maxCon: connection limit (NOTE(review): the original
            comment said "no more than 10000" but the default is 1000 —
            confirm the intended limit).
        """
        # Resolve defaults at call time.  The original evaluated
        # ConfigHolder inside the signature, which runs once at class
        # definition and freezes the values against config reloads.
        if ip is None:
            ip = ConfigHolder.getConfig().getProperty(BASE, SERVER_IP)
        if port is None:
            port = int(ConfigHolder.getConfig().getProperty(BASE, SERVER_PORT))
        self.pool = None  # worker pool, created lazily elsewhere
        self.ip = ip
        self.port = port

        self.maxCon = maxCon
        self.inited = False
示例#5
0
    def run(self):
        """Answer a file-download request with a FileResponseMessage.

        Looks the job up in the store, then returns the base64-encoded
        content of either the job's main script (type MAIN) or a named
        attachment (type ATTACH).  Any other message type falls through
        and returns None implicitly.
        """
        jobId = self.message.jobId
        fileName = self.message.fileName
        serverDir = ConfigHolder.getConfig().getProperty(
            BASE, SERVER_FILE_DIRECTORY)
        filePath = serverDir + "/" + jobId + "/" + fileName
        with storeFactory.store() as store:
            job = store.getJob(jobId)
        msg = FileResponseMessage()
        msg.fileName = fileName
        msg.jobId = jobId
        if self.message.type == self.message.MAIN:
            if fileName == job.fileName:
                # with-block closes the handle even if read() or
                # encodestring raises (the original leaked it on error).
                with open(filePath) as f:
                    msg.fileContent = base64.encodestring(f.read())
            return msg
        if self.message.type == self.message.ATTACH:
            # Only serve files actually registered as attachments.
            for attachFile in job.attachFiles:
                if attachFile.fileName == fileName:
                    with open(filePath) as f:
                        msg.fileContent = base64.encodestring(f.read())
                    break
            return msg
示例#6
0
    def _initJobs(self):
        """Prepare every live job routed to one of this worker's queues.

        For each such job: make sure it is cached, make sure its script
        files exist locally (syncing them if not), register its job id,
        and record a celery route for each of its tasks.
        """
        store = StoreHolder.getRetryStore()
        jobs = store.getJobs()
        myQueues = store.getWork(HOST_NAME).queues
        routes = {}

        for job in jobs:
            if job.status == JOB_DELETE or job.brokerQueue not in myQueues:
                continue

            # Cache the job if a dynamic scale-up left it uncached.
            if not CacheHolder.getCache().hasKey(job.jobId, JOBS):
                CacheHolder.getCache().put(job.jobId, job, JOBS)

            clientDir = ConfigHolder.getConfig().getProperty(
                BASE, CLIENT_FILE_DIRECTORY)
            jobPath = clientDir + "/" + job.jobId
            if not os.path.isdir(jobPath):
                # Script files were never synced to this node: pull them.
                syncJob(job.jobId, store)

            self.addScriptJobId(job.jobId)

            for taskName in job.tasks:
                que = store.getQueue(job.brokerQueue)
                routes[taskName] = {'queue': que.queueName,
                                    'routing_key': que.routingKey}

        celeryconfig.CELERY_ROUTES = routes
示例#7
0
 def start(self):
     """Start the admin web UI: register all routes, bind the configured
     port, fork worker processes and attach the shared sockets."""
     log.info("启动web服务.........")
     handlers = [
         (r"/", MainHandler),
         (r"/toNewJob", NewJobHandler),
         (r"/runJob", JobRunHandler),
         (r"/jobList", JobListHandler),
         (r"/jobRunList", JobRunListHandler),
         (r"/jobListData", JobListDataHandler),
         (r"/removeJob", RemoveJobListHandlder),
         (r"/work/list", WorkListHandler),
         (r"/work/workStatusChange", WorkStatusHandler),
         (r"/queues/brokerServer", BrokerServerHandler),
         (r"/queues/brokerQueue", BrokerQueueHandler),
         (r"/queues/selectQueue", BrokerQueueByHostNameListHandler),
         (r"/queues/addQueue", AddBrokerQueueHandler),
         (r"/queues/addQueueNode", AddBrokerQueueNodeHandler),
         (r"/queues/addBrokerServer", AddBrokerServerHandler),
         (r"/config", ConfigHandler),
         (r"/settings", SettingsHandler),
         (r"/login", LoginHandler),
     ]
     application = CabbageApplication(handlers, debug=False, **settings)

     port = ConfigHolder.getConfig().getProperty(BASE, SERVER_WEB_PORT)

     # Bind before forking so all 8 worker processes share the sockets.
     sockets = tornado.netutil.bind_sockets(port)
     tornado.process.fork_processes(8)
     server = HTTPServer(application)
     server.add_sockets(sockets)
     log.info("web服务启动成功,端口:%s........."%port)
示例#8
0
class Logger():
    """Thin wrapper over the stdlib logging package.

    Importing this class configures logging from the file named by the
    BASE/LOG_CONFIG_PATH config entry (a deliberate module-import side
    effect) and keeps a shared name -> logger cache.  All helpers prefix
    messages with the local host name.
    """
    logging.config.fileConfig(ConfigHolder.getConfig().getProperty(
        BASE, LOG_CONFIG_PATH))
    loggers = {}  # name -> logging.Logger cache shared by all callers

    @classmethod
    def getLogger(cls, key):
        """Return the logger registered under *key*, caching it."""
        # First parameter renamed self -> cls: these are classmethods,
        # and using cls also removes the hard-coded Logger references.
        if key in cls.loggers:
            return cls.loggers[key]
        logger = logging.getLogger(key)
        cls.loggers[key] = logger
        return logger

    @classmethod
    def info(cls, log, message):
        """Log *message* at INFO level, prefixed with the host name."""
        log.info("【%s】: %s" % (HOST_NAME, message))

    @classmethod
    def debug(cls, log, message):
        """Log *message* at DEBUG level, prefixed with the host name."""
        log.debug("【%s】: %s" % (HOST_NAME, message))

    @classmethod
    def error(cls, log, message):
        """Log *message* at ERROR level, prefixed with the host name."""
        log.error("【%s】: %s" % (HOST_NAME, message))

    @classmethod
    def exception(cls, log):
        """Log the active exception with traceback (call inside except)."""
        log.exception("【%s】:" % HOST_NAME)
示例#9
0
 def _getConnectUri(self):
     """Return the broker connect URI for this host: the work's assigned
     broker server if one is set, otherwise the BASE/CONNECT_URI
     config default."""
     uri = ConfigHolder.getConfig().getProperty(BASE, CONNECT_URI)
     work = CacheHolder.getCache().get(HOST_NAME, WORKS)
     if work.brokerServer:
         broker = StoreHolder.getStore().getBrokerServer(work.brokerServer)
         uri = broker.connectUri
     return uri
 def _initConifg(self):
     """Seed /cabbage/config in zookeeper from the local config on first
     run, then watch it for child changes.

     NOTE(review): if the path already exists, individual keys are never
     re-seeded; only the child watch is (re)registered.
     """
     basePath = "/cabbage/config"
     client = self.kazooClient
     if not client.isExistPath(basePath):
         client.create(basePath, makepath=True)
         for key, value in ConfigHolder.getConfig().items(BASE):
             client.createPersistent(basePath + "/" + key, value)
     client.addChildListener(basePath, configWatch)
 def __init__(self, cfgPath=None):
     """Bootstrap the server side.

     Ensures the server file directory exists, connects the retrying
     zookeeper client and the server store, and pushes/watches the
     shared config tree.

     :param cfgPath: optional path to the config file to load.
     """
     fileDir = ConfigHolder.getConfig(cfgPath=cfgPath).getProperty(
         BASE, SERVER_FILE_DIRECTORY)
     if not os.path.isdir(fileDir):
         os.makedirs(fileDir)
     self.kazooClient = ZookeeperClientHolder.getRetryClient()
     self.store = StoreHolder.getServerStore()
     self.status = None
     self._initConifg()
示例#12
0
 def _loadMain(self):
     """Import this job's main script and return the first class marked
     as a cabbage main entry point, or None when there is none."""
     serverDir = ConfigHolder.getConfig().getProperty(
         BASE, SERVER_FILE_DIRECTORY)
     jobPath = serverDir + "/" + self.job.jobId
     loader = LoadMoudleHolder.getLoadMoudle(PYTHON)
     for entry in loader.load(jobPath, self.job.fileName):
         candidate = entry[1]
         if isCabbageMain(candidate):
             return candidate
     return None
示例#13
0
 def getTasks(self, fileName, jobId):
     """Import the given job script and return the fully qualified
     (module.Class) names of every class marked as a cabbage task."""
     serverDir = ConfigHolder.getConfig().getProperty(
         BASE, SERVER_FILE_DIRECTORY)
     jobPath = serverDir + "/" + jobId
     loader = LoadMoudleHolder.getLoadMoudle(PYTHON)
     return [entry[1].__module__ + "." + entry[1].__name__
             for entry in loader.load(jobPath, fileName)
             if isCabbageTask(entry[1])]
示例#14
0
def loadMain():
    serverDir = ConfigHolder.getConfig().getProperty(BASE,
                                                     SERVER_FILE_DIRECTORY)
    path = serverDir + "/job-06a4c37e-3ca2-46cb-9344-9f23fc03e8c7"
    loadMoudle = LoadMoudleHolder.getLoadMoudle(PYTHON)
    classes = loadMoudle.load(path, "product_list_crawler_main.py")
    for clazz in classes:
        obj = clazz[1]
        print clazz
        if isCabbageMain(obj):
            return obj
    return None
示例#15
0
def loadMain():
        serverDir = ConfigHolder.getConfig().getProperty(BASE,SERVER_FILE_DIRECTORY)
        path = "/Users/hua/workspace/mypython/cabbage/samples"
        loadMoudle = LoadMoudleHolder.getLoadMoudle(PYTHON)
        classes =loadMoudle.load(path,"test_both_task.py")
        for clazz in classes:
            obj = clazz[1]
            print obj
            if isCabbageTask(obj):
                print obj
                return obj
        return None
示例#16
0
    def _initData(self):
        """Create the base zookeeper tree on first start and mark this
        process as the master.

        Ensures /cabbage/{jobs,works,monitor,users,queueServer} and
        their children exist; each subtree is only created when its root
        is absent, so an existing installation is left untouched.  The
        admin user is seeded from BASE/ADMIN_NAME and BASE/ADMIN_PWD
        when missing.
        """
        if not self.kazooClient.isExistPath("/cabbage/jobs"):
            self.kazooClient.create("/cabbage/jobs", makepath=True)
            self.kazooClient.create("/cabbage/jobs/readies", makepath=True)
            self.kazooClient.create("/cabbage/jobs/results")
        if not self.kazooClient.isExistPath("/cabbage/works"):
            self.kazooClient.create("/cabbage/works", makepath=True)
            self.kazooClient.create("/cabbage/works/readies")

        if not self.kazooClient.isExistPath("/cabbage/monitor"):
            self.kazooClient.create("/cabbage/monitor", makepath=True)
            self.kazooClient.create("/cabbage/monitor/jobs")
            self.kazooClient.create("/cabbage/monitor/works")
            self.kazooClient.create("/cabbage/monitor/brokerServers")

        if not self.kazooClient.isExistPath("/cabbage/users"):
            self.kazooClient.create("/cabbage/users", makepath=True)
            # Seed the configured admin account on first boot only.
            if not self.kazooClient.isExistPath(
                    "/cabbage/users/" +
                    ConfigHolder.getConfig().getProperty(BASE, ADMIN_NAME)):
                self.store.saveUser(
                    User(userName=ConfigHolder.getConfig().getProperty(
                        BASE, ADMIN_NAME),
                         userPwd=ConfigHolder.getConfig().getProperty(
                             BASE, ADMIN_PWD),
                         isAdmin=True))

        if not self.kazooClient.isExistPath("/cabbage/queueServer"):
            self.kazooClient.create("/cabbage/queueServer/brokerServers",
                                    makepath=True)
            self.kazooClient.create(
                "/cabbage/queueServer/brokerServers/readies")
            self.kazooClient.create("/cabbage/queueServer/queues",
                                    makepath=True)

        # Register this process as MASTER (only if not already set).
        os.environ.setdefault(CABBAGE, MASTER)
示例#17
0
def serverScheduler():
    """Register monitors and the master-side cron jobs: a once-a-minute
    monitoring job plus the configured result-upload schedule."""
    registerMoniters()

    JobManageHolder.getJobManage().addJob(monitorJob,
                                          jobId="monitor",
                                          cron="*/1 * * * *")

    uploadCron = ConfigHolder.getConfig().getProperty(
        BASE, RESULTE_UPLOAD_SCHEDULER)
    JobManageHolder.getJobManage().addJob(uploadServerScheduler,
                                          jobId="uploadServerScheduler",
                                          cron=uploadCron)
示例#18
0
    def __init__(self, cfgPath=None):
        """Bootstrap the node side.

        Ensures the client file directory and the base zookeeper paths
        exist, registers client event handlers and marks this process
        as a NODE.

        :param cfgPath: accepted for interface symmetry.
            NOTE(review): currently unused — getConfig() is called
            without it.
        """
        clientDir = ConfigHolder.getConfig().getProperty(BASE,
                                                         CLIENT_FILE_DIRECTORY)
        self.kazooClient = ZookeeperClientHolder.getRetryClient()
        if not os.path.isdir(clientDir):
            os.makedirs(clientDir)

        client = self.kazooClient
        if not client.isExistPath("/cabbage/jobs"):
            client.create("/cabbage/jobs", makepath=True)
            client.create("/cabbage/jobs/readies", makepath=True)
        if not client.isExistPath("/cabbage/works"):
            client.create("/cabbage/works", makepath=True)
            client.create("/cabbage/works/readies")
        registerClientEvent()
        self.status = OFF_LINE
        os.environ.setdefault(CABBAGE, NODE)
 def save(self):
     """Move finished result files for this job onto the shared NFS
     directory.

     Files live under <localPath>/<jobId>/result/<dateStr>/<hour>; each
     is renamed to <host>_<ip>_<role>_<hour> on the NFS side.  At hour 0
     the previous day's directory is flushed in full; otherwise only
     hours strictly before the current one are moved.  Errors are
     logged and swallowed (best-effort upload).
     """
     try:
         nfsPath=ConfigHolder.getConfig().getProperty(BASE,NFS_DIRECTORY)
         dateStr = getNowDateStr()
         if self.jobId:
             localPath = getLocalFilesPath()
             dateStr = getNowDateStr()
             hour = getNowHour()

             if hour == 0:# upload the previous day's data
                 dateStr = formatDate(subDay(getNow(),1),f="%Y%m%d")

             localPath = localPath+"/"+self.jobId+"/result/"+dateStr

             Logger.info( log, "upload file to nfs. jobId【%s】 date【%s】" % (self.jobId,dateStr))
             # Nothing produced for this date: nothing to upload.
             if not  os.path.isdir(localPath):
                 return

             fileNames = os.listdir(localPath)
             if len(fileNames) == 0:
                 return

             remoteDire=nfsPath+"/"+self.jobId+"/"+dateStr

             if not os.path.isdir(remoteDire):
                 os.makedirs(remoteDire)
 #                 os.chmod(remoteDire,777)
             Logger.info(log,"hour:%s  files:%s"%(hour,",".join(fileNames)))
             for fileName in fileNames:
                 # Skip the current (still-open) hour and later ones;
                 # assumes fileName is the numeric hour — TODO confirm.
                 if hour != 0:
                     if int(fileName) >= hour:
                         continue

                 # Prefix with host/ip/role so files from different
                 # workers cannot collide on the shared directory.
                 newFileName = None
                 if os.environ[CABBAGE] ==MASTER:
                     newFileName = HOST_NAME+"_"+LOCAL_IP+"_"+MASTER+"_"+fileName
                 else:
                     newFileName = HOST_NAME+"_"+LOCAL_IP+"_"+NODE+"_"+fileName

                 if os.path.isfile(localPath+"/"+fileName):
                     shutil.move(localPath+"/"+fileName,remoteDire+"/"+newFileName)

     except Exception as e:
         Logger.exception(log)
                 
                 
示例#20
0
def taskFailed(state, event, app):
    """Celery event handler invoked for every failed task.

    Resolves the job id and task name from the task id, appends the
    enriched event dict to a per-broker daily fail log under the
    BASE/TASK_FAILLOG_PATH directory, and bumps the per-host failure
    counter.  Any error here is logged and swallowed so the event loop
    keeps running.
    """
    eventOutDic = event.copy()
    taskName = None
    try:
        taskId = event['uuid']
        # Kept for the disabled lookup below; currently unused.
        task = state.tasks.get(taskId)
        #
        #         taskName  = task.name if task and hasattr(task,'name') else None
        #
        #         if hasattr(task,'kwargs') and task.kwargs is not None and JOB_ID in task.kwargs:
        #             eventOutDic[JOB_ID] = eval(str(task.kwargs))[JOB_ID]
        #
        #         if eventOutDic[JOB_ID] is None:
        #             eventOutDic[JOB_ID] = TaskCacheHolder.getJobCache().get(taskName)

        #         if taskName is None or eventOutDic[JOB_ID] is None :
        jobId, taskName = _getJobIdAndTaskName(taskId)
        eventOutDic[JOB_ID] = jobId

        job = CacheHolder.getCache().get(eventOutDic[JOB_ID], JOBS)

        brokerServer = job.brokerServer
        taskPath = ConfigHolder.getConfig().getProperty(
            BASE, TASK_FAILLOG_PATH)

        if not os.path.isdir(taskPath):
            os.makedirs(taskPath)

        # One append-mode log file per broker server per day.
        dateStr = getNowDateStr()
        with open(taskPath + "/" + brokerServer + "_" + dateStr + ".log",
                  "a+") as writer:
            writer.write(str(eventOutDic) + "\n")

#         with storeFactory.store() as store:
#             store.deleteTaskId(task.id)

#         StoreHolder.getRedisStaticStore().deleteTaskId(taskId)

        CabbageCounterHolder.getCabbageCounter().updateTaksFail(
            taskName, _getHostName(event))

    except Exception as e:
        Logger.exception(log)
示例#21
0
 def run(self):
     """Persist a received job file on this node.

     Decodes the message's base64 payload into
     <clientDir>/<jobId>/<fileName>, creating the job and result
     directories on the first file and replacing any existing copy,
     then marks the file DONE in the job's cache so overall readiness
     can be tracked (and the server notified once all files are DONE).
     """
     clientDir = ConfigHolder.getConfig().getProperty(
         BASE, CLIENT_FILE_DIRECTORY)
     jobId = self.message.jobId
     jobDir = clientDir + "/" + jobId
     fileName = self.message.fileName
     fileContent = self.message.fileContent
     if not os.path.exists(jobDir):
         os.mkdir(jobDir)
         os.mkdir(jobDir + "/result")
     filePath = jobDir + "/" + fileName
     if os.path.exists(filePath):
         os.remove(filePath)
     # with-block closes the handle even if decode/write raises
     # (the original leaked the descriptor on error).
     with open(filePath, "w") as f:
         f.write(base64.decodestring(fileContent))
     CacheHolder.getCache().put(fileName, DONE, jobId)
    def save(self):
        """Upload finished result files for this job to HDFS.

        Files live under <localPath>/<jobId>/result/<dateStr>/<hour>.
        At hour 0 the previous day's directory is flushed in full;
        otherwise only hours strictly before the current one are
        uploaded.  Each local file is removed after upload.
        """
        hdfsPath=ConfigHolder.getConfig().getProperty(BASE,HDFS_ROOT_PATH)

        dateStr = getNowDateStr()
        if self.jobId:
            localPath = getLocalFilesPath()
            dateStr = getNowDateStr()
            hour = getNowHour()

            if hour == 0:# upload the previous day's data
                dateStr = formatDate(subDay(getNow(),1),f="%Y%m%d")

            p = localPath+"/"+self.jobId+"/result/"+dateStr
            Logger.debug( log, "upload file to hdfs. jobId【%s】 date【%s】" % (self.jobId,dateStr))
            # Nothing produced for this date: nothing to upload.
            if not  os.path.isdir(p):
                return

            fileNames = os.listdir(p)
            if len(fileNames) == 0:
                return

            client =HdfsClientHolder.getHdfsClient()
            remoteDire=hdfsPath+"/"+self.jobId

            # Ensure <hdfsPath>/<jobId>/<dateStr> exists on HDFS.
            if not client.isDirectory(remoteDire):
                client.mkdir(remoteDire)
            remoteDire= remoteDire+"/"+dateStr

            if not client.isDirectory(remoteDire):
                client.mkdir(remoteDire)
            Logger.info(log,"hour:%s  files:%s"%(hour,",".join(fileNames)))
            for fileName in fileNames:
                # Skip the current (still-open) hour and later ones;
                # assumes fileName is the numeric hour — TODO confirm.
                if hour != 0:
                    if int(fileName) >= hour:
                        continue
#                 if os.path.isfile(p+"/"+fileName):

                self.uploadToHdfs(client,localPath,self.jobId,hdfsPath,fileName,dateStr)
                os.remove(p+"/"+fileName)
    def _initJobs(self, cabbage):
        """Prepare every live job routed to one of this worker's queues.

        Builds the celery queue declarations (with per-queue priority)
        for all queues assigned to this host, then for each non-deleted
        job on those queues: caches it, syncs its script files if they
        are missing locally, registers its job id, and records a celery
        route per task.  Finally installs the queues and routes on
        celeryconfig.
        """
        store = StoreHolder.getRetryStore()
        jobs = store.getJobs()
        work = store.getWork(HOST_NAME)
        queues = work.queues
        routes = {}
        queues_celery = []
        for que in queues:
            que = store.getQueue(que)
            queues_celery.append(
                Queue(que.queueName,
                      Exchange(que.queueName),
                      routing_key=que.queueName,
                      queue_arguments={'x-max-priority': int(que.priority)}))

        for job in jobs:
            if job.status != JOB_DELETE and job.brokerQueue in queues:

                # fixbug: cache the job when dynamic scale-up left it uncached
                if not CacheHolder.getCache().hasKey(job.jobId, JOBS):
                    CacheHolder.getCache().put(job.jobId, job, JOBS)

                clientDir = ConfigHolder.getConfig().getProperty(
                    BASE, CLIENT_FILE_DIRECTORY)
                path = clientDir + "/" + job.jobId
                if not os.path.isdir(path):
                    # @FIX BUG: files not synced to this node — pull them
                    syncJob(job.jobId, store)

                self.addScriptJobId(job.jobId, cabbage)

                for taskName in job.tasks:
                    que = store.getQueue(job.brokerQueue)
                    routes[taskName] = {
                        'queue': que.queueName,
                        'routing_key': que.routingKey
                    }

        log.info(routes)
        celeryconfig.CELERY_QUEUES = tuple(queues_celery)
        celeryconfig.CELERY_ROUTES = routes
示例#24
0
    def loadJobScript(self,jobId,store):
        '''Make the given job's script files available on this node.

        Syncs every missing python attachment from the server and loads
        it as a celery task module.  The main-script branch below is
        deliberately disabled: the main file is not synchronized.
        '''
        job=store.getJob(jobId)

        clientDir = ConfigHolder.getConfig().getProperty(BASE,CLIENT_FILE_DIRECTORY)
        path = clientDir+"/"+jobId
        # do not sync the master node's main file
#         if job.fileType == PYTHON:
#             if not os.path.isfile(path+"/"+job.fileName):
#                 syncFile(job.fileName,jobId,FileRequestMessage.MAIN)
#             self.loadCeleryTask(path,job.fileName)

        if job.attachFiles:
            for attachFile in job.attachFiles:
                if attachFile.fileType == PYTHON:
                    if not os.path.isfile(path+"/"+attachFile.fileName):
                        syncFile(attachFile.fileName,jobId,FileRequestMessage.ATTACH)
                    self.loadCeleryTask(path,attachFile.fileName)
        log.info("节点【%s】当前任务【%s】的脚本加载完" % (HOST_NAME,jobId))
 def __init__(self):
     """Create the HDFS upload worker: a thread pool sized by the
     BASE/UPLOAD_HDFS_FILE_PROCESS_COUNT config entry and a queue of
     job ids, then start the dispatcher.
     """
     # Coerce with int(): getProperty returns a string elsewhere in this
     # codebase (other call sites wrap it in int()), and
     # ThreadPoolExecutor requires an integer max_workers.
     self.max = int(ConfigHolder.getConfig().getProperty(
         BASE, UPLOAD_HDFS_FILE_PROCESS_COUNT))
     self.executor = futures.ThreadPoolExecutor(max_workers=self.max)
     self.jobIdQueue = Queue.Queue()
     self.start()
示例#26
0
 def getClient(self, ip=None, port=None, password=None):
     """Build a RedisClient for the configured redis server.

     :param ip: redis host; defaults to the BASE/REDIS_IP config entry.
     :param port: redis port; defaults to BASE/REDIS_PORT.
     :param password: redis password; defaults to BASE/REDIS_PWD.
     """
     # Resolve defaults at call time.  The original evaluated
     # ConfigHolder inside the signature, which runs once at class
     # definition and freezes the values against config reloads.
     config = ConfigHolder.getConfig()
     if ip is None:
         ip = config.getProperty(BASE, REDIS_IP)
     if port is None:
         port = config.getProperty(BASE, REDIS_PORT)
     if password is None:
         password = config.getProperty(BASE, REDIS_PWD)
     return RedisClient(ip=ip, port=port, password=password)
示例#27
0
Created on 2016年7月13日

@author: hua
'''
from cabbage.config import ConfigHolder
from cabbage.constants import BASE
from cabbage.utils.host_name import HOST_NAME, LOCAL_IP
from kombu.entity import Exchange, Queue

# CELERYD_POOL_RESTARTS = True
CELERY_ALWAYS_EAGER = False

CELERYD_LOG_LEVEL = "DEBUG"
#CELERY_REDIRECT_STDOUTS_LEVEL="DEBUG"

# Coerce to int: getProperty returns a string (other call sites in this
# codebase wrap it in int()) and celery expects an integer worker count.
if ConfigHolder.getConfig().hasProperty(BASE, "celerydConcurrency"):
    CELERYD_CONCURRENCY = int(ConfigHolder.getConfig().getProperty(
        BASE, "celerydConcurrency"))
else:
    CELERYD_CONCURRENCY = 1

# Workers do not store task results; state flows through events instead.
CELERY_IGNORE_RESULT = True

# CELERY_TRACK_STARTED =True

# Emit task-sent and worker events so the monitor can track tasks.
CELERY_SEND_TASK_SENT_EVENT = True

CELERY_SEND_EVENTS = True

#CELERY_RESULT_BACKEND = 'amqp'
#CELERY_RESULT_EXCHANGE = 'celereyResults'
 def getRetryClient(self):
     """Return a zookeeper client that retries aggressively (up to 1000
     attempts, exponential backoff capped at 1h, jittered) and ignores
     session expiry."""
     retryPolicy = KazooRetry(max_tries=1000, delay=0.1, backoff=2,
                              max_jitter=0.8, max_delay=3600,
                              ignore_expire=True)
     hosts = ConfigHolder.getConfig().getProperty(BASE, ZOOKEEPER)
     return KazooZookeeperClient(hosts, connection_retry=retryPolicy)
 def getClient(self, connection_retry=None):
     """Return a zookeeper client for the configured ensemble with an
     optional caller-supplied retry policy (none by default)."""
     hosts = ConfigHolder.getConfig().getProperty(BASE, ZOOKEEPER)
     return KazooZookeeperClient(hosts, connection_retry=connection_retry)
 def start(self):
     """Spawn the configured number of upload worker processes, each
     consuming job ids from the shared queue."""
     jobProcessCount = ConfigHolder.getConfig().getProperty(
         BASE, UPLOAD_HDFS_FILE_PROCESS_COUNT)
     workerCount = int(jobProcessCount)
     self.pool = multiprocessing.Pool(processes=workerCount)
     for _ in range(workerCount):
         self.pool.apply_async(action, (self.jobIdQueue, ))