示例#1
0
    def setUp(self):
        """Build a test job, a scratch directory tree and one loader per role.

        A temporary root is created with ``tempfile.mkdtemp`` and given
        ``master`` and ``worker`` sub-directories.  The master loader receives
        a one-element node list and the worker loader receives the master
        address; both addresses are ``get_ip()`` joined to a port read from
        the job's context.
        """
        self.job = Job('test job', UrlPatterns(), BuiltinOpener, [])
        self.root = tempfile.mkdtemp()

        # One working directory per role, both below the temporary root.
        master_root = os.path.join(self.root, 'master')
        worker_root = os.path.join(self.root, 'worker')
        for role_root in (master_root, worker_root):
            os.makedirs(role_root)

        nodes = ['%s:%s' % (get_ip(), self.job.context.job.port)]
        master = '%s:%s' % (get_ip(), self.job.context.job.master_port)

        self.master_loader = MasterJobLoader(self.job, master_root, nodes)
        self.worker_loader = WorkerJobLoader(self.job, worker_root, master)
示例#2
0
文件: master.py 项目: zhangw/cola
    def add_arguments(self, parser):
        """Register the ``master`` sub-command and its options on *parser*.

        The created sub-parser is kept on ``self.master_parser`` and its
        ``func`` default is set to ``self.run``.  ``--start``, ``--kill`` and
        ``--list`` take an optional address whose ``const`` is the value of
        ``get_ip()``.

        :param parser: object exposing ``add_parser``
            (presumably an argparse sub-parsers action -- confirm with caller).
        """
        default_address = get_ip()

        master = parser.add_parser("master", help="master commands")
        self.master_parser = master
        master.add_argument("-w", "--working", metavar="working dir", nargs="?", help="master working dir")

        # The three address options differ only in their flags and help text.
        address_options = (
            ("-s", "--start", "master address(in the former of `ip:port` or `ip`)"),
            ("-k", "--kill", "master to kill(in the former of `ip:port` or `ip`)"),
            ("-l", "--list", "list workers(in the former of `ip:port` or `ip`)"),
        )
        for short_flag, long_flag, description in address_options:
            master.add_argument(
                short_flag,
                long_flag,
                metavar="master address",
                nargs="?",
                const=default_address,
                help=description,
            )

        master.set_defaults(func=self.run)
 def __init__(self, port, nodes_ip_addresses, worker_port, popen=None):
     """Record the job-master address, the worker node addresses and *popen*.

     :param port: port joined to ``get_ip()`` to form ``self.job_master``.
     :param nodes_ip_addresses: iterable of worker IPs; each one is joined
         to *worker_port* to build ``self.nodes``.
     :param worker_port: port shared by every worker node.
     :param popen: optional value stored on ``self.popen``
         (NOTE(review): presumably a process handle -- confirm with callers).
     """
     self.job_master = '%s:%s' % (get_ip(), port)
     self.nodes = [
         '%s:%s' % (node_ip, worker_port) for node_ip in nodes_ip_addresses
     ]
     self.worker_port = worker_port
     # Previously this was always None, silently dropping the caller's
     # argument; the default keeps the old behaviour when it is omitted.
     self.popen = popen
示例#4
0
    def __init__(self,
                 master,
                 root,
                 zip_dir,
                 job_dir,
                 data_path=None,
                 force=False):
        """Store the watcher settings, start its RPC server and export handlers.

        The node address is ``get_ip()`` joined to ``main_conf.worker.port``.
        After the attributes are set, ``self.check(force=force)`` and
        ``self.init_rpc_server()`` run, four handlers are registered on
        ``self.rpc_server`` and a file receiver is set up for *zip_dir*.
        """
        self.master = master
        self.host = get_ip()
        self.port = main_conf.worker.port
        self.node = '%s:%s' % (self.host, self.port)

        self.root, self.zip_dir, self.job_dir = root, zip_dir, job_dir
        self.data_path, self.force = data_path, force

        self.stopped = False
        self.running_jobs = {}

        self.check(force=force)
        self.init_rpc_server()

        # Handlers are registered in this fixed order under their RPC names.
        exported = (
            (self.stop, 'stop'),
            (self.kill, 'kill'),
            (self.start_job, 'start_job'),
            (self.clear_job, 'clear_job'),
        )
        for handler, rpc_name in exported:
            self.rpc_server.register_function(handler, rpc_name)
        self.set_file_receiver(self.zip_dir)
示例#5
0
文件: watcher.py 项目: bingyupj/cola
    def __init__(self, root, zip_dir, job_dir, ip_address=None, data_path=None, force=False):
        """Store the watcher settings, start its RPC server and export handlers.

        :param ip_address: address to use; when ``None`` the value of
            ``get_ip()`` is used, otherwise it must be one of ``get_ips()``.
        :raises ValueError: if *ip_address* is given but not in ``get_ips()``.
        """
        self.root, self.zip_dir, self.job_dir = root, zip_dir, job_dir
        self.data_path, self.force = data_path, force

        self.nodes_watchers = {}
        self.running_jobs = {}
        self.black_list = []

        if ip_address is not None:
            # An explicit address is only accepted if get_ips() lists it.
            candidates = get_ips()
            if ip_address not in candidates:
                raise ValueError("IP address must be one of (%s)" % ",".join(candidates))
        else:
            ip_address = get_ip()
        self.ip_address = ip_address
        self.port = main_conf.master.port

        self.stopped = False

        self.check(force=force)
        self.init_rpc_server()

        # Handlers are registered in this fixed order under their RPC names.
        exported = (
            (self.register_watcher_heartbeat, "register_heartbeat"),
            (self.stop, "stop"),
            (self.list_jobs, "list_jobs"),
            (self.start_job, "start_job"),
            (self.stop_job, "stop_job"),
            (self.finish_job, "finish_job"),
            (self.clear_job, "clear_job"),
            (self.list_job_dirs, "list_job_dirs"),
            (self.list_workers, "list_workers"),
        )
        for handler, rpc_name in exported:
            self.rpc_server.register_function(handler, rpc_name)

        self.set_receiver(zip_dir)
示例#6
0
 def __init__(self, port, nodes_ip_addresses, worker_port, popen=None):
     """Record the job-master address, the worker node addresses and *popen*.

     :param port: port joined to ``get_ip()`` to form ``self.job_master``.
     :param nodes_ip_addresses: iterable of worker IPs, each joined to
         *worker_port* to form an entry of ``self.nodes``.
     :param worker_port: port shared by every worker node.
     :param popen: optional value kept on ``self.popen``
         (NOTE(review): presumably a process handle -- confirm with callers).
     """
     self.job_master = '%s:%s' % (get_ip(), port)
     self.nodes = [
         '%s:%s' % (node_ip, worker_port) for node_ip in nodes_ip_addresses
     ]
     self.worker_port = worker_port
     # The argument used to be ignored (None was always stored); keep what
     # the caller passed.  Omitting it still yields None.
     self.popen = popen
示例#7
0
文件: loader.py 项目: iswangheng/cola
def load_job(path, nodes, context=None):
    """Import the job at *path* and run its master loader under a lock file.

    A ``lock`` file in the job's holder directory marks a running job
    master.  It is created before the RPC server is started and is removed
    on every exit path after that point.

    :param path: location of the job definition; must exist.
    :param nodes: passed through to ``JobLoader``.
    :param context: optional context passed through to ``JobLoader``.
    :raises ValueError: if *path* does not exist.
    :raises JobMasterRunning: if the lock file already exists.
    """
    if not os.path.exists(path):
        raise ValueError('Job definition does not exist.')

    job = import_job(path)

    job_name = job.name.replace(' ', '_')
    if job.debug:
        job_name += '_debug'
    holder = os.path.join(root_dir(), 'data', 'master', 'jobs', job_name)
    if not os.path.exists(holder):
        os.makedirs(holder)

    lock_f = os.path.join(holder, 'lock')
    if os.path.exists(lock_f):
        raise JobMasterRunning('There has been a running job master')
    open(lock_f, 'w').close()

    # The outer finally releases the lock even if the RPC server cannot be
    # created; the inner one shuts the server down even if the run fails.
    # Shutting down first means a failing os.remove cannot skip the shutdown.
    try:
        rpc_server = create_rpc_server(job)
        try:
            loader = JobLoader(job, nodes, rpc_server, context=context)
            loader.run()
            # Notify the master watcher that the job has finished.
            master_watcher = '%s:%s' % (get_ip(), main_conf.master.port)
            client_call(master_watcher, 'finish_job', job.real_name)
        finally:
            rpc_server.shutdown()
    finally:
        os.remove(lock_f)
示例#8
0
def start_master():
    """Announce the master address and launch the master watcher script.

    The script ``cola/master/watcher.py`` below ``root_dir()`` is started
    with ``python`` through ``subprocess.Popen``; the child is not waited on.
    """
    watcher_script = os.path.join(root_dir(), 'cola', 'master', 'watcher.py')

    # Parenthesised single-argument form prints the same text on Python 2.
    print('Start master at %s:%s' % (get_ip(), main_conf.master.port))
    print('Master will run in background.')

    command = ['python', watcher_script]
    subprocess.Popen(command)
示例#9
0
文件: loader.py 项目: ballacky13/cola
def create_rpc_server(job, context=None):
    """Create a ``ColaRPCServer`` and serve it from a daemon thread.

    The server is bound to ``(get_ip(), ctx.job.master_port)`` where ``ctx``
    is *context* when truthy, otherwise ``job.context``.

    :returns: the server, whose ``serve_forever`` is already running.
    """
    ctx = context if context else job.context
    address = (get_ip(), ctx.job.master_port)
    rpc_server = ColaRPCServer(address)

    serving = threading.Thread(target=rpc_server.serve_forever)
    serving.daemon = True
    serving.start()
    return rpc_server
示例#10
0
文件: loader.py 项目: ballacky13/cola
def load_job(path, nodes, context=None):
    """Import the job at *path* and run its master loader under a lock file.

    The ``lock`` file inside the job's holder directory signals a running
    job master.  Once created, it is removed on every exit path, including
    a failure to start the RPC server.

    :param path: location of the job definition; must exist.
    :param nodes: forwarded to ``JobLoader``.
    :param context: optional context forwarded to ``JobLoader``.
    :raises ValueError: if *path* does not exist.
    :raises JobMasterRunning: if the lock file is already present.
    """
    if not os.path.exists(path):
        raise ValueError('Job definition does not exist.')

    job = import_job(path)

    job_name = job.name.replace(' ', '_')
    if job.debug:
        job_name += '_debug'
    holder = os.path.join(root_dir(), 'data', 'master', 'jobs', job_name)
    if not os.path.exists(holder):
        os.makedirs(holder)

    lock_f = os.path.join(holder, 'lock')
    if os.path.exists(lock_f):
        raise JobMasterRunning('There has been a running job master')
    open(lock_f, 'w').close()

    # Nested cleanup: the lock is always released, and the server is always
    # shut down once it exists -- neither cleanup step can skip the other.
    try:
        rpc_server = create_rpc_server(job)
        try:
            loader = JobLoader(job, nodes, rpc_server, context=context)
            loader.run()
            # Notify the master watcher that the job has finished.
            master_watcher = '%s:%s' % (get_ip(), main_conf.master.port)
            client_call(master_watcher, 'finish_job', job.real_name)
        finally:
            rpc_server.shutdown()
    finally:
        os.remove(lock_f)
示例#11
0
 def setUp(self):
     """Prepare a job, a temporary directory tree and both job loaders.

     ``master`` and ``worker`` directories are created below a fresh
     ``tempfile.mkdtemp`` root.  Node and master addresses are ``get_ip()``
     joined to the ports found on the job's context.
     """
     self.job = Job('test job', UrlPatterns(), BuiltinOpener, [])
     self.root = tempfile.mkdtemp()

     # One working directory per role, both below the temporary root.
     master_root = os.path.join(self.root, 'master')
     worker_root = os.path.join(self.root, 'worker')
     for role_root in (master_root, worker_root):
         os.makedirs(role_root)

     nodes = ['%s:%s' % (get_ip(), self.job.context.job.port)]
     master = '%s:%s' % (get_ip(), self.job.context.job.master_port)

     self.master_loader = MasterJobLoader(self.job, master_root, nodes)
     self.worker_loader = WorkerJobLoader(self.job, worker_root, master)
示例#12
0
    def add_arguments(self, parser):
        """Register the ``worker`` sub-command and its options on *parser*.

        The created sub-parser is kept on ``self.worker_parser`` and its
        ``func`` default is set to ``self.run``.  ``--master`` defaults to
        the value of ``get_ip()``; ``--start`` uses that value as its
        ``const``.

        :param parser: object exposing ``add_parser``
            (presumably an argparse sub-parsers action -- confirm with caller).
        """
        ip = get_ip()

        self.worker_parser = parser.add_parser('worker',
                                               help='worker commands')
        self.worker_parser.add_argument(
            '-m',
            '--master',
            metavar='master address',
            nargs='?',
            default=ip,
            help='master connected to(in the former of `ip:port` or `ip`)')
        # The help text previously lacked its closing parenthesis.
        self.worker_parser.add_argument(
            '-s',
            '--start',
            metavar='worker address',
            nargs='?',
            const=ip,
            help='local worker connected to(in the former of `ip:port` or `ip`)'
        )
        self.worker_parser.add_argument('-w',
                                        '--working',
                                        metavar='working dir',
                                        nargs='?',
                                        help='worker working dir')
        self.worker_parser.set_defaults(func=self.run)
示例#13
0
 def __init__(self, root, zip_dir, job_dir, 
              data_path=None, force=False):
     """Store the watcher settings, start its RPC server and export handlers.

     The address is ``get_ip()`` and the port ``main_conf.master.port``.
     After the attributes are set, ``self.check(force=force)`` and
     ``self.init_rpc_server()`` run, nine handlers are registered on
     ``self.rpc_server`` and a receiver is set up for *zip_dir*.
     """
     self.root, self.zip_dir, self.job_dir = root, zip_dir, job_dir
     self.data_path, self.force = data_path, force

     self.nodes_watchers = {}
     self.running_jobs = {}
     self.black_list = []
     self.ip_address = get_ip()
     self.port = main_conf.master.port

     self.stopped = False

     self.check(force=force)
     self.init_rpc_server()

     # Handlers are registered in this fixed order under their RPC names.
     exported = (
         (self.register_watcher_heartbeat, 'register_heartbeat'),
         (self.stop, 'stop'),
         (self.list_jobs, 'list_jobs'),
         (self.start_job, 'start_job'),
         (self.stop_job, 'stop_job'),
         (self.finish_job, 'finish_job'),
         (self.clear_job, 'clear_job'),
         (self.list_job_dirs, 'list_job_dirs'),
         (self.list_workers, 'list_workers'),
     )
     for handler, rpc_name in exported:
         self.rpc_server.register_function(handler, rpc_name)

     self.set_receiver(zip_dir)
示例#14
0
文件: loader.py 项目: iswangheng/cola
def create_rpc_server(job, context=None):
    """Create a ``ColaRPCServer`` and serve it from a daemon thread.

    The server is bound to ``(get_ip(), ctx.job.port)`` where ``ctx`` is
    *context* when truthy, otherwise ``job.context``.

    :returns: the server, whose ``serve_forever`` is already running.
    """
    ctx = context if context else job.context
    address = (get_ip(), ctx.job.port)
    rpc_server = ColaRPCServer(address)

    serving = threading.Thread(target=rpc_server.serve_forever)
    serving.daemon = True
    serving.start()
    return rpc_server
示例#15
0
def start_master():
    """Print the master address and spawn the master watcher script.

    ``cola/master/watcher.py`` below ``root_dir()`` is run with ``python``
    via ``subprocess.Popen``; the call returns without waiting for it.
    """
    watcher_script = os.path.join(root_dir(), 'cola', 'master', 'watcher.py')

    # Parenthesised single-argument form prints the same text on Python 2.
    print('Start master at %s:%s' % (get_ip(), main_conf.master.port))
    print('Master will run in background.')

    command = ['python', watcher_script]
    subprocess.Popen(command)
示例#16
0
文件: master.py 项目: ll2088/cola
    def add_arguments(self, parser):
        """Register the ``master`` sub-command and its options on *parser*.

        The created sub-parser is kept on ``self.master_parser`` and its
        ``func`` default is set to ``self.run``.  Each of ``--start``,
        ``--kill`` and ``--list`` takes an optional address whose ``const``
        is the value of ``get_ip()``.

        :param parser: object exposing ``add_parser``
            (presumably an argparse sub-parsers action -- confirm with caller).
        """
        default_address = get_ip()

        master = parser.add_parser('master', help='master commands')
        self.master_parser = master

        # (short flag, long flag, metavar, help) for each address option.
        address_options = (
            ('-s', '--start', 'start master',
             'master address(in the former of `ip:port` or `ip`)'),
            ('-k', '--kill', 'kill master',
             'master to kill(in the former of `ip:port` or `ip`)'),
            ('-l', '--list', 'list workers',
             'list workers(in the former of `ip:port` or `ip`)'),
        )
        for short_flag, long_flag, placeholder, description in address_options:
            master.add_argument(short_flag,
                                long_flag,
                                metavar=placeholder,
                                nargs='?',
                                const=default_address,
                                help=description)

        master.set_defaults(func=self.run)
示例#17
0
文件: watcher.py 项目: Ganer/cola
 def __init__(self, master, root, zip_dir, job_dir, 
              data_path=None, force=False):
     """Store the watcher settings, start its RPC server and export handlers.

     ``self.node`` is ``get_ip()`` joined to ``main_conf.worker.port``.
     ``self.check(force=force)`` and ``self.init_rpc_server()`` run before
     the handlers are registered and the file receiver is set up.
     """
     self.master = master
     self.host = get_ip()
     self.port = main_conf.worker.port
     self.node = '%s:%s' % (self.host, self.port)

     self.root, self.zip_dir, self.job_dir = root, zip_dir, job_dir
     self.data_path, self.force = data_path, force

     self.stopped = False
     self.running_jobs = {}

     self.check(force=force)
     self.init_rpc_server()

     # Handlers are registered in this fixed order under their RPC names.
     exported = (
         (self.stop, 'stop'),
         (self.kill, 'kill'),
         (self.start_job, 'start_job'),
         (self.clear_job, 'clear_job'),
     )
     for handler, rpc_name in exported:
         self.rpc_server.register_function(handler, rpc_name)
     self.set_file_receiver(self.zip_dir)
示例#18
0
def start_log_server():
    """Start the module-level log server once; later calls do nothing.

    When the global ``log_server`` is still ``None`` a
    ``LogRecordSocketReceiver`` is created on ``get_ip()`` and
    ``log_server_port`` and its ``serve_forever`` is run in a new thread.
    """
    global log_server, log_server_port

    if log_server is None:
        log_server = LogRecordSocketReceiver(
            logger=logger, host=get_ip(), port=log_server_port)
        serving = threading.Thread(target=log_server.serve_forever)
        serving.start()
示例#19
0
文件: worker.py 项目: awai0707/cola
 def add_arguments(self, parser):
     """Register the ``worker`` sub-command and its options on *parser*.

     The created sub-parser is kept on ``self.worker_parser`` and its
     ``func`` default is set to ``self.run``.  ``--master`` defaults to the
     value of ``get_ip()``; ``--start`` uses that value as its ``const``.

     :param parser: object exposing ``add_parser``
         (presumably an argparse sub-parsers action -- confirm with caller).
     """
     ip = get_ip()

     self.worker_parser = parser.add_parser('worker', help='worker commands')
     self.worker_parser.add_argument('-m', '--master', metavar='master address', nargs='?', default=ip,
                                     help='master connected to(in the former of `ip:port` or `ip`)')
     # The help text previously lacked its closing parenthesis.
     self.worker_parser.add_argument('-s', '--start', metavar='worker address', nargs='?', const=ip,
                                     help='local worker connected to(in the former of `ip:port` or `ip`)')
     self.worker_parser.set_defaults(func=self.run)
示例#20
0
def start_log_server():
    """Create and start the shared log server unless one already exists.

    The receiver is built from the module-level ``logger``, ``get_ip()``
    and ``log_server_port``; ``serve_forever`` runs in its own thread.
    """
    global log_server, log_server_port

    if log_server is None:
        log_server = LogRecordSocketReceiver(
            logger=logger, host=get_ip(), port=log_server_port)
        serving = threading.Thread(target=log_server.serve_forever)
        serving.start()
示例#21
0
 def __init__(self, job, data_dir, nodes, client=None,
              context=None, copies=1, force=False):
     """Initialise the master-side job loader.

     Runs both base initialisers, creates the message-queue client, the
     ready/finish locks and the logger, starts the RPC, rate-clear and
     logger servers, then registers the RPC handlers and signal handlers.

     :param job: job to load; ``job.context`` is used when *context* is falsy.
     :param data_dir: forwarded to ``JobLoader.__init__``.
     :param nodes: list of node addresses; copied into ``not_registered``
         and ``not_finished``.
     :param client: optional log client; when given it is attached to the
         logger through ``add_log_client``.
     :param context: optional context overriding ``job.context``.
     :param copies: forwarded to ``JobLoader`` and ``MessageQueueClient``.
     :param force: forwarded to ``JobLoader.__init__``.
     """
     ctx = context or job.context
     master_port = ctx.job.master_port
     # Address of this master: get_ip() joined to the job's master port.
     local = '%s:%s' % (get_ip(), master_port)
     
     # Both bases are initialised explicitly rather than through super().
     JobLoader.__init__(self, job, data_dir, local, 
                        context=ctx, copies=copies, force=force)
     LimitionJobLoader.__init__(self, job, context=ctx)
     
     # check (defined outside this block)
     self.check()
     
     self.nodes = nodes
     # Independent copies, so changing one list does not affect self.nodes.
     self.not_registered = self.nodes[:]
     self.not_finished = self.nodes[:]
     
     # message queue client spanning every node
     self.mq_client = MessageQueueClient(self.nodes, copies=copies)
     
     # Both locks are acquired immediately, so a later acquire() blocks
     # until something releases them (presumably the RPC handlers
     # registered below -- confirm).
     self.ready_lock = threading.Lock()
     self.ready_lock.acquire()
     self.finish_lock = threading.Lock()
     self.finish_lock.acquire()
     
     # logger writing to job.log under self.root
     # (self.root is not assigned in this block; presumably set by a base
     # initialiser -- confirm)
     self.logger = get_logger(
         name='cola_master_%s'%self.job.real_name,
         filename=os.path.join(self.root, 'job.log'),
         is_master=True)
     self.client = client
     self.client_handler = None
     if self.client is not None:
         self.client_handler = add_log_client(self.logger, self.client)
     
     self.init_rpc_server()
     self.init_rate_clear()
     self.init_logger_server(self.logger)
     
     # expose the handlers on the RPC server under their public names
     self.rpc_server.register_function(self.client_stop, 'client_stop')
     self.rpc_server.register_function(self.ready, 'ready')
     self.rpc_server.register_function(self.worker_finish, 'worker_finish')
     self.rpc_server.register_function(self.complete, 'complete')
     self.rpc_server.register_function(self.error, 'error')
     self.rpc_server.register_function(self.get_nodes, 'get_nodes')
     self.rpc_server.register_function(self.apply, 'apply')
     self.rpc_server.register_function(self.require, 'require')
     self.rpc_server.register_function(self.stop, 'stop')
     self.rpc_server.register_function(self.add_node, 'add_node')
     self.rpc_server.register_function(self.remove_node, 'remove_node')
     
     # route SIGINT and SIGTERM to self.signal_handler
     signal.signal(signal.SIGINT, self.signal_handler)
     signal.signal(signal.SIGTERM, self.signal_handler)
示例#22
0
    def __init__(self,
                 job,
                 data_dir,
                 context=None,
                 logger=None,
                 local=None,
                 nodes=None,
                 copies=1,
                 force=False):
        """Initialise the basic worker job loader.

        Resolves the local address and node list, runs the base initialiser,
        sets the execution counters, starts the RPC server and message
        queue, then registers signal handlers and RPC handlers.

        :param job: job to load; ``job.context`` is used when *context* is falsy.
        :param data_dir: forwarded to the base initialiser.
        :param context: optional context overriding ``job.context``.
        :param logger: stored on ``self.logger`` as given.
        :param local: ``host:port`` of this worker; defaults to ``get_ip()``
            joined to ``ctx.job.port``.
        :param nodes: node addresses; defaults to ``[self.local]``.
        :param copies: forwarded to the base initialiser.
        :param force: forwarded to the base initialiser.
        :raises ValueError: if *local* is given without a ``:`` (the
            two-name unpack of the split result fails).
        """
        self.job = job
        ctx = context or self.job.context

        self.local = local
        if self.local is None:
            host, port = get_ip(), ctx.job.port
            self.local = '%s:%s' % (host, port)
        else:
            # host and port are not used again in this block; the unpack
            # still rejects a value that has no ':' separator.
            host, port = tuple(self.local.split(':', 1))
        self.nodes = nodes
        if self.nodes is None:
            self.nodes = [self.local]

        self.logger = logger
        self.info_logger = get_logger(name='cola_worker_info_%s' %
                                      self.job.real_name)

        super(BasicWorkerJobLoader, self).__init__(self.job,
                                                   data_dir,
                                                   self.local,
                                                   context=ctx,
                                                   copies=copies,
                                                   force=force)

        # instances count that run at the same time, clamped to
        # [1, MAX_THREADS_SIZE]
        # (self.ctx is not assigned in this block; presumably set by the
        # base initialiser above -- confirm)
        self.instances = max(min(self.ctx.job.instances, MAX_THREADS_SIZE), 1)
        # currently executing items
        self.executings = []
        # number of exceptions thrown in a row
        self.error_times = 0
        # budget
        self.budget = 0

        self.check()
        # init rpc server
        self.init_rpc_server()
        # init message queue
        self.init_mq()

        # route SIGINT and SIGTERM to self.signal_handler
        signal.signal(signal.SIGINT, self.signal_handler)
        signal.signal(signal.SIGTERM, self.signal_handler)

        # expose the handlers on the RPC server under their public names
        self.rpc_server.register_function(self.stop, name='stop')
        self.rpc_server.register_function(self.add_node, name='add_node')
        self.rpc_server.register_function(self.remove_node, name='remove_node')
        self.rpc_server.register_function(self.run, name='run')
示例#23
0
文件: loader.py 项目: friedvan/cola
 def __init__(self, job, data_dir, context=None, logger=None,
              local=None, nodes=None, copies=1, force=False):
     """Initialise the basic worker job loader.

     Resolves the local address and node list, runs the base initialiser,
     sets the counters and the stop lock, starts the RPC server and message
     queue, then registers signal handlers and RPC handlers.

     :param job: job to load; ``job.context`` is used when *context* is falsy.
     :param data_dir: forwarded to the base initialiser.
     :param context: optional context overriding ``job.context``.
     :param logger: stored on ``self.logger`` as given.
     :param local: ``host:port`` of this worker; defaults to ``get_ip()``
         joined to ``ctx.job.port``.
     :param nodes: node addresses; defaults to ``[self.local]``.
     :param copies: forwarded to the base initialiser.
     :param force: forwarded to the base initialiser.
     :raises ValueError: if *local* is given without a ``:`` (the two-name
         unpack of the split result fails).
     """
     self.job = job
     ctx = context or self.job.context
     
     self.local = local
     if self.local is None:
         host, port = get_ip(), ctx.job.port
         self.local = '%s:%s' % (host, port)
     else:
         # host and port are not used again in this block; the unpack
         # still rejects a value that has no ':' separator.
         host, port = tuple(self.local.split(':', 1))
     self.nodes = nodes
     if self.nodes is None:
         self.nodes = [self.local]
         
     self.logger = logger
     self.info_logger = get_logger(
         name='cola_worker_info_%s'%self.job.real_name)
         
     super(BasicWorkerJobLoader, self).__init__(
         self.job, data_dir, self.local, 
         context=ctx, copies=copies, force=force)
     
     # instances count that run at the same time, clamped to
     # [1, MAX_THREADS_SIZE]
     # (self.ctx is not assigned in this block; presumably set by the base
     # initialiser above -- confirm)
     self.instances = max(min(self.ctx.job.instances, MAX_THREADS_SIZE), 1)
     # currently executing items
     self.executings = []
     # number of exceptions thrown in a row
     self.error_times = 0
     # budget
     self.budget = 0
     
     # counter
     self.pages_size = 0
     
     # The lock is acquired immediately, so a later acquire() blocks until
     # something releases it (presumably when the loader stops -- confirm).
     self.stop_lock = threading.Lock()
     self.stop_lock.acquire()
     
     self.check()
     # init rpc server
     self.init_rpc_server()
     # init message queue
     self.init_mq()
     
     # route SIGINT and SIGTERM to self.signal_handler
     signal.signal(signal.SIGINT, self.signal_handler)
     signal.signal(signal.SIGTERM, self.signal_handler)
     
     # expose the handlers on the RPC server under their public names
     self.rpc_server.register_function(self.stop, name='stop')
     self.rpc_server.register_function(self.add_node, name='add_node')
     self.rpc_server.register_function(self.remove_node, name='remove_node')
     self.rpc_server.register_function(self.run, name='run')
     self.rpc_server.register_function(self.pages, name='pages')
示例#24
0
文件: master.py 项目: awai0707/cola
 def add_arguments(self, parser):
     """Register the ``master`` sub-command and its options on *parser*.

     The created sub-parser is kept on ``self.master_parser`` and its
     ``func`` default is set to ``self.run``.  Each option takes an optional
     address whose ``const`` is the value of ``get_ip()``.

     :param parser: object exposing ``add_parser``
         (presumably an argparse sub-parsers action -- confirm with caller).
     """
     default_address = get_ip()

     master = parser.add_parser('master', help='master commands')
     self.master_parser = master

     # (short flag, long flag, metavar, help) for each address option.
     address_options = (
         ('-s', '--start', 'start master',
          'master address(in the former of `ip:port` or `ip`)'),
         ('-k', '--kill', 'kill master',
          'master to kill(in the former of `ip:port` or `ip`)'),
         ('-l', '--list', 'list workers',
          'list workers(in the former of `ip:port` or `ip`)'),
     )
     for short_flag, long_flag, placeholder, description in address_options:
         master.add_argument(short_flag, long_flag, metavar=placeholder,
                             nargs='?', const=default_address,
                             help=description)

     master.set_defaults(func=self.run)
示例#25
0
文件: loader.py 项目: 0pengl/cola
 def run(self):
     """Wait for readiness, start the workers, wait for the finish signal.

     Blocks on ``ready_lock``.  If the loader is not stopped and no node is
     left in ``not_registered``, the job's start items are put on the
     message queue and ``run`` is called on every node.  It then blocks on
     ``finish_lock`` and finally sends ``finish_job`` to the master watcher.
     """
     self.ready_lock.acquire()

     ready_to_start = not self.stopped and len(self.not_registered) == 0
     if ready_to_start:
         self.mq_client.put(self.job.starts)
         for worker_node in self.nodes:
             client_call(worker_node, 'run')

     self.finish_lock.acquire()

     watcher_address = '%s:%s' % (get_ip(), main_conf.master.port)
     client_call(watcher_address, 'finish_job', self.job.real_name,
                 ignore=True)
示例#26
0
文件: context.py 项目: awai0707/cola
 def __init__(self, local_mode=False, is_master=False, master_addr=None, 
              is_client=False, working_dir=None, mkdirs=False, 
              ip=None, ips=None):
     """Resolve addresses and the working directory, then start the manager.

     :param local_mode: stored as ``is_local_mode``; when False a master
         address is required.
     :param is_master: when True and *ip* is None, ``self.ip`` is taken
         from the master address.
     :param master_addr: ``ip`` or ``ip:port`` of the master.
     :param is_client: stored as ``is_client``.
     :param working_dir: working directory; defaults to ``cola`` under the
         system temp directory.
     :param mkdirs: create the default working directory if it is missing
         (only consulted when *working_dir* is None).
     :param ip: address of this node; derived when None.
     :param ips: list of node addresses; defaults to ``[self.ip]``.
     :raises ValueError: if *local_mode* is False and *master_addr* is None.
     """
     self.is_local_mode = local_mode
     self.is_master = is_master
     self.is_client = is_client
     
     self.master_addr = master_addr
     # Starts as the raw address; narrowed to the host part below when the
     # address carries a port.
     self.master_ip = self.master_addr
     if not self.is_local_mode:
         if self.master_addr is None:
             raise ValueError('Master address must be supplied when local_mode is False')
             
         if ':' not in self.master_addr:
             # Bare host: append the configured master port.
             self.master_addr = '%s:%s' % (self.master_addr, main_conf.master.port)
         else:
             self.master_ip = self.master_addr.split(':', 1)[0]
     
     self.working_dir = working_dir
     if self.working_dir is None:
         tmp = tempfile.gettempdir()
         self.working_dir = os.path.join(tmp, 'cola')
         if mkdirs and not os.path.exists(self.working_dir):
             os.makedirs(self.working_dir)
             
     self.ip = ip
     if self.ip is None:
         if self.is_master:
             # NOTE(review): in local mode with no master_addr this leaves
             # self.ip as None, so the addresses built below contain the
             # text 'None' -- confirm whether that combination can occur.
             self.ip = self.master_ip
         else:
             self.ip = get_ip()
             # Local mode falls back to loopback when get_ip() is falsy.
             if self.is_local_mode and not self.ip:
                 self.ip = '127.0.0.1'
     # Only reachable with master_addr still None in local mode.
     if self.master_addr is None: self.master_addr = '%s:%s' % (self.ip, main_conf.master.port)
     self.worker_addr = '%s:%s' % (self.ip, main_conf.worker.port)
     
     # NOTE(review): an empty list passed as ``ips`` is appended to in
     # place, which mutates the caller's list.
     self.ips = ips if ips is not None else []
     if not self.ips:
         self.ips.append(self.ip)
     # fix_addr is defined outside this block.
     self.addrs = [self.fix_addr(_ip) for _ip in self.ips]
         
     self.manager = ContextManager()
     self.manager.start(manager_init)
     # Dict created through the manager (presumably shared across
     # processes -- confirm against ContextManager).
     self.env = self.manager.dict({'ip': self.ip, 
                                   'root': self.working_dir,
                                   'is_local': self.is_local_mode, 
                                   'master_ip': self.master_ip,
                                   'job_desc' : {}
                                   })
     self.logger = get_logger('cola_context')
     
     # Both server slots start empty.
     self.master_rpc_server = None
     self.worker_rpc_server = None
示例#27
0
文件: loader.py 项目: xren/cola
 def run(self):
     """Block until ready, kick off every node, then block until finished.

     After ``ready_lock`` is obtained, the start items are queued and
     ``run`` is called on each node, provided the loader is not stopped and
     ``not_registered`` is empty.  Once ``finish_lock`` is obtained, the
     master watcher is told ``finish_job`` for this job.
     """
     self.ready_lock.acquire()

     ready_to_start = not self.stopped and len(self.not_registered) == 0
     if ready_to_start:
         self.mq_client.put(self.job.starts)
         for worker_node in self.nodes:
             client_call(worker_node, 'run')

     self.finish_lock.acquire()

     watcher_address = '%s:%s' % (get_ip(), main_conf.master.port)
     client_call(watcher_address, 'finish_job', self.job.real_name,
                 ignore=True)
示例#28
0
def start_worker(master, data_path=None, force=False):
    """Announce the worker address and launch the worker watcher script.

    :param master: master address passed to the script with ``-m``.
    :param data_path: when not None, passed with ``-d``.
    :param force: when exactly ``True``, ``-f`` is appended.
    """
    watcher_script = os.path.join(root_dir(), 'cola', 'worker', 'watcher.py')

    # Parenthesised single-argument form prints the same text on Python 2.
    print('Start worker at %s:%s' % (get_ip(), main_conf.worker.port))
    print('Worker will run in background. Please do not shut down the terminal.')

    command = ['python', watcher_script, '-m', master]
    if data_path is not None:
        command += ['-d', data_path]
    if force is True:
        command += ['-f']
    subprocess.Popen(command)
示例#29
0
def start_master(data_path=None, force=False):
    """Announce the master address and launch the master watcher script.

    :param data_path: when not None, passed to the script with ``-d``.
    :param force: when exactly ``True``, ``-f`` is appended.
    """
    watcher_script = os.path.join(root_dir(), "cola", "master", "watcher.py")

    # Parenthesised single-argument form prints the same text on Python 2.
    print("Start master at %s:%s" % (get_ip(), main_conf.master.port))
    print("Master will run in background. Please do not shut down the terminal.")

    command = ["python", watcher_script]
    if data_path is not None:
        command += ["-d", data_path]
    if force is True:
        command += ["-f"]
    subprocess.Popen(command)
def start_worker(master, data_path=None, force=False):
    """Print the worker address and spawn the worker watcher script.

    :param master: master address handed to the script via ``-m``.
    :param data_path: when not None, handed over via ``-d``.
    :param force: when exactly ``True``, ``-f`` is appended.
    """
    watcher_script = os.path.join(root_dir(), 'cola', 'worker', 'watcher.py')

    # Parenthesised single-argument form prints the same text on Python 2.
    print('Start worker at %s:%s' % (get_ip(), main_conf.worker.port))
    print('Worker will run in background. Please do not shut down the terminal.')

    command = ['python', watcher_script, '-m', master]
    if data_path is not None:
        command += ['-d', data_path]
    if force is True:
        command += ['-f']
    subprocess.Popen(command)
示例#31
0
 def __init__(self, rpc_server, master, zip_dir, job_dir):
     """Store the watcher settings and export its handlers on *rpc_server*.

     ``self.node`` is ``get_ip()`` joined to ``main_conf.worker.port``.
     Three handlers are registered and a file receiver is set up for
     *zip_dir*.
     """
     self.rpc_server, self.master = rpc_server, master
     self.node = '%s:%s' % (get_ip(), main_conf.worker.port)
     self.zip_dir, self.job_dir = zip_dir, job_dir

     self.stopped = False

     # Handlers are registered in this fixed order under their RPC names.
     exported = (
         (self.stop, 'stop'),
         (self.start_job, 'start_job'),
         (self.clear_job, 'clear_job'),
     )
     for handler, rpc_name in exported:
         self.rpc_server.register_function(handler, rpc_name)
     self.set_file_receiver(self.zip_dir)
示例#32
0
    def __init__(self, rpc_server, master, zip_dir, job_dir):
        """Keep the given server and paths, then export the RPC handlers.

        The node address is ``get_ip()`` joined to ``main_conf.worker.port``.
        ``stop``, ``start_job`` and ``clear_job`` are registered on
        *rpc_server*, and a file receiver is set up for *zip_dir*.
        """
        self.rpc_server, self.master = rpc_server, master
        self.node = '%s:%s' % (get_ip(), main_conf.worker.port)
        self.zip_dir, self.job_dir = zip_dir, job_dir

        self.stopped = False

        # Handlers are registered in this fixed order under their RPC names.
        exported = (
            (self.stop, 'stop'),
            (self.start_job, 'start_job'),
            (self.clear_job, 'clear_job'),
        )
        for handler, rpc_name in exported:
            self.rpc_server.register_function(handler, rpc_name)
        self.set_file_receiver(self.zip_dir)
示例#33
0
文件: loader.py 项目: iswangheng/cola
def load_job(path, master=None):
    """Import the job at *path* and run its worker loader.

    Without *master* the worker runs alone: the job's start items are put
    on its own queue and the loader runs directly.  With *master* the node
    list is fetched from the master, the worker reports ``ready`` and the
    loader is driven from a helper thread.  The RPC server is shut down on
    every exit path once it has been created.

    :param path: location of the job definition; must exist.
    :param master: optional master address.
    :raises ValueError: if *path* does not exist.
    """
    if not os.path.exists(path):
        raise ValueError('Job definition does not exist.')

    job = import_job(path)

    holder = os.path.join(root_dir(), 'data', 'worker', 'jobs', job.real_name)
    mq_holder = os.path.join(holder, 'mq')
    if not os.path.exists(mq_holder):
        os.makedirs(mq_holder)

    # Logger
    logger = get_logger(os.path.join(holder, 'job.log'))

    local_node = '%s:%s' % (get_ip(), job.context.job.port)
    nodes = [local_node]
    if master is not None:
        nodes = client_call(master, 'get_nodes')

    # Bloom filter hook
    bloom_filter_file = os.path.join(holder, 'bloomfilter')
    bloom_filter_hook = create_bloom_filter_hook(bloom_filter_file, job)

    rpc_server = create_rpc_server(job)
    # A single try/finally now also covers loader construction and queue
    # setup, so a failure there no longer leaves the RPC server running.
    try:
        loader = JobLoader(job, rpc_server, logger=logger, master=master)
        loader.init_mq(nodes,
                       local_node,
                       mq_holder,
                       verify_exists_hook=bloom_filter_hook,
                       copies=2 if master else 1)

        if master is None:
            loader.mq.put(job.starts)
            loader.run()
        else:
            client_call(master, 'ready', local_node)

            def _start():
                # NOTE(review): this sleeps until ``loader.stopped`` becomes
                # truthy and only then calls run() -- kept as in the
                # original; confirm the intended meaning of ``stopped``.
                while not loader.stopped:
                    time.sleep(TIME_SLEEP)
                loader.run()

            thread = threading.Thread(target=_start)
            thread.start()
            thread.join()
    finally:
        rpc_server.shutdown()
示例#34
0
    def __init__(self,
                 container_id,
                 working_dir,
                 job_path,
                 job_name,
                 env,
                 mq,
                 counter_server,
                 budget_server,
                 speed_server,
                 stopped,
                 nonsuspend,
                 idle_statuses,
                 n_tasks=1,
                 is_local=False,
                 master_ip=None,
                 logger=None,
                 task_start_id=0):
        """Store the container's collaborators and prepare per-task slots.

        The job description is taken from ``env['job_desc'][job_name]`` when
        that entry is truthy, otherwise it is imported from *job_path*.  The
        IP is ``env['ip']`` when truthy, otherwise ``get_ip()``.  One
        ``None`` slot per task is reserved for each of the counter, budget
        and speed clients.
        """
        self.container_id, self.working_dir = container_id, working_dir
        self.mq, self.env = mq, env
        self.job_name = job_name

        # Prefer a description already present in env; import it otherwise.
        job_desc = env['job_desc'].get(job_name)
        if not job_desc:
            job_desc = import_job_desc(job_path)
        self.job_desc = job_desc

        self.counter_server = counter_server
        self.budget_server = budget_server
        self.speed_server = speed_server

        self.stopped, self.nonsuspend = stopped, nonsuspend
        self.idle_statuses = idle_statuses
        self.n_tasks = n_tasks
        self.is_local, self.master_ip = is_local, master_ip
        self.logger = logger

        self.task_start_id = task_start_id
        node_ip = self.env.get('ip', None)
        if not node_ip:
            node_ip = get_ip()
        self.ip = node_ip

        # Three separate lists, one empty slot per task in each.
        self.counter_clients = [None] * self.n_tasks
        self.budget_clients = [None] * self.n_tasks
        self.speed_clients = [None] * self.n_tasks

        self.task_threads = []

        self.inited = False
        self.lock = multiprocessing.Lock()
示例#35
0
文件: loader.py 项目: ballacky13/cola
def load_job(path, master=None):
    """Import the job at *path* and run its worker loader.

    Standalone (no *master*): the job's start items are queued locally and
    the loader runs directly.  Clustered: the node list comes from the
    master, the worker reports ``ready`` and the loader is driven from a
    helper thread.  Once created, the RPC server is always shut down.

    :param path: location of the job definition; must exist.
    :param master: optional master address.
    :raises ValueError: if *path* does not exist.
    """
    if not os.path.exists(path):
        raise ValueError('Job definition does not exist.')

    job = import_job(path)

    holder = os.path.join(
        root_dir(), 'data', 'worker', 'jobs', job.real_name)
    mq_holder = os.path.join(holder, 'mq')
    if not os.path.exists(mq_holder):
        os.makedirs(mq_holder)

    # Logger
    logger = get_logger(os.path.join(holder, 'job.log'))

    local_node = '%s:%s' % (get_ip(), job.context.job.port)
    nodes = [local_node]
    if master is not None:
        nodes = client_call(master, 'get_nodes')

    # Bloom filter hook
    bloom_filter_file = os.path.join(holder, 'bloomfilter')
    bloom_filter_hook = create_bloom_filter_hook(bloom_filter_file, job)

    rpc_server = create_rpc_server(job)
    # One try/finally guards everything after server creation, so errors in
    # loader construction or queue setup cannot leak the running server.
    try:
        loader = JobLoader(job, rpc_server, logger=logger, master=master)
        loader.init_mq(nodes, local_node, mq_holder,
                       verify_exists_hook=bloom_filter_hook,
                       copies=2 if master else 1)

        if master is None:
            loader.mq.put(job.starts)
            loader.run()
        else:
            client_call(master, 'ready', local_node)

            def _start():
                # NOTE(review): run() is only reached after ``loader.stopped``
                # turns truthy -- kept as in the original; confirm intent.
                while not loader.stopped:
                    time.sleep(TIME_SLEEP)
                loader.run()

            thread = threading.Thread(target=_start)
            thread.start()
            thread.join()
    finally:
        rpc_server.shutdown()
示例#36
0
def start_master(ip=None, data_path=None, force=False):
    '''Launch ``cola/master/watcher.py`` as a separate ``python`` process.

    :param ip: passed to the watcher as ``-i``; when ``None`` the option is
        omitted and ``get_ip()`` is used only for the printed message.
    :param data_path: passed to the watcher as ``-d`` when not ``None``.
    :param force: when exactly ``True``, ``-f`` is appended to the command.
    '''
    watcher_script = os.path.join(root_dir(), 'cola', 'master', 'watcher.py')

    shown_ip = get_ip() if ip is None else ip
    print('Start master at %s:%s' % (shown_ip, main_conf.master.port))
    print('Master will run in background. Please do not shut down the terminal.')

    command = ['python', watcher_script]
    if ip is not None:
        command += ['-i', ip]
    if data_path is not None:
        command += ['-d', data_path]
    if force is True:
        command.append('-f')
    # The child is not waited for; it keeps running after this call returns.
    subprocess.Popen(command)
示例#37
0
文件: job.py 项目: zzzz123321/cola
    def add_arguments(self, parser):
        '''Create the ``job`` sub-command on ``parser`` and declare its options.

        The new sub-parser is kept on ``self.job_parser`` and its ``func``
        default is set to ``self.run``.

        :param parser: object providing ``add_parser``
            (NOTE(review): presumably an argparse sub-parsers action - confirm).
        '''
        local_ip = get_ip()

        job_parser = parser.add_parser('job', help='job commands')
        self.job_parser = job_parser
        add_option = job_parser.add_argument

        # ``--master`` falls back to the local address when not supplied.
        add_option('-m', '--master',
                   metavar='master address', nargs='?', default=local_ip,
                   help='master connected to(in the former of `ip:port` or `ip`)')
        add_option('-l', '--list',
                   action='store_true',
                   help='list all jobs including <id> <name> and <status>')
        add_option('-k', '--kill',
                   metavar='job name', nargs='?',
                   help='kill job by job name')
        add_option('-u', '--upload',
                   metavar='job directory', nargs='?',
                   help='upload a job directory to the cluster')
        # A bare ``-r`` (no value) yields the constant 'U'.
        add_option('-r', '--run',
                   metavar='job name', nargs='?', const='U',
                   help='run a job by the job id or with the `upload` command')
        add_option('-t', '--status',
                   metavar='job name', nargs='?',
                   help='show the status of a job, and the counters if it\'s running')
        add_option('-p', '--package',
                   metavar='job_name', nargs='?',
                   help='package the running info of a job including log and errors infos')

        job_parser.set_defaults(func=self.run)
示例#38
0
def start_rpc_server():
    '''Start the module-level RPC server once and return its serving thread.

    If both ``rpc_server`` and ``rpc_server_thread`` are already set, the
    existing thread is returned and nothing new is created.
    '''
    global rpc_server
    global rpc_server_thread

    already_started = not (rpc_server is None or rpc_server_thread is None)
    if already_started:
        return rpc_server_thread

    rpc_server = ColaRPCServer((get_ip(), main_conf.client.port))
    rpc_server.register_function(stop)

    serving_thread = threading.Thread(target=rpc_server.serve_forever)
    serving_thread.setDaemon(True)
    serving_thread.start()

    rpc_server_thread = serving_thread
    return serving_thread
示例#39
0
def start_rpc_server():
    '''Return the thread serving the module-level RPC server, creating both on first use.

    A new ``ColaRPCServer`` is only built when ``rpc_server`` or
    ``rpc_server_thread`` is still ``None``; ``stop`` is registered on it
    and it is served from a daemon thread.
    '''
    global rpc_server
    global rpc_server_thread

    if rpc_server is None or rpc_server_thread is None:
        address = (get_ip(), main_conf.client.port)
        rpc_server = ColaRPCServer(address)
        rpc_server.register_function(stop)

        worker = threading.Thread(target=rpc_server.serve_forever)
        worker.setDaemon(True)
        worker.start()
        rpc_server_thread = worker

    return rpc_server_thread
示例#40
0
文件: job.py 项目: ll2088/cola
 def add_arguments(self, parser):
     '''Create the ``job`` sub-command on ``parser`` and declare its options.

     The sub-parser is stored on ``self.job_parser``; its ``func`` default
     is ``self.run``.
     '''
     default_master = get_ip()

     job_parser = parser.add_parser('job', help='job commands')
     self.job_parser = job_parser
     add_option = job_parser.add_argument

     # ``--master`` defaults to the local address.
     add_option('-m', '--master',
                metavar='master address', nargs='?', default=default_master,
                help='master connected to(in the former of `ip:port` or `ip`)')
     add_option('-l', '--list',
                action='store_true',
                help='list all jobs including <id> <name> and <status>')
     add_option('-k', '--kill',
                metavar='kill some job', nargs='?',
                help='kill job by job name')
     add_option('-u', '--upload',
                metavar='upload a job', nargs='?',
                help='upload a job directory to the cluster')
     # A bare ``-r`` (no value) yields the constant 'U'.
     add_option('-r', '--run',
                metavar='run a job', nargs='?', const='U',
                help='run a job by the job id or with the `upload` command')
     add_option('-t', '--status',
                metavar='get the status of a job', nargs='?',
                help='show the status of a job, and the counters if it\'s running')
     add_option('-p', '--package',
                metavar='package a job', nargs='?',
                help='package the running info of a job including log and errors infos')

     job_parser.set_defaults(func=self.run)
示例#41
0
def put_starts(master=None):
    '''Put every line of ``keywords_f`` on the message queue, in batches.

    Lines are collected until ``PUTSIZE`` of them are pending, then sent
    with one ``put`` call; a final partial batch is sent at end of file.

    :param master: master address used to request the node list; when
        ``None`` the only node is ``get_ip()`` with ``user_config.job.port``.
    '''
    if master is None:
        nodes = ['%s:%s' % (get_ip(), getattr(user_config.job, 'port'))]
    else:
        nodes = client_call(master, 'get_nodes')

    mq_client = MessageQueueClient(nodes)
    with open(keywords_f) as f:
        keys = []
        # Iterate the file object directly: ``xreadlines()`` is deprecated
        # in Python 2 and does not exist in Python 3.
        # NOTE(review): each keyword keeps its trailing newline, exactly as
        # before - confirm the queue consumers expect that.
        for keyword in f:
            keys.append(keyword)
            if len(keys) >= PUTSIZE:
                mq_client.put(keys)
                keys = []
        if keys:
            mq_client.put(keys)
示例#42
0
    def __init__(self, root, zip_dir, job_dir,
                 ip_address=None, data_path=None, force=False):
        '''Initialise the master state, start its RPC server and expose the API.

        :param ip_address: address to use; when given it must be one of the
            values returned by ``get_ips()``, otherwise ``get_ip()`` is used.
        :param force: stored on the instance and forwarded to ``self.check``.
        :raises ValueError: if ``ip_address`` is not among ``get_ips()``.
        '''
        self.root = root
        self.zip_dir = zip_dir
        self.job_dir = job_dir
        self.data_path = data_path
        self.force = force

        self.nodes_watchers = {}
        self.running_jobs = {}
        self.black_list = []

        if ip_address is not None:
            known_ips = get_ips()
            if ip_address not in known_ips:
                raise ValueError('IP address must be one of (%s)' %
                                 ','.join(known_ips))
        else:
            ip_address = get_ip()
        self.ip_address = ip_address
        self.port = main_conf.master.port

        self.stopped = False

        self.check(force=force)
        self.init_rpc_server()

        # Remote name differs from the method name only for the heartbeat.
        expose = self.rpc_server.register_function
        expose(self.register_watcher_heartbeat, 'register_heartbeat')
        expose(self.stop, 'stop')
        expose(self.list_jobs, 'list_jobs')
        expose(self.start_job, 'start_job')
        expose(self.stop_job, 'stop_job')
        expose(self.finish_job, 'finish_job')
        expose(self.clear_job, 'clear_job')
        expose(self.list_job_dirs, 'list_job_dirs')
        expose(self.list_workers, 'list_workers')

        self.set_receiver(zip_dir)
示例#43
0
文件: starts.py 项目: 0pengl/cola
def put_starts(master=None):
    '''Send the lines of ``keywords_f`` to the message queue in batches.

    A batch is flushed with ``put`` as soon as it holds ``PUTSIZE`` lines;
    whatever remains at end of file is flushed last.

    :param master: master address used to request the node list; when
        ``None`` the only node is ``get_ip()`` with ``user_config.job.port``.
    '''
    if master is None:
        nodes = ['%s:%s' % (get_ip(), getattr(user_config.job, 'port'))]
    else:
        nodes = client_call(master, 'get_nodes')

    mq_client = MessageQueueClient(nodes)
    with open(keywords_f) as f:
        batch = []
        # ``xreadlines()`` is deprecated in Python 2 and removed in Python 3;
        # iterating the file object yields the same lines.
        # NOTE(review): lines are queued with their trailing newline, as
        # before - confirm that is what consumers expect.
        for keyword in f:
            batch.append(keyword)
            if len(batch) >= PUTSIZE:
                mq_client.put(batch)
                batch = []
        if batch:
            mq_client.put(batch)
示例#44
0
def load_job(job_path, data_path=None, master=None, force=False):
    '''Import the job at ``job_path`` and run it standalone or under a master.

    :param data_path: base data directory; defaults to ``<root_dir()>/data``.
    :param master: master address; ``None`` selects the standalone loader.
    :param force: forwarded to the job loader.
    :raises ValueError: if ``job_path`` does not exist.
    '''
    if not os.path.exists(job_path):
        raise ValueError("Job definition does not exist.")

    job = import_job(job_path)

    base_dir = os.path.join(root_dir(), "data") if data_path is None else data_path
    job_root = os.path.join(base_dir, "worker", "jobs", job.real_name)
    if not os.path.exists(job_root):
        os.makedirs(job_root)

    if master is None:
        with StandaloneWorkerJobLoader(job, job_root, force=force) as job_loader:
            job_loader.run()
        return

    nodes = client_call(master, "get_nodes")
    local = "%s:%s" % (get_ip(), job.context.job.port)
    client_call(master, "ready", local)
    worker_loader = WorkerJobLoader(
        job, job_root, master, local=local, nodes=nodes, force=force)
    with worker_loader as job_loader:
        # NOTE(review): "ready" is reported a second time once the loader
        # exists - confirm both calls are intended.
        client_call(master, "ready", local)
        job_loader.ready_for_run()
示例#45
0
def load_job(job_path, data_path=None, master=None, force=False):
    '''Import the job at ``job_path`` and run it standalone or under a master.

    :param data_path: base data directory; defaults to ``<root_dir()>/data``.
    :param master: master address; ``None`` selects the standalone loader.
    :param force: forwarded to the job loader.
    :raises ValueError: if ``job_path`` does not exist.
    '''
    if not os.path.exists(job_path):
        raise ValueError('Job definition does not exist.')

    job = import_job(job_path)

    if data_path is None:
        data_path = os.path.join(root_dir(), 'data')
    job_root = os.path.join(data_path, 'worker', 'jobs', job.real_name)
    if not os.path.exists(job_root):
        os.makedirs(job_root)

    if master is not None:
        nodes = client_call(master, 'get_nodes')
        local = '%s:%s' % (get_ip(), job.context.job.port)
        client_call(master, 'ready', local)
        worker_loader = WorkerJobLoader(
            job, job_root, master, local=local, nodes=nodes, force=force)
        with worker_loader as job_loader:
            # NOTE(review): 'ready' is sent again after the loader is
            # built - confirm the repeated call is intended.
            client_call(master, 'ready', local)
            job_loader.ready_for_run()
    else:
        with StandaloneWorkerJobLoader(job, job_root, force=force) as job_loader:
            job_loader.run()
示例#46
0
 def __init__(self, container_id, working_dir,
              job_path, job_name, env, mq,
              counter_server, budget_server, speed_server,
              stopped, nonsuspend, idle_statuses, n_tasks=1,
              is_local=False, master_ip=None, logger=None,
              task_start_id=0):
     '''Store the container's collaborators and prepare per-task client slots.

     The job description is taken from ``env['job_desc'][job_name]`` when
     that entry is truthy, otherwise it is imported from ``job_path``.
     The container IP comes from ``env['ip']`` when truthy, else ``get_ip()``.
     '''
     self.container_id = container_id
     self.working_dir = working_dir
     self.mq = mq
     self.env = env
     self.job_name = job_name

     job_desc = env['job_desc'].get(job_name)
     if not job_desc:
         job_desc = import_job_desc(job_path)
     self.job_desc = job_desc

     self.counter_server = counter_server
     self.budget_server = budget_server
     self.speed_server = speed_server

     self.stopped = stopped
     self.nonsuspend = nonsuspend
     self.idle_statuses = idle_statuses
     self.n_tasks = n_tasks
     self.is_local = is_local
     self.master_ip = master_ip
     self.logger = logger

     self.task_start_id = task_start_id
     self.ip = env.get('ip', None) or get_ip()

     # One placeholder per task; each list is a separate object.
     self.counter_clients = [None] * n_tasks
     self.budget_clients = [None] * n_tasks
     self.speed_clients = [None] * n_tasks

     self.task_threads = []

     self.inited = False
     self.lock = multiprocessing.Lock()
示例#47
0
文件: loader.py 项目: friedvan/cola
def load_job(job_path, data_path=None, master=None, force=False):
    '''Import the job at ``job_path`` and run it standalone or under a master.

    :param data_path: base data directory; defaults to ``<root_dir()>/data``.
    :param master: master address; ``None`` selects the standalone loader.
    :param force: forwarded to the job loader.
    :raises ValueError: if ``job_path`` does not exist.
    '''
    if not os.path.exists(job_path):
        raise ValueError('Job definition does not exist.')

    job = import_job(job_path)

    data_root = data_path
    if data_root is None:
        data_root = os.path.join(root_dir(), 'data')
    job_root = os.path.join(data_root, 'worker', 'jobs', job.real_name)
    if not os.path.exists(job_root):
        os.makedirs(job_root)

    standalone = master is None
    if standalone:
        with StandaloneWorkerJobLoader(job, job_root, force=force) as job_loader:
            job_loader.run()
        return

    nodes = client_call(master, 'get_nodes')
    local = '%s:%s' % (get_ip(), job.context.job.port)
    client_call(master, 'ready', local)
    loader = WorkerJobLoader(job, job_root, master,
                             local=local, nodes=nodes, force=force)
    with loader as job_loader:
        # NOTE(review): 'ready' is reported to the master twice (before and
        # after building the loader) - confirm that is intended.
        client_call(master, 'ready', local)
        job_loader.ready_for_run()
示例#48
0
 def __init__(self, rpc_server, zip_dir, job_dir):
     '''Initialise the master state and expose its API on ``rpc_server``.

     Ends by calling ``self.set_receiver(zip_dir)``.
     '''
     self.rpc_server = rpc_server
     self.zip_dir = zip_dir
     self.job_dir = job_dir

     self.nodes_watchers = {}
     self.running_jobs = {}
     self.black_list = []
     self.ip_address = get_ip()

     self.stopped = False

     # Remote name differs from the method name only for the heartbeat.
     expose = self.rpc_server.register_function
     expose(self.register_watcher_heartbeat, 'register_heartbeat')
     expose(self.stop, 'stop')
     expose(self.list_jobs, 'list_jobs')
     expose(self.start_job, 'start_job')
     expose(self.stop_job, 'stop_job')
     expose(self.finish_job, 'finish_job')
     expose(self.clear_job, 'clear_job')
     expose(self.list_job_dirs, 'list_job_dirs')
     expose(self.list_workers, 'list_workers')

     self.set_receiver(zip_dir)
示例#49
0
文件: starts.py 项目: brightgems/cola
def put_starts(master=None):
    '''Put the ``starts`` URLs on the message queue of the "douban movie" job.

    The master is asked for its runnable jobs to find the app name whose
    job name is ``"douban movie"``, then for its workers to build the queue
    client's address list.  URLs are sent in batches of ``PUTSIZE``.

    :param master: master address as an ``ip:port`` string; defaults to
        ``get_ip()`` with ``user_config.master.port``.
    :raises Exception: if no runnable job is named ``"douban movie"``.
    '''
    if master is None:
        # Must be a plain 'ip:port' string (not a list): it is split on ':'
        # further down to obtain the default host.
        master = '%s:%s' % (get_ip(), getattr(user_config.master, 'port'))
    print('master:%s' % master)

    jobs = client_call(master, 'runnable_jobs')
    app_name = ''
    for candidate, job_name in jobs.items():
        if job_name == "douban movie":
            app_name = candidate
            break
    if not app_name:
        raise Exception('douban movie job has not upload')

    nodes = client_call(master, 'list_workers')
    default_addr = master.split(':')[0]
    addrs = []
    for worker_addr, _ in nodes:
        host, port = worker_addr.split(':')
        if host.lower() == 'localhost':
            # Workers reporting 'localhost' are addressed via the master's host.
            addrs.append('%s:%s' % (default_addr, port))
        else:
            addrs.append(worker_addr)

    mq_client = MessageQueueClient(addrs, app_name)
    # NOTE(review): this calls ``get()`` on the queue client just to print
    # the result - confirm it is not a leftover debug statement.
    print('get:%s' % mq_client.get())

    urls = []
    for url in starts:
        urls.append(url)
        if len(urls) >= PUTSIZE:
            mq_client.put(urls)
            urls = []
    if urls:
        mq_client.put(urls)
示例#50
0
def create_rpc_server():
    '''Create a ``ColaRPCServer`` on the master port and serve it from a daemon thread.

    :returns: the server instance (the serving thread is not returned).
    '''
    address = (get_ip(), main_conf.master.port)
    server = ColaRPCServer(address)

    serving_thread = threading.Thread(target=server.serve_forever)
    serving_thread.setDaemon(True)
    serving_thread.start()
    return server
示例#51
0
def register(func):
    '''Register ``func`` as a command-line option and return a pass-through wrapper.

    The option name is the function name with underscores turned into
    dashes, surrounding dashes stripped and a single leading dash added.
    The stripped docstring becomes the help text; a function without a
    docstring is registered with no help text instead of raising
    ``AttributeError``.

    :param func: callable taking the master as first argument.
    :returns: a wrapper that forwards all arguments to ``func``.
    '''
    func_name = func.__name__
    name = '-%s' % func_name.replace('_', '-').strip('-')
    doc = func.__doc__
    # ``__doc__`` is None for undocumented functions; None is argparse's
    # own default for ``help``.
    help_ = doc.strip() if doc is not None else None

    registered_func[func_name] = func
    parser.add_argument(name, nargs='*', dest=func_name,
                        default=argparse.SUPPRESS, help=help_)

    def inner(master, *args, **kwargs):
        return func(master, *args, **kwargs)
    return inner

# Log receiver instance; stays None until start_log_server() creates it.
log_server = None
# Port passed to the log receiver when it is created.
log_server_port = 9120
# 'ip:port' string built from get_ip() and the log receiver port.
client = '%s:%s' % (get_ip(), log_server_port)
def start_log_server():
    '''Create the module-level log receiver and serve it from a new thread.

    Does nothing when ``log_server`` is already set.
    '''
    global log_server
    global log_server_port

    if log_server is None:
        log_server = LogRecordSocketReceiver(logger=logger,
                                             host=get_ip(),
                                             port=log_server_port)
        serving_thread = threading.Thread(target=log_server.serve_forever)
        serving_thread.start()
    
def stop_log_server():
    global log_server
    
    if log_server is None:
        return
                        default=None,
                        const=None,
                        help='root directory to put data')
    parser.add_argument('-f',
                        '--force',
                        metavar='force start',
                        nargs='?',
                        default=False,
                        const=True,
                        type=bool)
    args = parser.parse_args()

    master = args.master
    if master is None:
        connect_to_localhost = raw_input("Connect to localhost? (yes or no) ")
        conn = connect_to_localhost.lower().strip()
        if conn == 'yes' or conn == 'y':
            master = '%s:%s' % (get_ip(), main_conf.master.port)
        elif conn == 'no' or conn == 'n':
            master = raw_input(
                "Please input the master(form: \"ip:port\" or \"ip\") ")
            if ':' not in master:
                master += ':%s' % main_conf.master.port
        else:
            print 'Input illegal!'
    else:
        if ':' not in master:
            master += ':%s' % main_conf.master.port

    if master is not None:
        start_worker(master, data_path=args.data, force=args.force)
示例#53
0
Created on 2013-6-27

@author: Chine
'''

import socket
import os

from cola.core.rpc import client_call
from cola.core.utils import get_ip
from cola.core.logs import get_logger
from cola.worker.recover import recover

from conf import user_config

logger = get_logger(name='weibosearch_stop')

if __name__ == '__main__':
    # Ask the worker at the local address and the configured job port to stop.
    ip, port = get_ip(), getattr(user_config.job, 'port')
    logger.info('Trying to stop single running worker')
    try:
        client_call('%s:%s' % (ip, port), 'stop')
    except socket.error:
        # The stop call raised a socket error; offer a forced stop instead.
        stop = raw_input("Force to stop? (y or n) ").strip()
        if stop in ('y', 'yes'):
            # NOTE(review): job_path is computed but never passed to
            # recover() - confirm whether recover() should receive it.
            job_path = os.path.dirname(os.path.abspath(__file__))
            recover()
        else:
            print('ignore')
    # NOTE(review): this message is logged on every path, including
    # 'ignore' - confirm that is intended.
    logger.info('Successfully stopped single running worker')
示例#54
0
def register(func):
    '''Register ``func`` as a command-line option and return a pass-through wrapper.

    The option name is the function name with underscores turned into
    dashes, surrounding dashes stripped and a single leading dash added.
    The stripped docstring becomes the help text; a function without a
    docstring is registered with no help text instead of raising
    ``AttributeError``.

    :param func: callable taking the master as first argument.
    :returns: a wrapper that forwards all arguments to ``func``.
    '''
    func_name = func.__name__
    name = '-%s' % func_name.replace('_', '-').strip('-')
    doc = func.__doc__
    # ``__doc__`` is None for undocumented functions; None is argparse's
    # own default for ``help``.
    help_ = doc.strip() if doc is not None else None

    registered_func[func_name] = func
    parser.add_argument(name, nargs='*', dest=func_name,
                        default=argparse.SUPPRESS, help=help_)

    def inner(master, *args, **kwargs):
        return func(master, *args, **kwargs)
    return inner

# Log receiver instance; stays None until start_log_server() creates it.
log_server = None
# Port passed to the log receiver when it is created.
log_server_port = 9120
# 'ip:port' string built from get_ip() and the log receiver port.
client = '%s:%s' % (get_ip(), log_server_port)
def start_log_server():
    '''Start the module-level log receiver unless one already exists.

    The receiver is stored in ``log_server`` and its ``serve_forever`` runs
    in a newly started thread.
    '''
    global log_server
    global log_server_port

    already_running = log_server is not None
    if already_running:
        return

    receiver = LogRecordSocketReceiver(logger=logger, host=get_ip(),
                                       port=log_server_port)
    log_server = receiver
    threading.Thread(target=receiver.serve_forever).start()

def stop_log_server():
    global log_server

    if log_server is None:
        return
示例#55
0
    def __init__(self, job, data_dir, nodes,
                 local_ip=None, client=None, context=None,
                 copies=1, force=False):
        '''Initialise the master-side loader: base classes, queue client, logging, RPC and signals.

        :param nodes: worker addresses; copied into ``not_registered`` and
            ``not_finished``.
        :param local_ip: address to use; when given it must be one of
            ``get_ips()``, otherwise ``get_ip()`` is used.
        :param client: when not ``None``, attached to the logger through
            ``add_log_client``.
        :param context: overrides ``job.context`` when truthy.
        :raises ValueError: if ``local_ip`` is not among ``get_ips()``.
        '''
        ctx = context or job.context
        master_port = ctx.job.master_port
        if local_ip is not None:
            allowed_ips = get_ips()
            if local_ip not in allowed_ips:
                raise ValueError('IP address must be one of (%s)' %
                                 ','.join(allowed_ips))
        else:
            local_ip = get_ip()
        local = '%s:%s' % (local_ip, master_port)

        JobLoader.__init__(self, job, data_dir, local,
                           context=ctx, copies=copies, force=force)
        LimitionJobLoader.__init__(self, job, context=ctx)

        # check
        self.check()

        self.nodes = nodes
        self.not_registered = self.nodes[:]
        self.not_finished = self.nodes[:]

        # mq
        self.mq_client = MessageQueueClient(self.nodes, copies=copies)

        # Both locks are created already acquired.
        self.ready_lock = threading.Lock()
        self.ready_lock.acquire()
        self.finish_lock = threading.Lock()
        self.finish_lock.acquire()

        # logger
        logger_name = 'cola_master_%s' % self.job.real_name
        log_file = os.path.join(self.root, 'job.log')
        self.logger = get_logger(name=logger_name,
                                 filename=log_file,
                                 is_master=True)
        self.client = client
        if client is not None:
            self.client_handler = add_log_client(self.logger, client)
        else:
            self.client_handler = None

        self.init_rpc_server()
        self.init_rate_clear()
        self.init_logger_server(self.logger)

        # Expose the loader's API over RPC under the same names.
        expose = self.rpc_server.register_function
        expose(self.client_stop, 'client_stop')
        expose(self.ready, 'ready')
        expose(self.worker_finish, 'worker_finish')
        expose(self.complete, 'complete')
        expose(self.error, 'error')
        expose(self.get_nodes, 'get_nodes')
        expose(self.apply, 'apply')
        expose(self.require, 'require')
        expose(self.stop, 'stop')
        expose(self.add_node, 'add_node')
        expose(self.remove_node, 'remove_node')

        # Route SIGINT and SIGTERM to the loader's own handler.
        for signum in (signal.SIGINT, signal.SIGTERM):
            signal.signal(signum, self.signal_handler)
示例#56
0
文件: context.py 项目: ll2088/cola
    def __init__(self, local_mode=False, is_master=False, master_addr=None,
                 is_client=False, working_dir=None, mkdirs=False,
                 ip=None, ips=None):
        '''Resolve addresses and the working directory, then start the shared manager.

        :param local_mode: when false, ``master_addr`` is mandatory.
        :param master_addr: ``ip`` or ``ip:port``; outside local mode the
            configured master port is appended when no port is given.
        :param working_dir: defaults to ``<tempdir>/cola``; only that default
            is created, and only when ``mkdirs`` is true.
        :param ip: this node's address; defaults to the master IP on a
            master, else ``get_ip()`` (``127.0.0.1`` in local mode when
            ``get_ip()`` returns nothing).
        :param ips: address list; when empty, ``ip`` is appended to it.
        :raises ValueError: if ``master_addr`` is missing outside local mode.
        '''
        self.is_local_mode = local_mode
        self.is_master = is_master
        self.is_client = is_client

        self.master_addr = master_addr
        self.master_ip = master_addr
        if not local_mode:
            if master_addr is None:
                raise ValueError(
                    'Master address must be supplied when local_mode is False')

            if ':' in master_addr:
                self.master_ip = master_addr.split(':', 1)[0]
            else:
                self.master_addr = '%s:%s' % (master_addr,
                                              main_conf.master.port)

        if working_dir is None:
            working_dir = os.path.join(tempfile.gettempdir(), 'cola')
            if mkdirs and not os.path.exists(working_dir):
                os.makedirs(working_dir)
        self.working_dir = working_dir

        if ip is None:
            if is_master:
                ip = self.master_ip
            else:
                ip = get_ip()
                if local_mode and not ip:
                    ip = '127.0.0.1'
        self.ip = ip

        if self.master_addr is None:
            self.master_addr = '%s:%s' % (self.ip, main_conf.master.port)
        self.worker_addr = '%s:%s' % (self.ip, main_conf.worker.port)

        # A caller-supplied list is used as-is (and extended when empty).
        self.ips = [] if ips is None else ips
        if not self.ips:
            self.ips.append(self.ip)
        self.addrs = [self.fix_addr(node_ip) for node_ip in self.ips]

        self.manager = ContextManager()
        self.manager.start(manager_init)
        shared_env = {
            'ip': self.ip,
            'root': self.working_dir,
            'is_local': self.is_local_mode,
            'master_ip': self.master_ip,
            'job_desc': {}
        }
        self.env = self.manager.dict(shared_env)
        self.logger = get_logger('cola_context')

        self.master_rpc_server = None
        self.worker_rpc_server = None
示例#57
0
 def init_logger_server(self, logger):
     '''Create the log receiver for ``logger``, keep it on ``self.log_server`` and serve it from a new thread.'''
     receiver = LogRecordSocketReceiver(host=get_ip(), logger=logger)
     self.log_server = receiver
     serving_thread = threading.Thread(target=receiver.serve_forever)
     serving_thread.start()