def _send2one(self, content, ip, port):
    """Open a TCP connection to ``ip:port`` and send ``content`` over it.

    Args:
        content: payload handed to the socket unchanged.
        ip: address of the target host.
        port: TCP port of the target host (formatted with ``%d``).

    The socket timeout is taken from ``self._args.timeout``. A
    ``socket.timeout`` is logged as a warning and swallowed; any other
    exception still propagates to the caller.
    """
    try:
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            s.settimeout(self._args.timeout)
            LOG.info('connecting to %s:%d...' % (ip, port))
            s.connect((ip, port))
            # sendall() keeps writing until the whole payload is out; a bare
            # send() may transmit only part of it.
            s.sendall(content)
        finally:
            # Release the descriptor even when connect/send fails.
            s.close()
        LOG.info('connection to %s:%d close' % (ip, port))
    except socket.timeout:
        LOG.warning('%s:%d lost response' % (ip, port))
def _send2one(self, content, ip, port):
    """Send ``content`` to a single host over a short-lived TCP connection.

    Args:
        content: payload passed to the socket as-is.
        ip: address of the host to connect to.
        port: TCP port on that host (formatted with ``%d``).

    ``self._args.timeout`` is applied as the socket timeout. When the
    operation times out a warning is logged and the method returns
    normally; other exceptions are not handled here.
    """
    try:
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            s.settimeout(self._args.timeout)
            LOG.info('connecting to %s:%d...' % (ip, port))
            s.connect((ip, port))
            # A plain send() is allowed to write only a prefix of the data;
            # sendall() loops until everything has been written.
            s.sendall(content)
        finally:
            # Always close, so a failed connect/send does not leak the socket.
            s.close()
        LOG.info('connection to %s:%d close' % (ip, port))
    except socket.timeout:
        LOG.warning('%s:%d lost response' % (ip, port))
def _send2one(self, content, ip, port):
    """Open a TCP connection to ``ip:port`` and send ``content`` over it.

    Args:
        content: payload handed to the socket unchanged.
        ip: address of the target host.
        port: TCP port of the target host (formatted with ``%d``).

    The socket timeout is taken from ``self._args.timeout``. Timeouts are
    logged as warnings and other socket errors as errors; neither is
    re-raised.
    """
    try:
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            s.settimeout(self._args.timeout)
            LOG.debug('connect to %s:%d...' % (ip, port))
            s.connect((ip, port))
            # sendall() keeps writing until the whole payload is out; a bare
            # send() may transmit only part of it.
            s.sendall(content)
        finally:
            # Release the descriptor even when connect/send fails.
            s.close()
        LOG.debug('connection to %s:%d close' % (ip, port))
    except socket.timeout:
        LOG.warning('%s:%d lost response' % (ip, port))
    except socket.error as arg:
        # Indexing the exception (arg[0], arg[1]) breaks for errors built
        # from a single argument, so read the attributes and fall back to
        # the exception text when no strerror is available.
        code = getattr(arg, 'errno', None)
        reason = getattr(arg, 'strerror', None) or str(arg)
        LOG.error('socket error while connecting to %s:%d errno %s: %s'
                  % (ip, port, code, reason))
def _send2one(self, content, ip, port):
    """Send ``content`` to a single host over a short-lived TCP connection.

    Args:
        content: payload passed to the socket as-is.
        ip: address of the host to connect to.
        port: TCP port on that host (formatted with ``%d``).

    ``self._args.timeout`` is applied as the socket timeout. A timeout is
    logged as a warning, any other socket error is logged as an error, and
    in both cases the method returns normally.
    """
    try:
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            s.settimeout(self._args.timeout)
            LOG.debug('connect to %s:%d...' % (ip, port))
            s.connect((ip, port))
            # A plain send() is allowed to write only a prefix of the data;
            # sendall() loops until everything has been written.
            s.sendall(content)
        finally:
            # Always close, so a failed connect/send does not leak the socket.
            s.close()
        LOG.debug('connection to %s:%d close' % (ip, port))
    except socket.timeout:
        LOG.warning('%s:%d lost response' % (ip, port))
    except socket.error as arg:
        # arg[0]/arg[1] raises for single-argument errors (and exceptions
        # are not indexable on Python 3); use the errno/strerror attributes
        # and fall back to the exception text.
        code = getattr(arg, 'errno', None)
        reason = getattr(arg, 'strerror', None) or str(arg)
        LOG.error('socket error while connecting to %s:%d errno %s: %s'
                  % (ip, port, code, reason))
def _send2one_r(self, content, ip, port, result):
    """Send ``content`` to ``ip:port`` and collect the reply in ``result``.

    The payload is followed by a literal ``__EOF__`` terminator; any
    ``__EOF__`` inside the payload is rewritten to ``__EOF___`` first so it
    cannot be mistaken for the terminator.

    Args:
        content: payload to send; must support ``.replace`` with str
            arguments.
        ip: address of the target host.
        port: TCP port of the target host (formatted with ``%d``).
        result: list that receives the reply when it is non-empty.

    Returns:
        ``''`` when the exchange times out, otherwise ``None``. Exceptions
        other than ``socket.timeout`` propagate to the caller.
    """
    try:
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            s.settimeout(self._args.timeout)
            LOG.info('connecting to %s:%d...' % (ip, port))
            s.connect((ip, port))
            # sendall() guarantees the full payload and the terminator are
            # written; send() may stop after a partial write.
            s.sendall(content.replace('__EOF__', '__EOF___'))
            s.sendall('__EOF__')
            # Only a single read of at most 4096 bytes is performed.
            r = s.recv(4096)
            if r != '':
                result.append(r)
        finally:
            # Release the descriptor even when the exchange fails.
            s.close()
        LOG.debug('connection to %s:%d close' % (ip, port))
    except socket.timeout:
        LOG.warning('%s:%d lost response' % (ip, port))
        # NOTE(review): assumed to belong to the timeout handler -- confirm
        # against callers that inspect the return value.
        return ''
def _send2one_r(self, content, ip, port, result):
    """Send ``content`` to one host and append its reply to ``result``.

    Before sending, every ``__EOF__`` in the payload is replaced by
    ``__EOF___``; a literal ``__EOF__`` is then sent as the terminator.

    Args:
        content: payload to send; must support ``.replace`` with str
            arguments.
        ip: address of the host to connect to.
        port: TCP port on that host (formatted with ``%d``).
        result: list to which a non-empty reply is appended.

    Returns:
        ``''`` if the exchange times out, ``None`` otherwise. Only
        ``socket.timeout`` is handled; other exceptions propagate.
    """
    try:
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            s.settimeout(self._args.timeout)
            LOG.info('connecting to %s:%d...' % (ip, port))
            s.connect((ip, port))
            # send() may write only part of its argument; sendall() does not
            # return until everything has been written.
            s.sendall(content.replace('__EOF__', '__EOF___'))
            s.sendall('__EOF__')
            # A single recv of up to 4096 bytes is all that is read back.
            r = s.recv(4096)
            if r != '':
                result.append(r)
        finally:
            # Close on every path so a timeout does not leak the socket.
            s.close()
        LOG.debug('connection to %s:%d close' % (ip, port))
    except socket.timeout:
        LOG.warning('%s:%d lost response' % (ip, port))
        # NOTE(review): assumed to belong to the timeout handler -- confirm
        # against callers that inspect the return value.
        return ''
def parse_distribute_task(self):
    """Validate options, load the task module and run all task rounds.

    Steps, in order:
      1. range-check the numeric options on ``self._args``;
      2. connect to the db server and store the handles on ``self._args``;
      3. create the manager and import ``modules.<mod>.<mod>``;
      4. repeatedly ask the module's ``Master`` for subtasks, queue them and
         collect results, until it yields no more subtasks;
      5. call the master's ``report()`` and then ``self._shutdown()``.

    Raises:
        SwarmUseException: an option fails its range check.
        SwarmModuleException: the task module cannot be imported.
        SwarmSlaveException: the 'ack' probe gets no response at all while
            results are being collected.
    """
    args = self._args

    # Common option checks.
    # NOTE(review): 0 passes this check although the message says 1-3 --
    # confirm which one is intended.
    granularity = args.task_granularity
    if granularity < 0 or granularity > 3:
        raise SwarmUseException(
            'invalid task granularity, it should be one number of 1-3')
    if args.process_num < 0:
        raise SwarmUseException('process number can not be negative')
    if args.thread_num <= 0:
        raise SwarmUseException('thread number should be positive')

    # Connect to the db server and keep the handles on the args object.
    LOG.info('try to connect to db server: %s:%d'
             % (args.db_addr, args.db_port))
    db_handles = init_db(args.db_addr, args.db_port, args.mod)
    args.db, args.coll = db_handles
    LOG.info('Connection to db server completed')

    # Start the manager.
    self._manager = MSwarmManager(
        args.timeout, address=('', args.m_port), authkey=args.authkey)

    # Load the task module and build its Master.
    mod_name = args.mod
    try:
        module = importlib.import_module(
            '.'.join(('modules', mod_name, mod_name)))
    except ImportError:
        raise SwarmModuleException(
            'an error occured when load module:' + args.mod)
    LOG.info('load module: ' + mod_name)
    LOG.info('begin to decompose task...')
    mod_master = module.Master(args)

    # Rounds of decomposition and distribution; stop on an empty round.
    round_no = 0
    self._manager.init_task_statistics()
    while True:
        subtasks = mod_master.generate_subtasks()
        task_total = len(subtasks)
        if not task_total:
            break

        round_no += 1
        LOG.log(REPORT, 'begin round %d' % round_no)
        LOG.info('round %d: put task into queue...' % round_no)
        for subtask in subtasks:
            self._manager.put_task(args.mod, subtask)
        LOG.log(REPORT, 'round %d: %d tasks have been put into queue'
                % (round_no, task_total))
        LOG.info('round %d: get result from swarm...' % round_no)

        # Collect results; an empty-string result ends the round.
        done = 0
        self._manager.prepare_get_result()
        while True:
            try:
                result = self._manager.get_result()
                if result == '':
                    break
                done += 1
                LOG.log(REPORT, 'round %d: %d/%d tasks have been completed'
                        % (round_no, done, task_total))
                mod_master.handle_result(result)
            except Queue.Empty:
                # No result arrived: probe the swarm, and if some hosts
                # stopped answering, reorganize the tasks in the queue.
                LOG.info('try to detect swarm status')
                alive = len(self._send2swarm_r('ack'))
                if alive >= self._swarm_num:
                    LOG.log(REPORT, 'all swarm works fine. now num: %d'
                            % self._swarm_num)
                    continue

                LOG.warning(
                    '%d of swarm has lost response. now total swarm:%d'
                    % (self._swarm_num - alive, alive))
                self._swarm_num = alive
                if alive == 0:
                    # Nothing left to run the remaining tasks.
                    raise SwarmSlaveException('no swarm left. task failed')
                LOG.log(REPORT, 'reorganize tasks in queue...')
                self._manager.reorganize_tasks()
                LOG.log(REPORT, 'reorganization completed')
        LOG.log(REPORT, 'round %d over' % round_no)

    LOG.log(REPORT, 'all tasks have been comfirmed')
    # Final report, then shut down.
    mod_master.report()
    self._shutdown()
def parse_distribute_task(self):
    """Check options, load the task module and drive every task round.

    The method range-checks the numeric options on ``self._args``, connects
    to the db server (storing the handles on ``self._args``), creates the
    manager, imports ``modules.<mod>.<mod>`` and instantiates its
    ``Master``. It then loops: the master generates subtasks, they are put
    on the manager's queue, and results are collected until an empty-string
    result is received. The loop ends when the master generates no
    subtasks; the master's ``report()`` and ``self._shutdown()`` follow.

    Raises:
        SwarmUseException: an option fails its range check.
        SwarmModuleException: the task module cannot be imported.
        SwarmSlaveException: the 'ack' probe gets no response at all while
            results are being collected.
    """
    args = self._args

    # do some common check here
    # NOTE(review): 0 passes this check although the message says 1-3 --
    # confirm which one is intended.
    granularity = args.task_granularity
    if granularity < 0 or granularity > 3:
        raise SwarmUseException(
            'invalid task granularity, it should be one number of 1-3')
    if args.process_num < 0:
        raise SwarmUseException('process number can not be negative')
    if args.thread_num <= 0:
        raise SwarmUseException('thread number should be positive')

    # connect to db server
    LOG.info('try to connect to db server: %s:%d'
             % (args.db_addr, args.db_port))
    db_handles = init_db(args.db_addr, args.db_port, args.mod)
    args.db, args.coll = db_handles
    LOG.info('Connection to db server completed')

    # start the manager
    self._manager = MSwarmManager(
        args.timeout, address=('', args.m_port), authkey=args.authkey)

    # load the task module and build its Master
    mod_name = args.mod
    try:
        module = importlib.import_module(
            '.'.join(('modules', mod_name, mod_name)))
    except ImportError:
        raise SwarmModuleException(
            'an error occured when load module:' + args.mod)
    LOG.info('load module: ' + mod_name)
    LOG.info('begin to decompose task...')
    mod_master = module.Master(args)

    # rounds of task decomposition and distribution
    round_no = 0
    self._manager.init_task_statistics()
    while True:
        subtasks = mod_master.generate_subtasks()
        task_total = len(subtasks)
        if not task_total:
            # nothing more to distribute
            break

        round_no += 1
        LOG.log(REPORT, 'begin round %d' % round_no)
        LOG.info('round %d: put task into queue...' % round_no)
        for subtask in subtasks:
            self._manager.put_task(args.mod, subtask)
        LOG.log(REPORT, 'round %d: %d tasks have been put into queue'
                % (round_no, task_total))
        LOG.info('round %d: get result from swarm...' % round_no)

        # get result; an empty-string result ends the round
        done = 0
        self._manager.prepare_get_result()
        while True:
            try:
                result = self._manager.get_result()
                if result == '':
                    break
                done += 1
                LOG.log(REPORT, 'round %d: %d/%d tasks have been completed'
                        % (round_no, done, task_total))
                mod_master.handle_result(result)
            except Queue.Empty:
                # check number of slave host, if someone has lost response,
                # reorganize tasks in queue.
                LOG.info('try to detect swarm status')
                alive = len(self._send2swarm_r('ack'))
                if alive >= self._swarm_num:
                    LOG.log(REPORT, 'all swarm works fine. now num: %d'
                            % self._swarm_num)
                    continue

                LOG.warning(
                    '%d of swarm has lost response. now total swarm:%d'
                    % (self._swarm_num - alive, alive))
                self._swarm_num = alive
                # if no swarm left
                if alive == 0:
                    raise SwarmSlaveException('no swarm left. task failed')
                LOG.log(REPORT, 'reorganize tasks in queue...')
                self._manager.reorganize_tasks()
                LOG.log(REPORT, 'reorganization completed')
        LOG.log(REPORT, 'round %d over' % round_no)

    LOG.log(REPORT, 'all tasks have been comfirmed')
    # do final report now
    mod_master.report()
    self._shutdown()