class Root(resource.Resource):

    def __init__(self, app, config):
        resource.Resource.__init__(self)
        self.app = app
        self.IApp = IServiceCollection(app, app)
        self.debug = config.getboolean('debug', False)
        self.nodename = config.get('node_name', socket.gethostname())
        static_serve_path = config.get('static_serve_path', 'files')
        storage_path = config.get('storage_path')
        self.putChild('', Home(self))
        self.putChild('jobs', Jobs(self))
        self.putChild(static_serve_path, File(storage_path))
        services = config.items('services', ())
        for servName, servClsName in services:
            servCls = load_object(servClsName)
            self.putChild(servName, servCls(self))

    def request_permitted(self, request):
        # @TODO log not permitted requests
        return request.getClientIP() in self.app.trusted_clients.keys()

    @property
    def launcher(self):
        return self.IApp.getServiceNamed('launcher')

    @property
    def scheduler(self):
        return self.IApp.getServiceNamed('scheduler')

    @property
    def poller(self):
        app = IServiceCollection(self.app, self.app)
        return app.getServiceNamed('poller')
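# The snippets in this file all resolve named services through the
# IServiceCollection adapter. A minimal standalone sketch of that pattern
# (not from the source; the 'launcher' name is just an example):
from twisted.application import service

application = service.Application('example')
collection = service.IServiceCollection(application, application)

child = service.Service()
child.setName('launcher')
child.setServiceParent(collection)

assert collection.getServiceNamed('launcher') is child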
def startService(self):
    app = IServiceCollection(self.app, self.app)
    self.poller = app.getServiceNamed('poller')
    self.fetcher = app.getServiceNamed('fetcher')
    self.amqp = app.getServiceNamed('amqp')
    self.task_storage = app.getServiceNamed('task_storage')
    self.check_storage_path()
    for slot in range(self.max_proc):
        self._wait_for_project(slot)
    log.msg(format='Flowder %(version)s started: max_proc=%(max_proc)r',
            version=__version__, max_proc=self.max_proc,
            system='Launcher')
@inlineCallbacks  # required for the yield/returnValue flow below
def poll(self):
    app = IServiceCollection(self.app, self.app)
    launch = app.getServiceNamed('launcher')
    spiders = launch.processes.values()
    running = [{
        "spider": s.spider,
        "jobid": s.job,
    } for s in spiders]
    # e.g. [{'spider': 'link', 'jobid': 'fd967e32f53b11e7a966485b39c53ff1'},
    #       {'spider': 'link', 'jobid': '03ba6c7ef53c11e7a966485b39c53ff1'}]
    job_num = len(running)
    # Report the number of running jobs to the master over the socket.
    self.socket.send(str(job_num))
    if self.dq.pending:
        return
    for p, q in iteritems(self.queues):
        c = yield maybeDeferred(q.count)
        if c:
            msg = yield maybeDeferred(q.pop)
            if msg is not None:  # in case of a concurrently accessed queue
                returnValue(self.dq.put(self._message(msg, p)))
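# maybeDeferred lets the loop above treat synchronous and Deferred-returning
# queue backends uniformly. A minimal sketch with a hypothetical in-memory
# queue (not from the source):
from twisted.internet import defer

class InMemoryQueue(object):
    def __init__(self):
        self._items = []

    def count(self):
        # plain int; maybeDeferred wraps it in an already-fired Deferred
        return len(self._items)

    def pop(self):
        return self._items.pop(0) if self._items else None

@defer.inlineCallbacks
def drain_one(queue):
    c = yield defer.maybeDeferred(queue.count)
    if c:
        msg = yield defer.maybeDeferred(queue.pop)
        defer.returnValue(msg)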
@inlineCallbacks  # required for the yield/returnValue flow below
def poll(self):
    app = IServiceCollection(self.app, self.app)
    launch = app.getServiceNamed('launcher')
    cur_pro_num = len(launch.processes)
    # Report the current process count to the dispatcher, which writes it
    # to Redis on our behalf.
    key = "stats:{m}:info".format(m=self.slave_id)
    redishset_url = '{dispatch_host}/redishset'.format(
        dispatch_host=self.dispatch_host)
    redishset_data = {
        "key": key,
        "field": "cur_pro_num",
        "value": cur_pro_num
    }
    requests.post(redishset_url, redishset_data)
    # self.redis_conn.hset(key, cur_pro_num, 'self.node_info')
    redisexpire_url = '{dispatch_host}/redisexpire'.format(
        dispatch_host=self.dispatch_host)
    redisexpire_data = {"key": key, "time": 10}
    requests.post(redisexpire_url, redisexpire_data)
    # self.redis_conn.expire(key, 10)
    if self.dq.pending:
        return
    for p, q in iteritems(self.queues):
        c = yield maybeDeferred(q.count)
        if c:
            msg = yield maybeDeferred(q.pop)
            if msg is not None:  # in case of a concurrently accessed queue
                returnValue(self.dq.put(self._message(msg, p)))
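# The two HTTP calls above form a slave heartbeat: set a hash field, then
# put a short TTL on the key so the entry vanishes if the slave stops
# reporting. The commented-out lines show the direct-Redis form; a
# standalone sketch of that (assumes redis-py; the slave id and count
# values are hypothetical):
import redis

r = redis.StrictRedis()
key = "stats:%s:info" % 'slave-1'      # hypothetical slave id
r.hset(key, "cur_pro_num", 3)          # hypothetical process count
r.expire(key, 10)                      # heartbeat disappears after 10s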
def launcher(self):
    """
    Copied from website.Root.
    Should do some refactoring to avoid this duplicated code.
    """
    app = IServiceCollection(self.app, self.app)
    return app.getServiceNamed('launcher')
def render_POST(self, txrequest):
    # Previous scrapyd-style implementation, kept for reference:
    # args = native_stringify_dict(copy(txrequest.args), keys_only=False)
    # settings = args.pop('setting', [])
    # settings = dict(x.split('=', 1) for x in settings)
    # args = dict((k, v[0]) for k, v in args.items())
    # project = args.pop('project')
    # spider = args.pop('spider')
    # version = args.get('_version', '')
    # spiders = get_spider_list(project, version=version)
    # if not spider in spiders:
    #     return {"status": "error", "message": "spider '%s' not found" % spider}
    # args['settings'] = settings
    # jobid = args.pop('jobid', uuid.uuid1().hex)
    # args['_job'] = jobid
    # self.root.scheduler.schedule(project, spider, **args)
    args = native_stringify_dict(copy(txrequest.args), keys_only=False)
    args = dict((k, v[0]) for k, v in args.items())
    crawlid = args.pop('crawlid')
    appid = args.pop('appid')
    app = IServiceCollection(self.root.app, self.root.app)
    # was assigned to self.launcher and then ignored in favour of
    # self.root.launcher; both resolve the same named service
    launcher = app.getServiceNamed('launcher')
    launcher._spawn_process(appid, crawlid)
    return {"node_name": self.root.nodename, "status": "ok"}
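# A hypothetical client call for the endpoint above (the URL path and port
# are assumptions, not from the source; only appid/crawlid are required by
# the handler):
import requests

requests.post('http://localhost:6800/schedule',
              data={'appid': 'app1', 'crawlid': 'abc123'})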
class Services(object):
    implements(IServices)

    def __init__(self, app):
        self.app = app
        self.collect = IServiceCollection(self.app)
        # per-instance state (class-level lists would be shared
        # between instances)
        self._multi = []
        self._services = []

    def add(self, name, services):
        log.msg(
            'bit.core.services: Services.add %s, %s' % (name, services))
        if not isinstance(services, dict):
            services.setName(name)
            services.setServiceParent(self.collect)
            self._services.append(name)
            return
        add = True
        if name in self._multi:
            plug_services = self.collect.getServiceNamed(name)
            add = False
        else:
            plug_services = MultiService()
            plug_services.setName(name)
            self._multi.append(name)
        for sid, s in services.items():
            s.setName(sid)
            s.setServiceParent(plug_services)
        if add:
            plug_services.setServiceParent(self.collect)

    @property
    def services(self):
        _services = {}
        for service in (self._services + self._multi):
            _services[service] = self.collect.getServiceNamed(service)
        return _services
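# The dict branch above groups related services under one named MultiService
# so they start and stop together. A minimal standalone sketch of that
# pattern (the names are examples, not from the source):
from twisted.application.service import MultiService, Service

group = MultiService()
group.setName('plugins')

worker = Service()
worker.setName('worker-a')
worker.setServiceParent(group)

group.startService()   # starts every child service in the group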
def poll(self):
    # Former Redis-scan implementation, kept for reference:
    # regex = 'dispatch:{slaveId}:*:*:*'.format(slaveId=SlaveID)
    # for key in self.redis_conn.scan_iter(match=regex):
    #     signal = self.redis_conn.get(key)
    #     userId = key.split(':')[2]
    #     taskId = key.split(':')[3]
    req = Request('{dispatch_host}/hastask?slaveid={slaveid}'.format(
        slaveid=self.slave_id, dispatch_host=self.dispatch_host))
    res = urllib2.urlopen(req)
    messages = res.read()
    messages_json = json.loads(messages)
    for message in messages_json:
        signal = message['signal']
        userId = message['userId']
        taskId = message['taskId']
        if signal == 'start':
            # Start command: start it and delete the key.
            app = IServiceCollection(self.app, self.app)
            launcher = app.getServiceNamed('launcher')
            launcher._spawn_process(userId, taskId)
            print 'start command'
        else:
            # Stop command: the signal embeds the pid after '&'.
            url = '{dispatch_host}/stoptask'.format(
                dispatch_host=self.dispatch_host)
            pid = signal.split('&')[1]
            data = {
                "slaveId": self.slave_id,
                "userId": userId,
                "taskId": taskId,
                "pid": pid
            }
            if requests.post(url, data).status_code == 200:
                print 'terminate command sent'
            else:
                print 'did not terminate normally'
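# The loop above implies the dispatcher's /hastask endpoint returns JSON
# shaped roughly like this (inferred from the field accesses, not documented
# in the source; note the stop signal carries a pid after '&'):
# [
#     {"signal": "start", "userId": "42", "taskId": "abc123"},
#     {"signal": "stop&7761", "userId": "42", "taskId": "def456"}
# ]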
def launcher(self):
    app = IServiceCollection(self.app, self.app)
    return app.getServiceNamed('launcher')
def launcher(self):
    app = IServiceCollection(self.app, self.app)
    return app.getServiceNamed("launcher")
class AmqpService(service.Service):
    """
    Simple L{service.IService} which consumes an AMQP queue.

    :param settings: an instance of miutils.config.BaseConfig
    :param consume_callback: when an item arrives, send the message to
        this callback
    """

    CONSUME_INTERVAL = 10
    name = 'amqp'

    def __init__(self, app, settings):
        self.IApp = IServiceCollection(app, app)
        self.signal_manager = get_signal_manager(app)
        self.settings = settings
        self._conn = None
        self.conn_parameters = None
        self._client = None
        self._channel = None
        self._stopping = None
        self._consumer_tag = None
        self._closing = False
        self._running = False
        self._publish_meta = None
        self._message_number_in = 0
        self._message_number_out = 0
        self._queue_in = None
        self._queue_out = None
        self._lock = threading.Lock()
        self._in_retry = dict()
        self._cached_messages = []
        self.conn_retry_interval = 0
        self.app_id = settings.get('app_id', 'fw0')
        self.consume_interval = settings.getint("consume_interval", 2,
                                                section='amqp')
        self.exchange_name = settings.get("exchange_name", 'flowder-ex',
                                          section='amqp')
        self.queue_in_name = settings.get("queue_in_name",
                                          'flowder-in-queue',
                                          section='amqp')
        self.queue_in_routing_key = settings.get("queue_in_routing_key",
                                                 'flowder.in',
                                                 section='amqp')
        self.queue_out_name = settings.get("queue_out_name",
                                           'flowder-out-queue',
                                           section='amqp')
        self.queue_out_routing_key = settings.get("queue_out_routing_key",
                                                  'flowder.out',
                                                  section='amqp')

    @property
    def scheduler(self):
        return self.IApp.getServiceNamed('scheduler')

    def startService(self):
        params = {}
        if self.settings.get("username", None, section='amqp') \
                and self.settings.get("pass", None, section='amqp'):
            params['credentials'] = pika_credentials.PlainCredentials(
                self.settings.get("username", None, section='amqp'),
                self.settings.get("pass", None, section='amqp'))
        if self.settings.getdict("params", dict(), section='amqp'):
            params.update(
                self.settings.getdict("params", dict(), section='amqp'))
        if self.settings.get("amqp_vhost", '/'):
            params.update({
                'virtual_host': self.settings.get("vhost", '/',
                                                  section='amqp')
            })
        parameters = ConnectionParameters(**params)
        self._client = protocol.ClientCreator(
            reactor, twisted_connection.TwistedProtocolConnection,
            parameters)
        self.do_connect()

    def do_connect(self):
        d = self._client.connectTCP(
            self.settings.get("host", "localhost", section='amqp'),
            self.settings.getint("port", 5672, section='amqp'))
        d.addCallbacks(lambda protocol: protocol.ready, self.failed)
        d.addCallbacks(self.ready, self.failed)
        return d

    def failed(self, failure):
        if failure.check(ChannelClosed):
            self.retry_connect()
        elif failure.check(ConnectionClosed) or failure.check(
                ConnectionRefusedError):
            self.retry_connect()
        else:
            log.err("Unhandled failure in Amqp Service....")
            failure.printTraceback()
            reactor.stop()

    @defer.inlineCallbacks
    def ready(self, connection):
        self._in_retry['connection'] = False
        self._in_retry['channel'] = False
        if not connection:
            raise ConnectionClosed
        self._conn = connection
        log.msg("AMQP Connection created")
        if self._channel:
            yield self.check_connection()
        self._channel = yield self._conn.channel()
        self.conn_retry_interval = 0
        log.msg("Setting up exchange and queue")
        # Exchange
        yield self._channel.exchange_declare(
            exchange=self.exchange_name,
            type=self.settings.get("exchange_type", 'topic',
                                   section='amqp'),
            passive=True,
        )
        # Queues
        self._queue_in = yield self._channel.queue_declare(
            queue=self.queue_in_name,
            auto_delete=False,
            exclusive=False,
            durable=True,
        )
        self._queue_out = yield self._channel.queue_declare(
            queue=self.queue_out_name,
            auto_delete=False,
            exclusive=False,
            durable=True,
        )
        # Queues > Exchange bindings
        yield self._channel.queue_bind(
            exchange=self.exchange_name,
            queue=self.queue_in_name,
            routing_key=self.queue_in_routing_key)
        yield self._channel.queue_bind(
            exchange=self.exchange_name,
            queue=self.queue_out_name,
            routing_key=self.queue_out_routing_key)
        yield self._channel.basic_qos(prefetch_count=1)
        self._running = True
        log.msg("Start Consuming %s" % self.queue_in_name)
        log.msg("Start Producing %s" % self.queue_out_name)
        # Consume the queue_in queue
        queue_obj, consumer_tag = yield self._channel.basic_consume(
            queue=self.queue_in_name, no_ack=False)
        l = task.LoopingCall(self.read, queue_obj)
        l.start(0.01)

    @defer.inlineCallbacks
    def read(self, queue_obj):
        ch, method, properties, msg = yield queue_obj.get()
        msg = amqp_message_decode(msg)
        log.debug("Consuming msg %s" % msg)
        self._message_number_in += 1
        self.process_in_message(msg)
        # NOTE: time.sleep blocks the reactor for consume_interval
        # seconds; see the non-blocking sketch after this class.
        time.sleep(self.consume_interval)
        yield ch.basic_ack(delivery_tag=method.delivery_tag)
        log.debug('Acknowledging message #%s' % self._message_number_in)

    @defer.inlineCallbacks
    def publish(self, message):
        self._message_number_out += 1
        amqp_message_update_meta(message, self.get_meta())
        amqp_msg = amqp_message_encode(message)
        log.debug("Publish message #%s, AMQP message: %s"
                  % (self._message_number_out, amqp_msg))
        properties = BasicProperties(
            app_id=self.app_id,
            content_type='application/json',
            content_encoding='utf-8',
            delivery_mode=2,  # persistent
        )
        try:
            yield self._channel.basic_publish(
                self.exchange_name,
                self.queue_out_routing_key,
                amqp_msg,
                properties=properties,
            )
        except ChannelClosed:
            self.retry_channel()
            self._cached_messages.append(message)
        except AMQPError:
            self.retry_connect()
            self._cached_messages.append(message)

    def retry_connect(self):
        with self._lock:
            if 'connection' not in self._in_retry \
                    or not self._in_retry['connection']:
                self.conn_retry_interval += 2
                log.err("Connection Closed! retry connecting in %s "
                        "seconds..." % self.conn_retry_interval)
                self._in_retry['connection'] = True
                d = task.deferLater(reactor, self.conn_retry_interval,
                                    self.do_connect)
                d.addErrback(self.failed)

    def retry_channel(self):
        with self._lock:
            if 'channel' not in self._in_retry \
                    or not self._in_retry['channel']:
                log.err("Channel Closed! retry creating it ...")
                self._in_retry['channel'] = True
                d = defer.maybeDeferred(self.ready, self._conn)
                d.addErrback(self.failed)

    def check_connection(self):
        d = defer.maybeDeferred(self.get_queue_size)
        d.addErrback(self.failed)

    @defer.inlineCallbacks
    def get_queue_size(self):
        queue = yield self._channel.queue_declare(
            queue=self.queue_out_name, passive=True)
        defer.returnValue(queue)

    def process_in_message(self, message):
        if 'fetch_uri' not in message:
            raise InvalidAMQPMessage('Given message has no fetch_uri value.')
        message_copy = message.copy()
        jobid = uuid.uuid1().hex
        _message = dict()
        _message['job_id'] = jobid
        _message['fetch_uri'] = message_copy.pop('fetch_uri')
        # Encode additional fields to send back to client on fetch
        try:
            _message['settings'] = amqp_message_encode(message_copy)
        except:
            # @TODO define a more specific exception handling here
            raise EncodingError("Can't encode message info. %s" % message)
        self.scheduler.schedule(_message)
        self.signal_manager.send_catch_log(signal=signals.request_received,
                                           jobid=jobid)

    def stopService(self):
        self._stopping = True
        if not self._cached_messages:
            return self.really_stop_service(None)
        log.warn(
            "Cached messages found, try to publish them before shut down!")
        dfds = []
        for slot, c in enumerate(self._cached_messages):
            # was `self._publish(c, slot)`, but no such method exists;
            # publish() takes only the message
            dfd = defer.maybeDeferred(self.publish, c)
            dfds.append(dfd)
        dfd_list = defer.DeferredList(dfds)
        dfd_list.addBoth(self.really_stop_service)
        return dfd_list

    def really_stop_service(self, _):
        service.Service.stopService(self)

    def get_meta(self):
        if not self._publish_meta:
            self._publish_meta = {
                'app_id': self.app_id,
                'timestamp': time.time(),
            }
        return self._publish_meta
def poller(self):
    app = IServiceCollection(self.app, self.app)
    return app.getServiceNamed('poller')