def start():
    # Remove logs left over from a previous run.
    if os.path.exists('./Worker.log'):
        os.remove('./Worker.log')
    if os.path.exists('./Scrapy.log'):
        os.remove('./Scrapy.log')
    # Name the process after the worker's platform from the database.
    setproctitle.setproctitle(DBSession.query(WorkerModel).get(1).platform)
    # Re-initialize the reactor so run() works even after a previous run.
    reactor.__init__()  # @UndefinedVariable
    CrawlerManager()
    reactor.run()  # @UndefinedVariable
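Every snippet in this section leans on the same workaround: once reactor.run() has returned, Twisted raises ReactorNotRestartable on a second run(), and these functions dodge that by calling the private reactor.__init__(). A minimal sketch of the shared idiom, factored into a hypothetical helper (run_once_more and entry_point are assumed names; __init__() and _startedBefore are private, unsupported APIs that happen to work for the default select reactor):

from twisted.internet import reactor

def run_once_more(entry_point):
    # _startedBefore is the private flag Twisted sets after the first run();
    # forcibly re-running __init__() resets the reactor's internal state.
    if reactor._startedBefore:
        reactor.__init__()
    reactor.callWhenRunning(entry_point)  # schedule work for when the loop starts
    reactor.run()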
def sdfg(callback):
    log.startLogging(sys.stdout)
    # Finish the test whether the callback succeeds or raises an exception.
    callback().then(promise.promise()).then(lambda _: reactor.stop())
    reactor.callLater(3, reactor.stop)  # safety timeout after 3 seconds
    reactor.run()
    reactor.__init__()  # reset so the next test can run the reactor again
def check_proxies(self):
    all_list = self.db_conn.get_list(ProxyManager.all_list)
    if all_list is None or len(all_list) == 0:
        return []
    self.valid_proxy = []
    self.unchecked_proxy = 0
    self.mutli_thread_lock = threading.Lock()
    self.stop_reactor_lock = threading.Lock()
    mutli_check_func = functools.partial(self.mutli_thread_check, all_list)
    if reactor._startedBefore:
        reactor.__init__()
    # callFromThread blocks the thread that calls it; run the function that
    # spawns the checker threads on the reactor's main thread so the reactor
    # can be stopped cleanly when they finish.
    reactor.callFromThread(mutli_check_func)
    reactor.run()
    return self.valid_proxy
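mutli_thread_check itself is not shown; here is a hedged sketch of the per-proxy bookkeeping it presumably performs, inferred from the fields initialized above (the method name, the proxy/ok/total parameters, and the exact stop condition are all assumptions):

def _proxy_checked(self, proxy, ok, total):
    # Hypothetical callback run once per checked proxy.
    with self.mutli_thread_lock:
        if ok:
            self.valid_proxy.append(proxy)
        self.unchecked_proxy += 1
        finished = self.unchecked_proxy == total
    if finished:
        with self.stop_reactor_lock:
            # reactor.stop() raises if called on a stopped reactor, so guard
            # it and route the call through the reactor's own thread.
            if reactor.running:
                reactor.callFromThread(reactor.stop)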
def main():
    '''Start the crawler client.'''
    setproctitle.setproctitle(kid_setting.CLIENT_PROC_NAME)
    client_factory = KidClientFactory()
    reactor.__init__()  # @UndefinedVariable
    reactor.suggestThreadPoolSize(25)  # @UndefinedVariable
    connector = reactor.connectTCP(
        kid_setting.SERVER_IP,  # @UndefinedVariable
        kid_setting.SERVER_PORT,
        client_factory)
    # Enlarge the socket send buffer to roughly 400 KB.
    connector.transport.getHandle().setsockopt(
        socket.SOL_SOCKET, socket.SO_SNDBUF, 4096 * 100)
    print_plus('IP:%s\tPort:%s\tHBase IP:%s\tCrawler Type:%d' %
               (kid_setting.SERVER_IP, kid_setting.SERVER_PORT,
                kid_setting.HBASE_HOST, kid_setting.CRAWLER_TYPE))
    reactor.run()  # @UndefinedVariable
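One caveat about the setsockopt call above: the kernel may round or cap the requested buffer size (Linux, for instance, doubles the value to leave room for bookkeeping). A small verification sketch, reading the effective size back (the variable name is an assumption):

sndbuf = connector.transport.getHandle().getsockopt(
    socket.SOL_SOCKET, socket.SO_SNDBUF)
print_plus('Effective SO_SNDBUF: %d bytes' % sndbuf)  # may differ from 4096 * 100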
def craw_all():
    """
    :return: list of dict[str title, str url], for all sites
    """
    @defer.inlineCallbacks
    def crawl():
        yield runner.crawl(GeneralSpider)
        reactor.stop()

    global settings_ref
    runner = CrawlerRunner()
    crawl()
    reactor.run()  # blocks here until the last crawl has finished
    reactor.__init__()  # reset so a later call can run the reactor again
    ret = []
    for filename in settings_ref.temp_file_name:
        with open(filename, 'r') as f:  # the with block closes f automatically
            output = json.load(f)
        ret.append(output)
    return ret
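A usage sketch: the trailing reactor.__init__() is exactly what allows craw_all() to be invoked more than once in the same process; without it the second reactor.run() would raise ReactorNotRestartable.

first_pass = craw_all()   # runs one reactor pass, then resets the reactor
second_pass = craw_all()  # works only because of the __init__() above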
def Connection(self, host, port):
    f = MIGRobotBaseSchedulerFactory(self.platform_id, self.uid, self.robot_id)
    # Because a process pool is used, the worker process inherits a copy of
    # the main process's reactor; that reactor has already run in the main
    # process, so it must be re-initialized here.
    reactor.__init__()
    reactor.connectTCP(host, int(port), f)
# coding=utf-8
import gevent.monkey
gevent.monkey.patch_all()  # patch the stdlib before Twisted is imported

from twisted.internet import reactor
from ultron.cluster.central.central_engine import CentralEngine
from ultron.utilities.mlog import MLog

if __name__ == "__main__":
    MLog.config('central')
    reactor.__init__()
    central_engine = CentralEngine()
    reactor.run()
def start():
    reactor.__init__()  # @UndefinedVariable
    CrawlerManager()
    reactor.run()  # @UndefinedVariable
def setUp(self):
    debug("Setting up %s test...", self.__class__.__name__)
    reactor.__init__()
    ThousandParsecClientFactory().makeTestSession(self)
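This setUp relies on the same re-initialization trick to give every test a fresh reactor. A hypothetical matching tearDown (not in the source) that would keep consecutive tests from tripping over a still-running reactor:

def tearDown(self):
    # Assumed counterpart: make sure the reactor is stopped so the next
    # test's setUp can call reactor.__init__() on a quiescent instance.
    if reactor.running:
        reactor.stop()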
def Connection(self, host, port):
    f = MIGAssistantBaseSchedulerFactory(self.platform_id, self.uid, self.nickname)
    # Same as above: the pool worker inherits the already-run reactor from
    # the main process, so re-initialize it before connecting.
    reactor.__init__()
    reactor.connectTCP(host, port, f)
def Connection(self, host, port):
    f = MIGBaseSchedulerFactory(self.platform_id, self.uid, self.token, self.oppid, self.oppo_type)
    # Same as above: re-initialize the reactor copied from the main process
    # before opening the connection.
    reactor.__init__()
    reactor.connectTCP(host, port, f)
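These three Connection variants share the same comment: each runs inside a process-pool worker that inherits the main process's already-run reactor. A hedged sketch of such a driver (the Pool layout, _worker, and the placement of reactor.run() are assumptions not shown in the source; the scheduler objects would also need to be picklable to cross the pool boundary):

from multiprocessing import Pool
from twisted.internet import reactor

def _worker(job):
    # Hypothetical worker: `job` is a (scheduler, host, port) tuple.
    scheduler, host, port = job
    scheduler.Connection(host, port)  # re-inits the inherited reactor copy
    reactor.run()                     # each worker drives its own event loop

def dispatch(jobs, processes=4):
    with Pool(processes=processes) as pool:
        pool.map(_worker, jobs)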