def fuckup(p_command=None):
    """Bootstrap the crawler-scheduler process.

    Loads the application configuration, initializes logging and the
    Elasticsearch client, starts the crawler-register and parallel-schedule
    workers on a shared job queue, then parks the main thread in a sleep
    loop until interrupted.

    :param p_command: optional command-line override forwarded to
        ``Configure.load`` (default ``None``).
    """
    Main.rootdir = os.path.abspath('.')

    # NOTE(review): this Manager instance is never referenced afterwards,
    # but constructing it spawns a manager server process as a side effect.
    # Kept to preserve behavior — confirm whether it can be removed.
    manager = Manager()

    # Initialize application configuration from the YAML file next to the
    # working directory.
    filename = "application-config.yml"
    Configure.load(p_dir=Main.rootdir + "/" + filename, p_command=p_command)

    # Initialize logging.
    Logger()

    # Initialize the Elasticsearch client.
    ESHandler.ini()

    # Job queue shared between the crawler register and the scheduler;
    # capacity comes from configuration (default 1000).
    main_jod_queue = ThreadSafeQueue(
        size=Configure.configure().value("scheduler.messageQueueSize",
                                         p_default=1000))
    crawler_picker = CrawlerPicker()

    Main.crawlerRegister = CrawlerRegister(p_crawler_picker=crawler_picker,
                                           p_main_jod_queue=main_jod_queue)
    Main.crawlerRegister.start()

    Main.parellelSchedule = ParellelSchedule(p_main_jod_queue=main_jod_queue)
    Main.parellelSchedule.start()

    # After starting the sub-process, join makes its shared objects
    # available (per the original author's note).
    Main.crawlerRegister.join()

    try:
        # Keep the main thread alive so the worker threads/processes
        # continue running.
        while True:
            time.sleep(2)
    except (KeyboardInterrupt, SystemExit):
        pass
def fuckup(p_command=None):
    """Bootstrap the headless web-driver pool server.

    Loads configuration, initializes logging, builds the driver/request
    queues, runs the web-driver container, then starts the stream server
    that accepts incoming requests until interrupted.

    :param p_command: optional command-line override forwarded to
        ``Configure.load`` (default ``None``).
    """
    start = datetime.datetime.now()
    Main.rootdir = os.path.abspath('.')

    # NOTE(review): this Manager instance is never referenced afterwards,
    # but constructing it spawns a manager server process as a side effect.
    # Kept to preserve behavior — confirm whether it can be removed.
    manager = Manager()

    # Initialize application configuration.
    filename = "application-config.yml"
    Configure.load(p_dir=Main.rootdir + "/" + filename, p_command=p_command)

    # Initialize logging.
    Logger()
    Logger.getLogger().info("Web Driver Pool Launching......")

    # Initialize the driver pool: one queue bounds the number of browsers,
    # the other bounds pending requests.
    driver_queue = queue.Queue(
        Configure.configure().value("headless.webdriver.maxBrowserNum"))
    request_queue = queue.Queue(
        Configure.configure().value("headless.webdriver.maxRequestAcceptNum"))
    Main.webDriverContainer = WebDriverContainer(
        p_queue=driver_queue,
        p_request_queue=request_queue
    )
    Main.webDriverContainer.run()

    end = datetime.datetime.now()
    # BUG FIX: the original computed (start - end).seconds — the .seconds
    # field of a negative timedelta is a bogus value near 86400, not the
    # elapsed time. Measure end - start instead.
    duration = (end - start).seconds
    Logger.getLogger().info("Web Driver Pool Launched after %d seconds" % (duration))

    try:
        # The delimiter is configured as escaped-hex text (tokens split on
        # the literal two characters '\x'); each non-empty hex token is
        # decoded to its character.
        # NOTE(review): characters are PREPENDED, so destr is the reverse
        # of the configured token order — preserved as-is; confirm this is
        # intentional.
        delimiter = Configure.configure().value("server.webdriverServer.delimiter")
        destr = ''
        for token in delimiter.split('\\x'):
            if token != '':
                destr = chr(int(token, 16)) + destr
        StreamHandler.startlisten(p_name="Headless-Webdriver-Server",
                                  p_prefix="server.webdriverServer",
                                  p_queue=request_queue,
                                  p_delimiter=destr)
    except (KeyboardInterrupt, SystemExit):
        pass
def fuckup(p_command=None):
    """Bootstrap a work node.

    Loads configuration, discovers the node's outbound IP address,
    initializes logging and the Elasticsearch client, wires up the worker
    monitor/leader/communicator, starts the node's job-accept service and
    the Tornado IO loop, and keeps the main thread alive until interrupted.

    :param p_command: optional command-line override forwarded to
        ``Configure.load`` (default ``None``).
    """
    Main.rootdir = os.path.abspath('.')

    # Initialize application configuration.
    filename = "application-config.yml"
    Configure.load(p_dir=Main.rootdir + "/" + filename, p_command=p_command)
    nodename = Configure.configure().value("worknode.name")

    # Discover the outbound IP by "connecting" a UDP socket toward a public
    # address (UDP connect sends no packet; it only selects a local address).
    # BUG FIX: socket creation is hoisted out of the try block — in the
    # original, if socket.socket() itself raised, the finally clause hit a
    # NameError on the unbound name `s`.
    s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    try:
        s.connect(('8.8.8.8', 80))
        Main.ipAddr = s.getsockname()[0]
    finally:
        s.close()

    # Initialize logging.
    Logger()

    # Initialize the Elasticsearch client.
    Main.es_client = ESHandler()

    # Initialize the worker monitor and its periodic schedule.
    monitor = MultiProcessJobWatcher()
    executors = {
        'default': ThreadPoolExecutor(1),
        'processpool': ProcessPoolExecutor(1)
    }
    job_defaults = {'coalesce': True, 'max_instances': 1}
    mosche = BackgroundScheduler(executors=executors,
                                 job_defaults=job_defaults,
                                 timezone=utc)
    mosche.add_job(monitor, 'interval',
                   seconds=Configure.configure().value(
                       "worknode.workerMonitorInterval"))
    # NOTE(review): mosche.start() is never called, so the monitor job will
    # never fire — confirm whether the scheduler should be started here.

    # Initialize the worker leader.
    leader = Leader(p_addr=Main.ipAddr, p_node_name=nodename, p_monitor=monitor)

    # Node register and health-report schedule endpoint.
    scheduleserver = {
        "host": Configure.configure().value("server.healthServer.host"),
        "port": Configure.configure().value("server.healthServer.port"),
    }
    Main.communicator = Communicator(p_schedule_server=scheduleserver,
                                     p_leader=leader)

    # Node job-accept service; IOLoop.start() blocks until the loop stops.
    ServerWrapper.listen(p_name=nodename, p_prefix="server.nodeServer",
                         p_handler=leader)
    tornado.ioloop.IOLoop.current().start()

    try:
        # Keep the main thread alive (reached only after the IO loop stops).
        while True:
            time.sleep(2)
    except (KeyboardInterrupt, SystemExit):
        # BUG FIX: the original called parellelSchedule.shutdown(), a name
        # that does not exist in this scope (guaranteed NameError). Shut
        # down the local background scheduler instead. Not strictly
        # necessary if daemonic mode is enabled, per the original note.
        mosche.shutdown()