Exemplo n.º 1
0
  def fuckup(p_command=None):
    """Bootstrap the scheduler process and keep the main thread alive.

    Steps, in order: record the working directory on ``Main``, load
    ``application-config.yml`` from that directory, set up logging and the
    Elasticsearch handler, create the shared job queue, start the crawler
    register and the parallel scheduler, wait on the crawler register, and
    finally idle in a sleep loop until interrupted.

    Args:
        p_command: Passed through unchanged to ``Configure.load``.
    """
    Main.rootdir = os.path.abspath('.')
    # Kept bound to a local for the lifetime of this call; it is not used
    # directly below.
    shared_manager = Manager()

    # Application configuration lives next to the working directory.
    config_name = "application-config.yml"
    config_path = "/".join([Main.rootdir, config_name])
    Configure.load(p_dir=config_path, p_command=p_command)

    # Logging
    Logger()

    # Elasticsearch client
    ESHandler.ini()

    # Job queue shared by the crawler register and the scheduler.
    queue_capacity = Configure.configure().value(
        "scheduler.messageQueueSize", p_default=1000)
    job_queue = ThreadSafeQueue(size=queue_capacity)

    picker = CrawlerPicker()
    register = CrawlerRegister(p_crawler_picker=picker,
                               p_main_jod_queue=job_queue)
    Main.crawlerRegister = register
    Main.crawlerRegister.start()

    scheduler = ParellelSchedule(p_main_jod_queue=job_queue)
    Main.parellelSchedule = scheduler
    Main.parellelSchedule.start()

    # Wait on the crawler register once every sub-worker has been started.
    Main.crawlerRegister.join()

    # Idle loop that keeps the main thread alive; Ctrl-C or a SystemExit
    # ends it quietly.
    try:
        while True:
            time.sleep(2)
    except (KeyboardInterrupt, SystemExit):
        pass
Exemplo n.º 2
0
  def fuckup(p_command=None):
    """Launch the headless web-driver pool and start the request listener.

    Loads ``application-config.yml`` from the current working directory,
    initializes logging, builds the driver and request queues, runs the
    ``WebDriverContainer``, logs how long the launch took, and then starts
    the stream listener using the delimiter decoded from configuration.

    Args:
        p_command: Passed through unchanged to ``Configure.load``.
    """
    start = datetime.datetime.now()
    Main.rootdir = os.path.abspath('.')
    # Kept bound to a local for the lifetime of this call; it is not used
    # directly below.
    manager = Manager()

    #Initialize application configure
    filename = "application-config.yml"
    Configure.load(p_dir=Main.rootdir+"/"+filename, p_command=p_command)

    #Initialize log
    Logger()
    Logger.getLogger().info("Web Driver Pool Launching......")

    #Initialize driver pool
    driver_queue = queue.Queue(Configure.configure().value("headless.webdriver.maxBrowserNum"))
    request_queue = queue.Queue(Configure.configure().value("headless.webdriver.maxRequestAcceptNum"))

    Main.webDriverContainer = WebDriverContainer( p_queue = driver_queue, p_request_queue = request_queue )
    Main.webDriverContainer.run()

    end = datetime.datetime.now()
    # Elapsed time is end - start. The previous (start - end) produced a
    # negative timedelta, whose normalized ``.seconds`` field is close to
    # 86400 rather than the real duration. total_seconds() also stays correct
    # for launches longer than a day.
    duration = int((end - start).total_seconds())
    Logger.getLogger().info("Web Driver Pool Launched after %d seconds"%(duration))

    try:
      # The configured delimiter is a sequence of literal "\xNN" escapes;
      # split on the "\x" marker and decode each hex pair into a character.
      delimiter = Configure.configure().value("server.webdriverServer.delimiter")
      hex_codes = delimiter.split('\\x')
      destr = ''
      for hex_code in hex_codes:
        if hex_code != '':
          # NOTE(review): each character is *prepended*, so the decoded
          # delimiter is in reverse order of the configured escapes.
          # Preserved as-is -- confirm this is intended.
          destr = chr(int(hex_code, 16)) + destr
      StreamHandler.startlisten(p_name="Headless-Webdriver-Server", p_prefix="server.webdriverServer", p_queue=request_queue, p_delimiter=destr)
    except (KeyboardInterrupt, SystemExit):
      # An interrupt or exit request ends the listener quietly.
      pass
Exemplo n.º 3
0
    def fuckup(p_command=None):
        """Bootstrap a work node and serve jobs until the IO loop stops.

        Loads ``application-config.yml`` from the current working directory,
        determines the local IP address, initializes logging and the
        Elasticsearch handler, schedules the worker monitor, creates the
        leader and the communicator, starts the node server and then runs
        the Tornado IO loop.

        Args:
            p_command: Passed through unchanged to ``Configure.load``.
        """
        Main.rootdir = os.path.abspath('.')

        #Initialize application configure
        filename = "application-config.yml"
        Configure.load(p_dir=Main.rootdir + "/" + filename,
                       p_command=p_command)

        nodename = Configure.configure().value("worknode.name")

        # Discover the outbound local address by "connecting" a UDP socket
        # and reading back the local endpoint the OS selected.
        # The socket is created *before* the try block: if creation fails,
        # the finally clause must not touch an unbound ``s`` (which would
        # raise NameError and hide the original error).
        s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        try:
            s.connect(('8.8.8.8', 80))
            Main.ipAddr = s.getsockname()[0]
        finally:
            s.close()

        #Initialize log
        Logger()

        #Initialize elasticsearch client
        Main.es_client = ESHandler()

        #Initialize worker monitor
        monitor = MultiProcessJobWatcher()
        executors = {
            'default': ThreadPoolExecutor(1),
            'processpool': ProcessPoolExecutor(1)
        }
        job_defaults = {'coalesce': True, 'max_instances': 1}
        mosche = BackgroundScheduler(executors=executors,
                                     job_defaults=job_defaults,
                                     timezone=utc)
        # NOTE(review): no ``mosche.start()`` call is visible in this
        # function -- confirm the scheduler is started elsewhere, otherwise
        # the monitor job never fires.
        mosche.add_job(monitor,
                       'interval',
                       seconds=Configure.configure().value(
                           "worknode.workerMonitorInterval"))

        #Initialize worker leader
        leader = Leader(p_addr=Main.ipAddr,
                        p_node_name=nodename,
                        p_monitor=monitor)

        #Initialize node register and health info report schedule
        scheduleserveraddr = Configure.configure().value(
            "server.healthServer.host")
        scheduleserverport = Configure.configure().value(
            "server.healthServer.port")
        scheduleserver = {
            "host": scheduleserveraddr,
            "port": scheduleserverport
        }
        Main.communicator = Communicator(p_schedule_server=scheduleserver,
                                         p_leader=leader)

        #Initialize node job accept service
        ServerWrapper.listen(p_name=nodename,
                             p_prefix="server.nodeServer",
                             p_handler=leader)
        # IOLoop.start() blocks until the loop is stopped, so the keep-alive
        # loop below is only reached after that happens.
        tornado.ioloop.IOLoop.current().start()

        try:
            # This is here to simulate application activity (which keeps the main thread alive).
            while True:
                time.sleep(2)
        except (KeyboardInterrupt, SystemExit):
            # Not strictly necessary if daemonic mode is enabled but should be done if possible
            # NOTE(review): ``parellelSchedule`` is not defined inside this
            # function; unless it is a module-level name this line raises
            # NameError. ``mosche`` may have been intended -- confirm.
            parellelSchedule.shutdown()