示例#1
0
    def __init__(self, warehouse, warehouse_result):
        """Store the two warehouse names and create the Kafka client objects.

        warehouse        -- name handed to KafkaConsumer as the topic to read.
        warehouse_result -- stored on the instance; not used in this method.
        """
        self.warehouse, self.warehouse_result = warehouse, warehouse_result

        # The producer is built on top of the low-level client connection.
        self.kafka = KafkaClient(Conf.getWareHouseAddr())
        self.producer = KeyedProducer(self.kafka)

        # Consumer joins group "cnlab" with auto-commit enabled; the commit
        # interval is given in milliseconds.
        consumer_settings = dict(
            bootstrap_servers=[Conf.getWareHouseAddr()],
            group_id="cnlab",
            auto_commit_enable=True,
            auto_commit_interval_ms=30 * 1000,
            auto_offset_reset='smallest',
        )
        self.consumer = KafkaConsumer(self.warehouse, **consumer_settings)
示例#2
0
    def __init__(self, warehouse, warehouse_result):
        """Keep the warehouse names and open the Kafka client, producer and consumer.

        warehouse        -- name passed to KafkaConsumer as the topic to read.
        warehouse_result -- only stored on the instance by this method.
        """
        self.warehouse = warehouse
        self.warehouse_result = warehouse_result

        client = KafkaClient(Conf.getWareHouseAddr())
        self.kafka = client
        self.producer = KeyedProducer(client)

        # Keyword settings for the consumer, collected in one place.
        consumer_kwargs = {
            "bootstrap_servers": [Conf.getWareHouseAddr()],
            "group_id": "cnlab",
            "auto_commit_enable": True,
            "auto_commit_interval_ms": 30 * 1000,
            "auto_offset_reset": 'smallest',
        }
        self.consumer = KafkaConsumer(self.warehouse, **consumer_kwargs)
示例#3
0
    def check_kafka_events():
        """Print the outcome of every task published on the result topic.

        Consumes the Kafka topic named "<wk.options.warehouse>Result" and, for
        each message, prints its coordinates followed by a success or failure
        line depending on ``task.state``. The loop runs for as long as the
        module-level ``loopCondition`` flag is true.
        """
        global loopCondition
        from kafka import KafkaConsumer, KafkaClient, SimpleProducer
        warehouse_addr = Conf.getWareHouseAddr()
        consumer = KafkaConsumer("%sResult" % wk.options.warehouse,
                                 bootstrap_servers=[warehouse_addr],
                                 group_id="cnlab",
                                 auto_commit_enable=True,
                                 auto_commit_interval_ms=30 * 1000,
                                 auto_offset_reset='smallest')

        while loopCondition:
            for message in consumer.fetch_messages():
                # Single-argument print(...) prints the same text on Python 2
                # and is also valid syntax on Python 3.
                print("topic=%s, partition=%s, offset=%s, key=%s " %
                      (message.topic, message.partition, message.offset,
                       message.key))

                # NOTE(security): unpickling executes arbitrary code if the
                # payload is attacker-controlled; only safe while every
                # producer on this topic is trusted.
                task = cPickle.loads(message.value)

                if task.state == Task.TASK_FINISHED:
                    print("taskId:%s,success!!!:%s" % (task.id, task.result))
                else:
                    print("taskId:%s,failed!!!" % task.id)

                # Mark the message as processed so its offset can be committed.
                consumer.task_done(message)

                # Re-check the flag per message so a stop request does not have
                # to wait for the current batch to drain.
                if not loopCondition:
                    break
示例#4
0
 def completionEvents(self):
     """Yield completion events taken from the scheduler's queue.

     When the mode is not "factory", events are pulled without blocking and
     each one is acknowledged with task_done() once the caller resumes the
     generator. Iteration ends when the queue is empty and the finished plus
     failed counters add up to the total. When the mode is "factory", an
     Exception is raised that points the caller at the warehouse instead.
     """
     if self.mode != "factory":
         while True:
             try:
                 event = self.scheduler.completionEvents.get_nowait()
             except Queue.Empty:
                 # Queue drained: stop only once every task is accounted for,
                 # otherwise keep polling.
                 settled = self.status.finished_count + self.status.fail_count
                 if self.status.totalNum == settled:
                     break
             else:
                 yield event
                 self.scheduler.completionEvents.task_done()
     if self.mode == "factory":
         location = (Conf.getWareHouseAddr(), self.options.warehouse)
         raise Exception(
             "please consume results from warehouse [%s,%s]!" % location)
示例#5
0
    def initialize(self):
        """Prepare name, status, scheduler and default parallelism for this manager.

        Exits the process with status 2 when a mode-specific required option
        is missing, and with status 1 when the mode itself is not one of
        local, process, standalone, factory or mesos. Sets ``initialized``
        to True on success.
        """
        if not self.name:
            self.name = "oc" + random_time_str()

        self.status = ManagerStatus(self.name)
        self.status.mode = self.mode

        mode = self.mode

        # Mode-specific required options are validated before any scheduler
        # is constructed.
        if mode == 'standalone' and not self.options.workertype:
            logger.error(
                "when --mode is standalone, --workertype must be specified")
            sys.exit(2)

        if mode == 'factory' and not self.options.warehouse:
            logger.error(
                "when --mode is factory, --warehouse must be specified")
            sys.exit(2)

        # One factory per supported mode; only the selected one is invoked.
        scheduler_factories = {
            'local': lambda: LocalScheduler(self),
            'process': lambda: MultiProcessScheduler(self,
                                                     self.options.parallel),
            'standalone': lambda: StandaloneScheduler(self,
                                                      self.options.workertype),
            'factory': lambda: FactoryScheduler(self, Conf.getWareHouseAddr(),
                                                self.options.warehouse),
            'mesos': lambda: MesosScheduler(self, Conf.getMesosMaster(),
                                            self.options),
        }
        make_scheduler = scheduler_factories.get(mode)
        if make_scheduler is None:
            logger.error(
                "error mode, --mode should be one of [local, process, standalone, factory, mesos]"
            )
            sys.exit(1)

        self.scheduler = make_scheduler()
        # Only the 'local' mode counts as local.
        self.isLocal = (mode == 'local')

        # An explicit parallel option wins; otherwise ask the scheduler.
        requested = self.options.parallel
        if requested:
            self.defaultParallelism = requested
        else:
            self.defaultParallelism = self.scheduler.defaultParallelism()

        self.initialized = True
示例#6
0
def start_factory_mesos():
    """Command-line entry point: run the FactoryMesos scheduler until stopped.

    Steps, in order:
      1. Parse command-line options, falling back to values from ``Conf``
         for the Mesos master and the warehouse address.
      2. Configure logging and start a ``MesosSchedulerDriver`` around a
         ``FactoryMesos`` scheduler.
      3. Spawn task feeders: one MySQL poller when the warehouse address has
         more than two comma-separated fields, otherwise one Kafka consumer
         per priority level.
      4. Loop calling ``sched.check(driver)`` every 0.5 s, reviving offers
         when none arrived for roughly a minute, until the scheduler stops
         or the user interrupts.
      5. Signal the feeders to stop, stop the driver and exit the process
         with ``sched.status``.
    """
    global pyroLoopCondition
    parser = OptionParser(
        usage="Usage: python factorymesos.py [options] <command>")
    parser.allow_interspersed_args = False
    parser.add_option("-s",
                      "--master",
                      type="string",
                      default="",
                      help="url of master (mesos://172.31.252.180:5050)")
    parser.add_option("-f",
                      "--factory",
                      type="string",
                      default="",
                      help="host:port of master (172.31.252.180:6666)")
    parser.add_option(
        "-w",
        "--warehouse_addr",
        type="string",
        default="",
        help=
        "kafka-172.31.252.182:9092|mysql-172.31.254.25:3306,db,username,password"
    )
    parser.add_option("-p",
                      "--task_per_node",
                      type="int",
                      default=0,
                      help="max number of tasks on one node (default: 0)")
    parser.add_option("-I",
                      "--image",
                      type="string",
                      help="image name for Docker")
    parser.add_option("-V",
                      "--volumes",
                      type="string",
                      help="volumes to mount into Docker")
    parser.add_option("-r",
                      "--retry",
                      type="int",
                      default=0,
                      help="retry times when failed (default: 0)")
    parser.add_option(
        "-e",
        "--config",
        type="string",
        default="/work/opencluster/config.ini",
        help=
        "absolute path of configuration file(default:/work/opencluster/config.ini)"
    )

    parser.add_option("-g",
                      "--group",
                      type="string",
                      default='',
                      help="which group to run (default: ''")
    parser.add_option(
        "-q",
        "--quiet",
        action="store_true",
        help="be quiet",
    )
    parser.add_option(
        "-v",
        "--verbose",
        action="store_true",
        help="show more useful log",
    )

    (options, command) = parser.parse_args()

    # NOTE(review): parse_args() always returns a Values object, which is
    # truthy, so this branch looks unreachable; kept to preserve behavior.
    if not options:
        parser.print_help()
        sys.exit(2)

    if options.config:
        Conf.setConfigFile(options.config)

    # Command-line values win; configuration file values are the fallback.
    options.master = options.master or Conf.getMesosMaster()
    options.warehouse_addr = options.warehouse_addr or Conf.getWareHouseAddr()

    # NOTE(review): servers/servs/server are not used later in this
    # function; kept as-is in case the lookup is relied on for validation.
    servers = options.factory or Conf.getFactoryServers()
    servs = servers.split(",")
    server = servs[0].split(":")

    # --quiet takes precedence over --verbose.
    if options.quiet:
        options.logLevel = logging.ERROR
    elif options.verbose:
        options.logLevel = logging.DEBUG
    else:
        options.logLevel = logging.INFO
    setLogger(Conf.getFactoryServiceName(), "MESOS", options.logLevel)

    implicitAcknowledgements = 1
    if os.getenv("MESOS_EXPLICIT_ACKNOWLEDGEMENTS"):
        implicitAcknowledgements = 0
    sched = FactoryMesos(options, command, implicitAcknowledgements)

    driver = MesosSchedulerDriver(sched, sched.framework, options.master,
                                  implicitAcknowledgements)
    driver.start()
    logger.debug("Mesos Scheudler driver started")

    # MySQL form: "host:port,db,user,password"; Kafka form: "host:port".
    warehouse_addrs = options.warehouse_addr.split(",")

    def fetchTasksFromMySQL():
        """Poll t_task for rows with status=0 and hand them to the scheduler.

        Each fetched row is unpickled, appended to ``sched`` and marked
        status=1. Runs until ``pyroLoopCondition`` becomes false.
        """
        global pyroLoopCondition
        mysqlIpAndPort = warehouse_addrs[0].split(":")
        last_data_time = time.time()

        while pyroLoopCondition:
            db = MySQLdb.connect(host=mysqlIpAndPort[0],
                                 port=int(mysqlIpAndPort[1]),
                                 db=warehouse_addrs[1],
                                 user=warehouse_addrs[2],
                                 passwd=warehouse_addrs[3])
            try:
                cur = db.cursor()
                curUpt = db.cursor()
                try:
                    dataResults = cur.execute(
                        "select task_id,task_desc,task_start_time,status from t_task where status=0 order by priority asc limit 200"
                    )
                    results = cur.fetchmany(dataResults)
                    for r in results:
                        # NOTE(security): unpickling runs arbitrary code if
                        # task_desc can be written by an untrusted party.
                        sched.append_task(cPickle.loads(r[1]))
                        # Parameterized so the driver quotes/escapes task_id
                        # instead of splicing it into the SQL text.
                        curUpt.execute(
                            "update t_task set task_start_time=now(),status=1 where task_id=%s",
                            (r[0], ))
                    if results:
                        db.commit()
                        last_data_time = time.time()
                        driver.reviveOffers()
                finally:
                    # Close cursors on the error path as well.
                    curUpt.close()
                    cur.close()
            finally:
                db.close()

            # Back off only after the connection has been released, so it is
            # not held open while sleeping.
            if sched.tasks_total_len() > MAX_WAITING_TASK:
                time.sleep(2)
            if time.time() - last_data_time > MAX_EMPTY_TASK_PERIOD:
                time.sleep(10)

    def fetchTasksFromKafka(priority):
        """Consume topic 'OpenCluster<priority>' and feed tasks to the scheduler.

        Runs until ``pyroLoopCondition`` becomes false.
        """
        global pyroLoopCondition

        consumer = KafkaConsumer('OpenCluster%s' % priority,
                                 bootstrap_servers=[options.warehouse_addr],
                                 group_id="cnlab",
                                 auto_commit_enable=True,
                                 auto_commit_interval_ms=30 * 1000,
                                 auto_offset_reset='smallest')

        last_data_time = time.time()
        while pyroLoopCondition:
            for message in consumer.fetch_messages():
                # NOTE(review): logged at ERROR level although this looks
                # like routine tracing -- confirm the intended level.
                logger.error("%s:%s:%s: key=%s " %
                             (message.topic, message.partition, message.offset,
                              message.key))
                # NOTE(security): unpickling runs arbitrary code if the topic
                # can be written by an untrusted producer.
                sched.append_task(cPickle.loads(message.value))
                consumer.task_done(message)
                last_data_time = time.time()
            if sched.tasks_len(priority) > MAX_WAITING_TASK:
                time.sleep(2)
            if time.time() - last_data_time > MAX_EMPTY_TASK_PERIOD:
                time.sleep(10)

    # More than two comma-separated fields means the MySQL address form.
    if len(warehouse_addrs) > 2:
        spawn(fetchTasksFromMySQL)
    else:
        for i in range(1, sched.priority_size + 1):
            spawn(fetchTasksFromKafka, i)

    def handler(signm, frame):
        """Signal handler: log the signal number and stop the scheduler."""
        logger.warning("got signal %d, exit now", signm)
        sched.stop(3)

    signal.signal(signal.SIGTERM, handler)
    signal.signal(signal.SIGABRT, handler)

    try:
        while not sched.stopped:
            time.sleep(0.5)
            sched.check(driver)

            now = time.time()
            # The random 0-5 s is added to the 60 s threshold before
            # reviving offers.
            if now > sched.last_offer_time + 60 + random.randint(0, 5):
                logger.warning("too long to get offer, reviving...")
                sched.last_offer_time = now
                driver.reviveOffers()

    except KeyboardInterrupt:
        logger.warning(
            'stopped by KeyboardInterrupt. The Program is exiting gracefully! Please wait...'
        )
        sched.stop(4)

    # Tell the feeder loops to finish, then give them a moment to notice.
    pyroLoopCondition = False

    time.sleep(5)
    driver.stop(False)
    sys.exit(sched.status)