示例#1
0
    def consumer(self, que):
        """Start or stop the heartbeat worker and cluster server on queue commands.

        Blocks forever in ``select.select`` on ``que`` and reads one item from
        every object reported readable.  The item ``"start"`` spawns the
        ``login_and_update`` worker and starts a ``ClusterServer``; any other
        item stops the most recently started server and worker, if any.

        Args:
            que: objects handed straight to ``select.select``; each one must
                also expose ``get()``.  (Presumably selectable queue
                wrappers -- confirm against the caller.)

        This method never returns normally.
        """
        heartbeat_threads = []
        cluster_servers = []
        while True:
            readable, _, _ = select.select(que, [], [])
            for source in readable:
                # ``Thread.name`` replaces the deprecated ``getName()``.
                print(threading.current_thread().name, "============")
                item = source.get()
                print(item)
                if item == "start":
                    print("启动服务")
                    logger.info(
                        "启动程序   HeartSchedule >>> {}    ClusterServer >>>{}".
                        format(":".join(CONFIG["sch_grpc_server"]),
                               CONFIG.get("master_port")))
                    heartbeat = spawn(target=login_and_update,
                                      name="heartbeat")

                    server = ClusterServer(
                        addr="0.0.0.0:{}".format(CONFIG.get("master_port")))
                    server.start()
                    heartbeat_threads.append(heartbeat)
                    cluster_servers.append(server)
                else:
                    print("关闭服务")
                    logger.info("关闭程序>>>HeartSchedule>>>ClusterServer")

                    # Guard each pop on its own list: the previous combined
                    # ``b or a`` check let both pops run when only one list
                    # was non-empty, which would raise IndexError.
                    if cluster_servers:
                        cluster_servers.pop().stop()
                    if heartbeat_threads:
                        stop_thread(heartbeat_threads.pop())
示例#2
0
    def start_heart_cluster(self):
        """Poll ``up_cluster_event`` and start/stop the heartbeat and cluster server.

        Every half second the event is inspected.  When it is set and no
        start has happened since it was last seen cleared, the
        ``login_and_update`` worker is spawned and a ``ClusterServer`` is
        started.  When it is cleared, the most recently started pair (if any)
        is stopped and a new start becomes possible.

        This method never returns normally.
        """
        # The polling loop needs a marker so one "set" period starts only once.
        start_allowed = True
        heartbeat_threads = []
        cluster_servers = []
        while True:
            time.sleep(.5)

            if up_cluster_event.is_set() and start_allowed:
                logger.info(
                    "启动程序   HeartSchedule >>> {}    ClusterServer >>>{}".
                    format(":".join(CONFIG["sch_grpc_server"]),
                           CONFIG.get("master_port")))
                heartbeat = spawn(target=login_and_update, name="heartbeat")
                listen_addr = "0.0.0.0:{}".format(CONFIG.get("master_port"))
                server = ClusterServer(addr=listen_addr)
                server.start()
                heartbeat_threads.append(heartbeat)
                cluster_servers.append(server)
                start_allowed = False
                continue

            if up_cluster_event.is_set():
                # Still up and already started: nothing to do this tick.
                continue

            # Event is cleared: re-arm and tear down whatever is running.
            start_allowed = True
            if heartbeat_threads and cluster_servers:
                logger.info("关闭程序>>>HeartSchedule>>>ClusterServer")
                cluster_servers.pop().stop()
                stop_thread(heartbeat_threads.pop())
示例#3
0
文件: raft_main.py 项目: alan-mi/8.16
def run_server():
    """Run the Raft gRPC server and the HTTP app, then clean up and exit.

    Spawns the ``vip_load`` worker, starts a gRPC server exposing
    ``RaftService`` on ``CONFIG["raft_grpc_port"]`` and then blocks in
    ``app.run`` on ``CONFIG["raft_http_port"]``.  Whatever way that ends, the
    worker is stopped, the VIP is set to ``"down"`` and the process exits.

    Raises:
        SystemExit: always, once cleanup has run.
    """
    # Local import: ``sys.exit`` replaces the site-provided ``exit`` helper,
    # which is meant for interactive use and may be absent (e.g. ``python -S``).
    import sys

    p = spawn(target=vip_load, name="find_vip")
    try:
        server = grpc.server(ThreadPoolExecutor(40))
        # Register the request-handling servicer with the RPC server.
        raft_grpc_pb2_grpc.add_RaftServiceServicer_to_server(
            raft_grpc_server.RaftService(), server)
        # An insecure port is used here; gRPC also supports TLS/SSL secured
        # connections and various authentication mechanisms.
        server.add_insecure_port("0.0.0.0:{}".format(
            CONFIG.get("raft_grpc_port")))
        server.start()
        # Start serving.
        # TODO: running the election in a separate process raises an error.
        app.run(
            host='0.0.0.0',
            # Port 8586 is only started on the master.
            port=CONFIG.get("raft_http_port"),
        )
    except Exception as e:
        logger.info(e)

    finally:
        # A failure while stopping the worker must not prevent releasing the
        # VIP, otherwise this node would keep it while shutting down.
        try:
            stop_thread(p)
        except Exception as e:
            logger.info(e)
        dc_vip.vip.set_vip("down")
        sys.exit("退出")
示例#4
0
文件: tools.py 项目: alan-mi/8.16
def heart_beat():
    """Assemble the heartbeat payload for this cluster.

    The callback port is ``CONFIG["grpc_public_port"]`` when that setting is
    truthy; otherwise it is ``sch_callback_port_prefix`` plus the last octet
    of the local IP.

    Returns:
        dict: ``callbackAddress`` ("ip:port"), ``clusterID`` and ``gpus`` (a
        list of ``{"model": ..., "count": ...}`` built from
        ``Mongo.use_gpu_by_num()``).
    """
    from cluster_master.utils.client_mongo import cli, Mongo

    machines = Mongo(host=CONFIG.get("mongo_center"),
                     db="cluster",
                     table="machines")
    local_host = local_ip()
    public_port = CONFIG.get("grpc_public_port")

    callback_ip = CONFIG["sch_callback_ip"]
    if public_port:
        callback_port = CONFIG["grpc_public_port"]
    else:
        prefix = int(CONFIG["sch_callback_port_prefix"])
        last_octet = int(local_host.split(".")[-1])
        callback_port = prefix + last_octet

    payload = {
        "callbackAddress": "{sch_ip}:{port}".format(sch_ip=callback_ip,
                                                    port=callback_port),
    }
    payload["clusterID"] = CONFIG.get("cluster_id")

    gpu_entries = []
    for model, count in machines.use_gpu_by_num().items():
        gpu_entries.append({"model": model, "count": count})
    payload["gpus"] = gpu_entries
    return payload
示例#5
0
文件: raft_main.py 项目: alan-mi/8.16
def vip_load():
    """Poll the local Raft service and move the VIP along with leadership.

    Every three seconds ``GetStatus`` is called with the current timestamp.
    A reply echoing that timestamp is decoded as JSON and compared with
    ``vip_event``:

    * event set, this node is the leader and ``state == 2``: VIP goes
      ``"up"``, ``vip_event`` is cleared, ``up_cluster_event`` is set and
      ``"start"`` is queued on ``q1``;
    * event cleared and this node is not the leader: VIP goes ``"down"``,
      ``vip_event`` is set, ``up_cluster_event`` is cleared and ``"stop"``
      is queued on ``q1``.

    Any exception in one round is logged and the loop carries on.  This
    function never returns normally.
    """
    logger.info(vip_event.is_set())
    target = "0.0.0.0:{}".format(CONFIG.get("raft_grpc_port"))
    with grpc.insecure_channel(target) as chan:
        stub = raft_grpc_pb2_grpc.RaftServiceStub(channel=chan)

        from cluster_master.cluster import q1
        while True:
            time.sleep(3)
            stamp = str(int(time.time()))

            try:
                pending = stub.GetStatus.future(
                    raft_grpc_pb2.GetStatusReq(ts=stamp), timeout=3)
                reply = pending.result()
                if reply.ts != stamp:
                    # Reply does not echo this round's timestamp: ignore it.
                    continue

                raft_status = json.loads(reply.status)
                leader = raft_status['leader']
                self_node = raft_status['self']
                logger.info(
                    "vip_event {},leader {}, self_node {},isReady {}".format(
                        vip_event.is_set(), leader, self_node,
                        raft_status["isReady"]))

                is_leader = leader == self_node

                # Promotion: take the VIP and ask the consumer to start.
                if (vip_event.is_set() and is_leader
                        and raft_status["state"] == 2):
                    dc_vip.vip.set_vip("up")
                    vip_event.clear()
                    up_cluster_event.set()
                    q1.put("start")

                # Demotion: release the VIP and ask the consumer to stop.
                if not vip_event.is_set() and not is_leader:
                    dc_vip.vip.set_vip("down")
                    vip_event.set()
                    up_cluster_event.clear()
                    q1.put("stop")
            except Exception as e:
                logger.info(e)
示例#6
0
def test_rpc_task():
    """Send one sample ``TaskStatus`` RPC to the master and print the reply.

    Builds a task description with a pseudo-random ``taskID`` and one randomly
    chosen GPU requirement, sends it JSON-encoded inside a ``sch_pb2.Proto``
    and prints the decoded response body.  Any exception is printed with an
    ``"error"`` prefix instead of being raised.
    """
    try:
        with grpc.insecure_channel("192.168.137.200:{}".format(
                CONFIG.get("master_port"))) as channel:
            stub = sch_pb2_grpc.SkylarkStub(channel=channel)
            # NOTE(review): ``bytes(n)`` yields ``n`` zero bytes, so this is
            # the digest of a zero-filled buffer of random length, not of the
            # number's text -- confirm that is intended.
            task_id = hashlib.md5(bytes(random.randint(0,
                                                       1000))).hexdigest()
            print(task_id)
            proj_fields_map = {
                "taskID": task_id,
                "taskType": 3,
                "taskName": "task_name",
                "projectHash":
                "QmPhoTxquhjH14hb5S82jnDtu8FcLnGzNZEvgN1jCtN15P",
                # ``random.choices`` with the default k=1 returns a
                # one-element list holding one of the two GPU requirements.
                "gpus": random.choices([{
                    "model": "GeForce GTX 1080 Ti",
                    "count": 1
                }, {
                    "model": "GeForce GTX 1070 Ti",
                    "count": 1
                }]),
                "engine": 2,
                "mainRelativePath":
                "pytorch_demo/demo_s_d/torch_mnist_demo.py",
                "runParam": "",
                "projectName": "pytorch_demo.zip",
                "outputPath": "pytorch_demo/demo_s_d/out",
                "status": "stop",
            }
            res = stub.TaskStatus(sch_pb2.Proto(
                version=1,
                seq=1,
                timestamp=int(time.time()),
                body=json.dumps(proj_fields_map).encode()),
                                  timeout=5)
            print(json.loads(res.body))
    except Exception as e:
        print("error", e)
示例#7
0
def test_rpc_heart():
    """Send one sample ``HeartBeat`` RPC to the master and print the reply.

    The heartbeat describes a machine with a random ``machineID`` suffix and
    three GPUs.  It is sent JSON-encoded inside a ``sch_pb2.Proto``; the
    decoded response body is printed.  Any exception is printed with an
    ``"error"`` prefix instead of being raised.
    """
    try:
        target = "192.168.137.200:{}".format(CONFIG.get("master_port"))
        with grpc.insecure_channel(target) as channel:
            stub = sch_pb2_grpc.SkylarkStub(channel=channel)

            heartbeat = {
                "machineID": "_001|_{:03}".format(random.randint(0, 1000)),
                "clusterID": "_003",
                "ctrlAvailable": "N",
                "taskType": "start",
                "gpus": [
                    {"model": "GeForce GTX 1070 Ti", "id": 1, "status": None},
                    {"model": "GeForce GTX 1080 Ti", "id": 0, "status": None},
                    {"model": "GeForce GTX 1080 Ti", "id": 1, "status": None},
                ],
                "callbackAddress": "61.142.213.210:22004",
                "intranetAddress": "192.168.137.4:28801",
                "geoCode": "0757-1",
                "status": 2,
            }

            request = sch_pb2.Proto(
                version=1,
                seq=1,
                timestamp=int(time.time()),
                body=json.dumps(heartbeat).encode(),
            )
            res = stub.HeartBeat(request, timeout=5)
            print(json.loads(res.body))
    except Exception as e:
        print("error", e)
from datetime import datetime
import re
import sqlalchemy
from flask_sqlalchemy import SQLAlchemy
from app import app
from utils import HTTPRequestError
from conf import CONFIG

# Point Flask-SQLAlchemy at the configured database and switch off its
# modification-tracking option before binding the extension to the app.
app.config.update(
    SQLALCHEMY_DATABASE_URI=CONFIG.get_db_url(),
    SQLALCHEMY_TRACK_MODIFICATIONS=False,
)
db = SQLAlchemy(app)


class DeviceTemplate(db.Model):
    """ORM model for a row of the ``templates`` table.

    A template has a label, creation/update timestamps, a collection of
    ``DeviceAttr`` rows and a many-to-many link to ``Device`` through the
    ``device_template`` association table.
    """
    __tablename__ = 'templates'

    # Primary key, drawn from the ``template_id`` sequence.
    id = db.Column(db.Integer, db.Sequence('template_id'), primary_key=True)
    # Required label, at most 128 characters.
    label = db.Column(db.String(128), nullable=False)
    # ``datetime.now`` is passed uncalled so it is evaluated per insert; it
    # returns a naive local-time value.
    created = db.Column(db.DateTime, default=datetime.now)
    # Only filled on UPDATE; there is no insert-time default.
    updated = db.Column(db.DateTime, onupdate=datetime.now)

    # Attributes are loaded with a JOIN alongside the template
    # (``lazy='joined'``); deleting a template cascades the delete to them.
    attrs = db.relationship("DeviceAttr",
                            back_populates="template",
                            lazy='joined',
                            cascade="delete")
    # Many-to-many link to devices via the ``device_template`` table.
    devices = db.relationship("Device",
                              secondary='device_template',
                              back_populates="templates")

    def __repr__(self):
        """Return a short debug representation showing the label."""
        return "<Template(label='%s')>" % self.label
#!/usr/bin/env python3
#

from conf import CONFIG
import psycopg2
from bs4 import BeautifulSoup

# Database section of the project configuration.
DBCONF = CONFIG().CONF_DB

# Connection parameters built from that section; note that the database name
# comes from the ``database_example`` entry.
DBCONN = dict(
    host=DBCONF['host'],
    database=DBCONF['database_example'],
    user=DBCONF['user'],
    password=DBCONF['password'],
)
COUNT = 3857


def meta_parser(entry, tag):
    """Return the ``.string`` of every ``tag`` element found in ``entry``.

    Args:
        entry: markup (or any object whose ``str()`` is markup) to parse.
        tag: element name handed to ``find_all``.

    Returns:
        list: one entry per matching element, in document order.
    """
    soup = BeautifulSoup(str(entry), 'lxml')
    strings = []
    for node in soup.find_all(tag):
        strings.append(node.string)
    return strings


def tag_parser(td_list, tag):
    """Extract title/text values of ``tag`` elements from each cell after the first.

    NOTE(review): the visible body ends right after ``tag_list`` is built;
    ``t_list`` is never filled or returned in the lines shown, so the rest of
    the function is presumably missing from this view -- confirm against the
    full file.

    Args:
        td_list: sequence of cells; the first one is skipped.
        tag: element name handed to ``find_all``.
    """
    t_list = list()
    for td in td_list[1:]:
        # Re-parse the cell's markup and collect the nested ``tag`` elements.
        tag_soup = BeautifulSoup(str(td), 'lxml').find_all(tag)
        # Prefer an element's ``title`` attribute, falling back to its string.
        # The comprehension reuses the name ``td`` for its own loop variable.
        tag_list = [
            td.attrs['title'] if 'title' in td.attrs.keys() else td.string
            for td in tag_soup
        ]
示例#10
0
    "N",
    "gpus": [{
        "model": "GeForce GTX 1070 Ti",
        "id": 2,
        "status": None
    }, {
        "model": "GeForce GTX 1080 Ti",
        "id": 0,
        "status": None
    }, {
        "model": "GeForce GTX 1080 Ti",
        "id": 3,
        "status": None
    }],
}
# Host of the central MongoDB, read from the ``mongo_center`` setting.
mongo_host = CONFIG.get("mongo_center")
# Module-level client used by the helper functions in this script.
cli = Mongo(host=mongo_host)

# cli.add_data(a)
task_id = None

# GPU model names; the same strings appear as "model" values in the sample
# record earlier in this script.
GPU_1 = "GeForce GTX 1070 Ti"
GPU_2 = "GeForce GTX 1080 Ti"


def use_gpu_by_model():
    """Print the records matching a fixed rule, then call ``update_one`` on ``cli.table``.

    NOTE(review): this may be an unfinished experiment -- see the note on the
    final call below.
    """
    # Presumably a MongoDB-style filter on ``ctrlAvailable`` and the nested
    # ``gpus.status`` field -- verify against ``Mongo.find_data``.
    rule = {'ctrlAvailable': 'N', "gpus.status": "adf"}
    print(list(cli.find_data(rule)))
    # NOTE(review): the first argument is a set literal containing an empty
    # string, not a dict, and ``rule`` carries no update operators.  If
    # ``cli.table`` is a pymongo collection this call would be rejected --
    # confirm the intended filter and update document.
    cli.table.update_one({""}, rule)