            # Fail the benchmark if the app-creation request did not succeed.
            if response.status_code != requests.codes.ok:
                print("Error: %s" % response.text)
                raise BenchmarkException("Error creating app %s" % app_name)

            version = 1

            model = BasicNN()
            nn_model = train(model)

            deploy_and_test_model(clipper_conn,
                                  nn_model,
                                  version,
                                  link_model=True)

            app_and_model_name = "easy-register-app-model"
            create_endpoint(clipper_conn, app_and_model_name, "integers",
                            predict, nn_model)
            test_model(clipper_conn, app_and_model_name, 1)

        except BenchmarkException:
            log_clipper_state(clipper_conn)
            logger.exception("BenchmarkException")
            clipper_conn = create_docker_connection(cleanup=True,
                                                    start_clipper=False)
            sys.exit(1)
        else:
            clipper_conn = create_docker_connection(cleanup=True,
                                                    start_clipper=False)
    except Exception:
        logger.exception("Exception")
        clipper_conn = create_docker_connection(cleanup=True,
                                                start_clipper=False)
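
The snippets on this page hand a predict callable to the deployers without ever showing it. A minimal sketch, assuming the usual Clipper deployer convention of func(model, inputs) returning one serializable result per input, and a model that accepts a flat float tensor (both assumptions, not part of the original code):

def predict(model, inputs):
    # Sketch only: run the model on each input and return one string per
    # input, which is the shape of output the Clipper deployers expect.
    import torch
    with torch.no_grad():
        outputs = [model(torch.tensor(x, dtype=torch.float)) for x in inputs]
    return [str(o.argmax().item()) for o in outputs]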
Example #2
def frontend(args):
    print(args)
    batch_size = args.batch_size
    num_models = args.num_models
    num_redundant_models = args.num_redundant_models
    func = predict
    red_func = predict

    if args.redundancy_mode == "none" or args.redundancy_mode == "equal":
        redundancy_mode = 0
    elif args.redundancy_mode == "coded":
        redundancy_mode = 2
    elif args.redundancy_mode == "cheap":
        redundancy_mode = 3
    else:
        assert False, "Unrecognized redundancy mode '{}'".format(
            args.redundancy_mode)

    if args.queue_mode == "single_queue":
        queue_mode = 0
        single_queue = True
    else:
        # Round robin
        queue_mode = 1
        single_queue = False

    assert len(args.f_ips) == num_models + num_redundant_models
    if len(args.f_ports) != len(args.f_ips):
        assert len(args.f_ports) == 1
        args.f_ports *= len(args.f_ips)

    model_instance_ip_port = []
    red_model_instance_ip_port = []
    for i in range(len(args.f_ips)):
        if i < num_models:
            model_instance_ip_port.append(
                (args.f_ips[i], int(args.f_ports[i])))
        else:
            red_model_instance_ip_port.append(
                (args.f_ips[i], int(args.f_ports[i])))

    client_ip_port = []
    if len(args.f_client_ports) != len(args.f_client_ips):
        assert len(args.f_client_ports) == 1
        args.f_client_ports *= len(args.f_client_ips)
    client_ip_port = [
        (ip, int(port))
        for ip, port in zip(args.f_client_ips, args.f_client_ports)
    ]
    cm = DistributedParmDockerContainerManager(
        model_instance_ip_port=model_instance_ip_port,
        red_model_instance_ip_port=red_model_instance_ip_port,
        client_ip_port=client_ip_port)
    clipper_conn = ClipperConnection(cm, distributed=True)
    frontend_args = {
        "redundancy_mode": redundancy_mode,
        "queue_mode": queue_mode,
        "num_models": num_models,
        "num_redundant_models": num_redundant_models,
        "batch_size": batch_size,
        "mode": args.f_mode
    }

    clipper_conn.start_clipper(frontend_args=frontend_args)

    if args.redundancy_mode == "coded":
        red_input_type = "floats"
    else:
        red_input_type = "bytes"

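    # NOTE: `model` and `red_model` are not defined in this snippet; they are
    # assumed to be PyTorch model objects defined at module scope.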
    pytorch_deployer.create_endpoint(clipper_conn=clipper_conn,
                                     name="example",
                                     input_type="bytes",
                                     func=func,
                                     pytorch_model=model,
                                     pkgs_to_install=['pillow'],
                                     num_replicas=num_models,
                                     batch_size=batch_size,
                                     num_red_replicas=num_redundant_models,
                                     red_func=red_func,
                                     red_input_type=red_input_type,
                                     red_pytorch_model=red_model,
                                     prefer_original=False,
                                     slo_micros=10000000 * 10)

    sleep_time = 5
    print("Sleeping for", sleep_time, "seconds to let things start up")
    time.sleep(sleep_time)

    total_time = cm.run_clients()
    print(total_time)

    with open(args.f_outfile, 'w') as outfile:
        outfile.write("{:.4f}".format(total_time))

    clipper_conn.stop_all()
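
A minimal sketch of how frontend(args) might be driven from the command line. The flag names mirror the args attributes read above; the spellings, defaults, and argparse wiring are assumptions, not part of the original script:

import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Launch the serving frontend")
    parser.add_argument("--batch_size", type=int, default=1)
    parser.add_argument("--num_models", type=int, default=1)
    parser.add_argument("--num_redundant_models", type=int, default=0)
    parser.add_argument("--redundancy_mode",
                        choices=["none", "equal", "coded", "cheap"],
                        default="none")
    parser.add_argument("--queue_mode",
                        choices=["single_queue", "rr"],
                        default="single_queue")
    parser.add_argument("--f_mode", default="normal")
    parser.add_argument("--f_ips", nargs="+", required=True)
    parser.add_argument("--f_ports", nargs="+", default=["7000"])
    parser.add_argument("--f_client_ips", nargs="+", required=True)
    parser.add_argument("--f_client_ports", nargs="+", default=["7010"])
    parser.add_argument("--f_outfile", default="frontend_time.txt")
    frontend(parser.parse_args())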
Example #3
def frontend(args):
    print(args)
    batch_size = args.batch_size
    num_models = args.num_models
    func = predict
    red_func = predict

    assert args.redundancy_mode in ["none", "equal"]
    redundancy_mode = 0

    if args.queue_mode == "single_queue":
        queue_mode = 0
    elif args.queue_mode == "rr":
        queue_mode = 1
    else:
        assert False, "Unrecognized queue mode '{}'".format(args.queue_mode)

    model_instance_ip_port = []
    red_model_instance_ip_port = []
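    # NOTE: `base_port` (and, later, `model`/`red_model`) are assumed to be
    # defined at module scope; they are not shown in this snippet.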
    cur_port = base_port
    if num_models < len(args.f_ips):
        # Round to the nearest int so as not to launch more models than needed.
        num_between = int(len(args.f_ips) / num_models + 0.5)
        chosen_indices = list(range(0, len(args.f_ips), num_between))
        print("Range is", chosen_indices)

        # Shift our chosen indices so that they are evenly distributed
        # throughout the clients.
        delta = len(args.f_ips) - chosen_indices[-1]
        shift = delta // 2
        if len(args.f_ips) == 15:
            shift += 1
        chosen_indices = [i + shift for i in chosen_indices]
        print("Shifted range is", chosen_indices)
        for i in chosen_indices:
            model_instance_ip_port.append((args.f_ips[i], cur_port))

    else:
        for i in range(num_models):
            model_instance_ip_port.append(
                    (args.f_ips[i % len(args.f_ips)], cur_port))

            # Wrap around to the next port number if we need to repeat workers.
            if i % len(args.f_ips) == len(args.f_ips) - 1:
                cur_port += 1

    print("Model instance ip, port:", model_instance_ip_port)
    client_ip_port = []
    if len(args.f_client_ports) != len(args.f_client_ips):
        assert len(args.f_client_ports) == 1
        args.f_client_ports *= len(args.f_client_ips)
    client_ip_port = [
        (ip, int(port))
        for ip, port in zip(args.f_client_ips, args.f_client_ports)
    ]
    cm = DistributedParmDockerContainerManager(
        model_instance_ip_port=model_instance_ip_port,
        red_model_instance_ip_port=red_model_instance_ip_port,
        client_ip_port=client_ip_port)
    clipper_conn = ClipperConnection(cm, distributed=True)
    frontend_args = {
        "redundancy_mode": redundancy_mode,
        "queue_mode": queue_mode,
        "num_models": num_models,
        "num_redundant_models": 0,
        "batch_size": batch_size,
        "mode": args.f_mode,
    }

    clipper_conn.start_clipper(frontend_args=frontend_args)

    red_input_type = "bytes"
    pytorch_deployer.create_endpoint(
            clipper_conn=clipper_conn,
            name="bg",
            input_type="bytes",
            func=func,
            pytorch_model=model,
            pkgs_to_install=['pillow'],
            num_replicas=num_models,
            batch_size=batch_size,
            num_red_replicas=0,
            red_func=red_func,
            red_input_type=red_input_type,
            red_pytorch_model=red_model,
            prefer_original=False,
            slo_micros=10000000 * 10)

    sleep_time = 5
    print("Sleeping for", sleep_time, "seconds to let things start up")
    time.sleep(sleep_time)

    cm.run_clients(wait=False)

    # Listen on a UNIX domain socket to determine when we should quit.
    sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    sock.bind('/home/ubuntu/bg_sock')
    sock.listen(5)
    (clientsocket, address) = sock.accept()

    print("Stopping all clients")
    cm.stop_all_clients()

    print("Sending response")
    clientsocket.sendall('1'.encode())
    clientsocket.close()
    sock.close()

    print("Stopping all")
    clipper_conn.stop_all()
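
For reference, a minimal sketch of the peer process that tells this background job to quit over the UNIX socket. The socket path and the one-byte '1' reply come from the code above; the function name and everything else are assumptions:

import socket

def signal_bg_shutdown(sock_path="/home/ubuntu/bg_sock"):
    # Connect to the background frontend above; it stops its clients and then
    # replies with b"1" before both sides close the connection.
    with socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) as sock:
        sock.connect(sock_path)
        reply = sock.recv(1)
    return reply == b"1"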