Exemplo n.º 1
0
 def as_trafaret(cls) -> t.Trafaret:
     """Return the trafaret validator describing this config section.

     The schema accepts a backend name, an existing directory path, an
     optional relative filesystem prefix (default ``"."``), and an
     optional backend-specific options mapping (default ``None``).
     """
     schema = {
         t.Key("backend"): t.String,
         t.Key("path"): tx.Path(type="dir"),
         t.Key("fsprefix", default="."): tx.PurePath(relative_only=True),
         t.Key("options", default=None): t.Null | t.Mapping(t.String, t.Any),
     }
     return t.Dict(schema)
Exemplo n.º 2
0
 def as_trafaret(cls) -> t.Trafaret:
     """Build the trafaret schema used to validate this config section.

     Keys: ``backend`` (string), ``path`` (existing directory),
     ``fsprefix`` (relative path, defaults to ``'.'``), and ``options``
     (``None`` or a string-keyed mapping, defaults to ``None``).
     """
     return t.Dict(
         {
             t.Key('backend'): t.String,
             t.Key('path'): tx.Path(type='dir'),
             t.Key('fsprefix', default='.'): tx.PurePath(relative_only=True),
             t.Key('options', default=None): t.Null | t.Mapping(t.String, t.Any),
         },
     )
Exemplo n.º 3
0
from ai.backend.common.config import etcd_config_iv
from ai.backend.common.logging import logging_config_iv

from .types import VolumeInfo

_max_cpu_count = os.cpu_count()

local_config_iv = t.Dict({
    t.Key('storage-proxy'):
    t.Dict({
        t.Key('node-id'):
        t.String,
        t.Key('num-proc', default=_max_cpu_count):
        t.Int[1:_max_cpu_count],
        t.Key('pid-file', default=os.devnull):
        tx.Path(type='file', allow_nonexisting=True, allow_devnull=True),
        t.Key('event-loop', default='asyncio'):
        t.Enum('asyncio', 'uvloop'),
        t.Key('scandir-limit', default=1000):
        t.Int[0:],
        t.Key('max-upload-size', default="100g"):
        tx.BinarySize,
        t.Key('secret'):
        t.String,  # used to generate JWT tokens
        t.Key('session-expire'):
        tx.TimeDuration,
    }),
    t.Key('logging'):
    logging_config_iv,
    t.Key('api'):
    t.Dict({
Exemplo n.º 4
0
@check_api_params(
    t.Dict({
        t.Key('src'): t.String,
        t.Key('target'): t.String,
        t.Key('launchOptions', default={}): t.Dict({
            t.Key('scalingGroup', default='default'): t.String,
            t.Key('group', default='default'): t.String,
        }).allow_extra('*'),
        t.Key('brand'): t.String,
        t.Key('baseDistro'): t.Enum('ubuntu', 'centos'),
        t.Key('minCPU', default=1): t.Int[1:],
        t.Key('minMemory', default='64m'): tx.BinarySize,
        t.Key('preferredSharedMemory', default='64m'): tx.BinarySize,
        t.Key('supportedAccelerators'): t.List(t.String),
        t.Key('runtimeType'): t.Enum('python'),
        t.Key('runtimePath'): tx.Path(type='file', allow_nonexisting=True, resolve=False),
        t.Key('CPUCountEnvs'): t.List(t.String),
        t.Key('servicePorts', default=[]): t.List(t.Dict({
            t.Key('name'): t.String,
            t.Key('protocol'): t.Enum('http', 'tcp', 'pty'),
            t.Key('ports'): t.List(t.Int[1:65535], min_length=1),
        })),
    }).allow_extra('*'))
async def import_image(request: web.Request, params: Any) -> web.Response:
    '''
    Import a docker image and convert it to a Backend.AI-compatible one,
    by automatically installing a few packages and adding image labels.

    Currently we only support auto-conversion of Python-based kernels (e.g.,
    NGC images) which has its own Python version installed.
Exemplo n.º 5
0
 t.Key('db'): t.Dict({
     t.Key('type', default='postgresql'): t.Enum('postgresql'),
     t.Key('addr'): tx.HostPortPair,
     t.Key('name'): tx.Slug[2:64],
     t.Key('user'): t.String,
     t.Key('password'): t.String,
 }),
 t.Key('manager'): t.Dict({
     t.Key('num-proc', default=_max_cpu_count): t.Int[1:_max_cpu_count],
     t.Key('user', default=None): tx.UserID(default_uid=_file_perm.st_uid),
     t.Key('group', default=None): tx.GroupID(default_gid=_file_perm.st_gid),
     t.Key('service-addr', default=('0.0.0.0', 8080)): tx.HostPortPair,
     t.Key('heartbeat-timeout', default=5.0): t.Float[1.0:],  # type: ignore
     t.Key('secret', default=None): t.Null | t.String,
     t.Key('ssl-enabled', default=False): t.ToBool,
     t.Key('ssl-cert', default=None): t.Null | tx.Path(type='file'),
     t.Key('ssl-privkey', default=None): t.Null | tx.Path(type='file'),
     t.Key('event-loop', default='asyncio'): t.Enum('asyncio', 'uvloop'),
     t.Key('pid-file', default=os.devnull): tx.Path(type='file',
                                                    allow_nonexisting=True,
                                                    allow_devnull=True),
     t.Key('hide-agents', default=False): t.Bool,
     t.Key('importer-image', default='lablup/importer:manylinux2010'): t.String,
 }).allow_extra('*'),
 t.Key('docker-registry'): t.Dict({
     t.Key('ssl-verify', default=True): t.ToBool,
 }).allow_extra('*'),
 t.Key('logging'): t.Any,  # checked in ai.backend.common.logging
 t.Key('debug'): t.Dict({
     t.Key('enabled', default=False): t.ToBool,
     t.Key('log-events', default=False): t.ToBool,
Exemplo n.º 6
0
from .types import VolumeInfo

_max_cpu_count = os.cpu_count()
_file_perm = (Path(__file__).parent / "server.py").stat()


local_config_iv = (
    t.Dict(
        {
            t.Key("storage-proxy"): t.Dict(
                {
                    t.Key("node-id"): t.String,
                    t.Key("num-proc", default=_max_cpu_count): t.Int[1:_max_cpu_count],
                    t.Key("pid-file", default=os.devnull): tx.Path(
                        type="file",
                        allow_nonexisting=True,
                        allow_devnull=True,
                    ),
                    t.Key("event-loop", default="asyncio"): t.Enum("asyncio", "uvloop"),
                    t.Key("scandir-limit", default=1000): t.Int[0:],
                    t.Key("max-upload-size", default="100g"): tx.BinarySize,
                    t.Key("secret"): t.String,  # used to generate JWT tokens
                    t.Key("session-expire"): tx.TimeDuration,
                    t.Key("user", default=None): tx.UserID(
                        default_uid=_file_perm.st_uid,
                    ),
                    t.Key("group", default=None): tx.GroupID(
                        default_gid=_file_perm.st_gid,
                    ),
                },
            ),
Exemplo n.º 7
0
     tx.AliasedKey(['backend', 'mode']):
     tx.Enum(AgentBackend),
     t.Key('rpc-listen-addr', default=('', 6001)):
     tx.HostPortPair(allow_blank_host=True),
     t.Key('agent-sock-port', default=6007):
     t.Int[1024:65535],
     t.Key('id', default=None):
     t.Null | t.String,
     t.Key('region', default=None):
     t.Null | t.String,
     t.Key('instance-type', default=None):
     t.Null | t.String,
     t.Key('scaling-group', default='default'):
     t.String,
     t.Key('pid-file', default=os.devnull):
     tx.Path(type='file', allow_nonexisting=True, allow_devnull=True),
     t.Key('event-loop', default='asyncio'):
     t.Enum('asyncio', 'uvloop'),
     t.Key('skip-manager-detection', default=False):
     t.ToBool,
 }).allow_extra('*'),
 t.Key('container'):
 t.Dict({
     t.Key('kernel-uid', default=-1):
     tx.UserID,
     t.Key('kernel-gid', default=-1):
     tx.GroupID,
     t.Key('kernel-host', default=''):
     t.String(allow_blank=True),
     t.Key('port-range', default=(30000, 31000)):
     tx.PortRange,
Exemplo n.º 8
0
def main(cli_ctx, config_path, debug):
    """Entry point of the Backend.AI agent watcher process.

    Reads and validates the watcher configuration file, applies
    environment-variable overrides, sets up logging over a per-process
    IPC socket, and runs the watcher server until a stop signal arrives.

    :param cli_ctx: CLI context object.  NOTE(review): not referenced in
        this body — presumably a Click context supplied by the CLI
        wrapper; confirm against the caller.
    :param config_path: Path to the configuration file to load.
    :param debug: When truthy, forces ``debug.enabled`` in the config.
    :returns: ``0`` on clean shutdown.
    :raises click.Abort: When configuration validation fails.
    """

    # Validation schema for the watcher-specific sections, merged with the
    # shared etcd schema; unknown keys are tolerated via allow_extra('*').
    watcher_config_iv = t.Dict({
        t.Key('watcher'): t.Dict({
            t.Key('service-addr', default=('0.0.0.0', 6009)): tx.HostPortPair,
            t.Key('ssl-enabled', default=False): t.Bool,
            t.Key('ssl-cert', default=None): t.Null | tx.Path(type='file'),
            t.Key('ssl-key', default=None): t.Null | tx.Path(type='file'),
            t.Key('target-service', default='backendai-agent.service'): t.String,
            t.Key('soft-reset-available', default=False): t.Bool,
        }).allow_extra('*'),
        t.Key('logging'): t.Any,  # checked in ai.backend.common.logging
        t.Key('debug'): t.Dict({
            t.Key('enabled', default=False): t.Bool,
        }).allow_extra('*'),
    }).merge(config.etcd_config_iv).allow_extra('*')

    raw_cfg, cfg_src_path = config.read_from_file(config_path, 'agent')

    # Environment variables take precedence over values from the file.
    config.override_with_env(raw_cfg, ('etcd', 'namespace'), 'BACKEND_NAMESPACE')
    config.override_with_env(raw_cfg, ('etcd', 'addr'), 'BACKEND_ETCD_ADDR')
    config.override_with_env(raw_cfg, ('etcd', 'user'), 'BACKEND_ETCD_USER')
    config.override_with_env(raw_cfg, ('etcd', 'password'), 'BACKEND_ETCD_PASSWORD')
    config.override_with_env(raw_cfg, ('watcher', 'service-addr', 'host'),
                             'BACKEND_WATCHER_SERVICE_IP')
    config.override_with_env(raw_cfg, ('watcher', 'service-addr', 'port'),
                             'BACKEND_WATCHER_SERVICE_PORT')
    if debug:
        # The CLI --debug flag wins over both the file and the environment.
        config.override_key(raw_cfg, ('debug', 'enabled'), True)

    try:
        cfg = config.check(raw_cfg, watcher_config_iv)
        if 'debug' in cfg and cfg['debug']['enabled']:
            print('== Watcher configuration ==')
            pprint(cfg)
        # Remember where the config was loaded from, for later diagnostics.
        cfg['_src'] = cfg_src_path
    except config.ConfigurationError as e:
        print('Validation of watcher configuration has failed:', file=sys.stderr)
        print(pformat(e.invalid_data), file=sys.stderr)
        raise click.Abort()

    # Route log records through a per-process IPC socket endpoint.
    log_sockpath = Path(f'/tmp/backend.ai/ipc/watcher-logger-{os.getpid()}.sock')
    log_sockpath.parent.mkdir(parents=True, exist_ok=True)
    log_endpoint = f'ipc://{log_sockpath}'
    cfg['logging']['endpoint'] = log_endpoint
    logger = Logger(cfg['logging'], is_master=True, log_endpoint=log_endpoint)
    # Suffix the log filename with "-watcher" so it does not clash with the
    # agent's own log file.
    # NOTE(review): this mutation happens after Logger() has been
    # constructed — confirm that Logger reads the 'file' driver config
    # lazily, otherwise the suffix is never applied.
    if 'file' in cfg['logging']['drivers']:
        fn = Path(cfg['logging']['file']['filename'])
        cfg['logging']['file']['filename'] = f"{fn.stem}-watcher{fn.suffix}"

    setproctitle(f"backend.ai: watcher {cfg['etcd']['namespace']}")
    with logger:
        log.info('Backend.AI Agent Watcher {0}', VERSION)
        log.info('runtime: {0}', utils.env_info())

        log_config = logging.getLogger('ai.backend.agent.config')
        log_config.debug('debug mode enabled.')

        # Single-worker server; blocks until one of the stop signals fires.
        aiotools.start_server(
            watcher_server,
            num_workers=1,
            args=(cfg, ),
            stop_signals={signal.SIGINT, signal.SIGTERM, signal.SIGALRM},
        )
        log.info('exit.')
    return 0
Exemplo n.º 9
0
        t.Key('brand'):
        t.String,
        t.Key('baseDistro'):
        t.Enum('ubuntu', 'centos'),
        t.Key('minCPU', default=1):
        t.Int[1:],
        t.Key('minMemory', default='64m'):
        tx.BinarySize,
        t.Key('preferredSharedMemory', default='64m'):
        tx.BinarySize,
        t.Key('supportedAccelerators'):
        t.List(t.String),
        t.Key('runtimeType'):
        t.Enum('python'),
        t.Key('runtimePath'):
        tx.Path(type='file', allow_nonexisting=True, resolve=False),
        t.Key('CPUCountEnvs'):
        t.List(t.String),
        t.Key('servicePorts', default=[]):
        t.List(
            t.Dict({
                t.Key('name'): t.String,
                t.Key('protocol'): t.Enum('http', 'tcp', 'pty'),
                t.Key('ports'): t.List(t.Int[1:65535], min_length=1),
            })),
    }).allow_extra('*'))
async def import_image(request: web.Request, params: Any) -> web.Response:
    '''
    Import a docker image and convert it to a Backend.AI-compatible one,
    by automatically installing a few packages and adding image labels.