Exemplo n.º 1
0
 def yml(self):
     """Render a docker-compose (version '3') yaml string covering every
     process group and every stand-alone process.

     Bug fix: the original only created the 'services' key inside the
     process-group loop, so an experiment with stand-alone processes but
     no process groups raised KeyError on di['services'].

     Returns:
         str: yaml produced by BeneDict.dump_yaml_str()
     """
     di = BeneDict({
         'version': '3',
     })
     for pg in self.list_process_groups():
         if 'services' not in di:
             di['services'] = {}
         di['services'].update(pg.yml_dict())
     for p in self.list_processes():
         # guard repeated here so the key exists even with zero groups
         if 'services' not in di:
             di['services'] = {}
         di['services'][p.name] = p.yml_dict()
     return di.dump_yaml_str()
Exemplo n.º 2
0
 def __init__(self, name, image):
     """Container spec named *name* running *image*.

     The container's env list is pre-seeded with SYMPHONY_ROLE=<name>.
     """
     super().__init__()
     role_env = {'name': 'SYMPHONY_ROLE', 'value': name}
     self.data = BeneDict({
         'name': name,
         'image': image,
         'env': [role_env],
     })
     # volumes attached later via mount_volume()
     self.mounted_volumes = []
     # presumably set when this container joins a pod — None until then
     self.pod_yml = None
Exemplo n.º 3
0
 def load(cls, di):
     """Reconstruct an instance from a serialized dict.

     Expects di['name'] and di['data'] (with an 'image' entry).
     """
     serialized = di['data']
     instance = cls(di['name'], serialized['image'])
     instance.data = BeneDict(di['data'])
     return instance
Exemplo n.º 4
0
 def load(cls, di):
     """Rebuild an instance (name/image filled from di['data']) and
     restore its mounted volumes via KubeVolume.load."""
     instance = cls('', '')
     instance.data = BeneDict(di['data'])
     restored = [KubeVolume.load(v) for v in di['mounted_volumes']]
     instance.mounted_volumes = restored
     return instance
Exemplo n.º 5
0
class DockerServiceYML(DockerConfigYML):
    """A single docker-compose service entry (image, environment, ports).

    Idiom fix: membership tests now go directly against the mapping
    instead of the redundant ``.keys()`` call (matching the style used
    elsewhere in this project, e.g. ``'services' not in di``).
    """

    def __init__(self, name, container_image):
        super().__init__()
        self.name = name
        self.data = BeneDict({
            'image': container_image,
        })

    @classmethod
    def load(cls, di):
        """Rebuild a service from a dict produced by save()."""
        name = di['name']
        data = di['data']
        container_image = data['image']
        instance = cls(name, container_image)
        instance.data = BeneDict(di['data'])
        return instance

    def save(self):
        """Serialize to a plain dict (inverse of load)."""
        di = {}
        di['name'] = self.name
        di['data'] = self.data
        return di

    def yml(self):
        """Dump this service's data as a yaml string."""
        return self.data.dump_yaml_str()

    def set_env(self, name, value):
        """Set environment variable *name*; create the section on demand."""
        if 'environment' not in self.data:
            self.data['environment'] = BeneDict()
        self.data['environment'][name] = value

    def set_envs(self, di):
        """Set every (name, value) pair in *di* as an environment variable."""
        for k, v in di.items():
            self.set_env(k, v)

    def set_hostname(self, hostname):
        """Set the compose 'hostname' field."""
        self.data.hostname = hostname

    def set_port(self, port):
        """Append one port mapping, creating the list on demand."""
        if 'ports' not in self.data:
            self.data['ports'] = []
        self.data['ports'].append(port)

    def set_ports(self, ports):
        """Append several port mappings, creating the list on demand."""
        if 'ports' not in self.data:
            self.data['ports'] = []
        self.data['ports'].extend(ports)
Exemplo n.º 6
0
 def load_config(self):
     """Load the surreal yaml config file and propagate the username and
     experiment folder to the global SymphonyConfig."""
     path = U.get_config_file()
     if not U.f_exists(path):
         raise ValueError('Cannot find surreal config file at {}'.format(
             path))
     self.config = BeneDict.load_yaml_file(path)
     SymphonyConfig().set_username(self.username)
     SymphonyConfig().set_experiment_folder(self.folder)
Exemplo n.º 7
0
 def set_env(self, name, value):
     """Set env var *name* to *value* (both coerced to str), updating an
     existing entry in place or appending a new one."""
     name, value = str(name), str(value)
     for entry in self.data['env']:
         if entry.name != name:
             continue
         entry.value = value
         return
     new_entry = BeneDict({'name': name, 'value': value})
     self.data.env.append(new_entry)
Exemplo n.º 8
0
 def config_view(self):
     """
     kubectl config view
     Generates a yaml of context and cluster info
     """
     out, _err, _retcode = runner.run_verbose('kubectl config view',
                                              print_out=False,
                                              raise_on_error=True)
     parsed = load_yaml_str(out)
     return BeneDict(parsed)
Exemplo n.º 9
0
 def pod_spec(self):
     """
         Returns a spec to fall under Pod: spec:
         (emptyDir volume; memory-backed when self.use_memory is set)
     """
     medium = {'medium': 'Memory'} if self.use_memory else {}
     return BeneDict({'name': self.name, 'emptyDir': medium})
Exemplo n.º 10
0
 def pod_spec(self):
     """
         Returns a spec to fall under Pod: spec:
         (gitRepo volume pinned to self.repository @ self.revision)
     """
     git_source = {
         'repository': self.repository,
         'revision': self.revision,
     }
     return BeneDict({'name': self.name, 'gitRepo': git_source})
Exemplo n.º 11
0
 def pod_spec(self):
     """
         Returns a spec to fall under Pod: spec:
         (secret volume referencing self.secret_name)
     """
     secret_source = {
         'secretName': self.secret_name,
         'defaultMode': self.defaultMode,
     }
     return BeneDict({'name': self.name, 'secret': secret_source})
Exemplo n.º 12
0
 def pod_spec(self):
     """
         Returns a spec to fall under Pod: spec:
         (nfs volume at self.server:self.path)
     """
     nfs_source = {
         'server': self.server,
         'path': self.path,
     }
     return BeneDict({'name': self.name, 'nfs': nfs_source})
Exemplo n.º 13
0
 def pod_spec(self):
     """
         Returns a spec to fall under Pod: spec:
         (hostPath volume)

         NOTE(review): the key 'hostpath_type' does not match the
         kubernetes hostPath field name 'type' — confirm downstream
         consumers expect this spelling before changing it.
     """
     host_source = {
         'hostpath_type': self.hostpath_type,
         'path': self.path,
     }
     return BeneDict({'name': self.name, 'hostPath': host_source})
Exemplo n.º 14
0
 def __init__(self, name, data):
     """Kubernetes v1 Secret manifest named *name* wrapping *data*.

     Bug fix: the original assigned ``self.data = data`` and then
     immediately overwrote it with the manifest dict — that first
     assignment was dead code and has been removed.
     """
     self.name = name
     self.data = BeneDict({
         'apiVersion': 'v1',
         'kind': 'Secret',
         'metadata': {
             'name': name,
         },
         'type': 'Opaque',
         'data': data,
     })
Exemplo n.º 15
0
 def mount_volume(self, volume, mount_path):
     """Attach *volume* (a KubeVolume) to this container at *mount_path*,
     and propagate it to the owning pod yml when one is linked."""
     assert isinstance(volume, KubeVolume)
     mounts = self.data.get('volumeMounts', [])
     mount_entry = BeneDict({
         'name': volume.name,
         'mountPath': mount_path
     })
     mounts.append(mount_entry)
     self.data['volumeMounts'] = mounts
     self.mounted_volumes.append(volume)
     if self.pod_yml is not None:
         self.pod_yml.add_volume(volume)
Exemplo n.º 16
0
    def load_config(self):
        """Load the surreal yaml config, propagate username/folder to
        SymphonyConfig, and index any docker_build_settings by name."""
        path = U.get_config_file()
        if not U.f_exists(path):
            raise ValueError('Cannot find surreal config file at {}'.format(
                path))
        self.config = BeneDict.load_yaml_file(path)
        SymphonyConfig().set_username(self.username)
        SymphonyConfig().set_experiment_folder(self.folder)

        for setting in self.config.get('docker_build_settings', []):
            self.docker_build_settings[setting['name']] = setting
Exemplo n.º 17
0
 def external_url(self, experiment_name, service_name):
     """Return 'ip:port' for *service_name*'s LoadBalancer ingress.

     Raises:
         ValueError: when the service has no ingress IP (yet).
     """
     raw = self.query_resources('svc',
                                'yaml',
                                names=[service_name],
                                namespace=experiment_name)
     res = BeneDict(raw)
     lb = res.status.loadBalancer
     has_ip = 'ingress' in lb and 'ip' in lb.ingress[0]
     if not has_ip:
         raise ValueError('Service {} not found in experiment {}'.format(
             service_name, experiment_name))
     return '{}:{}'.format(lb.ingress[0].ip, res.spec.ports[0].port)
Exemplo n.º 18
0
    def setup(self):
        """Initialize this command object: load the surreal config, then
        run the _setup_* registration helpers (defined elsewhere in this
        class).

        NOTE(review): load_config() runs before the _setup_* calls, which
        presumably read self.config — preserve this ordering.
        """
        super().setup()
        # name -> setting dict, populated by load_config()
        self.docker_build_settings = {}
        self.config = BeneDict()
        self.load_config()
        self._setup_create()
        self._setup_tensorboard()
        self._setup_docker_clean()

        # Secondary nfs related support
        self._setup_get_videos()
        self._setup_get_config()
        self._setup_get_tensorboard()
Exemplo n.º 19
0
 def __init__(self, name):
     """Skeleton v1 Service manifest named *name*; labels, ports and
     selector start empty, to be filled in later."""
     super().__init__()
     self.name = name
     metadata = {
         'name': name,
         'labels': {},
     }
     spec = {
         'ports': [{}],
         'selector': {},
     }
     self.data = BeneDict({
         'apiVersion': 'v1',
         'kind': 'Service',
         'metadata': metadata,
         'spec': spec,
     })
Exemplo n.º 20
0
def filter_data(data,
                perturbation_type=None,
                enforce_task_success=True,
                time_window=None):
    """Return a filtered BeneDict copy of *data*.

    Args:
        data: mapping with at least 'stim_site', 'behavior_report' and
            'train_rates' entries (filtered via U.filter_by).
        perturbation_type: when given, keep only rows with this stim_site.
        enforce_task_success: keep only behavior_report == 1 rows.
        time_window: (low, high) slice applied to the time axis (axis 1)
            of 'train_rates'.
    """
    block = copy.deepcopy(data)
    if perturbation_type is not None:
        block = U.filter_by(block, 'stim_site', [perturbation_type])
    if enforce_task_success:
        block = U.filter_by(block, 'behavior_report', [1])
    if time_window is not None:
        low, high = time_window
        time_idx = np.arange(low, high)
        block['train_rates'] = block['train_rates'][:, time_idx, :]
    return BeneDict(block)
Exemplo n.º 21
0
class KubeConfigYML(object):
    """Base wrapper around a kubernetes manifest held in ``self.data``."""

    def __init__(self):
        # start from an empty manifest
        self.data = BeneDict({})

    def set_attr(self, new_config):
        """Merge *new_config* (a dict of fields to update) into the
        manifest in place."""
        merge_dict(self.data, new_config)

    def yml(self):
        """Dump yml string for kubernetes launch yml."""
        return self.data.dump_yaml_str()
Exemplo n.º 22
0
 def __init__(self, name):
     """Empty v1 Pod manifest labelled ``symphony_pg: <name>`` with no
     containers yet."""
     super().__init__()
     metadata = {
         'name': name,
         'labels': {'symphony_pg': name},
     }
     self.data = BeneDict({
         'apiVersion': 'v1',
         'kind': 'Pod',
         'metadata': metadata,
         'spec': {'containers': []},
     })
     self.container_ymls = []       # container specs added later
     self.container_names = set()   # guards against duplicate names
Exemplo n.º 23
0
 def describe_process_group(self, experiment_name, process_group_name):
     """
     Returns:
     {
         'p1': {'status': 'live', 'timestamp': '11:23'},
         'p2': {'status': 'dead'}
     }
     """
     res = self.query_resources('pod',
                                names=[process_group_name],
                                output_format='json',
                                namespace=experiment_name)
     if not res:
         msg = 'Cannot find process_group {} in experiment {}'.format(
             process_group_name, experiment_name)
         raise ValueError(msg)
     statuses = BeneDict(res).status.containerStatuses
     return self._parse_container_statuses(statuses)
Exemplo n.º 24
0
 def __init__(self, name, port):
     """ClusterIP v1 Service exposing *port*, selecting pods labelled
     ``service-<name>: bind``."""
     super().__init__(name)
     self.name = name
     self.port = port
     selector = {'service-' + name: 'bind'}
     spec = {
         'type': 'ClusterIP',
         'ports': [{'port': port}],
         'selector': selector,
     }
     self.data = BeneDict({
         'apiVersion': 'v1',
         'kind': 'Service',
         'metadata': {
             'name': name,
             'labels': {}
         },
         'spec': spec,
     })
Exemplo n.º 25
0
 def __init__(self, config_file='~/.dockerly.yml'):
     """Load and validate the dockerly yaml config at *config_file*.

     Raises:
         FileNotFoundError: config file missing (after printing a hint).
         AssertionError: required key absent or container_root relative.
         ValueError: a value was left as the '_fill_yours_' placeholder.
     """
     try:
         self.config = BeneDict.load_yaml_file(
             os.path.expanduser(config_file))
     except FileNotFoundError:
         print('must specify a config file ' + config_file)
         raise
     # required keys must all be present...
     for key in ['container_root', 'host_root', 'default_image']:
         assert key in self.config, 'config "{}" missing'.format(key)
     # ...and placeholder values must have been filled in
     for k, v in self.config.items():
         if v == '_fill_yours_':
             raise ValueError('please fill in key "{}" in {}'.format(
                 k, config_file))
     self.container_root = self.config.container_root
     assert os.path.isabs(self.container_root), \
         'container_root must be an absolute path: ' + self.container_root
     # must use realpath, otherwise relative path will be wrong
     self.host_root = os.path.realpath(
         os.path.expanduser(self.config.host_root))
     self.ports = self.config.ports
     self.default_image = self.config.default_image
     self._docker_exe = 'nvidia-docker' if self.config.nvidia else 'docker'
     self._dry_run = False
Exemplo n.º 26
0
 def describe_experiment(self, experiment_name):
     """
     Returns:
     {
         'pgroup1': {
             'p1': {'status': 'live', 'timestamp': '11:23'},
             'p2': {'status': 'dead'}
         },
         None: {  # always have all the processes
             'p3_lone': {'status': 'running'}
         }
     }

     Idiom fix: ``if not None in out`` replaced with the standard
     ``if None not in out``.
     """
     all_processes = BeneDict(
         self.query_resources('pod',
                              output_format='json',
                              namespace=experiment_name))
     out = OrderedDict()
     # NOTE: '.items' here is the kubernetes pod-list field, not dict.items()
     for pod in all_processes.items:
         pod_name = pod.metadata.name
         if 'containerStatuses' in pod.status:  # Pod is created
             container_statuses = self._parse_container_statuses(
                 pod.status.containerStatuses)
             # test if the process is stand-alone
             if len(container_statuses) == 1 and list(
                     container_statuses.keys())[0] == pod_name:
                 if None not in out:
                     out[None] = OrderedDict()
                 out[None][pod_name] = container_statuses[pod_name]
             else:
                 out[pod_name] = container_statuses
         else:
             out[pod_name] = {
                 '~': self._parse_unstarted_pod_statuses(pod.status)
             }
     return out
Exemplo n.º 27
0
 def __init__(self):
     """Start from an empty manifest dict."""
     self.data = BeneDict({})
Exemplo n.º 28
0
 def set_env(self, name, value):
     """Set environment variable *name* to *value*, creating the
     'environment' section on first use.

     Idiom fix: membership test directly on the mapping instead of the
     redundant ``.keys()`` call.
     """
     if 'environment' not in self.data:
         self.data['environment'] = BeneDict()
     self.data['environment'][name] = value
Exemplo n.º 29
0
 def __init__(self, name, container_image):
     """Service entry *name* whose data holds just the image reference."""
     super().__init__()
     self.name = name
     self.data = BeneDict({'image': container_image})
Exemplo n.º 30
0
    def create_basic(self, *, settings, experiment_name, algorithm_args,
                     input_args, force, dry_run):
        """Assemble and launch a basic surreal experiment on the cluster.

        Merges settings (input_args > settings > DEFAULT_SETTING_BASIC),
        builds agent/nonagent docker images, wires the nonagent process
        group (learner, replay, ps, tensorboard, tensorplex, loggerplex)
        plus N agent and M eval processes, sets up networking and optional
        NFS mounts, then builds images and launches.

        NOTE(review): statement order matters throughout — settings must
        be merged before use, and image building is deliberately deferred
        to the end so nothing is built unless all prior steps succeed.
        """
        # layered merge: input_args override settings override defaults
        settings = _merge_setting_dictionaries(settings,
                                               self.DEFAULT_SETTING_BASIC)
        settings = _merge_setting_dictionaries(input_args, settings)
        settings = BeneDict(settings)

        cluster = self.create_cluster()
        if 'mount_secrets' in self.config:
            secrets = self.config.mount_secrets
        else:
            secrets = None
        exp = cluster.new_experiment(experiment_name, secrets=secrets)

        # declare the two images we need; building is deferred (see below)
        image_builder = SurrealDockerBuilder(
            build_settings=self.docker_build_settings,
            images_requested={
                'agent': {
                    'identifier': settings.agent.image,
                    'build_config': settings.agent.build_image
                },
                'nonagent': {
                    'identifier': settings.nonagent.image,
                    'build_config': settings.nonagent.build_image
                },
            },
            tag=exp.name,
            push=True)
        agent_image = image_builder.images_provided['agent']
        nonagent_image = image_builder.images_provided['nonagent']
        # defer to build last, so we don't build unless everything passes

        # command-line arguments shared by all launched processes
        algorithm_args += [
            "--num-agents",
            str(settings.num_agents * settings.agent_batch),
        ]
        # TODO: restore_functionalities
        if settings.restore_folder is not None:
            algorithm_args += [
                "--restore_folder",
                shlex.quote(settings.restore_folder)
            ]
        experiment_folder = self.get_remote_experiment_folder(exp.name)
        algorithm_args += ["--experiment-folder", str(experiment_folder)]
        algorithm_args += ["--env", str(settings.env)]
        algorithm_args += ["--agent-batch", str(settings.agent_batch)]
        algorithm_args += ["--eval-batch", str(settings.eval_batch)]
        executable = self._find_executable(settings.algorithm)
        cmd_gen = CommandGenerator(num_agents=settings.num_agents,
                                   num_evals=settings.num_evals,
                                   executable=executable,
                                   config_commands=algorithm_args)

        # nonagent process group: learner + supporting services
        nonagent = exp.new_process_group('nonagent')
        learner = nonagent.new_process('learner',
                                       container_image=nonagent_image,
                                       args=[cmd_gen.get_command('learner')])
        # Because learner and everything are bundled together

        # json_path = 'cluster_definition.tf.json'  # always use slash
        # filepath = pkg_resources.resource_filename(__name__, json_path)
        json_path = self.config.cluster_definition
        dispatcher = GKEDispatcher(json_path)
        # We only need to claim resources for learner
        dispatcher.assign_to(learner,
                             process_group=nonagent,
                             **settings.nonagent.scheduling)
        # For dm_control
        learner.set_env('DISABLE_MUJOCO_RENDERING', "1")

        replay = nonagent.new_process('replay',
                                      container_image=nonagent_image,
                                      args=[cmd_gen.get_command('replay')])

        ps = nonagent.new_process('ps',
                                  container_image=nonagent_image,
                                  args=[cmd_gen.get_command('ps')])

        tensorboard = nonagent.new_process(
            'tensorboard',
            container_image=nonagent_image,
            args=[cmd_gen.get_command('tensorboard')])

        tensorplex = nonagent.new_process(
            'tensorplex',
            container_image=nonagent_image,
            args=[cmd_gen.get_command('tensorplex')])

        loggerplex = nonagent.new_process(
            'loggerplex',
            container_image=nonagent_image,
            args=[cmd_gen.get_command('loggerplex')])
        nonagent.image_pull_policy('Always')

        # one stand-alone process per agent (batched naming when batch > 1)
        agents = []
        for i in range(settings.num_agents):
            if settings.agent_batch == 1:
                agent_name = 'agent-{}'.format(i)
            else:
                agent_name = 'agents-{}'.format(i)
            agent = exp.new_process(agent_name,
                                    container_image=agent_image,
                                    args=[cmd_gen.get_command(agent_name)])

            agent.image_pull_policy('Always')
            # We only need to claim resources for learner
            dispatcher.assign_to(agent, **settings.agent.scheduling)
            agents.append(agent)

        # eval processes mirror the agent setup
        evals = []
        for i in range(settings.num_evals):
            if settings.eval_batch == 1:
                eval_name = 'eval-{}'.format(i)
            else:
                eval_name = 'evals-{}'.format(i)
            eval_p = exp.new_process(eval_name,
                                     container_image=agent_image,
                                     args=[cmd_gen.get_command(eval_name)])
            dispatcher.assign_to(eval_p, **settings.agent.scheduling)
            eval_p.image_pull_policy('Always')

            evals.append(eval_p)

        # connect every role so processes can discover each other
        setup_network(agents=agents,
                      evals=evals,
                      learner=learner,
                      replay=replay,
                      ps=ps,
                      tensorboard=tensorboard,
                      tensorplex=tensorplex,
                      loggerplex=loggerplex)

        # optional shared NFS mount across every process
        if 'nfs' in self.config:
            print('NFS mounted')
            nfs_server = self.config.nfs.servername
            nfs_server_path = self.config.nfs.fs_location
            nfs_mount_path = self.config.nfs.mount_path
            for proc in exp.list_all_processes():
                proc.mount_nfs(server=nfs_server,
                               path=nfs_server_path,
                               mount_path=nfs_mount_path)
        else:
            print('NFS not mounted')

        # build images only now that the whole experiment spec is valid
        image_builder.build()
        cluster.launch(exp, force=force, dry_run=dry_run)