Exemplo n.º 1
0
def test_multiple_logdirs():
    """Check the logdir a task reports for a fresh and for a taken run name.

    Part one: any existing ``<logdir root>/test1`` is removed, then a task
    made with ``run_name='test1'`` must report exactly that path.

    Part two: a ``test2`` task is created, every ``<logdir root>/test2*``
    entry is removed and a bare ``test2`` directory is created by hand. Only
    then is the task's ``logdir`` read, and it must be ``.../test2.01``.
    """
    fresh_dir = f'{scluster.get_logdir_root()}/test1'
    shell = scluster.make_task()  # used purely to run shell commands
    shell.run(f'rm -Rf {fresh_dir}')
    first = scluster.make_task(run_name='test1')
    assert first.logdir == fresh_dir

    taken_dir = f'{scluster.get_logdir_root()}/test2'
    second = scluster.make_task(run_name='test2')
    # The directory is put in place *after* the task exists but *before*
    # its logdir is queried; keep this order.
    # NOTE(review): presumably logdir is resolved lazily on first access --
    # confirm against the scluster implementation.
    shell.run(f'rm -Rf {taken_dir}*')
    shell.run(f'mkdir {taken_dir}')
    assert second.logdir == taken_dir + '.01'
Exemplo n.º 2
0
def test_multiple_logdir_tasks():
    """Check that all tasks of one job report the same logdir.

    A job with 10 tasks is created under ``run_name='test1'`` after removing
    any existing ``<logdir root>/test1``. Each task's ``logdir`` is read from
    its own thread; all collected values must be identical, and the first
    one must equal ``<logdir root>/test1``.
    """
    task_count = 10
    shell = scluster.make_task()  # used purely to run shell commands
    expected_logdir = f'{scluster.get_logdir_root()}/test1'
    shell.run(f'rm -Rf {expected_logdir}')
    job = scluster.make_job(run_name='test1', num_tasks=task_count)

    seen = []

    import wrapt

    @wrapt.synchronized
    def record_logdir(idx):
        # Collect the logdir reported by task number `idx`.
        seen.append(job.tasks[idx].logdir)

    workers = [
        threading.Thread(target=record_logdir, args=(idx, ))
        for idx in range(task_count)
    ]

    # Start from the last task to the first ...
    for worker in workers[::-1]:
        worker.start()

    # ... and wait for them in random order.
    random.shuffle(workers)
    for worker in workers:
        worker.join()

    distinct = set(seen)
    assert len(distinct) == 1
    assert seen[0] == expected_logdir
Exemplo n.º 3
0
def run_launcher():
  """Provision one scluster task, start a Ray head on it and run the driver.

  Reads the module-level ``args`` (``aws``, ``nightly``, ``name``, ``image``,
  ``xray``, ``size_mb``, ``iters``). This file and ``util.py`` are uploaded
  to the task, the script is re-invoked there with ``--role=driver``, and
  the contents of the task's ``out`` are printed at the end.
  """
  import scluster

  if args.aws:
    scluster.set_backend('aws')

  # Choose the command that installs Ray on the task.
  if not args.nightly:
    install_script = 'pip install ray'
  elif 'Darwin' in util.ossystem('uname') and not args.aws:
    # nightly wheel for a local macOS run
    install_script = 'pip install -U https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.5.2-cp36-cp36m-macosx_10_6_intel.whl'
  else:
    # nightly wheel for every other case
    install_script = 'pip install -U https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.5.2-cp36-cp36m-manylinux1_x86_64.whl'

  worker = scluster.make_task(
    name=args.name,
    install_script=install_script,
    image_name=args.image,
  )

  # Off the local machine, kill any running python first; a failure of
  # killall is ignored.
  if not scluster.running_locally():
    worker._run_raw('killall python', ignore_errors=True)

  for local_file in (__file__, 'util.py'):
    worker.upload(local_file)

  if args.xray:
    worker.run('export RAY_USE_XRAY=1')
  worker.run('ray stop')

  resources = """--resources='{"ps": 1, "worker": 1}'"""
  worker.run(f"ray start --head {resources} --redis-port=6379")
  # Disabled alternative start command, kept for reference:
  #  worker.run(f"ray start --redis-address={worker.ip}:6379 {resources}")

  driver_cmd = f'./{__file__} --role=driver --ip={worker.ip}:6379 --size-mb={args.size_mb} --iters={args.iters}'
  worker.run(driver_cmd)
  print(worker.read('out'))
Exemplo n.º 4
0
def run_launcher():
    """Run the receiver and the sender of this script on one scluster task.

    Reads the module-level ``args`` (``aws``, ``name``, ``image``, ``iters``,
    ``size_mb``, ``shards``). This file and ``util.py`` are uploaded, the
    receiver role is started without blocking, the sender role is run in a
    second window, and the contents of the task's ``out`` are printed.
    """
    import scluster
    scluster.util.assert_script_in_current_directory()

    if args.aws:
        scluster.set_backend('aws')

    # use 4GB instance, 0.5GB not enough
    worker = scluster.make_task(
        args.name,
        image_name=args.image,
        instance_type='t3.medium',
    )
    for local_file in (__file__, 'util.py'):
        worker.upload(local_file)

    activate_env = 'source activate tensorflow_p36'

    if not scluster.running_locally():
        # kill python just for when tmux session reuse is on
        worker._run_raw('killall python', ignore_errors=True)
        # on AWS probably running in conda DLAMI, switch into TF-enabled env
        worker.run(activate_env)

    # Sender and receiver both live on this one task, so they share its IP.
    ip_config = f'--sender-ip={worker.ip} --receiver-ip={worker.ip}'

    receiver_cmd = f'python {__file__} --role=receiver {ip_config}'
    worker.run(receiver_cmd, non_blocking=True)

    worker.switch_window(1)  # run in new tmux window
    if not scluster.running_locally():
        # the environment is activated again for the new window
        worker.run(activate_env)

    sender_cmd = f'python {__file__} --role=sender {ip_config} --iters={args.iters} --size-mb={args.size_mb} --shards={args.shards}'
    worker.run(sender_cmd)
    print(worker.read('out'))
Exemplo n.º 5
0
def test():
    """Check how ``task.join`` reports a failed non-blocking command.

    ``mkdir /illegal`` is launched twice without blocking. The first join
    passes ``ignore_errors=True`` and must return normally; the second join
    uses the defaults and must raise ``RuntimeError``.
    """
    failing_cmd = "mkdir /illegal"
    worker = scluster.make_task(
        image_name=scluster.aws_backend.GENERIC_SMALL_IMAGE)

    # Errors ignored: join returns (an error message may be printed).
    worker.run(failing_cmd, non_blocking=True)
    worker.join(ignore_errors=True)

    # Errors not ignored: join is expected to raise.
    worker.run(failing_cmd, non_blocking=True)
    with pytest.raises(RuntimeError):
        worker.join()
Exemplo n.º 6
0
def main():
  """Create a task, configure Jupyter on it and start a notebook server.

  Reads the module-level ``args`` (``name``, ``instance_type``,
  ``image_name``, ``password``) and ``module_path``. Prints the URL at which
  the notebook is expected to be reachable (port 8888 on the task's public
  IP).
  """
  box = scluster.make_task(
    name=args.name,
    instance_type=args.instance_type,
    image_name=args.image_name,
  )

  # Generate a notebook config for the given password and upload it.
  box.upload(_create_jupyter_config(args.password),
             '~/.jupyter/jupyter_notebook_config.py')

  # Upload the sample notebook; dont_overwrite=True is passed to upload.
  box.run('mkdir -p /ncluster/notebooks')
  sample_src = f'{module_path}/gpubox_sample.ipynb'
  sample_dst = '/ncluster/notebooks/gpubox_sample.ipynb'
  box.upload(sample_src, sample_dst, dont_overwrite=True)

  # Start the server from the notebooks directory without blocking.
  box.run('cd /ncluster/notebooks')
  box.run('jupyter notebook', non_blocking=True)
  print(f'Jupyter notebook will be at http://{box.public_ip}:8888')
Exemplo n.º 7
0
#!/usr/bin/env python
# Self-launching example with two roles, chosen by the command line:
#   * no arguments  -> launcher: creates a t3.micro task, uploads this file,
#                      installs tensorflow there and re-runs the file with
#                      the single argument "worker";
#   * "worker"      -> multiplies two 1000x1000 matrices of ones, prints the
#                      sum of the result and schedules a machine shutdown.
# Any other first argument does nothing.
import sys

if not sys.argv[1:]:
    # Launcher role. scluster is imported here so the worker side does not
    # need it installed.
    import scluster
    task = scluster.make_task(instance_type='t3.micro')
    task.upload(__file__)
    task.run('pip install tensorflow')
    task.run(f'python {__file__} worker')
elif sys.argv[1] == 'worker':
    # Worker role. tensorflow is imported here so the launcher side does not
    # need it installed.
    import os

    import tensorflow as tf

    ones = tf.ones((1000, 1000))
    # NOTE(review): tf.Session is the TensorFlow 1.x session API -- confirm
    # the tensorflow version that 'pip install tensorflow' provides.
    # The context manager closes the session once the result is fetched.
    with tf.Session() as sess:
        result = sess.run(tf.matmul(ones, ones))
    print(f"matmul gave {result.sum()}")
    os.system('sudo shutdown -h -P 10')  # shut down the instance in 10 mins
Exemplo n.º 8
0
#!/usr/bin/env python
# Minimal scluster example: run `ifconfig` on a task and print what the
# run call returned.
import scluster

# make_task() is called with no arguments, i.e. default machine type and
# default image.
task = scluster.make_task()
ifconfig_output = task.run('ifconfig')
print(f"Task ifconfig returned {ifconfig_output}")