def test_remote_worker(self):
    experiment_name = 'test_remote_worker_' + str(uuid.uuid4())
    queue_name = experiment_name
    logger = logs.get_logger('test_remote_worker')
    logger.setLevel(logs.DEBUG)
    pw = subprocess.Popen(
        ['studio-start-remote-worker',
         '--queue=' + queue_name,
         '--single-run',
         '--no-cache',
         '--timeout=30',
         '--image=peterzhokhoff/studioml'],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT)

    stubtest_worker(
        self,
        experiment_name=experiment_name,
        runner_args=['--queue=' + queue_name, '--force-git'],
        config_name='test_config_http_client.yaml',
        test_script='tf_hello_world.py',
        script_args=['arg0'],
        expected_output='[ 2.0 6.0 ]',
        queue=PubsubQueue(queue_name))

    workerout, _ = pw.communicate()
    if workerout:
        logger.debug("studio-start-remote-worker output: \n" +
                     str(workerout))
def test_remote_worker_co(self):
    logger = logs.get_logger('test_remote_worker_co')
    logger.setLevel(logs.DEBUG)
    tmpfile = os.path.join(tempfile.gettempdir(), str(uuid.uuid4()))
    random_str = str(uuid.uuid4())
    with open(tmpfile, 'w') as f:
        f.write(random_str)

    experiment_name = 'test_remote_worker_co_' + str(uuid.uuid4())
    queue_name = experiment_name
    pw = subprocess.Popen(
        ['studio-start-remote-worker',
         '--queue=' + queue_name,
         '--single-run',
         '--no-cache',
         '--image=peterzhokhoff/studioml'],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT)

    stubtest_worker(
        self,
        experiment_name=experiment_name,
        runner_args=['--capture-once=' + tmpfile + ':f',
                     '--queue=' + queue_name,
                     '--force-git'],
        config_name='test_config_http_client.yaml',
        test_script='art_hello_world.py',
        script_args=[],
        expected_output=random_str,
        queue=PubsubQueue(queue_name))

    workerout, _ = pw.communicate()
    if workerout:
        logger.debug('studio-start-remote-worker output: \n' +
                     str(workerout))
    os.remove(tmpfile)
def test_remote_worker_c(self):
    tmpfile = os.path.join(tempfile.gettempdir(), str(uuid.uuid4()))
    logger = logs.get_logger('test_remote_worker_c')
    logger.setLevel(logs.DEBUG)
    experiment_name = 'test_remote_worker_c_' + str(uuid.uuid4())
    random_str1 = str(uuid.uuid4())
    with open(tmpfile, 'w') as f:
        f.write(random_str1)
    random_str2 = str(uuid.uuid4())

    queue_name = experiment_name
    pw = subprocess.Popen(
        ['studio-start-remote-worker',
         '--queue=' + queue_name,
         '--single-run',
         '--no-cache',
         '--image=peterzhokhoff/studioml'],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT)

    db = stubtest_worker(
        self,
        experiment_name=experiment_name,
        runner_args=['--capture=' + tmpfile + ':f',
                     '--queue=' + queue_name,
                     '--force-git'],
        config_name='test_config_http_client.yaml',
        test_script='art_hello_world.py',
        script_args=[random_str2],
        expected_output=random_str1,
        queue=PubsubQueue(queue_name),
        delete_when_done=False)

    workerout, _ = pw.communicate()
    if workerout:
        logger.debug("studio-start-remote-worker output: \n" +
                     str(workerout))
    os.remove(tmpfile)

    tmppath = os.path.join(tempfile.gettempdir(), str(uuid.uuid4()))
    if os.path.exists(tmppath):
        os.remove(tmppath)

    db.get_artifact(
        db.get_experiment(experiment_name).artifacts['f'],
        tmppath,
        only_newer=False)

    with open(tmppath, 'r') as f:
        self.assertEqual(f.read(), random_str2)

    os.remove(tmppath)
    db.delete_experiment(experiment_name)
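# The two tests above exercise the two artifact-capture modes, as their
# expectations show: with --capture-once the worker only sees the file
# contents from submission time (random_str is the expected output),
# while with --capture the artifact is also synced back after the run,
# so the value the experiment writes (random_str2) can be retrieved
# through db.get_artifact().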
def test_unfold_tuples(self):
    logger = logs.get_logger('test_unfold_tuples')
    h = HyperparameterParser(RunnerArgs(), logger)
    hyperparams = [Hyperparameter(name='a', values=[1, 2, 3]),
                   Hyperparameter(name='b', values=[4, 5])]

    expected_tuples = [
        {'a': 1, 'b': 4}, {'a': 2, 'b': 4}, {'a': 3, 'b': 4},
        {'a': 1, 'b': 5}, {'a': 2, 'b': 5}, {'a': 3, 'b': 5}]

    self.assertEqual(
        sorted(h.convert_to_tuples(hyperparams), key=lambda x: str(x)),
        sorted(expected_tuples, key=lambda x: str(x)))
def test_parse_range(self):
    logger = logs.get_logger('test_parse_range')
    h = HyperparameterParser(RunnerArgs(), logger)
    range_strs = ['1,2,3', ':5', '2:5', '0.1:0.05:0.3',
                  '0.1:3:0.3', '0.01:4l:10']
    gd_truths = [
        [1.0, 2.0, 3.0],
        [0.0, 1.0, 2.0, 3.0, 4.0, 5.0],
        [2.0, 3.0, 4.0, 5.0],
        [0.1, 0.15, 0.2, 0.25, 0.3],
        [0.1, 0.2, 0.3],
        [0.01, 0.1, 1, 10]]

    for range_str, gd_truth in zip(range_strs, gd_truths):
        hyperparameter = h._parse_grid("test", range_str)
        self.assertTrue(np.isclose(hyperparameter.values, gd_truth).all())
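# A quick sketch of the grid grammar the assertions above imply
# (a summary inferred from the expected values, not an exhaustive spec):
#   '1,2,3'         -> explicit comma-separated value list
#   ':5'            -> integers 0 through 5 inclusive
#   '2:5'           -> integers 2 through 5 inclusive
#   '0.1:0.05:0.3'  -> start:step:end with a float step
#   '0.1:3:0.3'     -> start:count:end, count points linearly spaced
#   '0.01:4l:10'    -> start:count with 'l' suffix, points log-spaced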
def __init__(
        self,
        # Name of experiment
        experimentId,
        # Completion service configuration
        cs_config=None,
        # Used to pass a studioML configuration block
        # read by client software
        studio_config=None,
        # Studio config yaml file
        studio_config_file=None,
        shutdown_del_queue=False):

    # StudioML configuration
    self.config = model.get_config(studio_config_file)
    self.logger = logs.get_logger(self.__class__.__name__)
    self.verbose_level = parse_verbosity(self.config['verbose'])
    self.logger.setLevel(self.verbose_level)

    # Set up completion service instance properties
    # based on the configuration.
    # Guard against the default cs_config=None before reading keys:
    cs_config = cs_config or {}
    self.experimentId = experimentId
    self.project_name = "completion_service_" + experimentId
    self.resumable = RESUMABLE
    self.clean_queue = CLEAN_QUEUE
    self.queue_upscaling = QUEUE_UPSCALING
    self.num_workers = int(cs_config.get('num_workers', 1))
    self.cloud_timeout = cs_config.get('timeout')
    self.bid = cs_config.get('bid')
    self.ssh_keypair = cs_config.get('ssh_keypair')
    self.sleep_time = cs_config.get('sleep_time')
    self.shutdown_del_queue = shutdown_del_queue

    # Figure out the requested resources
    # (copy the defaults so the module-level dict is not mutated):
    self.resources_needed = dict(DEFAULT_RESOURCES_NEEDED)
    resources_needed = cs_config.get('resources_needed')
    if resources_needed:
        self.resources_needed.update(resources_needed)
    studio_resources = self.config.get('resources_needed')
    if studio_resources:
        self.resources_needed.update(studio_resources)

    # Figure out the task queue and cloud we are going to use:
    queue_name = cs_config.get('queue')
    cloud_name = cs_config.get('cloud')
    if cs_config.get('local'):
        queue_name = None
        cloud_name = None
    elif queue_name is not None:
        # An externally managed queue is never deleted on shutdown:
        self.shutdown_del_queue = False
        if cloud_name in ['ec2spot', 'ec2']:
            assert queue_name.startswith("sqs_")
    else:
        queue_name = self.experimentId
        if cloud_name in ['ec2spot', 'ec2']:
            queue_name = "sqs_" + queue_name

    self.cloud = cloud_name
    if queue_name is not None and queue_name.startswith("rmq_"):
        assert self.cloud is None

    self.wm = model.get_worker_manager(self.config, self.cloud)

    if queue_name is not None:
        self.logger.info(
            "CompletionService configured with queue {0}"
            .format(queue_name))

    self.queue = model.get_queue(queue_name=queue_name,
                                 cloud=self.cloud,
                                 config=self.config,
                                 logger=self.logger,
                                 verbose=self.verbose_level)
    self.queue_name = self.queue.get_name()

    self.submitted = {}
    self.use_spot = cloud_name in ['ec2spot', 'gcspot']

    self.logger.info("Project name: {0}".format(self.project_name))
    self.logger.info("Initial/final queue name: {0}, {1}"
                     .format(queue_name, self.queue_name))
    self.logger.info("Cloud name: {0}".format(self.cloud))
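# A minimal usage sketch, assuming this constructor belongs to a
# CompletionService class importable by the caller; the cs_config keys
# below are exactly the ones the constructor reads, but the values are
# illustrative only:
#
#   cs_config = {
#       'num_workers': 2,
#       'timeout': 300,            # cloud worker timeout, in seconds
#       'local': True,             # short-circuits queue/cloud selection
#       'resources_needed': {},    # merged over DEFAULT_RESOURCES_NEEDED
#   }
#   cs = CompletionService('my_experiment', cs_config=cs_config)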
def main(args=sys.argv):
    logger = logs.get_logger('studio-remote-worker')
    parser = argparse.ArgumentParser(
        description='Studio remote worker. '
                    'Usage: studio-remote-worker')

    parser.add_argument('--config', help='configuration file', default=None)
    parser.add_argument(
        '--guest',
        help='Guest mode (does not require db credentials)',
        action='store_true')
    parser.add_argument(
        '--single-run',
        help='quit after a single run (regardless of the state of the queue)',
        action='store_true')
    parser.add_argument('--queue', help='queue name', required=True)
    parser.add_argument(
        '--verbose', '-v',
        help='Verbosity level. Allowed values: '
             'debug, info, warn, error, crit, '
             'or a numerical value of logger levels.',
        default=None)
    parser.add_argument(
        '--timeout', '-t',
        help='Timeout after which remote worker stops listening (in seconds)',
        type=int,
        default=100)

    parsed_args, script_args = parser.parse_known_args(args)
    verbose = parse_verbosity(parsed_args.verbose)
    logger.setLevel(verbose)

    config = None
    if parsed_args.config is not None:
        config = model.get_config(parsed_args.config)

    # Pick the queue backend based on the queue-name prefix:
    if parsed_args.queue.startswith('ec2_') or \
       parsed_args.queue.startswith('sqs_'):
        queue = SQSQueue(parsed_args.queue, verbose=verbose)
    elif parsed_args.queue.startswith('rmq_'):
        queue = get_cached_queue(
            name=parsed_args.queue,
            route='StudioML.' + parsed_args.queue,
            config=config,
            logger=logger,
            verbose=verbose)
    else:
        queue = PubsubQueue(parsed_args.queue, verbose=verbose)

    logger.info('Waiting for work')

    timeout_before = parsed_args.timeout
    timeout_after = timeout_before if timeout_before > 0 else 0

    # wait_for_messages(queue, timeout_before, logger)

    logger.info('Starting working')
    worker_loop(queue, parsed_args,
                single_experiment=parsed_args.single_run,
                timeout=timeout_after,
                verbose=verbose)
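# Illustrative invocations, assuming this entry point is installed as
# studio-remote-worker (the logger name above suggests so); the flags
# are the ones defined by this parser, and the backend is selected
# purely from the queue-name prefix:
#
#   studio-remote-worker --queue=sqs_work --timeout=60       # AWS SQS
#   studio-remote-worker --queue=rmq_work --config=cfg.yaml  # RabbitMQ
#   studio-remote-worker --queue=work --single-run           # Google PubSub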
def get_logger():
    global logger
    if not logger:
        logger = logs.get_logger('studio-serve')
        logger.setLevel(logs.DEBUG)
    return logger
import importlib
import shutil
import pickle
import os
import sys
import six
import signal
import pdb

from studio import fs_tracker, model, logs, util

logger = logs.get_logger('completion_service_client')
try:
    logger.setLevel(model.parse_verbosity(sys.argv[1]))
except BaseException:
    logger.setLevel(logs.DEBUG)


def main():
    logger.debug('copying and importing client module')
    logger.debug('getting file mappings')

    # Register a signal handler for signal.SIGUSR1
    # that invokes the built-in Python debugger:
    signal.signal(signal.SIGUSR1, lambda sig, stack: pdb.set_trace())

    artifacts = fs_tracker.get_artifacts()
    files = {}
    logger.debug("Artifacts = {}".format(artifacts))
    for tag, path in six.iteritems(artifacts):
import time

from studio import logs

logger = logs.get_logger('helloworld')
logger.setLevel(logs.DEBUG)

i = 0
while True:
    logger.info('{} seconds passed'.format(i))
    time.sleep(1)
    i += 1
def test_baked_image(self):
    # Create a docker image with baked-in credentials
    # and run the remote worker test with it.
    logger = logs.get_logger('test_baked_image')
    logger.setLevel(logs.DEBUG)

    # Check whether docker is installed (correctly):
    dockertestp = subprocess.Popen(['docker'],
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.STDOUT)
    dockertestout, _ = dockertestp.communicate()
    if dockertestout:
        logger.info("docker test output: \n" + str(dockertestout))

    if dockertestp.returncode != 0:
        logger.error("docker is not installed (correctly)")
        return

    image = 'test_image' + str(uuid.uuid4())
    addcredsp = subprocess.Popen(
        ['studio-add-credentials',
         '--tag=' + image,
         '--base-image=peterzhokhoff/studioml'],
        # stdout=subprocess.PIPE,
        # stderr=subprocess.STDOUT
    )
    addcredsout, _ = addcredsp.communicate()
    if addcredsout:
        logger.info('studio-add-credentials output: \n' + str(addcredsout))
    if addcredsp.returncode != 0:
        logger.error("studio-add-credentials failed.")
        self.fail("studio-add-credentials failed.")

    experiment_name = 'test_remote_worker_baked' + str(uuid.uuid4())
    queue_name = experiment_name
    pw = subprocess.Popen(
        ['studio-start-remote-worker',
         '--queue=' + queue_name,
         '--no-cache',
         '--single-run',
         '--timeout=30',
         '--image=' + image],
        # stdout=subprocess.PIPE,
        # stderr=subprocess.STDOUT
    )

    stubtest_worker(
        self,
        experiment_name=experiment_name,
        runner_args=['--queue=' + queue_name, '--force-git'],
        config_name='test_config_http_client.yaml',
        test_script='tf_hello_world.py',
        script_args=['arg0'],
        expected_output='[ 2.0 6.0 ]',
        queue=PubsubQueue(queue_name))

    workerout, _ = pw.communicate()
    if workerout:
        logger.debug("studio-start-remote-worker output: \n" +
                     str(workerout))

    rmip = subprocess.Popen(['docker', 'rmi', image],
                            stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT)
    rmiout, _ = rmip.communicate()
    if rmiout:
        logger.info('docker rmi output: \n' + str(rmiout))