def test_experiment_lifetime(self):
    """Submit an experiment whose lifetime is already expired, then clean up."""
    test_dir = os.path.dirname(os.path.realpath(__file__))
    logger = logs.getLogger('test_experiment_lifetime')
    logger.setLevel(10)
    config_path = os.path.join(test_dir, 'test_config_http_client.yaml')
    experiment_key = 'test_experiment_lifetime' + str(uuid.uuid4())

    with model.get_db_provider(model.get_config(config_path)) as db:
        # Best-effort removal of any stale experiment with the same key.
        try:
            db.delete_experiment(experiment_key)
        except Exception:
            pass

        cmd = [
            'studio', 'run',
            '--config=' + config_path,
            '--experiment=' + experiment_key,
            '--force-git',
            '--verbose=debug',
            '--lifetime=-10m',
            'stop_experiment.py',
        ]
        runner = subprocess.Popen(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            cwd=test_dir)

        output, _ = runner.communicate()
        if output:
            logger.debug("studio run output: \n" + output.decode())

        db.delete_experiment(experiment_key)
def _test_serving(self, data_in, expected_data_out, wrapper=None):
    """Start a serving experiment, POST data_in to it, compare the JSON
    reply with expected_data_out, and always tear the experiment down.
    """
    self.port = randint(5000, 9000)
    serving_key = 'test_serving_' + str(uuid.uuid4())

    with get_local_queue_lock():
        cmd = [
            'studio', 'run',
            '--force-git',
            '--verbose=debug',
            '--experiment=' + serving_key,
            '--config=' + self.get_config_path(),
            'studio::serve_main',
            '--port=' + str(self.port),
            '--host=localhost'
        ]
        if wrapper:
            cmd.append('--wrapper=' + wrapper)
        subprocess.Popen(cmd, cwd=os.path.dirname(__file__))

    # NOTE(review): assumes the startup wait sits outside the queue lock;
    # confirm against the original indentation.
    time.sleep(60)
    try:
        response = requests.post(
            url='http://localhost:' + str(self.port), json=data_in)
        assert response.json() == expected_data_out
    finally:
        with model.get_db_provider(model.get_config(
                self.get_config_path())) as db:
            db.stop_experiment(serving_key)
            time.sleep(20)
            db.delete_experiment(serving_key)
def test_get_config_env(self):
    """Verify that model.get_config expands environment variables
    referenced in the YAML config (both top-level and nested keys)."""
    value1 = str(uuid.uuid4())
    os.environ['TEST_VAR1'] = value1
    value2 = str(uuid.uuid4())
    os.environ['TEST_VAR2'] = value2

    config = model.get_config(
        os.path.join(os.path.dirname(os.path.realpath(__file__)),
                     'test_config_env.yaml'))

    # assertEquals is a deprecated alias (removed in Python 3.12);
    # use assertEqual.
    self.assertEqual(config['test_key'], value1)
    self.assertEqual(config['test_section']['test_key'], value2)
def main(args=sys.argv[1:]):
    """Parse command-line options and launch the Studio WebUI Flask server."""
    parser = argparse.ArgumentParser(
        description='Studio WebUI server. \
        Usage: studio \
        <arguments>')
    parser.add_argument('--config', help='configuration file', default=None)
    parser.add_argument('--port',
                        help='port to run Flask server on',
                        type=int,
                        default=5000)
    parser.add_argument('--host',
                        help='host name.',
                        default='localhost')
    parser.add_argument(
        '--verbose', '-v',
        help='Verbosity level. Allowed vaules: ' +
        'debug, info, warn, error, crit ' +
        'or numerical value of logger levels.',
        default=None)

    parsed = parser.parse_args(args)

    # Start from the default config; a --config file replaces it wholesale.
    config = model.get_config()
    if parsed.config:
        with open(parsed.config) as f:
            config = yaml.load(f, Loader=yaml.FullLoader)

    if parsed.verbose:
        config['verbose'] = parsed.verbose

    # Publish config and db provider as module-level state for the routes.
    global _config
    global _db_provider
    _config = config
    _db_provider = model.get_db_provider(_config)

    getlogger().setLevel(parse_verbosity(config.get('verbose')))

    global _save_auth_cookie
    _save_auth_cookie = True

    print('Starting Studio UI on port {0}'.format(parsed.port))
    app.run(host=parsed.host, port=parsed.port)
def test_stop_experiment(self):
    """Start an experiment, stop it while it is running, then clean up."""
    test_dir = os.path.dirname(os.path.realpath(__file__))
    logger = logs.getLogger('test_stop_experiment')
    logger.setLevel(10)
    config_path = os.path.join(test_dir, 'test_config_http_client.yaml')
    experiment_key = 'test_stop_experiment' + str(uuid.uuid4())

    with model.get_db_provider(model.get_config(config_path)) as db:
        # Best-effort removal of any stale experiment with the same key.
        try:
            db.delete_experiment(experiment_key)
        except Exception:
            pass

        runner = subprocess.Popen(
            ['studio', 'run',
             '--config=' + config_path,
             '--experiment=' + experiment_key,
             '--force-git',
             '--verbose=debug',
             'stop_experiment.py'],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            cwd=test_dir)

        # wait till experiment spins up
        experiment = None
        while experiment is None or experiment.status == 'waiting':
            time.sleep(1)
            try:
                experiment = db.get_experiment(experiment_key)
            except BaseException:
                pass

        logger.info('Stopping experiment')
        db.stop_experiment(experiment_key)

        output, _ = runner.communicate()
        if output:
            logger.debug("studio run output: \n" + output.decode())

        db.delete_experiment(experiment_key)
def __init__(
        self,
        # Name of experiment
        experimentId,
        # Config yaml file
        config=None,
        # Number of remote workers to spin up
        num_workers=1,
        # Compute requirements, amount of RAM, GPU, etc.
        # Default is None (not a mutable {} default, which would be
        # shared across all calls).
        resources_needed=None,
        # Name of the queue for submission to a server.
        queue=None,
        # What computer resource to use, either AWS, Google, or local
        cloud=None,
        # Timeout for cloud instances
        cloud_timeout=100,
        # Bid price for EC2 spot instances
        bid='100%',
        # Keypair to use for EC2 workers
        ssh_keypair=None,
        # If true, get results that are submitted by other instances of CS
        resumable=False,
        # Whether to clean the submission queue on initialization
        clean_queue=True,
        # Whether to enable autoscaling for EC2 instances
        queue_upscaling=True,
        # Whether to delete the queue on shutdown
        shutdown_del_queue=False,
        # delay between queries for results
        sleep_time=1):
    """Set up configuration, resource requirements, worker manager and
    submission queue for this completion service instance."""
    self.config = model.get_config(config)
    self.cloud = cloud
    self.experimentId = experimentId
    self.project_name = "completion_service_" + experimentId

    # Copy the module-level defaults: the original code aliased
    # DEFAULT_RESOURCES_NEEDED and then .update()-ed it in place,
    # leaking per-instance settings into every later instance.
    self.resources_needed = dict(DEFAULT_RESOURCES_NEEDED)
    if self.config.get('resources_needed'):
        self.resources_needed.update(self.config.get('resources_needed'))
    if resources_needed:
        self.resources_needed.update(resources_needed)

    self.wm = runner.get_worker_manager(self.config, self.cloud)

    self.logger = logs.getLogger(self.__class__.__name__)
    self.verbose_level = model.parse_verbosity(self.config['verbose'])
    self.logger.setLevel(self.verbose_level)

    self.queue = runner.get_queue(queue, self.cloud, self.verbose_level)
    self.queue_name = self.queue.get_name()
    self.clean_queue = clean_queue
    if self.clean_queue:
        self.queue.clean()

    self.cloud_timeout = cloud_timeout
    self.bid = bid
    self.ssh_keypair = ssh_keypair
    self.submitted = set([])
    self.num_workers = num_workers
    self.resumable = resumable
    self.queue_upscaling = queue_upscaling
    self.shutdown_del_queue = shutdown_del_queue
    self.use_spot = cloud in ['ec2spot', 'gcspot']
    self.sleep_time = sleep_time
def get_config():
    """Lazily initialize and return the module-level configuration."""
    global _config
    _config = _config if _config is not None else model.get_config()
    return _config
import requests
from requests.exceptions import ChunkedEncodingError

from .experiment import experiment_from_dict
from .auth import get_and_verify_user, get_auth
from .util import parse_verbosity
from studio.util import logs

# Flask application serving the Studio dashboard UI.
app = Flask(__name__)

# Seconds after which the cached db provider is considered stale.
DB_PROVIDER_EXPIRATION = 1800

# Module-level state shared by the route handlers below and by main().
_db_provider_timestamp = None
_db_provider = None
_config = model.get_config()
_tensorboard_dirs = {}
_save_auth_cookie = False

logger = None


@app.route('/')
def dashboard():
    # Landing page: rendered experiment dashboard.
    return _render('dashboard.html')


@app.route('/projects')
def projects():
    # Project listing page.
    return _render('projects.html')
def stubtest_worker(
        testclass,
        experiment_name,
        runner_args,
        config_name,
        test_script,
        expected_output,
        script_args=None,
        queue=None,
        wait_for_experiment=True,
        delete_when_done=True,
        test_output=True,
        test_workspace=True):
    """Submit test_script via `studio run`, optionally wait for it to
    finish, and verify its output/workspace against expectations.

    Returns the db provider so callers can do further checks.
    Raises whatever the verification steps raise (after printing the
    worker output for diagnosis).
    """
    # Avoid mutable / eagerly-constructed defaults: the original
    # signature used script_args=[] and queue=LocalQueue(), both
    # created once at import time and shared between all calls.
    if script_args is None:
        script_args = []
    if queue is None:
        queue = LocalQueue()

    my_path = os.path.dirname(os.path.realpath(__file__))
    config_name = os.path.join(my_path, config_name)
    logger = logs.getLogger('stubtest_worker')
    logger.setLevel(10)

    queue.clean()

    # Best-effort removal of any stale experiment with the same name.
    with model.get_db_provider(model.get_config(config_name)) as db:
        try:
            db.delete_experiment(experiment_name)
        except Exception:
            pass

    os.environ['PYTHONUNBUFFERED'] = 'True'
    p = subprocess.Popen(['studio', 'run'] + runner_args +
                         ['--config=' + config_name,
                          '--verbose=debug',
                          '--force-git',
                          '--experiment=' + experiment_name,
                          test_script] + script_args,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.STDOUT,
                         close_fds=True,
                         cwd=my_path)

    pout, _ = p.communicate()

    if pout:
        logger.debug("studio run output: \n" + sixdecode(pout))

    # Recover the submitted experiment key from the runner's output.
    splitpout = sixdecode(pout).split('\n')
    experiments = [line.split(' ')[-1] for line in splitpout
                   if line.startswith('studio run: submitted experiment')]
    logger.debug("added experiments: {}".format(experiments))

    db = model.get_db_provider(model.get_config(config_name))
    experiment_name = experiments[0]

    try:
        experiment = db.get_experiment(experiment_name)
        if wait_for_experiment:
            # Poll with a short sleep instead of the original busy-wait
            # (matches the sibling helper and avoids pegging a CPU).
            while not experiment or not experiment.status == 'finished':
                time.sleep(1)
                experiment = db.get_experiment(experiment_name)

        if test_output:
            with open(db.get_artifact(experiment.artifacts['output']),
                      'r') as f:
                data = f.read()
            split_data = data.strip().split('\n')
            print(data)
            # assertEquals is deprecated (removed in Python 3.12).
            testclass.assertEqual(split_data[-1], expected_output)

        if test_workspace:
            check_workspace(testclass, db, experiment_name)

        if delete_when_done:
            retry(lambda: db.delete_experiment(experiment_name),
                  sleep_time=10)

        return db

    except Exception as e:
        print("Exception {} raised during test".format(e))
        print("worker output: \n {}".format(pout))
        print("Exception trace:")
        print(traceback.format_exc())
        raise e
def main(args=sys.argv[1:]):
    """Entry point for `studio run`.

    Parses runner arguments, determines the script/container/rerun mode,
    builds the experiment description(s) — including hyperparameter
    sweeps via grid search or a pluggable optimizer — submits them to an
    execution queue and optionally spins up workers.
    """
    logger = logs.get_logger('studio-runner')
    parser = argparse.ArgumentParser(description='Studio runner. \
        Usage: studio run <runner_arguments> \
        script <script_arguments>')
    parser.add_argument('--config', help='configuration file', default=None)
    parser.add_argument('--project', help='name of the project', default=None)
    parser.add_argument(
        '--experiment', '-e',
        help='Name of the experiment. If none provided, ' +
             'random uuid will be generated',
        default=None)
    parser.add_argument(
        '--guest',
        help='Guest mode (does not require db credentials)',
        action='store_true')
    parser.add_argument(
        '--force-git',
        help='If run in a git directory, force running the experiment ' +
             'even if changes are not commited',
        action='store_true')
    parser.add_argument('--gpus',
                        help='Number of gpus needed to run the experiment',
                        type=int,
                        default=None)
    parser.add_argument('--cpus',
                        help='Number of cpus needed to run the experiment' +
                             ' (used to configure cloud instance)',
                        type=int,
                        default=None)
    parser.add_argument('--ram',
                        help='Amount of RAM needed to run the experiment' +
                             ' (used to configure cloud instance), ex: 10G, 10GB',
                        default=None)
    parser.add_argument('--gpuMem',
                        help='Amount of GPU RAM needed to run the experiment',
                        default=None)
    parser.add_argument(
        '--hdd',
        help='Amount of hard drive space needed to run the experiment' +
             ' (used to configure cloud instance), ex: 10G, 10GB',
        default=None)
    parser.add_argument('--queue', '-q',
                        help='Name of the remote execution queue',
                        default=None)
    parser.add_argument(
        '--cloud',
        help='Cloud execution mode. Could be gcloud, gcspot, ec2 or ec2spot',
        default=None)
    parser.add_argument(
        '--bid',
        help='Spot instance price bid, specified in USD or in percentage ' +
             'of on-demand instance price. Default is %(default)s',
        default='100%')
    parser.add_argument(
        '--capture-once', '-co',
        help='Name of the immutable artifact to be captured. ' +
             'It will be captured once before the experiment is run',
        default=[], action='append')
    parser.add_argument(
        '--capture', '-c',
        help='Name of the mutable artifact to be captured continuously',
        default=[], action='append')
    parser.add_argument(
        '--reuse', '-r',
        help='Name of the artifact from another experiment to use',
        default=[], action='append')
    parser.add_argument('--verbose', '-v',
                        help='Verbosity level. Allowed values: ' +
                             'debug, info, warn, error, crit ' +
                             'or numerical value of logger levels.',
                        default=None)
    parser.add_argument(
        '--metric',
        help='Metric to show in the summary of the experiment, ' +
             'and to base hyperparameter search on. ' +
             'Refers a scalar value in tensorboard log ' +
             'example: --metric=val_loss[:final | :min | :max] to report ' +
             'validation loss in the end of the keras experiment ' +
             '(or smallest or largest throughout the experiment for :min ' +
             'and :max respectively)',
        default=None)
    parser.add_argument(
        '--hyperparam', '-hp',
        help='Try out multiple values of a certain parameter. ' +
             'For example, --hyperparam=learning_rate:0.01:0.1:l10 ' +
             'will instantiate 10 versions of the script, replace ' +
             'learning_rate with a one of the 10 values for learning ' +
             'rate that lies on a log grid from 0.01 to 0.1, create '
             'experiments and place them in the queue.',
        default=[], action='append')
    parser.add_argument('--num-workers',
                        help='Number of local or cloud workers to spin up',
                        type=int,
                        default=None)
    parser.add_argument(
        '--python-pkg',
        help='Python package not present in the current environment ' +
             'that is needed for experiment. Only compatible with ' +
             'remote and cloud workers for now',
        default=[], action='append')
    parser.add_argument(
        '--ssh-keypair',
        help='Name of the SSH keypair used to access the EC2 ' +
             'instances directly',
        default=None)
    parser.add_argument(
        '--optimizer', '-opt',
        help='Name of optimizer to use, by default is grid search. ' +
             'The name of the optimizer must either be in ' +
             'studio/optimizer_plugins ' +
             'directory or the path to the optimizer source file ' +
             'must be supplied. ',
        default='grid')
    parser.add_argument(
        '--cloud-timeout',
        help="Time (in seconds) that cloud workers wait for messages. " +
             "If negative, " +
             "wait for the first message in the queue indefinitely " +
             "and shut down " +
             "as soon as no new messages are available. " +
             "If zero, don't wait at all." +
             "Default value is %(default)d",
        type=int,
        default=300)
    parser.add_argument('--user-startup-script',
                        help='Path of script to run immediately ' +
                             'before running the remote worker',
                        default=None)
    parser.add_argument(
        '--branch',
        help='Branch of studioml to use when running remote worker, useful ' +
             'for debugging pull requests. Default is current',
        default=None)
    parser.add_argument(
        '--max-duration',
        help='Max experiment runtime (i.e. time after which experiment ' +
             'should be killed no matter what.). Examples of values ' +
             'might include 5h, 48h2m10s',
        default=None)
    parser.add_argument(
        '--lifetime',
        help='Max experiment lifetime (i.e. wait time after which ' +
             'experiment loses relevance and should not be started)' +
             ' Examples include 240h30m10s',
        default=None)
    parser.add_argument(
        '--container',
        help='Singularity container in which experiment should be run. ' +
             'Assumes that container has all dependencies installed',
        default=None)
    parser.add_argument('--port',
                        help='Ports to open on a cloud instance',
                        default=[], action='append')

    # detect which argument is the script filename
    # and attribute all arguments past that index as related to the script
    (runner_args, other_args) = parser.parse_known_args(args)
    py_suffix_args = [i for i, arg in enumerate(args)
                      if arg.endswith('.py') or '::' in arg]

    rerun = False
    if len(py_suffix_args) < 1:
        # No script on the command line: either a container job
        # (no extra args) or a re-run of an existing experiment key.
        print('None of the arugments end with .py')
        if len(other_args) == 0:
            print("Trying to run a container job")
            assert runner_args.container is not None
            exec_filename = None
        elif len(other_args) == 1:
            print("Treating last argument as experiment key to rerun")
            rerun = True
            experiment_key = args[-1]
        else:
            print("Too many extra arguments - should be either none " +
                  "for container job or one for experiment re-run")
            sys.exit(1)
    else:
        # Everything after the first .py / '::' argument belongs
        # to the script, not to the runner.
        script_index = py_suffix_args[0]
        exec_filename, other_args = args[script_index], args[script_index + 1:]
        runner_args = parser.parse_args(args[:script_index])

    # TODO: Queue the job based on arguments and only then execute.

    config = model.get_config(runner_args.config)

    if runner_args.verbose:
        config['verbose'] = runner_args.verbose

    if runner_args.guest:
        config['database']['guest'] = True

    if runner_args.container:
        # The container itself travels as an immutable artifact.
        runner_args.capture_once.append(
            runner_args.container + ':_singularity')

    verbose = model.parse_verbosity(config['verbose'])
    logger.setLevel(verbose)

    # Refuse to run from a dirty git repo unless --force-git was given
    # (re-runs are exempt: they use the stored workspace).
    if git_util.is_git() and not git_util.is_clean() and not rerun:
        logger.warn('Running from dirty git repo')
        if not runner_args.force_git:
            logger.error(
                'Specify --force-git to run experiment from dirty git repo')
            sys.exit(1)

    resources_needed = _parse_hardware(runner_args, config['resources_needed'])
    logger.debug('resources requested: ')
    logger.debug(str(resources_needed))

    # Set up default artifacts:
    # note that their "local" paths will be updated
    # on Experiment creation,
    # but they must have "local" field defined
    # to have storage credentials set up properly.
    artifacts = {
        'workspace': {
            'mutable': False,
            'local': os.getcwd(),
            'unpack': True
        },
        'modeldir': {
            'mutable': True,
            'local': '',
            'unpack': True
        },
        'retval': {
            'mutable': True,
            'local': '',
            'unpack': True
        },
        'output': {
            'mutable': True,
            'local': '',
            'unpack': True
        },
        'tb': {
            'mutable': True,
            'local': '',
            'unpack': True
        },
        '_metrics': {
            'mutable': True,
            'local': '',
            'unpack': True
        },
        '_metadata': {
            'mutable': True,
            'local': '',
            'unpack': True
        }
    }

    artifacts.update(_parse_artifacts(runner_args.capture, mutable=True))
    artifacts.update(_parse_artifacts(runner_args.capture_once, mutable=False))
    with model.get_db_provider(config) as db:
        artifacts.update(_parse_external_artifacts(runner_args.reuse, db))

    logger.debug("Task artifacts: %s", repr(artifacts))
    storage_creds = config.get('storage', {}).get(KEY_CREDENTIALS, None)
    _setup_artifacts_creds(artifacts, storage_creds)

    if runner_args.branch:
        config['cloud']['branch'] = runner_args.branch

    if runner_args.user_startup_script:
        config['cloud']['user_startup_script'] = \
            runner_args.user_startup_script

    if runner_args.lifetime:
        config['experimentLifetime'] = runner_args.lifetime

    queueLifetime = None

    if any(runner_args.hyperparam):
        if runner_args.optimizer == "grid":
            # Grid search: expand all hyperparameter combinations up
            # front and submit them in one batch.
            experiments = _add_hyperparam_experiments(
                exec_filename,
                other_args,
                runner_args,
                artifacts,
                resources_needed,
                logger)

            queue = model.get_queue(queue_name=runner_args.queue,
                                    cloud=runner_args.cloud,
                                    config=config,
                                    close_after=queueLifetime,
                                    logger=logger,
                                    verbose=verbose)

            queue_name = submit_experiments(
                experiments,
                config=config,
                logger=logger,
                queue=queue)

            spin_up_workers(
                runner_args,
                config,
                resources_needed,
                queue_name=queue_name,
                verbose=verbose)
        else:
            # Pluggable optimizer: resolve the module either from the
            # bundled optimizer_plugins directory or from a user path.
            opt_modulepath = os.path.join(
                os.path.dirname(os.path.abspath(__file__)),
                "optimizer_plugins",
                runner_args.optimizer + ".py")
            if not os.path.exists(opt_modulepath):
                opt_modulepath = os.path.abspath(
                    os.path.expanduser(runner_args.optimizer))
            logger.info('optimizer path: %s' % opt_modulepath)

            assert os.path.exists(opt_modulepath)
            sys.path.append(os.path.dirname(opt_modulepath))
            opt_module = importlib.import_module(
                os.path.basename(opt_modulepath.replace(".py", '')))

            h = HyperparameterParser(runner_args, logger)
            hyperparams = h.parse()
            optimizer = getattr(
                opt_module,
                "Optimizer")(
                hyperparams,
                config['optimizer'],
                logger)

            # ask/tell loop: each iteration submits a population of
            # experiments and feeds their fitnesses back to the optimizer.
            workers_started = False
            queue_name = runner_args.queue
            while not optimizer.stop():
                hyperparam_pop = optimizer.ask()
                hyperparam_tuples = h.convert_to_tuples(hyperparam_pop)

                experiments = _add_hyperparam_experiments(
                    exec_filename,
                    other_args,
                    runner_args,
                    artifacts,
                    resources_needed,
                    logger,
                    optimizer=optimizer,
                    hyperparam_tuples=hyperparam_tuples)

                queue = model.get_queue(queue_name=queue_name,
                                        cloud=runner_args.cloud,
                                        config=config,
                                        close_after=queueLifetime,
                                        logger=logger,
                                        verbose=verbose)

                queue_name = submit_experiments(
                    experiments,
                    config=config,
                    logger=logger,
                    queue=queue)

                if not workers_started:
                    spin_up_workers(
                        runner_args,
                        config,
                        resources_needed,
                        queue_name=queue_name,
                        verbose=verbose)
                    workers_started = True

                fitnesses, behaviors = get_experiment_fitnesses(
                    experiments, optimizer, config, logger)

                # Older optimizers may not accept behaviors; fall back
                # to the two-argument tell().
                try:
                    optimizer.tell(hyperparam_pop, fitnesses, behaviors)
                except BaseException:
                    util.check_for_kb_interrupt()
                    optimizer.tell(hyperparam_pop, fitnesses)

                try:
                    optimizer.disp()
                except BaseException:
                    util.check_for_kb_interrupt()
                    logger.warn('Optimizer has no disp() method')
    else:
        if rerun:
            # Re-run: clone the stored experiment under a new key and
            # freeze its artifacts.
            with model.get_db_provider(config) as db:
                experiment = db.get_experiment(experiment_key)
                new_key = runner_args.experiment if runner_args.experiment \
                    else experiment_key + '_rerun' + str(uuid.uuid4())
                experiment.key = new_key
                for _, art in six.iteritems(experiment.artifacts):
                    art['mutable'] = False

                experiments = [experiment]
        else:
            experiments = [create_experiment(
                filename=exec_filename,
                args=other_args,
                experiment_name=runner_args.experiment,
                project=runner_args.project,
                artifacts=artifacts,
                resources_needed=resources_needed,
                metric=runner_args.metric,
                max_duration=runner_args.max_duration,
                dependency_policy=StudioDependencyPolicy())]

        queue = model.get_queue(queue_name=runner_args.queue,
                                cloud=runner_args.cloud,
                                config=config,
                                close_after=queueLifetime,
                                logger=logger,
                                verbose=verbose)

        queue_name = submit_experiments(
            experiments,
            config=config,
            logger=logger,
            queue=queue)

        spin_up_workers(
            runner_args,
            config,
            resources_needed,
            queue_name=queue_name,
            verbose=verbose)

    return
def get_db_provider(self):
    """Return a db provider whose serverUrl points at the local server."""
    cfg = model.get_config(self.client_config_file)
    cfg['database']['serverUrl'] = 'http://localhost:' + str(self.port)
    return model.get_db_provider(cfg)
def get_db_provider(self, config_name):
    """Load config_name from this file's directory and open a db provider."""
    here = os.path.dirname(os.path.realpath(__file__))
    config_file = os.path.join(here, config_name)
    return model.get_db_provider(model.get_config(config_file))
def stubtest_worker(testclass,
                    experiment_name,
                    runner_args,
                    config_name,
                    test_script,
                    expected_output,
                    script_args=[],
                    queue=LocalQueue(),
                    wait_for_experiment=True,
                    delete_when_done=True,
                    test_output=True):
    """Run test_script through `studio run`, verify its saved arguments,
    final output line and workspace, and return the db provider.

    NOTE(review): `script_args=[]` and `queue=LocalQueue()` are evaluated
    once at import time and shared across calls — confirm intent.
    """
    my_path = os.path.dirname(os.path.realpath(__file__))
    config_name = os.path.join(my_path, config_name)
    logger = logging.getLogger('stubtest_worker')
    logger.setLevel(10)

    queue.clean()

    # Best-effort removal of any stale experiment with the same name.
    with model.get_db_provider(model.get_config(config_name)) as db:
        try:
            db.delete_experiment(experiment_name)
        except Exception:
            pass

    p = subprocess.Popen(['studio', 'run'] + runner_args + [
        '--config=' + config_name,
        '--verbose=debug',
        '--force-git',
        '--experiment=' + experiment_name,
        test_script
    ] + script_args,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        close_fds=True,
        cwd=my_path)

    pout, _ = p.communicate()

    if pout:
        # NOTE(review): pout is bytes under Python 3; this concatenation
        # assumes a Python 2 str — confirm which interpreter runs this.
        logger.debug("studio run output: \n" + pout)

    db = model.get_db_provider(model.get_config(config_name))
    experiments = [
        e for e in db.get_user_experiments()
        if e.startswith(experiment_name)
    ]
    assert len(experiments) == 1
    experiment_name = experiments[0]

    try:
        # test saved arguments
        keybase = "/experiments/" + experiment_name
        saved_args = db._get(keybase + '/args')
        if saved_args is not None:
            testclass.assertTrue(len(saved_args) == len(script_args))
            for i in range(len(saved_args)):
                testclass.assertTrue(saved_args[i] == script_args[i])
            testclass.assertTrue(db._get(keybase + '/filename') == test_script)
        else:
            testclass.assertTrue(script_args is None or len(script_args) == 0)

        experiment = db.get_experiment(experiment_name)
        if wait_for_experiment:
            # Poll once per second until the worker marks it finished.
            while not experiment.status == 'finished':
                time.sleep(1)
                experiment = db.get_experiment(experiment_name)

        if test_output:
            # The last line of the captured output artifact must match.
            with open(db.store.get_artifact(experiment.artifacts['output']),
                      'r') as f:
                data = f.read()
            split_data = data.strip().split('\n')
            testclass.assertEquals(split_data[-1], expected_output)

        check_workspace(testclass, db, experiment_name)

        if delete_when_done:
            db.delete_experiment(experiment_name)

        return db

    except Exception as e:
        print("Exception {} raised during test".format(e))
        print("worker output: \n {}".format(pout))
        print("Exception trace:")
        print(traceback.format_exc())
        raise e
def studio_run(self, line, cell=None):
    """Jupyter cell magic: run the cell as a Studio experiment.

    Pickles the picklable part of the notebook namespace, ships it and
    the cell code as artifacts, runs the experiment via the studio
    runner, then merges the resulting namespace back into the notebook.
    """
    script_text = []
    pickleable_ns = {}

    # Imported modules are re-imported by name in the generated script;
    # everything else is kept only if it survives a pickle round-trip.
    for varname, var in six.iteritems(self.shell.user_ns):
        if not varname.startswith('__'):
            if isinstance(var, ModuleType) and \
                    var.__name__ != 'studio.magics':
                script_text.append(
                    'import {} as {}'.format(var.__name__, varname)
                )
            else:
                try:
                    pickle.dumps(var)
                    pickleable_ns[varname] = var
                except BaseException:
                    pass

    script_text.append(cell)
    script_text = '\n'.join(script_text)

    stub_path = os.path.join(
        os.path.dirname(os.path.realpath(__file__)),
        'run_magic.py.stub')
    with open(stub_path) as f:
        script_stub = f.read()

    script = script_stub.format(script=script_text)
    experiment_key = str(int(time.time())) + \
        "_jupyter_" + str(uuid.uuid4())
    print('Running studio with experiment key ' + experiment_key)

    config = model.get_config()
    if config['database']['type'] == 'http':
        print("Experiment progress can be viewed/shared at:")
        print("{}/experiment/{}".format(
            config['database']['serverUrl'], experiment_key))

    # Stage the notebook's working directory plus the generated script
    # as the experiment workspace.
    workspace_new = fs_tracker.get_artifact_cache(
        'workspace', experiment_key)
    rsync_cp('.', workspace_new)
    with open(os.path.join(workspace_new, '_script.py'), 'w') as f:
        f.write(script)

    ns_path = fs_tracker.get_artifact_cache('_ns', experiment_key)
    with gzip.open(ns_path, 'wb') as f:
        f.write(pickle.dumps(pickleable_ns))

    if any(line):
        runner_args = line.strip().split(' ')
    else:
        runner_args = []

    runner_args.append('--capture={}:_ns'.format(ns_path))
    runner_args.append('--capture-once=.:workspace')
    runner_args.append('--force-git')
    runner_args.append('--experiment=' + experiment_key)

    notebook_cwd = os.getcwd()
    os.chdir(workspace_new)
    print(runner_args + ['_script.py'])
    runner_main(runner_args + ['_script.py'])
    os.chdir(notebook_cwd)

    with model.get_db_provider() as db:
        while True:
            experiment = db.get_experiment(experiment_key)
            if experiment and experiment.status == 'finished':
                break
            time.sleep(10)

        new_ns_path = db.get_artifact(experiment.artifacts['_ns'])

    # FIX: open in binary mode — pickle.loads requires bytes on
    # Python 3; the original text-mode open returned str.
    # NOTE(review): the namespace was written via gzip above; if
    # get_artifact returns the still-compressed file, this should be
    # gzip.open(new_ns_path, 'rb') — confirm against the artifact store.
    with open(new_ns_path, 'rb') as f:
        new_ns = pickle.loads(f.read())

    self.shell.user_ns.update(new_ns)
def main(args=sys.argv):
    """Entry point for the studio remote worker.

    Parses worker options, selects a queue implementation from the queue
    name prefix, and runs the worker loop until timeout or (with
    --single-run) after one experiment.
    """
    logger = logs.get_logger('studio-remote-worker')
    parser = argparse.ArgumentParser(description='Studio remote worker. \
        Usage: studio-remote-worker \
        ')
    parser.add_argument('--config', help='configuration file', default=None)
    parser.add_argument('--guest',
                        help='Guest mode (does not require db credentials)',
                        action='store_true')
    parser.add_argument(
        '--single-run',
        help='quit after a single run (regardless of the state of the queue)',
        action='store_true')

    parser.add_argument('--queue', help='queue name', required=True)
    parser.add_argument('--verbose', '-v',
                        help='Verbosity level. Allowed vaules: ' +
                             'debug, info, warn, error, crit ' +
                             'or numerical value of logger levels.',
                        default=None)
    parser.add_argument(
        '--timeout', '-t',
        help='Timeout after which remote worker stops listening (in seconds)',
        type=int,
        default=100)

    parsed_args, script_args = parser.parse_known_args(args)
    verbose = parse_verbosity(parsed_args.verbose)
    logger.setLevel(verbose)

    config = None
    if parsed_args.config is not None:
        config = model.get_config(parsed_args.config)

    # Queue implementation is dispatched on the queue-name prefix:
    # ec2_/sqs_ -> SQS, rmq_ -> RabbitMQ (cached), otherwise PubSub.
    if parsed_args.queue.startswith('ec2_') or \
            parsed_args.queue.startswith('sqs_'):
        queue = SQSQueue(parsed_args.queue, verbose=verbose)
    elif parsed_args.queue.startswith('rmq_'):
        queue = get_cached_queue(
            name=parsed_args.queue,
            route='StudioML.' + parsed_args.queue,
            config=config,
            logger=logger,
            verbose=verbose)
    else:
        queue = PubsubQueue(parsed_args.queue, verbose=verbose)

    logger.info('Waiting for work')

    # Non-positive timeouts mean: take the first message, then stop
    # waiting once the queue drains.
    timeout_before = parsed_args.timeout
    timeout_after = timeout_before if timeout_before > 0 else 0

    # wait_for_messages(queue, timeout_before, logger)

    logger.info('Starting working')
    worker_loop(queue, parsed_args,
                single_experiment=parsed_args.single_run,
                timeout=timeout_after,
                verbose=verbose)
def __init__(
        self,
        # Name of experiment
        experimentId,
        # Completion service configuration
        cs_config=None,
        # used to pass a studioML configuration block read by client software
        studio_config=None,
        # Studio config yaml file
        studio_config_file=None,
        shutdown_del_queue=False):
    """Configure a CompletionService: resources, queue, cloud and
    worker manager, derived from cs_config and the studio config file."""
    # StudioML configuration
    self.config = model.get_config(studio_config_file)
    self.logger = logs.get_logger(self.__class__.__name__)
    self.verbose_level = parse_verbosity(self.config['verbose'])
    self.logger.setLevel(self.verbose_level)

    # cs_config defaults to None but is dereferenced throughout;
    # fall back to an empty dict so the .get() defaults apply.
    if cs_config is None:
        cs_config = {}

    # Setup Completion Service instance properties
    # based on configuration
    self.experimentId = experimentId
    self.project_name = "completion_service_" + experimentId
    self.resumable = RESUMABLE
    self.clean_queue = CLEAN_QUEUE
    self.queue_upscaling = QUEUE_UPSCALING
    self.num_workers = int(cs_config.get('num_workers', 1))
    self.cloud_timeout = cs_config.get('timeout')
    self.bid = cs_config.get('bid')
    self.ssh_keypair = cs_config.get('ssh_keypair')
    self.sleep_time = cs_config.get('sleep_time')
    self.shutdown_del_queue = shutdown_del_queue

    # Figure out request for resources:
    # copy the module-level defaults instead of aliasing them — the
    # original code mutated DEFAULT_RESOURCES_NEEDED in place, and
    # crashed with update(None) when 'resources_needed' was absent.
    self.resources_needed = dict(DEFAULT_RESOURCES_NEEDED)
    resources_needed = cs_config.get('resources_needed')
    if resources_needed:
        self.resources_needed.update(resources_needed)
    studio_resources = self.config.get('resources_needed')
    if studio_resources:
        self.resources_needed.update(studio_resources)

    # Figure out task queue and cloud we are going to use:
    queue_name = cs_config.get('queue')
    cloud_name = cs_config.get('cloud')
    if cs_config.get('local'):
        queue_name = None
        cloud_name = None
    elif queue_name is not None:
        # An externally supplied queue is never deleted on shutdown.
        self.shutdown_del_queue = False
        if cloud_name in ['ec2spot', 'ec2']:
            assert queue_name.startswith("sqs_")
    else:
        # FIX: was `self.experiment_id`, an attribute that is never set
        # (the instance attribute is experimentId) -> AttributeError.
        queue_name = self.experimentId
        if cloud_name in ['ec2spot', 'ec2']:
            queue_name = "sqs_" + queue_name

    self.cloud = cloud_name
    # RabbitMQ queues are local-submission only.
    if queue_name is not None and queue_name.startswith("rmq_"):
        assert self.cloud is None

    self.wm = model.get_worker_manager(self.config, self.cloud)

    if queue_name is not None:
        self.logger.info(
            "CompletionService configured with queue {0}"
            .format(queue_name))

    self.queue = model.get_queue(queue_name=queue_name,
                                 cloud=self.cloud,
                                 config=self.config,
                                 logger=self.logger,
                                 verbose=self.verbose_level)
    self.queue_name = self.queue.get_name()

    self.submitted = {}
    self.use_spot = cloud_name in ['ec2spot', 'gcspot']

    self.logger.info("Project name: {0}".format(self.project_name))
    self.logger.info("Initial/final queue name: {0}, {1}"
                     .format(queue_name, self.queue_name))
    self.logger.info("Cloud name: {0}".format(self.cloud))
def get_db_provider(self):
    """Build a db provider whose serverUrl targets the local test server."""
    cfg = model.get_config('test_config_http_client.yaml')
    cfg['database']['serverUrl'] = 'http://localhost:' + str(self.port)
    return model.get_db_provider(cfg)