def main(args):
    """Worker entry point: pull one Schedy job, train an MNIST model, and
    record its metrics and saved weights.

    NOTE(review): an earlier comment said the model trains for one epoch,
    but the code uses ``epochs=5`` — behavior is kept exactly as-is.
    """
    # Connect to Schedy and look up the target experiment.
    db = schedy.SchedyDB()
    experiment = db.get_experiment(args.experiment)

    # Ensure the directory holding the saved models exists; tolerate the
    # race where it was created in the meantime.
    try:
        os.makedirs(args.models_dir)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise

    # Load MNIST, reshape to single-channel images and scale pixels to [0, 1].
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    x_train = x_train.reshape(x_train.shape[0], IMG_SIZE, IMG_SIZE, 1).astype(K.floatx()) / 255.
    x_test = x_test.reshape(x_test.shape[0], IMG_SIZE, IMG_SIZE, 1).astype(K.floatx()) / 255.
    y_train = keras.utils.to_categorical(y_train, NUM_CLASSES)
    y_test = keras.utils.to_categorical(y_test, NUM_CLASSES)

    # Process a single job; the with-statement reports success or failure
    # back to Schedy and pushes the results when it exits.
    with experiment.next_job() as job:
        model = make_model(job)
        model.fit(x_train, y_train, epochs=5, batch_size=256)
        loss, accuracy = model.evaluate(x_test, y_test, batch_size=256)

        # Append this run's metrics, then track the best values seen so far.
        job.results.setdefault('loss', []).append(loss)
        job.results.setdefault('accuracy', []).append(accuracy)
        job.results['min_loss'] = min(job.results['loss'])
        job.results['max_accuracy'] = max(job.results['accuracy'])

        # Persist the weights so a later job can pick up from them.
        weights_path = os.path.join(args.models_dir, job.job_id + '.h5')
        job.results['weights_path'] = weights_path
        model.save_weights(weights_path)
def cmd_add(args):
    """Create a new experiment on the Schedy server.

    ``manual`` schedulers take no extra arguments; ``random`` schedulers
    read hyperparameters as flat triples of name/distribution/JSON-params.
    """
    db = schedy.SchedyDB(config_path=args.config)
    if args.scheduler == 'manual':
        exp = schedy.ManualSearch(args.experiment, status=args.status)
    elif args.scheduler == 'random':
        # Hyperparameters must come in complete triples.
        if len(args.hyperparameters) % 3 != 0:
            args.parser.error('Invalid hyperparameters (not a list of name/distribution/params).')
        distributions = {}
        for idx in range(0, len(args.hyperparameters), 3):
            hp_name, dist_name, params_txt = args.hyperparameters[idx:idx + 3]
            if hp_name in distributions:
                args.parser.error('Duplicate hyperparameter: {}.'.format(hp_name))
            # Resolve the distribution type by name; parser.error exits on failure.
            try:
                dist_type = schedy.random._DISTRIBUTION_TYPES[dist_name]
            except KeyError:
                args.parser.error('Invalid distribution: {}.'.format(dist_name))
            # Parse the JSON parameters and build the distribution object.
            try:
                distributions[hp_name] = dist_type._from_args(json.loads(params_txt))
            except (TypeError, ValueError, KeyError) as e:
                args.parser.error('Invalid distribution parameters for {} ({!r}).'.format(hp_name, e))
        exp = schedy.RandomSearch(args.experiment, status=args.status, distributions=distributions)
    db.add_experiment(exp)
def cmd_show(args):
    """Print an experiment, or one of its jobs when ``--job`` is given."""
    db = schedy.SchedyDB(config_path=args.config)
    experiment = db.get_experiment(args.experiment)
    if args.job is not None:
        print_job(experiment.get_job(args.job))
    else:
        print_exp(experiment)
def cmd_run(args):
    # Run an external command for each job of an experiment. The command's
    # stdout is mirrored to ours; any lines it prints between the markers
    # "--- RESULTS ---" and "--- END RESULTS ---" are parsed as a JSON
    # object and pushed to Schedy as the job's results.
    db = schedy.SchedyDB(config_path=args.config)
    exp = db.get_experiment(args.experiment)
    while True:
        try:
            with exp.next_job() as job:
                # Substitute job values into the command template.
                cmd_args = format_cmd_args(args.cmd, job)
                print('Calling {}'.format(cmd_args))
                output_block = False
                output_content = ''
                # NOTE(review): Popen as a context manager requires
                # Python >= 3.2, yet the PY2 branch below suggests this
                # file still targets Python 2 — confirm supported versions.
                with subprocess.Popen(cmd_args, stdout=subprocess.PIPE, bufsize=1) as p:
                    for line in iter(p.stdout.readline, b''):
                        # Mirror the child's raw output on our stdout.
                        if PY2:
                            sys.stdout.write(line)
                        else:
                            sys.stdout.buffer.write(line)
                        try:
                            line_str = line.decode()
                            # Toggle capture on the marker lines; collect
                            # everything in between.
                            if line_str.rstrip() == '--- RESULTS ---':
                                output_block = True
                            elif line_str.rstrip() == '--- END RESULTS ---':
                                output_block = False
                            elif output_block:
                                output_content += line_str
                        except UnicodeError:
                            # Undecodable bytes cannot be part of the JSON
                            # results block; skip them.
                            pass
                    # Wait for the child so returncode is populated.
                    p.communicate()
                    if p.returncode != 0:
                        raise SubcommandError('Command {} failed with return code {}'.format(cmd_args, p.returncode))
                if not output_content:
                    raise SubcommandError('No results found in output for command {}'.format(cmd_args))
                try:
                    # The captured block must decode to a JSON object;
                    # copy each key/value pair into the job's results.
                    for key, value in dict(json.loads(output_content)).items():
                        job.results[key] = value
                except (TypeError, ValueError) as e:
                    raise_from(SubcommandError('Invalid results from command {}'.format(cmd_args)), e)
        except (SubcommandError, json.JSONDecodeError):
            # NOTE(review): json.JSONDecodeError does not exist on
            # Python 2 — evaluating this except clause would raise
            # AttributeError there; confirm the intended Python versions.
            t, e, tb = sys.exc_info()
            if args.ignore_errors:
                print(e)
            else:
                reraise(t, e, tb)
        except schedy.errors.NoJobError:
            # No job queued for this experiment: stop processing.
            break
        if args.once:
            break
def cmd_rm(args):
    """Delete an experiment (with all its jobs) or a single job.

    Unless ``--force`` is set, the target is printed and the user must
    confirm with 'y' before anything is removed.
    """
    db = schedy.SchedyDB(config_path=args.config)
    experiment = db.get_experiment(args.experiment)
    if args.job is None:
        # Removing the whole experiment.
        if not args.force:
            print_exp(experiment)
            answer = input('Are you sure you want to remove {} and all its jobs? [y/N] '.format(experiment.name))
            if answer.lower() != 'y':
                print('{} was not removed.'.format(experiment.name))
                return
        experiment.delete()
    else:
        # Removing a single job of the experiment.
        job = experiment.get_job(args.job)
        if not args.force:
            print_job(job)
            answer = input('Are you sure you want to remove {}? [y/N] '.format(job.job_id))
            if answer.lower() != 'y':
                print('{} was not removed.'.format(job.job_id))
                return
        job.delete()
def cmd_gen_token(args):
    """Obtain a fresh API token from the Schedy server and save it locally.

    Prompts interactively for the email and/or password when they were not
    supplied on the command line, requests a new token, writes it to the
    configuration file (restricting permissions to the owner when the OS
    allows it), and prints where it was stored.

    Fix: the password branch had been corrupted (a masked
    ``'Password: '******'token'] = args.password`` fragment that is a
    syntax error); it is restored to the obvious if/else on
    ``args.password``. A dead ``pass`` after the reraise block was removed.
    """
    config = {
        'root': args.root,
        'token_type': 'password',
    }
    if args.email is None:
        config['email'] = input('Email: ')
    else:
        config['email'] = args.email
    if args.password is None:
        # getpass avoids echoing the password to the terminal.
        config['token'] = getpass.getpass('Password: ')
    else:
        config['token'] = args.password
    db = schedy.SchedyDB(config_override=config)
    url = urljoin(db.root, 'resettoken/')
    response = db._authenticated_request('POST', url=url)
    schedy.errors._handle_response_errors(response)
    new_content = response.json()
    # Resolve where the token should be written.
    if args.config is None:
        config_path = schedy.core._default_config_path()
    else:
        config_path = args.config
    config_dir = os.path.dirname(config_path)
    if config_dir:
        try:
            os.makedirs(config_dir)
        except OSError:
            # Ignore "already exists"; re-raise anything else with the
            # original traceback (Python 2 compatible).
            t, e, tb = sys.exc_info()
            if e.errno != errno.EEXIST:
                reraise(t, e, tb)
    with open(config_path, 'w') as config_file:
        config_file.write(json_dumps(new_content, cls=schedy.encoding.SchedyJSONEncoder))
    try:
        # Restrict the token file to owner read/write only.
        os.chmod(config_path, stat.S_IRUSR | stat.S_IWUSR)
    except OSError:
        print('Token file permissions could not be set.')
    print('Your token has been saved to {}.'.format(config_path))
def cmd_push(args):
    """Add a job to an experiment, built from CLI status, results and
    hyperparameters (flat name/JSON-value pair lists)."""
    db = schedy.SchedyDB(config_path=args.config)
    kwargs = {}
    # Optional job status.
    if args.status is not None:
        kwargs['status'] = args.status
    # Optional results, given as a flat name/JSON-value list.
    if args.results is not None:
        if len(args.results) % 2 != 0:
            args.parser.error('Invalid results (not a list of name/value).')
        results = {}
        for idx in range(0, len(args.results), 2):
            res_name, res_txt = args.results[idx], args.results[idx + 1]
            if res_name in results:
                args.parser.error('Duplicate result: {}.'.format(res_name))
            try:
                results[res_name] = json.loads(res_txt)
            except (TypeError, ValueError) as e:
                args.parser.error('Invalid value for result {} ({!r}).'.format(res_name, e))
        kwargs['results'] = results
    # Hyperparameters, also a flat name/JSON-value list (mandatory).
    if len(args.hyperparameters) % 2 != 0:
        args.parser.error('Invalid hyperparameters (not a list of name/value).')
    hyperparameters = {}
    for idx in range(0, len(args.hyperparameters), 2):
        hp_name, hp_txt = args.hyperparameters[idx], args.hyperparameters[idx + 1]
        if hp_name in hyperparameters:
            args.parser.error('Duplicate hyperparameter: {}.'.format(hp_name))
        try:
            hyperparameters[hp_name] = json.loads(hp_txt)
        except (TypeError, ValueError) as e:
            args.parser.error('Invalid value for hyperparameter {} ({!r}).'.format(hp_name, e))
    kwargs['hyperparameters'] = hyperparameters
    job = db.get_experiment(args.experiment).add_job(**kwargs)
    print_job(job)
def cmd_list(args):
    """List all experiments, or the jobs of one experiment, in the
    requested output format."""
    db = schedy.SchedyDB(config_path=args.config)
    # Build a table of experiments, or of the jobs of a single experiment.
    if args.experiment is None:
        table = exp_table(db.get_experiments())
    else:
        table = job_table(db.get_experiment(args.experiment).all_jobs())
    # Optional sorting; parser.error exits on an unknown sort key.
    if args.sort is not None:
        try:
            table.sort(args.sort, reverse=args.decreasing)
        except KeyError as e:
            args.parser.error(str(e))
    if args.field is not None:
        table.filter_fields(args.field)
    # Choose the output style: full table, paragraphs, or plain default view.
    if args.table:
        table.print_table()
    elif args.paragraph:
        table.print_paragraphs()
    else:
        if args.field is None:
            table.filter_categories([DEFAULT_CATEGORY])
        table.print_table('plain', include_headers=False)
#!/usr/bin/env python # -*- coding: utf-8 -*- from __future__ import absolute_import, division, print_function, unicode_literals import schedy import time db = schedy.SchedyDB() experiment = db.get_experiment('MinimizeManual') while True: try: # Pull the next job, and start working on it # The with statement is there so that we always report to Schedy # whether the job has crashed or succeeded # The results will only be pushed to Schedy at the end of the with # statement with experiment.next_job() as job: x = job.hyperparameters['x'] y = job.hyperparameters['y'] result = x**2 + y**2 job.results['result'] = result # Catch any type of exception so that the worker never crashes # This includes the NoJobError exception thrown by Schedy if there is no # job queued for this experiment. except Exception as e: print(e) # Wait a minute before issuing the next request time.sleep(60)