def export_trials(trials: hyperopt.Trials, path: str) -> None:
    def slim(source: hyperopt.Trials) -> hyperopt.Trials:
        """Strip trials down to their basic values so that they can be pickled."""
        _trials = hyperopt.Trials()
        for trial in source.trials:
            docs = hyperopt.Trials().new_trial_docs(tids=[trial['tid']],
                                                    specs=[trial['spec']],
                                                    results=[trial['result']],
                                                    miscs=[trial['misc']])
            _trials.insert_trial_docs(docs)
        _trials.refresh()
        return _trials

    trials_pickle_path = os.path.join(path, TRIALS_PICKLE_FILE)
    trials_table_path = os.path.join(path, TRIALS_TABLE_FILE)
    xpath.prepare_path(trials_pickle_path)
    xpath.prepare_path(trials_table_path)

    logger.info('Exporting trials (pickled) to %s', trials_pickle_path)
    with tf.io.gfile.GFile(trials_pickle_path, 'wb') as fp:
        st = slim(trials)
        pickle.dump(st, file=fp)

    logger.info('Exporting trials table (csv) to %s', trials_table_path)
    df = convert_trials_to_data_frame(trials)
    with tf.io.gfile.GFile(trials_table_path, 'w') as fp:
        df.to_csv(fp, header=True, index=False)
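# Round-trip sketch: trials exported above can be reloaded with pickle, using
# the same path convention (TRIALS_PICKLE_FILE) that export_trials writes to.
#
#     with tf.io.gfile.GFile(os.path.join(path, TRIALS_PICKLE_FILE), 'rb') as fp:
#         restored_trials = pickle.load(fp)
#     print(restored_trials.best_trial['result'])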
def parse_args() -> argparse.Namespace:
    """Parse command-line arguments.

    :return: parsed :class:`argparse.Namespace` instance
    """
    arg_parser = argparse.ArgumentParser(description='FMNIST HyperParameter Search')
    arg_parser.add_argument('--spec', type=str, choices=[Spec.FCNN.name, Spec.CVNN.name, Spec.VGGN.name],
                            help='Model to tune.')
    arg_parser.add_argument('--num-epochs', type=int, default=2,
                            help='Number of training epochs for each experiment run')
    arg_parser.add_argument('--buffer-size', type=int, default=256,
                            help='Capacity for the reading queue')
    arg_parser.add_argument('--num-threads', type=int, default=1,
                            help='Number of threads for processing data')
    arg_parser.add_argument('--no-shuffle', dest='shuffle', action='store_false')
    arg_parser.add_argument('--job-dir', required=True, help='Path to job dir')
    arg_parser.add_argument('--model-dir', required=True, help='Path to model dir')
    arg_parser.add_argument('--train-data', required=True, help='Path to input data')
    arg_parser.add_argument('--max-evaluations', type=int, required=False, default=2,
                            help='Max number of experiments')
    arg_parser.add_argument('--spark-host', type=str, required=False, default=None,
                            help='Hostname of a Spark server, to use Apache Spark for parallel tuning')
    arg_parser.set_defaults(shuffle=True)

    args = arg_parser.parse_args()
    logger.info('Running with args:')
    for arg in vars(args):
        logger.info('\t%s: %s', arg, getattr(args, arg))
    return args
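# Illustrative invocation (the script name and paths are placeholders):
#
#     python tune_model.py --spec VGGN --num-epochs 5 \
#         --job-dir /tmp/fmnist/jobs --model-dir /tmp/fmnist/models \
#         --train-data /tmp/fmnist/data --max-evaluations 20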
def fn(arrays: xtype.DataTuple, partition: int) -> None:
    # `path` and `extension` are captured from the enclosing scope
    # (see the create_export_fn usage in main below).
    import tempfile
    import uuid

    temporary_path = os.path.join(tempfile.gettempdir(), '{}.{}'.format(uuid.uuid4(), extension))
    file_path = os.path.join(path, 'part-{:03}.{}'.format(partition, extension))
    xpath.prepare_path(file_path)

    # Write to a local temporary file first, then copy via tf.io.gfile so that
    # remote destinations (e.g. GCS) are supported as well.
    with open(temporary_path, 'wb') as fp:
        np.savez(fp, *arrays)
    logger.info('Copying %s to %s', temporary_path, file_path)
    tf.io.gfile.copy(src=temporary_path, dst=file_path, overwrite=True)
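# Reading a part file back (illustrative path): np.savez stores positional
# arrays under the keys 'arr_0', 'arr_1', ... in order.
#
#     with np.load('/path/to/part-000.npz') as data:
#         arrays = [data['arr_{}'.format(i)] for i in range(len(data.files))]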
def tune(param_space: Dict[str, Any], objective_fn: Callable[[Dict[str, Any]], Dict[str, Any]],
         max_evaluations: int, spark_host: str) -> Tuple[Dict[str, Any], hyperopt.Trials]:
    start = time.time()
    if spark_host:
        import pyspark
        spark_session = pyspark.sql.SparkSession(pyspark.SparkContext(master=spark_host, appName=APP_NAME))
        trials = hyperopt.SparkTrials(spark_session=spark_session)
    else:
        trials = hyperopt.Trials()

    best_params = hyperopt.fmin(objective_fn, param_space, algo=tpe.suggest, max_evals=max_evaluations,
                                trials=trials, rstate=np.random.RandomState(1777))
    evaluated_best_params = hyperopt.space_eval(param_space, best_params)
    losses = [x['result']['loss'] for x in trials.trials]

    logger.info('Score of best parameters: %f', min(losses) * -1)
    logger.info('Best parameters: %s', evaluated_best_params)
    logger.info('Time elapsed: %s', time.strftime('%H:%M:%S', time.gmtime(time.time() - start)))
    logger.info('Parameter combinations evaluated: %d', max_evaluations)
    return evaluated_best_params, trials
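# Minimal usage sketch for tune(), run locally (spark_host=None). The space and
# objective below are hypothetical stand-ins; a real objective trains a model
# and returns its negated validation metric as 'loss'.
def _example_tune_usage() -> None:
    from hyperopt import hp

    example_space = {
        'learning_rate': hp.loguniform('learning_rate', np.log(1e-4), np.log(1e-1)),
        'batch_size': hp.choice('batch_size', [32, 64, 128]),
    }

    def example_objective(params: Dict[str, Any]) -> Dict[str, Any]:
        # Stand-in loss so fmin has something to minimize.
        return {'loss': params['learning_rate'], 'status': hyperopt.STATUS_OK}

    best_params, trials = tune(example_space, example_objective, max_evaluations=10, spark_host=None)
    logger.info('Example best params: %s', best_params)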
def wrapper_fn(guess_params: Dict[str, Any]):
    # Flatten the nested 'conv' sub-space into top-level parameters.
    params = {k: v for k, v in guess_params.items() if not isinstance(v, dict)}
    params['num_blocks'] = guess_params['conv']['num_blocks']
    params['block_size'] = guess_params['conv']['block_size']

    signature = core.create_signature(params={**params, 'class': self.__class__.__name__})
    task_job_dir = os.path.join(job_dir, signature)
    task_model_dir = os.path.join(model_dir, signature)
    logger.info('Running with config: %s', params)

    def train_fn(batch_size: int, learning_rate: float, fcl_dropout_rate: float, activation: str,
                 num_blocks: int, block_size: int, fcl_num_layers: int, fcl_layer_size: int,
                 optimizer: str) -> Dict[str, Any]:
        from fmnist.learning.arch.vggn import train

        hps_loss, status = math.nan, hyperopt.STATUS_FAIL
        try:
            metrics, export_path = train.train(base_data_dir, num_threads=num_threads, buffer_size=buffer_size,
                                               batch_size=batch_size, num_epochs=num_epochs, shuffle=shuffle,
                                               job_dir=task_job_dir, model_dir=task_model_dir,
                                               learning_rate=learning_rate, num_blocks=num_blocks,
                                               block_size=block_size, fcl_dropout_rate=fcl_dropout_rate,
                                               activation=activation, fcl_num_layers=fcl_num_layers,
                                               fcl_layer_size=fcl_layer_size, optimizer_name=optimizer)
            if math.isnan(metrics['sparse_categorical_accuracy']) or math.isnan(metrics['loss']):
                status = hyperopt.STATUS_FAIL
            else:
                status = hyperopt.STATUS_OK
                # Minimize the negative squared accuracy, i.e. maximize accuracy.
                hps_loss = -math.pow(metrics['sparse_categorical_accuracy'], 2.0)
        except Exception as err:
            logger.error(err)
        finally:
            return {'loss': hps_loss, 'status': status, 'job_dir': task_job_dir, 'model_dir': task_model_dir,
                    'params': {**params, 'num_epochs': num_epochs, 'tuner': self.__class__.__name__}}

    return train_fn(batch_size=params['batch_size'], learning_rate=params['learning_rate'],
                    fcl_dropout_rate=params['fcl_dropout_rate'], activation=params['activation'],
                    num_blocks=params['num_blocks'], block_size=params['block_size'],
                    fcl_num_layers=params['fcl_num_layers'], fcl_layer_size=params['fcl_layer_size'],
                    optimizer=params['optimizer'])
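def _example_vggn_space() -> Dict[str, Any]:
    """Hypothetical shape of the search space consumed by wrapper_fn above: the
    nested 'conv' dict is why the convolutional parameters are flattened first.
    The concrete choices are illustrative, not the project's actual space."""
    from hyperopt import hp

    return {
        'batch_size': hp.choice('batch_size', [64, 128]),
        'learning_rate': hp.loguniform('learning_rate', np.log(1e-4), np.log(1e-2)),
        'fcl_dropout_rate': hp.uniform('fcl_dropout_rate', 0.0, 0.5),
        'activation': hp.choice('activation', ['relu', 'elu']),
        'fcl_num_layers': hp.choice('fcl_num_layers', [1, 2]),
        'fcl_layer_size': hp.choice('fcl_layer_size', [128, 256]),
        'optimizer': hp.choice('optimizer', ['adam', 'sgd']),
        'conv': {
            'num_blocks': hp.choice('num_blocks', [2, 3]),
            'block_size': hp.choice('block_size', [1, 2]),
        },
    }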
def parse_args():
    arg_parser = argparse.ArgumentParser('fminst-vgg19-embedding',
                                         description='Get VGG19 embeddings for FMNIST')
    arg_parser.add_argument('--train-data', required=True)
    arg_parser.add_argument('--batch-size', required=False, type=int, default=32)
    arg_parser.add_argument('--job-dir', required=False, default=None)

    args = arg_parser.parse_args()
    logger.info('Running with arguments')
    for attr, value in vars(args).items():
        logger.info('%s: %s', attr, value)
    return args
def main():
    args = parse_args()
    fpath = create_path_fn(args.train_data)

    # Split the training CSV 80/20 into train/val; the test CSV is used as-is.
    df_prime = pd.read_csv(fpath(DataPaths.FMNIST, 'fashion-mnist_train.csv'))
    df_test = pd.read_csv(fpath(DataPaths.FMNIST, 'fashion-mnist_test.csv'))
    df_train, df_val = data_frame_split(df_prime, left_fraction=0.80)

    for df, split in zip((df_train, df_val, df_test), ('train', 'val', 'test')):
        logger.info('Running partitioning pipeline for %s', split)
        ds = create_dataset(df)
        data_iter = create_generator(ds, batch_size=args.batch_size)
        export_fn = create_export_fn(fpath(DataPaths.INTERIM, split), 'npz')
        partition_export_fn = create_partitioning_fn(group_size=100, agg_fn=agg_fn, consumer_fn=export_fn)
        partition_export_fn(data_iter)
def wrapper_fn(params: Dict[str, Any]):
    signature = core.create_signature(params={**params, 'tuner': self.__class__.__name__})
    task_job_dir = os.path.join(job_dir, signature)
    task_model_dir = os.path.join(model_dir, signature)
    logger.info('Running with config: %s', params)

    def train_fn(batch_size: int, learning_rate: float, dropout_rate: float, activation: str,
                 num_layers: int, layer_size: int, optimizer: str) -> Dict[str, Any]:
        from fmnist.learning.arch.fcnn import train

        hps_loss, status = math.nan, hyperopt.STATUS_FAIL
        try:
            metrics, export_path = train.train(base_data_dir, num_threads=num_threads, buffer_size=buffer_size,
                                               batch_size=batch_size, num_epochs=num_epochs, shuffle=shuffle,
                                               job_dir=task_job_dir, model_dir=task_model_dir,
                                               learning_rate=learning_rate, dropout_rate=dropout_rate,
                                               activation=activation, num_layers=num_layers,
                                               layer_size=layer_size, optimizer_name=optimizer)
            if math.isnan(metrics['sparse_categorical_accuracy']) or math.isnan(metrics['loss']):
                status = hyperopt.STATUS_FAIL
            else:
                status = hyperopt.STATUS_OK
                # Minimize the negative squared accuracy, i.e. maximize accuracy.
                hps_loss = -math.pow(metrics['sparse_categorical_accuracy'], 2.0)
        except RuntimeError as err:
            # Log instead of failing silently; the trial is reported as STATUS_FAIL.
            logger.error(err)
        finally:
            return {'loss': hps_loss, 'status': status, 'job_dir': task_job_dir, 'model_dir': task_model_dir,
                    'params': {**params, 'num_epochs': num_epochs, 'tuner': self.__class__.__name__}}

    return train_fn(batch_size=params['batch_size'], learning_rate=params['learning_rate'],
                    dropout_rate=params['dropout_rate'], activation=params['activation'],
                    num_layers=params['num_layers'], layer_size=params['layer_size'],
                    optimizer=params['optimizer'])
def export_parameters(params: Dict[str, Any], path: str) -> None:
    evaluated_params_path = os.path.join(path, EVALUATED_PARAMS_FILE)
    logger.info('Exporting best params to %s', evaluated_params_path)
    xpath.prepare_path(evaluated_params_path)
    with tf.io.gfile.GFile(evaluated_params_path, 'w') as fp:
        json.dump(params, fp=fp, sort_keys=True)
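# End-to-end sketch tying the pieces in this module together. `output_dir`,
# `param_space` and `objective_fn` are assumed to come from the caller:
#
#     best_params, trials = tune(param_space, objective_fn,
#                                max_evaluations=args.max_evaluations,
#                                spark_host=args.spark_host)
#     export_parameters(best_params, output_dir)
#     export_trials(trials, output_dir)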