class CometLogger():
    """Handles logging of an experiment to Comet and persistence to the
    local file system.  Supports resumption of stopped experiments.
    """

    def __init__(self, enabled, is_existing=False, prev_exp_key=None):
        """Create a new Comet experiment, or resume an existing one.

        Args:
            enabled: when False, the Comet experiment is created in
                disabled mode and nothing is uploaded.
            is_existing: resume a previously created experiment instead of
                starting a new one.
            prev_exp_key: Comet key of the experiment to resume; required
                when ``is_existing`` is True.

        Raises:
            ValueError: if ``is_existing`` is True but no key was given.
        """
        disabled = not enabled
        if not is_existing:
            self.experiment = Experiment(api_key=COMET_API_KEY,
                                         workspace=COMET_WORKSPACE,
                                         project_name=PROJECT_NAME,
                                         disabled=disabled)
        else:
            if prev_exp_key is None:
                raise ValueError(
                    "Requested existing experiment, but no key provided")
            print("Continuing existing experiment with key: ", prev_exp_key)
            self.experiment = ExistingExperiment(
                api_key=COMET_API_KEY,
                workspace=COMET_WORKSPACE,
                project_name=PROJECT_NAME,
                disabled=disabled,
                previous_experiment=prev_exp_key)
        self.disabled = disabled

    def get_experiment_key(self):
        """Return the first 9 characters of the Comet experiment key."""
        return self.experiment.get_key()[:9]

    def add_tag(self, tag):
        self.experiment.add_tag(tag)

    def log_metric(self, name, value, step=None):
        self.experiment.log_metric(name, value, step=step)

    def log_metrics(self, metrics_dict, prefix, step=None):
        self.experiment.log_metrics(metrics_dict, prefix=prefix, step=step)

    def log_params(self, params_dict):
        self.experiment.log_parameters(params_dict)

    def set_name(self, name_str):
        self.experiment.set_name(name_str)

    def log_dataset(self, dataset: "SpeakerVerificationDataset"):
        """Log a human-readable HTML summary of the dataset to Comet."""
        if self.disabled:
            return
        dataset_string = ""
        dataset_string += "<b>Speakers</b>: %s\n" % len(dataset.speakers)
        dataset_string += "\n" + dataset.get_logs()
        dataset_string = dataset_string.replace("\n", "<br>")
        # BUGFIX: the original called ``self.vis.text(...)``, but ``self.vis``
        # is never assigned anywhere in this class (leftover visdom code), so
        # this method always raised AttributeError.  Log through Comet instead.
        self.experiment.log_html(dataset_string)

    def log_implementation(self, params):
        """Log the training-implementation parameters as HTML to Comet."""
        if self.disabled:
            return
        implementation_string = ""
        for param, value in params.items():
            implementation_string += "<b>%s</b>: %s\n" % (param, value)
        implementation_string = implementation_string.replace("\n", "<br>")
        self.implementation_string = implementation_string
        # BUGFIX: same undefined ``self.vis`` as in log_dataset.
        self.experiment.log_html(implementation_string)

    def draw_projections(self, embeds, utterances_per_speaker, step,
                         out_fpath=None, max_speakers=16):
        """Project utterance embeddings to 2-D with UMAP, plot them colored
        by speaker, and (when ``out_fpath`` is given) save and upload the
        figure.

        NOTE(review): assumes ``embeds`` is (n_utterances, embed_dim) and
        ordered speaker-by-speaker — confirm against callers.
        """
        if self.disabled:
            return
        max_speakers = min(max_speakers, len(colormap))
        embeds = embeds[:max_speakers * utterances_per_speaker]
        n_speakers = len(embeds) // utterances_per_speaker
        ground_truth = np.repeat(np.arange(n_speakers), utterances_per_speaker)
        colors = [colormap[i] for i in ground_truth]
        reducer = umap.UMAP()
        projected = reducer.fit_transform(embeds)
        plt.scatter(projected[:, 0], projected[:, 1], c=colors)
        plt.gca().set_aspect("equal", "datalim")
        plt.title("UMAP projection (step %d)" % step)
        if out_fpath is not None:
            plt.savefig(out_fpath)
            # BUGFIX: only upload when a file was actually written; the
            # original could reach log_image with out_fpath=None.
            self.experiment.log_image(out_fpath, step=step)
        plt.clf()
def experiment(variant, comet_exp_key=None):
    """Train a MAML-IL meta-policy configured by ``variant``.

    Args:
        variant: dict of experiment configuration (seed, batch sizes,
            env/policy types, expert-data paths, iteration counts, ...).
        comet_exp_key: when not None, Comet logging is enabled and the
            experiment is resumed from ``variant['comet_exp_key']``.
            NOTE(review): the key passed here is ignored in favor of the
            one in ``variant`` — confirm this asymmetry is intended.
    """
    if comet_exp_key is not None:
        from rllab.misc.comet_logger import CometContinuedLogger, CometLogger
        from comet_ml import Experiment, ExistingExperiment
        # NOTE(security): hard-coded API key checked into source — move it
        # to an environment variable or a config file.
        comet_log = ExistingExperiment(
            api_key="KWwx7zh6I2uw6oQMkpEo3smu0",
            previous_experiment=variant['comet_exp_key'])
        comet_log.set_name("test seq train")
        print(comet_log)
    else:
        comet_log = None

    # Heavy project imports are deferred until the experiment actually runs.
    print("loading libraries")
    from sandbox.rocky.tf.algos.maml_il import MAMLIL
    from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline
    from rllab.baselines.gaussian_mlp_baseline import GaussianMLPBaseline
    from rllab.baselines.maml_gaussian_mlp_baseline import MAMLGaussianMLPBaseline
    from rllab.baselines.zero_baseline import ZeroBaseline
    from rllab.envs.normalized_env import normalize
    from rllab.misc.instrument import stub, run_experiment_lite
    from sandbox.rocky.tf.policies.maml_minimal_gauss_mlp_policy import MAMLGaussianMLPPolicy as basic_policy
    from sandbox.rocky.tf.policies.maml_minimal_gauss_mlp_policy_adaptivestep_ppo import \
        MAMLGaussianMLPPolicy as PPO_policy
    from sandbox.rocky.tf.policies.maml_minimal_gauss_mlp_policy_adaptivestep_biastransform import \
        MAMLGaussianMLPPolicy as fullAda_Bias_policy
    from sandbox.rocky.tf.policies.maml_minimal_gauss_mlp_policy_biasonlyadaptivestep_biastransform import \
        MAMLGaussianMLPPolicy as biasAda_Bias_policy
    from sandbox.rocky.tf.policies.maml_minimal_conv_gauss_mlp_policy import MAMLGaussianMLPPolicy as conv_policy
    from sandbox.rocky.tf.optimizers.quad_dist_expert_optimizer import QuadDistExpertOptimizer
    from sandbox.rocky.tf.optimizers.first_order_optimizer import FirstOrderOptimizer
    from sandbox.rocky.tf.envs.base import TfEnv
    import sandbox.rocky.tf.core.layers as L
    from rllab.envs.mujoco.ant_env_rand_goal_ring import AntEnvRandGoalRing
    from multiworld.envs.mujoco.sawyer_xyz.push.sawyer_push import SawyerPushEnv
    from multiworld.envs.mujoco.sawyer_xyz.pickPlace.sawyer_pick_and_place import SawyerPickPlaceEnv
    from multiworld.envs.mujoco.sawyer_xyz.door.sawyer_door_open import SawyerDoorOpenEnv
    from multiworld.core.flat_goal_env import FlatGoalEnv
    from multiworld.core.finn_maml_env import FinnMamlEnv
    from multiworld.core.wrapper_env import NormalizedBoxEnv
    import tensorflow as tf
    import time
    from rllab.envs.gym_env import GymEnv
    from maml_examples.maml_experiment_vars import MOD_FUNC
    import numpy as np
    import random as rd
    import pickle
    print("Done loading libraries")

    seed = variant['seed']
    n_parallel = 1
    log_dir = variant['log_dir']
    setup(seed, n_parallel, log_dir)

    fast_batch_size = variant['fbs']
    meta_batch_size = variant['mbs']
    adam_steps = variant['adam_steps']
    max_path_length = variant['max_path_length']
    dagger = variant['dagger']
    expert_policy_loc = variant['expert_policy_loc']
    ldim = variant['ldim']
    init_flr = variant['init_flr']
    policyType = variant['policyType']
    use_maesn = variant['use_maesn']
    EXPERT_TRAJ_LOCATION = variant['expertDataLoc']
    envType = variant['envType']

    tasksFile = path_to_multiworld + 'multiworld/envs/goals/' + variant['tasksFile'] + '.pkl'
    # BUGFIX: context manager closes the task-file handle (the original
    # leaked the handle from a bare open()).
    with open(tasksFile, 'rb') as f:
        all_tasks = pickle.load(f)
    assert meta_batch_size <= len(all_tasks), \
        "meta batch size wrong: " + str(meta_batch_size) + " <= " + str(len(all_tasks))
    tasks = all_tasks[:meta_batch_size]
    print("^^^^^^^^^^^^^^^^ meta_tasks: ", tasks, " ^^^^^^^^^^^^^^^^ ")

    use_images = 'conv' in policyType

    # Build the base environment for the requested env type.  The Ant/claw
    # branches build the final ``env`` directly; the Sawyer branches are
    # wrapped below.
    if envType == 'Push':
        baseEnv = SawyerPushEnv(tasks=tasks, image=use_images, mpl=max_path_length)
    elif envType == 'sparsePush':
        baseEnv = SawyerPushEnv(tasks=tasks, image=use_images,
                                mpl=max_path_length, rewMode='l2Sparse')
    elif 'PickPlace' in envType:
        baseEnv = SawyerPickPlaceEnv(tasks=tasks, image=use_images, mpl=max_path_length)
    elif 'Door' in envType:
        baseEnv = SawyerDoorOpenEnv(tasks=tasks, image=use_images, mpl=max_path_length)
    elif 'Ant' in envType:
        env = TfEnv(normalize(AntEnvRandGoalRing()))
    elif 'claw' in envType:
        env = TfEnv(DClawScrewRandGoal())
    else:
        # BUGFIX: was ``assert True == False`` — raise with a useful message.
        raise AssertionError("Unknown envType: %s" % envType)

    if envType in ['Push', 'PickPlace', 'Door']:
        obs_keys = ['img_observation'] if use_images else ['state_observation']
        env = TfEnv(NormalizedBoxEnv(
            FinnMamlEnv(FlatGoalEnv(baseEnv, obs_keys=obs_keys), reset_mode='idx')))

    algoClass = MAMLIL
    baseline = LinearFeatureBaseline(env_spec=env.spec)

    load_policy = variant['load_policy']
    # BUGFIX: ``!= None`` replaced with ``is not None``; also removed the
    # redundant re-assignment of load_policy inside the branch.
    if load_policy is not None:
        # The algorithm restores the policy from ``load_policy`` itself.
        policy = None
    elif 'fullAda_PPO' in policyType:
        policy = PPO_policy(
            name="policy",
            env_spec=env.spec,
            grad_step_size=init_flr,
            hidden_nonlinearity=tf.nn.relu,
            hidden_sizes=(100, 100),
            init_flr_full=init_flr,
            latent_dim=ldim
        )
    elif 'fullAda_Bias' in policyType:
        policy = fullAda_Bias_policy(
            name="policy",
            env_spec=env.spec,
            grad_step_size=init_flr,
            hidden_nonlinearity=tf.nn.relu,
            hidden_sizes=(100, 100),
            init_flr_full=init_flr,
            latent_dim=ldim
        )
    elif 'biasAda_Bias' in policyType:
        policy = biasAda_Bias_policy(
            name="policy",
            env_spec=env.spec,
            grad_step_size=init_flr,
            hidden_nonlinearity=tf.nn.relu,
            hidden_sizes=(100, 100),
            init_flr_full=init_flr,
            latent_dim=ldim
        )
    elif 'basic' in policyType:
        policy = basic_policy(
            name="policy",
            env_spec=env.spec,
            grad_step_size=init_flr,
            hidden_nonlinearity=tf.nn.relu,
            hidden_sizes=(100, 100),
            # BUGFIX: ``is ""`` (identity test on a literal) → ``== ""``.
            # NOTE(review): extra_input / extra_input_dim are not defined in
            # this function — presumably module-level globals; confirm.
            extra_input_dim=(0 if extra_input == "" else extra_input_dim),
        )
    elif 'conv' in policyType:
        baseline = ZeroBaseline(env_spec=env.spec)
        policy = conv_policy(
            name="policy",
            latent_dim=ldim,
            policyType=policyType,
            env_spec=env.spec,
            init_flr=init_flr,
            hidden_nonlinearity=tf.nn.relu,
            hidden_sizes=(100, 100),
            extra_input_dim=(0 if extra_input == "" else extra_input_dim),
        )

    print("|||||||||||||||||||||||||||||||||||||||||||||||", variant['n_itr'])

    beta_steps = 1
    meta_step_size = 0.01
    num_grad_updates = 1
    pre_std_modifier = 1.0
    post_std_modifier = 0.00001
    limit_demos_num = None

    algo = algoClass(
        env=env,
        policy=policy,
        load_policy=load_policy,
        baseline=baseline,
        batch_size=fast_batch_size,  # number of trajs for alpha grad update
        max_path_length=max_path_length,
        meta_batch_size=meta_batch_size,  # number of tasks sampled for beta grad update
        num_grad_updates=num_grad_updates,  # number of alpha grad updates
        n_itr=variant['n_itr'],
        make_video=False,
        use_maml=True,
        use_pooled_goals=True,
        use_corr_term=use_corr_term,
        test_on_training_goals=test_on_training_goals,
        metalearn_baseline=False,
        limit_demos_num=limit_demos_num,
        test_goals_mult=1,
        step_size=meta_step_size,
        plot=False,
        beta_steps=beta_steps,
        adam_curve=None,
        adam_steps=adam_steps,
        pre_std_modifier=pre_std_modifier,
        l2loss_std_mult=l2loss_std_mult,
        importance_sampling_modifier=MOD_FUNC[''],
        post_std_modifier=post_std_modifier,
        expert_trajs_dir=EXPERT_TRAJ_LOCATION,
        expert_trajs_suffix='',
        seed=seed,
        extra_input=extra_input,
        extra_input_dim=(0 if extra_input == "" else extra_input_dim),
        plotDirPrefix=None,
        latent_dim=ldim,
        dagger=dagger,
        expert_policy_loc=expert_policy_loc,
        comet_logger=comet_log,
        outerIteration=variant['outer_Iteration'],
        use_ppo=True
    )
    algo.train()
class CometLogger():
    """Handles logging of an experiment to Comet and persistence to the
    local file system.  Supports resumption of stopped experiments.
    """

    def __init__(self, disabled, is_existing=False, prev_exp_key=None):
        """Create a new Comet experiment, or resume an existing one.

        Args:
            disabled: when True, the Comet experiment is created in
                disabled mode and nothing is uploaded.
            is_existing: resume a previously created experiment instead of
                starting a new one.
            prev_exp_key: Comet key of the experiment to resume; required
                when ``is_existing`` is True.

        Raises:
            ValueError: if ``is_existing`` is True but no key was given.
        """
        if not is_existing:
            self.experiment = Experiment(api_key=COMET_API_KEY,
                                         workspace=COMET_WORKSPACE,
                                         project_name=PROJECT_NAME,
                                         disabled=disabled)
        else:
            if prev_exp_key is None:
                raise ValueError("Requested existing experiment, but no key provided")
            print("Continuing existing experiment with key: ", prev_exp_key)
            self.experiment = ExistingExperiment(api_key=COMET_API_KEY,
                                                 workspace=COMET_WORKSPACE,
                                                 project_name=PROJECT_NAME,
                                                 disabled=disabled,
                                                 previous_experiment=prev_exp_key)
        self.disabled = disabled
        # Set lazily by set_name(); save_act_grads() refuses to run until then.
        self.name = None

    def get_experiment_key(self):
        """Return the first 9 characters of the Comet experiment key."""
        return self.experiment.get_key()[:9]

    def add_tag(self, tag):
        self.experiment.add_tag(tag)

    def log_metric(self, name, value, step=None):
        self.experiment.log_metric(name, value, step=step)

    def log_metrics(self, metrics_dict, prefix, step=None):
        self.experiment.log_metrics(metrics_dict, prefix=prefix, step=step)

    def log_params(self, params_dict):
        self.experiment.log_parameters(params_dict)

    def set_name(self, name_str):
        """Name the Comet experiment and remember it for local persistence."""
        self.experiment.set_name(name_str)
        self.name = name_str

    def save_act_grads(self, log_dict):
        """Save a dictionary of activation/gradient records to disk.

        The record is pickled to ``./.{name}.record``; a warning is issued
        (and nothing written) when no experiment name has been set yet.
        """
        assert isinstance(log_dict, dict)
        if self.name is None:
            warnings.warn("Experiment name not set, not saving")
            return
        file_name = f"./.{self.name}.record"
        with open(file_name, 'wb') as f:
            pickle.dump(log_dict, f)

    # TODO: need to rewrite before can be used for MNIST.
    def draw_projections(self, embeds, utterances_per_speaker, step,
                         out_fpath=None, max_speakers=16):
        """Project utterance embeddings to 2-D with UMAP, plot them colored
        by speaker, and (when ``out_fpath`` is given) save and upload the
        figure.

        NOTE(review): assumes ``embeds`` is (n_utterances, embed_dim) and
        ordered speaker-by-speaker — confirm against callers.
        """
        if self.disabled:
            return
        # BUGFIX: deferred until after the disabled-check — umap is a slow
        # import and the original paid for it even when logging was off.
        import umap
        max_speakers = min(max_speakers, len(colormap))
        embeds = embeds[:max_speakers * utterances_per_speaker]
        n_speakers = len(embeds) // utterances_per_speaker
        ground_truth = np.repeat(np.arange(n_speakers), utterances_per_speaker)
        colors = [colormap[i] for i in ground_truth]
        reducer = umap.UMAP()
        projected = reducer.fit_transform(embeds)
        plt.scatter(projected[:, 0], projected[:, 1], c=colors)
        plt.gca().set_aspect("equal", "datalim")
        plt.title("UMAP projection (step %d)" % step)
        if out_fpath is not None:
            plt.savefig(out_fpath)
            # BUGFIX: only upload when a file was actually written; the
            # original could reach log_image with out_fpath=None.
            self.experiment.log_image(out_fpath, step=step)
        plt.clf()
def experiment(variant, comet_exp_key=None):
    """Fine-tune/test a single task with a VPG-style adapted policy.

    Args:
        variant: dict of run configuration (seed, paths, task index,
            policy/env types, iteration counts, ...).
        comet_exp_key: when not None, Comet logging is enabled and the
            experiment is resumed from ``variant['comet_exp_key']``.
            NOTE(review): the key passed here is ignored in favor of the
            one in ``variant`` — confirm this asymmetry is intended.
    """
    comet_logger = None
    if comet_exp_key is not None:
        # NOTE(security): hard-coded API key checked into source — move it
        # to an environment variable or a config file.
        comet_logger = ExistingExperiment(
            api_key="KWwx7zh6I2uw6oQMkpEo3smu0",
            previous_experiment=variant['comet_exp_key'])
        comet_logger.set_name("test seq train")
        print("RL!: ", comet_logger)
    print("%%%%%%%%%%%%%%%%%", comet_logger)

    seed = variant['seed']
    log_dir = variant['log_dir']
    n_parallel = variant['n_parallel']
    setup(seed, n_parallel, log_dir)

    init_file = variant['init_file']
    taskIndex = variant['taskIndex']
    n_itr = variant['n_itr']
    default_step = variant['default_step']
    policyType = variant['policyType']
    envType = variant['envType']

    tasksFile = path_to_multiworld + '/multiworld/envs/goals/' + variant[
        'tasksFile'] + '.pkl'
    # BUGFIX: context manager closes the task-file handle (the original
    # leaked the handle from a bare open()).
    with open(tasksFile, 'rb') as f:
        tasks = pickle.load(f)
    max_path_length = variant['max_path_length']

    use_images = 'conv' in policyType
    print("$$$$$$$$$$$$$$$ RL-TASK: ", str(tasks[taskIndex]), " $$$$$$$$$$$$$$$")

    # Build the base environment for the requested env type.  The Ant branch
    # builds the final ``env`` directly; the Sawyer branches are wrapped below.
    if 'MultiDomain' in envType:
        baseEnv = Sawyer_MultiDomainEnv(tasks=tasks, image=use_images, mpl=max_path_length)
    elif 'Push' in envType:
        baseEnv = SawyerPushEnv(tasks=tasks, image=use_images, mpl=max_path_length)
    elif 'PickPlace' in envType:
        baseEnv = SawyerPickPlaceEnv(tasks=tasks, image=use_images, mpl=max_path_length)
    elif 'Door' in envType:
        baseEnv = SawyerDoorOpenEnv(tasks=tasks, image=use_images, mpl=max_path_length)
    elif 'Ant' in envType:
        env = TfEnv(normalize(AntEnvRandGoalRing()))
    elif 'Coffee' in envType:
        baseEnv = SawyerCoffeeEnv(mpl=max_path_length)
    else:
        # BUGFIX: was ``raise AssertionError('')`` — empty message gave no
        # clue what went wrong.
        raise AssertionError("Unknown envType: %s" % envType)

    if envType in ['Push', 'PickPlace', 'Door']:
        obs_keys = ['img_observation'] if use_images else ['state_observation']
        env = TfEnv(
            NormalizedBoxEnv(
                FinnMamlEnv(FlatGoalEnv(baseEnv, obs_keys=obs_keys), reset_mode='idx')))

    baseline = ZeroBaseline(env_spec=env.spec)
    batch_size = variant['batch_size']

    if policyType == 'fullAda_Bias':
        # fullAda_Bias overrides the default zero baseline.
        baseline = LinearFeatureBaseline(env_spec=env.spec)
        algo = vpg_fullADA(
            env=env,
            policy=None,
            load_policy=init_file,
            baseline=baseline,
            batch_size=batch_size,  # 2x
            max_path_length=max_path_length,
            n_itr=n_itr,
            default_step=default_step,
            sampler_cls=VectorizedSampler,  # added by RK 6/19
            sampler_args=dict(n_envs=1),
            reset_arg=taskIndex,
            log_dir=log_dir,
            comet_logger=comet_logger,
            outer_iteration=variant['outer_iteration'])
    elif policyType == 'biasAda_Bias':
        # NOTE(review): unlike the fullAda_Bias branch this one does not
        # forward comet_logger/outer_iteration — confirm that is intended.
        algo = vpg_biasADA(
            env=env,
            policy=None,
            load_policy=init_file,
            baseline=baseline,
            batch_size=batch_size,  # 2x
            max_path_length=max_path_length,
            n_itr=n_itr,
            default_step=default_step,
            sampler_cls=VectorizedSampler,  # added by RK 6/19
            sampler_args=dict(n_envs=1),
            reset_arg=taskIndex,
            log_dir=log_dir)
    elif policyType == 'basic':
        algo = vpg_basic(
            env=env,
            policy=None,
            load_policy=init_file,
            baseline=baseline,
            batch_size=batch_size,
            max_path_length=max_path_length,
            n_itr=n_itr,
            sampler_cls=VectorizedSampler,  # added by RK 6/19
            sampler_args=dict(n_envs=1),
            reset_arg=taskIndex,
            optimizer=None,
            optimizer_args={
                'init_learning_rate': default_step,
                'tf_optimizer_args': {
                    'learning_rate': 0.5 * default_step
                },
                'tf_optimizer_cls': tf.train.GradientDescentOptimizer
            },
            log_dir=log_dir
        )
    elif 'conv' in policyType:
        algo = vpg_conv(
            env=env,
            policy=None,
            load_policy=init_file,
            baseline=baseline,
            batch_size=batch_size,  # 2x
            max_path_length=max_path_length,
            n_itr=n_itr,
            sampler_cls=VectorizedSampler,  # added by RK 6/19
            sampler_args=dict(n_envs=1),
            default_step=default_step,
            reset_arg=taskIndex,
            log_dir=log_dir)
    else:
        # BUGFIX: the original message claimed only two types were accepted,
        # but 'basic' and conv policies are handled above.
        raise AssertionError(
            "Policy Type must be fullAda_Bias, biasAda_Bias, basic, or a conv type")

    algo.train()
class CometConnection:
    """Thin wrapper around a Comet experiment: creation/resumption,
    serialization to disk, and asset/metric logging helpers.
    """

    def __init__(self, comet_name=None, dataset_config=None, exp_key=None):
        """Create a new experiment (when name + config are given) or resume
        an existing one (when ``exp_key`` is given); otherwise stay offline
        with ``self.experiment`` left as None.
        """
        self.experiment = None
        if comet_name is not None and dataset_config is not None:
            self._init_new_experiment(comet_name, dataset_config)
        elif exp_key is not None:
            self._init_continue_experiment(exp_key)

    def _init_new_experiment(self, comet_name, dataset_config):
        """Start a fresh experiment and log the dataset generator script."""
        self.experiment = Experiment(api_key=COMET_KEY, project_name=PROJECT_NAME)
        self.experiment.set_name(comet_name)
        self.log_data_attributes(dataset_config)
        self.experiment.log_asset('datagen/spectra_generator.m')

    def _init_continue_experiment(self, exp_key):
        """Re-attach to the experiment identified by ``exp_key``."""
        self.experiment = ExistingExperiment(api_key=COMET_KEY,
                                             previous_experiment=exp_key)

    def serialize(self):
        """Return a JSON-serializable dict identifying this experiment."""
        return {"comet_exp_key": self.experiment.get_key()}

    def save(self, save_dir):
        """Write the serialized experiment info to ``save_dir``."""
        info_dict = self.serialize()
        # BUGFIX: context manager closes the file handle (the original
        # leaked the handle from json.dump(..., open(...))).
        with open(os.path.join(save_dir, COMET_SAVE_FILENAME), "w") as f:
            json.dump(info_dict, f)

    def persist(self, config_path):
        """Re-initialize this object from a previously saved config file."""
        # BUGFIX: context manager closes the file handle.
        with open(config_path, 'r') as f:
            info = json.load(f)
        self.__init__(exp_key=info["comet_exp_key"])

    def log_data_attributes(self, dataset_config):
        """Log every dataset-config entry as a SPECTRUM_* parameter."""
        for key, value in dataset_config.items():
            self.experiment.log_parameter("SPECTRUM_" + key, value)

    def log_imgs(self, dataset_name):
        """Best-effort upload of the dataset's image folder."""
        try:
            imgs_dir = os.path.join(DATA_DIR, dataset_name, 'imgs')
            self.experiment.log_asset_folder(imgs_dir)
        # BUGFIX: bare ``except:`` also swallowed SystemExit and
        # KeyboardInterrupt; keep the best-effort behavior but narrow it.
        except Exception:
            print(f"No images found for dataset: {dataset_name}")

    def log_script(self, dataset_config):
        """Best-effort upload of the MATLAB generator script."""
        script_name = dataset_config['matlab_script']
        try:
            matlab_dir = os.path.join(GEN_DIR, script_name)
            self.experiment.log_asset(matlab_dir)
        # BUGFIX: narrowed from a bare ``except:`` (see log_imgs).
        except Exception:
            print(f"Could not find {script_name} under {GEN_DIR}.")

    def format_classification_report(self, classification_report):
        """Flatten sklearn's dict-form report into {label_test_metric: value}."""
        return {
            f'{k}_test_{metric}': metric_val
            for k, v in classification_report.items()
            for metric, metric_val in v.items()
        }

    def get_classification_report(self, y_test, preds):
        """Compute sklearn's classification report from one-hot labels and
        prediction scores, log it to Comet when connected, and return the
        dict form.
        """
        preds_formatted = np.argmax(preds, axis=1)
        test_formatted = np.argmax(y_test, axis=1)
        # One label per output column: n_peaks_1 .. n_peaks_K.
        peak_labels = [
            f"n_peaks_{1 + num_peak}"
            for num_peak in range(y_test.shape[1])
        ]
        classif_report = classification_report(test_formatted,
                                               preds_formatted,
                                               target_names=peak_labels,
                                               output_dict=True)
        classif_report_str = classification_report(test_formatted,
                                                   preds_formatted,
                                                   target_names=peak_labels)
        if self.experiment is not None:
            formatted = self.format_classification_report(classif_report)
            self.experiment.log_metrics(formatted)
            self.experiment.log_text(classif_report_str)
        return classif_report