def fit_validate(exp_params, k, data_path, write_path, others=None, custom_tag=''):
    """Fit model and compute metrics on the train and validation sets.

    Intended for hyperparameter search. Only logs final metrics and a scatter plot
    of the final embedding.

    Args:
        exp_params(dict): Parameter dict. Should at least have keys model_name, dataset_name &
        random_state. Other keys are assumed to be model parameters.
        k(int): Fold identifier.
        data_path(str): Data directory.
        write_path(str): Where to write temp files.
        others(dict): Other things to log to the Comet experiment.
        custom_tag(str): Custom tag for the Comet experiment.

    """
    # Comet experiment
    exp = Experiment(parse_args=False)
    exp.disable_mp()
    custom_tag += '_validate'
    exp.add_tag(custom_tag)
    exp.log_parameters(exp_params)

    if others is not None:
        exp.log_others(others)

    # Parse experiment parameters
    model_name, dataset_name, random_state, model_params = parse_params(exp_params)

    # Fetch and split dataset
    data_train = getattr(grae.data, dataset_name)(split='train', random_state=random_state,
                                                  data_path=data_path)
    data_train, data_val = data_train.validation_split(random_state=FOLD_SEEDS[k])

    # Model
    m = getattr(grae.models, model_name)(random_state=FOLD_SEEDS[k], **model_params)
    m.write_path = write_path
    m.data_val = data_val

    with exp.train():
        m.fit(data_train)

        # Log plot
        m.comet_exp = exp
        m.plot(data_train, data_val, title=f'{model_name} : {dataset_name}')

        # Probe embedding
        prober = EmbeddingProber()
        prober.fit(model=m, dataset=data_train, mse_only=True)
        train_z, train_metrics = prober.score(data_train, is_train=True)

        # Log train metrics
        exp.log_metrics(train_metrics)

    with exp.validate():
        val_z, val_metrics = prober.score(data_val)

        # Log validation metrics
        exp.log_metrics(val_metrics)

    # Log marker to mark successful experiment
    exp.log_other('success', 1)
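# A minimal usage sketch for fit_validate, assuming a GRAE-style setup.
# The exp_params values, paths, fold count and 'search_id' below are illustrative
# placeholders, not values taken from the original experiments; a Comet API key
# is assumed to be configured in the environment.
if __name__ == '__main__':
    exp_params = {
        'model_name': 'GRAE',         # assumed to be registered in grae.models
        'dataset_name': 'SwissRoll',  # assumed to be registered in grae.data
        'random_state': 42,
        'lr': 1e-3,                   # remaining keys are passed to the model
    }

    # Run the validation fit over a few folds, as a hyperparameter search would
    for fold in range(3):
        fit_validate(exp_params,
                     k=fold,
                     data_path='./data',
                     write_path='./tmp',
                     others={'search_id': 'demo'},
                     custom_tag='hp_search')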
def setup_comet_ml():
    """Initialise Experiment object."""
    experiment = Experiment(
        api_key=config.COMET_API_KEY,
        disabled=not config.COMET_MONITOR,
        log_code=False,
        project_name=config.COMET_PROJECT_NAME,
        workspace=config.COMET_WORKSPACE,
    )
    experiment.set_name(config.EXPERIMENT_NAME)
    experiment.log_others({
        "conditioning": config.EXPERIMENT_Z,
        "dataset": config.EXPERIMENT_DATASET,
    })
    experiment.log_parameters({
        "batch_size": config.EXPERIMENT_BATCH_SIZE,
        "epochs": config.EXPERIMENT_EPOCHS,
    })
    return experiment
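# A minimal usage sketch for setup_comet_ml, assuming the config module above is
# importable; the training step and loss value are placeholders.
experiment = setup_comet_ml()

for epoch in range(config.EXPERIMENT_EPOCHS):
    train_loss = 0.0  # placeholder: replace with the real training step
    experiment.log_metric("train_loss", train_loss, epoch=epoch)

experiment.end()  # flush and close the experiment when training finishes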
""" # import comet_ml in the top of your file from comet_ml import Experiment import json from pathlib import Path from src.arg_parser import get_parser # Add the following code anywhere in your machine learning file experiment = Experiment(api_key="K96HV1ZN57Ip54lRy1GNaOpBN", project_name="fraudulentuserdetection", workspace="watarukudo0914") # from src.models import sdgcn, sgcn, rev2, rgcn parser = get_parser() args = parser.parse_args() experiment.log_others({ 'model_name': args.model_name, 'data_name': args.data_name, }) """ model設定 """ if args.model_name == 'sdgcn': from src.models import sdgcn model = sdgcn elif args.model_name == 'sgcn': from src.models import sgcn model = sgcn elif args.model_name == 'rgcn': from src.models import rgcn model = rgcn elif args.model_name == 'rev2': from src.models import rev2
class Dashboard:
    """Record training/evaluation statistics to Comet.

    :param dict config: experiment config (depth at most 2)
    :param paras: parsed command-line arguments
    :param Path log_dir: directory holding the Comet experiment key
    :param str train_type: 'evaluation' or pretraining
    :param bool resume: resume a previous Comet experiment
    """

    def __init__(self, config, paras, log_dir, train_type, resume=False):
        self.log_dir = log_dir
        self.expkey_f = Path(self.log_dir, 'exp_key')
        self.global_step = 1

        if resume:
            assert self.expkey_f.exists(), \
                f"Cannot find comet exp key in {self.log_dir}"
            with open(self.expkey_f, 'r') as f:
                exp_key = f.read().strip()
            self.exp = ExistingExperiment(
                previous_experiment=exp_key,
                project_name=COMET_PROJECT_NAME,
                workspace=COMET_WORKSPACE,
                auto_output_logging=None,
                auto_metric_logging=None,
                display_summary_level=0,
            )
        else:
            self.exp = Experiment(
                project_name=COMET_PROJECT_NAME,
                workspace=COMET_WORKSPACE,
                auto_output_logging=None,
                auto_metric_logging=None,
                display_summary_level=0,
            )
            # TODO: is there a better way to do this?
            with open(self.expkey_f, 'w') as f:
                print(self.exp.get_key(), file=f)

        self.exp.log_other('seed', paras.seed)
        self.log_config(config)

        if train_type == 'evaluation':
            if paras.pretrain:
                self.exp.set_name(f"{paras.pretrain_suffix}-{paras.eval_suffix}")
                self.exp.add_tags([
                    paras.pretrain_suffix,
                    config['solver']['setting'],
                    paras.accent,
                    paras.algo,
                    paras.eval_suffix,
                ])
                if paras.pretrain_model_path:
                    self.exp.log_other("pretrain-model-path", paras.pretrain_model_path)
                else:
                    self.exp.log_other("pretrain-runs", paras.pretrain_runs)
                    self.exp.log_other("pretrain-setting", paras.pretrain_setting)
                    self.exp.log_other("pretrain-tgt-accent", paras.pretrain_tgt_accent)
            else:
                self.exp.set_name(paras.eval_suffix)
                self.exp.add_tags(["mono", config['solver']['setting'], paras.accent])
        else:
            self.exp.set_name(paras.pretrain_suffix)
            self.exp.log_others({
                f"accent{i}": k for i, k in enumerate(paras.pretrain_accents)
            })
            self.exp.log_other('accent', paras.tgt_accent)
            self.exp.add_tags([paras.algo, config['solver']['setting'], paras.tgt_accent])
            # TODO: need to add pretrain setting

        # Slurm-related
        hostname = os.uname()[1]
        if len(hostname.split('.')) == 2 and hostname.split('.')[1] == 'speech':
            logger.notice(f"Running on Battleship {hostname}")
            self.exp.log_other('jobid', int(os.getenv('SLURM_JOBID')))
        else:
            logger.notice(f"Running on {hostname}")

    def log_config(self, config):
        # NOTE: depth at most 2
        for block in config:
            for n, p in config[block].items():
                if isinstance(p, dict):
                    self.exp.log_parameters(p, prefix=f'{block}-{n}')
                else:
                    self.exp.log_parameter(f'{block}-{n}', p)

    def set_status(self, status):
        self.exp.log_other('status', status)

    def step(self, n=1):
        self.global_step += n

    def set_step(self, global_step=1):
        self.global_step = global_step

    def log_info(self, prefix, info):
        self.exp.log_metrics({k: float(v) for k, v in info.items()},
                             prefix=prefix, step=self.global_step)

    def log_other(self, name, value):
        self.exp.log_metric(name, value, step=self.global_step)

    def log_step(self):
        self.exp.log_other('step', self.global_step)

    def add_figure(self, fig_name, data):
        self.exp.log_figure(figure_name=fig_name, figure=data, step=self.global_step)

    def check(self):
        if not self.exp.alive:
            logger.warning("Comet logging stopped")
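# A minimal usage sketch for Dashboard. The config, paras, log_dir and metric
# values below are illustrative placeholders, and COMET_PROJECT_NAME /
# COMET_WORKSPACE are assumed to be defined in the surrounding module.
from pathlib import Path
from types import SimpleNamespace

log_dir = Path('./log/demo')
log_dir.mkdir(parents=True, exist_ok=True)  # Dashboard writes exp_key here

paras = SimpleNamespace(seed=0, eval_suffix='demo', pretrain=False, accent='us')
config = {'solver': {'setting': 'baseline'}}

dashboard = Dashboard(config, paras, log_dir=log_dir,
                      train_type='evaluation', resume=False)

for _ in range(100):
    dashboard.log_info('train', {'loss': 0.5, 'wer': 0.3})  # placeholder stats
    dashboard.step()

dashboard.set_status('completed')
dashboard.log_step()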
def fit_test(exp_params, data_path, k, write_path, others=None, custom_tag=''):
    """Fit model and compute metrics on both train and test sets.

    Also log plot and embeddings to Comet.

    Args:
        exp_params(dict): Parameter dict. Should at least have keys model_name, dataset_name &
        random_state. Other keys are assumed to be model parameters.
        data_path(str): Data directory.
        k(int): Fold identifier.
        write_path(str): Where temp files can be written.
        others(dict): Other things to log to the Comet experiment.
        custom_tag(str): Custom tag for the Comet experiment.

    """
    # Increment fold to avoid reusing validation seeds
    k += 10

    # Comet experiment
    exp = Experiment(parse_args=False)
    exp.disable_mp()
    custom_tag += '_test'
    exp.add_tag(custom_tag)
    exp.log_parameters(exp_params)

    if others is not None:
        exp.log_others(others)

    # Parse experiment parameters
    model_name, dataset_name, random_state, model_params = parse_params(exp_params)

    # Fetch and split dataset
    data_train_full = getattr(grae.data, dataset_name)(split='train', random_state=random_state,
                                                       data_path=data_path)
    data_test = getattr(grae.data, dataset_name)(split='test', random_state=random_state,
                                                 data_path=data_path)

    if model_name == 'PCA':
        # No validation split for PCA
        data_train, data_val = data_train_full, None
    else:
        data_train, data_val = data_train_full.validation_split(random_state=FOLD_SEEDS[k])

    # Model
    m = getattr(grae.models, model_name)(random_state=FOLD_SEEDS[k], **model_params)
    m.comet_exp = exp  # Used by DL models to log metrics between epochs
    m.write_path = write_path
    m.data_val = data_val  # For early stopping

    # Benchmark fit time
    fit_start = time.time()
    m.fit(data_train)
    fit_stop = time.time()
    fit_time = fit_stop - fit_start

    # Log plots
    m.plot(data_train, data_test, title=f'{model_name}_{dataset_name}')

    if dataset_name in ['Faces', 'RotatedDigits', 'UMIST', 'Tracking', 'COIL100', 'Teapot']:
        m.view_img_rec(data_train, choice='random', title=f'{model_name}_{dataset_name}_train_rec')
        m.view_img_rec(data_test, choice='best', title=f'{model_name}_{dataset_name}_test_rec_best')
        m.view_img_rec(data_test, choice='worst', title=f'{model_name}_{dataset_name}_test_rec_worst')
    elif dataset_name in ['ToroidalHelices', 'Mammoth'] or 'SwissRoll' in dataset_name:
        m.view_surface_rec(data_train, title=f'{model_name}_{dataset_name}_train_rec',
                           dataset_name=dataset_name)
        m.view_surface_rec(data_test, title=f'{model_name}_{dataset_name}_test_rec',
                           dataset_name=dataset_name)

    # Score models
    prober = EmbeddingProber()
    prober.fit(model=m, dataset=data_train_full)

    with exp.train():
        train_z, train_metrics = prober.score(data_train_full)
        _, train_y = data_train_full.numpy()

        # Log train metrics
        exp.log_metric(name='fit_time', value=fit_time)
        exp.log_metrics(train_metrics)

    with exp.test():
        test_z, test_metrics = prober.score(data_test)
        _, test_y = data_test.numpy()

        # Log test metrics
        exp.log_metrics(test_metrics)

    # Log embedding as .npy file
    file_name = os.path.join(write_path, f'emb_{model_name}_{dataset_name}.npy')

    save_dict(dict(train_z=train_z,
                   train_y=train_y,
                   test_z=test_z,
                   test_y=test_y,
                   random_state=random_state,
                   dataset_name=dataset_name,
                   model_name=model_name),
              file_name)

    with open(file_name, 'rb') as file:
        exp.log_asset(file, file_name=file_name)

    os.remove(file_name)

    # Log marker to mark successful experiment
    exp.log_other('success', 1)