class CometMLMonitor(MonitorBase):
    """
    Send data to https://www.comet.ml.

    Note:
        1. comet_ml requires you to `import comet_ml` before importing
           tensorflow or tensorpack.
        2. The "automatic output logging" feature of comet_ml will make the
           training progress bar appear to freeze. Therefore the feature is
           disabled by default.
    """

    def __init__(self, experiment=None, api_key=None, tags=None, **kwargs):
        """
        Args:
            experiment (comet_ml.Experiment): if provided, all other arguments
                must be left unset.
            api_key (str): your comet.ml API key.
            tags (list[str]): experiment tags.
            kwargs: other arguments passed to :class:`comet_ml.Experiment`.
        """
        if experiment is not None:
            self._exp = experiment
            assert api_key is None and tags is None and len(kwargs) == 0
        else:
            from comet_ml import Experiment
            kwargs.setdefault(
                'log_code', True
            )  # code logging itself doesn't work here, but git patch logging requires log_code=True
            kwargs.setdefault('auto_output_logging', None)
            self._exp = Experiment(api_key=api_key, **kwargs)
            if tags is not None:
                self._exp.add_tags(tags)

        self._exp.set_code(
            "Code logging is impossible because there are too many files ...")
        self._exp.log_dependency('tensorpack', __git_version__)

    @property
    def experiment(self):
        """
        The :class:`comet_ml.Experiment` instance.
        """
        return self._exp

    def _before_train(self):
        self._exp.set_model_graph(tf.get_default_graph())

    @HIDE_DOC
    def process_scalar(self, name, val):
        self._exp.log_metric(name, val, step=self.global_step)

    def _after_train(self):
        self._exp.end()

    def _after_epoch(self):
        self._exp.log_epoch_end(self.epoch_num)
def main(cfg):
    shapes = cfg.model.shapes
    opt_params = cfg.optimizer.params

    experiment = Experiment(log_code=False)
    experiment.set_code(filename=hydra.utils.to_absolute_path(__file__))
    experiment.add_tag("with_hydra")
    experiment.log_parameters({"hydra-cfg": [cfg]})

    model = layers.MLP(shapes)
    optimizer = optim.Adam(model.parameters(), **opt_params)
    runner = tasks.ClassificationRunner(
        model,
        optimizer=optimizer,
        criterion=nn.CrossEntropyLoss(),
        experiment=experiment)
    runner.fit(x, y, epochs=10, checkpoint_path="./checkpoints")
    runner.save()
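# The function above presumably runs under a Hydra entry point; a minimal
# sketch of the missing scaffolding follows. The decorator arguments and the
# config.yaml layout shown below are assumptions inferred from
# cfg.model.shapes / cfg.optimizer.params, not part of the original snippet.
#
#   # config.yaml (hypothetical)
#   model:
#     shapes: [784, 256, 10]
#   optimizer:
#     params:
#       lr: 1.0e-3
import hydra
from omegaconf import DictConfig

@hydra.main(config_path=".", config_name="config")
def main(cfg: DictConfig) -> None:
    print(cfg.model.shapes, cfg.optimizer.params)  # body as in the snippet above

if __name__ == "__main__":
    main()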
class CometMLLogger(Logger):
    def __init__(self, logger_kwargs=None):
        super().__init__(logger_kwargs=logger_kwargs)
        self.experiment = Experiment(api_key=self.config["api_key"],
                                     project_name=self.config["project_name"],
                                     log_code=False,
                                     log_graph=False,
                                     auto_param_logging=False,
                                     auto_metric_logging=False,
                                     auto_output_logging=None,
                                     log_env_details=False,
                                     log_git_metadata=False)
        if "reward_func" in self.config:
            self.experiment.set_code(
                inspect.getsource(self.config["reward_func"]))

    def log_hyperparams(self, hyperparams, step=0):
        self.experiment.log_multiple_params(hyperparams, step=step)

    def log_metric(self, key, value, step=0):
        self.experiment.log_metric(key, value, step=step)
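# A minimal usage sketch for the logger above. Assumptions (not in the
# original snippet): the Logger base class stores logger_kwargs as
# self.config, and "api_key"/"project_name" are the expected keys. Note that
# log_multiple_params() is the legacy comet_ml name for what newer releases
# call log_parameters().
logger = CometMLLogger(logger_kwargs={
    "api_key": "<API_KEY>",
    "project_name": "my-project",
})
logger.log_hyperparams({"lr": 1e-3, "batch_size": 64})
logger.log_metric("reward", 0.42, step=1)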
class CometMLMonitor(MonitorBase):
    """
    Send scalar data and the graph to https://www.comet.ml.

    Note:
        1. comet_ml requires you to `import comet_ml` before importing
           tensorflow or tensorpack.
        2. The "automatic output logging" feature of comet_ml will make the
           training progress bar appear to freeze. Therefore the feature is
           disabled by default.
    """

    def __init__(self, experiment=None, tags=None, **kwargs):
        """
        Args:
            experiment (comet_ml.Experiment): if provided, all other arguments
                must be left unset.
            tags (list[str]): experiment tags.
            kwargs: arguments used to initialize :class:`comet_ml.Experiment`,
                such as project name, API key, etc.
                Refer to its documentation for details.
        """
        if experiment is not None:
            self._exp = experiment
            assert tags is None and len(kwargs) == 0
        else:
            from comet_ml import Experiment
            kwargs.setdefault(
                'log_code', True
            )  # code logging itself doesn't work here, but git patch logging requires log_code=True
            kwargs.setdefault('auto_output_logging', None)
            self._exp = Experiment(**kwargs)
            if tags is not None:
                self._exp.add_tags(tags)

        self._exp.set_code("Code logging is impossible ...")
        self._exp.log_dependency('tensorpack', __git_version__)

    @property
    def experiment(self):
        """
        The :class:`comet_ml.Experiment` instance.
        """
        return self._exp

    def _before_train(self):
        self._exp.set_model_graph(tf.get_default_graph())

    @HIDE_DOC
    def process_scalar(self, name, val):
        self._exp.log_metric(name, val, step=self.global_step)

    @HIDE_DOC
    def process_image(self, name, val):
        self._exp.set_step(self.global_step)
        for idx, v in enumerate(val):
            log_name = "{}_step{}{}".format(
                name, self.global_step,
                "_" + str(idx) if len(val) > 1 else "")
            self._exp.log_image(v, image_format="jpeg",
                                name=log_name, image_minmax=(0, 255))

    def _after_train(self):
        self._exp.end()

    def _after_epoch(self):
        self._exp.log_epoch_end(self.epoch_num)
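# A minimal sketch of attaching this monitor to a tensorpack training run.
# TrainConfig's monitors= argument, SimpleTrainer, and
# launch_train_with_config are recalled from tensorpack's public API; treat
# the exact wiring as an assumption. MyModel and my_dataflow are hypothetical
# placeholders.
import comet_ml  # must be imported before tensorflow/tensorpack (see Note above)
from tensorpack import TrainConfig, SimpleTrainer, launch_train_with_config

config = TrainConfig(
    model=MyModel(),        # hypothetical ModelDesc subclass
    dataflow=my_dataflow,   # hypothetical DataFlow yielding training batches
    monitors=[CometMLMonitor(tags=["baseline"])],
    max_epoch=10,
)
launch_train_with_config(config, SimpleTrainer())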
epochs = 100000
steps_per_epoch = 0
total_images_looked_at = 0
d_steps = 1
g_steps = 1

graph, iterator, d_train_optimizer_ops, d_stabilize_optimizer_ops, \
    g_train_optimizer_ops, g_stabilize_optimizer_ops, \
    samples_for_all_resolutions, sizes = model.get_graph()

experiment = Experiment(api_key='<API_KEY>', project_name='art_pgan',
                        workspace='schmidtdominik', log_code=False)
experiment.log_parameters({'G_learning_rate': model.g_learning_rate,
                           'D_learning_rate': model.d_learning_rate,
                           'D_steps': d_steps,
                           'G_steps': g_steps,
                           'batch_size': model.batch_size})
experiment.set_model_graph(graph)
experiment.set_code('\n# [code]: train.py\n' + open('train.py', 'r').read() +
                    '\n# [code]: image_pipeline.py\n' + open('image_pipeline.py', 'r').read() +
                    '\n# [code]: model.py\n' + open('model.py', 'r').read() +
                    '\n# [code]: discriminator.py\n' + open('discriminator.py', 'r').read() +
                    '\n# [code]: generator.py\n' + open('generator.py', 'r').read())

try:
    os.mkdir('./checkpoints/')
except FileExistsError:
    pass
try:
    os.mkdir('./progress_images/')
except FileExistsError:
    pass

current_resolution = sizes[0]
current_mode = 'train'
last_schedule_update = 0
last_schedule_update_time = time.time()
schedule_finalized = False

# T4 --> S8 --> T8 --> S16 --> T16 --> ...
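# Since set_code() takes a single string, the file concatenation above can
# also be built in a loop; this sketch produces byte-for-byte the same
# payload:
source_files = ['train.py', 'image_pipeline.py', 'model.py',
                'discriminator.py', 'generator.py']
snapshot = ''.join('\n# [code]: {}\n{}'.format(path, open(path).read())
                   for path in source_files)
experiment.set_code(snapshot)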
model_params = json.load(json_file)

with open('main.py', 'r') as file:
    code = file.read()

parameters = {
    k: model_params[args.model][k]
    for k in ('population', 'generations', 'crossover', 'factor')
}

experiment = Experiment(api_key=os.environ['COMET_API'],
                        project_name=os.environ['PROJECT_NAME'],
                        log_code=False,
                        auto_param_logging=False)
experiment.add_tag(args.model)
experiment.set_code(code=code, overwrite=True)
experiment.log_parameters(parameters)

proc = subprocess.Popen(
    "python main.py --np %s --max_gen %s --cr %s --f %s" % (
        parameters['population'],
        parameters['generations'],
        parameters['crossover'],
        parameters['factor'],
    ),
    stdout=subprocess.PIPE,
    shell=True)
output = proc.stdout.read()
scrap_output(output.decode("utf-8"),
if args.comet is not None:
    experiment = Experiment(api_key=args.comet,
                            workspace=args.workspace,
                            project_name=args.project,
                            parse_args=False,
                            auto_output_logging=None)
    if args.tags:
        experiment.add_tags(args.tags)
    with open(args.param_path) as fil:
        code = "".join(fil.readlines())
    code += "\n\n#=============Full details=============\n\n"
    code += _jsonnet.evaluate_file(args.param_path)
    code += "\n\n#=============IMPORTANT: overwritten options============\n\n"
    code += args.overrides
    experiment.set_code(code)
    code_data = json.loads(_jsonnet.evaluate_file(args.param_path))
    experiment.log_parameter(
        "bert", "bert" in code_data["dataset_reader"]["token_indexers"])
    experiment.log_parameter(
        "elmo", "elmo" in code_data["dataset_reader"]["token_indexers"])
    experiment.log_parameter("model_directory", serialization_dir)
    experiment.log_parameter("cuda_device", cuda_device)
    experiment.log_parameter("corpora", code_data["iterator"]["formalisms"])
    experiment.log_parameter("encoder", code_data["model"]["encoder"]["type"])
    experiment.log_parameter("hostname", socket.gethostname())
    experiment.log_parameter(
        "random_seed", random_seed)  # random_seed, numpy_seed, pytorch_seed
    experiment.log_parameter(
        "numpy_seed", numpy_seed)  # random_seed, numpy_seed, pytorch_seed
    experiment.log_parameter(
def main(args: argparse.Namespace):
    for package_name in args.include_package:
        import_module_and_submodules(package_name)

    params = Params.from_file(args.param_path, args.overrides)
    random_seed, numpy_seed, pytorch_seed = 41, 11, 302
    if not args.fix:
        random_seed, numpy_seed, pytorch_seed = random.randint(0, 999999999), \
            random.randint(0, 999999999), random.randint(0, 999999999)
    params["random_seed"] = random_seed
    params["numpy_seed"] = numpy_seed
    params["pytorch_seed"] = pytorch_seed
    prepare_environment(params)

    serialization_dir = args.serialization_dir
    create_serialization_dir(params, serialization_dir, args.recover, args.force)
    prepare_global_logging(serialization_dir, args.file_friendly_logging)

    hyperparams = list(
        get_hyperparams(params.as_dict(infer_type_and_cast=True)))
    params.to_file(os.path.join(serialization_dir, CONFIG_NAME))

    test_file = params.params.get("test_data_path", None)
    validation_data_path = params.get("validation_data_path", None)
    evaluate_on_test = params.pop_bool("evaluate_on_test", False)
    test_command = None
    if evaluate_on_test:
        test_command = BaseEvaluationCommand.from_params(
            params.pop("test_command"))

    cuda_device = params.params.get('trainer').get('cuda_device', -1)
    check_for_gpu(cuda_device)

    train_model = TrainPipelineModel.from_params(
        params=params, serialization_dir=serialization_dir, local_rank=0)

    trainer = train_model.trainer
    if trainer.validation_command is not None:
        trainer.validation_command.maybe_set_gold_file(validation_data_path)

    params.assert_empty('base train command')

    if args.comet is not None:
        experiment = Experiment(api_key=args.comet,
                                workspace=args.workspace,
                                project_name=args.project,
                                parse_args=False,
                                auto_output_logging=None)
        if args.tags:
            experiment.add_tags(args.tags)
        with open(args.param_path) as fil:
            code = "".join(fil.readlines())
        code += "\n\n#=============Full details=============\n\n"
        full_details = _jsonnet.evaluate_file(args.param_path)
        code += full_details
        code += "\n\n#=============IMPORTANT: overwritten options============\n\n"
        code += args.overrides
        experiment.set_code(code, overwrite=True)
        for key, val in hyperparams:
            experiment.log_parameter(key, val)
        experiment.log_parameter("model_directory", serialization_dir)
        experiment.log_parameter("cuda_device", cuda_device)
        experiment.log_parameter("hostname", socket.gethostname())
        experiment.log_parameter("random_seed", random_seed)
        experiment.log_parameter("numpy_seed", numpy_seed)
        experiment.log_parameter("pytorch_seed", pytorch_seed)
    else:
        experiment = None

    try:
        metrics = trainer.train(experiment)
    except KeyboardInterrupt:
        # if we have completed an epoch, try to create a model archive.
        if os.path.exists(os.path.join(serialization_dir, _DEFAULT_WEIGHTS)):
            logging.info(
                "Training interrupted by the user. Attempting to create "
                "a model archive using the current best epoch weights.")
            archive_model(serialization_dir)
        raise

    # Evaluate
    if test_file and evaluate_on_test:
        logger.info(
            "The model will be evaluated using the best epoch weights (see pred_test.txt).")
        trainer.annotator.annotate_file(
            trainer.model, test_file,
            os.path.join(serialization_dir, "pred_test.txt"))

        if test_command:
            logger.info("Comparing against gold standard.")
            test_command.maybe_set_gold_file(test_file)
            test_metrics = test_command.evaluate(
                os.path.join(serialization_dir, "pred_test.txt"))
            if experiment:
                with experiment.test():
                    experiment.log_metrics({
                        k: v for k, v in test_metrics.items() if np.isscalar(v)
                    })
            metrics = merge_dicts(metrics, "test", test_metrics)

    dump_metrics(os.path.join(serialization_dir, "metrics.json"), metrics, log=True)

    if not args.no_archive:
        # Now tar up results
        archive_model(serialization_dir)
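# comet_ml's experiment.test() context manager (used above for the test
# metrics) prefixes metric names with "test_" in the Comet UI. A minimal
# standalone illustration with placeholder credentials:
from comet_ml import Experiment

experiment = Experiment(api_key="<API_KEY>", project_name="demo")
with experiment.test():
    experiment.log_metric("accuracy", 0.91)  # shows up as "test_accuracy"
experiment.end()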