import os
import argparse
import time
from collections import defaultdict

import numpy as np
import torch
from torch.optim.lr_scheduler import ExponentialLR

import wandb
from load_data import Data
from model import *

# Authenticate with Weights & Biases.
# FIX: the key was previously hardcoded here (a leaked credential) and passed
# positionally; with current wandb, login()'s first positional parameter is
# `anonymous`, not `key`, so the call did not do what it appeared to do.
# Read the key from the environment and pass it by keyword instead.
wandb.login(key=os.environ.get("WANDB_API_KEY"))
wandb.init(project="ift6760-exp", anonymous='allow')
wandb.save("*.pth")


class Experiment:
    """Container for the hyper-parameters of one training run.

    Only stores configuration; the stored `kwargs` dict is presumably
    forwarded to the model constructor — TODO confirm against the rest
    of the file (not visible here).
    """

    def __init__(self, learning_rate=0.0005, ent_vec_dim=200, rel_vec_dim=200,
                 num_iterations=500, batch_size=128, decay_rate=0., cuda=False,
                 input_dropout=0.3, hidden_dropout1=0.4, hidden_dropout2=0.5,
                 label_smoothing=0., bk=False):
        self.learning_rate = learning_rate
        self.ent_vec_dim = ent_vec_dim
        self.rel_vec_dim = rel_vec_dim
        self.num_iterations = num_iterations
        self.batch_size = batch_size
        self.decay_rate = decay_rate
        self.label_smoothing = label_smoothing
        self.cuda = cuda
        # Dropout settings and the `bk` flag are bundled for pass-through.
        self.kwargs = {"input_dropout": input_dropout,
                       "hidden_dropout1": hidden_dropout1,
                       "hidden_dropout2": hidden_dropout2,
                       "bk": bk}
def test_login_anonymous(mock_server, local_netrc):
    """anonymous="must" wins over an API key present in the environment.

    Even with WANDB_API_KEY set, login(anonymous="must") should leave the
    client holding the mock server's anonymous key.
    """
    os.environ["WANDB_API_KEY"] = "B" * 40
    wandb.login(anonymous="must")
    # "ANONYMOOSE" * 4 is the 40-char anonymous key the mock server issues.
    assert wandb.api.api_key == "ANONYMOOSE" * 4
from utils.plots import plot_images, plot_results from utils.torch_utils import de_parallel LOGGERS = ('csv', 'tb', 'wandb') # text-file, TensorBoard, Weights & Biases RANK = int(os.getenv('RANK', -1)) try: import wandb assert hasattr(wandb, '__version__') # verify package import not local dir if pkg.parse_version( wandb.__version__) >= pkg.parse_version('0.12.2') and RANK in [ 0, -1 ]: try: wandb_login_success = wandb.login(timeout=30) except wandb.errors.UsageError: # known non-TTY terminal issue wandb_login_success = False if not wandb_login_success: wandb = None except (ImportError, AssertionError): wandb = None class Loggers(): # YOLOv5 Loggers class def __init__(self, save_dir=None, weights=None, opt=None, hyp=None,
def setup(self, kwargs):
    """Complete setup for wandb.init().

    This includes parsing all arguments, applying them with settings and
    enabling logging. Consumes (pops) recognized keys from *kwargs*; the
    remainder is applied to settings via _apply_init at the end.
    """
    self.kwargs = kwargs
    self._wl = wandb_setup._setup()
    # Make sure we have a logger setup (might be an early logger)
    _set_logger(self._wl._get_logger())

    # Start with settings from wandb library singleton
    settings = self._wl._clone_settings()
    settings_param = kwargs.pop("settings", None)
    if settings_param:
        settings._apply_settings(settings_param)

    self._reporter = reporting.setup_reporter(
        settings=settings.duplicate().freeze())

    # SageMaker integration: pick up run/env config injected by the platform.
    sm_config = sagemaker.parse_sm_config()
    if sm_config:
        sm_api_key = sm_config.get("wandb_api_key", None)
        sm_run, sm_env = sagemaker.parse_sm_resources()
        if sm_env:
            if sm_api_key:
                sm_env["WANDB_API_KEY"] = sm_api_key
            settings._apply_environ(sm_env)
            wandb.setup(settings=settings)
        # SageMaker-provided run fields only fill gaps, never override.
        for k, v in six.iteritems(sm_run):
            kwargs.setdefault(k, v)

    # Remove parameters that are not part of settings
    init_config = kwargs.pop("config", None) or dict()
    config_include_keys = kwargs.pop("config_include_keys", None)
    config_exclude_keys = kwargs.pop("config_exclude_keys", None)

    # Add deprecation message once we can better track it and document alternatives
    # if config_include_keys or config_exclude_keys:
    #     wandb.termwarn(
    #         "config_include_keys and config_exclude_keys are deprecated:"
    #         " use config=wandb.helper.parse_config(config_object, include=('key',))"
    #         " or config=wandb.helper.parse_config(config_object, exclude=('key',))"
    #     )
    init_config = parse_config(init_config,
                               include=config_include_keys,
                               exclude=config_exclude_keys)

    # merge config with sweep or sm (or config file);
    # explicit init config only fills keys not already present.
    self.config = sm_config or self._wl._config or dict()
    for k, v in init_config.items():
        self.config.setdefault(k, v)

    monitor_gym = kwargs.pop("monitor_gym", None)
    if monitor_gym and len(wandb.patched["gym"]) == 0:
        wandb.gym.monitor()

    tensorboard = kwargs.pop("tensorboard", None)
    sync_tensorboard = kwargs.pop("sync_tensorboard", None)
    # NOTE(review): `and` binds tighter than `or`, so tensorboard=True alone
    # triggers patching regardless of whether tensorboard is already patched
    # — confirm this precedence is intended.
    if tensorboard or sync_tensorboard and len(
            wandb.patched["tensorboard"]) == 0:
        wandb.tensorboard.patch()

    magic = kwargs.get("magic")
    if magic not in (None, False):
        magic_install(kwargs)

    # handle login related parameters as these are applied to global state
    anonymous = kwargs.pop("anonymous", None)
    force = kwargs.pop("force", None)
    login_key = wandb.login(anonymous=anonymous, force=force)
    if not login_key:
        # No credentials: fall back to offline mode.
        settings.mode = "offline"

    # apply updated global state after login was handled
    settings._apply_settings(wandb.setup()._settings)
    settings._apply_init(kwargs)

    # TODO(jhr): should this be moved? probably.
    d = dict(
        _start_time=time.time(),
        _start_datetime=datetime.datetime.now(),
    )
    settings.update(d)

    if settings._jupyter:
        self._jupyter_setup(settings)
    self._log_setup(settings)

    self.settings = settings.freeze()
def test_login_existing_key(local_netrc):
    """A key already present in the environment is picked up by login()."""
    env_key = "B" * 40
    os.environ["WANDB_API_KEY"] = env_key
    wandb.ensure_configured()
    wandb.login()
    assert wandb.api.api_key == env_key
import argparse

sys.path.append(os.path.join(os.path.dirname(__file__), '../'))

import pandas as pd
import numpy as np
import pickle
from collections import Counter
from tqdm import tqdm

# Best-effort W&B setup: logging is optional, so any failure (missing
# package, missing WANDB_API_KEY, login error) just disables it.
# FIX: was a bare `except:`, which also swallows SystemExit and
# KeyboardInterrupt; `except Exception:` keeps the best-effort behavior
# without masking interpreter-level signals.
try:
    from dotenv import find_dotenv, load_dotenv
    import wandb

    load_dotenv(find_dotenv())
    wandb.login(key=os.environ['WANDB_API_KEY'])
    from wandb.keras import WandbCallback
    _has_wandb = True
except Exception:
    _has_wandb = False

import tensorflow as tf
import tensorflow.keras.backend as K
import tokenizers
from transformers import TFAutoModel, AutoTokenizer, AutoConfig

from src import data, models
from sklearn.metrics import f1_score, precision_score, recall_score
def run(opts):
    """Train (or evaluate) a routing model, optionally distributed via DDP.

    Seeds RNGs per-rank, optionally initializes W&B on rank 0, builds the
    model/baseline/optimizer, restores state from --load_path or --resume,
    then either validates once (--eval_only) or runs the epoch loop.
    """
    # One process per GPU when several are available; rank 0 otherwise.
    rank = opts.local_rank if torch.cuda.device_count() > 1 else 0

    # Set the random seed (offset by rank so workers diverge deterministically)
    torch.manual_seed(opts.seed + rank)
    random.seed(opts.seed + rank)
    np.random.seed(opts.seed + rank)

    if not os.path.exists(opts.save_dir) and rank == 0:
        os.makedirs(opts.save_dir)

    # Optionally configure wandb
    if not opts.no_wandb and rank == 0:
        # NOTE(review): hardcoded API key committed to source — this is a
        # leaked credential and should be moved to an environment variable.
        # Positionally, 'never' binds to `anonymous` and the second argument
        # to `key` — confirm against the installed wandb version's signature.
        wandb.login('never', '31ce01e4120061694da54a54ab0dafbee1262420')
        wandb.init(dir=opts.save_dir,
                   config=opts,
                   project='large_scale_tsp',
                   name=opts.run_name,
                   sync_tensorboard=True,
                   save_code=True)

    # Set the device
    if opts.use_cuda:
        torch.cuda.set_device(rank)
        torch.distributed.init_process_group(backend='nccl',
                                             init_method='env://')
        opts.device = torch.device("cuda", rank)
    else:
        opts.device = torch.device("cpu")

    # Figure out what's the problem
    problem = load_problem(opts.problem)

    # Load data from load_path
    load_data = {}
    assert opts.load_path is None or opts.resume is None, \
        "Only one of load path and resume can be given"
    load_path = opts.load_path if opts.load_path is not None else opts.resume
    if load_path is not None:
        if rank == 0:
            print(' [*] Loading data from {}'.format(load_path))
        load_data = torch_load_cpu(load_path)

    # Initialize model
    model_class = {
        'attention': AttentionModel,
        'pointer': PointerNetwork
    }.get(opts.model, None)
    assert model_class is not None, "Unknown model: {}".format(model_class)
    model: torch.nn.Module = model_class(
        opts.embedding_dim,
        opts.hidden_dim,
        problem,
        attention_type=opts.attention_type,
        n_encode_layers=opts.n_encode_layers,
        n_heads=opts.n_heads,
        feed_forward_dim=opts.feed_forward_dim,
        encoding_knn_size=opts.encoding_knn_size,
        decoding_knn_size=opts.decoding_knn_size,
        mask_inner=True,
        mask_logits=True,
        normalization=opts.normalization,
        tanh_clipping=opts.tanh_clipping,
        checkpoint_encoder=opts.checkpoint_encoder,
        shrink_size=opts.shrink_size).to(opts.device)
    if opts.init_normalization_parameters:
        for m in model.modules():
            if isinstance(m, Normalization):
                m.init_parameters()
    if opts.use_cuda:
        # Sync batch-norm stats across ranks, then wrap in DDP.
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(
            opts.device)
        model = DDP(model, device_ids=[rank])

    # Overwrite model parameters by parameters to load
    model_ = get_inner_model(model)
    model_.load_state_dict({
        **model_.state_dict(),
        **load_data.get('model', {})
    })

    # Initialize baseline
    if opts.baseline == 'exponential':
        baseline = ExponentialBaseline(opts.exp_beta)
    elif opts.baseline == 'critic' or opts.baseline == 'critic_lstm':
        assert problem.NAME == 'tsp', "Critic only supported for TSP"
        baseline = CriticBaseline(
            (CriticNetworkLSTM(2, opts.embedding_dim, opts.hidden_dim,
                               opts.n_encode_layers, opts.tanh_clipping)
             if opts.baseline == 'critic_lstm' else CriticNetwork(
                 2, opts.embedding_dim, opts.hidden_dim,
                 opts.n_encode_layers, opts.normalization)).to(opts.device))
    elif opts.baseline == 'rollout':
        baseline = RolloutBaseline(model, problem, opts)
    else:
        assert opts.baseline is None, "Unknown baseline: {}".format(
            opts.baseline)
        baseline = NoBaseline()
    if opts.bl_warmup_epochs > 0:
        baseline = WarmupBaseline(baseline,
                                  opts.bl_warmup_epochs,
                                  warmup_exp_beta=opts.exp_beta)

    # Load baseline from data, make sure script is called with same type of baseline
    if 'baseline' in load_data:
        baseline.load_state_dict(load_data['baseline'])

    # Initialize optimizer (critic gets its own learning rate when present)
    optimizer = optim.Adam([{
        'params': model.parameters(),
        'lr': opts.lr_model
    }] + ([{
        'params': baseline.get_learnable_parameters(),
        'lr': opts.lr_critic
    }] if len(baseline.get_learnable_parameters()) > 0 else []))

    scaler = torch.cuda.amp.GradScaler() if opts.precision == 16 else None

    # Load optimizer state
    if 'optimizer' in load_data:
        optimizer.load_state_dict(load_data['optimizer'])
        # Move restored optimizer tensors onto the active device.
        for state in optimizer.state.values():
            for k, v in state.items():
                # if isinstance(v, torch.Tensor):
                if torch.is_tensor(v):
                    state[k] = v.to(opts.device)

    # Initialize learning rate scheduler, decay by lr_decay once per epoch!
    lr_scheduler = optim.lr_scheduler.LambdaLR(
        optimizer, lambda epoch: opts.lr_decay**epoch)

    # Start the actual training loop
    val_dataset = problem.make_dataset(size=opts.graph_size,
                                       num_samples=opts.val_size,
                                       filename=opts.val_dataset,
                                       distribution=opts.data_distribution)

    if opts.resume:
        # Epoch index is parsed from the checkpoint filename ("...-<epoch>.pt").
        epoch_resume = int(
            os.path.splitext(os.path.split(opts.resume)[-1])[0].split("-")[1])

        torch.set_rng_state(load_data['rng_state'])
        if opts.use_cuda:
            torch.cuda.set_rng_state_all(load_data['cuda_rng_state'])
        # Set the random states
        # Dumping of state was done before epoch callback, so do that now (model is loaded)
        baseline.epoch_callback(model, epoch_resume)
        if rank == 0:
            print("Resuming after {}".format(epoch_resume))
        opts.epoch_start = epoch_resume + 1

    if opts.eval_only:
        validate(model, val_dataset, opts)
    else:
        for epoch in range(opts.epoch_start,
                           opts.epoch_start + opts.n_epochs):
            train_epoch(model, optimizer, scaler, baseline, lr_scheduler,
                        epoch, val_dataset, problem, opts)
def main(argv):
    """Pretrain a RoBERTa masked-LM on the dataset given via abseil FLAGS.

    Optionally trains a byte-level BPE tokenizer first, then runs the
    HuggingFace Trainer and saves the final model.
    """
    wandb.login()
    is_gpu = torch.cuda.is_available()

    config = RobertaConfig(
        vocab_size=FLAGS.vocab_size,
        max_position_embeddings=FLAGS.max_position_embeddings,
        num_attention_heads=FLAGS.num_attention_heads,
        num_hidden_layers=FLAGS.num_hidden_layers,
        type_vocab_size=FLAGS.type_vocab_size,
    )

    # Resolve the tokenizer: use an existing one, or train a fresh BPE one.
    if FLAGS.tokenizer_path:
        tokenizer_path = FLAGS.tokenizer_path
    elif FLAGS.tokenizer_type.upper() == "BPE":
        tokenizer_path = FLAGS.output_tokenizer_dir
        if not os.path.isdir(tokenizer_path):
            os.makedirs(tokenizer_path)
        tokenizer = ByteLevelBPETokenizer()
        tokenizer.train(
            files=FLAGS.dataset_path,
            vocab_size=FLAGS.vocab_size,
            min_frequency=FLAGS.BPE_min_frequency,
            special_tokens=["<s>", "<pad>", "</s>", "<unk>", "<mask>"])
        tokenizer.save_model(tokenizer_path)
    else:
        # NOTE(review): in this branch `tokenizer_path` is never assigned, so
        # the from_pretrained call below raises NameError after the print.
        print("Please provide a tokenizer path if using the SMILES tokenizer")

    tokenizer = RobertaTokenizerFast.from_pretrained(
        tokenizer_path, max_len=FLAGS.max_tokenizer_len)

    model = RobertaForMaskedLM(config=config)
    model.num_parameters()

    dataset = RawTextDataset(tokenizer=tokenizer,
                             file_path=FLAGS.dataset_path,
                             block_size=FLAGS.tokenizer_block_size)
    data_collator = DataCollatorForLanguageModeling(
        tokenizer=tokenizer, mlm=True, mlm_probability=FLAGS.mlm_probability)

    training_args = TrainingArguments(
        output_dir=FLAGS.output_dir,
        overwrite_output_dir=FLAGS.overwrite_output_dir,
        num_train_epochs=FLAGS.num_train_epochs,
        per_device_train_batch_size=FLAGS.per_device_train_batch_size,
        save_steps=FLAGS.save_steps,
        save_total_limit=FLAGS.save_total_limit,
        fp16=is_gpu,  # fp16 only works on CUDA devices
    )
    trainer = Trainer(
        model=model,
        args=training_args,
        data_collator=data_collator,
        train_dataset=dataset,
    )
    trainer.train()
    trainer.save_model(FLAGS.model_name)
def test_login_invalid_key():
    """login() raises UsageError when the configured API key is invalid.

    FIX: the environment cleanup used to run only on the success path; if the
    expected exception was not raised (test failure) the key leaked into the
    environment of subsequent tests. try/finally guarantees cleanup.
    """
    os.environ["WANDB_API_KEY"] = "B" * 40
    try:
        wandb.ensure_configured()
        with pytest.raises(wandb.UsageError):
            wandb.login()
    finally:
        del os.environ["WANDB_API_KEY"]
def main(): wandb.login() # CIFAR use this config = dict( n_epochs=120, batch_size=128, classes=10, noise_rate=0.4, is_symmetric_noise=True, fraction=1.0, compute_memorization=True, dataset_name= 'CIFAR10', # opt: 'CIFAR10', 'CIFAR100', 'CDON' (not implemented) model_path='./models/CIFAR10_20.mdl', plot_path='./results/CIFAR10_20', learning_rate=0.02, momentum=0.9, weight_decay=1e-3, milestones=[40, 80], gamma=0.01, enable_amp=True, use_ELR=True, elr_lambda=3.0, elr_beta=0.7) # CDON use this config = dict( n_epochs=120, batch_size=128, classes=64, #157 categories for clothing # total subcategories is 3516 noise_rate=0.0, is_symmetric_noise=True, fraction=1.0, compute_memorization=False, dataset_name='CDON', # opt: 'CIFAR10', 'CIFAR100', 'CDON' model_path='./models/CDON_CE.mdl', plot_path='./results/CDON_CE', learning_rate=0.02, momentum=0.9, weight_decay=1e-3, milestones=[40, 80], gamma=0.01, enable_amp=True, use_ELR=False, elr_lambda=3.0, elr_beta=0.7) trainer_config = { 'model': ResNet34, 'optimizer': optim.SGD, 'optimizer_params': { 'lr': config['learning_rate'], 'momentum': config['momentum'], 'weight_decay': config['weight_decay'] }, 'scheduler': optim.lr_scheduler.MultiStepLR, 'scheduler_params': { 'milestones': config['milestones'], 'gamma': config['gamma'] }, 'criterion': torch.nn.CrossEntropyLoss, 'criterion_params': {} } # use_CosAnneal = { # 'scheduler': optim.lr_scheduler.CosineAnnealingWarmRestarts, # 'scheduler_params': {"T_0": 10, "eta_min": 0.001}, # # 'scheduler_params': {'T_max': 200, 'eta_min': 0.001} # } # trainer_config.update(use_CosAnneal) if config['use_ELR']: use_ELR = { 'criterion': ELR_loss, 'criterion_params': { 'beta': config['elr_beta'], 'lam': config['elr_lambda'] } } trainer_config.update(use_ELR) model_pipeline(config, trainer_config, loadExistingWeights=False)
train_ds = train_ds.map(add_channel_dim) val_ds = val_ds.map(add_channel_dim) """### simple model""" import tensorflow.keras as keras from tensorflow.keras import layers, losses, optimizers if running_in_colab: !pip install wandb -qqq import wandb as wb from wandb.keras import WandbCallback wb.login() wb.init(project='bird_ID', config={'lr': 1e-3, 'bs': 32}) config = wb.config keras.backend.clear_session() model = tf.keras.Sequential([ layers.Conv2D(filters=32, kernel_size=(4,4), strides=1, activation='relu', input_shape=(284, 257, 1)), layers.MaxPool2D(pool_size=(4,4)), layers.Conv2D(filters=64, kernel_size=(4,4), strides=1, activation='relu'), layers.MaxPool2D(pool_size=(4,4)), layers.Flatten(), layers.Dense(64, activation='relu'), layers.Dense(3) ])
def main():
    """Load a CBAM ResNet checkpoint and save attention plots per image.

    Returns -1 when the checkpoint given via --resume is missing.
    """
    global args, is_server
    if is_server:
        wandb.login()
    config = dict(
        vis_prefix=args.vis_prefix,
        resume=args.resume,
    )
    if is_server:
        wandb.init(config=config,
                   project="vol.4",
                   name=args.run_name,
                   tags=args.tags)

    # define constants
    # vis_prefix = 'baseline'
    CLASS_AMOUNT = 5
    DEPTH = 50
    root_dir = 'data/'
    # resume = "checkpoints/baseline_checkpoint.pth"
    traindir = os.path.join(root_dir, 'train')
    train_labels = os.path.join(root_dir, 'train', 'images_onehot_train.txt')
    valdir = os.path.join(root_dir, 'val')
    val_labels = os.path.join(root_dir, 'val', 'images_onehot_val.txt')

    # define the model
    model = ResidualNet('ImageNet', DEPTH, CLASS_AMOUNT, 'CBAM')
    if is_server:
        model = model.cuda(args.cuda_device)

    # # load the checkpoint
    if os.path.isfile(args.resume):
        print(f"=> loading checkpoint '{args.resume}'")
        checkpoint = torch.load(args.resume, map_location=torch.device('cpu'))
        state_dict = checkpoint['state_dict']
        model.load_state_dict(state_dict)
        print(f"=> loaded checkpoint '{args.resume}'")
        print(f"epoch = {checkpoint['epoch']}")
    else:
        print(f"=> no checkpoint found at '{args.resume}'")
        return -1

    # define datasets and data loaders
    size0 = 224
    segm_dir = "images/256ISIC2018_Task1_Training_GroundTruth/"
    train_dataset = DatasetISIC2018(
        train_labels,
        traindir,
        segm_dir,
        size0,
        False,  # perform flips
        False,  # perform random resized crop with size = 224
        transforms.CenterCrop(size0))
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=False,
                                               pin_memory=True)
    val_dataset = DatasetISIC2018(val_labels, valdir, segm_dir, size0, False,
                                  False, transforms.CenterCrop(size0))
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             pin_memory=True)

    # create directories to save plots
    if args.vis_prefix is not None:
        if not os.path.exists(f'vis/{args.vis_prefix}'):
            os.mkdir(f'vis/{args.vis_prefix}')
        if not os.path.exists(f'vis/{args.vis_prefix}/train'):
            os.mkdir(f'vis/{args.vis_prefix}/train')
        if not os.path.exists(f'vis/{args.vis_prefix}/val'):
            os.mkdir(f'vis/{args.vis_prefix}/val')

    for i, dictionary in enumerate(train_loader):
        input_img = dictionary['image']
        img_name = dictionary['name'][0]
        no_norm_image = dictionary['no_norm_image']
        segm = dictionary['segm']
        if is_server:
            input_img = input_img.cuda(args.cuda_device)
        make_plot_and_save(input_img,
                           img_name,
                           no_norm_image,
                           segm,
                           model,
                           'train',
                           vis_prefix=args.vis_prefix)

    # NOTE(review): this bare `return` makes the validation loop below
    # unreachable — looks like a debugging leftover; confirm before removing.
    return
    for i, dictionary in enumerate(val_loader):
        input_img = dictionary['image']
        img_name = dictionary['name'][0]
        no_norm_image = dictionary['no_norm_image']
        segm = dictionary['segm']
        if is_server:
            input_img = input_img.cuda(args.cuda_device)
        make_plot_and_save(input_img,
                           img_name,
                           no_norm_image,
                           segm,
                           model,
                           'val',
                           vis_prefix=args.vis_prefix)
from a2c_ppo_acktr.arguments import get_args from a2c_ppo_acktr.envs import make_vec_envs from a2c_ppo_acktr.model import Policy, RandomPolicy, NaviBase, VGGBase, MobilenetBase, EfficientnetBase from a2c_ppo_acktr.storage import RolloutStorage from evaluation import evaluate args = get_args() if args.wandb is not None: if not utils.is_connected(): print("no internet connection. Going in dry") # ehehehe os.environ["WANDB_MODE"] = "dryrun" import wandb if args.wandb_key is not None: wandb.login(key=args.wandb_key) if args.wandb_name is None: wandb.init(project=args.wandb) else: wandb.init(project=args.wandb, name=args.wandb_name) if args.env_name.startswith("Pupper"): env_name_parts = args.env_name.split("-") params = [] for part in env_name_parts: if "_" in part: parts = part.split("_") setattr(args, parts[0], float(parts[1])) envtypes = ["Incremental", "Absolute", "Relative"] # get the right type and delist all others
def test_login(test_settings):
    """login("") succeeds without error under disabled-mode settings."""
    disabled = wandb.Settings(mode="disabled")
    test_settings._apply_settings(disabled)
    wandb.setup(test_settings)
    wandb.login("")
from tasks.binary import Binary
from tasks.multi import Multi
from tasks.tokenize import Tokenize
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import roc_auc_score
from sklearn.metrics import f1_score
from sklearn.preprocessing import label_binarize
from pyhdfs import HdfsClient
import pickle
import numpy as np
import wandb

# NOTE(review): hardcoded W&B API key committed to source — leaked
# credential; should be read from the environment or a secrets store.
wandb.login(key='6525130c8b35bcd27b1bc36f79ad88847e7dd982')


class Wheel(object):
    """Training driver configured from a config dict (class may continue
    beyond this chunk)."""

    # Model architecture and task identifiers used by this driver.
    MODEL = 'CNNRNNPooling'
    TASK = 'SB'

    def __init__(self, config):
        # Basic Configs
        self.device = 'cuda'
        self.__test_iter = None
        self.C3_HDFS_HOST = 'c3.nn01.nhnsystem.com:50070'
        # Config File
        self.username = config['username']
        self.batch_size = config['batch']
from torch.utils.data import Dataset from tqdm import tqdm from transformers import ( BertTokenizer, BertModel, BertConfig, Trainer, TrainingArguments, ) from transformers.models.bert.modeling_bert import ( BertPreTrainedModel, BertOnlyMLMHead, MaskedLMOutput, ) wandb.login(key='8cefb8016177b89343b4f6c8eed0c154b55b006b') os.system('wandb online') os.environ['WANDB_PROJECT'] = 'bert_oppo_pretrain' from .utils import seed_everyone os.environ['CUDA_VISIBLE_DEVICES'] = '2' class BertForMaskedLM(BertPreTrainedModel): def __init__(self, config): super().__init__(config) self.bert = BertModel(config=config, add_pooling_layer=False) self.cls = BertOnlyMLMHead(config) def forward(
def main(args):
    """Train one of several adversarial-autoencoder variants.

    Model variant is selected by args.model_name ('aae', 'mask_aae',
    'mimic', 'non-prior', 'learning-prior'); optionally logs to W&B and
    evaluates via an Inception-score helper.
    """
    train_loader, _ = data_helper.get_data(args.dataset, args.batch_size,
                                           args.image_size, args.environment)

    if args.wandb:
        wandb_name = "%s[%d]_%s" % (args.dataset, args.image_size,
                                    args.model_name)
        wandb.login()
        wandb.init(project="AAE", config=args, name=wandb_name)

    inception_model_score = load_inception_model(train_loader, args.dataset,
                                                 args.image_size,
                                                 args.environment)

    ae_optimizer, d_optimizer, decoder, discriminator, encoder, g_optimizer, mapper = \
        model.get_aae_model_and_optimizer(args)
    # Variant-specific components (mapper / extra optimizers).
    if args.model_name == 'mimic':
        mapper = model.Mimic(args.latent_dim, args.latent_dim,
                             args.mapper_inter_nz,
                             args.mapper_inter_layer).to(args.device)
        decoder, encoder = pretrain_autoencoder(ae_optimizer, args, decoder,
                                                encoder, train_loader)
    if args.model_name == 'non-prior':
        mapper, m_optimizer = model.get_nonprior_model_and_optimizer(args)
    if args.model_name == 'learning-prior':
        mapper, m_optimizer, discriminator_forpl, dpl_optimizer = \
            model.get_learning_prior_model_and_optimizer(args)
        decoder_optimizer = torch.optim.Adam(decoder.parameters(), lr=1e-4)

    global start_time
    start_time = time.time()

    if args.pretrain_epoch > 0:
        pretrain_autoencoder(ae_optimizer, args, decoder, encoder,
                             train_loader)

    log_dict, log, log2 = {}, {}, {}
    for i in range(0, args.epochs):
        log_dict, log, log2 = {}, {}, {}
        # Stop early when the wall-clock budget is exhausted.
        if args.time_limit and timeout(args.time_limit, start_time):
            break
        encoded_feature_list = []
        for each_batch in tqdm.tqdm(train_loader,
                                    desc="train[%d/%d]" % (i, args.epochs)):
            each_batch = each_batch[0].to(args.device)
            if args.model_name in ['aae', 'mask_aae']:
                log = model.update_aae(ae_optimizer, args, d_optimizer,
                                       decoder, discriminator, each_batch,
                                       encoder, g_optimizer, args.latent_dim)
            elif args.model_name == 'mimic':
                log, encoded_feature = \
                    model.update_autoencoder(ae_optimizer, each_batch,
                                             encoder, decoder,
                                             return_encoded_feature=True)
                encoded_feature_list.append(encoded_feature)
            elif args.model_name == 'non-prior':
                log, encoded_feature = model.update_autoencoder(
                    ae_optimizer,
                    each_batch,
                    encoder,
                    decoder,
                    return_encoded_feature_gpu=True,
                    flag_retain_graph=False)
                log2 = model.update_posterior_part(args, mapper,
                                                   discriminator, m_optimizer,
                                                   d_optimizer,
                                                   encoded_feature)
            elif args.model_name == 'learning-prior':
                log = model.update_aae_with_mappedz(args, ae_optimizer,
                                                    d_optimizer, decoder,
                                                    discriminator, mapper,
                                                    each_batch, encoder,
                                                    g_optimizer)
                log2 = model.update_mapper_with_discriminator_forpl(
                    args, dpl_optimizer, decoder_optimizer, m_optimizer,
                    discriminator_forpl, decoder, mapper, each_batch)
        if args.model_name == 'mimic':
            g_loss = model.train_mapper(args, encoder, mapper, args.device,
                                        args.lr, args.batch_size,
                                        encoded_feature_list)
        log_dict.update(log)
        log_dict.update(log2)

        # When wandb logging is on and neither time_check nor time_limit is
        # set, record metrics at every log interval. (translated comment)
        if args.wandb and not args.time_check and not args.time_limit:
            decoder, discriminator, encoder, mapper = log_and_write_pca(
                args, decoder, discriminator, encoder, i,
                inception_model_score, mapper, log_dict)

    # When wandb logging is on and either time_check or time_limit is set,
    # record metrics only once at the very end. (translated comment)
    if args.wandb and (args.time_check or args.time_limit):
        decoder, discriminator, encoder, mapper = log_and_write_pca(
            args, decoder, discriminator, encoder, i, inception_model_score,
            mapper, log_dict)

    save_models(args, decoder, encoder, mapper)

    if args.wandb:
        wandb.finish()
def run_policy(env, get_action, max_ep_len=None, num_episodes=100,
               render=True, record=False, record_project='benchmarking',
               record_name='trained', data_path='', config_name='test',
               max_len_rb=100, benchmark=False, log_prefix=''):
    """Roll out a trained policy, optionally recording transitions.

    With record=True, transitions go to a ReplayBuffer, per-episode metrics
    to W&B, and the buffer is pickled and returned at the end. With
    benchmark=True, returns (ep_rewards, ep_costs) lists instead.
    """
    assert env is not None, \
        "Environment not found!\n\n It looks like the environment wasn't saved, " + \
        "and we can't run the agent in it. :( \n\n Check out the readthedocs " + \
        "page on Experiment Outputs for how to handle this situation."

    logger = EpochLogger()
    # o/r/d: current obs, last reward, done flag; n counts finished episodes.
    o, r, d, ep_ret, ep_len, n = env.reset(), 0, False, 0, 0, 0
    ep_cost = 0
    local_steps_per_epoch = int(4000 / num_procs())
    obs_dim = env.observation_space.shape
    act_dim = env.action_space.shape
    rew_mov_avg_10 = []
    cost_mov_avg_10 = []

    if benchmark:
        ep_costs = []
        ep_rewards = []

    if record:
        wandb.login()
        # 4 million env interactions
        wandb.init(project=record_project, name=record_name)

        rb = ReplayBuffer(size=10000,
                          env_dict={
                              "obs": {"shape": obs_dim},
                              "act": {"shape": act_dim},
                              "rew": {},
                              "next_obs": {"shape": obs_dim},
                              "done": {}})

        # columns = ['observation', 'action', 'reward', 'cost', 'done']
        # sim_data = pd.DataFrame(index=[0], columns=columns)

    while n < num_episodes:
        if render:
            env.render()
            time.sleep(1e-3)

        a = get_action(o)
        next_o, r, d, info = env.step(a)

        if record:
            # buf.store(next_o, a, r, None, info['cost'], None, None, None)
            done_int = int(d == True)
            rb.add(obs=o, act=a, rew=r, next_obs=next_o, done=done_int)

        ep_ret += r
        ep_len += 1
        ep_cost += info['cost']

        # Important!
        o = next_o

        if d or (ep_len == max_ep_len):
            # finish recording and save csv
            if record:
                rb.on_episode_end()

                # make directory if does not exist
                if not os.path.exists(data_path + config_name + '_episodes'):
                    os.makedirs(data_path + config_name + '_episodes')

                # buf = CostPOBuffer(obs_dim, act_dim, local_steps_per_epoch, 0.99, 0.99)

                # Despite the variable name, this is a 25-episode window.
                if len(rew_mov_avg_10) >= 25:
                    rew_mov_avg_10.pop(0)
                    cost_mov_avg_10.pop(0)

                rew_mov_avg_10.append(ep_ret)
                cost_mov_avg_10.append(ep_cost)

                mov_avg_ret = np.mean(rew_mov_avg_10)
                mov_avg_cost = np.mean(cost_mov_avg_10)

                expert_metrics = {log_prefix + 'episode return': ep_ret,
                                  log_prefix + 'episode cost': ep_cost,
                                  # 'cumulative return': cum_ret,
                                  # 'cumulative cost': cum_cost,
                                  log_prefix + '25ep mov avg return': mov_avg_ret,
                                  log_prefix + '25ep mov avg cost': mov_avg_cost}

                if benchmark:
                    ep_rewards.append(ep_ret)
                    ep_costs.append(ep_cost)

                wandb.log(expert_metrics)

            logger.store(EpRet=ep_ret, EpLen=ep_len, EpCost=ep_cost)
            print('Episode %d \t EpRet %.3f \t EpLen %d \t EpCost %d' %
                  (n, ep_ret, ep_len, ep_cost))
            o, r, d, ep_ret, ep_len, ep_cost = env.reset(), 0, False, 0, 0, 0
            n += 1

    logger.log_tabular('EpRet', with_min_and_max=True)
    logger.log_tabular('EpLen', average_only=True)
    logger.dump_tabular()

    if record:
        print("saving final buffer")
        bufname_pk = data_path + config_name + '_episodes/sim_data_' + \
            str(int(num_episodes)) + '_buffer.pkl'
        file_pi = open(bufname_pk, 'wb')
        pickle.dump(rb.get_all_transitions(), file_pi)
        wandb.finish()
        return rb

    if benchmark:
        return ep_rewards, ep_costs
import subprocess
import argparse

# FIX: pandas was used below (pd.read_csv / df.pop) but never imported in
# this file's import block.
import pandas as pd
import wandb
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor

# Set random seed
seed = 42

# Construct the argument parse and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-k", "--key", required=True, help="wandb API key")
args = vars(ap.parse_args())

# Set wandb up (print the results so the CI log shows login/init status)
print(wandb.login(key=args["key"]))
print(wandb.init(project="wandb-github-action"))

################################
########## DATA PREP ###########
################################

# Load in the data
df = pd.read_csv("wine_quality.csv")

# Split into train and test sections
y = df.pop("quality")
X_train, X_test, y_train, y_test = train_test_split(df, y, test_size=0.2, random_state=seed)
def fit_network(network, train_loader, valid_loader, criterion, acc_metric,
                optimizer, num_epochs=5, checkpoint_path=None, metric='loss',
                start_from_checkpoint=False, lr_scheduler=None, log_dict=None):
    """
    Trains and validates the given network for specified number of epochs.
    Includes logging, saving checkpoints and lr_scheduling facilities.

    Parameters
    ----------
    network : nn.Module
        network you want to train for segmentation
    train_loader : torch dataloader
        provides batches of training data
    valid_loader : torch dataloader
        provides batches of validation data
    criterion : nn.Module
        loss criterion
    acc_metric : function
        accuracy metric
    optimizer : torch.optim
        optimizer
    num_epochs : int, optional
        number of epochs, by default 5
    checkpoint_path : str, optional
        path of the checkpoint to load from or save to (w.r.t current
        working dir.), by default None
    metric : str, by default 'loss'
        metric to use for saving the best network, 'loss': min valid loss
        is used, 'accuracy': max valid accuracy is used
    start_from_checkpoint : bool, optional
        start training form checkpoint, by default False
    lr_scheduler : torch lr scheduler, optional
        learning rate scheduler, by default None
    log_dict : dict, optional
        log training data to wandb. To enable logging provide the log_dict.
        Logging will prompt you to provide login credentials for wandb,
        by default None (logging disabled).
        Sample log_dict:
        log_dict={
            'name': 'Package',        # Name of each run (change at each run)
            'entity': 'MML',          # username of wandb account
            'project': 'package_test',  # project name
            'notes': 'Test run',      # adding notes to the run
            'tags': ['Test'],         # adding tags to runs for grouping
                                      # list of tags can have multiple tags
            'log_histograms': False   # log network parameters and weights (True/False)
        }
    """
    ## Sanity Check ##
    if start_from_checkpoint == True and checkpoint_path is None:
        raise AssertionError(
            'start_from_checkpoint must be False when checkpoint_path is None')
    if metric.lower() not in ['loss', 'accuracy']:
        raise AssertionError("metric must be in ['loss', 'accuracy']")

    # FIX: mutable default argument replaced with None sentinel; all later
    # truthiness checks (`if log_dict:`) behave identically.
    if log_dict is None:
        log_dict = {}

    if not start_from_checkpoint:
        start_epoch = 1
        loss_min = np.inf
        print('Starting Training from Epoch: {}\n'.format(start_epoch))
    else:
        if not os.path.exists(checkpoint_path):
            raise AssertionError(
                'Checkpoint Path does not exist. Make start_from_checkpoint=False.'
            )
        checkpoint = torch.load(checkpoint_path)
        start_epoch = checkpoint['epoch'] + 1
        network.load_state_dict(checkpoint['network'])
        optimizer = checkpoint['optimizer']
        loss_min = checkpoint['loss']
        if start_epoch > num_epochs:
            raise AssertionError(
                'Increase the number of num of epochs beyond {} to continue training'
                .format(start_epoch))
        print('Resuming Training from Epoch: {}\n'.format(start_epoch))

    # if log_dict is not empty, log to wandb
    if log_dict:
        wandb.login()
        # configuration parameters
        config_dict = {
            'network': network.__class__.__name__,
            'criterion': criterion.__class__.__name__,
            'acc_metric': acc_metric.__class__.__name__,
            'learning_rate': optimizer.state_dict()['param_groups'][0]['lr'],
            'num_epochs': num_epochs,
            'optimizer': optimizer.__class__.__name__,
            'patience':
            lr_scheduler.state_dict()['patience'] if lr_scheduler else None,
            'decay_factor':
            lr_scheduler.state_dict()['factor'] if lr_scheduler else None,
            'metric': metric,
            'dataset': train_loader.dataset.__dict__['dataset'].name
        }
        # initialization parameters
        wandb.init(name=log_dict['name'],
                   entity=log_dict['entity'],
                   project=log_dict['project'],
                   notes=log_dict['notes'],
                   tags=log_dict['tags'],
                   config=config_dict)
        # if you want to log network weight histograms
        if log_dict['log_histograms']:
            print('\nLogging Network Parameters\n')
            wandb.watch(network)

    # BUG FIX: the original unconditionally reset `loss_min = np.inf` here,
    # which discarded the best-loss value just restored from the checkpoint
    # and made the first epoch after a resume always overwrite the "best"
    # checkpoint. Only acc_max needs initializing here (it has no
    # checkpointed counterpart).
    acc_max = 0

    for epoch in range(start_epoch, num_epochs + 1):
        loss_train, acc_train = train(network, criterion, optimizer,
                                      train_loader, acc_metric)
        loss_valid, acc_valid = validate(network, criterion, valid_loader,
                                         acc_metric)

        # Reduce the learning rate on Plateau
        if lr_scheduler:
            lr_scheduler.step(loss_valid)

        sys.stdout.write('\r')
        sys.stdout.flush()
        print(
            '\n----------------------------------------------------------------------------------------'
        )
        print(
            'Epoch: {}  Train Loss: {:.4f}  Train Acc: {:.4f}  Valid Loss: {:.4f}  Valid Acc: {:.4f}'
            .format(epoch, loss_train, acc_train, loss_valid, acc_valid))
        print(
            '----------------------------------------------------------------------------------------'
        )

        # Save checkpoint
        loss_min, acc_max = save_checkpoint(epoch, network, optimizer,
                                            loss_valid, acc_valid, loss_min,
                                            acc_max, metric, checkpoint_path)

        # log the performance to wandb
        if log_dict:
            log_wandb(epoch, loss_train, loss_valid, loss_min, acc_train,
                      acc_valid, acc_max, lr_scheduler)

    print('\n---Training Complete---')
if __name__ == '__main__': opt = TrainOptions().parse() # get training options model = create_model( opt) # create a model given opt.model and other options model.setup( opt) # regular setup: load and print networks; create schedulers dataset = create_dataset( opt) # create a dataset given opt.dataset_mode and other options dataset_size = len(dataset) # get the number of images in the dataset. print('The number of training samples = %d' % dataset_size) evaluator = get_evaluator(opt, model=model, dataset=dataset) total_iters = 0 # the total number of training iterations epoch = 0 if opt.wandb: wandb.login(key=os.environ.get('WANDB_API_KEY')) wandb.init(config=opt) while total_iters < opt.total_num_giters: epoch_start_time = time.time() # timer for entire epoch iter_data_time = time.time() # timer for data loading per iteration epoch_iter = 0 # the number of training iterations in current epoch, reset to 0 every epoch for i, data in enumerate(dataset): # inner loop within one epoch iter_start_time = time.time( ) # timer for computation per iteration if total_iters % opt.print_freq == 0: t_data = iter_start_time - iter_data_time model.set_input(
checkpoints = list( filter(lambda x: '.ckpt' in x, os.listdir(classifier_model_dir))) if load_pretrained else [] load_pretrained = load_pretrained and len(checkpoints) > 0 # print(model) # logger = TensorBoardLogger("logs", name=classifier_model_name, log_graph=True) #%% if __name__ == "__main__": if load_pretrained: checkpoint_path = os.path.join(classifier_model_dir, checkpoints[-1]) model = classifier.load_from_checkpoint(checkpoint_path) else: model = classifier(**model_args) pl.seed_everything(42) wandb.login(key='355d7f0e367b84fb9f8a140be052641fbd926fb5') logger = WandbLogger(name=classifier_model_name, save_dir='logs', offline=True) logger.watch(model, log='gradients', log_freq=100) #logger = TensorBoardLogger("logs", name=classifier_model_name, log_graph=True) grad_acumulator = GradientAccumulationScheduler(scheduling={0: 2, 1: 3}) lr_monitor = LearningRateMonitor(logging_interval='step') model_chkpt = ModelCheckpoint(dirpath=classifier_model_dir, monitor='val_acc_epoch', filename='{epoch}-{val_acc_epoch:.2f}', verbose=True) early_stopper = EarlyStopping(monitor='val_acc_epoch', patience=6, verbose=True) trainer = pl.Trainer(logger=logger,
def test_login_key(local_netrc, capsys):
    """Logging in with an explicit key stores it and emits a warning on stderr."""
    fake_key = "A" * 40
    wandb.login(key=fake_key)
    _, err = capsys.readouterr()
    assert "wandb: WARNING If" in err
    assert wandb.api.api_key == fake_key
def wandb_setup():
    """Authenticate with Weights & Biases and start the MNIST run.

    The entity comes from the WANDB_USERNAME environment variable,
    falling back to a placeholder user name.
    """
    wandb.login()
    entity = os.environ.get("WANDB_USERNAME", "my-user-name")
    wandb.init(
        project="mnist-single-node-single-gpu",
        entity=entity,
    )
def test_login_jupyter_anonymous(mock_server, local_netrc, mocker):
    """Anonymous login from a Jupyter session yields the anonymous API key."""
    python_type = mocker.patch("wandb._get_python_type")
    python_type.return_value = "ipython"
    wandb.login(anonymous="allow")
    assert wandb.api.api_key == "ANONYMOOSE" * 4
def build_trainer(opt, device_id, model, fields, optim, model_saver=None):
    """
    Simplify `Trainer` creation based on user `opt`s*

    Args:
        opt (:obj:`Namespace`): user options (usually from argument parsing)
        device_id (int): GPU index for this process; negative means CPU-only
        model (:obj:`onmt.models.NMTModel`): the model to train
        fields (dict): dict of fields
        optim (:obj:`onmt.utils.Optimizer`): optimizer used during training
        model_saver(:obj:`onmt.models.ModelSaverBase`): the utility object
            used to save the model

    Returns:
        :obj:`onmt.Trainer`: a fully configured trainer.
    """
    tgt_field = dict(fields)["tgt"].base_field
    train_loss = onmt.utils.loss.build_loss_compute(model, tgt_field, opt)
    valid_loss = onmt.utils.loss.build_loss_compute(
        model, tgt_field, opt, train=False)

    trunc_size = opt.truncated_decoder  # Badly named...
    shard_size = opt.max_generator_batches if opt.model_dtype == 'fp32' else 0
    # @memray: BPTT is not compatible with Orth and SemCov,
    # Otherwise will trigger error:
    #   raise RuntimeError("grad can be implicitly created only for scalar outputs")
    # at function shards() in loss.py (torch.autograd.backward(inputs, grads))
    if opt.data_type == 'keyphrase' or opt.model_type == 'keyphrase':
        trunc_size = 0
        shard_size = 0

    norm_method = opt.normalization
    accum_count = opt.accum_count
    accum_steps = opt.accum_steps
    n_gpu = opt.world_size
    average_decay = opt.average_decay
    average_every = opt.average_every
    dropout = opt.dropout
    dropout_steps = opt.dropout_steps
    if device_id >= 0:
        gpu_rank = opt.gpu_ranks[device_id]
    else:
        # CPU-only run: no rank and zero GPUs regardless of world_size.
        gpu_rank = 0
        n_gpu = 0
    gpu_verbose_level = opt.gpu_verbose_level

    earlystopper = onmt.utils.EarlyStopping(
        opt.early_stopping, scorers=onmt.utils.scorers_from_opts(opt)) \
        if opt.early_stopping > 0 else None

    report_manager = onmt.utils.build_report_manager(opt, gpu_rank)

    # setup wandb if applicable (only on rank 0 to avoid duplicate runs
    # in multi-GPU training)
    if opt.wandb and gpu_rank == 0:
        from datetime import datetime
        # BUGFIX: the timestamp string was previously assigned to a local
        # named `datetime`, shadowing the class imported just above.
        run_suffix = datetime.now().strftime("-%Y%m%d-%H%M%S")
        wandb.login(key=opt.wandb_key)
        wandb.init(project=opt.wandb_project, id=opt.exp + run_suffix,
                   resume=True, dir=opt.wandb_log_dir)
        wandb.config.update(opt, allow_val_change=True)
        wandb.watch(model, log='all')

    trainer = onmt.Trainer(model, train_loss, valid_loss, optim, trunc_size,
                           shard_size, norm_method, accum_count, accum_steps,
                           n_gpu, gpu_rank, gpu_verbose_level, report_manager,
                           with_align=opt.lambda_align > 0,
                           # only the master rank persists checkpoints
                           model_saver=model_saver if gpu_rank == 0 else None,
                           average_decay=average_decay,
                           average_every=average_every,
                           model_dtype=opt.model_dtype,
                           earlystopper=earlystopper,
                           dropout=dropout,
                           dropout_steps=dropout_steps)
    return trainer
import math from collections import deque import torch.nn as nn import torch.nn.functional as F import torch.optim as optim import wandb module_path = os.path.abspath(os.path.join('.')) sys.path.append(module_path + "/RideHailing/envs/") import RideHailing from RideHailing.envs import * from RideHailing.envs.RideHailing_env import * env = gym.make('RideHailing-v0', config=CONFIG) wandb.login(key='74f441f8a5ff9046ae53fad3d92540a168c6bc83') wandb.init(project='RL', tags=['DQN_FirstTrial']) # https://www.kaggle.com/isbhargav/guide-to-pytorch-learning-rate-scheduling DQN_config = { 'replay_size': 16384, 'min_replay_size': 2048, 'epsilon': 0.8, 'epsilon_decay_param': 50000, 'epsilon_decay_power': 0.6, 'target_update_freq': 200, #update target Q network every x steps 'sample_replay_size': 1024, 'discount': 1, 'lr': 0.005, 'step': 20, 'gamma': 0.2,
def main(args):
    """Train the autoencoder/GAN variant selected by ``args.model_name``.

    Side effects: loads (or downloads) the chosen dataset, caches real-image
    inception statistics under ``../../inception_model_info``, periodically
    saves encoder/mapper/decoder checkpoints, and optionally logs to
    Weights & Biases.
    """
    # Shared scorer object defined at module level; real-image statistics
    # computed here are cached for subsequent experiments.
    global inception_model_score
    # load real images info or generate real images info
    model_name = args.model_name
    #torch.cuda.set_device(device=args.device)
    device = args.device
    epochs = args.epochs
    batch_size = args.batch_size
    img_size = args.img_size
    save_image_interval = args.save_image_interval
    loss_calculation_interval = args.loss_calculation_interval  # NOTE(review): unused in this function
    latent_dim = args.latent_dim
    project_name = args.project_name
    dataset = args.dataset
    lr = args.lr
    n_iter = args.n_iter  # NOTE(review): unused in this function
    # Fixed latent vector -- presumably so sample images stay comparable
    # across epochs; confirm against make_fixed_z.
    fixed_z = make_fixed_z(model_name, latent_dim, device)
    image_shape = [3, img_size, img_size]  # NOTE(review): unused in this function
    time_limit_sec = timeparse(args.time_limit)

    if args.wandb:
        wandb.login()
        wandb_name = dataset + ',' + model_name + ',' + str(
            img_size) + ",convchange"
        if args.run_test:
            wandb_name += ', test run'
        wandb.init(project=project_name, config=args, name=wandb_name)
        config = wandb.config
    ''' customize '''
    # 'vanilla' trains without a latent mapper at all.
    if model_name in ['vanilla']:
        args.mapper_inter_layer = 0
    # Adversarial variants build a discriminator; the u-learning/mimic
    # family trains encoder/decoder only.
    if model_name in [
            'vanilla', 'pointMapping_but_aae', 'non-prior', 'mimic+non-prior',
            'vanilla-mimic'
    ]:
        encoder = Encoder(latent_dim, img_size).to(device)
        decoder = Decoder(latent_dim, img_size).to(device)
        discriminator = Discriminator(latent_dim).to(device)
        # One optimizer over both encoder and decoder parameters.
        ae_optimizer = torch.optim.Adam(itertools.chain(
            encoder.parameters(), decoder.parameters()),
                                        lr=lr)
        d_optimizer = torch.optim.Adam(discriminator.parameters(), lr=lr)
    elif model_name in [
            'ulearning', 'ulearning_point', 'mimic_at_last', 'mimic'
    ]:
        encoder = Encoder(latent_dim, img_size).to(device)
        decoder = Decoder(latent_dim, img_size).to(device)
        discriminator = None
        d_optimizer = None
        ae_optimizer = torch.optim.Adam(itertools.chain(
            encoder.parameters(), decoder.parameters()),
                                        lr=lr)

    ###########################################
    #####               Score             #####
    ###########################################
    # Defer heavy inception forward passes until lazy_forward() is called.
    inception_model_score.lazy_mode(True)
    ''' Please fill in the dataset!
    customize '''
    if dataset == 'CelebA':
        train_loader = get_celebA_dataset(batch_size, img_size)
    elif dataset == 'FFHQ':
        train_loader = get_ffhq_thumbnails(batch_size, img_size)
    elif dataset == 'mnist':
        train_loader = get_mnist_dataset(batch_size, img_size)
    elif dataset == 'mnist_fashion':
        train_loader = get_mnist_fashion_dataset(batch_size, img_size)
    elif dataset == 'emnist':
        train_loader = get_emnist_dataset(batch_size, img_size)
    elif dataset == 'LSUN_dining_room':
        #wget http://dl.yf.io/lsun/scenes/dining_room_train_lmdb.zip
        #unzip dining_room_train_lmdb.zip
        #located dining_room_train_lmdb folder in dataset directory
        train_loader = get_lsun_dataset(batch_size,
                                        img_size,
                                        classes='dining_room_train')
    elif dataset == 'LSUN_classroom':
        #wget http://dl.yf.io/lsun/scenes/classroom_train_lmdb.zip
        #unzip classroom_train_lmdb.zip
        #located classroom_train_lmdb folder in dataset directory
        train_loader = get_lsun_dataset(batch_size,
                                        img_size,
                                        classes='classroom_train')
    elif dataset == 'LSUN_conference':
        #wget http://dl.yf.io/lsun/scenes/conference_room_train_lmdb.zip
        #unzip conference_room_train_lmdb.zip
        #located conference_room_train_lmdb folder in dataset directory
        train_loader = get_lsun_dataset(batch_size,
                                        img_size,
                                        classes='conference_room_train')
    elif dataset == 'LSUN_churches':
        #wget http://dl.yf.io/lsun/scenes/church_outdoor_train_lmdb.zip
        #unzip church_outdoor_train_lmdb.zip
        #located church_outdoor_train_lmdb folder in dataset directory
        train_loader = get_lsun_dataset(batch_size,
                                        img_size,
                                        classes='church_outdoor_train')
    else:
        print("dataset is forced selected to cifar10")
        train_loader = get_cifar1_dataset(batch_size, img_size)

    # Cache key: hash of the dataset's repr, so statistics are reused only
    # for an identical dataset configuration.
    real_images_info_file_name = hashlib.md5(
        str(train_loader.dataset).encode()).hexdigest() + '.pickle'
    if args.run_test:
        real_images_info_file_name += '.run_test'
    os.makedirs('../../inception_model_info', exist_ok=True)
    if os.path.exists('../../inception_model_info/' +
                      real_images_info_file_name):
        print("Using generated real image info.")
        print(train_loader.dataset)
        inception_model_score.load_real_images_info(
            '../../inception_model_info/' + real_images_info_file_name)
    else:
        inception_model_score.model_to(device)
        #put real image
        for each_batch in tqdm.tqdm(train_loader, desc='insert real dataset'):
            X_train_batch = each_batch[0]
            inception_model_score.put_real(X_train_batch)
            if args.run_test:
                # Test runs only process a single batch.
                break
        #generate real images info
        inception_model_score.lazy_forward(batch_size=256,
                                           device=device,
                                           real_forward=True)
        inception_model_score.calculate_real_image_statistics()
        #save real images info for next experiments
        inception_model_score.save_real_images_info(
            '../../inception_model_info/' + real_images_info_file_name)
        #offload inception_model
        inception_model_score.model_to('cpu')

    # Build the latent mapper (architecture depends on the model variant).
    if args.mapper_inter_layer > 0:
        if model_name in ['ulearning_point', 'mimic_at_last']:
            mapper = EachLatentMapping(
                nz=args.latent_dim,
                inter_nz=args.mapper_inter_nz,
                linear_num=args.mapper_inter_layer).to(device)
            m_optimizer = None
        elif model_name in ['pointMapping_but_aae']:
            mapper = EachLatentMapping(
                nz=args.latent_dim,
                inter_nz=args.mapper_inter_nz,
                linear_num=args.mapper_inter_layer).to(device)
            m_optimizer = torch.optim.Adam(mapper.parameters(), lr=lr)
        elif model_name in ['ulearning', 'non-prior']:
            mapper = Mapping(args.latent_dim, args.mapper_inter_nz,
                             args.mapper_inter_layer).to(device)
            m_optimizer = torch.optim.Adam(mapper.parameters(), lr=lr)
        elif model_name in ['mimic', 'vanilla-mimic']:
            mapper = Mimic(args.latent_dim, args.latent_dim,
                           args.mapper_inter_nz,
                           args.mapper_inter_layer).to(device)
            m_optimizer = torch.optim.Adam(mapper.parameters(),
                                           lr=lr,
                                           weight_decay=1e-3)
        elif model_name in [
                'mimic+non-prior',
        ]:
            mapper = MimicStack(args.latent_dim, args.latent_dim,
                                args.mapper_inter_nz,
                                args.mapper_inter_layer).to(device)
            m_optimizer = torch.optim.Adam(mapper.parameters(), lr=lr)
    else:
        # case vanilla and there is no mapper
        mapper = lambda x: x
        m_optimizer = None

    # Optionally resume individual components from checkpoints.
    if args.load_netE != '':
        load_model(encoder, args.load_netE)
    if args.load_netM != '':
        load_model(mapper, args.load_netM)
    if args.load_netD != '':
        load_model(decoder, args.load_netD)

    time_start_run = time.time()
    AE_pretrain(args, train_loader, device, ae_optimizer, encoder, decoder)
    M_pretrain(args, train_loader, device, d_optimizer, m_optimizer, mapper,
               encoder, discriminator)

    # train phase
    # i is pre-initialized so the post-loop code below is safe even when
    # epochs == 0 and the for loop never runs.
    i = 0
    loss_log = {}
    for i in range(1, epochs + 1):
        loss_log = train_main(args, train_loader, i, device, ae_optimizer,
                              m_optimizer, d_optimizer, encoder, decoder,
                              mapper, discriminator)
        loss_log.update({'spend time': time.time() - time_start_run})
        if check_time_over(time_start_run, time_limit_sec) == True:
            print("time limit over")
            break
        if i % save_image_interval == 0:
            # Push generated samples through the inception scorer and
            # compute metrics for this epoch.
            insert_sample_image_inception(args, i, epochs, train_loader,
                                          mapper, decoder,
                                          inception_model_score)
            matric = gen_matric(wandb, args, train_loader, encoder, mapper,
                                decoder, discriminator, inception_model_score)
            loss_log.update(matric)
        if args.wandb:
            wandb_update(wandb, i, args, train_loader, encoder, mapper,
                         decoder, device, fixed_z, loss_log)
        else:
            print(loss_log)
        if i % args.save_model_every == 0:
            now_time = str(datetime.now())
            save_model([encoder, mapper, decoder], [
                "%s[%d epoch].netE" % (now_time, i),
                "%s[%d epoch].netM" % (now_time, i),
                "%s[%d epoch].netD" % (now_time, i)
            ])

    #make last matric
    # 'mimic_at_last' defers mapper training until after the main loop.
    if model_name in ['mimic_at_last']:
        M_train_at_last(args, train_loader, device, d_optimizer, m_optimizer,
                        mapper, encoder, discriminator)
    # If the loop ended between score intervals, compute one final metric.
    if i % save_image_interval != 0:
        insert_sample_image_inception(args, i, epochs, train_loader, mapper,
                                      decoder, inception_model_score)
        matric = gen_matric(wandb, args, train_loader, encoder, mapper,
                            decoder, discriminator, inception_model_score)
        if args.wandb:
            loss_log.update(matric)
            wandb_update(wandb, i, args, train_loader, encoder, mapper,
                         decoder, device, fixed_z, loss_log)

    # Final checkpoint of all three components.
    now_time = str(datetime.now())
    save_model([encoder, mapper, decoder], [
        "%s[%d epoch].netE" % (now_time, i),
        "%s[%d epoch].netM" % (now_time, i),
        "%s[%d epoch].netD" % (now_time, i)
    ])
    if args.wandb:
        wandb.finish()
import datetime import sys import os import numpy as np import pandas as pd import tensorflow as tf from tensorflow.keras.layers import Dense from tensorflow.keras.models import Sequential from tensorflow.keras import Input, Model import wandb from wandb.keras import WandbCallback wandb.login() # Import mlcompute module to use the optional set_mlc_device API for device selection with ML Compute. #from tensorflow.python.compiler.mlcompute import mlcompute # Select CPU device. #mlcompute.set_mlc_device(device_name='any') # Available options are 'cpu', 'gpu', and 'any'. from sklearn.preprocessing import MinMaxScaler from tqdm import tqdm import src.preprocessing_3days from src.preprocessing_3days import series_to_supervised, preprocess from src.functions import load_data, TimeSeriesTensor, create_evaluation_df, plot_train_history, validation, save_model, load_model def train_test_split(df, n_test): if len(df) < 8760:
def init(self):
    """Create and start a managed run.

    Logs in, allocates console-capture file descriptors for the selected
    console mode, launches the backend, registers the run with it
    (synchronously or asynchronously depending on ``settings.mode``),
    installs global handles and atexit cleanup, and returns the started
    run -- or ``None`` when mode is "noop".
    """
    s = self.settings
    config = self.config
    if s.mode == "noop":
        # TODO(jhr): return dummy object
        return None

    # Make sure we are logged in
    wandb.login()

    # Console-capture fds; remain None unless a capturing mode below
    # allocates them.
    stdout_master_fd = None
    stderr_master_fd = None
    stdout_slave_fd = None
    stderr_slave_fd = None

    console = s.console
    if console == "redirect":
        pass
    elif console == "off":
        pass
    elif console == "mock":
        pass
    elif console == "file":
        pass
    elif console == "iowrap":
        # One pty pair per stream so child output can be read back.
        stdout_master_fd, stdout_slave_fd = io_wrap.wandb_pty(resize=False)
        stderr_master_fd, stderr_slave_fd = io_wrap.wandb_pty(resize=False)
    elif console == "_win32":
        # Not used right now
        stdout_master_fd, stdout_slave_fd = win32_create_pipe()
        stderr_master_fd, stderr_slave_fd = win32_create_pipe()
    else:
        # Unknown mode: report internally and continue without capture.
        self._reporter.internal("Unknown console: %s", console)

    backend = Backend(mode=s.mode)
    backend.ensure_launched(
        settings=s,
        stdout_fd=stdout_master_fd,
        stderr_fd=stderr_master_fd,
        use_redirect=self._use_redirect,
    )
    backend.server_connect()
    # resuming needs access to the server, check server_status()?

    run = RunManaged(config=config, settings=s)
    run._set_backend(backend)
    run._set_reporter(self._reporter)
    # TODO: pass mode to backend
    # run_synced = None

    backend._hack_set_run(run)

    if s.mode == "online":
        # Block until the server acknowledges the run.
        ret = backend.interface.send_run_sync(run, timeout=30)
        # TODO: fail on error, check return type
        run._set_run_obj(ret.run)
    elif s.mode in ("offline", "dryrun"):
        backend.interface.send_run(run)
    elif s.mode in ("async", "run"):
        # NOTE(review): the sync result is discarded and the run is sent
        # again asynchronously -- presumably a best-effort fallback; confirm.
        ret = backend.interface.send_run_sync(run, timeout=10)
        # TODO: on network error, do async run save
        backend.interface.send_run(run)

    self.run = run
    self.backend = backend
    # Publish module-level handles (run/config/log/join).
    set_global(run=run, config=run.config, log=run.log, join=run.join)
    self._reporter.set_context(run=run)
    run.on_start()

    logger.info("atexit reg")
    self._hooks = ExitHooks()
    self._hooks.hook()
    atexit.register(lambda: self._atexit_cleanup())

    if self._use_redirect:
        # setup fake callback
        self._redirect_cb = self._callback
        self._redirect(stdout_slave_fd, stderr_slave_fd)

    # for super agent
    # run._save_job_spec()
    return run