def __getitem__(self, index):
    """Return one sample: the RGB image and, in train mode, its class-index mask.

    Args:
        index: integer position into ``self.image_names``.

    Returns:
        dict with keys ``image_name`` and ``image``; when
        ``self.datamode == "train"`` it also contains ``mask`` (a float
        tensor of per-pixel class indices).
    """
    #print( "index : ", index )
    image_name = self.image_names[index]

    #-------------
    # image
    #-------------
    image = Image.open( os.path.join(self.dataset_dir, self.datamode, image_name)).convert('RGB')
    # Draw a fresh per-sample seed and re-seed the RNGs before the image
    # transform so augmentation is reproducible per sample.
    # NOTE(review): the mask transform below does not re-seed — confirm
    # image/mask augmentations are intended to stay aligned.
    self.seed_da = random.randint(0, 10000)
    if (self.data_augument):
        set_random_seed(self.seed_da)
    image = self.transform(image)

    #-------------
    # mask
    #-------------
    if (self.datamode == "train"):
        if (self.args.load_masks_from_dir):
            mask_split_np = self.get_mask_image_from_dir(
                self.df_train.loc[image_name],
                n_channels=self.args.n_in_channels,
                n_classes=self.n_classes,
                load_mask_dir=os.path.join(self.dataset_dir, "train_masks"),
                image_name=image_name)
        else:
            mask_split_np = self.get_mask_image(
                self.df_train.loc[image_name],
                n_channels=self.args.n_in_channels,
                n_classes=self.n_classes)

        # Single mask image holding several label values (int type) / 0 ~ n_classes
        mask_np = concat_masks(mask_split_np, n_classes=self.n_classes)
        #print( "mask_np.shape : ", mask_np.shape )
        #print( "min(mask_np)={}, max(mask_np)={}".format(np.min(mask_np), np.max(mask_np)) )

        # Single mask tensor holding several label values (int type) / 0 ~ n_classes
        mask = torch.from_numpy(
            np.asarray(
                self.transform_mask_woToTernsor(
                    Image.fromarray(mask_np)))).float()
        #print( "mask.shape : ", mask.shape )
        #print( "torch.min(mask)={}, torch.max(mask)={}".format(torch.min(mask), torch.max(mask)) )

    if (self.datamode == "train"):
        results_dict = {
            "image_name": image_name,
            "image": image,
            "mask": mask,
        }
    else:
        results_dict = {
            "image_name": image_name,
            "image": image,
        }

    return results_dict
def main():
    """Entry point: parse CLI args, load hparams, seed RNGs, and launch training."""
    cli = argparse.ArgumentParser()
    cli.add_argument("-c", "--config", type=str, required=True, help="yaml file for config.")
    cli.add_argument(
        "-n",
        "--name",
        type=str,
        default=None,
        help="Name of the model. Used for both logging and saving chkpt.",
    )
    opts = cli.parse_args()

    hp = load_hparam(opts.config)
    if opts.name is not None:
        hp.log.name = opts.name

    # Draw a random seed when none was configured, then fix all RNGs.
    if hp.train.random_seed is None:
        hp.train.random_seed = random.randint(1, 10000)
    set_random_seed(hp.train.random_seed)

    # A negative gpu count means "use every visible device".
    if hp.train.dist.gpus < 0:
        hp.train.dist.gpus = torch.cuda.device_count()

    # CPU-only runs use the plain loop; otherwise spawn distributed workers.
    run_on_cpu = hp.model.device.lower() == "cpu" or hp.train.dist.gpus == 0
    if run_on_cpu:
        train_loop(0, hp)
    else:
        distributed_run(train_loop, hp.to_dict(), hp.train.dist.gpus)
def main():
    """Build the model and data loaders from the config, then train or test."""
    config = get_config()

    torch.backends.cudnn.benchmark = True
    torch.backends.cudnn.enabled = True

    # logging to the file and stdout
    logger = get_logger(config.log_dir, config.exp_name)

    # fix random seed to reproduce results
    set_random_seed(config.random_seed)
    logger.info('Random seed: {:d}'.format(config.random_seed))
    logger.info(pprint.pformat(config))

    # Choose the model family from the configured method name.
    if config.method in ['src', 'jigsaw', 'rotate']:
        model = AuxModel(config, logger)
    elif config.method in ['cdan', 'cdan+e', 'dann']:
        model = CDANModel(config, logger)
    else:
        raise ValueError("Unknown method: %s" % config.method)

    # create data loaders
    source_loader, validation_loader = get_train_val_dataloader(config.datasets.src)
    testing_loader = get_test_dataloader(config.datasets.test)

    # An (optional) unlabeled target-domain loader for adaptation methods.
    target_loader = None
    if config.datasets.get('tar', None):
        target_loader = get_target_dataloader(config.datasets.tar)

    # main loop
    if config.mode == 'train':
        model.train(source_loader, target_loader, validation_loader, testing_loader)
    elif config.mode == 'test':
        model.test(testing_loader)
def __call__(self, sample):
    """Return ``self.sample_num`` stochastic views of *sample* plus one clean view.

    The global RNGs are re-seeded to 0 first, so the "random" views are
    identical across calls.
    """
    set_random_seed(0)
    augmented_views = [self.transform(sample) for _ in range(self.sample_num)]
    return augmented_views, self.clean_transform(sample)
def main():
    """Entry point: parse args, prepare log/checkpoint dirs, set up logging, and train."""
    parser = argparse.ArgumentParser()
    parser.add_argument("-c", "--config", type=str, required=True, help="yaml file for config.")
    parser.add_argument(
        "-n",
        "--name",
        type=str,
        default=None,
        help="Name of the model. Used for both logging and saving chkpt.",
    )
    args = parser.parse_args()

    hp = load_hparam(args.config)
    if args.name is not None:
        hp.log.name = args.name

    # random seed: draw one when not configured so every run is still seeded
    if hp.train.random_seed is None:
        hp.train.random_seed = random.randint(1, 10000)
    set_random_seed(hp.train.random_seed)

    # set log/checkpoint dir (namespaced per run name)
    hp.log.chkpt_dir = os.path.join(hp.log.chkpt_dir, hp.log.name)
    hp.log.log_dir = os.path.join(hp.log.log_dir, hp.log.name)
    os.makedirs(hp.log.chkpt_dir, exist_ok=True)
    os.makedirs(hp.log.log_dir, exist_ok=True)

    # set logger: write to a timestamped file and to stdout
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s - %(levelname)s - %(message)s",
        handlers=[
            logging.FileHandler(
                os.path.join(hp.log.log_dir, "%s-%d.log" % (hp.log.name, time.time()))),
            logging.StreamHandler(),
        ],
    )
    logger = logging.getLogger()

    # set writer (tensorboard / wandb)
    writer = Writer(hp, hp.log.log_dir)

    # echo the fully-resolved config for the record
    hp_str = yaml.dump(hp.to_dict())
    logger.info("Config:")
    logger.info(hp_str)

    if hp.data.train_dir == "" or hp.data.test_dir == "":
        logger.error("train or test data directory cannot be empty.")
        raise Exception("Please specify directories of data in %s" % args.config)

    train_loop(hp, logger, writer)
def get_subset_with_len(dataset, length, shuffle=False):
    """Return a ``Subset`` of *dataset* containing exactly *length* items.

    The global RNGs are re-seeded to 0 first, so a shuffled subset is the
    same on every call.
    """
    set_random_seed(0)
    indices = np.arange(len(dataset))
    if shuffle:
        np.random.shuffle(indices)
    chosen = torch.from_numpy(indices[:length])
    subset = Subset(dataset, chosen)
    assert len(subset) == length
    return subset
def get_features(model, simclr_aug, x, layer='simclr', sample_num=1):
    """Extract *sample_num* feature tensors for *x*, one per augmentation seed.

    Each pass re-seeds the RNGs so augmentation i is reproducible; features
    are taken from the model's *layer* auxiliary output.
    """
    model.eval()
    features = []
    for aug_seed in range(sample_num):
        set_random_seed(aug_seed)
        augmented = simclr_aug(x)
        with torch.no_grad():
            _, aux_outputs = model(augmented, penultimate=True, simclr=True, shift=True)
        features.append(aux_outputs[layer])
    return features
def main(hydra_cfg):
    """Normalize the hydra config, seed the RNGs, and launch (distributed) training."""
    hydra_cfg.device = hydra_cfg.device.lower()

    # Stash hydra's job-logging config on the cfg so workers can reuse it.
    with open_dict(hydra_cfg):
        hydra_cfg.job_logging_cfg = HydraConfig.get().job_logging

    # random seed: draw one when unset so every run is seeded
    if hydra_cfg.random_seed is None:
        hydra_cfg.random_seed = random.randint(1, 10000)
    set_random_seed(hydra_cfg.random_seed)

    # gpus < 0 means "use every visible device"
    if hydra_cfg.dist.gpus < 0:
        hydra_cfg.dist.gpus = torch.cuda.device_count()

    cpu_only = hydra_cfg.device == "cpu" or hydra_cfg.dist.gpus == 0
    if cpu_only:
        hydra_cfg.dist.gpus = 0
        train_loop(0, hydra_cfg)
    else:
        distributed_run(train_loop, hydra_cfg)
def main():
    """Train (or load) a TempoCNN and report per-batch test accuracy and mean BCE loss."""
    params = get_params()
    set_random_seed(params.RANDOM_SEED)
    parse_data()

    data = DatasetNorm('cutted_data')
    # Hold out a fixed 100-sample test split.
    train_set, test_set = torch.utils.data.random_split(
        data, [len(data) - 100, 100])
    trainloader = DataLoader(dataset=train_set,
                             batch_size=params.BATCH_SIZE,
                             shuffle=True,
                             num_workers=8)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    tcnn = TempoCNN().to(device)

    # Experiment tracking.
    wandb.init(project="tcnn")
    config = wandb.config
    config.learning_rate = 0.001
    wandb.watch(tcnn)

    if not params.LOAD_MODEL:
        model = train(tcnn, trainloader)
        save_model(model)
    else:
        model = load_model().to(device)

    testloader = DataLoader(dataset=test_set,
                            batch_size=params.BATCH_SIZE,
                            shuffle=True)
    iters = 0
    loss = 0.0
    cr_loss = nn.BCELoss()
    # Bug fix: eval mode must be set on the model actually used for inference.
    # The original called tcnn.eval() inside the loop, which never affected a
    # model loaded from disk; setting it once on `model` is both correct and
    # sufficient.
    model.eval()
    for batch in testloader:
        mels, labels = batch[0].to(device), batch[1].to(device)
        pred = model(mels.unsqueeze(-1).permute(0, 3, 1, 2)).to('cpu').detach()
        res = accuracy(pred, labels)
        print(res)
        loss += cr_loss(pred.float(), labels.float().to('cpu').detach()).item()
        iters += 1
    print(loss / iters)
def main(hydra_cfg):
    """Normalize config, seed RNGs, read the rendezvous endpoint, and launch training."""
    hydra_cfg.device = hydra_cfg.device.lower()

    # Stash hydra's job-logging config on the cfg so workers can reuse it.
    with open_dict(hydra_cfg):
        hydra_cfg.job_logging_cfg = HydraConfig.get().job_logging

    # random seed: draw one when unset so every run is seeded
    if hydra_cfg.random_seed is None:
        hydra_cfg.random_seed = random.randint(1, 10000)
    set_random_seed(hydra_cfg.random_seed)

    # gpus < 0 means "use every visible device"
    if hydra_cfg.dist.gpus < 0:
        hydra_cfg.dist.gpus = torch.cuda.device_count()

    # The distributed rendezvous endpoint comes from the launcher's environment.
    hydra_cfg.dist.master_port, hydra_cfg.dist.master_addr = (
        os.environ["MASTER_PORT"], os.environ["MASTER_ADDR"])
    print(hydra_cfg.dist)

    cpu_only = hydra_cfg.device == "cpu" or hydra_cfg.dist.gpus == 0
    if cpu_only:
        hydra_cfg.dist.gpus = 0
        train_loop(0, hydra_cfg)
    else:
        distributed_run(train_loop, hydra_cfg)
if (args.use_tensorboard_debugger): tf.debugging.experimental.enable_dump_debug_info( dump_root=os.path.join(args.tensorboard_dir, args.exper_name + "_debug"), tensor_debug_mode="FULL_HEALTH", circular_buffer_size=-1) # AMP 有効化 if (args.use_amp): os.environ['TF_ENABLE_AUTO_MIXED_PRECISION'] = '1' # Eager execution mode / tensorflow 2.x では明示不要 #tf.enable_eager_execution() # seed 値の固定 set_random_seed(args.seed) # multi gpu mirrored_strategy = tf.distribute.MirroredStrategy() #mirrored_strategy = tf.distribute.MirroredStrategy(tf.config.experimental.list_physical_devices("GPU")) #================================ # データセットの読み込み #================================ # 学習用データセットとテスト用データセットの設定 if (args.use_datagen): datagen_train = TempleteDataGen( dataset_dir=args.dataset_dir, datamode="train", image_height=args.image_height, image_width=args.image_width,
from copy import deepcopy
import torch
import torch.nn as nn
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler
from torch.utils.data import DataLoader
from common.common import parse_args
import models.classifier as C
from datasets import get_dataset, get_superclass_list, get_subclass_dataset, get_subclass_contaminated_dataset
from utils.utils import load_checkpoint, set_random_seed

# Fix all RNGs up front with a hard-coded seed so the run is reproducible.
set_random_seed(658965)

P = parse_args()

### Set torch device ###
if torch.cuda.is_available():
    torch.cuda.set_device(P.local_rank)
device = torch.device(f"cuda" if torch.cuda.is_available() else "cpu")

#P.n_gpus = torch.cuda.device_count()
# NOTE(review): the gpu count is forced to 1 here, so the multi_gpu branch
# below can never trigger — confirm this is intentional.
P.n_gpus = 1

if P.n_gpus > 1:
    P.multi_gpu = True
else:
    P.multi_gpu = False

# Pin everything to the single device selected on the command line.
torch.cuda.set_device(f"cuda:{P.single_device}")
device = torch.device(f"cuda:{P.single_device}")
flags = tf.flags flags.DEFINE_string("config_model", "config_model", "The model config.") flags.DEFINE_string("config_data", "config_iwslt15", "The dataset config.") flags.DEFINE_string("run_mode", "train_and_evaluate", "Either train_and_evaluate or test.") flags.DEFINE_string("model_dir", "./outputs", "Directory to save the trained model and logs.") FLAGS = flags.FLAGS config_model = importlib.import_module(FLAGS.config_model) config_data = importlib.import_module(FLAGS.config_data) utils.set_random_seed(config_model.random_seed) def main(): """Entrypoint. """ # Load data print('Loading data ...') train_data, dev_data, test_data = data_utils.load_data_numpy( config_data.input_dir, config_data.filename_prefix) print('Load data done') with open(config_data.vocab_file, 'rb') as f: id2w = pickle.load(f) vocab_size = len(id2w) print('vocab_size {}'.format(vocab_size)) bos_token_id, eos_token_id = 1, 2
parser.add_argument("--tpu_ip_address", type=str, default=None) args = parser.parse_args() config_path = args.config_path tpu_ip_address = args.tpu_ip_address config = read_json(config_path) pprint(config) _env, _model, _training = config["env"], config["model"], config["training"] _save_model_root = "saved_models" # hard coding _using_time = False # hard coding _root = "data/corona_nlp" # hard coding # start!! set_random_seed(_env['seed']) project_name = _root.split("/")[-1] run_name = (f"{_model['name']}_{_model['size']}-" f"lr_{_training['lr']}-bsz_{_training['batch_size']}-" f"seed_{_env['seed']}") now = datetime.now().strftime('%Y-%m-%d_%Hh%Mm%Ss') tokenizer = get_tokenizer(_model['name'], _model['size']) train_dataset = CustomDataset(_root, 'train', tokenizer, _training["max_len"]) dev_dataset = CustomDataset(_root, 'dev', tokenizer, _training["max_len"]) Model = get_model_class(_model['name']) Opt = get_optim_class(_model['opt']) Loss_fn = get_loss_fn_class(_model['loss'])
def main(args):
    """Train or evaluate a SeqNet person-search model from a merged config.

    With ``args.eval`` set, restores ``args.ckpt`` and only runs evaluation;
    otherwise runs the full SGD training loop with periodic evaluation and
    checkpointing.
    """
    cfg = get_default_cfg()
    # NOTE(review): the config path is hard-coded to a local machine —
    # consider making it a CLI argument.
    cfg.merge_from_file('/home/lh/project/SeqNet/exp_cuhk/config.yaml')
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    device = torch.device(cfg.DEVICE)
    # A negative SEED disables deterministic seeding.
    if cfg.SEED >= 0:
        set_random_seed(cfg.SEED)

    print("Creating model")
    model = SeqNet(cfg)
    model.to(device)

    print("Loading data")
    train_loader = build_train_loader(cfg)
    gallery_loader, query_loader = build_test_loader(cfg)

    # Evaluation-only path: restore weights, score, and exit.
    if args.eval:
        assert args.ckpt, "--ckpt must be specified when --eval enabled"
        resume_from_ckpt(args.ckpt, model)
        evaluate_performance(
            model,
            gallery_loader,
            query_loader,
            device,
            use_gt=cfg.EVAL_USE_GT,
            use_cache=cfg.EVAL_USE_CACHE,
            use_cbgm=cfg.EVAL_USE_CBGM,
        )
        exit(0)

    # Optimize only trainable parameters.
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(
        params,
        lr=cfg.SOLVER.BASE_LR,
        momentum=cfg.SOLVER.SGD_MOMENTUM,
        weight_decay=cfg.SOLVER.WEIGHT_DECAY,
    )
    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=cfg.SOLVER.LR_DECAY_MILESTONES, gamma=0.1)

    start_epoch = 0
    # Resuming restores optimizer/scheduler state and continues from the
    # epoch after the checkpointed one.
    if args.resume:
        assert args.ckpt, "--ckpt must be specified when --resume enabled"
        start_epoch = resume_from_ckpt(args.ckpt, model, optimizer, lr_scheduler) + 1

    print("Creating output folder")
    output_dir = cfg.OUTPUT_DIR
    mkdir(output_dir)
    # Persist the fully-resolved config next to the checkpoints.
    path = osp.join(output_dir, "config.yaml")
    with open(path, "w") as f:
        f.write(cfg.dump())
    print(f"Full config is saved to {path}")

    tfboard = None
    if cfg.TF_BOARD:
        from torch.utils.tensorboard import SummaryWriter
        tf_log_path = osp.join(output_dir, "tf_log")
        mkdir(tf_log_path)
        tfboard = SummaryWriter(log_dir=tf_log_path)
        print(f"TensorBoard files are saved to {tf_log_path}")

    print("Start training")
    start_time = time.time()
    for epoch in range(start_epoch, cfg.SOLVER.MAX_EPOCHS):
        train_one_epoch(cfg, model, optimizer, train_loader, device, epoch, tfboard)
        lr_scheduler.step()
        # Periodic evaluation (and always at the final epoch).
        if (epoch + 1
            ) % cfg.EVAL_PERIOD == 0 or epoch == cfg.SOLVER.MAX_EPOCHS - 1:
            evaluate_performance(
                model,
                gallery_loader,
                query_loader,
                device,
                use_gt=cfg.EVAL_USE_GT,
                use_cache=cfg.EVAL_USE_CACHE,
                use_cbgm=cfg.EVAL_USE_CBGM,
            )
        # Periodic checkpointing (and always at the final epoch).
        if (epoch + 1
            ) % cfg.CKPT_PERIOD == 0 or epoch == cfg.SOLVER.MAX_EPOCHS - 1:
            save_on_master(
                {
                    "model": model.state_dict(),
                    "optimizer": optimizer.state_dict(),
                    "lr_scheduler": lr_scheduler.state_dict(),
                    "epoch": epoch,
                },
                osp.join(output_dir, f"epoch_{epoch}.pth"),
            )

    if tfboard:
        tfboard.close()
    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    print(f"Total training time {total_time_str}")
from adv_lib.attack import attack_module
import models.classifier as C
from datasets import get_dataset
from utils.utils import load_checkpoint, set_random_seed

P = parse_args()

### Set torch device ###
if torch.cuda.is_available():
    torch.cuda.set_device(P.local_rank)
device = torch.device(f"cuda" if torch.cuda.is_available() else "cpu")

P.n_gpus = torch.cuda.device_count()
assert P.n_gpus <= 1  # no multi GPU

# Fix all RNGs with the CLI-provided seed for reproducibility.
set_random_seed(P.seed)

### Initialize dataset ###
train_set, test_set, image_size, n_classes = get_dataset(P, dataset=P.dataset)
P.image_size = image_size
P.n_classes = n_classes

### Define data loader ###
kwargs = {'pin_memory': True, 'num_workers': 8}
train_loader = DataLoader(train_set, shuffle=True, batch_size=P.batch_size, **kwargs)
test_loader = DataLoader(test_set, shuffle=False, batch_size=P.test_batch_size, **kwargs)

### Initialize model ###
model = C.get_classifier(P, n_classes=P.n_classes).to(device)
optimizer, lr_decay_gamma = get_optimizer(P, model)
scheduler = get_scheduler(P, optimizer, lr_decay_gamma)
def _get_features(P,
                  model,
                  loader,
                  interp=False,
                  imagenet=False,
                  simclr_aug=None,
                  sample_num=1,
                  layers=('simclr', 'shift')):
    """Compute per-layer features over an entire loader.

    For each batch, runs ``sample_num`` augmented forward passes (seeded
    0..sample_num-1 for reproducibility); with ``P.K_shift > 1`` each pass
    stacks the K shift-transformed copies of the (h-flipped) batch.

    Returns:
        dict mapping each layer name to an (N, T, d) cpu tensor, where
        T = sample_num * P.K_shift (per the shape comments below).
    """
    # Accept a single layer name as well as a list/tuple of them.
    if not isinstance(layers, (list, tuple)):
        layers = [layers]

    # check if arguments are valid
    assert simclr_aug is not None

    if imagenet is True:  # assume batch_size = 1 for ImageNet
        sample_num = 1

    # compute features in full dataset
    model.eval()
    feats_all = {layer: [] for layer in layers}  # initialize: empty list
    for i, (x, _) in enumerate(loader):
        if interp:
            # Replace each batch by its average with the previous batch.
            x_interp = (
                x +
                last) / 2 if i > 0 else x  # omit the first batch, assume batch sizes are equal
            last = x  # save the last batch
            x = x_interp  # use interp as current batch

        if imagenet is True:
            x = torch.cat(x[0], dim=0)  # augmented list of x

        x = x.to(device)  # gpu tensor

        # compute features in one batch
        feats_batch = {layer: [] for layer in layers}  # initialize: empty list
        for seed in range(sample_num):
            set_random_seed(seed)  # make augmentation pass `seed` reproducible

            if P.K_shift > 1:
                x_t = torch.cat(
                    [P.shift_trans(hflip(x), k) for k in range(P.K_shift)])
            else:
                x_t = x  # No shifting: SimCLR

            x_t = simclr_aug(x_t)

            # compute augmented features
            with torch.no_grad():
                kwargs = {layer: True for layer in layers}  # only forward selected layers
                _, output_aux = model(x_t, **kwargs)

            # add features in one batch
            for layer in layers:
                feats = output_aux[layer].cpu()
                if imagenet is False:
                    feats_batch[layer] += feats.chunk(P.K_shift)
                else:
                    feats_batch[layer] += [feats]  # (B, d) cpu tensor

        # concatenate features in one batch
        for key, val in feats_batch.items():
            if imagenet:
                feats_batch[key] = torch.stack(val, dim=0)  # (B, T, d)
            else:
                feats_batch[key] = torch.stack(val, dim=1)  # (B, T, d)

        # add features in full dataset
        for layer in layers:
            feats_all[layer] += [feats_batch[layer]]

    # concatenate features in full dataset
    for key, val in feats_all.items():
        feats_all[key] = torch.cat(val, dim=0)  # (N, T, d)

    # reshape order
    if imagenet is False:
        # Convert [1,2,3,4, 1,2,3,4] -> [1,1, 2,2, 3,3, 4,4]
        # i.e. group the K shifted copies of the same sample together.
        for key, val in feats_all.items():
            N, T, d = val.size()  # T = K * T'
            val = val.view(N, -1, P.K_shift, d)  # (N, T', K, d)
            val = val.transpose(2, 1)  # (N, 4, T', d)
            val = val.reshape(N, T, d)  # (N, T, d)
            feats_all[key] = val

    return feats_all
def main():
    """Train a CTPN text-detection model: build data/model/optimizer from cfg,
    run the epoch loop with periodic logging, and save checkpoints."""
    args = get_args()
    # Resuming and fresh weight-loading are mutually exclusive.
    assert not (args.resume_from and args.load_from)

    # work_dir: CLI overrides cfg; fall back to ./work_dir
    work_dir = cfg.work_dir
    if args.work_dir:
        work_dir = args.work_dir
    if work_dir is None:
        work_dir = "./work_dir"
    os.makedirs(work_dir, exist_ok=True)

    torch.backends.cudnn.benchmark = True
    set_random_seed(cfg.seed, deterministic=args.deterministic)

    log_file = os.path.join(work_dir, "out.log")
    logger = get_root_logger(log_file=log_file)
    logger.info("work_dir: %s, log_file: %s" % (work_dir, log_file))

    transforms = build_transforms(cfg)
    dataset = ImageDataset(cfg.train_root, transforms=transforms, side_refine=cfg.side_refine)
    sampler = GroupSampler(dataset, cfg.batch_size)
    dataloader = DataLoader(
        dataset,
        sampler=sampler,
        num_workers=cfg.num_workers,
        collate_fn=Collate(),
    )

    device = "cuda"
    device = torch.device(device)
    model = CTPN(cfg).to(device)

    # Checkpoint selection: --resume-from also restores optimizer/epoch state.
    only_weights = True
    checkpoint = cfg.checkpoint
    if args.load_from:
        checkpoint = args.load_from
    if args.resume_from:
        checkpoint = args.resume_from
        only_weights = False
    checkpoint = load_checkpoint(model, checkpoint, only_weights)

    if cfg.optimizer == "SGD":
        optimizer = optim.SGD(
            model.parameters(),
            lr=cfg.lr,
            momentum=cfg.momentum,
            weight_decay=cfg.weight_decay,
        )
        # check_epoch
        scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
                                                   milestones=cfg.step_size,
                                                   gamma=cfg.gamma)
    elif cfg.optimizer == "Adam":
        # NOTE(review): no scheduler is created for Adam, yet "scheduler" may
        # be read from the checkpoint below — confirm Adam runs never hit it.
        optimizer = optim.Adam(model.parameters(), lr=cfg.lr, weight_decay=cfg.weight_decay)

    if "optimizer" in checkpoint:
        optimizer.load_state_dict(checkpoint["optimizer"])
    if "scheduler" in checkpoint:
        scheduler.load_state_dict(checkpoint["scheduler"])

    # Resume epoch/iteration counters when the checkpoint carries them.
    epoch = 0
    iteration = 0
    if "meta" in checkpoint:
        epoch = checkpoint["meta"]["epoch"]
        iteration = checkpoint["meta"]["iteration"]

    max_iterations = cfg.max_epoch * len(dataloader)
    log_buffer = LogBuffer()
    model.train()
    while epoch < cfg.max_epoch:
        data_start = time.time()
        for i, (imgs, gt_bboxes, gt_labels, img_metas) in enumerate(dataloader):
            data_end = time.time()
            data_time = data_end - data_start

            optimizer.zero_grad()
            imgs = imgs.to(device)
            gt_bboxes = [bboxes.to(device) for bboxes in gt_bboxes]
            gt_labels = [labels.to(device) for labels in gt_labels]

            cuda_start = time.time()
            rpn_cls, rpn_reg = model(imgs)
            cls_loss, reg_loss, acc = model.loss(rpn_cls, rpn_reg, gt_bboxes,
                                                 gt_labels, img_metas)
            loss = cls_loss + reg_loss
            loss_ = {
                "cls_loss": cls_loss.item(),
                "reg_loss": reg_loss.item(),
                "loss": loss.item(),
            }
            log_buffer.update(loss_)
            loss.backward()
            optimizer.step()
            iteration += 1
            cuda_end = time.time()
            cuda_time = cuda_end - cuda_start

            time_ = {"data_time": data_time, "cuda_time": cuda_time}
            log_buffer.update(time_)
            # model.loss may return a sequence of accuracies; keep the first.
            if isinstance(acc, collections.abc.Sequence):
                acc = acc[0]
            acc_ = {"accuracy": acc}
            log_buffer.update(acc_)

            if iteration % cfg.iteration_show == 0:
                log_buffer.average()
                # ETA estimated from the smoothed per-iteration times.
                eta = (max_iterations - iteration) * (log_buffer.output["data_time"] +
                                                      log_buffer.output["cuda_time"])
                h, m, s = time2hms(eta)
                eta = "%d h %d m %d s" % (h, m, s)
                info = ""
                for k, v in log_buffer.output.items():
                    if "loss" in k or "accuracy" in k:
                        info += "%s: %.4f " % (k, v)
                log = f"[{epoch + 1}/{cfg.max_epoch}][{i + 1}/{len(dataloader)}] iteration: {iteration} data_time: {data_time:.2} cuda_time: {cuda_time:.2} eta: {eta} {info}"
                logger.info(log)
                log_buffer.clear()
            data_start = time.time()

        epoch += 1
        if cfg.optimizer == "SGD":
            scheduler.step()
        # Periodic checkpointing (and always at the final epoch).
        if epoch % cfg.save_interval == 0 or epoch == cfg.max_epoch:
            meta = {"epoch": epoch, "iteration": iteration}
            if cfg.optimizer == "Adam":
                scheduler = None
            save_checkpoint(
                os.path.join(work_dir, "epoch_%d.pth" % epoch),
                model,
                optimizer,
                scheduler,
                meta,
            )
'hidden_units': 64, 'relation_hidden_units': 8, 'dropout': 0.5, 'n_layers': 2, 'residual': True } args['data_path'] = f'../dataset/{args["dataset"]}/{args["dataset"]}.pkl' args[ 'data_split_idx_path'] = f'../dataset/{args["dataset"]}/{args["dataset"]}_split_idx.pkl' args['device'] = f'cuda:{args["cuda"]}' if torch.cuda.is_available( ) and args["cuda"] >= 0 else 'cpu' if __name__ == '__main__': warnings.filterwarnings('ignore') set_random_seed(args['seed']) print(f'loading dataset {args["dataset"]}...') graph, labels, num_classes, train_idx, valid_idx, test_idx = load_dataset( data_path=args['data_path'], predict_category=args['predict_category'], data_split_idx_path=args['data_split_idx_path']) r_hgnn = R_HGNN(graph=graph, input_dim_dict={ ntype: graph.nodes[ntype].data['feat'].shape[1] for ntype in graph.ntypes }, hidden_dim=args['hidden_units'], relation_input_dim=args['relation_hidden_units'],
import os
import sys

if __name__ == "__main__":
    # Allow running this file directly by putting the project root on sys.path
    # before the project imports below.
    __file_path = os.path.abspath(__file__)
    sys.path.append("/".join(__file_path.split("/")[:-3]))

from sklearn.model_selection import train_test_split

from utils.utils import set_random_seed, read_csv, save_json

_data_root = 'data/corona_nlp'
_train_name = 'origin/Corona_NLP_train.csv'
_test_name = 'origin/Corona_NLP_test.csv'
_seed = 0

# Seed before the train/dev split so the split is reproducible.
set_random_seed(_seed)

train_path = f"{_data_root}/{_train_name}"
test_path = f"{_data_root}/{_test_name}"

train = read_csv(train_path, encoding='latin1')
test = read_csv(test_path, encoding='latin1')
train, dev = train_test_split(train, test_size=0.1)  # hard coding

# Sentiment label -> class index mapping.
cat2idx = {
    'Extremely Negative': 0,
    'Negative': 1,
    'Neutral': 2,
    'Positive': 3,
    'Extremely Positive': 4,
}  # hard coding
'tfidf_pca', 'tfidf_pca_small', 'bert_cls_trained_mtl_except', 'bert_masking_trained_app', #'bert_masking_trained_dataset', 'bert_masking_trained_mtl', 'bert_cls_trained_sublabel_app', #'bert_cls_trained_sublabel_dataset', 'bert_cls_trained_sublabel_mtl', 'bert_nsp_cos_trained_app', #'bert_nsp_cos_trained_dataset', 'bert_nsp_cos_trained_mtl', 'bert_nsp_trained_app', #'bert_nsp_trained_dataset', 'bert_nsp_trained_mtl' ] set_random_seed(111) get_data(datasets) map_data(datasets, embeddings) eval_embeds() collate_scores("5_nn_sim") collate_scores("5_cos_nn_sim") collate_scores("p_val")
def main(args):
    """Train a molecular-property classifier with early stopping, then test
    on the held-out set and save a loss-per-epoch plot."""
    args['device'] = torch.device("cpu")
    set_random_seed(args['random_seed'])

    dataset, train_set, val_set, test_set = load_dataset_for_classification(
        args)
    train_loader = DataLoader(train_set,
                              batch_size=args['batch_size'],
                              collate_fn=collate_molgraphs)
    val_loader = DataLoader(val_set,
                            batch_size=args['batch_size'],
                            collate_fn=collate_molgraphs)
    test_loader = DataLoader(test_set,
                             batch_size=args['batch_size'],
                             collate_fn=collate_molgraphs)

    args['n_tasks'] = dataset.n_tasks
    model = load_model(args)
    # Per-task positive weights compensate for label imbalance.
    loss_criterion = BCEWithLogitsLoss(pos_weight=dataset.task_pos_weights.to(
        args['device']), reduction='none')
    optimizer = Adam(model.parameters(), lr=args['lr'])
    stopper = EarlyStopping(patience=args['patience'])
    model.to(args['device'])

    epochx = 0  # 1-based epoch counter passed to evaluation
    losses = []  # mean train loss per epoch, for the plot below
    for epoch in range(args['num_epochs']):
        # Train
        loss = run_a_train_epoch(args, epoch, model, train_loader,
                                 loss_criterion, optimizer)
        losses.append(loss)

        # Validation and early stop
        epochx += 1
        val_score = run_an_eval_epoch(args, model, val_loader, epochx, False)
        early_stop = stopper.step(val_score, model)
        print(
            'epoch {:d}/{:d}, validation {} {:.4f}, best validation {} {:.4f}'.
            format(epoch + 1, args['num_epochs'], args['metric_name'],
                   val_score, args['metric_name'], stopper.best_score))
        if early_stop:
            break

    # Restore the best checkpoint before final testing.
    stopper.load_checkpoint(model)

    # Print out the test set score
    test_score = run_an_eval_epoch(args, model, test_loader, epochx, True)
    print('test {} {:.4f}'.format(args['metric_name'], test_score))

    # Making the loss per epoch figure
    #print('losses', len(losses))
    print(losses)
    epoch_list = [i + 1 for i in range(len(losses))]
    ##
    plt.clf()
    plt.plot(epoch_list, losses)
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.rcParams['axes.facecolor'] = 'white'
    plt.savefig("Loss.Per.Epoch.png")
path_ori += '-ori.png' path_denoise += '-denoise.png' path_clean += '-clean.png' cv2.imwrite(path_ori, pauli_ori) cv2.imwrite(path_denoise, pauli_denoise) cv2.imwrite(path_clean, pauli_clean) if cfg.data.simulate: logger.info(f'overall psnr: {test_psnr_meter.avg}, ssim: {test_ssim_meter.avg}') logger.info(f'\ndone') if __name__=='__main__': cfg = args.get_argparser('configs/hoekman_unetpp4_simulate_step.yml') # choose deterministic algorithms, and disable benchmark for variable size input utils.set_random_seed(0) run_id = utils.get_work_dir(osp.join(cfg.test.out_path, osp.split(osp.split(cfg.test.pth)[0])[1])) shutil.copy(cfg.config_file, run_id) # logger logger = get_logger(run_id) logger.info(f'RUN DIR: {run_id}') test(cfg, logger, run_id) logger.info(f'RUN DIR: {run_id}')