def run(config):
    """Runs the model for either pretrain or segmentation/classification tasks.

    Loads the ResNet backbone and, based on the task specification, performs
    either pretraining or segmentation/classification fine-tuning.

    Args:
        config: instance of the config class.

    Raises:
        ValueError: In segmentation/classification tasks if the number of
            classes is not specified.
    """
    # load the ResNet backbone
    model = load_ResNet(config.model,
                        config.imagenet_path,
                        include_top=False,
                        cifar10=config.cifar10,
                        weight_decay=config.weight_decay)

    if config.task == 'pretrain':
        print("Pretraining model")
        if config.cifar10:
            pretrain_cifar10(model, config)
        else:
            pretrain(model, config)
    # fix: the original `== 'segmentation' or 'classification'` was always truthy
    elif config.task in ('segmentation', 'classification'):
        print("Fine-tuning model")
        if config.num_classes is None:
            raise ValueError("The number of classes must be set")
        if config.cifar10:
            finetune_cifar10(model, config)
        else:
            finetune(model, config)
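
# Usage sketch (not from the original source): a minimal config for run(),
# built with types.SimpleNamespace. Every attribute below is one that run()
# actually reads; the concrete values ('resnet50', the paths, etc.) are
# illustrative assumptions only.
from types import SimpleNamespace

example_config = SimpleNamespace(
    model='resnet50',          # assumed backbone name accepted by load_ResNet
    imagenet_path=None,        # or a path to ImageNet-pretrained weights
    cifar10=False,
    weight_decay=1e-4,
    task='classification',     # one of: 'pretrain', 'segmentation', 'classification'
    num_classes=10,            # must be set for segmentation/classification
)
run(example_config)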
def main():
    args = parse_arguments()

    # argument setting
    print("=== Argument Setting ===")
    print("src: " + args.src)
    print("tgt: " + args.tgt)
    print("seed: " + str(args.seed))
    print("train_seed: " + str(args.train_seed))
    print("model_type: " + str(args.model))
    print("max_seq_length: " + str(args.max_seq_length))
    print("batch_size: " + str(args.batch_size))
    print("pre_epochs: " + str(args.pre_epochs))
    print("num_epochs: " + str(args.num_epochs))
    print("AD weight: " + str(args.alpha))
    print("KD weight: " + str(args.beta))
    print("temperature: " + str(args.temperature))
    set_seed(args.train_seed)

    if args.model in ['roberta', 'distilroberta']:
        tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
    else:
        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

    # preprocess data
    print("=== Processing datasets ===")
    if args.src in ['blog', 'airline', 'imdb']:
        src_x, src_y = CSV2Array(
            os.path.join('data', args.src, args.src + '.csv'))
    else:
        src_x, src_y = XML2Array(
            os.path.join('data', args.src, 'negative.review'),
            os.path.join('data', args.src, 'positive.review'))

    src_x, src_test_x, src_y, src_test_y = train_test_split(
        src_x, src_y, test_size=0.2, stratify=src_y, random_state=args.seed)

    if args.tgt in ['blog', 'airline', 'imdb']:
        tgt_x, tgt_y = CSV2Array(
            os.path.join('data', args.tgt, args.tgt + '.csv'))
    else:
        tgt_x, tgt_y = XML2Array(
            os.path.join('data', args.tgt, 'negative.review'),
            os.path.join('data', args.tgt, 'positive.review'))

    # fix: the original unpacked into (tgt_train_x, tgt_test_y, tgt_train_y, tgt_test_y),
    # which never assigned tgt_test_x and overwrote tgt_test_y
    tgt_train_x, tgt_test_x, tgt_train_y, tgt_test_y = train_test_split(
        tgt_x, tgt_y, test_size=0.2, stratify=tgt_y, random_state=args.seed)

    if args.model in ['roberta', 'distilroberta']:
        src_features = roberta_convert_examples_to_features(
            src_x, src_y, args.max_seq_length, tokenizer)
        src_test_features = roberta_convert_examples_to_features(
            src_test_x, src_test_y, args.max_seq_length, tokenizer)
        tgt_features = roberta_convert_examples_to_features(
            tgt_x, tgt_y, args.max_seq_length, tokenizer)
        tgt_train_features = roberta_convert_examples_to_features(
            tgt_train_x, tgt_train_y, args.max_seq_length, tokenizer)
    else:
        src_features = convert_examples_to_features(
            src_x, src_y, args.max_seq_length, tokenizer)
        src_test_features = convert_examples_to_features(
            src_test_x, src_test_y, args.max_seq_length, tokenizer)
        tgt_features = convert_examples_to_features(
            tgt_x, tgt_y, args.max_seq_length, tokenizer)
        tgt_train_features = convert_examples_to_features(
            tgt_train_x, tgt_train_y, args.max_seq_length, tokenizer)

    # load dataset
    src_data_loader = get_data_loader(src_features, args.batch_size)
    src_data_eval_loader = get_data_loader(src_test_features, args.batch_size)
    tgt_data_train_loader = get_data_loader(tgt_train_features, args.batch_size)
    tgt_data_all_loader = get_data_loader(tgt_features, args.batch_size)

    # load models
    if args.model == 'bert':
        src_encoder = BertEncoder()
        tgt_encoder = BertEncoder()
        src_classifier = BertClassifier()
    elif args.model == 'distilbert':
        src_encoder = DistilBertEncoder()
        tgt_encoder = DistilBertEncoder()
        src_classifier = BertClassifier()
    elif args.model == 'roberta':
        src_encoder = RobertaEncoder()
        tgt_encoder = RobertaEncoder()
        src_classifier = RobertaClassifier()
    else:
        src_encoder = DistilRobertaEncoder()
        tgt_encoder = DistilRobertaEncoder()
        src_classifier = RobertaClassifier()
    discriminator = Discriminator()

    if args.load:
        src_encoder = init_model(args, src_encoder,
                                 restore=param.src_encoder_path)
        src_classifier = init_model(args, src_classifier,
                                    restore=param.src_classifier_path)
        tgt_encoder = init_model(args, tgt_encoder,
                                 restore=param.tgt_encoder_path)
        discriminator = init_model(args, discriminator,
                                   restore=param.d_model_path)
    else:
        src_encoder = init_model(args, src_encoder)
        src_classifier = init_model(args, src_classifier)
        tgt_encoder = init_model(args, tgt_encoder)
        discriminator = init_model(args, discriminator)

    # train source model
    print("=== Training classifier for source domain ===")
    if args.pretrain:
        src_encoder, src_classifier = pretrain(
            args, src_encoder, src_classifier, src_data_loader)

    # eval source model
    print("=== Evaluating classifier for source domain ===")
    evaluate(src_encoder, src_classifier, src_data_loader)
    evaluate(src_encoder, src_classifier, src_data_eval_loader)
    evaluate(src_encoder, src_classifier, tgt_data_all_loader)

    # freeze the source encoder and classifier
    for params in src_encoder.parameters():
        params.requires_grad = False
    for params in src_classifier.parameters():
        params.requires_grad = False

    # train target encoder by GAN
    print("=== Training encoder for target domain ===")
    if args.adapt:
        tgt_encoder.load_state_dict(src_encoder.state_dict())
        tgt_encoder = adapt(args, src_encoder, tgt_encoder, discriminator,
                            src_classifier, src_data_loader,
                            tgt_data_train_loader, tgt_data_all_loader)

    # eval target encoder on the full target dataset
    print("=== Evaluating classifier for encoded target domain ===")
    print(">>> source only <<<")
    evaluate(src_encoder, src_classifier, tgt_data_all_loader)
    print(">>> domain adaptation <<<")
    evaluate(tgt_encoder, src_classifier, tgt_data_all_loader)
def main(args, f):
    set_seed(args.train_seed)

    if args.model in ['roberta', 'distilroberta']:
        tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
    else:
        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

    # preprocess data
    src_eval_loader, src_loader, tgt_all_loader, tgt_train_loader, tgt_te = get_all_dataloader(
        args, tokenizer)

    # load models
    if args.model == 'bert':
        src_encoder = BertEncoder()
        # encoder = BertEncoder()
        classifier = BertClassifier()
    elif args.model == 'distilbert':
        src_encoder = DistilBertEncoder()
        # encoder = DistilBertEncoder()
        classifier = BertClassifier()
    elif args.model == 'roberta':
        src_encoder = RobertaEncoder()
        # encoder = RobertaEncoder()
        classifier = RobertaClassifier()
    else:
        src_encoder = DistilRobertaEncoder()
        # encoder = DistilRobertaEncoder()
        classifier = RobertaClassifier()
    discriminator = Discriminator()

    # parallel models
    if torch.cuda.device_count() > 1:
        print('Let\'s use {} GPUs!'.format(torch.cuda.device_count()))
        src_encoder = nn.DataParallel(src_encoder)
        classifier = nn.DataParallel(classifier)
        # encoder = nn.DataParallel(encoder)
        discriminator = nn.DataParallel(discriminator)

    if args.load:
        src_encoder = init_model(args, src_encoder,
                                 restore_path=param.src_encoder_path)
        classifier = init_model(args, classifier,
                                restore_path=param.src_classifier_path)
        # encoder = init_model(args, encoder, restore_path=param.tgt_encoder_path)
        # discriminator = init_model(args, discriminator, restore_path=param.d_model_path)
    else:
        src_encoder = init_model(args, src_encoder)
        classifier = init_model(args, classifier)
        # encoder = init_model(args, encoder)
        discriminator = init_model(args, discriminator)

    # train source model
    if args.pretrain:
        print("=== Training classifier for source domain ===")
        src_encoder, classifier = pretrain(args, src_encoder, classifier,
                                           src_loader)
        # save pretrained model
        # save_model(args, src_encoder, param.src_encoder_path)
        # save_model(args, classifier, param.src_classifier_path)

    # eval source model
    print("=== Evaluating classifier for source domain ===")
    evaluate(args, src_encoder, classifier, src_loader)
    src_acc = evaluate(args, src_encoder, classifier, tgt_all_loader)
    f.write(f'{args.src} -> {args.tgt}: No adapt acc on src data: {src_acc}\n')

    # adapt
    print("=== Adapt tgt encoder ===")
    # encoder.load_state_dict(src_encoder.state_dict())
    # if args.src_free:
    #     s_res_features = src_gmm(args, src_encoder, src_loader)
    #     src_loader = s_numpy_dataloader(s_res_features, args.batch_size)
    #     encoder = aad_adapt_src_free(args, src_encoder, encoder, discriminator,
    #                                  classifier, src_loader, tgt_train_loader,
    #                                  tgt_all_loader)
    # else:
    if args.adapt:
        encoder, classifier = shot_adapt(args, src_encoder, classifier,
                                         tgt_train_loader, tgt_all_loader,
                                         tgt_te)
        # save_model(args, encoder, param.tgt_encoder_path)

    # argument setting
    # print("=== Argument Setting ===")
    print(
        f"model_type: {args.model}; max_seq_len: {args.max_seq_length}; batch_size: {args.batch_size}; "
        f"pre_epochs: {args.pre_epochs}; num_epochs: {args.num_epochs}; src: {args.src}; tgt: {args.tgt}; "
        f'src_free: {args.src_free}; dp: {args.dp}')

    # eval target encoder on the full target dataset
    print("=== Evaluating classifier for encoded target domain ===")
    print(">>> domain adaptation <<<")
    tgt_acc = evaluate(args, encoder, classifier, tgt_all_loader)
    f.write(f'{args.src} -> {args.tgt}: DA acc on tgt data: {tgt_acc}\n')
    f.write(
        f"model_type: {args.model}; batch_size: {args.batch_size}; pre_epochs: {args.pre_epochs}; "
        f"num_epochs: {args.num_epochs}; src_free: {args.src_free}; src: {args.src}; "
        f"tgt: {args.tgt}; dp: {args.dp}\n\n")
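
# Driver sketch (not from the original source) showing how a main(args, f)
# entry point above can be invoked: parse_arguments() is referenced elsewhere
# in this code, while the results-file name 'adapt_results.txt' is a
# hypothetical placeholder.
if __name__ == '__main__':
    cli_args = parse_arguments()
    with open('adapt_results.txt', 'a') as result_file:
        main(cli_args, result_file)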
def main(): """ Main """ # Arguments args = parser.parse_args() # Setup Distributed Training device, local_rank = setup(distributed=args.distributed) # Get Dataloaders for Dataset of choice dataloaders, args = get_dataloaders(args) # Setup logging, saving models, summaries args = experiment_config(parser, args) # Get available models from /model/network.py model_names = sorted(name for name in models.__dict__ if name.islower() and not name.startswith("__") and callable(models.__dict__[name])) # If model exists if any(args.model in model_name for model_name in model_names): # Load model base_encoder = getattr(models, args.model)( args, num_classes=args.n_classes) # Encoder proj_head = models.projection_MLP(args) sup_head = models.Sup_Head(args) else: raise NotImplementedError("Model Not Implemented: {}".format( args.model)) # Remove last FC layer from resnet base_encoder.fc = nn.Sequential() # Place model onto GPU(s) if args.distributed: torch.cuda.set_device(device) torch.set_num_threads(6) # n cpu threads / n processes per node base_encoder = DistributedDataParallel(base_encoder.cuda(), device_ids=[local_rank], output_device=local_rank, find_unused_parameters=True, broadcast_buffers=False) proj_head = DistributedDataParallel(proj_head.cuda(), device_ids=[local_rank], output_device=local_rank, find_unused_parameters=True, broadcast_buffers=False) sup_head = DistributedDataParallel(sup_head.cuda(), device_ids=[local_rank], output_device=local_rank, find_unused_parameters=True, broadcast_buffers=False) # Only print from process (rank) 0 args.print_progress = True if int( os.environ.get('RANK')) == 0 else False else: # If non Distributed use DataParallel if torch.cuda.device_count() > 1: base_encoder = nn.DataParallel(base_encoder) proj_head = nn.DataParallel(proj_head) sup_head = nn.DataParallel(sup_head) print('\nUsing', torch.cuda.device_count(), 'GPU(s).\n') base_encoder.to(device) proj_head.to(device) sup_head.to(device) args.print_progress = True # Print Network Structure and Params if args.print_progress: print_network(base_encoder, args) # prints out the network architecture etc logging.info('\npretrain/train: {} - valid: {} - test: {}'.format( len(dataloaders['train'].dataset), len(dataloaders['valid'].dataset), len(dataloaders['test'].dataset))) # launch model training or inference if not args.finetune: ''' Pretraining / Finetuning / Evaluate ''' if not args.supervised: # Pretrain the encoder and projection head proj_head.apply(init_weights) pretrain(base_encoder, proj_head, dataloaders, args) else: supervised(base_encoder, sup_head, dataloaders, args) print("\n\nLoading the model: {}\n\n".format(args.load_checkpoint_dir)) # Load the pretrained model checkpoint = torch.load(args.load_checkpoint_dir) # Load the encoder parameters base_encoder.load_state_dict(checkpoint['encoder']) # Initalize weights of the supervised / classification head sup_head.apply(init_weights) # Supervised Finetuning of the supervised classification head finetune(base_encoder, sup_head, dataloaders, args) # Evaluate the pretrained model and trained supervised head test_loss, test_acc, test_acc_top5 = evaluate(base_encoder, sup_head, dataloaders, 'test', args.finetune_epochs, args) print('[Test] loss {:.4f} - acc {:.4f} - acc_top5 {:.4f}'.format( test_loss, test_acc, test_acc_top5)) if args.distributed: # cleanup torch.distributed.destroy_process_group() else: ''' Finetuning / Evaluate ''' # Do not Pretrain, just finetune and inference print("\n\nLoading the model: {}\n\n".format(args.load_checkpoint_dir)) # 
Load the pretrained model checkpoint = torch.load(args.load_checkpoint_dir) # Load the encoder parameters base_encoder.load_state_dict(checkpoint['encoder']) # .cuda() # Initalize weights of the supervised / classification head sup_head.apply(init_weights) # Supervised Finetuning of the supervised classification head finetune(base_encoder, sup_head, dataloaders, args) # Evaluate the pretrained model and trained supervised head test_loss, test_acc, test_acc_top5 = evaluate(base_encoder, sup_head, dataloaders, 'test', args.finetune_epochs, args) print('[Test] loss {:.4f} - acc {:.4f} - acc_top5 {:.4f}'.format( test_loss, test_acc, test_acc_top5)) if args.distributed: # cleanup torch.distributed.destroy_process_group()
def main(args, f):
    # args = parse_arguments()
    set_seed(args.train_seed)

    if args.model in ['roberta', 'distilroberta']:
        tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
    else:
        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

    # preprocess data
    src_eval_loader, src_loader, tgt_all_loader, tgt_train_loader = get_all_dataloader(
        args, tokenizer)

    # load models
    if args.model == 'bert':
        encoder = BertEncoder()
        src_encoder = BertEncoder()
        classifier = BertClassifier()
    elif args.model == 'distilbert':
        encoder = DistilBertEncoder()
        src_encoder = DistilBertEncoder()
        classifier = BertClassifier()
    elif args.model == 'roberta':
        encoder = RobertaEncoder()
        src_encoder = RobertaEncoder()
        classifier = RobertaClassifier()
    else:
        encoder = DistilRobertaEncoder()
        src_encoder = DistilRobertaEncoder()
        classifier = RobertaClassifier()

    # domain discriminator
    discriminator = AdversarialNetworkCdan(param.input_dim * param.num_labels,
                                           param.hidden_dim)

    # parallel models
    if torch.cuda.device_count() > 1:
        print('Let\'s use {} GPUs!'.format(torch.cuda.device_count()))
        encoder = nn.DataParallel(encoder)
        src_encoder = nn.DataParallel(src_encoder)
        classifier = nn.DataParallel(classifier)
        discriminator = nn.DataParallel(discriminator)

    if args.load:
        encoder = init_model(args, encoder, restore_path=param.src_encoder_path)
        src_encoder = init_model(args, src_encoder,
                                 restore_path=param.tgt_encoder_path)
        classifier = init_model(args, classifier,
                                restore_path=param.src_classifier_path)
        # discriminator = init_model(args, discriminator, restore_path=param.d_model_path)
    else:
        encoder = init_model(args, encoder)
        src_encoder = init_model(args, src_encoder)
        classifier = init_model(args, classifier)
        discriminator = init_model(args, discriminator)

    # train source model
    print("=== Pretrain encoder for source domain ===")
    if args.pretrain:
        encoder, classifier = pretrain(args, encoder, classifier, src_loader)

    # eval source model
    print("=== Evaluating classifier for source domain ===")
    evaluate(args, encoder, classifier, src_loader)
    src_acc = evaluate(args, encoder, classifier, tgt_all_loader)
    f.write(f'{args.src} -> {args.tgt} no adapt acc on src data: {src_acc}\n')

    # x, y = save_features(args, encoder, src_loader)
    # np.savez(os.path.join(param.model_root, 's_feat_pretrain'), x, y)
    # x, y = save_features(args, encoder, tgt_all_loader)
    # np.savez(os.path.join(param.model_root, 't_feat_pretrain'), x, y)

    # adapt
    print("=== Adapt encoder for target domain ===")
    src_encoder.load_state_dict(encoder.state_dict())
    if args.src_free:
        # note: reusing the same encoder vs. copying encoder into src_encoder
        # gives different baseline results
        s_res_features = src_gmm(args, encoder, src_loader)
        src_loader = s_numpy_dataloader(s_res_features, args.batch_size)
        encoder, classifier = cdan_adapt_src_free(args, encoder, src_encoder,
                                                  discriminator, classifier,
                                                  src_loader, tgt_train_loader,
                                                  tgt_all_loader)
    elif args.data_free:
        s_res_features = src_gmm(args, encoder, src_loader)
        t_res_features = tgt_gmm(encoder, tgt_all_loader, 1)
        src_loader = s_numpy_dataloader(s_res_features, args.batch_size)
        tgt_train_loader = t_numpy_dataloader(t_res_features, args.batch_size)
        encoder, classifier = cdan_adapt_data_free(args, encoder, discriminator,
                                                   classifier, src_loader,
                                                   tgt_train_loader,
                                                   tgt_all_loader)
    else:
        encoder, classifier = cdan_adapt(args, encoder, discriminator,
                                         classifier, src_loader,
                                         tgt_train_loader, tgt_all_loader)

    # x, y = save_features(args, encoder, src_loader)
    # np.savez(os.path.join(param.model_root, 's_feat_adapt_cdan'), x, y)
    # x, y = save_features(args, encoder, tgt_all_loader)
    # np.savez(os.path.join(param.model_root, 't_feat_adapt_cdan'), x, y)

    # argument setting
    print(
        f"model_type: {args.model}; batch_size: {args.batch_size}; data_free: {args.data_free}; "
        f"src_free: {args.src_free}; pre_epochs: {args.pre_epochs}; num_epochs: {args.num_epochs}; "
        f"src: {args.src}; tgt: {args.tgt}; kd: {args.kd}; dp: {args.dp}; ent: {args.ent}"
    )

    # eval target encoder on the full target dataset
    print("=== Evaluating classifier for encoded target domain ===")
    print(">>> domain adaptation <<<")
    tgt_acc = evaluate(args, encoder, classifier, tgt_all_loader)
    f.write(f'{args.src} -> {args.tgt}: DA acc on tgt data: {tgt_acc}\n')
    f.write(
        f"model_type: {args.model}; batch_size: {args.batch_size}; data_free: {args.data_free}; "
        f"src_free: {args.src_free}; pre_epochs: {args.pre_epochs}; num_epochs: {args.num_epochs}; "
        f"src: {args.src}; tgt: {args.tgt}; kd: {args.kd}; dp: {args.dp}; ent: {args.ent}\n\n"
    )
    elif args.mode == 'urdf':
        object_paths = [
            str(path) for path in Path(args.objects).rglob('*.obj')
        ]
        object_paths = list(
            filter(lambda path: not path.endswith("collision.obj"),
                   object_paths))
        with Pool(48) as p:
            rv = list(
                tqdm(p.imap(create_grasp_object_urdf, object_paths),
                     total=len(object_paths)))
        exit()

    config = load_config(args.config)

    if args.mode == 'cotrain':
        cotrain(*setup(args, config))
    elif args.mode == 'vae':
        train_vae(*setup_train_vae(args, config))
    elif args.mode == 'pretrain':
        pretrain(args, config)
    elif args.mode == 'pretrain_gn':
        pretrain_gn(args, config)
    elif args.mode == 'pretrain_dataset':
        generate_pretrain_data(*setup(args, config))
    elif args.mode == 'pretrain_imprint_dataset':
        generate_pretrain_imprint_data(*setup(args, config))
    elif args.mode == 'grasp_objects':
        create_grasp_objects(*setup(args, config))
    elif args.mode == 'imprint_baseline':
        generate_imprints(*setup(args, config))
def main(): """ Main """ # Arguments args = parser.parse_args() # Setup Distributed Training device, local_rank = setup(distributed=args.distributed) # Get Dataloaders for Dataset of choice dataloaders, args = get_dataloaders(args) # Setup logging, saving models, summaries args = experiment_config(parser, args) ''' Base Encoder ''' # Get available models from /model/network.py model_names = sorted(name for name in models.__dict__ if name.islower() and not name.startswith("__") and callable(models.__dict__[name])) # If model exists if any(args.model in model_name for model_name in model_names): # Load model base_encoder = getattr(models, args.model)( args, num_classes=args.n_classes) # Encoder else: raise NotImplementedError("Model Not Implemented: {}".format( args.model)) if not args.supervised: # freeze all layers but the last fc for name, param in base_encoder.named_parameters(): if name not in ['fc.weight', 'fc.bias']: param.requires_grad = False # init the fc layer init_weights(base_encoder) ''' MoCo Model ''' moco = MoCo_Model(args, queue_size=args.queue_size, momentum=args.queue_momentum, temperature=args.temperature) # Place model onto GPU(s) if args.distributed: torch.cuda.set_device(device) torch.set_num_threads(6) # n cpu threads / n processes per node moco = DistributedDataParallel(moco.cuda(), device_ids=[local_rank], output_device=local_rank, find_unused_parameters=True, broadcast_buffers=False) base_encoder = DistributedDataParallel(base_encoder.cuda(), device_ids=[local_rank], output_device=local_rank, find_unused_parameters=True, broadcast_buffers=False) # Only print from process (rank) 0 args.print_progress = True if int( os.environ.get('RANK')) == 0 else False else: # If non Distributed use DataParallel if torch.cuda.device_count() > 1: moco = nn.DataParallel(moco) base_encoder = nn.DataParallel(base_encoder) print('\nUsing', torch.cuda.device_count(), 'GPU(s).\n') moco.to(device) base_encoder.to(device) args.print_progress = True # Print Network Structure and Params if args.print_progress: print_network(moco, args) # prints out the network architecture etc logging.info('\npretrain/train: {} - valid: {} - test: {}'.format( len(dataloaders['train'].dataset), len(dataloaders['valid'].dataset), len(dataloaders['test'].dataset))) # launch model training or inference if not args.finetune: ''' Pretraining / Finetuning / Evaluate ''' if not args.supervised: # Pretrain the encoder and projection head pretrain(moco, dataloaders, args) # Load the state_dict from query encoder and load it on finetune net base_encoder = load_moco(base_encoder, args) else: supervised(base_encoder, dataloaders, args) # Load the state_dict from query encoder and load it on finetune net base_encoder = load_sup(base_encoder, args) # Supervised Finetuning of the supervised classification head finetune(base_encoder, dataloaders, args) # Evaluate the pretrained model and trained supervised head test_loss, test_acc, test_acc_top5 = evaluate(base_encoder, dataloaders, 'test', args.finetune_epochs, args) print('[Test] loss {:.4f} - acc {:.4f} - acc_top5 {:.4f}'.format( test_loss, test_acc, test_acc_top5)) if args.distributed: # cleanup torch.distributed.destroy_process_group() else: ''' Finetuning / Evaluate ''' # Do not Pretrain, just finetune and inference # Load the state_dict from query encoder and load it on finetune net base_encoder = load_moco(base_encoder, args) # Supervised Finetuning of the supervised classification head finetune(base_encoder, dataloaders, args) # Evaluate the pretrained model and 
trained supervised head test_loss, test_acc, test_acc_top5 = evaluate(base_encoder, dataloaders, 'test', args.finetune_epochs, args) print('[Test] loss {:.4f} - acc {:.4f} - acc_top5 {:.4f}'.format( test_loss, test_acc, test_acc_top5)) if args.distributed: # cleanup torch.distributed.destroy_process_group()
def main():
    parser = argparse.ArgumentParser(description='Argument Parser for SERIL.')
    parser.add_argument('--logdir', default='log',
                        help='Name of current experiment.')
    parser.add_argument('--n_jobs', default=2, type=int)
    parser.add_argument('--do', choices=['train', 'test'],
                        default='train', type=str)
    parser.add_argument('--mode', choices=['seril', 'finetune'],
                        default='seril', type=str)
    parser.add_argument('--model', choices=['LSTM', 'Residual', 'IRM'],
                        default='LSTM', type=str)

    # Options
    parser.add_argument('--config', default='config/config.yaml',
                        required=False)
    parser.add_argument('--seed', default=1126, type=int,
                        help='Random seed for reproducible results.',
                        required=False)
    parser.add_argument('--gpu', default='2', type=int,
                        help='Assigning GPU id.')

    args = parser.parse_args()

    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    # build log directory
    os.makedirs(args.logdir, exist_ok=True)

    # load configuration
    config = yaml.load(open(args.config, 'r'), Loader=yaml.FullLoader)

    if config['train']['loss'] == 'sisdr':
        loss_func = SingleSrcNegSDR("sisdr", zero_mean=False, reduction='mean')

    if args.do == 'train':
        torch.cuda.set_device(args.gpu)
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        assert len(config['dataset']['train']['clean']) == len(
            config['dataset']['train']['noisy']) and len(
            config['dataset']['train']['clean']) >= 1

        model_path = f'{args.logdir}/pretrain/{args.model}_model_T0.pth'
        lifelong_agent_path = f'{args.logdir}/pretrain/{args.model}_synapses_T0.pth'

        if os.path.exists(model_path) and os.path.exists(lifelong_agent_path):
            print(f'[Runner] - pretrained model already exists!')
            model = torch.load(model_path).to(device)
            lifelong_agent = torch.load(lifelong_agent_path).to(device)
            lifelong_agent.load_config(**config['train']['strategies'])
        else:
            print(f'[Runner] - run pretrain process!')
            preprocessor = OnlinePreprocessor(feat_list=feat_list).to(device)
            model = eval(f'{args.model}')(loss_func, preprocessor,
                                          **config['model']).to(device)
            lifelong_agent = LifeLongAgent(model, **config['train']['strategies'])
            pretrain(args, config, model, lifelong_agent)

        print(f'[Runner] - run adaptation process!')
        args.logdir = f'{args.logdir}/{args.mode}'
        if args.mode == 'seril':
            adapt(args, config, model, lifelong_agent)
        elif args.mode == 'finetune':
            adapt(args, config, model)

    if args.do == 'test':
        test(args, config)
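
# Example invocation sketch: the flags below are exactly those defined by the
# argument parser in main() above; the script name 'main.py' and the chosen
# values are illustrative assumptions only.
#
#   python main.py --do train --mode seril --model LSTM \
#       --config config/config.yaml --logdir log --gpu 0 --seed 1126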
def main(args, f):
    # args = parse_arguments()
    set_seed(args.train_seed)

    if args.model in ['roberta', 'distilroberta']:
        tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
    else:
        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

    # preprocess data
    src_eval_loader, src_loader, tgt_all_loader, tgt_train_loader = get_all_dataloader(
        args, tokenizer)

    # load models
    if args.model == 'bert':
        src_encoder = BertEncoder()
        tgt_encoder = BertEncoder()
        src_classifier = BertClassifier()
    elif args.model == 'distilbert':
        src_encoder = DistilBertEncoder()
        tgt_encoder = DistilBertEncoder()
        src_classifier = BertClassifier()
    elif args.model == 'roberta':
        src_encoder = RobertaEncoder()
        tgt_encoder = RobertaEncoder()
        src_classifier = RobertaClassifier()
    else:
        src_encoder = DistilRobertaEncoder()
        tgt_encoder = DistilRobertaEncoder()
        src_classifier = RobertaClassifier()
    discriminator = Discriminator()  # output dim is 2 instead of 1

    if args.load:
        src_encoder = init_model(args, src_encoder,
                                 restore_path=param.src_encoder_path)
        src_classifier = init_model(args, src_classifier,
                                    restore_path=param.src_classifier_path)
        tgt_encoder = init_model(args, tgt_encoder,
                                 restore_path=param.tgt_encoder_path)
        discriminator = init_model(args, discriminator,
                                   restore_path=param.d_model_path)
    else:
        src_encoder = init_model(args, src_encoder)
        src_classifier = init_model(args, src_classifier)
        tgt_encoder = init_model(args, tgt_encoder)
        discriminator = init_model(args, discriminator)

    # parallel models
    if torch.cuda.device_count() > 1:
        print('Let\'s use {} GPUs!'.format(torch.cuda.device_count()))
        src_encoder = nn.DataParallel(src_encoder)
        src_classifier = nn.DataParallel(src_classifier)
        tgt_encoder = nn.DataParallel(tgt_encoder)
        discriminator = nn.DataParallel(discriminator)

    # train source model
    print("=== Training classifier for source domain ===")
    if args.pretrain:
        src_encoder, src_classifier = pretrain(args, src_encoder,
                                               src_classifier, src_loader)

    # eval source model
    print("=== Evaluating classifier for source domain ===")
    evaluate(args, src_encoder, src_classifier, src_loader)
    src_acc = evaluate(args, src_encoder, src_classifier, tgt_all_loader)
    f.write(f'{args.src} -> {args.tgt}: No adapt acc on src data: {src_acc}\n')

    # freeze the source encoder
    for params in src_encoder.parameters():
        params.requires_grad = False

    # train target encoder by ADDA
    print("=== Training encoder for target domain ===")
    if args.adapt:
        tgt_encoder.load_state_dict(src_encoder.state_dict())
        tgt_encoder = adda_adapt(args, src_encoder, tgt_encoder, discriminator,
                                 src_loader, tgt_train_loader)

    # argument setting
    print(
        f"model_type: {args.model}; max_seq_len: {args.max_seq_length}; batch_size: {args.batch_size}; "
        f"pre_epochs: {args.pre_epochs}; num_epochs: {args.num_epochs}; src: {args.src}; tgt: {args.tgt}"
    )

    # eval target encoder on the full target dataset
    print("=== Evaluating classifier for encoded target domain ===")
    print(">>> domain adaptation <<<")
    tgt_acc = evaluate(args, tgt_encoder, src_classifier, tgt_all_loader)
    f.write(f'{args.src} -> {args.tgt}: DA acc on tgt data: {tgt_acc}\n')
    f.write(
        f"model_type: {args.model}; batch_size: {args.batch_size}; pre_epochs: {args.pre_epochs}; "
        f"num_epochs: {args.num_epochs}; src_free: {args.src_free}; src: {args.src}; "
        f"tgt: {args.tgt};\n\n")